1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
2
3 /*
4 Rubber Band Library
5 An audio time-stretching and pitch-shifting library.
6 Copyright 2007-2021 Particular Programs Ltd.
7
8 This program is free software; you can redistribute it and/or
9 modify it under the terms of the GNU General Public License as
10 published by the Free Software Foundation; either version 2 of the
11 License, or (at your option) any later version. See the file
12 COPYING included with this distribution for more information.
13
14 Alternatively, if you have a valid commercial licence for the
15 Rubber Band Library obtained by agreement with the copyright
16 holders, you may redistribute and/or modify it under the terms
17 described in that licence.
18
19 If you wish to distribute code using the Rubber Band Library
20 under terms other than those of the GNU General Public License,
21 you must obtain a valid commercial licence before doing so.
22 */
23
24 #include "StretcherImpl.h"
25
26 #include "audiocurves/PercussiveAudioCurve.h"
27 #include "audiocurves/HighFrequencyAudioCurve.h"
28 #include "audiocurves/ConstantAudioCurve.h"
29
30 #include "StretchCalculator.h"
31 #include "StretcherChannelData.h"
32
33 #include "dsp/Resampler.h"
34 #include "base/Profiler.h"
35 #include "system/VectorOps.h"
36 #include "system/sysutils.h"
37
38 #include <cassert>
39 #include <cmath>
40 #include <set>
41 #include <map>
42 #include <deque>
43 #include <algorithm>
44
45 using namespace RubberBand;
46
47 using std::cerr;
48 using std::endl;
49
50 namespace RubberBand {
51
52 #ifndef NO_THREADING
53
// Worker thread that performs all processing for a single channel (c)
// when the stretcher runs in threaded mode.  The condition variable is
// given a per-channel name ("data A", "data B", ...) for debugging.
RubberBandStretcher::Impl::ProcessThread::ProcessThread(Impl *s, size_t c) :
    m_s(s),
    m_channel(c),
    m_dataAvailable(std::string("data ") + char('A' + c)),
    m_abandoning(false)
{ }
60
// Thread main loop.  Repeatedly processes whatever chunks are
// available for this channel, waking the producer when space has been
// freed, and sleeping on m_dataAvailable when the input buffer runs
// dry.  Exits when the final chunk has been emitted, or early if
// abandon() has been called.
void
RubberBandStretcher::Impl::ProcessThread::run()
{
    if (m_s->m_debugLevel > 1) {
        cerr << "thread " << m_channel << " getting going" << endl;
    }

    ChannelData &cd = *m_s->m_channelData[m_channel];

    // inputSize == -1 means the total input length is not yet known,
    // i.e. the caller has not yet marked the input as final.
    while (cd.inputSize == -1 ||
           cd.inbuf->getReadSpace() > 0) {

    //    if (cd.inputSize != -1) {
    //        cerr << "inputSize == " << cd.inputSize
    //             << ", readSpace == " << cd.inbuf->getReadSpace() << endl;
    //    }

        bool any = false, last = false;
        m_s->processChunks(m_channel, any, last);

        if (last) break;

        if (any) {
            // We consumed some input; tell the producer there may now
            // be write space available in the ring buffer.
            m_s->m_spaceAvailable.lock();
            m_s->m_spaceAvailable.signal();
            m_s->m_spaceAvailable.unlock();
        }

        m_dataAvailable.lock();
        if (!m_s->testInbufReadSpace(m_channel) && !m_abandoning) {
            m_dataAvailable.wait(50000); // bounded in case of abandonment
        }
        m_dataAvailable.unlock();

        if (m_abandoning) {
            if (m_s->m_debugLevel > 1) {
                cerr << "thread " << m_channel << " abandoning" << endl;
            }
            return;
        }
    }

    // Final pass: drain any remaining data once all input is in.
    bool any = false, last = false;
    m_s->processChunks(m_channel, any, last);
    m_s->m_spaceAvailable.lock();
    m_s->m_spaceAvailable.signal();
    m_s->m_spaceAvailable.unlock();

    if (m_s->m_debugLevel > 1) {
        cerr << "thread " << m_channel << " done" << endl;
    }
}
113
// Wake the worker thread: new input has been written to this
// channel's input buffer (or state has changed and the thread should
// re-check its loop condition).
void
RubberBandStretcher::Impl::ProcessThread::signalDataAvailable()
{
    m_dataAvailable.lock();
    m_dataAvailable.signal();
    m_dataAvailable.unlock();
}
121
// Request that the thread exit as soon as possible.  The flag is
// polled in run(); the bounded wait there guarantees it is noticed
// within the wait timeout.
// NOTE(review): m_abandoning is written from another thread than the
// one polling it -- presumably declared volatile/atomic in the class
// definition; verify against the header.
void
RubberBandStretcher::Impl::ProcessThread::abandon()
{
    m_abandoning = true;
}
127
128 #endif
129
130 bool
resampleBeforeStretching() const131 RubberBandStretcher::Impl::resampleBeforeStretching() const
132 {
133 // We can't resample before stretching in offline mode, because
134 // the stretch calculation is based on doing it the other way
135 // around. It would take more work (and testing) to enable this.
136 if (!m_realtime) return false;
137
138 if (m_options & OptionPitchHighQuality) {
139 return (m_pitchScale < 1.0); // better sound
140 } else if (m_options & OptionPitchHighConsistency) {
141 return false;
142 } else {
143 return (m_pitchScale > 1.0); // better performance
144 }
145 }
146
147 void
prepareChannelMS(size_t c,const float * const * inputs,size_t offset,size_t samples,float * prepared)148 RubberBandStretcher::Impl::prepareChannelMS(size_t c,
149 const float *const *inputs,
150 size_t offset,
151 size_t samples,
152 float *prepared)
153 {
154 for (size_t i = 0; i < samples; ++i) {
155 float left = inputs[0][i + offset];
156 float right = inputs[1][i + offset];
157 float mid = (left + right) / 2;
158 float side = (left - right) / 2;
159 if (c == 0) {
160 prepared[i] = mid;
161 } else {
162 prepared[i] = side;
163 }
164 }
165 }
166
// Feed up to "samples" frames of caller input into channel c's input
// ring buffer, optionally resampling first (resample-before-stretch
// mode) and optionally converting to mid/side form (channels-together
// option).  Returns the number of caller frames actually consumed,
// which may be less than "samples" (or zero) if ring-buffer space
// runs out.  "final" is passed through to the resampler to let it
// flush its tail.
size_t
RubberBandStretcher::Impl::consumeChannel(size_t c,
                                          const float *const *inputs,
                                          size_t offset,
                                          size_t samples,
                                          bool final)
{
    Profiler profiler("RubberBandStretcher::Impl::consumeChannel");

    ChannelData &cd = *m_channelData[c];
    RingBuffer<float> &inbuf = *cd.inbuf;

    size_t toWrite = samples;
    size_t writable = inbuf.getWriteSpace();

    bool resampling = resampleBeforeStretching();

    const float *input = 0;

    // Mid/side processing applies only to the first two channels of a
    // stereo-or-wider stream with OptionChannelsTogether set.
    bool useMidSide = ((m_options & OptionChannelsTogether) &&
                       (m_channels >= 2) &&
                       (c < 2));

    if (resampling) {

        Profiler profiler2("RubberBandStretcher::Impl::resample");

        // Resampling changes the frame count by 1/m_pitchScale, so
        // clamp the caller sample count to what will fit after
        // conversion.
        toWrite = int(ceil(samples / m_pitchScale));
        if (writable < toWrite) {
            samples = int(floor(writable * m_pitchScale));
            if (samples == 0) return 0;
        }

        size_t reqSize = int(ceil(samples / m_pitchScale));
        if (reqSize > cd.resamplebufSize) {
            cerr << "WARNING: RubberBandStretcher::Impl::consumeChannel: resizing resampler buffer from "
                 << cd.resamplebufSize << " to " << reqSize << endl;
            cd.setResampleBufSize(reqSize);
        }

        // The IPP resampler is not safe for concurrent use across
        // channels, hence the mutex in threaded builds.
#ifndef NO_THREADING
#if defined HAVE_IPP && !defined USE_SPEEX
        if (m_threaded) {
            m_resamplerMutex.lock();
        }
#endif
#endif

        if (useMidSide) {
            prepareChannelMS(c, inputs, offset, samples, cd.ms);
            input = cd.ms;
        } else {
            input = inputs[c] + offset;
        }

        toWrite = cd.resampler->resample(&cd.resamplebuf,
                                         cd.resamplebufSize,
                                         &input,
                                         samples,
                                         1.0 / m_pitchScale,
                                         final);

#ifndef NO_THREADING
#if defined HAVE_IPP && !defined USE_SPEEX
        if (m_threaded) {
            m_resamplerMutex.unlock();
        }
#endif
#endif
    }

    if (writable < toWrite) {
        if (resampling) {
            return 0;
        }
        toWrite = writable;
    }

    if (resampling) {

        inbuf.write(cd.resamplebuf, toWrite);
        // Return/count the caller-side frame count, not the
        // post-resampling count.
        cd.inCount += samples;
        return samples;

    } else {

        if (useMidSide) {
            prepareChannelMS(c, inputs, offset, toWrite, cd.ms);
            input = cd.ms;
        } else {
            input = inputs[c] + offset;
        }

        inbuf.write(input, toWrite);
        cd.inCount += toWrite;
        return toWrite;
    }
}
265
// Process as many chunks as are available on channel c's input
// buffer.  Requires that the output increments have already been
// calculated (offline mode).  On return, "any" is true if at least
// one chunk was processed, and "last" is true if the final chunk for
// the channel was emitted.
void
RubberBandStretcher::Impl::processChunks(size_t c, bool &any, bool &last)
{
    Profiler profiler("RubberBandStretcher::Impl::processChunks");

    // Process as many chunks as there are available on the input
    // buffer for channel c.  This requires that the increments have
    // already been calculated.

    // This is the normal process method in offline mode.

    ChannelData &cd = *m_channelData[c];

    last = false;
    any = false;

    // Scratch buffer, allocated lazily only if we hit the overlong
    // increment case below.
    float *tmp = 0;

    while (!last) {

        if (!testInbufReadSpace(c)) {
            if (m_debugLevel > 1) {
                cerr << "processChunks: out of input" << endl;
            }
            break;
        }

        any = true;

        if (!cd.draining) {
            // Peek a whole analysis window (zero-padded by the ring
            // buffer if near the end), then advance by one hop.
            size_t ready = cd.inbuf->getReadSpace();
            assert(ready >= m_aWindowSize || cd.inputSize >= 0);
            cd.inbuf->peek(cd.fltbuf, std::min(ready, m_aWindowSize));
            cd.inbuf->skip(m_increment);
        }

        bool phaseReset = false;
        size_t phaseIncrement, shiftIncrement;
        getIncrements(c, phaseIncrement, shiftIncrement, phaseReset);

        if (shiftIncrement <= m_aWindowSize) {
            analyseChunk(c);
            last = processChunkForChannel
                (c, phaseIncrement, shiftIncrement, phaseReset);
        } else {
            // Shift increment is longer than the window: break it up
            // into window/4-sized pieces, re-processing the same
            // analysis frame for each piece.
            size_t bit = m_aWindowSize/4;
            if (m_debugLevel > 1) {
                cerr << "channel " << c << " breaking down overlong increment " << shiftIncrement << " into " << bit << "-size bits" << endl;
            }
            if (!tmp) tmp = allocate<float>(m_aWindowSize);
            analyseChunk(c);
            v_copy(tmp, cd.fltbuf, m_aWindowSize);
            for (size_t i = 0; i < shiftIncrement; i += bit) {
                v_copy(cd.fltbuf, tmp, m_aWindowSize);
                size_t thisIncrement = bit;
                if (i + thisIncrement > shiftIncrement) {
                    thisIncrement = shiftIncrement - i;
                }
                last = processChunkForChannel
                    (c, phaseIncrement + i, thisIncrement, phaseReset);
                // Only the first piece may carry the phase reset.
                phaseReset = false;
            }
        }

        cd.chunkCount++;
        if (m_debugLevel > 2) {
            cerr << "channel " << c << ": last = " << last << ", chunkCount = " << cd.chunkCount << endl;
        }
    }

    if (tmp) deallocate(tmp);
}
338
// Process a single chunk across all channels, provided each channel
// has enough buffered input.  Calculates increments on the fly
// (real-time mode).  Returns true if this was the last chunk.
bool
RubberBandStretcher::Impl::processOneChunk()
{
    Profiler profiler("RubberBandStretcher::Impl::processOneChunk");

    // Process a single chunk for all channels, provided there is
    // enough data on each channel for at least one chunk.  This is
    // able to calculate increments as it goes along.

    // This is the normal process method in RT mode.

    // First pass: ingest and analyse one window per channel.  If any
    // channel lacks data, bail out before consuming from the others.
    for (size_t c = 0; c < m_channels; ++c) {
        if (!testInbufReadSpace(c)) {
            if (m_debugLevel > 1) {
                cerr << "processOneChunk: out of input" << endl;
            }
            return false;
        }
        ChannelData &cd = *m_channelData[c];
        if (!cd.draining) {
            size_t ready = cd.inbuf->getReadSpace();
            assert(ready >= m_aWindowSize || cd.inputSize >= 0);
            cd.inbuf->peek(cd.fltbuf, std::min(ready, m_aWindowSize));
            cd.inbuf->skip(m_increment);
            analyseChunk(c);
        }
    }

    // Use any pre-calculated increments if present, otherwise derive
    // them from the freshly analysed frequency-domain data.
    bool phaseReset = false;
    size_t phaseIncrement, shiftIncrement;
    if (!getIncrements(0, phaseIncrement, shiftIncrement, phaseReset)) {
        calculateIncrements(phaseIncrement, shiftIncrement, phaseReset);
    }

    bool last = false;
    for (size_t c = 0; c < m_channels; ++c) {
        last = processChunkForChannel(c, phaseIncrement, shiftIncrement, phaseReset);
        m_channelData[c]->chunkCount++;
    }

    return last;
}
381
// Return true if channel c has enough buffered input to process a
// chunk.  As a side effect, may set cd.draining once the read space
// falls below half a window with all input already written -- from
// then on the remaining output comes from the accumulator only.
bool
RubberBandStretcher::Impl::testInbufReadSpace(size_t c)
{
    Profiler profiler("RubberBandStretcher::Impl::testInbufReadSpace");

    ChannelData &cd = *m_channelData[c];
    RingBuffer<float> &inbuf = *cd.inbuf;

    size_t rs = inbuf.getReadSpace();

    if (rs < m_aWindowSize && !cd.draining) {

        if (cd.inputSize == -1) {

            // Not all the input data has been written to the inbuf
            // (that's why the input size is not yet set).  We can't
            // process, because we don't have a full chunk of data, so
            // our process chunk would contain some empty padding in
            // its input -- and that would give incorrect output, as
            // we know there is more input to come.

#ifndef NO_THREADING
            if (!m_threaded) {
#endif
                if (m_debugLevel > 1) {
                    cerr << "Note: RubberBandStretcher: read space < chunk size ("
                         << inbuf.getReadSpace() << " < " << m_aWindowSize
                         << ") when not all input written, on processChunks for channel " << c << endl;
                }

#ifndef NO_THREADING
            }
#endif
            return false;
        }

        if (rs == 0) {

            if (m_debugLevel > 1) {
                cerr << "read space = 0, giving up" << endl;
            }
            return false;

        } else if (rs < m_aWindowSize/2) {

            if (m_debugLevel > 1) {
                cerr << "read space = " << rs << ", setting draining true" << endl;
            }

            cd.draining = true;
        }
    }

    return true;
}
437
// Process one chunk for one channel: modify phases, synthesise into
// the accumulator, and write shiftIncrement frames to the output
// buffer (resizing it if it would overflow).  Assumes input
// availability has already been checked via testInbufReadSpace.
// Returns true if this was the channel's last chunk.
bool
RubberBandStretcher::Impl::processChunkForChannel(size_t c,
                                                  size_t phaseIncrement,
                                                  size_t shiftIncrement,
                                                  bool phaseReset)
{
    Profiler profiler("RubberBandStretcher::Impl::processChunkForChannel");

    // Process a single chunk on a single channel.  This assumes
    // enough input data is available; caller must have tested this
    // using e.g. testInbufReadSpace first.  Return true if this is
    // the last chunk on the channel.

    if (phaseReset && (m_debugLevel > 1)) {
        cerr << "processChunkForChannel: phase reset found, incrs "
             << phaseIncrement << ":" << shiftIncrement << endl;
    }

    ChannelData &cd = *m_channelData[c];

    if (!cd.draining) {

        // This is the normal processing case -- draining is only
        // set when all the input has been used and we only need
        // to write from the existing accumulator into the output.

        // We know we have enough samples available in m_inbuf --
        // this is usually m_aWindowSize, but we know that if fewer
        // are available, it's OK to use zeroes for the rest
        // (which the ring buffer will provide) because we've
        // reached the true end of the data.

        // We need to peek m_aWindowSize samples for processing, and
        // then skip m_increment to advance the read pointer.

        modifyChunk(c, phaseIncrement, phaseReset);
        synthesiseChunk(c, shiftIncrement); // reads from cd.mag, cd.phase

        if (m_debugLevel > 2) {
            // Debug aid: stamp a visible sawtooth marker into the
            // accumulator wherever a phase reset occurred.
            if (phaseReset) {
                for (int i = 0; i < 10; ++i) {
                    cd.accumulator[i] = 1.2f - (i % 3) * 1.2f;
                }
            }
        }
    }

    bool last = false;

    if (cd.draining) {
        if (m_debugLevel > 1) {
            cerr << "draining: accumulator fill = " << cd.accumulatorFill << " (shiftIncrement = " << shiftIncrement << ")" << endl;
        }
        if (shiftIncrement == 0) {
            cerr << "WARNING: draining: shiftIncrement == 0, can't handle that in this context: setting to " << m_increment << endl;
            shiftIncrement = m_increment;
        }
        // Once the accumulator can be emptied in one shift, this is
        // the final chunk for the channel.
        if (cd.accumulatorFill <= shiftIncrement) {
            if (m_debugLevel > 1) {
                cerr << "reducing shift increment from " << shiftIncrement
                     << " to " << cd.accumulatorFill
                     << " and marking as last" << endl;
            }
            shiftIncrement = cd.accumulatorFill;
            last = true;
        }
    }

    // Frames needed in the output buffer, allowing for the resampler
    // expanding the output when pitch-shifting.
    int required = shiftIncrement;

    if (m_pitchScale != 1.0) {
        required = int(required / m_pitchScale) + 1;
    }

    int ws = cd.outbuf->getWriteSpace();
    if (ws < required) {
        if (m_debugLevel > 0) {
            cerr << "Buffer overrun on output for channel " << c << endl;
        }

        // The only correct thing we can do here is resize the buffer.
        // We can't wait for the client thread to read some data out
        // from the buffer so as to make more space, because the
        // client thread (if we are threaded at all) is probably stuck
        // in a process() call waiting for us to stow away enough
        // input increments to allow the process() call to complete.
        // This is an unhappy situation.

        RingBuffer<float> *oldbuf = cd.outbuf;
        cd.outbuf = oldbuf->resized(oldbuf->getSize() * 2);

        if (m_debugLevel > 1) {
            cerr << "(Write space was " << ws << ", needed " << required
                 << ": resized output buffer from " << oldbuf->getSize()
                 << " to " << cd.outbuf->getSize() << ")" << endl;
        }

        // The old buffer may still be referenced by a reader; hand it
        // to the scavenger rather than deleting it here.
        m_emergencyScavenger.claim(oldbuf);
    }

    writeChunk(c, shiftIncrement, last);
    return last;
}
541
// Calculate the next phase and shift increments for the upcoming
// chunk, on the assumption that all channels are in sync (real-time
// mode only).  Outputs are returned through the reference parameters;
// phaseReset is set when an onset (or prolonged silence) calls for a
// phase reset.
void
RubberBandStretcher::Impl::calculateIncrements(size_t &phaseIncrementRtn,
                                               size_t &shiftIncrementRtn,
                                               bool &phaseReset)
{
    Profiler profiler("RubberBandStretcher::Impl::calculateIncrements");

//    cerr << "calculateIncrements" << endl;

    // Calculate the next upcoming phase and shift increment, on the
    // basis that both channels are in sync.  This is in contrast to
    // getIncrements, which requires that all the increments have been
    // calculated in advance but can then return increments
    // corresponding to different chunks in different channels.

    // Requires frequency domain representations of channel data in
    // the mag and phase buffers in the channel.

    // This function is only used in real-time mode.

    phaseIncrementRtn = m_increment;
    shiftIncrementRtn = m_increment;
    phaseReset = false;

    if (m_channels == 0) return;

    ChannelData &cd = *m_channelData[0];

    // Sanity check: all channels must be at the same chunk.
    size_t bc = cd.chunkCount;
    for (size_t c = 1; c < m_channels; ++c) {
        if (m_channelData[c]->chunkCount != bc) {
            cerr << "ERROR: RubberBandStretcher::Impl::calculateIncrements: Channels are not in sync" << endl;
            return;
        }
    }

    const int hs = m_fftSize/2 + 1;

    // Normally we would mix down the time-domain signal and apply a
    // single FFT, or else mix down the Cartesian form of the
    // frequency-domain signal.  Both of those would be inefficient
    // from this position.  Fortunately, the onset detectors should
    // work reasonably well (maybe even better?) if we just sum the
    // magnitudes of the frequency-domain channel signals and forget
    // about phase entirely.  Normally we don't expect the channel
    // phases to cancel each other, and broadband effects will still
    // be apparent.

    float df = 0.f;
    bool silent = false;

    if (m_channels == 1) {

        // process_t is either float or double; dispatch to the curve
        // method matching its width.
        if (sizeof(process_t) == sizeof(double)) {
            df = m_phaseResetAudioCurve->processDouble((double *)cd.mag, m_increment);
            silent = (m_silentAudioCurve->processDouble((double *)cd.mag, m_increment) > 0.f);
        } else {
            df = m_phaseResetAudioCurve->processFloat((float *)cd.mag, m_increment);
            silent = (m_silentAudioCurve->processFloat((float *)cd.mag, m_increment) > 0.f);
        }

    } else {

        // Sum the per-channel magnitude spectra into stack scratch
        // space before running the detectors.
        process_t *tmp = (process_t *)alloca(hs * sizeof(process_t));

        v_zero(tmp, hs);
        for (size_t c = 0; c < m_channels; ++c) {
            v_add(tmp, m_channelData[c]->mag, hs);
        }

        if (sizeof(process_t) == sizeof(double)) {
            df = m_phaseResetAudioCurve->processDouble((double *)tmp, m_increment);
            silent = (m_silentAudioCurve->processDouble((double *)tmp, m_increment) > 0.f);
        } else {
            df = m_phaseResetAudioCurve->processFloat((float *)tmp, m_increment);
            silent = (m_silentAudioCurve->processFloat((float *)tmp, m_increment) > 0.f);
        }
    }

    double effectivePitchRatio = 1.0 / m_pitchScale;
    if (cd.resampler) {
        effectivePitchRatio = cd.resampler->getEffectiveRatio(effectivePitchRatio);
    }

    // A negative increment from the calculator encodes "phase reset
    // here" (see below).
    int incr = m_stretchCalculator->calculateSingle
        (m_timeRatio, effectivePitchRatio, df, m_increment,
         m_aWindowSize, m_sWindowSize);

    // Record values for retrieval by diagnostic accessors; drop them
    // silently if the history buffers are full.
    if (m_lastProcessPhaseResetDf.getWriteSpace() > 0) {
        m_lastProcessPhaseResetDf.write(&df, 1);
    }
    if (m_lastProcessOutputIncrements.getWriteSpace() > 0) {
        m_lastProcessOutputIncrements.write(&incr, 1);
    }

    if (incr < 0) {
        phaseReset = true;
        incr = -incr;
    }

    // The returned increment is the phase increment.  The shift
    // increment for one chunk is the same as the phase increment for
    // the following chunk (see comment below).  This means we don't
    // actually know the shift increment until we see the following
    // phase increment... which is a bit of a problem.

    // This implies we should use this increment for the shift
    // increment, and make the following phase increment the same as
    // it.  This means in RT mode we'll be one chunk later with our
    // phase reset than we would be in non-RT mode.  The sensitivity
    // of the broadband onset detector may mean that this isn't a
    // problem -- test it and see.

    shiftIncrementRtn = incr;

    if (cd.prevIncrement == 0) {
        phaseIncrementRtn = shiftIncrementRtn;
    } else {
        phaseIncrementRtn = cd.prevIncrement;
    }

    cd.prevIncrement = shiftIncrementRtn;

    // Track consecutive silent chunks; force a phase reset once a
    // full window's worth of hops has been silent, so that phases are
    // clean when audible material resumes.
    if (silent) ++m_silentHistory;
    else m_silentHistory = 0;

    if (m_silentHistory >= int(m_aWindowSize / m_increment) && !phaseReset) {
        phaseReset = true;
        if (m_debugLevel > 1) {
            cerr << "calculateIncrements: phase reset on silence (silent history == "
                 << m_silentHistory << ")" << endl;
        }
    }
}
676
// Look up the pre-calculated phase and shift increments for the given
// channel's current chunk (offline mode).  Returns false -- with
// sensible fallback values -- if no pre-calculated data is available
// for this chunk, in which case the caller may calculate them
// directly (see processOneChunk).
bool
RubberBandStretcher::Impl::getIncrements(size_t channel,
                                         size_t &phaseIncrementRtn,
                                         size_t &shiftIncrementRtn,
                                         bool &phaseReset)
{
    Profiler profiler("RubberBandStretcher::Impl::getIncrements");

    if (channel >= m_channels) {
        phaseIncrementRtn = m_increment;
        shiftIncrementRtn = m_increment;
        phaseReset = false;
        return false;
    }

    // There are two relevant output increments here.  The first is
    // the phase increment which we use when recalculating the phases
    // for the current chunk; the second is the shift increment used
    // to determine how far to shift the processing buffer after
    // writing the chunk.  The shift increment for one chunk is the
    // same as the phase increment for the following chunk.

    // When an onset occurs for which we need to reset phases, the
    // increment given will be negative.

    // When we reset phases, the previous shift increment (and so
    // current phase increments) must have been m_increment to ensure
    // consistency.

    // m_outputIncrements stores phase increments.

    ChannelData &cd = *m_channelData[channel];
    bool gotData = true;

    if (cd.chunkCount >= m_outputIncrements.size()) {
//        cerr << "WARNING: RubberBandStretcher::Impl::getIncrements:"
//             << " chunk count " << cd.chunkCount << " >= "
//             << m_outputIncrements.size() << endl;
        if (m_outputIncrements.size() == 0) {
            phaseIncrementRtn = m_increment;
            shiftIncrementRtn = m_increment;
            phaseReset = false;
            return false;
        } else {
            // Clamp to the last available entry but report that we
            // had no genuine data for this chunk.
            cd.chunkCount = m_outputIncrements.size()-1;
            gotData = false;
        }
    }

    int phaseIncrement = m_outputIncrements[cd.chunkCount];

    // The shift increment is the next chunk's phase increment, when
    // one exists.
    int shiftIncrement = phaseIncrement;
    if (cd.chunkCount + 1 < m_outputIncrements.size()) {
        shiftIncrement = m_outputIncrements[cd.chunkCount + 1];
    }

    // Negative values encode "phase reset at this chunk".
    if (phaseIncrement < 0) {
        phaseIncrement = -phaseIncrement;
        phaseReset = true;
    }

    if (shiftIncrement < 0) {
        shiftIncrement = -shiftIncrement;
    }
    /*
    if (shiftIncrement >= int(m_windowSize)) {
        cerr << "*** ERROR: RubberBandStretcher::Impl::processChunks: shiftIncrement " << shiftIncrement << " >= windowSize " << m_windowSize << " at " << cd.chunkCount << " (of " << m_outputIncrements.size() << ")" << endl;
        shiftIncrement = m_windowSize;
    }
    */
    phaseIncrementRtn = phaseIncrement;
    shiftIncrementRtn = shiftIncrement;
    if (cd.chunkCount == 0) phaseReset = true; // don't mess with the first chunk
    return gotData;
}
752
753 void
analyseChunk(size_t channel)754 RubberBandStretcher::Impl::analyseChunk(size_t channel)
755 {
756 Profiler profiler("RubberBandStretcher::Impl::analyseChunk");
757
758 ChannelData &cd = *m_channelData[channel];
759
760 process_t *const R__ dblbuf = cd.dblbuf;
761 float *const R__ fltbuf = cd.fltbuf;
762
763 // cd.fltbuf is known to contain m_aWindowSize samples
764
765 if (m_aWindowSize > m_fftSize) {
766 m_afilter->cut(fltbuf);
767 }
768
769 cutShiftAndFold(dblbuf, m_fftSize, fltbuf, m_awindow);
770
771 cd.fft->forwardPolar(dblbuf, cd.mag, cd.phase);
772 }
773
// Phase-vocoder core: recalculate the phase for every bin of the
// current chunk so that, after synthesis at outputIncrement instead
// of m_increment, phases remain coherent.  In "laminar" mode (the
// default, i.e. OptionPhaseIndependent not set) bins may inherit
// phase advance from their higher-frequency neighbour to preserve
// vertical phase coherence.  When phaseReset is set, phases are left
// unmodified (optionally only outside a mid band if
// OptionTransientsMixed is set).
void
RubberBandStretcher::Impl::modifyChunk(size_t channel,
                                       size_t outputIncrement,
                                       bool phaseReset)
{
    Profiler profiler("RubberBandStretcher::Impl::modifyChunk");

    ChannelData &cd = *m_channelData[channel];

    if (phaseReset && m_debugLevel > 1) {
        cerr << "phase reset: leaving phases unmodified" << endl;
    }

    const process_t rate = process_t(m_sampleRate);
    const int count = m_fftSize / 2;

    // "unchanged" tracks whether synthesis can skip the inverse
    // transform entirely (see synthesiseChunk).
    bool unchanged = cd.unchanged && (outputIncrement == m_increment);
    bool fullReset = phaseReset;
    bool laminar = !(m_options & OptionPhaseIndependent);
    bool bandlimited = (m_options & OptionTransientsMixed);
    // Band (in bins) excluded from phase reset in band-limited
    // (transients-mixed) mode: roughly 150 Hz to 1 kHz.
    int bandlow = lrint((150 * m_fftSize) / rate);
    int bandhigh = lrint((1000 * m_fftSize) / rate);

    float freq0 = m_freq0;
    float freq1 = m_freq1;
    float freq2 = m_freq2;

    if (laminar) {
        // At higher stretch ratios, raise the crossover frequencies
        // (preserving their ratios) so that more of the spectrum is
        // eligible for phase inheritance.
        float r = getEffectiveRatio();
        if (r > 1) {
            float rf0 = 600 + (600 * ((r-1)*(r-1)*(r-1)*2));
            float f1ratio = freq1 / freq0;
            float f2ratio = freq2 / freq0;
            freq0 = std::max(freq0, rf0);
            freq1 = freq0 * f1ratio;
            freq2 = freq0 * f2ratio;
        }
    }

    // Convert the crossover frequencies to bin indices, kept ordered.
    int limit0 = lrint((freq0 * m_fftSize) / rate);
    int limit1 = lrint((freq1 * m_fftSize) / rate);
    int limit2 = lrint((freq2 * m_fftSize) / rate);

    if (limit1 < limit0) limit1 = limit0;
    if (limit2 < limit1) limit2 = limit1;

    process_t prevInstability = 0.0;
    bool prevDirection = false;

    // "distance" counts how many consecutive bins have inherited
    // phase; inheritance is cut off after maxdist bins.
    process_t distance = 0.0;
    const process_t maxdist = 8.0;

    const int lookback = 1;

    process_t distacc = 0.0;

    // Walk from the top bin down so that each bin can look at the
    // (already-visited) bin "lookback" above it.
    for (int i = count; i >= 0; i -= lookback) {

        bool resetThis = phaseReset;

        if (bandlimited) {
            // In transients-mixed mode, keep modifying phases inside
            // the mid band even during a reset.
            if (resetThis) {
                if (i > bandlow && i < bandhigh) {
                    resetThis = false;
                    fullReset = false;
                }
            }
        }

        process_t p = cd.phase[i];
        process_t perr = 0.0;
        process_t outphase = p;

        // Maximum inheritance distance for this bin, smaller at lower
        // frequencies.
        process_t mi = maxdist;
        if (i <= limit0) mi = 0.0;
        else if (i <= limit1) mi = 1.0;
        else if (i <= limit2) mi = 3.0;

        if (!resetThis) {

            // Expected phase advance for this bin over one input hop.
            process_t omega = (2 * M_PI * m_increment * i) / (m_fftSize);

            process_t pp = cd.prevPhase[i];
            process_t ep = pp + omega;
            // Deviation of the measured phase from the expected one,
            // wrapped to (-pi, pi].
            perr = princarg(p - ep);

            process_t instability = fabs(perr - cd.prevError[i]);
            bool direction = (perr > cd.prevError[i]);

            bool inherit = false;

            if (laminar) {
                // Inherit from the bin above when this bin looks less
                // stable than it, unless we've run out of inheritance
                // distance or sit on a band boundary.
                if (distance >= mi || i == count) {
                    inherit = false;
                } else if (bandlimited && (i == bandhigh || i == bandlow)) {
                    inherit = false;
                } else if (instability > prevInstability &&
                           direction == prevDirection) {
                    inherit = true;
                }
            }

            // Phase advance scaled to the output hop length.
            process_t advance = outputIncrement * ((omega + perr) / m_increment);

            if (inherit) {
                // Blend this bin's own advance with the one the bin
                // above actually received, weighted by distance.
                process_t inherited =
                    cd.unwrappedPhase[i + lookback] - cd.prevPhase[i + lookback];
                advance = ((advance * distance) +
                           (inherited * (maxdist - distance)))
                    / maxdist;
                outphase = p + advance;
                distacc += distance;
                distance += 1.0;
            } else {
                outphase = cd.unwrappedPhase[i] + advance;
                distance = 0.0;
            }

            prevInstability = instability;
            prevDirection = direction;

        } else {
            distance = 0.0;
        }

        cd.prevError[i] = perr;
        cd.prevPhase[i] = p;
        cd.phase[i] = outphase;
        cd.unwrappedPhase[i] = outphase;
    }

    if (m_debugLevel > 2) {
        cerr << "mean inheritance distance = " << distacc / count << endl;
    }

    // A full phase reset means the phases were left untouched, so the
    // time-domain data is unchanged and synthesis can take the
    // shortcut path.
    if (fullReset) unchanged = true;
    cd.unchanged = unchanged;

    if (unchanged && m_debugLevel > 1) {
        cerr << "frame unchanged on channel " << channel << endl;
    }
}
916
917
// Preserve formants under pitch shifting for one channel: estimate
// the spectral envelope via the cepstrum, divide it out of cd.mag,
// rescale the envelope along the frequency axis by the inverse of the
// pitch factor, and multiply it back in.
void
RubberBandStretcher::Impl::formantShiftChunk(size_t channel)
{
    Profiler profiler("RubberBandStretcher::Impl::formantShiftChunk");

    ChannelData &cd = *m_channelData[channel];

    process_t *const R__ mag = cd.mag;
    process_t *const R__ envelope = cd.envelope;
    process_t *const R__ dblbuf = cd.dblbuf;

    const int sz = m_fftSize;
    const int hs = sz / 2;
    const process_t factor = 1.0 / sz;

    // Cepstrum of the magnitude spectrum (inverse FFT of log
    // magnitudes).
    cd.fft->inverseCepstral(mag, dblbuf);

    // Lifter: keep only the low-quefrency part (the slowly varying
    // envelope), discarding quefrencies above ~sampleRate/700.
    const int cutoff = m_sampleRate / 700;

//    cerr <<"cutoff = "<< cutoff << ", m_sampleRate/cutoff = " << m_sampleRate/cutoff << endl;

    // Halve the boundary coefficients of the retained window.
    // NOTE(review): assumes cutoff <= sz; with a high sample rate and
    // a small FFT size this index could exceed the buffer -- verify
    // the configured ranges elsewhere guarantee this.
    dblbuf[0] /= 2;
    dblbuf[cutoff-1] /= 2;

    for (int i = cutoff; i < sz; ++i) {
        dblbuf[i] = 0.0;
    }

    // Undo the unscaled-FFT normalisation on the retained part.
    v_scale(dblbuf, factor, cutoff);

    // Forward transform back to the frequency domain; the real part
    // (envelope) is the log spectral envelope, spare holds the
    // imaginary part.
    process_t *spare = (process_t *)alloca((hs + 1) * sizeof(process_t));
    cd.fft->forward(dblbuf, envelope, spare);

    // Exponentiate to get the linear envelope, then flatten the
    // spectrum by dividing it out.
    v_exp(envelope, hs + 1);
    v_divide(mag, envelope, hs + 1);

    if (m_pitchScale > 1.0) {
        // scaling up, we want a new envelope that is lower by the pitch factor
        for (int target = 0; target <= hs; ++target) {
            int source = lrint(target * m_pitchScale);
            if (source > hs) {
                envelope[target] = 0.0;
            } else {
                envelope[target] = envelope[source];
            }
        }
    } else {
        // scaling down, we want a new envelope that is higher by the pitch factor
        for (int target = hs; target > 0; ) {
            --target;
            int source = lrint(target * m_pitchScale);
            envelope[target] = envelope[source];
        }
    }

    // Re-apply the shifted envelope.
    v_multiply(mag, envelope, hs+1);

    cd.unchanged = false;
}
977
// Synthesis stage for one channel: optionally apply formant
// preservation, inverse-transform the (possibly modified) polar data
// back to the time domain, window it, and overlap-add it into the
// accumulator ready for writeChunk.  shiftIncrement is the output hop
// for this chunk.
void
RubberBandStretcher::Impl::synthesiseChunk(size_t channel,
                                           size_t shiftIncrement)
{
    Profiler profiler("RubberBandStretcher::Impl::synthesiseChunk");


    if ((m_options & OptionFormantPreserved) &&
        (m_pitchScale != 1.0)) {
        formantShiftChunk(channel);
    }

    ChannelData &cd = *m_channelData[channel];

    process_t *const R__ dblbuf = cd.dblbuf;
    float *const R__ fltbuf = cd.fltbuf;
    float *const R__ accumulator = cd.accumulator;
    float *const R__ windowAccumulator = cd.windowAccumulator;

    const int fsz = m_fftSize;
    const int hs = fsz / 2;

    const int wsz = m_sWindowSize;

    // If modifyChunk determined the frame was unchanged, the
    // time-domain data already in fltbuf is still valid and the
    // inverse transform can be skipped.
    if (!cd.unchanged) {

        // Our FFTs produced unscaled results.  Scale before inverse
        // transform rather than after, to avoid overflow if using a
        // fixed-point FFT.
        float factor = 1.f / fsz;
        v_scale(cd.mag, factor, hs + 1);

        cd.fft->inversePolar(cd.mag, cd.phase, cd.dblbuf);

        if (wsz == fsz) {
            // Swap the two halves to undo the zero-phase rotation
            // applied at analysis time.
            v_convert(fltbuf, dblbuf + hs, hs);
            v_convert(fltbuf + hs, dblbuf, hs);
        } else {
            // Synthesis window differs from the FFT size: lay the
            // rotated FFT output out centred in a wsz-length frame,
            // wrapping around the FFT buffer as needed.
            v_zero(fltbuf, wsz);
            int j = fsz - wsz/2;
            while (j < 0) j += fsz;
            for (int i = 0; i < wsz; ++i) {
                fltbuf[i] += dblbuf[j];
                if (++j == fsz) j = 0;
            }
        }
    }

    if (wsz > fsz) {
        // Long synthesis window: apply a sinc interpolator matched to
        // twice the shift increment, regenerating it only when the
        // increment changes.
        int p = shiftIncrement * 2;
        if (cd.interpolatorScale != p) {
            SincWindow<float>::write(cd.interpolator, wsz, p);
            cd.interpolatorScale = p;
        }
        v_multiply(fltbuf, cd.interpolator, wsz);
    }

    // Window and overlap-add into the accumulator.
    m_swindow->cut(fltbuf);
    v_add(accumulator, fltbuf, wsz);
    cd.accumulatorFill = std::max(cd.accumulatorFill, size_t(wsz));

    // Keep the window-sum accumulator in step, for the normalising
    // division performed in writeChunk.
    if (wsz > fsz) {
        // reuse fltbuf to calculate interpolating window shape for
        // window accumulator
        v_copy(fltbuf, cd.interpolator, wsz);
        m_swindow->cut(fltbuf);
        v_add(windowAccumulator, fltbuf, wsz);
    } else {
        m_swindow->add(windowAccumulator, m_awindow->getArea() * 1.5f);
    }
}
1049
void
RubberBandStretcher::Impl::writeChunk(size_t channel, size_t shiftIncrement, bool last)
{
    // Emit shiftIncrement frames of finished audio for one channel:
    // normalise the overlap-add accumulator against the accumulated
    // window shape, optionally resample for pitch shifting, hand the
    // result to writeOutput, then shift both accumulators down ready
    // for the next chunk.  "last" is forwarded to the resampler so it
    // can flush its internal state at end of stream.
    Profiler profiler("RubberBandStretcher::Impl::writeChunk");

    ChannelData &cd = *m_channelData[channel];

    float *const R__ accumulator = cd.accumulator;
    float *const R__ windowAccumulator = cd.windowAccumulator;

    const int sz = cd.accumulatorFill;   // valid frames currently in the accumulator
    const int si = shiftIncrement;       // frames to emit and shift out this call

    if (m_debugLevel > 2) {
        cerr << "writeChunk(" << channel << ", " << shiftIncrement << ", " << last << ")" << endl;
    }

    // Divide out the accumulated window shape so that the overlapping
    // analysis/synthesis windows sum back to unity gain over the
    // frames we are about to emit.
    v_divide(accumulator, windowAccumulator, si);

    // for exact sample scaling (probably not meaningful if we
    // were running in RT mode)
    size_t theoreticalOut = 0;
    if (cd.inputSize >= 0) {
        theoreticalOut = lrint(cd.inputSize * m_timeRatio);
    }

    bool resampledAlready = resampleBeforeStretching();

    if (!resampledAlready &&
        (m_pitchScale != 1.0 || m_options & OptionPitchHighConsistency) &&
        cd.resampler) {

        Profiler profiler2("RubberBandStretcher::Impl::resample");

        size_t reqSize = int(ceil(si / m_pitchScale));
        if (reqSize > cd.resamplebufSize) {
            // This shouldn't normally happen -- the buffer is
            // supposed to be initialised with enough space in the
            // first place. But we retain this check in case the
            // pitch scale has changed since then, or the stretch
            // calculator has gone mad, or something.
            cerr << "WARNING: RubberBandStretcher::Impl::writeChunk: resizing resampler buffer from "
                 << cd.resamplebufSize << " to " << reqSize << endl;
            cd.setResampleBufSize(reqSize);
        }

#ifndef NO_THREADING
#if defined HAVE_IPP && !defined USE_SPEEX
        // Serialise resampler calls across the per-channel process
        // threads -- the guard is compiled in for IPP builds only,
        // presumably because that resampler implementation is not
        // safe for concurrent use (NOTE(review): confirm against the
        // IPP resampler's documentation).
        if (m_threaded) {
            m_resamplerMutex.lock();
        }
#endif
#endif

        size_t outframes = cd.resampler->resample(&cd.resamplebuf,
                                                  cd.resamplebufSize,
                                                  &cd.accumulator,
                                                  si,
                                                  1.0 / m_pitchScale,
                                                  last);

#ifndef NO_THREADING
#if defined HAVE_IPP && !defined USE_SPEEX
        if (m_threaded) {
            m_resamplerMutex.unlock();
        }
#endif
#endif

        writeOutput(*cd.outbuf, cd.resamplebuf,
                    outframes, cd.outCount, theoreticalOut);

    } else {
        // No pitch change needed here (or resampling already happened
        // earlier in the chain), so the accumulator contents go out
        // directly.
        writeOutput(*cd.outbuf, accumulator,
                    si, cd.outCount, theoreticalOut);
    }

    // Shift both accumulators down by the amount consumed and zero
    // the vacated tail, ready for the next overlap-add.
    v_move(accumulator, accumulator + si, sz - si);
    v_zero(accumulator + sz - si, si);

    v_move(windowAccumulator, windowAccumulator + si, sz - si);
    v_zero(windowAccumulator + sz - si, si);

    if (int(cd.accumulatorFill) > si) {
        cd.accumulatorFill -= si;
    } else {
        // Everything accumulated so far has been written out; if we
        // were draining, this channel's output is now complete.
        cd.accumulatorFill = 0;
        if (cd.draining) {
            if (m_debugLevel > 1) {
                cerr << "RubberBandStretcher::Impl::processChunks: setting outputComplete to true" << endl;
            }
            cd.outputComplete = true;
        }
    }
}
1145
1146 void
writeOutput(RingBuffer<float> & to,float * from,size_t qty,size_t & outCount,size_t theoreticalOut)1147 RubberBandStretcher::Impl::writeOutput(RingBuffer<float> &to, float *from, size_t qty, size_t &outCount, size_t theoreticalOut)
1148 {
1149 Profiler profiler("RubberBandStretcher::Impl::writeOutput");
1150
1151 // In non-RT mode, we don't want to write the first startSkip
1152 // samples, because the first chunk is centred on the start of the
1153 // output. In RT mode we didn't apply any pre-padding in
1154 // configure(), so we don't want to remove any here.
1155
1156 size_t startSkip = 0;
1157 if (!m_realtime) {
1158 startSkip = lrintf((m_sWindowSize/2) / m_pitchScale);
1159 }
1160
1161 if (outCount > startSkip) {
1162
1163 // this is the normal case
1164
1165 if (theoreticalOut > 0) {
1166 if (m_debugLevel > 1) {
1167 cerr << "theoreticalOut = " << theoreticalOut
1168 << ", outCount = " << outCount
1169 << ", startSkip = " << startSkip
1170 << ", qty = " << qty << endl;
1171 }
1172 if (outCount - startSkip <= theoreticalOut &&
1173 outCount - startSkip + qty > theoreticalOut) {
1174 qty = theoreticalOut - (outCount - startSkip);
1175 if (m_debugLevel > 1) {
1176 cerr << "reduce qty to " << qty << endl;
1177 }
1178 }
1179 }
1180
1181 if (m_debugLevel > 2) {
1182 cerr << "writing " << qty << endl;
1183 }
1184
1185 size_t written = to.write(from, qty);
1186
1187 if (written < qty) {
1188 cerr << "WARNING: RubberBandStretcher::Impl::writeOutput: "
1189 << "Buffer overrun on output: wrote " << written
1190 << " of " << qty << " samples" << endl;
1191 }
1192
1193 outCount += written;
1194 return;
1195 }
1196
1197 // the rest of this is only used during the first startSkip samples
1198
1199 if (outCount + qty <= startSkip) {
1200 if (m_debugLevel > 1) {
1201 cerr << "qty = " << qty << ", startSkip = "
1202 << startSkip << ", outCount = " << outCount
1203 << ", discarding" << endl;
1204 }
1205 outCount += qty;
1206 return;
1207 }
1208
1209 size_t off = startSkip - outCount;
1210 if (m_debugLevel > 1) {
1211 cerr << "qty = " << qty << ", startSkip = "
1212 << startSkip << ", outCount = " << outCount
1213 << ", writing " << qty - off
1214 << " from start offset " << off << endl;
1215 }
1216 to.write(from + off, qty - off);
1217 outCount += qty;
1218 }
1219
1220 int
available() const1221 RubberBandStretcher::Impl::available() const
1222 {
1223 Profiler profiler("RubberBandStretcher::Impl::available");
1224
1225 #ifndef NO_THREADING
1226 if (m_threaded) {
1227 MutexLocker locker(&m_threadSetMutex);
1228 if (m_channelData.empty()) return 0;
1229 } else {
1230 if (m_channelData.empty()) return 0;
1231 }
1232 #endif
1233
1234 #ifndef NO_THREADING
1235 if (!m_threaded) {
1236 #endif
1237 for (size_t c = 0; c < m_channels; ++c) {
1238 if (m_channelData[c]->inputSize >= 0) {
1239 // cerr << "available: m_done true" << endl;
1240 if (m_channelData[c]->inbuf->getReadSpace() > 0) {
1241 if (m_debugLevel > 1) {
1242 cerr << "calling processChunks(" << c << ") from available" << endl;
1243 }
1244 //!!! do we ever actually do this? if so, this method should not be const
1245 // ^^^ yes, we do sometimes -- e.g. when fed a very short file
1246 bool any = false, last = false;
1247 ((RubberBandStretcher::Impl *)this)->processChunks(c, any, last);
1248 }
1249 }
1250 }
1251 #ifndef NO_THREADING
1252 }
1253 #endif
1254
1255 size_t min = 0;
1256 bool consumed = true;
1257 bool haveResamplers = false;
1258
1259 for (size_t i = 0; i < m_channels; ++i) {
1260 size_t availIn = m_channelData[i]->inbuf->getReadSpace();
1261 size_t availOut = m_channelData[i]->outbuf->getReadSpace();
1262 if (m_debugLevel > 2) {
1263 cerr << "available on channel " << i << ": " << availOut << " (waiting: " << availIn << ")" << endl;
1264 }
1265 if (i == 0 || availOut < min) min = availOut;
1266 if (!m_channelData[i]->outputComplete) consumed = false;
1267 if (m_channelData[i]->resampler) haveResamplers = true;
1268 }
1269
1270 if (min == 0 && consumed) return -1;
1271 if (m_pitchScale == 1.0) return min;
1272
1273 if (haveResamplers) return min; // resampling has already happened
1274 return int(floor(min / m_pitchScale));
1275 }
1276
1277 size_t
retrieve(float * const * output,size_t samples) const1278 RubberBandStretcher::Impl::retrieve(float *const *output, size_t samples) const
1279 {
1280 Profiler profiler("RubberBandStretcher::Impl::retrieve");
1281
1282 size_t got = samples;
1283
1284 for (size_t c = 0; c < m_channels; ++c) {
1285 size_t gotHere = m_channelData[c]->outbuf->read(output[c], got);
1286 if (gotHere < got) {
1287 if (c > 0) {
1288 if (m_debugLevel > 0) {
1289 cerr << "RubberBandStretcher::Impl::retrieve: WARNING: channel imbalance detected" << endl;
1290 }
1291 }
1292 got = gotHere;
1293 }
1294 }
1295
1296 if ((m_options & OptionChannelsTogether) && (m_channels >= 2)) {
1297 for (size_t i = 0; i < got; ++i) {
1298 float mid = output[0][i];
1299 float side = output[1][i];
1300 float left = mid + side;
1301 float right = mid - side;
1302 output[0][i] = left;
1303 output[1][i] = right;
1304 }
1305 }
1306
1307 return got;
1308 }
1309
1310 }
1311
1312