1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
2 
3 /*
4     Rubber Band Library
5     An audio time-stretching and pitch-shifting library.
6     Copyright 2007-2021 Particular Programs Ltd.
7 
8     This program is free software; you can redistribute it and/or
9     modify it under the terms of the GNU General Public License as
10     published by the Free Software Foundation; either version 2 of the
11     License, or (at your option) any later version.  See the file
12     COPYING included with this distribution for more information.
13 
14     Alternatively, if you have a valid commercial licence for the
15     Rubber Band Library obtained by agreement with the copyright
16     holders, you may redistribute and/or modify it under the terms
17     described in that licence.
18 
19     If you wish to distribute code using the Rubber Band Library
20     under terms other than those of the GNU General Public License,
21     you must obtain a valid commercial licence before doing so.
22 */
23 
24 #include "StretchCalculator.h"
25 
26 #include <math.h>
27 #include <iostream>
28 #include <deque>
29 #include <set>
30 #include <cassert>
31 #include <algorithm>
32 
33 #include "system/sysutils.h"
34 
35 namespace RubberBand
36 {
37 
StretchCalculator(size_t sampleRate,size_t inputIncrement,bool useHardPeaks)38 StretchCalculator::StretchCalculator(size_t sampleRate,
39                                      size_t inputIncrement,
40                                      bool useHardPeaks) :
41     m_sampleRate(sampleRate),
42     m_increment(inputIncrement),
43     m_prevDf(0),
44     m_prevRatio(1.0),
45     m_prevTimeRatio(1.0),
46     m_transientAmnesty(0),
47     m_debugLevel(0),
48     m_useHardPeaks(useHardPeaks),
49     m_inFrameCounter(0),
50     m_frameCheckpoint(0, 0),
51     m_outFrameCounter(0)
52 {
53 //    std::cerr << "StretchCalculator::StretchCalculator: useHardPeaks = " << useHardPeaks << std::endl;
54 }
55 
~StretchCalculator()56 StretchCalculator::~StretchCalculator()
57 {
58 }
59 
60 void
setKeyFrameMap(const std::map<size_t,size_t> & mapping)61 StretchCalculator::setKeyFrameMap(const std::map<size_t, size_t> &mapping)
62 {
63     m_keyFrameMap = mapping;
64 
65     // Ensure we always have a 0 -> 0 mapping. If there's nothing in
66     // the map at all, don't need to worry about this (empty map is
67     // handled separately anyway)
68     if (!m_keyFrameMap.empty()) {
69         if (m_keyFrameMap.find(0) == m_keyFrameMap.end()) {
70             m_keyFrameMap[0] = 0;
71         }
72     }
73 }
74 
75 std::vector<int>
calculate(double ratio,size_t inputDuration,const std::vector<float> & phaseResetDf,const std::vector<float> & stretchDf)76 StretchCalculator::calculate(double ratio, size_t inputDuration,
77                              const std::vector<float> &phaseResetDf,
78                              const std::vector<float> &stretchDf)
79 {
80     assert(phaseResetDf.size() == stretchDf.size());
81 
82     m_peaks = findPeaks(phaseResetDf);
83 
84     size_t totalCount = phaseResetDf.size();
85 
86     size_t outputDuration = lrint(inputDuration * ratio);
87 
88     if (m_debugLevel > 0) {
89         std::cerr << "StretchCalculator::calculate(): inputDuration " << inputDuration << ", ratio " << ratio << ", outputDuration " << outputDuration;
90     }
91 
92     outputDuration = lrint((phaseResetDf.size() * m_increment) * ratio);
93 
94     if (m_debugLevel > 0) {
95         std::cerr << " (rounded up to " << outputDuration << ")";
96         std::cerr << ", df size " << phaseResetDf.size() << ", increment "
97                   << m_increment << std::endl;
98     }
99 
100     std::vector<Peak> peaks; // peak position (in chunks) and hardness
101     std::vector<size_t> targets; // targets for mapping peaks (in samples)
102     mapPeaks(peaks, targets, outputDuration, totalCount);
103 
104     if (m_debugLevel > 1) {
105         std::cerr << "have " << peaks.size() << " fixed positions" << std::endl;
106     }
107 
108     size_t totalInput = 0, totalOutput = 0;
109 
110     // For each region between two consecutive time sync points, we
111     // want to take the number of output chunks to be allocated and
112     // the detection function values within the range, and produce a
113     // series of increments that sum to the number of output chunks,
114     // such that each increment is displaced from the input increment
115     // by an amount inversely proportional to the magnitude of the
116     // stretch detection function at that input step.
117 
118     size_t regionTotalChunks = 0;
119 
120     std::vector<int> increments;
121 
122     for (size_t i = 0; i <= peaks.size(); ++i) {
123 
124         size_t regionStart, regionStartChunk, regionEnd, regionEndChunk;
125         bool phaseReset = false;
126 
127         if (i == 0) {
128             regionStartChunk = 0;
129             regionStart = 0;
130         } else {
131             regionStartChunk = peaks[i-1].chunk;
132             regionStart = targets[i-1];
133             phaseReset = peaks[i-1].hard;
134         }
135 
136         if (i == peaks.size()) {
137 //            std::cerr << "note: i (=" << i << ") == peaks.size(); regionEndChunk " << regionEndChunk << " -> " << totalCount << ", regionEnd " << regionEnd << " -> " << outputDuration << std::endl;
138             regionEndChunk = totalCount;
139             regionEnd = outputDuration;
140         } else {
141             regionEndChunk = peaks[i].chunk;
142             regionEnd = targets[i];
143         }
144 
145         if (regionStartChunk > totalCount) regionStartChunk = totalCount;
146         if (regionStart > outputDuration) regionStart = outputDuration;
147         if (regionEndChunk > totalCount) regionEndChunk = totalCount;
148         if (regionEnd > outputDuration) regionEnd = outputDuration;
149 
150         size_t regionDuration = regionEnd - regionStart;
151         regionTotalChunks += regionDuration;
152 
153         std::vector<float> dfRegion;
154 
155         for (size_t j = regionStartChunk; j != regionEndChunk; ++j) {
156             dfRegion.push_back(stretchDf[j]);
157         }
158 
159         if (m_debugLevel > 1) {
160             std::cerr << "distributeRegion from " << regionStartChunk << " to " << regionEndChunk << " (samples " << regionStart << " to " << regionEnd << ")" << std::endl;
161         }
162 
163         dfRegion = smoothDF(dfRegion);
164 
165         std::vector<int> regionIncrements = distributeRegion
166             (dfRegion, regionDuration, ratio, phaseReset);
167 
168         size_t totalForRegion = 0;
169 
170         for (size_t j = 0; j < regionIncrements.size(); ++j) {
171 
172             int incr = regionIncrements[j];
173 
174             if (j == 0 && phaseReset) increments.push_back(-incr);
175             else increments.push_back(incr);
176 
177             if (incr > 0) totalForRegion += incr;
178             else totalForRegion += -incr;
179 
180             totalInput += m_increment;
181         }
182 
183         if (totalForRegion != regionDuration) {
184             std::cerr << "*** ERROR: distributeRegion returned wrong duration " << totalForRegion << ", expected " << regionDuration << std::endl;
185         }
186 
187         totalOutput += totalForRegion;
188     }
189 
190     if (m_debugLevel > 0) {
191         std::cerr << "total input increment = " << totalInput << " (= " << totalInput / m_increment << " chunks), output = " << totalOutput << ", ratio = " << double(totalOutput)/double(totalInput) << ", ideal output " << size_t(ceil(totalInput * ratio)) << std::endl;
192         std::cerr << "(region total = " << regionTotalChunks << ")" << std::endl;
193     }
194 
195     return increments;
196 }
197 
198 void
mapPeaks(std::vector<Peak> & peaks,std::vector<size_t> & targets,size_t outputDuration,size_t totalCount)199 StretchCalculator::mapPeaks(std::vector<Peak> &peaks,
200                             std::vector<size_t> &targets,
201                             size_t outputDuration,
202                             size_t totalCount)
203 {
204     // outputDuration is in audio samples; totalCount is in chunks
205 
206     if (m_keyFrameMap.empty()) {
207         // "normal" behaviour -- fixed points are strictly in
208         // proportion
209         peaks = m_peaks;
210         for (size_t i = 0; i < peaks.size(); ++i) {
211             targets.push_back
212                 (lrint((double(peaks[i].chunk) * outputDuration) / totalCount));
213         }
214         return;
215     }
216 
217     // We have been given a set of source -> target sample frames in
218     // m_keyFrameMap.  We want to ensure that (to the nearest chunk) these
219     // are followed exactly, and any fixed points that we calculated
220     // ourselves are interpolated in linear proportion in between.
221 
222     size_t peakidx = 0;
223     std::map<size_t, size_t>::const_iterator mi = m_keyFrameMap.begin();
224 
225     // NB we know for certain we have a mapping from 0 -> 0 (or at
226     // least, some mapping for source sample 0) because that is
227     // enforced in setKeyFrameMap above.  However, we aren't guaranteed
228     // to have a mapping for the total duration -- we will usually
229     // need to assume it maps to the normal duration * ratio sample
230 
231     while (mi != m_keyFrameMap.end()) {
232 
233 //        std::cerr << "mi->first is " << mi->first << ", second is " << mi->second <<std::endl;
234 
235         // The map we've been given is from sample to sample, but
236         // we can only map from chunk to sample.  We should perhaps
237         // adjust the target sample to compensate for the discrepancy
238         // between the chunk position and the exact requested source
239         // sample.  But we aren't doing that yet.
240 
241         size_t sourceStartChunk = mi->first / m_increment;
242         size_t sourceEndChunk = totalCount;
243 
244         size_t targetStartSample = mi->second;
245         size_t targetEndSample = outputDuration;
246 
247         ++mi;
248         if (mi != m_keyFrameMap.end()) {
249             sourceEndChunk = mi->first / m_increment;
250             targetEndSample = mi->second;
251         }
252 
253         if (sourceStartChunk >= totalCount ||
254             sourceStartChunk >= sourceEndChunk ||
255             targetStartSample >= outputDuration ||
256             targetStartSample >= targetEndSample) {
257             std::cerr << "NOTE: ignoring mapping from chunk " << sourceStartChunk << " to sample " << targetStartSample << "\n(source or target chunk exceeds total count, or end is not later than start)" << std::endl;
258             continue;
259         }
260 
261         // one peak and target for the mapping, then one for each of
262         // the computed peaks that appear before the following mapping
263 
264         Peak p;
265         p.chunk = sourceStartChunk;
266         p.hard = false; // mappings are in time only, not phase reset points
267         peaks.push_back(p);
268         targets.push_back(targetStartSample);
269 
270         if (m_debugLevel > 1) {
271             std::cerr << "mapped chunk " << sourceStartChunk << " (frame " << sourceStartChunk * m_increment << ") -> " << targetStartSample << std::endl;
272         }
273 
274         while (peakidx < m_peaks.size()) {
275 
276             size_t pchunk = m_peaks[peakidx].chunk;
277 
278             if (pchunk < sourceStartChunk) {
279                 // shouldn't happen, should have been dealt with
280                 // already -- but no harm in ignoring it explicitly
281                 ++peakidx;
282                 continue;
283             }
284             if (pchunk == sourceStartChunk) {
285                 // convert that last peak to a hard one, after all
286                 peaks[peaks.size()-1].hard = true;
287                 ++peakidx;
288                 continue;
289             }
290             if (pchunk >= sourceEndChunk) {
291                 // leave the rest for after the next mapping
292                 break;
293             }
294             p.chunk = pchunk;
295             p.hard = m_peaks[peakidx].hard;
296 
297             double proportion =
298                 double(pchunk - sourceStartChunk) /
299                 double(sourceEndChunk - sourceStartChunk);
300 
301             size_t target =
302                 targetStartSample +
303                 lrint(proportion *
304                       (targetEndSample - targetStartSample));
305 
306             if (target <= targets[targets.size()-1] + m_increment) {
307                 // peaks will become too close together afterwards, ignore
308                 ++peakidx;
309                 continue;
310             }
311 
312             if (m_debugLevel > 1) {
313                 std::cerr << "  peak chunk " << pchunk << " (frame " << pchunk * m_increment << ") -> " << target << std::endl;
314             }
315 
316             peaks.push_back(p);
317             targets.push_back(target);
318             ++peakidx;
319         }
320     }
321 }
322 
323 int64_t
expectedOutFrame(int64_t inFrame,double timeRatio)324 StretchCalculator::expectedOutFrame(int64_t inFrame, double timeRatio)
325 {
326     int64_t checkpointedAt = m_frameCheckpoint.first;
327     int64_t checkpointed = m_frameCheckpoint.second;
328     return int64_t(round(checkpointed + (inFrame - checkpointedAt) * timeRatio));
329 }
330 
331 int
calculateSingle(double timeRatio,double effectivePitchRatio,float df,size_t inIncrement,size_t analysisWindowSize,size_t synthesisWindowSize)332 StretchCalculator::calculateSingle(double timeRatio,
333                                    double effectivePitchRatio,
334                                    float df,
335                                    size_t inIncrement,
336                                    size_t analysisWindowSize,
337                                    size_t synthesisWindowSize)
338 {
339     double ratio = timeRatio / effectivePitchRatio;
340 
341     int increment = int(inIncrement);
342     if (increment == 0) increment = m_increment;
343 
344     int outIncrement = lrint(increment * ratio); // the normal case
345     bool isTransient = false;
346 
347     // We want to ensure, as close as possible, that the phase reset
348     // points appear at the right audio frame numbers. To this end we
349     // track the incoming frame number, its corresponding expected
350     // output frame number, and the actual output frame number
351     // projected based on the ratios provided.
352     //
353     // There are two subtleties:
354     //
355     // (1) on a ratio change, we need to checkpoint the expected
356     // output frame number reached so far and start counting again
357     // with the new ratio. We could do this with a reset to zero, but
358     // it's easier to reason about absolute input/output frame
359     // matches, so for the moment at least we're doing this by
360     // explicitly checkpointing the current numbers (hence the use of
361     // the above expectedOutFrame() function which refers to the
362     // last checkpointed values).
363     //
364     // (2) in the case of a pitch shift in a configuration where
365     // resampling occurs after stretching, all of our output
366     // increments will be effectively modified by resampling after we
367     // return. This is why we separate out timeRatio and
368     // effectivePitchRatio arguments - the former is the ratio that
369     // has already been applied and the latter is the ratio that will
370     // be applied by any subsequent resampling step (which will be 1.0
371     // / pitchScale if resampling is happening after stretching). So
372     // the overall ratio is timeRatio / effectivePitchRatio.
373 
374     bool ratioChanged = (ratio != m_prevRatio);
375     if (ratioChanged) {
376         // Reset our frame counters from the ratio change.
377 
378         // m_outFrameCounter tracks the frames counted at output from
379         // this function, which normally precedes resampling - hence
380         // the use of timeRatio rather than ratio here
381 
382         if (m_debugLevel > 1) {
383             std::cerr << "StretchCalculator: ratio changed from " << m_prevRatio << " to " << ratio << std::endl;
384         }
385 
386         int64_t toCheckpoint = expectedOutFrame
387             (m_inFrameCounter, m_prevTimeRatio);
388         m_frameCheckpoint =
389             std::pair<int64_t, int64_t>(m_inFrameCounter, toCheckpoint);
390     }
391 
392     m_prevRatio = ratio;
393     m_prevTimeRatio = timeRatio;
394 
395     if (m_debugLevel > 2) {
396         std::cerr << "StretchCalculator::calculateSingle: timeRatio = "
397                   << timeRatio << ", effectivePitchRatio = "
398                   << effectivePitchRatio << " (that's 1.0 / "
399                   << (1.0 / effectivePitchRatio)
400                   << "), ratio = " << ratio << ", df = " << df
401                   << ", inIncrement = " << inIncrement
402                   << ", default outIncrement = " << outIncrement
403                   << ", analysisWindowSize = " << analysisWindowSize
404                   << ", synthesisWindowSize = " << synthesisWindowSize
405                   << std::endl;
406 
407         std::cerr << "inFrameCounter = " << m_inFrameCounter
408                   << ", outFrameCounter = " << m_outFrameCounter
409                   << std::endl;
410 
411         std::cerr << "The next sample out is input sample " << m_inFrameCounter << std::endl;
412     }
413 
414     int64_t intended = expectedOutFrame
415         (m_inFrameCounter + analysisWindowSize/4, timeRatio);
416     int64_t projected = int64_t
417         (round(m_outFrameCounter + (synthesisWindowSize/4 * effectivePitchRatio)));
418 
419     int64_t divergence = projected - intended;
420 
421     if (m_debugLevel > 2) {
422         std::cerr << "for current frame + quarter frame: intended " << intended << ", projected " << projected << ", divergence " << divergence << std::endl;
423     }
424 
425     // In principle, the threshold depends on chunk size: larger chunk
426     // sizes need higher thresholds.  Since chunk size depends on
427     // ratio, I suppose we could in theory calculate the threshold
428     // from the ratio directly.  For the moment we're happy if it
429     // works well in common situations.
430 
431     float transientThreshold = 0.35f;
432 //    if (ratio > 1) transientThreshold = 0.25f;
433 
434     if (m_useHardPeaks && df > m_prevDf * 1.1f && df > transientThreshold) {
435         if (divergence > 1000 || divergence < -1000) {
436             if (m_debugLevel > 1) {
437                 std::cerr << "StretchCalculator::calculateSingle: transient, but we're not permitting it because the divergence (" << divergence << ") is too great" << std::endl;
438             }
439         } else {
440             isTransient = true;
441         }
442     }
443 
444     if (m_debugLevel > 2) {
445         std::cerr << "df = " << df << ", prevDf = " << m_prevDf
446                   << ", thresh = " << transientThreshold << std::endl;
447     }
448 
449     m_prevDf = df;
450 
451     if (m_transientAmnesty > 0) {
452         if (isTransient) {
453             if (m_debugLevel > 1) {
454                 std::cerr << "StretchCalculator::calculateSingle: transient, but we have an amnesty (df " << df << ", threshold " << transientThreshold << ")" << std::endl;
455             }
456             isTransient = false;
457         }
458         --m_transientAmnesty;
459     }
460 
461     if (isTransient) {
462         if (m_debugLevel > 1) {
463             std::cerr << "StretchCalculator::calculateSingle: transient at (df " << df << ", threshold " << transientThreshold << ")" << std::endl;
464         }
465 
466         // as in offline mode, 0.05 sec approx min between transients
467         m_transientAmnesty =
468             lrint(ceil(double(m_sampleRate) / (20 * double(increment))));
469 
470         outIncrement = increment;
471 
472     } else {
473 
474         double recovery = 0.0;
475         if (divergence > 1000 || divergence < -1000) {
476             recovery = divergence / ((m_sampleRate / 10.0) / increment);
477         } else if (divergence > 100 || divergence < -100) {
478             recovery = divergence / ((m_sampleRate / 20.0) / increment);
479         } else {
480             recovery = divergence / 4.0;
481         }
482 
483         int incr = lrint(outIncrement - recovery);
484         if (m_debugLevel > 2 || (m_debugLevel > 1 && divergence != 0)) {
485             std::cerr << "divergence = " << divergence << ", recovery = " << recovery << ", incr = " << incr << ", ";
486         }
487 
488         int minIncr = lrint(increment * ratio * 0.3);
489         int maxIncr = lrint(increment * ratio * 2);
490 
491         if (incr < minIncr) {
492             incr = minIncr;
493         } else if (incr > maxIncr) {
494             incr = maxIncr;
495         }
496 
497         if (m_debugLevel > 2 || (m_debugLevel > 1 && divergence != 0)) {
498             std::cerr << "clamped into [" << minIncr << ", " << maxIncr
499                       << "] becomes " << incr << std::endl;
500         }
501 
502         if (incr < 0) {
503             std::cerr << "WARNING: internal error: incr < 0 in calculateSingle"
504                       << std::endl;
505             outIncrement = 0;
506         } else {
507             outIncrement = incr;
508         }
509     }
510 
511     if (m_debugLevel > 1) {
512         std::cerr << "StretchCalculator::calculateSingle: returning isTransient = "
513                   << isTransient << ", outIncrement = " << outIncrement
514                   << std::endl;
515     }
516 
517     m_inFrameCounter += inIncrement;
518     m_outFrameCounter += outIncrement * effectivePitchRatio;
519 
520     if (isTransient) {
521         return -outIncrement;
522     } else {
523         return outIncrement;
524     }
525 }
526 
527 void
reset()528 StretchCalculator::reset()
529 {
530     m_prevDf = 0;
531     m_prevRatio = 1.0;
532     m_prevTimeRatio = 1.0;
533     m_inFrameCounter = 0;
534     m_frameCheckpoint = std::pair<int64_t, int64_t>(0, 0);
535     m_outFrameCounter = 0.0;
536     m_transientAmnesty = 0;
537     m_keyFrameMap.clear();
538 }
539 
540 std::vector<StretchCalculator::Peak>
findPeaks(const std::vector<float> & rawDf)541 StretchCalculator::findPeaks(const std::vector<float> &rawDf)
542 {
543     std::vector<float> df = smoothDF(rawDf);
544 
545     // We distinguish between "soft" and "hard" peaks.  A soft peak is
546     // simply the result of peak-picking on the smoothed onset
547     // detection function, and it represents any (strong-ish) onset.
548     // We aim to ensure always that soft peaks are placed at the
549     // correct position in time.  A hard peak is where there is a very
550     // rapid rise in detection function, and it presumably represents
551     // a more broadband, noisy transient.  For these we perform a
552     // phase reset (if in the appropriate mode), and we locate the
553     // reset at the first point where we notice enough of a rapid
554     // rise, rather than necessarily at the peak itself, in order to
555     // preserve the shape of the transient.
556 
557     std::set<size_t> hardPeakCandidates;
558     std::set<size_t> softPeakCandidates;
559 
560     if (m_useHardPeaks) {
561 
562         // 0.05 sec approx min between hard peaks
563         size_t hardPeakAmnesty = lrint(ceil(double(m_sampleRate) /
564                                             (20 * double(m_increment))));
565         size_t prevHardPeak = 0;
566 
567         if (m_debugLevel > 1) {
568             std::cerr << "hardPeakAmnesty = " << hardPeakAmnesty << std::endl;
569         }
570 
571         for (size_t i = 1; i + 1 < df.size(); ++i) {
572 
573             if (df[i] < 0.1) continue;
574             if (df[i] <= df[i-1] * 1.1) continue;
575             if (df[i] < 0.22) continue;
576 
577             if (!hardPeakCandidates.empty() &&
578                 i < prevHardPeak + hardPeakAmnesty) {
579                 continue;
580             }
581 
582             bool hard = (df[i] > 0.4);
583 
584             if (hard && (m_debugLevel > 1)) {
585                 std::cerr << "hard peak at " << i << ": " << df[i]
586                           << " > absolute " << 0.4
587                           << std::endl;
588             }
589 
590             if (!hard) {
591                 hard = (df[i] > df[i-1] * 1.4);
592 
593                 if (hard && (m_debugLevel > 1)) {
594                     std::cerr << "hard peak at " << i << ": " << df[i]
595                               << " > prev " << df[i-1] << " * 1.4"
596                               << std::endl;
597                 }
598             }
599 
600             if (!hard && i > 1) {
601                 hard = (df[i]   > df[i-1] * 1.2 &&
602                         df[i-1] > df[i-2] * 1.2);
603 
604                 if (hard && (m_debugLevel > 1)) {
605                     std::cerr << "hard peak at " << i << ": " << df[i]
606                               << " > prev " << df[i-1] << " * 1.2 and "
607                               << df[i-1] << " > prev " << df[i-2] << " * 1.2"
608                               << std::endl;
609                 }
610             }
611 
612             if (!hard && i > 2) {
613                 // have already established that df[i] > df[i-1] * 1.1
614                 hard = (df[i] > 0.3 &&
615                         df[i-1] > df[i-2] * 1.1 &&
616                         df[i-2] > df[i-3] * 1.1);
617 
618                 if (hard && (m_debugLevel > 1)) {
619                     std::cerr << "hard peak at " << i << ": " << df[i]
620                               << " > prev " << df[i-1] << " * 1.1 and "
621                               << df[i-1] << " > prev " << df[i-2] << " * 1.1 and "
622                               << df[i-2] << " > prev " << df[i-3] << " * 1.1"
623                               << std::endl;
624                 }
625             }
626 
627             if (!hard) continue;
628 
629 //            (df[i+1] > df[i] && df[i+1] > df[i-1] * 1.8) ||
630 //                df[i] > 0.4) {
631 
632             size_t peakLocation = i;
633 
634             if (i + 1 < rawDf.size() &&
635                 rawDf[i + 1] > rawDf[i] * 1.4) {
636 
637                 ++peakLocation;
638 
639                 if (m_debugLevel > 1) {
640                     std::cerr << "pushing hard peak forward to " << peakLocation << ": " << df[peakLocation] << " > " << df[peakLocation-1] << " * " << 1.4 << std::endl;
641                 }
642             }
643 
644             hardPeakCandidates.insert(peakLocation);
645             prevHardPeak = peakLocation;
646         }
647     }
648 
649     size_t medianmaxsize = lrint(ceil(double(m_sampleRate) /
650                                  double(m_increment))); // 1 sec ish
651 
652     if (m_debugLevel > 1) {
653         std::cerr << "mediansize = " << medianmaxsize << std::endl;
654     }
655     if (medianmaxsize < 7) {
656         medianmaxsize = 7;
657         if (m_debugLevel > 1) {
658             std::cerr << "adjusted mediansize = " << medianmaxsize << std::endl;
659         }
660     }
661 
662     int minspacing = lrint(ceil(double(m_sampleRate) /
663                                 (20 * double(m_increment)))); // 0.05 sec ish
664 
665     std::deque<float> medianwin;
666     std::vector<float> sorted;
667     int softPeakAmnesty = 0;
668 
669     for (size_t i = 0; i < medianmaxsize/2; ++i) {
670         medianwin.push_back(0);
671     }
672     for (size_t i = 0; i < medianmaxsize/2 && i < df.size(); ++i) {
673         medianwin.push_back(df[i]);
674     }
675 
676     size_t lastSoftPeak = 0;
677 
678     for (size_t i = 0; i < df.size(); ++i) {
679 
680         size_t mediansize = medianmaxsize;
681 
682         if (medianwin.size() < mediansize) {
683             mediansize = medianwin.size();
684         }
685 
686         size_t middle = medianmaxsize / 2;
687         if (middle >= mediansize) middle = mediansize-1;
688 
689         size_t nextDf = i + mediansize - middle;
690 
691         if (mediansize < 2) {
692             if (mediansize > medianmaxsize) { // absurd, but never mind that
693                 medianwin.pop_front();
694             }
695             if (nextDf < df.size()) {
696                 medianwin.push_back(df[nextDf]);
697             } else {
698                 medianwin.push_back(0);
699             }
700             continue;
701         }
702 
703         if (m_debugLevel > 2) {
704 //            std::cerr << "have " << mediansize << " in median buffer" << std::endl;
705         }
706 
707         sorted.clear();
708         for (size_t j = 0; j < mediansize; ++j) {
709             sorted.push_back(medianwin[j]);
710         }
711         std::sort(sorted.begin(), sorted.end());
712 
713         size_t n = 90; // percentile above which we pick peaks
714         size_t index = (sorted.size() * n) / 100;
715         if (index >= sorted.size()) index = sorted.size()-1;
716         if (index == sorted.size()-1 && index > 0) --index;
717         float thresh = sorted[index];
718 
719 //        if (m_debugLevel > 2) {
720 //            std::cerr << "medianwin[" << middle << "] = " << medianwin[middle] << ", thresh = " << thresh << std::endl;
721 //            if (medianwin[middle] == 0.f) {
722 //                std::cerr << "contents: ";
723 //                for (size_t j = 0; j < medianwin.size(); ++j) {
724 //                    std::cerr << medianwin[j] << " ";
725 //                }
726 //                std::cerr << std::endl;
727 //            }
728 //        }
729 
730         if (medianwin[middle] > thresh &&
731             medianwin[middle] > medianwin[middle-1] &&
732             medianwin[middle] > medianwin[middle+1] &&
733             softPeakAmnesty == 0) {
734 
735             size_t maxindex = middle;
736             float maxval = medianwin[middle];
737 
738             for (size_t j = middle+1; j < mediansize; ++j) {
739                 if (medianwin[j] > maxval) {
740                     maxval = medianwin[j];
741                     maxindex = j;
742                 } else if (medianwin[j] < medianwin[middle]) {
743                     break;
744                 }
745             }
746 
747             size_t peak = i + maxindex - middle;
748 
749 //            std::cerr << "i = " << i << ", maxindex = " << maxindex << ", middle = " << middle << ", so peak at " << peak << std::endl;
750 
751             if (softPeakCandidates.empty() || lastSoftPeak != peak) {
752 
753                 if (m_debugLevel > 1) {
754                     std::cerr << "soft peak at " << peak << " ("
755                               << peak * m_increment << "): "
756                               << medianwin[middle] << " > "
757                               << thresh << " and "
758                               << medianwin[middle]
759                               << " > " << medianwin[middle-1] << " and "
760                               << medianwin[middle]
761                               << " > " << medianwin[middle+1]
762                               << std::endl;
763                 }
764 
765                 if (peak >= df.size()) {
766                     if (m_debugLevel > 2) {
767                         std::cerr << "peak is beyond end"  << std::endl;
768                     }
769                 } else {
770                     softPeakCandidates.insert(peak);
771                     lastSoftPeak = peak;
772                 }
773             }
774 
775             softPeakAmnesty = minspacing + maxindex - middle;
776             if (m_debugLevel > 2) {
777                 std::cerr << "amnesty = " << softPeakAmnesty << std::endl;
778             }
779 
780         } else if (softPeakAmnesty > 0) --softPeakAmnesty;
781 
782         if (mediansize >= medianmaxsize) {
783             medianwin.pop_front();
784         }
785         if (nextDf < df.size()) {
786             medianwin.push_back(df[nextDf]);
787         } else {
788             medianwin.push_back(0);
789         }
790     }
791 
792     std::vector<Peak> peaks;
793 
794     while (!hardPeakCandidates.empty() || !softPeakCandidates.empty()) {
795 
796         bool haveHardPeak = !hardPeakCandidates.empty();
797         bool haveSoftPeak = !softPeakCandidates.empty();
798 
799         size_t hardPeak = (haveHardPeak ? *hardPeakCandidates.begin() : 0);
800         size_t softPeak = (haveSoftPeak ? *softPeakCandidates.begin() : 0);
801 
802         Peak peak;
803         peak.hard = false;
804         peak.chunk = softPeak;
805 
806         bool ignore = false;
807 
808         if (haveHardPeak &&
809             (!haveSoftPeak || hardPeak <= softPeak)) {
810 
811             if (m_debugLevel > 2) {
812                 std::cerr << "Hard peak: " << hardPeak << std::endl;
813             }
814 
815             peak.hard = true;
816             peak.chunk = hardPeak;
817             hardPeakCandidates.erase(hardPeakCandidates.begin());
818 
819         } else {
820             if (m_debugLevel > 2) {
821                 std::cerr << "Soft peak: " << softPeak << std::endl;
822             }
823             if (!peaks.empty() &&
824                 peaks[peaks.size()-1].hard &&
825                 peaks[peaks.size()-1].chunk + 3 >= softPeak) {
826                 if (m_debugLevel > 2) {
827                     std::cerr << "(ignoring, as we just had a hard peak)"
828                               << std::endl;
829                 }
830                 ignore = true;
831             }
832         }
833 
834         if (haveSoftPeak && peak.chunk == softPeak) {
835             softPeakCandidates.erase(softPeakCandidates.begin());
836         }
837 
838         if (!ignore) {
839             peaks.push_back(peak);
840         }
841     }
842 
843     return peaks;
844 }
845 
846 std::vector<float>
smoothDF(const std::vector<float> & df)847 StretchCalculator::smoothDF(const std::vector<float> &df)
848 {
849     std::vector<float> smoothedDF;
850 
851     for (size_t i = 0; i < df.size(); ++i) {
852         // three-value moving mean window for simple smoothing
853         float total = 0.f, count = 0;
854         if (i > 0) { total += df[i-1]; ++count; }
855         total += df[i]; ++count;
856         if (i+1 < df.size()) { total += df[i+1]; ++count; }
857         float mean = total / count;
858         smoothedDF.push_back(mean);
859     }
860 
861     return smoothedDF;
862 }
863 
864 std::vector<int>
distributeRegion(const std::vector<float> & dfIn,size_t duration,float ratio,bool phaseReset)865 StretchCalculator::distributeRegion(const std::vector<float> &dfIn,
866                                     size_t duration, float ratio, bool phaseReset)
867 {
868     std::vector<float> df(dfIn);
869     std::vector<int> increments;
870 
871     // The peak for the stretch detection function may appear after
872     // the peak that we're using to calculate the start of the region.
873     // We don't want that.  If we find a peak in the first half of
874     // the region, we should set all the values up to that point to
875     // the same value as the peak.
876 
877     // (This might not be subtle enough, especially if the region is
878     // long -- we want a bound that corresponds to acoustic perception
879     // of the audible bounce.)
880 
881     for (size_t i = 1; i < df.size()/2; ++i) {
882         if (df[i] < df[i-1]) {
883             if (m_debugLevel > 1) {
884                 std::cerr << "stretch peak offset: " << i-1 << " (peak " << df[i-1] << ")" << std::endl;
885             }
886             for (size_t j = 0; j < i-1; ++j) {
887                 df[j] = df[i-1];
888             }
889             break;
890         }
891     }
892 
893     float maxDf = 0;
894 
895     for (size_t i = 0; i < df.size(); ++i) {
896         if (i == 0 || df[i] > maxDf) maxDf = df[i];
897     }
898 
899     // We want to try to ensure the last 100ms or so (if possible) are
900     // tending back towards the maximum df, so that the stretchiness
901     // reduces at the end of the stretched region.
902 
903     int reducedRegion = lrint((0.1 * m_sampleRate) / m_increment);
904     if (reducedRegion > int(df.size()/5)) reducedRegion = df.size()/5;
905 
906     for (int i = 0; i < reducedRegion; ++i) {
907         size_t index = df.size() - reducedRegion + i;
908         df[index] = df[index] + ((maxDf - df[index]) * i) / reducedRegion;
909     }
910 
911     long toAllot = long(duration) - long(m_increment * df.size());
912 
913     if (m_debugLevel > 1) {
914         std::cerr << "region of " << df.size() << " chunks, output duration " << duration << ", increment " << m_increment << ", toAllot " << toAllot << std::endl;
915     }
916 
917     size_t totalIncrement = 0;
918 
919     // We place limits on the amount of displacement per chunk.  if
920     // ratio < 0, no increment should be larger than increment*ratio
921     // or smaller than increment*ratio/2; if ratio > 0, none should be
922     // smaller than increment*ratio or larger than increment*ratio*2.
923     // We need to enforce this in the assignment of displacements to
924     // allotments, not by trying to respond if something turns out
925     // wrong.
926 
927     // Note that the ratio is only provided to this function for the
928     // purposes of establishing this bound to the displacement.
929 
930     // so if
931     // maxDisplacement / totalDisplacement > increment * ratio*2 - increment
932     // (for ratio > 1)
933     // or
934     // maxDisplacement / totalDisplacement < increment * ratio/2
935     // (for ratio < 1)
936 
937     // then we need to adjust and accommodate
938 
939     double totalDisplacement = 0;
940     double maxDisplacement = 0; // min displacement will be 0 by definition
941 
942     maxDf = 0;
943     float adj = 0;
944 
945     bool tooShort = true, tooLong = true;
946     const int acceptableIterations = 10;
947     int iteration = 0;
948     int prevExtreme = 0;
949     bool better = false;
950 
951     while ((tooLong || tooShort) && iteration < acceptableIterations) {
952 
953         ++iteration;
954 
955         tooLong = false;
956         tooShort = false;
957         calculateDisplacements(df, maxDf, totalDisplacement, maxDisplacement,
958                                adj);
959 
960         if (m_debugLevel > 1) {
961             std::cerr << "totalDisplacement " << totalDisplacement << ", max " << maxDisplacement << " (maxDf " << maxDf << ", df count " << df.size() << ")" << std::endl;
962         }
963 
964         if (totalDisplacement == 0) {
965 // Not usually a problem, in fact
966 //            std::cerr << "WARNING: totalDisplacement == 0 (duration " << duration << ", " << df.size() << " values in df)" << std::endl;
967             if (!df.empty() && adj == 0) {
968                 tooLong = true; tooShort = true;
969                 adj = 1;
970             }
971             continue;
972         }
973 
974         int extremeIncrement = m_increment +
975             lrint((toAllot * maxDisplacement) / totalDisplacement);
976 
977         if (extremeIncrement < 0) {
978             if (m_debugLevel > 0) {
979                 std::cerr << "NOTE: extreme increment " << extremeIncrement << " < 0, adjusting" << std::endl;
980             }
981             tooShort = true;
982         } else {
983             if (ratio < 1.0) {
984                 if (extremeIncrement > lrint(ceil(m_increment * ratio))) {
985                     std::cerr << "WARNING: extreme increment "
986                               << extremeIncrement << " > "
987                               << m_increment * ratio << std::endl;
988                 } else if (extremeIncrement < (m_increment * ratio) / 2) {
989                     if (m_debugLevel > 0) {
990                         std::cerr << "NOTE: extreme increment "
991                                   << extremeIncrement << " < "
992                                   << (m_increment * ratio) / 2
993                                   << ", adjusting" << std::endl;
994                     }
995                     tooShort = true;
996                     if (iteration > 0) {
997                         better = (extremeIncrement > prevExtreme);
998                     }
999                     prevExtreme = extremeIncrement;
1000                 }
1001             } else {
1002                 if (extremeIncrement > m_increment * ratio * 2) {
1003                     if (m_debugLevel > 0) {
1004                         std::cerr << "NOTE: extreme increment "
1005                                   << extremeIncrement << " > "
1006                                   << m_increment * ratio * 2
1007                                   << ", adjusting" << std::endl;
1008                     }
1009                     tooLong = true;
1010                     if (iteration > 0) {
1011                         better = (extremeIncrement < prevExtreme);
1012                     }
1013                     prevExtreme = extremeIncrement;
1014                 } else if (extremeIncrement < lrint(floor(m_increment * ratio))) {
1015                     std::cerr << "WARNING: extreme increment "
1016                               << extremeIncrement << " < "
1017                               << m_increment * ratio << std::endl;
1018                 }
1019             }
1020         }
1021 
1022         if (tooLong || tooShort) {
1023             // Need to make maxDisplacement smaller as a proportion of
1024             // the total displacement, yet ensure that the
1025             // displacements still sum to the total.
1026             adj += maxDf/10;
1027         }
1028     }
1029 
1030     if (tooLong) {
1031         if (better) {
1032             // we were iterating in the right direction, so
1033             // leave things as they are (and undo that last tweak)
1034             std::cerr << "WARNING: No acceptable displacement adjustment found, using latest values:\nthis region could sound bad" << std::endl;
1035             adj -= maxDf/10;
1036         } else {
1037             std::cerr << "WARNING: No acceptable displacement adjustment found, using defaults:\nthis region could sound bad" << std::endl;
1038             adj = 1;
1039             calculateDisplacements(df, maxDf, totalDisplacement, maxDisplacement,
1040                                    adj);
1041         }
1042     } else if (tooShort) {
1043         std::cerr << "WARNING: No acceptable displacement adjustment found, using flat distribution:\nthis region could sound bad" << std::endl;
1044         adj = 1;
1045         for (size_t i = 0; i < df.size(); ++i) {
1046             df[i] = 1.f;
1047         }
1048         calculateDisplacements(df, maxDf, totalDisplacement, maxDisplacement,
1049                                adj);
1050     }
1051 
1052     for (size_t i = 0; i < df.size(); ++i) {
1053 
1054         double displacement = maxDf - df[i];
1055         if (displacement < 0) displacement -= adj;
1056         else displacement += adj;
1057 
1058         if (i == 0 && phaseReset) {
1059             if (m_debugLevel > 2) {
1060                 std::cerr << "Phase reset at first chunk" << std::endl;
1061             }
1062             if (df.size() == 1) {
1063                 increments.push_back(duration);
1064                 totalIncrement += duration;
1065             } else {
1066                 increments.push_back(m_increment);
1067                 totalIncrement += m_increment;
1068             }
1069             totalDisplacement -= displacement;
1070             continue;
1071         }
1072 
1073         double theoreticalAllotment = 0;
1074 
1075         if (totalDisplacement != 0) {
1076             theoreticalAllotment = (toAllot * displacement) / totalDisplacement;
1077         }
1078         int allotment = lrint(theoreticalAllotment);
1079         if (i + 1 == df.size()) allotment = toAllot;
1080 
1081         int increment = m_increment + allotment;
1082 
1083         if (increment < 0) {
1084             // this is a serious problem, the allocation is quite
1085             // wrong if it allows increment to diverge so far from the
1086             // input increment (though it can happen legitimately if
1087             // asked to squash very violently)
1088             std::cerr << "*** WARNING: increment " << increment << " <= 0, rounding to zero" << std::endl;
1089 
1090             toAllot += m_increment;
1091             increment = 0;
1092 
1093         } else {
1094             toAllot -= allotment;
1095         }
1096 
1097         increments.push_back(increment);
1098         totalIncrement += increment;
1099 
1100         totalDisplacement -= displacement;
1101 
1102         if (m_debugLevel > 2) {
1103             std::cerr << "df " << df[i] << ", smoothed " << df[i] << ", disp " << displacement << ", allot " << theoreticalAllotment << ", incr " << increment << ", remain " << toAllot << std::endl;
1104         }
1105     }
1106 
1107     if (m_debugLevel > 2) {
1108         std::cerr << "total increment: " << totalIncrement << ", left over: " << toAllot << " to allot, displacement " << totalDisplacement << std::endl;
1109     }
1110 
1111     if (totalIncrement != duration) {
1112         std::cerr << "*** WARNING: calculated output duration " << totalIncrement << " != expected " << duration << std::endl;
1113     }
1114 
1115     return increments;
1116 }
1117 
1118 void
calculateDisplacements(const std::vector<float> & df,float & maxDf,double & totalDisplacement,double & maxDisplacement,float adj) const1119 StretchCalculator::calculateDisplacements(const std::vector<float> &df,
1120                                           float &maxDf,
1121                                           double &totalDisplacement,
1122                                           double &maxDisplacement,
1123                                           float adj) const
1124 {
1125     totalDisplacement = maxDisplacement = 0;
1126 
1127     maxDf = 0;
1128 
1129     for (size_t i = 0; i < df.size(); ++i) {
1130         if (i == 0 || df[i] > maxDf) maxDf = df[i];
1131     }
1132 
1133     for (size_t i = 0; i < df.size(); ++i) {
1134         double displacement = maxDf - df[i];
1135         if (displacement < 0) displacement -= adj;
1136         else displacement += adj;
1137         totalDisplacement += displacement;
1138         if (i == 0 || displacement > maxDisplacement) {
1139             maxDisplacement = displacement;
1140         }
1141     }
1142 }
1143 
1144 }
1145 
1146