1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
2
3 /*
4 Rubber Band Library
5 An audio time-stretching and pitch-shifting library.
6 Copyright 2007-2021 Particular Programs Ltd.
7
8 This program is free software; you can redistribute it and/or
9 modify it under the terms of the GNU General Public License as
10 published by the Free Software Foundation; either version 2 of the
11 License, or (at your option) any later version. See the file
12 COPYING included with this distribution for more information.
13
14 Alternatively, if you have a valid commercial licence for the
15 Rubber Band Library obtained by agreement with the copyright
16 holders, you may redistribute and/or modify it under the terms
17 described in that licence.
18
19 If you wish to distribute code using the Rubber Band Library
20 under terms other than those of the GNU General Public License,
21 you must obtain a valid commercial licence before doing so.
22 */
23
24 #include "StretchCalculator.h"
25
26 #include <math.h>
27 #include <iostream>
28 #include <deque>
29 #include <set>
30 #include <cassert>
31 #include <algorithm>
32
33 #include "system/sysutils.h"
34
35 namespace RubberBand
36 {
37
StretchCalculator(size_t sampleRate,size_t inputIncrement,bool useHardPeaks)38 StretchCalculator::StretchCalculator(size_t sampleRate,
39 size_t inputIncrement,
40 bool useHardPeaks) :
41 m_sampleRate(sampleRate),
42 m_increment(inputIncrement),
43 m_prevDf(0),
44 m_prevRatio(1.0),
45 m_prevTimeRatio(1.0),
46 m_transientAmnesty(0),
47 m_debugLevel(0),
48 m_useHardPeaks(useHardPeaks),
49 m_inFrameCounter(0),
50 m_frameCheckpoint(0, 0),
51 m_outFrameCounter(0)
52 {
53 // std::cerr << "StretchCalculator::StretchCalculator: useHardPeaks = " << useHardPeaks << std::endl;
54 }
55
~StretchCalculator()56 StretchCalculator::~StretchCalculator()
57 {
58 }
59
60 void
setKeyFrameMap(const std::map<size_t,size_t> & mapping)61 StretchCalculator::setKeyFrameMap(const std::map<size_t, size_t> &mapping)
62 {
63 m_keyFrameMap = mapping;
64
65 // Ensure we always have a 0 -> 0 mapping. If there's nothing in
66 // the map at all, don't need to worry about this (empty map is
67 // handled separately anyway)
68 if (!m_keyFrameMap.empty()) {
69 if (m_keyFrameMap.find(0) == m_keyFrameMap.end()) {
70 m_keyFrameMap[0] = 0;
71 }
72 }
73 }
74
75 std::vector<int>
calculate(double ratio,size_t inputDuration,const std::vector<float> & phaseResetDf,const std::vector<float> & stretchDf)76 StretchCalculator::calculate(double ratio, size_t inputDuration,
77 const std::vector<float> &phaseResetDf,
78 const std::vector<float> &stretchDf)
79 {
80 assert(phaseResetDf.size() == stretchDf.size());
81
82 m_peaks = findPeaks(phaseResetDf);
83
84 size_t totalCount = phaseResetDf.size();
85
86 size_t outputDuration = lrint(inputDuration * ratio);
87
88 if (m_debugLevel > 0) {
89 std::cerr << "StretchCalculator::calculate(): inputDuration " << inputDuration << ", ratio " << ratio << ", outputDuration " << outputDuration;
90 }
91
92 outputDuration = lrint((phaseResetDf.size() * m_increment) * ratio);
93
94 if (m_debugLevel > 0) {
95 std::cerr << " (rounded up to " << outputDuration << ")";
96 std::cerr << ", df size " << phaseResetDf.size() << ", increment "
97 << m_increment << std::endl;
98 }
99
100 std::vector<Peak> peaks; // peak position (in chunks) and hardness
101 std::vector<size_t> targets; // targets for mapping peaks (in samples)
102 mapPeaks(peaks, targets, outputDuration, totalCount);
103
104 if (m_debugLevel > 1) {
105 std::cerr << "have " << peaks.size() << " fixed positions" << std::endl;
106 }
107
108 size_t totalInput = 0, totalOutput = 0;
109
110 // For each region between two consecutive time sync points, we
111 // want to take the number of output chunks to be allocated and
112 // the detection function values within the range, and produce a
113 // series of increments that sum to the number of output chunks,
114 // such that each increment is displaced from the input increment
115 // by an amount inversely proportional to the magnitude of the
116 // stretch detection function at that input step.
117
118 size_t regionTotalChunks = 0;
119
120 std::vector<int> increments;
121
122 for (size_t i = 0; i <= peaks.size(); ++i) {
123
124 size_t regionStart, regionStartChunk, regionEnd, regionEndChunk;
125 bool phaseReset = false;
126
127 if (i == 0) {
128 regionStartChunk = 0;
129 regionStart = 0;
130 } else {
131 regionStartChunk = peaks[i-1].chunk;
132 regionStart = targets[i-1];
133 phaseReset = peaks[i-1].hard;
134 }
135
136 if (i == peaks.size()) {
137 // std::cerr << "note: i (=" << i << ") == peaks.size(); regionEndChunk " << regionEndChunk << " -> " << totalCount << ", regionEnd " << regionEnd << " -> " << outputDuration << std::endl;
138 regionEndChunk = totalCount;
139 regionEnd = outputDuration;
140 } else {
141 regionEndChunk = peaks[i].chunk;
142 regionEnd = targets[i];
143 }
144
145 if (regionStartChunk > totalCount) regionStartChunk = totalCount;
146 if (regionStart > outputDuration) regionStart = outputDuration;
147 if (regionEndChunk > totalCount) regionEndChunk = totalCount;
148 if (regionEnd > outputDuration) regionEnd = outputDuration;
149
150 size_t regionDuration = regionEnd - regionStart;
151 regionTotalChunks += regionDuration;
152
153 std::vector<float> dfRegion;
154
155 for (size_t j = regionStartChunk; j != regionEndChunk; ++j) {
156 dfRegion.push_back(stretchDf[j]);
157 }
158
159 if (m_debugLevel > 1) {
160 std::cerr << "distributeRegion from " << regionStartChunk << " to " << regionEndChunk << " (samples " << regionStart << " to " << regionEnd << ")" << std::endl;
161 }
162
163 dfRegion = smoothDF(dfRegion);
164
165 std::vector<int> regionIncrements = distributeRegion
166 (dfRegion, regionDuration, ratio, phaseReset);
167
168 size_t totalForRegion = 0;
169
170 for (size_t j = 0; j < regionIncrements.size(); ++j) {
171
172 int incr = regionIncrements[j];
173
174 if (j == 0 && phaseReset) increments.push_back(-incr);
175 else increments.push_back(incr);
176
177 if (incr > 0) totalForRegion += incr;
178 else totalForRegion += -incr;
179
180 totalInput += m_increment;
181 }
182
183 if (totalForRegion != regionDuration) {
184 std::cerr << "*** ERROR: distributeRegion returned wrong duration " << totalForRegion << ", expected " << regionDuration << std::endl;
185 }
186
187 totalOutput += totalForRegion;
188 }
189
190 if (m_debugLevel > 0) {
191 std::cerr << "total input increment = " << totalInput << " (= " << totalInput / m_increment << " chunks), output = " << totalOutput << ", ratio = " << double(totalOutput)/double(totalInput) << ", ideal output " << size_t(ceil(totalInput * ratio)) << std::endl;
192 std::cerr << "(region total = " << regionTotalChunks << ")" << std::endl;
193 }
194
195 return increments;
196 }
197
198 void
mapPeaks(std::vector<Peak> & peaks,std::vector<size_t> & targets,size_t outputDuration,size_t totalCount)199 StretchCalculator::mapPeaks(std::vector<Peak> &peaks,
200 std::vector<size_t> &targets,
201 size_t outputDuration,
202 size_t totalCount)
203 {
204 // outputDuration is in audio samples; totalCount is in chunks
205
206 if (m_keyFrameMap.empty()) {
207 // "normal" behaviour -- fixed points are strictly in
208 // proportion
209 peaks = m_peaks;
210 for (size_t i = 0; i < peaks.size(); ++i) {
211 targets.push_back
212 (lrint((double(peaks[i].chunk) * outputDuration) / totalCount));
213 }
214 return;
215 }
216
217 // We have been given a set of source -> target sample frames in
218 // m_keyFrameMap. We want to ensure that (to the nearest chunk) these
219 // are followed exactly, and any fixed points that we calculated
220 // ourselves are interpolated in linear proportion in between.
221
222 size_t peakidx = 0;
223 std::map<size_t, size_t>::const_iterator mi = m_keyFrameMap.begin();
224
225 // NB we know for certain we have a mapping from 0 -> 0 (or at
226 // least, some mapping for source sample 0) because that is
227 // enforced in setKeyFrameMap above. However, we aren't guaranteed
228 // to have a mapping for the total duration -- we will usually
229 // need to assume it maps to the normal duration * ratio sample
230
231 while (mi != m_keyFrameMap.end()) {
232
233 // std::cerr << "mi->first is " << mi->first << ", second is " << mi->second <<std::endl;
234
235 // The map we've been given is from sample to sample, but
236 // we can only map from chunk to sample. We should perhaps
237 // adjust the target sample to compensate for the discrepancy
238 // between the chunk position and the exact requested source
239 // sample. But we aren't doing that yet.
240
241 size_t sourceStartChunk = mi->first / m_increment;
242 size_t sourceEndChunk = totalCount;
243
244 size_t targetStartSample = mi->second;
245 size_t targetEndSample = outputDuration;
246
247 ++mi;
248 if (mi != m_keyFrameMap.end()) {
249 sourceEndChunk = mi->first / m_increment;
250 targetEndSample = mi->second;
251 }
252
253 if (sourceStartChunk >= totalCount ||
254 sourceStartChunk >= sourceEndChunk ||
255 targetStartSample >= outputDuration ||
256 targetStartSample >= targetEndSample) {
257 std::cerr << "NOTE: ignoring mapping from chunk " << sourceStartChunk << " to sample " << targetStartSample << "\n(source or target chunk exceeds total count, or end is not later than start)" << std::endl;
258 continue;
259 }
260
261 // one peak and target for the mapping, then one for each of
262 // the computed peaks that appear before the following mapping
263
264 Peak p;
265 p.chunk = sourceStartChunk;
266 p.hard = false; // mappings are in time only, not phase reset points
267 peaks.push_back(p);
268 targets.push_back(targetStartSample);
269
270 if (m_debugLevel > 1) {
271 std::cerr << "mapped chunk " << sourceStartChunk << " (frame " << sourceStartChunk * m_increment << ") -> " << targetStartSample << std::endl;
272 }
273
274 while (peakidx < m_peaks.size()) {
275
276 size_t pchunk = m_peaks[peakidx].chunk;
277
278 if (pchunk < sourceStartChunk) {
279 // shouldn't happen, should have been dealt with
280 // already -- but no harm in ignoring it explicitly
281 ++peakidx;
282 continue;
283 }
284 if (pchunk == sourceStartChunk) {
285 // convert that last peak to a hard one, after all
286 peaks[peaks.size()-1].hard = true;
287 ++peakidx;
288 continue;
289 }
290 if (pchunk >= sourceEndChunk) {
291 // leave the rest for after the next mapping
292 break;
293 }
294 p.chunk = pchunk;
295 p.hard = m_peaks[peakidx].hard;
296
297 double proportion =
298 double(pchunk - sourceStartChunk) /
299 double(sourceEndChunk - sourceStartChunk);
300
301 size_t target =
302 targetStartSample +
303 lrint(proportion *
304 (targetEndSample - targetStartSample));
305
306 if (target <= targets[targets.size()-1] + m_increment) {
307 // peaks will become too close together afterwards, ignore
308 ++peakidx;
309 continue;
310 }
311
312 if (m_debugLevel > 1) {
313 std::cerr << " peak chunk " << pchunk << " (frame " << pchunk * m_increment << ") -> " << target << std::endl;
314 }
315
316 peaks.push_back(p);
317 targets.push_back(target);
318 ++peakidx;
319 }
320 }
321 }
322
323 int64_t
expectedOutFrame(int64_t inFrame,double timeRatio)324 StretchCalculator::expectedOutFrame(int64_t inFrame, double timeRatio)
325 {
326 int64_t checkpointedAt = m_frameCheckpoint.first;
327 int64_t checkpointed = m_frameCheckpoint.second;
328 return int64_t(round(checkpointed + (inFrame - checkpointedAt) * timeRatio));
329 }
330
331 int
calculateSingle(double timeRatio,double effectivePitchRatio,float df,size_t inIncrement,size_t analysisWindowSize,size_t synthesisWindowSize)332 StretchCalculator::calculateSingle(double timeRatio,
333 double effectivePitchRatio,
334 float df,
335 size_t inIncrement,
336 size_t analysisWindowSize,
337 size_t synthesisWindowSize)
338 {
339 double ratio = timeRatio / effectivePitchRatio;
340
341 int increment = int(inIncrement);
342 if (increment == 0) increment = m_increment;
343
344 int outIncrement = lrint(increment * ratio); // the normal case
345 bool isTransient = false;
346
347 // We want to ensure, as close as possible, that the phase reset
348 // points appear at the right audio frame numbers. To this end we
349 // track the incoming frame number, its corresponding expected
350 // output frame number, and the actual output frame number
351 // projected based on the ratios provided.
352 //
353 // There are two subtleties:
354 //
355 // (1) on a ratio change, we need to checkpoint the expected
356 // output frame number reached so far and start counting again
357 // with the new ratio. We could do this with a reset to zero, but
358 // it's easier to reason about absolute input/output frame
359 // matches, so for the moment at least we're doing this by
360 // explicitly checkpointing the current numbers (hence the use of
361 // the above expectedOutFrame() function which refers to the
362 // last checkpointed values).
363 //
364 // (2) in the case of a pitch shift in a configuration where
365 // resampling occurs after stretching, all of our output
366 // increments will be effectively modified by resampling after we
367 // return. This is why we separate out timeRatio and
368 // effectivePitchRatio arguments - the former is the ratio that
369 // has already been applied and the latter is the ratio that will
370 // be applied by any subsequent resampling step (which will be 1.0
371 // / pitchScale if resampling is happening after stretching). So
372 // the overall ratio is timeRatio / effectivePitchRatio.
373
374 bool ratioChanged = (ratio != m_prevRatio);
375 if (ratioChanged) {
376 // Reset our frame counters from the ratio change.
377
378 // m_outFrameCounter tracks the frames counted at output from
379 // this function, which normally precedes resampling - hence
380 // the use of timeRatio rather than ratio here
381
382 if (m_debugLevel > 1) {
383 std::cerr << "StretchCalculator: ratio changed from " << m_prevRatio << " to " << ratio << std::endl;
384 }
385
386 int64_t toCheckpoint = expectedOutFrame
387 (m_inFrameCounter, m_prevTimeRatio);
388 m_frameCheckpoint =
389 std::pair<int64_t, int64_t>(m_inFrameCounter, toCheckpoint);
390 }
391
392 m_prevRatio = ratio;
393 m_prevTimeRatio = timeRatio;
394
395 if (m_debugLevel > 2) {
396 std::cerr << "StretchCalculator::calculateSingle: timeRatio = "
397 << timeRatio << ", effectivePitchRatio = "
398 << effectivePitchRatio << " (that's 1.0 / "
399 << (1.0 / effectivePitchRatio)
400 << "), ratio = " << ratio << ", df = " << df
401 << ", inIncrement = " << inIncrement
402 << ", default outIncrement = " << outIncrement
403 << ", analysisWindowSize = " << analysisWindowSize
404 << ", synthesisWindowSize = " << synthesisWindowSize
405 << std::endl;
406
407 std::cerr << "inFrameCounter = " << m_inFrameCounter
408 << ", outFrameCounter = " << m_outFrameCounter
409 << std::endl;
410
411 std::cerr << "The next sample out is input sample " << m_inFrameCounter << std::endl;
412 }
413
414 int64_t intended = expectedOutFrame
415 (m_inFrameCounter + analysisWindowSize/4, timeRatio);
416 int64_t projected = int64_t
417 (round(m_outFrameCounter + (synthesisWindowSize/4 * effectivePitchRatio)));
418
419 int64_t divergence = projected - intended;
420
421 if (m_debugLevel > 2) {
422 std::cerr << "for current frame + quarter frame: intended " << intended << ", projected " << projected << ", divergence " << divergence << std::endl;
423 }
424
425 // In principle, the threshold depends on chunk size: larger chunk
426 // sizes need higher thresholds. Since chunk size depends on
427 // ratio, I suppose we could in theory calculate the threshold
428 // from the ratio directly. For the moment we're happy if it
429 // works well in common situations.
430
431 float transientThreshold = 0.35f;
432 // if (ratio > 1) transientThreshold = 0.25f;
433
434 if (m_useHardPeaks && df > m_prevDf * 1.1f && df > transientThreshold) {
435 if (divergence > 1000 || divergence < -1000) {
436 if (m_debugLevel > 1) {
437 std::cerr << "StretchCalculator::calculateSingle: transient, but we're not permitting it because the divergence (" << divergence << ") is too great" << std::endl;
438 }
439 } else {
440 isTransient = true;
441 }
442 }
443
444 if (m_debugLevel > 2) {
445 std::cerr << "df = " << df << ", prevDf = " << m_prevDf
446 << ", thresh = " << transientThreshold << std::endl;
447 }
448
449 m_prevDf = df;
450
451 if (m_transientAmnesty > 0) {
452 if (isTransient) {
453 if (m_debugLevel > 1) {
454 std::cerr << "StretchCalculator::calculateSingle: transient, but we have an amnesty (df " << df << ", threshold " << transientThreshold << ")" << std::endl;
455 }
456 isTransient = false;
457 }
458 --m_transientAmnesty;
459 }
460
461 if (isTransient) {
462 if (m_debugLevel > 1) {
463 std::cerr << "StretchCalculator::calculateSingle: transient at (df " << df << ", threshold " << transientThreshold << ")" << std::endl;
464 }
465
466 // as in offline mode, 0.05 sec approx min between transients
467 m_transientAmnesty =
468 lrint(ceil(double(m_sampleRate) / (20 * double(increment))));
469
470 outIncrement = increment;
471
472 } else {
473
474 double recovery = 0.0;
475 if (divergence > 1000 || divergence < -1000) {
476 recovery = divergence / ((m_sampleRate / 10.0) / increment);
477 } else if (divergence > 100 || divergence < -100) {
478 recovery = divergence / ((m_sampleRate / 20.0) / increment);
479 } else {
480 recovery = divergence / 4.0;
481 }
482
483 int incr = lrint(outIncrement - recovery);
484 if (m_debugLevel > 2 || (m_debugLevel > 1 && divergence != 0)) {
485 std::cerr << "divergence = " << divergence << ", recovery = " << recovery << ", incr = " << incr << ", ";
486 }
487
488 int minIncr = lrint(increment * ratio * 0.3);
489 int maxIncr = lrint(increment * ratio * 2);
490
491 if (incr < minIncr) {
492 incr = minIncr;
493 } else if (incr > maxIncr) {
494 incr = maxIncr;
495 }
496
497 if (m_debugLevel > 2 || (m_debugLevel > 1 && divergence != 0)) {
498 std::cerr << "clamped into [" << minIncr << ", " << maxIncr
499 << "] becomes " << incr << std::endl;
500 }
501
502 if (incr < 0) {
503 std::cerr << "WARNING: internal error: incr < 0 in calculateSingle"
504 << std::endl;
505 outIncrement = 0;
506 } else {
507 outIncrement = incr;
508 }
509 }
510
511 if (m_debugLevel > 1) {
512 std::cerr << "StretchCalculator::calculateSingle: returning isTransient = "
513 << isTransient << ", outIncrement = " << outIncrement
514 << std::endl;
515 }
516
517 m_inFrameCounter += inIncrement;
518 m_outFrameCounter += outIncrement * effectivePitchRatio;
519
520 if (isTransient) {
521 return -outIncrement;
522 } else {
523 return outIncrement;
524 }
525 }
526
527 void
reset()528 StretchCalculator::reset()
529 {
530 m_prevDf = 0;
531 m_prevRatio = 1.0;
532 m_prevTimeRatio = 1.0;
533 m_inFrameCounter = 0;
534 m_frameCheckpoint = std::pair<int64_t, int64_t>(0, 0);
535 m_outFrameCounter = 0.0;
536 m_transientAmnesty = 0;
537 m_keyFrameMap.clear();
538 }
539
540 std::vector<StretchCalculator::Peak>
findPeaks(const std::vector<float> & rawDf)541 StretchCalculator::findPeaks(const std::vector<float> &rawDf)
542 {
543 std::vector<float> df = smoothDF(rawDf);
544
545 // We distinguish between "soft" and "hard" peaks. A soft peak is
546 // simply the result of peak-picking on the smoothed onset
547 // detection function, and it represents any (strong-ish) onset.
548 // We aim to ensure always that soft peaks are placed at the
549 // correct position in time. A hard peak is where there is a very
550 // rapid rise in detection function, and it presumably represents
551 // a more broadband, noisy transient. For these we perform a
552 // phase reset (if in the appropriate mode), and we locate the
553 // reset at the first point where we notice enough of a rapid
554 // rise, rather than necessarily at the peak itself, in order to
555 // preserve the shape of the transient.
556
557 std::set<size_t> hardPeakCandidates;
558 std::set<size_t> softPeakCandidates;
559
560 if (m_useHardPeaks) {
561
562 // 0.05 sec approx min between hard peaks
563 size_t hardPeakAmnesty = lrint(ceil(double(m_sampleRate) /
564 (20 * double(m_increment))));
565 size_t prevHardPeak = 0;
566
567 if (m_debugLevel > 1) {
568 std::cerr << "hardPeakAmnesty = " << hardPeakAmnesty << std::endl;
569 }
570
571 for (size_t i = 1; i + 1 < df.size(); ++i) {
572
573 if (df[i] < 0.1) continue;
574 if (df[i] <= df[i-1] * 1.1) continue;
575 if (df[i] < 0.22) continue;
576
577 if (!hardPeakCandidates.empty() &&
578 i < prevHardPeak + hardPeakAmnesty) {
579 continue;
580 }
581
582 bool hard = (df[i] > 0.4);
583
584 if (hard && (m_debugLevel > 1)) {
585 std::cerr << "hard peak at " << i << ": " << df[i]
586 << " > absolute " << 0.4
587 << std::endl;
588 }
589
590 if (!hard) {
591 hard = (df[i] > df[i-1] * 1.4);
592
593 if (hard && (m_debugLevel > 1)) {
594 std::cerr << "hard peak at " << i << ": " << df[i]
595 << " > prev " << df[i-1] << " * 1.4"
596 << std::endl;
597 }
598 }
599
600 if (!hard && i > 1) {
601 hard = (df[i] > df[i-1] * 1.2 &&
602 df[i-1] > df[i-2] * 1.2);
603
604 if (hard && (m_debugLevel > 1)) {
605 std::cerr << "hard peak at " << i << ": " << df[i]
606 << " > prev " << df[i-1] << " * 1.2 and "
607 << df[i-1] << " > prev " << df[i-2] << " * 1.2"
608 << std::endl;
609 }
610 }
611
612 if (!hard && i > 2) {
613 // have already established that df[i] > df[i-1] * 1.1
614 hard = (df[i] > 0.3 &&
615 df[i-1] > df[i-2] * 1.1 &&
616 df[i-2] > df[i-3] * 1.1);
617
618 if (hard && (m_debugLevel > 1)) {
619 std::cerr << "hard peak at " << i << ": " << df[i]
620 << " > prev " << df[i-1] << " * 1.1 and "
621 << df[i-1] << " > prev " << df[i-2] << " * 1.1 and "
622 << df[i-2] << " > prev " << df[i-3] << " * 1.1"
623 << std::endl;
624 }
625 }
626
627 if (!hard) continue;
628
629 // (df[i+1] > df[i] && df[i+1] > df[i-1] * 1.8) ||
630 // df[i] > 0.4) {
631
632 size_t peakLocation = i;
633
634 if (i + 1 < rawDf.size() &&
635 rawDf[i + 1] > rawDf[i] * 1.4) {
636
637 ++peakLocation;
638
639 if (m_debugLevel > 1) {
640 std::cerr << "pushing hard peak forward to " << peakLocation << ": " << df[peakLocation] << " > " << df[peakLocation-1] << " * " << 1.4 << std::endl;
641 }
642 }
643
644 hardPeakCandidates.insert(peakLocation);
645 prevHardPeak = peakLocation;
646 }
647 }
648
649 size_t medianmaxsize = lrint(ceil(double(m_sampleRate) /
650 double(m_increment))); // 1 sec ish
651
652 if (m_debugLevel > 1) {
653 std::cerr << "mediansize = " << medianmaxsize << std::endl;
654 }
655 if (medianmaxsize < 7) {
656 medianmaxsize = 7;
657 if (m_debugLevel > 1) {
658 std::cerr << "adjusted mediansize = " << medianmaxsize << std::endl;
659 }
660 }
661
662 int minspacing = lrint(ceil(double(m_sampleRate) /
663 (20 * double(m_increment)))); // 0.05 sec ish
664
665 std::deque<float> medianwin;
666 std::vector<float> sorted;
667 int softPeakAmnesty = 0;
668
669 for (size_t i = 0; i < medianmaxsize/2; ++i) {
670 medianwin.push_back(0);
671 }
672 for (size_t i = 0; i < medianmaxsize/2 && i < df.size(); ++i) {
673 medianwin.push_back(df[i]);
674 }
675
676 size_t lastSoftPeak = 0;
677
678 for (size_t i = 0; i < df.size(); ++i) {
679
680 size_t mediansize = medianmaxsize;
681
682 if (medianwin.size() < mediansize) {
683 mediansize = medianwin.size();
684 }
685
686 size_t middle = medianmaxsize / 2;
687 if (middle >= mediansize) middle = mediansize-1;
688
689 size_t nextDf = i + mediansize - middle;
690
691 if (mediansize < 2) {
692 if (mediansize > medianmaxsize) { // absurd, but never mind that
693 medianwin.pop_front();
694 }
695 if (nextDf < df.size()) {
696 medianwin.push_back(df[nextDf]);
697 } else {
698 medianwin.push_back(0);
699 }
700 continue;
701 }
702
703 if (m_debugLevel > 2) {
704 // std::cerr << "have " << mediansize << " in median buffer" << std::endl;
705 }
706
707 sorted.clear();
708 for (size_t j = 0; j < mediansize; ++j) {
709 sorted.push_back(medianwin[j]);
710 }
711 std::sort(sorted.begin(), sorted.end());
712
713 size_t n = 90; // percentile above which we pick peaks
714 size_t index = (sorted.size() * n) / 100;
715 if (index >= sorted.size()) index = sorted.size()-1;
716 if (index == sorted.size()-1 && index > 0) --index;
717 float thresh = sorted[index];
718
719 // if (m_debugLevel > 2) {
720 // std::cerr << "medianwin[" << middle << "] = " << medianwin[middle] << ", thresh = " << thresh << std::endl;
721 // if (medianwin[middle] == 0.f) {
722 // std::cerr << "contents: ";
723 // for (size_t j = 0; j < medianwin.size(); ++j) {
724 // std::cerr << medianwin[j] << " ";
725 // }
726 // std::cerr << std::endl;
727 // }
728 // }
729
730 if (medianwin[middle] > thresh &&
731 medianwin[middle] > medianwin[middle-1] &&
732 medianwin[middle] > medianwin[middle+1] &&
733 softPeakAmnesty == 0) {
734
735 size_t maxindex = middle;
736 float maxval = medianwin[middle];
737
738 for (size_t j = middle+1; j < mediansize; ++j) {
739 if (medianwin[j] > maxval) {
740 maxval = medianwin[j];
741 maxindex = j;
742 } else if (medianwin[j] < medianwin[middle]) {
743 break;
744 }
745 }
746
747 size_t peak = i + maxindex - middle;
748
749 // std::cerr << "i = " << i << ", maxindex = " << maxindex << ", middle = " << middle << ", so peak at " << peak << std::endl;
750
751 if (softPeakCandidates.empty() || lastSoftPeak != peak) {
752
753 if (m_debugLevel > 1) {
754 std::cerr << "soft peak at " << peak << " ("
755 << peak * m_increment << "): "
756 << medianwin[middle] << " > "
757 << thresh << " and "
758 << medianwin[middle]
759 << " > " << medianwin[middle-1] << " and "
760 << medianwin[middle]
761 << " > " << medianwin[middle+1]
762 << std::endl;
763 }
764
765 if (peak >= df.size()) {
766 if (m_debugLevel > 2) {
767 std::cerr << "peak is beyond end" << std::endl;
768 }
769 } else {
770 softPeakCandidates.insert(peak);
771 lastSoftPeak = peak;
772 }
773 }
774
775 softPeakAmnesty = minspacing + maxindex - middle;
776 if (m_debugLevel > 2) {
777 std::cerr << "amnesty = " << softPeakAmnesty << std::endl;
778 }
779
780 } else if (softPeakAmnesty > 0) --softPeakAmnesty;
781
782 if (mediansize >= medianmaxsize) {
783 medianwin.pop_front();
784 }
785 if (nextDf < df.size()) {
786 medianwin.push_back(df[nextDf]);
787 } else {
788 medianwin.push_back(0);
789 }
790 }
791
792 std::vector<Peak> peaks;
793
794 while (!hardPeakCandidates.empty() || !softPeakCandidates.empty()) {
795
796 bool haveHardPeak = !hardPeakCandidates.empty();
797 bool haveSoftPeak = !softPeakCandidates.empty();
798
799 size_t hardPeak = (haveHardPeak ? *hardPeakCandidates.begin() : 0);
800 size_t softPeak = (haveSoftPeak ? *softPeakCandidates.begin() : 0);
801
802 Peak peak;
803 peak.hard = false;
804 peak.chunk = softPeak;
805
806 bool ignore = false;
807
808 if (haveHardPeak &&
809 (!haveSoftPeak || hardPeak <= softPeak)) {
810
811 if (m_debugLevel > 2) {
812 std::cerr << "Hard peak: " << hardPeak << std::endl;
813 }
814
815 peak.hard = true;
816 peak.chunk = hardPeak;
817 hardPeakCandidates.erase(hardPeakCandidates.begin());
818
819 } else {
820 if (m_debugLevel > 2) {
821 std::cerr << "Soft peak: " << softPeak << std::endl;
822 }
823 if (!peaks.empty() &&
824 peaks[peaks.size()-1].hard &&
825 peaks[peaks.size()-1].chunk + 3 >= softPeak) {
826 if (m_debugLevel > 2) {
827 std::cerr << "(ignoring, as we just had a hard peak)"
828 << std::endl;
829 }
830 ignore = true;
831 }
832 }
833
834 if (haveSoftPeak && peak.chunk == softPeak) {
835 softPeakCandidates.erase(softPeakCandidates.begin());
836 }
837
838 if (!ignore) {
839 peaks.push_back(peak);
840 }
841 }
842
843 return peaks;
844 }
845
846 std::vector<float>
smoothDF(const std::vector<float> & df)847 StretchCalculator::smoothDF(const std::vector<float> &df)
848 {
849 std::vector<float> smoothedDF;
850
851 for (size_t i = 0; i < df.size(); ++i) {
852 // three-value moving mean window for simple smoothing
853 float total = 0.f, count = 0;
854 if (i > 0) { total += df[i-1]; ++count; }
855 total += df[i]; ++count;
856 if (i+1 < df.size()) { total += df[i+1]; ++count; }
857 float mean = total / count;
858 smoothedDF.push_back(mean);
859 }
860
861 return smoothedDF;
862 }
863
864 std::vector<int>
distributeRegion(const std::vector<float> & dfIn,size_t duration,float ratio,bool phaseReset)865 StretchCalculator::distributeRegion(const std::vector<float> &dfIn,
866 size_t duration, float ratio, bool phaseReset)
867 {
868 std::vector<float> df(dfIn);
869 std::vector<int> increments;
870
871 // The peak for the stretch detection function may appear after
872 // the peak that we're using to calculate the start of the region.
873 // We don't want that. If we find a peak in the first half of
874 // the region, we should set all the values up to that point to
875 // the same value as the peak.
876
877 // (This might not be subtle enough, especially if the region is
878 // long -- we want a bound that corresponds to acoustic perception
879 // of the audible bounce.)
880
881 for (size_t i = 1; i < df.size()/2; ++i) {
882 if (df[i] < df[i-1]) {
883 if (m_debugLevel > 1) {
884 std::cerr << "stretch peak offset: " << i-1 << " (peak " << df[i-1] << ")" << std::endl;
885 }
886 for (size_t j = 0; j < i-1; ++j) {
887 df[j] = df[i-1];
888 }
889 break;
890 }
891 }
892
893 float maxDf = 0;
894
895 for (size_t i = 0; i < df.size(); ++i) {
896 if (i == 0 || df[i] > maxDf) maxDf = df[i];
897 }
898
899 // We want to try to ensure the last 100ms or so (if possible) are
900 // tending back towards the maximum df, so that the stretchiness
901 // reduces at the end of the stretched region.
902
903 int reducedRegion = lrint((0.1 * m_sampleRate) / m_increment);
904 if (reducedRegion > int(df.size()/5)) reducedRegion = df.size()/5;
905
906 for (int i = 0; i < reducedRegion; ++i) {
907 size_t index = df.size() - reducedRegion + i;
908 df[index] = df[index] + ((maxDf - df[index]) * i) / reducedRegion;
909 }
910
911 long toAllot = long(duration) - long(m_increment * df.size());
912
913 if (m_debugLevel > 1) {
914 std::cerr << "region of " << df.size() << " chunks, output duration " << duration << ", increment " << m_increment << ", toAllot " << toAllot << std::endl;
915 }
916
917 size_t totalIncrement = 0;
918
919 // We place limits on the amount of displacement per chunk. if
920 // ratio < 0, no increment should be larger than increment*ratio
921 // or smaller than increment*ratio/2; if ratio > 0, none should be
922 // smaller than increment*ratio or larger than increment*ratio*2.
923 // We need to enforce this in the assignment of displacements to
924 // allotments, not by trying to respond if something turns out
925 // wrong.
926
927 // Note that the ratio is only provided to this function for the
928 // purposes of establishing this bound to the displacement.
929
930 // so if
931 // maxDisplacement / totalDisplacement > increment * ratio*2 - increment
932 // (for ratio > 1)
933 // or
934 // maxDisplacement / totalDisplacement < increment * ratio/2
935 // (for ratio < 1)
936
937 // then we need to adjust and accommodate
938
939 double totalDisplacement = 0;
940 double maxDisplacement = 0; // min displacement will be 0 by definition
941
942 maxDf = 0;
943 float adj = 0;
944
945 bool tooShort = true, tooLong = true;
946 const int acceptableIterations = 10;
947 int iteration = 0;
948 int prevExtreme = 0;
949 bool better = false;
950
951 while ((tooLong || tooShort) && iteration < acceptableIterations) {
952
953 ++iteration;
954
955 tooLong = false;
956 tooShort = false;
957 calculateDisplacements(df, maxDf, totalDisplacement, maxDisplacement,
958 adj);
959
960 if (m_debugLevel > 1) {
961 std::cerr << "totalDisplacement " << totalDisplacement << ", max " << maxDisplacement << " (maxDf " << maxDf << ", df count " << df.size() << ")" << std::endl;
962 }
963
964 if (totalDisplacement == 0) {
965 // Not usually a problem, in fact
966 // std::cerr << "WARNING: totalDisplacement == 0 (duration " << duration << ", " << df.size() << " values in df)" << std::endl;
967 if (!df.empty() && adj == 0) {
968 tooLong = true; tooShort = true;
969 adj = 1;
970 }
971 continue;
972 }
973
974 int extremeIncrement = m_increment +
975 lrint((toAllot * maxDisplacement) / totalDisplacement);
976
977 if (extremeIncrement < 0) {
978 if (m_debugLevel > 0) {
979 std::cerr << "NOTE: extreme increment " << extremeIncrement << " < 0, adjusting" << std::endl;
980 }
981 tooShort = true;
982 } else {
983 if (ratio < 1.0) {
984 if (extremeIncrement > lrint(ceil(m_increment * ratio))) {
985 std::cerr << "WARNING: extreme increment "
986 << extremeIncrement << " > "
987 << m_increment * ratio << std::endl;
988 } else if (extremeIncrement < (m_increment * ratio) / 2) {
989 if (m_debugLevel > 0) {
990 std::cerr << "NOTE: extreme increment "
991 << extremeIncrement << " < "
992 << (m_increment * ratio) / 2
993 << ", adjusting" << std::endl;
994 }
995 tooShort = true;
996 if (iteration > 0) {
997 better = (extremeIncrement > prevExtreme);
998 }
999 prevExtreme = extremeIncrement;
1000 }
1001 } else {
1002 if (extremeIncrement > m_increment * ratio * 2) {
1003 if (m_debugLevel > 0) {
1004 std::cerr << "NOTE: extreme increment "
1005 << extremeIncrement << " > "
1006 << m_increment * ratio * 2
1007 << ", adjusting" << std::endl;
1008 }
1009 tooLong = true;
1010 if (iteration > 0) {
1011 better = (extremeIncrement < prevExtreme);
1012 }
1013 prevExtreme = extremeIncrement;
1014 } else if (extremeIncrement < lrint(floor(m_increment * ratio))) {
1015 std::cerr << "WARNING: extreme increment "
1016 << extremeIncrement << " < "
1017 << m_increment * ratio << std::endl;
1018 }
1019 }
1020 }
1021
1022 if (tooLong || tooShort) {
1023 // Need to make maxDisplacement smaller as a proportion of
1024 // the total displacement, yet ensure that the
1025 // displacements still sum to the total.
1026 adj += maxDf/10;
1027 }
1028 }
1029
1030 if (tooLong) {
1031 if (better) {
1032 // we were iterating in the right direction, so
1033 // leave things as they are (and undo that last tweak)
1034 std::cerr << "WARNING: No acceptable displacement adjustment found, using latest values:\nthis region could sound bad" << std::endl;
1035 adj -= maxDf/10;
1036 } else {
1037 std::cerr << "WARNING: No acceptable displacement adjustment found, using defaults:\nthis region could sound bad" << std::endl;
1038 adj = 1;
1039 calculateDisplacements(df, maxDf, totalDisplacement, maxDisplacement,
1040 adj);
1041 }
1042 } else if (tooShort) {
1043 std::cerr << "WARNING: No acceptable displacement adjustment found, using flat distribution:\nthis region could sound bad" << std::endl;
1044 adj = 1;
1045 for (size_t i = 0; i < df.size(); ++i) {
1046 df[i] = 1.f;
1047 }
1048 calculateDisplacements(df, maxDf, totalDisplacement, maxDisplacement,
1049 adj);
1050 }
1051
1052 for (size_t i = 0; i < df.size(); ++i) {
1053
1054 double displacement = maxDf - df[i];
1055 if (displacement < 0) displacement -= adj;
1056 else displacement += adj;
1057
1058 if (i == 0 && phaseReset) {
1059 if (m_debugLevel > 2) {
1060 std::cerr << "Phase reset at first chunk" << std::endl;
1061 }
1062 if (df.size() == 1) {
1063 increments.push_back(duration);
1064 totalIncrement += duration;
1065 } else {
1066 increments.push_back(m_increment);
1067 totalIncrement += m_increment;
1068 }
1069 totalDisplacement -= displacement;
1070 continue;
1071 }
1072
1073 double theoreticalAllotment = 0;
1074
1075 if (totalDisplacement != 0) {
1076 theoreticalAllotment = (toAllot * displacement) / totalDisplacement;
1077 }
1078 int allotment = lrint(theoreticalAllotment);
1079 if (i + 1 == df.size()) allotment = toAllot;
1080
1081 int increment = m_increment + allotment;
1082
1083 if (increment < 0) {
1084 // this is a serious problem, the allocation is quite
1085 // wrong if it allows increment to diverge so far from the
1086 // input increment (though it can happen legitimately if
1087 // asked to squash very violently)
1088 std::cerr << "*** WARNING: increment " << increment << " <= 0, rounding to zero" << std::endl;
1089
1090 toAllot += m_increment;
1091 increment = 0;
1092
1093 } else {
1094 toAllot -= allotment;
1095 }
1096
1097 increments.push_back(increment);
1098 totalIncrement += increment;
1099
1100 totalDisplacement -= displacement;
1101
1102 if (m_debugLevel > 2) {
1103 std::cerr << "df " << df[i] << ", smoothed " << df[i] << ", disp " << displacement << ", allot " << theoreticalAllotment << ", incr " << increment << ", remain " << toAllot << std::endl;
1104 }
1105 }
1106
1107 if (m_debugLevel > 2) {
1108 std::cerr << "total increment: " << totalIncrement << ", left over: " << toAllot << " to allot, displacement " << totalDisplacement << std::endl;
1109 }
1110
1111 if (totalIncrement != duration) {
1112 std::cerr << "*** WARNING: calculated output duration " << totalIncrement << " != expected " << duration << std::endl;
1113 }
1114
1115 return increments;
1116 }
1117
1118 void
calculateDisplacements(const std::vector<float> & df,float & maxDf,double & totalDisplacement,double & maxDisplacement,float adj) const1119 StretchCalculator::calculateDisplacements(const std::vector<float> &df,
1120 float &maxDf,
1121 double &totalDisplacement,
1122 double &maxDisplacement,
1123 float adj) const
1124 {
1125 totalDisplacement = maxDisplacement = 0;
1126
1127 maxDf = 0;
1128
1129 for (size_t i = 0; i < df.size(); ++i) {
1130 if (i == 0 || df[i] > maxDf) maxDf = df[i];
1131 }
1132
1133 for (size_t i = 0; i < df.size(); ++i) {
1134 double displacement = maxDf - df[i];
1135 if (displacement < 0) displacement -= adj;
1136 else displacement += adj;
1137 totalDisplacement += displacement;
1138 if (i == 0 || displacement > maxDisplacement) {
1139 maxDisplacement = displacement;
1140 }
1141 }
1142 }
1143
1144 }
1145
1146