1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  * Copyright (C) 2010 Bill Cox, Sonic Library
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  *      http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 package org.mozilla.thirdparty.com.google.android.exoplayer2.audio;
18 
19 import org.mozilla.thirdparty.com.google.android.exoplayer2.util.Assertions;
20 import java.nio.ShortBuffer;
21 import java.util.Arrays;
22 
23 /**
24  * Sonic audio stream processor for time/pitch stretching.
25  * <p>
26  * Based on https://github.com/waywardgeek/sonic.
27  */
28 /* package */ final class Sonic {
29 
30   private static final boolean USE_CHORD_PITCH = false;
31   private static final int MINIMUM_PITCH = 65;
32   private static final int MAXIMUM_PITCH = 400;
33   private static final int AMDF_FREQUENCY = 4000;
34 
35   private final int sampleRate;
36   private final int numChannels;
37   private final int minPeriod;
38   private final int maxPeriod;
39   private final int maxRequired;
40   private final short[] downSampleBuffer;
41 
42   private int inputBufferSize;
43   private short[] inputBuffer;
44   private int outputBufferSize;
45   private short[] outputBuffer;
46   private int pitchBufferSize;
47   private short[] pitchBuffer;
48   private int oldRatePosition;
49   private int newRatePosition;
50   private float speed;
51   private float pitch;
52   private int numInputSamples;
53   private int numOutputSamples;
54   private int numPitchSamples;
55   private int remainingInputToCopy;
56   private int prevPeriod;
57   private int prevMinDiff;
58   private int minDiff;
59   private int maxDiff;
60 
61   /**
62    * Creates a new Sonic audio stream processor.
63    *
64    * @param sampleRate The sample rate of input audio.
65    * @param numChannels The number of channels in the input audio.
66    */
Sonic(int sampleRate, int numChannels)67   public Sonic(int sampleRate, int numChannels) {
68     this.sampleRate = sampleRate;
69     this.numChannels = numChannels;
70     minPeriod = sampleRate / MAXIMUM_PITCH;
71     maxPeriod = sampleRate / MINIMUM_PITCH;
72     maxRequired = 2 * maxPeriod;
73     downSampleBuffer = new short[maxRequired];
74     inputBufferSize = maxRequired;
75     inputBuffer = new short[maxRequired * numChannels];
76     outputBufferSize = maxRequired;
77     outputBuffer = new short[maxRequired * numChannels];
78     pitchBufferSize = maxRequired;
79     pitchBuffer = new short[maxRequired * numChannels];
80     oldRatePosition = 0;
81     newRatePosition = 0;
82     prevPeriod = 0;
83     speed = 1.0f;
84     pitch = 1.0f;
85   }
86 
87   /**
88    * Sets the output speed.
89    */
setSpeed(float speed)90   public void setSpeed(float speed) {
91     this.speed = speed;
92   }
93 
94   /**
95    * Gets the output speed.
96    */
getSpeed()97   public float getSpeed() {
98     return speed;
99   }
100 
101   /**
102    * Sets the output pitch.
103    */
setPitch(float pitch)104   public void setPitch(float pitch) {
105     this.pitch = pitch;
106   }
107 
108   /**
109    * Gets the output pitch.
110    */
getPitch()111   public float getPitch() {
112     return pitch;
113   }
114 
115   /**
116    * Queues remaining data from {@code buffer}, and advances its position by the number of bytes
117    * consumed.
118    *
119    * @param buffer A {@link ShortBuffer} containing input data between its position and limit.
120    */
queueInput(ShortBuffer buffer)121   public void queueInput(ShortBuffer buffer) {
122     int samplesToWrite = buffer.remaining() / numChannels;
123     int bytesToWrite = samplesToWrite * numChannels * 2;
124     enlargeInputBufferIfNeeded(samplesToWrite);
125     buffer.get(inputBuffer, numInputSamples * numChannels, bytesToWrite / 2);
126     numInputSamples += samplesToWrite;
127     processStreamInput();
128   }
129 
130   /**
131    * Gets available output, outputting to the start of {@code buffer}. The buffer's position will be
132    * advanced by the number of bytes written.
133    *
134    * @param buffer A {@link ShortBuffer} into which output will be written.
135    */
getOutput(ShortBuffer buffer)136   public void getOutput(ShortBuffer buffer) {
137     int samplesToRead = Math.min(buffer.remaining() / numChannels, numOutputSamples);
138     buffer.put(outputBuffer, 0, samplesToRead * numChannels);
139     numOutputSamples -= samplesToRead;
140     System.arraycopy(outputBuffer, samplesToRead * numChannels, outputBuffer, 0,
141         numOutputSamples * numChannels);
142   }
143 
144   /**
145    * Forces generating output using whatever data has been queued already. No extra delay will be
146    * added to the output, but flushing in the middle of words could introduce distortion.
147    */
queueEndOfStream()148   public void queueEndOfStream() {
149     int remainingSamples = numInputSamples;
150     float s = speed / pitch;
151     int expectedOutputSamples =
152         numOutputSamples + (int) ((remainingSamples / s + numPitchSamples) / pitch + 0.5f);
153 
154     // Add enough silence to flush both input and pitch buffers.
155     enlargeInputBufferIfNeeded(remainingSamples + 2 * maxRequired);
156     for (int xSample = 0; xSample < 2 * maxRequired * numChannels; xSample++) {
157       inputBuffer[remainingSamples * numChannels + xSample] = 0;
158     }
159     numInputSamples += 2 * maxRequired;
160     processStreamInput();
161     // Throw away any extra samples we generated due to the silence we added.
162     if (numOutputSamples > expectedOutputSamples) {
163       numOutputSamples = expectedOutputSamples;
164     }
165     // Empty input and pitch buffers.
166     numInputSamples = 0;
167     remainingInputToCopy = 0;
168     numPitchSamples = 0;
169   }
170 
171   /**
172    * Returns the number of output samples that can be read with {@link #getOutput(ShortBuffer)}.
173    */
getSamplesAvailable()174   public int getSamplesAvailable() {
175     return numOutputSamples;
176   }
177 
178   // Internal methods.
179 
enlargeOutputBufferIfNeeded(int numSamples)180   private void enlargeOutputBufferIfNeeded(int numSamples) {
181     if (numOutputSamples + numSamples > outputBufferSize) {
182       outputBufferSize += (outputBufferSize / 2) + numSamples;
183       outputBuffer = Arrays.copyOf(outputBuffer, outputBufferSize * numChannels);
184     }
185   }
186 
enlargeInputBufferIfNeeded(int numSamples)187   private void enlargeInputBufferIfNeeded(int numSamples) {
188     if (numInputSamples + numSamples > inputBufferSize) {
189       inputBufferSize += (inputBufferSize / 2) + numSamples;
190       inputBuffer = Arrays.copyOf(inputBuffer, inputBufferSize * numChannels);
191     }
192   }
193 
removeProcessedInputSamples(int position)194   private void removeProcessedInputSamples(int position) {
195     int remainingSamples = numInputSamples - position;
196     System.arraycopy(inputBuffer, position * numChannels, inputBuffer, 0,
197         remainingSamples * numChannels);
198     numInputSamples = remainingSamples;
199   }
200 
copyToOutput(short[] samples, int position, int numSamples)201   private void copyToOutput(short[] samples, int position, int numSamples) {
202     enlargeOutputBufferIfNeeded(numSamples);
203     System.arraycopy(samples, position * numChannels, outputBuffer, numOutputSamples * numChannels,
204         numSamples * numChannels);
205     numOutputSamples += numSamples;
206   }
207 
copyInputToOutput(int position)208   private int copyInputToOutput(int position) {
209     int numSamples = Math.min(maxRequired, remainingInputToCopy);
210     copyToOutput(inputBuffer, position, numSamples);
211     remainingInputToCopy -= numSamples;
212     return numSamples;
213   }
214 
downSampleInput(short[] samples, int position, int skip)215   private void downSampleInput(short[] samples, int position, int skip) {
216     // If skip is greater than one, average skip samples together and write them to the down-sample
217     // buffer. If numChannels is greater than one, mix the channels together as we down sample.
218     int numSamples = maxRequired / skip;
219     int samplesPerValue = numChannels * skip;
220     position *= numChannels;
221     for (int i = 0; i < numSamples; i++) {
222       int value = 0;
223       for (int j = 0; j < samplesPerValue; j++) {
224         value += samples[position + i * samplesPerValue + j];
225       }
226       value /= samplesPerValue;
227       downSampleBuffer[i] = (short) value;
228     }
229   }
230 
findPitchPeriodInRange(short[] samples, int position, int minPeriod, int maxPeriod)231   private int findPitchPeriodInRange(short[] samples, int position, int minPeriod, int maxPeriod) {
232     // Find the best frequency match in the range, and given a sample skip multiple. For now, just
233     // find the pitch of the first channel.
234     int bestPeriod = 0;
235     int worstPeriod = 255;
236     int minDiff = 1;
237     int maxDiff = 0;
238     position *= numChannels;
239     for (int period = minPeriod; period <= maxPeriod; period++) {
240       int diff = 0;
241       for (int i = 0; i < period; i++) {
242         short sVal = samples[position + i];
243         short pVal = samples[position + period + i];
244         diff += sVal >= pVal ? sVal - pVal : pVal - sVal;
245       }
246       // Note that the highest number of samples we add into diff will be less than 256, since we
247       // skip samples. Thus, diff is a 24 bit number, and we can safely multiply by numSamples
248       // without overflow.
249       if (diff * bestPeriod < minDiff * period) {
250         minDiff = diff;
251         bestPeriod = period;
252       }
253       if (diff * worstPeriod > maxDiff * period) {
254         maxDiff = diff;
255         worstPeriod = period;
256       }
257     }
258     this.minDiff = minDiff / bestPeriod;
259     this.maxDiff = maxDiff / worstPeriod;
260     return bestPeriod;
261   }
262 
263   /**
264    * Returns whether the previous pitch period estimate is a better approximation, which can occur
265    * at the abrupt end of voiced words.
266    */
previousPeriodBetter(int minDiff, int maxDiff, boolean preferNewPeriod)267   private boolean previousPeriodBetter(int minDiff, int maxDiff, boolean preferNewPeriod) {
268     if (minDiff == 0 || prevPeriod == 0) {
269       return false;
270     }
271     if (preferNewPeriod) {
272       if (maxDiff > minDiff * 3) {
273         // Got a reasonable match this period
274         return false;
275       }
276       if (minDiff * 2 <= prevMinDiff * 3) {
277         // Mismatch is not that much greater this period
278         return false;
279       }
280     } else {
281       if (minDiff <= prevMinDiff) {
282         return false;
283       }
284     }
285     return true;
286   }
287 
findPitchPeriod(short[] samples, int position, boolean preferNewPeriod)288   private int findPitchPeriod(short[] samples, int position, boolean preferNewPeriod) {
289     // Find the pitch period. This is a critical step, and we may have to try multiple ways to get a
290     // good answer. This version uses AMDF. To improve speed, we down sample by an integer factor
291     // get in the 11 kHz range, and then do it again with a narrower frequency range without down
292     // sampling.
293     int period;
294     int retPeriod;
295     int skip = sampleRate > AMDF_FREQUENCY ? sampleRate / AMDF_FREQUENCY : 1;
296     if (numChannels == 1 && skip == 1) {
297       period = findPitchPeriodInRange(samples, position, minPeriod, maxPeriod);
298     } else {
299       downSampleInput(samples, position, skip);
300       period = findPitchPeriodInRange(downSampleBuffer, 0, minPeriod / skip, maxPeriod / skip);
301       if (skip != 1) {
302         period *= skip;
303         int minP = period - (skip * 4);
304         int maxP = period + (skip * 4);
305         if (minP < minPeriod) {
306           minP = minPeriod;
307         }
308         if (maxP > maxPeriod) {
309           maxP = maxPeriod;
310         }
311         if (numChannels == 1) {
312           period = findPitchPeriodInRange(samples, position, minP, maxP);
313         } else {
314           downSampleInput(samples, position, 1);
315           period = findPitchPeriodInRange(downSampleBuffer, 0, minP, maxP);
316         }
317       }
318     }
319     if (previousPeriodBetter(minDiff, maxDiff, preferNewPeriod)) {
320       retPeriod = prevPeriod;
321     } else {
322       retPeriod = period;
323     }
324     prevMinDiff = minDiff;
325     prevPeriod = period;
326     return retPeriod;
327   }
328 
moveNewSamplesToPitchBuffer(int originalNumOutputSamples)329   private void moveNewSamplesToPitchBuffer(int originalNumOutputSamples) {
330     int numSamples = numOutputSamples - originalNumOutputSamples;
331     if (numPitchSamples + numSamples > pitchBufferSize) {
332       pitchBufferSize += (pitchBufferSize / 2) + numSamples;
333       pitchBuffer = Arrays.copyOf(pitchBuffer, pitchBufferSize * numChannels);
334     }
335     System.arraycopy(outputBuffer, originalNumOutputSamples * numChannels, pitchBuffer,
336         numPitchSamples * numChannels, numSamples * numChannels);
337     numOutputSamples = originalNumOutputSamples;
338     numPitchSamples += numSamples;
339   }
340 
removePitchSamples(int numSamples)341   private void removePitchSamples(int numSamples) {
342     if (numSamples == 0) {
343       return;
344     }
345     System.arraycopy(pitchBuffer, numSamples * numChannels, pitchBuffer, 0,
346         (numPitchSamples - numSamples) * numChannels);
347     numPitchSamples -= numSamples;
348   }
349 
adjustPitch(int originalNumOutputSamples)350   private void adjustPitch(int originalNumOutputSamples) {
351     // Latency due to pitch changes could be reduced by looking at past samples to determine pitch,
352     // rather than future.
353     if (numOutputSamples == originalNumOutputSamples) {
354       return;
355     }
356     moveNewSamplesToPitchBuffer(originalNumOutputSamples);
357     int position = 0;
358     while (numPitchSamples - position >= maxRequired) {
359       int period = findPitchPeriod(pitchBuffer, position, false);
360       int newPeriod = (int) (period / pitch);
361       enlargeOutputBufferIfNeeded(newPeriod);
362       if (pitch >= 1.0f) {
363         overlapAdd(newPeriod, numChannels, outputBuffer, numOutputSamples, pitchBuffer, position,
364             pitchBuffer, position + period - newPeriod);
365       } else {
366         int separation = newPeriod - period;
367         overlapAddWithSeparation(period, numChannels, separation, outputBuffer, numOutputSamples,
368             pitchBuffer, position, pitchBuffer, position);
369       }
370       numOutputSamples += newPeriod;
371       position += period;
372     }
373     removePitchSamples(position);
374   }
375 
interpolate(short[] in, int inPos, int oldSampleRate, int newSampleRate)376   private short interpolate(short[] in, int inPos, int oldSampleRate, int newSampleRate) {
377     short left = in[inPos * numChannels];
378     short right = in[inPos * numChannels + numChannels];
379     int position = newRatePosition * oldSampleRate;
380     int leftPosition = oldRatePosition * newSampleRate;
381     int rightPosition = (oldRatePosition + 1) * newSampleRate;
382     int ratio = rightPosition - position;
383     int width = rightPosition - leftPosition;
384     return (short) ((ratio * left + (width - ratio) * right) / width);
385   }
386 
adjustRate(float rate, int originalNumOutputSamples)387   private void adjustRate(float rate, int originalNumOutputSamples) {
388     if (numOutputSamples == originalNumOutputSamples) {
389       return;
390     }
391     int newSampleRate = (int) (sampleRate / rate);
392     int oldSampleRate = sampleRate;
393     // Set these values to help with the integer math.
394     while (newSampleRate > (1 << 14) || oldSampleRate > (1 << 14)) {
395       newSampleRate /= 2;
396       oldSampleRate /= 2;
397     }
398     moveNewSamplesToPitchBuffer(originalNumOutputSamples);
399     // Leave at least one pitch sample in the buffer.
400     for (int position = 0; position < numPitchSamples - 1; position++) {
401       while ((oldRatePosition + 1) * newSampleRate > newRatePosition * oldSampleRate) {
402         enlargeOutputBufferIfNeeded(1);
403         for (int i = 0; i < numChannels; i++) {
404           outputBuffer[numOutputSamples * numChannels + i] =
405               interpolate(pitchBuffer, position + i, oldSampleRate, newSampleRate);
406         }
407         newRatePosition++;
408         numOutputSamples++;
409       }
410       oldRatePosition++;
411       if (oldRatePosition == oldSampleRate) {
412         oldRatePosition = 0;
413         Assertions.checkState(newRatePosition == newSampleRate);
414         newRatePosition = 0;
415       }
416     }
417     removePitchSamples(numPitchSamples - 1);
418   }
419 
skipPitchPeriod(short[] samples, int position, float speed, int period)420   private int skipPitchPeriod(short[] samples, int position, float speed, int period) {
421     // Skip over a pitch period, and copy period/speed samples to the output.
422     int newSamples;
423     if (speed >= 2.0f) {
424       newSamples = (int) (period / (speed - 1.0f));
425     } else {
426       newSamples = period;
427       remainingInputToCopy = (int) (period * (2.0f - speed) / (speed - 1.0f));
428     }
429     enlargeOutputBufferIfNeeded(newSamples);
430     overlapAdd(newSamples, numChannels, outputBuffer, numOutputSamples, samples, position, samples,
431         position + period);
432     numOutputSamples += newSamples;
433     return newSamples;
434   }
435 
insertPitchPeriod(short[] samples, int position, float speed, int period)436   private int insertPitchPeriod(short[] samples, int position, float speed, int period) {
437     // Insert a pitch period, and determine how much input to copy directly.
438     int newSamples;
439     if (speed < 0.5f) {
440       newSamples = (int) (period * speed / (1.0f - speed));
441     } else {
442       newSamples = period;
443       remainingInputToCopy = (int) (period * (2.0f * speed - 1.0f) / (1.0f - speed));
444     }
445     enlargeOutputBufferIfNeeded(period + newSamples);
446     System.arraycopy(samples, position * numChannels, outputBuffer, numOutputSamples * numChannels,
447         period * numChannels);
448     overlapAdd(newSamples, numChannels, outputBuffer, numOutputSamples + period, samples,
449         position + period, samples, position);
450     numOutputSamples += period + newSamples;
451     return newSamples;
452   }
453 
changeSpeed(float speed)454   private void changeSpeed(float speed) {
455     if (numInputSamples < maxRequired) {
456       return;
457     }
458     int numSamples = numInputSamples;
459     int position = 0;
460     do {
461       if (remainingInputToCopy > 0) {
462         position += copyInputToOutput(position);
463       } else {
464         int period = findPitchPeriod(inputBuffer, position, true);
465         if (speed > 1.0) {
466           position += period + skipPitchPeriod(inputBuffer, position, speed, period);
467         } else {
468           position += insertPitchPeriod(inputBuffer, position, speed, period);
469         }
470       }
471     } while (position + maxRequired <= numSamples);
472     removeProcessedInputSamples(position);
473   }
474 
processStreamInput()475   private void processStreamInput() {
476     // Resample as many pitch periods as we have buffered on the input.
477     int originalNumOutputSamples = numOutputSamples;
478     float s = speed / pitch;
479     if (s > 1.00001 || s < 0.99999) {
480       changeSpeed(s);
481     } else {
482       copyToOutput(inputBuffer, 0, numInputSamples);
483       numInputSamples = 0;
484     }
485     if (USE_CHORD_PITCH) {
486       if (pitch != 1.0f) {
487         adjustPitch(originalNumOutputSamples);
488       }
489     } else if (!USE_CHORD_PITCH && pitch != 1.0f) {
490       adjustRate(pitch, originalNumOutputSamples);
491     }
492   }
493 
overlapAdd(int numSamples, int numChannels, short[] out, int outPos, short[] rampDown, int rampDownPos, short[] rampUp, int rampUpPos)494   private static void overlapAdd(int numSamples, int numChannels, short[] out, int outPos,
495       short[] rampDown, int rampDownPos, short[] rampUp, int rampUpPos) {
496     for (int i = 0; i < numChannels; i++) {
497       int o = outPos * numChannels + i;
498       int u = rampUpPos * numChannels + i;
499       int d = rampDownPos * numChannels + i;
500       for (int t = 0; t < numSamples; t++) {
501         out[o] = (short) ((rampDown[d] * (numSamples - t) + rampUp[u] * t) / numSamples);
502         o += numChannels;
503         d += numChannels;
504         u += numChannels;
505       }
506     }
507   }
508 
overlapAddWithSeparation(int numSamples, int numChannels, int separation, short[] out, int outPos, short[] rampDown, int rampDownPos, short[] rampUp, int rampUpPos)509   private static void overlapAddWithSeparation(int numSamples, int numChannels, int separation,
510       short[] out, int outPos, short[] rampDown, int rampDownPos, short[] rampUp, int rampUpPos) {
511     for (int i = 0; i < numChannels; i++) {
512       int o = outPos * numChannels + i;
513       int u = rampUpPos * numChannels + i;
514       int d = rampDownPos * numChannels + i;
515       for (int t = 0; t < numSamples + separation; t++) {
516         if (t < separation) {
517           out[o] = (short) (rampDown[d] * (numSamples - t) / numSamples);
518           d += numChannels;
519         } else if (t < numSamples) {
520           out[o] =
521               (short) ((rampDown[d] * (numSamples - t) + rampUp[u] * (t - separation))
522                   / numSamples);
523           d += numChannels;
524           u += numChannels;
525         } else {
526           out[o] = (short) (rampUp[u] * (t - separation) / numSamples);
527           u += numChannels;
528         }
529         o += numChannels;
530       }
531     }
532   }
533 
534 }
535