1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * Copyright (C) 2010 Bill Cox, Sonic Library 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.mozilla.thirdparty.com.google.android.exoplayer2.audio; 18 19 import org.mozilla.thirdparty.com.google.android.exoplayer2.util.Assertions; 20 import java.nio.ShortBuffer; 21 import java.util.Arrays; 22 23 /** 24 * Sonic audio stream processor for time/pitch stretching. 25 * <p> 26 * Based on https://github.com/waywardgeek/sonic. 27 */ 28 /* package */ final class Sonic { 29 30 private static final boolean USE_CHORD_PITCH = false; 31 private static final int MINIMUM_PITCH = 65; 32 private static final int MAXIMUM_PITCH = 400; 33 private static final int AMDF_FREQUENCY = 4000; 34 35 private final int sampleRate; 36 private final int numChannels; 37 private final int minPeriod; 38 private final int maxPeriod; 39 private final int maxRequired; 40 private final short[] downSampleBuffer; 41 42 private int inputBufferSize; 43 private short[] inputBuffer; 44 private int outputBufferSize; 45 private short[] outputBuffer; 46 private int pitchBufferSize; 47 private short[] pitchBuffer; 48 private int oldRatePosition; 49 private int newRatePosition; 50 private float speed; 51 private float pitch; 52 private int numInputSamples; 53 private int numOutputSamples; 54 private int numPitchSamples; 55 private int remainingInputToCopy; 56 private int prevPeriod; 57 private int prevMinDiff; 58 private int minDiff; 59 private int maxDiff; 60 61 /** 62 * Creates a new Sonic audio stream processor. 63 * 64 * @param sampleRate The sample rate of input audio. 65 * @param numChannels The number of channels in the input audio. 66 */ Sonic(int sampleRate, int numChannels)67 public Sonic(int sampleRate, int numChannels) { 68 this.sampleRate = sampleRate; 69 this.numChannels = numChannels; 70 minPeriod = sampleRate / MAXIMUM_PITCH; 71 maxPeriod = sampleRate / MINIMUM_PITCH; 72 maxRequired = 2 * maxPeriod; 73 downSampleBuffer = new short[maxRequired]; 74 inputBufferSize = maxRequired; 75 inputBuffer = new short[maxRequired * numChannels]; 76 outputBufferSize = maxRequired; 77 outputBuffer = new short[maxRequired * numChannels]; 78 pitchBufferSize = maxRequired; 79 pitchBuffer = new short[maxRequired * numChannels]; 80 oldRatePosition = 0; 81 newRatePosition = 0; 82 prevPeriod = 0; 83 speed = 1.0f; 84 pitch = 1.0f; 85 } 86 87 /** 88 * Sets the output speed. 89 */ setSpeed(float speed)90 public void setSpeed(float speed) { 91 this.speed = speed; 92 } 93 94 /** 95 * Gets the output speed. 96 */ getSpeed()97 public float getSpeed() { 98 return speed; 99 } 100 101 /** 102 * Sets the output pitch. 103 */ setPitch(float pitch)104 public void setPitch(float pitch) { 105 this.pitch = pitch; 106 } 107 108 /** 109 * Gets the output pitch. 110 */ getPitch()111 public float getPitch() { 112 return pitch; 113 } 114 115 /** 116 * Queues remaining data from {@code buffer}, and advances its position by the number of bytes 117 * consumed. 118 * 119 * @param buffer A {@link ShortBuffer} containing input data between its position and limit. 120 */ queueInput(ShortBuffer buffer)121 public void queueInput(ShortBuffer buffer) { 122 int samplesToWrite = buffer.remaining() / numChannels; 123 int bytesToWrite = samplesToWrite * numChannels * 2; 124 enlargeInputBufferIfNeeded(samplesToWrite); 125 buffer.get(inputBuffer, numInputSamples * numChannels, bytesToWrite / 2); 126 numInputSamples += samplesToWrite; 127 processStreamInput(); 128 } 129 130 /** 131 * Gets available output, outputting to the start of {@code buffer}. The buffer's position will be 132 * advanced by the number of bytes written. 133 * 134 * @param buffer A {@link ShortBuffer} into which output will be written. 135 */ getOutput(ShortBuffer buffer)136 public void getOutput(ShortBuffer buffer) { 137 int samplesToRead = Math.min(buffer.remaining() / numChannels, numOutputSamples); 138 buffer.put(outputBuffer, 0, samplesToRead * numChannels); 139 numOutputSamples -= samplesToRead; 140 System.arraycopy(outputBuffer, samplesToRead * numChannels, outputBuffer, 0, 141 numOutputSamples * numChannels); 142 } 143 144 /** 145 * Forces generating output using whatever data has been queued already. No extra delay will be 146 * added to the output, but flushing in the middle of words could introduce distortion. 147 */ queueEndOfStream()148 public void queueEndOfStream() { 149 int remainingSamples = numInputSamples; 150 float s = speed / pitch; 151 int expectedOutputSamples = 152 numOutputSamples + (int) ((remainingSamples / s + numPitchSamples) / pitch + 0.5f); 153 154 // Add enough silence to flush both input and pitch buffers. 155 enlargeInputBufferIfNeeded(remainingSamples + 2 * maxRequired); 156 for (int xSample = 0; xSample < 2 * maxRequired * numChannels; xSample++) { 157 inputBuffer[remainingSamples * numChannels + xSample] = 0; 158 } 159 numInputSamples += 2 * maxRequired; 160 processStreamInput(); 161 // Throw away any extra samples we generated due to the silence we added. 162 if (numOutputSamples > expectedOutputSamples) { 163 numOutputSamples = expectedOutputSamples; 164 } 165 // Empty input and pitch buffers. 166 numInputSamples = 0; 167 remainingInputToCopy = 0; 168 numPitchSamples = 0; 169 } 170 171 /** 172 * Returns the number of output samples that can be read with {@link #getOutput(ShortBuffer)}. 173 */ getSamplesAvailable()174 public int getSamplesAvailable() { 175 return numOutputSamples; 176 } 177 178 // Internal methods. 179 enlargeOutputBufferIfNeeded(int numSamples)180 private void enlargeOutputBufferIfNeeded(int numSamples) { 181 if (numOutputSamples + numSamples > outputBufferSize) { 182 outputBufferSize += (outputBufferSize / 2) + numSamples; 183 outputBuffer = Arrays.copyOf(outputBuffer, outputBufferSize * numChannels); 184 } 185 } 186 enlargeInputBufferIfNeeded(int numSamples)187 private void enlargeInputBufferIfNeeded(int numSamples) { 188 if (numInputSamples + numSamples > inputBufferSize) { 189 inputBufferSize += (inputBufferSize / 2) + numSamples; 190 inputBuffer = Arrays.copyOf(inputBuffer, inputBufferSize * numChannels); 191 } 192 } 193 removeProcessedInputSamples(int position)194 private void removeProcessedInputSamples(int position) { 195 int remainingSamples = numInputSamples - position; 196 System.arraycopy(inputBuffer, position * numChannels, inputBuffer, 0, 197 remainingSamples * numChannels); 198 numInputSamples = remainingSamples; 199 } 200 copyToOutput(short[] samples, int position, int numSamples)201 private void copyToOutput(short[] samples, int position, int numSamples) { 202 enlargeOutputBufferIfNeeded(numSamples); 203 System.arraycopy(samples, position * numChannels, outputBuffer, numOutputSamples * numChannels, 204 numSamples * numChannels); 205 numOutputSamples += numSamples; 206 } 207 copyInputToOutput(int position)208 private int copyInputToOutput(int position) { 209 int numSamples = Math.min(maxRequired, remainingInputToCopy); 210 copyToOutput(inputBuffer, position, numSamples); 211 remainingInputToCopy -= numSamples; 212 return numSamples; 213 } 214 downSampleInput(short[] samples, int position, int skip)215 private void downSampleInput(short[] samples, int position, int skip) { 216 // If skip is greater than one, average skip samples together and write them to the down-sample 217 // buffer. If numChannels is greater than one, mix the channels together as we down sample. 218 int numSamples = maxRequired / skip; 219 int samplesPerValue = numChannels * skip; 220 position *= numChannels; 221 for (int i = 0; i < numSamples; i++) { 222 int value = 0; 223 for (int j = 0; j < samplesPerValue; j++) { 224 value += samples[position + i * samplesPerValue + j]; 225 } 226 value /= samplesPerValue; 227 downSampleBuffer[i] = (short) value; 228 } 229 } 230 findPitchPeriodInRange(short[] samples, int position, int minPeriod, int maxPeriod)231 private int findPitchPeriodInRange(short[] samples, int position, int minPeriod, int maxPeriod) { 232 // Find the best frequency match in the range, and given a sample skip multiple. For now, just 233 // find the pitch of the first channel. 234 int bestPeriod = 0; 235 int worstPeriod = 255; 236 int minDiff = 1; 237 int maxDiff = 0; 238 position *= numChannels; 239 for (int period = minPeriod; period <= maxPeriod; period++) { 240 int diff = 0; 241 for (int i = 0; i < period; i++) { 242 short sVal = samples[position + i]; 243 short pVal = samples[position + period + i]; 244 diff += sVal >= pVal ? sVal - pVal : pVal - sVal; 245 } 246 // Note that the highest number of samples we add into diff will be less than 256, since we 247 // skip samples. Thus, diff is a 24 bit number, and we can safely multiply by numSamples 248 // without overflow. 249 if (diff * bestPeriod < minDiff * period) { 250 minDiff = diff; 251 bestPeriod = period; 252 } 253 if (diff * worstPeriod > maxDiff * period) { 254 maxDiff = diff; 255 worstPeriod = period; 256 } 257 } 258 this.minDiff = minDiff / bestPeriod; 259 this.maxDiff = maxDiff / worstPeriod; 260 return bestPeriod; 261 } 262 263 /** 264 * Returns whether the previous pitch period estimate is a better approximation, which can occur 265 * at the abrupt end of voiced words. 266 */ previousPeriodBetter(int minDiff, int maxDiff, boolean preferNewPeriod)267 private boolean previousPeriodBetter(int minDiff, int maxDiff, boolean preferNewPeriod) { 268 if (minDiff == 0 || prevPeriod == 0) { 269 return false; 270 } 271 if (preferNewPeriod) { 272 if (maxDiff > minDiff * 3) { 273 // Got a reasonable match this period 274 return false; 275 } 276 if (minDiff * 2 <= prevMinDiff * 3) { 277 // Mismatch is not that much greater this period 278 return false; 279 } 280 } else { 281 if (minDiff <= prevMinDiff) { 282 return false; 283 } 284 } 285 return true; 286 } 287 findPitchPeriod(short[] samples, int position, boolean preferNewPeriod)288 private int findPitchPeriod(short[] samples, int position, boolean preferNewPeriod) { 289 // Find the pitch period. This is a critical step, and we may have to try multiple ways to get a 290 // good answer. This version uses AMDF. To improve speed, we down sample by an integer factor 291 // get in the 11 kHz range, and then do it again with a narrower frequency range without down 292 // sampling. 293 int period; 294 int retPeriod; 295 int skip = sampleRate > AMDF_FREQUENCY ? sampleRate / AMDF_FREQUENCY : 1; 296 if (numChannels == 1 && skip == 1) { 297 period = findPitchPeriodInRange(samples, position, minPeriod, maxPeriod); 298 } else { 299 downSampleInput(samples, position, skip); 300 period = findPitchPeriodInRange(downSampleBuffer, 0, minPeriod / skip, maxPeriod / skip); 301 if (skip != 1) { 302 period *= skip; 303 int minP = period - (skip * 4); 304 int maxP = period + (skip * 4); 305 if (minP < minPeriod) { 306 minP = minPeriod; 307 } 308 if (maxP > maxPeriod) { 309 maxP = maxPeriod; 310 } 311 if (numChannels == 1) { 312 period = findPitchPeriodInRange(samples, position, minP, maxP); 313 } else { 314 downSampleInput(samples, position, 1); 315 period = findPitchPeriodInRange(downSampleBuffer, 0, minP, maxP); 316 } 317 } 318 } 319 if (previousPeriodBetter(minDiff, maxDiff, preferNewPeriod)) { 320 retPeriod = prevPeriod; 321 } else { 322 retPeriod = period; 323 } 324 prevMinDiff = minDiff; 325 prevPeriod = period; 326 return retPeriod; 327 } 328 moveNewSamplesToPitchBuffer(int originalNumOutputSamples)329 private void moveNewSamplesToPitchBuffer(int originalNumOutputSamples) { 330 int numSamples = numOutputSamples - originalNumOutputSamples; 331 if (numPitchSamples + numSamples > pitchBufferSize) { 332 pitchBufferSize += (pitchBufferSize / 2) + numSamples; 333 pitchBuffer = Arrays.copyOf(pitchBuffer, pitchBufferSize * numChannels); 334 } 335 System.arraycopy(outputBuffer, originalNumOutputSamples * numChannels, pitchBuffer, 336 numPitchSamples * numChannels, numSamples * numChannels); 337 numOutputSamples = originalNumOutputSamples; 338 numPitchSamples += numSamples; 339 } 340 removePitchSamples(int numSamples)341 private void removePitchSamples(int numSamples) { 342 if (numSamples == 0) { 343 return; 344 } 345 System.arraycopy(pitchBuffer, numSamples * numChannels, pitchBuffer, 0, 346 (numPitchSamples - numSamples) * numChannels); 347 numPitchSamples -= numSamples; 348 } 349 adjustPitch(int originalNumOutputSamples)350 private void adjustPitch(int originalNumOutputSamples) { 351 // Latency due to pitch changes could be reduced by looking at past samples to determine pitch, 352 // rather than future. 353 if (numOutputSamples == originalNumOutputSamples) { 354 return; 355 } 356 moveNewSamplesToPitchBuffer(originalNumOutputSamples); 357 int position = 0; 358 while (numPitchSamples - position >= maxRequired) { 359 int period = findPitchPeriod(pitchBuffer, position, false); 360 int newPeriod = (int) (period / pitch); 361 enlargeOutputBufferIfNeeded(newPeriod); 362 if (pitch >= 1.0f) { 363 overlapAdd(newPeriod, numChannels, outputBuffer, numOutputSamples, pitchBuffer, position, 364 pitchBuffer, position + period - newPeriod); 365 } else { 366 int separation = newPeriod - period; 367 overlapAddWithSeparation(period, numChannels, separation, outputBuffer, numOutputSamples, 368 pitchBuffer, position, pitchBuffer, position); 369 } 370 numOutputSamples += newPeriod; 371 position += period; 372 } 373 removePitchSamples(position); 374 } 375 interpolate(short[] in, int inPos, int oldSampleRate, int newSampleRate)376 private short interpolate(short[] in, int inPos, int oldSampleRate, int newSampleRate) { 377 short left = in[inPos * numChannels]; 378 short right = in[inPos * numChannels + numChannels]; 379 int position = newRatePosition * oldSampleRate; 380 int leftPosition = oldRatePosition * newSampleRate; 381 int rightPosition = (oldRatePosition + 1) * newSampleRate; 382 int ratio = rightPosition - position; 383 int width = rightPosition - leftPosition; 384 return (short) ((ratio * left + (width - ratio) * right) / width); 385 } 386 adjustRate(float rate, int originalNumOutputSamples)387 private void adjustRate(float rate, int originalNumOutputSamples) { 388 if (numOutputSamples == originalNumOutputSamples) { 389 return; 390 } 391 int newSampleRate = (int) (sampleRate / rate); 392 int oldSampleRate = sampleRate; 393 // Set these values to help with the integer math. 394 while (newSampleRate > (1 << 14) || oldSampleRate > (1 << 14)) { 395 newSampleRate /= 2; 396 oldSampleRate /= 2; 397 } 398 moveNewSamplesToPitchBuffer(originalNumOutputSamples); 399 // Leave at least one pitch sample in the buffer. 400 for (int position = 0; position < numPitchSamples - 1; position++) { 401 while ((oldRatePosition + 1) * newSampleRate > newRatePosition * oldSampleRate) { 402 enlargeOutputBufferIfNeeded(1); 403 for (int i = 0; i < numChannels; i++) { 404 outputBuffer[numOutputSamples * numChannels + i] = 405 interpolate(pitchBuffer, position + i, oldSampleRate, newSampleRate); 406 } 407 newRatePosition++; 408 numOutputSamples++; 409 } 410 oldRatePosition++; 411 if (oldRatePosition == oldSampleRate) { 412 oldRatePosition = 0; 413 Assertions.checkState(newRatePosition == newSampleRate); 414 newRatePosition = 0; 415 } 416 } 417 removePitchSamples(numPitchSamples - 1); 418 } 419 skipPitchPeriod(short[] samples, int position, float speed, int period)420 private int skipPitchPeriod(short[] samples, int position, float speed, int period) { 421 // Skip over a pitch period, and copy period/speed samples to the output. 422 int newSamples; 423 if (speed >= 2.0f) { 424 newSamples = (int) (period / (speed - 1.0f)); 425 } else { 426 newSamples = period; 427 remainingInputToCopy = (int) (period * (2.0f - speed) / (speed - 1.0f)); 428 } 429 enlargeOutputBufferIfNeeded(newSamples); 430 overlapAdd(newSamples, numChannels, outputBuffer, numOutputSamples, samples, position, samples, 431 position + period); 432 numOutputSamples += newSamples; 433 return newSamples; 434 } 435 insertPitchPeriod(short[] samples, int position, float speed, int period)436 private int insertPitchPeriod(short[] samples, int position, float speed, int period) { 437 // Insert a pitch period, and determine how much input to copy directly. 438 int newSamples; 439 if (speed < 0.5f) { 440 newSamples = (int) (period * speed / (1.0f - speed)); 441 } else { 442 newSamples = period; 443 remainingInputToCopy = (int) (period * (2.0f * speed - 1.0f) / (1.0f - speed)); 444 } 445 enlargeOutputBufferIfNeeded(period + newSamples); 446 System.arraycopy(samples, position * numChannels, outputBuffer, numOutputSamples * numChannels, 447 period * numChannels); 448 overlapAdd(newSamples, numChannels, outputBuffer, numOutputSamples + period, samples, 449 position + period, samples, position); 450 numOutputSamples += period + newSamples; 451 return newSamples; 452 } 453 changeSpeed(float speed)454 private void changeSpeed(float speed) { 455 if (numInputSamples < maxRequired) { 456 return; 457 } 458 int numSamples = numInputSamples; 459 int position = 0; 460 do { 461 if (remainingInputToCopy > 0) { 462 position += copyInputToOutput(position); 463 } else { 464 int period = findPitchPeriod(inputBuffer, position, true); 465 if (speed > 1.0) { 466 position += period + skipPitchPeriod(inputBuffer, position, speed, period); 467 } else { 468 position += insertPitchPeriod(inputBuffer, position, speed, period); 469 } 470 } 471 } while (position + maxRequired <= numSamples); 472 removeProcessedInputSamples(position); 473 } 474 processStreamInput()475 private void processStreamInput() { 476 // Resample as many pitch periods as we have buffered on the input. 477 int originalNumOutputSamples = numOutputSamples; 478 float s = speed / pitch; 479 if (s > 1.00001 || s < 0.99999) { 480 changeSpeed(s); 481 } else { 482 copyToOutput(inputBuffer, 0, numInputSamples); 483 numInputSamples = 0; 484 } 485 if (USE_CHORD_PITCH) { 486 if (pitch != 1.0f) { 487 adjustPitch(originalNumOutputSamples); 488 } 489 } else if (!USE_CHORD_PITCH && pitch != 1.0f) { 490 adjustRate(pitch, originalNumOutputSamples); 491 } 492 } 493 overlapAdd(int numSamples, int numChannels, short[] out, int outPos, short[] rampDown, int rampDownPos, short[] rampUp, int rampUpPos)494 private static void overlapAdd(int numSamples, int numChannels, short[] out, int outPos, 495 short[] rampDown, int rampDownPos, short[] rampUp, int rampUpPos) { 496 for (int i = 0; i < numChannels; i++) { 497 int o = outPos * numChannels + i; 498 int u = rampUpPos * numChannels + i; 499 int d = rampDownPos * numChannels + i; 500 for (int t = 0; t < numSamples; t++) { 501 out[o] = (short) ((rampDown[d] * (numSamples - t) + rampUp[u] * t) / numSamples); 502 o += numChannels; 503 d += numChannels; 504 u += numChannels; 505 } 506 } 507 } 508 overlapAddWithSeparation(int numSamples, int numChannels, int separation, short[] out, int outPos, short[] rampDown, int rampDownPos, short[] rampUp, int rampUpPos)509 private static void overlapAddWithSeparation(int numSamples, int numChannels, int separation, 510 short[] out, int outPos, short[] rampDown, int rampDownPos, short[] rampUp, int rampUpPos) { 511 for (int i = 0; i < numChannels; i++) { 512 int o = outPos * numChannels + i; 513 int u = rampUpPos * numChannels + i; 514 int d = rampDownPos * numChannels + i; 515 for (int t = 0; t < numSamples + separation; t++) { 516 if (t < separation) { 517 out[o] = (short) (rampDown[d] * (numSamples - t) / numSamples); 518 d += numChannels; 519 } else if (t < numSamples) { 520 out[o] = 521 (short) ((rampDown[d] * (numSamples - t) + rampUp[u] * (t - separation)) 522 / numSamples); 523 d += numChannels; 524 u += numChannels; 525 } else { 526 out[o] = (short) (rampUp[u] * (t - separation) / numSamples); 527 u += numChannels; 528 } 529 o += numChannels; 530 } 531 } 532 } 533 534 } 535