/* Sonic library
   Copyright 2010
   Bill Cox
   This file is part of the Sonic Library.

   This file is licensed under the Apache 2.0 license.
*/

/* Include guard: without it, including this header twice redeclares every
   typedef and, when SONIC_SPECTROGRAM is defined, redefines
   struct sonicBitmapStruct, which is an error. */
#ifndef SONIC_H_
#define SONIC_H_

/*
The Sonic Library implements a new algorithm invented by Bill Cox for the
specific purpose of speeding up speech by high factors at high quality.  It
generates smooth speech at speed up factors as high as 6X, possibly more.  It is
also capable of slowing down speech, and generates high quality results
regardless of the speed up or slow down factor.  For speeding up speech by 2X or
more, the following equation is used:

    newSamples = period/(speed - 1.0)
    scale = 1.0/newSamples;

where period is the current pitch period, determined using AMDF or any other
pitch estimator, and speed is the speedup factor.  If the current position in
the input stream is pointed to by "samples", and the current output stream
position is pointed to by "out", then newSamples number of samples can be
generated with:

    out[t] = (samples[t]*(newSamples - t) + samples[t + period]*t)/newSamples;

where t = 0 to newSamples - 1.

For speed factors < 2X, the PICOLA algorithm is used.  The above
algorithm is first used to double the speed of one pitch period.  Then, enough
input is directly copied from the input to the output to achieve the desired
speed up factor, where 1.0 < speed < 2.0.  The amount of data copied is derived:

    speed = (2*period + length)/(period + length)
    speed*length + speed*period = 2*period + length
    length(speed - 1) = 2*period - speed*period
    length = period*(2 - speed)/(speed - 1)

For slowing down speech where 0.5 < speed < 1.0, a pitch period is inserted into
the output twice, and length of input is copied from the input to the output
until the output desired speed is reached.  The length of data copied is:

    length = period*(speed - 0.5)/(1 - speed)

NOTE(review): solving speed = (period + length)/(2*period + length) for length
gives period*(2*speed - 1)/(1 - speed), i.e. twice the value above.  Confirm
which form the implementation actually uses.

For slow down factors below 0.5, no data is copied, and an algorithm
similar to high speed factors is used.
*/

/* Uncomment this to use sin-wav based overlap add which in theory can improve
   sound quality slightly, at the expense of lots of floating point math. */
/* #define SONIC_USE_SIN */

#ifdef __cplusplus
extern "C" {
#endif

/* This specifies the range of voice pitches we try to match.
   Note that if we go lower than 65, we could overflow in findPitchInRange */
#define SONIC_MIN_PITCH 65
#define SONIC_MAX_PITCH 400

/* These are used to down-sample some inputs to improve speed */
#define SONIC_AMDF_FREQ 4000

/* Opaque stream handle.  The struct is only forward-declared in this header,
   so callers manipulate streams exclusively through the functions below. */
struct sonicStreamStruct;
typedef struct sonicStreamStruct* sonicStream;

/* For all of the following functions, numChannels is multiplied by numSamples
   to determine the actual number of values read or returned. */

/* Create a sonic stream.  Return NULL only if we are out of memory and cannot
   allocate the stream. Set numChannels to 1 for mono, and 2 for stereo. */
sonicStream sonicCreateStream(int sampleRate, int numChannels);
/* Destroy the sonic stream. */
void sonicDestroyStream(sonicStream stream);
/* Use this to write floating point data to be sped up or down into the stream.
   Values must be between -1 and 1.  Return 0 if memory realloc failed,
   otherwise 1 */
int sonicWriteFloatToStream(sonicStream stream, float* samples, int numSamples);
/* Use this to write 16-bit data to be sped up or down into the stream.
   Return 0 if memory realloc failed, otherwise 1 */
int sonicWriteShortToStream(sonicStream stream, short* samples, int numSamples);
/* Use this to write 8-bit unsigned data to be sped up or down into the stream.
   Return 0 if memory realloc failed, otherwise 1 */
int sonicWriteUnsignedCharToStream(sonicStream stream, unsigned char* samples,
                                   int numSamples);
/* Use this to read floating point data out of the stream.  Sometimes no data
   will be available, and zero is returned, which is not an error condition. */
int sonicReadFloatFromStream(sonicStream stream, float* samples,
                             int maxSamples);
/* Use this to read 16-bit data out of the stream.  Sometimes no data will
   be available, and zero is returned, which is not an error condition. */
int sonicReadShortFromStream(sonicStream stream, short* samples,
                             int maxSamples);
/* Use this to read 8-bit unsigned data out of the stream.  Sometimes no data
   will be available, and zero is returned, which is not an error condition. */
int sonicReadUnsignedCharFromStream(sonicStream stream, unsigned char* samples,
                                    int maxSamples);
/* Force the sonic stream to generate output using whatever data it currently
   has.  No extra delay will be added to the output, but flushing in the middle
   of words could introduce distortion. */
int sonicFlushStream(sonicStream stream);
/* Return the number of samples in the output buffer */
int sonicSamplesAvailable(sonicStream stream);
/* Get the speed of the stream. */
float sonicGetSpeed(sonicStream stream);
/* Set the speed of the stream. */
void sonicSetSpeed(sonicStream stream, float speed);
/* Get the pitch of the stream. */
float sonicGetPitch(sonicStream stream);
/* Set the pitch of the stream. */
void sonicSetPitch(sonicStream stream, float pitch);
/* Get the rate of the stream. */
float sonicGetRate(sonicStream stream);
/* Set the rate of the stream. */
void sonicSetRate(sonicStream stream, float rate);
/* Get the scaling factor of the stream. */
float sonicGetVolume(sonicStream stream);
/* Set the scaling factor of the stream. */
void sonicSetVolume(sonicStream stream, float volume);
/* Get the chord pitch setting. */
int sonicGetChordPitch(sonicStream stream);
/* Set chord pitch mode on or off.  Default is off.  See the documentation
   page for a description of this feature. */
void sonicSetChordPitch(sonicStream stream, int useChordPitch);
/* Get the quality setting. */
int sonicGetQuality(sonicStream stream);
/* Set the "quality".  Default 0 is virtually as good as 1, but very much
 * faster. */
void sonicSetQuality(sonicStream stream, int quality);
/* Get the sample rate of the stream. */
int sonicGetSampleRate(sonicStream stream);
/* Set the sample rate of the stream.  This will drop any samples that have not
 * been read. */
void sonicSetSampleRate(sonicStream stream, int sampleRate);
/* Get the number of channels. */
int sonicGetNumChannels(sonicStream stream);
/* Set the number of channels.  This will drop any samples that have not been
 * read. */
void sonicSetNumChannels(sonicStream stream, int numChannels);
/* This is a non-stream oriented interface to just change the speed of a sound
   sample.  It works in-place on the sample array, so there must be at least
   speed*numSamples available space in the array. Returns the new number of
   samples.
   NOTE(review): slowing down (speed < 1) produces more output than input, so
   the required capacity presumably grows as numSamples/speed rather than
   speed*numSamples -- confirm against the implementation before relying on
   the figure above. */
int sonicChangeFloatSpeed(float* samples, int numSamples, float speed,
                          float pitch, float rate, float volume,
                          int useChordPitch, int sampleRate, int numChannels);
/* This is a non-stream oriented interface to just change the speed of a sound
   sample.  It works in-place on the sample array, so there must be at least
   speed*numSamples available space in the array. Returns the new number of
   samples.
   NOTE(review): slowing down (speed < 1) produces more output than input, so
   the required capacity presumably grows as numSamples/speed rather than
   speed*numSamples -- confirm against the implementation before relying on
   the figure above. */
int sonicChangeShortSpeed(short* samples, int numSamples, float speed,
                          float pitch, float rate, float volume,
                          int useChordPitch, int sampleRate, int numChannels);

#ifdef SONIC_SPECTROGRAM
/*
This code generates high quality spectrograms from sound samples, using
Time-Aliased-FFTs as described at:

    https://github.com/waywardgeek/spectrogram

Basically, two adjacent pitch periods are overlap-added to create a sound
sample that accurately represents the speech sound at that moment in time.
This set of samples is converted to a spectral line using an FFT, and the result
is saved as a single spectral line at that moment in time.  The resulting
spectral lines vary in resolution (it is equal to the number of samples in the
pitch period), and the spacing of spectral lines also varies (proportional to
the number of samples in the pitch period).

To generate a bitmap, linear interpolation is used to render the grayscale
value at any particular point in time and frequency.
*/

#define SONIC_MAX_SPECTRUM_FREQ 5000

/* sonicSpectrogram is an opaque handle (the struct is only forward-declared
   here); sonicBitmap points at the struct defined just below. */
struct sonicSpectrogramStruct;
struct sonicBitmapStruct;
typedef struct sonicSpectrogramStruct* sonicSpectrogram;
typedef struct sonicBitmapStruct* sonicBitmap;

/* sonicBitmap objects represent spectrograms as grayscale bitmaps where each
   pixel is from 0 (black) to 255 (white).  Bitmaps are rows*cols in size.
   Rows are indexed top to bottom and columns are indexed left to right */
struct sonicBitmapStruct {
  unsigned char* data;
  int numRows;
  int numCols;
};

/* Enable computation of a spectrogram on the fly. */
void sonicComputeSpectrogram(sonicStream stream);

/* Get the spectrogram. */
sonicSpectrogram sonicGetSpectrogram(sonicStream stream);

/* Create an empty spectrogram. Called automatically if sonicComputeSpectrogram
   has been called. */
sonicSpectrogram sonicCreateSpectrogram(int sampleRate);

/* Destroy the spectrogram.  This is called automatically when calling
   sonicDestroyStream. */
void sonicDestroySpectrogram(sonicSpectrogram spectrogram);

/* Convert the spectrogram to a bitmap. Caller must destroy bitmap when done. */
sonicBitmap sonicConvertSpectrogramToBitmap(sonicSpectrogram spectrogram,
                                            int numRows, int numCols);

/* Destroy a bitmap returned by sonicConvertSpectrogramToBitmap. */
void sonicDestroyBitmap(sonicBitmap bitmap);

/* Write the bitmap to the file named fileName.
   NOTE(review): presumably the output is a PGM image, judging by the function
   name; the meaning of the int return value is not visible from this header --
   confirm both against the implementation. */
int sonicWritePGM(sonicBitmap bitmap, char* fileName);

/* Add two pitch periods worth of samples to the spectrogram.  There must be
   2*period samples.  Time should advance one pitch period for each call to
   this function. */
void sonicAddPitchPeriodToSpectrogram(sonicSpectrogram spectrogram,
                                      short* samples, int period,
                                      int numChannels);
#endif /* SONIC_SPECTROGRAM */

#ifdef __cplusplus
}
#endif

#endif /* SONIC_H_ */