1 /*
2 Copyright (c) 2012-2020 Maarten Baert <maarten-baert@hotmail.com>
3 
4 This file is part of SimpleScreenRecorder.
5 
6 SimpleScreenRecorder is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10 
11 SimpleScreenRecorder is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 GNU General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with SimpleScreenRecorder.  If not, see <http://www.gnu.org/licenses/>.
18 */
19 
20 #include "FastResampler.h"
21 
22 #include "Logger.h"
23 #include "CPUFeatures.h"
24 
25 /*
26 This resampler is based on the resampling algorithm described here:
27 https://ccrma.stanford.edu/~jos/resample/resample.html
28 The Speex resampler (a.k.a. Public Parrot Hack) uses almost the same algorithm.
29 
30 This implementation tries to find a balance between performance and quality. The quality is comparable to
31 quality level 3 of the Speex resampler, however this implementation is roughly 3 times faster
32 (mostly because of the SSE2-optimized floating point code).
33 
34 This resampler can handle non-fractional resampling ratios and is suitable for drift correction, but it is not a
35 full variable-rate resampler though: the filter coefficients are calculated for one specific resampling ratio,
36 independent of the drift ratio. The resampling ratio can be changed, but this will result in a small glitch.
37 The drift ratio can be changed at any time without introducing glitches, but since the filter coefficients won't be updated,
38 large drift ratios will result in aliasing (at least for downsampling). It is not meant for corrections larger than a few percent.
39 */
40 
41 // Kaiser window function (beta = 7)
42 // Stats for sinc filter with length 44:
43 // - best cutoff = 0.9060 * nyquist_freq
44 // - minimum stopband attenuation = 71.97 dB
45 // - average stopband attenuation = 87.92 dB
46 #define KAISER7_TABLE_LENGTH 64
47 float kaiser7_table[67] = {
48 	0.99920941f, 1.00000000f, 0.99920941f, 0.99684085f, 0.99290397f, 0.98741477f, 0.98039552f, 0.97187462f,
49 	0.96188648f, 0.95047123f, 0.93767458f, 0.92354751f, 0.90814596f, 0.89153054f, 0.87376619f, 0.85492177f,
50 	0.83506970f, 0.81428555f, 0.79264763f, 0.77023650f, 0.74713462f, 0.72342583f, 0.69919492f, 0.67452719f,
51 	0.64950799f, 0.62422230f, 0.59875427f, 0.57318683f, 0.54760127f, 0.52207683f, 0.49669039f, 0.47151607f,
52 	0.44662491f, 0.42208462f, 0.39795925f, 0.37430898f, 0.35118989f, 0.32865378f, 0.30674805f, 0.28551550f,
53 	0.26499434f, 0.24521804f, 0.22621536f, 0.20801032f, 0.19062228f, 0.17406592f, 0.15835140f, 0.14348447f,
54 	0.12946657f, 0.11629501f, 0.10396318f, 0.09246071f, 0.08177374f, 0.07188512f, 0.06277467f, 0.05441946f,
55 	0.04679404f, 0.03987076f, 0.03362001f, 0.02801054f, 0.02300971f, 0.01858379f, 0.01469823f, 0.01131792f,
56 	0.00840746f, 0.00593141f, 0.00000000f,
57 };
58 
59 // filter properties
60 #define FILTER_BASE_LENGTH  44.0f    // typical number of zero crossings for sinc filter
61 #define FILTER_BASE_SETS    256.0f   // typical number of filter samples per zero crossing
62 #define FILTER_CUTOFF       0.9060f  // bandwidth of sinc filter (relative to lowest Nyquist frequency)
63 
64 // This function calculates window function values based on cubic interpolation (Catmull-Rom spline).
WindowFunction(float * table,unsigned int table_length,float x)65 inline float WindowFunction(float* table, unsigned int table_length, float x) {
66 	x = fabs(x * (float) table_length);
67 	unsigned int index = (int) x; // float-to-int cast is faster than float-to-uint
68 	if(index >= table_length) {
69 		return 0.0f;
70 	} else {
71 		float *val = table + index;
72 		float t = x - (float) index, s = 1.0f - t;
73 		return s * s * (s * val[1] + t * (val[1] * 3.0f + (val[2] - val[0]) * 0.5f))
74 			 + t * t * (t * val[2] + s * (val[2] * 3.0f + (val[1] - val[3]) * 0.5f));
75 	}
76 }
77 
78 // sinc function = sin(x*pi)/(x*pi)
Sinc(float x)79 inline float Sinc(float x) {
80 	if(fabs(x) < 0.0001f)
81 		return 1.0f;
82 	x *= (float) M_PI;
83 	return sinf(x) / x;
84 }
85 
FastResampler(unsigned int channels,float gain)86 FastResampler::FastResampler(unsigned int channels, float gain) {
87 	assert(channels != 0);
88 
89 	// settings
90 	m_channels = channels;
91 	m_gain = gain;
92 	m_resample_ratio = 0.0;
93 	m_drift_ratio = 0.0;
94 
95 	// filter coefficient sets
96 	m_filter_length = 0;
97 	m_filter_rows = 0;
98 
99 	// CPU feature detection
100 #if SSR_USE_X86_ASM
101 	if(CPUFeatures::HasMMX() && CPUFeatures::HasSSE() && CPUFeatures::HasSSE2()) {
102 		switch(m_channels) {
103 			case 1:  m_firfilter2_ptr = &FastResampler_FirFilter2_C1_SSE2; break;
104 			case 2:  m_firfilter2_ptr = &FastResampler_FirFilter2_C2_SSE2; break;
105 			default: m_firfilter2_ptr = &FastResampler_FirFilter2_Cn_SSE2; break;
106 		}
107 	} else {
108 #endif
109 		switch(m_channels) {
110 			case 1:  m_firfilter2_ptr = &FastResampler_FirFilter2_C1_Fallback; break;
111 			case 2:  m_firfilter2_ptr = &FastResampler_FirFilter2_C2_Fallback; break;
112 			default: m_firfilter2_ptr = &FastResampler_FirFilter2_Cn_Fallback; break;
113 		}
114 #if SSR_USE_X86_ASM
115 	}
116 #endif
117 
118 }
119 
Resample(double resample_ratio,double drift_ratio,const float * samples_in,unsigned int sample_count_in,TempBuffer<float> * samples_out,unsigned int sample_offset_out)120 unsigned int FastResampler::Resample(double resample_ratio, double drift_ratio, const float* samples_in, unsigned int sample_count_in, TempBuffer<float>* samples_out, unsigned int sample_offset_out) {
121 
122 	// check the resampling ratio
123 	if(resample_ratio < 1.0e-3 || resample_ratio > 1.0e3) {
124 		Logger::LogError("[FastResampler::Resample] " + Logger::tr("Error: Resample ratio is out of range!"));
125 		throw ResamplerException();
126 	}
127 	if(drift_ratio < 1.0e-1 || drift_ratio > 1.0e1) {
128 		Logger::LogError("[FastResampler::Resample] " + Logger::tr("Error: Drift ratio is out of range!"));
129 		throw ResamplerException();
130 	}
131 
132 	// should we flush old samples first?
133 	if((m_resample_ratio != resample_ratio || samples_in == NULL) && m_filter_length != 0) {
134 
135 		// pad with zero samples
136 		unsigned int pad = m_filter_length / 2 * m_channels;
137 		std::fill_n(m_samples_memory.Reserve(pad), pad, 0.0f);
138 		m_samples_memory.Push(pad);
139 
140 		// reserve memory (with some margin since floating-point isn't 100% accurate)
141 		unsigned int available = m_samples_memory.GetSize() / m_channels;
142 		samples_out->Alloc((sample_offset_out + (unsigned int) lrint((double) available / (m_resample_ratio * m_drift_ratio) * 1.001) + 4) * m_channels, (sample_offset_out != 0));
143 
144 		// resample
145 		std::pair<unsigned int, unsigned int> done = ResampleBatch(m_samples_memory.GetData(), available, samples_out->GetData() + sample_offset_out * m_channels);
146 		sample_offset_out += done.second;
147 
148 	}
149 
150 	// is there new input data?
151 	if(samples_in == NULL) {
152 		ResetResamplerState();
153 		return sample_offset_out;
154 	}
155 
156 	// update filter if the resample ratio changes
157 	if(m_resample_ratio != resample_ratio) {
158 		Logger::LogInfo("[FastResampler::Resample] " + Logger::tr("Resample ratio is %1 (was %2).").arg(resample_ratio, 0, 'f', 4).arg(m_resample_ratio, 0, 'f', 4));
159 		m_resample_ratio = resample_ratio;
160 		UpdateFilterCoefficients();
161 		ResetResamplerState();
162 	}
163 	m_drift_ratio = drift_ratio;
164 
165 	// save input samples
166 	m_samples_memory.Push(samples_in, sample_count_in * m_channels);
167 
168 	// resample one batch at a time
169 	for( ; ; ) {
170 
171 		// reserve memory (with some margin since floating-point isn't 100% accurate)
172 		unsigned int available = m_samples_memory.GetSize() / m_channels;
173 		unsigned int batch = std::min(available, m_filter_length * 256); // needs to be limited to avoid some numerical problems
174 		samples_out->Alloc((sample_offset_out + (unsigned int) lrint((double) batch / (m_resample_ratio * m_drift_ratio) * 1.001) + 4) * m_channels, (sample_offset_out != 0));
175 
176 		// resample
177 		std::pair<unsigned int, unsigned int> done = ResampleBatch(m_samples_memory.GetData(), batch, samples_out->GetData() + sample_offset_out * m_channels);
178 		m_samples_memory.Pop(done.first * m_channels);
179 		sample_offset_out += done.second;
180 
181 		// is this the last batch?
182 		if(batch == available)
183 			break;
184 
185 	}
186 
187 	return sample_offset_out;
188 }
189 
GetInputLatency()190 double FastResampler::GetInputLatency() {
191 	if(m_filter_length == 0)
192 		return 0.0;
193 	int samples_left = (int) (m_samples_memory.GetSize() / m_channels) - (int) (m_filter_length / 2 - 1);
194 	return (double) samples_left - m_time / (double) m_filter_rows;
195 }
196 
GetOutputLatency()197 double FastResampler::GetOutputLatency() {
198 	if(m_filter_length == 0)
199 		return 0.0;
200 	return GetInputLatency() / (m_resample_ratio * m_drift_ratio);
201 }
202 
UpdateFilterCoefficients()203 void FastResampler::UpdateFilterCoefficients() {
204 
205 	// calculate filter parameters
206 	float filter_cutoff = FILTER_CUTOFF / (float) fmax(1.0, m_resample_ratio);
207 	m_filter_length = lrint(FILTER_BASE_LENGTH / filter_cutoff * 0.25f) * 4;
208 	m_filter_rows = std::max(1u, (unsigned int) lrint(FILTER_BASE_SETS * filter_cutoff));
209 
210 	// allocate memory for coefficients
211 	m_filter_coefficients.Alloc(m_filter_length * (m_filter_rows + 1));
212 
213 	// generate coefficients
214 	float *coef = m_filter_coefficients.GetData();
215 	float window = 1.0f / (float) (m_filter_length / 2);
216 	for(unsigned int j = 0; j <= m_filter_rows; ++j) {
217 		float shift = 1.0f - (float) j / (float) m_filter_rows - (float) (m_filter_length / 2);
218 		for(unsigned int i = 0; i < m_filter_length; ++i) {
219 			float x = (float) i + shift;
220 			*(coef++) = WindowFunction(kaiser7_table, KAISER7_TABLE_LENGTH, x * window) * Sinc(x * filter_cutoff) * filter_cutoff * m_gain;
221 		}
222 	}
223 
224 }
225 
ResetResamplerState()226 void FastResampler::ResetResamplerState() {
227 	m_time = 0.0;
228 	m_samples_memory.Clear();
229 	unsigned int pad = (m_filter_length / 2 - 1) * m_channels;
230 	std::fill_n(m_samples_memory.Reserve(pad), pad, 0.0f);
231 	m_samples_memory.Push(pad);
232 }
233 
ResampleBatch(float * samples_in,unsigned int sample_count_in,float * samples_out)234 std::pair<unsigned int, unsigned int> FastResampler::ResampleBatch(float* samples_in, unsigned int sample_count_in, float* samples_out) {
235 
236 	// prepare for resampling
237 	double step = (double) m_filter_rows * m_resample_ratio * m_drift_ratio;
238 
239 	// actual resampling
240 	unsigned int output_samples = 0;
241 	for( ; ; ) {
242 
243 		// calculate the input sample and filter row
244 		unsigned int index = (int) m_time; // float-to-int cast is faster than float-to-uint
245 		unsigned int sample = index / m_filter_rows, row = index % m_filter_rows;
246 
247 		// do we have the required input samples?
248 		if(sample + m_filter_length > sample_count_in) {
249 			unsigned int input_samples = std::min(sample_count_in, sample);
250 			m_time -= input_samples * m_filter_rows;
251 			return std::make_pair(input_samples, output_samples);
252 		}
253 
254 		// calculate the next sample
255 		float *coef1 = m_filter_coefficients.GetData() + m_filter_length * row;
256 		float *coef2 = coef1 + m_filter_length;
257 		float frac = m_time - (double) index;
258 		float *input = samples_in + sample * m_channels;
259 		m_firfilter2_ptr(m_channels, m_filter_length, coef1, coef2, frac, input, samples_out);
260 		samples_out += m_channels;
261 		++output_samples;
262 
263 		// increase the time
264 		m_time += step;
265 
266 	}
267 
268 }
269