1 /*
2 * Copyright (C) 2011 Google Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of
14 * its contributors may be used to endorse or promote products derived
15 * from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
18 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
21 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #include "third_party/blink/renderer/platform/audio/sinc_resampler.h"
30
31 #include "build/build_config.h"
32 #include "third_party/blink/renderer/platform/audio/audio_bus.h"
33 #include "third_party/blink/renderer/platform/wtf/math_extras.h"
34
35 #if defined(ARCH_CPU_X86_FAMILY)
36 #include <emmintrin.h>
37 #endif
38
39 // Input buffer layout, dividing the total buffer into regions (r0 - r5):
40 //
41 // |----------------|-----------------------------------------|----------------|
42 //
43 // blockSize + kernelSize / 2
44 // <--------------------------------------------------------->
45 // r0
46 //
47 // kernelSize / 2 kernelSize / 2 kernelSize / 2 kernelSize / 2
48 // <---------------> <---------------> <---------------> <--------------->
49 // r1 r2 r3 r4
50 //
51 // blockSize
52 // <---------------------------------------->
53 // r5
54
55 // The Algorithm:
56 //
57 // 1) Consume input frames into r0 (r1 is zero-initialized).
58 // 2) Position kernel centered at start of r0 (r2) and generate output frames
59 // until kernel is centered at start of r4, or we've finished generating
60 // all the output frames.
61 // 3) Copy r3 to r1 and r4 to r2.
62 // 4) Consume input frames into r5 (zero-pad if we run out of input).
63 // 5) Goto (2) until all of input is consumed.
64 //
65 // note: we're glossing over how the sub-sample handling works with
66 // m_virtualSourceIndex, etc.
67
68 namespace blink {
69
SincResampler(double scale_factor,unsigned kernel_size,unsigned number_of_kernel_offsets)70 SincResampler::SincResampler(double scale_factor,
71 unsigned kernel_size,
72 unsigned number_of_kernel_offsets)
73 : scale_factor_(scale_factor),
74 kernel_size_(kernel_size),
75 number_of_kernel_offsets_(number_of_kernel_offsets),
76 kernel_storage_(kernel_size_ * (number_of_kernel_offsets_ + 1)),
77 virtual_source_index_(0),
78 block_size_(512),
79 // See input buffer layout above.
80 input_buffer_(block_size_ + kernel_size_),
81 source_(nullptr),
82 source_frames_available_(0),
83 source_provider_(nullptr),
84 is_buffer_primed_(false) {
85 InitializeKernel();
86 }
87
InitializeKernel()88 void SincResampler::InitializeKernel() {
89 // Blackman window parameters.
90 double alpha = 0.16;
91 double a0 = 0.5 * (1.0 - alpha);
92 double a1 = 0.5;
93 double a2 = 0.5 * alpha;
94
95 // sincScaleFactor is basically the normalized cutoff frequency of the
96 // low-pass filter.
97 double sinc_scale_factor = scale_factor_ > 1.0 ? 1.0 / scale_factor_ : 1.0;
98
99 // The sinc function is an idealized brick-wall filter, but since we're
100 // windowing it the transition from pass to stop does not happen right away.
101 // So we should adjust the lowpass filter cutoff slightly downward to avoid
102 // some aliasing at the very high-end.
103 // FIXME: this value is empirical and to be more exact should vary depending
104 // on m_kernelSize.
105 sinc_scale_factor *= 0.9;
106
107 int n = kernel_size_;
108 int half_size = n / 2;
109
110 // Generates a set of windowed sinc() kernels.
111 // We generate a range of sub-sample offsets from 0.0 to 1.0.
112 for (unsigned offset_index = 0; offset_index <= number_of_kernel_offsets_;
113 ++offset_index) {
114 double subsample_offset =
115 static_cast<double>(offset_index) / number_of_kernel_offsets_;
116
117 for (int i = 0; i < n; ++i) {
118 // Compute the sinc() with offset.
119 double s =
120 sinc_scale_factor * kPiDouble * (i - half_size - subsample_offset);
121 double sinc = !s ? 1.0 : std::sin(s) / s;
122 sinc *= sinc_scale_factor;
123
124 // Compute Blackman window, matching the offset of the sinc().
125 double x = (i - subsample_offset) / n;
126 double window = a0 - a1 * std::cos(kTwoPiDouble * x) +
127 a2 * std::cos(kTwoPiDouble * 2.0 * x);
128
129 // Window the sinc() function and store at the correct offset.
130 kernel_storage_[i + offset_index * kernel_size_] = sinc * window;
131 }
132 }
133 }
134
ConsumeSource(float * buffer,unsigned number_of_source_frames)135 void SincResampler::ConsumeSource(float* buffer,
136 unsigned number_of_source_frames) {
137 DCHECK(source_provider_);
138
139 // Wrap the provided buffer by an AudioBus for use by the source provider.
140 scoped_refptr<AudioBus> bus =
141 AudioBus::Create(1, number_of_source_frames, false);
142
143 // FIXME: Find a way to make the following const-correct:
144 bus->SetChannelMemory(0, buffer, number_of_source_frames);
145
146 source_provider_->ProvideInput(bus.get(), number_of_source_frames);
147 }
148
149 namespace {
150
151 // BufferSourceProvider is an AudioSourceProvider wrapping an in-memory buffer.
152
153 class BufferSourceProvider final : public AudioSourceProvider {
154 public:
BufferSourceProvider(const float * source,uint32_t number_of_source_frames)155 BufferSourceProvider(const float* source, uint32_t number_of_source_frames)
156 : source_(source), source_frames_available_(number_of_source_frames) {}
157
158 // Consumes samples from the in-memory buffer.
ProvideInput(AudioBus * bus,uint32_t frames_to_process)159 void ProvideInput(AudioBus* bus, uint32_t frames_to_process) override {
160 DCHECK(source_);
161 DCHECK(bus);
162 if (!source_ || !bus)
163 return;
164
165 float* buffer = bus->Channel(0)->MutableData();
166
167 // Clamp to number of frames available and zero-pad.
168 uint32_t frames_to_copy =
169 std::min(source_frames_available_, frames_to_process);
170 memcpy(buffer, source_, sizeof(float) * frames_to_copy);
171
172 // Zero-pad if necessary.
173 if (frames_to_copy < frames_to_process)
174 memset(buffer + frames_to_copy, 0,
175 sizeof(float) * (frames_to_process - frames_to_copy));
176
177 source_frames_available_ -= frames_to_copy;
178 source_ += frames_to_copy;
179 }
180
181 private:
182 const float* source_;
183 uint32_t source_frames_available_;
184 };
185
186 } // namespace
187
Process(const float * source,float * destination,unsigned number_of_source_frames)188 void SincResampler::Process(const float* source,
189 float* destination,
190 unsigned number_of_source_frames) {
191 // Resample an in-memory buffer using an AudioSourceProvider.
192 BufferSourceProvider source_provider(source, number_of_source_frames);
193
194 unsigned number_of_destination_frames =
195 static_cast<unsigned>(number_of_source_frames / scale_factor_);
196 unsigned remaining = number_of_destination_frames;
197
198 while (remaining) {
199 unsigned frames_this_time = std::min(remaining, block_size_);
200 Process(&source_provider, destination, frames_this_time);
201
202 destination += frames_this_time;
203 remaining -= frames_this_time;
204 }
205 }
206
Process(AudioSourceProvider * source_provider,float * destination,uint32_t frames_to_process)207 void SincResampler::Process(AudioSourceProvider* source_provider,
208 float* destination,
209 uint32_t frames_to_process) {
210 DCHECK(source_provider);
211 DCHECK_GT(block_size_, kernel_size_);
212 DCHECK_GE(input_buffer_.size(), block_size_ + kernel_size_);
213 DCHECK_EQ(kernel_size_ % 2, 0u);
214
215 source_provider_ = source_provider;
216
217 unsigned number_of_destination_frames = frames_to_process;
218
219 // Setup various region pointers in the buffer (see diagram above).
220 float* r0 = input_buffer_.Data() + kernel_size_ / 2;
221 float* r1 = input_buffer_.Data();
222 float* r2 = r0;
223 float* r3 = r0 + block_size_ - kernel_size_ / 2;
224 float* r4 = r0 + block_size_;
225 float* r5 = r0 + kernel_size_ / 2;
226
227 // Step (1)
228 // Prime the input buffer at the start of the input stream.
229 if (!is_buffer_primed_) {
230 ConsumeSource(r0, block_size_ + kernel_size_ / 2);
231 is_buffer_primed_ = true;
232 }
233
234 // Step (2)
235
236 while (number_of_destination_frames) {
237 while (virtual_source_index_ < block_size_) {
238 // m_virtualSourceIndex lies in between two kernel offsets so figure out
239 // what they are.
240 int source_index_i = static_cast<int>(virtual_source_index_);
241 double subsample_remainder = virtual_source_index_ - source_index_i;
242
243 double virtual_offset_index =
244 subsample_remainder * number_of_kernel_offsets_;
245 int offset_index = static_cast<int>(virtual_offset_index);
246
247 float* k1 = kernel_storage_.Data() + offset_index * kernel_size_;
248 float* k2 = k1 + kernel_size_;
249
250 // Initialize input pointer based on quantized m_virtualSourceIndex.
251 float* input_p = r1 + source_index_i;
252
253 // We'll compute "convolutions" for the two kernels which straddle
254 // m_virtualSourceIndex
255 float sum1 = 0;
256 float sum2 = 0;
257
258 // Figure out how much to weight each kernel's "convolution".
259 double kernel_interpolation_factor = virtual_offset_index - offset_index;
260
261 // Generate a single output sample.
262 int n = kernel_size_;
263
264 #define CONVOLVE_ONE_SAMPLE() \
265 do { \
266 input = *input_p++; \
267 sum1 += input * *k1; \
268 sum2 += input * *k2; \
269 ++k1; \
270 ++k2; \
271 } while (0)
272
273 {
274 float input;
275
276 #if defined(ARCH_CPU_X86_FAMILY)
277 // If the sourceP address is not 16-byte aligned, the first several
278 // frames (at most three) should be processed seperately.
279 while ((reinterpret_cast<uintptr_t>(input_p) & 0x0F) && n) {
280 CONVOLVE_ONE_SAMPLE();
281 n--;
282 }
283
284 // Now the inputP is aligned and start to apply SSE.
285 float* end_p = input_p + n - n % 4;
286 __m128 m_input;
287 __m128 m_k1;
288 __m128 m_k2;
289 __m128 mul1;
290 __m128 mul2;
291
292 __m128 sums1 = _mm_setzero_ps();
293 __m128 sums2 = _mm_setzero_ps();
294 bool k1_aligned = !(reinterpret_cast<uintptr_t>(k1) & 0x0F);
295 bool k2_aligned = !(reinterpret_cast<uintptr_t>(k2) & 0x0F);
296
297 #define LOAD_DATA(l1, l2) \
298 do { \
299 m_input = _mm_load_ps(input_p); \
300 m_k1 = _mm_##l1##_ps(k1); \
301 m_k2 = _mm_##l2##_ps(k2); \
302 } while (0)
303
304 #define CONVOLVE_4_SAMPLES() \
305 do { \
306 mul1 = _mm_mul_ps(m_input, m_k1); \
307 mul2 = _mm_mul_ps(m_input, m_k2); \
308 sums1 = _mm_add_ps(sums1, mul1); \
309 sums2 = _mm_add_ps(sums2, mul2); \
310 input_p += 4; \
311 k1 += 4; \
312 k2 += 4; \
313 } while (0)
314
315 if (k1_aligned && k2_aligned) { // both aligned
316 while (input_p < end_p) {
317 LOAD_DATA(load, load);
318 CONVOLVE_4_SAMPLES();
319 }
320 } else if (!k1_aligned && k2_aligned) { // only k2 aligned
321 while (input_p < end_p) {
322 LOAD_DATA(loadu, load);
323 CONVOLVE_4_SAMPLES();
324 }
325 } else if (k1_aligned && !k2_aligned) { // only k1 aligned
326 while (input_p < end_p) {
327 LOAD_DATA(load, loadu);
328 CONVOLVE_4_SAMPLES();
329 }
330 } else { // both non-aligned
331 while (input_p < end_p) {
332 LOAD_DATA(loadu, loadu);
333 CONVOLVE_4_SAMPLES();
334 }
335 }
336
337 // Summarize the SSE results to sum1 and sum2.
338 float* group_sum_p = reinterpret_cast<float*>(&sums1);
339 sum1 +=
340 group_sum_p[0] + group_sum_p[1] + group_sum_p[2] + group_sum_p[3];
341 group_sum_p = reinterpret_cast<float*>(&sums2);
342 sum2 +=
343 group_sum_p[0] + group_sum_p[1] + group_sum_p[2] + group_sum_p[3];
344
345 n %= 4;
346 while (n) {
347 CONVOLVE_ONE_SAMPLE();
348 n--;
349 }
350 #else
351 // FIXME: add ARM NEON optimizations for the following. The scalar
352 // code-path can probably also be optimized better.
353
354 // Optimize size 32 and size 64 kernels by unrolling the while loop.
355 // A 20 - 30% speed improvement was measured in some cases by using this
356 // approach.
357
358 if (n == 32) {
359 CONVOLVE_ONE_SAMPLE(); // 1
360 CONVOLVE_ONE_SAMPLE(); // 2
361 CONVOLVE_ONE_SAMPLE(); // 3
362 CONVOLVE_ONE_SAMPLE(); // 4
363 CONVOLVE_ONE_SAMPLE(); // 5
364 CONVOLVE_ONE_SAMPLE(); // 6
365 CONVOLVE_ONE_SAMPLE(); // 7
366 CONVOLVE_ONE_SAMPLE(); // 8
367 CONVOLVE_ONE_SAMPLE(); // 9
368 CONVOLVE_ONE_SAMPLE(); // 10
369 CONVOLVE_ONE_SAMPLE(); // 11
370 CONVOLVE_ONE_SAMPLE(); // 12
371 CONVOLVE_ONE_SAMPLE(); // 13
372 CONVOLVE_ONE_SAMPLE(); // 14
373 CONVOLVE_ONE_SAMPLE(); // 15
374 CONVOLVE_ONE_SAMPLE(); // 16
375 CONVOLVE_ONE_SAMPLE(); // 17
376 CONVOLVE_ONE_SAMPLE(); // 18
377 CONVOLVE_ONE_SAMPLE(); // 19
378 CONVOLVE_ONE_SAMPLE(); // 20
379 CONVOLVE_ONE_SAMPLE(); // 21
380 CONVOLVE_ONE_SAMPLE(); // 22
381 CONVOLVE_ONE_SAMPLE(); // 23
382 CONVOLVE_ONE_SAMPLE(); // 24
383 CONVOLVE_ONE_SAMPLE(); // 25
384 CONVOLVE_ONE_SAMPLE(); // 26
385 CONVOLVE_ONE_SAMPLE(); // 27
386 CONVOLVE_ONE_SAMPLE(); // 28
387 CONVOLVE_ONE_SAMPLE(); // 29
388 CONVOLVE_ONE_SAMPLE(); // 30
389 CONVOLVE_ONE_SAMPLE(); // 31
390 CONVOLVE_ONE_SAMPLE(); // 32
391 } else if (n == 64) {
392 CONVOLVE_ONE_SAMPLE(); // 1
393 CONVOLVE_ONE_SAMPLE(); // 2
394 CONVOLVE_ONE_SAMPLE(); // 3
395 CONVOLVE_ONE_SAMPLE(); // 4
396 CONVOLVE_ONE_SAMPLE(); // 5
397 CONVOLVE_ONE_SAMPLE(); // 6
398 CONVOLVE_ONE_SAMPLE(); // 7
399 CONVOLVE_ONE_SAMPLE(); // 8
400 CONVOLVE_ONE_SAMPLE(); // 9
401 CONVOLVE_ONE_SAMPLE(); // 10
402 CONVOLVE_ONE_SAMPLE(); // 11
403 CONVOLVE_ONE_SAMPLE(); // 12
404 CONVOLVE_ONE_SAMPLE(); // 13
405 CONVOLVE_ONE_SAMPLE(); // 14
406 CONVOLVE_ONE_SAMPLE(); // 15
407 CONVOLVE_ONE_SAMPLE(); // 16
408 CONVOLVE_ONE_SAMPLE(); // 17
409 CONVOLVE_ONE_SAMPLE(); // 18
410 CONVOLVE_ONE_SAMPLE(); // 19
411 CONVOLVE_ONE_SAMPLE(); // 20
412 CONVOLVE_ONE_SAMPLE(); // 21
413 CONVOLVE_ONE_SAMPLE(); // 22
414 CONVOLVE_ONE_SAMPLE(); // 23
415 CONVOLVE_ONE_SAMPLE(); // 24
416 CONVOLVE_ONE_SAMPLE(); // 25
417 CONVOLVE_ONE_SAMPLE(); // 26
418 CONVOLVE_ONE_SAMPLE(); // 27
419 CONVOLVE_ONE_SAMPLE(); // 28
420 CONVOLVE_ONE_SAMPLE(); // 29
421 CONVOLVE_ONE_SAMPLE(); // 30
422 CONVOLVE_ONE_SAMPLE(); // 31
423 CONVOLVE_ONE_SAMPLE(); // 32
424 CONVOLVE_ONE_SAMPLE(); // 33
425 CONVOLVE_ONE_SAMPLE(); // 34
426 CONVOLVE_ONE_SAMPLE(); // 35
427 CONVOLVE_ONE_SAMPLE(); // 36
428 CONVOLVE_ONE_SAMPLE(); // 37
429 CONVOLVE_ONE_SAMPLE(); // 38
430 CONVOLVE_ONE_SAMPLE(); // 39
431 CONVOLVE_ONE_SAMPLE(); // 40
432 CONVOLVE_ONE_SAMPLE(); // 41
433 CONVOLVE_ONE_SAMPLE(); // 42
434 CONVOLVE_ONE_SAMPLE(); // 43
435 CONVOLVE_ONE_SAMPLE(); // 44
436 CONVOLVE_ONE_SAMPLE(); // 45
437 CONVOLVE_ONE_SAMPLE(); // 46
438 CONVOLVE_ONE_SAMPLE(); // 47
439 CONVOLVE_ONE_SAMPLE(); // 48
440 CONVOLVE_ONE_SAMPLE(); // 49
441 CONVOLVE_ONE_SAMPLE(); // 50
442 CONVOLVE_ONE_SAMPLE(); // 51
443 CONVOLVE_ONE_SAMPLE(); // 52
444 CONVOLVE_ONE_SAMPLE(); // 53
445 CONVOLVE_ONE_SAMPLE(); // 54
446 CONVOLVE_ONE_SAMPLE(); // 55
447 CONVOLVE_ONE_SAMPLE(); // 56
448 CONVOLVE_ONE_SAMPLE(); // 57
449 CONVOLVE_ONE_SAMPLE(); // 58
450 CONVOLVE_ONE_SAMPLE(); // 59
451 CONVOLVE_ONE_SAMPLE(); // 60
452 CONVOLVE_ONE_SAMPLE(); // 61
453 CONVOLVE_ONE_SAMPLE(); // 62
454 CONVOLVE_ONE_SAMPLE(); // 63
455 CONVOLVE_ONE_SAMPLE(); // 64
456 } else {
457 while (n--) {
458 // Non-optimized using actual while loop.
459 CONVOLVE_ONE_SAMPLE();
460 }
461 }
462 #endif
463 }
464 #undef CONVOLVE_ONE_SAMPLE
465
466 // Linearly interpolate the two "convolutions".
467 double result = (1.0 - kernel_interpolation_factor) * sum1 +
468 kernel_interpolation_factor * sum2;
469
470 *destination++ = result;
471
472 // Advance the virtual index.
473 virtual_source_index_ += scale_factor_;
474
475 --number_of_destination_frames;
476 if (!number_of_destination_frames)
477 return;
478 }
479
480 // Wrap back around to the start.
481 virtual_source_index_ -= block_size_;
482
483 // Step (3) Copy r3 to r1 and r4 to r2.
484 // This wraps the last input frames back to the start of the buffer.
485 memcpy(r1, r3, sizeof(float) * (kernel_size_ / 2));
486 memcpy(r2, r4, sizeof(float) * (kernel_size_ / 2));
487
488 // Step (4)
489 // Refresh the buffer with more input.
490 ConsumeSource(r5, block_size_);
491 }
492 }
493
494 } // namespace blink
495