1 /*
2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/audio_processing/audio_buffer.h"
12 
13 #include <string.h>
14 #include <cstdint>
15 
16 #include "common_audio/channel_buffer.h"
17 #include "common_audio/include/audio_util.h"
18 #include "common_audio/resampler/push_sinc_resampler.h"
19 #include "modules/audio_processing/splitting_filter.h"
20 #include "rtc_base/checks.h"
21 
22 namespace webrtc {
23 namespace {
24 
// Per-channel sample counts of one frame; the names tie each count to a
// 16, 32 or 48 kHz rate.
constexpr size_t kSamplesPer16kHzChannel = 160;
constexpr size_t kSamplesPer32kHzChannel = 320;
constexpr size_t kSamplesPer48kHzChannel = 480;
28 
KeyboardChannelIndex(const StreamConfig & stream_config)29 int KeyboardChannelIndex(const StreamConfig& stream_config) {
30   if (!stream_config.has_keyboard()) {
31     RTC_NOTREACHED();
32     return 0;
33   }
34 
35   return stream_config.num_channels();
36 }
37 
NumBandsFromSamplesPerChannel(size_t num_frames)38 size_t NumBandsFromSamplesPerChannel(size_t num_frames) {
39   size_t num_bands = 1;
40   if (num_frames == kSamplesPer32kHzChannel ||
41       num_frames == kSamplesPer48kHzChannel) {
42     num_bands = rtc::CheckedDivExact(num_frames, kSamplesPer16kHzChannel);
43   }
44   return num_bands;
45 }
46 
47 }  // namespace
48 
AudioBuffer(size_t input_num_frames,size_t num_input_channels,size_t process_num_frames,size_t num_process_channels,size_t output_num_frames)49 AudioBuffer::AudioBuffer(size_t input_num_frames,
50                          size_t num_input_channels,
51                          size_t process_num_frames,
52                          size_t num_process_channels,
53                          size_t output_num_frames)
54     : input_num_frames_(input_num_frames),
55       num_input_channels_(num_input_channels),
56       proc_num_frames_(process_num_frames),
57       num_proc_channels_(num_process_channels),
58       output_num_frames_(output_num_frames),
59       num_channels_(num_process_channels),
60       num_bands_(NumBandsFromSamplesPerChannel(proc_num_frames_)),
61       num_split_frames_(rtc::CheckedDivExact(proc_num_frames_, num_bands_)),
62       mixed_low_pass_valid_(false),
63       reference_copied_(false),
64       activity_(AudioFrame::kVadUnknown),
65       keyboard_data_(NULL),
66       data_(new IFChannelBuffer(proc_num_frames_, num_proc_channels_)),
67       output_buffer_(new IFChannelBuffer(output_num_frames_, num_channels_)) {
68   RTC_DCHECK_GT(input_num_frames_, 0);
69   RTC_DCHECK_GT(proc_num_frames_, 0);
70   RTC_DCHECK_GT(output_num_frames_, 0);
71   RTC_DCHECK_GT(num_input_channels_, 0);
72   RTC_DCHECK_GT(num_proc_channels_, 0);
73   RTC_DCHECK_LE(num_proc_channels_, num_input_channels_);
74 
75   if (input_num_frames_ != proc_num_frames_ ||
76       output_num_frames_ != proc_num_frames_) {
77     // Create an intermediate buffer for resampling.
78     process_buffer_.reset(
79         new ChannelBuffer<float>(proc_num_frames_, num_proc_channels_));
80 
81     if (input_num_frames_ != proc_num_frames_) {
82       for (size_t i = 0; i < num_proc_channels_; ++i) {
83         input_resamplers_.push_back(std::unique_ptr<PushSincResampler>(
84             new PushSincResampler(input_num_frames_, proc_num_frames_)));
85       }
86     }
87 
88     if (output_num_frames_ != proc_num_frames_) {
89       for (size_t i = 0; i < num_proc_channels_; ++i) {
90         output_resamplers_.push_back(std::unique_ptr<PushSincResampler>(
91             new PushSincResampler(proc_num_frames_, output_num_frames_)));
92       }
93     }
94   }
95 
96   if (num_bands_ > 1) {
97     split_data_.reset(
98         new IFChannelBuffer(proc_num_frames_, num_proc_channels_, num_bands_));
99     splitting_filter_.reset(
100         new SplittingFilter(num_proc_channels_, num_bands_, proc_num_frames_));
101   }
102 }
103 
~AudioBuffer()104 AudioBuffer::~AudioBuffer() {}
105 
CopyFrom(const float * const * data,const StreamConfig & stream_config)106 void AudioBuffer::CopyFrom(const float* const* data,
107                            const StreamConfig& stream_config) {
108   RTC_DCHECK_EQ(stream_config.num_frames(), input_num_frames_);
109   RTC_DCHECK_EQ(stream_config.num_channels(), num_input_channels_);
110   InitForNewData();
111   // Initialized lazily because there's a different condition in
112   // DeinterleaveFrom.
113   const bool need_to_downmix =
114       num_input_channels_ > 1 && num_proc_channels_ == 1;
115   if (need_to_downmix && !input_buffer_) {
116     input_buffer_.reset(
117         new IFChannelBuffer(input_num_frames_, num_proc_channels_));
118   }
119 
120   if (stream_config.has_keyboard()) {
121     keyboard_data_ = data[KeyboardChannelIndex(stream_config)];
122   }
123 
124   // Downmix.
125   const float* const* data_ptr = data;
126   if (need_to_downmix) {
127     DownmixToMono<float, float>(data, input_num_frames_, num_input_channels_,
128                                 input_buffer_->fbuf()->channels()[0]);
129     data_ptr = input_buffer_->fbuf_const()->channels();
130   }
131 
132   // Resample.
133   if (input_num_frames_ != proc_num_frames_) {
134     for (size_t i = 0; i < num_proc_channels_; ++i) {
135       input_resamplers_[i]->Resample(data_ptr[i], input_num_frames_,
136                                      process_buffer_->channels()[i],
137                                      proc_num_frames_);
138     }
139     data_ptr = process_buffer_->channels();
140   }
141 
142   // Convert to the S16 range.
143   for (size_t i = 0; i < num_proc_channels_; ++i) {
144     FloatToFloatS16(data_ptr[i], proc_num_frames_,
145                     data_->fbuf()->channels()[i]);
146   }
147 }
148 
CopyTo(const StreamConfig & stream_config,float * const * data)149 void AudioBuffer::CopyTo(const StreamConfig& stream_config,
150                          float* const* data) {
151   RTC_DCHECK_EQ(stream_config.num_frames(), output_num_frames_);
152   RTC_DCHECK(stream_config.num_channels() == num_channels_ ||
153              num_channels_ == 1);
154 
155   // Convert to the float range.
156   float* const* data_ptr = data;
157   if (output_num_frames_ != proc_num_frames_) {
158     // Convert to an intermediate buffer for subsequent resampling.
159     data_ptr = process_buffer_->channels();
160   }
161   for (size_t i = 0; i < num_channels_; ++i) {
162     FloatS16ToFloat(data_->fbuf()->channels()[i], proc_num_frames_,
163                     data_ptr[i]);
164   }
165 
166   // Resample.
167   if (output_num_frames_ != proc_num_frames_) {
168     for (size_t i = 0; i < num_channels_; ++i) {
169       output_resamplers_[i]->Resample(data_ptr[i], proc_num_frames_, data[i],
170                                       output_num_frames_);
171     }
172   }
173 
174   // Upmix.
175   for (size_t i = num_channels_; i < stream_config.num_channels(); ++i) {
176     memcpy(data[i], data[0], output_num_frames_ * sizeof(**data));
177   }
178 }
179 
InitForNewData()180 void AudioBuffer::InitForNewData() {
181   keyboard_data_ = NULL;
182   mixed_low_pass_valid_ = false;
183   reference_copied_ = false;
184   activity_ = AudioFrame::kVadUnknown;
185   num_channels_ = num_proc_channels_;
186   data_->set_num_channels(num_proc_channels_);
187   if (split_data_.get()) {
188     split_data_->set_num_channels(num_proc_channels_);
189   }
190 }
191 
channels_const() const192 const int16_t* const* AudioBuffer::channels_const() const {
193   return data_->ibuf_const()->channels();
194 }
195 
channels()196 int16_t* const* AudioBuffer::channels() {
197   mixed_low_pass_valid_ = false;
198   return data_->ibuf()->channels();
199 }
200 
split_bands_const(size_t channel) const201 const int16_t* const* AudioBuffer::split_bands_const(size_t channel) const {
202   return split_data_.get() ? split_data_->ibuf_const()->bands(channel)
203                            : data_->ibuf_const()->bands(channel);
204 }
205 
split_bands(size_t channel)206 int16_t* const* AudioBuffer::split_bands(size_t channel) {
207   mixed_low_pass_valid_ = false;
208   return split_data_.get() ? split_data_->ibuf()->bands(channel)
209                            : data_->ibuf()->bands(channel);
210 }
211 
split_channels_const(Band band) const212 const int16_t* const* AudioBuffer::split_channels_const(Band band) const {
213   if (split_data_.get()) {
214     return split_data_->ibuf_const()->channels(band);
215   } else {
216     return band == kBand0To8kHz ? data_->ibuf_const()->channels() : nullptr;
217   }
218 }
219 
split_channels(Band band)220 int16_t* const* AudioBuffer::split_channels(Band band) {
221   mixed_low_pass_valid_ = false;
222   if (split_data_.get()) {
223     return split_data_->ibuf()->channels(band);
224   } else {
225     return band == kBand0To8kHz ? data_->ibuf()->channels() : nullptr;
226   }
227 }
228 
data()229 ChannelBuffer<int16_t>* AudioBuffer::data() {
230   mixed_low_pass_valid_ = false;
231   return data_->ibuf();
232 }
233 
data() const234 const ChannelBuffer<int16_t>* AudioBuffer::data() const {
235   return data_->ibuf_const();
236 }
237 
split_data()238 ChannelBuffer<int16_t>* AudioBuffer::split_data() {
239   mixed_low_pass_valid_ = false;
240   return split_data_.get() ? split_data_->ibuf() : data_->ibuf();
241 }
242 
split_data() const243 const ChannelBuffer<int16_t>* AudioBuffer::split_data() const {
244   return split_data_.get() ? split_data_->ibuf_const() : data_->ibuf_const();
245 }
246 
channels_const_f() const247 const float* const* AudioBuffer::channels_const_f() const {
248   return data_->fbuf_const()->channels();
249 }
250 
channels_f()251 float* const* AudioBuffer::channels_f() {
252   mixed_low_pass_valid_ = false;
253   return data_->fbuf()->channels();
254 }
255 
split_bands_const_f(size_t channel) const256 const float* const* AudioBuffer::split_bands_const_f(size_t channel) const {
257   return split_data_.get() ? split_data_->fbuf_const()->bands(channel)
258                            : data_->fbuf_const()->bands(channel);
259 }
260 
split_bands_f(size_t channel)261 float* const* AudioBuffer::split_bands_f(size_t channel) {
262   mixed_low_pass_valid_ = false;
263   return split_data_.get() ? split_data_->fbuf()->bands(channel)
264                            : data_->fbuf()->bands(channel);
265 }
266 
split_channels_const_f(Band band) const267 const float* const* AudioBuffer::split_channels_const_f(Band band) const {
268   if (split_data_.get()) {
269     return split_data_->fbuf_const()->channels(band);
270   } else {
271     return band == kBand0To8kHz ? data_->fbuf_const()->channels() : nullptr;
272   }
273 }
274 
split_channels_f(Band band)275 float* const* AudioBuffer::split_channels_f(Band band) {
276   mixed_low_pass_valid_ = false;
277   if (split_data_.get()) {
278     return split_data_->fbuf()->channels(band);
279   } else {
280     return band == kBand0To8kHz ? data_->fbuf()->channels() : nullptr;
281   }
282 }
283 
data_f()284 ChannelBuffer<float>* AudioBuffer::data_f() {
285   mixed_low_pass_valid_ = false;
286   return data_->fbuf();
287 }
288 
data_f() const289 const ChannelBuffer<float>* AudioBuffer::data_f() const {
290   return data_->fbuf_const();
291 }
292 
split_data_f()293 ChannelBuffer<float>* AudioBuffer::split_data_f() {
294   mixed_low_pass_valid_ = false;
295   return split_data_.get() ? split_data_->fbuf() : data_->fbuf();
296 }
297 
split_data_f() const298 const ChannelBuffer<float>* AudioBuffer::split_data_f() const {
299   return split_data_.get() ? split_data_->fbuf_const() : data_->fbuf_const();
300 }
301 
mixed_low_pass_data()302 const int16_t* AudioBuffer::mixed_low_pass_data() {
303   if (num_proc_channels_ == 1) {
304     return split_bands_const(0)[kBand0To8kHz];
305   }
306 
307   if (!mixed_low_pass_valid_) {
308     if (!mixed_low_pass_channels_.get()) {
309       mixed_low_pass_channels_.reset(
310           new ChannelBuffer<int16_t>(num_split_frames_, 1));
311     }
312 
313     DownmixToMono<int16_t, int32_t>(split_channels_const(kBand0To8kHz),
314                                     num_split_frames_, num_channels_,
315                                     mixed_low_pass_channels_->channels()[0]);
316     mixed_low_pass_valid_ = true;
317   }
318   return mixed_low_pass_channels_->channels()[0];
319 }
320 
low_pass_reference(int channel) const321 const int16_t* AudioBuffer::low_pass_reference(int channel) const {
322   if (!reference_copied_) {
323     return NULL;
324   }
325 
326   return low_pass_reference_channels_->channels()[channel];
327 }
328 
keyboard_data() const329 const float* AudioBuffer::keyboard_data() const {
330   return keyboard_data_;
331 }
332 
set_activity(AudioFrame::VADActivity activity)333 void AudioBuffer::set_activity(AudioFrame::VADActivity activity) {
334   activity_ = activity;
335 }
336 
activity() const337 AudioFrame::VADActivity AudioBuffer::activity() const {
338   return activity_;
339 }
340 
num_channels() const341 size_t AudioBuffer::num_channels() const {
342   return num_channels_;
343 }
344 
set_num_channels(size_t num_channels)345 void AudioBuffer::set_num_channels(size_t num_channels) {
346   num_channels_ = num_channels;
347   data_->set_num_channels(num_channels);
348   if (split_data_.get()) {
349     split_data_->set_num_channels(num_channels);
350   }
351 }
352 
num_frames() const353 size_t AudioBuffer::num_frames() const {
354   return proc_num_frames_;
355 }
356 
num_frames_per_band() const357 size_t AudioBuffer::num_frames_per_band() const {
358   return num_split_frames_;
359 }
360 
num_keyboard_frames() const361 size_t AudioBuffer::num_keyboard_frames() const {
362   // We don't resample the keyboard channel.
363   return input_num_frames_;
364 }
365 
num_bands() const366 size_t AudioBuffer::num_bands() const {
367   return num_bands_;
368 }
369 
370 // The resampler is only for supporting 48kHz to 16kHz in the reverse stream.
DeinterleaveFrom(AudioFrame * frame)371 void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
372   RTC_DCHECK_EQ(frame->num_channels_, num_input_channels_);
373   RTC_DCHECK_EQ(frame->samples_per_channel_, input_num_frames_);
374   InitForNewData();
375   // Initialized lazily because there's a different condition in CopyFrom.
376   if ((input_num_frames_ != proc_num_frames_) && !input_buffer_) {
377     input_buffer_.reset(
378         new IFChannelBuffer(input_num_frames_, num_proc_channels_));
379   }
380   activity_ = frame->vad_activity_;
381 
382   int16_t* const* deinterleaved;
383   if (input_num_frames_ == proc_num_frames_) {
384     deinterleaved = data_->ibuf()->channels();
385   } else {
386     deinterleaved = input_buffer_->ibuf()->channels();
387   }
388   // TODO(yujo): handle muted frames more efficiently.
389   if (num_proc_channels_ == 1) {
390     // Downmix and deinterleave simultaneously.
391     DownmixInterleavedToMono(frame->data(), input_num_frames_,
392                              num_input_channels_, deinterleaved[0]);
393   } else {
394     RTC_DCHECK_EQ(num_proc_channels_, num_input_channels_);
395     Deinterleave(frame->data(), input_num_frames_, num_proc_channels_,
396                  deinterleaved);
397   }
398 
399   // Resample.
400   if (input_num_frames_ != proc_num_frames_) {
401     for (size_t i = 0; i < num_proc_channels_; ++i) {
402       input_resamplers_[i]->Resample(
403           input_buffer_->fbuf_const()->channels()[i], input_num_frames_,
404           data_->fbuf()->channels()[i], proc_num_frames_);
405     }
406   }
407 }
408 
InterleaveTo(AudioFrame * frame,bool data_changed) const409 void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) const {
410   frame->vad_activity_ = activity_;
411   if (!data_changed) {
412     return;
413   }
414 
415   RTC_DCHECK(frame->num_channels_ == num_channels_ || num_channels_ == 1);
416   RTC_DCHECK_EQ(frame->samples_per_channel_, output_num_frames_);
417 
418   // Resample if necessary.
419   IFChannelBuffer* data_ptr = data_.get();
420   if (proc_num_frames_ != output_num_frames_) {
421     for (size_t i = 0; i < num_channels_; ++i) {
422       output_resamplers_[i]->Resample(
423           data_->fbuf()->channels()[i], proc_num_frames_,
424           output_buffer_->fbuf()->channels()[i], output_num_frames_);
425     }
426     data_ptr = output_buffer_.get();
427   }
428 
429   // TODO(yujo): handle muted frames more efficiently.
430   if (frame->num_channels_ == num_channels_) {
431     Interleave(data_ptr->ibuf()->channels(), output_num_frames_, num_channels_,
432                frame->mutable_data());
433   } else {
434     UpmixMonoToInterleaved(data_ptr->ibuf()->channels()[0], output_num_frames_,
435                            frame->num_channels_, frame->mutable_data());
436   }
437 }
438 
CopyLowPassToReference()439 void AudioBuffer::CopyLowPassToReference() {
440   reference_copied_ = true;
441   if (!low_pass_reference_channels_.get() ||
442       low_pass_reference_channels_->num_channels() != num_channels_) {
443     low_pass_reference_channels_.reset(
444         new ChannelBuffer<int16_t>(num_split_frames_, num_proc_channels_));
445   }
446   for (size_t i = 0; i < num_proc_channels_; i++) {
447     memcpy(low_pass_reference_channels_->channels()[i],
448            split_bands_const(i)[kBand0To8kHz],
449            low_pass_reference_channels_->num_frames_per_band() *
450                sizeof(split_bands_const(i)[kBand0To8kHz][0]));
451   }
452 }
453 
SplitIntoFrequencyBands()454 void AudioBuffer::SplitIntoFrequencyBands() {
455   splitting_filter_->Analysis(data_.get(), split_data_.get());
456 }
457 
MergeFrequencyBands()458 void AudioBuffer::MergeFrequencyBands() {
459   splitting_filter_->Synthesis(split_data_.get(), data_.get());
460 }
461 
462 }  // namespace webrtc
463