1 /*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "audio_processing_impl.h"
12
13 #include <assert.h>
14
15 #include "audio_buffer.h"
16 #include "critical_section_wrapper.h"
17 #include "echo_cancellation_impl.h"
18 #include "echo_control_mobile_impl.h"
19 #include "file_wrapper.h"
20 #include "high_pass_filter_impl.h"
21 #include "gain_control_impl.h"
22 #include "level_estimator_impl.h"
23 #include "module_common_types.h"
24 #include "noise_suppression_impl.h"
25 #include "processing_component.h"
26 #include "splitting_filter.h"
27 #include "voice_detection_impl.h"
28
29 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
30 // Files generated at build-time by the protobuf compiler.
31 #ifdef WEBRTC_ANDROID
32 #include "external/webrtc/src/modules/audio_processing/debug.pb.h"
33 #else
34 #include "webrtc/audio_processing/debug.pb.h"
35 #endif
36 #endif // WEBRTC_AUDIOPROC_DEBUG_DUMP
37
38 namespace webrtc {
Create(int id)39 AudioProcessing* AudioProcessing::Create(int id) {
40
41 AudioProcessingImpl* apm = new AudioProcessingImpl(id);
42 if (apm->Initialize() != kNoError) {
43 delete apm;
44 apm = NULL;
45 }
46
47 return apm;
48 }
49
// Static destruction counterpart to Create(). Deletes through the concrete
// implementation type; presumably the base destructor is non-public or
// non-virtual, which would make this cast necessary -- TODO confirm against
// the AudioProcessing declaration.
void AudioProcessing::Destroy(AudioProcessing* apm) {
  delete static_cast<AudioProcessingImpl*>(apm);
}
53
// Constructs the APM with its defaults: 16 kHz rate, 160 samples per channel
// (10 ms frames, rate / 100), mono on every stream, no stream delay set.
// Each processing component is created here and appended to component_list_
// so that Initialize()/teardown can iterate them generically.
AudioProcessingImpl::AudioProcessingImpl(int id)
    : id_(id),
      echo_cancellation_(NULL),
      echo_control_mobile_(NULL),
      gain_control_(NULL),
      high_pass_filter_(NULL),
      level_estimator_(NULL),
      noise_suppression_(NULL),
      voice_detection_(NULL),
      // The lock is created first among owned resources; components receive
      // `this` and may call back into crit() later.
      crit_(CriticalSectionWrapper::CreateCriticalSection()),
      render_audio_(NULL),
      capture_audio_(NULL),
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
      debug_file_(FileWrapper::Create()),
      event_msg_(new audioproc::Event()),
#endif
      sample_rate_hz_(kSampleRate16kHz),
      split_sample_rate_hz_(kSampleRate16kHz),
      samples_per_channel_(sample_rate_hz_ / 100),
      stream_delay_ms_(0),
      was_stream_delay_set_(false),
      num_reverse_channels_(1),
      num_input_channels_(1),
      num_output_channels_(1) {
  // Registration order defines the iteration order used in
  // InitializeLocked(); actual capture-path processing order is hard-coded
  // in ProcessStream().
  echo_cancellation_ = new EchoCancellationImpl(this);
  component_list_.push_back(echo_cancellation_);

  echo_control_mobile_ = new EchoControlMobileImpl(this);
  component_list_.push_back(echo_control_mobile_);

  gain_control_ = new GainControlImpl(this);
  component_list_.push_back(gain_control_);

  high_pass_filter_ = new HighPassFilterImpl(this);
  component_list_.push_back(high_pass_filter_);

  level_estimator_ = new LevelEstimatorImpl(this);
  component_list_.push_back(level_estimator_);

  noise_suppression_ = new NoiseSuppressionImpl(this);
  component_list_.push_back(noise_suppression_);

  voice_detection_ = new VoiceDetectionImpl(this);
  component_list_.push_back(voice_detection_);
}
100
~AudioProcessingImpl()101 AudioProcessingImpl::~AudioProcessingImpl() {
102 while (!component_list_.empty()) {
103 ProcessingComponent* component = component_list_.front();
104 component->Destroy();
105 delete component;
106 component_list_.pop_front();
107 }
108
109 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
110 if (debug_file_->Open()) {
111 debug_file_->CloseFile();
112 }
113 #endif
114
115 delete crit_;
116 crit_ = NULL;
117
118 if (render_audio_) {
119 delete render_audio_;
120 render_audio_ = NULL;
121 }
122
123 if (capture_audio_) {
124 delete capture_audio_;
125 capture_audio_ = NULL;
126 }
127 }
128
// Exposes the module-wide lock so owned components can synchronize with the
// APM's public entry points.
CriticalSectionWrapper* AudioProcessingImpl::crit() const {
  return crit_;
}
132
// Per-band rate after the splitting filter: 16 kHz when the full-band rate
// is 32 kHz, otherwise identical to the full-band rate (see
// set_sample_rate_hz()).
int AudioProcessingImpl::split_sample_rate_hz() const {
  return split_sample_rate_hz_;
}
136
// Public (locked) entry point for (re)initialization; all the work happens
// in InitializeLocked().
int AudioProcessingImpl::Initialize() {
  CriticalSectionScoped crit_scoped(crit_);
  return InitializeLocked();
}
141
InitializeLocked()142 int AudioProcessingImpl::InitializeLocked() {
143 if (render_audio_ != NULL) {
144 delete render_audio_;
145 render_audio_ = NULL;
146 }
147
148 if (capture_audio_ != NULL) {
149 delete capture_audio_;
150 capture_audio_ = NULL;
151 }
152
153 render_audio_ = new AudioBuffer(num_reverse_channels_,
154 samples_per_channel_);
155 capture_audio_ = new AudioBuffer(num_input_channels_,
156 samples_per_channel_);
157
158 was_stream_delay_set_ = false;
159
160 // Initialize all components.
161 std::list<ProcessingComponent*>::iterator it;
162 for (it = component_list_.begin(); it != component_list_.end(); it++) {
163 int err = (*it)->Initialize();
164 if (err != kNoError) {
165 return err;
166 }
167 }
168
169 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
170 if (debug_file_->Open()) {
171 int err = WriteInitMessage();
172 if (err != kNoError) {
173 return err;
174 }
175 }
176 #endif
177
178 return kNoError;
179 }
180
set_sample_rate_hz(int rate)181 int AudioProcessingImpl::set_sample_rate_hz(int rate) {
182 CriticalSectionScoped crit_scoped(crit_);
183 if (rate != kSampleRate8kHz &&
184 rate != kSampleRate16kHz &&
185 rate != kSampleRate32kHz) {
186 return kBadParameterError;
187 }
188
189 sample_rate_hz_ = rate;
190 samples_per_channel_ = rate / 100;
191
192 if (sample_rate_hz_ == kSampleRate32kHz) {
193 split_sample_rate_hz_ = kSampleRate16kHz;
194 } else {
195 split_sample_rate_hz_ = sample_rate_hz_;
196 }
197
198 return InitializeLocked();
199 }
200
// Current full-band sample rate in Hz.
int AudioProcessingImpl::sample_rate_hz() const {
  return sample_rate_hz_;
}
204
set_num_reverse_channels(int channels)205 int AudioProcessingImpl::set_num_reverse_channels(int channels) {
206 CriticalSectionScoped crit_scoped(crit_);
207 // Only stereo supported currently.
208 if (channels > 2 || channels < 1) {
209 return kBadParameterError;
210 }
211
212 num_reverse_channels_ = channels;
213
214 return InitializeLocked();
215 }
216
// Channel count of the reverse (render/far-end) stream.
int AudioProcessingImpl::num_reverse_channels() const {
  return num_reverse_channels_;
}
220
set_num_channels(int input_channels,int output_channels)221 int AudioProcessingImpl::set_num_channels(
222 int input_channels,
223 int output_channels) {
224 CriticalSectionScoped crit_scoped(crit_);
225 if (output_channels > input_channels) {
226 return kBadParameterError;
227 }
228
229 // Only stereo supported currently.
230 if (input_channels > 2 || input_channels < 1) {
231 return kBadParameterError;
232 }
233
234 if (output_channels > 2 || output_channels < 1) {
235 return kBadParameterError;
236 }
237
238 num_input_channels_ = input_channels;
239 num_output_channels_ = output_channels;
240
241 return InitializeLocked();
242 }
243
// Capture-stream input channel count.
int AudioProcessingImpl::num_input_channels() const {
  return num_input_channels_;
}
247
// Capture-stream output channel count (may be lower than the input count
// when downmixing is configured).
int AudioProcessingImpl::num_output_channels() const {
  return num_output_channels_;
}
251
// Processes one 10 ms capture (near-end) frame in place. The frame must
// match the configured sample rate, input channel count, and frame size.
// Pipeline order is significant and intentionally fixed: optional downmix,
// band split, HPF, AGC analysis, AEC, NS, AECM, VAD, AGC processing, band
// recombine, level estimation. Returns kNoError or the first error from
// validation or a component.
int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
  CriticalSectionScoped crit_scoped(crit_);
  int err = kNoError;

  // Validate the frame against the current configuration.
  if (frame == NULL) {
    return kNullPointerError;
  }

  if (frame->_frequencyInHz != sample_rate_hz_) {
    return kBadSampleRateError;
  }

  if (frame->_audioChannel != num_input_channels_) {
    return kBadNumberChannelsError;
  }

  if (frame->_payloadDataLengthInSamples != samples_per_channel_) {
    return kBadDataLengthError;
  }

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  // Record the unprocessed input plus the stream parameters (delay, drift,
  // analog level) in the pending STREAM event; the event is written out
  // after processing, below.
  if (debug_file_->Open()) {
    event_msg_->set_type(audioproc::Event::STREAM);
    audioproc::Stream* msg = event_msg_->mutable_stream();
    const size_t data_size = sizeof(int16_t) *
                             frame->_payloadDataLengthInSamples *
                             frame->_audioChannel;
    msg->set_input_data(frame->_payloadData, data_size);
    msg->set_delay(stream_delay_ms_);
    msg->set_drift(echo_cancellation_->stream_drift_samples());
    msg->set_level(gain_control_->stream_analog_level());
  }
#endif

  capture_audio_->DeinterleaveFrom(frame);

  // TODO(ajm): experiment with mixing and AEC placement.
  // Downmix before processing; note the frame's channel count is updated
  // here so the output frame reflects num_output_channels_.
  if (num_output_channels_ < num_input_channels_) {
    capture_audio_->Mix(num_output_channels_);
    frame->_audioChannel = num_output_channels_;
  }

  // data_changed captures whether any enabled component will modify the
  // audio; it gates both the band split/recombine and InterleaveTo() below.
  bool data_changed = stream_data_changed();
  if (analysis_needed(data_changed)) {
    for (int i = 0; i < num_output_channels_; i++) {
      // Split into a low and high band.
      SplittingFilterAnalysis(capture_audio_->data(i),
                              capture_audio_->low_pass_split_data(i),
                              capture_audio_->high_pass_split_data(i),
                              capture_audio_->analysis_filter_state1(i),
                              capture_audio_->analysis_filter_state2(i));
    }
  }

  err = high_pass_filter_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  err = gain_control_->AnalyzeCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  err = echo_cancellation_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  // AECM uses a pre-NS copy of the low band as its reference signal, so the
  // copy is only needed when both components are enabled.
  if (echo_control_mobile_->is_enabled() &&
      noise_suppression_->is_enabled()) {
    capture_audio_->CopyLowPassToReference();
  }

  err = noise_suppression_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  err = echo_control_mobile_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  err = voice_detection_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  err = gain_control_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  if (synthesis_needed(data_changed)) {
    for (int i = 0; i < num_output_channels_; i++) {
      // Recombine low and high bands.
      SplittingFilterSynthesis(capture_audio_->low_pass_split_data(i),
                               capture_audio_->high_pass_split_data(i),
                               capture_audio_->data(i),
                               capture_audio_->synthesis_filter_state1(i),
                               capture_audio_->synthesis_filter_state2(i));
    }
  }

  // The level estimator operates on the recombined data.
  err = level_estimator_->ProcessStream(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  capture_audio_->InterleaveTo(frame, data_changed);

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  // Append the processed output and flush the STREAM event to the file.
  if (debug_file_->Open()) {
    audioproc::Stream* msg = event_msg_->mutable_stream();
    const size_t data_size = sizeof(int16_t) *
                             frame->_payloadDataLengthInSamples *
                             frame->_audioChannel;
    msg->set_output_data(frame->_payloadData, data_size);
    err = WriteMessageToDebugFile();
    if (err != kNoError) {
      return err;
    }
  }
#endif

  // The delay must be supplied anew (via set_stream_delay_ms()) before the
  // next call; clear the flag at the end of each processed frame.
  was_stream_delay_set_ = false;
  return kNoError;
}
382
// Analyzes one 10 ms reverse (render/far-end) frame. The audio is only
// read, never modified: it feeds the render paths of AEC, AECM and AGC.
// The frame must match the configured sample rate, reverse channel count,
// and frame size.
int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
  CriticalSectionScoped crit_scoped(crit_);
  int err = kNoError;

  if (frame == NULL) {
    return kNullPointerError;
  }

  if (frame->_frequencyInHz != sample_rate_hz_) {
    return kBadSampleRateError;
  }

  if (frame->_audioChannel != num_reverse_channels_) {
    return kBadNumberChannelsError;
  }

  if (frame->_payloadDataLengthInSamples != samples_per_channel_) {
    return kBadDataLengthError;
  }

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  // The reverse stream is never modified, so the event is complete (and is
  // written) before any analysis happens.
  if (debug_file_->Open()) {
    event_msg_->set_type(audioproc::Event::REVERSE_STREAM);
    audioproc::ReverseStream* msg = event_msg_->mutable_reverse_stream();
    const size_t data_size = sizeof(int16_t) *
                             frame->_payloadDataLengthInSamples *
                             frame->_audioChannel;
    msg->set_data(frame->_payloadData, data_size);
    err = WriteMessageToDebugFile();
    if (err != kNoError) {
      return err;
    }
  }
#endif

  render_audio_->DeinterleaveFrom(frame);

  // TODO(ajm): turn the splitting filter into a component?
  // At 32 kHz the render audio is band-split unconditionally (unlike the
  // capture path, which gates the split on analysis_needed()).
  if (sample_rate_hz_ == kSampleRate32kHz) {
    for (int i = 0; i < num_reverse_channels_; i++) {
      // Split into low and high band.
      SplittingFilterAnalysis(render_audio_->data(i),
                              render_audio_->low_pass_split_data(i),
                              render_audio_->high_pass_split_data(i),
                              render_audio_->analysis_filter_state1(i),
                              render_audio_->analysis_filter_state2(i));
    }
  }

  // TODO(ajm): warnings possible from components?
  err = echo_cancellation_->ProcessRenderAudio(render_audio_);
  if (err != kNoError) {
    return err;
  }

  err = echo_control_mobile_->ProcessRenderAudio(render_audio_);
  if (err != kNoError) {
    return err;
  }

  err = gain_control_->ProcessRenderAudio(render_audio_);
  if (err != kNoError) {
    return err;
  }

  return err;  // TODO(ajm): this is for returning warnings; necessary?
}
450
set_stream_delay_ms(int delay)451 int AudioProcessingImpl::set_stream_delay_ms(int delay) {
452 was_stream_delay_set_ = true;
453 if (delay < 0) {
454 return kBadParameterError;
455 }
456
457 // TODO(ajm): the max is rather arbitrarily chosen; investigate.
458 if (delay > 500) {
459 stream_delay_ms_ = 500;
460 return kBadStreamParameterWarning;
461 }
462
463 stream_delay_ms_ = delay;
464 return kNoError;
465 }
466
// Last stream delay stored by set_stream_delay_ms() (possibly clamped).
int AudioProcessingImpl::stream_delay_ms() const {
  return stream_delay_ms_;
}
470
// True if a stream delay has been provided since the last ProcessStream()
// call or re-initialization (both of which clear the flag).
bool AudioProcessingImpl::was_stream_delay_set() const {
  return was_stream_delay_set_;
}
474
StartDebugRecording(const char filename[AudioProcessing::kMaxFilenameSize])475 int AudioProcessingImpl::StartDebugRecording(
476 const char filename[AudioProcessing::kMaxFilenameSize]) {
477 CriticalSectionScoped crit_scoped(crit_);
478 assert(kMaxFilenameSize == FileWrapper::kMaxFileNameSize);
479
480 if (filename == NULL) {
481 return kNullPointerError;
482 }
483
484 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
485 // Stop any ongoing recording.
486 if (debug_file_->Open()) {
487 if (debug_file_->CloseFile() == -1) {
488 return kFileError;
489 }
490 }
491
492 if (debug_file_->OpenFile(filename, false) == -1) {
493 debug_file_->CloseFile();
494 return kFileError;
495 }
496
497 int err = WriteInitMessage();
498 if (err != kNoError) {
499 return err;
500 }
501 return kNoError;
502 #else
503 return kUnsupportedFunctionError;
504 #endif // WEBRTC_AUDIOPROC_DEBUG_DUMP
505 }
506
StopDebugRecording()507 int AudioProcessingImpl::StopDebugRecording() {
508 CriticalSectionScoped crit_scoped(crit_);
509
510 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
511 // We just return if recording hasn't started.
512 if (debug_file_->Open()) {
513 if (debug_file_->CloseFile() == -1) {
514 return kFileError;
515 }
516 }
517 return kNoError;
518 #else
519 return kUnsupportedFunctionError;
520 #endif // WEBRTC_AUDIOPROC_DEBUG_DUMP
521 }
522
// Accessor for the (module-owned) echo cancellation component.
EchoCancellation* AudioProcessingImpl::echo_cancellation() const {
  return echo_cancellation_;
}
526
// Accessor for the (module-owned) mobile echo control (AECM) component.
EchoControlMobile* AudioProcessingImpl::echo_control_mobile() const {
  return echo_control_mobile_;
}
530
// Accessor for the (module-owned) gain control (AGC) component.
GainControl* AudioProcessingImpl::gain_control() const {
  return gain_control_;
}
534
// Accessor for the (module-owned) high-pass filter component.
HighPassFilter* AudioProcessingImpl::high_pass_filter() const {
  return high_pass_filter_;
}
538
// Accessor for the (module-owned) level estimator component.
LevelEstimator* AudioProcessingImpl::level_estimator() const {
  return level_estimator_;
}
542
// Accessor for the (module-owned) noise suppression component.
NoiseSuppression* AudioProcessingImpl::noise_suppression() const {
  return noise_suppression_;
}
546
// Accessor for the (module-owned) voice activity detection component.
VoiceDetection* AudioProcessingImpl::voice_detection() const {
  return voice_detection_;
}
550
// Updates the module's id under the lock; always succeeds. (The id itself
// is not otherwise used in this file.)
WebRtc_Word32 AudioProcessingImpl::ChangeUniqueId(const WebRtc_Word32 id) {
  CriticalSectionScoped crit_scoped(crit_);
  id_ = id;

  return kNoError;
}
557
stream_data_changed() const558 bool AudioProcessingImpl::stream_data_changed() const {
559 int enabled_count = 0;
560 std::list<ProcessingComponent*>::const_iterator it;
561 for (it = component_list_.begin(); it != component_list_.end(); it++) {
562 if ((*it)->is_component_enabled()) {
563 enabled_count++;
564 }
565 }
566
567 // Data is unchanged if no components are enabled, or if only level_estimator_
568 // or voice_detection_ is enabled.
569 if (enabled_count == 0) {
570 return false;
571 } else if (enabled_count == 1) {
572 if (level_estimator_->is_enabled() || voice_detection_->is_enabled()) {
573 return false;
574 }
575 } else if (enabled_count == 2) {
576 if (level_estimator_->is_enabled() && voice_detection_->is_enabled()) {
577 return false;
578 }
579 }
580 return true;
581 }
582
synthesis_needed(bool stream_data_changed) const583 bool AudioProcessingImpl::synthesis_needed(bool stream_data_changed) const {
584 return (stream_data_changed && sample_rate_hz_ == kSampleRate32kHz);
585 }
586
// Returns true if the capture audio must be band-split before the component
// chain runs (only relevant in the 32 kHz configuration).
bool AudioProcessingImpl::analysis_needed(bool stream_data_changed) const {
  if (!stream_data_changed && !voice_detection_->is_enabled()) {
    // Only level_estimator_ is enabled.
    // NOTE(review): this branch is also reached when no component at all is
    // enabled; returning false is correct either way since nothing below
    // would consume split-band data -- but the comment above overstates it.
    return false;
  } else if (sample_rate_hz_ == kSampleRate32kHz) {
    // Something besides level_estimator_ is enabled, and we have super-wb.
    return true;
  }
  return false;
}
597
598 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
WriteMessageToDebugFile()599 int AudioProcessingImpl::WriteMessageToDebugFile() {
600 int32_t size = event_msg_->ByteSize();
601 if (size <= 0) {
602 return kUnspecifiedError;
603 }
604 #if defined(WEBRTC_BIG_ENDIAN)
605 // TODO(ajm): Use little-endian "on the wire". For the moment, we can be
606 // pretty safe in assuming little-endian.
607 #endif
608
609 if (!event_msg_->SerializeToString(&event_str_)) {
610 return kUnspecifiedError;
611 }
612
613 // Write message preceded by its size.
614 if (!debug_file_->Write(&size, sizeof(int32_t))) {
615 return kFileError;
616 }
617 if (!debug_file_->Write(event_str_.data(), event_str_.length())) {
618 return kFileError;
619 }
620
621 event_msg_->Clear();
622
623 return 0;
624 }
625
WriteInitMessage()626 int AudioProcessingImpl::WriteInitMessage() {
627 event_msg_->set_type(audioproc::Event::INIT);
628 audioproc::Init* msg = event_msg_->mutable_init();
629 msg->set_sample_rate(sample_rate_hz_);
630 msg->set_device_sample_rate(echo_cancellation_->device_sample_rate_hz());
631 msg->set_num_input_channels(num_input_channels_);
632 msg->set_num_output_channels(num_output_channels_);
633 msg->set_num_reverse_channels(num_reverse_channels_);
634
635 int err = WriteMessageToDebugFile();
636 if (err != kNoError) {
637 return err;
638 }
639
640 return kNoError;
641 }
642 #endif // WEBRTC_AUDIOPROC_DEBUG_DUMP
643 } // namespace webrtc
644