/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "audio_processing_impl.h"

#include <assert.h>

#include "audio_buffer.h"
#include "critical_section_wrapper.h"
#include "echo_cancellation_impl.h"
#include "echo_control_mobile_impl.h"
#include "file_wrapper.h"
#include "high_pass_filter_impl.h"
#include "gain_control_impl.h"
#include "level_estimator_impl.h"
#include "module_common_types.h"
#include "noise_suppression_impl.h"
#include "processing_component.h"
#include "splitting_filter.h"
#include "voice_detection_impl.h"

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
// Files generated at build-time by the protobuf compiler.
#ifdef WEBRTC_ANDROID
#include "external/webrtc/src/modules/audio_processing/debug.pb.h"
#else
#include "webrtc/audio_processing/debug.pb.h"
#endif
#endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP

namespace webrtc {
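// Illustrative usage of the AudioProcessing interface implemented below
// (a sketch only; error checking is omitted, and |render_frame|,
// |capture_frame| and |delay_ms| are hypothetical caller-side variables):
//
//   AudioProcessing* apm = AudioProcessing::Create(0);
//   apm->set_sample_rate_hz(16000);
//   apm->set_num_reverse_channels(1);
//   apm->set_num_channels(1, 1);
//   // For each 10 ms frame:
//   apm->AnalyzeReverseStream(render_frame);  // Far-end (render) audio.
//   apm->set_stream_delay_ms(delay_ms);       // Render-to-capture delay.
//   apm->ProcessStream(capture_frame);        // Near-end (capture) audio.
//   ...
//   AudioProcessing::Destroy(apm);
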
AudioProcessing* AudioProcessing::Create(int id) {

  AudioProcessingImpl* apm = new AudioProcessingImpl(id);
  if (apm->Initialize() != kNoError) {
    delete apm;
    apm = NULL;
  }

  return apm;
}

void AudioProcessing::Destroy(AudioProcessing* apm) {
  delete static_cast<AudioProcessingImpl*>(apm);
}

AudioProcessingImpl::AudioProcessingImpl(int id)
    : id_(id),
      echo_cancellation_(NULL),
      echo_control_mobile_(NULL),
      gain_control_(NULL),
      high_pass_filter_(NULL),
      level_estimator_(NULL),
      noise_suppression_(NULL),
      voice_detection_(NULL),
      crit_(CriticalSectionWrapper::CreateCriticalSection()),
      render_audio_(NULL),
      capture_audio_(NULL),
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
      debug_file_(FileWrapper::Create()),
      event_msg_(new audioproc::Event()),
#endif
      sample_rate_hz_(kSampleRate16kHz),
      split_sample_rate_hz_(kSampleRate16kHz),
      samples_per_channel_(sample_rate_hz_ / 100),
      stream_delay_ms_(0),
      was_stream_delay_set_(false),
      num_reverse_channels_(1),
      num_input_channels_(1),
      num_output_channels_(1) {

  echo_cancellation_ = new EchoCancellationImpl(this);
  component_list_.push_back(echo_cancellation_);

  echo_control_mobile_ = new EchoControlMobileImpl(this);
  component_list_.push_back(echo_control_mobile_);

  gain_control_ = new GainControlImpl(this);
  component_list_.push_back(gain_control_);

  high_pass_filter_ = new HighPassFilterImpl(this);
  component_list_.push_back(high_pass_filter_);

  level_estimator_ = new LevelEstimatorImpl(this);
  component_list_.push_back(level_estimator_);

  noise_suppression_ = new NoiseSuppressionImpl(this);
  component_list_.push_back(noise_suppression_);

  voice_detection_ = new VoiceDetectionImpl(this);
  component_list_.push_back(voice_detection_);
}

AudioProcessingImpl::~AudioProcessingImpl() {
  while (!component_list_.empty()) {
    ProcessingComponent* component = component_list_.front();
    component->Destroy();
    delete component;
    component_list_.pop_front();
  }

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_file_->Open()) {
    debug_file_->CloseFile();
  }
#endif

  delete crit_;
  crit_ = NULL;

  if (render_audio_) {
    delete render_audio_;
    render_audio_ = NULL;
  }

  if (capture_audio_) {
    delete capture_audio_;
    capture_audio_ = NULL;
  }
}

CriticalSectionWrapper* AudioProcessingImpl::crit() const {
  return crit_;
}

int AudioProcessingImpl::split_sample_rate_hz() const {
  return split_sample_rate_hz_;
}

int AudioProcessingImpl::Initialize() {
  CriticalSectionScoped crit_scoped(crit_);
  return InitializeLocked();
}

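// Recreates the render and capture buffers for the current channel and
// sample-rate configuration and reinitializes every component. Called with
// crit_ held, both from Initialize() and whenever the stream configuration
// changes.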
int AudioProcessingImpl::InitializeLocked() {
  if (render_audio_ != NULL) {
    delete render_audio_;
    render_audio_ = NULL;
  }

  if (capture_audio_ != NULL) {
    delete capture_audio_;
    capture_audio_ = NULL;
  }

  render_audio_ = new AudioBuffer(num_reverse_channels_,
                                  samples_per_channel_);
  capture_audio_ = new AudioBuffer(num_input_channels_,
                                   samples_per_channel_);

  was_stream_delay_set_ = false;

  // Initialize all components.
  std::list<ProcessingComponent*>::iterator it;
  for (it = component_list_.begin(); it != component_list_.end(); it++) {
    int err = (*it)->Initialize();
    if (err != kNoError) {
      return err;
    }
  }

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_file_->Open()) {
    int err = WriteInitMessage();
    if (err != kNoError) {
      return err;
    }
  }
#endif

  return kNoError;
}

int AudioProcessingImpl::set_sample_rate_hz(int rate) {
  CriticalSectionScoped crit_scoped(crit_);
  if (rate != kSampleRate8kHz &&
      rate != kSampleRate16kHz &&
      rate != kSampleRate32kHz) {
    return kBadParameterError;
  }

  sample_rate_hz_ = rate;
  samples_per_channel_ = rate / 100;

  if (sample_rate_hz_ == kSampleRate32kHz) {
    split_sample_rate_hz_ = kSampleRate16kHz;
  } else {
    split_sample_rate_hz_ = sample_rate_hz_;
  }

  return InitializeLocked();
}

int AudioProcessingImpl::sample_rate_hz() const {
  return sample_rate_hz_;
}

int AudioProcessingImpl::set_num_reverse_channels(int channels) {
  CriticalSectionScoped crit_scoped(crit_);
  // Only mono and stereo are supported currently.
  if (channels > 2 || channels < 1) {
    return kBadParameterError;
  }

  num_reverse_channels_ = channels;

  return InitializeLocked();
}

int AudioProcessingImpl::num_reverse_channels() const {
  return num_reverse_channels_;
}

int AudioProcessingImpl::set_num_channels(
    int input_channels,
    int output_channels) {
  CriticalSectionScoped crit_scoped(crit_);
  if (output_channels > input_channels) {
    return kBadParameterError;
  }

  // Only mono and stereo are supported currently.
  if (input_channels > 2 || input_channels < 1) {
    return kBadParameterError;
  }

  if (output_channels > 2 || output_channels < 1) {
    return kBadParameterError;
  }

  num_input_channels_ = input_channels;
  num_output_channels_ = output_channels;

  return InitializeLocked();
}

int AudioProcessingImpl::num_input_channels() const {
  return num_input_channels_;
}

int AudioProcessingImpl::num_output_channels() const {
  return num_output_channels_;
}

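// Runs the capture-side (near-end) chain on one 10 ms frame: deinterleave,
// optionally downmix to the output channel count, split into low and high
// bands when running at 32 kHz, then high-pass filter, AGC analysis, AEC,
// noise suppression, AECM, voice detection and AGC, recombine the bands,
// run the level estimator on the full-band signal, and interleave the
// result back into |frame|.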
int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
  CriticalSectionScoped crit_scoped(crit_);
  int err = kNoError;

  if (frame == NULL) {
    return kNullPointerError;
  }

  if (frame->_frequencyInHz != sample_rate_hz_) {
    return kBadSampleRateError;
  }

  if (frame->_audioChannel != num_input_channels_) {
    return kBadNumberChannelsError;
  }

  if (frame->_payloadDataLengthInSamples != samples_per_channel_) {
    return kBadDataLengthError;
  }

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_file_->Open()) {
    event_msg_->set_type(audioproc::Event::STREAM);
    audioproc::Stream* msg = event_msg_->mutable_stream();
    const size_t data_size = sizeof(int16_t) *
                             frame->_payloadDataLengthInSamples *
                             frame->_audioChannel;
    msg->set_input_data(frame->_payloadData, data_size);
    msg->set_delay(stream_delay_ms_);
    msg->set_drift(echo_cancellation_->stream_drift_samples());
    msg->set_level(gain_control_->stream_analog_level());
  }
#endif

  capture_audio_->DeinterleaveFrom(frame);

  // TODO(ajm): experiment with mixing and AEC placement.
  if (num_output_channels_ < num_input_channels_) {
    capture_audio_->Mix(num_output_channels_);
    frame->_audioChannel = num_output_channels_;
  }

  bool data_changed = stream_data_changed();
  if (analysis_needed(data_changed)) {
    for (int i = 0; i < num_output_channels_; i++) {
      // Split into a low and high band.
      SplittingFilterAnalysis(capture_audio_->data(i),
                              capture_audio_->low_pass_split_data(i),
                              capture_audio_->high_pass_split_data(i),
                              capture_audio_->analysis_filter_state1(i),
                              capture_audio_->analysis_filter_state2(i));
    }
  }

  err = high_pass_filter_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  err = gain_control_->AnalyzeCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  err = echo_cancellation_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  if (echo_control_mobile_->is_enabled() &&
      noise_suppression_->is_enabled()) {
    capture_audio_->CopyLowPassToReference();
  }

  err = noise_suppression_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  err = echo_control_mobile_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  err = voice_detection_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  err = gain_control_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  if (synthesis_needed(data_changed)) {
    for (int i = 0; i < num_output_channels_; i++) {
      // Recombine low and high bands.
      SplittingFilterSynthesis(capture_audio_->low_pass_split_data(i),
                               capture_audio_->high_pass_split_data(i),
                               capture_audio_->data(i),
                               capture_audio_->synthesis_filter_state1(i),
                               capture_audio_->synthesis_filter_state2(i));
    }
  }

  // The level estimator operates on the recombined data.
  err = level_estimator_->ProcessStream(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  capture_audio_->InterleaveTo(frame, data_changed);

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_file_->Open()) {
    audioproc::Stream* msg = event_msg_->mutable_stream();
    const size_t data_size = sizeof(int16_t) *
                             frame->_payloadDataLengthInSamples *
                             frame->_audioChannel;
    msg->set_output_data(frame->_payloadData, data_size);
    err = WriteMessageToDebugFile();
    if (err != kNoError) {
      return err;
    }
  }
#endif

  was_stream_delay_set_ = false;
  return kNoError;
}

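// Feeds one 10 ms frame of render-side (far-end) audio to the components
// that need a reference signal: the frame is deinterleaved, band-split at
// 32 kHz, and passed to the AEC, AECM and AGC render paths. The frame
// itself is not modified.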
int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
  CriticalSectionScoped crit_scoped(crit_);
  int err = kNoError;

  if (frame == NULL) {
    return kNullPointerError;
  }

  if (frame->_frequencyInHz != sample_rate_hz_) {
    return kBadSampleRateError;
  }

  if (frame->_audioChannel != num_reverse_channels_) {
    return kBadNumberChannelsError;
  }

  if (frame->_payloadDataLengthInSamples != samples_per_channel_) {
    return kBadDataLengthError;
  }

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_file_->Open()) {
    event_msg_->set_type(audioproc::Event::REVERSE_STREAM);
    audioproc::ReverseStream* msg = event_msg_->mutable_reverse_stream();
    const size_t data_size = sizeof(int16_t) *
                             frame->_payloadDataLengthInSamples *
                             frame->_audioChannel;
    msg->set_data(frame->_payloadData, data_size);
    err = WriteMessageToDebugFile();
    if (err != kNoError) {
      return err;
    }
  }
#endif

  render_audio_->DeinterleaveFrom(frame);

  // TODO(ajm): turn the splitting filter into a component?
  if (sample_rate_hz_ == kSampleRate32kHz) {
    for (int i = 0; i < num_reverse_channels_; i++) {
      // Split into low and high band.
      SplittingFilterAnalysis(render_audio_->data(i),
                              render_audio_->low_pass_split_data(i),
                              render_audio_->high_pass_split_data(i),
                              render_audio_->analysis_filter_state1(i),
                              render_audio_->analysis_filter_state2(i));
    }
  }

  // TODO(ajm): warnings possible from components?
  err = echo_cancellation_->ProcessRenderAudio(render_audio_);
  if (err != kNoError) {
    return err;
  }

  err = echo_control_mobile_->ProcessRenderAudio(render_audio_);
  if (err != kNoError) {
    return err;
  }

  err = gain_control_->ProcessRenderAudio(render_audio_);
  if (err != kNoError) {
    return err;
  }

  return err;  // TODO(ajm): this is for returning warnings; necessary?
}

int AudioProcessingImpl::set_stream_delay_ms(int delay) {
  was_stream_delay_set_ = true;
  if (delay < 0) {
    return kBadParameterError;
  }

  // TODO(ajm): the max is rather arbitrarily chosen; investigate.
  if (delay > 500) {
    stream_delay_ms_ = 500;
    return kBadStreamParameterWarning;
  }

  stream_delay_ms_ = delay;
  return kNoError;
}

int AudioProcessingImpl::stream_delay_ms() const {
  return stream_delay_ms_;
}

bool AudioProcessingImpl::was_stream_delay_set() const {
  return was_stream_delay_set_;
}

int AudioProcessingImpl::StartDebugRecording(
    const char filename[AudioProcessing::kMaxFilenameSize]) {
  CriticalSectionScoped crit_scoped(crit_);
  assert(kMaxFilenameSize == FileWrapper::kMaxFileNameSize);

  if (filename == NULL) {
    return kNullPointerError;
  }

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  // Stop any ongoing recording.
  if (debug_file_->Open()) {
    if (debug_file_->CloseFile() == -1) {
      return kFileError;
    }
  }

  if (debug_file_->OpenFile(filename, false) == -1) {
    debug_file_->CloseFile();
    return kFileError;
  }

  int err = WriteInitMessage();
  if (err != kNoError) {
    return err;
  }
  return kNoError;
#else
  return kUnsupportedFunctionError;
#endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP
}

int AudioProcessingImpl::StopDebugRecording() {
  CriticalSectionScoped crit_scoped(crit_);

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  // We just return if recording hasn't started.
  if (debug_file_->Open()) {
    if (debug_file_->CloseFile() == -1) {
      return kFileError;
    }
  }
  return kNoError;
#else
  return kUnsupportedFunctionError;
#endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP
}

EchoCancellation* AudioProcessingImpl::echo_cancellation() const {
  return echo_cancellation_;
}

EchoControlMobile* AudioProcessingImpl::echo_control_mobile() const {
  return echo_control_mobile_;
}

GainControl* AudioProcessingImpl::gain_control() const {
  return gain_control_;
}

HighPassFilter* AudioProcessingImpl::high_pass_filter() const {
  return high_pass_filter_;
}

LevelEstimator* AudioProcessingImpl::level_estimator() const {
  return level_estimator_;
}

NoiseSuppression* AudioProcessingImpl::noise_suppression() const {
  return noise_suppression_;
}

VoiceDetection* AudioProcessingImpl::voice_detection() const {
  return voice_detection_;
}

WebRtc_Word32 AudioProcessingImpl::ChangeUniqueId(const WebRtc_Word32 id) {
  CriticalSectionScoped crit_scoped(crit_);
  id_ = id;

  return kNoError;
}

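// Returns true if any enabled component may modify the capture data. The
// result gates band splitting/synthesis and re-interleaving of the output
// frame in ProcessStream().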
bool AudioProcessingImpl::stream_data_changed() const {
  int enabled_count = 0;
  std::list<ProcessingComponent*>::const_iterator it;
  for (it = component_list_.begin(); it != component_list_.end(); it++) {
    if ((*it)->is_component_enabled()) {
      enabled_count++;
    }
  }

  // Data is unchanged if no components are enabled, or if only
  // level_estimator_ and/or voice_detection_ are enabled.
  if (enabled_count == 0) {
    return false;
  } else if (enabled_count == 1) {
    if (level_estimator_->is_enabled() || voice_detection_->is_enabled()) {
      return false;
    }
  } else if (enabled_count == 2) {
    if (level_estimator_->is_enabled() && voice_detection_->is_enabled()) {
      return false;
    }
  }
  return true;
}

bool AudioProcessingImpl::synthesis_needed(bool stream_data_changed) const {
  return (stream_data_changed && sample_rate_hz_ == kSampleRate32kHz);
}

bool AudioProcessingImpl::analysis_needed(bool stream_data_changed) const {
  if (!stream_data_changed && !voice_detection_->is_enabled()) {
    // Only level_estimator_ is enabled.
    return false;
  } else if (sample_rate_hz_ == kSampleRate32kHz) {
    // Something besides level_estimator_ is enabled, and we have super-wb.
    return true;
  }
  return false;
}

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
int AudioProcessingImpl::WriteMessageToDebugFile() {
  int32_t size = event_msg_->ByteSize();
  if (size <= 0) {
    return kUnspecifiedError;
  }
#if defined(WEBRTC_BIG_ENDIAN)
  // TODO(ajm): Use little-endian "on the wire". For the moment, we can be
  //            pretty safe in assuming little-endian.
#endif

  if (!event_msg_->SerializeToString(&event_str_)) {
    return kUnspecifiedError;
  }

  // Write message preceded by its size.
  if (!debug_file_->Write(&size, sizeof(int32_t))) {
    return kFileError;
  }
  if (!debug_file_->Write(event_str_.data(), event_str_.length())) {
    return kFileError;
  }

  event_msg_->Clear();

  return kNoError;
}

int AudioProcessingImpl::WriteInitMessage() {
  event_msg_->set_type(audioproc::Event::INIT);
  audioproc::Init* msg = event_msg_->mutable_init();
  msg->set_sample_rate(sample_rate_hz_);
  msg->set_device_sample_rate(echo_cancellation_->device_sample_rate_hz());
  msg->set_num_input_channels(num_input_channels_);
  msg->set_num_output_channels(num_output_channels_);
  msg->set_num_reverse_channels(num_reverse_channels_);

  int err = WriteMessageToDebugFile();
  if (err != kNoError) {
    return err;
  }

  return kNoError;
}
#endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP
}  // namespace webrtc