1 /*
2  *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
12 #define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
13 
14 #include <deque>
15 #include <set>
16 
17 #include "webrtc/base/scoped_ptr.h"
18 #include "webrtc/test/testsupport/gtest_prod_util.h"
19 #include "webrtc/typedefs.h"
20 
21 namespace webrtc {
22 
23 class TransientDetector;
24 
25 // Detects transients in an audio stream and suppress them using a simple
26 // restoration algorithm that attenuates unexpected spikes in the spectrum.
27 class TransientSuppressor {
28  public:
29   TransientSuppressor();
30   ~TransientSuppressor();
31 
32   int Initialize(int sample_rate_hz, int detector_rate_hz, int num_channels);
33 
34   // Processes a |data| chunk, and returns it with keystrokes suppressed from
35   // it. The float format is assumed to be int16 ranged. If there are more than
36   // one channel, the chunks are concatenated one after the other in |data|.
37   // |data_length| must be equal to |data_length_|.
38   // |num_channels| must be equal to |num_channels_|.
39   // A sub-band, ideally the higher, can be used as |detection_data|. If it is
40   // NULL, |data| is used for the detection too. The |detection_data| is always
41   // assumed mono.
42   // If a reference signal (e.g. keyboard microphone) is available, it can be
43   // passed in as |reference_data|. It is assumed mono and must have the same
44   // length as |data|. NULL is accepted if unavailable.
45   // This suppressor performs better if voice information is available.
46   // |voice_probability| is the probability of voice being present in this chunk
47   // of audio. If voice information is not available, |voice_probability| must
48   // always be set to 1.
49   // |key_pressed| determines if a key was pressed on this audio chunk.
50   // Returns 0 on success and -1 otherwise.
51   int Suppress(float* data,
52                size_t data_length,
53                int num_channels,
54                const float* detection_data,
55                size_t detection_length,
56                const float* reference_data,
57                size_t reference_length,
58                float voice_probability,
59                bool key_pressed);
60 
61  private:
62   FRIEND_TEST_ALL_PREFIXES(TransientSuppressorTest,
63                            TypingDetectionLogicWorksAsExpectedForMono);
64   void Suppress(float* in_ptr, float* spectral_mean, float* out_ptr);
65 
66   void UpdateKeypress(bool key_pressed);
67   void UpdateRestoration(float voice_probability);
68 
69   void UpdateBuffers(float* data);
70 
71   void HardRestoration(float* spectral_mean);
72   void SoftRestoration(float* spectral_mean);
73 
74   rtc::scoped_ptr<TransientDetector> detector_;
75 
76   size_t data_length_;
77   size_t detection_length_;
78   size_t analysis_length_;
79   size_t buffer_delay_;
80   size_t complex_analysis_length_;
81   int num_channels_;
82   // Input buffer where the original samples are stored.
83   rtc::scoped_ptr<float[]> in_buffer_;
84   rtc::scoped_ptr<float[]> detection_buffer_;
85   // Output buffer where the restored samples are stored.
86   rtc::scoped_ptr<float[]> out_buffer_;
87 
88   // Arrays for fft.
89   rtc::scoped_ptr<size_t[]> ip_;
90   rtc::scoped_ptr<float[]> wfft_;
91 
92   rtc::scoped_ptr<float[]> spectral_mean_;
93 
94   // Stores the data for the fft.
95   rtc::scoped_ptr<float[]> fft_buffer_;
96 
97   rtc::scoped_ptr<float[]> magnitudes_;
98 
99   const float* window_;
100 
101   rtc::scoped_ptr<float[]> mean_factor_;
102 
103   float detector_smoothed_;
104 
105   int keypress_counter_;
106   int chunks_since_keypress_;
107   bool detection_enabled_;
108   bool suppression_enabled_;
109 
110   bool use_hard_restoration_;
111   int chunks_since_voice_change_;
112 
113   uint32_t seed_;
114 
115   bool using_reference_;
116 };
117 
118 }  // namespace webrtc
119 
120 #endif  // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
121