1 /*
2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 /*
12  * Specifies the interface for the AEC core.
13  */
14 
15 #ifndef MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_H_
16 #define MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_H_
17 
18 #include <stddef.h>
19 
20 #include <memory>
21 
22 extern "C" {
23 #include "common_audio/ring_buffer.h"
24 }
25 #include "common_audio/wav_file.h"
26 #include "modules/audio_processing/aec/aec_common.h"
27 #include "modules/audio_processing/utility/block_mean_calculator.h"
28 #include "modules/audio_processing/utility/ooura_fft.h"
29 #include "rtc_base/constructormagic.h"
30 #include "typedefs.h"  // NOLINT(build/include)
31 
32 namespace webrtc {
33 
// Block-size constants of the AEC core. They are kept as preprocessor macros
// because they are used as array bounds throughout this header.
#define FRAME_LEN 80              // Samples per frame
#define PART_LEN 64               // Length of partition
#define PART_LEN1 (PART_LEN + 1)  // Unique fft coefficients
#define PART_LEN2 (PART_LEN * 2)  // Length of partition * 2
#define NUM_HIGH_BANDS_MAX 2      // Max number of high bands
39 
class ApmDataDumper;

// One complex number stored as two consecutive floats.
// NOTE(review): presumably index 0 is the real part and index 1 the imaginary
// part - confirm against the implementation file.
using complex_t = float[2];
// For performance reasons, some arrays of complex numbers are replaced by twice
// as long arrays of float, all the real parts followed by all the imaginary
// ones (complex_t[SIZE] -> float[2][SIZE]). This allows SIMD optimizations and
// is better than two arrays (one for the real parts and one for the imaginary
// parts) as this other way would require two pointers instead of one and cause
// extra register spilling. This also allows the offsets to be calculated at
// compile time.
50 
// Metrics.
// NOTE(review): kOffsetLevel is presumably a floor/sentinel level used by the
// metrics code; its use is not visible in this header - confirm in the
// implementation file.
enum { kOffsetLevel = -100 };
53 
// Statistics kept for a single metric. AecCore holds one instance each for
// erl, erle, aNlp and rerl. This is plain data; how the fields are updated is
// defined by the implementation file.
// NOTE(review): the hi* members look like a second accumulator (sum, mean and
// count) over a subset of the observations - confirm in the implementation.
struct Stats {
  float instant;
  float average;
  float min;
  float max;
  float sum;
  float hisum;
  float himean;
  size_t counter;
  size_t hicounter;
};
65 
// Number of filter partitions. kExtendedNumPartitions applies to the extended
// filter mode; it is an enum so that it can be used in array declarations, as
// it represents the maximum filter length. kNormalNumPartitions is the count
// used otherwise.
enum { kExtendedNumPartitions = 32 };
static constexpr int kNormalNumPartitions = 12;
70 
// Delay estimator constants, used for logging and for delay compensation if
// reported delays are disabled.
enum { kLookaheadBlocks = 15 };
enum {
  // 500 ms for 16 kHz which is equivalent with the limit of reported delays
  // (consistent with blocks of 64 samples: 125 * 64 / 16000 = 0.5 s).
  kHistorySizeBlocks = 125
};
78 
79 typedef struct PowerLevel {
80   PowerLevel();
81 
82   BlockMeanCalculator framelevel;
83   BlockMeanCalculator averagelevel;
84   float minlevel;
85 } PowerLevel;
86 
87 class BlockBuffer {
88  public:
89   BlockBuffer();
90   ~BlockBuffer();
91   void ReInit();
92   void Insert(const float block[PART_LEN]);
93   void ExtractExtendedBlock(float extended_block[PART_LEN]);
94   int AdjustSize(int buffer_size_decrease);
95   size_t Size();
96   size_t AvaliableSpace();
97 
98  private:
99   RingBuffer* buffer_;
100 };
101 
102 class DivergentFilterFraction {
103  public:
104   DivergentFilterFraction();
105 
106   // Reset.
107   void Reset();
108 
109   void AddObservation(const PowerLevel& nearlevel,
110                       const PowerLevel& linoutlevel,
111                       const PowerLevel& nlpoutlevel);
112 
113   // Return the latest fraction.
114   float GetLatestFraction() const;
115 
116  private:
117   // Clear all values added.
118   void Clear();
119 
120   size_t count_;
121   size_t occurrence_;
122   float fraction_;
123 
124   RTC_DISALLOW_COPY_AND_ASSIGN(DivergentFilterFraction);
125 };
126 
127 typedef struct CoherenceState {
128   complex_t sde[PART_LEN1];  // cross-psd of nearend and error
129   complex_t sxd[PART_LEN1];  // cross-psd of farend and nearend
130   float sx[PART_LEN1], sd[PART_LEN1], se[PART_LEN1];  // far, near, error psd
131 } CoherenceState;
132 
133 struct AecCore {
134   explicit AecCore(int instance_index);
135   ~AecCore();
136 
137   std::unique_ptr<ApmDataDumper> data_dumper;
138   const OouraFft ooura_fft;
139 
140   CoherenceState coherence_state;
141 
142   int farBufWritePos, farBufReadPos;
143 
144   int knownDelay;
145   int inSamples, outSamples;
146   int delayEstCtr;
147 
148   // Nearend buffer used for changing from FRAME_LEN to PART_LEN sample block
149   // sizes. The buffer stores all the incoming bands and for each band a maximum
150   // of PART_LEN - (FRAME_LEN - PART_LEN) values need to be buffered in order to
151   // change the block size from FRAME_LEN to PART_LEN.
152   float nearend_buffer[NUM_HIGH_BANDS_MAX + 1]
153                       [PART_LEN - (FRAME_LEN - PART_LEN)];
154   size_t nearend_buffer_size;
155   float output_buffer[NUM_HIGH_BANDS_MAX + 1][2 * PART_LEN];
156   size_t output_buffer_size;
157 
158   float eBuf[PART_LEN2];  // error
159 
160   float previous_nearend_block[NUM_HIGH_BANDS_MAX + 1][PART_LEN];
161 
162   float xPow[PART_LEN1];
163   float dPow[PART_LEN1];
164   float dMinPow[PART_LEN1];
165   float dInitMinPow[PART_LEN1];
166   float* noisePow;
167 
168   float xfBuf[2][kExtendedNumPartitions * PART_LEN1];  // farend fft buffer
169   float wfBuf[2][kExtendedNumPartitions * PART_LEN1];  // filter fft
170   // Farend windowed fft buffer.
171   complex_t xfwBuf[kExtendedNumPartitions * PART_LEN1];
172 
173   float hNs[PART_LEN1];
174   float hNlFbMin, hNlFbLocalMin;
175   float hNlXdAvgMin;
176   int hNlNewMin, hNlMinCtr;
177   float overDrive;
178   float overdrive_scaling;
179   int nlp_mode;
180   float outBuf[PART_LEN];
181   int delayIdx;
182 
183   short stNearState, echoState;
184   short divergeState;
185 
186   int xfBufBlockPos;
187 
188   BlockBuffer farend_block_buffer_;
189 
190   int system_delay;  // Current system delay buffered in AEC.
191 
192   int mult;  // sampling frequency multiple
193   int sampFreq = 16000;
194   size_t num_bands;
195   uint32_t seed;
196 
197   float filter_step_size;  // stepsize
198   float error_threshold;   // error threshold
199 
200   int noiseEstCtr;
201 
202   PowerLevel farlevel;
203   PowerLevel nearlevel;
204   PowerLevel linoutlevel;
205   PowerLevel nlpoutlevel;
206 
207   int metricsMode;
208   int stateCounter;
209   Stats erl;
210   Stats erle;
211   Stats aNlp;
212   Stats rerl;
213   DivergentFilterFraction divergent_filter_fraction;
214 
215   // Quantities to control H band scaling for SWB input
216   int freq_avg_ic;       // initial bin for averaging nlp gain
217   int flag_Hband_cn;     // for comfort noise
218   float cn_scale_Hband;  // scale for comfort noise in H band
219 
220   int delay_metrics_delivered;
221   int delay_histogram[kHistorySizeBlocks];
222   int num_delay_values;
223   int delay_median;
224   int delay_std;
225   float fraction_poor_delays;
226   int delay_logging_enabled;
227   void* delay_estimator_farend;
228   void* delay_estimator;
229   // Variables associated with delay correction through signal based delay
230   // estimation feedback.
231   int previous_delay;
232   int delay_correction_count;
233   int shift_offset;
234   float delay_quality_threshold;
235   int frame_count;
236 
237   // 0 = delay agnostic mode (signal based delay correction) disabled.
238   // Otherwise enabled.
239   int delay_agnostic_enabled;
240   // 1 = extended filter mode enabled, 0 = disabled.
241   int extended_filter_enabled;
242   // 1 = refined filter adaptation aec mode enabled, 0 = disabled.
243   bool refined_adaptive_filter_enabled;
244 
245   // Runtime selection of number of filter partitions.
246   int num_partitions;
247 
248   // Flag that extreme filter divergence has been detected by the Echo
249   // Suppressor.
250   int extreme_filter_divergence;
251 };
252 
253 AecCore* WebRtcAec_CreateAec(int instance_count);  // Returns NULL on error.
254 void WebRtcAec_FreeAec(AecCore* aec);
255 int WebRtcAec_InitAec(AecCore* aec, int sampFreq);
256 void WebRtcAec_InitAec_SSE2(void);
257 #if defined(MIPS_FPU_LE)
258 void WebRtcAec_InitAec_mips(void);
259 #endif
260 #if defined(WEBRTC_HAS_NEON)
261 void WebRtcAec_InitAec_neon(void);
262 #endif
263 
264 void WebRtcAec_BufferFarendBlock(AecCore* aec, const float* farend);
265 void WebRtcAec_ProcessFrames(AecCore* aec,
266                              const float* const* nearend,
267                              size_t num_bands,
268                              size_t num_samples,
269                              int knownDelay,
270                              float* const* out);
271 
272 // A helper function to call adjust the farend buffer size.
273 // Returns the number of elements the size was decreased with, and adjusts
274 // |system_delay| by the corresponding amount in ms.
275 int WebRtcAec_AdjustFarendBufferSizeAndSystemDelay(AecCore* aec,
276                                                    int size_decrease);
277 
278 // Calculates the median, standard deviation and amount of poor values among the
279 // delay estimates aggregated up to the first call to the function. After that
280 // first call the metrics are aggregated and updated every second. With poor
281 // values we mean values that most likely will cause the AEC to perform poorly.
282 // TODO(bjornv): Consider changing tests and tools to handle constant
283 // constant aggregation window throughout the session instead.
284 int WebRtcAec_GetDelayMetricsCore(AecCore* self,
285                                   int* median,
286                                   int* std,
287                                   float* fraction_poor_delays);
288 
289 // Returns the echo state (1: echo, 0: no echo).
290 int WebRtcAec_echo_state(AecCore* self);
291 
292 // Gets statistics of the echo metrics ERL, ERLE, A_NLP.
293 void WebRtcAec_GetEchoStats(AecCore* self,
294                             Stats* erl,
295                             Stats* erle,
296                             Stats* a_nlp,
297                             float* divergent_filter_fraction);
298 
299 // Sets local configuration modes.
300 void WebRtcAec_SetConfigCore(AecCore* self,
301                              int nlp_mode,
302                              int metrics_mode,
303                              int delay_logging);
304 
305 // Non-zero enables, zero disables.
306 void WebRtcAec_enable_delay_agnostic(AecCore* self, int enable);
307 
308 // Returns non-zero if delay agnostic (i.e., signal based delay estimation) is
309 // enabled and zero if disabled.
310 int WebRtcAec_delay_agnostic_enabled(AecCore* self);
311 
312 // Turns on/off the refined adaptive filter feature.
313 void WebRtcAec_enable_refined_adaptive_filter(AecCore* self, bool enable);
314 
315 // Returns whether the refined adaptive filter is enabled.
316 bool WebRtcAec_refined_adaptive_filter(const AecCore* self);
317 
318 // Enables or disables extended filter mode. Non-zero enables, zero disables.
319 void WebRtcAec_enable_extended_filter(AecCore* self, int enable);
320 
321 // Returns non-zero if extended filter mode is enabled and zero if disabled.
322 int WebRtcAec_extended_filter_enabled(AecCore* self);
323 
324 // Returns the current |system_delay|, i.e., the buffered difference between
325 // far-end and near-end.
326 int WebRtcAec_system_delay(AecCore* self);
327 
328 // Sets the |system_delay| to |value|.  Note that if the value is changed
329 // improperly, there can be a performance regression.  So it should be used with
330 // care.
331 void WebRtcAec_SetSystemDelay(AecCore* self, int delay);
332 
333 }  // namespace webrtc
334 
335 #endif  // MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_H_
336