1 /*
2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 /*
12  * Specifies the interface for the AEC core.
13  */
14 
15 #ifndef MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_H_
16 #define MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_H_
17 
18 #include <stddef.h>
19 
20 #include <memory>
21 
22 extern "C" {
23 #include "common_audio/ring_buffer.h"
24 }
25 #include "modules/audio_processing/aec/aec_common.h"
26 #include "modules/audio_processing/utility/block_mean_calculator.h"
27 #include "modules/audio_processing/utility/ooura_fft.h"
28 #include "rtc_base/constructormagic.h"
29 
30 namespace webrtc {
31 
// Block-size constants used by the declarations in this header.
// FRAME_LEN is the frame size and PART_LEN the partition (block) size; the
// nearend buffer in AecCore exists to convert between the two.
#define FRAME_LEN 80
#define PART_LEN 64               // Length of partition
#define PART_LEN1 (PART_LEN + 1)  // Unique fft coefficients
#define PART_LEN2 (PART_LEN * 2)  // Length of partition * 2
#define NUM_HIGH_BANDS_MAX 2      // Max number of high bands
37 
// Forward declaration only: this header refers to ApmDataDumper solely through
// the std::unique_ptr member of AecCore.
class ApmDataDumper;
39 
// A pair of floats holding one complex value.
//
// Note on layout elsewhere in the AEC: for speed, several arrays of complex
// numbers are not stored as complex_t[SIZE] but as float[2][SIZE], i.e. every
// real part first and then every imaginary part. That form is SIMD friendly.
// It is also preferred over two independent arrays (one real, one imaginary),
// which would need two pointers rather than one and lead to additional
// register spilling. A further benefit is that offsets can be computed at
// compile time.
using complex_t = float[2];
48 
// Metrics
// NOTE(review): kOffsetLevel is presumably a level offset applied by the
// metrics code in the implementation file; its use is not visible here.
enum { kOffsetLevel = -100 };
51 
// Plain aggregate holding the running statistics of one echo metric.
// AecCore keeps one instance each for erl, erle, aNlp and rerl.
struct Stats {
  // Latest, averaged and extreme values.
  float instant;
  float average;
  float min;
  float max;

  // Accumulators.
  float sum;
  float hisum;
  float himean;

  // Sample counts paired with the accumulators above.
  size_t counter;
  size_t hicounter;
};
63 
// Number of partitions for the extended filter mode. The first one is an enum
// to be used in array declarations, as it represents the maximum filter length
// (it sizes the xfBuf, wfBuf and xfwBuf arrays in AecCore).
enum { kExtendedNumPartitions = 32 };
// NOTE(review): presumably the partition count used when extended filter mode
// is disabled; the actual count is selected at runtime (AecCore::num_partitions).
static const int kNormalNumPartitions = 12;
68 
// Delay estimator constants, used for logging and delay compensation if
// reported delays are disabled.
enum { kLookaheadBlocks = 15 };
enum {
  // 500 ms for 16 kHz which is equivalent with the limit of reported delays.
  // Also the number of bins in AecCore::delay_histogram.
  kHistorySizeBlocks = 125
};
76 
77 typedef struct PowerLevel {
78   PowerLevel();
79 
80   BlockMeanCalculator framelevel;
81   BlockMeanCalculator averagelevel;
82   float minlevel;
83 } PowerLevel;
84 
85 class BlockBuffer {
86  public:
87   BlockBuffer();
88   ~BlockBuffer();
89   void ReInit();
90   void Insert(const float block[PART_LEN]);
91   void ExtractExtendedBlock(float extended_block[PART_LEN]);
92   int AdjustSize(int buffer_size_decrease);
93   size_t Size();
94   size_t AvaliableSpace();
95 
96  private:
97   RingBuffer* buffer_;
98 };
99 
// Tracks a fraction derived from observations of the near-end, linear-output
// and NLP-output power levels. Non-copyable.
// NOTE(review): by its name the fraction is the share of observations in which
// the filter diverged; the criterion lives in the implementation file.
class DivergentFilterFraction {
 public:
  DivergentFilterFraction();

  // Reset.
  void Reset();

  // Adds one observation made from the three given power levels.
  void AddObservation(const PowerLevel& nearlevel,
                      const PowerLevel& linoutlevel,
                      const PowerLevel& nlpoutlevel);

  // Return the latest fraction.
  float GetLatestFraction() const;

 private:
  // Clear all values added.
  void Clear();

  // NOTE(review): presumably the number of observations added and the number
  // of those counted as occurrences - confirm against the implementation.
  size_t count_;
  size_t occurrence_;
  // Latest fraction, as returned by GetLatestFraction().
  float fraction_;

  RTC_DISALLOW_COPY_AND_ASSIGN(DivergentFilterFraction);
};
124 
125 typedef struct CoherenceState {
126   complex_t sde[PART_LEN1];  // cross-psd of nearend and error
127   complex_t sxd[PART_LEN1];  // cross-psd of farend and nearend
128   float sx[PART_LEN1], sd[PART_LEN1], se[PART_LEN1];  // far, near, error psd
129 } CoherenceState;
130 
131 struct AecCore {
132   explicit AecCore(int instance_index);
133   ~AecCore();
134 
135   std::unique_ptr<ApmDataDumper> data_dumper;
136   const OouraFft ooura_fft;
137 
138   CoherenceState coherence_state;
139 
140   int farBufWritePos, farBufReadPos;
141 
142   int knownDelay;
143   int inSamples, outSamples;
144   int delayEstCtr;
145 
146   // Nearend buffer used for changing from FRAME_LEN to PART_LEN sample block
147   // sizes. The buffer stores all the incoming bands and for each band a maximum
148   // of PART_LEN - (FRAME_LEN - PART_LEN) values need to be buffered in order to
149   // change the block size from FRAME_LEN to PART_LEN.
150   float nearend_buffer[NUM_HIGH_BANDS_MAX + 1]
151                       [PART_LEN - (FRAME_LEN - PART_LEN)];
152   size_t nearend_buffer_size;
153   float output_buffer[NUM_HIGH_BANDS_MAX + 1][2 * PART_LEN];
154   size_t output_buffer_size;
155 
156   float eBuf[PART_LEN2];  // error
157 
158   float previous_nearend_block[NUM_HIGH_BANDS_MAX + 1][PART_LEN];
159 
160   float xPow[PART_LEN1];
161   float dPow[PART_LEN1];
162   float dMinPow[PART_LEN1];
163   float dInitMinPow[PART_LEN1];
164   float* noisePow;
165 
166   float xfBuf[2][kExtendedNumPartitions * PART_LEN1];  // farend fft buffer
167   float wfBuf[2][kExtendedNumPartitions * PART_LEN1];  // filter fft
168   // Farend windowed fft buffer.
169   complex_t xfwBuf[kExtendedNumPartitions * PART_LEN1];
170 
171   float hNs[PART_LEN1];
172   float hNlFbMin, hNlFbLocalMin;
173   float hNlXdAvgMin;
174   int hNlNewMin, hNlMinCtr;
175   float overDrive;
176   float overdrive_scaling;
177   int nlp_mode;
178   float outBuf[PART_LEN];
179   int delayIdx;
180 
181   short stNearState, echoState;
182   short divergeState;
183 
184   int xfBufBlockPos;
185 
186   BlockBuffer farend_block_buffer_;
187 
188   int system_delay;  // Current system delay buffered in AEC.
189 
190   int mult;  // sampling frequency multiple
191   int sampFreq = 16000;
192   size_t num_bands;
193   uint32_t seed;
194 
195   float filter_step_size;  // stepsize
196   float error_threshold;   // error threshold
197 
198   int noiseEstCtr;
199 
200   PowerLevel farlevel;
201   PowerLevel nearlevel;
202   PowerLevel linoutlevel;
203   PowerLevel nlpoutlevel;
204 
205   int metricsMode;
206   int stateCounter;
207   Stats erl;
208   Stats erle;
209   Stats aNlp;
210   Stats rerl;
211   DivergentFilterFraction divergent_filter_fraction;
212 
213   // Quantities to control H band scaling for SWB input
214   int freq_avg_ic;       // initial bin for averaging nlp gain
215   int flag_Hband_cn;     // for comfort noise
216   float cn_scale_Hband;  // scale for comfort noise in H band
217 
218   int delay_metrics_delivered;
219   int delay_histogram[kHistorySizeBlocks];
220   int num_delay_values;
221   int delay_median;
222   int delay_std;
223   float fraction_poor_delays;
224   int delay_logging_enabled;
225   void* delay_estimator_farend;
226   void* delay_estimator;
227   // Variables associated with delay correction through signal based delay
228   // estimation feedback.
229   int previous_delay;
230   int delay_correction_count;
231   int shift_offset;
232   float delay_quality_threshold;
233   int frame_count;
234 
235   // 0 = delay agnostic mode (signal based delay correction) disabled.
236   // Otherwise enabled.
237   int delay_agnostic_enabled;
238   // 1 = extended filter mode enabled, 0 = disabled.
239   int extended_filter_enabled;
240   // 1 = refined filter adaptation aec mode enabled, 0 = disabled.
241   bool refined_adaptive_filter_enabled;
242 
243   // Runtime selection of number of filter partitions.
244   int num_partitions;
245 
246   // Flag that extreme filter divergence has been detected by the Echo
247   // Suppressor.
248   int extreme_filter_divergence;
249 };
250 
// Creates an AEC instance. Returns NULL on error.
// NOTE(review): |instance_count| is presumably forwarded to the AecCore
// constructor as its |instance_index| - confirm against the definition.
AecCore* WebRtcAec_CreateAec(int instance_count);
// Frees |aec| (presumably an instance obtained from WebRtcAec_CreateAec()).
void WebRtcAec_FreeAec(AecCore* aec);
// Initializes |aec| for the sampling frequency |sampFreq|.
// NOTE(review): the meaning of the return value is not visible in this header.
int WebRtcAec_InitAec(AecCore* aec, int sampFreq);
// Architecture-specific initialization hooks. The SSE2 variant is declared
// unconditionally; the MIPS and NEON variants are declared only when
// MIPS_FPU_LE or WEBRTC_HAS_NEON, respectively, is defined.
void WebRtcAec_InitAec_SSE2(void);
#if defined(MIPS_FPU_LE)
void WebRtcAec_InitAec_mips(void);
#endif
#if defined(WEBRTC_HAS_NEON)
void WebRtcAec_InitAec_neon(void);
#endif
261 
// Buffers far-end samples read from |farend| in |aec|.
// NOTE(review): presumably expects exactly one block of PART_LEN samples -
// confirm against the definition.
void WebRtcAec_BufferFarendBlock(AecCore* aec, const float* farend);
// Processes near-end frames and writes the result to |out|.
// NOTE(review): |nearend| and |out| are presumably arrays of |num_bands|
// per-band sample pointers, each holding |num_samples| samples - confirm
// against the definition.
void WebRtcAec_ProcessFrames(AecCore* aec,
                             const float* const* nearend,
                             size_t num_bands,
                             size_t num_samples,
                             int knownDelay,
                             float* const* out);
269 
// A helper function that adjusts the farend buffer size.
// Returns the number of elements the size was decreased with, and adjusts
// |system_delay| by the corresponding amount in ms.
int WebRtcAec_AdjustFarendBufferSizeAndSystemDelay(AecCore* aec,
                                                   int size_decrease);
275 
// Calculates the median, standard deviation and amount of poor values among the
// delay estimates aggregated up to the first call to the function. After that
// first call the metrics are aggregated and updated every second. With poor
// values we mean values that most likely will cause the AEC to perform poorly.
// The results are delivered through |median|, |std| and
// |fraction_poor_delays|.
// NOTE(review): the meaning of the return value is not visible in this header.
// TODO(bjornv): Consider changing tests and tools to handle a constant
// aggregation window throughout the session instead.
int WebRtcAec_GetDelayMetricsCore(AecCore* self,
                                  int* median,
                                  int* std,
                                  float* fraction_poor_delays);
286 
// Returns the echo state of |self| (1: echo, 0: no echo).
int WebRtcAec_echo_state(AecCore* self);
289 
// Gets statistics of the echo metrics ERL, ERLE, A_NLP, and the divergent
// filter fraction. Each result is delivered through the corresponding pointer
// argument.
void WebRtcAec_GetEchoStats(AecCore* self,
                            Stats* erl,
                            Stats* erle,
                            Stats* a_nlp,
                            float* divergent_filter_fraction);
296 
// Sets local configuration modes.
// NOTE(review): the three arguments presumably map to AecCore::nlp_mode,
// AecCore::metricsMode and AecCore::delay_logging_enabled - confirm against
// the definition.
void WebRtcAec_SetConfigCore(AecCore* self,
                             int nlp_mode,
                             int metrics_mode,
                             int delay_logging);
302 
// Enables or disables delay agnostic mode (signal based delay correction).
// Non-zero enables, zero disables.
void WebRtcAec_enable_delay_agnostic(AecCore* self, int enable);

// Returns non-zero if delay agnostic (i.e., signal based delay estimation) is
// enabled and zero if disabled.
int WebRtcAec_delay_agnostic_enabled(AecCore* self);
309 
// Turns on/off the refined adaptive filter feature (true turns it on).
void WebRtcAec_enable_refined_adaptive_filter(AecCore* self, bool enable);

// Returns whether the refined adaptive filter is enabled. Takes a const
// pointer, so |self| is not modified.
bool WebRtcAec_refined_adaptive_filter(const AecCore* self);
315 
// Enables or disables extended filter mode. Non-zero enables, zero disables.
// NOTE(review): presumably also affects the runtime partition count
// (AecCore::num_partitions) - confirm against the definition.
void WebRtcAec_enable_extended_filter(AecCore* self, int enable);

// Returns non-zero if extended filter mode is enabled and zero if disabled.
int WebRtcAec_extended_filter_enabled(AecCore* self);
321 
// Returns the current |system_delay|, i.e., the buffered difference between
// far-end and near-end.
int WebRtcAec_system_delay(AecCore* self);

// Sets the |system_delay| to |delay|.  Note that if the value is changed
// improperly, there can be a performance regression.  So it should be used with
// care.
void WebRtcAec_SetSystemDelay(AecCore* self, int delay);
330 
331 }  // namespace webrtc
332 
333 #endif  // MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_H_
334