1 /* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 /* 12 * Specifies the interface for the AEC core. 13 */ 14 15 #ifndef MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_H_ 16 #define MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_H_ 17 18 #include <stddef.h> 19 20 #include <memory> 21 22 extern "C" { 23 #include "common_audio/ring_buffer.h" 24 } 25 #include "modules/audio_processing/aec/aec_common.h" 26 #include "modules/audio_processing/utility/block_mean_calculator.h" 27 #include "modules/audio_processing/utility/ooura_fft.h" 28 #include "rtc_base/constructormagic.h" 29 30 namespace webrtc { 31 32 #define FRAME_LEN 80 33 #define PART_LEN 64 // Length of partition 34 #define PART_LEN1 (PART_LEN + 1) // Unique fft coefficients 35 #define PART_LEN2 (PART_LEN * 2) // Length of partition * 2 36 #define NUM_HIGH_BANDS_MAX 2 // Max number of high bands 37 38 class ApmDataDumper; 39 40 typedef float complex_t[2]; 41 // For performance reasons, some arrays of complex numbers are replaced by twice 42 // as long arrays of float, all the real parts followed by all the imaginary 43 // ones (complex_t[SIZE] -> float[2][SIZE]). This allows SIMD optimizations and 44 // is better than two arrays (one for the real parts and one for the imaginary 45 // parts) as this other way would require two pointers instead of one and cause 46 // extra register spilling. This also allows the offsets to be calculated at 47 // compile time. 48 49 // Metrics 50 enum { kOffsetLevel = -100 }; 51 52 typedef struct Stats { 53 float instant; 54 float average; 55 float min; 56 float max; 57 float sum; 58 float hisum; 59 float himean; 60 size_t counter; 61 size_t hicounter; 62 } Stats; 63 64 // Number of partitions for the extended filter mode. The first one is an enum 65 // to be used in array declarations, as it represents the maximum filter length. 66 enum { kExtendedNumPartitions = 32 }; 67 static const int kNormalNumPartitions = 12; 68 69 // Delay estimator constants, used for logging and delay compensation if 70 // if reported delays are disabled. 71 enum { kLookaheadBlocks = 15 }; 72 enum { 73 // 500 ms for 16 kHz which is equivalent with the limit of reported delays. 74 kHistorySizeBlocks = 125 75 }; 76 77 typedef struct PowerLevel { 78 PowerLevel(); 79 80 BlockMeanCalculator framelevel; 81 BlockMeanCalculator averagelevel; 82 float minlevel; 83 } PowerLevel; 84 85 class BlockBuffer { 86 public: 87 BlockBuffer(); 88 ~BlockBuffer(); 89 void ReInit(); 90 void Insert(const float block[PART_LEN]); 91 void ExtractExtendedBlock(float extended_block[PART_LEN]); 92 int AdjustSize(int buffer_size_decrease); 93 size_t Size(); 94 size_t AvaliableSpace(); 95 96 private: 97 RingBuffer* buffer_; 98 }; 99 100 class DivergentFilterFraction { 101 public: 102 DivergentFilterFraction(); 103 104 // Reset. 105 void Reset(); 106 107 void AddObservation(const PowerLevel& nearlevel, 108 const PowerLevel& linoutlevel, 109 const PowerLevel& nlpoutlevel); 110 111 // Return the latest fraction. 112 float GetLatestFraction() const; 113 114 private: 115 // Clear all values added. 116 void Clear(); 117 118 size_t count_; 119 size_t occurrence_; 120 float fraction_; 121 122 RTC_DISALLOW_COPY_AND_ASSIGN(DivergentFilterFraction); 123 }; 124 125 typedef struct CoherenceState { 126 complex_t sde[PART_LEN1]; // cross-psd of nearend and error 127 complex_t sxd[PART_LEN1]; // cross-psd of farend and nearend 128 float sx[PART_LEN1], sd[PART_LEN1], se[PART_LEN1]; // far, near, error psd 129 } CoherenceState; 130 131 struct AecCore { 132 explicit AecCore(int instance_index); 133 ~AecCore(); 134 135 std::unique_ptr<ApmDataDumper> data_dumper; 136 const OouraFft ooura_fft; 137 138 CoherenceState coherence_state; 139 140 int farBufWritePos, farBufReadPos; 141 142 int knownDelay; 143 int inSamples, outSamples; 144 int delayEstCtr; 145 146 // Nearend buffer used for changing from FRAME_LEN to PART_LEN sample block 147 // sizes. The buffer stores all the incoming bands and for each band a maximum 148 // of PART_LEN - (FRAME_LEN - PART_LEN) values need to be buffered in order to 149 // change the block size from FRAME_LEN to PART_LEN. 150 float nearend_buffer[NUM_HIGH_BANDS_MAX + 1] 151 [PART_LEN - (FRAME_LEN - PART_LEN)]; 152 size_t nearend_buffer_size; 153 float output_buffer[NUM_HIGH_BANDS_MAX + 1][2 * PART_LEN]; 154 size_t output_buffer_size; 155 156 float eBuf[PART_LEN2]; // error 157 158 float previous_nearend_block[NUM_HIGH_BANDS_MAX + 1][PART_LEN]; 159 160 float xPow[PART_LEN1]; 161 float dPow[PART_LEN1]; 162 float dMinPow[PART_LEN1]; 163 float dInitMinPow[PART_LEN1]; 164 float* noisePow; 165 166 float xfBuf[2][kExtendedNumPartitions * PART_LEN1]; // farend fft buffer 167 float wfBuf[2][kExtendedNumPartitions * PART_LEN1]; // filter fft 168 // Farend windowed fft buffer. 169 complex_t xfwBuf[kExtendedNumPartitions * PART_LEN1]; 170 171 float hNs[PART_LEN1]; 172 float hNlFbMin, hNlFbLocalMin; 173 float hNlXdAvgMin; 174 int hNlNewMin, hNlMinCtr; 175 float overDrive; 176 float overdrive_scaling; 177 int nlp_mode; 178 float outBuf[PART_LEN]; 179 int delayIdx; 180 181 short stNearState, echoState; 182 short divergeState; 183 184 int xfBufBlockPos; 185 186 BlockBuffer farend_block_buffer_; 187 188 int system_delay; // Current system delay buffered in AEC. 189 190 int mult; // sampling frequency multiple 191 int sampFreq = 16000; 192 size_t num_bands; 193 uint32_t seed; 194 195 float filter_step_size; // stepsize 196 float error_threshold; // error threshold 197 198 int noiseEstCtr; 199 200 PowerLevel farlevel; 201 PowerLevel nearlevel; 202 PowerLevel linoutlevel; 203 PowerLevel nlpoutlevel; 204 205 int metricsMode; 206 int stateCounter; 207 Stats erl; 208 Stats erle; 209 Stats aNlp; 210 Stats rerl; 211 DivergentFilterFraction divergent_filter_fraction; 212 213 // Quantities to control H band scaling for SWB input 214 int freq_avg_ic; // initial bin for averaging nlp gain 215 int flag_Hband_cn; // for comfort noise 216 float cn_scale_Hband; // scale for comfort noise in H band 217 218 int delay_metrics_delivered; 219 int delay_histogram[kHistorySizeBlocks]; 220 int num_delay_values; 221 int delay_median; 222 int delay_std; 223 float fraction_poor_delays; 224 int delay_logging_enabled; 225 void* delay_estimator_farend; 226 void* delay_estimator; 227 // Variables associated with delay correction through signal based delay 228 // estimation feedback. 229 int previous_delay; 230 int delay_correction_count; 231 int shift_offset; 232 float delay_quality_threshold; 233 int frame_count; 234 235 // 0 = delay agnostic mode (signal based delay correction) disabled. 236 // Otherwise enabled. 237 int delay_agnostic_enabled; 238 // 1 = extended filter mode enabled, 0 = disabled. 239 int extended_filter_enabled; 240 // 1 = refined filter adaptation aec mode enabled, 0 = disabled. 241 bool refined_adaptive_filter_enabled; 242 243 // Runtime selection of number of filter partitions. 244 int num_partitions; 245 246 // Flag that extreme filter divergence has been detected by the Echo 247 // Suppressor. 248 int extreme_filter_divergence; 249 }; 250 251 AecCore* WebRtcAec_CreateAec(int instance_count); // Returns NULL on error. 252 void WebRtcAec_FreeAec(AecCore* aec); 253 int WebRtcAec_InitAec(AecCore* aec, int sampFreq); 254 void WebRtcAec_InitAec_SSE2(void); 255 #if defined(MIPS_FPU_LE) 256 void WebRtcAec_InitAec_mips(void); 257 #endif 258 #if defined(WEBRTC_HAS_NEON) 259 void WebRtcAec_InitAec_neon(void); 260 #endif 261 262 void WebRtcAec_BufferFarendBlock(AecCore* aec, const float* farend); 263 void WebRtcAec_ProcessFrames(AecCore* aec, 264 const float* const* nearend, 265 size_t num_bands, 266 size_t num_samples, 267 int knownDelay, 268 float* const* out); 269 270 // A helper function to call adjust the farend buffer size. 271 // Returns the number of elements the size was decreased with, and adjusts 272 // |system_delay| by the corresponding amount in ms. 273 int WebRtcAec_AdjustFarendBufferSizeAndSystemDelay(AecCore* aec, 274 int size_decrease); 275 276 // Calculates the median, standard deviation and amount of poor values among the 277 // delay estimates aggregated up to the first call to the function. After that 278 // first call the metrics are aggregated and updated every second. With poor 279 // values we mean values that most likely will cause the AEC to perform poorly. 280 // TODO(bjornv): Consider changing tests and tools to handle constant 281 // constant aggregation window throughout the session instead. 282 int WebRtcAec_GetDelayMetricsCore(AecCore* self, 283 int* median, 284 int* std, 285 float* fraction_poor_delays); 286 287 // Returns the echo state (1: echo, 0: no echo). 288 int WebRtcAec_echo_state(AecCore* self); 289 290 // Gets statistics of the echo metrics ERL, ERLE, A_NLP. 291 void WebRtcAec_GetEchoStats(AecCore* self, 292 Stats* erl, 293 Stats* erle, 294 Stats* a_nlp, 295 float* divergent_filter_fraction); 296 297 // Sets local configuration modes. 298 void WebRtcAec_SetConfigCore(AecCore* self, 299 int nlp_mode, 300 int metrics_mode, 301 int delay_logging); 302 303 // Non-zero enables, zero disables. 304 void WebRtcAec_enable_delay_agnostic(AecCore* self, int enable); 305 306 // Returns non-zero if delay agnostic (i.e., signal based delay estimation) is 307 // enabled and zero if disabled. 308 int WebRtcAec_delay_agnostic_enabled(AecCore* self); 309 310 // Turns on/off the refined adaptive filter feature. 311 void WebRtcAec_enable_refined_adaptive_filter(AecCore* self, bool enable); 312 313 // Returns whether the refined adaptive filter is enabled. 314 bool WebRtcAec_refined_adaptive_filter(const AecCore* self); 315 316 // Enables or disables extended filter mode. Non-zero enables, zero disables. 317 void WebRtcAec_enable_extended_filter(AecCore* self, int enable); 318 319 // Returns non-zero if extended filter mode is enabled and zero if disabled. 320 int WebRtcAec_extended_filter_enabled(AecCore* self); 321 322 // Returns the current |system_delay|, i.e., the buffered difference between 323 // far-end and near-end. 324 int WebRtcAec_system_delay(AecCore* self); 325 326 // Sets the |system_delay| to |value|. Note that if the value is changed 327 // improperly, there can be a performance regression. So it should be used with 328 // care. 329 void WebRtcAec_SetSystemDelay(AecCore* self, int delay); 330 331 } // namespace webrtc 332 333 #endif // MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_H_ 334