1 /* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 /* 12 * Specifies the interface for the AEC core. 13 */ 14 15 #ifndef MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_H_ 16 #define MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_H_ 17 18 #include <stddef.h> 19 20 #include <memory> 21 22 extern "C" { 23 #include "common_audio/ring_buffer.h" 24 } 25 #include "common_audio/wav_file.h" 26 #include "modules/audio_processing/aec/aec_common.h" 27 #include "modules/audio_processing/utility/block_mean_calculator.h" 28 #include "modules/audio_processing/utility/ooura_fft.h" 29 #include "rtc_base/constructormagic.h" 30 #include "typedefs.h" // NOLINT(build/include) 31 32 namespace webrtc { 33 34 #define FRAME_LEN 80 35 #define PART_LEN 64 // Length of partition 36 #define PART_LEN1 (PART_LEN + 1) // Unique fft coefficients 37 #define PART_LEN2 (PART_LEN * 2) // Length of partition * 2 38 #define NUM_HIGH_BANDS_MAX 2 // Max number of high bands 39 40 class ApmDataDumper; 41 42 typedef float complex_t[2]; 43 // For performance reasons, some arrays of complex numbers are replaced by twice 44 // as long arrays of float, all the real parts followed by all the imaginary 45 // ones (complex_t[SIZE] -> float[2][SIZE]). This allows SIMD optimizations and 46 // is better than two arrays (one for the real parts and one for the imaginary 47 // parts) as this other way would require two pointers instead of one and cause 48 // extra register spilling. This also allows the offsets to be calculated at 49 // compile time. 50 51 // Metrics 52 enum { kOffsetLevel = -100 }; 53 54 typedef struct Stats { 55 float instant; 56 float average; 57 float min; 58 float max; 59 float sum; 60 float hisum; 61 float himean; 62 size_t counter; 63 size_t hicounter; 64 } Stats; 65 66 // Number of partitions for the extended filter mode. The first one is an enum 67 // to be used in array declarations, as it represents the maximum filter length. 68 enum { kExtendedNumPartitions = 32 }; 69 static const int kNormalNumPartitions = 12; 70 71 // Delay estimator constants, used for logging and delay compensation if 72 // if reported delays are disabled. 73 enum { kLookaheadBlocks = 15 }; 74 enum { 75 // 500 ms for 16 kHz which is equivalent with the limit of reported delays. 76 kHistorySizeBlocks = 125 77 }; 78 79 typedef struct PowerLevel { 80 PowerLevel(); 81 82 BlockMeanCalculator framelevel; 83 BlockMeanCalculator averagelevel; 84 float minlevel; 85 } PowerLevel; 86 87 class BlockBuffer { 88 public: 89 BlockBuffer(); 90 ~BlockBuffer(); 91 void ReInit(); 92 void Insert(const float block[PART_LEN]); 93 void ExtractExtendedBlock(float extended_block[PART_LEN]); 94 int AdjustSize(int buffer_size_decrease); 95 size_t Size(); 96 size_t AvaliableSpace(); 97 98 private: 99 RingBuffer* buffer_; 100 }; 101 102 class DivergentFilterFraction { 103 public: 104 DivergentFilterFraction(); 105 106 // Reset. 107 void Reset(); 108 109 void AddObservation(const PowerLevel& nearlevel, 110 const PowerLevel& linoutlevel, 111 const PowerLevel& nlpoutlevel); 112 113 // Return the latest fraction. 114 float GetLatestFraction() const; 115 116 private: 117 // Clear all values added. 118 void Clear(); 119 120 size_t count_; 121 size_t occurrence_; 122 float fraction_; 123 124 RTC_DISALLOW_COPY_AND_ASSIGN(DivergentFilterFraction); 125 }; 126 127 typedef struct CoherenceState { 128 complex_t sde[PART_LEN1]; // cross-psd of nearend and error 129 complex_t sxd[PART_LEN1]; // cross-psd of farend and nearend 130 float sx[PART_LEN1], sd[PART_LEN1], se[PART_LEN1]; // far, near, error psd 131 } CoherenceState; 132 133 struct AecCore { 134 explicit AecCore(int instance_index); 135 ~AecCore(); 136 137 std::unique_ptr<ApmDataDumper> data_dumper; 138 const OouraFft ooura_fft; 139 140 CoherenceState coherence_state; 141 142 int farBufWritePos, farBufReadPos; 143 144 int knownDelay; 145 int inSamples, outSamples; 146 int delayEstCtr; 147 148 // Nearend buffer used for changing from FRAME_LEN to PART_LEN sample block 149 // sizes. The buffer stores all the incoming bands and for each band a maximum 150 // of PART_LEN - (FRAME_LEN - PART_LEN) values need to be buffered in order to 151 // change the block size from FRAME_LEN to PART_LEN. 152 float nearend_buffer[NUM_HIGH_BANDS_MAX + 1] 153 [PART_LEN - (FRAME_LEN - PART_LEN)]; 154 size_t nearend_buffer_size; 155 float output_buffer[NUM_HIGH_BANDS_MAX + 1][2 * PART_LEN]; 156 size_t output_buffer_size; 157 158 float eBuf[PART_LEN2]; // error 159 160 float previous_nearend_block[NUM_HIGH_BANDS_MAX + 1][PART_LEN]; 161 162 float xPow[PART_LEN1]; 163 float dPow[PART_LEN1]; 164 float dMinPow[PART_LEN1]; 165 float dInitMinPow[PART_LEN1]; 166 float* noisePow; 167 168 float xfBuf[2][kExtendedNumPartitions * PART_LEN1]; // farend fft buffer 169 float wfBuf[2][kExtendedNumPartitions * PART_LEN1]; // filter fft 170 // Farend windowed fft buffer. 171 complex_t xfwBuf[kExtendedNumPartitions * PART_LEN1]; 172 173 float hNs[PART_LEN1]; 174 float hNlFbMin, hNlFbLocalMin; 175 float hNlXdAvgMin; 176 int hNlNewMin, hNlMinCtr; 177 float overDrive; 178 float overdrive_scaling; 179 int nlp_mode; 180 float outBuf[PART_LEN]; 181 int delayIdx; 182 183 short stNearState, echoState; 184 short divergeState; 185 186 int xfBufBlockPos; 187 188 BlockBuffer farend_block_buffer_; 189 190 int system_delay; // Current system delay buffered in AEC. 191 192 int mult; // sampling frequency multiple 193 int sampFreq = 16000; 194 size_t num_bands; 195 uint32_t seed; 196 197 float filter_step_size; // stepsize 198 float error_threshold; // error threshold 199 200 int noiseEstCtr; 201 202 PowerLevel farlevel; 203 PowerLevel nearlevel; 204 PowerLevel linoutlevel; 205 PowerLevel nlpoutlevel; 206 207 int metricsMode; 208 int stateCounter; 209 Stats erl; 210 Stats erle; 211 Stats aNlp; 212 Stats rerl; 213 DivergentFilterFraction divergent_filter_fraction; 214 215 // Quantities to control H band scaling for SWB input 216 int freq_avg_ic; // initial bin for averaging nlp gain 217 int flag_Hband_cn; // for comfort noise 218 float cn_scale_Hband; // scale for comfort noise in H band 219 220 int delay_metrics_delivered; 221 int delay_histogram[kHistorySizeBlocks]; 222 int num_delay_values; 223 int delay_median; 224 int delay_std; 225 float fraction_poor_delays; 226 int delay_logging_enabled; 227 void* delay_estimator_farend; 228 void* delay_estimator; 229 // Variables associated with delay correction through signal based delay 230 // estimation feedback. 231 int previous_delay; 232 int delay_correction_count; 233 int shift_offset; 234 float delay_quality_threshold; 235 int frame_count; 236 237 // 0 = delay agnostic mode (signal based delay correction) disabled. 238 // Otherwise enabled. 239 int delay_agnostic_enabled; 240 // 1 = extended filter mode enabled, 0 = disabled. 241 int extended_filter_enabled; 242 // 1 = refined filter adaptation aec mode enabled, 0 = disabled. 243 bool refined_adaptive_filter_enabled; 244 245 // Runtime selection of number of filter partitions. 246 int num_partitions; 247 248 // Flag that extreme filter divergence has been detected by the Echo 249 // Suppressor. 250 int extreme_filter_divergence; 251 }; 252 253 AecCore* WebRtcAec_CreateAec(int instance_count); // Returns NULL on error. 254 void WebRtcAec_FreeAec(AecCore* aec); 255 int WebRtcAec_InitAec(AecCore* aec, int sampFreq); 256 void WebRtcAec_InitAec_SSE2(void); 257 #if defined(MIPS_FPU_LE) 258 void WebRtcAec_InitAec_mips(void); 259 #endif 260 #if defined(WEBRTC_HAS_NEON) 261 void WebRtcAec_InitAec_neon(void); 262 #endif 263 264 void WebRtcAec_BufferFarendBlock(AecCore* aec, const float* farend); 265 void WebRtcAec_ProcessFrames(AecCore* aec, 266 const float* const* nearend, 267 size_t num_bands, 268 size_t num_samples, 269 int knownDelay, 270 float* const* out); 271 272 // A helper function to call adjust the farend buffer size. 273 // Returns the number of elements the size was decreased with, and adjusts 274 // |system_delay| by the corresponding amount in ms. 275 int WebRtcAec_AdjustFarendBufferSizeAndSystemDelay(AecCore* aec, 276 int size_decrease); 277 278 // Calculates the median, standard deviation and amount of poor values among the 279 // delay estimates aggregated up to the first call to the function. After that 280 // first call the metrics are aggregated and updated every second. With poor 281 // values we mean values that most likely will cause the AEC to perform poorly. 282 // TODO(bjornv): Consider changing tests and tools to handle constant 283 // constant aggregation window throughout the session instead. 284 int WebRtcAec_GetDelayMetricsCore(AecCore* self, 285 int* median, 286 int* std, 287 float* fraction_poor_delays); 288 289 // Returns the echo state (1: echo, 0: no echo). 290 int WebRtcAec_echo_state(AecCore* self); 291 292 // Gets statistics of the echo metrics ERL, ERLE, A_NLP. 293 void WebRtcAec_GetEchoStats(AecCore* self, 294 Stats* erl, 295 Stats* erle, 296 Stats* a_nlp, 297 float* divergent_filter_fraction); 298 299 // Sets local configuration modes. 300 void WebRtcAec_SetConfigCore(AecCore* self, 301 int nlp_mode, 302 int metrics_mode, 303 int delay_logging); 304 305 // Non-zero enables, zero disables. 306 void WebRtcAec_enable_delay_agnostic(AecCore* self, int enable); 307 308 // Returns non-zero if delay agnostic (i.e., signal based delay estimation) is 309 // enabled and zero if disabled. 310 int WebRtcAec_delay_agnostic_enabled(AecCore* self); 311 312 // Turns on/off the refined adaptive filter feature. 313 void WebRtcAec_enable_refined_adaptive_filter(AecCore* self, bool enable); 314 315 // Returns whether the refined adaptive filter is enabled. 316 bool WebRtcAec_refined_adaptive_filter(const AecCore* self); 317 318 // Enables or disables extended filter mode. Non-zero enables, zero disables. 319 void WebRtcAec_enable_extended_filter(AecCore* self, int enable); 320 321 // Returns non-zero if extended filter mode is enabled and zero if disabled. 322 int WebRtcAec_extended_filter_enabled(AecCore* self); 323 324 // Returns the current |system_delay|, i.e., the buffered difference between 325 // far-end and near-end. 326 int WebRtcAec_system_delay(AecCore* self); 327 328 // Sets the |system_delay| to |value|. Note that if the value is changed 329 // improperly, there can be a performance regression. So it should be used with 330 // care. 331 void WebRtcAec_SetSystemDelay(AecCore* self, int delay); 332 333 } // namespace webrtc 334 335 #endif // MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_H_ 336