1
2 #include <stdint.h>
3 #include <stdlib.h>
4 #include <stdio.h>
5 #include "agc/include/gain_control.h"
6 #include "ns/include/noise_suppression_x.h"
7 #include "aec/include/echo_cancellation.h"
8 #include "aec/aec_core.h"
9 #include "vad/include/webrtc_vad.h"
10 #include "other/signal_processing_library.h"
11 #include "other/speex_resampler.h"
12 #include "zam/filters.h"
13
14
15
16 typedef struct {
17 NsxHandle *noise_sup_x;
18 VadInst *Vad_handle;
19 void *gain_control, *echo_cancellation;
20 uint32_t fs;
21
22 WebRtcSpl_State48khzTo16khz state_in, state_in_echo;
23 WebRtcSpl_State16khzTo48khz state_out;
24 int32_t tmp_mem[496];
25
26 int16_t msInSndCardBuf;
27
28 FilterStateZam hpfa;
29 FilterStateZam hpfb;
30 FilterStateZam lpfa;
31 FilterStateZam lpfb;
32
33 SpeexResamplerState *downsampler;
34 SpeexResamplerState *downsampler_echo;
35 SpeexResamplerState *upsampler;
36
37 int32_t split_filter_state_1[6];
38 int32_t split_filter_state_2[6];
39 int32_t split_filter_state_3[6];
40 int32_t split_filter_state_4[6];
41
42 int echo_enabled;
43 int gain_enabled;
44 int noise_enabled;
45 int vad_enabled;
46 int lowpass_enabled;
47 } Filter_Audio;
48
49 #define _FILTER_AUDIO
50 #include "filter_audio.h"
51
52
53
/**
 * Destroy a filter and release everything it owns.
 *
 * @param f_a  Filter to destroy. NULL is accepted and ignored.
 *
 * The resampler handles are only created for sample rates other than
 * 16000 Hz, and they are still NULL when new_filter_audio() calls this
 * function from an error path that runs before the resamplers exist.
 * speex_resampler_destroy() is not documented to accept NULL, so each handle
 * is checked before it is destroyed.
 */
void kill_filter_audio(Filter_Audio *f_a)
{
    if (!f_a) {
        return;
    }

    WebRtcNsx_Free(f_a->noise_sup_x);
    WebRtcAgc_Free(f_a->gain_control);
    WebRtcAec_Free(f_a->echo_cancellation);
    WebRtcVad_Free(f_a->Vad_handle);

    if (f_a->upsampler) {
        speex_resampler_destroy(f_a->upsampler);
    }

    if (f_a->downsampler) {
        speex_resampler_destroy(f_a->downsampler);
    }

    if (f_a->downsampler_echo) {
        speex_resampler_destroy(f_a->downsampler_echo);
    }

    free(f_a);
}
69
new_filter_audio(uint32_t fs)70 Filter_Audio *new_filter_audio(uint32_t fs)
71 {
72 if (fs == 0) {
73 return NULL;
74 }
75
76 Filter_Audio *f_a = calloc(sizeof(Filter_Audio), 1);
77
78 if (!f_a) {
79 return NULL;
80 }
81
82 f_a->fs = fs;
83
84 if (fs != 16000)
85 fs = 32000;
86
87 init_highpass_filter_zam(&f_a->hpfa, 100, (float) f_a->fs);
88 init_highpass_filter_zam(&f_a->hpfb, 100, (float) f_a->fs);
89
90 unsigned int lowpass_filter_frequency = 12000;
91 if (f_a->fs > (lowpass_filter_frequency * 2)) {
92 init_lowpass_filter_zam(&f_a->lpfa, lowpass_filter_frequency, (float) f_a->fs);
93 init_lowpass_filter_zam(&f_a->lpfb, lowpass_filter_frequency, (float) f_a->fs);
94 f_a->lowpass_enabled = 1;
95 }
96
97 if (WebRtcAgc_Create(&f_a->gain_control) == -1) {
98 free(f_a);
99 return NULL;
100 }
101
102 if (WebRtcNsx_Create(&f_a->noise_sup_x) == -1) {
103 WebRtcAgc_Free(f_a->gain_control);
104 free(f_a);
105 return NULL;
106 }
107
108 if (WebRtcAec_Create(&f_a->echo_cancellation) == -1) {
109 WebRtcAgc_Free(f_a->gain_control);
110 WebRtcNsx_Free(f_a->noise_sup_x);
111 free(f_a);
112 return NULL;
113 }
114
115 if (WebRtcVad_Create(&f_a->Vad_handle) == -1){
116 WebRtcAec_Free(f_a->echo_cancellation);
117 WebRtcAgc_Free(f_a->gain_control);
118 WebRtcNsx_Free(f_a->noise_sup_x);
119 free(f_a);
120 return NULL;
121 }
122
123 WebRtcAec_enable_delay_correction(WebRtcAec_aec_core(f_a->echo_cancellation), kAecTrue);
124 WebRtcAec_enable_reported_delay(WebRtcAec_aec_core(f_a->echo_cancellation), kAecTrue);
125
126 WebRtcAgc_config_t gain_config;
127
128 gain_config.targetLevelDbfs = 1;
129 gain_config.compressionGaindB = 20;
130 gain_config.limiterEnable = kAgcTrue;
131
132 if (WebRtcAgc_Init(f_a->gain_control, 0, 255, kAgcModeAdaptiveDigital, fs) == -1 || WebRtcAgc_set_config(f_a->gain_control, gain_config) == -1) {
133 kill_filter_audio(f_a);
134 return NULL;
135 }
136
137
138 if (WebRtcNsx_Init(f_a->noise_sup_x, fs) == -1 || WebRtcNsx_set_policy(f_a->noise_sup_x, 2) == -1) {
139 kill_filter_audio(f_a);
140 return NULL;
141 }
142
143 AecConfig echo_config;
144
145 echo_config.nlpMode = kAecNlpAggressive;
146 echo_config.skewMode = kAecFalse;
147 echo_config.metricsMode = kAecFalse;
148 echo_config.delay_logging = kAecFalse;
149
150 if (WebRtcAec_Init(f_a->echo_cancellation, fs, f_a->fs) == -1 || WebRtcAec_set_config(f_a->echo_cancellation, echo_config) == -1) {
151 kill_filter_audio(f_a);
152 return NULL;
153 }
154
155 int vad_mode = 1; //Aggressiveness mode (0, 1, 2, or 3).
156 if (WebRtcVad_Init(f_a->Vad_handle) == -1 || WebRtcVad_set_mode(f_a->Vad_handle,vad_mode) == -1){
157 kill_filter_audio(f_a);
158 return NULL;
159 }
160
161 f_a->echo_enabled = 1;
162 f_a->gain_enabled = 1;
163 f_a->noise_enabled = 1;
164 f_a->vad_enabled = 1;
165
166 int quality = 4;
167 if (f_a->fs != 16000) {
168 f_a->downsampler = speex_resampler_init(1, f_a->fs, 32000, quality, 0);
169 f_a->upsampler = speex_resampler_init(1, 32000, f_a->fs, quality, 0);
170
171 /* quality doesn't need to be high for this one. */
172 quality = 0;
173 f_a->downsampler_echo = speex_resampler_init(1, f_a->fs, 16000, quality, 0);
174
175 if (!f_a->upsampler || !f_a->downsampler || !f_a->downsampler_echo) {
176 kill_filter_audio(f_a);
177 return NULL;
178 }
179 }
180
181
182 return f_a;
183 }
184
/**
 * Switch the individual processing stages on or off.
 *
 * Each argument is stored as given in the matching *_enabled flag of the
 * filter; the rest of the file tests those flags for non-zero.
 *
 * @param f_a    Filter to configure.
 * @param echo   New value of the echo cancellation flag.
 * @param noise  New value of the noise suppression flag.
 * @param gain   New value of the gain control flag.
 * @param vad    New value of the voice activity detection flag.
 * @return       0 on success, -1 if f_a is NULL.
 */
int enable_disable_filters(Filter_Audio *f_a, int echo, int noise, int gain, int vad)
{
    if (f_a == NULL) {
        return -1;
    }

    f_a->echo_enabled  = echo;
    f_a->noise_enabled = noise;
    f_a->gain_enabled  = gain;
    f_a->vad_enabled   = vad;

    return 0;
}
197
/*
 * Resample one 10 ms frame of far-end audio from the filter's input rate
 * to 16000 Hz.
 *
 * f_a  Filter whose downsampler_echo resampler is used.
 * out  Destination for the 16000 Hz frame; must have room for 160 samples.
 * in   Source frame holding fs / 100 samples at the input rate.
 *
 * The output capacity handed to the resampler is the size of the caller's
 * buffer (160 samples, i.e. 10 ms at 16000 Hz), not the input frame length:
 * using the input length overstated the buffer for rates above 16000 Hz and
 * cut the output short for rates below it.
 */
static void downsample_audio_echo_in(Filter_Audio *f_a, int16_t *out, const int16_t *in)
{
    uint32_t inlen = f_a->fs / 100;
    uint32_t outlen = 160;
    speex_resampler_process_int(f_a->downsampler_echo, 0, in, &inlen, out, &outlen);
}
204
205
/*
 * Turn one frame of captured audio into a low band and a high band.
 *
 * For 32000 Hz input the frame is split directly; for any other rate it is
 * first resampled to 32000 Hz with the filter's downsampler.
 *
 * f_a        Filter providing the resampler and the analysis QMF states.
 * out_l      Receives the low band.
 * out_h      Receives the high band.
 * in         Input frame at the filter's sample rate.
 * in_length  Number of samples in `in`; only used when resampling.
 */
static void downsample_audio(Filter_Audio *f_a, int16_t *out_l, int16_t *out_h, const int16_t *in, uint32_t in_length)
{
    uint32_t band_input_len = 320;

    if (f_a->fs == 32000) {
        /* Already at the processing rate: split the caller's frame as is. */
        WebRtcSpl_AnalysisQMF(in, band_input_len, out_l, out_h,
                              f_a->split_filter_state_1, f_a->split_filter_state_2);
        return;
    }

    int16_t resampled[320];

    /* band_input_len goes in as the capacity of `resampled` and comes back
     * as the number of samples actually produced. */
    speex_resampler_process_int(f_a->downsampler, 0, in, &in_length, resampled, &band_input_len);
    WebRtcSpl_AnalysisQMF(resampled, band_input_len, out_l, out_h,
                          f_a->split_filter_state_1, f_a->split_filter_state_2);
}
219
/*
 * Merge a low band and a high band back into one frame at the filter's
 * sample rate.
 *
 * For 32000 Hz the merged signal is written straight to `out`; for any other
 * rate it is merged into a scratch buffer and then resampled with the
 * filter's upsampler.
 *
 * f_a        Filter providing the resampler and the synthesis QMF states.
 * out        Receives the output frame.
 * out_len    Capacity of `out` in samples; only used when resampling.
 * in_l       Low band.
 * in_h       High band.
 * in_length  Number of samples in each band.
 */
static void upsample_audio(Filter_Audio *f_a, int16_t *out, uint32_t out_len, const int16_t *in_l, const int16_t *in_h, uint32_t in_length)
{
    if (f_a->fs == 32000) {
        WebRtcSpl_SynthesisQMF(in_l, in_h, in_length, out,
                               f_a->split_filter_state_3, f_a->split_filter_state_4);
        return;
    }

    int16_t merged[320];

    WebRtcSpl_SynthesisQMF(in_l, in_h, in_length, merged,
                           f_a->split_filter_state_3, f_a->split_filter_state_4);

    /* The merged signal is twice as long as each of the two bands. */
    uint32_t merged_len = in_length * 2;
    speex_resampler_process_int(f_a->upsampler, 0, merged, &merged_len, out, &out_len);
}
233
234
/**
 * Hand audio that is about to be played back (the far end) to the filter.
 *
 * The data is consumed in 10 ms frames. For sample rates other than
 * 16000 Hz each frame is resampled to 16000 Hz and also given to the gain
 * control; in all cases the frame is buffered in the echo canceller.
 *
 * @param f_a      Filter to feed.
 * @param data     Samples at the filter's sample rate.
 * @param samples  Number of samples in `data`; must be a non-zero multiple of
 *                 one 10 ms frame (fs / 100 samples).
 * @return         0 on success; -1 if f_a is NULL, if both echo cancellation
 *                 and gain control are disabled, if `samples` is invalid, or
 *                 if one of the modules reports an error.
 */
int pass_audio_output(Filter_Audio *f_a, const int16_t *data, unsigned int samples)
{
    if (!f_a) {
        return -1;
    }

    if (!f_a->echo_enabled && !f_a->gain_enabled) {
        return -1;
    }

    const unsigned int input_frame = f_a->fs / 100;
    if (samples == 0 || samples % input_frame != 0) {
        return -1;
    }

    const _Bool resample = (f_a->fs != 16000);
    const unsigned int frame_count = samples / input_frame;
    /* Frame length seen by the modules: 160 samples once resampled. */
    const unsigned int frame_len = resample ? 160 : input_frame;

    int16_t frame_s16[frame_len];
    float frame_f[frame_len];
    const int16_t *cursor = data;

    for (unsigned int frame = 0; frame < frame_count; ++frame) {
        if (resample) {
            downsample_audio_echo_in(f_a, frame_s16, cursor);

            if (WebRtcAgc_AddFarend(f_a->gain_control, frame_s16, frame_len) == -1) {
                return -1;
            }

            S16ToFloatS16(frame_s16, frame_len, frame_f);
        } else {
            S16ToFloatS16(cursor, frame_len, frame_f);
        }

        if (WebRtcAec_BufferFarend(f_a->echo_cancellation, frame_f, frame_len) == -1) {
            return -1;
        }

        /* Advance by one 10 ms frame of the caller's data. */
        cursor += input_frame;
    }

    return 0;
}
281
282 /* Tell the echo canceller how much time in ms it takes for audio to be played and recorded back after. */
set_echo_delay_ms(Filter_Audio * f_a,int16_t msInSndCardBuf)283 int set_echo_delay_ms(Filter_Audio *f_a, int16_t msInSndCardBuf)
284 {
285 if (!f_a) {
286 return -1;
287 }
288
289 f_a->msInSndCardBuf = msInSndCardBuf;
290
291 return 0;
292 }
293
/* Run the output stage on `count` float samples in place: the two high-pass
 * filters, the two low-pass filters when they are enabled, then the saturator. */
static void apply_output_filters(Filter_Audio *f_a, float *buf, unsigned int count)
{
    run_filter_zam(&f_a->hpfa, buf, count);
    run_filter_zam(&f_a->hpfb, buf, count);

    if (f_a->lowpass_enabled) {
        run_filter_zam(&f_a->lpfa, buf, count);
        run_filter_zam(&f_a->lpfb, buf, count);
    }

    run_saturator_zam(buf, count);
}

/**
 * Filter captured audio in place.
 *
 * The data is processed in 10 ms frames. Each frame goes through voice
 * activity detection, gain control, echo cancellation and noise suppression
 * (each only when enabled), followed by the output high-pass / low-pass /
 * saturator stage. For sample rates other than 16000 Hz the frame is split
 * into a low and a high band of 160 samples each before processing and merged
 * back afterwards.
 *
 * @param f_a      Filter to use.
 * @param data     Samples at the filter's sample rate; overwritten with the
 *                 filtered audio.
 * @param samples  Number of samples in `data`; must be a non-zero multiple of
 *                 one 10 ms frame (fs / 100 samples).
 * @return         -1 if f_a is NULL, `samples` is invalid or a module reports
 *                 an error (frames already processed stay modified);
 *                 otherwise 1 if voice was detected in at least one frame or
 *                 voice detection is disabled, and 0 if voice detection is
 *                 enabled and found no voice.
 */
int filter_audio(Filter_Audio *f_a, int16_t *data, unsigned int samples)
{
    if (!f_a) {
        return -1;
    }

    unsigned int nsx_samples = f_a->fs / 100;
    if (!samples || (samples % nsx_samples) != 0) {
        return -1;
    }

    _Bool resample = 0;
    unsigned int resampled_samples = 0;
    if (f_a->fs != 16000) {
        /* From here on `samples` counts band samples: 160 per 10 ms frame. */
        samples = (samples / nsx_samples) * 160;
        nsx_samples = 160;
        resample = 1;
    }

    unsigned int temp_samples = samples;
    unsigned int smp = f_a->fs / 100;   /* one 10 ms frame at the caller's rate */
    int novoice = 1;

    while (temp_samples) {
        int16_t d_l[nsx_samples];
        int16_t *d_h = NULL;
        int16_t temp[nsx_samples];

        /* Clear exactly the array: it holds int16_t elements, so sizing the
         * memset by sizeof(float) would write past its end. */
        memset(temp, 0, sizeof(temp));

        if (resample) {
            d_h = temp;
            downsample_audio(f_a, d_l, d_h, data + resampled_samples, smp);
        } else {
            memcpy(d_l, data + (samples - temp_samples), sizeof(d_l));
        }

        if (f_a->vad_enabled) {
            if (WebRtcVad_Process(f_a->Vad_handle, 16000, d_l, nsx_samples) == 1) {
                novoice = 0;
            }
        } else {
            novoice = 0;
        }

        if (f_a->gain_enabled) {
            int32_t inMicLevel = 128, outMicLevel;

            if (WebRtcAgc_VirtualMic(f_a->gain_control, d_l, d_h, nsx_samples, inMicLevel, &outMicLevel) == -1) {
                return -1;
            }
        }

        float d_f_l[nsx_samples];
        S16ToFloatS16(d_l, nsx_samples, d_f_l);

        /* The high band stays all zero when there is no band split. */
        float d_f_h[nsx_samples];
        memset(d_f_h, 0, sizeof(d_f_h));

        if (resample) {
            S16ToFloatS16(d_h, nsx_samples, d_f_h);
        }

        if (f_a->echo_enabled) {
            if (WebRtcAec_Process(f_a->echo_cancellation, d_f_l, d_f_h, d_f_l, d_f_h, nsx_samples, f_a->msInSndCardBuf, 0) == -1) {
                return -1;
            }

            if (resample) {
                FloatS16ToS16(d_f_h, nsx_samples, d_h);
            }

            FloatS16ToS16(d_f_l, nsx_samples, d_l);
        }

        if (f_a->noise_enabled) {
            if (WebRtcNsx_Process(f_a->noise_sup_x, d_l, d_h, d_l, d_h) == -1) {
                return -1;
            }
        }

        if (f_a->gain_enabled) {
            int32_t inMicLevel = 128, outMicLevel;
            uint8_t saturationWarning;

            if (WebRtcAgc_Process(f_a->gain_control, d_l, d_h, nsx_samples, d_l, d_h, inMicLevel, &outMicLevel, 0, &saturationWarning) == -1) {
                return -1;
            }
        }

        if (resample) {
            /* Merge the bands back into the caller's buffer, then run the
             * output stage on the full-rate frame. */
            float d_f_u[smp];
            upsample_audio(f_a, data + resampled_samples, smp, d_l, d_h, nsx_samples);
            S16ToFloat(data + resampled_samples, smp, d_f_u);
            apply_output_filters(f_a, d_f_u, smp);
            FloatToS16(d_f_u, smp, data + resampled_samples);
            resampled_samples += smp;
        } else {
            S16ToFloat(d_l, nsx_samples, d_f_l);
            apply_output_filters(f_a, d_f_l, nsx_samples);
            FloatToS16(d_f_l, nsx_samples, d_l);
            memcpy(data + (samples - temp_samples), d_l, sizeof(d_l));
        }

        temp_samples -= nsx_samples;
    }

    return !novoice;
}
419