1 
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "agc/include/gain_control.h"
#include "ns/include/noise_suppression_x.h"
#include "aec/include/echo_cancellation.h"
#include "aec/aec_core.h"
#include "vad/include/webrtc_vad.h"
#include "other/signal_processing_library.h"
#include "other/speex_resampler.h"
#include "zam/filters.h"
13 
14 
15 
16 typedef struct {
17     NsxHandle *noise_sup_x;
18     VadInst   *Vad_handle;
19     void *gain_control, *echo_cancellation;
20     uint32_t fs;
21 
22     WebRtcSpl_State48khzTo16khz state_in, state_in_echo;
23     WebRtcSpl_State16khzTo48khz state_out;
24     int32_t tmp_mem[496];
25 
26     int16_t msInSndCardBuf;
27 
28     FilterStateZam hpfa;
29     FilterStateZam hpfb;
30     FilterStateZam lpfa;
31     FilterStateZam lpfb;
32 
33     SpeexResamplerState *downsampler;
34     SpeexResamplerState *downsampler_echo;
35     SpeexResamplerState *upsampler;
36 
37     int32_t split_filter_state_1[6];
38     int32_t split_filter_state_2[6];
39     int32_t split_filter_state_3[6];
40     int32_t split_filter_state_4[6];
41 
42     int echo_enabled;
43     int gain_enabled;
44     int noise_enabled;
45     int vad_enabled;
46     int lowpass_enabled;
47 } Filter_Audio;
48 
49 #define _FILTER_AUDIO
50 #include "filter_audio.h"
51 
52 
53 
/*
 * Release a Filter_Audio instance together with every sub-module handle it
 * owns.
 *
 * f_a: instance returned by new_filter_audio(); NULL is accepted and ignored.
 *
 * The pointer must not be used after this call.
 */
void kill_filter_audio(Filter_Audio *f_a)
{
    if (!f_a) {
        return;
    }

    WebRtcNsx_Free(f_a->noise_sup_x);
    WebRtcAgc_Free(f_a->gain_control);
    WebRtcAec_Free(f_a->echo_cancellation);
    WebRtcVad_Free(f_a->Vad_handle);

    /*
     * The resamplers are only created when fs != 16000, so these handles are
     * NULL for 16 kHz instances. speex_resampler_destroy() is not documented
     * to accept NULL (the reference implementation dereferences its
     * argument), so each handle is checked first.
     */
    if (f_a->upsampler) {
        speex_resampler_destroy(f_a->upsampler);
    }

    if (f_a->downsampler) {
        speex_resampler_destroy(f_a->downsampler);
    }

    if (f_a->downsampler_echo) {
        speex_resampler_destroy(f_a->downsampler_echo);
    }

    free(f_a);
}
69 
new_filter_audio(uint32_t fs)70 Filter_Audio *new_filter_audio(uint32_t fs)
71 {
72     if (fs == 0) {
73         return NULL;
74     }
75 
76     Filter_Audio *f_a = calloc(sizeof(Filter_Audio), 1);
77 
78     if (!f_a) {
79         return NULL;
80     }
81 
82     f_a->fs = fs;
83 
84     if (fs != 16000)
85         fs = 32000;
86 
87     init_highpass_filter_zam(&f_a->hpfa, 100, (float) f_a->fs);
88     init_highpass_filter_zam(&f_a->hpfb, 100, (float) f_a->fs);
89 
90     unsigned int lowpass_filter_frequency = 12000;
91     if (f_a->fs > (lowpass_filter_frequency * 2)) {
92         init_lowpass_filter_zam(&f_a->lpfa, lowpass_filter_frequency, (float) f_a->fs);
93         init_lowpass_filter_zam(&f_a->lpfb, lowpass_filter_frequency, (float) f_a->fs);
94         f_a->lowpass_enabled = 1;
95     }
96 
97     if (WebRtcAgc_Create(&f_a->gain_control) == -1) {
98         free(f_a);
99         return NULL;
100     }
101 
102     if (WebRtcNsx_Create(&f_a->noise_sup_x) == -1) {
103         WebRtcAgc_Free(f_a->gain_control);
104         free(f_a);
105         return NULL;
106     }
107 
108     if (WebRtcAec_Create(&f_a->echo_cancellation) == -1) {
109         WebRtcAgc_Free(f_a->gain_control);
110         WebRtcNsx_Free(f_a->noise_sup_x);
111         free(f_a);
112         return NULL;
113     }
114 
115     if (WebRtcVad_Create(&f_a->Vad_handle) == -1){
116         WebRtcAec_Free(f_a->echo_cancellation);
117         WebRtcAgc_Free(f_a->gain_control);
118         WebRtcNsx_Free(f_a->noise_sup_x);
119         free(f_a);
120         return NULL;
121     }
122 
123     WebRtcAec_enable_delay_correction(WebRtcAec_aec_core(f_a->echo_cancellation), kAecTrue);
124     WebRtcAec_enable_reported_delay(WebRtcAec_aec_core(f_a->echo_cancellation), kAecTrue);
125 
126     WebRtcAgc_config_t gain_config;
127 
128     gain_config.targetLevelDbfs = 1;
129     gain_config.compressionGaindB = 20;
130     gain_config.limiterEnable = kAgcTrue;
131 
132     if (WebRtcAgc_Init(f_a->gain_control, 0, 255, kAgcModeAdaptiveDigital, fs) == -1 || WebRtcAgc_set_config(f_a->gain_control, gain_config) == -1) {
133         kill_filter_audio(f_a);
134         return NULL;
135     }
136 
137 
138     if (WebRtcNsx_Init(f_a->noise_sup_x, fs) == -1 || WebRtcNsx_set_policy(f_a->noise_sup_x, 2) == -1) {
139         kill_filter_audio(f_a);
140         return NULL;
141     }
142 
143     AecConfig echo_config;
144 
145     echo_config.nlpMode = kAecNlpAggressive;
146     echo_config.skewMode = kAecFalse;
147     echo_config.metricsMode = kAecFalse;
148     echo_config.delay_logging = kAecFalse;
149 
150     if (WebRtcAec_Init(f_a->echo_cancellation, fs, f_a->fs) == -1 || WebRtcAec_set_config(f_a->echo_cancellation, echo_config) == -1) {
151         kill_filter_audio(f_a);
152         return NULL;
153     }
154 
155     int vad_mode = 1;  //Aggressiveness mode (0, 1, 2, or 3).
156     if (WebRtcVad_Init(f_a->Vad_handle) == -1 || WebRtcVad_set_mode(f_a->Vad_handle,vad_mode) == -1){
157         kill_filter_audio(f_a);
158         return NULL;
159     }
160 
161     f_a->echo_enabled = 1;
162     f_a->gain_enabled = 1;
163     f_a->noise_enabled = 1;
164     f_a->vad_enabled = 1;
165 
166     int quality = 4;
167     if (f_a->fs != 16000) {
168         f_a->downsampler = speex_resampler_init(1, f_a->fs, 32000, quality, 0);
169         f_a->upsampler = speex_resampler_init(1, 32000, f_a->fs, quality, 0);
170 
171          /* quality doesn't need to be high for this one. */
172         quality = 0;
173         f_a->downsampler_echo = speex_resampler_init(1, f_a->fs, 16000, quality, 0);
174 
175         if (!f_a->upsampler || !f_a->downsampler || !f_a->downsampler_echo) {
176             kill_filter_audio(f_a);
177             return NULL;
178         }
179     }
180 
181 
182     return f_a;
183 }
184 
/*
 * Switch the individual processing stages on or off.
 *
 * Each of echo, noise, gain and vad is stored as-is in the matching
 * *_enabled flag of f_a (non-zero enables the stage).
 *
 * Returns 0 on success, -1 if f_a is NULL.
 */
int enable_disable_filters(Filter_Audio *f_a, int echo, int noise, int gain, int vad)
{
    if (f_a == NULL) {
        return -1;
    }

    f_a->echo_enabled  = echo;
    f_a->gain_enabled  = gain;
    f_a->noise_enabled = noise;
    f_a->vad_enabled   = vad;

    return 0;
}
197 
/*
 * Resample one 10 ms frame of far-end audio from the instance rate (fs) to
 * 16 kHz using the echo-path resampler.
 *
 * f_a: instance whose downsampler_echo has been created (fs != 16000).
 * out: destination with room for 160 samples (10 ms at 16 kHz).
 * in:  source holding fs / 100 samples.
 */
static void downsample_audio_echo_in(Filter_Audio *f_a, int16_t *out, const int16_t *in)
{
    /* 10 ms at 16 kHz; this is the size of the buffer the caller provides. */
    enum { ECHO_FRAME_SAMPLES = 160 };

    uint32_t inlen = f_a->fs / 100;
    /*
     * The output length passed to the resampler is the capacity of `out`,
     * not the input length. Using the input length here would cut the output
     * short for fs < 16000 and overstate the capacity for fs > 16000.
     */
    uint32_t outlen = ECHO_FRAME_SAMPLES;

    speex_resampler_process_int(f_a->downsampler_echo, 0, in, &inlen, out, &outlen);
}
204 
205 
/*
 * Split one input frame into a low band and a high band.
 *
 * When fs is not 32000 the input is first run through the instance's
 * downsampler into a 320-sample scratch buffer; at 32000 the input is used
 * directly. The result is then split with WebRtcSpl_AnalysisQMF.
 *
 * out_l, out_h: receive the low and high band respectively.
 * in:           input samples at the instance rate.
 * in_length:    number of samples available in `in`.
 */
static void downsample_audio(Filter_Audio *f_a, int16_t *out_l, int16_t *out_h, const int16_t *in, uint32_t in_length)
{
    int16_t resampled[320];
    uint32_t qmf_len = 320;
    const int16_t *qmf_in = in;

    if (f_a->fs != 32000) {
        /* qmf_len is updated by the resampler to the number of samples written. */
        speex_resampler_process_int(f_a->downsampler, 0, in, &in_length, resampled, &qmf_len);
        qmf_in = resampled;
    }

    WebRtcSpl_AnalysisQMF(qmf_in, qmf_len, out_l, out_h,
                          f_a->split_filter_state_1, f_a->split_filter_state_2);
}
219 
/*
 * Merge a low band and a high band back into one frame at the instance rate.
 *
 * The bands are combined with WebRtcSpl_SynthesisQMF. At fs == 32000 the
 * merged signal is written straight to `out`; otherwise it goes through a
 * 320-sample scratch buffer and the instance's upsampler.
 *
 * out, out_len: destination and its capacity in samples.
 * in_l, in_h:   low and high band, in_length samples each.
 */
static void upsample_audio(Filter_Audio *f_a, int16_t *out, uint32_t out_len, const int16_t *in_l, const int16_t *in_h, uint32_t in_length)
{
    if (f_a->fs == 32000) {
        WebRtcSpl_SynthesisQMF(in_l, in_h, in_length, out,
                               f_a->split_filter_state_3, f_a->split_filter_state_4);
        return;
    }

    int16_t merged[320];

    WebRtcSpl_SynthesisQMF(in_l, in_h, in_length, merged,
                           f_a->split_filter_state_3, f_a->split_filter_state_4);

    /* The merged signal has twice as many samples as each band. */
    uint32_t merged_len = in_length * 2;
    speex_resampler_process_int(f_a->upsampler, 0, merged, &merged_len, out, &out_len);
}
233 
234 
/*
 * Hand far-end audio (what is about to be played back) to the filter so the
 * echo canceller can use it as reference.
 *
 * f_a:     filter instance.
 * data:    samples at the instance rate (fs).
 * samples: number of samples in data; must be a non-zero multiple of
 *          fs / 100 (one 10 ms frame).
 *
 * Returns 0 on success, -1 if an argument is invalid, if both echo
 * cancellation and gain control are disabled, or if a WebRTC call fails.
 * On a mid-buffer failure the frames before it have already been consumed.
 */
int pass_audio_output(Filter_Audio *f_a, const int16_t *data, unsigned int samples)
{
    /* 10 ms at 16 kHz: the frame size used for the far-end path in both branches. */
    enum { FAREND_FRAME = 160 };

    if (!f_a || !data || (!f_a->echo_enabled && !f_a->gain_enabled)) {
        return -1;
    }

    /* Samples per 10 ms at the caller's rate; 0 when fs < 100, which would
     * otherwise make the modulo below divide by zero. */
    const unsigned int in_frame = f_a->fs / 100;

    if (in_frame == 0 || samples == 0 || (samples % in_frame) != 0) {
        return -1;
    }

    const _Bool resample = (f_a->fs != 16000);
    const unsigned int frame_count = samples / in_frame;
    const unsigned int nsx_samples = FAREND_FRAME;

    for (unsigned int i = 0; i < frame_count; ++i) {
        const int16_t *frame = data + i * in_frame;
        float d_f[FAREND_FRAME];

        if (resample) {
            int16_t d[FAREND_FRAME];
            downsample_audio_echo_in(f_a, d, frame);

            /*
             * NOTE(review): the AGC only receives far-end audio on this
             * (resampling) path; 16 kHz instances never call
             * WebRtcAgc_AddFarend. Confirm whether that is intended.
             */
            if (WebRtcAgc_AddFarend(f_a->gain_control, d, nsx_samples) == -1) {
                return -1;
            }

            S16ToFloatS16(d, nsx_samples, d_f);
        } else {
            /* At 16 kHz in_frame equals the far-end frame size. */
            S16ToFloatS16(frame, nsx_samples, d_f);
        }

        if (WebRtcAec_BufferFarend(f_a->echo_cancellation, d_f, nsx_samples) == -1) {
            return -1;
        }
    }

    return 0;
}
281 
282 /* Tell the echo canceller how much time in ms it takes for audio to be played and recorded back after. */
set_echo_delay_ms(Filter_Audio * f_a,int16_t msInSndCardBuf)283 int set_echo_delay_ms(Filter_Audio *f_a, int16_t msInSndCardBuf)
284 {
285     if (!f_a) {
286         return -1;
287     }
288 
289     f_a->msInSndCardBuf = msInSndCardBuf;
290 
291     return 0;
292 }
293 
/*
 * Run the enabled filters over captured audio, in place.
 *
 * f_a:     filter instance.
 * data:    samples at the instance rate (fs); overwritten with the result.
 * samples: number of samples in data; must be a non-zero multiple of
 *          fs / 100 (one 10 ms frame).
 *
 * Each 10 ms frame goes through, in order: optional VAD, AGC virtual mic,
 * echo cancellation, noise suppression, AGC processing, then the
 * high-pass / low-pass / saturator stage. When fs != 16000 the frame is
 * resampled and split into two bands before the WebRTC stages and merged
 * and resampled back afterwards.
 *
 * Returns -1 on invalid arguments or if a WebRTC call fails (frames before
 * the failing one have already been written back). Otherwise returns 1 if
 * the VAD reported voice in at least one frame or the VAD is disabled, and
 * 0 if no frame contained voice.
 */
int filter_audio(Filter_Audio *f_a, int16_t *data, unsigned int samples)
{
    /* Frame length handed to the WebRTC modules (per band when split). */
    enum { CORE_FRAME = 160 };

    if (!f_a || !data) {
        return -1;
    }

    /* Samples per 10 ms at the caller's rate; 0 when fs < 100, which would
     * otherwise make the modulo below divide by zero. */
    const unsigned int smp = f_a->fs / 100;

    if (smp == 0 || samples == 0 || (samples % smp) != 0) {
        return -1;
    }

    const _Bool resample = (f_a->fs != 16000);
    const unsigned int frame_count = samples / smp;
    const unsigned int nsx_samples = CORE_FRAME;
    int novoice = 1;

    for (unsigned int i = 0; i < frame_count; ++i) {
        int16_t *frame = data + i * smp;

        /* Zeroed so a short resampler output cannot leave stale samples. */
        int16_t d_l[CORE_FRAME] = {0};
        int16_t high_band[CORE_FRAME];
        /* Stays NULL at 16 kHz, where there is no high band. */
        int16_t *d_h = NULL;

        /* Sized from the array itself: the elements are int16_t, so a
         * length computed with sizeof(float) would overrun the buffer. */
        memset(high_band, 0, sizeof(high_band));

        if (resample) {
            d_h = high_band;
            downsample_audio(f_a, d_l, d_h, frame, smp);
        } else {
            /* At 16 kHz smp equals CORE_FRAME. */
            memcpy(d_l, frame, sizeof(d_l));
        }

        if (f_a->vad_enabled) {
            /* The VAD is run on the low band only. */
            if (WebRtcVad_Process(f_a->Vad_handle, 16000, d_l, nsx_samples) == 1) {
                novoice = 0;
            }
        } else {
            novoice = 0;
        }

        if (f_a->gain_enabled) {
            int32_t inMicLevel = 128, outMicLevel;

            if (WebRtcAgc_VirtualMic(f_a->gain_control, d_l, d_h, nsx_samples, inMicLevel, &outMicLevel) == -1) {
                return -1;
            }
        }

        float d_f_l[CORE_FRAME];
        S16ToFloatS16(d_l, nsx_samples, d_f_l);

        float d_f_h[CORE_FRAME];
        memset(d_f_h, 0, sizeof(d_f_h));

        if (resample) {
            S16ToFloatS16(d_h, nsx_samples, d_f_h);
        }

        if (f_a->echo_enabled) {
            if (WebRtcAec_Process(f_a->echo_cancellation, d_f_l, d_f_h, d_f_l, d_f_h, nsx_samples, f_a->msInSndCardBuf, 0) == -1) {
                return -1;
            }

            if (resample) {
                FloatS16ToS16(d_f_h, nsx_samples, d_h);
            }

            FloatS16ToS16(d_f_l, nsx_samples, d_l);
        }

        if (f_a->noise_enabled) {
            if (WebRtcNsx_Process(f_a->noise_sup_x, d_l, d_h, d_l, d_h) == -1) {
                return -1;
            }
        }

        if (f_a->gain_enabled) {
            int32_t inMicLevel = 128, outMicLevel;
            uint8_t saturationWarning;

            if (WebRtcAgc_Process(f_a->gain_control, d_l, d_h, nsx_samples, d_l, d_h, inMicLevel, &outMicLevel, 0, &saturationWarning) == -1) {
                return -1;
            }
        }

        if (resample) {
            /* Merge the bands back to fs, then post-filter at the caller's rate. */
            float d_f_u[smp];

            upsample_audio(f_a, frame, smp, d_l, d_h, nsx_samples);
            S16ToFloat(frame, smp, d_f_u);

            run_filter_zam(&f_a->hpfa, d_f_u, smp);
            run_filter_zam(&f_a->hpfb, d_f_u, smp);

            if (f_a->lowpass_enabled) {
                run_filter_zam(&f_a->lpfa, d_f_u, smp);
                run_filter_zam(&f_a->lpfb, d_f_u, smp);
            }

            run_saturator_zam(d_f_u, smp);
            FloatToS16(d_f_u, smp, frame);
        } else {
            S16ToFloat(d_l, nsx_samples, d_f_l);

            run_filter_zam(&f_a->hpfa, d_f_l, nsx_samples);
            run_filter_zam(&f_a->hpfb, d_f_l, nsx_samples);

            if (f_a->lowpass_enabled) {
                run_filter_zam(&f_a->lpfa, d_f_l, nsx_samples);
                run_filter_zam(&f_a->lpfb, d_f_l, nsx_samples);
            }

            run_saturator_zam(d_f_l, nsx_samples);

            FloatToS16(d_f_l, nsx_samples, d_l);
            memcpy(frame, d_l, sizeof(d_l));
        }
    }

    return !novoice;
}
419