1 /* Copyright (c) 2017 Mozilla */
2 /*
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
5 are met:
6
7 - Redistributions of source code must retain the above copyright
8 notice, this list of conditions and the following disclaimer.
9
10 - Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
18 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
22 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #ifdef HAVE_CONFIG_H
28 #include "config.h"
29 #endif
30
31 #include <stdlib.h>
32 #include <string.h>
33 #include <stdio.h>
34 #include "kiss_fft.h"
35 #include "common.h"
36 #include <math.h>
37 #include "rnnoise.h"
38 #include "pitch.h"
39 #include "arch.h"
40 #include "rnn.h"
41 #include "rnn_data.h"
42
43 #define FRAME_SIZE_SHIFT 2
44 #define FRAME_SIZE (120<<FRAME_SIZE_SHIFT)
45 #define WINDOW_SIZE (2*FRAME_SIZE)
46 #define FREQ_SIZE (FRAME_SIZE + 1)
47
48 #define PITCH_MIN_PERIOD 60
49 #define PITCH_MAX_PERIOD 768
50 #define PITCH_FRAME_SIZE 960
51 #define PITCH_BUF_SIZE (PITCH_MAX_PERIOD+PITCH_FRAME_SIZE)
52
53 #define SQUARE(x) ((x)*(x))
54
55 #define SMOOTH_BANDS 1
56
57 #if SMOOTH_BANDS
58 #define NB_BANDS 22
59 #else
60 #define NB_BANDS 21
61 #endif
62
63 #define CEPS_MEM 8
64 #define NB_DELTA_CEPS 6
65
66 #define NB_FEATURES (NB_BANDS+3*NB_DELTA_CEPS+2)
67
68
69 #ifndef TRAINING
70 #define TRAINING 0
71 #endif
72
73 static const opus_int16 eband5ms[] = {
74 /*0 200 400 600 800 1k 1.2 1.4 1.6 2k 2.4 2.8 3.2 4k 4.8 5.6 6.8 8k 9.6 12k 15.6 20k*/
75 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 34, 40, 48, 60, 78, 100
76 };
77
78
79 typedef struct {
80 int init;
81 kiss_fft_state *kfft;
82 float half_window[FRAME_SIZE];
83 float dct_table[NB_BANDS*NB_BANDS];
84 } CommonState;
85
86 struct DenoiseState {
87 float analysis_mem[FRAME_SIZE];
88 float cepstral_mem[CEPS_MEM][NB_BANDS];
89 int memid;
90 float synthesis_mem[FRAME_SIZE];
91 float pitch_buf[PITCH_BUF_SIZE];
92 float pitch_enh_buf[PITCH_BUF_SIZE];
93 float last_gain;
94 int last_period;
95 float mem_hp_x[2];
96 float lastg[NB_BANDS];
97 RNNState rnn;
98 };
99
100 #if SMOOTH_BANDS
compute_band_energy(float * bandE,const kiss_fft_cpx * X)101 void compute_band_energy(float *bandE, const kiss_fft_cpx *X) {
102 int i;
103 float sum[NB_BANDS] = {0};
104 for (i=0;i<NB_BANDS-1;i++)
105 {
106 int j;
107 int band_size;
108 band_size = (eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;
109 for (j=0;j<band_size;j++) {
110 float tmp;
111 float frac = (float)j/band_size;
112 tmp = SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r);
113 tmp += SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i);
114 sum[i] += (1-frac)*tmp;
115 sum[i+1] += frac*tmp;
116 }
117 }
118 sum[0] *= 2;
119 sum[NB_BANDS-1] *= 2;
120 for (i=0;i<NB_BANDS;i++)
121 {
122 bandE[i] = sum[i];
123 }
124 }
125
compute_band_corr(float * bandE,const kiss_fft_cpx * X,const kiss_fft_cpx * P)126 void compute_band_corr(float *bandE, const kiss_fft_cpx *X, const kiss_fft_cpx *P) {
127 int i;
128 float sum[NB_BANDS] = {0};
129 for (i=0;i<NB_BANDS-1;i++)
130 {
131 int j;
132 int band_size;
133 band_size = (eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;
134 for (j=0;j<band_size;j++) {
135 float tmp;
136 float frac = (float)j/band_size;
137 tmp = X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r * P[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r;
138 tmp += X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i * P[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i;
139 sum[i] += (1-frac)*tmp;
140 sum[i+1] += frac*tmp;
141 }
142 }
143 sum[0] *= 2;
144 sum[NB_BANDS-1] *= 2;
145 for (i=0;i<NB_BANDS;i++)
146 {
147 bandE[i] = sum[i];
148 }
149 }
150
interp_band_gain(float * g,const float * bandE)151 void interp_band_gain(float *g, const float *bandE) {
152 int i;
153 memset(g, 0, FREQ_SIZE);
154 for (i=0;i<NB_BANDS-1;i++)
155 {
156 int j;
157 int band_size;
158 band_size = (eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;
159 for (j=0;j<band_size;j++) {
160 float frac = (float)j/band_size;
161 g[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j] = (1-frac)*bandE[i] + frac*bandE[i+1];
162 }
163 }
164 }
165 #else
compute_band_energy(float * bandE,const kiss_fft_cpx * X)166 void compute_band_energy(float *bandE, const kiss_fft_cpx *X) {
167 int i;
168 for (i=0;i<NB_BANDS;i++)
169 {
170 int j;
171 opus_val32 sum = 0;
172 for (j=0;j<(eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;j++) {
173 sum += SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r);
174 sum += SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i);
175 }
176 bandE[i] = sum;
177 }
178 }
179
interp_band_gain(float * g,const float * bandE)180 void interp_band_gain(float *g, const float *bandE) {
181 int i;
182 memset(g, 0, FREQ_SIZE);
183 for (i=0;i<NB_BANDS;i++)
184 {
185 int j;
186 for (j=0;j<(eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;j++)
187 g[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j] = bandE[i];
188 }
189 }
190 #endif
191
192
193 CommonState common;
194
check_init()195 static void check_init() {
196 int i;
197 if (common.init) return;
198 common.kfft = opus_fft_alloc_twiddles(2*FRAME_SIZE, NULL, NULL, NULL, 0);
199 for (i=0;i<FRAME_SIZE;i++)
200 common.half_window[i] = sin(.5*M_PI*sin(.5*M_PI*(i+.5)/FRAME_SIZE) * sin(.5*M_PI*(i+.5)/FRAME_SIZE));
201 for (i=0;i<NB_BANDS;i++) {
202 int j;
203 for (j=0;j<NB_BANDS;j++) {
204 common.dct_table[i*NB_BANDS + j] = cos((i+.5)*j*M_PI/NB_BANDS);
205 if (j==0) common.dct_table[i*NB_BANDS + j] *= sqrt(.5);
206 }
207 }
208 common.init = 1;
209 }
210
dct(float * out,const float * in)211 static void dct(float *out, const float *in) {
212 int i;
213 check_init();
214 for (i=0;i<NB_BANDS;i++) {
215 int j;
216 float sum = 0;
217 for (j=0;j<NB_BANDS;j++) {
218 sum += in[j] * common.dct_table[j*NB_BANDS + i];
219 }
220 out[i] = sum*sqrt(2./22);
221 }
222 }
223
224 #if 0
225 static void idct(float *out, const float *in) {
226 int i;
227 check_init();
228 for (i=0;i<NB_BANDS;i++) {
229 int j;
230 float sum = 0;
231 for (j=0;j<NB_BANDS;j++) {
232 sum += in[j] * common.dct_table[i*NB_BANDS + j];
233 }
234 out[i] = sum*sqrt(2./22);
235 }
236 }
237 #endif
238
forward_transform(kiss_fft_cpx * out,const float * in)239 static void forward_transform(kiss_fft_cpx *out, const float *in) {
240 int i;
241 kiss_fft_cpx x[WINDOW_SIZE];
242 kiss_fft_cpx y[WINDOW_SIZE];
243 check_init();
244 for (i=0;i<WINDOW_SIZE;i++) {
245 x[i].r = in[i];
246 x[i].i = 0;
247 }
248 opus_fft(common.kfft, x, y, 0);
249 for (i=0;i<FREQ_SIZE;i++) {
250 out[i] = y[i];
251 }
252 }
253
inverse_transform(float * out,const kiss_fft_cpx * in)254 static void inverse_transform(float *out, const kiss_fft_cpx *in) {
255 int i;
256 kiss_fft_cpx x[WINDOW_SIZE];
257 kiss_fft_cpx y[WINDOW_SIZE];
258 check_init();
259 for (i=0;i<FREQ_SIZE;i++) {
260 x[i] = in[i];
261 }
262 for (;i<WINDOW_SIZE;i++) {
263 x[i].r = x[WINDOW_SIZE - i].r;
264 x[i].i = -x[WINDOW_SIZE - i].i;
265 }
266 opus_fft(common.kfft, x, y, 0);
267 /* output in reverse order for IFFT. */
268 out[0] = WINDOW_SIZE*y[0].r;
269 for (i=1;i<WINDOW_SIZE;i++) {
270 out[i] = WINDOW_SIZE*y[WINDOW_SIZE - i].r;
271 }
272 }
273
apply_window(float * x)274 static void apply_window(float *x) {
275 int i;
276 check_init();
277 for (i=0;i<FRAME_SIZE;i++) {
278 x[i] *= common.half_window[i];
279 x[WINDOW_SIZE - 1 - i] *= common.half_window[i];
280 }
281 }
282
rnnoise_get_size()283 int rnnoise_get_size() {
284 return sizeof(DenoiseState);
285 }
286
rnnoise_init(DenoiseState * st)287 int rnnoise_init(DenoiseState *st) {
288 memset(st, 0, sizeof(*st));
289 return 0;
290 }
291
rnnoise_create()292 DenoiseState *rnnoise_create() {
293 DenoiseState *st;
294 st = malloc(rnnoise_get_size());
295 rnnoise_init(st);
296 return st;
297 }
298
rnnoise_destroy(DenoiseState * st)299 void rnnoise_destroy(DenoiseState *st) {
300 free(st);
301 }
302
303 #if TRAINING
304 int lowpass = FREQ_SIZE;
305 int band_lp = NB_BANDS;
306 #endif
307
frame_analysis(DenoiseState * st,kiss_fft_cpx * X,float * Ex,const float * in)308 static void frame_analysis(DenoiseState *st, kiss_fft_cpx *X, float *Ex, const float *in) {
309 int i;
310 float x[WINDOW_SIZE];
311 RNN_COPY(x, st->analysis_mem, FRAME_SIZE);
312 for (i=0;i<FRAME_SIZE;i++) x[FRAME_SIZE + i] = in[i];
313 RNN_COPY(st->analysis_mem, in, FRAME_SIZE);
314 apply_window(x);
315 forward_transform(X, x);
316 #if TRAINING
317 for (i=lowpass;i<FREQ_SIZE;i++)
318 X[i].r = X[i].i = 0;
319 #endif
320 compute_band_energy(Ex, X);
321 }
322
compute_frame_features(DenoiseState * st,kiss_fft_cpx * X,kiss_fft_cpx * P,float * Ex,float * Ep,float * Exp,float * features,const float * in)323 static int compute_frame_features(DenoiseState *st, kiss_fft_cpx *X, kiss_fft_cpx *P,
324 float *Ex, float *Ep, float *Exp, float *features, const float *in) {
325 int i;
326 float E = 0;
327 float *ceps_0, *ceps_1, *ceps_2;
328 float spec_variability = 0;
329 float Ly[NB_BANDS];
330 float p[WINDOW_SIZE];
331 float pitch_buf[PITCH_BUF_SIZE>>1];
332 int pitch_index;
333 float gain;
334 float *(pre[1]);
335 float tmp[NB_BANDS];
336 float follow, logMax;
337 frame_analysis(st, X, Ex, in);
338 RNN_MOVE(st->pitch_buf, &st->pitch_buf[FRAME_SIZE], PITCH_BUF_SIZE-FRAME_SIZE);
339 RNN_COPY(&st->pitch_buf[PITCH_BUF_SIZE-FRAME_SIZE], in, FRAME_SIZE);
340 pre[0] = &st->pitch_buf[0];
341 pitch_downsample(pre, pitch_buf, PITCH_BUF_SIZE, 1);
342 pitch_search(pitch_buf+(PITCH_MAX_PERIOD>>1), pitch_buf, PITCH_FRAME_SIZE,
343 PITCH_MAX_PERIOD-3*PITCH_MIN_PERIOD, &pitch_index);
344 pitch_index = PITCH_MAX_PERIOD-pitch_index;
345
346 gain = remove_doubling(pitch_buf, PITCH_MAX_PERIOD, PITCH_MIN_PERIOD,
347 PITCH_FRAME_SIZE, &pitch_index, st->last_period, st->last_gain);
348 st->last_period = pitch_index;
349 st->last_gain = gain;
350 for (i=0;i<WINDOW_SIZE;i++)
351 p[i] = st->pitch_buf[PITCH_BUF_SIZE-WINDOW_SIZE-pitch_index+i];
352 apply_window(p);
353 forward_transform(P, p);
354 compute_band_energy(Ep, P);
355 compute_band_corr(Exp, X, P);
356 for (i=0;i<NB_BANDS;i++) Exp[i] = Exp[i]/sqrt(.001+Ex[i]*Ep[i]);
357 dct(tmp, Exp);
358 for (i=0;i<NB_DELTA_CEPS;i++) features[NB_BANDS+2*NB_DELTA_CEPS+i] = tmp[i];
359 features[NB_BANDS+2*NB_DELTA_CEPS] -= 1.3;
360 features[NB_BANDS+2*NB_DELTA_CEPS+1] -= 0.9;
361 features[NB_BANDS+3*NB_DELTA_CEPS] = .01*(pitch_index-300);
362 logMax = -2;
363 follow = -2;
364 for (i=0;i<NB_BANDS;i++) {
365 Ly[i] = log10(1e-2+Ex[i]);
366 Ly[i] = MAX16(logMax-7, MAX16(follow-1.5, Ly[i]));
367 logMax = MAX16(logMax, Ly[i]);
368 follow = MAX16(follow-1.5, Ly[i]);
369 E += Ex[i];
370 }
371 if (!TRAINING && E < 0.04) {
372 /* If there's no audio, avoid messing up the state. */
373 RNN_CLEAR(features, NB_FEATURES);
374 return 1;
375 }
376 dct(features, Ly);
377 features[0] -= 12;
378 features[1] -= 4;
379 ceps_0 = st->cepstral_mem[st->memid];
380 ceps_1 = (st->memid < 1) ? st->cepstral_mem[CEPS_MEM+st->memid-1] : st->cepstral_mem[st->memid-1];
381 ceps_2 = (st->memid < 2) ? st->cepstral_mem[CEPS_MEM+st->memid-2] : st->cepstral_mem[st->memid-2];
382 for (i=0;i<NB_BANDS;i++) ceps_0[i] = features[i];
383 st->memid++;
384 for (i=0;i<NB_DELTA_CEPS;i++) {
385 features[i] = ceps_0[i] + ceps_1[i] + ceps_2[i];
386 features[NB_BANDS+i] = ceps_0[i] - ceps_2[i];
387 features[NB_BANDS+NB_DELTA_CEPS+i] = ceps_0[i] - 2*ceps_1[i] + ceps_2[i];
388 }
389 /* Spectral variability features. */
390 if (st->memid == CEPS_MEM) st->memid = 0;
391 for (i=0;i<CEPS_MEM;i++)
392 {
393 int j;
394 float mindist = 1e15f;
395 for (j=0;j<CEPS_MEM;j++)
396 {
397 int k;
398 float dist=0;
399 for (k=0;k<NB_BANDS;k++)
400 {
401 float tmp;
402 tmp = st->cepstral_mem[i][k] - st->cepstral_mem[j][k];
403 dist += tmp*tmp;
404 }
405 if (j!=i)
406 mindist = MIN32(mindist, dist);
407 }
408 spec_variability += mindist;
409 }
410 features[NB_BANDS+3*NB_DELTA_CEPS+1] = spec_variability/CEPS_MEM-2.1;
411 return TRAINING && E < 0.1;
412 }
413
frame_synthesis(DenoiseState * st,float * out,const kiss_fft_cpx * y)414 static void frame_synthesis(DenoiseState *st, float *out, const kiss_fft_cpx *y) {
415 float x[WINDOW_SIZE];
416 int i;
417 inverse_transform(x, y);
418 apply_window(x);
419 for (i=0;i<FRAME_SIZE;i++) out[i] = x[i] + st->synthesis_mem[i];
420 RNN_COPY(st->synthesis_mem, &x[FRAME_SIZE], FRAME_SIZE);
421 }
422
biquad(float * y,float mem[2],const float * x,const float * b,const float * a,int N)423 static void biquad(float *y, float mem[2], const float *x, const float *b, const float *a, int N) {
424 int i;
425 for (i=0;i<N;i++) {
426 float xi, yi;
427 xi = x[i];
428 yi = x[i] + mem[0];
429 mem[0] = mem[1] + (b[0]*(double)xi - a[0]*(double)yi);
430 mem[1] = (b[1]*(double)xi - a[1]*(double)yi);
431 y[i] = yi;
432 }
433 }
434
pitch_filter(kiss_fft_cpx * X,const kiss_fft_cpx * P,const float * Ex,const float * Ep,const float * Exp,const float * g)435 void pitch_filter(kiss_fft_cpx *X, const kiss_fft_cpx *P, const float *Ex, const float *Ep,
436 const float *Exp, const float *g) {
437 int i;
438 float r[NB_BANDS];
439 float rf[FREQ_SIZE] = {0};
440 for (i=0;i<NB_BANDS;i++) {
441 #if 0
442 if (Exp[i]>g[i]) r[i] = 1;
443 else r[i] = Exp[i]*(1-g[i])/(.001 + g[i]*(1-Exp[i]));
444 r[i] = MIN16(1, MAX16(0, r[i]));
445 #else
446 if (Exp[i]>g[i]) r[i] = 1;
447 else r[i] = SQUARE(Exp[i])*(1-SQUARE(g[i]))/(.001 + SQUARE(g[i])*(1-SQUARE(Exp[i])));
448 r[i] = sqrt(MIN16(1, MAX16(0, r[i])));
449 #endif
450 r[i] *= sqrt(Ex[i]/(1e-8+Ep[i]));
451 }
452 interp_band_gain(rf, r);
453 for (i=0;i<FREQ_SIZE;i++) {
454 X[i].r += rf[i]*P[i].r;
455 X[i].i += rf[i]*P[i].i;
456 }
457 float newE[NB_BANDS];
458 compute_band_energy(newE, X);
459 float norm[NB_BANDS];
460 float normf[FREQ_SIZE]={0};
461 for (i=0;i<NB_BANDS;i++) {
462 norm[i] = sqrt(Ex[i]/(1e-8+newE[i]));
463 }
464 interp_band_gain(normf, norm);
465 for (i=0;i<FREQ_SIZE;i++) {
466 X[i].r *= normf[i];
467 X[i].i *= normf[i];
468 }
469 }
470
rnnoise_process_frame(DenoiseState * st,float * out,const float * in)471 float rnnoise_process_frame(DenoiseState *st, float *out, const float *in) {
472 int i;
473 kiss_fft_cpx X[FREQ_SIZE];
474 kiss_fft_cpx P[WINDOW_SIZE];
475 float x[FRAME_SIZE];
476 float Ex[NB_BANDS], Ep[NB_BANDS];
477 float Exp[NB_BANDS];
478 float features[NB_FEATURES];
479 float g[NB_BANDS];
480 float gf[FREQ_SIZE]={1};
481 float vad_prob = 0;
482 int silence;
483 static const float a_hp[2] = {-1.99599, 0.99600};
484 static const float b_hp[2] = {-2, 1};
485 biquad(x, st->mem_hp_x, in, b_hp, a_hp, FRAME_SIZE);
486 silence = compute_frame_features(st, X, P, Ex, Ep, Exp, features, x);
487
488 if (!silence) {
489 compute_rnn(&st->rnn, g, &vad_prob, features);
490 pitch_filter(X, P, Ex, Ep, Exp, g);
491 for (i=0;i<NB_BANDS;i++) {
492 float alpha = .6f;
493 g[i] = MAX16(g[i], alpha*st->lastg[i]);
494 st->lastg[i] = g[i];
495 }
496 interp_band_gain(gf, g);
497 #if 1
498 for (i=0;i<FREQ_SIZE;i++) {
499 X[i].r *= gf[i];
500 X[i].i *= gf[i];
501 }
502 #endif
503 }
504
505 frame_synthesis(st, out, X);
506 return vad_prob;
507 }
508
509 #if TRAINING
510
uni_rand()511 static float uni_rand() {
512 return rand()/(double)RAND_MAX-.5;
513 }
514
rand_resp(float * a,float * b)515 static void rand_resp(float *a, float *b) {
516 a[0] = .75*uni_rand();
517 a[1] = .75*uni_rand();
518 b[0] = .75*uni_rand();
519 b[1] = .75*uni_rand();
520 }
521
main(int argc,char ** argv)522 int main(int argc, char **argv) {
523 int i;
524 int count=0;
525 static const float a_hp[2] = {-1.99599, 0.99600};
526 static const float b_hp[2] = {-2, 1};
527 float a_noise[2] = {0};
528 float b_noise[2] = {0};
529 float a_sig[2] = {0};
530 float b_sig[2] = {0};
531 float mem_hp_x[2]={0};
532 float mem_hp_n[2]={0};
533 float mem_resp_x[2]={0};
534 float mem_resp_n[2]={0};
535 float x[FRAME_SIZE];
536 float n[FRAME_SIZE];
537 float xn[FRAME_SIZE];
538 int vad_cnt=0;
539 int gain_change_count=0;
540 float speech_gain = 1, noise_gain = 1;
541 FILE *f1, *f2, *fout;
542 DenoiseState *st;
543 DenoiseState *noise_state;
544 DenoiseState *noisy;
545 st = rnnoise_create();
546 noise_state = rnnoise_create();
547 noisy = rnnoise_create();
548 if (argc!=4) {
549 fprintf(stderr, "usage: %s <speech> <noise> <output denoised>\n", argv[0]);
550 return 1;
551 }
552 f1 = fopen(argv[1], "r");
553 f2 = fopen(argv[2], "r");
554 fout = fopen(argv[3], "w");
555 for(i=0;i<150;i++) {
556 short tmp[FRAME_SIZE];
557 fread(tmp, sizeof(short), FRAME_SIZE, f2);
558 }
559 while (1) {
560 kiss_fft_cpx X[FREQ_SIZE], Y[FREQ_SIZE], N[FREQ_SIZE], P[WINDOW_SIZE];
561 float Ex[NB_BANDS], Ey[NB_BANDS], En[NB_BANDS], Ep[NB_BANDS];
562 float Exp[NB_BANDS];
563 float Ln[NB_BANDS];
564 float features[NB_FEATURES];
565 float g[NB_BANDS];
566 float gf[FREQ_SIZE]={1};
567 short tmp[FRAME_SIZE];
568 float vad=0;
569 float vad_prob;
570 float E=0;
571 if (count==50000000) break;
572 if (++gain_change_count > 2821) {
573 speech_gain = pow(10., (-40+(rand()%60))/20.);
574 noise_gain = pow(10., (-30+(rand()%50))/20.);
575 if (rand()%10==0) noise_gain = 0;
576 noise_gain *= speech_gain;
577 if (rand()%10==0) speech_gain = 0;
578 gain_change_count = 0;
579 rand_resp(a_noise, b_noise);
580 rand_resp(a_sig, b_sig);
581 lowpass = FREQ_SIZE * 3000./24000. * pow(50., rand()/(double)RAND_MAX);
582 for (i=0;i<NB_BANDS;i++) {
583 if (eband5ms[i]<<FRAME_SIZE_SHIFT > lowpass) {
584 band_lp = i;
585 break;
586 }
587 }
588 }
589 if (speech_gain != 0) {
590 fread(tmp, sizeof(short), FRAME_SIZE, f1);
591 if (feof(f1)) {
592 rewind(f1);
593 fread(tmp, sizeof(short), FRAME_SIZE, f1);
594 }
595 for (i=0;i<FRAME_SIZE;i++) x[i] = speech_gain*tmp[i];
596 for (i=0;i<FRAME_SIZE;i++) E += tmp[i]*(float)tmp[i];
597 } else {
598 for (i=0;i<FRAME_SIZE;i++) x[i] = 0;
599 E = 0;
600 }
601 if (noise_gain!=0) {
602 fread(tmp, sizeof(short), FRAME_SIZE, f2);
603 if (feof(f2)) {
604 rewind(f2);
605 fread(tmp, sizeof(short), FRAME_SIZE, f2);
606 }
607 for (i=0;i<FRAME_SIZE;i++) n[i] = noise_gain*tmp[i];
608 } else {
609 for (i=0;i<FRAME_SIZE;i++) n[i] = 0;
610 }
611 biquad(x, mem_hp_x, x, b_hp, a_hp, FRAME_SIZE);
612 biquad(x, mem_resp_x, x, b_sig, a_sig, FRAME_SIZE);
613 biquad(n, mem_hp_n, n, b_hp, a_hp, FRAME_SIZE);
614 biquad(n, mem_resp_n, n, b_noise, a_noise, FRAME_SIZE);
615 for (i=0;i<FRAME_SIZE;i++) xn[i] = x[i] + n[i];
616 if (E > 1e9f) {
617 vad_cnt=0;
618 } else if (E > 1e8f) {
619 vad_cnt -= 5;
620 } else if (E > 1e7f) {
621 vad_cnt++;
622 } else {
623 vad_cnt+=2;
624 }
625 if (vad_cnt < 0) vad_cnt = 0;
626 if (vad_cnt > 15) vad_cnt = 15;
627
628 if (vad_cnt >= 10) vad = 0;
629 else if (vad_cnt > 0) vad = 0.5f;
630 else vad = 1.f;
631
632 frame_analysis(st, Y, Ey, x);
633 frame_analysis(noise_state, N, En, n);
634 for (i=0;i<NB_BANDS;i++) Ln[i] = log10(1e-2+En[i]);
635 int silence = compute_frame_features(noisy, X, P, Ex, Ep, Exp, features, xn);
636 pitch_filter(X, P, Ex, Ep, Exp, g);
637 //printf("%f %d\n", noisy->last_gain, noisy->last_period);
638 for (i=0;i<NB_BANDS;i++) {
639 g[i] = sqrt((Ey[i]+1e-3)/(Ex[i]+1e-3));
640 if (g[i] > 1) g[i] = 1;
641 if (silence || i > band_lp) g[i] = -1;
642 if (Ey[i] < 5e-2 && Ex[i] < 5e-2) g[i] = -1;
643 if (vad==0 && noise_gain==0) g[i] = -1;
644 }
645 count++;
646 #if 0
647 for (i=0;i<NB_FEATURES;i++) printf("%f ", features[i]);
648 for (i=0;i<NB_BANDS;i++) printf("%f ", g[i]);
649 for (i=0;i<NB_BANDS;i++) printf("%f ", Ln[i]);
650 printf("%f\n", vad);
651 #endif
652 #if 1
653 fwrite(features, sizeof(float), NB_FEATURES, stdout);
654 fwrite(g, sizeof(float), NB_BANDS, stdout);
655 fwrite(Ln, sizeof(float), NB_BANDS, stdout);
656 fwrite(&vad, sizeof(float), 1, stdout);
657 #endif
658 #if 0
659 compute_rnn(&noisy->rnn, g, &vad_prob, features);
660 interp_band_gain(gf, g);
661 #if 1
662 for (i=0;i<FREQ_SIZE;i++) {
663 X[i].r *= gf[i];
664 X[i].i *= gf[i];
665 }
666 #endif
667 frame_synthesis(noisy, xn, X);
668
669 for (i=0;i<FRAME_SIZE;i++) tmp[i] = xn[i];
670 fwrite(tmp, sizeof(short), FRAME_SIZE, fout);
671 #endif
672 }
673 fprintf(stderr, "matrix size: %d x %d\n", count, NB_FEATURES + 2*NB_BANDS + 1);
674 fclose(f1);
675 fclose(f2);
676 fclose(fout);
677 return 0;
678 }
679
680 #endif
681