1 /* Copyright (c) 2017 Mozilla */
2 /*
3    Redistribution and use in source and binary forms, with or without
4    modification, are permitted provided that the following conditions
5    are met:
6 
7    - Redistributions of source code must retain the above copyright
8    notice, this list of conditions and the following disclaimer.
9 
10    - Redistributions in binary form must reproduce the above copyright
11    notice, this list of conditions and the following disclaimer in the
12    documentation and/or other materials provided with the distribution.
13 
14    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
18    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
22    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26 
27 #ifdef HAVE_CONFIG_H
28 #include "config.h"
29 #endif
30 
31 #include <stdlib.h>
32 #include <string.h>
33 #include <stdio.h>
34 #include "kiss_fft.h"
35 #include "common.h"
36 #include <math.h>
37 #include "rnnoise.h"
38 #include "pitch.h"
39 #include "arch.h"
40 #include "rnn.h"
41 #include "rnn_data.h"
42 
43 #define FRAME_SIZE_SHIFT 2
44 #define FRAME_SIZE (120<<FRAME_SIZE_SHIFT)
45 #define WINDOW_SIZE (2*FRAME_SIZE)
46 #define FREQ_SIZE (FRAME_SIZE + 1)
47 
48 #define PITCH_MIN_PERIOD 60
49 #define PITCH_MAX_PERIOD 768
50 #define PITCH_FRAME_SIZE 960
51 #define PITCH_BUF_SIZE (PITCH_MAX_PERIOD+PITCH_FRAME_SIZE)
52 
53 #define SQUARE(x) ((x)*(x))
54 
55 #define SMOOTH_BANDS 1
56 
57 #if SMOOTH_BANDS
58 #define NB_BANDS 22
59 #else
60 #define NB_BANDS 21
61 #endif
62 
63 #define CEPS_MEM 8
64 #define NB_DELTA_CEPS 6
65 
66 #define NB_FEATURES (NB_BANDS+3*NB_DELTA_CEPS+2)
67 
68 
69 #ifndef TRAINING
70 #define TRAINING 0
71 #endif
72 
73 static const opus_int16 eband5ms[] = {
74 /*0  200 400 600 800  1k 1.2 1.4 1.6  2k 2.4 2.8 3.2  4k 4.8 5.6 6.8  8k 9.6 12k 15.6 20k*/
75   0,  1,  2,  3,  4,  5,  6,  7,  8, 10, 12, 14, 16, 20, 24, 28, 34, 40, 48, 60, 78, 100
76 };
77 
78 
79 typedef struct {
80   int init;
81   kiss_fft_state *kfft;
82   float half_window[FRAME_SIZE];
83   float dct_table[NB_BANDS*NB_BANDS];
84 } CommonState;
85 
86 struct DenoiseState {
87   float analysis_mem[FRAME_SIZE];
88   float cepstral_mem[CEPS_MEM][NB_BANDS];
89   int memid;
90   float synthesis_mem[FRAME_SIZE];
91   float pitch_buf[PITCH_BUF_SIZE];
92   float pitch_enh_buf[PITCH_BUF_SIZE];
93   float last_gain;
94   int last_period;
95   float mem_hp_x[2];
96   float lastg[NB_BANDS];
97   RNNState rnn;
98 };
99 
100 #if SMOOTH_BANDS
compute_band_energy(float * bandE,const kiss_fft_cpx * X)101 void compute_band_energy(float *bandE, const kiss_fft_cpx *X) {
102   int i;
103   float sum[NB_BANDS] = {0};
104   for (i=0;i<NB_BANDS-1;i++)
105   {
106     int j;
107     int band_size;
108     band_size = (eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;
109     for (j=0;j<band_size;j++) {
110       float tmp;
111       float frac = (float)j/band_size;
112       tmp = SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r);
113       tmp += SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i);
114       sum[i] += (1-frac)*tmp;
115       sum[i+1] += frac*tmp;
116     }
117   }
118   sum[0] *= 2;
119   sum[NB_BANDS-1] *= 2;
120   for (i=0;i<NB_BANDS;i++)
121   {
122     bandE[i] = sum[i];
123   }
124 }
125 
compute_band_corr(float * bandE,const kiss_fft_cpx * X,const kiss_fft_cpx * P)126 void compute_band_corr(float *bandE, const kiss_fft_cpx *X, const kiss_fft_cpx *P) {
127   int i;
128   float sum[NB_BANDS] = {0};
129   for (i=0;i<NB_BANDS-1;i++)
130   {
131     int j;
132     int band_size;
133     band_size = (eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;
134     for (j=0;j<band_size;j++) {
135       float tmp;
136       float frac = (float)j/band_size;
137       tmp = X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r * P[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r;
138       tmp += X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i * P[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i;
139       sum[i] += (1-frac)*tmp;
140       sum[i+1] += frac*tmp;
141     }
142   }
143   sum[0] *= 2;
144   sum[NB_BANDS-1] *= 2;
145   for (i=0;i<NB_BANDS;i++)
146   {
147     bandE[i] = sum[i];
148   }
149 }
150 
interp_band_gain(float * g,const float * bandE)151 void interp_band_gain(float *g, const float *bandE) {
152   int i;
153   memset(g, 0, FREQ_SIZE);
154   for (i=0;i<NB_BANDS-1;i++)
155   {
156     int j;
157     int band_size;
158     band_size = (eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;
159     for (j=0;j<band_size;j++) {
160       float frac = (float)j/band_size;
161       g[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j] = (1-frac)*bandE[i] + frac*bandE[i+1];
162     }
163   }
164 }
165 #else
compute_band_energy(float * bandE,const kiss_fft_cpx * X)166 void compute_band_energy(float *bandE, const kiss_fft_cpx *X) {
167   int i;
168   for (i=0;i<NB_BANDS;i++)
169   {
170     int j;
171     opus_val32 sum = 0;
172     for (j=0;j<(eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;j++) {
173       sum += SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r);
174       sum += SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i);
175     }
176     bandE[i] = sum;
177   }
178 }
179 
interp_band_gain(float * g,const float * bandE)180 void interp_band_gain(float *g, const float *bandE) {
181   int i;
182   memset(g, 0, FREQ_SIZE);
183   for (i=0;i<NB_BANDS;i++)
184   {
185     int j;
186     for (j=0;j<(eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;j++)
187       g[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j] = bandE[i];
188   }
189 }
190 #endif
191 
192 
193 CommonState common;
194 
check_init()195 static void check_init() {
196   int i;
197   if (common.init) return;
198   common.kfft = opus_fft_alloc_twiddles(2*FRAME_SIZE, NULL, NULL, NULL, 0);
199   for (i=0;i<FRAME_SIZE;i++)
200     common.half_window[i] = sin(.5*M_PI*sin(.5*M_PI*(i+.5)/FRAME_SIZE) * sin(.5*M_PI*(i+.5)/FRAME_SIZE));
201   for (i=0;i<NB_BANDS;i++) {
202     int j;
203     for (j=0;j<NB_BANDS;j++) {
204       common.dct_table[i*NB_BANDS + j] = cos((i+.5)*j*M_PI/NB_BANDS);
205       if (j==0) common.dct_table[i*NB_BANDS + j] *= sqrt(.5);
206     }
207   }
208   common.init = 1;
209 }
210 
dct(float * out,const float * in)211 static void dct(float *out, const float *in) {
212   int i;
213   check_init();
214   for (i=0;i<NB_BANDS;i++) {
215     int j;
216     float sum = 0;
217     for (j=0;j<NB_BANDS;j++) {
218       sum += in[j] * common.dct_table[j*NB_BANDS + i];
219     }
220     out[i] = sum*sqrt(2./22);
221   }
222 }
223 
224 #if 0
225 static void idct(float *out, const float *in) {
226   int i;
227   check_init();
228   for (i=0;i<NB_BANDS;i++) {
229     int j;
230     float sum = 0;
231     for (j=0;j<NB_BANDS;j++) {
232       sum += in[j] * common.dct_table[i*NB_BANDS + j];
233     }
234     out[i] = sum*sqrt(2./22);
235   }
236 }
237 #endif
238 
forward_transform(kiss_fft_cpx * out,const float * in)239 static void forward_transform(kiss_fft_cpx *out, const float *in) {
240   int i;
241   kiss_fft_cpx x[WINDOW_SIZE];
242   kiss_fft_cpx y[WINDOW_SIZE];
243   check_init();
244   for (i=0;i<WINDOW_SIZE;i++) {
245     x[i].r = in[i];
246     x[i].i = 0;
247   }
248   opus_fft(common.kfft, x, y, 0);
249   for (i=0;i<FREQ_SIZE;i++) {
250     out[i] = y[i];
251   }
252 }
253 
inverse_transform(float * out,const kiss_fft_cpx * in)254 static void inverse_transform(float *out, const kiss_fft_cpx *in) {
255   int i;
256   kiss_fft_cpx x[WINDOW_SIZE];
257   kiss_fft_cpx y[WINDOW_SIZE];
258   check_init();
259   for (i=0;i<FREQ_SIZE;i++) {
260     x[i] = in[i];
261   }
262   for (;i<WINDOW_SIZE;i++) {
263     x[i].r = x[WINDOW_SIZE - i].r;
264     x[i].i = -x[WINDOW_SIZE - i].i;
265   }
266   opus_fft(common.kfft, x, y, 0);
267   /* output in reverse order for IFFT. */
268   out[0] = WINDOW_SIZE*y[0].r;
269   for (i=1;i<WINDOW_SIZE;i++) {
270     out[i] = WINDOW_SIZE*y[WINDOW_SIZE - i].r;
271   }
272 }
273 
apply_window(float * x)274 static void apply_window(float *x) {
275   int i;
276   check_init();
277   for (i=0;i<FRAME_SIZE;i++) {
278     x[i] *= common.half_window[i];
279     x[WINDOW_SIZE - 1 - i] *= common.half_window[i];
280   }
281 }
282 
rnnoise_get_size()283 int rnnoise_get_size() {
284   return sizeof(DenoiseState);
285 }
286 
rnnoise_init(DenoiseState * st)287 int rnnoise_init(DenoiseState *st) {
288   memset(st, 0, sizeof(*st));
289   return 0;
290 }
291 
rnnoise_create()292 DenoiseState *rnnoise_create() {
293   DenoiseState *st;
294   st = malloc(rnnoise_get_size());
295   rnnoise_init(st);
296   return st;
297 }
298 
rnnoise_destroy(DenoiseState * st)299 void rnnoise_destroy(DenoiseState *st) {
300   free(st);
301 }
302 
303 #if TRAINING
304 int lowpass = FREQ_SIZE;
305 int band_lp = NB_BANDS;
306 #endif
307 
frame_analysis(DenoiseState * st,kiss_fft_cpx * X,float * Ex,const float * in)308 static void frame_analysis(DenoiseState *st, kiss_fft_cpx *X, float *Ex, const float *in) {
309   int i;
310   float x[WINDOW_SIZE];
311   RNN_COPY(x, st->analysis_mem, FRAME_SIZE);
312   for (i=0;i<FRAME_SIZE;i++) x[FRAME_SIZE + i] = in[i];
313   RNN_COPY(st->analysis_mem, in, FRAME_SIZE);
314   apply_window(x);
315   forward_transform(X, x);
316 #if TRAINING
317   for (i=lowpass;i<FREQ_SIZE;i++)
318     X[i].r = X[i].i = 0;
319 #endif
320   compute_band_energy(Ex, X);
321 }
322 
compute_frame_features(DenoiseState * st,kiss_fft_cpx * X,kiss_fft_cpx * P,float * Ex,float * Ep,float * Exp,float * features,const float * in)323 static int compute_frame_features(DenoiseState *st, kiss_fft_cpx *X, kiss_fft_cpx *P,
324                                   float *Ex, float *Ep, float *Exp, float *features, const float *in) {
325   int i;
326   float E = 0;
327   float *ceps_0, *ceps_1, *ceps_2;
328   float spec_variability = 0;
329   float Ly[NB_BANDS];
330   float p[WINDOW_SIZE];
331   float pitch_buf[PITCH_BUF_SIZE>>1];
332   int pitch_index;
333   float gain;
334   float *(pre[1]);
335   float tmp[NB_BANDS];
336   float follow, logMax;
337   frame_analysis(st, X, Ex, in);
338   RNN_MOVE(st->pitch_buf, &st->pitch_buf[FRAME_SIZE], PITCH_BUF_SIZE-FRAME_SIZE);
339   RNN_COPY(&st->pitch_buf[PITCH_BUF_SIZE-FRAME_SIZE], in, FRAME_SIZE);
340   pre[0] = &st->pitch_buf[0];
341   pitch_downsample(pre, pitch_buf, PITCH_BUF_SIZE, 1);
342   pitch_search(pitch_buf+(PITCH_MAX_PERIOD>>1), pitch_buf, PITCH_FRAME_SIZE,
343                PITCH_MAX_PERIOD-3*PITCH_MIN_PERIOD, &pitch_index);
344   pitch_index = PITCH_MAX_PERIOD-pitch_index;
345 
346   gain = remove_doubling(pitch_buf, PITCH_MAX_PERIOD, PITCH_MIN_PERIOD,
347           PITCH_FRAME_SIZE, &pitch_index, st->last_period, st->last_gain);
348   st->last_period = pitch_index;
349   st->last_gain = gain;
350   for (i=0;i<WINDOW_SIZE;i++)
351     p[i] = st->pitch_buf[PITCH_BUF_SIZE-WINDOW_SIZE-pitch_index+i];
352   apply_window(p);
353   forward_transform(P, p);
354   compute_band_energy(Ep, P);
355   compute_band_corr(Exp, X, P);
356   for (i=0;i<NB_BANDS;i++) Exp[i] = Exp[i]/sqrt(.001+Ex[i]*Ep[i]);
357   dct(tmp, Exp);
358   for (i=0;i<NB_DELTA_CEPS;i++) features[NB_BANDS+2*NB_DELTA_CEPS+i] = tmp[i];
359   features[NB_BANDS+2*NB_DELTA_CEPS] -= 1.3;
360   features[NB_BANDS+2*NB_DELTA_CEPS+1] -= 0.9;
361   features[NB_BANDS+3*NB_DELTA_CEPS] = .01*(pitch_index-300);
362   logMax = -2;
363   follow = -2;
364   for (i=0;i<NB_BANDS;i++) {
365     Ly[i] = log10(1e-2+Ex[i]);
366     Ly[i] = MAX16(logMax-7, MAX16(follow-1.5, Ly[i]));
367     logMax = MAX16(logMax, Ly[i]);
368     follow = MAX16(follow-1.5, Ly[i]);
369     E += Ex[i];
370   }
371   if (!TRAINING && E < 0.04) {
372     /* If there's no audio, avoid messing up the state. */
373     RNN_CLEAR(features, NB_FEATURES);
374     return 1;
375   }
376   dct(features, Ly);
377   features[0] -= 12;
378   features[1] -= 4;
379   ceps_0 = st->cepstral_mem[st->memid];
380   ceps_1 = (st->memid < 1) ? st->cepstral_mem[CEPS_MEM+st->memid-1] : st->cepstral_mem[st->memid-1];
381   ceps_2 = (st->memid < 2) ? st->cepstral_mem[CEPS_MEM+st->memid-2] : st->cepstral_mem[st->memid-2];
382   for (i=0;i<NB_BANDS;i++) ceps_0[i] = features[i];
383   st->memid++;
384   for (i=0;i<NB_DELTA_CEPS;i++) {
385     features[i] = ceps_0[i] + ceps_1[i] + ceps_2[i];
386     features[NB_BANDS+i] = ceps_0[i] - ceps_2[i];
387     features[NB_BANDS+NB_DELTA_CEPS+i] =  ceps_0[i] - 2*ceps_1[i] + ceps_2[i];
388   }
389   /* Spectral variability features. */
390   if (st->memid == CEPS_MEM) st->memid = 0;
391   for (i=0;i<CEPS_MEM;i++)
392   {
393     int j;
394     float mindist = 1e15f;
395     for (j=0;j<CEPS_MEM;j++)
396     {
397       int k;
398       float dist=0;
399       for (k=0;k<NB_BANDS;k++)
400       {
401         float tmp;
402         tmp = st->cepstral_mem[i][k] - st->cepstral_mem[j][k];
403         dist += tmp*tmp;
404       }
405       if (j!=i)
406         mindist = MIN32(mindist, dist);
407     }
408     spec_variability += mindist;
409   }
410   features[NB_BANDS+3*NB_DELTA_CEPS+1] = spec_variability/CEPS_MEM-2.1;
411   return TRAINING && E < 0.1;
412 }
413 
frame_synthesis(DenoiseState * st,float * out,const kiss_fft_cpx * y)414 static void frame_synthesis(DenoiseState *st, float *out, const kiss_fft_cpx *y) {
415   float x[WINDOW_SIZE];
416   int i;
417   inverse_transform(x, y);
418   apply_window(x);
419   for (i=0;i<FRAME_SIZE;i++) out[i] = x[i] + st->synthesis_mem[i];
420   RNN_COPY(st->synthesis_mem, &x[FRAME_SIZE], FRAME_SIZE);
421 }
422 
biquad(float * y,float mem[2],const float * x,const float * b,const float * a,int N)423 static void biquad(float *y, float mem[2], const float *x, const float *b, const float *a, int N) {
424   int i;
425   for (i=0;i<N;i++) {
426     float xi, yi;
427     xi = x[i];
428     yi = x[i] + mem[0];
429     mem[0] = mem[1] + (b[0]*(double)xi - a[0]*(double)yi);
430     mem[1] = (b[1]*(double)xi - a[1]*(double)yi);
431     y[i] = yi;
432   }
433 }
434 
pitch_filter(kiss_fft_cpx * X,const kiss_fft_cpx * P,const float * Ex,const float * Ep,const float * Exp,const float * g)435 void pitch_filter(kiss_fft_cpx *X, const kiss_fft_cpx *P, const float *Ex, const float *Ep,
436                   const float *Exp, const float *g) {
437   int i;
438   float r[NB_BANDS];
439   float rf[FREQ_SIZE] = {0};
440   for (i=0;i<NB_BANDS;i++) {
441 #if 0
442     if (Exp[i]>g[i]) r[i] = 1;
443     else r[i] = Exp[i]*(1-g[i])/(.001 + g[i]*(1-Exp[i]));
444     r[i] = MIN16(1, MAX16(0, r[i]));
445 #else
446     if (Exp[i]>g[i]) r[i] = 1;
447     else r[i] = SQUARE(Exp[i])*(1-SQUARE(g[i]))/(.001 + SQUARE(g[i])*(1-SQUARE(Exp[i])));
448     r[i] = sqrt(MIN16(1, MAX16(0, r[i])));
449 #endif
450     r[i] *= sqrt(Ex[i]/(1e-8+Ep[i]));
451   }
452   interp_band_gain(rf, r);
453   for (i=0;i<FREQ_SIZE;i++) {
454     X[i].r += rf[i]*P[i].r;
455     X[i].i += rf[i]*P[i].i;
456   }
457   float newE[NB_BANDS];
458   compute_band_energy(newE, X);
459   float norm[NB_BANDS];
460   float normf[FREQ_SIZE]={0};
461   for (i=0;i<NB_BANDS;i++) {
462     norm[i] = sqrt(Ex[i]/(1e-8+newE[i]));
463   }
464   interp_band_gain(normf, norm);
465   for (i=0;i<FREQ_SIZE;i++) {
466     X[i].r *= normf[i];
467     X[i].i *= normf[i];
468   }
469 }
470 
rnnoise_process_frame(DenoiseState * st,float * out,const float * in)471 float rnnoise_process_frame(DenoiseState *st, float *out, const float *in) {
472   int i;
473   kiss_fft_cpx X[FREQ_SIZE];
474   kiss_fft_cpx P[WINDOW_SIZE];
475   float x[FRAME_SIZE];
476   float Ex[NB_BANDS], Ep[NB_BANDS];
477   float Exp[NB_BANDS];
478   float features[NB_FEATURES];
479   float g[NB_BANDS];
480   float gf[FREQ_SIZE]={1};
481   float vad_prob = 0;
482   int silence;
483   static const float a_hp[2] = {-1.99599, 0.99600};
484   static const float b_hp[2] = {-2, 1};
485   biquad(x, st->mem_hp_x, in, b_hp, a_hp, FRAME_SIZE);
486   silence = compute_frame_features(st, X, P, Ex, Ep, Exp, features, x);
487 
488   if (!silence) {
489     compute_rnn(&st->rnn, g, &vad_prob, features);
490     pitch_filter(X, P, Ex, Ep, Exp, g);
491     for (i=0;i<NB_BANDS;i++) {
492       float alpha = .6f;
493       g[i] = MAX16(g[i], alpha*st->lastg[i]);
494       st->lastg[i] = g[i];
495     }
496     interp_band_gain(gf, g);
497 #if 1
498     for (i=0;i<FREQ_SIZE;i++) {
499       X[i].r *= gf[i];
500       X[i].i *= gf[i];
501     }
502 #endif
503   }
504 
505   frame_synthesis(st, out, X);
506   return vad_prob;
507 }
508 
509 #if TRAINING
510 
uni_rand()511 static float uni_rand() {
512   return rand()/(double)RAND_MAX-.5;
513 }
514 
rand_resp(float * a,float * b)515 static void rand_resp(float *a, float *b) {
516   a[0] = .75*uni_rand();
517   a[1] = .75*uni_rand();
518   b[0] = .75*uni_rand();
519   b[1] = .75*uni_rand();
520 }
521 
main(int argc,char ** argv)522 int main(int argc, char **argv) {
523   int i;
524   int count=0;
525   static const float a_hp[2] = {-1.99599, 0.99600};
526   static const float b_hp[2] = {-2, 1};
527   float a_noise[2] = {0};
528   float b_noise[2] = {0};
529   float a_sig[2] = {0};
530   float b_sig[2] = {0};
531   float mem_hp_x[2]={0};
532   float mem_hp_n[2]={0};
533   float mem_resp_x[2]={0};
534   float mem_resp_n[2]={0};
535   float x[FRAME_SIZE];
536   float n[FRAME_SIZE];
537   float xn[FRAME_SIZE];
538   int vad_cnt=0;
539   int gain_change_count=0;
540   float speech_gain = 1, noise_gain = 1;
541   FILE *f1, *f2, *fout;
542   DenoiseState *st;
543   DenoiseState *noise_state;
544   DenoiseState *noisy;
545   st = rnnoise_create();
546   noise_state = rnnoise_create();
547   noisy = rnnoise_create();
548   if (argc!=4) {
549     fprintf(stderr, "usage: %s <speech> <noise> <output denoised>\n", argv[0]);
550     return 1;
551   }
552   f1 = fopen(argv[1], "r");
553   f2 = fopen(argv[2], "r");
554   fout = fopen(argv[3], "w");
555   for(i=0;i<150;i++) {
556     short tmp[FRAME_SIZE];
557     fread(tmp, sizeof(short), FRAME_SIZE, f2);
558   }
559   while (1) {
560     kiss_fft_cpx X[FREQ_SIZE], Y[FREQ_SIZE], N[FREQ_SIZE], P[WINDOW_SIZE];
561     float Ex[NB_BANDS], Ey[NB_BANDS], En[NB_BANDS], Ep[NB_BANDS];
562     float Exp[NB_BANDS];
563     float Ln[NB_BANDS];
564     float features[NB_FEATURES];
565     float g[NB_BANDS];
566     float gf[FREQ_SIZE]={1};
567     short tmp[FRAME_SIZE];
568     float vad=0;
569     float vad_prob;
570     float E=0;
571     if (count==50000000) break;
572     if (++gain_change_count > 2821) {
573       speech_gain = pow(10., (-40+(rand()%60))/20.);
574       noise_gain = pow(10., (-30+(rand()%50))/20.);
575       if (rand()%10==0) noise_gain = 0;
576       noise_gain *= speech_gain;
577       if (rand()%10==0) speech_gain = 0;
578       gain_change_count = 0;
579       rand_resp(a_noise, b_noise);
580       rand_resp(a_sig, b_sig);
581       lowpass = FREQ_SIZE * 3000./24000. * pow(50., rand()/(double)RAND_MAX);
582       for (i=0;i<NB_BANDS;i++) {
583         if (eband5ms[i]<<FRAME_SIZE_SHIFT > lowpass) {
584           band_lp = i;
585           break;
586         }
587       }
588     }
589     if (speech_gain != 0) {
590       fread(tmp, sizeof(short), FRAME_SIZE, f1);
591       if (feof(f1)) {
592         rewind(f1);
593         fread(tmp, sizeof(short), FRAME_SIZE, f1);
594       }
595       for (i=0;i<FRAME_SIZE;i++) x[i] = speech_gain*tmp[i];
596       for (i=0;i<FRAME_SIZE;i++) E += tmp[i]*(float)tmp[i];
597     } else {
598       for (i=0;i<FRAME_SIZE;i++) x[i] = 0;
599       E = 0;
600     }
601     if (noise_gain!=0) {
602       fread(tmp, sizeof(short), FRAME_SIZE, f2);
603       if (feof(f2)) {
604         rewind(f2);
605         fread(tmp, sizeof(short), FRAME_SIZE, f2);
606       }
607       for (i=0;i<FRAME_SIZE;i++) n[i] = noise_gain*tmp[i];
608     } else {
609       for (i=0;i<FRAME_SIZE;i++) n[i] = 0;
610     }
611     biquad(x, mem_hp_x, x, b_hp, a_hp, FRAME_SIZE);
612     biquad(x, mem_resp_x, x, b_sig, a_sig, FRAME_SIZE);
613     biquad(n, mem_hp_n, n, b_hp, a_hp, FRAME_SIZE);
614     biquad(n, mem_resp_n, n, b_noise, a_noise, FRAME_SIZE);
615     for (i=0;i<FRAME_SIZE;i++) xn[i] = x[i] + n[i];
616     if (E > 1e9f) {
617       vad_cnt=0;
618     } else if (E > 1e8f) {
619       vad_cnt -= 5;
620     } else if (E > 1e7f) {
621       vad_cnt++;
622     } else {
623       vad_cnt+=2;
624     }
625     if (vad_cnt < 0) vad_cnt = 0;
626     if (vad_cnt > 15) vad_cnt = 15;
627 
628     if (vad_cnt >= 10) vad = 0;
629     else if (vad_cnt > 0) vad = 0.5f;
630     else vad = 1.f;
631 
632     frame_analysis(st, Y, Ey, x);
633     frame_analysis(noise_state, N, En, n);
634     for (i=0;i<NB_BANDS;i++) Ln[i] = log10(1e-2+En[i]);
635     int silence = compute_frame_features(noisy, X, P, Ex, Ep, Exp, features, xn);
636     pitch_filter(X, P, Ex, Ep, Exp, g);
637     //printf("%f %d\n", noisy->last_gain, noisy->last_period);
638     for (i=0;i<NB_BANDS;i++) {
639       g[i] = sqrt((Ey[i]+1e-3)/(Ex[i]+1e-3));
640       if (g[i] > 1) g[i] = 1;
641       if (silence || i > band_lp) g[i] = -1;
642       if (Ey[i] < 5e-2 && Ex[i] < 5e-2) g[i] = -1;
643       if (vad==0 && noise_gain==0) g[i] = -1;
644     }
645     count++;
646 #if 0
647     for (i=0;i<NB_FEATURES;i++) printf("%f ", features[i]);
648     for (i=0;i<NB_BANDS;i++) printf("%f ", g[i]);
649     for (i=0;i<NB_BANDS;i++) printf("%f ", Ln[i]);
650     printf("%f\n", vad);
651 #endif
652 #if 1
653     fwrite(features, sizeof(float), NB_FEATURES, stdout);
654     fwrite(g, sizeof(float), NB_BANDS, stdout);
655     fwrite(Ln, sizeof(float), NB_BANDS, stdout);
656     fwrite(&vad, sizeof(float), 1, stdout);
657 #endif
658 #if 0
659     compute_rnn(&noisy->rnn, g, &vad_prob, features);
660     interp_band_gain(gf, g);
661 #if 1
662     for (i=0;i<FREQ_SIZE;i++) {
663       X[i].r *= gf[i];
664       X[i].i *= gf[i];
665     }
666 #endif
667     frame_synthesis(noisy, xn, X);
668 
669     for (i=0;i<FRAME_SIZE;i++) tmp[i] = xn[i];
670     fwrite(tmp, sizeof(short), FRAME_SIZE, fout);
671 #endif
672   }
673   fprintf(stderr, "matrix size: %d x %d\n", count, NB_FEATURES + 2*NB_BANDS + 1);
674   fclose(f1);
675   fclose(f2);
676   fclose(fout);
677   return 0;
678 }
679 
680 #endif
681