1 /* Copyright (c) 2018 Gregor Richards
2  * Copyright (c) 2017 Mozilla */
3 /*
4    Redistribution and use in source and binary forms, with or without
5    modification, are permitted provided that the following conditions
6    are met:
7 
8    - Redistributions of source code must retain the above copyright
9    notice, this list of conditions and the following disclaimer.
10 
11    - Redistributions in binary form must reproduce the above copyright
12    notice, this list of conditions and the following disclaimer in the
13    documentation and/or other materials provided with the distribution.
14 
15    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
19    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27 
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31 
32 #include <stdlib.h>
33 #include <string.h>
34 #include <stdio.h>
35 #include "kiss_fft.h"
36 #include "common.h"
37 #include <math.h>
38 #include "rnnoise-nu.h"
39 #include "pitch.h"
40 #include "arch.h"
41 #include "rnn.h"
42 #include "rnn_data.h"
43 
44 #define DEFAULT_SAMPLE_RATE 48000
45 
46 #define FRAME_SIZE_SHIFT 2
47 #define FRAME_SIZE (120<<FRAME_SIZE_SHIFT)
48 #define WINDOW_SIZE (2*FRAME_SIZE)
49 #define FREQ_SIZE (FRAME_SIZE + 1)
50 
51 #define PITCH_MIN_PERIOD 60
52 #define PITCH_MAX_PERIOD 768
53 #define PITCH_FRAME_SIZE 960
54 #define PITCH_BUF_SIZE (PITCH_MAX_PERIOD+PITCH_FRAME_SIZE)
55 
56 #define SQUARE(x) ((x)*(x))
57 
58 #define SMOOTH_BANDS 1
59 
60 #define NB_BAND_BOUNDARIES 22
61 
62 #if SMOOTH_BANDS
63 #define NB_BANDS 22
64 #else
65 #define NB_BANDS 21
66 #endif
67 
68 #define CEPS_MEM 8
69 #define NB_DELTA_CEPS 6
70 
71 #define NB_FEATURES (NB_BANDS+3*NB_DELTA_CEPS+2)
72 
73 /* We don't allow max attenuation to be more than 60dB */
74 #define MIN_MAX_ATTENUATION 0.000001f
75 
76 
77 #ifndef TRAINING
78 #define TRAINING 0
79 #endif
80 
81 
82 /* cb is the default model */
83 extern const struct RNNModel model_cb;
84 
85 
86 static const opus_int16 eband5ms[] = {
87 /*0  200 400 600 800  1k 1.2 1.4 1.6  2k 2.4 2.8 3.2  4k 4.8 5.6 6.8  8k 9.6 12k 15.6 20k*/
88   0,  1,  2,  3,  4,  5,  6,  7,  8, 10, 12, 14, 16, 20, 24, 28, 34, 40, 48, 60, 78, 100
89 };
90 
91 
92 struct DenoiseState {
93   int init;
94   kiss_fft_state *kfft;
95   float half_window[FRAME_SIZE];
96   float dct_table[NB_BANDS*NB_BANDS];
97 
98   int sample_rate;
99 
100   float analysis_mem[FRAME_SIZE];
101   float cepstral_mem[CEPS_MEM][NB_BANDS];
102   int memid;
103   float synthesis_mem[FRAME_SIZE];
104   float pitch_buf[PITCH_BUF_SIZE];
105   float pitch_enh_buf[PITCH_BUF_SIZE];
106 
107   /* Bands adjusted for the sample rate */
108   opus_int16 band_bins[NB_BAND_BOUNDARIES];
109 
110   float last_gain;
111   int last_period;
112   float mem_hp_x[2];
113   float lastg[NB_BANDS];
114   RNNState rnn;
115 
116   float max_attenuation;
117 };
118 
119 #if SMOOTH_BANDS
120 void compute_band_energy(DenoiseState *st, float *bandE, const kiss_fft_cpx *X) {
121   int i;
122   float sum[NB_BANDS] = {0};
123   for (i=0;i<NB_BANDS-1;i++)
124   {
125     int j;
126     int band_size;
127     band_size = st->band_bins[i+1] - st->band_bins[i];
128     for (j=0;j<band_size;j++) {
129       float tmp;
130       float frac = (float)j/band_size;
131       tmp = SQUARE(X[st->band_bins[i] + j].r);
132       tmp += SQUARE(X[st->band_bins[i] + j].i);
133       sum[i] += (1-frac)*tmp;
134       sum[i+1] += frac*tmp;
135     }
136   }
137   sum[0] *= 2;
138   sum[NB_BANDS-1] *= 2;
139   for (i=0;i<NB_BANDS;i++)
140   {
141     bandE[i] = sum[i];
142   }
143 }
144 
145 void compute_band_corr(DenoiseState *st, float *bandE, const kiss_fft_cpx *X, const kiss_fft_cpx *P) {
146   int i;
147   float sum[NB_BANDS] = {0};
148   for (i=0;i<NB_BANDS-1;i++)
149   {
150     int j;
151     int band_size;
152     band_size = st->band_bins[i+1] - st->band_bins[i];
153     for (j=0;j<band_size;j++) {
154       float tmp;
155       float frac = (float)j/band_size;
156       tmp = X[st->band_bins[i] + j].r * P[st->band_bins[i] + j].r;
157       tmp += X[st->band_bins[i] + j].i * P[st->band_bins[i] + j].i;
158       sum[i] += (1-frac)*tmp;
159       sum[i+1] += frac*tmp;
160     }
161   }
162   sum[0] *= 2;
163   sum[NB_BANDS-1] *= 2;
164   for (i=0;i<NB_BANDS;i++)
165   {
166     bandE[i] = sum[i];
167   }
168 }
169 
170 void interp_band_gain(DenoiseState *st, float *g, const float *bandE) {
171   int i;
172   memset(g, 0, FREQ_SIZE);
173   for (i=0;i<NB_BANDS-1;i++)
174   {
175     int j;
176     int band_size;
177     band_size = st->band_bins[i+1] - st->band_bins[i];
178     for (j=0;j<band_size;j++) {
179       float frac = (float)j/band_size;
180       g[st->band_bins[i] + j] = (1-frac)*bandE[i] + frac*bandE[i+1];
181     }
182   }
183 }
184 #else
185 void compute_band_energy(DenoiseState *st, float *bandE, const kiss_fft_cpx *X) {
186   int i;
187   for (i=0;i<NB_BANDS;i++)
188   {
189     int j;
190     opus_val32 sum = 0;
191     for (j=0;j<(st->band_bins[i+1] - st->band_bins[i]);j++) {
192       sum += SQUARE(X[st->band_bins[i] + j].r);
193       sum += SQUARE(X[st->band_bins[i] + j].i);
194     }
195     bandE[i] = sum;
196   }
197 }
198 
199 void interp_band_gain(DenoiseState *st, float *g, const float *bandE) {
200   int i;
201   memset(g, 0, FREQ_SIZE);
202   for (i=0;i<NB_BANDS;i++)
203   {
204     int j;
205     for (j=0;j<(st->band_bins[i+1] - st->band_bins[i]);j++)
206       g[st->band_bins[i] + j] = bandE[i];
207   }
208 }
209 #endif
210 
211 
212 static void check_init(DenoiseState *st) {
213   int i;
214   if (st->init) return;
215   /* FIXME: Deallocate this! */
216   st->kfft = opus_fft_alloc_twiddles(2*FRAME_SIZE, NULL, NULL, NULL, 0);
217 
218   /* Get the sample rate set up */
219   if (st->sample_rate <= 0) st->sample_rate = DEFAULT_SAMPLE_RATE;
220 
221   /* Adjust the bins for the sample rate */
222   for (i = 0; i < NB_BAND_BOUNDARIES; i++)
223     st->band_bins[i] = (((long) eband5ms[i]) << FRAME_SIZE_SHIFT) * DEFAULT_SAMPLE_RATE / st->sample_rate;
224 
225   /* Make sure nothing's above the Nyquist frequency */
226   for (i = 0; i < NB_BANDS; i++)
227     if (st->band_bins[i] >= FRAME_SIZE) st->band_bins[i] = FRAME_SIZE - 1;
228 
229   for (i=0;i<FRAME_SIZE;i++)
230     st->half_window[i] = sin(.5*M_PI*sin(.5*M_PI*(i+.5)/FRAME_SIZE) * sin(.5*M_PI*(i+.5)/FRAME_SIZE));
231   for (i=0;i<NB_BANDS;i++) {
232     int j;
233     for (j=0;j<NB_BANDS;j++) {
234       st->dct_table[i*NB_BANDS + j] = cos((i+.5)*j*M_PI/NB_BANDS);
235       if (j==0) st->dct_table[i*NB_BANDS + j] *= sqrt(.5);
236     }
237   }
238   st->init = 1;
239 }
240 
241 static void dct(DenoiseState *st, float *out, const float *in) {
242   int i;
243   check_init(st);
244   for (i=0;i<NB_BANDS;i++) {
245     int j;
246     float sum = 0;
247     for (j=0;j<NB_BANDS;j++) {
248       sum += in[j] * st->dct_table[j*NB_BANDS + i];
249     }
250     out[i] = sum*sqrt(2./22);
251   }
252 }
253 
254 #if 0
255 static void idct(DenoiseState *st, float *out, const float *in) {
256   int i;
257   check_init(st);
258   for (i=0;i<NB_BANDS;i++) {
259     int j;
260     float sum = 0;
261     for (j=0;j<NB_BANDS;j++) {
262       sum += in[j] * st->dct_table[i*NB_BANDS + j];
263     }
264     out[i] = sum*sqrt(2./22);
265   }
266 }
267 #endif
268 
269 static void forward_transform(DenoiseState *st, kiss_fft_cpx *out, const float *in) {
270   int i;
271   kiss_fft_cpx x[WINDOW_SIZE];
272   kiss_fft_cpx y[WINDOW_SIZE];
273   check_init(st);
274   for (i=0;i<WINDOW_SIZE;i++) {
275     x[i].r = in[i];
276     x[i].i = 0;
277   }
278   opus_fft(st->kfft, x, y, 0);
279   for (i=0;i<FREQ_SIZE;i++) {
280     out[i] = y[i];
281   }
282 }
283 
284 static void inverse_transform(DenoiseState *st, float *out, const kiss_fft_cpx *in) {
285   int i;
286   kiss_fft_cpx x[WINDOW_SIZE];
287   kiss_fft_cpx y[WINDOW_SIZE];
288   check_init(st);
289   for (i=0;i<FREQ_SIZE;i++) {
290     x[i] = in[i];
291   }
292   for (;i<WINDOW_SIZE;i++) {
293     x[i].r = x[WINDOW_SIZE - i].r;
294     x[i].i = -x[WINDOW_SIZE - i].i;
295   }
296   opus_fft(st->kfft, x, y, 0);
297   /* output in reverse order for IFFT. */
298   out[0] = WINDOW_SIZE*y[0].r;
299   for (i=1;i<WINDOW_SIZE;i++) {
300     out[i] = WINDOW_SIZE*y[WINDOW_SIZE - i].r;
301   }
302 }
303 
304 static void apply_window(DenoiseState *st, float *x) {
305   int i;
306   check_init(st);
307   for (i=0;i<FRAME_SIZE;i++) {
308     x[i] *= st->half_window[i];
309     x[WINDOW_SIZE - 1 - i] *= st->half_window[i];
310   }
311 }
312 
313 int rnnoise_get_size() {
314   return sizeof(DenoiseState);
315 }
316 
317 int rnnoise_init(DenoiseState *st, RNNModel *model) {
318   memset(st, 0, sizeof(*st));
319   if (model)
320     st->rnn.model = model;
321   else
322     st->rnn.model = &model_cb;
323   st->rnn.vad_gru_state = calloc(sizeof(float), st->rnn.model->vad_gru_size);
324   st->rnn.noise_gru_state = calloc(sizeof(float), st->rnn.model->noise_gru_size);
325   st->rnn.denoise_gru_state = calloc(sizeof(float), st->rnn.model->denoise_gru_size);
326   return 0;
327 }
328 
329 DenoiseState *rnnoise_create(RNNModel *model) {
330   DenoiseState *st;
331   st = malloc(rnnoise_get_size());
332   rnnoise_init(st, model);
333   return st;
334 }
335 
336 void rnnoise_destroy(DenoiseState *st) {
337   if (st->init)
338     free(st->kfft);
339   free(st->rnn.vad_gru_state);
340   free(st->rnn.noise_gru_state);
341   free(st->rnn.denoise_gru_state);
342   free(st);
343 }
344 
345 #if TRAINING
346 int lowpass = FREQ_SIZE;
347 int band_lp = NB_BANDS;
348 #endif
349 
350 static void frame_analysis(DenoiseState *st, kiss_fft_cpx *X, float *Ex, const float *in) {
351   int i;
352   float x[WINDOW_SIZE];
353   RNN_COPY(x, st->analysis_mem, FRAME_SIZE);
354   for (i=0;i<FRAME_SIZE;i++) x[FRAME_SIZE + i] = in[i];
355   RNN_COPY(st->analysis_mem, in, FRAME_SIZE);
356   apply_window(st, x);
357   forward_transform(st, X, x);
358 #if TRAINING
359   for (i=lowpass;i<FREQ_SIZE;i++)
360     X[i].r = X[i].i = 0;
361 #endif
362   compute_band_energy(st, Ex, X);
363 }
364 
365 static int compute_frame_features(DenoiseState *st, kiss_fft_cpx *X, kiss_fft_cpx *P,
366                                   float *Ex, float *Ep, float *Exp, float *features, const float *in) {
367   int i;
368   float E = 0;
369   float *ceps_0, *ceps_1, *ceps_2;
370   float spec_variability = 0;
371   float Ly[NB_BANDS];
372   float p[WINDOW_SIZE];
373   float pitch_buf[PITCH_BUF_SIZE>>1];
374   int pitch_index;
375   float gain;
376   float *(pre[1]);
377   float tmp[NB_BANDS];
378   float follow, logMax;
379   frame_analysis(st, X, Ex, in);
380   RNN_MOVE(st->pitch_buf, &st->pitch_buf[FRAME_SIZE], PITCH_BUF_SIZE-FRAME_SIZE);
381   RNN_COPY(&st->pitch_buf[PITCH_BUF_SIZE-FRAME_SIZE], in, FRAME_SIZE);
382   pre[0] = &st->pitch_buf[0];
383   pitch_downsample(pre, pitch_buf, PITCH_BUF_SIZE, 1);
384   pitch_search(pitch_buf+(PITCH_MAX_PERIOD>>1), pitch_buf, PITCH_FRAME_SIZE,
385                PITCH_MAX_PERIOD-3*PITCH_MIN_PERIOD, &pitch_index);
386   pitch_index = PITCH_MAX_PERIOD-pitch_index;
387 
388   gain = remove_doubling(pitch_buf, PITCH_MAX_PERIOD, PITCH_MIN_PERIOD,
389           PITCH_FRAME_SIZE, &pitch_index, st->last_period, st->last_gain);
390   st->last_period = pitch_index;
391   st->last_gain = gain;
392   for (i=0;i<WINDOW_SIZE;i++)
393     p[i] = st->pitch_buf[PITCH_BUF_SIZE-WINDOW_SIZE-pitch_index+i];
394   apply_window(st, p);
395   forward_transform(st, P, p);
396   compute_band_energy(st, Ep, P);
397 #if SMOOTH_BANDS
398   compute_band_corr(st, Exp, X, P);
399 #endif
400   for (i=0;i<NB_BANDS;i++) Exp[i] = Exp[i]/sqrt(.001+Ex[i]*Ep[i]);
401   dct(st, tmp, Exp);
402   for (i=0;i<NB_DELTA_CEPS;i++) features[NB_BANDS+2*NB_DELTA_CEPS+i] = tmp[i];
403   features[NB_BANDS+2*NB_DELTA_CEPS] -= 1.3;
404   features[NB_BANDS+2*NB_DELTA_CEPS+1] -= 0.9;
405   features[NB_BANDS+3*NB_DELTA_CEPS] = .01*(pitch_index-300);
406   logMax = -2;
407   follow = -2;
408   for (i=0;i<NB_BANDS;i++) {
409     Ly[i] = log10(1e-2+Ex[i]);
410     Ly[i] = MAX16(logMax-7, MAX16(follow-1.5, Ly[i]));
411     logMax = MAX16(logMax, Ly[i]);
412     follow = MAX16(follow-1.5, Ly[i]);
413     E += Ex[i];
414   }
415   if (!TRAINING && E < 0.04) {
416     /* If there's no audio, avoid messing up the state. */
417     RNN_CLEAR(features, NB_FEATURES);
418     return 1;
419   }
420   dct(st, features, Ly);
421   features[0] -= 12;
422   features[1] -= 4;
423   ceps_0 = st->cepstral_mem[st->memid];
424   ceps_1 = (st->memid < 1) ? st->cepstral_mem[CEPS_MEM+st->memid-1] : st->cepstral_mem[st->memid-1];
425   ceps_2 = (st->memid < 2) ? st->cepstral_mem[CEPS_MEM+st->memid-2] : st->cepstral_mem[st->memid-2];
426   for (i=0;i<NB_BANDS;i++) ceps_0[i] = features[i];
427   st->memid++;
428   for (i=0;i<NB_DELTA_CEPS;i++) {
429     features[i] = ceps_0[i] + ceps_1[i] + ceps_2[i];
430     features[NB_BANDS+i] = ceps_0[i] - ceps_2[i];
431     features[NB_BANDS+NB_DELTA_CEPS+i] =  ceps_0[i] - 2*ceps_1[i] + ceps_2[i];
432   }
433   /* Spectral variability features. */
434   if (st->memid == CEPS_MEM) st->memid = 0;
435   for (i=0;i<CEPS_MEM;i++)
436   {
437     int j;
438     float mindist = 1e15f;
439     for (j=0;j<CEPS_MEM;j++)
440     {
441       int k;
442       float dist=0;
443       for (k=0;k<NB_BANDS;k++)
444       {
445         float tmp;
446         tmp = st->cepstral_mem[i][k] - st->cepstral_mem[j][k];
447         dist += tmp*tmp;
448       }
449       if (j!=i)
450         mindist = MIN32(mindist, dist);
451     }
452     spec_variability += mindist;
453   }
454   features[NB_BANDS+3*NB_DELTA_CEPS+1] = spec_variability/CEPS_MEM-2.1;
455   return TRAINING && E < 0.1;
456 }
457 
458 static void frame_synthesis(DenoiseState *st, float *out, const kiss_fft_cpx *y) {
459   float x[WINDOW_SIZE];
460   int i;
461   inverse_transform(st, x, y);
462   apply_window(st, x);
463   for (i=0;i<FRAME_SIZE;i++) out[i] = x[i] + st->synthesis_mem[i];
464   RNN_COPY(st->synthesis_mem, &x[FRAME_SIZE], FRAME_SIZE);
465 }
466 
467 static void biquad(float *y, float mem[2], const float *x, const float *b, const float *a, int N) {
468   int i;
469   for (i=0;i<N;i++) {
470     float xi, yi;
471     xi = x[i];
472     yi = x[i] + mem[0];
473     mem[0] = mem[1] + (b[0]*(double)xi - a[0]*(double)yi);
474     mem[1] = (b[1]*(double)xi - a[1]*(double)yi);
475     y[i] = yi;
476   }
477 }
478 
479 void pitch_filter(DenoiseState *st, kiss_fft_cpx *X, const kiss_fft_cpx *P, const float *Ex, const float *Ep,
480                   const float *Exp, const float *g) {
481   int i;
482   float r[NB_BANDS];
483   float rf[FREQ_SIZE] = {0};
484   for (i=0;i<NB_BANDS;i++) {
485 #if 0
486     if (Exp[i]>g[i]) r[i] = 1;
487     else r[i] = Exp[i]*(1-g[i])/(.001 + g[i]*(1-Exp[i]));
488     r[i] = MIN16(1, MAX16(0, r[i]));
489 #else
490     if (Exp[i]>g[i]) r[i] = 1;
491     else r[i] = SQUARE(Exp[i])*(1-SQUARE(g[i]))/(.001 + SQUARE(g[i])*(1-SQUARE(Exp[i])));
492     r[i] = sqrt(MIN16(1, MAX16(0, r[i])));
493 #endif
494     r[i] *= sqrt(Ex[i]/(1e-8+Ep[i]));
495   }
496   interp_band_gain(st, rf, r);
497   for (i=0;i<FREQ_SIZE;i++) {
498     X[i].r += rf[i]*P[i].r;
499     X[i].i += rf[i]*P[i].i;
500   }
501   float newE[NB_BANDS];
502   compute_band_energy(st, newE, X);
503   float norm[NB_BANDS];
504   float normf[FREQ_SIZE]={0};
505   for (i=0;i<NB_BANDS;i++) {
506     norm[i] = sqrt(Ex[i]/(1e-8+newE[i]));
507   }
508   interp_band_gain(st, normf, norm);
509   for (i=0;i<FREQ_SIZE;i++) {
510     X[i].r *= normf[i];
511     X[i].i *= normf[i];
512   }
513 }
514 
515 float rnnoise_process_frame(DenoiseState *st, float *out, const float *in) {
516   int i;
517   kiss_fft_cpx X[FREQ_SIZE];
518   kiss_fft_cpx P[WINDOW_SIZE];
519   float x[FRAME_SIZE];
520   float Ex[NB_BANDS], Ep[NB_BANDS];
521   float Exp[NB_BANDS];
522   float features[NB_FEATURES];
523   float g[NB_BANDS];
524   float gf[FREQ_SIZE]={1};
525   float vad_prob = 0;
526   int silence;
527   static const float a_hp[2] = {-1.99599, 0.99600};
528   static const float b_hp[2] = {-2, 1};
529   biquad(x, st->mem_hp_x, in, b_hp, a_hp, FRAME_SIZE);
530   silence = compute_frame_features(st, X, P, Ex, Ep, Exp, features, x);
531 
532   if (!silence) {
533     compute_rnn(&st->rnn, g, &vad_prob, features);
534     pitch_filter(st, X, P, Ex, Ep, Exp, g);
535     for (i=0;i<NB_BANDS;i++) {
536       float alpha = .6f;
537       g[i] = MAX16(g[i], alpha*st->lastg[i]);
538       st->lastg[i] = g[i];
539     }
540 
541     /* Apply maximum attenuation (minimum value) */
542     if (st->max_attenuation) {
543       float min = 1, mult;
544       for (i=0;i<NB_BANDS;i++) {
545         if (g[i] < min) min = g[i];
546       }
547       if (min < st->max_attenuation) {
548         if (min < MIN_MAX_ATTENUATION)
549           min = MIN_MAX_ATTENUATION;
550         mult = (1.0f-st->max_attenuation) / (1.0f-min);
551         for (i=0;i<NB_BANDS;i++) {
552           if (g[i] < MIN_MAX_ATTENUATION) g[i] = MIN_MAX_ATTENUATION;
553           g[i] = 1.0f-((1.0f-g[i]) * mult);
554           st->lastg[i] = g[i];
555         }
556       }
557     }
558 
559     interp_band_gain(st, gf, g);
560 #if 1
561     for (i=0;i<FREQ_SIZE;i++) {
562       X[i].r *= gf[i];
563       X[i].i *= gf[i];
564     }
565 #endif
566   }
567 
568   frame_synthesis(st, out, X);
569   return vad_prob;
570 }
571 
572 void rnnoise_set_param(DenoiseState *st, int param, float value)
573 {
574   switch (param) {
575     case RNNOISE_PARAM_MAX_ATTENUATION:
576       if ((value > MIN_MAX_ATTENUATION && value <= 1) || value == 0)
577         st->max_attenuation = value;
578       else
579         st->max_attenuation = MIN_MAX_ATTENUATION;
580       break;
581 
582     case RNNOISE_PARAM_SAMPLE_RATE:
583       if (value <= 0)
584         st->sample_rate = 0;
585       else
586         st->sample_rate = value;
587       break;
588   }
589 }
590 
591 #if TRAINING
592 
593 static float uni_rand() {
594   return rand()/(double)RAND_MAX-.5;
595 }
596 
597 static void rand_resp(float *a, float *b) {
598   a[0] = .75*uni_rand();
599   a[1] = .75*uni_rand();
600   b[0] = .75*uni_rand();
601   b[1] = .75*uni_rand();
602 }
603 
604 int main(int argc, char **argv) {
605   int i;
606   int count=0;
607   static const float a_hp[2] = {-1.99599, 0.99600};
608   static const float b_hp[2] = {-2, 1};
609   float a_noise[2] = {0};
610   float b_noise[2] = {0};
611   float a_sig[2] = {0};
612   float b_sig[2] = {0};
613   float mem_hp_x[2]={0};
614   float mem_hp_n[2]={0};
615   float mem_resp_x[2]={0};
616   float mem_resp_n[2]={0};
617   float x[FRAME_SIZE];
618   float n[FRAME_SIZE];
619   float xn[FRAME_SIZE];
620   int vad_cnt=0;
621   int gain_change_count=0;
622   float speech_gain = 1, noise_gain = 1;
623   FILE *f1, *f2;
624   int maxCount;
625   DenoiseState *st;
626   DenoiseState *noise_state;
627   DenoiseState *noisy;
628   st = rnnoise_create(NULL);
629   noise_state = rnnoise_create(NULL);
630   noisy = rnnoise_create(NULL);
631   if (argc!=4) {
632     fprintf(stderr, "usage: %s <speech> <noise> <count>\n", argv[0]);
633     return 1;
634   }
635   f1 = fopen(argv[1], "r");
636   f2 = fopen(argv[2], "r");
637   maxCount = atoi(argv[3]);
638   for(i=0;i<150;i++) {
639     short tmp[FRAME_SIZE];
640     fread(tmp, sizeof(short), FRAME_SIZE, f2);
641   }
642   while (1) {
643     kiss_fft_cpx X[FREQ_SIZE], Y[FREQ_SIZE], N[FREQ_SIZE], P[WINDOW_SIZE];
644     float Ex[NB_BANDS], Ey[NB_BANDS], En[NB_BANDS], Ep[NB_BANDS];
645     float Exp[NB_BANDS];
646     float Ln[NB_BANDS];
647     float features[NB_FEATURES];
648     float g[NB_BANDS];
649     short tmp[FRAME_SIZE];
650     float vad=0;
651     float E=0;
652     if (count==maxCount) break;
653     if ((count%1000)==0) fprintf(stderr, "%d\r", count);
654     if (++gain_change_count > 2821) {
655       speech_gain = pow(10., (-40+(rand()%60))/20.);
656       noise_gain = pow(10., (-30+(rand()%50))/20.);
657       if (rand()%10==0) noise_gain = 0;
658       noise_gain *= speech_gain;
659       if (rand()%10==0) speech_gain = 0;
660       gain_change_count = 0;
661       rand_resp(a_noise, b_noise);
662       rand_resp(a_sig, b_sig);
663       lowpass = FREQ_SIZE * 3000./24000. * pow(50., rand()/(double)RAND_MAX);
664       for (i=0;i<NB_BANDS;i++) {
665         if (eband5ms[i]<<FRAME_SIZE_SHIFT > lowpass) {
666           band_lp = i;
667           break;
668         }
669       }
670     }
671     if (speech_gain != 0) {
672       fread(tmp, sizeof(short), FRAME_SIZE, f1);
673       if (feof(f1)) {
674         rewind(f1);
675         fread(tmp, sizeof(short), FRAME_SIZE, f1);
676       }
677       for (i=0;i<FRAME_SIZE;i++) x[i] = speech_gain*tmp[i];
678       for (i=0;i<FRAME_SIZE;i++) E += tmp[i]*(float)tmp[i];
679     } else {
680       for (i=0;i<FRAME_SIZE;i++) x[i] = 0;
681       E = 0;
682     }
683     if (noise_gain!=0) {
684       fread(tmp, sizeof(short), FRAME_SIZE, f2);
685       if (feof(f2)) {
686         rewind(f2);
687         fread(tmp, sizeof(short), FRAME_SIZE, f2);
688       }
689       for (i=0;i<FRAME_SIZE;i++) n[i] = noise_gain*tmp[i];
690     } else {
691       for (i=0;i<FRAME_SIZE;i++) n[i] = 0;
692     }
693     biquad(x, mem_hp_x, x, b_hp, a_hp, FRAME_SIZE);
694     biquad(x, mem_resp_x, x, b_sig, a_sig, FRAME_SIZE);
695     biquad(n, mem_hp_n, n, b_hp, a_hp, FRAME_SIZE);
696     biquad(n, mem_resp_n, n, b_noise, a_noise, FRAME_SIZE);
697     for (i=0;i<FRAME_SIZE;i++) xn[i] = x[i] + n[i];
698     if (E > 1e9f) {
699       vad_cnt=0;
700     } else if (E > 1e8f) {
701       vad_cnt -= 5;
702     } else if (E > 1e7f) {
703       vad_cnt++;
704     } else {
705       vad_cnt+=2;
706     }
707     if (vad_cnt < 0) vad_cnt = 0;
708     if (vad_cnt > 15) vad_cnt = 15;
709 
710     if (vad_cnt >= 10) vad = 0;
711     else if (vad_cnt > 0) vad = 0.5f;
712     else vad = 1.f;
713 
714     frame_analysis(st, Y, Ey, x);
715     frame_analysis(noise_state, N, En, n);
716     for (i=0;i<NB_BANDS;i++) Ln[i] = log10(1e-2+En[i]);
717     int silence = compute_frame_features(noisy, X, P, Ex, Ep, Exp, features, xn);
718     pitch_filter(st, X, P, Ex, Ep, Exp, g);
719     //printf("%f %d\n", noisy->last_gain, noisy->last_period);
720     for (i=0;i<NB_BANDS;i++) {
721       g[i] = sqrt((Ey[i]+1e-3)/(Ex[i]+1e-3));
722       if (g[i] > 1) g[i] = 1;
723       if (silence || i > band_lp) g[i] = -1;
724       if (Ey[i] < 5e-2 && Ex[i] < 5e-2) g[i] = -1;
725       if (vad==0 && noise_gain==0) g[i] = -1;
726     }
727     count++;
728 #if 1
729     fwrite(features, sizeof(float), NB_FEATURES, stdout);
730     fwrite(g, sizeof(float), NB_BANDS, stdout);
731     fwrite(Ln, sizeof(float), NB_BANDS, stdout);
732     fwrite(&vad, sizeof(float), 1, stdout);
733 #endif
734   }
735   fprintf(stderr, "matrix size: %d x %d\n", count, NB_FEATURES + 2*NB_BANDS + 1);
736   fclose(f1);
737   fclose(f2);
738   return 0;
739 }
740 
741 #endif
742