1 /*---------------------------------------------------------------------------*\
2
3 FILE........: sine.c
4 AUTHOR......: David Rowe
5 DATE CREATED: 19/8/2010
6
7 Sinusoidal analysis and synthesis functions.
8
9 \*---------------------------------------------------------------------------*/
10
11 /*
12 Copyright (C) 1990-2010 David Rowe
13
14 All rights reserved.
15
16 This program is free software; you can redistribute it and/or modify
17 it under the terms of the GNU Lesser General Public License version 2.1, as
18 published by the Free Software Foundation. This program is
19 distributed in the hope that it will be useful, but WITHOUT ANY
20 WARRANTY; without even the implied warranty of MERCHANTABILITY or
21 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
22 License for more details.
23
24 You should have received a copy of the GNU Lesser General Public License
25 along with this program; if not, see <http://www.gnu.org/licenses/>.
26 */
27
28 /*---------------------------------------------------------------------------*\
29
30 INCLUDES
31
32 \*---------------------------------------------------------------------------*/
33
34 #include <stdlib.h>
35 #include <stdio.h>
36 #include <math.h>
37
38 #include "defines.h"
39 #include "sine.h"
40 #include "kiss_fft.h"
41
42 #define HPF_BETA 0.125
43
44 /*---------------------------------------------------------------------------*\
45
46 HEADERS
47
48 \*---------------------------------------------------------------------------*/
49
50 void hs_pitch_refinement(MODEL *model, COMP Sw[], float pmin, float pmax,
51 float pstep);
52
53 /*---------------------------------------------------------------------------*\
54
55 FUNCTIONS
56
57 \*---------------------------------------------------------------------------*/
58
c2const_create(int Fs,float framelength_s)59 C2CONST c2const_create(int Fs, float framelength_s) {
60 C2CONST c2const;
61
62 assert((Fs == 8000) || (Fs = 16000));
63 c2const.Fs = Fs;
64 c2const.n_samp = round(Fs*framelength_s);
65 c2const.max_amp = floor(Fs*P_MAX_S/2);
66 c2const.p_min = floor(Fs*P_MIN_S);
67 c2const.p_max = floor(Fs*P_MAX_S);
68 c2const.m_pitch = floor(Fs*M_PITCH_S);
69 c2const.Wo_min = TWO_PI/c2const.p_max;
70 c2const.Wo_max = TWO_PI/c2const.p_min;
71
72 if (Fs == 8000) {
73 c2const.nw = 279;
74 } else {
75 c2const.nw = 511; /* actually a bit shorter in time but lets us maintain constant FFT size */
76 }
77
78 c2const.tw = Fs*TW_S;
79
80 /*
81 fprintf(stderr, "max_amp: %d m_pitch: %d\n", c2const.n_samp, c2const.m_pitch);
82 fprintf(stderr, "p_min: %d p_max: %d\n", c2const.p_min, c2const.p_max);
83 fprintf(stderr, "Wo_min: %f Wo_max: %f\n", c2const.Wo_min, c2const.Wo_max);
84 fprintf(stderr, "nw: %d tw: %d\n", c2const.nw, c2const.tw);
85 */
86
87 return c2const;
88 }
89
90 /*---------------------------------------------------------------------------*\
91
92 FUNCTION....: make_analysis_window
93 AUTHOR......: David Rowe
94 DATE CREATED: 11/5/94
95
96 Init function that generates the time domain analysis window and it's DFT.
97
98 \*---------------------------------------------------------------------------*/
99
make_analysis_window(C2CONST * c2const,codec2_fft_cfg fft_fwd_cfg,float w[],float W[])100 void make_analysis_window(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, float w[], float W[])
101 {
102 float m;
103 COMP wshift[FFT_ENC];
104 int i,j;
105 int m_pitch = c2const->m_pitch;
106 int nw = c2const->nw;
107
108 /*
109 Generate Hamming window centered on M-sample pitch analysis window
110
111 0 M/2 M-1
112 |-------------|-------------|
113 |-------|-------|
114 nw samples
115
116 All our analysis/synthsis is centred on the M/2 sample.
117 */
118
119 m = 0.0;
120 for(i=0; i<m_pitch/2-nw/2; i++)
121 w[i] = 0.0;
122 for(i=m_pitch/2-nw/2,j=0; i<m_pitch/2+nw/2; i++,j++) {
123 w[i] = 0.5 - 0.5*cosf(TWO_PI*j/(nw-1));
124 m += w[i]*w[i];
125 }
126 for(i=m_pitch/2+nw/2; i<m_pitch; i++)
127 w[i] = 0.0;
128
129 /* Normalise - makes freq domain amplitude estimation straight
130 forward */
131
132 m = 1.0/sqrtf(m*FFT_ENC);
133 for(i=0; i<m_pitch; i++) {
134 w[i] *= m;
135 }
136
137 /*
138 Generate DFT of analysis window, used for later processing. Note
139 we modulo FFT_ENC shift the time domain window w[], this makes the
140 imaginary part of the DFT W[] equal to zero as the shifted w[] is
141 even about the n=0 time axis if nw is odd. Having the imag part
142 of the DFT W[] makes computation easier.
143
144 0 FFT_ENC-1
145 |-------------------------|
146
147 ----\ /----
148 \ /
149 \ / <- shifted version of window w[n]
150 \ /
151 \ /
152 -------
153
154 |---------| |---------|
155 nw/2 nw/2
156 */
157
158 COMP temp[FFT_ENC];
159
160 for(i=0; i<FFT_ENC; i++) {
161 wshift[i].real = 0.0;
162 wshift[i].imag = 0.0;
163 }
164 for(i=0; i<nw/2; i++)
165 wshift[i].real = w[i+m_pitch/2];
166 for(i=FFT_ENC-nw/2,j=m_pitch/2-nw/2; i<FFT_ENC; i++,j++)
167 wshift[i].real = w[j];
168
169 codec2_fft(fft_fwd_cfg, wshift, temp);
170
171 /*
172 Re-arrange W[] to be symmetrical about FFT_ENC/2. Makes later
173 analysis convenient.
174
175 Before:
176
177
178 0 FFT_ENC-1
179 |----------|---------|
180 __ _
181 \ /
182 \_______________/
183
184 After:
185
186 0 FFT_ENC-1
187 |----------|---------|
188 ___
189 / \
190 ________/ \_______
191
192 */
193
194
195 for(i=0; i<FFT_ENC/2; i++) {
196 W[i] = temp[i + FFT_ENC / 2].real;
197 W[i + FFT_ENC / 2] = temp[i].real;
198 }
199
200 }
201
202 /*---------------------------------------------------------------------------*\
203
204 FUNCTION....: hpf
205 AUTHOR......: David Rowe
206 DATE CREATED: 16 Nov 2010
207
208 High pass filter with a -3dB point of about 160Hz.
209
210 y(n) = -HPF_BETA*y(n-1) + x(n) - x(n-1)
211
212 \*---------------------------------------------------------------------------*/
213
hpf(float x,float states[])214 float hpf(float x, float states[])
215 {
216 states[0] = -HPF_BETA*states[0] + x - states[1];
217 states[1] = x;
218
219 return states[0];
220 }
221
222 /*---------------------------------------------------------------------------*\
223
224 FUNCTION....: dft_speech
225 AUTHOR......: David Rowe
226 DATE CREATED: 27/5/94
227
228 Finds the DFT of the current speech input speech frame.
229
230 \*---------------------------------------------------------------------------*/
231
232 // TODO: we can either go for a faster FFT using fftr and some stack usage
233 // or we can reduce stack usage to almost zero on STM32 by switching to fft_inplace
234 #if 1
dft_speech(C2CONST * c2const,codec2_fft_cfg fft_fwd_cfg,COMP Sw[],float Sn[],float w[])235 void dft_speech(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, COMP Sw[], float Sn[], float w[])
236 {
237 int i;
238 int m_pitch = c2const->m_pitch;
239 int nw = c2const->nw;
240
241 for(i=0; i<FFT_ENC; i++) {
242 Sw[i].real = 0.0;
243 Sw[i].imag = 0.0;
244 }
245
246 /* Centre analysis window on time axis, we need to arrange input
247 to FFT this way to make FFT phases correct */
248
249 /* move 2nd half to start of FFT input vector */
250
251 for(i=0; i<nw/2; i++)
252 Sw[i].real = Sn[i+m_pitch/2]*w[i+m_pitch/2];
253
254 /* move 1st half to end of FFT input vector */
255
256 for(i=0; i<nw/2; i++)
257 Sw[FFT_ENC-nw/2+i].real = Sn[i+m_pitch/2-nw/2]*w[i+m_pitch/2-nw/2];
258
259 codec2_fft_inplace(fft_fwd_cfg, Sw);
260 }
261 #else
dft_speech(codec2_fftr_cfg fftr_fwd_cfg,COMP Sw[],float Sn[],float w[])262 void dft_speech(codec2_fftr_cfg fftr_fwd_cfg, COMP Sw[], float Sn[], float w[])
263 {
264 int i;
265 float sw[FFT_ENC];
266
267 for(i=0; i<FFT_ENC; i++) {
268 sw[i] = 0.0;
269 }
270
271 /* Centre analysis window on time axis, we need to arrange input
272 to FFT this way to make FFT phases correct */
273
274 /* move 2nd half to start of FFT input vector */
275
276 for(i=0; i<nw/2; i++)
277 sw[i] = Sn[i+m_pitch/2]*w[i+m_pitch/2];
278
279 /* move 1st half to end of FFT input vector */
280
281 for(i=0; i<nw/2; i++)
282 sw[FFT_ENC-nw/2+i] = Sn[i+m_pitch/2-nw/2]*w[i+m_pitch/2-nw/2];
283
284 codec2_fftr(fftr_fwd_cfg, sw, Sw);
285 }
286 #endif
287
288
289 /*---------------------------------------------------------------------------*\
290
291 FUNCTION....: two_stage_pitch_refinement
292 AUTHOR......: David Rowe
293 DATE CREATED: 27/5/94
294
295 Refines the current pitch estimate using the harmonic sum pitch
296 estimation technique.
297
298 \*---------------------------------------------------------------------------*/
299
two_stage_pitch_refinement(C2CONST * c2const,MODEL * model,COMP Sw[])300 void two_stage_pitch_refinement(C2CONST *c2const, MODEL *model, COMP Sw[])
301 {
302 float pmin,pmax,pstep; /* pitch refinment minimum, maximum and step */
303
304 /* Coarse refinement */
305
306 pmax = TWO_PI/model->Wo + 5;
307 pmin = TWO_PI/model->Wo - 5;
308 pstep = 1.0;
309 hs_pitch_refinement(model,Sw,pmin,pmax,pstep);
310
311 /* Fine refinement */
312
313 pmax = TWO_PI/model->Wo + 1;
314 pmin = TWO_PI/model->Wo - 1;
315 pstep = 0.25;
316 hs_pitch_refinement(model,Sw,pmin,pmax,pstep);
317
318 /* Limit range */
319
320 if (model->Wo < TWO_PI/c2const->p_max)
321 model->Wo = TWO_PI/c2const->p_max;
322 if (model->Wo > TWO_PI/c2const->p_min)
323 model->Wo = TWO_PI/c2const->p_min;
324
325 model->L = floorf(PI/model->Wo);
326
327 /* trap occasional round off issues with floorf() */
328 if (model->Wo*model->L >= 0.95*PI) {
329 model->L--;
330 }
331 assert(model->Wo*model->L < PI);
332 }
333
334 /*---------------------------------------------------------------------------*\
335
336 FUNCTION....: hs_pitch_refinement
337 AUTHOR......: David Rowe
338 DATE CREATED: 27/5/94
339
340 Harmonic sum pitch refinement function.
341
342 pmin pitch search range minimum
343 pmax pitch search range maximum
344 step pitch search step size
345 model current pitch estimate in model.Wo
346
347 model refined pitch estimate in model.Wo
348
349 \*---------------------------------------------------------------------------*/
350
hs_pitch_refinement(MODEL * model,COMP Sw[],float pmin,float pmax,float pstep)351 void hs_pitch_refinement(MODEL *model, COMP Sw[], float pmin, float pmax, float pstep)
352 {
353 int m; /* loop variable */
354 int b; /* bin for current harmonic centre */
355 float E; /* energy for current pitch*/
356 float Wo; /* current "test" fundamental freq. */
357 float Wom; /* Wo that maximises E */
358 float Em; /* mamimum energy */
359 float r, one_on_r; /* number of rads/bin */
360 float p; /* current pitch */
361
362 /* Initialisation */
363
364 model->L = PI/model->Wo; /* use initial pitch est. for L */
365 Wom = model->Wo;
366 Em = 0.0;
367 r = TWO_PI/FFT_ENC;
368 one_on_r = 1.0/r;
369
370 /* Determine harmonic sum for a range of Wo values */
371
372 for(p=pmin; p<=pmax; p+=pstep) {
373 E = 0.0;
374 Wo = TWO_PI/p;
375
376 float bFloat = Wo * one_on_r;
377 float currentBFloat = bFloat;
378
379 /* Sum harmonic magnitudes */
380 for(m=1; m<=model->L; m++) {
381 b = (int)(currentBFloat + 0.5);
382 E += Sw[b].real*Sw[b].real + Sw[b].imag*Sw[b].imag;
383 currentBFloat += bFloat;
384 }
385 /* Compare to see if this is a maximum */
386
387 if (E > Em) {
388 Em = E;
389 Wom = Wo;
390 }
391 }
392
393 model->Wo = Wom;
394 }
395
396 /*---------------------------------------------------------------------------*\
397
398 FUNCTION....: estimate_amplitudes
399 AUTHOR......: David Rowe
400 DATE CREATED: 27/5/94
401
402 Estimates the complex amplitudes of the harmonics.
403
404 \*---------------------------------------------------------------------------*/
405
estimate_amplitudes(MODEL * model,COMP Sw[],float W[],int est_phase)406 void estimate_amplitudes(MODEL *model, COMP Sw[], float W[], int est_phase)
407 {
408 int i,m; /* loop variables */
409 int am,bm; /* bounds of current harmonic */
410 float den; /* denominator of amplitude expression */
411
412 float r = TWO_PI/FFT_ENC;
413 float one_on_r = 1.0/r;
414
415 for(m=1; m<=model->L; m++) {
416 /* Estimate ampltude of harmonic */
417
418 den = 0.0;
419 am = (int)((m - 0.5)*model->Wo*one_on_r + 0.5);
420 bm = (int)((m + 0.5)*model->Wo*one_on_r + 0.5);
421
422 for(i=am; i<bm; i++) {
423 den += Sw[i].real*Sw[i].real + Sw[i].imag*Sw[i].imag;
424 }
425
426 model->A[m] = sqrtf(den);
427
428 if (est_phase) {
429 int b = (int)(m*model->Wo/r + 0.5); /* DFT bin of centre of current harmonic */
430
431 /* Estimate phase of harmonic, this is expensive in CPU for
432 embedded devicesso we make it an option */
433
434 model->phi[m] = atan2f(Sw[b].imag,Sw[b].real);
435 }
436 }
437 }
438
439 /*---------------------------------------------------------------------------*\
440
441 est_voicing_mbe()
442
443 Returns the error of the MBE cost function for a fiven F0.
444
445 Note: I think a lot of the operations below can be simplified as
446 W[].imag = 0 and has been normalised such that den always equals 1.
447
448 \*---------------------------------------------------------------------------*/
449
est_voicing_mbe(C2CONST * c2const,MODEL * model,COMP Sw[],float W[])450 float est_voicing_mbe(
451 C2CONST *c2const,
452 MODEL *model,
453 COMP Sw[],
454 float W[]
455 )
456 {
457 int l,al,bl,m; /* loop variables */
458 COMP Am; /* amplitude sample for this band */
459 int offset; /* centers Hw[] about current harmonic */
460 float den; /* denominator of Am expression */
461 float error; /* accumulated error between original and synthesised */
462 float Wo;
463 float sig, snr;
464 float elow, ehigh, eratio;
465 float sixty;
466 COMP Ew;
467 Ew.real = 0;
468 Ew.imag = 0;
469
470 int l_1000hz = model->L*1000.0/(c2const->Fs/2);
471 sig = 1E-4;
472 for(l=1; l<=l_1000hz; l++) {
473 sig += model->A[l]*model->A[l];
474 }
475
476 Wo = model->Wo;
477 error = 1E-4;
478
479 /* Just test across the harmonics in the first 1000 Hz */
480
481 for(l=1; l<=l_1000hz; l++) {
482 Am.real = 0.0;
483 Am.imag = 0.0;
484 den = 0.0;
485 al = ceilf((l - 0.5)*Wo*FFT_ENC/TWO_PI);
486 bl = ceilf((l + 0.5)*Wo*FFT_ENC/TWO_PI);
487
488 /* Estimate amplitude of harmonic assuming harmonic is totally voiced */
489
490 offset = FFT_ENC/2 - l*Wo*FFT_ENC/TWO_PI + 0.5;
491 for(m=al; m<bl; m++) {
492 Am.real += Sw[m].real*W[offset+m];
493 Am.imag += Sw[m].imag*W[offset+m];
494 den += W[offset+m]*W[offset+m];
495 }
496
497 Am.real = Am.real/den;
498 Am.imag = Am.imag/den;
499
500 /* Determine error between estimated harmonic and original */
501
502 for(m=al; m<bl; m++) {
503 Ew.real = Sw[m].real - Am.real*W[offset+m];
504 Ew.imag = Sw[m].imag - Am.imag*W[offset+m];
505 error += Ew.real*Ew.real;
506 error += Ew.imag*Ew.imag;
507 }
508 }
509
510 snr = 10.0*log10f(sig/error);
511 if (snr > V_THRESH)
512 model->voiced = 1;
513 else
514 model->voiced = 0;
515
516 /* post processing, helps clean up some voicing errors ------------------*/
517
518 /*
519 Determine the ratio of low freqency to high frequency energy,
520 voiced speech tends to be dominated by low frequency energy,
521 unvoiced by high frequency. This measure can be used to
522 determine if we have made any gross errors.
523 */
524
525 int l_2000hz = model->L*2000.0/(c2const->Fs/2);
526 int l_4000hz = model->L*4000.0/(c2const->Fs/2);
527 elow = ehigh = 1E-4;
528 for(l=1; l<=l_2000hz; l++) {
529 elow += model->A[l]*model->A[l];
530 }
531 for(l=l_2000hz; l<=l_4000hz; l++) {
532 ehigh += model->A[l]*model->A[l];
533 }
534 eratio = 10.0*log10f(elow/ehigh);
535
536 /* Look for Type 1 errors, strongly V speech that has been
537 accidentally declared UV */
538
539 if (model->voiced == 0)
540 if (eratio > 10.0)
541 model->voiced = 1;
542
543 /* Look for Type 2 errors, strongly UV speech that has been
544 accidentally declared V */
545
546 if (model->voiced == 1) {
547 if (eratio < -10.0)
548 model->voiced = 0;
549
550 /* A common source of Type 2 errors is the pitch estimator
551 gives a low (50Hz) estimate for UV speech, which gives a
552 good match with noise due to the close harmoonic spacing.
553 These errors are much more common than people with 50Hz3
554 pitch, so we have just a small eratio threshold. */
555
556 sixty = 60.0*TWO_PI/c2const->Fs;
557 if ((eratio < -4.0) && (model->Wo <= sixty))
558 model->voiced = 0;
559 }
560 //printf(" v: %d snr: %f eratio: %3.2f %f\n",model->voiced,snr,eratio,dF0);
561
562 return snr;
563 }
564
565 /*---------------------------------------------------------------------------*\
566
567 FUNCTION....: make_synthesis_window
568 AUTHOR......: David Rowe
569 DATE CREATED: 11/5/94
570
571 Init function that generates the trapezoidal (Parzen) sythesis window.
572
573 \*---------------------------------------------------------------------------*/
574
make_synthesis_window(C2CONST * c2const,float Pn[])575 void make_synthesis_window(C2CONST *c2const, float Pn[])
576 {
577 int i;
578 float win;
579 int n_samp = c2const->n_samp;
580 int tw = c2const->tw;
581
582 /* Generate Parzen window in time domain */
583
584 win = 0.0;
585 for(i=0; i<n_samp/2-tw; i++)
586 Pn[i] = 0.0;
587 win = 0.0;
588 for(i=n_samp/2-tw; i<n_samp/2+tw; win+=1.0/(2*tw), i++ )
589 Pn[i] = win;
590 for(i=n_samp/2+tw; i<3*n_samp/2-tw; i++)
591 Pn[i] = 1.0;
592 win = 1.0;
593 for(i=3*n_samp/2-tw; i<3*n_samp/2+tw; win-=1.0/(2*tw), i++)
594 Pn[i] = win;
595 for(i=3*n_samp/2+tw; i<2*n_samp; i++)
596 Pn[i] = 0.0;
597 }
598
599 /*---------------------------------------------------------------------------*\
600
601 FUNCTION....: synthesise
602 AUTHOR......: David Rowe
603 DATE CREATED: 20/2/95
604
605 Synthesise a speech signal in the frequency domain from the
606 sinusodal model parameters. Uses overlap-add with a trapezoidal
607 window to smoothly interpolate betwen frames.
608
609 \*---------------------------------------------------------------------------*/
610
synthesise(int n_samp,codec2_fftr_cfg fftr_inv_cfg,float Sn_[],MODEL * model,float Pn[],int shift)611 void synthesise(
612 int n_samp,
613 codec2_fftr_cfg fftr_inv_cfg,
614 float Sn_[], /* time domain synthesised signal */
615 MODEL *model, /* ptr to model parameters for this frame */
616 float Pn[], /* time domain Parzen window */
617 int shift /* flag used to handle transition frames */
618 )
619 {
620 int i,l,j,b; /* loop variables */
621 COMP Sw_[FFT_DEC/2+1]; /* DFT of synthesised signal */
622 float sw_[FFT_DEC]; /* synthesised signal */
623
624 if (shift) {
625 /* Update memories */
626 for(i=0; i<n_samp-1; i++) {
627 Sn_[i] = Sn_[i+n_samp];
628 }
629 Sn_[n_samp-1] = 0.0;
630 }
631
632 for(i=0; i<FFT_DEC/2+1; i++) {
633 Sw_[i].real = 0.0;
634 Sw_[i].imag = 0.0;
635 }
636
637 /* Now set up frequency domain synthesised speech */
638
639 for(l=1; l<=model->L; l++) {
640 b = (int)(l*model->Wo*FFT_DEC/TWO_PI + 0.5);
641 if (b > ((FFT_DEC/2)-1)) {
642 b = (FFT_DEC/2)-1;
643 }
644 Sw_[b].real = model->A[l]*cosf(model->phi[l]);
645 Sw_[b].imag = model->A[l]*sinf(model->phi[l]);
646 }
647
648 /* Perform inverse DFT */
649
650 codec2_fftri(fftr_inv_cfg, Sw_,sw_);
651
652 /* Overlap add to previous samples */
653
654 #ifdef USE_KISS_FFT
655 #define FFTI_FACTOR ((float)1.0)
656 #else
657 #define FFTI_FACTOR ((float32_t)FFT_DEC)
658 #endif
659
660 for(i=0; i<n_samp-1; i++) {
661 Sn_[i] += sw_[FFT_DEC-n_samp+1+i]*Pn[i] * FFTI_FACTOR;
662 }
663
664 if (shift)
665 for(i=n_samp-1,j=0; i<2*n_samp; i++,j++)
666 Sn_[i] = sw_[j]*Pn[i] * FFTI_FACTOR;
667 else
668 for(i=n_samp-1,j=0; i<2*n_samp; i++,j++)
669 Sn_[i] += sw_[j]*Pn[i] * FFTI_FACTOR;
670 }
671
672
673 /* todo: this should probably be in some states rather than a static */
674 static unsigned long next = 1;
675
codec2_rand(void)676 int codec2_rand(void) {
677 next = next * 1103515245 + 12345;
678 return((unsigned)(next/65536) % 32768);
679 }
680
681