1 /*---------------------------------------------------------------------------*\
2 
3   FILE........: quantise.c
4   AUTHOR......: David Rowe
5   DATE CREATED: 31/5/92
6 
7   Quantisation functions for the sinusoidal coder.
8 
9 \*---------------------------------------------------------------------------*/
10 
11 /*
12   All rights reserved.
13 
14   This program is free software; you can redistribute it and/or modify
15   it under the terms of the GNU Lesser General Public License version 2.1, as
16   published by the Free Software Foundation.  This program is
17   distributed in the hope that it will be useful, but WITHOUT ANY
18   WARRANTY; without even the implied warranty of MERCHANTABILITY or
19   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
20   License for more details.
21 
22   You should have received a copy of the GNU Lesser General Public License
23   along with this program; if not, see <http://www.gnu.org/licenses/>.
24 
25 */
26 
27 #include <assert.h>
28 #include <ctype.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <math.h>
33 
34 #include "defines.h"
35 #include "dump.h"
36 #include "quantise.h"
37 #include "lpc.h"
38 #include "lsp.h"
39 #include "codec2_fft.h"
40 #include "phase.h"
41 #include "mbest.h"
42 
43 #undef PROFILE
44 #include "machdep.h"
45 
46 #define LSP_DELTA1 0.01         /* grid spacing for LSP root searches */
47 
48 /*---------------------------------------------------------------------------*\
49 
50                           FUNCTION HEADERS
51 
52 \*---------------------------------------------------------------------------*/
53 
54 float speech_to_uq_lsps(float lsp[], float ak[], float Sn[], float w[],
55 			int m_pitch, int order);
56 
57 /*---------------------------------------------------------------------------*\
58 
59                              FUNCTIONS
60 
61 \*---------------------------------------------------------------------------*/
62 
lsp_bits(int i)63 int lsp_bits(int i) {
64     return lsp_cb[i].log2m;
65 }
66 
lspd_bits(int i)67 int lspd_bits(int i) {
68     return lsp_cbd[i].log2m;
69 }
70 
lsp_pred_vq_bits(int i)71 int lsp_pred_vq_bits(int i) {
72     return lsp_cbjvm[i].log2m;
73 }
74 
75 /*---------------------------------------------------------------------------*\
76 
77   quantise
78 
79   Quantises vec by choosing the nearest vector in codebook cb, and
80   returns the vector index.  The squared error of the quantised vector
81   is added to se.
82 
83 \*---------------------------------------------------------------------------*/
84 
quantise(const float * cb,float vec[],float w[],int k,int m,float * se)85 long quantise(const float * cb, float vec[], float w[], int k, int m, float *se)
86 /* float   cb[][K];	current VQ codebook		*/
87 /* float   vec[];	vector to quantise		*/
88 /* float   w[];         weighting vector                */
89 /* int	   k;		dimension of vectors		*/
90 /* int     m;		size of codebook		*/
91 /* float   *se;		accumulated squared error 	*/
92 {
93    float   e;		/* current error		*/
94    long	   besti;	/* best index so far		*/
95    float   beste;	/* best error so far		*/
96    long	   j;
97    int     i;
98    float   diff;
99 
100    besti = 0;
101    beste = 1E32;
102    for(j=0; j<m; j++) {
103 	e = 0.0;
104 	for(i=0; i<k; i++) {
105 	    diff = cb[j*k+i]-vec[i];
106 	    e += (diff*w[i] * diff*w[i]);
107 	}
108 	if (e < beste) {
109 	    beste = e;
110 	    besti = j;
111 	}
112    }
113 
114    *se += beste;
115 
116    return(besti);
117 }
118 
119 
120 
121 /*---------------------------------------------------------------------------*\
122 
123   encode_lspds_scalar()
124 
125   Scalar/VQ LSP difference-in-frequency quantiser.
126 
127 \*---------------------------------------------------------------------------*/
128 
encode_lspds_scalar(int indexes[],float lsp[],int order)129 void encode_lspds_scalar(
130 		 int   indexes[],
131 		 float lsp[],
132 		 int   order
133 )
134 {
135     int   i,k,m;
136     float lsp_hz[order];
137     float lsp__hz[order];
138     float dlsp[order];
139     float dlsp_[order];
140     float wt[order];
141     const float *cb;
142     float se;
143 
144     for(i=0; i<order; i++) {
145 	wt[i] = 1.0;
146     }
147 
148     /* convert from radians to Hz so we can use human readable
149        frequencies */
150 
151     for(i=0; i<order; i++)
152 	lsp_hz[i] = (4000.0/PI)*lsp[i];
153 
154     wt[0] = 1.0;
155     for(i=0; i<order; i++) {
156 
157 	/* find difference from previous quantised lsp */
158 
159 	if (i)
160 	    dlsp[i] = lsp_hz[i] - lsp__hz[i-1];
161 	else
162 	    dlsp[0] = lsp_hz[0];
163 
164 	k = lsp_cbd[i].k;
165 	m = lsp_cbd[i].m;
166 	cb = lsp_cbd[i].cb;
167 	indexes[i] = quantise(cb, &dlsp[i], wt, k, m, &se);
168  	dlsp_[i] = cb[indexes[i]*k];
169 
170 	if (i)
171 	    lsp__hz[i] = lsp__hz[i-1] + dlsp_[i];
172 	else
173 	    lsp__hz[0] = dlsp_[0];
174     }
175 
176 }
177 
178 
decode_lspds_scalar(float lsp_[],int indexes[],int order)179 void decode_lspds_scalar(
180 		 float lsp_[],
181 		 int   indexes[],
182 		 int   order
183 )
184 {
185     int   i,k;
186     float lsp__hz[order];
187     float dlsp_[order];
188     const float *cb;
189 
190      for(i=0; i<order; i++) {
191 
192 	k = lsp_cbd[i].k;
193 	cb = lsp_cbd[i].cb;
194  	dlsp_[i] = cb[indexes[i]*k];
195 
196 	if (i)
197 	    lsp__hz[i] = lsp__hz[i-1] + dlsp_[i];
198 	else
199 	    lsp__hz[0] = dlsp_[0];
200 
201 	lsp_[i] = (PI/4000.0)*lsp__hz[i];
202     }
203 
204 }
205 
206 #define MIN(a,b) ((a)<(b)?(a):(b))
207 #define MAX_ENTRIES 16384
208 
compute_weights(const float * x,float * w,int ndim)209 void compute_weights(const float *x, float *w, int ndim)
210 {
211   int i;
212   w[0] = MIN(x[0], x[1]-x[0]);
213   for (i=1;i<ndim-1;i++)
214     w[i] = MIN(x[i]-x[i-1], x[i+1]-x[i]);
215   w[ndim-1] = MIN(x[ndim-1]-x[ndim-2], PI-x[ndim-1]);
216 
217   for (i=0;i<ndim;i++)
218     w[i] = 1./(.01+w[i]);
219 }
220 
find_nearest(const float * codebook,int nb_entries,float * x,int ndim)221 int find_nearest(const float *codebook, int nb_entries, float *x, int ndim)
222 {
223   int i, j;
224   float min_dist = 1e15;
225   int nearest = 0;
226 
227   for (i=0;i<nb_entries;i++)
228   {
229     float dist=0;
230     for (j=0;j<ndim;j++)
231       dist += (x[j]-codebook[i*ndim+j])*(x[j]-codebook[i*ndim+j]);
232     if (dist<min_dist)
233     {
234       min_dist = dist;
235       nearest = i;
236     }
237   }
238   return nearest;
239 }
240 
find_nearest_weighted(const float * codebook,int nb_entries,float * x,const float * w,int ndim)241 int find_nearest_weighted(const float *codebook, int nb_entries, float *x, const float *w, int ndim)
242 {
243   int i, j;
244   float min_dist = 1e15;
245   int nearest = 0;
246 
247   for (i=0;i<nb_entries;i++)
248   {
249     float dist=0;
250     for (j=0;j<ndim;j++)
251       dist += w[j]*(x[j]-codebook[i*ndim+j])*(x[j]-codebook[i*ndim+j]);
252     if (dist<min_dist)
253     {
254       min_dist = dist;
255       nearest = i;
256     }
257   }
258   return nearest;
259 }
260 
lspjvm_quantise(float * x,float * xq,int order)261 void lspjvm_quantise(float *x, float *xq, int order)
262 {
263   int i, n1, n2, n3;
264   float err[order], err2[order], err3[order];
265   float w[order], w2[order], w3[order];
266   const float *codebook1 = lsp_cbjvm[0].cb;
267   const float *codebook2 = lsp_cbjvm[1].cb;
268   const float *codebook3 = lsp_cbjvm[2].cb;
269 
270   w[0] = MIN(x[0], x[1]-x[0]);
271   for (i=1;i<order-1;i++)
272     w[i] = MIN(x[i]-x[i-1], x[i+1]-x[i]);
273   w[order-1] = MIN(x[order-1]-x[order-2], PI-x[order-1]);
274 
275   compute_weights(x, w, order);
276 
277   n1 = find_nearest(codebook1, lsp_cbjvm[0].m, x, order);
278 
279   for (i=0;i<order;i++)
280   {
281     xq[i] = codebook1[order*n1+i];
282     err[i] = x[i] - xq[i];
283   }
284   for (i=0;i<order/2;i++)
285   {
286     err2[i] = err[2*i];
287     err3[i] = err[2*i+1];
288     w2[i] = w[2*i];
289     w3[i] = w[2*i+1];
290   }
291   n2 = find_nearest_weighted(codebook2, lsp_cbjvm[1].m, err2, w2, order/2);
292   n3 = find_nearest_weighted(codebook3, lsp_cbjvm[2].m, err3, w3, order/2);
293 
294   for (i=0;i<order/2;i++)
295   {
296     xq[2*i] += codebook2[order*n2/2+i];
297     xq[2*i+1] += codebook3[order*n3/2+i];
298   }
299 }
300 
check_lsp_order(float lsp[],int order)301 int check_lsp_order(float lsp[], int order)
302 {
303     int   i;
304     float tmp;
305     int   swaps = 0;
306 
307     for(i=1; i<order; i++)
308 	if (lsp[i] < lsp[i-1]) {
309 	    //fprintf(stderr, "swap %d\n",i);
310 	    swaps++;
311 	    tmp = lsp[i-1];
312 	    lsp[i-1] = lsp[i]-0.1;
313 	    lsp[i] = tmp+0.1;
314             i = 1; /* start check again, as swap may have caused out of order */
315 	}
316 
317     return swaps;
318 }
319 
force_min_lsp_dist(float lsp[],int order)320 void force_min_lsp_dist(float lsp[], int order)
321 {
322     int   i;
323 
324     for(i=1; i<order; i++)
325 	if ((lsp[i]-lsp[i-1]) < 0.01) {
326 	    lsp[i] += 0.01;
327 	}
328 }
329 
330 
331 /*---------------------------------------------------------------------------*\
332 
333    lpc_post_filter()
334 
335    Applies a post filter to the LPC synthesis filter power spectrum
336    Pw, which supresses the inter-formant energy.
337 
338    The algorithm is from p267 (Section 8.6) of "Digital Speech",
339    edited by A.M. Kondoz, 1994 published by Wiley and Sons.  Chapter 8
340    of this text is on the MBE vocoder, and this is a freq domain
341    adaptation of post filtering commonly used in CELP.
342 
343    I used the Octave simulation lpcpf.m to get an understanding of the
344    algorithm.
345 
346    Requires two more FFTs which is significantly more MIPs.  However
347    it should be possible to implement this more efficiently in the
348    time domain.  Just not sure how to handle relative time delays
349    between the synthesis stage and updating these coeffs.  A smaller
350    FFT size might also be accetable to save CPU.
351 
352    TODO:
353    [ ] sync var names between Octave and C version
354    [ ] doc gain normalisation
355    [ ] I think the first FFT is not rqd as we do the same
356        thing in aks_to_M2().
357 
358 \*---------------------------------------------------------------------------*/
359 
lpc_post_filter(codec2_fftr_cfg fftr_fwd_cfg,float Pw[],float ak[],int order,int dump,float beta,float gamma,int bass_boost,float E)360 void lpc_post_filter(codec2_fftr_cfg fftr_fwd_cfg, float Pw[], float ak[],
361                      int order, int dump, float beta, float gamma, int bass_boost, float E)
362 {
363     int   i;
364     float x[FFT_ENC];   /* input to FFTs                */
365     COMP  Ww[FFT_ENC/2+1];  /* weighting spectrum           */
366     float Rw[FFT_ENC/2+1];  /* R = WA                       */
367     float e_before, e_after, gain;
368     float Pfw;
369     float max_Rw, min_Rw;
370     float coeff;
371     PROFILE_VAR(tstart, tfft1, taw, tfft2, tww, tr);
372 
373     PROFILE_SAMPLE(tstart);
374 
375     /* Determine weighting filter spectrum W(exp(jw)) ---------------*/
376 
377     for(i=0; i<FFT_ENC; i++) {
378 	x[i] = 0.0;
379     }
380 
381     x[0]  = ak[0];
382     coeff = gamma;
383     for(i=1; i<=order; i++) {
384 	x[i] = ak[i] * coeff;
385         coeff *= gamma;
386     }
387     codec2_fftr(fftr_fwd_cfg, x, Ww);
388 
389     PROFILE_SAMPLE_AND_LOG(tfft2, taw, "        fft2");
390 
391     for(i=0; i<FFT_ENC/2; i++) {
392 	Ww[i].real = Ww[i].real*Ww[i].real + Ww[i].imag*Ww[i].imag;
393     }
394 
395     PROFILE_SAMPLE_AND_LOG(tww, tfft2, "        Ww");
396 
397     /* Determined combined filter R = WA ---------------------------*/
398 
399     max_Rw = 0.0; min_Rw = 1E32;
400     for(i=0; i<FFT_ENC/2; i++) {
401 	Rw[i] = sqrtf(Ww[i].real * Pw[i]);
402 	if (Rw[i] > max_Rw)
403 	    max_Rw = Rw[i];
404 	if (Rw[i] < min_Rw)
405 	    min_Rw = Rw[i];
406 
407     }
408 
409     PROFILE_SAMPLE_AND_LOG(tr, tww, "        R");
410 
411     #ifdef DUMP
412     if (dump)
413       dump_Rw(Rw);
414     #endif
415 
416     /* create post filter mag spectrum and apply ------------------*/
417 
418     /* measure energy before post filtering */
419 
420     e_before = 1E-4;
421     for(i=0; i<FFT_ENC/2; i++)
422 	e_before += Pw[i];
423 
424     /* apply post filter and measure energy  */
425 
426     #ifdef DUMP
427     if (dump)
428 	dump_Pwb(Pw);
429     #endif
430 
431 
432     e_after = 1E-4;
433     for(i=0; i<FFT_ENC/2; i++) {
434         Pfw = powf(Rw[i], beta);
435         Pw[i] *= Pfw * Pfw;
436         e_after += Pw[i];
437     }
438     gain = e_before/e_after;
439 
440     /* apply gain factor to normalise energy, and LPC Energy */
441 
442     gain *= E;
443     for(i=0; i<FFT_ENC/2; i++) {
444 	Pw[i] *= gain;
445     }
446 
447     if (bass_boost) {
448         /* add 3dB to first 1 kHz to account for LP effect of PF */
449 
450         for(i=0; i<FFT_ENC/8; i++) {
451             Pw[i] *= 1.4*1.4;
452         }
453     }
454 
455     PROFILE_SAMPLE_AND_LOG2(tr, "        filt");
456 }
457 
458 
459 /*---------------------------------------------------------------------------*\
460 
461    aks_to_M2()
462 
463    Transforms the linear prediction coefficients to spectral amplitude
464    samples.  This function determines A(m) from the average energy per
465    band using an FFT.
466 
467 \*---------------------------------------------------------------------------*/
468 
aks_to_M2(codec2_fftr_cfg fftr_fwd_cfg,float ak[],int order,MODEL * model,float E,float * snr,int dump,int sim_pf,int pf,int bass_boost,float beta,float gamma,COMP Aw[])469 void aks_to_M2(
470   codec2_fftr_cfg  fftr_fwd_cfg,
471   float         ak[],	     /* LPC's */
472   int           order,
473   MODEL        *model,	     /* sinusoidal model parameters for this frame */
474   float         E,	     /* energy term */
475   float        *snr,	     /* signal to noise ratio for this frame in dB */
476   int           dump,        /* true to dump sample to dump file */
477   int           sim_pf,      /* true to simulate a post filter */
478   int           pf,          /* true to enable actual LPC post filter */
479   int           bass_boost,  /* enable LPC filter 0-1kHz 3dB boost */
480   float         beta,
481   float         gamma,       /* LPC post filter parameters */
482   COMP          Aw[]         /* output power spectrum */
483 )
484 {
485   int i,m;		/* loop variables */
486   int am,bm;		/* limits of current band */
487   float r;		/* no. rads/bin */
488   float Em;		/* energy in band */
489   float Am;		/* spectral amplitude sample */
490   float signal, noise;
491   PROFILE_VAR(tstart, tfft, tpw, tpf);
492 
493   PROFILE_SAMPLE(tstart);
494 
495   r = TWO_PI/(FFT_ENC);
496 
497   /* Determine DFT of A(exp(jw)) --------------------------------------------*/
498   {
499       float a[FFT_ENC];  /* input to FFT for power spectrum */
500 
501       for(i=0; i<FFT_ENC; i++) {
502           a[i] = 0.0;
503       }
504 
505       for(i=0; i<=order; i++)
506           a[i] = ak[i];
507       codec2_fftr(fftr_fwd_cfg, a, Aw);
508   }
509   PROFILE_SAMPLE_AND_LOG(tfft, tstart, "      fft");
510 
511   /* Determine power spectrum P(w) = E/(A(exp(jw))^2 ------------------------*/
512 
513   float Pw[FFT_ENC/2];
514 
515 #ifndef FDV_ARM_MATH
516   for(i=0; i<FFT_ENC/2; i++) {
517     Pw[i] = 1.0/(Aw[i].real*Aw[i].real + Aw[i].imag*Aw[i].imag + 1E-6);
518   }
519 #else
520   // this difference may seem strange, but the gcc for STM32F4 generates almost 5 times
521   // faster code with the two loops: 1120 ms -> 242 ms
522   // so please leave it as is or improve further
523   // since this code is called 4 times it results in almost 4ms gain (21ms -> 17ms per audio frame decode @ 1300 )
524 
525   for(i=0; i<FFT_ENC/2; i++)
526   {
527       Pw[i] = Aw[i].real * Aw[i].real + Aw[i].imag * Aw[i].imag  + 1E-6;
528   }
529   for(i=0; i<FFT_ENC/2; i++) {
530       Pw[i] = 1.0/(Pw[i]);
531   }
532 #endif
533 
534   PROFILE_SAMPLE_AND_LOG(tpw, tfft, "      Pw");
535 
536   if (pf)
537       lpc_post_filter(fftr_fwd_cfg, Pw, ak, order, dump, beta, gamma, bass_boost, E);
538   else {
539       for(i=0; i<FFT_ENC/2; i++) {
540           Pw[i] *= E;
541       }
542   }
543 
544   PROFILE_SAMPLE_AND_LOG(tpf, tpw, "      LPC post filter");
545 
546   #ifdef DUMP
547   if (dump)
548       dump_Pw(Pw);
549   #endif
550 
551   /* Determine magnitudes from P(w) ----------------------------------------*/
552 
553   /* when used just by decoder {A} might be all zeroes so init signal
554      and noise to prevent log(0) errors */
555 
556   signal = 1E-30; noise = 1E-32;
557 
558   for(m=1; m<=model->L; m++) {
559       am = (int)((m - 0.5)*model->Wo/r + 0.5);
560       bm = (int)((m + 0.5)*model->Wo/r + 0.5);
561 
562       // FIXME: With arm_rfft_fast_f32 we have to use this
563       // otherwise sometimes a to high bm is calculated
564       // which causes trouble later in the calculation
565       // chain
566       // it seems for some reason model->Wo is calculated somewhat too high
567       if (bm>FFT_ENC/2)
568       {
569           bm = FFT_ENC/2;
570       }
571       Em = 0.0;
572 
573       for(i=am; i<bm; i++)
574           Em += Pw[i];
575       Am = sqrtf(Em);
576 
577       signal += model->A[m]*model->A[m];
578       noise  += (model->A[m] - Am)*(model->A[m] - Am);
579 
580       /* This code significantly improves perf of LPC model, in
581          particular when combined with phase0.  The LPC spectrum tends
582          to track just under the peaks of the spectral envelope, and
583          just above nulls.  This algorithm does the reverse to
584          compensate - raising the amplitudes of spectral peaks, while
585          attenuating the null.  This enhances the formants, and
586          supresses the energy between formants. */
587 
588       if (sim_pf) {
589           if (Am > model->A[m])
590               Am *= 0.7;
591           if (Am < model->A[m])
592               Am *= 1.4;
593       }
594       model->A[m] = Am;
595   }
596   *snr = 10.0*log10f(signal/noise);
597 
598   PROFILE_SAMPLE_AND_LOG2(tpf, "      rec");
599 }
600 
601 /*---------------------------------------------------------------------------*\
602 
603   FUNCTION....: encode_Wo()
604   AUTHOR......: David Rowe
605   DATE CREATED: 22/8/2010
606 
607   Encodes Wo using a WO_LEVELS quantiser.
608 
609 \*---------------------------------------------------------------------------*/
610 
encode_Wo(C2CONST * c2const,float Wo,int bits)611 int encode_Wo(C2CONST *c2const, float Wo, int bits)
612 {
613     int   index, Wo_levels = 1<<bits;
614     float Wo_min = c2const->Wo_min;
615     float Wo_max = c2const->Wo_max;
616     float norm;
617 
618     norm = (Wo - Wo_min)/(Wo_max - Wo_min);
619     index = floorf(Wo_levels * norm + 0.5);
620     if (index < 0 ) index = 0;
621     if (index > (Wo_levels-1)) index = Wo_levels-1;
622 
623     return index;
624 }
625 
626 /*---------------------------------------------------------------------------*\
627 
628   FUNCTION....: decode_Wo()
629   AUTHOR......: David Rowe
630   DATE CREATED: 22/8/2010
631 
632   Decodes Wo using a WO_LEVELS quantiser.
633 
634 \*---------------------------------------------------------------------------*/
635 
decode_Wo(C2CONST * c2const,int index,int bits)636 float decode_Wo(C2CONST *c2const, int index, int bits)
637 {
638     float Wo_min = c2const->Wo_min;
639     float Wo_max = c2const->Wo_max;
640     float step;
641     float Wo;
642     int   Wo_levels = 1<<bits;
643 
644     step = (Wo_max - Wo_min)/Wo_levels;
645     Wo   = Wo_min + step*(index);
646 
647     return Wo;
648 }
649 
650 /*---------------------------------------------------------------------------*\
651 
652   FUNCTION....: encode_log_Wo()
653   AUTHOR......: David Rowe
654   DATE CREATED: 22/8/2010
655 
656   Encodes Wo in the log domain using a WO_LEVELS quantiser.
657 
658 \*---------------------------------------------------------------------------*/
659 
encode_log_Wo(C2CONST * c2const,float Wo,int bits)660 int encode_log_Wo(C2CONST *c2const, float Wo, int bits)
661 {
662     int   index, Wo_levels = 1<<bits;
663     float Wo_min = c2const->Wo_min;
664     float Wo_max = c2const->Wo_max;
665     float norm;
666 
667     norm = (log10f(Wo) - log10f(Wo_min))/(log10f(Wo_max) - log10f(Wo_min));
668     index = floorf(Wo_levels * norm + 0.5);
669     if (index < 0 ) index = 0;
670     if (index > (Wo_levels-1)) index = Wo_levels-1;
671 
672     return index;
673 }
674 
675 /*---------------------------------------------------------------------------*\
676 
677   FUNCTION....: decode_log_Wo()
678   AUTHOR......: David Rowe
679   DATE CREATED: 22/8/2010
680 
681   Decodes Wo using a WO_LEVELS quantiser in the log domain.
682 
683 \*---------------------------------------------------------------------------*/
684 
decode_log_Wo(C2CONST * c2const,int index,int bits)685 float decode_log_Wo(C2CONST *c2const, int index, int bits)
686 {
687     float Wo_min = c2const->Wo_min;
688     float Wo_max = c2const->Wo_max;
689     float step;
690     float Wo;
691     int   Wo_levels = 1<<bits;
692 
693     step = (log10f(Wo_max) - log10f(Wo_min))/Wo_levels;
694     Wo   = log10f(Wo_min) + step*(index);
695 
696     return POW10F(Wo);
697 }
698 
699 /*---------------------------------------------------------------------------*\
700 
701   FUNCTION....: speech_to_uq_lsps()
702   AUTHOR......: David Rowe
703   DATE CREATED: 22/8/2010
704 
705   Analyse a windowed frame of time domain speech to determine LPCs
706   which are the converted to LSPs for quantisation and transmission
707   over the channel.
708 
709 \*---------------------------------------------------------------------------*/
710 
speech_to_uq_lsps(float lsp[],float ak[],float Sn[],float w[],int m_pitch,int order)711 float speech_to_uq_lsps(float lsp[],
712 			float ak[],
713 		        float Sn[],
714 		        float w[],
715 		        int m_pitch,
716                         int   order
717 )
718 {
719     int   i, roots;
720     float Wn[m_pitch];
721     float R[order+1];
722     float e, E;
723 
724     e = 0.0;
725     for(i=0; i<m_pitch; i++) {
726 	Wn[i] = Sn[i]*w[i];
727 	e += Wn[i]*Wn[i];
728     }
729 
730     /* trap 0 energy case as LPC analysis will fail */
731 
732     if (e == 0.0) {
733 	for(i=0; i<order; i++)
734 	    lsp[i] = (PI/order)*(float)i;
735 	return 0.0;
736     }
737 
738     autocorrelate(Wn, R, m_pitch, order);
739     levinson_durbin(R, ak, order);
740 
741     E = 0.0;
742     for(i=0; i<=order; i++)
743 	E += ak[i]*R[i];
744 
745     /* 15 Hz BW expansion as I can't hear the difference and it may help
746        help occasional fails in the LSP root finding.  Important to do this
747        after energy calculation to avoid -ve energy values.
748     */
749 
750     for(i=0; i<=order; i++)
751 	ak[i] *= powf(0.994,(float)i);
752 
753     roots = lpc_to_lsp(ak, order, lsp, 5, LSP_DELTA1);
754     if (roots != order) {
755 	/* if root finding fails use some benign LSP values instead */
756 	for(i=0; i<order; i++)
757 	    lsp[i] = (PI/order)*(float)i;
758     }
759 
760     return E;
761 }
762 
763 /*---------------------------------------------------------------------------*\
764 
765   FUNCTION....: encode_lsps_scalar()
766   AUTHOR......: David Rowe
767   DATE CREATED: 22/8/2010
768 
769   Thirty-six bit sclar LSP quantiser. From a vector of unquantised
770   (floating point) LSPs finds the quantised LSP indexes.
771 
772 \*---------------------------------------------------------------------------*/
773 
encode_lsps_scalar(int indexes[],float lsp[],int order)774 void encode_lsps_scalar(int indexes[], float lsp[], int order)
775 {
776     int    i,k,m;
777     float  wt[1];
778     float  lsp_hz[order];
779     const float * cb;
780     float se;
781 
782     /* convert from radians to Hz so we can use human readable
783        frequencies */
784 
785     for(i=0; i<order; i++)
786 	lsp_hz[i] = (4000.0/PI)*lsp[i];
787 
788     /* scalar quantisers */
789 
790     wt[0] = 1.0;
791     for(i=0; i<order; i++) {
792 	k = lsp_cb[i].k;
793 	m = lsp_cb[i].m;
794 	cb = lsp_cb[i].cb;
795 	indexes[i] = quantise(cb, &lsp_hz[i], wt, k, m, &se);
796     }
797 }
798 
799 /*---------------------------------------------------------------------------*\
800 
801   FUNCTION....: decode_lsps_scalar()
802   AUTHOR......: David Rowe
803   DATE CREATED: 22/8/2010
804 
805   From a vector of quantised LSP indexes, returns the quantised
806   (floating point) LSPs.
807 
808 \*---------------------------------------------------------------------------*/
809 
decode_lsps_scalar(float lsp[],int indexes[],int order)810 void decode_lsps_scalar(float lsp[], int indexes[], int order)
811 {
812     int    i,k;
813     float  lsp_hz[order];
814     const float * cb;
815 
816     for(i=0; i<order; i++) {
817 	k = lsp_cb[i].k;
818 	cb = lsp_cb[i].cb;
819 	lsp_hz[i] = cb[indexes[i]*k];
820     }
821 
822     /* convert back to radians */
823 
824     for(i=0; i<order; i++)
825 	lsp[i] = (PI/4000.0)*lsp_hz[i];
826 }
827 
828 /*---------------------------------------------------------------------------*\
829 
830   FUNCTION....: encode_lsps_vq()
831   AUTHOR......: David Rowe
832   DATE CREATED: 15 Feb 2012
833 
834   Multi-stage VQ LSP quantiser developed by Jean-Marc Valin.
835 
836 \*---------------------------------------------------------------------------*/
837 
encode_lsps_vq(int * indexes,float * x,float * xq,int order)838 void encode_lsps_vq(int *indexes, float *x, float *xq, int order)
839 {
840   int i, n1, n2, n3;
841   float err[order], err2[order], err3[order];
842   float w[order], w2[order], w3[order];
843   const float *codebook1 = lsp_cbjvm[0].cb;
844   const float *codebook2 = lsp_cbjvm[1].cb;
845   const float *codebook3 = lsp_cbjvm[2].cb;
846 
847   w[0] = MIN(x[0], x[1]-x[0]);
848   for (i=1;i<order-1;i++)
849     w[i] = MIN(x[i]-x[i-1], x[i+1]-x[i]);
850   w[order-1] = MIN(x[order-1]-x[order-2], PI-x[order-1]);
851 
852   compute_weights(x, w, order);
853 
854   n1 = find_nearest(codebook1, lsp_cbjvm[0].m, x, order);
855 
856   for (i=0;i<order;i++)
857   {
858     xq[i]  = codebook1[order*n1+i];
859     err[i] = x[i] - xq[i];
860   }
861   for (i=0;i<order/2;i++)
862   {
863     err2[i] = err[2*i];
864     err3[i] = err[2*i+1];
865     w2[i] = w[2*i];
866     w3[i] = w[2*i+1];
867   }
868   n2 = find_nearest_weighted(codebook2, lsp_cbjvm[1].m, err2, w2, order/2);
869   n3 = find_nearest_weighted(codebook3, lsp_cbjvm[2].m, err3, w3, order/2);
870 
871   indexes[0] = n1;
872   indexes[1] = n2;
873   indexes[2] = n3;
874 }
875 
876 
877 /*---------------------------------------------------------------------------*\
878 
879   FUNCTION....: decode_lsps_vq()
880   AUTHOR......: David Rowe
881   DATE CREATED: 15 Feb 2012
882 
883 \*---------------------------------------------------------------------------*/
884 
decode_lsps_vq(int * indexes,float * xq,int order,int stages)885 void decode_lsps_vq(int *indexes, float *xq, int order, int stages)
886 {
887   int i, n1, n2, n3;
888   const float *codebook1 = lsp_cbjvm[0].cb;
889   const float *codebook2 = lsp_cbjvm[1].cb;
890   const float *codebook3 = lsp_cbjvm[2].cb;
891 
892   n1 = indexes[0];
893   n2 = indexes[1];
894   n3 = indexes[2];
895 
896   for (i=0;i<order;i++) {
897       xq[i] = codebook1[order*n1+i];
898   }
899 
900   if (stages != 1) {
901       for (i=0;i<order/2;i++) {
902           xq[2*i] += codebook2[order*n2/2+i];
903           xq[2*i+1] += codebook3[order*n3/2+i];
904       }
905   }
906 
907 }
908 
909 
910 /*---------------------------------------------------------------------------*\
911 
912   FUNCTION....: bw_expand_lsps()
913   AUTHOR......: David Rowe
914   DATE CREATED: 22/8/2010
915 
916   Applies Bandwidth Expansion (BW) to a vector of LSPs.  Prevents any
917   two LSPs getting too close together after quantisation.  We know
918   from experiment that LSP quantisation errors < 12.5Hz (25Hz step
919   size) are inaudible so we use that as the minimum LSP separation.
920 
921 \*---------------------------------------------------------------------------*/
922 
bw_expand_lsps(float lsp[],int order,float min_sep_low,float min_sep_high)923 void bw_expand_lsps(float lsp[], int order, float min_sep_low, float min_sep_high)
924 {
925     int i;
926 
927     for(i=1; i<4; i++) {
928 
929 	if ((lsp[i] - lsp[i-1]) < min_sep_low*(PI/4000.0))
930 	    lsp[i] = lsp[i-1] + min_sep_low*(PI/4000.0);
931 
932     }
933 
934     /* As quantiser gaps increased, larger BW expansion was required
935        to prevent twinkly noises.  This may need more experiment for
936        different quanstisers.
937     */
938 
939     for(i=4; i<order; i++) {
940 	if (lsp[i] - lsp[i-1] < min_sep_high*(PI/4000.0))
941 	    lsp[i] = lsp[i-1] + min_sep_high*(PI/4000.0);
942     }
943 }
944 
bw_expand_lsps2(float lsp[],int order)945 void bw_expand_lsps2(float lsp[],
946 		    int   order
947 )
948 {
949     int i;
950 
951     for(i=1; i<4; i++) {
952 
953 	if ((lsp[i] - lsp[i-1]) < 100.0*(PI/4000.0))
954 	    lsp[i] = lsp[i-1] + 100.0*(PI/4000.0);
955 
956     }
957 
958     /* As quantiser gaps increased, larger BW expansion was required
959        to prevent twinkly noises.  This may need more experiment for
960        different quanstisers.
961     */
962 
963     for(i=4; i<order; i++) {
964 	if (lsp[i] - lsp[i-1] < 200.0*(PI/4000.0))
965 	    lsp[i] = lsp[i-1] + 200.0*(PI/4000.0);
966     }
967 }
968 
969 /*---------------------------------------------------------------------------*\
970 
971   FUNCTION....: apply_lpc_correction()
972   AUTHOR......: David Rowe
973   DATE CREATED: 22/8/2010
974 
975   Apply first harmonic LPC correction at decoder.  This helps improve
976   low pitch males after LPC modelling, like hts1a and morig.
977 
978 \*---------------------------------------------------------------------------*/
979 
apply_lpc_correction(MODEL * model)980 void apply_lpc_correction(MODEL *model)
981 {
982     if (model->Wo < (PI*150.0/4000)) {
983 	model->A[1] *= 0.032;
984     }
985 }
986 
987 /*---------------------------------------------------------------------------*\
988 
989   FUNCTION....: encode_energy()
990   AUTHOR......: David Rowe
991   DATE CREATED: 22/8/2010
992 
993   Encodes LPC energy using an E_LEVELS quantiser.
994 
995 \*---------------------------------------------------------------------------*/
996 
encode_energy(float e,int bits)997 int encode_energy(float e, int bits)
998 {
999     int   index, e_levels = 1<<bits;
1000     float e_min = E_MIN_DB;
1001     float e_max = E_MAX_DB;
1002     float norm;
1003 
1004     e = 10.0*log10f(e);
1005     norm = (e - e_min)/(e_max - e_min);
1006     index = floorf(e_levels * norm + 0.5);
1007     if (index < 0 ) index = 0;
1008     if (index > (e_levels-1)) index = e_levels-1;
1009 
1010     return index;
1011 }
1012 
1013 /*---------------------------------------------------------------------------*\
1014 
1015   FUNCTION....: decode_energy()
1016   AUTHOR......: David Rowe
1017   DATE CREATED: 22/8/2010
1018 
1019   Decodes energy using a E_LEVELS quantiser.
1020 
1021 \*---------------------------------------------------------------------------*/
1022 
decode_energy(int index,int bits)1023 float decode_energy(int index, int bits)
1024 {
1025     float e_min = E_MIN_DB;
1026     float e_max = E_MAX_DB;
1027     float step;
1028     float e;
1029     int   e_levels = 1<<bits;
1030 
1031     step = (e_max - e_min)/e_levels;
1032     e    = e_min + step*(index);
1033     e    = POW10F(e/10.0);
1034 
1035     return e;
1036 }
1037 
1038 
1039 static float ge_coeff[2] = {0.8, 0.9};
1040 
compute_weights2(const float * x,const float * xp,float * w)1041 void compute_weights2(const float *x, const float *xp, float *w)
1042 {
1043   w[0] = 30;
1044   w[1] = 1;
1045   if (x[1]<0)
1046   {
1047      w[0] *= .6;
1048      w[1] *= .3;
1049   }
1050   if (x[1]<-10)
1051   {
1052      w[0] *= .3;
1053      w[1] *= .3;
1054   }
1055   /* Higher weight if pitch is stable */
1056   if (fabsf(x[0]-xp[0])<.2)
1057   {
1058      w[0] *= 2;
1059      w[1] *= 1.5;
1060   } else if (fabsf(x[0]-xp[0])>.5) /* Lower if not stable */
1061   {
1062      w[0] *= .5;
1063   }
1064 
1065   /* Lower weight for low energy */
1066   if (x[1] < xp[1]-10)
1067   {
1068      w[1] *= .5;
1069   }
1070   if (x[1] < xp[1]-20)
1071   {
1072      w[1] *= .5;
1073   }
1074 
1075   //w[0] = 30;
1076   //w[1] = 1;
1077 
1078   /* Square the weights because it's applied on the squared error */
1079   w[0] *= w[0];
1080   w[1] *= w[1];
1081 
1082 }
1083 
1084 /*---------------------------------------------------------------------------*\
1085 
1086   FUNCTION....: quantise_WoE()
1087   AUTHOR......: Jean-Marc Valin & David Rowe
1088   DATE CREATED: 29 Feb 2012
1089 
1090   Experimental joint Wo and LPC energy vector quantiser developed by
1091   Jean-Marc Valin.  Exploits correlations between the difference in
1092   the log pitch and log energy from frame to frame.  For example
1093   both the pitch and energy tend to only change by small amounts
1094   during voiced speech, however it is important that these changes be
1095   coded carefully.  During unvoiced speech they both change a lot but
1096   the ear is less sensitve to errors so coarser quantisation is OK.
1097 
1098   The ear is sensitive to log energy and loq pitch so we quantise in
1099   these domains.  That way the error measure used to quantise the
1100   values is close to way the ear senses errors.
1101 
1102   See http://jmspeex.livejournal.com/10446.html
1103 
1104 \*---------------------------------------------------------------------------*/
1105 
quantise_WoE(C2CONST * c2const,MODEL * model,float * e,float xq[])1106 void quantise_WoE(C2CONST *c2const, MODEL *model, float *e, float xq[])
1107 {
1108   int          i, n1;
1109   float        x[2];
1110   float        err[2];
1111   float        w[2];
1112   const float *codebook1 = ge_cb[0].cb;
1113   int          nb_entries = ge_cb[0].m;
1114   int          ndim = ge_cb[0].k;
1115   float Wo_min = c2const->Wo_min;
1116   float Wo_max = c2const->Wo_max;
1117   float Fs = c2const->Fs;
1118 
1119   /* VQ is only trained for Fs = 8000 Hz */
1120 
1121   assert(Fs == 8000);
1122 
1123   x[0] = log10f((model->Wo/PI)*4000.0/50.0)/log10f(2);
1124   x[1] = 10.0*log10f(1e-4 + *e);
1125 
1126   compute_weights2(x, xq, w);
1127   for (i=0;i<ndim;i++)
1128     err[i] = x[i]-ge_coeff[i]*xq[i];
1129   n1 = find_nearest_weighted(codebook1, nb_entries, err, w, ndim);
1130 
1131   for (i=0;i<ndim;i++)
1132   {
1133     xq[i] = ge_coeff[i]*xq[i] + codebook1[ndim*n1+i];
1134     err[i] -= codebook1[ndim*n1+i];
1135   }
1136 
1137   /*
1138     x = log2(4000*Wo/(PI*50));
1139     2^x = 4000*Wo/(PI*50)
1140     Wo = (2^x)*(PI*50)/4000;
1141   */
1142 
1143   model->Wo = powf(2.0, xq[0])*(PI*50.0)/4000.0;
1144 
1145   /* bit errors can make us go out of range leading to all sorts of
1146      probs like seg faults */
1147 
1148   if (model->Wo > Wo_max) model->Wo = Wo_max;
1149   if (model->Wo < Wo_min) model->Wo = Wo_min;
1150 
1151   model->L  = PI/model->Wo; /* if we quantise Wo re-compute L */
1152 
1153   *e = POW10F(xq[1]/10.0);
1154 }
1155 
1156 /*---------------------------------------------------------------------------*\
1157 
1158   FUNCTION....: encode_WoE()
1159   AUTHOR......: Jean-Marc Valin & David Rowe
1160   DATE CREATED: 11 May 2012
1161 
1162   Joint Wo and LPC energy vector quantiser developed my Jean-Marc
1163   Valin.  Returns index, and updated states xq[].
1164 
1165 \*---------------------------------------------------------------------------*/
1166 
encode_WoE(MODEL * model,float e,float xq[])1167 int encode_WoE(MODEL *model, float e, float xq[])
1168 {
1169   int          i, n1;
1170   float        x[2];
1171   float        err[2];
1172   float        w[2];
1173   const float *codebook1 = ge_cb[0].cb;
1174   int          nb_entries = ge_cb[0].m;
1175   int          ndim = ge_cb[0].k;
1176 
1177   assert((1<<WO_E_BITS) == nb_entries);
1178 
1179   if (e < 0.0) e = 0;  /* occasional small negative energies due LPC round off I guess */
1180 
1181   x[0] = log10f((model->Wo/PI)*4000.0/50.0)/log10f(2);
1182   x[1] = 10.0*log10f(1e-4 + e);
1183 
1184   compute_weights2(x, xq, w);
1185   for (i=0;i<ndim;i++)
1186     err[i] = x[i]-ge_coeff[i]*xq[i];
1187   n1 = find_nearest_weighted(codebook1, nb_entries, err, w, ndim);
1188 
1189   for (i=0;i<ndim;i++)
1190   {
1191     xq[i] = ge_coeff[i]*xq[i] + codebook1[ndim*n1+i];
1192     err[i] -= codebook1[ndim*n1+i];
1193   }
1194 
1195   //printf("enc: %f %f (%f)(%f) \n", xq[0], xq[1], e, 10.0*log10(1e-4 + e));
1196   return n1;
1197 }
1198 
1199 
1200 /*---------------------------------------------------------------------------*\
1201 
1202   FUNCTION....: decode_WoE()
1203   AUTHOR......: Jean-Marc Valin & David Rowe
1204   DATE CREATED: 11 May 2012
1205 
1206   Joint Wo and LPC energy vector quantiser developed my Jean-Marc
1207   Valin.  Given index and states xq[], returns Wo & E, and updates
1208   states xq[].
1209 
1210 \*---------------------------------------------------------------------------*/
1211 
decode_WoE(C2CONST * c2const,MODEL * model,float * e,float xq[],int n1)1212 void decode_WoE(C2CONST *c2const, MODEL *model, float *e, float xq[], int n1)
1213 {
1214   int          i;
1215   const float *codebook1 = ge_cb[0].cb;
1216   int          ndim = ge_cb[0].k;
1217   float Wo_min = c2const->Wo_min;
1218   float Wo_max = c2const->Wo_max;
1219 
1220   for (i=0;i<ndim;i++)
1221   {
1222     xq[i] = ge_coeff[i]*xq[i] + codebook1[ndim*n1+i];
1223   }
1224 
1225   //printf("dec: %f %f\n", xq[0], xq[1]);
1226   model->Wo = powf(2.0, xq[0])*(PI*50.0)/4000.0;
1227 
1228   /* bit errors can make us go out of range leading to all sorts of
1229      probs like seg faults */
1230 
1231   if (model->Wo > Wo_max) model->Wo = Wo_max;
1232   if (model->Wo < Wo_min) model->Wo = Wo_min;
1233 
1234   model->L  = PI/model->Wo; /* if we quantise Wo re-compute L */
1235 
1236   *e = POW10F(xq[1]/10.0);
1237 }
1238 
1239