1 /* ----------------------------------------------------------------- */
2 /*           The HMM-Based Speech Synthesis Engine "hts_engine API"  */
3 /*           developed by HTS Working Group                          */
4 /*           http://hts-engine.sourceforge.net/                      */
5 /* ----------------------------------------------------------------- */
6 /*                                                                   */
7 /*  Copyright (c) 2001-2015  Nagoya Institute of Technology          */
8 /*                           Department of Computer Science          */
9 /*                                                                   */
10 /*                2001-2008  Tokyo Institute of Technology           */
11 /*                           Interdisciplinary Graduate School of    */
12 /*                           Science and Engineering                 */
13 /*                                                                   */
14 /* All rights reserved.                                              */
15 /*                                                                   */
16 /* Redistribution and use in source and binary forms, with or        */
17 /* without modification, are permitted provided that the following   */
18 /* conditions are met:                                               */
19 /*                                                                   */
20 /* - Redistributions of source code must retain the above copyright  */
21 /*   notice, this list of conditions and the following disclaimer.   */
22 /* - Redistributions in binary form must reproduce the above         */
23 /*   copyright notice, this list of conditions and the following     */
24 /*   disclaimer in the documentation and/or other materials provided */
25 /*   with the distribution.                                          */
26 /* - Neither the name of the HTS working group nor the names of its  */
27 /*   contributors may be used to endorse or promote products derived */
28 /*   from this software without specific prior written permission.   */
29 /*                                                                   */
30 /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND            */
31 /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,       */
32 /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF          */
33 /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE          */
34 /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
35 /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,          */
36 /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED   */
37 /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,     */
38 /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
39 /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,   */
40 /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY    */
41 /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE           */
42 /* POSSIBILITY OF SUCH DAMAGE.                                       */
43 /* ----------------------------------------------------------------- */
44 
45 #ifndef HTS_VOCODER_C
46 #define HTS_VOCODER_C
47 
48 #ifdef __cplusplus
49 #define HTS_VOCODER_C_START extern "C" {
50 #define HTS_VOCODER_C_END   }
51 #else
52 #define HTS_VOCODER_C_START
53 #define HTS_VOCODER_C_END
54 #endif                          /* __CPLUSPLUS */
55 
56 HTS_VOCODER_C_START;
57 
58 #include <math.h>               /* for sqrt(),log(),exp(),pow(),cos() */
59 
60 /* hts_engine libraries */
61 #include "HTS_hidden.h"
62 
63 static const double HTS_pade[21] = {
64    1.00000000000,
65    1.00000000000,
66    0.00000000000,
67    1.00000000000,
68    0.00000000000,
69    0.00000000000,
70    1.00000000000,
71    0.00000000000,
72    0.00000000000,
73    0.00000000000,
74    1.00000000000,
75    0.49992730000,
76    0.10670050000,
77    0.01170221000,
78    0.00056562790,
79    1.00000000000,
80    0.49993910000,
81    0.11070980000,
82    0.01369984000,
83    0.00095648530,
84    0.00003041721
85 };
86 
87 /* HTS_movem: move memory */
HTS_movem(double * a,double * b,const int nitem)88 static void HTS_movem(double *a, double *b, const int nitem)
89 {
90    long i = (long) nitem;
91 
92    if (a > b)
93       while (i--)
94          *b++ = *a++;
95    else {
96       a += i;
97       b += i;
98       while (i--)
99          *--b = *--a;
100    }
101 }
102 
103 /* HTS_mlsafir: sub functions for MLSA filter */
HTS_mlsafir(const double x,const double * b,const int m,const double a,const double aa,double * d)104 static double HTS_mlsafir(const double x, const double *b, const int m, const double a, const double aa, double *d)
105 {
106    double y = 0.0;
107    int i;
108 
109    d[0] = x;
110    d[1] = aa * d[0] + a * d[1];
111 
112    for (i = 2; i <= m; i++)
113       d[i] += a * (d[i + 1] - d[i - 1]);
114 
115    for (i = 2; i <= m; i++)
116       y += d[i] * b[i];
117 
118    for (i = m + 1; i > 1; i--)
119       d[i] = d[i - 1];
120 
121    return (y);
122 }
123 
124 /* HTS_mlsadf1: sub functions for MLSA filter */
HTS_mlsadf1(double x,const double * b,const int m,const double a,const double aa,const int pd,double * d,const double * ppade)125 static double HTS_mlsadf1(double x, const double *b, const int m, const double a, const double aa, const int pd, double *d, const double *ppade)
126 {
127    double v, out = 0.0, *pt;
128    int i;
129 
130    pt = &d[pd + 1];
131 
132    for (i = pd; i >= 1; i--) {
133       d[i] = aa * pt[i - 1] + a * d[i];
134       pt[i] = d[i] * b[1];
135       v = pt[i] * ppade[i];
136       x += (1 & i) ? v : -v;
137       out += v;
138    }
139 
140    pt[0] = x;
141    out += x;
142 
143    return (out);
144 }
145 
146 /* HTS_mlsadf2: sub functions for MLSA filter */
HTS_mlsadf2(double x,const double * b,const int m,const double a,const double aa,const int pd,double * d,const double * ppade)147 static double HTS_mlsadf2(double x, const double *b, const int m, const double a, const double aa, const int pd, double *d, const double *ppade)
148 {
149    double v, out = 0.0, *pt;
150    int i;
151 
152    pt = &d[pd * (m + 2)];
153 
154    for (i = pd; i >= 1; i--) {
155       pt[i] = HTS_mlsafir(pt[i - 1], b, m, a, aa, &d[(i - 1) * (m + 2)]);
156       v = pt[i] * ppade[i];
157 
158       x += (1 & i) ? v : -v;
159       out += v;
160    }
161 
162    pt[0] = x;
163    out += x;
164 
165    return (out);
166 }
167 
168 /* HTS_mlsadf: functions for MLSA filter */
HTS_mlsadf(double x,const double * b,const int m,const double a,const int pd,double * d)169 static double HTS_mlsadf(double x, const double *b, const int m, const double a, const int pd, double *d)
170 {
171    const double aa = 1 - a * a;
172    const double *ppade = &(HTS_pade[pd * (pd + 1) / 2]);
173 
174    x = HTS_mlsadf1(x, b, m, a, aa, pd, d, ppade);
175    x = HTS_mlsadf2(x, b, m, a, aa, pd, &d[2 * (pd + 1)], ppade);
176 
177    return (x);
178 }
179 
180 /* HTS_rnd: functions for random noise generation */
HTS_rnd(unsigned long * next)181 static double HTS_rnd(unsigned long *next)
182 {
183    double r;
184 
185    *next = *next * 1103515245L + 12345;
186    r = (*next / 65536L) % 32768L;
187 
188    return (r / RANDMAX);
189 }
190 
191 /* HTS_nrandom: functions for gaussian random noise generation */
HTS_nrandom(HTS_Vocoder * v)192 static double HTS_nrandom(HTS_Vocoder * v)
193 {
194    if (v->sw == 0) {
195       v->sw = 1;
196       do {
197          v->r1 = 2 * HTS_rnd(&v->next) - 1;
198          v->r2 = 2 * HTS_rnd(&v->next) - 1;
199          v->s = v->r1 * v->r1 + v->r2 * v->r2;
200       } while (v->s > 1 || v->s == 0);
201       v->s = sqrt(-2 * log(v->s) / v->s);
202       return (v->r1 * v->s);
203    } else {
204       v->sw = 0;
205       return (v->r2 * v->s);
206    }
207 }
208 
209 /* HTS_mceq: function for M-sequence random noise generation */
HTS_mseq(HTS_Vocoder * v)210 static int HTS_mseq(HTS_Vocoder * v)
211 {
212    int x0, x28;
213 
214    v->x >>= 1;
215    if (v->x & B0)
216       x0 = 1;
217    else
218       x0 = -1;
219    if (v->x & B28)
220       x28 = 1;
221    else
222       x28 = -1;
223    if (x0 + x28)
224       v->x &= B31_;
225    else
226       v->x |= B31;
227 
228    return (x0);
229 }
230 
231 /* HTS_mc2b: transform mel-cepstrum to MLSA digital fillter coefficients */
HTS_mc2b(double * mc,double * b,int m,const double a)232 static void HTS_mc2b(double *mc, double *b, int m, const double a)
233 {
234    if (mc != b) {
235       if (a != 0.0) {
236          b[m] = mc[m];
237          for (m--; m >= 0; m--)
238             b[m] = mc[m] - a * b[m + 1];
239       } else
240          HTS_movem(mc, b, m + 1);
241    } else if (a != 0.0)
242       for (m--; m >= 0; m--)
243          b[m] -= a * b[m + 1];
244 }
245 
246 /* HTS_b2bc: transform MLSA digital filter coefficients to mel-cepstrum */
HTS_b2mc(const double * b,double * mc,int m,const double a)247 static void HTS_b2mc(const double *b, double *mc, int m, const double a)
248 {
249    double d, o;
250 
251    d = mc[m] = b[m];
252    for (m--; m >= 0; m--) {
253       o = b[m] + a * d;
254       d = b[m];
255       mc[m] = o;
256    }
257 }
258 
259 /* HTS_freqt: frequency transformation */
HTS_freqt(HTS_Vocoder * v,const double * c1,const int m1,double * c2,const int m2,const double a)260 static void HTS_freqt(HTS_Vocoder * v, const double *c1, const int m1, double *c2, const int m2, const double a)
261 {
262    int i, j;
263    const double b = 1 - a * a;
264    double *g;
265 
266    if (m2 > v->freqt_size) {
267       if (v->freqt_buff != NULL)
268          HTS_free(v->freqt_buff);
269       v->freqt_buff = (double *) HTS_calloc(m2 + m2 + 2, sizeof(double));
270       v->freqt_size = m2;
271    }
272    g = v->freqt_buff + v->freqt_size + 1;
273 
274    for (i = 0; i < m2 + 1; i++)
275       g[i] = 0.0;
276 
277    for (i = -m1; i <= 0; i++) {
278       if (0 <= m2)
279          g[0] = c1[-i] + a * (v->freqt_buff[0] = g[0]);
280       if (1 <= m2)
281          g[1] = b * v->freqt_buff[0] + a * (v->freqt_buff[1] = g[1]);
282       for (j = 2; j <= m2; j++)
283          g[j] = v->freqt_buff[j - 1] + a * ((v->freqt_buff[j] = g[j]) - g[j - 1]);
284    }
285 
286    HTS_movem(g, c2, m2 + 1);
287 }
288 
289 /* HTS_c2ir: The minimum phase impulse response is evaluated from the minimum phase cepstrum */
HTS_c2ir(const double * c,const int nc,double * h,const int leng)290 static void HTS_c2ir(const double *c, const int nc, double *h, const int leng)
291 {
292    int n, k, upl;
293    double d;
294 
295    h[0] = exp(c[0]);
296    for (n = 1; n < leng; n++) {
297       d = 0;
298       upl = (n >= nc) ? nc - 1 : n;
299       for (k = 1; k <= upl; k++)
300          d += k * c[k] * h[n - k];
301       h[n] = d / n;
302    }
303 }
304 
305 /* HTS_b2en: calculate frame energy */
HTS_b2en(HTS_Vocoder * v,const double * b,const int m,const double a)306 static double HTS_b2en(HTS_Vocoder * v, const double *b, const int m, const double a)
307 {
308    int i;
309    double en = 0.0;
310    double *cep;
311    double *ir;
312 
313    if (v->spectrum2en_size < m) {
314       if (v->spectrum2en_buff != NULL)
315          HTS_free(v->spectrum2en_buff);
316       v->spectrum2en_buff = (double *) HTS_calloc((m + 1) + 2 * IRLENG, sizeof(double));
317       v->spectrum2en_size = m;
318    }
319    cep = v->spectrum2en_buff + m + 1;
320    ir = cep + IRLENG;
321 
322    HTS_b2mc(b, v->spectrum2en_buff, m, a);
323    HTS_freqt(v, v->spectrum2en_buff, m, cep, IRLENG - 1, -a);
324    HTS_c2ir(cep, IRLENG, ir, IRLENG);
325 
326    for (i = 0; i < IRLENG; i++)
327       en += ir[i] * ir[i];
328 
329    return (en);
330 }
331 
332 /* HTS_ignorm: inverse gain normalization */
HTS_ignorm(double * c1,double * c2,int m,const double g)333 static void HTS_ignorm(double *c1, double *c2, int m, const double g)
334 {
335    double k;
336    if (g != 0.0) {
337       k = pow(c1[0], g);
338       for (; m >= 1; m--)
339          c2[m] = k * c1[m];
340       c2[0] = (k - 1.0) / g;
341    } else {
342       HTS_movem(&c1[1], &c2[1], m);
343       c2[0] = log(c1[0]);
344    }
345 }
346 
347 /* HTS_gnorm: gain normalization */
HTS_gnorm(double * c1,double * c2,int m,const double g)348 static void HTS_gnorm(double *c1, double *c2, int m, const double g)
349 {
350    double k;
351    if (g != 0.0) {
352       k = 1.0 + g * c1[0];
353       for (; m >= 1; m--)
354          c2[m] = c1[m] / k;
355       c2[0] = pow(k, 1.0 / g);
356    } else {
357       HTS_movem(&c1[1], &c2[1], m);
358       c2[0] = exp(c1[0]);
359    }
360 }
361 
362 /* HTS_lsp2lpc: transform LSP to LPC */
HTS_lsp2lpc(HTS_Vocoder * v,double * lsp,double * a,const int m)363 static void HTS_lsp2lpc(HTS_Vocoder * v, double *lsp, double *a, const int m)
364 {
365    int i, k, mh1, mh2, flag_odd;
366    double xx, xf, xff;
367    double *p, *q;
368    double *a0, *a1, *a2, *b0, *b1, *b2;
369 
370    flag_odd = 0;
371    if (m % 2 == 0)
372       mh1 = mh2 = m / 2;
373    else {
374       mh1 = (m + 1) / 2;
375       mh2 = (m - 1) / 2;
376       flag_odd = 1;
377    }
378 
379    if (m > v->lsp2lpc_size) {
380       if (v->lsp2lpc_buff != NULL)
381          HTS_free(v->lsp2lpc_buff);
382       v->lsp2lpc_buff = (double *) HTS_calloc(5 * m + 6, sizeof(double));
383       v->lsp2lpc_size = m;
384    }
385    p = v->lsp2lpc_buff + m;
386    q = p + mh1;
387    a0 = q + mh2;
388    a1 = a0 + (mh1 + 1);
389    a2 = a1 + (mh1 + 1);
390    b0 = a2 + (mh1 + 1);
391    b1 = b0 + (mh2 + 1);
392    b2 = b1 + (mh2 + 1);
393 
394    HTS_movem(lsp, v->lsp2lpc_buff, m);
395 
396    for (i = 0; i < mh1 + 1; i++)
397       a0[i] = 0.0;
398    for (i = 0; i < mh1 + 1; i++)
399       a1[i] = 0.0;
400    for (i = 0; i < mh1 + 1; i++)
401       a2[i] = 0.0;
402    for (i = 0; i < mh2 + 1; i++)
403       b0[i] = 0.0;
404    for (i = 0; i < mh2 + 1; i++)
405       b1[i] = 0.0;
406    for (i = 0; i < mh2 + 1; i++)
407       b2[i] = 0.0;
408 
409    /* lsp filter parameters */
410    for (i = k = 0; i < mh1; i++, k += 2)
411       p[i] = -2.0 * cos(v->lsp2lpc_buff[k]);
412    for (i = k = 0; i < mh2; i++, k += 2)
413       q[i] = -2.0 * cos(v->lsp2lpc_buff[k + 1]);
414 
415    /* impulse response of analysis filter */
416    xx = 1.0;
417    xf = xff = 0.0;
418 
419    for (k = 0; k <= m; k++) {
420       if (flag_odd) {
421          a0[0] = xx;
422          b0[0] = xx - xff;
423          xff = xf;
424          xf = xx;
425       } else {
426          a0[0] = xx + xf;
427          b0[0] = xx - xf;
428          xf = xx;
429       }
430 
431       for (i = 0; i < mh1; i++) {
432          a0[i + 1] = a0[i] + p[i] * a1[i] + a2[i];
433          a2[i] = a1[i];
434          a1[i] = a0[i];
435       }
436 
437       for (i = 0; i < mh2; i++) {
438          b0[i + 1] = b0[i] + q[i] * b1[i] + b2[i];
439          b2[i] = b1[i];
440          b1[i] = b0[i];
441       }
442 
443       if (k != 0)
444          a[k - 1] = -0.5 * (a0[mh1] + b0[mh2]);
445       xx = 0.0;
446    }
447 
448    for (i = m - 1; i >= 0; i--)
449       a[i + 1] = -a[i];
450    a[0] = 1.0;
451 }
452 
453 /* HTS_gc2gc: generalized cepstral transformation */
HTS_gc2gc(HTS_Vocoder * v,double * c1,const int m1,const double g1,double * c2,const int m2,const double g2)454 static void HTS_gc2gc(HTS_Vocoder * v, double *c1, const int m1, const double g1, double *c2, const int m2, const double g2)
455 {
456    int i, min, k, mk;
457    double ss1, ss2, cc;
458 
459    if (m1 > v->gc2gc_size) {
460       if (v->gc2gc_buff != NULL)
461          HTS_free(v->gc2gc_buff);
462       v->gc2gc_buff = (double *) HTS_calloc(m1 + 1, sizeof(double));
463       v->gc2gc_size = m1;
464    }
465 
466    HTS_movem(c1, v->gc2gc_buff, m1 + 1);
467 
468    c2[0] = v->gc2gc_buff[0];
469    for (i = 1; i <= m2; i++) {
470       ss1 = ss2 = 0.0;
471       min = m1 < i ? m1 : i - 1;
472       for (k = 1; k <= min; k++) {
473          mk = i - k;
474          cc = v->gc2gc_buff[k] * c2[mk];
475          ss2 += k * cc;
476          ss1 += mk * cc;
477       }
478 
479       if (i <= m1)
480          c2[i] = v->gc2gc_buff[i] + (g2 * ss2 - g1 * ss1) / i;
481       else
482          c2[i] = (g2 * ss2 - g1 * ss1) / i;
483    }
484 }
485 
486 /* HTS_mgc2mgc: frequency and generalized cepstral transformation */
HTS_mgc2mgc(HTS_Vocoder * v,double * c1,const int m1,const double a1,const double g1,double * c2,const int m2,const double a2,const double g2)487 static void HTS_mgc2mgc(HTS_Vocoder * v, double *c1, const int m1, const double a1, const double g1, double *c2, const int m2, const double a2, const double g2)
488 {
489    double a;
490 
491    if (a1 == a2) {
492       HTS_gnorm(c1, c1, m1, g1);
493       HTS_gc2gc(v, c1, m1, g1, c2, m2, g2);
494       HTS_ignorm(c2, c2, m2, g2);
495    } else {
496       a = (a2 - a1) / (1 - a1 * a2);
497       HTS_freqt(v, c1, m1, c2, m2, a);
498       HTS_gnorm(c2, c2, m2, g1);
499       HTS_gc2gc(v, c2, m2, g1, c2, m2, g2);
500       HTS_ignorm(c2, c2, m2, g2);
501    }
502 }
503 
504 /* HTS_lsp2mgc: transform LSP to MGC */
HTS_lsp2mgc(HTS_Vocoder * v,double * lsp,double * mgc,const int m,const double alpha)505 static void HTS_lsp2mgc(HTS_Vocoder * v, double *lsp, double *mgc, const int m, const double alpha)
506 {
507    int i;
508    /* lsp2lpc */
509    HTS_lsp2lpc(v, lsp + 1, mgc, m);
510    if (v->use_log_gain)
511       mgc[0] = exp(lsp[0]);
512    else
513       mgc[0] = lsp[0];
514 
515    /* mgc2mgc */
516    if (NORMFLG1)
517       HTS_ignorm(mgc, mgc, m, v->gamma);
518    else if (MULGFLG1)
519       mgc[0] = (1.0 - mgc[0]) * ((double) v->stage);
520    if (MULGFLG1)
521       for (i = m; i >= 1; i--)
522          mgc[i] *= -((double) v->stage);
523    HTS_mgc2mgc(v, mgc, m, alpha, v->gamma, mgc, m, alpha, v->gamma);
524    if (NORMFLG2)
525       HTS_gnorm(mgc, mgc, m, v->gamma);
526    else if (MULGFLG2)
527       mgc[0] = mgc[0] * v->gamma + 1.0;
528    if (MULGFLG2)
529       for (i = m; i >= 1; i--)
530          mgc[i] *= v->gamma;
531 }
532 
533 /* HTS_mglsadff: sub functions for MGLSA filter */
HTS_mglsadff(double x,const double * b,const int m,const double a,double * d)534 static double HTS_mglsadff(double x, const double *b, const int m, const double a, double *d)
535 {
536    int i;
537 
538    double y;
539    y = d[0] * b[1];
540    for (i = 1; i < m; i++) {
541       d[i] += a * (d[i + 1] - d[i - 1]);
542       y += d[i] * b[i + 1];
543    }
544    x -= y;
545 
546    for (i = m; i > 0; i--)
547       d[i] = d[i - 1];
548    d[0] = a * d[0] + (1 - a * a) * x;
549    return x;
550 }
551 
552 /* HTS_mglsadf: sub functions for MGLSA filter */
HTS_mglsadf(double x,const double * b,const int m,const double a,const int n,double * d)553 static double HTS_mglsadf(double x, const double *b, const int m, const double a, const int n, double *d)
554 {
555    int i;
556 
557    for (i = 0; i < n; i++)
558       x = HTS_mglsadff(x, b, m, a, &d[i * (m + 1)]);
559 
560    return x;
561 }
562 
563 /* THS_check_lsp_stability: check LSP stability */
HTS_check_lsp_stability(double * lsp,size_t m)564 static void HTS_check_lsp_stability(double *lsp, size_t m)
565 {
566    size_t i, j;
567    double tmp;
568    double min = (CHECK_LSP_STABILITY_MIN * PI) / (m + 1);
569    HTS_Boolean find;
570 
571    for (i = 0; i < CHECK_LSP_STABILITY_NUM; i++) {
572       find = FALSE;
573 
574       for (j = 1; j < m; j++) {
575          tmp = lsp[j + 1] - lsp[j];
576          if (tmp < min) {
577             lsp[j] -= 0.5 * (min - tmp);
578             lsp[j + 1] += 0.5 * (min - tmp);
579             find = TRUE;
580          }
581       }
582 
583       if (lsp[1] < min) {
584          lsp[1] = min;
585          find = TRUE;
586       }
587       if (lsp[m] > PI - min) {
588          lsp[m] = PI - min;
589          find = TRUE;
590       }
591 
592       if (find == FALSE)
593          break;
594    }
595 }
596 
597 /* HTS_lsp2en: calculate frame energy */
HTS_lsp2en(HTS_Vocoder * v,double * lsp,size_t m,double alpha)598 static double HTS_lsp2en(HTS_Vocoder * v, double *lsp, size_t m, double alpha)
599 {
600    size_t i;
601    double en = 0.0;
602    double *buff;
603 
604    if (v->spectrum2en_size < m) {
605       if (v->spectrum2en_buff != NULL)
606          HTS_free(v->spectrum2en_buff);
607       v->spectrum2en_buff = (double *) HTS_calloc(m + 1 + IRLENG, sizeof(double));
608       v->spectrum2en_size = m;
609    }
610    buff = v->spectrum2en_buff + m + 1;
611 
612    /* lsp2lpc */
613    HTS_lsp2lpc(v, lsp + 1, v->spectrum2en_buff, m);
614    if (v->use_log_gain)
615       v->spectrum2en_buff[0] = exp(lsp[0]);
616    else
617       v->spectrum2en_buff[0] = lsp[0];
618 
619    /* mgc2mgc */
620    if (NORMFLG1)
621       HTS_ignorm(v->spectrum2en_buff, v->spectrum2en_buff, m, v->gamma);
622    else if (MULGFLG1)
623       v->spectrum2en_buff[0] = (1.0 - v->spectrum2en_buff[0]) * ((double) v->stage);
624    if (MULGFLG1)
625       for (i = 1; i <= m; i++)
626          v->spectrum2en_buff[i] *= -((double) v->stage);
627    HTS_mgc2mgc(v, v->spectrum2en_buff, m, alpha, v->gamma, buff, IRLENG - 1, 0.0, 1);
628 
629    for (i = 0; i < IRLENG; i++)
630       en += buff[i] * buff[i];
631    return en;
632 }
633 
634 /* HTS_white_noise: return white noise */
HTS_white_noise(HTS_Vocoder * v)635 static double HTS_white_noise(HTS_Vocoder * v)
636 {
637    if (v->gauss)
638       return (double) HTS_nrandom(v);
639    else
640       return (double) HTS_mseq(v);
641 }
642 
643 /* HTS_Vocoder_initialize_excitation: initialize excitation */
HTS_Vocoder_initialize_excitation(HTS_Vocoder * v,double pitch,size_t nlpf)644 static void HTS_Vocoder_initialize_excitation(HTS_Vocoder * v, double pitch, size_t nlpf)
645 {
646    size_t i;
647 
648    v->pitch_of_curr_point = pitch;
649    v->pitch_counter = pitch;
650    v->pitch_inc_per_point = 0.0;
651    if (nlpf > 0) {
652       v->excite_buff_size = nlpf;
653       v->excite_ring_buff = (double *) HTS_calloc(v->excite_buff_size, sizeof(double));
654       for (i = 0; i < v->excite_buff_size; i++)
655          v->excite_ring_buff[i] = 0.0;
656       v->excite_buff_index = 0;
657    } else {
658       v->excite_buff_size = 0;
659       v->excite_ring_buff = NULL;
660       v->excite_buff_index = 0;
661    }
662 }
663 
664 /* HTS_Vocoder_start_excitation: start excitation of each frame */
HTS_Vocoder_start_excitation(HTS_Vocoder * v,double pitch)665 static void HTS_Vocoder_start_excitation(HTS_Vocoder * v, double pitch)
666 {
667    if (v->pitch_of_curr_point != 0.0 && pitch != 0.0) {
668       v->pitch_inc_per_point = (pitch - v->pitch_of_curr_point) / v->fprd;
669    } else {
670       v->pitch_inc_per_point = 0.0;
671       v->pitch_of_curr_point = pitch;
672       v->pitch_counter = pitch;
673    }
674 }
675 
676 /* HTS_Vocoder_excite_unvoiced_frame: ping noise to ring buffer */
HTS_Vocoder_excite_unvoiced_frame(HTS_Vocoder * v,double noise)677 static void HTS_Vocoder_excite_unvoiced_frame(HTS_Vocoder * v, double noise)
678 {
679    size_t center = (v->excite_buff_size - 1) / 2;
680    v->excite_ring_buff[(v->excite_buff_index + center) % v->excite_buff_size] += noise;
681 }
682 
683 /* HTS_Vocoder_excite_vooiced_frame: ping noise and pulse to ring buffer */
HTS_Vocoder_excite_voiced_frame(HTS_Vocoder * v,double noise,double pulse,const double * lpf)684 static void HTS_Vocoder_excite_voiced_frame(HTS_Vocoder * v, double noise, double pulse, const double *lpf)
685 {
686    size_t i;
687    size_t center = (v->excite_buff_size - 1) / 2;
688 
689    if (noise != 0.0) {
690       for (i = 0; i < v->excite_buff_size; i++) {
691          if (i == center)
692             v->excite_ring_buff[(v->excite_buff_index + i) % v->excite_buff_size] += noise * (1.0 - lpf[i]);
693          else
694             v->excite_ring_buff[(v->excite_buff_index + i) % v->excite_buff_size] += noise * (0.0 - lpf[i]);
695       }
696    }
697    if (pulse != 0.0) {
698       for (i = 0; i < v->excite_buff_size; i++)
699          v->excite_ring_buff[(v->excite_buff_index + i) % v->excite_buff_size] += pulse * lpf[i];
700    }
701 }
702 
703 /* HTS_Vocoder_get_excitation: get excitation of each sample */
HTS_Vocoder_get_excitation(HTS_Vocoder * v,const double * lpf)704 static double HTS_Vocoder_get_excitation(HTS_Vocoder * v, const double *lpf)
705 {
706    double x;
707    double noise, pulse = 0.0;
708 
709    if (v->excite_buff_size > 0) {
710       noise = HTS_white_noise(v);
711       pulse = 0.0;
712       if (v->pitch_of_curr_point == 0.0) {
713          HTS_Vocoder_excite_unvoiced_frame(v, noise);
714       } else {
715          v->pitch_counter += 1.0;
716          if (v->pitch_counter >= v->pitch_of_curr_point) {
717             pulse = sqrt(v->pitch_of_curr_point);
718             v->pitch_counter -= v->pitch_of_curr_point;
719          }
720          HTS_Vocoder_excite_voiced_frame(v, noise, pulse, lpf);
721          v->pitch_of_curr_point += v->pitch_inc_per_point;
722       }
723       x = v->excite_ring_buff[v->excite_buff_index];
724       v->excite_ring_buff[v->excite_buff_index] = 0.0;
725       v->excite_buff_index++;
726       if (v->excite_buff_index >= v->excite_buff_size)
727          v->excite_buff_index = 0;
728    } else {
729       if (v->pitch_of_curr_point == 0.0) {
730          x = HTS_white_noise(v);
731       } else {
732          v->pitch_counter += 1.0;
733          if (v->pitch_counter >= v->pitch_of_curr_point) {
734             x = sqrt(v->pitch_of_curr_point);
735             v->pitch_counter -= v->pitch_of_curr_point;
736          } else {
737             x = 0.0;
738          }
739          v->pitch_of_curr_point += v->pitch_inc_per_point;
740       }
741    }
742 
743    return x;
744 }
745 
746 /* HTS_Vocoder_end_excitation: end excitation of each frame */
HTS_Vocoder_end_excitation(HTS_Vocoder * v,double pitch)747 static void HTS_Vocoder_end_excitation(HTS_Vocoder * v, double pitch)
748 {
749    v->pitch_of_curr_point = pitch;
750 }
751 
752 /* HTS_Vocoder_postfilter_mcp: postfilter for MCP */
HTS_Vocoder_postfilter_mcp(HTS_Vocoder * v,double * mcp,const int m,double alpha,double beta)753 static void HTS_Vocoder_postfilter_mcp(HTS_Vocoder * v, double *mcp, const int m, double alpha, double beta)
754 {
755    double e1, e2;
756    int k;
757 
758    if (beta > 0.0 && m > 1) {
759       if (v->postfilter_size < m) {
760          if (v->postfilter_buff != NULL)
761             HTS_free(v->postfilter_buff);
762          v->postfilter_buff = (double *) HTS_calloc(m + 1, sizeof(double));
763          v->postfilter_size = m;
764       }
765       HTS_mc2b(mcp, v->postfilter_buff, m, alpha);
766       e1 = HTS_b2en(v, v->postfilter_buff, m, alpha);
767 
768       v->postfilter_buff[1] -= beta * alpha * v->postfilter_buff[2];
769       for (k = 2; k <= m; k++)
770          v->postfilter_buff[k] *= (1.0 + beta);
771 
772       e2 = HTS_b2en(v, v->postfilter_buff, m, alpha);
773       v->postfilter_buff[0] += log(e1 / e2) / 2;
774       HTS_b2mc(v->postfilter_buff, mcp, m, alpha);
775    }
776 }
777 
778 /* HTS_Vocoder_postfilter_lsp: postfilter for LSP */
HTS_Vocoder_postfilter_lsp(HTS_Vocoder * v,double * lsp,size_t m,double alpha,double beta)779 static void HTS_Vocoder_postfilter_lsp(HTS_Vocoder * v, double *lsp, size_t m, double alpha, double beta)
780 {
781    double e1, e2;
782    size_t i;
783    double d1, d2;
784 
785    if (beta > 0.0 && m > 1) {
786       if (v->postfilter_size < m) {
787          if (v->postfilter_buff != NULL)
788             HTS_free(v->postfilter_buff);
789          v->postfilter_buff = (double *) HTS_calloc(m + 1, sizeof(double));
790          v->postfilter_size = m;
791       }
792 
793       e1 = HTS_lsp2en(v, lsp, m, alpha);
794 
795       /* postfiltering */
796       for (i = 0; i <= m; i++) {
797          if (i > 1 && i < m) {
798             d1 = beta * (lsp[i + 1] - lsp[i]);
799             d2 = beta * (lsp[i] - lsp[i - 1]);
800             v->postfilter_buff[i] = lsp[i - 1] + d2 + (d2 * d2 * ((lsp[i + 1] - lsp[i - 1]) - (d1 + d2))) / ((d2 * d2) + (d1 * d1));
801          } else {
802             v->postfilter_buff[i] = lsp[i];
803          }
804       }
805       HTS_movem(v->postfilter_buff, lsp, m + 1);
806 
807       e2 = HTS_lsp2en(v, lsp, m, alpha);
808 
809       if (e1 != e2) {
810          if (v->use_log_gain)
811             lsp[0] += 0.5 * log(e1 / e2);
812          else
813             lsp[0] *= sqrt(e1 / e2);
814       }
815    }
816 }
817 
818 /* HTS_Vocoder_initialize: initialize vocoder */
HTS_Vocoder_initialize(HTS_Vocoder * v,size_t m,size_t stage,HTS_Boolean use_log_gain,size_t rate,size_t fperiod)819 void HTS_Vocoder_initialize(HTS_Vocoder * v, size_t m, size_t stage, HTS_Boolean use_log_gain, size_t rate, size_t fperiod)
820 {
821    /* set parameter */
822    v->is_first = TRUE;
823    v->stage = stage;
824    if (stage != 0)
825       v->gamma = -1.0 / v->stage;
826    else
827       v->gamma = 0.0;
828    v->use_log_gain = use_log_gain;
829    v->fprd = fperiod;
830    v->next = SEED;
831    v->gauss = GAUSS;
832    v->rate = rate;
833    v->pitch_of_curr_point = 0.0;
834    v->pitch_counter = 0.0;
835    v->pitch_inc_per_point = 0.0;
836    v->excite_ring_buff = NULL;
837    v->excite_buff_size = 0;
838    v->excite_buff_index = 0;
839    v->sw = 0;
840    v->x = 0x55555555;
841    /* init buffer */
842    v->freqt_buff = NULL;
843    v->freqt_size = 0;
844    v->gc2gc_buff = NULL;
845    v->gc2gc_size = 0;
846    v->lsp2lpc_buff = NULL;
847    v->lsp2lpc_size = 0;
848    v->postfilter_buff = NULL;
849    v->postfilter_size = 0;
850    v->spectrum2en_buff = NULL;
851    v->spectrum2en_size = 0;
852    if (v->stage == 0) {         /* for MCP */
853       v->c = (double *) HTS_calloc(m * (3 + PADEORDER) + 5 * PADEORDER + 6, sizeof(double));
854       v->cc = v->c + m + 1;
855       v->cinc = v->cc + m + 1;
856       v->d1 = v->cinc + m + 1;
857    } else {                     /* for LSP */
858       v->c = (double *) HTS_calloc((m + 1) * (v->stage + 3), sizeof(double));
859       v->cc = v->c + m + 1;
860       v->cinc = v->cc + m + 1;
861       v->d1 = v->cinc + m + 1;
862    }
863 }
864 
865 /* HTS_Vocoder_synthesize: pulse/noise excitation and MLSA/MGLSA filster based waveform synthesis */
HTS_Vocoder_synthesize(HTS_Vocoder * v,size_t m,double lf0,double * spectrum,size_t nlpf,double * lpf,double alpha,double beta,double volume,double * rawdata,HTS_Audio * audio)866 void HTS_Vocoder_synthesize(HTS_Vocoder * v, size_t m, double lf0, double *spectrum, size_t nlpf, double *lpf, double alpha, double beta, double volume, double *rawdata, HTS_Audio * audio)
867 {
868    double x;
869    int i, j;
870    short xs;
871    int rawidx = 0;
872    double p;
873 
874    /* lf0 -> pitch */
875    if (lf0 == LZERO)
876       p = 0.0;
877    else if (lf0 <= MIN_LF0)
878       p = v->rate / MIN_F0;
879    else if (lf0 >= MAX_LF0)
880       p = v->rate / MAX_F0;
881    else
882       p = v->rate / exp(lf0);
883 
884    /* first time */
885    if (v->is_first == TRUE) {
886       HTS_Vocoder_initialize_excitation(v, p, nlpf);
887       if (v->stage == 0) {      /* for MCP */
888          HTS_mc2b(spectrum, v->c, m, alpha);
889       } else {                  /* for LSP */
890          HTS_movem(spectrum, v->c, m + 1);
891          HTS_lsp2mgc(v, v->c, v->c, m, alpha);
892          HTS_mc2b(v->c, v->c, m, alpha);
893          HTS_gnorm(v->c, v->c, m, v->gamma);
894          for (i = 1; i <= m; i++)
895             v->c[i] *= v->gamma;
896       }
897       v->is_first = FALSE;
898    }
899 
900    HTS_Vocoder_start_excitation(v, p);
901    if (v->stage == 0) {         /* for MCP */
902       HTS_Vocoder_postfilter_mcp(v, spectrum, m, alpha, beta);
903       HTS_mc2b(spectrum, v->cc, m, alpha);
904       for (i = 0; i <= m; i++)
905          v->cinc[i] = (v->cc[i] - v->c[i]) / v->fprd;
906    } else {                     /* for LSP */
907       HTS_Vocoder_postfilter_lsp(v, spectrum, m, alpha, beta);
908       HTS_check_lsp_stability(spectrum, m);
909       HTS_lsp2mgc(v, spectrum, v->cc, m, alpha);
910       HTS_mc2b(v->cc, v->cc, m, alpha);
911       HTS_gnorm(v->cc, v->cc, m, v->gamma);
912       for (i = 1; i <= m; i++)
913          v->cc[i] *= v->gamma;
914       for (i = 0; i <= m; i++)
915          v->cinc[i] = (v->cc[i] - v->c[i]) / v->fprd;
916    }
917 
918    for (j = 0; j < v->fprd; j++) {
919       x = HTS_Vocoder_get_excitation(v, lpf);
920       if (v->stage == 0) {      /* for MCP */
921          if (x != 0.0)
922             x *= exp(v->c[0]);
923          x = HTS_mlsadf(x, v->c, m, alpha, PADEORDER, v->d1);
924       } else {                  /* for LSP */
925          if (!NGAIN)
926             x *= v->c[0];
927          x = HTS_mglsadf(x, v->c, m, alpha, v->stage, v->d1);
928       }
929       x *= volume;
930 
931       /* output */
932       if (rawdata)
933          rawdata[rawidx++] = x;
934       if (audio) {
935          if (x > 32767.0)
936             xs = 32767;
937          else if (x < -32768.0)
938             xs = -32768;
939          else
940             xs = (short) x;
941          HTS_Audio_write(audio, xs);
942       }
943 
944       for (i = 0; i <= m; i++)
945          v->c[i] += v->cinc[i];
946    }
947 
948    HTS_Vocoder_end_excitation(v, p);
949    HTS_movem(v->cc, v->c, m + 1);
950 }
951 
952 /* HTS_Vocoder_clear: clear vocoder */
HTS_Vocoder_clear(HTS_Vocoder * v)953 void HTS_Vocoder_clear(HTS_Vocoder * v)
954 {
955    if (v != NULL) {
956       /* free buffer */
957       if (v->freqt_buff != NULL) {
958          HTS_free(v->freqt_buff);
959          v->freqt_buff = NULL;
960       }
961       v->freqt_size = 0;
962       if (v->gc2gc_buff != NULL) {
963          HTS_free(v->gc2gc_buff);
964          v->gc2gc_buff = NULL;
965       }
966       v->gc2gc_size = 0;
967       if (v->lsp2lpc_buff != NULL) {
968          HTS_free(v->lsp2lpc_buff);
969          v->lsp2lpc_buff = NULL;
970       }
971       v->lsp2lpc_size = 0;
972       if (v->postfilter_buff != NULL) {
973          HTS_free(v->postfilter_buff);
974          v->postfilter_buff = NULL;
975       }
976       v->postfilter_size = 0;
977       if (v->spectrum2en_buff != NULL) {
978          HTS_free(v->spectrum2en_buff);
979          v->spectrum2en_buff = NULL;
980       }
981       v->spectrum2en_size = 0;
982       if (v->c != NULL) {
983          HTS_free(v->c);
984          v->c = NULL;
985       }
986       v->excite_buff_size = 0;
987       v->excite_buff_index = 0;
988       if (v->excite_ring_buff != NULL) {
989          HTS_free(v->excite_ring_buff);
990          v->excite_ring_buff = NULL;
991       }
992    }
993 }
994 
995 HTS_VOCODER_C_END;
996 
997 #endif                          /* !HTS_VOCODER_C */
998