1 /* ----------------------------------------------------------------- */
2 /* The HMM-Based Speech Synthesis Engine "hts_engine API" */
3 /* developed by HTS Working Group */
4 /* http://hts-engine.sourceforge.net/ */
5 /* ----------------------------------------------------------------- */
6 /* */
7 /* Copyright (c) 2001-2015 Nagoya Institute of Technology */
8 /* Department of Computer Science */
9 /* */
10 /* 2001-2008 Tokyo Institute of Technology */
11 /* Interdisciplinary Graduate School of */
12 /* Science and Engineering */
13 /* */
14 /* All rights reserved. */
15 /* */
16 /* Redistribution and use in source and binary forms, with or */
17 /* without modification, are permitted provided that the following */
18 /* conditions are met: */
19 /* */
20 /* - Redistributions of source code must retain the above copyright */
21 /* notice, this list of conditions and the following disclaimer. */
22 /* - Redistributions in binary form must reproduce the above */
23 /* copyright notice, this list of conditions and the following */
24 /* disclaimer in the documentation and/or other materials provided */
25 /* with the distribution. */
26 /* - Neither the name of the HTS working group nor the names of its */
27 /* contributors may be used to endorse or promote products derived */
28 /* from this software without specific prior written permission. */
29 /* */
30 /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
31 /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
32 /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
33 /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
34 /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
35 /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */
36 /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
37 /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
38 /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
39 /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */
40 /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */
41 /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
42 /* POSSIBILITY OF SUCH DAMAGE. */
43 /* ----------------------------------------------------------------- */
44
45 #ifndef HTS_VOCODER_C
46 #define HTS_VOCODER_C
47
48 #ifdef __cplusplus
49 #define HTS_VOCODER_C_START extern "C" {
50 #define HTS_VOCODER_C_END }
51 #else
52 #define HTS_VOCODER_C_START
53 #define HTS_VOCODER_C_END
54 #endif /* __CPLUSPLUS */
55
56 HTS_VOCODER_C_START;
57
58 #include <math.h> /* for sqrt(),log(),exp(),pow(),cos() */
59
60 /* hts_engine libraries */
61 #include "HTS_hidden.h"
62
63 static const double HTS_pade[21] = {
64 1.00000000000,
65 1.00000000000,
66 0.00000000000,
67 1.00000000000,
68 0.00000000000,
69 0.00000000000,
70 1.00000000000,
71 0.00000000000,
72 0.00000000000,
73 0.00000000000,
74 1.00000000000,
75 0.49992730000,
76 0.10670050000,
77 0.01170221000,
78 0.00056562790,
79 1.00000000000,
80 0.49993910000,
81 0.11070980000,
82 0.01369984000,
83 0.00095648530,
84 0.00003041721
85 };
86
87 /* HTS_movem: move memory */
HTS_movem(double * a,double * b,const int nitem)88 static void HTS_movem(double *a, double *b, const int nitem)
89 {
90 long i = (long) nitem;
91
92 if (a > b)
93 while (i--)
94 *b++ = *a++;
95 else {
96 a += i;
97 b += i;
98 while (i--)
99 *--b = *--a;
100 }
101 }
102
103 /* HTS_mlsafir: sub functions for MLSA filter */
HTS_mlsafir(const double x,const double * b,const int m,const double a,const double aa,double * d)104 static double HTS_mlsafir(const double x, const double *b, const int m, const double a, const double aa, double *d)
105 {
106 double y = 0.0;
107 int i;
108
109 d[0] = x;
110 d[1] = aa * d[0] + a * d[1];
111
112 for (i = 2; i <= m; i++)
113 d[i] += a * (d[i + 1] - d[i - 1]);
114
115 for (i = 2; i <= m; i++)
116 y += d[i] * b[i];
117
118 for (i = m + 1; i > 1; i--)
119 d[i] = d[i - 1];
120
121 return (y);
122 }
123
124 /* HTS_mlsadf1: sub functions for MLSA filter */
HTS_mlsadf1(double x,const double * b,const int m,const double a,const double aa,const int pd,double * d,const double * ppade)125 static double HTS_mlsadf1(double x, const double *b, const int m, const double a, const double aa, const int pd, double *d, const double *ppade)
126 {
127 double v, out = 0.0, *pt;
128 int i;
129
130 pt = &d[pd + 1];
131
132 for (i = pd; i >= 1; i--) {
133 d[i] = aa * pt[i - 1] + a * d[i];
134 pt[i] = d[i] * b[1];
135 v = pt[i] * ppade[i];
136 x += (1 & i) ? v : -v;
137 out += v;
138 }
139
140 pt[0] = x;
141 out += x;
142
143 return (out);
144 }
145
146 /* HTS_mlsadf2: sub functions for MLSA filter */
HTS_mlsadf2(double x,const double * b,const int m,const double a,const double aa,const int pd,double * d,const double * ppade)147 static double HTS_mlsadf2(double x, const double *b, const int m, const double a, const double aa, const int pd, double *d, const double *ppade)
148 {
149 double v, out = 0.0, *pt;
150 int i;
151
152 pt = &d[pd * (m + 2)];
153
154 for (i = pd; i >= 1; i--) {
155 pt[i] = HTS_mlsafir(pt[i - 1], b, m, a, aa, &d[(i - 1) * (m + 2)]);
156 v = pt[i] * ppade[i];
157
158 x += (1 & i) ? v : -v;
159 out += v;
160 }
161
162 pt[0] = x;
163 out += x;
164
165 return (out);
166 }
167
168 /* HTS_mlsadf: functions for MLSA filter */
HTS_mlsadf(double x,const double * b,const int m,const double a,const int pd,double * d)169 static double HTS_mlsadf(double x, const double *b, const int m, const double a, const int pd, double *d)
170 {
171 const double aa = 1 - a * a;
172 const double *ppade = &(HTS_pade[pd * (pd + 1) / 2]);
173
174 x = HTS_mlsadf1(x, b, m, a, aa, pd, d, ppade);
175 x = HTS_mlsadf2(x, b, m, a, aa, pd, &d[2 * (pd + 1)], ppade);
176
177 return (x);
178 }
179
180 /* HTS_rnd: functions for random noise generation */
HTS_rnd(unsigned long * next)181 static double HTS_rnd(unsigned long *next)
182 {
183 double r;
184
185 *next = *next * 1103515245L + 12345;
186 r = (*next / 65536L) % 32768L;
187
188 return (r / RANDMAX);
189 }
190
191 /* HTS_nrandom: functions for gaussian random noise generation */
HTS_nrandom(HTS_Vocoder * v)192 static double HTS_nrandom(HTS_Vocoder * v)
193 {
194 if (v->sw == 0) {
195 v->sw = 1;
196 do {
197 v->r1 = 2 * HTS_rnd(&v->next) - 1;
198 v->r2 = 2 * HTS_rnd(&v->next) - 1;
199 v->s = v->r1 * v->r1 + v->r2 * v->r2;
200 } while (v->s > 1 || v->s == 0);
201 v->s = sqrt(-2 * log(v->s) / v->s);
202 return (v->r1 * v->s);
203 } else {
204 v->sw = 0;
205 return (v->r2 * v->s);
206 }
207 }
208
209 /* HTS_mceq: function for M-sequence random noise generation */
HTS_mseq(HTS_Vocoder * v)210 static int HTS_mseq(HTS_Vocoder * v)
211 {
212 int x0, x28;
213
214 v->x >>= 1;
215 if (v->x & B0)
216 x0 = 1;
217 else
218 x0 = -1;
219 if (v->x & B28)
220 x28 = 1;
221 else
222 x28 = -1;
223 if (x0 + x28)
224 v->x &= B31_;
225 else
226 v->x |= B31;
227
228 return (x0);
229 }
230
231 /* HTS_mc2b: transform mel-cepstrum to MLSA digital fillter coefficients */
HTS_mc2b(double * mc,double * b,int m,const double a)232 static void HTS_mc2b(double *mc, double *b, int m, const double a)
233 {
234 if (mc != b) {
235 if (a != 0.0) {
236 b[m] = mc[m];
237 for (m--; m >= 0; m--)
238 b[m] = mc[m] - a * b[m + 1];
239 } else
240 HTS_movem(mc, b, m + 1);
241 } else if (a != 0.0)
242 for (m--; m >= 0; m--)
243 b[m] -= a * b[m + 1];
244 }
245
246 /* HTS_b2bc: transform MLSA digital filter coefficients to mel-cepstrum */
HTS_b2mc(const double * b,double * mc,int m,const double a)247 static void HTS_b2mc(const double *b, double *mc, int m, const double a)
248 {
249 double d, o;
250
251 d = mc[m] = b[m];
252 for (m--; m >= 0; m--) {
253 o = b[m] + a * d;
254 d = b[m];
255 mc[m] = o;
256 }
257 }
258
259 /* HTS_freqt: frequency transformation */
HTS_freqt(HTS_Vocoder * v,const double * c1,const int m1,double * c2,const int m2,const double a)260 static void HTS_freqt(HTS_Vocoder * v, const double *c1, const int m1, double *c2, const int m2, const double a)
261 {
262 int i, j;
263 const double b = 1 - a * a;
264 double *g;
265
266 if (m2 > v->freqt_size) {
267 if (v->freqt_buff != NULL)
268 HTS_free(v->freqt_buff);
269 v->freqt_buff = (double *) HTS_calloc(m2 + m2 + 2, sizeof(double));
270 v->freqt_size = m2;
271 }
272 g = v->freqt_buff + v->freqt_size + 1;
273
274 for (i = 0; i < m2 + 1; i++)
275 g[i] = 0.0;
276
277 for (i = -m1; i <= 0; i++) {
278 if (0 <= m2)
279 g[0] = c1[-i] + a * (v->freqt_buff[0] = g[0]);
280 if (1 <= m2)
281 g[1] = b * v->freqt_buff[0] + a * (v->freqt_buff[1] = g[1]);
282 for (j = 2; j <= m2; j++)
283 g[j] = v->freqt_buff[j - 1] + a * ((v->freqt_buff[j] = g[j]) - g[j - 1]);
284 }
285
286 HTS_movem(g, c2, m2 + 1);
287 }
288
289 /* HTS_c2ir: The minimum phase impulse response is evaluated from the minimum phase cepstrum */
HTS_c2ir(const double * c,const int nc,double * h,const int leng)290 static void HTS_c2ir(const double *c, const int nc, double *h, const int leng)
291 {
292 int n, k, upl;
293 double d;
294
295 h[0] = exp(c[0]);
296 for (n = 1; n < leng; n++) {
297 d = 0;
298 upl = (n >= nc) ? nc - 1 : n;
299 for (k = 1; k <= upl; k++)
300 d += k * c[k] * h[n - k];
301 h[n] = d / n;
302 }
303 }
304
305 /* HTS_b2en: calculate frame energy */
HTS_b2en(HTS_Vocoder * v,const double * b,const int m,const double a)306 static double HTS_b2en(HTS_Vocoder * v, const double *b, const int m, const double a)
307 {
308 int i;
309 double en = 0.0;
310 double *cep;
311 double *ir;
312
313 if (v->spectrum2en_size < m) {
314 if (v->spectrum2en_buff != NULL)
315 HTS_free(v->spectrum2en_buff);
316 v->spectrum2en_buff = (double *) HTS_calloc((m + 1) + 2 * IRLENG, sizeof(double));
317 v->spectrum2en_size = m;
318 }
319 cep = v->spectrum2en_buff + m + 1;
320 ir = cep + IRLENG;
321
322 HTS_b2mc(b, v->spectrum2en_buff, m, a);
323 HTS_freqt(v, v->spectrum2en_buff, m, cep, IRLENG - 1, -a);
324 HTS_c2ir(cep, IRLENG, ir, IRLENG);
325
326 for (i = 0; i < IRLENG; i++)
327 en += ir[i] * ir[i];
328
329 return (en);
330 }
331
332 /* HTS_ignorm: inverse gain normalization */
HTS_ignorm(double * c1,double * c2,int m,const double g)333 static void HTS_ignorm(double *c1, double *c2, int m, const double g)
334 {
335 double k;
336 if (g != 0.0) {
337 k = pow(c1[0], g);
338 for (; m >= 1; m--)
339 c2[m] = k * c1[m];
340 c2[0] = (k - 1.0) / g;
341 } else {
342 HTS_movem(&c1[1], &c2[1], m);
343 c2[0] = log(c1[0]);
344 }
345 }
346
347 /* HTS_gnorm: gain normalization */
HTS_gnorm(double * c1,double * c2,int m,const double g)348 static void HTS_gnorm(double *c1, double *c2, int m, const double g)
349 {
350 double k;
351 if (g != 0.0) {
352 k = 1.0 + g * c1[0];
353 for (; m >= 1; m--)
354 c2[m] = c1[m] / k;
355 c2[0] = pow(k, 1.0 / g);
356 } else {
357 HTS_movem(&c1[1], &c2[1], m);
358 c2[0] = exp(c1[0]);
359 }
360 }
361
362 /* HTS_lsp2lpc: transform LSP to LPC */
HTS_lsp2lpc(HTS_Vocoder * v,double * lsp,double * a,const int m)363 static void HTS_lsp2lpc(HTS_Vocoder * v, double *lsp, double *a, const int m)
364 {
365 int i, k, mh1, mh2, flag_odd;
366 double xx, xf, xff;
367 double *p, *q;
368 double *a0, *a1, *a2, *b0, *b1, *b2;
369
370 flag_odd = 0;
371 if (m % 2 == 0)
372 mh1 = mh2 = m / 2;
373 else {
374 mh1 = (m + 1) / 2;
375 mh2 = (m - 1) / 2;
376 flag_odd = 1;
377 }
378
379 if (m > v->lsp2lpc_size) {
380 if (v->lsp2lpc_buff != NULL)
381 HTS_free(v->lsp2lpc_buff);
382 v->lsp2lpc_buff = (double *) HTS_calloc(5 * m + 6, sizeof(double));
383 v->lsp2lpc_size = m;
384 }
385 p = v->lsp2lpc_buff + m;
386 q = p + mh1;
387 a0 = q + mh2;
388 a1 = a0 + (mh1 + 1);
389 a2 = a1 + (mh1 + 1);
390 b0 = a2 + (mh1 + 1);
391 b1 = b0 + (mh2 + 1);
392 b2 = b1 + (mh2 + 1);
393
394 HTS_movem(lsp, v->lsp2lpc_buff, m);
395
396 for (i = 0; i < mh1 + 1; i++)
397 a0[i] = 0.0;
398 for (i = 0; i < mh1 + 1; i++)
399 a1[i] = 0.0;
400 for (i = 0; i < mh1 + 1; i++)
401 a2[i] = 0.0;
402 for (i = 0; i < mh2 + 1; i++)
403 b0[i] = 0.0;
404 for (i = 0; i < mh2 + 1; i++)
405 b1[i] = 0.0;
406 for (i = 0; i < mh2 + 1; i++)
407 b2[i] = 0.0;
408
409 /* lsp filter parameters */
410 for (i = k = 0; i < mh1; i++, k += 2)
411 p[i] = -2.0 * cos(v->lsp2lpc_buff[k]);
412 for (i = k = 0; i < mh2; i++, k += 2)
413 q[i] = -2.0 * cos(v->lsp2lpc_buff[k + 1]);
414
415 /* impulse response of analysis filter */
416 xx = 1.0;
417 xf = xff = 0.0;
418
419 for (k = 0; k <= m; k++) {
420 if (flag_odd) {
421 a0[0] = xx;
422 b0[0] = xx - xff;
423 xff = xf;
424 xf = xx;
425 } else {
426 a0[0] = xx + xf;
427 b0[0] = xx - xf;
428 xf = xx;
429 }
430
431 for (i = 0; i < mh1; i++) {
432 a0[i + 1] = a0[i] + p[i] * a1[i] + a2[i];
433 a2[i] = a1[i];
434 a1[i] = a0[i];
435 }
436
437 for (i = 0; i < mh2; i++) {
438 b0[i + 1] = b0[i] + q[i] * b1[i] + b2[i];
439 b2[i] = b1[i];
440 b1[i] = b0[i];
441 }
442
443 if (k != 0)
444 a[k - 1] = -0.5 * (a0[mh1] + b0[mh2]);
445 xx = 0.0;
446 }
447
448 for (i = m - 1; i >= 0; i--)
449 a[i + 1] = -a[i];
450 a[0] = 1.0;
451 }
452
453 /* HTS_gc2gc: generalized cepstral transformation */
HTS_gc2gc(HTS_Vocoder * v,double * c1,const int m1,const double g1,double * c2,const int m2,const double g2)454 static void HTS_gc2gc(HTS_Vocoder * v, double *c1, const int m1, const double g1, double *c2, const int m2, const double g2)
455 {
456 int i, min, k, mk;
457 double ss1, ss2, cc;
458
459 if (m1 > v->gc2gc_size) {
460 if (v->gc2gc_buff != NULL)
461 HTS_free(v->gc2gc_buff);
462 v->gc2gc_buff = (double *) HTS_calloc(m1 + 1, sizeof(double));
463 v->gc2gc_size = m1;
464 }
465
466 HTS_movem(c1, v->gc2gc_buff, m1 + 1);
467
468 c2[0] = v->gc2gc_buff[0];
469 for (i = 1; i <= m2; i++) {
470 ss1 = ss2 = 0.0;
471 min = m1 < i ? m1 : i - 1;
472 for (k = 1; k <= min; k++) {
473 mk = i - k;
474 cc = v->gc2gc_buff[k] * c2[mk];
475 ss2 += k * cc;
476 ss1 += mk * cc;
477 }
478
479 if (i <= m1)
480 c2[i] = v->gc2gc_buff[i] + (g2 * ss2 - g1 * ss1) / i;
481 else
482 c2[i] = (g2 * ss2 - g1 * ss1) / i;
483 }
484 }
485
486 /* HTS_mgc2mgc: frequency and generalized cepstral transformation */
HTS_mgc2mgc(HTS_Vocoder * v,double * c1,const int m1,const double a1,const double g1,double * c2,const int m2,const double a2,const double g2)487 static void HTS_mgc2mgc(HTS_Vocoder * v, double *c1, const int m1, const double a1, const double g1, double *c2, const int m2, const double a2, const double g2)
488 {
489 double a;
490
491 if (a1 == a2) {
492 HTS_gnorm(c1, c1, m1, g1);
493 HTS_gc2gc(v, c1, m1, g1, c2, m2, g2);
494 HTS_ignorm(c2, c2, m2, g2);
495 } else {
496 a = (a2 - a1) / (1 - a1 * a2);
497 HTS_freqt(v, c1, m1, c2, m2, a);
498 HTS_gnorm(c2, c2, m2, g1);
499 HTS_gc2gc(v, c2, m2, g1, c2, m2, g2);
500 HTS_ignorm(c2, c2, m2, g2);
501 }
502 }
503
504 /* HTS_lsp2mgc: transform LSP to MGC */
HTS_lsp2mgc(HTS_Vocoder * v,double * lsp,double * mgc,const int m,const double alpha)505 static void HTS_lsp2mgc(HTS_Vocoder * v, double *lsp, double *mgc, const int m, const double alpha)
506 {
507 int i;
508 /* lsp2lpc */
509 HTS_lsp2lpc(v, lsp + 1, mgc, m);
510 if (v->use_log_gain)
511 mgc[0] = exp(lsp[0]);
512 else
513 mgc[0] = lsp[0];
514
515 /* mgc2mgc */
516 if (NORMFLG1)
517 HTS_ignorm(mgc, mgc, m, v->gamma);
518 else if (MULGFLG1)
519 mgc[0] = (1.0 - mgc[0]) * ((double) v->stage);
520 if (MULGFLG1)
521 for (i = m; i >= 1; i--)
522 mgc[i] *= -((double) v->stage);
523 HTS_mgc2mgc(v, mgc, m, alpha, v->gamma, mgc, m, alpha, v->gamma);
524 if (NORMFLG2)
525 HTS_gnorm(mgc, mgc, m, v->gamma);
526 else if (MULGFLG2)
527 mgc[0] = mgc[0] * v->gamma + 1.0;
528 if (MULGFLG2)
529 for (i = m; i >= 1; i--)
530 mgc[i] *= v->gamma;
531 }
532
533 /* HTS_mglsadff: sub functions for MGLSA filter */
HTS_mglsadff(double x,const double * b,const int m,const double a,double * d)534 static double HTS_mglsadff(double x, const double *b, const int m, const double a, double *d)
535 {
536 int i;
537
538 double y;
539 y = d[0] * b[1];
540 for (i = 1; i < m; i++) {
541 d[i] += a * (d[i + 1] - d[i - 1]);
542 y += d[i] * b[i + 1];
543 }
544 x -= y;
545
546 for (i = m; i > 0; i--)
547 d[i] = d[i - 1];
548 d[0] = a * d[0] + (1 - a * a) * x;
549 return x;
550 }
551
552 /* HTS_mglsadf: sub functions for MGLSA filter */
HTS_mglsadf(double x,const double * b,const int m,const double a,const int n,double * d)553 static double HTS_mglsadf(double x, const double *b, const int m, const double a, const int n, double *d)
554 {
555 int i;
556
557 for (i = 0; i < n; i++)
558 x = HTS_mglsadff(x, b, m, a, &d[i * (m + 1)]);
559
560 return x;
561 }
562
563 /* THS_check_lsp_stability: check LSP stability */
HTS_check_lsp_stability(double * lsp,size_t m)564 static void HTS_check_lsp_stability(double *lsp, size_t m)
565 {
566 size_t i, j;
567 double tmp;
568 double min = (CHECK_LSP_STABILITY_MIN * PI) / (m + 1);
569 HTS_Boolean find;
570
571 for (i = 0; i < CHECK_LSP_STABILITY_NUM; i++) {
572 find = FALSE;
573
574 for (j = 1; j < m; j++) {
575 tmp = lsp[j + 1] - lsp[j];
576 if (tmp < min) {
577 lsp[j] -= 0.5 * (min - tmp);
578 lsp[j + 1] += 0.5 * (min - tmp);
579 find = TRUE;
580 }
581 }
582
583 if (lsp[1] < min) {
584 lsp[1] = min;
585 find = TRUE;
586 }
587 if (lsp[m] > PI - min) {
588 lsp[m] = PI - min;
589 find = TRUE;
590 }
591
592 if (find == FALSE)
593 break;
594 }
595 }
596
597 /* HTS_lsp2en: calculate frame energy */
HTS_lsp2en(HTS_Vocoder * v,double * lsp,size_t m,double alpha)598 static double HTS_lsp2en(HTS_Vocoder * v, double *lsp, size_t m, double alpha)
599 {
600 size_t i;
601 double en = 0.0;
602 double *buff;
603
604 if (v->spectrum2en_size < m) {
605 if (v->spectrum2en_buff != NULL)
606 HTS_free(v->spectrum2en_buff);
607 v->spectrum2en_buff = (double *) HTS_calloc(m + 1 + IRLENG, sizeof(double));
608 v->spectrum2en_size = m;
609 }
610 buff = v->spectrum2en_buff + m + 1;
611
612 /* lsp2lpc */
613 HTS_lsp2lpc(v, lsp + 1, v->spectrum2en_buff, m);
614 if (v->use_log_gain)
615 v->spectrum2en_buff[0] = exp(lsp[0]);
616 else
617 v->spectrum2en_buff[0] = lsp[0];
618
619 /* mgc2mgc */
620 if (NORMFLG1)
621 HTS_ignorm(v->spectrum2en_buff, v->spectrum2en_buff, m, v->gamma);
622 else if (MULGFLG1)
623 v->spectrum2en_buff[0] = (1.0 - v->spectrum2en_buff[0]) * ((double) v->stage);
624 if (MULGFLG1)
625 for (i = 1; i <= m; i++)
626 v->spectrum2en_buff[i] *= -((double) v->stage);
627 HTS_mgc2mgc(v, v->spectrum2en_buff, m, alpha, v->gamma, buff, IRLENG - 1, 0.0, 1);
628
629 for (i = 0; i < IRLENG; i++)
630 en += buff[i] * buff[i];
631 return en;
632 }
633
634 /* HTS_white_noise: return white noise */
HTS_white_noise(HTS_Vocoder * v)635 static double HTS_white_noise(HTS_Vocoder * v)
636 {
637 if (v->gauss)
638 return (double) HTS_nrandom(v);
639 else
640 return (double) HTS_mseq(v);
641 }
642
643 /* HTS_Vocoder_initialize_excitation: initialize excitation */
HTS_Vocoder_initialize_excitation(HTS_Vocoder * v,double pitch,size_t nlpf)644 static void HTS_Vocoder_initialize_excitation(HTS_Vocoder * v, double pitch, size_t nlpf)
645 {
646 size_t i;
647
648 v->pitch_of_curr_point = pitch;
649 v->pitch_counter = pitch;
650 v->pitch_inc_per_point = 0.0;
651 if (nlpf > 0) {
652 v->excite_buff_size = nlpf;
653 v->excite_ring_buff = (double *) HTS_calloc(v->excite_buff_size, sizeof(double));
654 for (i = 0; i < v->excite_buff_size; i++)
655 v->excite_ring_buff[i] = 0.0;
656 v->excite_buff_index = 0;
657 } else {
658 v->excite_buff_size = 0;
659 v->excite_ring_buff = NULL;
660 v->excite_buff_index = 0;
661 }
662 }
663
664 /* HTS_Vocoder_start_excitation: start excitation of each frame */
HTS_Vocoder_start_excitation(HTS_Vocoder * v,double pitch)665 static void HTS_Vocoder_start_excitation(HTS_Vocoder * v, double pitch)
666 {
667 if (v->pitch_of_curr_point != 0.0 && pitch != 0.0) {
668 v->pitch_inc_per_point = (pitch - v->pitch_of_curr_point) / v->fprd;
669 } else {
670 v->pitch_inc_per_point = 0.0;
671 v->pitch_of_curr_point = pitch;
672 v->pitch_counter = pitch;
673 }
674 }
675
676 /* HTS_Vocoder_excite_unvoiced_frame: ping noise to ring buffer */
HTS_Vocoder_excite_unvoiced_frame(HTS_Vocoder * v,double noise)677 static void HTS_Vocoder_excite_unvoiced_frame(HTS_Vocoder * v, double noise)
678 {
679 size_t center = (v->excite_buff_size - 1) / 2;
680 v->excite_ring_buff[(v->excite_buff_index + center) % v->excite_buff_size] += noise;
681 }
682
683 /* HTS_Vocoder_excite_vooiced_frame: ping noise and pulse to ring buffer */
HTS_Vocoder_excite_voiced_frame(HTS_Vocoder * v,double noise,double pulse,const double * lpf)684 static void HTS_Vocoder_excite_voiced_frame(HTS_Vocoder * v, double noise, double pulse, const double *lpf)
685 {
686 size_t i;
687 size_t center = (v->excite_buff_size - 1) / 2;
688
689 if (noise != 0.0) {
690 for (i = 0; i < v->excite_buff_size; i++) {
691 if (i == center)
692 v->excite_ring_buff[(v->excite_buff_index + i) % v->excite_buff_size] += noise * (1.0 - lpf[i]);
693 else
694 v->excite_ring_buff[(v->excite_buff_index + i) % v->excite_buff_size] += noise * (0.0 - lpf[i]);
695 }
696 }
697 if (pulse != 0.0) {
698 for (i = 0; i < v->excite_buff_size; i++)
699 v->excite_ring_buff[(v->excite_buff_index + i) % v->excite_buff_size] += pulse * lpf[i];
700 }
701 }
702
703 /* HTS_Vocoder_get_excitation: get excitation of each sample */
HTS_Vocoder_get_excitation(HTS_Vocoder * v,const double * lpf)704 static double HTS_Vocoder_get_excitation(HTS_Vocoder * v, const double *lpf)
705 {
706 double x;
707 double noise, pulse = 0.0;
708
709 if (v->excite_buff_size > 0) {
710 noise = HTS_white_noise(v);
711 pulse = 0.0;
712 if (v->pitch_of_curr_point == 0.0) {
713 HTS_Vocoder_excite_unvoiced_frame(v, noise);
714 } else {
715 v->pitch_counter += 1.0;
716 if (v->pitch_counter >= v->pitch_of_curr_point) {
717 pulse = sqrt(v->pitch_of_curr_point);
718 v->pitch_counter -= v->pitch_of_curr_point;
719 }
720 HTS_Vocoder_excite_voiced_frame(v, noise, pulse, lpf);
721 v->pitch_of_curr_point += v->pitch_inc_per_point;
722 }
723 x = v->excite_ring_buff[v->excite_buff_index];
724 v->excite_ring_buff[v->excite_buff_index] = 0.0;
725 v->excite_buff_index++;
726 if (v->excite_buff_index >= v->excite_buff_size)
727 v->excite_buff_index = 0;
728 } else {
729 if (v->pitch_of_curr_point == 0.0) {
730 x = HTS_white_noise(v);
731 } else {
732 v->pitch_counter += 1.0;
733 if (v->pitch_counter >= v->pitch_of_curr_point) {
734 x = sqrt(v->pitch_of_curr_point);
735 v->pitch_counter -= v->pitch_of_curr_point;
736 } else {
737 x = 0.0;
738 }
739 v->pitch_of_curr_point += v->pitch_inc_per_point;
740 }
741 }
742
743 return x;
744 }
745
746 /* HTS_Vocoder_end_excitation: end excitation of each frame */
HTS_Vocoder_end_excitation(HTS_Vocoder * v,double pitch)747 static void HTS_Vocoder_end_excitation(HTS_Vocoder * v, double pitch)
748 {
749 v->pitch_of_curr_point = pitch;
750 }
751
752 /* HTS_Vocoder_postfilter_mcp: postfilter for MCP */
HTS_Vocoder_postfilter_mcp(HTS_Vocoder * v,double * mcp,const int m,double alpha,double beta)753 static void HTS_Vocoder_postfilter_mcp(HTS_Vocoder * v, double *mcp, const int m, double alpha, double beta)
754 {
755 double e1, e2;
756 int k;
757
758 if (beta > 0.0 && m > 1) {
759 if (v->postfilter_size < m) {
760 if (v->postfilter_buff != NULL)
761 HTS_free(v->postfilter_buff);
762 v->postfilter_buff = (double *) HTS_calloc(m + 1, sizeof(double));
763 v->postfilter_size = m;
764 }
765 HTS_mc2b(mcp, v->postfilter_buff, m, alpha);
766 e1 = HTS_b2en(v, v->postfilter_buff, m, alpha);
767
768 v->postfilter_buff[1] -= beta * alpha * v->postfilter_buff[2];
769 for (k = 2; k <= m; k++)
770 v->postfilter_buff[k] *= (1.0 + beta);
771
772 e2 = HTS_b2en(v, v->postfilter_buff, m, alpha);
773 v->postfilter_buff[0] += log(e1 / e2) / 2;
774 HTS_b2mc(v->postfilter_buff, mcp, m, alpha);
775 }
776 }
777
778 /* HTS_Vocoder_postfilter_lsp: postfilter for LSP */
HTS_Vocoder_postfilter_lsp(HTS_Vocoder * v,double * lsp,size_t m,double alpha,double beta)779 static void HTS_Vocoder_postfilter_lsp(HTS_Vocoder * v, double *lsp, size_t m, double alpha, double beta)
780 {
781 double e1, e2;
782 size_t i;
783 double d1, d2;
784
785 if (beta > 0.0 && m > 1) {
786 if (v->postfilter_size < m) {
787 if (v->postfilter_buff != NULL)
788 HTS_free(v->postfilter_buff);
789 v->postfilter_buff = (double *) HTS_calloc(m + 1, sizeof(double));
790 v->postfilter_size = m;
791 }
792
793 e1 = HTS_lsp2en(v, lsp, m, alpha);
794
795 /* postfiltering */
796 for (i = 0; i <= m; i++) {
797 if (i > 1 && i < m) {
798 d1 = beta * (lsp[i + 1] - lsp[i]);
799 d2 = beta * (lsp[i] - lsp[i - 1]);
800 v->postfilter_buff[i] = lsp[i - 1] + d2 + (d2 * d2 * ((lsp[i + 1] - lsp[i - 1]) - (d1 + d2))) / ((d2 * d2) + (d1 * d1));
801 } else {
802 v->postfilter_buff[i] = lsp[i];
803 }
804 }
805 HTS_movem(v->postfilter_buff, lsp, m + 1);
806
807 e2 = HTS_lsp2en(v, lsp, m, alpha);
808
809 if (e1 != e2) {
810 if (v->use_log_gain)
811 lsp[0] += 0.5 * log(e1 / e2);
812 else
813 lsp[0] *= sqrt(e1 / e2);
814 }
815 }
816 }
817
818 /* HTS_Vocoder_initialize: initialize vocoder */
HTS_Vocoder_initialize(HTS_Vocoder * v,size_t m,size_t stage,HTS_Boolean use_log_gain,size_t rate,size_t fperiod)819 void HTS_Vocoder_initialize(HTS_Vocoder * v, size_t m, size_t stage, HTS_Boolean use_log_gain, size_t rate, size_t fperiod)
820 {
821 /* set parameter */
822 v->is_first = TRUE;
823 v->stage = stage;
824 if (stage != 0)
825 v->gamma = -1.0 / v->stage;
826 else
827 v->gamma = 0.0;
828 v->use_log_gain = use_log_gain;
829 v->fprd = fperiod;
830 v->next = SEED;
831 v->gauss = GAUSS;
832 v->rate = rate;
833 v->pitch_of_curr_point = 0.0;
834 v->pitch_counter = 0.0;
835 v->pitch_inc_per_point = 0.0;
836 v->excite_ring_buff = NULL;
837 v->excite_buff_size = 0;
838 v->excite_buff_index = 0;
839 v->sw = 0;
840 v->x = 0x55555555;
841 /* init buffer */
842 v->freqt_buff = NULL;
843 v->freqt_size = 0;
844 v->gc2gc_buff = NULL;
845 v->gc2gc_size = 0;
846 v->lsp2lpc_buff = NULL;
847 v->lsp2lpc_size = 0;
848 v->postfilter_buff = NULL;
849 v->postfilter_size = 0;
850 v->spectrum2en_buff = NULL;
851 v->spectrum2en_size = 0;
852 if (v->stage == 0) { /* for MCP */
853 v->c = (double *) HTS_calloc(m * (3 + PADEORDER) + 5 * PADEORDER + 6, sizeof(double));
854 v->cc = v->c + m + 1;
855 v->cinc = v->cc + m + 1;
856 v->d1 = v->cinc + m + 1;
857 } else { /* for LSP */
858 v->c = (double *) HTS_calloc((m + 1) * (v->stage + 3), sizeof(double));
859 v->cc = v->c + m + 1;
860 v->cinc = v->cc + m + 1;
861 v->d1 = v->cinc + m + 1;
862 }
863 }
864
865 /* HTS_Vocoder_synthesize: pulse/noise excitation and MLSA/MGLSA filster based waveform synthesis */
HTS_Vocoder_synthesize(HTS_Vocoder * v,size_t m,double lf0,double * spectrum,size_t nlpf,double * lpf,double alpha,double beta,double volume,double * rawdata,HTS_Audio * audio)866 void HTS_Vocoder_synthesize(HTS_Vocoder * v, size_t m, double lf0, double *spectrum, size_t nlpf, double *lpf, double alpha, double beta, double volume, double *rawdata, HTS_Audio * audio)
867 {
868 double x;
869 int i, j;
870 short xs;
871 int rawidx = 0;
872 double p;
873
874 /* lf0 -> pitch */
875 if (lf0 == LZERO)
876 p = 0.0;
877 else if (lf0 <= MIN_LF0)
878 p = v->rate / MIN_F0;
879 else if (lf0 >= MAX_LF0)
880 p = v->rate / MAX_F0;
881 else
882 p = v->rate / exp(lf0);
883
884 /* first time */
885 if (v->is_first == TRUE) {
886 HTS_Vocoder_initialize_excitation(v, p, nlpf);
887 if (v->stage == 0) { /* for MCP */
888 HTS_mc2b(spectrum, v->c, m, alpha);
889 } else { /* for LSP */
890 HTS_movem(spectrum, v->c, m + 1);
891 HTS_lsp2mgc(v, v->c, v->c, m, alpha);
892 HTS_mc2b(v->c, v->c, m, alpha);
893 HTS_gnorm(v->c, v->c, m, v->gamma);
894 for (i = 1; i <= m; i++)
895 v->c[i] *= v->gamma;
896 }
897 v->is_first = FALSE;
898 }
899
900 HTS_Vocoder_start_excitation(v, p);
901 if (v->stage == 0) { /* for MCP */
902 HTS_Vocoder_postfilter_mcp(v, spectrum, m, alpha, beta);
903 HTS_mc2b(spectrum, v->cc, m, alpha);
904 for (i = 0; i <= m; i++)
905 v->cinc[i] = (v->cc[i] - v->c[i]) / v->fprd;
906 } else { /* for LSP */
907 HTS_Vocoder_postfilter_lsp(v, spectrum, m, alpha, beta);
908 HTS_check_lsp_stability(spectrum, m);
909 HTS_lsp2mgc(v, spectrum, v->cc, m, alpha);
910 HTS_mc2b(v->cc, v->cc, m, alpha);
911 HTS_gnorm(v->cc, v->cc, m, v->gamma);
912 for (i = 1; i <= m; i++)
913 v->cc[i] *= v->gamma;
914 for (i = 0; i <= m; i++)
915 v->cinc[i] = (v->cc[i] - v->c[i]) / v->fprd;
916 }
917
918 for (j = 0; j < v->fprd; j++) {
919 x = HTS_Vocoder_get_excitation(v, lpf);
920 if (v->stage == 0) { /* for MCP */
921 if (x != 0.0)
922 x *= exp(v->c[0]);
923 x = HTS_mlsadf(x, v->c, m, alpha, PADEORDER, v->d1);
924 } else { /* for LSP */
925 if (!NGAIN)
926 x *= v->c[0];
927 x = HTS_mglsadf(x, v->c, m, alpha, v->stage, v->d1);
928 }
929 x *= volume;
930
931 /* output */
932 if (rawdata)
933 rawdata[rawidx++] = x;
934 if (audio) {
935 if (x > 32767.0)
936 xs = 32767;
937 else if (x < -32768.0)
938 xs = -32768;
939 else
940 xs = (short) x;
941 HTS_Audio_write(audio, xs);
942 }
943
944 for (i = 0; i <= m; i++)
945 v->c[i] += v->cinc[i];
946 }
947
948 HTS_Vocoder_end_excitation(v, p);
949 HTS_movem(v->cc, v->c, m + 1);
950 }
951
952 /* HTS_Vocoder_clear: clear vocoder */
HTS_Vocoder_clear(HTS_Vocoder * v)953 void HTS_Vocoder_clear(HTS_Vocoder * v)
954 {
955 if (v != NULL) {
956 /* free buffer */
957 if (v->freqt_buff != NULL) {
958 HTS_free(v->freqt_buff);
959 v->freqt_buff = NULL;
960 }
961 v->freqt_size = 0;
962 if (v->gc2gc_buff != NULL) {
963 HTS_free(v->gc2gc_buff);
964 v->gc2gc_buff = NULL;
965 }
966 v->gc2gc_size = 0;
967 if (v->lsp2lpc_buff != NULL) {
968 HTS_free(v->lsp2lpc_buff);
969 v->lsp2lpc_buff = NULL;
970 }
971 v->lsp2lpc_size = 0;
972 if (v->postfilter_buff != NULL) {
973 HTS_free(v->postfilter_buff);
974 v->postfilter_buff = NULL;
975 }
976 v->postfilter_size = 0;
977 if (v->spectrum2en_buff != NULL) {
978 HTS_free(v->spectrum2en_buff);
979 v->spectrum2en_buff = NULL;
980 }
981 v->spectrum2en_size = 0;
982 if (v->c != NULL) {
983 HTS_free(v->c);
984 v->c = NULL;
985 }
986 v->excite_buff_size = 0;
987 v->excite_buff_index = 0;
988 if (v->excite_ring_buff != NULL) {
989 HTS_free(v->excite_ring_buff);
990 v->excite_ring_buff = NULL;
991 }
992 }
993 }
994
995 HTS_VOCODER_C_END;
996
997 #endif /* !HTS_VOCODER_C */
998