1 /* --------------------------------------------------------------- */
2 /* The HMM-Based Speech Synthesis System (HTS): version 1.1b */
3 /* HTS Working Group */
4 /* */
5 /* Department of Computer Science */
6 /* Nagoya Institute of Technology */
7 /* and */
8 /* Interdisciplinary Graduate School of Science and Engineering */
9 /* Tokyo Institute of Technology */
10 /* Copyright (c) 2001-2003 */
11 /* All Rights Reserved. */
12 /* */
13 /* Permission is hereby granted, free of charge, to use and */
14 /* distribute this software and its documentation without */
15 /* restriction, including without limitation the rights to use, */
16 /* copy, modify, merge, publish, distribute, sublicense, and/or */
17 /* sell copies of this work, and to permit persons to whom this */
18 /* work is furnished to do so, subject to the following conditions: */
19 /* */
20 /* 1. The code must retain the above copyright notice, this list */
21 /* of conditions and the following disclaimer. */
22 /* */
23 /* 2. Any modifications must be clearly marked as such. */
24 /* */
25 /* NAGOYA INSTITUTE OF TECHNOLOGY, TOKYO INSITITUTE OF TECHNOLOGY, */
26 /* HTS WORKING GROUP, AND THE CONTRIBUTORS TO THIS WORK DISCLAIM */
27 /* ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL */
28 /* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
29 /* SHALL NAGOYA INSTITUTE OF TECHNOLOGY, TOKYO INSITITUTE OF */
30 /* TECHNOLOGY, HTS WORKING GROUP, NOR THE CONTRIBUTORS BE LIABLE */
31 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY */
32 /* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, */
33 /* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTUOUS */
34 /* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR */
35 /* PERFORMANCE OF THIS SOFTWARE. */
36 /* */
37 /* --------------------------------------------------------------- */
38 /* mlpg.c : speech parameter generation from pdf sequence */
39 /* */
40 /* 2003/06/11 by Heiga Zen */
41 /* --------------------------------------------------------------- */
42
43 #include <cstdio>
44 #include <cstdlib>
45 #include <cstring>
46 #include <cctype>
47 #include <cmath>
48 #include "festival.h"
49
50 #include "defaults.h"
51 #include "misc.h"
52 #include "model.h"
53 #include "global.h"
54 #include "vocoder.h"
55 #include "mlpg.h"
56
finv(double x)57 double finv (double x)
58 {
59 if (x >= INFTY2) return 0.0;
60 if (x <= -INFTY2) return 0.0;
61 if (x <= INVINF2 && x >= 0) return INFTY;
62 if (x >= -INVINF2 && x < 0) return -INFTY;
63
64 return 1.0/x;
65 }
66
dcalloc(int x)67 double *dcalloc(int x)
68 {
69 return walloc(double,x);
70 }
71
ddcalloc(int x,int y)72 double **ddcalloc(int x, int y)
73 {
74 register int i;
75 double **ptr;
76
77 ptr = walloc(double *,x);
78
79 for (i=0; i<x; i++)
80 ptr[i] = dcalloc(y);
81
82 return(ptr);
83 }
84
fcalloc(int x)85 float *fcalloc(int x)
86 {
87 return walloc(float,x);
88 }
89
ffcalloc(int x,int y)90 float **ffcalloc(int x, int y)
91 {
92 register int i;
93 float **ptr;
94
95 ptr = walloc(float *,x);
96
97 for (i=0; i<x; i++)
98 ptr[i] = fcalloc(y);
99
100 return(ptr);
101 }
102
str2farray(char * c,float ** x)103 int str2farray (char *c, float **x)
104 {
105 int i, size, sp;
106 char *p, *buf;
107
108 while (isspace(*c))
109 c++;
110
111 if (*c == '\0') {
112 *x = NULL;
113 return (0);
114 }
115
116 size = 1;
117 sp = 0;
118
119 for (p = c; *p != '\0'; p++) {
120 if (!isspace (*p)) {
121 if (sp == 1) {
122 size++;
123 sp = 0;
124 }
125 }
126 else
127 sp = 1;
128 }
129
130 buf = walloc(char,strlen(c));
131
132 *x = walloc(float,size);
133
134 for (i=0; i<size; i++)
135 (*x)[i] = (float)strtod (c, &c);
136
137 return (size);
138 }
139
140 /*----------------------------------------------------------------
141 matrix calcuration functions
142 ----------------------------------------------------------------*/
143
144 /* calc_R_and_r : calcurate R=W'U^{-1}W and r=W'U^{-1}M */
calc_R_and_r(PStream * pst,int m)145 void calc_R_and_r(PStream *pst, int m)
146 {
147 register int i, j, k, l, n;
148 double wu;
149
150 for (i=0; i<pst->T; i++) {
151 pst->sm.r[i] = pst->sm.ivseq[i][m] * pst->sm.mseq[i][m];
152 pst->sm.R[i][0] = pst->sm.ivseq[i][m];
153
154 for (j=1; j<pst->width; j++)
155 pst->sm.R[i][j]=0.0;
156
157 for (j=1; j<pst->dw.num; j++)
158 for (k=pst->dw.width[j][0]; k<=pst->dw.width[j][1]; k++) {
159 n = i+k;
160 if ( (n>=0) && (n<pst->T) && (pst->dw.coef[j][-k]!=0.0) ) {
161 l = j*(pst->order+1)+m;
162 wu = pst->dw.coef[j][-k] * pst->sm.ivseq[n][l];
163 pst->sm.r[i] += wu*pst->sm.mseq[n][l];
164
165 for (l=0; l<pst->width; l++) {
166 n = l-k;
167 if ( (n<=pst->dw.width[j][1]) && (i+l<pst->T) && (pst->dw.coef[j][n] != 0.0) )
168 pst->sm.R[i][l] += wu * pst->dw.coef[j][n];
169 }
170 }
171 }
172 }
173 }
174
175 /* Cholesky : Cholesky factorization of Matrix R */
Cholesky(PStream * pst)176 void Cholesky(PStream *pst)
177 {
178 register int i, j, k;
179
180 pst->sm.R[0][0] = sqrt(pst->sm.R[0][0]);
181
182 for (i=1; i<pst->width; i++)
183 pst->sm.R[0][i] /= pst->sm.R[0][0];
184
185 for (i=1; i<pst->T; i++) {
186 for (j=1; j<pst->width; j++)
187 if (i-j >= 0)
188 pst->sm.R[i][0] -= pst->sm.R[i-j][j] * pst->sm.R[i-j][j];
189
190 pst->sm.R[i][0] = sqrt(pst->sm.R[i][0]);
191
192 for (j=1; j<pst->width; j++) {
193 for (k=0; k<pst->dw.max_L; k++)
194 if (j!=pst->width-1)
195 pst->sm.R[i][j] -= pst->sm.R[i-k-1][j-k]*pst->sm.R[i-k-1][j+1];
196
197 pst->sm.R[i][j] /= pst->sm.R[i][0];
198 }
199 }
200 }
201
202 /* Cholesky_forward : forward substitution to solve linear equations */
Cholesky_forward(PStream * pst)203 void Cholesky_forward(PStream *pst)
204 {
205 register int i, j;
206 double hold;
207
208 pst->sm.g[0] = pst->sm.r[0] / pst->sm.R[0][0];
209
210 for (i=1; i<pst->T; i++) {
211 hold = 0.0;
212 for (j=1; j<pst->width; j++) {
213 if (i-j >= 0)
214 hold += pst->sm.R[i-j][j]*pst->sm.g[i-j];
215 }
216 pst->sm.g[i] = (pst->sm.r[i]-hold)/pst->sm.R[i][0];
217 }
218 }
219
220 /* Cholesky_backward : backward substitution to solve linear equations */
Cholesky_backward(PStream * pst,int m)221 void Cholesky_backward(PStream *pst, int m)
222 {
223 register int i, j;
224 double hold;
225
226 pst->par[pst->T-1][m] = pst->sm.g[pst->T-1] / pst->sm.R[pst->T-1][0];
227
228 for (i=pst->T-2; i>=0; i--) {
229 hold = 0.0;
230 for (j=1; j<pst->width; j++) {
231 if (pst->sm.R[i][j] != 0.0)
232 hold += pst->sm.R[i][j]*pst->par[i+j][m];
233 }
234 pst->par[i][m] = (float)((pst->sm.g[i] - hold) / pst->sm.R[i][0]);
235 }
236 }
237
238 /* generate parameter sequence from pdf sequence */
mlpg(PStream * pst)239 void mlpg(PStream *pst)
240 {
241 int m;
242
243 for (m=0; m<=pst->order; m++) {
244 calc_R_and_r(pst,m);
245 Cholesky(pst);
246 Cholesky_forward(pst);
247 Cholesky_backward(pst,m);
248 }
249 }
250
251
252 /* InitPStream : Initialise PStream for parameter generation */
InitPStream(PStream * pst)253 void InitPStream(PStream *pst)
254 {
255 pst->width = pst->dw.max_L*2+1; /* band width of R */
256
257 pst->sm.mseq = ddcalloc(pst->T, pst->vSize);
258 pst->sm.ivseq = ddcalloc(pst->T, pst->vSize);
259 pst->sm.g = dcalloc (pst->T);
260 pst->sm.R = ddcalloc(pst->T, pst->width);
261 pst->sm.r = dcalloc (pst->T);
262 pst->par = ffcalloc(pst->T,pst->order+1);
263 }
264
265 /* FreePStream : Free PStream */
FreePStream(PStream * pst)266 void FreePStream(PStream *pst)
267 {
268 register int t;
269
270 for (t=0; t<pst->T; t++) {
271 wfree(pst->sm.mseq[t]);
272 wfree(pst->sm.ivseq[t]);
273 wfree(pst->sm.R[t]);
274 wfree(pst->par[t]);
275 }
276
277 for (t=0; t<pst->dw.num; t++)
278 wfree(pst->dw.width[t]);
279 wfree(pst->dw.width);
280 wfree(pst->dw.coefr[0]);
281 for (t=1; t<pst->dw.num; t++)
282 wfree(pst->dw.coefr[t]);
283 wfree(pst->dw.coefr);
284 wfree(pst->dw.coef);
285
286 wfree(pst->sm.mseq);
287 wfree(pst->sm.ivseq);
288 wfree(pst->sm.R);
289 wfree(pst->sm.g);
290 wfree(pst->sm.r);
291 wfree(pst->par);
292 }
293
294 /* pdf2speech : parameter generation from pdf sequence */
pdf2speech(FILE * rawfp,FILE * lf0fp,FILE * mcepfp,PStream * mceppst,PStream * lf0pst,globalP * gp,ModelSet * ms,UttModel * um,VocoderSetup * vs)295 void pdf2speech( FILE *rawfp, FILE *lf0fp, FILE *mcepfp,
296 PStream *mceppst, PStream *lf0pst, globalP *gp, ModelSet *ms, UttModel *um, VocoderSetup *vs)
297 {
298 int frame, mcepframe, lf0frame;
299 int state, lw, rw, k, n;
300 Model *m;
301 HTS_Boolean nobound, *voiced;
302
303 float f0;
304
305 lf0pst->vSize = ms->lf0stream;
306 lf0pst->order = 0;
307 mceppst->vSize = ms->mcepvsize;
308 mceppst->order = mceppst->vSize / mceppst->dw.num - 1;
309
310 InitDWin(lf0pst);
311 InitDWin(mceppst);
312
313 mcepframe = 0;
314 lf0frame = 0;
315
316 voiced = walloc(HTS_Boolean,um->totalframe+1);
317
318 for (m=um->mhead; m!=um->mtail ; m=m->next) {
319 for (state=2; state<=ms->nstate+1; state++) {
320 for (frame=1; frame<=m->dur[state]; frame++) {
321 voiced[mcepframe++] = m->voiced[state];
322 if (m->voiced[state]) {
323 lf0frame++;
324 }
325 }
326 }
327 }
328
329 mceppst->T = mcepframe;
330 lf0pst->T = lf0frame;
331
332 InitPStream(mceppst);
333 InitPStream(lf0pst);
334
335 mcepframe = 0;
336 lf0frame = 0;
337
338 for (m=um->mhead; m!=um->mtail; m=m->next) {
339 for (state=2; state<=ms->nstate+1; state++) {
340 for (frame=1; frame<=m->dur[state]; frame++) {
341 for (k=0; k<ms->mcepvsize; k++) {
342 mceppst->sm.mseq[mcepframe][k] = m->mcepmean[state][k];
343 mceppst->sm.ivseq[mcepframe][k] = finv(m->mcepvariance[state][k]);
344 }
345 for (k=0; k<ms->lf0stream; k++) {
346 lw = lf0pst->dw.width[k][WLEFT];
347 rw = lf0pst->dw.width[k][WRIGHT];
348 nobound = (HTS_Boolean)1;
349
350 for (n=lw; n<=rw;n++)
351 if (mcepframe+n<0 || um->totalframe<mcepframe+n)
352 nobound = (HTS_Boolean)0;
353 else
354 nobound = (HTS_Boolean)((int)nobound & voiced[mcepframe+n]);
355
356 if (voiced[mcepframe]) {
357 lf0pst->sm.mseq[lf0frame][k] = m->lf0mean[state][k+1];
358 if (nobound || k==0)
359 lf0pst->sm.ivseq[lf0frame][k] = finv(m->lf0variance[state][k+1]);
360 else
361 lf0pst->sm.ivseq[lf0frame][k] = 0.0;
362 }
363 }
364 if (voiced[mcepframe])
365 lf0frame++;
366 mcepframe++;
367 }
368 }
369 }
370
371 mlpg(mceppst);
372
373 if (lf0frame>0)
374 mlpg(lf0pst);
375
376 lf0frame = 0;
377
378 if (gp->XIMERA && lf0fp!=NULL)
379 fprintf(lf0fp, "# FrameShift=%dms\n", (FPERIOD*1000)/RATE);
380
381 for (mcepframe=0; mcepframe<mceppst->T; mcepframe++) {
382 if (voiced[mcepframe])
383 f0 = gp->F0_STD * exp(lf0pst->par[lf0frame++][0]) + gp->F0_MEAN;
384 else
385 f0 = 0.0;
386
387 if (mcepfp != NULL)
388 fwrite(mceppst->par[mcepframe], sizeof(float), mceppst->order+1, mcepfp);
389 if (lf0fp != NULL) {
390 if (gp->XIMERA)
391 fprintf(lf0fp, "%.1f 1\n", f0);
392 else
393 fwrite(&f0, sizeof(double), 1, lf0fp);
394 }
395
396 if (rawfp!=NULL)
397 vocoder(f0, mceppst->par[mcepframe], mceppst->order, rawfp, gp, vs);
398 }
399
400 FreePStream(mceppst);
401 FreePStream(lf0pst);
402 wfree(voiced);
403 }
404
405 /* InitDWin : Initialise dynamic window */
InitDWin(PStream * pst)406 void InitDWin(PStream *pst)
407 {
408 int i;
409 int fsize, leng, fpos;
410 FILE *fp;
411
412 /* memory allocation */
413 pst->dw.width = walloc(int *,pst->dw.num);
414
415 for (i=0; i<pst->dw.num; i++) {
416 pst->dw.width[i] = walloc(int,2);
417 }
418
419 pst->dw.coef= walloc(float *,pst->dw.num);
420 /* because the pointers are moved, keep an original of the memory
421 being allocated */
422 pst->dw.coefr= walloc(float *,pst->dw.num);
423
424 /* window for static parameter */
425 pst->dw.width[0][WLEFT] = pst->dw.width[0][WRIGHT] = 0;
426 pst->dw.coef[0] = fcalloc (1);
427 pst->dw.coefr[0] = pst->dw.coef[0];
428 pst->dw.coef[0][0] = 1;
429
430 /* set delta coefficients */
431 for (i=1; i<pst->dw.num; i++) {
432 if (pst->dw.fn[i][0] == ' ')
433 fsize = str2farray(pst->dw.fn[i], &(pst->dw.coef[i]));
434 else { /* read from file */
435 if ((fp = fopen (pst->dw.fn[i], "r")) == NULL) {
436 fprintf(stderr, "file %s not found\n", pst->dw.fn[i]);
437 festival_error();
438 }
439
440 /* check the number of coefficients */
441 fseek(fp, 0L, 2);
442 fpos = (int)ftell(fp);
443 fsize = fpos/sizeof (float);
444 fseek(fp, 0L, 0);
445
446 /* read coefficients */
447 pst->dw.coef[i] = fcalloc (fsize);
448 pst->dw.coefr[i] = pst->dw.coef[i];
449 fread(pst->dw.coef[i], sizeof(float), fsize, fp);
450 if (EST_BIG_ENDIAN)
451 swap_bytes_float(pst->dw.coef[i],fsize);
452
453 fclose(fp);
454 }
455
456 /* set pointer */
457 leng = fsize / 2;
458 pst->dw.coef[i] += leng;
459 pst->dw.width[i][WLEFT] = -leng;
460 pst->dw.width[i][WRIGHT] = leng;
461
462 if (fsize % 2 == 0)
463 pst->dw.width[i][WRIGHT]--;
464 }
465
466 pst->dw.maxw[WLEFT] = pst->dw.maxw[WRIGHT] = 0;
467
468 for (i=0; i<pst->dw.num; i++) {
469 if (pst->dw.maxw[WLEFT] > pst->dw.width[i][WLEFT])
470 pst->dw.maxw[WLEFT] = pst->dw.width[i][WLEFT];
471 if (pst->dw.maxw[WRIGHT] < pst->dw.width[i][WRIGHT])
472 pst->dw.maxw[WRIGHT] = pst->dw.width[i][WRIGHT];
473 }
474
475 /* calcurate max_L to determine size of band matrix */
476 if ( pst->dw.maxw[WLEFT] >= pst->dw.maxw[WRIGHT] )
477 pst->dw.max_L = pst->dw.maxw[WLEFT];
478 else
479 pst->dw.max_L = pst->dw.maxw[WRIGHT];
480
481 }
482
483 /* -------------------- End of "mlpg.c" -------------------- */
484