1 /*  ---------------------------------------------------------------  */
2 /*      The HMM-Based Speech Synthesis System (HTS): version 1.1b    */
3 /*                        HTS Working Group                          */
4 /*                                                                   */
5 /*                   Department of Computer Science                  */
6 /*                   Nagoya Institute of Technology                  */
7 /*                                and                                */
8 /*    Interdisciplinary Graduate School of Science and Engineering   */
9 /*                   Tokyo Institute of Technology                   */
10 /*                      Copyright (c) 2001-2003                      */
11 /*                        All Rights Reserved.                       */
12 /*                                                                   */
13 /*  Permission is hereby granted, free of charge, to use and         */
14 /*  distribute this software and its documentation without           */
15 /*  restriction, including without limitation the rights to use,     */
16 /*  copy, modify, merge, publish, distribute, sublicense, and/or     */
17 /*  sell copies of this work, and to permit persons to whom this     */
18 /*  work is furnished to do so, subject to the following conditions: */
19 /*                                                                   */
20 /*    1. The code must retain the above copyright notice, this list  */
21 /*       of conditions and the following disclaimer.                 */
22 /*                                                                   */
23 /*    2. Any modifications must be clearly marked as such.           */
24 /*                                                                   */
25 /*  NAGOYA INSTITUTE OF TECHNOLOGY, TOKYO INSITITUTE OF TECHNOLOGY,  */
26 /*  HTS WORKING GROUP, AND THE CONTRIBUTORS TO THIS WORK DISCLAIM    */
27 /*  ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL       */
28 /*  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
29 /*  SHALL NAGOYA INSTITUTE OF TECHNOLOGY, TOKYO INSITITUTE OF        */
30 /*  TECHNOLOGY, HTS WORKING GROUP, NOR THE CONTRIBUTORS BE LIABLE    */
31 /*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY        */
32 /*  DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,  */
33 /*  WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTUOUS   */
34 /*  ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR          */
35 /*  PERFORMANCE OF THIS SOFTWARE.                                    */
36 /*                                                                   */
37 /*  ---------------------------------------------------------------  */
38 /*    mlpg.c : speech parameter generation from pdf sequence         */
39 /*                                                                   */
40 /*                                    2003/06/11 by Heiga Zen        */
41 /*  ---------------------------------------------------------------  */
42 
43 #include <cstdio>
44 #include <cstdlib>
45 #include <cstring>
46 #include <cctype>
47 #include <cmath>
48 #include "festival.h"
49 
50 #include "defaults.h"
51 #include "misc.h"
52 #include "model.h"
53 #include "global.h"
54 #include "vocoder.h"
55 #include "mlpg.h"
56 
finv(double x)57 double finv (double x)
58 {
59    if (x >= INFTY2) return 0.0;
60    if (x <= -INFTY2) return 0.0;
61    if (x <= INVINF2 && x >= 0) return INFTY;
62    if (x >= -INVINF2 && x < 0) return -INFTY;
63 
64    return 1.0/x;
65 }
66 
dcalloc(int x)67 double *dcalloc(int x)
68 {
69     return walloc(double,x);
70 }
71 
ddcalloc(int x,int y)72 double **ddcalloc(int x, int y)
73 {
74    register int i;
75    double **ptr;
76 
77    ptr = walloc(double *,x);
78 
79    for (i=0; i<x; i++)
80       ptr[i] = dcalloc(y);
81 
82    return(ptr);
83 }
84 
fcalloc(int x)85 float *fcalloc(int x)
86 {
87    return walloc(float,x);
88 }
89 
ffcalloc(int x,int y)90 float **ffcalloc(int x, int y)
91 {
92    register int i;
93    float **ptr;
94 
95    ptr = walloc(float *,x);
96 
97    for (i=0; i<x; i++)
98       ptr[i] = fcalloc(y);
99 
100    return(ptr);
101 }
102 
str2farray(char * c,float ** x)103 int str2farray (char *c, float **x)
104 {
105    int i, size, sp;
106    char *p, *buf;
107 
108    while (isspace(*c))
109       c++;
110 
111    if (*c == '\0') {
112       *x = NULL;
113       return (0);
114    }
115 
116    size = 1;
117    sp = 0;
118 
119    for (p = c; *p != '\0'; p++) {
120       if (!isspace (*p)) {
121          if (sp == 1) {
122             size++;
123             sp = 0;
124          }
125       }
126       else
127          sp = 1;
128    }
129 
130    buf = walloc(char,strlen(c));
131 
132    *x = walloc(float,size);
133 
134   for (i=0; i<size; i++)
135      (*x)[i] = (float)strtod (c, &c);
136 
137   return (size);
138 }
139 
140 /*----------------------------------------------------------------
141 	matrix calcuration functions
142 ----------------------------------------------------------------*/
143 
144 /* calc_R_and_r : calcurate R=W'U^{-1}W and r=W'U^{-1}M */
calc_R_and_r(PStream * pst,int m)145 void calc_R_and_r(PStream *pst, int m)
146 {
147    register int i, j, k, l, n;
148    double   wu;
149 
150    for (i=0; i<pst->T; i++) {
151 		pst->sm.r[i]    = pst->sm.ivseq[i][m] * pst->sm.mseq[i][m];
152 		pst->sm.R[i][0] = pst->sm.ivseq[i][m];
153 
154 		for (j=1; j<pst->width; j++)
155          pst->sm.R[i][j]=0.0;
156 
157       for (j=1; j<pst->dw.num; j++)
158          for (k=pst->dw.width[j][0]; k<=pst->dw.width[j][1]; k++) {
159             n = i+k;
160             if ( (n>=0) && (n<pst->T) && (pst->dw.coef[j][-k]!=0.0) ) {
161                l = j*(pst->order+1)+m;
162                wu = pst->dw.coef[j][-k] * pst->sm.ivseq[n][l];
163                pst->sm.r[i] += wu*pst->sm.mseq[n][l];
164 
165                for (l=0; l<pst->width; l++) {
166                   n = l-k;
167                   if ( (n<=pst->dw.width[j][1]) && (i+l<pst->T) && (pst->dw.coef[j][n] != 0.0) )
168                      pst->sm.R[i][l] += wu * pst->dw.coef[j][n];
169                }
170             }
171          }
172    }
173 }
174 
175 /* Cholesky : Cholesky factorization of Matrix R */
Cholesky(PStream * pst)176 void Cholesky(PStream *pst)
177 {
178    register int i, j, k;
179 
180    pst->sm.R[0][0] = sqrt(pst->sm.R[0][0]);
181 
182    for (i=1; i<pst->width; i++)
183       pst->sm.R[0][i] /= pst->sm.R[0][0];
184 
185 	for (i=1; i<pst->T; i++) {
186       for (j=1; j<pst->width; j++)
187          if (i-j >= 0)
188             pst->sm.R[i][0] -= pst->sm.R[i-j][j] * pst->sm.R[i-j][j];
189 
190       pst->sm.R[i][0] = sqrt(pst->sm.R[i][0]);
191 
192       for (j=1; j<pst->width; j++) {
193 			for (k=0; k<pst->dw.max_L; k++)
194             if (j!=pst->width-1)
195                pst->sm.R[i][j] -= pst->sm.R[i-k-1][j-k]*pst->sm.R[i-k-1][j+1];
196 
197          pst->sm.R[i][j] /= pst->sm.R[i][0];
198       }
199    }
200 }
201 
202 /* Cholesky_forward : forward substitution to solve linear equations */
Cholesky_forward(PStream * pst)203 void Cholesky_forward(PStream *pst)
204 {
205    register int i, j;
206    double hold;
207 
208    pst->sm.g[0] = pst->sm.r[0] / pst->sm.R[0][0];
209 
210    for (i=1; i<pst->T; i++) {
211       hold = 0.0;
212       for (j=1; j<pst->width; j++) {
213          if (i-j >= 0)
214             hold += pst->sm.R[i-j][j]*pst->sm.g[i-j];
215       }
216       pst->sm.g[i] = (pst->sm.r[i]-hold)/pst->sm.R[i][0];
217    }
218 }
219 
220 /* Cholesky_backward : backward substitution to solve linear equations */
Cholesky_backward(PStream * pst,int m)221 void Cholesky_backward(PStream *pst, int m)
222 {
223    register int i, j;
224    double hold;
225 
226    pst->par[pst->T-1][m] = pst->sm.g[pst->T-1] / pst->sm.R[pst->T-1][0];
227 
228    for (i=pst->T-2; i>=0; i--) {
229       hold = 0.0;
230       for (j=1; j<pst->width; j++) {
231          if (pst->sm.R[i][j] != 0.0)
232             hold += pst->sm.R[i][j]*pst->par[i+j][m];
233       }
234 		pst->par[i][m] = (float)((pst->sm.g[i] - hold) / pst->sm.R[i][0]);
235    }
236 }
237 
238 /* generate parameter sequence from pdf sequence */
mlpg(PStream * pst)239 void mlpg(PStream *pst)
240 {
241    int m;
242 
243    for (m=0; m<=pst->order; m++) {
244       calc_R_and_r(pst,m);
245       Cholesky(pst);
246       Cholesky_forward(pst);
247       Cholesky_backward(pst,m);
248    }
249 }
250 
251 
252 /* InitPStream : Initialise PStream for parameter generation */
InitPStream(PStream * pst)253 void InitPStream(PStream *pst)
254 {
255    pst->width	   = pst->dw.max_L*2+1;  /* band width of R */
256 
257    pst->sm.mseq  = ddcalloc(pst->T, pst->vSize);
258    pst->sm.ivseq = ddcalloc(pst->T, pst->vSize);
259    pst->sm.g     = dcalloc (pst->T);
260    pst->sm.R     = ddcalloc(pst->T, pst->width);
261    pst->sm.r     = dcalloc (pst->T);
262    pst->par      = ffcalloc(pst->T,pst->order+1);
263 }
264 
265 /* FreePStream : Free PStream */
FreePStream(PStream * pst)266 void FreePStream(PStream *pst)
267 {
268    register int t;
269 
270    for (t=0; t<pst->T; t++) {
271       wfree(pst->sm.mseq[t]);
272       wfree(pst->sm.ivseq[t]);
273       wfree(pst->sm.R[t]);
274       wfree(pst->par[t]);
275    }
276 
277    for (t=0; t<pst->dw.num; t++)
278        wfree(pst->dw.width[t]);
279    wfree(pst->dw.width);
280    wfree(pst->dw.coefr[0]);
281    for (t=1; t<pst->dw.num; t++)
282        wfree(pst->dw.coefr[t]);
283    wfree(pst->dw.coefr);
284    wfree(pst->dw.coef);
285 
286    wfree(pst->sm.mseq);
287    wfree(pst->sm.ivseq);
288    wfree(pst->sm.R);
289    wfree(pst->sm.g);
290    wfree(pst->sm.r);
291    wfree(pst->par);
292 }
293 
294 /* pdf2speech : parameter generation from pdf sequence */
pdf2speech(FILE * rawfp,FILE * lf0fp,FILE * mcepfp,PStream * mceppst,PStream * lf0pst,globalP * gp,ModelSet * ms,UttModel * um,VocoderSetup * vs)295 void pdf2speech( FILE *rawfp, FILE *lf0fp, FILE *mcepfp,
296                   PStream *mceppst, PStream *lf0pst, globalP *gp, ModelSet *ms, UttModel *um, VocoderSetup *vs)
297 {
298    int frame, mcepframe, lf0frame;
299    int state, lw, rw, k, n;
300    Model *m;
301    HTS_Boolean nobound, *voiced;
302 
303    float f0;
304 
305    lf0pst->vSize = ms->lf0stream;
306    lf0pst->order = 0;
307    mceppst->vSize = ms->mcepvsize;
308    mceppst->order = mceppst->vSize / mceppst->dw.num - 1;
309 
310    InitDWin(lf0pst);
311    InitDWin(mceppst);
312 
313    mcepframe  = 0;
314    lf0frame = 0;
315 
316    voiced = walloc(HTS_Boolean,um->totalframe+1);
317 
318    for (m=um->mhead; m!=um->mtail ; m=m->next) {
319       for (state=2; state<=ms->nstate+1; state++) {
320          for (frame=1; frame<=m->dur[state]; frame++) {
321             voiced[mcepframe++] = m->voiced[state];
322             if (m->voiced[state]) {
323                lf0frame++;
324             }
325          }
326       }
327    }
328 
329    mceppst->T = mcepframe;
330    lf0pst->T  = lf0frame;
331 
332    InitPStream(mceppst);
333    InitPStream(lf0pst);
334 
335    mcepframe = 0;
336    lf0frame  = 0;
337 
338    for (m=um->mhead; m!=um->mtail; m=m->next) {
339       for (state=2; state<=ms->nstate+1; state++) {
340          for (frame=1; frame<=m->dur[state]; frame++) {
341             for (k=0; k<ms->mcepvsize; k++) {
342                mceppst->sm.mseq[mcepframe][k]  = m->mcepmean[state][k];
343                mceppst->sm.ivseq[mcepframe][k] = finv(m->mcepvariance[state][k]);
344             }
345             for (k=0; k<ms->lf0stream; k++) {
346                lw = lf0pst->dw.width[k][WLEFT];
347                rw = lf0pst->dw.width[k][WRIGHT];
348                nobound = (HTS_Boolean)1;
349 
350                for (n=lw; n<=rw;n++)
351                   if (mcepframe+n<0 || um->totalframe<mcepframe+n)
352                      nobound = (HTS_Boolean)0;
353                   else
354 		      nobound = (HTS_Boolean)((int)nobound & voiced[mcepframe+n]);
355 
356                if (voiced[mcepframe]) {
357                   lf0pst->sm.mseq[lf0frame][k] = m->lf0mean[state][k+1];
358                   if (nobound || k==0)
359                      lf0pst->sm.ivseq[lf0frame][k] = finv(m->lf0variance[state][k+1]);
360                   else
361                      lf0pst->sm.ivseq[lf0frame][k] = 0.0;
362                }
363             }
364             if (voiced[mcepframe])
365                lf0frame++;
366             mcepframe++;
367          }
368       }
369    }
370 
371    mlpg(mceppst);
372 
373    if (lf0frame>0)
374       mlpg(lf0pst);
375 
376    lf0frame = 0;
377 
378    if (gp->XIMERA && lf0fp!=NULL)
379       fprintf(lf0fp, "# FrameShift=%dms\n", (FPERIOD*1000)/RATE);
380 
381    for (mcepframe=0; mcepframe<mceppst->T; mcepframe++) {
382       if (voiced[mcepframe])
383          f0 = gp->F0_STD * exp(lf0pst->par[lf0frame++][0]) + gp->F0_MEAN;
384       else
385          f0 = 0.0;
386 
387       if (mcepfp != NULL)
388          fwrite(mceppst->par[mcepframe], sizeof(float), mceppst->order+1, mcepfp);
389       if (lf0fp != NULL) {
390          if (gp->XIMERA)
391             fprintf(lf0fp, "%.1f 1\n", f0);
392          else
393             fwrite(&f0, sizeof(double), 1, lf0fp);
394       }
395 
396       if (rawfp!=NULL)
397          vocoder(f0, mceppst->par[mcepframe], mceppst->order, rawfp, gp, vs);
398    }
399 
400    FreePStream(mceppst);
401    FreePStream(lf0pst);
402    wfree(voiced);
403 }
404 
405 /* InitDWin : Initialise dynamic window */
InitDWin(PStream * pst)406 void InitDWin(PStream *pst)
407 {
408    int i;
409    int fsize, leng, fpos;
410    FILE *fp;
411 
412    /* memory allocation */
413    pst->dw.width = walloc(int *,pst->dw.num);
414 
415    for (i=0; i<pst->dw.num; i++) {
416        pst->dw.width[i] = walloc(int,2);
417    }
418 
419    pst->dw.coef= walloc(float *,pst->dw.num);
420    /* because the pointers are moved, keep an original of the memory
421       being allocated */
422    pst->dw.coefr= walloc(float *,pst->dw.num);
423 
424    /* window for static parameter */
425    pst->dw.width[0][WLEFT] = pst->dw.width[0][WRIGHT] = 0;
426    pst->dw.coef[0] = fcalloc (1);
427    pst->dw.coefr[0] = pst->dw.coef[0];
428    pst->dw.coef[0][0] = 1;
429 
430    /* set delta coefficients */
431    for (i=1; i<pst->dw.num; i++) {
432       if (pst->dw.fn[i][0] == ' ')
433          fsize = str2farray(pst->dw.fn[i], &(pst->dw.coef[i]));
434       else {         /* read from file */
435          if ((fp = fopen (pst->dw.fn[i], "r")) == NULL) {
436             fprintf(stderr, "file %s not found\n", pst->dw.fn[i]);
437             festival_error();
438          }
439 
440          /* check the number of coefficients */
441          fseek(fp, 0L, 2);
442 	 fpos = (int)ftell(fp);
443          fsize = fpos/sizeof (float);
444          fseek(fp, 0L, 0);
445 
446          /* read coefficients */
447          pst->dw.coef[i] = fcalloc (fsize);
448          pst->dw.coefr[i] = pst->dw.coef[i];
449          fread(pst->dw.coef[i], sizeof(float), fsize, fp);
450 	 if (EST_BIG_ENDIAN)
451 		 swap_bytes_float(pst->dw.coef[i],fsize);
452 
453          fclose(fp);
454       }
455 
456       /* set pointer */
457       leng = fsize / 2;
458       pst->dw.coef[i] += leng;
459       pst->dw.width[i][WLEFT] = -leng;
460       pst->dw.width[i][WRIGHT] = leng;
461 
462       if (fsize % 2 == 0)
463          pst->dw.width[i][WRIGHT]--;
464    }
465 
466    pst->dw.maxw[WLEFT] = pst->dw.maxw[WRIGHT] = 0;
467 
468    for (i=0; i<pst->dw.num; i++) {
469       if (pst->dw.maxw[WLEFT] > pst->dw.width[i][WLEFT])
470          pst->dw.maxw[WLEFT] = pst->dw.width[i][WLEFT];
471       if (pst->dw.maxw[WRIGHT] < pst->dw.width[i][WRIGHT])
472          pst->dw.maxw[WRIGHT] = pst->dw.width[i][WRIGHT];
473    }
474 
475    /* calcurate max_L to determine size of band matrix */
476    if ( pst->dw.maxw[WLEFT] >= pst->dw.maxw[WRIGHT] )
477       pst->dw.max_L = pst->dw.maxw[WLEFT];
478    else
479       pst->dw.max_L = pst->dw.maxw[WRIGHT];
480 
481 }
482 
483 /* -------------------- End of "mlpg.c" -------------------- */
484