1 /*  ---------------------------------------------------------------  */
2 /*      The HMM-Based Speech Synthesis System (HTS): version 1.1.1   */
3 /*                        HTS Working Group                          */
4 /*                                                                   */
5 /*                   Department of Computer Science                  */
6 /*                   Nagoya Institute of Technology                  */
7 /*                                and                                */
8 /*    Interdisciplinary Graduate School of Science and Engineering   */
9 /*                   Tokyo Institute of Technology                   */
10 /*                      Copyright (c) 2001-2003                      */
11 /*                        All Rights Reserved.                       */
12 /*                                                                   */
13 /*  Permission is hereby granted, free of charge, to use and         */
14 /*  distribute this software and its documentation without           */
15 /*  restriction, including without limitation the rights to use,     */
16 /*  copy, modify, merge, publish, distribute, sublicense, and/or     */
17 /*  sell copies of this work, and to permit persons to whom this     */
18 /*  work is furnished to do so, subject to the following conditions: */
19 /*                                                                   */
20 /*    1. The code must retain the above copyright notice, this list  */
21 /*       of conditions and the following disclaimer.                 */
22 /*                                                                   */
23 /*    2. Any modifications must be clearly marked as such.           */
24 /*                                                                   */
25 /*  NAGOYA INSTITUTE OF TECHNOLOGY, TOKYO INSITITUTE OF TECHNOLOGY,  */
26 /*  HTS WORKING GROUP, AND THE CONTRIBUTORS TO THIS WORK DISCLAIM    */
27 /*  ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL       */
28 /*  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
29 /*  SHALL NAGOYA INSTITUTE OF TECHNOLOGY, TOKYO INSITITUTE OF        */
30 /*  TECHNOLOGY, HTS WORKING GROUP, NOR THE CONTRIBUTORS BE LIABLE    */
31 /*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY        */
32 /*  DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,  */
33 /*  WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS   */
34 /*  ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR          */
35 /*  PERFORMANCE OF THIS SOFTWARE.                                    */
36 /*                                                                   */
37 /*  ---------------------------------------------------------------  */
38 /*    mlpg.c : speech parameter generation from pdf sequence         */
39 /*                                                                   */
40 /*                                    2003/12/26 by Heiga Zen        */
41 /*  ---------------------------------------------------------------  */
42 /*********************************************************************/
43 /*                                                                   */
44 /*            Nagoya Institute of Technology, Aichi, Japan,          */
45 /*                                and                                */
46 /*             Carnegie Mellon University, Pittsburgh, PA            */
47 /*                   Copyright (c) 2003-2004,2008                    */
48 /*                        All Rights Reserved.                       */
49 /*                                                                   */
50 /*  Permission is hereby granted, free of charge, to use and         */
51 /*  distribute this software and its documentation without           */
52 /*  restriction, including without limitation the rights to use,     */
53 /*  copy, modify, merge, publish, distribute, sublicense, and/or     */
54 /*  sell copies of this work, and to permit persons to whom this     */
55 /*  work is furnished to do so, subject to the following conditions: */
56 /*                                                                   */
57 /*    1. The code must retain the above copyright notice, this list  */
58 /*       of conditions and the following disclaimer.                 */
59 /*    2. Any modifications must be clearly marked as such.           */
60 /*    3. Original authors' names are not deleted.                    */
61 /*                                                                   */
62 /*  NAGOYA INSTITUTE OF TECHNOLOGY, CARNEGIE MELLON UNIVERSITY, AND  */
63 /*  THE CONTRIBUTORS TO THIS WORK DISCLAIM ALL WARRANTIES WITH       */
64 /*  REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF     */
65 /*  MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL NAGOYA INSTITUTE  */
66 /*  OF TECHNOLOGY, CARNEGIE MELLON UNIVERSITY, NOR THE CONTRIBUTORS  */
67 /*  BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR  */
68 /*  ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR       */
69 /*  PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER   */
70 /*  TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE    */
71 /*  OR PERFORMANCE OF THIS SOFTWARE.                                 */
72 /*                                                                   */
73 /*********************************************************************/
74 /*                                                                   */
75 /*          Author :  Tomoki Toda (tomoki@ics.nitech.ac.jp)          */
76 /*          Date   :  June 2004                                      */
77 /*                                                                   */
78 /*  Modified as a single file for inclusion in festival/flite        */
79 /*  May 2008 awb@cs.cmu.edu                                          */
80 /*-------------------------------------------------------------------*/
81 /*                                                                   */
82 /*  ML-Based Parameter Generation                                    */
83 /*                                                                   */
84 /*-------------------------------------------------------------------*/
85 
86 #include "cst_alloc.h"
87 #include "cst_string.h"
88 #include "cst_math.h"
89 #include "cst_track.h"
90 #include "cst_wave.h"
91 #include "cst_vc.h"
92 #include "cst_mlpg.h"
93 
94 #define mlpg_alloc(X,Y) (cst_alloc(Y,X))
95 #define mlpg_free cst_free
96 
xmlpgpara_init(int dim,int dim2,int dnum,int clsnum)97 static MLPGPARA xmlpgpara_init(int dim, int dim2, int dnum,
98                                int clsnum)
99 {
100     MLPGPARA param;
101 
102     /* memory allocation */
103     param = mlpg_alloc(1,struct MLPGPARA_STRUCT);
104     param->ov = xdvalloc(dim);
105     param->iuv = NODATA;
106     param->iumv = NODATA;
107     param->flkv = xdvalloc(dnum);
108     param->stm = NODATA;
109     param->dltm = xdmalloc(dnum, dim2);
110     param->pdf = NODATA;
111     param->detvec = NODATA;
112     param->wght = xdmalloc(clsnum, 1);
113     param->mean = xdmalloc(clsnum, dim);
114     param->cov = NODATA;
115     param->clsidxv = NODATA;
116     /* dia_flag */
117 	param->clsdetv = xdvalloc(1);
118 	param->clscov = xdmalloc(1, dim);
119 
120     param->vdet = 1.0;
121     param->vm = NODATA;
122     param->vv = NODATA;
123     param->var = NODATA;
124 
125     return param;
126 }
127 
xmlpgparafree(MLPGPARA param)128 static void xmlpgparafree(MLPGPARA param)
129 {
130     if (param != NODATA) {
131 	if (param->ov != NODATA) xdvfree(param->ov);
132 	if (param->iuv != NODATA) xdvfree(param->iuv);
133 	if (param->iumv != NODATA) xdvfree(param->iumv);
134 	if (param->flkv != NODATA) xdvfree(param->flkv);
135 	if (param->stm != NODATA) xdmfree(param->stm);
136 	if (param->dltm != NODATA) xdmfree(param->dltm);
137 	if (param->pdf != NODATA) xdmfree(param->pdf);
138 	if (param->detvec != NODATA) xdvfree(param->detvec);
139 	if (param->wght != NODATA) xdmfree(param->wght);
140 	if (param->mean != NODATA) xdmfree(param->mean);
141 	if (param->cov != NODATA) xdmfree(param->cov);
142 	if (param->clsidxv != NODATA) xlvfree(param->clsidxv);
143 	if (param->clsdetv != NODATA) xdvfree(param->clsdetv);
144 	if (param->clscov != NODATA) xdmfree(param->clscov);
145 	if (param->vm != NODATA) xdvfree(param->vm);
146 	if (param->vv != NODATA) xdvfree(param->vv);
147 	if (param->var != NODATA) xdvfree(param->var);
148 	mlpg_free(param);
149     }
150 
151     return;
152 }
153 
get_like_pdfseq_vit(int dim,int dim2,int dnum,int clsnum,MLPGPARA param,float ** model,XBOOL dia_flag)154 static double get_like_pdfseq_vit(int dim, int dim2, int dnum, int clsnum,
155                                   MLPGPARA param,
156                                   float **model,
157                                   XBOOL dia_flag)
158 {
159     long d, c, k, l, j;
160     double sumgauss;
161     double like = 0.0;
162 
163     for (d = 0, like = 0.0; d < dnum; d++) {
164 	/* read weight and mean sequences */
165         param->wght->data[0][0] = 0.9; /* FIXME weights */
166         for (j=0; j<dim; j++)
167             param->mean->data[0][j] = model[d][(j+1)*2];
168 
169 	/* observation vector */
170 	for (k = 0; k < dim2; k++) {
171 	    param->ov->data[k] = param->stm->data[d][k];
172 	    param->ov->data[k + dim2] = param->dltm->data[d][k];
173 	}
174 
175 	/* mixture index */
176         c = d;
177 	param->clsdetv->data[0] = param->detvec->data[c];
178 
179 	/* calculating likelihood */
180 	if (dia_flag == XTRUE) {
181 	    for (k = 0; k < param->clscov->col; k++)
182 		param->clscov->data[0][k] = param->cov->data[c][k];
183 	    sumgauss = get_gauss_dia(0, param->ov, param->clsdetv,
184 				     param->wght, param->mean, param->clscov);
185 	} else {
186 	    for (k = 0; k < param->clscov->row; k++)
187 		for (l = 0; l < param->clscov->col; l++)
188 		    param->clscov->data[k][l] =
189 			param->cov->data[k + param->clscov->row * c][l];
190 	    sumgauss = get_gauss_full(0, param->ov, param->clsdetv,
191 				      param->wght, param->mean, param->clscov);
192 	}
193 	if (sumgauss <= 0.0) param->flkv->data[d] = -1.0 * INFTY2;
194 	else param->flkv->data[d] = log(sumgauss);
195 	like += param->flkv->data[d];
196 
197 	/* estimating U', U'*M */
198 	if (dia_flag == XTRUE) {
199 	    /* PDF [U'*M U'] */
200 	    for (k = 0; k < dim; k++) {
201 		param->pdf->data[d][k] =
202 		    param->clscov->data[0][k] * param->mean->data[0][k];
203 		param->pdf->data[d][k + dim] = param->clscov->data[0][k];
204 	    }
205 	} else {
206 	    /* PDF [U'*M U'] */
207 	    for (k = 0; k < dim; k++) {
208 		param->pdf->data[d][k] = 0.0;
209 		for (l = 0; l < dim; l++) {
210 		    param->pdf->data[d][k * dim + dim + l] =
211 			param->clscov->data[k][l];
212 		    param->pdf->data[d][k] +=
213 			param->clscov->data[k][l] * param->mean->data[0][l];
214 		}
215 	    }
216 	}
217     }
218 
219     like /= (double)dnum;
220 
221     return like;
222 }
223 
224 #if 0
225 static double get_like_gv(long dim2, long dnum, MLPGPARA param)
226 {
227     long k;
228     double av = 0.0, dif = 0.0;
229     double vlike = -INFTY;
230 
231     if (param->vm != NODATA && param->vv != NODATA) {
232 	for (k = 0; k < dim2; k++)
233 	    calc_varstats(param->stm->data, k, dnum, &av,
234 			  &(param->var->data[k]), &dif);
235 	vlike = log(get_gauss_dia5(param->vdet, 1.0, param->var,
236                                    param->vm, param->vv));
237     }
238 
239     return vlike;
240 }
241 
242 static void sm_mvav(DMATRIX mat, long hlen)
243 {
244     long k, l, m, p;
245     double d, sd;
246     DVECTOR vec = NODATA;
247     DVECTOR win = NODATA;
248 
249     vec = xdvalloc(mat->row);
250 
251     /* smoothing window */
252     win = xdvalloc(hlen * 2 + 1);
253     for (k = 0, d = 1.0, sd = 0.0; k < hlen; k++, d += 1.0) {
254 	win->data[k] = d;	win->data[win->length - k - 1] = d;
255 	sd += d + d;
256     }
257     win->data[k] = d;	sd += d;
258     for (k = 0; k < win->length; k++) win->data[k] /= sd;
259 
260     for (l = 0; l < mat->col; l++) {
261 	for (k = 0; k < mat->row; k++) {
262 	    for (m = 0, vec->data[k] = 0.0; m < win->length; m++) {
263 		p = k - hlen + m;
264 		if (p >= 0 && p < mat->row)
265 		    vec->data[k] += mat->data[p][l] * win->data[m];
266 	    }
267 	}
268 	for (k = 0; k < mat->row; k++) mat->data[k][l] = vec->data[k];
269     }
270 
271     xdvfree(win);
272     xdvfree(vec);
273 
274     return;
275 }
276 #endif
277 
get_dltmat(DMATRIX mat,DWin * dw,int dno,DMATRIX dmat)278 static void get_dltmat(DMATRIX mat, DWin *dw, int dno, DMATRIX dmat)
279 {
280     int i, j, k, tmpnum;
281 
282     tmpnum = (int)mat->row - dw->width[dno][WRIGHT];
283     for (k = dw->width[dno][WRIGHT]; k < tmpnum; k++)	/* time index */
284 	for (i = 0; i < (int)mat->col; i++)	/* dimension index */
285 	    for (j = dw->width[dno][WLEFT], dmat->data[k][i] = 0.0;
286 		 j <= dw->width[dno][WRIGHT]; j++)
287 		dmat->data[k][i] += mat->data[k + j][i] * dw->coef[dno][j];
288 
289     for (i = 0; i < (int)mat->col; i++) {		/* dimension index */
290 	for (k = 0; k < dw->width[dno][WRIGHT]; k++)		/* time index */
291 	    for (j = dw->width[dno][WLEFT], dmat->data[k][i] = 0.0;
292 		 j <= dw->width[dno][WRIGHT]; j++)
293 		if (k + j >= 0)
294 		    dmat->data[k][i] += mat->data[k + j][i] * dw->coef[dno][j];
295 		else
296 		    dmat->data[k][i] += (2.0 * mat->data[0][i] - mat->data[-k - j][i]) * dw->coef[dno][j];
297 	for (k = tmpnum; k < (int)mat->row; k++)	/* time index */
298 	    for (j = dw->width[dno][WLEFT], dmat->data[k][i] = 0.0;
299 		 j <= dw->width[dno][WRIGHT]; j++)
300 		if (k + j < (int)mat->row)
301 		    dmat->data[k][i] += mat->data[k + j][i] * dw->coef[dno][j];
302 		else
303 		    dmat->data[k][i] += (2.0 * mat->data[mat->row - 1][i] - mat->data[mat->row - k - j + mat->row - 2][i]) * dw->coef[dno][j];
304     }
305 
306     return;
307 }
308 
309 
dcalloc(int x,int xoff)310 static double *dcalloc(int x, int xoff)
311 {
312     double *ptr;
313 
314     ptr = mlpg_alloc(x,double);
315     /* ptr += xoff; */ /* Just not going to allow this */
316     return(ptr);
317 }
318 
ddcalloc(int x,int y,int xoff,int yoff)319 static double **ddcalloc(int x, int y, int xoff, int yoff)
320 {
321     double **ptr;
322     register int i;
323 
324     ptr = mlpg_alloc(x,double *);
325     for (i = 0; i < x; i++) ptr[i] = dcalloc(y, yoff);
326     /* ptr += xoff; */ /* Just not going to allow this */
327     return(ptr);
328 }
329 
330 /***********************************/
331 /* ML using Choleski decomposition */
332 /***********************************/
InitDWin(PStreamChol * pst,const float * dynwin,int fsize)333 static void InitDWin(PStreamChol *pst, const float *dynwin, int fsize)
334 {
335     int i,j;
336     int leng;
337 
338     pst->dw.num = 1;	/* only static */
339     if (dynwin) {
340 	pst->dw.num = 2;	/* static + dyn */
341     }
342     /* memory allocation */
343     pst->dw.width = mlpg_alloc(pst->dw.num,int *);
344     for (i = 0; i < pst->dw.num; i++)
345         pst->dw.width[i] = mlpg_alloc(2,int);
346 
347     pst->dw.coef = mlpg_alloc(pst->dw.num, double *);
348     pst->dw.coef_ptrs = mlpg_alloc(pst->dw.num, double *);
349     /* window for static parameter	WLEFT = 0, WRIGHT = 1 */
350     pst->dw.width[0][WLEFT] = pst->dw.width[0][WRIGHT] = 0;
351     pst->dw.coef_ptrs[0] = mlpg_alloc(1,double);
352     pst->dw.coef[0] = pst->dw.coef_ptrs[0];
353     pst->dw.coef[0][0] = 1.0;
354 
355     /* set delta coefficients */
356     for (i = 1; i < pst->dw.num; i++) {
357         pst->dw.coef_ptrs[i] = mlpg_alloc(fsize, double);
358 	pst->dw.coef[i] = pst->dw.coef_ptrs[i];
359         for (j=0; j<fsize; j++) /* FIXME make dynwin doubles for memmove */
360             pst->dw.coef[i][j] = (double)dynwin[j];
361 	/* set pointer */
362 	leng = fsize / 2;			/* L (fsize = 2 * L + 1) */
363 	pst->dw.coef[i] += leng;		/* [L] -> [0]	center */
364 	pst->dw.width[i][WLEFT] = -leng;	/* -L		left */
365 	pst->dw.width[i][WRIGHT] = leng;	/*  L		right */
366 	if (fsize % 2 == 0) pst->dw.width[i][WRIGHT]--;
367     }
368 
369     pst->dw.maxw[WLEFT] = pst->dw.maxw[WRIGHT] = 0;
370     for (i = 0; i < pst->dw.num; i++) {
371 	if (pst->dw.maxw[WLEFT] > pst->dw.width[i][WLEFT])
372 	    pst->dw.maxw[WLEFT] = pst->dw.width[i][WLEFT];
373 	if (pst->dw.maxw[WRIGHT] < pst->dw.width[i][WRIGHT])
374 	    pst->dw.maxw[WRIGHT] = pst->dw.width[i][WRIGHT];
375     }
376 
377     return;
378 }
379 
InitPStreamChol(PStreamChol * pst,const float * dynwin,int fsize,int order,int T)380 static void InitPStreamChol(PStreamChol *pst, const float *dynwin, int fsize,
381                             int order, int T)
382 {
383     /* order of cepstrum */
384     pst->order = order;
385 
386     /* windows for dynamic feature */
387     InitDWin(pst, dynwin, fsize);
388 
389     /* dimension of observed vector */
390     pst->vSize = (pst->order + 1) * pst->dw.num;	/* odim = dim * (1--3) */
391 
392     /* memory allocation */
393     pst->T = T;					/* number of frames */
394     pst->width = pst->dw.maxw[WRIGHT] * 2 + 1;	/* width of R */
395     pst->mseq = ddcalloc(T, pst->vSize, 0, 0);	/* [T][odim]  */
396     pst->ivseq = ddcalloc(T, pst->vSize, 0, 0);	/* [T][odim] */
397     pst->R = ddcalloc(T, pst->width, 0, 0);	/* [T][width] */
398     pst->r = dcalloc(T, 0);			/* [T] */
399     pst->g = dcalloc(T, 0);			/* [T] */
400     pst->c = ddcalloc(T, pst->order + 1, 0, 0);	/* [T][dim] */
401 
402     return;
403 }
404 
mlgparaChol(DMATRIX pdf,PStreamChol * pst,DMATRIX mlgp)405 static void mlgparaChol(DMATRIX pdf, PStreamChol *pst, DMATRIX mlgp)
406 {
407     int t, d;
408 
409     /* error check */
410     if (pst->vSize * 2 != pdf->col || pst->order + 1 != mlgp->col) {
411 	cst_errmsg("Error mlgparaChol: Different dimension\n");
412         cst_error();
413     }
414 
415     /* mseq: U^{-1}*M,	ifvseq: U^{-1} */
416     for (t = 0; t < pst->T; t++) {
417 	for (d = 0; d < pst->vSize; d++) {
418 	    pst->mseq[t][d] = pdf->data[t][d];
419 	    pst->ivseq[t][d] = pdf->data[t][pst->vSize + d];
420 	}
421     }
422 
423     /* ML parameter generation */
424     mlpgChol(pst);
425 
426     /* extracting parameters */
427     for (t = 0; t < pst->T; t++)
428 	for (d = 0; d <= pst->order; d++)
429 	    mlgp->data[t][d] = pst->c[t][d];
430 
431     return;
432 }
433 
434 /* generate parameter sequence from pdf sequence using Choleski decomposition */
mlpgChol(PStreamChol * pst)435 static void mlpgChol(PStreamChol *pst)
436 {
437    register int m;
438 
439    /* generating parameter in each dimension */
440    for (m = 0; m <= pst->order; m++) {
441        calc_R_and_r(pst, m);
442        Choleski(pst);
443        Choleski_forward(pst);
444        Choleski_backward(pst, m);
445    }
446 
447    return;
448 }
449 
450 /* parameter generation fuctions */
451 /* calc_R_and_r: calculate R = W'U^{-1}W and r = W'U^{-1}M */
calc_R_and_r(PStreamChol * pst,const int m)452 static void calc_R_and_r(PStreamChol *pst, const int m)
453 {
454     register int i, j, k, l, n;
455     double   wu;
456 
457     for (i = 0; i < pst->T; i++) {
458 	pst->r[i] = pst->mseq[i][m];
459 	pst->R[i][0] = pst->ivseq[i][m];
460 
461 	for (j = 1; j < pst->width; j++) pst->R[i][j] = 0.0;
462 
463 	for (j = 1; j < pst->dw.num; j++) {
464 	    for (k = pst->dw.width[j][0]; k <= pst->dw.width[j][1]; k++) {
465 		n = i + k;
466 		if (n >= 0 && n < pst->T && pst->dw.coef[j][-k] != 0.0) {
467 		    l = j * (pst->order + 1) + m;
468 		    pst->r[i] += pst->dw.coef[j][-k] * pst->mseq[n][l];
469 		    wu = pst->dw.coef[j][-k] * pst->ivseq[n][l];
470 
471 		    for (l = 0; l < pst->width; l++) {
472 			n = l-k;
473 			if (n <= pst->dw.width[j][1] && i + l < pst->T &&
474 			    pst->dw.coef[j][n] != 0.0)
475 			    pst->R[i][l] += wu * pst->dw.coef[j][n];
476 		    }
477 		}
478 	    }
479 	}
480     }
481 
482     return;
483 }
484 
485 /* Choleski: Choleski factorization of Matrix R */
Choleski(PStreamChol * pst)486 static void Choleski(PStreamChol *pst)
487 {
488     register int t, j, k;
489 
490     pst->R[0][0] = sqrt(pst->R[0][0]);
491 
492     for (j = 1; j < pst->width; j++) pst->R[0][j] /= pst->R[0][0];
493 
494     for (t = 1; t < pst->T; t++) {
495 	for (j = 1; j < pst->width; j++)
496 	    if (t - j >= 0)
497 		pst->R[t][0] -= pst->R[t - j][j] * pst->R[t - j][j];
498 
499 	pst->R[t][0] = sqrt(pst->R[t][0]);
500 
501 	for (j = 1; j < pst->width; j++) {
502 	    for (k = 0; k < pst->dw.maxw[WRIGHT]; k++)
503 		if (j != pst->width - 1)
504 		    pst->R[t][j] -=
505 			pst->R[t - k - 1][j - k] * pst->R[t - k - 1][j + 1];
506 
507 	    pst->R[t][j] /= pst->R[t][0];
508 	}
509     }
510 
511     return;
512 }
513 
514 /* Choleski_forward: forward substitution to solve linear equations */
Choleski_forward(PStreamChol * pst)515 static void Choleski_forward(PStreamChol *pst)
516 {
517     register int t, j;
518     double hold;
519 
520     pst->g[0] = pst->r[0] / pst->R[0][0];
521 
522     for (t=1; t < pst->T; t++) {
523 	hold = 0.0;
524 	for (j = 1; j < pst->width; j++)
525 	    if (t - j >= 0 && pst->R[t - j][j] != 0.0)
526 		hold += pst->R[t - j][j] * pst->g[t - j];
527 	pst->g[t] = (pst->r[t] - hold) / pst->R[t][0];
528     }
529 
530     return;
531 }
532 
533 /* Choleski_backward: backward substitution to solve linear equations */
Choleski_backward(PStreamChol * pst,const int m)534 static void Choleski_backward(PStreamChol *pst, const int m)
535 {
536     register int t, j;
537     double hold;
538 
539     pst->c[pst->T - 1][m] = pst->g[pst->T - 1] / pst->R[pst->T - 1][0];
540 
541     for (t = pst->T - 2; t >= 0; t--) {
542 	hold = 0.0;
543 	for (j = 1; j < pst->width; j++)
544 	    if (t + j < pst->T && pst->R[t][j] != 0.0)
545 		hold += pst->R[t][j] * pst->c[t + j][m];
546 	pst->c[t][m] = (pst->g[t] - hold) / pst->R[t][0];
547    }
548 
549    return;
550 }
551 
552 
553 /* ML Considering Global Variance */
554 #if 0
555 static void varconv(double **c, const int m, const int T, const double var)
556 {
557     register int n;
558     double sd, osd;
559     double oav = 0.0, ovar = 0.0, odif = 0.0;
560 
561     calc_varstats(c, m, T, &oav, &ovar, &odif);
562     osd = sqrt(ovar);	sd = sqrt(var);
563     for (n = 0; n < T; n++)
564 	c[n][m] = (c[n][m] - oav) / osd * sd + oav;
565 
566     return;
567 }
568 
569 static void calc_varstats(double **c, const int m, const int T,
570 		   double *av, double *var, double *dif)
571 {
572     register int i;
573     register double d;
574 
575     *av = 0.0;
576     *var = 0.0;
577     *dif = 0.0;
578     for (i = 0; i < T; i++) *av += c[i][m];
579     *av /= (double)T;
580     for (i = 0; i < T; i++) {
581 	d = c[i][m] - *av;
582 	*var += d * d;	*dif += d;
583     }
584     *var /= (double)T;
585 
586     return;
587 }
588 
589 /* Diagonal Covariance Version */
590 static void mlgparaGrad(DMATRIX pdf, PStreamChol *pst, DMATRIX mlgp, const int max,
591 		 double th, double e, double alpha, DVECTOR vm, DVECTOR vv,
592 		 XBOOL nrmflag, XBOOL extvflag)
593 {
594     int t, d;
595 
596     /* error check */
597     if (pst->vSize * 2 != pdf->col || pst->order + 1 != mlgp->col) {
598 	cst_errmsg("Error mlgparaChol: Different dimension\n");
599         cst_error();
600     }
601 
602     /* mseq: U^{-1}*M,	ifvseq: U^{-1} */
603     for (t = 0; t < pst->T; t++) {
604 	for (d = 0; d < pst->vSize; d++) {
605 	    pst->mseq[t][d] = pdf->data[t][d];
606 	    pst->ivseq[t][d] = pdf->data[t][pst->vSize + d];
607 	}
608     }
609 
610     /* ML parameter generation */
611     mlpgChol(pst);
612 
613     /* extend variance */
614     if (extvflag == XTRUE)
615 	for (d = 0; d <= pst->order; d++)
616 	    varconv(pst->c, d, pst->T, vm->data[d]);
617 
618     /* estimating parameters */
619     mlpgGrad(pst, max, th, e, alpha, vm, vv, nrmflag);
620 
621     /* extracting parameters */
622     for (t = 0; t < pst->T; t++)
623 	for (d = 0; d <= pst->order; d++)
624 	    mlgp->data[t][d] = pst->c[t][d];
625 
626     return;
627 }
628 
629 /* generate parameter sequence from pdf sequence using gradient */
630 static void mlpgGrad(PStreamChol *pst, const int max, double th, double e,
631 	      double alpha, DVECTOR vm, DVECTOR vv, XBOOL nrmflag)
632 {
633    register int m, i, t;
634    double diff, n, dth;
635 
636    if (nrmflag == XTRUE)
637        n = (double)(pst->T * pst->vSize) / (double)(vm->length);
638    else n = 1.0;
639 
640    /* generating parameter in each dimension */
641    for (m = 0; m <= pst->order; m++) {
642        calc_R_and_r(pst, m);
643        dth = th * sqrt(vm->data[m]);
644        for (i = 0; i < max; i++) {
645 	   calc_grad(pst, m);
646 	   if (vm != NODATA && vv != NODATA)
647 	       calc_vargrad(pst, m, alpha, n, vm->data[m], vv->data[m]);
648 	   for (t = 0, diff = 0.0; t < pst->T; t++) {
649 	       diff += pst->g[t] * pst->g[t];
650 	       pst->c[t][m] += e * pst->g[t];
651 	   }
652 	   diff = sqrt(diff / (double)pst->T);
653 	   if (diff < dth || diff == 0.0) break;
654        }
655    }
656 
657    return;
658 }
659 
660 /* calc_grad: calculate -RX + r = -W'U^{-1}W * X + W'U^{-1}M */
661 void calc_grad(PStreamChol *pst, const int m)
662 {
663     register int i, j;
664 
665     for (i = 0; i < pst->T; i++) {
666 	pst->g[i] = pst->r[i] - pst->c[i][m] * pst->R[i][0];
667 	for (j = 1; j < pst->width; j++) {
668 	    if (i + j < pst->T) pst->g[i] -= pst->c[i + j][m] * pst->R[i][j];
669 	    if (i - j >= 0) pst->g[i] -= pst->c[i - j][m] * pst->R[i - j][j];
670 	}
671     }
672 
673     return;
674 }
675 
676 static void calc_vargrad(PStreamChol *pst, const int m, double alpha, double n,
677 		  double vm, double vv)
678 {
679     register int i;
680     double vg, w1, w2;
681     double av = 0.0, var = 0.0, dif = 0.0;
682 
683     if (alpha > 1.0 || alpha < 0.0) {
684 	w1 = 1.0;		w2 = 1.0;
685     } else {
686 	w1 = alpha;	w2 = 1.0 - alpha;
687     }
688 
689     calc_varstats(pst->c, m, pst->T, &av, &var, &dif);
690 
691     for (i = 0; i < pst->T; i++) {
692 	vg = -(var - vm) * (pst->c[i][m] - av) * vv * 2.0 / (double)pst->T;
693 	pst->g[i] = w1 * pst->g[i] / n + w2 * vg;
694     }
695 
696     return;
697 }
698 #endif
699 
700 /* diagonal covariance */
xget_detvec_diamat2inv(DMATRIX covmat)701 static DVECTOR xget_detvec_diamat2inv(DMATRIX covmat)	/* [num class][dim] */
702 {
703     long dim, clsnum;
704     long i, j;
705     double det;
706     DVECTOR detvec = NODATA;
707     /* In cases where the determinant of the matrix ends up being */
708     /* zero, we can fix this by artificially setting the covariance */
709     /* to be a magic number. I chose this magic number by computing */
710     /* the mean of all MCEP SDs of all leaves in a standard RMS  */
711     /* voice. - Prasanna */
712     /* double magic_covariance = 0.0962*0.0962; */
713     double magic_covariance = 0.0962;
714 
715     clsnum = covmat->row;
716     dim = covmat->col;
717     /* memory allocation */
718     detvec = xdvalloc(clsnum);
719     for (i = 0; i < clsnum; i++) {
720 	for (j = 0, det = 1.0; j < dim; j++) {
721 	    det *= covmat->data[i][j];
722 	    if (det > 0.0) {
723 		covmat->data[i][j] = 1.0 / covmat->data[i][j];
724 	    } else {
725                 covmat->data[i][j] = 1.0 / magic_covariance;
726                 det = pow(magic_covariance, j+1);
727 		/* cst_errmsg("error:(class %ld) determinant <= 0, det = %f\n", i, det); */
728 	    }
729 	}
730 	detvec->data[i] = det;
731     }
732 
733     return detvec;
734 }
735 
get_gauss_full(long clsidx,DVECTOR vec,DVECTOR detvec,DMATRIX weightmat,DMATRIX meanvec,DMATRIX invcovmat)736 static double get_gauss_full(long clsidx,
737                              DVECTOR vec,		/* [dim] */
738                              DVECTOR detvec,		/* [clsnum] */
739                              DMATRIX weightmat,	/* [clsnum][1] */
740                              DMATRIX meanvec,		/* [clsnum][dim] */
741                              DMATRIX invcovmat)	/* [clsnum * dim][dim] */
742 {
743     double gauss;
744 
745     if (detvec->data[clsidx] <= 0.0) {
746 	cst_errmsg("#error: det <= 0.0\n");
747         cst_error();
748     }
749 
750     gauss = weightmat->data[clsidx][0]
751 	/ sqrt(pow(2.0 * PI, (double)vec->length) * detvec->data[clsidx])
752 	* exp(-1.0 * cal_xmcxmc(clsidx, vec, meanvec, invcovmat) / 2.0);
753 
754     return gauss;
755 }
756 
cal_xmcxmc(long clsidx,DVECTOR x,DMATRIX mm,DMATRIX cm)757 static double cal_xmcxmc(long clsidx,
758                          DVECTOR x,
759                          DMATRIX mm,	/* [num class][dim] */
760                          DMATRIX cm)	/* [num class * dim][dim] */
761 {
762     long clsnum, k, l, b, dim;
763     double *vec = NULL;
764     double td, d;
765 
766     dim = x->length;
767     clsnum = mm->row;
768     b = clsidx * dim;
769     if (mm->col != dim || cm->col != dim || clsnum * dim != cm->row) {
770 	cst_errmsg("Error cal_xmcxmc: different dimension\n");
771         cst_error();
772     }
773 
774     /* memory allocation */
775     vec = mlpg_alloc((int)dim, double);
776     for (k = 0; k < dim; k++) vec[k] = x->data[k] - mm->data[clsidx][k];
777     for (k = 0, d = 0.0; k < dim; k++) {
778 	for (l = 0, td = 0.0; l < dim; l++) td += vec[l] * cm->data[l + b][k];
779 	d += td * vec[k];
780     }
781     /* memory free */
782     mlpg_free(vec); vec = NULL;
783 
784     return d;
785 }
786 
787 #if 0
788 /* diagonal covariance */
789 static double get_gauss_dia5(double det,
790                              double weight,
791                              DVECTOR vec,		/* dim */
792                              DVECTOR meanvec,		/* dim */
793                              DVECTOR invcovvec)		/* dim */
794 {
795     double gauss, sb;
796     long k;
797 
798     if (det <= 0.0) {
799 	cst_errmsg("#error: det <= 0.0\n");
800         cst_error();
801     }
802 
803     for (k = 0, gauss = 0.0; k < vec->length; k++) {
804 	sb = vec->data[k] - meanvec->data[k];
805 	gauss += sb * invcovvec->data[k] * sb;
806     }
807 
808     gauss = weight / sqrt(pow(2.0 * PI, (double)vec->length) * det)
809 	* exp(-gauss / 2.0);
810 
811     return gauss;
812 }
813 #endif
814 
get_gauss_dia(long clsidx,DVECTOR vec,DVECTOR detvec,DMATRIX weightmat,DMATRIX meanmat,DMATRIX invcovmat)815 static double get_gauss_dia(long clsidx,
816                             DVECTOR vec,		/* [dim] */
817                             DVECTOR detvec,		/* [clsnum] */
818                             DMATRIX weightmat,		/* [clsnum][1] */
819                             DMATRIX meanmat,		/* [clsnum][dim] */
820                             DMATRIX invcovmat)		/* [clsnum][dim] */
821 {
822     double gauss, sb;
823     long k;
824 
825     if (detvec->data[clsidx] <= 0.0) {
826 	cst_errmsg("#error: det <= 0.0\n");
827         cst_error();
828     }
829 
830     for (k = 0, gauss = 0.0; k < vec->length; k++) {
831 	sb = vec->data[k] - meanmat->data[clsidx][k];
832 	gauss += sb * invcovmat->data[clsidx][k] * sb;
833     }
834 
835     gauss = weightmat->data[clsidx][0]
836 	/ sqrt(pow(2.0 * PI, (double)vec->length) * detvec->data[clsidx])
837 	* exp(-gauss / 2.0);
838 
839     return gauss;
840 }
841 
pst_free(PStreamChol * pst)842 static void pst_free(PStreamChol *pst)
843 {
844     int i;
845 
846     for (i=0; i<pst->dw.num; i++)
847         mlpg_free(pst->dw.width[i]);
848     mlpg_free(pst->dw.width); pst->dw.width = NULL;
849     for (i=0; i<pst->dw.num; i++)
850         mlpg_free(pst->dw.coef_ptrs[i]);
851     mlpg_free(pst->dw.coef); pst->dw.coef = NULL;
852     mlpg_free(pst->dw.coef_ptrs); pst->dw.coef_ptrs = NULL;
853 
854     for (i=0; i<pst->T; i++)
855         mlpg_free(pst->mseq[i]);
856     mlpg_free(pst->mseq);
857     for (i=0; i<pst->T; i++)
858         mlpg_free(pst->ivseq[i]);
859     mlpg_free(pst->ivseq);
860     for (i=0; i<pst->T; i++)
861         mlpg_free(pst->R[i]);
862     mlpg_free(pst->R);
863     mlpg_free(pst->r);
864     mlpg_free(pst->g);
865     for (i=0; i<pst->T; i++)
866         mlpg_free(pst->c[i]);
867     mlpg_free(pst->c);
868 
869     return;
870 }
871 
mlpg(const cst_track * param_track,cst_cg_db * cg_db)872 cst_track *mlpg(const cst_track *param_track, cst_cg_db *cg_db)
873 {
874     /* Generate an (mcep) track using Maximum Likelihood Parameter Generation */
875     MLPGPARA param = NODATA;
876     cst_track *out;
877     int dim, dim_st;
878     /*    float like; */
879     int i,j;
880     int nframes;
881     PStreamChol pst;
882 
883     nframes = param_track->num_frames;
884     dim = (param_track->num_channels/2)-1;
885     dim_st = dim/2; /* dim2 in original code */
886     out = new_track();
887     cst_track_resize(out,nframes,dim_st+1);
888 
889     param = xmlpgpara_init(dim,dim_st,nframes,nframes);
890 
891     /* mixture-index sequence */
892     param->clsidxv = xlvalloc(nframes);
893     for (i=0; i<nframes; i++)
894         param->clsidxv->data[i] = i;
895 
896     /* initial static feature sequence */
897     param->stm = xdmalloc(nframes,dim_st);
898     for (i=0; i<nframes; i++)
899     {
900         for (j=0; j<dim_st; j++)
901             param->stm->data[i][j] = param_track->frames[i][(j+1)*2];
902     }
903 
904     /* Load cluster means */
905     for (i=0; i<nframes; i++)
906         for (j=0; j<dim_st; j++)
907             param->mean->data[i][j] = param_track->frames[i][(j+1)*2];
908 
909     /* GMM parameters diagonal covariance */
910     InitPStreamChol(&pst, cg_db->dynwin, cg_db->dynwinsize, dim_st-1, nframes);
911     param->pdf = xdmalloc(nframes,dim*2);
912     param->cov = xdmalloc(nframes,dim);
913     for (i=0; i<nframes; i++)
914         for (j=0; j<dim; j++)
915             param->cov->data[i][j] =
916                 param_track->frames[i][(j+1)*2+1] *
917                 param_track->frames[i][(j+1)*2+1];
918     param->detvec = xget_detvec_diamat2inv(param->cov);
919 
920     /* global variance parameters */
921     /* TBD get_gv_mlpgpara(param, vmfile, vvfile, dim2, msg_flag); */
922 
923     get_dltmat(param->stm, &pst.dw, 1, param->dltm);
924 
925     get_like_pdfseq_vit(dim, dim_st, nframes, nframes, param,
926 			param_track->frames, XTRUE);
927 
928     /* vlike = get_like_gv(dim2, dnum, param); */
929 
930     mlgparaChol(param->pdf, &pst, param->stm);
931 
932     /* Put the answer back into the output track */
933     for (i=0; i<nframes; i++)
934     {
935         out->times[i] = param_track->times[i];
936         out->frames[i][0] = param_track->frames[i][0]; /* F0 */
937         for (j=0; j<dim_st; j++)
938             out->frames[i][j+1] = param->stm->data[i][j];
939     }
940 
941     /* memory free */
942     xmlpgparafree(param);
943     pst_free(&pst);
944 
945     return out;
946 }
947 
948