1 /*  ---------------------------------------------------------------  */
2 /*      The HMM-Based Speech Synthesis System (HTS): version 1.1.1   */
3 /*                        HTS Working Group                          */
4 /*                                                                   */
5 /*                   Department of Computer Science                  */
6 /*                   Nagoya Institute of Technology                  */
7 /*                                and                                */
8 /*    Interdisciplinary Graduate School of Science and Engineering   */
9 /*                   Tokyo Institute of Technology                   */
10 /*                      Copyright (c) 2001-2003                      */
11 /*                        All Rights Reserved.                       */
12 /*                                                                   */
13 /*  Permission is hereby granted, free of charge, to use and         */
14 /*  distribute this software and its documentation without           */
15 /*  restriction, including without limitation the rights to use,     */
16 /*  copy, modify, merge, publish, distribute, sublicense, and/or     */
17 /*  sell copies of this work, and to permit persons to whom this     */
18 /*  work is furnished to do so, subject to the following conditions: */
19 /*                                                                   */
20 /*    1. The code must retain the above copyright notice, this list  */
21 /*       of conditions and the following disclaimer.                 */
22 /*                                                                   */
23 /*    2. Any modifications must be clearly marked as such.           */
24 /*                                                                   */
25 /*  NAGOYA INSTITUTE OF TECHNOLOGY, TOKYO INSITITUTE OF TECHNOLOGY,  */
26 /*  HTS WORKING GROUP, AND THE CONTRIBUTORS TO THIS WORK DISCLAIM    */
27 /*  ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL       */
28 /*  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
29 /*  SHALL NAGOYA INSTITUTE OF TECHNOLOGY, TOKYO INSITITUTE OF        */
30 /*  TECHNOLOGY, HTS WORKING GROUP, NOR THE CONTRIBUTORS BE LIABLE    */
31 /*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY        */
32 /*  DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,  */
33 /*  WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS   */
34 /*  ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR          */
35 /*  PERFORMANCE OF THIS SOFTWARE.                                    */
36 /*                                                                   */
37 /*  ---------------------------------------------------------------  */
38 /*    mlpg.c : speech parameter generation from pdf sequence         */
39 /*                                                                   */
40 /*                                    2003/12/26 by Heiga Zen        */
41 /*  ---------------------------------------------------------------  */
42 /*********************************************************************/
43 /*                                                                   */
44 /*            Nagoya Institute of Technology, Aichi, Japan,          */
45 /*                                and                                */
46 /*             Carnegie Mellon University, Pittsburgh, PA            */
47 /*                   Copyright (c) 2003-2004,2008                    */
48 /*                        All Rights Reserved.                       */
49 /*                                                                   */
50 /*  Permission is hereby granted, free of charge, to use and         */
51 /*  distribute this software and its documentation without           */
52 /*  restriction, including without limitation the rights to use,     */
53 /*  copy, modify, merge, publish, distribute, sublicense, and/or     */
54 /*  sell copies of this work, and to permit persons to whom this     */
55 /*  work is furnished to do so, subject to the following conditions: */
56 /*                                                                   */
57 /*    1. The code must retain the above copyright notice, this list  */
58 /*       of conditions and the following disclaimer.                 */
59 /*    2. Any modifications must be clearly marked as such.           */
60 /*    3. Original authors' names are not deleted.                    */
61 /*                                                                   */
62 /*  NAGOYA INSTITUTE OF TECHNOLOGY, CARNEGIE MELLON UNIVERSITY, AND  */
63 /*  THE CONTRIBUTORS TO THIS WORK DISCLAIM ALL WARRANTIES WITH       */
64 /*  REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF     */
65 /*  MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL NAGOYA INSTITUTE  */
66 /*  OF TECHNOLOGY, CARNEGIE MELLON UNIVERSITY, NOR THE CONTRIBUTORS  */
67 /*  BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR  */
68 /*  ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR       */
69 /*  PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER   */
70 /*  TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE    */
71 /*  OR PERFORMANCE OF THIS SOFTWARE.                                 */
72 /*                                                                   */
73 /*********************************************************************/
74 /*                                                                   */
75 /*          Author :  Tomoki Toda (tomoki@ics.nitech.ac.jp)          */
76 /*          Date   :  June 2004                                      */
77 /*                                                                   */
78 /*  Modified as a single file for inclusion in festival/flite        */
79 /*  May 2008 awb@cs.cmu.edu                                          */
80 /*-------------------------------------------------------------------*/
81 /*                                                                   */
82 /*  ML-Based Parameter Generation                                    */
83 /*                                                                   */
84 /*-------------------------------------------------------------------*/
85 
86 #include "festival.h"
87 #include "simple_mlpg.h"
88 #define mlpg_alloc(X,Y) (walloc(Y,X))
89 #define mlpg_free wfree
90 #define cst_errmsg EST_error
91 #define cst_error()
92 
93 #ifdef CMUFLITE
94 #include "cst_alloc.h"
95 #include "cst_string.h"
96 #include "cst_math.h"
97 #include "cst_vc.h"
98 #include "cst_track.h"
99 #include "cst_wave.h"
100 #include "cst_mlpg.h"
101 
102 #define mlpg_alloc(X,Y) (cst_alloc(Y,X))
103 #define mlpg_free cst_free
104 #endif
105 
106 static MLPGPARA xmlpgpara_init(int dim, int dim2, int dnum, int clsnum);
107 static void xmlpgparafree(MLPGPARA param);
108 static double get_like_pdfseq_vit(int dim, int dim2, int dnum, int clsnum,
109                                   MLPGPARA param,
110                                   EST_Track *model,
111                                   XBOOL dia_flag);
112 static void get_dltmat(DMATRIX mat, MLPG_DWin *dw, int dno, DMATRIX dmat);
113 
114 static double *dcalloc(int x, int xoff);
115 static double **ddcalloc(int x, int y, int xoff, int yoff);
116 
117 /***********************************/
118 /* ML using Choleski decomposition */
119 /***********************************/
120 /* Diagonal Covariance Version */
121 static void InitDWin(PStreamChol *pst, const float *dynwin, int fsize);
122 static void InitPStreamChol(PStreamChol *pst, const float *dynwin, int fsize,
123                             int order, int T);
124 static void mlgparaChol(DMATRIX pdf, PStreamChol *pst, DMATRIX mlgp);
125 static void mlpgChol(PStreamChol *pst);
126 static void calc_R_and_r(PStreamChol *pst, const int m);
127 static void Choleski(PStreamChol *pst);
128 static void Choleski_forward(PStreamChol *pst);
129 static void Choleski_backward(PStreamChol *pst, const int m);
130 static double get_gauss_full(long clsidx,
131 		      DVECTOR vec,		// [dim]
132 		      DVECTOR detvec,		// [clsnum]
133 		      DMATRIX weightmat,	// [clsnum][1]
134 		      DMATRIX meanvec,		// [clsnum][dim]
135 		      DMATRIX invcovmat);	// [clsnum * dim][dim]
136 static double get_gauss_dia(long clsidx,
137 		     DVECTOR vec,		// [dim]
138 		     DVECTOR detvec,		// [clsnum]
139 		     DMATRIX weightmat,		// [clsnum][1]
140 		     DMATRIX meanmat,		// [clsnum][dim]
141 		     DMATRIX invcovmat);	// [clsnum][dim]
142 static double cal_xmcxmc(long clsidx,
143 		  DVECTOR x,
144 		  DMATRIX mm,	// [num class][dim]
145 		  DMATRIX cm);	// [num class * dim][dim]
146 
147 const float mlpg_dynwin[] = { -0.5, 0.0, 0.5 };
148 #define mlpg_dynwinsize 3
149 
xmlpgpara_init(int dim,int dim2,int dnum,int clsnum)150 static MLPGPARA xmlpgpara_init(int dim, int dim2, int dnum,
151                                int clsnum)
152 {
153     MLPGPARA param;
154 
155     // memory allocation
156     param = mlpg_alloc(1,struct MLPGPARA_STRUCT);
157     param->ov = xdvalloc(dim);
158     param->iuv = NODATA;
159     param->iumv = NODATA;
160     param->flkv = xdvalloc(dnum);
161     param->stm = NODATA;
162     param->dltm = xdmalloc(dnum, dim2);
163     param->pdf = NODATA;
164     param->detvec = NODATA;
165     param->wght = xdmalloc(clsnum, 1);
166     param->mean = xdmalloc(clsnum, dim);
167     param->cov = NODATA;
168     param->clsidxv = NODATA;
169     /* dia_flag */
170 	param->clsdetv = xdvalloc(1);
171 	param->clscov = xdmalloc(1, dim);
172 
173     param->vdet = 1.0;
174     param->vm = NODATA;
175     param->vv = NODATA;
176     param->var = NODATA;
177 
178     return param;
179 }
180 
xmlpgparafree(MLPGPARA param)181 static void xmlpgparafree(MLPGPARA param)
182 {
183     if (param != NODATA) {
184 	if (param->ov != NODATA) xdvfree(param->ov);
185 	if (param->iuv != NODATA) xdvfree(param->iuv);
186 	if (param->iumv != NODATA) xdvfree(param->iumv);
187 	if (param->flkv != NODATA) xdvfree(param->flkv);
188 	if (param->stm != NODATA) xdmfree(param->stm);
189 	if (param->dltm != NODATA) xdmfree(param->dltm);
190 	if (param->pdf != NODATA) xdmfree(param->pdf);
191 	if (param->detvec != NODATA) xdvfree(param->detvec);
192 	if (param->wght != NODATA) xdmfree(param->wght);
193 	if (param->mean != NODATA) xdmfree(param->mean);
194 	if (param->cov != NODATA) xdmfree(param->cov);
195 	if (param->clsidxv != NODATA) xlvfree(param->clsidxv);
196 	if (param->clsdetv != NODATA) xdvfree(param->clsdetv);
197 	if (param->clscov != NODATA) xdmfree(param->clscov);
198 	if (param->vm != NODATA) xdvfree(param->vm);
199 	if (param->vv != NODATA) xdvfree(param->vv);
200 	if (param->var != NODATA) xdvfree(param->var);
201 	mlpg_free(param);
202     }
203 
204     return;
205 }
206 
get_like_pdfseq_vit(int dim,int dim2,int dnum,int clsnum,MLPGPARA param,EST_Track * model,XBOOL dia_flag)207 static double get_like_pdfseq_vit(int dim, int dim2, int dnum, int clsnum,
208                                   MLPGPARA param,
209                                   EST_Track *model,
210                                   XBOOL dia_flag)
211 {
212     long d, c, k, l, j;
213     double sumgauss;
214     double like = 0.0;
215 
216     for (d = 0, like = 0.0; d < dnum; d++) {
217 	// read weight and mean sequences
218         param->wght->data[0][0] = 0.9; /* FIXME weights */
219         for (j=0; j<dim; j++)
220             param->mean->data[0][j] = model->a((int)d,(int)((j+1)*2));
221 
222 	// observation vector
223 	for (k = 0; k < dim2; k++) {
224 	    param->ov->data[k] = param->stm->data[d][k];
225 	    param->ov->data[k + dim2] = param->dltm->data[d][k];
226 	}
227 
228 	// mixture index
229         c = d;
230 	param->clsdetv->data[0] = param->detvec->data[c];
231 
232 	// calculating likelihood
233 	if (dia_flag == XTRUE) {
234 	    for (k = 0; k < param->clscov->col; k++)
235 		param->clscov->data[0][k] = param->cov->data[c][k];
236 	    sumgauss = get_gauss_dia(0, param->ov, param->clsdetv,
237 				     param->wght, param->mean, param->clscov);
238 	} else {
239 	    for (k = 0; k < param->clscov->row; k++)
240 		for (l = 0; l < param->clscov->col; l++)
241 		    param->clscov->data[k][l] =
242 			param->cov->data[k + param->clscov->row * c][l];
243 	    sumgauss = get_gauss_full(0, param->ov, param->clsdetv,
244 				      param->wght, param->mean, param->clscov);
245 	}
246 	if (sumgauss <= 0.0) param->flkv->data[d] = -1.0 * INFTY2;
247 	else param->flkv->data[d] = log(sumgauss);
248 	like += param->flkv->data[d];
249 
250 	// estimating U', U'*M
251 	if (dia_flag == XTRUE) {
252 	    // PDF [U'*M U']
253 	    for (k = 0; k < dim; k++) {
254 		param->pdf->data[d][k] =
255 		    param->clscov->data[0][k] * param->mean->data[0][k];
256 		param->pdf->data[d][k + dim] = param->clscov->data[0][k];
257 	    }
258 	} else {
259 	    // PDF [U'*M U']
260 	    for (k = 0; k < dim; k++) {
261 		param->pdf->data[d][k] = 0.0;
262 		for (l = 0; l < dim; l++) {
263 		    param->pdf->data[d][k * dim + dim + l] =
264 			param->clscov->data[k][l];
265 		    param->pdf->data[d][k] +=
266 			param->clscov->data[k][l] * param->mean->data[0][l];
267 		}
268 	    }
269 	}
270     }
271 
272     like /= (double)dnum;
273 
274     return like;
275 }
276 
277 #if 0
278 static double get_like_gv(long dim2, long dnum, MLPGPARA param)
279 {
280     long k;
281     double av = 0.0, dif = 0.0;
282     double vlike = -INFTY;
283 
284     if (param->vm != NODATA && param->vv != NODATA) {
285 	for (k = 0; k < dim2; k++)
286 	    calc_varstats(param->stm->data, k, dnum, &av,
287 			  &(param->var->data[k]), &dif);
288 	vlike = log(get_gauss_dia5(param->vdet, 1.0, param->var,
289                                    param->vm, param->vv));
290     }
291 
292     return vlike;
293 }
294 
295 static void sm_mvav(DMATRIX mat, long hlen)
296 {
297     long k, l, m, p;
298     double d, sd;
299     DVECTOR vec = NODATA;
300     DVECTOR win = NODATA;
301 
302     vec = xdvalloc(mat->row);
303 
304     // smoothing window
305     win = xdvalloc(hlen * 2 + 1);
306     for (k = 0, d = 1.0, sd = 0.0; k < hlen; k++, d += 1.0) {
307 	win->data[k] = d;	win->data[win->length - k - 1] = d;
308 	sd += d + d;
309     }
310     win->data[k] = d;	sd += d;
311     for (k = 0; k < win->length; k++) win->data[k] /= sd;
312 
313     for (l = 0; l < mat->col; l++) {
314 	for (k = 0; k < mat->row; k++) {
315 	    for (m = 0, vec->data[k] = 0.0; m < win->length; m++) {
316 		p = k - hlen + m;
317 		if (p >= 0 && p < mat->row)
318 		    vec->data[k] += mat->data[p][l] * win->data[m];
319 	    }
320 	}
321 	for (k = 0; k < mat->row; k++) mat->data[k][l] = vec->data[k];
322     }
323 
324     xdvfree(win);
325     xdvfree(vec);
326 
327     return;
328 }
329 #endif
330 
get_dltmat(DMATRIX mat,MLPG_DWin * dw,int dno,DMATRIX dmat)331 static void get_dltmat(DMATRIX mat, MLPG_DWin *dw, int dno, DMATRIX dmat)
332 {
333     int i, j, k, tmpnum;
334 
335     tmpnum = (int)mat->row - dw->width[dno][WRIGHT];
336     for (k = dw->width[dno][WRIGHT]; k < tmpnum; k++)	// time index
337 	for (i = 0; i < (int)mat->col; i++)	// dimension index
338 	    for (j = dw->width[dno][WLEFT], dmat->data[k][i] = 0.0;
339 		 j <= dw->width[dno][WRIGHT]; j++)
340 		dmat->data[k][i] += mat->data[k + j][i] * dw->coef[dno][j];
341 
342     for (i = 0; i < (int)mat->col; i++) {		// dimension index
343 	for (k = 0; k < dw->width[dno][WRIGHT]; k++)		// time index
344 	    for (j = dw->width[dno][WLEFT], dmat->data[k][i] = 0.0;
345 		 j <= dw->width[dno][WRIGHT]; j++)
346 		if (k + j >= 0)
347 		    dmat->data[k][i] += mat->data[k + j][i] * dw->coef[dno][j];
348 		else
349 		    dmat->data[k][i] += (2.0 * mat->data[0][i] - mat->data[-k - j][i]) * dw->coef[dno][j];
350 	for (k = tmpnum; k < (int)mat->row; k++)	// time index
351 	    for (j = dw->width[dno][WLEFT], dmat->data[k][i] = 0.0;
352 		 j <= dw->width[dno][WRIGHT]; j++)
353 		if (k + j < (int)mat->row)
354 		    dmat->data[k][i] += mat->data[k + j][i] * dw->coef[dno][j];
355 		else
356 		    dmat->data[k][i] += (2.0 * mat->data[mat->row - 1][i] - mat->data[mat->row - k - j + mat->row - 2][i]) * dw->coef[dno][j];
357     }
358 
359     return;
360 }
361 
362 
dcalloc(int x,int xoff)363 static double *dcalloc(int x, int xoff)
364 {
365     double *ptr;
366 
367     ptr = mlpg_alloc(x,double);
368     /* ptr += xoff; */ /* Just not going to allow this */
369     return(ptr);
370 }
371 
ddcalloc(int x,int y,int xoff,int yoff)372 static double **ddcalloc(int x, int y, int xoff, int yoff)
373 {
374     double **ptr;
375     register int i;
376 
377     ptr = mlpg_alloc(x,double *);
378     for (i = 0; i < x; i++) ptr[i] = dcalloc(y, yoff);
379     /* ptr += xoff; */ /* Just not going to allow this */
380     return(ptr);
381 }
382 
383 /////////////////////////////////////
384 // ML using Choleski decomposition //
385 /////////////////////////////////////
InitDWin(PStreamChol * pst,const float * dynwin,int fsize)386 static void InitDWin(PStreamChol *pst, const float *dynwin, int fsize)
387 {
388     int i,j;
389     int leng;
390 
391     pst->dw.num = 1;	// only static
392     if (dynwin) {
393 	pst->dw.num = 2;	// static + dyn
394     }
395     // memory allocation
396     pst->dw.width = mlpg_alloc(pst->dw.num,int *);
397     for (i = 0; i < pst->dw.num; i++)
398         pst->dw.width[i] = mlpg_alloc(2,int);
399 
400     pst->dw.coef = mlpg_alloc(pst->dw.num, double *);
401     pst->dw.coef_ptrs = mlpg_alloc(pst->dw.num, double *);
402     // window for static parameter	WLEFT = 0, WRIGHT = 1
403     pst->dw.width[0][WLEFT] = pst->dw.width[0][WRIGHT] = 0;
404     pst->dw.coef_ptrs[0] = mlpg_alloc(1,double);
405     pst->dw.coef[0] = pst->dw.coef_ptrs[0];
406     pst->dw.coef[0][0] = 1.0;
407 
408     // set delta coefficients
409     for (i = 1; i < pst->dw.num; i++) {
410         pst->dw.coef_ptrs[i] = mlpg_alloc(fsize, double);
411 	pst->dw.coef[i] = pst->dw.coef_ptrs[i];
412         for (j=0; j<fsize; j++) /* FIXME make dynwin doubles for memmove */
413             pst->dw.coef[i][j] = (double)dynwin[j];
414 	// set pointer
415 	leng = fsize / 2;			// L (fsize = 2 * L + 1)
416 	pst->dw.coef[i] += leng;		// [L] -> [0]	center
417 	pst->dw.width[i][WLEFT] = -leng;	// -L		left
418 	pst->dw.width[i][WRIGHT] = leng;	//  L		right
419 	if (fsize % 2 == 0) pst->dw.width[i][WRIGHT]--;
420     }
421 
422     pst->dw.maxw[WLEFT] = pst->dw.maxw[WRIGHT] = 0;
423     for (i = 0; i < pst->dw.num; i++) {
424 	if (pst->dw.maxw[WLEFT] > pst->dw.width[i][WLEFT])
425 	    pst->dw.maxw[WLEFT] = pst->dw.width[i][WLEFT];
426 	if (pst->dw.maxw[WRIGHT] < pst->dw.width[i][WRIGHT])
427 	    pst->dw.maxw[WRIGHT] = pst->dw.width[i][WRIGHT];
428     }
429 
430     return;
431 }
432 
InitPStreamChol(PStreamChol * pst,const float * dynwin,int fsize,int order,int T)433 static void InitPStreamChol(PStreamChol *pst, const float *dynwin, int fsize,
434                             int order, int T)
435 {
436     // order of cepstrum
437     pst->order = order;
438 
439     // windows for dynamic feature
440     InitDWin(pst, dynwin, fsize);
441 
442     // dimension of observed vector
443     pst->vSize = (pst->order + 1) * pst->dw.num;	// odim = dim * (1--3)
444 
445     // memory allocation
446     pst->T = T;					// number of frames
447     pst->width = pst->dw.maxw[WRIGHT] * 2 + 1;	// width of R
448     pst->mseq = ddcalloc(T, pst->vSize, 0, 0);	// [T][odim]
449     pst->ivseq = ddcalloc(T, pst->vSize, 0, 0);	// [T][odim]
450     pst->R = ddcalloc(T, pst->width, 0, 0);	// [T][width]
451     pst->r = dcalloc(T, 0);			// [T]
452     pst->g = dcalloc(T, 0);			// [T]
453     pst->c = ddcalloc(T, pst->order + 1, 0, 0);	// [T][dim]
454 
455     return;
456 }
457 
mlgparaChol(DMATRIX pdf,PStreamChol * pst,DMATRIX mlgp)458 static void mlgparaChol(DMATRIX pdf, PStreamChol *pst, DMATRIX mlgp)
459 {
460     int t, d;
461 
462     // error check
463     if (pst->vSize * 2 != pdf->col || pst->order + 1 != mlgp->col) {
464 	cst_errmsg("Error mlgparaChol: Different dimension\n");
465         cst_error();
466     }
467 
468     // mseq: U^{-1}*M,	ifvseq: U^{-1}
469     for (t = 0; t < pst->T; t++) {
470 	for (d = 0; d < pst->vSize; d++) {
471 	    pst->mseq[t][d] = pdf->data[t][d];
472 	    pst->ivseq[t][d] = pdf->data[t][pst->vSize + d];
473 	}
474     }
475 
476     // ML parameter generation
477     mlpgChol(pst);
478 
479     // extracting parameters
480     for (t = 0; t < pst->T; t++)
481 	for (d = 0; d <= pst->order; d++)
482 	    mlgp->data[t][d] = pst->c[t][d];
483 
484     return;
485 }
486 
487 // generate parameter sequence from pdf sequence using Choleski decomposition
mlpgChol(PStreamChol * pst)488 static void mlpgChol(PStreamChol *pst)
489 {
490    register int m;
491 
492    // generating parameter in each dimension
493    for (m = 0; m <= pst->order; m++) {
494        calc_R_and_r(pst, m);
495        Choleski(pst);
496        Choleski_forward(pst);
497        Choleski_backward(pst, m);
498    }
499 
500    return;
501 }
502 
503 //------ parameter generation fuctions
504 // calc_R_and_r: calculate R = W'U^{-1}W and r = W'U^{-1}M
calc_R_and_r(PStreamChol * pst,const int m)505 static void calc_R_and_r(PStreamChol *pst, const int m)
506 {
507     register int i, j, k, l, n;
508     double   wu;
509 
510     for (i = 0; i < pst->T; i++) {
511 	pst->r[i] = pst->mseq[i][m];
512 	pst->R[i][0] = pst->ivseq[i][m];
513 
514 	for (j = 1; j < pst->width; j++) pst->R[i][j] = 0.0;
515 
516 	for (j = 1; j < pst->dw.num; j++) {
517 	    for (k = pst->dw.width[j][0]; k <= pst->dw.width[j][1]; k++) {
518 		n = i + k;
519 		if (n >= 0 && n < pst->T && pst->dw.coef[j][-k] != 0.0) {
520 		    l = j * (pst->order + 1) + m;
521 		    pst->r[i] += pst->dw.coef[j][-k] * pst->mseq[n][l];
522 		    wu = pst->dw.coef[j][-k] * pst->ivseq[n][l];
523 
524 		    for (l = 0; l < pst->width; l++) {
525 			n = l-k;
526 			if (n <= pst->dw.width[j][1] && i + l < pst->T &&
527 			    pst->dw.coef[j][n] != 0.0)
528 			    pst->R[i][l] += wu * pst->dw.coef[j][n];
529 		    }
530 		}
531 	    }
532 	}
533     }
534 
535     return;
536 }
537 
538 // Choleski: Choleski factorization of Matrix R
Choleski(PStreamChol * pst)539 static void Choleski(PStreamChol *pst)
540 {
541     register int t, j, k;
542 
543     pst->R[0][0] = sqrt(pst->R[0][0]);
544 
545     for (j = 1; j < pst->width; j++) pst->R[0][j] /= pst->R[0][0];
546 
547     for (t = 1; t < pst->T; t++) {
548 	for (j = 1; j < pst->width; j++)
549 	    if (t - j >= 0)
550 		pst->R[t][0] -= pst->R[t - j][j] * pst->R[t - j][j];
551 
552 	pst->R[t][0] = sqrt(pst->R[t][0]);
553 
554 	for (j = 1; j < pst->width; j++) {
555 	    for (k = 0; k < pst->dw.maxw[WRIGHT]; k++)
556 		if (j != pst->width - 1)
557 		    pst->R[t][j] -=
558 			pst->R[t - k - 1][j - k] * pst->R[t - k - 1][j + 1];
559 
560 	    pst->R[t][j] /= pst->R[t][0];
561 	}
562     }
563 
564     return;
565 }
566 
567 // Choleski_forward: forward substitution to solve linear equations
Choleski_forward(PStreamChol * pst)568 static void Choleski_forward(PStreamChol *pst)
569 {
570     register int t, j;
571     double hold;
572 
573     pst->g[0] = pst->r[0] / pst->R[0][0];
574 
575     for (t=1; t < pst->T; t++) {
576 	hold = 0.0;
577 	for (j = 1; j < pst->width; j++)
578 	    if (t - j >= 0 && pst->R[t - j][j] != 0.0)
579 		hold += pst->R[t - j][j] * pst->g[t - j];
580 	pst->g[t] = (pst->r[t] - hold) / pst->R[t][0];
581     }
582 
583     return;
584 }
585 
586 // Choleski_backward: backward substitution to solve linear equations
Choleski_backward(PStreamChol * pst,const int m)587 static void Choleski_backward(PStreamChol *pst, const int m)
588 {
589     register int t, j;
590     double hold;
591 
592     pst->c[pst->T - 1][m] = pst->g[pst->T - 1] / pst->R[pst->T - 1][0];
593 
594     for (t = pst->T - 2; t >= 0; t--) {
595 	hold = 0.0;
596 	for (j = 1; j < pst->width; j++)
597 	    if (t + j < pst->T && pst->R[t][j] != 0.0)
598 		hold += pst->R[t][j] * pst->c[t + j][m];
599 	pst->c[t][m] = (pst->g[t] - hold) / pst->R[t][0];
600    }
601 
602    return;
603 }
604 
605 ////////////////////////////////////
606 // ML Considering Global Variance //
607 ////////////////////////////////////
608 #if 0
609 static void varconv(double **c, const int m, const int T, const double var)
610 {
611     register int n;
612     double sd, osd;
613     double oav = 0.0, ovar = 0.0, odif = 0.0;
614 
615     calc_varstats(c, m, T, &oav, &ovar, &odif);
616     osd = sqrt(ovar);	sd = sqrt(var);
617     for (n = 0; n < T; n++)
618 	c[n][m] = (c[n][m] - oav) / osd * sd + oav;
619 
620     return;
621 }
622 
623 static void calc_varstats(double **c, const int m, const int T,
624 		   double *av, double *var, double *dif)
625 {
626     register int i;
627     register double d;
628 
629     *av = 0.0;
630     *var = 0.0;
631     *dif = 0.0;
632     for (i = 0; i < T; i++) *av += c[i][m];
633     *av /= (double)T;
634     for (i = 0; i < T; i++) {
635 	d = c[i][m] - *av;
636 	*var += d * d;	*dif += d;
637     }
638     *var /= (double)T;
639 
640     return;
641 }
642 
643 // Diagonal Covariance Version
644 static void mlgparaGrad(DMATRIX pdf, PStreamChol *pst, DMATRIX mlgp, const int max,
645 		 double th, double e, double alpha, DVECTOR vm, DVECTOR vv,
646 		 XBOOL nrmflag, XBOOL extvflag)
647 {
648     int t, d;
649 
650     // error check
651     if (pst->vSize * 2 != pdf->col || pst->order + 1 != mlgp->col) {
652 	cst_errmsg("Error mlgparaChol: Different dimension\n");
653         cst_error();
654     }
655 
656     // mseq: U^{-1}*M,	ifvseq: U^{-1}
657     for (t = 0; t < pst->T; t++) {
658 	for (d = 0; d < pst->vSize; d++) {
659 	    pst->mseq[t][d] = pdf->data[t][d];
660 	    pst->ivseq[t][d] = pdf->data[t][pst->vSize + d];
661 	}
662     }
663 
664     // ML parameter generation
665     mlpgChol(pst);
666 
667     // extend variance
668     if (extvflag == XTRUE)
669 	for (d = 0; d <= pst->order; d++)
670 	    varconv(pst->c, d, pst->T, vm->data[d]);
671 
672     // estimating parameters
673     mlpgGrad(pst, max, th, e, alpha, vm, vv, nrmflag);
674 
675     // extracting parameters
676     for (t = 0; t < pst->T; t++)
677 	for (d = 0; d <= pst->order; d++)
678 	    mlgp->data[t][d] = pst->c[t][d];
679 
680     return;
681 }
682 
683 // generate parameter sequence from pdf sequence using gradient
684 static void mlpgGrad(PStreamChol *pst, const int max, double th, double e,
685 	      double alpha, DVECTOR vm, DVECTOR vv, XBOOL nrmflag)
686 {
687    register int m, i, t;
688    double diff, n, dth;
689 
690    if (nrmflag == XTRUE)
691        n = (double)(pst->T * pst->vSize) / (double)(vm->length);
692    else n = 1.0;
693 
694    // generating parameter in each dimension
695    for (m = 0; m <= pst->order; m++) {
696        calc_R_and_r(pst, m);
697        dth = th * sqrt(vm->data[m]);
698        for (i = 0; i < max; i++) {
699 	   calc_grad(pst, m);
700 	   if (vm != NODATA && vv != NODATA)
701 	       calc_vargrad(pst, m, alpha, n, vm->data[m], vv->data[m]);
702 	   for (t = 0, diff = 0.0; t < pst->T; t++) {
703 	       diff += pst->g[t] * pst->g[t];
704 	       pst->c[t][m] += e * pst->g[t];
705 	   }
706 	   diff = sqrt(diff / (double)pst->T);
707 	   if (diff < dth || diff == 0.0) break;
708        }
709    }
710 
711    return;
712 }
713 
714 // calc_grad: calculate -RX + r = -W'U^{-1}W * X + W'U^{-1}M
715 void calc_grad(PStreamChol *pst, const int m)
716 {
717     register int i, j;
718 
719     for (i = 0; i < pst->T; i++) {
720 	pst->g[i] = pst->r[i] - pst->c[i][m] * pst->R[i][0];
721 	for (j = 1; j < pst->width; j++) {
722 	    if (i + j < pst->T) pst->g[i] -= pst->c[i + j][m] * pst->R[i][j];
723 	    if (i - j >= 0) pst->g[i] -= pst->c[i - j][m] * pst->R[i - j][j];
724 	}
725     }
726 
727     return;
728 }
729 
730 static void calc_vargrad(PStreamChol *pst, const int m, double alpha, double n,
731 		  double vm, double vv)
732 {
733     register int i;
734     double vg, w1, w2;
735     double av = 0.0, var = 0.0, dif = 0.0;
736 
737     if (alpha > 1.0 || alpha < 0.0) {
738 	w1 = 1.0;		w2 = 1.0;
739     } else {
740 	w1 = alpha;	w2 = 1.0 - alpha;
741     }
742 
743     calc_varstats(pst->c, m, pst->T, &av, &var, &dif);
744 
745     for (i = 0; i < pst->T; i++) {
746 	vg = -(var - vm) * (pst->c[i][m] - av) * vv * 2.0 / (double)pst->T;
747 	pst->g[i] = w1 * pst->g[i] / n + w2 * vg;
748     }
749 
750     return;
751 }
752 #endif
753 
754 // diagonal covariance
xget_detvec_diamat2inv(DMATRIX covmat)755 static DVECTOR xget_detvec_diamat2inv(DMATRIX covmat)	// [num class][dim]
756 {
757     long dim, clsnum;
758     long i, j;
759     double det;
760     DVECTOR detvec = NODATA;
761 
762     clsnum = covmat->row;
763     dim = covmat->col;
764     // memory allocation
765     detvec = xdvalloc(clsnum);
766     for (i = 0; i < clsnum; i++) {
767 	for (j = 0, det = 1.0; j < dim; j++) {
768 	    det *= covmat->data[i][j];
769 	    if (det > 0.0) {
770 		covmat->data[i][j] = 1.0 / covmat->data[i][j];
771 	    } else {
772 		cst_errmsg("error:(class %ld) determinant <= 0, det = %f\n", i, det);
773                 xdvfree(detvec);
774 		return NODATA;
775 	    }
776 	}
777 	detvec->data[i] = det;
778     }
779 
780     return detvec;
781 }
782 
get_gauss_full(long clsidx,DVECTOR vec,DVECTOR detvec,DMATRIX weightmat,DMATRIX meanvec,DMATRIX invcovmat)783 static double get_gauss_full(long clsidx,
784 		      DVECTOR vec,		// [dim]
785 		      DVECTOR detvec,		// [clsnum]
786 		      DMATRIX weightmat,	// [clsnum][1]
787 		      DMATRIX meanvec,		// [clsnum][dim]
788 		      DMATRIX invcovmat)	// [clsnum * dim][dim]
789 {
790     double gauss;
791 
792     if (detvec->data[clsidx] <= 0.0) {
793 	cst_errmsg("#error: det <= 0.0\n");
794         cst_error();
795     }
796 
797     gauss = weightmat->data[clsidx][0]
798 	/ sqrt(pow(2.0 * PI, (double)vec->length) * detvec->data[clsidx])
799 	* exp(-1.0 * cal_xmcxmc(clsidx, vec, meanvec, invcovmat) / 2.0);
800 
801     return gauss;
802 }
803 
cal_xmcxmc(long clsidx,DVECTOR x,DMATRIX mm,DMATRIX cm)804 static double cal_xmcxmc(long clsidx,
805 		  DVECTOR x,
806 		  DMATRIX mm,	// [num class][dim]
807 		  DMATRIX cm)	// [num class * dim][dim]
808 {
809     long clsnum, k, l, b, dim;
810     double *vec = NULL;
811     double td, d;
812 
813     dim = x->length;
814     clsnum = mm->row;
815     b = clsidx * dim;
816     if (mm->col != dim || cm->col != dim || clsnum * dim != cm->row) {
817 	cst_errmsg("Error cal_xmcxmc: different dimension\n");
818         cst_error();
819     }
820 
821     // memory allocation
822     vec = mlpg_alloc((int)dim, double);
823     for (k = 0; k < dim; k++) vec[k] = x->data[k] - mm->data[clsidx][k];
824     for (k = 0, d = 0.0; k < dim; k++) {
825 	for (l = 0, td = 0.0; l < dim; l++) td += vec[l] * cm->data[l + b][k];
826 	d += td * vec[k];
827     }
828     // memory free
829     mlpg_free(vec); vec = NULL;
830 
831     return d;
832 }
833 
834 #if 0
835 // diagonal covariance
836 static double get_gauss_dia5(double det,
837 		     double weight,
838 		     DVECTOR vec,		// dim
839 		     DVECTOR meanvec,		// dim
840 		     DVECTOR invcovvec)		// dim
841 {
842     double gauss, sb;
843     long k;
844 
845     if (det <= 0.0) {
846 	cst_errmsg("#error: det <= 0.0\n");
847         cst_error();
848     }
849 
850     for (k = 0, gauss = 0.0; k < vec->length; k++) {
851 	sb = vec->data[k] - meanvec->data[k];
852 	gauss += sb * invcovvec->data[k] * sb;
853     }
854 
855     gauss = weight / sqrt(pow(2.0 * PI, (double)vec->length) * det)
856 	* exp(-gauss / 2.0);
857 
858     return gauss;
859 }
860 #endif
861 
get_gauss_dia(long clsidx,DVECTOR vec,DVECTOR detvec,DMATRIX weightmat,DMATRIX meanmat,DMATRIX invcovmat)862 static double get_gauss_dia(long clsidx,
863 		     DVECTOR vec,		// [dim]
864 		     DVECTOR detvec,		// [clsnum]
865 		     DMATRIX weightmat,		// [clsnum][1]
866 		     DMATRIX meanmat,		// [clsnum][dim]
867 		     DMATRIX invcovmat)		// [clsnum][dim]
868 {
869     double gauss, sb;
870     long k;
871 
872     if (detvec->data[clsidx] <= 0.0) {
873 	cst_errmsg("#error: det <= 0.0\n");
874         cst_error();
875     }
876 
877     for (k = 0, gauss = 0.0; k < vec->length; k++) {
878 	sb = vec->data[k] - meanmat->data[clsidx][k];
879 	gauss += sb * invcovmat->data[clsidx][k] * sb;
880     }
881 
882     gauss = weightmat->data[clsidx][0]
883 	/ sqrt(pow(2.0 * PI, (double)vec->length) * detvec->data[clsidx])
884 	* exp(-gauss / 2.0);
885 
886     return gauss;
887 }
888 
pst_free(PStreamChol * pst)889 static void pst_free(PStreamChol *pst)
890 {
891     int i;
892 
893     for (i=0; i<pst->dw.num; i++)
894         mlpg_free(pst->dw.width[i]);
895     mlpg_free(pst->dw.width); pst->dw.width = NULL;
896     for (i=0; i<pst->dw.num; i++)
897         mlpg_free(pst->dw.coef_ptrs[i]);
898     mlpg_free(pst->dw.coef); pst->dw.coef = NULL;
899     mlpg_free(pst->dw.coef_ptrs); pst->dw.coef_ptrs = NULL;
900 
901     for (i=0; i<pst->T; i++)
902         mlpg_free(pst->mseq[i]);
903     mlpg_free(pst->mseq);
904     for (i=0; i<pst->T; i++)
905         mlpg_free(pst->ivseq[i]);
906     mlpg_free(pst->ivseq);
907     for (i=0; i<pst->T; i++)
908         mlpg_free(pst->R[i]);
909     mlpg_free(pst->R);
910     mlpg_free(pst->r);
911     mlpg_free(pst->g);
912     for (i=0; i<pst->T; i++)
913         mlpg_free(pst->c[i]);
914     mlpg_free(pst->c);
915 
916     return;
917 }
918 
mlpg(LISP ltrack)919 LISP mlpg(LISP ltrack)
920 {
921     /* Generate an (mcep) track using Maximum Likelihood Parameter Generation */
922     MLPGPARA param = NODATA;
923     EST_Track *param_track, *out;
924     int dim, dim_st;
925     float like;
926     int i,j;
927     int nframes;
928     PStreamChol pst;
929 
930     if ((ltrack == NULL) ||
931         (TYPEP(ltrack,tc_string) &&
932          (streq(get_c_string(ltrack),"nil"))))
933         return NIL;
934 
935     param_track = track(ltrack);
936 
937     nframes = param_track->num_frames();
938     dim = (param_track->num_channels()/2)-1;
939     dim_st = dim/2; /* dim2 in original code */
940     out = new EST_Track();
941     out->copy_setup(*param_track);
942     out->resize(nframes,dim_st+1);
943 
944     param = xmlpgpara_init(dim,dim_st,nframes,nframes);
945 
946     // mixture-index sequence
947     param->clsidxv = xlvalloc(nframes);
948     for (i=0; i<nframes; i++)
949         param->clsidxv->data[i] = i;
950 
951     // initial static feature sequence
952     param->stm = xdmalloc(nframes,dim_st);
953     for (i=0; i<nframes; i++)
954     {
955         for (j=0; j<dim_st; j++)
956             param->stm->data[i][j] = param_track->a(i,(j+1)*2);
957     }
958 
959     /* Load cluster means */
960     for (i=0; i<nframes; i++)
961         for (j=0; j<dim_st; j++)
962             param->mean->data[i][j] = param_track->a(i,(j+1)*2);
963 
964     /* GMM parameters diagonal covariance */
965     InitPStreamChol(&pst, mlpg_dynwin, mlpg_dynwinsize, dim_st-1, nframes);
966     param->pdf = xdmalloc(nframes,dim*2);
967     param->cov = xdmalloc(nframes,dim);
968     for (i=0; i<nframes; i++)
969         for (j=0; j<dim; j++)
970             param->cov->data[i][j] =
971                 param_track->a(i,(j+1)*2+1) *
972                 param_track->a(i,(j+1)*2+1);
973     param->detvec = xget_detvec_diamat2inv(param->cov);
974 
975     /* global variance parameters */
976     /* TBD get_gv_mlpgpara(param, vmfile, vvfile, dim2, msg_flag); */
977 
978     if (nframes > 0)
979     {
980         get_dltmat(param->stm, &pst.dw, 1, param->dltm);
981 
982         like = get_like_pdfseq_vit(dim, dim_st, nframes, nframes, param,
983                                    param_track, XTRUE);
984 
985         /* vlike = get_like_gv(dim2, dnum, param); */
986 
987         mlgparaChol(param->pdf, &pst, param->stm);
988     }
989 
990     /* Put the answer back into the output track */
991     for (i=0; i<nframes; i++)
992     {
993         out->t(i) = param_track->t(i);
994         out->a(i,0) = param_track->a(i,0); /* F0 */
995         for (j=0; j<dim_st; j++)
996         {
997             out->a(i,j+1) = param->stm->data[i][j];
998         }
999     }
1000 
1001     // memory free
1002     xmlpgparafree(param);
1003     pst_free(&pst);
1004 
1005     return siod(out);
1006 }
1007 
1008