1 /* --------------------------------------------------------------- */
2 /* The HMM-Based Speech Synthesis System (HTS): version 1.1.1 */
3 /* HTS Working Group */
4 /* */
5 /* Department of Computer Science */
6 /* Nagoya Institute of Technology */
7 /* and */
8 /* Interdisciplinary Graduate School of Science and Engineering */
9 /* Tokyo Institute of Technology */
10 /* Copyright (c) 2001-2003 */
11 /* All Rights Reserved. */
12 /* */
13 /* Permission is hereby granted, free of charge, to use and */
14 /* distribute this software and its documentation without */
15 /* restriction, including without limitation the rights to use, */
16 /* copy, modify, merge, publish, distribute, sublicense, and/or */
17 /* sell copies of this work, and to permit persons to whom this */
18 /* work is furnished to do so, subject to the following conditions: */
19 /* */
20 /* 1. The code must retain the above copyright notice, this list */
21 /* of conditions and the following disclaimer. */
22 /* */
23 /* 2. Any modifications must be clearly marked as such. */
24 /* */
25 /* NAGOYA INSTITUTE OF TECHNOLOGY, TOKYO INSITITUTE OF TECHNOLOGY, */
26 /* HTS WORKING GROUP, AND THE CONTRIBUTORS TO THIS WORK DISCLAIM */
27 /* ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL */
28 /* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
29 /* SHALL NAGOYA INSTITUTE OF TECHNOLOGY, TOKYO INSITITUTE OF */
30 /* TECHNOLOGY, HTS WORKING GROUP, NOR THE CONTRIBUTORS BE LIABLE */
31 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY */
32 /* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, */
33 /* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS */
34 /* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR */
35 /* PERFORMANCE OF THIS SOFTWARE. */
36 /* */
37 /* --------------------------------------------------------------- */
38 /* mlpg.c : speech parameter generation from pdf sequence */
39 /* */
40 /* 2003/12/26 by Heiga Zen */
41 /* --------------------------------------------------------------- */
42 /*********************************************************************/
43 /* */
44 /* Nagoya Institute of Technology, Aichi, Japan, */
45 /* and */
46 /* Carnegie Mellon University, Pittsburgh, PA */
47 /* Copyright (c) 2003-2004,2008 */
48 /* All Rights Reserved. */
49 /* */
50 /* Permission is hereby granted, free of charge, to use and */
51 /* distribute this software and its documentation without */
52 /* restriction, including without limitation the rights to use, */
53 /* copy, modify, merge, publish, distribute, sublicense, and/or */
54 /* sell copies of this work, and to permit persons to whom this */
55 /* work is furnished to do so, subject to the following conditions: */
56 /* */
57 /* 1. The code must retain the above copyright notice, this list */
58 /* of conditions and the following disclaimer. */
59 /* 2. Any modifications must be clearly marked as such. */
60 /* 3. Original authors' names are not deleted. */
61 /* */
62 /* NAGOYA INSTITUTE OF TECHNOLOGY, CARNEGIE MELLON UNIVERSITY, AND */
63 /* THE CONTRIBUTORS TO THIS WORK DISCLAIM ALL WARRANTIES WITH */
64 /* REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF */
65 /* MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL NAGOYA INSTITUTE */
66 /* OF TECHNOLOGY, CARNEGIE MELLON UNIVERSITY, NOR THE CONTRIBUTORS */
67 /* BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR */
68 /* ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR */
69 /* PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER */
70 /* TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE */
71 /* OR PERFORMANCE OF THIS SOFTWARE. */
72 /* */
73 /*********************************************************************/
74 /* */
75 /* Author : Tomoki Toda (tomoki@ics.nitech.ac.jp) */
76 /* Date : June 2004 */
77 /* */
78 /* Modified as a single file for inclusion in festival/flite */
79 /* May 2008 awb@cs.cmu.edu */
80 /*-------------------------------------------------------------------*/
81 /* */
82 /* ML-Based Parameter Generation */
83 /* */
84 /*-------------------------------------------------------------------*/
85
86 #include "festival.h"
87 #include "simple_mlpg.h"
88 #define mlpg_alloc(X,Y) (walloc(Y,X))
89 #define mlpg_free wfree
90 #define cst_errmsg EST_error
91 #define cst_error()
92
93 #ifdef CMUFLITE
94 #include "cst_alloc.h"
95 #include "cst_string.h"
96 #include "cst_math.h"
97 #include "cst_vc.h"
98 #include "cst_track.h"
99 #include "cst_wave.h"
100 #include "cst_mlpg.h"
101
102 #define mlpg_alloc(X,Y) (cst_alloc(Y,X))
103 #define mlpg_free cst_free
104 #endif
105
106 static MLPGPARA xmlpgpara_init(int dim, int dim2, int dnum, int clsnum);
107 static void xmlpgparafree(MLPGPARA param);
108 static double get_like_pdfseq_vit(int dim, int dim2, int dnum, int clsnum,
109 MLPGPARA param,
110 EST_Track *model,
111 XBOOL dia_flag);
112 static void get_dltmat(DMATRIX mat, MLPG_DWin *dw, int dno, DMATRIX dmat);
113
114 static double *dcalloc(int x, int xoff);
115 static double **ddcalloc(int x, int y, int xoff, int yoff);
116
117 /***********************************/
118 /* ML using Choleski decomposition */
119 /***********************************/
120 /* Diagonal Covariance Version */
121 static void InitDWin(PStreamChol *pst, const float *dynwin, int fsize);
122 static void InitPStreamChol(PStreamChol *pst, const float *dynwin, int fsize,
123 int order, int T);
124 static void mlgparaChol(DMATRIX pdf, PStreamChol *pst, DMATRIX mlgp);
125 static void mlpgChol(PStreamChol *pst);
126 static void calc_R_and_r(PStreamChol *pst, const int m);
127 static void Choleski(PStreamChol *pst);
128 static void Choleski_forward(PStreamChol *pst);
129 static void Choleski_backward(PStreamChol *pst, const int m);
130 static double get_gauss_full(long clsidx,
131 DVECTOR vec, // [dim]
132 DVECTOR detvec, // [clsnum]
133 DMATRIX weightmat, // [clsnum][1]
134 DMATRIX meanvec, // [clsnum][dim]
135 DMATRIX invcovmat); // [clsnum * dim][dim]
136 static double get_gauss_dia(long clsidx,
137 DVECTOR vec, // [dim]
138 DVECTOR detvec, // [clsnum]
139 DMATRIX weightmat, // [clsnum][1]
140 DMATRIX meanmat, // [clsnum][dim]
141 DMATRIX invcovmat); // [clsnum][dim]
142 static double cal_xmcxmc(long clsidx,
143 DVECTOR x,
144 DMATRIX mm, // [num class][dim]
145 DMATRIX cm); // [num class * dim][dim]
146
147 const float mlpg_dynwin[] = { -0.5, 0.0, 0.5 };
148 #define mlpg_dynwinsize 3
149
xmlpgpara_init(int dim,int dim2,int dnum,int clsnum)150 static MLPGPARA xmlpgpara_init(int dim, int dim2, int dnum,
151 int clsnum)
152 {
153 MLPGPARA param;
154
155 // memory allocation
156 param = mlpg_alloc(1,struct MLPGPARA_STRUCT);
157 param->ov = xdvalloc(dim);
158 param->iuv = NODATA;
159 param->iumv = NODATA;
160 param->flkv = xdvalloc(dnum);
161 param->stm = NODATA;
162 param->dltm = xdmalloc(dnum, dim2);
163 param->pdf = NODATA;
164 param->detvec = NODATA;
165 param->wght = xdmalloc(clsnum, 1);
166 param->mean = xdmalloc(clsnum, dim);
167 param->cov = NODATA;
168 param->clsidxv = NODATA;
169 /* dia_flag */
170 param->clsdetv = xdvalloc(1);
171 param->clscov = xdmalloc(1, dim);
172
173 param->vdet = 1.0;
174 param->vm = NODATA;
175 param->vv = NODATA;
176 param->var = NODATA;
177
178 return param;
179 }
180
xmlpgparafree(MLPGPARA param)181 static void xmlpgparafree(MLPGPARA param)
182 {
183 if (param != NODATA) {
184 if (param->ov != NODATA) xdvfree(param->ov);
185 if (param->iuv != NODATA) xdvfree(param->iuv);
186 if (param->iumv != NODATA) xdvfree(param->iumv);
187 if (param->flkv != NODATA) xdvfree(param->flkv);
188 if (param->stm != NODATA) xdmfree(param->stm);
189 if (param->dltm != NODATA) xdmfree(param->dltm);
190 if (param->pdf != NODATA) xdmfree(param->pdf);
191 if (param->detvec != NODATA) xdvfree(param->detvec);
192 if (param->wght != NODATA) xdmfree(param->wght);
193 if (param->mean != NODATA) xdmfree(param->mean);
194 if (param->cov != NODATA) xdmfree(param->cov);
195 if (param->clsidxv != NODATA) xlvfree(param->clsidxv);
196 if (param->clsdetv != NODATA) xdvfree(param->clsdetv);
197 if (param->clscov != NODATA) xdmfree(param->clscov);
198 if (param->vm != NODATA) xdvfree(param->vm);
199 if (param->vv != NODATA) xdvfree(param->vv);
200 if (param->var != NODATA) xdvfree(param->var);
201 mlpg_free(param);
202 }
203
204 return;
205 }
206
get_like_pdfseq_vit(int dim,int dim2,int dnum,int clsnum,MLPGPARA param,EST_Track * model,XBOOL dia_flag)207 static double get_like_pdfseq_vit(int dim, int dim2, int dnum, int clsnum,
208 MLPGPARA param,
209 EST_Track *model,
210 XBOOL dia_flag)
211 {
212 long d, c, k, l, j;
213 double sumgauss;
214 double like = 0.0;
215
216 for (d = 0, like = 0.0; d < dnum; d++) {
217 // read weight and mean sequences
218 param->wght->data[0][0] = 0.9; /* FIXME weights */
219 for (j=0; j<dim; j++)
220 param->mean->data[0][j] = model->a((int)d,(int)((j+1)*2));
221
222 // observation vector
223 for (k = 0; k < dim2; k++) {
224 param->ov->data[k] = param->stm->data[d][k];
225 param->ov->data[k + dim2] = param->dltm->data[d][k];
226 }
227
228 // mixture index
229 c = d;
230 param->clsdetv->data[0] = param->detvec->data[c];
231
232 // calculating likelihood
233 if (dia_flag == XTRUE) {
234 for (k = 0; k < param->clscov->col; k++)
235 param->clscov->data[0][k] = param->cov->data[c][k];
236 sumgauss = get_gauss_dia(0, param->ov, param->clsdetv,
237 param->wght, param->mean, param->clscov);
238 } else {
239 for (k = 0; k < param->clscov->row; k++)
240 for (l = 0; l < param->clscov->col; l++)
241 param->clscov->data[k][l] =
242 param->cov->data[k + param->clscov->row * c][l];
243 sumgauss = get_gauss_full(0, param->ov, param->clsdetv,
244 param->wght, param->mean, param->clscov);
245 }
246 if (sumgauss <= 0.0) param->flkv->data[d] = -1.0 * INFTY2;
247 else param->flkv->data[d] = log(sumgauss);
248 like += param->flkv->data[d];
249
250 // estimating U', U'*M
251 if (dia_flag == XTRUE) {
252 // PDF [U'*M U']
253 for (k = 0; k < dim; k++) {
254 param->pdf->data[d][k] =
255 param->clscov->data[0][k] * param->mean->data[0][k];
256 param->pdf->data[d][k + dim] = param->clscov->data[0][k];
257 }
258 } else {
259 // PDF [U'*M U']
260 for (k = 0; k < dim; k++) {
261 param->pdf->data[d][k] = 0.0;
262 for (l = 0; l < dim; l++) {
263 param->pdf->data[d][k * dim + dim + l] =
264 param->clscov->data[k][l];
265 param->pdf->data[d][k] +=
266 param->clscov->data[k][l] * param->mean->data[0][l];
267 }
268 }
269 }
270 }
271
272 like /= (double)dnum;
273
274 return like;
275 }
276
277 #if 0
278 static double get_like_gv(long dim2, long dnum, MLPGPARA param)
279 {
280 long k;
281 double av = 0.0, dif = 0.0;
282 double vlike = -INFTY;
283
284 if (param->vm != NODATA && param->vv != NODATA) {
285 for (k = 0; k < dim2; k++)
286 calc_varstats(param->stm->data, k, dnum, &av,
287 &(param->var->data[k]), &dif);
288 vlike = log(get_gauss_dia5(param->vdet, 1.0, param->var,
289 param->vm, param->vv));
290 }
291
292 return vlike;
293 }
294
295 static void sm_mvav(DMATRIX mat, long hlen)
296 {
297 long k, l, m, p;
298 double d, sd;
299 DVECTOR vec = NODATA;
300 DVECTOR win = NODATA;
301
302 vec = xdvalloc(mat->row);
303
304 // smoothing window
305 win = xdvalloc(hlen * 2 + 1);
306 for (k = 0, d = 1.0, sd = 0.0; k < hlen; k++, d += 1.0) {
307 win->data[k] = d; win->data[win->length - k - 1] = d;
308 sd += d + d;
309 }
310 win->data[k] = d; sd += d;
311 for (k = 0; k < win->length; k++) win->data[k] /= sd;
312
313 for (l = 0; l < mat->col; l++) {
314 for (k = 0; k < mat->row; k++) {
315 for (m = 0, vec->data[k] = 0.0; m < win->length; m++) {
316 p = k - hlen + m;
317 if (p >= 0 && p < mat->row)
318 vec->data[k] += mat->data[p][l] * win->data[m];
319 }
320 }
321 for (k = 0; k < mat->row; k++) mat->data[k][l] = vec->data[k];
322 }
323
324 xdvfree(win);
325 xdvfree(vec);
326
327 return;
328 }
329 #endif
330
get_dltmat(DMATRIX mat,MLPG_DWin * dw,int dno,DMATRIX dmat)331 static void get_dltmat(DMATRIX mat, MLPG_DWin *dw, int dno, DMATRIX dmat)
332 {
333 int i, j, k, tmpnum;
334
335 tmpnum = (int)mat->row - dw->width[dno][WRIGHT];
336 for (k = dw->width[dno][WRIGHT]; k < tmpnum; k++) // time index
337 for (i = 0; i < (int)mat->col; i++) // dimension index
338 for (j = dw->width[dno][WLEFT], dmat->data[k][i] = 0.0;
339 j <= dw->width[dno][WRIGHT]; j++)
340 dmat->data[k][i] += mat->data[k + j][i] * dw->coef[dno][j];
341
342 for (i = 0; i < (int)mat->col; i++) { // dimension index
343 for (k = 0; k < dw->width[dno][WRIGHT]; k++) // time index
344 for (j = dw->width[dno][WLEFT], dmat->data[k][i] = 0.0;
345 j <= dw->width[dno][WRIGHT]; j++)
346 if (k + j >= 0)
347 dmat->data[k][i] += mat->data[k + j][i] * dw->coef[dno][j];
348 else
349 dmat->data[k][i] += (2.0 * mat->data[0][i] - mat->data[-k - j][i]) * dw->coef[dno][j];
350 for (k = tmpnum; k < (int)mat->row; k++) // time index
351 for (j = dw->width[dno][WLEFT], dmat->data[k][i] = 0.0;
352 j <= dw->width[dno][WRIGHT]; j++)
353 if (k + j < (int)mat->row)
354 dmat->data[k][i] += mat->data[k + j][i] * dw->coef[dno][j];
355 else
356 dmat->data[k][i] += (2.0 * mat->data[mat->row - 1][i] - mat->data[mat->row - k - j + mat->row - 2][i]) * dw->coef[dno][j];
357 }
358
359 return;
360 }
361
362
dcalloc(int x,int xoff)363 static double *dcalloc(int x, int xoff)
364 {
365 double *ptr;
366
367 ptr = mlpg_alloc(x,double);
368 /* ptr += xoff; */ /* Just not going to allow this */
369 return(ptr);
370 }
371
ddcalloc(int x,int y,int xoff,int yoff)372 static double **ddcalloc(int x, int y, int xoff, int yoff)
373 {
374 double **ptr;
375 register int i;
376
377 ptr = mlpg_alloc(x,double *);
378 for (i = 0; i < x; i++) ptr[i] = dcalloc(y, yoff);
379 /* ptr += xoff; */ /* Just not going to allow this */
380 return(ptr);
381 }
382
383 /////////////////////////////////////
384 // ML using Choleski decomposition //
385 /////////////////////////////////////
InitDWin(PStreamChol * pst,const float * dynwin,int fsize)386 static void InitDWin(PStreamChol *pst, const float *dynwin, int fsize)
387 {
388 int i,j;
389 int leng;
390
391 pst->dw.num = 1; // only static
392 if (dynwin) {
393 pst->dw.num = 2; // static + dyn
394 }
395 // memory allocation
396 pst->dw.width = mlpg_alloc(pst->dw.num,int *);
397 for (i = 0; i < pst->dw.num; i++)
398 pst->dw.width[i] = mlpg_alloc(2,int);
399
400 pst->dw.coef = mlpg_alloc(pst->dw.num, double *);
401 pst->dw.coef_ptrs = mlpg_alloc(pst->dw.num, double *);
402 // window for static parameter WLEFT = 0, WRIGHT = 1
403 pst->dw.width[0][WLEFT] = pst->dw.width[0][WRIGHT] = 0;
404 pst->dw.coef_ptrs[0] = mlpg_alloc(1,double);
405 pst->dw.coef[0] = pst->dw.coef_ptrs[0];
406 pst->dw.coef[0][0] = 1.0;
407
408 // set delta coefficients
409 for (i = 1; i < pst->dw.num; i++) {
410 pst->dw.coef_ptrs[i] = mlpg_alloc(fsize, double);
411 pst->dw.coef[i] = pst->dw.coef_ptrs[i];
412 for (j=0; j<fsize; j++) /* FIXME make dynwin doubles for memmove */
413 pst->dw.coef[i][j] = (double)dynwin[j];
414 // set pointer
415 leng = fsize / 2; // L (fsize = 2 * L + 1)
416 pst->dw.coef[i] += leng; // [L] -> [0] center
417 pst->dw.width[i][WLEFT] = -leng; // -L left
418 pst->dw.width[i][WRIGHT] = leng; // L right
419 if (fsize % 2 == 0) pst->dw.width[i][WRIGHT]--;
420 }
421
422 pst->dw.maxw[WLEFT] = pst->dw.maxw[WRIGHT] = 0;
423 for (i = 0; i < pst->dw.num; i++) {
424 if (pst->dw.maxw[WLEFT] > pst->dw.width[i][WLEFT])
425 pst->dw.maxw[WLEFT] = pst->dw.width[i][WLEFT];
426 if (pst->dw.maxw[WRIGHT] < pst->dw.width[i][WRIGHT])
427 pst->dw.maxw[WRIGHT] = pst->dw.width[i][WRIGHT];
428 }
429
430 return;
431 }
432
InitPStreamChol(PStreamChol * pst,const float * dynwin,int fsize,int order,int T)433 static void InitPStreamChol(PStreamChol *pst, const float *dynwin, int fsize,
434 int order, int T)
435 {
436 // order of cepstrum
437 pst->order = order;
438
439 // windows for dynamic feature
440 InitDWin(pst, dynwin, fsize);
441
442 // dimension of observed vector
443 pst->vSize = (pst->order + 1) * pst->dw.num; // odim = dim * (1--3)
444
445 // memory allocation
446 pst->T = T; // number of frames
447 pst->width = pst->dw.maxw[WRIGHT] * 2 + 1; // width of R
448 pst->mseq = ddcalloc(T, pst->vSize, 0, 0); // [T][odim]
449 pst->ivseq = ddcalloc(T, pst->vSize, 0, 0); // [T][odim]
450 pst->R = ddcalloc(T, pst->width, 0, 0); // [T][width]
451 pst->r = dcalloc(T, 0); // [T]
452 pst->g = dcalloc(T, 0); // [T]
453 pst->c = ddcalloc(T, pst->order + 1, 0, 0); // [T][dim]
454
455 return;
456 }
457
mlgparaChol(DMATRIX pdf,PStreamChol * pst,DMATRIX mlgp)458 static void mlgparaChol(DMATRIX pdf, PStreamChol *pst, DMATRIX mlgp)
459 {
460 int t, d;
461
462 // error check
463 if (pst->vSize * 2 != pdf->col || pst->order + 1 != mlgp->col) {
464 cst_errmsg("Error mlgparaChol: Different dimension\n");
465 cst_error();
466 }
467
468 // mseq: U^{-1}*M, ifvseq: U^{-1}
469 for (t = 0; t < pst->T; t++) {
470 for (d = 0; d < pst->vSize; d++) {
471 pst->mseq[t][d] = pdf->data[t][d];
472 pst->ivseq[t][d] = pdf->data[t][pst->vSize + d];
473 }
474 }
475
476 // ML parameter generation
477 mlpgChol(pst);
478
479 // extracting parameters
480 for (t = 0; t < pst->T; t++)
481 for (d = 0; d <= pst->order; d++)
482 mlgp->data[t][d] = pst->c[t][d];
483
484 return;
485 }
486
487 // generate parameter sequence from pdf sequence using Choleski decomposition
mlpgChol(PStreamChol * pst)488 static void mlpgChol(PStreamChol *pst)
489 {
490 register int m;
491
492 // generating parameter in each dimension
493 for (m = 0; m <= pst->order; m++) {
494 calc_R_and_r(pst, m);
495 Choleski(pst);
496 Choleski_forward(pst);
497 Choleski_backward(pst, m);
498 }
499
500 return;
501 }
502
503 //------ parameter generation fuctions
504 // calc_R_and_r: calculate R = W'U^{-1}W and r = W'U^{-1}M
calc_R_and_r(PStreamChol * pst,const int m)505 static void calc_R_and_r(PStreamChol *pst, const int m)
506 {
507 register int i, j, k, l, n;
508 double wu;
509
510 for (i = 0; i < pst->T; i++) {
511 pst->r[i] = pst->mseq[i][m];
512 pst->R[i][0] = pst->ivseq[i][m];
513
514 for (j = 1; j < pst->width; j++) pst->R[i][j] = 0.0;
515
516 for (j = 1; j < pst->dw.num; j++) {
517 for (k = pst->dw.width[j][0]; k <= pst->dw.width[j][1]; k++) {
518 n = i + k;
519 if (n >= 0 && n < pst->T && pst->dw.coef[j][-k] != 0.0) {
520 l = j * (pst->order + 1) + m;
521 pst->r[i] += pst->dw.coef[j][-k] * pst->mseq[n][l];
522 wu = pst->dw.coef[j][-k] * pst->ivseq[n][l];
523
524 for (l = 0; l < pst->width; l++) {
525 n = l-k;
526 if (n <= pst->dw.width[j][1] && i + l < pst->T &&
527 pst->dw.coef[j][n] != 0.0)
528 pst->R[i][l] += wu * pst->dw.coef[j][n];
529 }
530 }
531 }
532 }
533 }
534
535 return;
536 }
537
538 // Choleski: Choleski factorization of Matrix R
Choleski(PStreamChol * pst)539 static void Choleski(PStreamChol *pst)
540 {
541 register int t, j, k;
542
543 pst->R[0][0] = sqrt(pst->R[0][0]);
544
545 for (j = 1; j < pst->width; j++) pst->R[0][j] /= pst->R[0][0];
546
547 for (t = 1; t < pst->T; t++) {
548 for (j = 1; j < pst->width; j++)
549 if (t - j >= 0)
550 pst->R[t][0] -= pst->R[t - j][j] * pst->R[t - j][j];
551
552 pst->R[t][0] = sqrt(pst->R[t][0]);
553
554 for (j = 1; j < pst->width; j++) {
555 for (k = 0; k < pst->dw.maxw[WRIGHT]; k++)
556 if (j != pst->width - 1)
557 pst->R[t][j] -=
558 pst->R[t - k - 1][j - k] * pst->R[t - k - 1][j + 1];
559
560 pst->R[t][j] /= pst->R[t][0];
561 }
562 }
563
564 return;
565 }
566
567 // Choleski_forward: forward substitution to solve linear equations
Choleski_forward(PStreamChol * pst)568 static void Choleski_forward(PStreamChol *pst)
569 {
570 register int t, j;
571 double hold;
572
573 pst->g[0] = pst->r[0] / pst->R[0][0];
574
575 for (t=1; t < pst->T; t++) {
576 hold = 0.0;
577 for (j = 1; j < pst->width; j++)
578 if (t - j >= 0 && pst->R[t - j][j] != 0.0)
579 hold += pst->R[t - j][j] * pst->g[t - j];
580 pst->g[t] = (pst->r[t] - hold) / pst->R[t][0];
581 }
582
583 return;
584 }
585
586 // Choleski_backward: backward substitution to solve linear equations
Choleski_backward(PStreamChol * pst,const int m)587 static void Choleski_backward(PStreamChol *pst, const int m)
588 {
589 register int t, j;
590 double hold;
591
592 pst->c[pst->T - 1][m] = pst->g[pst->T - 1] / pst->R[pst->T - 1][0];
593
594 for (t = pst->T - 2; t >= 0; t--) {
595 hold = 0.0;
596 for (j = 1; j < pst->width; j++)
597 if (t + j < pst->T && pst->R[t][j] != 0.0)
598 hold += pst->R[t][j] * pst->c[t + j][m];
599 pst->c[t][m] = (pst->g[t] - hold) / pst->R[t][0];
600 }
601
602 return;
603 }
604
605 ////////////////////////////////////
606 // ML Considering Global Variance //
607 ////////////////////////////////////
608 #if 0
609 static void varconv(double **c, const int m, const int T, const double var)
610 {
611 register int n;
612 double sd, osd;
613 double oav = 0.0, ovar = 0.0, odif = 0.0;
614
615 calc_varstats(c, m, T, &oav, &ovar, &odif);
616 osd = sqrt(ovar); sd = sqrt(var);
617 for (n = 0; n < T; n++)
618 c[n][m] = (c[n][m] - oav) / osd * sd + oav;
619
620 return;
621 }
622
623 static void calc_varstats(double **c, const int m, const int T,
624 double *av, double *var, double *dif)
625 {
626 register int i;
627 register double d;
628
629 *av = 0.0;
630 *var = 0.0;
631 *dif = 0.0;
632 for (i = 0; i < T; i++) *av += c[i][m];
633 *av /= (double)T;
634 for (i = 0; i < T; i++) {
635 d = c[i][m] - *av;
636 *var += d * d; *dif += d;
637 }
638 *var /= (double)T;
639
640 return;
641 }
642
643 // Diagonal Covariance Version
644 static void mlgparaGrad(DMATRIX pdf, PStreamChol *pst, DMATRIX mlgp, const int max,
645 double th, double e, double alpha, DVECTOR vm, DVECTOR vv,
646 XBOOL nrmflag, XBOOL extvflag)
647 {
648 int t, d;
649
650 // error check
651 if (pst->vSize * 2 != pdf->col || pst->order + 1 != mlgp->col) {
652 cst_errmsg("Error mlgparaChol: Different dimension\n");
653 cst_error();
654 }
655
656 // mseq: U^{-1}*M, ifvseq: U^{-1}
657 for (t = 0; t < pst->T; t++) {
658 for (d = 0; d < pst->vSize; d++) {
659 pst->mseq[t][d] = pdf->data[t][d];
660 pst->ivseq[t][d] = pdf->data[t][pst->vSize + d];
661 }
662 }
663
664 // ML parameter generation
665 mlpgChol(pst);
666
667 // extend variance
668 if (extvflag == XTRUE)
669 for (d = 0; d <= pst->order; d++)
670 varconv(pst->c, d, pst->T, vm->data[d]);
671
672 // estimating parameters
673 mlpgGrad(pst, max, th, e, alpha, vm, vv, nrmflag);
674
675 // extracting parameters
676 for (t = 0; t < pst->T; t++)
677 for (d = 0; d <= pst->order; d++)
678 mlgp->data[t][d] = pst->c[t][d];
679
680 return;
681 }
682
683 // generate parameter sequence from pdf sequence using gradient
684 static void mlpgGrad(PStreamChol *pst, const int max, double th, double e,
685 double alpha, DVECTOR vm, DVECTOR vv, XBOOL nrmflag)
686 {
687 register int m, i, t;
688 double diff, n, dth;
689
690 if (nrmflag == XTRUE)
691 n = (double)(pst->T * pst->vSize) / (double)(vm->length);
692 else n = 1.0;
693
694 // generating parameter in each dimension
695 for (m = 0; m <= pst->order; m++) {
696 calc_R_and_r(pst, m);
697 dth = th * sqrt(vm->data[m]);
698 for (i = 0; i < max; i++) {
699 calc_grad(pst, m);
700 if (vm != NODATA && vv != NODATA)
701 calc_vargrad(pst, m, alpha, n, vm->data[m], vv->data[m]);
702 for (t = 0, diff = 0.0; t < pst->T; t++) {
703 diff += pst->g[t] * pst->g[t];
704 pst->c[t][m] += e * pst->g[t];
705 }
706 diff = sqrt(diff / (double)pst->T);
707 if (diff < dth || diff == 0.0) break;
708 }
709 }
710
711 return;
712 }
713
714 // calc_grad: calculate -RX + r = -W'U^{-1}W * X + W'U^{-1}M
715 void calc_grad(PStreamChol *pst, const int m)
716 {
717 register int i, j;
718
719 for (i = 0; i < pst->T; i++) {
720 pst->g[i] = pst->r[i] - pst->c[i][m] * pst->R[i][0];
721 for (j = 1; j < pst->width; j++) {
722 if (i + j < pst->T) pst->g[i] -= pst->c[i + j][m] * pst->R[i][j];
723 if (i - j >= 0) pst->g[i] -= pst->c[i - j][m] * pst->R[i - j][j];
724 }
725 }
726
727 return;
728 }
729
730 static void calc_vargrad(PStreamChol *pst, const int m, double alpha, double n,
731 double vm, double vv)
732 {
733 register int i;
734 double vg, w1, w2;
735 double av = 0.0, var = 0.0, dif = 0.0;
736
737 if (alpha > 1.0 || alpha < 0.0) {
738 w1 = 1.0; w2 = 1.0;
739 } else {
740 w1 = alpha; w2 = 1.0 - alpha;
741 }
742
743 calc_varstats(pst->c, m, pst->T, &av, &var, &dif);
744
745 for (i = 0; i < pst->T; i++) {
746 vg = -(var - vm) * (pst->c[i][m] - av) * vv * 2.0 / (double)pst->T;
747 pst->g[i] = w1 * pst->g[i] / n + w2 * vg;
748 }
749
750 return;
751 }
752 #endif
753
754 // diagonal covariance
xget_detvec_diamat2inv(DMATRIX covmat)755 static DVECTOR xget_detvec_diamat2inv(DMATRIX covmat) // [num class][dim]
756 {
757 long dim, clsnum;
758 long i, j;
759 double det;
760 DVECTOR detvec = NODATA;
761
762 clsnum = covmat->row;
763 dim = covmat->col;
764 // memory allocation
765 detvec = xdvalloc(clsnum);
766 for (i = 0; i < clsnum; i++) {
767 for (j = 0, det = 1.0; j < dim; j++) {
768 det *= covmat->data[i][j];
769 if (det > 0.0) {
770 covmat->data[i][j] = 1.0 / covmat->data[i][j];
771 } else {
772 cst_errmsg("error:(class %ld) determinant <= 0, det = %f\n", i, det);
773 xdvfree(detvec);
774 return NODATA;
775 }
776 }
777 detvec->data[i] = det;
778 }
779
780 return detvec;
781 }
782
get_gauss_full(long clsidx,DVECTOR vec,DVECTOR detvec,DMATRIX weightmat,DMATRIX meanvec,DMATRIX invcovmat)783 static double get_gauss_full(long clsidx,
784 DVECTOR vec, // [dim]
785 DVECTOR detvec, // [clsnum]
786 DMATRIX weightmat, // [clsnum][1]
787 DMATRIX meanvec, // [clsnum][dim]
788 DMATRIX invcovmat) // [clsnum * dim][dim]
789 {
790 double gauss;
791
792 if (detvec->data[clsidx] <= 0.0) {
793 cst_errmsg("#error: det <= 0.0\n");
794 cst_error();
795 }
796
797 gauss = weightmat->data[clsidx][0]
798 / sqrt(pow(2.0 * PI, (double)vec->length) * detvec->data[clsidx])
799 * exp(-1.0 * cal_xmcxmc(clsidx, vec, meanvec, invcovmat) / 2.0);
800
801 return gauss;
802 }
803
cal_xmcxmc(long clsidx,DVECTOR x,DMATRIX mm,DMATRIX cm)804 static double cal_xmcxmc(long clsidx,
805 DVECTOR x,
806 DMATRIX mm, // [num class][dim]
807 DMATRIX cm) // [num class * dim][dim]
808 {
809 long clsnum, k, l, b, dim;
810 double *vec = NULL;
811 double td, d;
812
813 dim = x->length;
814 clsnum = mm->row;
815 b = clsidx * dim;
816 if (mm->col != dim || cm->col != dim || clsnum * dim != cm->row) {
817 cst_errmsg("Error cal_xmcxmc: different dimension\n");
818 cst_error();
819 }
820
821 // memory allocation
822 vec = mlpg_alloc((int)dim, double);
823 for (k = 0; k < dim; k++) vec[k] = x->data[k] - mm->data[clsidx][k];
824 for (k = 0, d = 0.0; k < dim; k++) {
825 for (l = 0, td = 0.0; l < dim; l++) td += vec[l] * cm->data[l + b][k];
826 d += td * vec[k];
827 }
828 // memory free
829 mlpg_free(vec); vec = NULL;
830
831 return d;
832 }
833
834 #if 0
835 // diagonal covariance
836 static double get_gauss_dia5(double det,
837 double weight,
838 DVECTOR vec, // dim
839 DVECTOR meanvec, // dim
840 DVECTOR invcovvec) // dim
841 {
842 double gauss, sb;
843 long k;
844
845 if (det <= 0.0) {
846 cst_errmsg("#error: det <= 0.0\n");
847 cst_error();
848 }
849
850 for (k = 0, gauss = 0.0; k < vec->length; k++) {
851 sb = vec->data[k] - meanvec->data[k];
852 gauss += sb * invcovvec->data[k] * sb;
853 }
854
855 gauss = weight / sqrt(pow(2.0 * PI, (double)vec->length) * det)
856 * exp(-gauss / 2.0);
857
858 return gauss;
859 }
860 #endif
861
get_gauss_dia(long clsidx,DVECTOR vec,DVECTOR detvec,DMATRIX weightmat,DMATRIX meanmat,DMATRIX invcovmat)862 static double get_gauss_dia(long clsidx,
863 DVECTOR vec, // [dim]
864 DVECTOR detvec, // [clsnum]
865 DMATRIX weightmat, // [clsnum][1]
866 DMATRIX meanmat, // [clsnum][dim]
867 DMATRIX invcovmat) // [clsnum][dim]
868 {
869 double gauss, sb;
870 long k;
871
872 if (detvec->data[clsidx] <= 0.0) {
873 cst_errmsg("#error: det <= 0.0\n");
874 cst_error();
875 }
876
877 for (k = 0, gauss = 0.0; k < vec->length; k++) {
878 sb = vec->data[k] - meanmat->data[clsidx][k];
879 gauss += sb * invcovmat->data[clsidx][k] * sb;
880 }
881
882 gauss = weightmat->data[clsidx][0]
883 / sqrt(pow(2.0 * PI, (double)vec->length) * detvec->data[clsidx])
884 * exp(-gauss / 2.0);
885
886 return gauss;
887 }
888
pst_free(PStreamChol * pst)889 static void pst_free(PStreamChol *pst)
890 {
891 int i;
892
893 for (i=0; i<pst->dw.num; i++)
894 mlpg_free(pst->dw.width[i]);
895 mlpg_free(pst->dw.width); pst->dw.width = NULL;
896 for (i=0; i<pst->dw.num; i++)
897 mlpg_free(pst->dw.coef_ptrs[i]);
898 mlpg_free(pst->dw.coef); pst->dw.coef = NULL;
899 mlpg_free(pst->dw.coef_ptrs); pst->dw.coef_ptrs = NULL;
900
901 for (i=0; i<pst->T; i++)
902 mlpg_free(pst->mseq[i]);
903 mlpg_free(pst->mseq);
904 for (i=0; i<pst->T; i++)
905 mlpg_free(pst->ivseq[i]);
906 mlpg_free(pst->ivseq);
907 for (i=0; i<pst->T; i++)
908 mlpg_free(pst->R[i]);
909 mlpg_free(pst->R);
910 mlpg_free(pst->r);
911 mlpg_free(pst->g);
912 for (i=0; i<pst->T; i++)
913 mlpg_free(pst->c[i]);
914 mlpg_free(pst->c);
915
916 return;
917 }
918
mlpg(LISP ltrack)919 LISP mlpg(LISP ltrack)
920 {
921 /* Generate an (mcep) track using Maximum Likelihood Parameter Generation */
922 MLPGPARA param = NODATA;
923 EST_Track *param_track, *out;
924 int dim, dim_st;
925 float like;
926 int i,j;
927 int nframes;
928 PStreamChol pst;
929
930 if ((ltrack == NULL) ||
931 (TYPEP(ltrack,tc_string) &&
932 (streq(get_c_string(ltrack),"nil"))))
933 return NIL;
934
935 param_track = track(ltrack);
936
937 nframes = param_track->num_frames();
938 dim = (param_track->num_channels()/2)-1;
939 dim_st = dim/2; /* dim2 in original code */
940 out = new EST_Track();
941 out->copy_setup(*param_track);
942 out->resize(nframes,dim_st+1);
943
944 param = xmlpgpara_init(dim,dim_st,nframes,nframes);
945
946 // mixture-index sequence
947 param->clsidxv = xlvalloc(nframes);
948 for (i=0; i<nframes; i++)
949 param->clsidxv->data[i] = i;
950
951 // initial static feature sequence
952 param->stm = xdmalloc(nframes,dim_st);
953 for (i=0; i<nframes; i++)
954 {
955 for (j=0; j<dim_st; j++)
956 param->stm->data[i][j] = param_track->a(i,(j+1)*2);
957 }
958
959 /* Load cluster means */
960 for (i=0; i<nframes; i++)
961 for (j=0; j<dim_st; j++)
962 param->mean->data[i][j] = param_track->a(i,(j+1)*2);
963
964 /* GMM parameters diagonal covariance */
965 InitPStreamChol(&pst, mlpg_dynwin, mlpg_dynwinsize, dim_st-1, nframes);
966 param->pdf = xdmalloc(nframes,dim*2);
967 param->cov = xdmalloc(nframes,dim);
968 for (i=0; i<nframes; i++)
969 for (j=0; j<dim; j++)
970 param->cov->data[i][j] =
971 param_track->a(i,(j+1)*2+1) *
972 param_track->a(i,(j+1)*2+1);
973 param->detvec = xget_detvec_diamat2inv(param->cov);
974
975 /* global variance parameters */
976 /* TBD get_gv_mlpgpara(param, vmfile, vvfile, dim2, msg_flag); */
977
978 if (nframes > 0)
979 {
980 get_dltmat(param->stm, &pst.dw, 1, param->dltm);
981
982 like = get_like_pdfseq_vit(dim, dim_st, nframes, nframes, param,
983 param_track, XTRUE);
984
985 /* vlike = get_like_gv(dim2, dnum, param); */
986
987 mlgparaChol(param->pdf, &pst, param->stm);
988 }
989
990 /* Put the answer back into the output track */
991 for (i=0; i<nframes; i++)
992 {
993 out->t(i) = param_track->t(i);
994 out->a(i,0) = param_track->a(i,0); /* F0 */
995 for (j=0; j<dim_st; j++)
996 {
997 out->a(i,j+1) = param->stm->data[i][j];
998 }
999 }
1000
1001 // memory free
1002 xmlpgparafree(param);
1003 pst_free(&pst);
1004
1005 return siod(out);
1006 }
1007
1008