1 /* --------------------------------------------------------------- */
2 /* The HMM-Based Speech Synthesis System (HTS): version 1.1.1 */
3 /* HTS Working Group */
4 /* */
5 /* Department of Computer Science */
6 /* Nagoya Institute of Technology */
7 /* and */
8 /* Interdisciplinary Graduate School of Science and Engineering */
9 /* Tokyo Institute of Technology */
10 /* Copyright (c) 2001-2003 */
11 /* All Rights Reserved. */
12 /* */
13 /* Permission is hereby granted, free of charge, to use and */
14 /* distribute this software and its documentation without */
15 /* restriction, including without limitation the rights to use, */
16 /* copy, modify, merge, publish, distribute, sublicense, and/or */
17 /* sell copies of this work, and to permit persons to whom this */
18 /* work is furnished to do so, subject to the following conditions: */
19 /* */
20 /* 1. The code must retain the above copyright notice, this list */
21 /* of conditions and the following disclaimer. */
22 /* */
23 /* 2. Any modifications must be clearly marked as such. */
24 /* */
25 /* NAGOYA INSTITUTE OF TECHNOLOGY, TOKYO INSITITUTE OF TECHNOLOGY, */
26 /* HTS WORKING GROUP, AND THE CONTRIBUTORS TO THIS WORK DISCLAIM */
27 /* ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL */
28 /* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
29 /* SHALL NAGOYA INSTITUTE OF TECHNOLOGY, TOKYO INSITITUTE OF */
30 /* TECHNOLOGY, HTS WORKING GROUP, NOR THE CONTRIBUTORS BE LIABLE */
31 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY */
32 /* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, */
33 /* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS */
34 /* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR */
35 /* PERFORMANCE OF THIS SOFTWARE. */
36 /* */
37 /* --------------------------------------------------------------- */
38 /* mlpg.c : speech parameter generation from pdf sequence */
39 /* */
40 /* 2003/12/26 by Heiga Zen */
41 /* --------------------------------------------------------------- */
42 /*********************************************************************/
43 /* */
44 /* Nagoya Institute of Technology, Aichi, Japan, */
45 /* and */
46 /* Carnegie Mellon University, Pittsburgh, PA */
47 /* Copyright (c) 2003-2004,2008 */
48 /* All Rights Reserved. */
49 /* */
50 /* Permission is hereby granted, free of charge, to use and */
51 /* distribute this software and its documentation without */
52 /* restriction, including without limitation the rights to use, */
53 /* copy, modify, merge, publish, distribute, sublicense, and/or */
54 /* sell copies of this work, and to permit persons to whom this */
55 /* work is furnished to do so, subject to the following conditions: */
56 /* */
57 /* 1. The code must retain the above copyright notice, this list */
58 /* of conditions and the following disclaimer. */
59 /* 2. Any modifications must be clearly marked as such. */
60 /* 3. Original authors' names are not deleted. */
61 /* */
62 /* NAGOYA INSTITUTE OF TECHNOLOGY, CARNEGIE MELLON UNIVERSITY, AND */
63 /* THE CONTRIBUTORS TO THIS WORK DISCLAIM ALL WARRANTIES WITH */
64 /* REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF */
65 /* MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL NAGOYA INSTITUTE */
66 /* OF TECHNOLOGY, CARNEGIE MELLON UNIVERSITY, NOR THE CONTRIBUTORS */
67 /* BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR */
68 /* ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR */
69 /* PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER */
70 /* TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE */
71 /* OR PERFORMANCE OF THIS SOFTWARE. */
72 /* */
73 /*********************************************************************/
74 /* */
75 /* Author : Tomoki Toda (tomoki@ics.nitech.ac.jp) */
76 /* Date : June 2004 */
77 /* */
78 /* Modified as a single file for inclusion in festival/flite */
79 /* May 2008 awb@cs.cmu.edu */
80 /*-------------------------------------------------------------------*/
81 /* */
82 /* ML-Based Parameter Generation */
83 /* */
84 /*-------------------------------------------------------------------*/
85
86 #include "cst_alloc.h"
87 #include "cst_string.h"
88 #include "cst_math.h"
89 #include "cst_track.h"
90 #include "cst_wave.h"
91 #include "cst_vc.h"
92 #include "cst_mlpg.h"
93
94 #define mlpg_alloc(X,Y) (cst_alloc(Y,X))
95 #define mlpg_free cst_free
96
xmlpgpara_init(int dim,int dim2,int dnum,int clsnum)97 static MLPGPARA xmlpgpara_init(int dim, int dim2, int dnum,
98 int clsnum)
99 {
100 MLPGPARA param;
101
102 /* memory allocation */
103 param = mlpg_alloc(1,struct MLPGPARA_STRUCT);
104 param->ov = xdvalloc(dim);
105 param->iuv = NODATA;
106 param->iumv = NODATA;
107 param->flkv = xdvalloc(dnum);
108 param->stm = NODATA;
109 param->dltm = xdmalloc(dnum, dim2);
110 param->pdf = NODATA;
111 param->detvec = NODATA;
112 param->wght = xdmalloc(clsnum, 1);
113 param->mean = xdmalloc(clsnum, dim);
114 param->cov = NODATA;
115 param->clsidxv = NODATA;
116 /* dia_flag */
117 param->clsdetv = xdvalloc(1);
118 param->clscov = xdmalloc(1, dim);
119
120 param->vdet = 1.0;
121 param->vm = NODATA;
122 param->vv = NODATA;
123 param->var = NODATA;
124
125 return param;
126 }
127
xmlpgparafree(MLPGPARA param)128 static void xmlpgparafree(MLPGPARA param)
129 {
130 if (param != NODATA) {
131 if (param->ov != NODATA) xdvfree(param->ov);
132 if (param->iuv != NODATA) xdvfree(param->iuv);
133 if (param->iumv != NODATA) xdvfree(param->iumv);
134 if (param->flkv != NODATA) xdvfree(param->flkv);
135 if (param->stm != NODATA) xdmfree(param->stm);
136 if (param->dltm != NODATA) xdmfree(param->dltm);
137 if (param->pdf != NODATA) xdmfree(param->pdf);
138 if (param->detvec != NODATA) xdvfree(param->detvec);
139 if (param->wght != NODATA) xdmfree(param->wght);
140 if (param->mean != NODATA) xdmfree(param->mean);
141 if (param->cov != NODATA) xdmfree(param->cov);
142 if (param->clsidxv != NODATA) xlvfree(param->clsidxv);
143 if (param->clsdetv != NODATA) xdvfree(param->clsdetv);
144 if (param->clscov != NODATA) xdmfree(param->clscov);
145 if (param->vm != NODATA) xdvfree(param->vm);
146 if (param->vv != NODATA) xdvfree(param->vv);
147 if (param->var != NODATA) xdvfree(param->var);
148 mlpg_free(param);
149 }
150
151 return;
152 }
153
get_like_pdfseq_vit(int dim,int dim2,int dnum,int clsnum,MLPGPARA param,float ** model,XBOOL dia_flag)154 static double get_like_pdfseq_vit(int dim, int dim2, int dnum, int clsnum,
155 MLPGPARA param,
156 float **model,
157 XBOOL dia_flag)
158 {
159 long d, c, k, l, j;
160 double sumgauss;
161 double like = 0.0;
162
163 for (d = 0, like = 0.0; d < dnum; d++) {
164 /* read weight and mean sequences */
165 param->wght->data[0][0] = 0.9; /* FIXME weights */
166 for (j=0; j<dim; j++)
167 param->mean->data[0][j] = model[d][(j+1)*2];
168
169 /* observation vector */
170 for (k = 0; k < dim2; k++) {
171 param->ov->data[k] = param->stm->data[d][k];
172 param->ov->data[k + dim2] = param->dltm->data[d][k];
173 }
174
175 /* mixture index */
176 c = d;
177 param->clsdetv->data[0] = param->detvec->data[c];
178
179 /* calculating likelihood */
180 if (dia_flag == XTRUE) {
181 for (k = 0; k < param->clscov->col; k++)
182 param->clscov->data[0][k] = param->cov->data[c][k];
183 sumgauss = get_gauss_dia(0, param->ov, param->clsdetv,
184 param->wght, param->mean, param->clscov);
185 } else {
186 for (k = 0; k < param->clscov->row; k++)
187 for (l = 0; l < param->clscov->col; l++)
188 param->clscov->data[k][l] =
189 param->cov->data[k + param->clscov->row * c][l];
190 sumgauss = get_gauss_full(0, param->ov, param->clsdetv,
191 param->wght, param->mean, param->clscov);
192 }
193 if (sumgauss <= 0.0) param->flkv->data[d] = -1.0 * INFTY2;
194 else param->flkv->data[d] = log(sumgauss);
195 like += param->flkv->data[d];
196
197 /* estimating U', U'*M */
198 if (dia_flag == XTRUE) {
199 /* PDF [U'*M U'] */
200 for (k = 0; k < dim; k++) {
201 param->pdf->data[d][k] =
202 param->clscov->data[0][k] * param->mean->data[0][k];
203 param->pdf->data[d][k + dim] = param->clscov->data[0][k];
204 }
205 } else {
206 /* PDF [U'*M U'] */
207 for (k = 0; k < dim; k++) {
208 param->pdf->data[d][k] = 0.0;
209 for (l = 0; l < dim; l++) {
210 param->pdf->data[d][k * dim + dim + l] =
211 param->clscov->data[k][l];
212 param->pdf->data[d][k] +=
213 param->clscov->data[k][l] * param->mean->data[0][l];
214 }
215 }
216 }
217 }
218
219 like /= (double)dnum;
220
221 return like;
222 }
223
224 #if 0
225 static double get_like_gv(long dim2, long dnum, MLPGPARA param)
226 {
227 long k;
228 double av = 0.0, dif = 0.0;
229 double vlike = -INFTY;
230
231 if (param->vm != NODATA && param->vv != NODATA) {
232 for (k = 0; k < dim2; k++)
233 calc_varstats(param->stm->data, k, dnum, &av,
234 &(param->var->data[k]), &dif);
235 vlike = log(get_gauss_dia5(param->vdet, 1.0, param->var,
236 param->vm, param->vv));
237 }
238
239 return vlike;
240 }
241
242 static void sm_mvav(DMATRIX mat, long hlen)
243 {
244 long k, l, m, p;
245 double d, sd;
246 DVECTOR vec = NODATA;
247 DVECTOR win = NODATA;
248
249 vec = xdvalloc(mat->row);
250
251 /* smoothing window */
252 win = xdvalloc(hlen * 2 + 1);
253 for (k = 0, d = 1.0, sd = 0.0; k < hlen; k++, d += 1.0) {
254 win->data[k] = d; win->data[win->length - k - 1] = d;
255 sd += d + d;
256 }
257 win->data[k] = d; sd += d;
258 for (k = 0; k < win->length; k++) win->data[k] /= sd;
259
260 for (l = 0; l < mat->col; l++) {
261 for (k = 0; k < mat->row; k++) {
262 for (m = 0, vec->data[k] = 0.0; m < win->length; m++) {
263 p = k - hlen + m;
264 if (p >= 0 && p < mat->row)
265 vec->data[k] += mat->data[p][l] * win->data[m];
266 }
267 }
268 for (k = 0; k < mat->row; k++) mat->data[k][l] = vec->data[k];
269 }
270
271 xdvfree(win);
272 xdvfree(vec);
273
274 return;
275 }
276 #endif
277
get_dltmat(DMATRIX mat,DWin * dw,int dno,DMATRIX dmat)278 static void get_dltmat(DMATRIX mat, DWin *dw, int dno, DMATRIX dmat)
279 {
280 int i, j, k, tmpnum;
281
282 tmpnum = (int)mat->row - dw->width[dno][WRIGHT];
283 for (k = dw->width[dno][WRIGHT]; k < tmpnum; k++) /* time index */
284 for (i = 0; i < (int)mat->col; i++) /* dimension index */
285 for (j = dw->width[dno][WLEFT], dmat->data[k][i] = 0.0;
286 j <= dw->width[dno][WRIGHT]; j++)
287 dmat->data[k][i] += mat->data[k + j][i] * dw->coef[dno][j];
288
289 for (i = 0; i < (int)mat->col; i++) { /* dimension index */
290 for (k = 0; k < dw->width[dno][WRIGHT]; k++) /* time index */
291 for (j = dw->width[dno][WLEFT], dmat->data[k][i] = 0.0;
292 j <= dw->width[dno][WRIGHT]; j++)
293 if (k + j >= 0)
294 dmat->data[k][i] += mat->data[k + j][i] * dw->coef[dno][j];
295 else
296 dmat->data[k][i] += (2.0 * mat->data[0][i] - mat->data[-k - j][i]) * dw->coef[dno][j];
297 for (k = tmpnum; k < (int)mat->row; k++) /* time index */
298 for (j = dw->width[dno][WLEFT], dmat->data[k][i] = 0.0;
299 j <= dw->width[dno][WRIGHT]; j++)
300 if (k + j < (int)mat->row)
301 dmat->data[k][i] += mat->data[k + j][i] * dw->coef[dno][j];
302 else
303 dmat->data[k][i] += (2.0 * mat->data[mat->row - 1][i] - mat->data[mat->row - k - j + mat->row - 2][i]) * dw->coef[dno][j];
304 }
305
306 return;
307 }
308
309
dcalloc(int x,int xoff)310 static double *dcalloc(int x, int xoff)
311 {
312 double *ptr;
313
314 ptr = mlpg_alloc(x,double);
315 /* ptr += xoff; */ /* Just not going to allow this */
316 return(ptr);
317 }
318
ddcalloc(int x,int y,int xoff,int yoff)319 static double **ddcalloc(int x, int y, int xoff, int yoff)
320 {
321 double **ptr;
322 register int i;
323
324 ptr = mlpg_alloc(x,double *);
325 for (i = 0; i < x; i++) ptr[i] = dcalloc(y, yoff);
326 /* ptr += xoff; */ /* Just not going to allow this */
327 return(ptr);
328 }
329
330 /***********************************/
331 /* ML using Choleski decomposition */
332 /***********************************/
InitDWin(PStreamChol * pst,const float * dynwin,int fsize)333 static void InitDWin(PStreamChol *pst, const float *dynwin, int fsize)
334 {
335 int i,j;
336 int leng;
337
338 pst->dw.num = 1; /* only static */
339 if (dynwin) {
340 pst->dw.num = 2; /* static + dyn */
341 }
342 /* memory allocation */
343 pst->dw.width = mlpg_alloc(pst->dw.num,int *);
344 for (i = 0; i < pst->dw.num; i++)
345 pst->dw.width[i] = mlpg_alloc(2,int);
346
347 pst->dw.coef = mlpg_alloc(pst->dw.num, double *);
348 pst->dw.coef_ptrs = mlpg_alloc(pst->dw.num, double *);
349 /* window for static parameter WLEFT = 0, WRIGHT = 1 */
350 pst->dw.width[0][WLEFT] = pst->dw.width[0][WRIGHT] = 0;
351 pst->dw.coef_ptrs[0] = mlpg_alloc(1,double);
352 pst->dw.coef[0] = pst->dw.coef_ptrs[0];
353 pst->dw.coef[0][0] = 1.0;
354
355 /* set delta coefficients */
356 for (i = 1; i < pst->dw.num; i++) {
357 pst->dw.coef_ptrs[i] = mlpg_alloc(fsize, double);
358 pst->dw.coef[i] = pst->dw.coef_ptrs[i];
359 for (j=0; j<fsize; j++) /* FIXME make dynwin doubles for memmove */
360 pst->dw.coef[i][j] = (double)dynwin[j];
361 /* set pointer */
362 leng = fsize / 2; /* L (fsize = 2 * L + 1) */
363 pst->dw.coef[i] += leng; /* [L] -> [0] center */
364 pst->dw.width[i][WLEFT] = -leng; /* -L left */
365 pst->dw.width[i][WRIGHT] = leng; /* L right */
366 if (fsize % 2 == 0) pst->dw.width[i][WRIGHT]--;
367 }
368
369 pst->dw.maxw[WLEFT] = pst->dw.maxw[WRIGHT] = 0;
370 for (i = 0; i < pst->dw.num; i++) {
371 if (pst->dw.maxw[WLEFT] > pst->dw.width[i][WLEFT])
372 pst->dw.maxw[WLEFT] = pst->dw.width[i][WLEFT];
373 if (pst->dw.maxw[WRIGHT] < pst->dw.width[i][WRIGHT])
374 pst->dw.maxw[WRIGHT] = pst->dw.width[i][WRIGHT];
375 }
376
377 return;
378 }
379
InitPStreamChol(PStreamChol * pst,const float * dynwin,int fsize,int order,int T)380 static void InitPStreamChol(PStreamChol *pst, const float *dynwin, int fsize,
381 int order, int T)
382 {
383 /* order of cepstrum */
384 pst->order = order;
385
386 /* windows for dynamic feature */
387 InitDWin(pst, dynwin, fsize);
388
389 /* dimension of observed vector */
390 pst->vSize = (pst->order + 1) * pst->dw.num; /* odim = dim * (1--3) */
391
392 /* memory allocation */
393 pst->T = T; /* number of frames */
394 pst->width = pst->dw.maxw[WRIGHT] * 2 + 1; /* width of R */
395 pst->mseq = ddcalloc(T, pst->vSize, 0, 0); /* [T][odim] */
396 pst->ivseq = ddcalloc(T, pst->vSize, 0, 0); /* [T][odim] */
397 pst->R = ddcalloc(T, pst->width, 0, 0); /* [T][width] */
398 pst->r = dcalloc(T, 0); /* [T] */
399 pst->g = dcalloc(T, 0); /* [T] */
400 pst->c = ddcalloc(T, pst->order + 1, 0, 0); /* [T][dim] */
401
402 return;
403 }
404
mlgparaChol(DMATRIX pdf,PStreamChol * pst,DMATRIX mlgp)405 static void mlgparaChol(DMATRIX pdf, PStreamChol *pst, DMATRIX mlgp)
406 {
407 int t, d;
408
409 /* error check */
410 if (pst->vSize * 2 != pdf->col || pst->order + 1 != mlgp->col) {
411 cst_errmsg("Error mlgparaChol: Different dimension\n");
412 cst_error();
413 }
414
415 /* mseq: U^{-1}*M, ifvseq: U^{-1} */
416 for (t = 0; t < pst->T; t++) {
417 for (d = 0; d < pst->vSize; d++) {
418 pst->mseq[t][d] = pdf->data[t][d];
419 pst->ivseq[t][d] = pdf->data[t][pst->vSize + d];
420 }
421 }
422
423 /* ML parameter generation */
424 mlpgChol(pst);
425
426 /* extracting parameters */
427 for (t = 0; t < pst->T; t++)
428 for (d = 0; d <= pst->order; d++)
429 mlgp->data[t][d] = pst->c[t][d];
430
431 return;
432 }
433
434 /* generate parameter sequence from pdf sequence using Choleski decomposition */
mlpgChol(PStreamChol * pst)435 static void mlpgChol(PStreamChol *pst)
436 {
437 register int m;
438
439 /* generating parameter in each dimension */
440 for (m = 0; m <= pst->order; m++) {
441 calc_R_and_r(pst, m);
442 Choleski(pst);
443 Choleski_forward(pst);
444 Choleski_backward(pst, m);
445 }
446
447 return;
448 }
449
450 /* parameter generation fuctions */
451 /* calc_R_and_r: calculate R = W'U^{-1}W and r = W'U^{-1}M */
calc_R_and_r(PStreamChol * pst,const int m)452 static void calc_R_and_r(PStreamChol *pst, const int m)
453 {
454 register int i, j, k, l, n;
455 double wu;
456
457 for (i = 0; i < pst->T; i++) {
458 pst->r[i] = pst->mseq[i][m];
459 pst->R[i][0] = pst->ivseq[i][m];
460
461 for (j = 1; j < pst->width; j++) pst->R[i][j] = 0.0;
462
463 for (j = 1; j < pst->dw.num; j++) {
464 for (k = pst->dw.width[j][0]; k <= pst->dw.width[j][1]; k++) {
465 n = i + k;
466 if (n >= 0 && n < pst->T && pst->dw.coef[j][-k] != 0.0) {
467 l = j * (pst->order + 1) + m;
468 pst->r[i] += pst->dw.coef[j][-k] * pst->mseq[n][l];
469 wu = pst->dw.coef[j][-k] * pst->ivseq[n][l];
470
471 for (l = 0; l < pst->width; l++) {
472 n = l-k;
473 if (n <= pst->dw.width[j][1] && i + l < pst->T &&
474 pst->dw.coef[j][n] != 0.0)
475 pst->R[i][l] += wu * pst->dw.coef[j][n];
476 }
477 }
478 }
479 }
480 }
481
482 return;
483 }
484
485 /* Choleski: Choleski factorization of Matrix R */
Choleski(PStreamChol * pst)486 static void Choleski(PStreamChol *pst)
487 {
488 register int t, j, k;
489
490 pst->R[0][0] = sqrt(pst->R[0][0]);
491
492 for (j = 1; j < pst->width; j++) pst->R[0][j] /= pst->R[0][0];
493
494 for (t = 1; t < pst->T; t++) {
495 for (j = 1; j < pst->width; j++)
496 if (t - j >= 0)
497 pst->R[t][0] -= pst->R[t - j][j] * pst->R[t - j][j];
498
499 pst->R[t][0] = sqrt(pst->R[t][0]);
500
501 for (j = 1; j < pst->width; j++) {
502 for (k = 0; k < pst->dw.maxw[WRIGHT]; k++)
503 if (j != pst->width - 1)
504 pst->R[t][j] -=
505 pst->R[t - k - 1][j - k] * pst->R[t - k - 1][j + 1];
506
507 pst->R[t][j] /= pst->R[t][0];
508 }
509 }
510
511 return;
512 }
513
514 /* Choleski_forward: forward substitution to solve linear equations */
Choleski_forward(PStreamChol * pst)515 static void Choleski_forward(PStreamChol *pst)
516 {
517 register int t, j;
518 double hold;
519
520 pst->g[0] = pst->r[0] / pst->R[0][0];
521
522 for (t=1; t < pst->T; t++) {
523 hold = 0.0;
524 for (j = 1; j < pst->width; j++)
525 if (t - j >= 0 && pst->R[t - j][j] != 0.0)
526 hold += pst->R[t - j][j] * pst->g[t - j];
527 pst->g[t] = (pst->r[t] - hold) / pst->R[t][0];
528 }
529
530 return;
531 }
532
533 /* Choleski_backward: backward substitution to solve linear equations */
Choleski_backward(PStreamChol * pst,const int m)534 static void Choleski_backward(PStreamChol *pst, const int m)
535 {
536 register int t, j;
537 double hold;
538
539 pst->c[pst->T - 1][m] = pst->g[pst->T - 1] / pst->R[pst->T - 1][0];
540
541 for (t = pst->T - 2; t >= 0; t--) {
542 hold = 0.0;
543 for (j = 1; j < pst->width; j++)
544 if (t + j < pst->T && pst->R[t][j] != 0.0)
545 hold += pst->R[t][j] * pst->c[t + j][m];
546 pst->c[t][m] = (pst->g[t] - hold) / pst->R[t][0];
547 }
548
549 return;
550 }
551
552
553 /* ML Considering Global Variance */
554 #if 0
555 static void varconv(double **c, const int m, const int T, const double var)
556 {
557 register int n;
558 double sd, osd;
559 double oav = 0.0, ovar = 0.0, odif = 0.0;
560
561 calc_varstats(c, m, T, &oav, &ovar, &odif);
562 osd = sqrt(ovar); sd = sqrt(var);
563 for (n = 0; n < T; n++)
564 c[n][m] = (c[n][m] - oav) / osd * sd + oav;
565
566 return;
567 }
568
569 static void calc_varstats(double **c, const int m, const int T,
570 double *av, double *var, double *dif)
571 {
572 register int i;
573 register double d;
574
575 *av = 0.0;
576 *var = 0.0;
577 *dif = 0.0;
578 for (i = 0; i < T; i++) *av += c[i][m];
579 *av /= (double)T;
580 for (i = 0; i < T; i++) {
581 d = c[i][m] - *av;
582 *var += d * d; *dif += d;
583 }
584 *var /= (double)T;
585
586 return;
587 }
588
589 /* Diagonal Covariance Version */
590 static void mlgparaGrad(DMATRIX pdf, PStreamChol *pst, DMATRIX mlgp, const int max,
591 double th, double e, double alpha, DVECTOR vm, DVECTOR vv,
592 XBOOL nrmflag, XBOOL extvflag)
593 {
594 int t, d;
595
596 /* error check */
597 if (pst->vSize * 2 != pdf->col || pst->order + 1 != mlgp->col) {
598 cst_errmsg("Error mlgparaChol: Different dimension\n");
599 cst_error();
600 }
601
602 /* mseq: U^{-1}*M, ifvseq: U^{-1} */
603 for (t = 0; t < pst->T; t++) {
604 for (d = 0; d < pst->vSize; d++) {
605 pst->mseq[t][d] = pdf->data[t][d];
606 pst->ivseq[t][d] = pdf->data[t][pst->vSize + d];
607 }
608 }
609
610 /* ML parameter generation */
611 mlpgChol(pst);
612
613 /* extend variance */
614 if (extvflag == XTRUE)
615 for (d = 0; d <= pst->order; d++)
616 varconv(pst->c, d, pst->T, vm->data[d]);
617
618 /* estimating parameters */
619 mlpgGrad(pst, max, th, e, alpha, vm, vv, nrmflag);
620
621 /* extracting parameters */
622 for (t = 0; t < pst->T; t++)
623 for (d = 0; d <= pst->order; d++)
624 mlgp->data[t][d] = pst->c[t][d];
625
626 return;
627 }
628
629 /* generate parameter sequence from pdf sequence using gradient */
630 static void mlpgGrad(PStreamChol *pst, const int max, double th, double e,
631 double alpha, DVECTOR vm, DVECTOR vv, XBOOL nrmflag)
632 {
633 register int m, i, t;
634 double diff, n, dth;
635
636 if (nrmflag == XTRUE)
637 n = (double)(pst->T * pst->vSize) / (double)(vm->length);
638 else n = 1.0;
639
640 /* generating parameter in each dimension */
641 for (m = 0; m <= pst->order; m++) {
642 calc_R_and_r(pst, m);
643 dth = th * sqrt(vm->data[m]);
644 for (i = 0; i < max; i++) {
645 calc_grad(pst, m);
646 if (vm != NODATA && vv != NODATA)
647 calc_vargrad(pst, m, alpha, n, vm->data[m], vv->data[m]);
648 for (t = 0, diff = 0.0; t < pst->T; t++) {
649 diff += pst->g[t] * pst->g[t];
650 pst->c[t][m] += e * pst->g[t];
651 }
652 diff = sqrt(diff / (double)pst->T);
653 if (diff < dth || diff == 0.0) break;
654 }
655 }
656
657 return;
658 }
659
660 /* calc_grad: calculate -RX + r = -W'U^{-1}W * X + W'U^{-1}M */
661 void calc_grad(PStreamChol *pst, const int m)
662 {
663 register int i, j;
664
665 for (i = 0; i < pst->T; i++) {
666 pst->g[i] = pst->r[i] - pst->c[i][m] * pst->R[i][0];
667 for (j = 1; j < pst->width; j++) {
668 if (i + j < pst->T) pst->g[i] -= pst->c[i + j][m] * pst->R[i][j];
669 if (i - j >= 0) pst->g[i] -= pst->c[i - j][m] * pst->R[i - j][j];
670 }
671 }
672
673 return;
674 }
675
676 static void calc_vargrad(PStreamChol *pst, const int m, double alpha, double n,
677 double vm, double vv)
678 {
679 register int i;
680 double vg, w1, w2;
681 double av = 0.0, var = 0.0, dif = 0.0;
682
683 if (alpha > 1.0 || alpha < 0.0) {
684 w1 = 1.0; w2 = 1.0;
685 } else {
686 w1 = alpha; w2 = 1.0 - alpha;
687 }
688
689 calc_varstats(pst->c, m, pst->T, &av, &var, &dif);
690
691 for (i = 0; i < pst->T; i++) {
692 vg = -(var - vm) * (pst->c[i][m] - av) * vv * 2.0 / (double)pst->T;
693 pst->g[i] = w1 * pst->g[i] / n + w2 * vg;
694 }
695
696 return;
697 }
698 #endif
699
700 /* diagonal covariance */
xget_detvec_diamat2inv(DMATRIX covmat)701 static DVECTOR xget_detvec_diamat2inv(DMATRIX covmat) /* [num class][dim] */
702 {
703 long dim, clsnum;
704 long i, j;
705 double det;
706 DVECTOR detvec = NODATA;
707 /* In cases where the determinant of the matrix ends up being */
708 /* zero, we can fix this by artificially setting the covariance */
709 /* to be a magic number. I chose this magic number by computing */
710 /* the mean of all MCEP SDs of all leaves in a standard RMS */
711 /* voice. - Prasanna */
712 /* double magic_covariance = 0.0962*0.0962; */
713 double magic_covariance = 0.0962;
714
715 clsnum = covmat->row;
716 dim = covmat->col;
717 /* memory allocation */
718 detvec = xdvalloc(clsnum);
719 for (i = 0; i < clsnum; i++) {
720 for (j = 0, det = 1.0; j < dim; j++) {
721 det *= covmat->data[i][j];
722 if (det > 0.0) {
723 covmat->data[i][j] = 1.0 / covmat->data[i][j];
724 } else {
725 covmat->data[i][j] = 1.0 / magic_covariance;
726 det = pow(magic_covariance, j+1);
727 /* cst_errmsg("error:(class %ld) determinant <= 0, det = %f\n", i, det); */
728 }
729 }
730 detvec->data[i] = det;
731 }
732
733 return detvec;
734 }
735
get_gauss_full(long clsidx,DVECTOR vec,DVECTOR detvec,DMATRIX weightmat,DMATRIX meanvec,DMATRIX invcovmat)736 static double get_gauss_full(long clsidx,
737 DVECTOR vec, /* [dim] */
738 DVECTOR detvec, /* [clsnum] */
739 DMATRIX weightmat, /* [clsnum][1] */
740 DMATRIX meanvec, /* [clsnum][dim] */
741 DMATRIX invcovmat) /* [clsnum * dim][dim] */
742 {
743 double gauss;
744
745 if (detvec->data[clsidx] <= 0.0) {
746 cst_errmsg("#error: det <= 0.0\n");
747 cst_error();
748 }
749
750 gauss = weightmat->data[clsidx][0]
751 / sqrt(pow(2.0 * PI, (double)vec->length) * detvec->data[clsidx])
752 * exp(-1.0 * cal_xmcxmc(clsidx, vec, meanvec, invcovmat) / 2.0);
753
754 return gauss;
755 }
756
cal_xmcxmc(long clsidx,DVECTOR x,DMATRIX mm,DMATRIX cm)757 static double cal_xmcxmc(long clsidx,
758 DVECTOR x,
759 DMATRIX mm, /* [num class][dim] */
760 DMATRIX cm) /* [num class * dim][dim] */
761 {
762 long clsnum, k, l, b, dim;
763 double *vec = NULL;
764 double td, d;
765
766 dim = x->length;
767 clsnum = mm->row;
768 b = clsidx * dim;
769 if (mm->col != dim || cm->col != dim || clsnum * dim != cm->row) {
770 cst_errmsg("Error cal_xmcxmc: different dimension\n");
771 cst_error();
772 }
773
774 /* memory allocation */
775 vec = mlpg_alloc((int)dim, double);
776 for (k = 0; k < dim; k++) vec[k] = x->data[k] - mm->data[clsidx][k];
777 for (k = 0, d = 0.0; k < dim; k++) {
778 for (l = 0, td = 0.0; l < dim; l++) td += vec[l] * cm->data[l + b][k];
779 d += td * vec[k];
780 }
781 /* memory free */
782 mlpg_free(vec); vec = NULL;
783
784 return d;
785 }
786
787 #if 0
788 /* diagonal covariance */
789 static double get_gauss_dia5(double det,
790 double weight,
791 DVECTOR vec, /* dim */
792 DVECTOR meanvec, /* dim */
793 DVECTOR invcovvec) /* dim */
794 {
795 double gauss, sb;
796 long k;
797
798 if (det <= 0.0) {
799 cst_errmsg("#error: det <= 0.0\n");
800 cst_error();
801 }
802
803 for (k = 0, gauss = 0.0; k < vec->length; k++) {
804 sb = vec->data[k] - meanvec->data[k];
805 gauss += sb * invcovvec->data[k] * sb;
806 }
807
808 gauss = weight / sqrt(pow(2.0 * PI, (double)vec->length) * det)
809 * exp(-gauss / 2.0);
810
811 return gauss;
812 }
813 #endif
814
get_gauss_dia(long clsidx,DVECTOR vec,DVECTOR detvec,DMATRIX weightmat,DMATRIX meanmat,DMATRIX invcovmat)815 static double get_gauss_dia(long clsidx,
816 DVECTOR vec, /* [dim] */
817 DVECTOR detvec, /* [clsnum] */
818 DMATRIX weightmat, /* [clsnum][1] */
819 DMATRIX meanmat, /* [clsnum][dim] */
820 DMATRIX invcovmat) /* [clsnum][dim] */
821 {
822 double gauss, sb;
823 long k;
824
825 if (detvec->data[clsidx] <= 0.0) {
826 cst_errmsg("#error: det <= 0.0\n");
827 cst_error();
828 }
829
830 for (k = 0, gauss = 0.0; k < vec->length; k++) {
831 sb = vec->data[k] - meanmat->data[clsidx][k];
832 gauss += sb * invcovmat->data[clsidx][k] * sb;
833 }
834
835 gauss = weightmat->data[clsidx][0]
836 / sqrt(pow(2.0 * PI, (double)vec->length) * detvec->data[clsidx])
837 * exp(-gauss / 2.0);
838
839 return gauss;
840 }
841
pst_free(PStreamChol * pst)842 static void pst_free(PStreamChol *pst)
843 {
844 int i;
845
846 for (i=0; i<pst->dw.num; i++)
847 mlpg_free(pst->dw.width[i]);
848 mlpg_free(pst->dw.width); pst->dw.width = NULL;
849 for (i=0; i<pst->dw.num; i++)
850 mlpg_free(pst->dw.coef_ptrs[i]);
851 mlpg_free(pst->dw.coef); pst->dw.coef = NULL;
852 mlpg_free(pst->dw.coef_ptrs); pst->dw.coef_ptrs = NULL;
853
854 for (i=0; i<pst->T; i++)
855 mlpg_free(pst->mseq[i]);
856 mlpg_free(pst->mseq);
857 for (i=0; i<pst->T; i++)
858 mlpg_free(pst->ivseq[i]);
859 mlpg_free(pst->ivseq);
860 for (i=0; i<pst->T; i++)
861 mlpg_free(pst->R[i]);
862 mlpg_free(pst->R);
863 mlpg_free(pst->r);
864 mlpg_free(pst->g);
865 for (i=0; i<pst->T; i++)
866 mlpg_free(pst->c[i]);
867 mlpg_free(pst->c);
868
869 return;
870 }
871
mlpg(const cst_track * param_track,cst_cg_db * cg_db)872 cst_track *mlpg(const cst_track *param_track, cst_cg_db *cg_db)
873 {
874 /* Generate an (mcep) track using Maximum Likelihood Parameter Generation */
875 MLPGPARA param = NODATA;
876 cst_track *out;
877 int dim, dim_st;
878 /* float like; */
879 int i,j;
880 int nframes;
881 PStreamChol pst;
882
883 nframes = param_track->num_frames;
884 dim = (param_track->num_channels/2)-1;
885 dim_st = dim/2; /* dim2 in original code */
886 out = new_track();
887 cst_track_resize(out,nframes,dim_st+1);
888
889 param = xmlpgpara_init(dim,dim_st,nframes,nframes);
890
891 /* mixture-index sequence */
892 param->clsidxv = xlvalloc(nframes);
893 for (i=0; i<nframes; i++)
894 param->clsidxv->data[i] = i;
895
896 /* initial static feature sequence */
897 param->stm = xdmalloc(nframes,dim_st);
898 for (i=0; i<nframes; i++)
899 {
900 for (j=0; j<dim_st; j++)
901 param->stm->data[i][j] = param_track->frames[i][(j+1)*2];
902 }
903
904 /* Load cluster means */
905 for (i=0; i<nframes; i++)
906 for (j=0; j<dim_st; j++)
907 param->mean->data[i][j] = param_track->frames[i][(j+1)*2];
908
909 /* GMM parameters diagonal covariance */
910 InitPStreamChol(&pst, cg_db->dynwin, cg_db->dynwinsize, dim_st-1, nframes);
911 param->pdf = xdmalloc(nframes,dim*2);
912 param->cov = xdmalloc(nframes,dim);
913 for (i=0; i<nframes; i++)
914 for (j=0; j<dim; j++)
915 param->cov->data[i][j] =
916 param_track->frames[i][(j+1)*2+1] *
917 param_track->frames[i][(j+1)*2+1];
918 param->detvec = xget_detvec_diamat2inv(param->cov);
919
920 /* global variance parameters */
921 /* TBD get_gv_mlpgpara(param, vmfile, vvfile, dim2, msg_flag); */
922
923 get_dltmat(param->stm, &pst.dw, 1, param->dltm);
924
925 get_like_pdfseq_vit(dim, dim_st, nframes, nframes, param,
926 param_track->frames, XTRUE);
927
928 /* vlike = get_like_gv(dim2, dnum, param); */
929
930 mlgparaChol(param->pdf, &pst, param->stm);
931
932 /* Put the answer back into the output track */
933 for (i=0; i<nframes; i++)
934 {
935 out->times[i] = param_track->times[i];
936 out->frames[i][0] = param_track->frames[i][0]; /* F0 */
937 for (j=0; j<dim_st; j++)
938 out->frames[i][j+1] = param->stm->data[i][j];
939 }
940
941 /* memory free */
942 xmlpgparafree(param);
943 pst_free(&pst);
944
945 return out;
946 }
947
948