1 /**
2 * @file wav2mfcc-pipe.c
3 *
4 * <JA>
5 * @brief �����ȷ����� MFCC ��ħ�̤��Ѵ����� (�ե졼��ñ��)
6 *
7 * �����Ǥ� wav2mfcc.c �δؿ���ե졼��Ʊ���˽������뤿����Ѵ�����
8 * �ؿ���Ǽ����Ƥ��ޤ���ǧ�������������Ϥ�ʿ�Ԥ��ƹԤ���硤�������
9 * �ؿ����Ѥ����ޤ���
10 * </JA>
11 *
12 * <EN>
13 * @brief Convert speech inputs into MFCC parameter vectors (per input frame)
14 *
15 * There are functions are derived from wav2mfcc.c, to compute
16 * MFCC vectors in per-frame basis. When performing on-line recognition,
17 * these functions will be used instead of ones in wav2mfcc.c
18 * </EN>
19 *
20 * @author Akinobu LEE
21 * @date Thu Feb 17 18:12:30 2005
22 *
23 * $Revision: 1.3 $
24 *
25 */
26 /*
27 * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
28 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
29 * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
30 * All rights reserved
31 */
32
33 /* wav2mfcc-pipe.c --- split Wav2MFCC to perform per-frame-basis,
34 and also realtime CMN for 1st-pass pipe-lining */
35
36 /************************************************************************/
37 /* wav2mfcc.c Convert Speech file to MFCC_E_D_(Z) file */
38 /*----------------------------------------------------------------------*/
39 /* Author : Yuichiro Nakano */
40 /* */
41 /* Copyright(C) Yuichiro Nakano 1996-1998 */
42 /*----------------------------------------------------------------------*/
43 /************************************************************************/
44
45
46 #include <sent/stddefs.h>
47 #include <sent/mfcc.h>
48 #include <sent/htk_param.h>
49
50 /***********************************************************************/
51 /**
52 * Allocate a new delta cycle buffer.
53 *
54 * @param veclen [in] length of a vector
55 * @param windowlen [in] window width for computing delta
56 *
57 * @return pointer to newly allocated delta cycle buffer structure.
58 */
59 DeltaBuf *
WMP_deltabuf_new(int veclen,int windowlen)60 WMP_deltabuf_new(int veclen, int windowlen)
61 {
62 int i;
63 DeltaBuf *db;
64
65 db = (DeltaBuf *)mymalloc(sizeof(DeltaBuf));
66 db->veclen = veclen;
67 db->win = windowlen;
68 db->len = windowlen * 2 + 1;
69 db->mfcc = (float **)mymalloc(sizeof(float *) * db->len);
70 db->is_on = (boolean *) mymalloc(sizeof(boolean) * db->len);
71 for (i=0;i<db->len;i++) {
72 db->mfcc[i] = (float *)mymalloc(sizeof(float) * veclen * 2);
73 }
74 db->B = 0;
75 for(i = 1; i <= windowlen; i++) db->B += i * i;
76 db->B *= 2;
77
78 return (db);
79 }
80
81 /**
82 * Destroy the delta cycle buffer.
83 *
84 * @param db [i/o] delta cycle buffer
85 */
86 void
WMP_deltabuf_free(DeltaBuf * db)87 WMP_deltabuf_free(DeltaBuf *db)
88 {
89 int i;
90
91 for (i=0;i<db->len;i++) {
92 free(db->mfcc[i]);
93 }
94 free(db->is_on);
95 free(db->mfcc);
96 free(db);
97 }
98
99 /**
100 * Reset and clear the delta cycle buffer.
101 *
102 * @param db [i/o] delta cycle buffer
103 */
104 void
WMP_deltabuf_prepare(DeltaBuf * db)105 WMP_deltabuf_prepare(DeltaBuf *db)
106 {
107 int i;
108 db->store = 0;
109 for (i=0;i<db->len;i++) {
110 db->is_on[i] = FALSE;
111 }
112 }
113
114 /**
115 * Calculate delta coefficients of the specified point in the cycle buffer.
116 *
117 * @param db [i/o] delta cycle buffer
118 * @param cur [in] target point to calculate the delta coefficients
119 */
120 static void
WMP_deltabuf_calc(DeltaBuf * db,int cur)121 WMP_deltabuf_calc(DeltaBuf *db, int cur)
122 {
123 int n, theta, p;
124 float A1, A2, sum;
125 int last_valid_left, last_valid_right;
126
127 for (n = 0; n < db->veclen; n++) {
128 sum = 0.0;
129 last_valid_left = last_valid_right = cur;
130 for (theta = 1; theta <= db->win; theta++) {
131 p = cur - theta;
132 if (p < 0) p += db->len;
133 if (db->is_on[p]) {
134 A1 = db->mfcc[p][n];
135 last_valid_left = p;
136 } else {
137 A1 = db->mfcc[last_valid_left][n];
138 }
139 p = cur + theta;
140 if (p >= db->len) p -= db->len;
141 if (db->is_on[p]) {
142 A2 = db->mfcc[p][n];
143 last_valid_right = p;
144 } else {
145 A2 = db->mfcc[last_valid_right][n];
146 }
147 sum += theta * (A2 - A1);
148 }
149 db->mfcc[cur][db->veclen + n] = sum / db->B;
150 }
151 }
152
153 /**
154 * Store the given MFCC vector into the delta cycle buffer, and compute the
155 * latest delta coefficients.
156 *
157 * @param db [i/o] delta cycle buffer
158 * @param new_mfcc [in] MFCC vector
159 *
160 * @return TRUE if next delta coeff. computed, in that case it is saved
161 * in db->delta[], or FALSE if delta is not yet computed by short of data.
162 */
163 boolean
WMP_deltabuf_proceed(DeltaBuf * db,float * new_mfcc)164 WMP_deltabuf_proceed(DeltaBuf *db, float *new_mfcc)
165 {
166 int cur;
167 boolean ret;
168
169 /* copy data to store point */
170 memcpy(db->mfcc[db->store], new_mfcc, sizeof(float) * db->veclen);
171 db->is_on[db->store] = TRUE;
172
173 /* get current calculation point */
174 cur = db->store - db->win;
175 if (cur < 0) cur += db->len;
176
177 /* if the current point is fulfilled, compute delta */
178 if (db->is_on[cur]) {
179 WMP_deltabuf_calc(db, cur);
180 db->vec = db->mfcc[cur];
181 ret = TRUE;
182 } else {
183 ret = FALSE;
184 }
185
186 /* move store pointer to next */
187 db->store++;
188 if (db->store >= db->len) db->store -= db->len;
189
190 /* return TRUE if delta computed for current, or -1 if not calculated yet */
191 return (ret);
192 }
193
194 /**
195 * Flush the delta cycle buffer the delta coefficients
196 * left in the cycle buffer.
197 *
198 * @param db [i/o] delta cycle buffer
199 *
200 * @return TRUE if next delta coeff. computed, in that case it is saved
201 * in db->delta[], or FALSE if all delta computation has been flushed and
202 * no data is available.
203 *
204 */
205 boolean
WMP_deltabuf_flush(DeltaBuf * db)206 WMP_deltabuf_flush(DeltaBuf *db)
207 {
208 int cur;
209 boolean ret;
210
211 /* clear store point */
212 db->is_on[db->store] = FALSE;
213
214 /* get current calculation point */
215 cur = db->store - db->win;
216 if (cur < 0) cur += db->len;
217
218 /* if the current point if fulfilled, compute delta */
219 if (db->is_on[cur]) {
220 WMP_deltabuf_calc(db, cur);
221 db->vec = db->mfcc[cur];
222 ret = TRUE;
223 } else {
224 ret = FALSE;
225 }
226
227 /* move store pointer to next */
228 db->store++;
229 if (db->store >= db->len) db->store -= db->len;
230
231 /* return TRUE if delta computed for current, or -1 if not calculated yet */
232 return (ret);
233 }
234
235 /***********************************************************************/
236 /* MAP-CMN */
237 /***********************************************************************/
238
239 /**
240 * Initialize MAP-CMN at startup.
241 *
242 * @param para [in] MFCC computation configuration parameter
243 * @param weight [in] initial cepstral mean weight
244 *
245 */
246 CMNWork *
CMN_realtime_new(Value * para,float weight)247 CMN_realtime_new(Value *para, float weight)
248 {
249 int i;
250
251 CMNWork *c;
252
253 c = (CMNWork *)mymalloc(sizeof(CMNWork));
254
255 c->cweight = weight;
256 c->mfcc_dim = para->mfcc_dim + (para->c0 ? 1 : 0);
257 c->veclen = para->veclen;
258 c->mean = para->cmn ? TRUE : FALSE;
259 c->var = para->cvn ? TRUE : FALSE;
260 c->clist_max = CPSTEP;
261 c->clist_num = 0;
262 c->clist = (CMEAN *)mymalloc(sizeof(CMEAN) * c->clist_max);
263 for(i=0;i<c->clist_max;i++) {
264 c->clist[i].mfcc_sum = (float *)mymalloc(sizeof(float)*c->veclen);
265 if (c->var) c->clist[i].mfcc_var = (float *)mymalloc(sizeof(float)*c->veclen);
266 c->clist[i].framenum = 0;
267 }
268 c->now.mfcc_sum = (float *)mymalloc(sizeof(float) * c->veclen);
269 if (c->var) c->now.mfcc_var = (float *)mymalloc(sizeof(float) * c->veclen);
270
271 c->cmean_init = (float *)mymalloc(sizeof(float) * c->veclen);
272 if (c->var) c->cvar_init = (float *)mymalloc(sizeof(float) * c->veclen);
273 c->cmean_init_set = FALSE;
274
275 return c;
276 }
277
278 /**
279 * Free work area for real-time CMN.
280 *
281 * @param c [i/o] CMN calculation work area
282 *
283 */
284 void
CMN_realtime_free(CMNWork * c)285 CMN_realtime_free(CMNWork *c)
286 {
287 int i;
288
289 free(c->cmean_init);
290 free(c->now.mfcc_sum);
291 if (c->var) {
292 free(c->cvar_init);
293 free(c->now.mfcc_var);
294 }
295 for(i=0;i<c->clist_max;i++) {
296 if (c->var) free(c->clist[i].mfcc_var);
297 free(c->clist[i].mfcc_sum);
298 }
299 free(c->clist);
300 free(c);
301 }
302
303 /**
304 * Prepare for MAP-CMN at start of each input
305 *
306 * @param c [i/o] CMN calculation work area
307 */
308 void
CMN_realtime_prepare(CMNWork * c)309 CMN_realtime_prepare(CMNWork *c)
310 {
311 int d;
312
313 for(d=0;d<c->veclen;d++) c->now.mfcc_sum[d] = 0.0;
314 if (c->var) {
315 for(d=0;d<c->veclen;d++) c->now.mfcc_var[d] = 0.0;
316 }
317 c->now.framenum = 0;
318 }
319
320 /**
321 * Perform MAP-CMN for incoming MFCC vectors
322 *
323 * @param c [i/o] CMN calculation work area
324 * @param mfcc [in] MFCC vector
325 *
326 */
327 void
CMN_realtime(CMNWork * c,float * mfcc)328 CMN_realtime(CMNWork *c, float *mfcc)
329 {
330 int d;
331 double x, y;
332
333 c->now.framenum++;
334 if (c->cmean_init_set) {
335 /* initial data exists */
336 for(d=0;d<c->veclen;d++) {
337 /* accumulate current MFCC to sum */
338 c->now.mfcc_sum[d] += mfcc[d];
339 /* calculate map-mean */
340 x = c->now.mfcc_sum[d] + c->cweight * c->cmean_init[d];
341 y = (double)c->now.framenum + c->cweight;
342 x /= y;
343 if (c->var) {
344 /* calculate map-var */
345 c->now.mfcc_var[d] += (mfcc[d] - x) * (mfcc[d] - x);
346 }
347 if (c->mean && d < c->mfcc_dim) {
348 /* mean normalization */
349 mfcc[d] -= x;
350 }
351 if (c->var) {
352 /* variance normalization */
353 x = c->now.mfcc_var[d] + c->cweight * c->cvar_init[d];
354 y = (double)c->now.framenum + c->cweight;
355 mfcc[d] /= sqrt(x / y);
356 }
357 }
358 } else {
359 /* no initial data */
360 for(d=0;d<c->veclen;d++) {
361 /* accumulate current MFCC to sum */
362 c->now.mfcc_sum[d] += mfcc[d];
363 /* calculate current mean */
364 x = c->now.mfcc_sum[d] / c->now.framenum;
365 if (c->var) {
366 /* calculate current variance */
367 c->now.mfcc_var[d] += (mfcc[d] - x) * (mfcc[d] - x);
368 }
369 if (c->mean && d < c->mfcc_dim) {
370 /* mean normalization */
371 mfcc[d] -= x;
372 }
373 #if 0 /* not perform variance normalization on no initial data */
374 if (c->var) {
375 /* variance normalization */
376 mfcc[d] /= sqrt(c->now.mfcc_var[d] / c->now.framenum);
377 }
378 #endif
379 }
380 }
381 }
382
383 /**
384 * Update initial cepstral mean from previous utterances for next input.
385 *
386 * @param c [i/o] CMN calculation work area
387 */
388 void
CMN_realtime_update(CMNWork * c,HTK_Param * param)389 CMN_realtime_update(CMNWork *c, HTK_Param *param)
390 {
391 float *tmp, *tmp2;
392 int i, d;
393 int frames;
394
395 /* if CMN_realtime was never called before this, return immediately */
396 /* this may occur by pausing just after startup */
397 if (c->now.framenum == 0) return;
398
399 /* re-calculate variance based on the final mean at the given param */
400 if (c->var && param != NULL) {
401 float m, x;
402 if (param->samplenum != c->now.framenum) {
403 jlog("InternalError: CMN_realtime_update: param->samplenum != c->now.framenum\n");
404 } else if (param->veclen != c->veclen) {
405 jlog("InternalError: CMN_realtime_update: param->veclen != c->veclen\n");
406 } else {
407 for(d=0;d<c->veclen;d++) {
408 m = c->now.mfcc_sum[d] / (float) c->now.framenum;
409 x = 0;
410 for(i=0;i<param->samplenum;i++) {
411 x += (param->parvec[i][d] - m) * (param->parvec[i][d] - m);
412 }
413 c->now.mfcc_var[d] = x;
414 }
415 }
416 }
417
418 /* compute cepstral mean from now and previous sums up to CPMAX frames */
419 for(d=0;d<c->veclen;d++) c->cmean_init[d] = c->now.mfcc_sum[d];
420 if (c->var) {
421 for(d=0;d<c->veclen;d++) c->cvar_init[d] = c->now.mfcc_var[d];
422 }
423 frames = c->now.framenum;
424 for(i=0;i<c->clist_num;i++) {
425 for(d=0;d<c->veclen;d++) c->cmean_init[d] += c->clist[i].mfcc_sum[d];
426 if (c->var) {
427 for(d=0;d<c->veclen;d++) c->cvar_init[d] += c->clist[i].mfcc_var[d];
428 }
429 frames += c->clist[i].framenum;
430 if (frames >= CPMAX) break;
431 }
432 for(d=0;d<c->veclen;d++) c->cmean_init[d] /= (float) frames;
433 if (c->var) {
434 for(d=0;d<c->veclen;d++) c->cvar_init[d] /= (float) frames;
435 }
436
437 c->cmean_init_set = TRUE;
438
439 /* expand clist if neccessary */
440 if (c->clist_num == c->clist_max && frames < CPMAX) {
441 c->clist_max += CPSTEP;
442 c->clist = (CMEAN *)myrealloc(c->clist, sizeof(CMEAN) * c->clist_max);
443 for(i=c->clist_num;i<c->clist_max;i++) {
444 c->clist[i].mfcc_sum = (float *)mymalloc(sizeof(float)*c->veclen);
445 if (c->var) c->clist[i].mfcc_var = (float *)mymalloc(sizeof(float)*c->veclen);
446 c->clist[i].framenum = 0;
447 }
448 }
449
450 /* shift clist */
451 tmp = c->clist[c->clist_max-1].mfcc_sum;
452 if (c->var) tmp2 = c->clist[c->clist_max-1].mfcc_var;
453 memmove(&(c->clist[1]), &(c->clist[0]), sizeof(CMEAN) * (c->clist_max - 1));
454 c->clist[0].mfcc_sum = tmp;
455 if (c->var) c->clist[0].mfcc_var = tmp2;
456 /* copy now to clist[0] */
457 memcpy(c->clist[0].mfcc_sum, c->now.mfcc_sum, sizeof(float) * c->veclen);
458 if (c->var) memcpy(c->clist[0].mfcc_var, c->now.mfcc_var, sizeof(float) * c->veclen);
459 c->clist[0].framenum = c->now.framenum;
460
461 if (c->clist_num < c->clist_max) c->clist_num++;
462
463 }
464
465 /**
466 * Read binary with byte swap (assume file is Big Endian)
467 *
468 * @param buf [out] data buffer
469 * @param unitbyte [in] size of unit in bytes
470 * @param unitnum [in] number of units to be read
471 * @param fp [in] file pointer
472 *
473 * @return TRUE if required number of units are fully read, FALSE if failed.
474 */
475 static boolean
myread(void * buf,size_t unitbyte,int unitnum,FILE * fp)476 myread(void *buf, size_t unitbyte, int unitnum, FILE *fp)
477 {
478 if (myfread(buf, unitbyte, unitnum, fp) < (size_t)unitnum) {
479 return(FALSE);
480 }
481 #ifndef WORDS_BIGENDIAN
482 swap_bytes(buf, unitbyte, unitnum);
483 #endif
484 return(TRUE);
485 }
486
487 /**
488 * Write binary with byte swap (assume data is Big Endian)
489 *
490 * @param buf [in] data buffer
491 * @param unitbyte [in] size of unit in bytes
492 * @param unitnum [in] number of units to write
493 * @param fd [in] file descriptor
494 *
495 * @return TRUE if required number of units are fully written, FALSE if failed.
496 */
497 static boolean
mywrite(void * buf,size_t unitbyte,size_t unitnum,int fd)498 mywrite(void *buf, size_t unitbyte, size_t unitnum, int fd)
499 {
500 #ifndef WORDS_BIGENDIAN
501 swap_bytes(buf, unitbyte, unitnum);
502 #endif
503 if (write(fd, buf, unitbyte * unitnum) < unitbyte * unitnum) {
504 return(FALSE);
505 }
506 #ifndef WORDS_BIGENDIAN
507 swap_bytes(buf, unitbyte, unitnum);
508 #endif
509 return(TRUE);
510 }
511
512 /**
513 * Load CMN parameter from file. If the number of MFCC dimension in the
514 * file does not match the specified one, an error will occur.
515 *
516 * @param c [i/o] CMN calculation work area
517 * @param filename [in] file name
518 *
519 * @return TRUE on success, FALSE on failure.
520 */
521 boolean
CMN_load_from_file(CMNWork * c,char * filename)522 CMN_load_from_file(CMNWork *c, char *filename)
523 {
524 FILE *fp;
525 int veclen;
526
527 jlog("Stat: wav2mfcc-pipe: reading initial CMN from file \"%s\"\n", filename);
528 if ((fp = fopen_readfile(filename)) == NULL) {
529 jlog("Error: wav2mfcc-pipe: failed to open\n");
530 return(FALSE);
531 }
532 /* read header */
533 if (myread(&veclen, sizeof(int), 1, fp) == FALSE) {
534 jlog("Error: wav2mfcc-pipe: failed to read header\n");
535 fclose_readfile(fp);
536 return(FALSE);
537 }
538 /* check length */
539 if (veclen != c->veclen) {
540 jlog("Error: wav2mfcc-pipe: cepstral dimension mismatch\n");
541 jlog("Error: wav2mfcc-pipe: process = %d, file = %d\n", c->veclen, veclen);
542 fclose_readfile(fp);
543 return(FALSE);
544 }
545 /* read body */
546 if (myread(c->cmean_init, sizeof(float), c->veclen, fp) == FALSE) {
547 jlog("Error: wav2mfcc-pipe: failed to read mean for CMN\n");
548 fclose_readfile(fp);
549 return(FALSE);
550 }
551 if (c->var) {
552 if (myread(c->cvar_init, sizeof(float), c->veclen, fp) == FALSE) {
553 jlog("Error: wav2mfcc-pipe: failed to read variance for CVN\n");
554 fclose_readfile(fp);
555 return(FALSE);
556 }
557 }
558
559 if (fclose_readfile(fp) == -1) {
560 jlog("Error: wav2mfcc-pipe: failed to close\n");
561 return(FALSE);
562 }
563
564 c->cmean_init_set = TRUE;
565 jlog("Stat: wav2mfcc-pipe: read CMN parameter\n");
566
567 return(TRUE);
568 }
569
570 /**
571 * Save the current CMN vector to a file.
572 *
573 * @param c [i/o] CMN calculation work area
574 * @param filename [in] filename to save the data.
575 *
576 * @return TRUE on success, FALSE on failure.
577 */
578 boolean
CMN_save_to_file(CMNWork * c,char * filename)579 CMN_save_to_file(CMNWork *c, char *filename)
580 {
581 int fd;
582
583 jlog("Stat: wav2mfcc-pipe: writing current cepstral data to file \"%s\"\n", filename);
584
585 if ((fd = creat(filename, 0644)) == -1) {
586 jlog("Error: wav2mfcc-pipe: failed to open \"%s\" to write current cepstral data\n", filename);
587 return(FALSE);
588 }
589 /* write header */
590 if (mywrite(&(c->veclen), sizeof(int), 1, fd) == FALSE) {
591 jlog("Error: wav2mfcc-pipe: cannot write header to \"%s\" as current cepstral data\n", filename);
592 close(fd);
593 return(FALSE);
594 }
595 /* write body */
596 if (mywrite(c->cmean_init, sizeof(float), c->veclen, fd) == FALSE) {
597 jlog("Error: wav2mfcc-pipe: cannot write mean to \"%s\" as current cepstral data\n", filename);
598 close(fd);
599 return(FALSE);
600 }
601 if (c->var) {
602 if (mywrite(c->cvar_init, sizeof(float), c->veclen, fd) == FALSE) {
603 jlog("Error: wav2mfcc-pipe: cannot write variance to \"%s\" as current cepstrum\n", filename);
604 close(fd);
605 return(FALSE);
606 }
607 }
608
609 close(fd);
610
611 jlog("Stat: wav2mfcc-pipe: current cepstral data written to \"%s\"\n", filename);
612
613 return(TRUE);
614 }
615
616
617 /***********************************************************************/
618 /* energy normalization and scaling on live input */
619 /***********************************************************************/
620
621 /**
622 * Initialize work area for energy normalization on live input.
623 * This should be called once on startup.
624 *
625 * @param energy [in] energy normalization work area
626 *
627 */
628 void
energy_max_init(ENERGYWork * energy)629 energy_max_init(ENERGYWork *energy)
630 {
631 energy->max = 5.0;
632 }
633
634 /**
635 * Prepare values for energy normalization on live input.
636 * This should be called before each input segment.
637 *
638 * @param energy [in] energy normalization work area
639 * @param para [in] MFCC computation configuration parameter
640 */
641 void
energy_max_prepare(ENERGYWork * energy,Value * para)642 energy_max_prepare(ENERGYWork *energy, Value *para)
643 {
644 energy->max_last = energy->max;
645 energy->min_last = energy->max - (para->silFloor * LOG_TEN) / 10.0;
646 energy->max = 0.0;
647 }
648
649 /**
650 * Peform energy normalization using maximum of last input.
651 *
652 * @param energy [in] energy normalization work area
653 * @param f [in] raw energy
654 * @param para [in] MFCC computation configuration parameter
655 *
656 * @return value of the normalized log energy.
657 */
658 LOGPROB
energy_max_normalize(ENERGYWork * energy,LOGPROB f,Value * para)659 energy_max_normalize(ENERGYWork *energy, LOGPROB f, Value *para)
660 {
661 if (energy->max < f) energy->max = f;
662 if (f < energy->min_last) f = energy->min_last;
663 return(1.0 - (energy->max_last - f) * para->escale);
664 }
665