1 /**
2  * @file   wav2mfcc-pipe.c
3  *
4  * <JA>
5  * @brief  �����ȷ����� MFCC ��ħ�̤��Ѵ����� (�ե졼��ñ��)
6  *
7  * �����Ǥ� wav2mfcc.c �δؿ���ե졼��Ʊ���˽������뤿����Ѵ�����
8  * �ؿ���Ǽ����Ƥ��ޤ���ǧ�������������Ϥ�ʿ�Ԥ��ƹԤ���硤�������
9  * �ؿ����Ѥ����ޤ���
10  * </JA>
11  *
12  * <EN>
13  * @brief  Convert speech inputs into MFCC parameter vectors (per input frame)
14  *
15  * There are functions are derived from wav2mfcc.c, to compute
16  * MFCC vectors in per-frame basis.  When performing on-line recognition,
17  * these functions will be used instead of ones in wav2mfcc.c
18  * </EN>
19  *
20  * @author Akinobu LEE
21  * @date   Thu Feb 17 18:12:30 2005
22  *
23  * $Revision: 1.3 $
24  *
25  */
26 /*
27  * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
28  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
29  * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
30  * All rights reserved
31  */
32 
33 /* wav2mfcc-pipe.c --- split Wav2MFCC to perform per-frame-basis,
34    and also realtime CMN for 1st-pass pipe-lining */
35 
36 /************************************************************************/
37 /*    wav2mfcc.c   Convert Speech file to MFCC_E_D_(Z) file             */
38 /*----------------------------------------------------------------------*/
39 /*    Author    : Yuichiro Nakano                                       */
40 /*                                                                      */
41 /*    Copyright(C) Yuichiro Nakano 1996-1998                            */
42 /*----------------------------------------------------------------------*/
43 /************************************************************************/
44 
45 
46 #include <sent/stddefs.h>
47 #include <sent/mfcc.h>
48 #include <sent/htk_param.h>
49 
50 /***********************************************************************/
51 /**
52  * Allocate a new delta cycle buffer.
53  *
54  * @param veclen [in] length of a vector
55  * @param windowlen [in] window width for computing delta
56  *
57  * @return pointer to newly allocated delta cycle buffer structure.
58  */
59 DeltaBuf *
WMP_deltabuf_new(int veclen,int windowlen)60 WMP_deltabuf_new(int veclen, int windowlen)
61 {
62   int i;
63   DeltaBuf *db;
64 
65   db = (DeltaBuf *)mymalloc(sizeof(DeltaBuf));
66   db->veclen = veclen;
67   db->win = windowlen;
68   db->len = windowlen * 2 + 1;
69   db->mfcc = (float **)mymalloc(sizeof(float *) * db->len);
70   db->is_on = (boolean *) mymalloc(sizeof(boolean) * db->len);
71   for (i=0;i<db->len;i++) {
72     db->mfcc[i] = (float *)mymalloc(sizeof(float) * veclen * 2);
73   }
74   db->B = 0;
75   for(i = 1; i <= windowlen; i++) db->B += i * i;
76   db->B *= 2;
77 
78   return (db);
79 }
80 
81 /**
82  * Destroy the delta cycle buffer.
83  *
84  * @param db [i/o] delta cycle buffer
85  */
86 void
WMP_deltabuf_free(DeltaBuf * db)87 WMP_deltabuf_free(DeltaBuf *db)
88 {
89   int i;
90 
91   for (i=0;i<db->len;i++) {
92     free(db->mfcc[i]);
93   }
94   free(db->is_on);
95   free(db->mfcc);
96   free(db);
97 }
98 
99 /**
100  * Reset and clear the delta cycle buffer.
101  *
102  * @param db [i/o] delta cycle buffer
103  */
104 void
WMP_deltabuf_prepare(DeltaBuf * db)105 WMP_deltabuf_prepare(DeltaBuf *db)
106 {
107   int i;
108   db->store = 0;
109   for (i=0;i<db->len;i++) {
110     db->is_on[i] = FALSE;
111   }
112 }
113 
114 /**
115  * Calculate delta coefficients of the specified point in the cycle buffer.
116  *
117  * @param db [i/o] delta cycle buffer
118  * @param cur [in] target point to calculate the delta coefficients
119  */
120 static void
WMP_deltabuf_calc(DeltaBuf * db,int cur)121 WMP_deltabuf_calc(DeltaBuf *db, int cur)
122 {
123   int n, theta, p;
124   float A1, A2, sum;
125   int last_valid_left, last_valid_right;
126 
127   for (n = 0; n < db->veclen; n++) {
128     sum = 0.0;
129     last_valid_left = last_valid_right = cur;
130     for (theta = 1; theta <= db->win; theta++) {
131       p = cur - theta;
132       if (p < 0) p += db->len;
133       if (db->is_on[p]) {
134 	A1 = db->mfcc[p][n];
135 	last_valid_left = p;
136       } else {
137 	A1 = db->mfcc[last_valid_left][n];
138       }
139       p = cur + theta;
140       if (p >= db->len) p -= db->len;
141       if (db->is_on[p]) {
142 	A2 = db->mfcc[p][n];
143 	last_valid_right = p;
144       } else {
145 	A2 = db->mfcc[last_valid_right][n];
146       }
147       sum += theta * (A2 - A1);
148     }
149     db->mfcc[cur][db->veclen + n] = sum / db->B;
150   }
151 }
152 
153 /**
154  * Store the given MFCC vector into the delta cycle buffer, and compute the
155  * latest delta coefficients.
156  *
157  * @param db [i/o] delta cycle buffer
158  * @param new_mfcc [in] MFCC vector
159  *
160  * @return TRUE if next delta coeff. computed, in that case it is saved
161  * in db->delta[], or FALSE if delta is not yet computed by short of data.
162  */
163 boolean
WMP_deltabuf_proceed(DeltaBuf * db,float * new_mfcc)164 WMP_deltabuf_proceed(DeltaBuf *db, float *new_mfcc)
165 {
166   int cur;
167   boolean ret;
168 
169   /* copy data to store point */
170   memcpy(db->mfcc[db->store], new_mfcc, sizeof(float) * db->veclen);
171   db->is_on[db->store] = TRUE;
172 
173   /* get current calculation point */
174   cur = db->store - db->win;
175   if (cur < 0) cur += db->len;
176 
177   /* if the current point is fulfilled, compute delta  */
178   if (db->is_on[cur]) {
179     WMP_deltabuf_calc(db, cur);
180     db->vec = db->mfcc[cur];
181     ret = TRUE;
182   } else {
183     ret = FALSE;
184   }
185 
186   /* move store pointer to next */
187   db->store++;
188   if (db->store >= db->len) db->store -= db->len;
189 
190   /* return TRUE if delta computed for current, or -1 if not calculated yet */
191   return (ret);
192 }
193 
194 /**
195  * Flush the delta cycle buffer the delta coefficients
196  * left in the cycle buffer.
197  *
198  * @param db [i/o] delta cycle buffer
199  *
200  * @return TRUE if next delta coeff. computed, in that case it is saved
201  * in db->delta[], or FALSE if all delta computation has been flushed and
202  * no data is available.
203  *
204  */
205 boolean
WMP_deltabuf_flush(DeltaBuf * db)206 WMP_deltabuf_flush(DeltaBuf *db)
207 {
208   int cur;
209   boolean ret;
210 
211   /* clear store point */
212   db->is_on[db->store] = FALSE;
213 
214   /* get current calculation point */
215   cur = db->store - db->win;
216   if (cur < 0) cur += db->len;
217 
218   /* if the current point if fulfilled, compute delta  */
219   if (db->is_on[cur]) {
220     WMP_deltabuf_calc(db, cur);
221     db->vec = db->mfcc[cur];
222     ret = TRUE;
223   } else {
224     ret = FALSE;
225   }
226 
227   /* move store pointer to next */
228   db->store++;
229   if (db->store >= db->len) db->store -= db->len;
230 
231   /* return TRUE if delta computed for current, or -1 if not calculated yet */
232   return (ret);
233 }
234 
235 /***********************************************************************/
236 /* MAP-CMN */
237 /***********************************************************************/
238 
239 /**
240  * Initialize MAP-CMN at startup.
241  *
242  * @param para [in] MFCC computation configuration parameter
243  * @param weight [in] initial cepstral mean weight
244  *
245  */
246 CMNWork *
CMN_realtime_new(Value * para,float weight)247 CMN_realtime_new(Value *para, float weight)
248 {
249   int i;
250 
251   CMNWork *c;
252 
253   c = (CMNWork *)mymalloc(sizeof(CMNWork));
254 
255   c->cweight = weight;
256   c->mfcc_dim = para->mfcc_dim + (para->c0 ? 1 : 0);
257   c->veclen = para->veclen;
258   c->mean = para->cmn ? TRUE : FALSE;
259   c->var = para->cvn ? TRUE : FALSE;
260   c->clist_max = CPSTEP;
261   c->clist_num = 0;
262   c->clist = (CMEAN *)mymalloc(sizeof(CMEAN) * c->clist_max);
263   for(i=0;i<c->clist_max;i++) {
264     c->clist[i].mfcc_sum = (float *)mymalloc(sizeof(float)*c->veclen);
265     if (c->var) c->clist[i].mfcc_var = (float *)mymalloc(sizeof(float)*c->veclen);
266     c->clist[i].framenum = 0;
267   }
268   c->now.mfcc_sum = (float *)mymalloc(sizeof(float) * c->veclen);
269   if (c->var) c->now.mfcc_var = (float *)mymalloc(sizeof(float) * c->veclen);
270 
271   c->cmean_init = (float *)mymalloc(sizeof(float) * c->veclen);
272   if (c->var) c->cvar_init = (float *)mymalloc(sizeof(float) * c->veclen);
273   c->cmean_init_set = FALSE;
274 
275   return c;
276 }
277 
278 /**
279  * Free work area for real-time CMN.
280  *
281  * @param c [i/o] CMN calculation work area
282  *
283  */
284 void
CMN_realtime_free(CMNWork * c)285 CMN_realtime_free(CMNWork *c)
286 {
287   int i;
288 
289   free(c->cmean_init);
290   free(c->now.mfcc_sum);
291   if (c->var) {
292     free(c->cvar_init);
293     free(c->now.mfcc_var);
294   }
295   for(i=0;i<c->clist_max;i++) {
296     if (c->var) free(c->clist[i].mfcc_var);
297     free(c->clist[i].mfcc_sum);
298   }
299   free(c->clist);
300   free(c);
301 }
302 
303 /**
304  * Prepare for MAP-CMN at start of each input
305  *
306  * @param c [i/o] CMN calculation work area
307  */
308 void
CMN_realtime_prepare(CMNWork * c)309 CMN_realtime_prepare(CMNWork *c)
310 {
311   int d;
312 
313   for(d=0;d<c->veclen;d++) c->now.mfcc_sum[d] = 0.0;
314   if (c->var) {
315     for(d=0;d<c->veclen;d++) c->now.mfcc_var[d] = 0.0;
316   }
317   c->now.framenum = 0;
318 }
319 
320 /**
321  * Perform MAP-CMN for incoming MFCC vectors
322  *
323  * @param c [i/o] CMN calculation work area
324  * @param mfcc [in] MFCC vector
325  *
326  */
327 void
CMN_realtime(CMNWork * c,float * mfcc)328 CMN_realtime(CMNWork *c, float *mfcc)
329 {
330   int d;
331   double x, y;
332 
333   c->now.framenum++;
334   if (c->cmean_init_set) {
335     /* initial data exists */
336     for(d=0;d<c->veclen;d++) {
337       /* accumulate current MFCC to sum */
338       c->now.mfcc_sum[d] += mfcc[d];
339       /* calculate map-mean */
340       x = c->now.mfcc_sum[d] + c->cweight * c->cmean_init[d];
341       y = (double)c->now.framenum + c->cweight;
342       x /= y;
343       if (c->var) {
344 	/* calculate map-var */
345 	c->now.mfcc_var[d] += (mfcc[d] - x) * (mfcc[d] - x);
346       }
347       if (c->mean && d < c->mfcc_dim) {
348 	/* mean normalization */
349 	mfcc[d] -= x;
350       }
351       if (c->var) {
352 	/* variance normalization */
353 	x = c->now.mfcc_var[d] + c->cweight * c->cvar_init[d];
354 	y = (double)c->now.framenum + c->cweight;
355 	mfcc[d] /= sqrt(x / y);
356       }
357     }
358   } else {
359     /* no initial data */
360     for(d=0;d<c->veclen;d++) {
361       /* accumulate current MFCC to sum */
362       c->now.mfcc_sum[d] += mfcc[d];
363       /* calculate current mean */
364       x = c->now.mfcc_sum[d] / c->now.framenum;
365       if (c->var) {
366 	/* calculate current variance */
367 	c->now.mfcc_var[d] += (mfcc[d] - x) * (mfcc[d] - x);
368       }
369       if (c->mean && d < c->mfcc_dim) {
370 	/* mean normalization */
371 	mfcc[d] -= x;
372       }
373 #if 0	   /* not perform variance normalization on no initial data */
374       if (c->var) {
375 	/* variance normalization */
376 	mfcc[d] /= sqrt(c->now.mfcc_var[d] / c->now.framenum);
377       }
378 #endif
379     }
380   }
381 }
382 
383 /**
384  * Update initial cepstral mean from previous utterances for next input.
385  *
386  * @param c [i/o] CMN calculation work area
387  */
388 void
CMN_realtime_update(CMNWork * c,HTK_Param * param)389 CMN_realtime_update(CMNWork *c, HTK_Param *param)
390 {
391   float *tmp, *tmp2;
392   int i, d;
393   int frames;
394 
395   /* if CMN_realtime was never called before this, return immediately */
396   /* this may occur by pausing just after startup */
397   if (c->now.framenum == 0) return;
398 
399   /* re-calculate variance based on the final mean at the given param */
400   if (c->var && param != NULL) {
401     float m, x;
402     if (param->samplenum != c->now.framenum) {
403       jlog("InternalError: CMN_realtime_update: param->samplenum != c->now.framenum\n");
404     } else if (param->veclen != c->veclen) {
405       jlog("InternalError: CMN_realtime_update: param->veclen != c->veclen\n");
406     } else {
407       for(d=0;d<c->veclen;d++) {
408 	m = c->now.mfcc_sum[d] / (float) c->now.framenum;
409 	x = 0;
410 	for(i=0;i<param->samplenum;i++) {
411 	  x += (param->parvec[i][d] - m) * (param->parvec[i][d] - m);
412 	}
413 	c->now.mfcc_var[d] = x;
414       }
415     }
416   }
417 
418   /* compute cepstral mean from now and previous sums up to CPMAX frames */
419   for(d=0;d<c->veclen;d++) c->cmean_init[d] = c->now.mfcc_sum[d];
420   if (c->var) {
421     for(d=0;d<c->veclen;d++) c->cvar_init[d] = c->now.mfcc_var[d];
422   }
423   frames = c->now.framenum;
424   for(i=0;i<c->clist_num;i++) {
425     for(d=0;d<c->veclen;d++) c->cmean_init[d] += c->clist[i].mfcc_sum[d];
426     if (c->var) {
427       for(d=0;d<c->veclen;d++) c->cvar_init[d] += c->clist[i].mfcc_var[d];
428     }
429     frames += c->clist[i].framenum;
430     if (frames >= CPMAX) break;
431   }
432   for(d=0;d<c->veclen;d++) c->cmean_init[d] /= (float) frames;
433   if (c->var) {
434     for(d=0;d<c->veclen;d++) c->cvar_init[d] /= (float) frames;
435   }
436 
437   c->cmean_init_set = TRUE;
438 
439   /* expand clist if neccessary */
440   if (c->clist_num == c->clist_max && frames < CPMAX) {
441     c->clist_max += CPSTEP;
442     c->clist = (CMEAN *)myrealloc(c->clist, sizeof(CMEAN) * c->clist_max);
443     for(i=c->clist_num;i<c->clist_max;i++) {
444       c->clist[i].mfcc_sum = (float *)mymalloc(sizeof(float)*c->veclen);
445       if (c->var) c->clist[i].mfcc_var = (float *)mymalloc(sizeof(float)*c->veclen);
446       c->clist[i].framenum = 0;
447     }
448   }
449 
450   /* shift clist */
451   tmp = c->clist[c->clist_max-1].mfcc_sum;
452   if (c->var) tmp2 = c->clist[c->clist_max-1].mfcc_var;
453   memmove(&(c->clist[1]), &(c->clist[0]), sizeof(CMEAN) * (c->clist_max - 1));
454   c->clist[0].mfcc_sum = tmp;
455   if (c->var) c->clist[0].mfcc_var = tmp2;
456   /* copy now to clist[0] */
457   memcpy(c->clist[0].mfcc_sum, c->now.mfcc_sum, sizeof(float) * c->veclen);
458   if (c->var) memcpy(c->clist[0].mfcc_var, c->now.mfcc_var, sizeof(float) * c->veclen);
459   c->clist[0].framenum = c->now.framenum;
460 
461   if (c->clist_num < c->clist_max) c->clist_num++;
462 
463 }
464 
465 /**
466  * Read binary with byte swap (assume file is Big Endian)
467  *
468  * @param buf [out] data buffer
469  * @param unitbyte [in] size of unit in bytes
470  * @param unitnum [in] number of units to be read
471  * @param fp [in] file pointer
472  *
473  * @return TRUE if required number of units are fully read, FALSE if failed.
474  */
475 static boolean
myread(void * buf,size_t unitbyte,int unitnum,FILE * fp)476 myread(void *buf, size_t unitbyte, int unitnum, FILE *fp)
477 {
478   if (myfread(buf, unitbyte, unitnum, fp) < (size_t)unitnum) {
479     return(FALSE);
480   }
481 #ifndef WORDS_BIGENDIAN
482   swap_bytes(buf, unitbyte, unitnum);
483 #endif
484   return(TRUE);
485 }
486 
487 /**
488  * Write binary with byte swap (assume data is Big Endian)
489  *
490  * @param buf [in] data buffer
491  * @param unitbyte [in] size of unit in bytes
492  * @param unitnum [in] number of units to write
493  * @param fd [in] file descriptor
494  *
495  * @return TRUE if required number of units are fully written, FALSE if failed.
496  */
497 static boolean
mywrite(void * buf,size_t unitbyte,size_t unitnum,int fd)498 mywrite(void *buf, size_t unitbyte, size_t unitnum, int fd)
499 {
500 #ifndef WORDS_BIGENDIAN
501   swap_bytes(buf, unitbyte, unitnum);
502 #endif
503   if (write(fd, buf, unitbyte * unitnum) < unitbyte * unitnum) {
504     return(FALSE);
505   }
506 #ifndef WORDS_BIGENDIAN
507   swap_bytes(buf, unitbyte, unitnum);
508 #endif
509   return(TRUE);
510 }
511 
512 /**
513  * Load CMN parameter from file.  If the number of MFCC dimension in the
514  * file does not match the specified one, an error will occur.
515  *
516  * @param c [i/o] CMN calculation work area
517  * @param filename [in] file name
518  *
519  * @return TRUE on success, FALSE on failure.
520  */
521 boolean
CMN_load_from_file(CMNWork * c,char * filename)522 CMN_load_from_file(CMNWork *c, char *filename)
523 {
524   FILE *fp;
525   int veclen;
526 
527   jlog("Stat: wav2mfcc-pipe: reading initial CMN from file \"%s\"\n", filename);
528   if ((fp = fopen_readfile(filename)) == NULL) {
529     jlog("Error: wav2mfcc-pipe: failed to open\n");
530     return(FALSE);
531   }
532   /* read header */
533   if (myread(&veclen, sizeof(int), 1, fp) == FALSE) {
534     jlog("Error: wav2mfcc-pipe: failed to read header\n");
535     fclose_readfile(fp);
536     return(FALSE);
537   }
538   /* check length */
539   if (veclen != c->veclen) {
540     jlog("Error: wav2mfcc-pipe: cepstral dimension mismatch\n");
541     jlog("Error: wav2mfcc-pipe: process = %d, file = %d\n", c->veclen, veclen);
542     fclose_readfile(fp);
543     return(FALSE);
544   }
545   /* read body */
546   if (myread(c->cmean_init, sizeof(float), c->veclen, fp) == FALSE) {
547     jlog("Error: wav2mfcc-pipe: failed to read mean for CMN\n");
548     fclose_readfile(fp);
549     return(FALSE);
550   }
551   if (c->var) {
552     if (myread(c->cvar_init, sizeof(float), c->veclen, fp) == FALSE) {
553       jlog("Error: wav2mfcc-pipe: failed to read variance for CVN\n");
554       fclose_readfile(fp);
555       return(FALSE);
556     }
557   }
558 
559   if (fclose_readfile(fp) == -1) {
560     jlog("Error: wav2mfcc-pipe: failed to close\n");
561     return(FALSE);
562   }
563 
564   c->cmean_init_set = TRUE;
565   jlog("Stat: wav2mfcc-pipe: read CMN parameter\n");
566 
567   return(TRUE);
568 }
569 
570 /**
571  * Save the current CMN vector to a file.
572  *
573  * @param c [i/o] CMN calculation work area
574  * @param filename [in] filename to save the data.
575  *
576  * @return TRUE on success, FALSE on failure.
577  */
578 boolean
CMN_save_to_file(CMNWork * c,char * filename)579 CMN_save_to_file(CMNWork *c, char *filename)
580 {
581   int fd;
582 
583   jlog("Stat: wav2mfcc-pipe: writing current cepstral data to file \"%s\"\n", filename);
584 
585   if ((fd = creat(filename, 0644)) == -1) {
586     jlog("Error: wav2mfcc-pipe: failed to open \"%s\" to write current cepstral data\n", filename);
587     return(FALSE);
588   }
589   /* write header */
590   if (mywrite(&(c->veclen), sizeof(int), 1, fd) == FALSE) {
591     jlog("Error: wav2mfcc-pipe: cannot write header to \"%s\" as current cepstral data\n", filename);
592     close(fd);
593     return(FALSE);
594   }
595   /* write body */
596   if (mywrite(c->cmean_init, sizeof(float), c->veclen, fd) == FALSE) {
597     jlog("Error: wav2mfcc-pipe: cannot write mean to \"%s\" as current cepstral data\n", filename);
598     close(fd);
599     return(FALSE);
600   }
601   if (c->var) {
602     if (mywrite(c->cvar_init, sizeof(float), c->veclen, fd) == FALSE) {
603       jlog("Error: wav2mfcc-pipe: cannot write variance to \"%s\" as current cepstrum\n", filename);
604       close(fd);
605       return(FALSE);
606     }
607   }
608 
609   close(fd);
610 
611   jlog("Stat: wav2mfcc-pipe: current cepstral data written to \"%s\"\n", filename);
612 
613   return(TRUE);
614 }
615 
616 
617 /***********************************************************************/
618 /* energy normalization and scaling on live input */
619 /***********************************************************************/
620 
621 /**
622  * Initialize work area for energy normalization on live input.
623  * This should be called once on startup.
624  *
625  * @param energy [in] energy normalization work area
626  *
627  */
628 void
energy_max_init(ENERGYWork * energy)629 energy_max_init(ENERGYWork *energy)
630 {
631   energy->max = 5.0;
632 }
633 
634 /**
635  * Prepare values for energy normalization on live input.
636  * This should be called before each input segment.
637  *
638  * @param energy [in] energy normalization work area
639  * @param para [in] MFCC computation configuration parameter
640  */
641 void
energy_max_prepare(ENERGYWork * energy,Value * para)642 energy_max_prepare(ENERGYWork *energy, Value *para)
643 {
644   energy->max_last = energy->max;
645   energy->min_last = energy->max - (para->silFloor * LOG_TEN) / 10.0;
646   energy->max = 0.0;
647 }
648 
649 /**
650  * Peform energy normalization using maximum of last input.
651  *
652  * @param energy [in] energy normalization work area
653  * @param f [in] raw energy
654  * @param para [in] MFCC computation configuration parameter
655  *
656  * @return value of the normalized log energy.
657  */
658 LOGPROB
energy_max_normalize(ENERGYWork * energy,LOGPROB f,Value * para)659 energy_max_normalize(ENERGYWork *energy, LOGPROB f, Value *para)
660 {
661   if (energy->max < f) energy->max = f;
662   if (f < energy->min_last) f = energy->min_last;
663   return(1.0 - (energy->max_last - f) * para->escale);
664 }
665