1 /**
2  * @file   gmm.c
3  *
4  * <JA>
5  * @brief  GMM �ˤ�����ϴ��Ѥ����VAD
6  *
7  * Gaussian Mixture Model (GMM) ����ư���˻��ꤵ�줿��硤Julius/Julian ��
8  * ����ȯ�ä��Ф��ƥե졼�ऴ�Ȥ˥�������׻������������ѥ��������Ф���.
9  * �����GMM�˴�Ť����ϲ�����ȯ�ø��ڤ���Ӵ��Ѥ��Ѥ�����. �ºݤη׻���
10  * ��1�ѥ���ǧ���������¹Ԥ��ƥꥢ�륿����˹Ԥʤ�졤��1�ѥ���λ��Ʊ����
11  * ��̤����Ϥ����.
12  *
13  * GMM�Υ������׻��ˤ� Gaussian pruning �� safe algorithm ���Ѥ���졤
14  * �ƥե졼��ˤ����ƾ�� N �Ĥ�����������������褦�˷׻������.
15  * �������̾��ǧ���Ѳ�����ǥ�ξ��Ȱۤʤꡤľ���ե졼��ν�̾����
16  * �Ѥ��Ƥ��ʤ�.
17  *
18  * GMM_VAD ������ϡ��嵭�����ϴ��Ѥ˲ä��ơ�short-pause segmentation ��
19  * Ʊ�����Ȥˤ��Ѥ��� VAD ���Ԥ���.
20  * </JA>
21  *
22  * <EN>
23  * @brief  Input rejection and VAD using GMM
24  *
 * When a Gaussian Mixture Model (GMM) is specified on startup, Julius/Julian
 * computes the frame-wise likelihood of each GMM for the given input
 * and accumulates a score for each GMM.  Input rejection is then
 * decided from these accumulated scores.  The computation is performed
 * on-line, concurrently with the 1st pass, so the result is available as
 * soon as the 1st pass ends.
31  *
32  * Gaussian pruning is performed using the safe algorithm in the computation
33  * of GMM scores.  In each frame, pruning will be done to fully compute only
34  * the top N Gaussians.  The algorithm is slightly simpler than AM computation,
35  * i.e. the score order of the previous frame is not used here.
36  *
37  * When GMM_VAD is defined, a GMM-based VAD will be enabled in addition to
38  * the input rejection, using the scheme of short-pause segmentation.
39  * </EN>
40  *
41  * @author Akinobu LEE
42  * @date   Tue Mar 15 05:14:10 2005
43  *
44  * $Revision: 1.4 $
45  *
46  */
47 
48 /*
49  * Copyright (c) 2003-2005 Shikano Lab., Nara Institute of Science and Technology
50  * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
51  * All rights reserved
52  */
53 
54 #include <julius/julius.h>
55 
56 #undef MES
57 
58 /**
59  * <JA>
60  * Gaussian�Υ�������׻��Ѥ�Gaussian�ꥹ�ȤΤɤΰ��֤��������٤������֤�.
61  *
62  * @param gc [i/o] GMM�׻��ѥ�����ꥢ
63  * @param score [in] ����������������
64  * @param len [in] ���ߤΥꥹ�Ȥ�Ĺ��
65  *
66  * @return �ꥹ�������������
67  * </JA>
68  * <EN>
69  * Return insertion point where a computed Gaussian score should be
70  * inserted in current list of computed Gaussians.
71  *
72  * @param gc [i/o] work area for GMM calculation
73  * @param score [in] a score to be inserted
74  * @param len [in] current length of the list
75  *
76  * @return index to insert the value at the list.
77  * </EN>
78  */
79 static int
gmm_find_insert_point(GMMCalc * gc,LOGPROB score,int len)80 gmm_find_insert_point(GMMCalc *gc, LOGPROB score, int len)
81 {
82   /* binary search on score */
83   int left = 0;
84   int right = len - 1;
85   int mid;
86 
87   while (left < right) {
88     mid = (left + right) / 2;
89     if (gc->OP_calced_score[mid] > score) {
90       left = mid + 1;
91     } else {
92       right = mid;
93     }
94   }
95   return(left);
96 }
97 
98 /**
99  * <JA>
100  * ����Gaussian�η׻���̤�׻��Ѥ�Gaussian�ꥹ�Ȥ˳�Ǽ����.
101  *
102  * @param gc [i/o] GMM�׻��ѥ�����ꥢ
103  * @param id [in] Gaussian �� GMM ��Ǥ��ֹ�
104  * @param score [in] ���� Gaussian �η׻����줿��������
105  * @param len [in] ���ߤΥꥹ�Ȥ�Ĺ���ʸ��߳�Ǽ����Ƥ��� Gaussian �ο���
106  *
107  * @return ��Ǽ��Υꥹ�Ȥ�Ĺ��.
108  * </JA>
109  * <EN>
110  * Store a Gaussian likelihood to the list of computed Gaussians.
111  *
112  * @param gc [i/o] work area for GMM calculation
113  * @param id [in] id of a Gaussian in the GMM to be stored
114  * @param score [in] the likelihood of the Gaussian to be stored
115  * @param len [in] current list length (= current number of Gaussians in cache)
116  *
117  * @return the current length of list after the storing.
118  * </EN>
119  */
120 static int
gmm_cache_push(GMMCalc * gc,int id,LOGPROB score,int len)121 gmm_cache_push(GMMCalc *gc, int id, LOGPROB score, int len)
122 {
123   int insertp;
124 
125   if (len == 0) {               /* first one */
126     gc->OP_calced_score[0] = score;
127     gc->OP_calced_id[0] = id;
128     return(1);
129   }
130   if (gc->OP_calced_score[len-1] >= score) { /* bottom */
131     if (len < gc->OP_gprune_num) {          /* append to bottom */
132       gc->OP_calced_score[len] = score;
133       gc->OP_calced_id[len] = id;
134       len++;
135     }
136     return len;
137   }
138   if (gc->OP_calced_score[0] < score) {
139     insertp = 0;
140   } else {
141     insertp = gmm_find_insert_point(gc, score, len);
142   }
143   if (len < gc->OP_gprune_num) {
144     memmove(&(gc->OP_calced_score[insertp+1]), &(gc->OP_calced_score[insertp]), sizeof(LOGPROB)*(len - insertp));
145     memmove(&(gc->OP_calced_id[insertp+1]), &(gc->OP_calced_id[insertp]), sizeof(int)*(len - insertp));
146   } else if (insertp < len - 1) {
147     memmove(&(gc->OP_calced_score[insertp+1]), &(gc->OP_calced_score[insertp]), sizeof(LOGPROB)*(len - insertp - 1));
148     memmove(&(gc->OP_calced_id[insertp+1]), &(gc->OP_calced_id[insertp]), sizeof(int)*(len - insertp - 1));
149   }
150   gc->OP_calced_score[insertp] = score;
151   gc->OP_calced_id[insertp] = id;
152   if (len < gc->OP_gprune_num) len++;
153   return(len);
154 }
155 
156 /**
157  * <JA>
158  * ���ߤΥե졼������ϥ٥��ȥ���Ф��� Gaussian �ν��ϳ�Ψ��׻�����.
159  * Gaussian pruning �ϹԤʤ�ʤ�.
160  *
161  * @param gc [i/o] GMM�׻��ѥ�����ꥢ
162  * @param binfo [in] Gaussian
163  *
164  * @return ���ϳ�Ψ���п���
165  * </JA>
166  * <EN>
167  * Compute an output probability of a Gaussian for the input vector of
168  * current frame.  No Gaussian pruning is performed in this function.
169  *
170  * @param gc [i/o] work area for GMM calculation
171  * @param binfo [in] Gaussian
172  *
173  * @return the log output probability.
174  * </EN>
175  */
176 static LOGPROB
gmm_compute_g_base(GMMCalc * gc,HTK_HMM_Dens * binfo)177 gmm_compute_g_base(GMMCalc *gc, HTK_HMM_Dens *binfo)
178 {
179   VECT tmp, x;
180   VECT *mean;
181   VECT *var;
182   VECT *vec = gc->OP_vec;
183   short veclen = gc->OP_veclen;
184 
185   if (binfo == NULL) return(LOG_ZERO);
186   mean = binfo->mean;
187   var = binfo->var->vec;
188   tmp = 0.0;
189   for (; veclen > 0; veclen--) {
190     x = *(vec++) - *(mean++);
191     tmp += x * x * *(var++);
192   }
193   return((tmp + binfo->gconst) * -0.5);
194 }
195 
196 /**
197  * <JA>
198  * ���ߤΥե졼������ϥ٥��ȥ���Ф��� Gaussian �ν��ϳ�Ψ��׻�����.
199  * �׻����ˤϸ��ꤷ�����ͤˤ�� safe pruning ��Ԥʤ�.
200  *
201  * @param gc [i/o] GMM�׻��ѥ�����ꥢ
202  * @param binfo [in] Gaussian
203  * @param thres [in] safe pruning �Τ���λ޴��ꤷ������
204  *
205  * @return ���ϳ�Ψ���п���
206  * </JA>
207  * <EN>
208  * Compute an output probability of a Gaussian for the input vector of
209  * current frame.  Safe pruning is performed in this function.
210  *
211  * @param gc [i/o] work area for GMM calculation
212  * @param binfo [in] Gaussian
213  * @param thres [in] pruning threshold for safe pruning
214  *
215  * @return the log output probability.
216  * </EN>
217  */
218 static LOGPROB
gmm_compute_g_safe(GMMCalc * gc,HTK_HMM_Dens * binfo,LOGPROB thres)219 gmm_compute_g_safe(GMMCalc *gc, HTK_HMM_Dens *binfo, LOGPROB thres)
220 {
221   VECT tmp, x;
222   VECT *mean;
223   VECT *var;
224   VECT *vec = gc->OP_vec;
225   short veclen = gc->OP_veclen;
226   VECT fthres = thres * (-2.0);
227 
228   if (binfo == NULL) return(LOG_ZERO);
229   mean = binfo->mean;
230   var = binfo->var->vec;
231   tmp = binfo->gconst;
232   for (; veclen > 0; veclen--) {
233     x = *(vec++) - *(mean++);
234     tmp += x * x * *(var++);
235     if (tmp > fthres)  return LOG_ZERO;
236   }
237   return(tmp * -0.5);
238 }
239 
240 /**
241  * <JA>
242  * GMM�׻��ˤ����� Gaussian pruning �Τ���Υ�����ꥢ����ݤ���
243  *
244  * @param gc [i/o] GMM�׻��ѥ�����ꥢ
245  * @param hmminfo [in] HMM ��¤��
246  * @param prune_num [in] Gaussian pruning �ˤ����Ʒ׻������̥�����ʬ�ۿ�
247  * </JA>
248  * <EN>
249  * Allocate work area for Gaussian pruning for GMM calculation.
250  *
251  * @param gc [i/o] work area for GMM calculation
252  * @param hmminfo [in] HMM structure
253  * @param prune_num [in] number of top Gaussians to be computed at the pruning
254  * </EN>
255  */
256 static void
gmm_gprune_safe_init(GMMCalc * gc,HTK_HMM_INFO * hmminfo,int prune_num)257 gmm_gprune_safe_init(GMMCalc *gc, HTK_HMM_INFO *hmminfo, int prune_num)
258 {
259   /* store the pruning num to local area */
260   gc->OP_gprune_num = prune_num;
261   /* maximum Gaussian set size = maximum mixture size * nstream */
262   gc->OP_calced_maxnum = hmminfo->maxmixturenum * gc->OP_nstream;
263   /* allocate memory for storing list of currently computed Gaussian in a frame */
264   gc->OP_calced_score = (LOGPROB *)mymalloc(sizeof(LOGPROB) * gc->OP_calced_maxnum);
265   gc->OP_calced_id = (int *)mymalloc(sizeof(int) * gc->OP_calced_maxnum);
266 }
267 
268 /**
269  * <JA>
270  * @brief  ������ʬ�۽�����γƥ�����ʬ�ۤθ��ե졼����Ф�����ϳ�Ψ��׻�����.
271  *
272  * Gaussian pruning �ˤ�ꡤ�ºݤˤϾ�� N �ĤΤߤ��ݾڤ���޴��꤬�Ԥʤ�졤
273  * ���������㤤������ʬ�ۤϷ׻�����ʤ�.
274  *
275  * �׻���̤Ϸ׻��Ѥ�Gaussian�ꥹ�� (OP_calced_score, OP_calced_id) ��
276  * ��Ǽ�����.
277  *
278  * @param gc [i/o] GMM�׻��ѥ�����ꥢ
279  * @param g [in] ������ʬ�۽���
280  * @param gnum [in] @a g ��Ĺ��
281  * </JA>
282  * <EN>
283  * @brief  Compute scores for a set of Gaussians with Gaussian pruning for
284  * the current frame.
285  *
286  * Gaussian pruning will be performed to guarantee only the top N Gaussians
287  * to be fully computed.  The results will be stored in the list of
288  * computed Gaussians in OP_calced_score and OP_calced_id.
289  *
290  * @param gc [i/o] work area for GMM calculation
291  * @param g [in] set of Gaussians
292  * @param gnum [in] length of @a g
293  * </EN>
294  */
295 static void
gmm_gprune_safe(GMMCalc * gc,HTK_HMM_Dens ** g,int gnum)296 gmm_gprune_safe(GMMCalc *gc, HTK_HMM_Dens **g, int gnum)
297 {
298   int i, num = 0;
299   LOGPROB score, thres;
300 
301   thres = LOG_ZERO;
302   for (i = 0; i < gnum; i++) {
303     if (num < gc->OP_gprune_num) {
304       score = gmm_compute_g_base(gc, g[i]);
305     } else {
306       score = gmm_compute_g_safe(gc, g[i], thres);
307       if (score <= thres) continue;
308     }
309     num = gmm_cache_push(gc, i, score, num);
310     thres = gc->OP_calced_score[num-1];
311   }
312   gc->OP_calced_num = num;
313 }
314 
315 /**
316  * <JA>
317  * ����GMM���֤θ��ե졼����Ф�����ϳ�Ψ��׻�����.
318  *
319  * @param gc [i/o] GMM�׻��ѥ�����ꥢ
320  * @param state [in] GMM ����
321  *
322  * @return ���ϳ�Ψ���п�������
323  * </JA>
324  * <EN>
325  * Compute the output probability of a GMM state for the current frame.
326  *
327  * @param gc [i/o] work area for GMM calculation
328  * @param state [in] GMM state
329  *
330  * @return the log probability.
331  * </EN>
332  */
333 static LOGPROB
gmm_calc_mix(GMMCalc * gc,HTK_HMM_State * state)334 gmm_calc_mix(GMMCalc *gc, HTK_HMM_State *state)
335 {
336   int i;
337   LOGPROB logprob, logprobsum;
338   int s;
339   PROB stream_weight;
340 
341 
342   /* compute Gaussian set */
343   logprobsum = 0.0;
344   for(s=0;s<gc->OP_nstream;s++) {
345     /* set stream weight */
346     if (state->w) stream_weight = state->w->weight[s];
347     else stream_weight = 1.0;
348     /* setup storage pointer for this mixture pdf */
349     gc->OP_vec = gc->OP_vec_stream[s];
350     gc->OP_veclen = gc->OP_veclen_stream[s];
351     /* compute output probabilities */
352     gmm_gprune_safe(gc, state->pdf[s]->b, state->pdf[s]->mix_num);
353     /* computed Gaussians will be set in:
354        score ... OP_calced_score[0..OP_calced_num]
355        id    ... OP_calced_id[0..OP_calced_num] */
356   /* sum */
357     for(i=0;i<gc->OP_calced_num;i++) {
358       gc->OP_calced_score[i] += state->pdf[s]->bweight[gc->OP_calced_id[i]];
359     }
360     /* add log probs */
361     logprob = addlog_array(gc->OP_calced_score, gc->OP_calced_num);
362     /* if outprob of a stream is zero, skip this stream */
363     if (logprob <= LOG_ZERO) continue;
364     /* sum all the obtained mixture scores */
365     logprobsum += logprob * stream_weight;
366 
367   }
368   if (logprobsum == 0.0) return(LOG_ZERO); /* no valid stream */
369   if (logprobsum <= LOG_ZERO) return(LOG_ZERO);	/* lowest == LOG_ZERO */
370   return (logprob * INV_LOG_TEN);
371 }
372 
373 /**
374  * <JA>
375  * ���Ϥλ���ե졼��ˤ�����GMM���֤Υ����������ᥤ��ؿ�.
376  *
377  * @param gc [i/o] GMM�׻��ѥ�����ꥢ
378  * @param t [in] �׻�����ե졼��
379  * @param stateinfo [in] GMM����
380  * @param param [in] ���ϥ٥��ȥ����
381  *
382  * @return ���ϳ�Ψ���п�������
383  * </JA>
384  * <EN>
385  * Main function to compute the output probability of a GMM state for
386  * the specified input frame.
387  *
388  * @param gc [i/o] work area for GMM calculation
389  * @param t [in] time frame on which the output probability should be computed
390  * @param stateinfo [in] GMM state
391  * @param param [in] input vector sequence
392  *
393  * @return the log output probability.
394  * </EN>
395  */
396 static LOGPROB
outprob_state_nocache(GMMCalc * gc,int t,HTK_HMM_State * stateinfo,HTK_Param * param)397 outprob_state_nocache(GMMCalc *gc, int t, HTK_HMM_State *stateinfo, HTK_Param *param)
398 {
399   int d, i;
400   /* set global values for outprob functions to access them */
401   for(d=0,i=0;i<gc->OP_nstream;i++) {
402     gc->OP_vec_stream[i] = &(param->parvec[t][d]);
403     d += gc->OP_veclen_stream[i];
404   }
405   return(gmm_calc_mix(gc, stateinfo));
406 }
407 
408 /************************************************************************/
409 /* global functions */
410 
411 /**
412  * <JA>
413  * GMM�η׻��Τ���ν����. ��ư���˰��٤����ƤФ��.
414  *
415  * @param recog [i/o] ������������
416  * </JA>
417  * <EN>
418  * Initialization for computing GMM likelihoods.  This will be called
419  * once on startup.
420  *
421  * @param recog [i/o] engine instance
422  * </EN>
423  *
424  * @callgraph
425  * @callergraph
426  *
427  */
428 boolean
gmm_init(Recog * recog)429 gmm_init(Recog *recog)
430 {
431   HTK_HMM_INFO *gmm;
432   HTK_HMM_Data *d;
433   GMMCalc *gc;
434   int i;
435 
436   gmm = recog->gmm;
437 
438   /* check GMM format */
439   /* tied-mixture GMM is not supported */
440   if (gmm->is_tied_mixture) {
441     jlog("ERROR: gmm_init: tied-mixture GMM is not supported\n");
442     return FALSE;
443   }
444   /* assume 3 state GMM (only one output state) */
445   for(d=gmm->start;d;d=d->next) {
446     if (d->state_num > 3) {
447       jlog("ERROR: gmm_init: more than three states (one output state) defined in GMM [%s]\n", d->name);
448       return FALSE;
449     }
450   }
451 
452   /* check if CMN needed */
453 
454   /* allocate work area */
455   if (recog->gc == NULL) {
456     gc = (GMMCalc *)mymalloc(sizeof(GMMCalc));
457     recog->gc = gc;
458   } else {
459     gc = recog->gc;
460   }
461 
462   /* allocate buffers */
463   gc->gmm_score = (LOGPROB *)mymalloc(sizeof(LOGPROB) * gmm->totalhmmnum);
464 
465 #ifdef GMM_VAD
466   gc->nframe = recog->jconf->detect.gmm_margin;
467   gc->rates = (LOGPROB *)mymalloc(sizeof(LOGPROB) * gc->nframe);
468 #endif
469 
470   gc->is_voice = (boolean *)mymalloc(sizeof(boolean) * gmm->totalhmmnum);
471   i = 0;
472   if (recog->jconf->reject.gmm_reject_cmn_string) {
473     for(d=recog->gmm->start;d;d=d->next) {
474       if (strstr(recog->jconf->reject.gmm_reject_cmn_string, d->name)) {
475 	gc->is_voice[i] = FALSE;
476       } else {
477 	gc->is_voice[i] = TRUE;
478       }
479       i++;
480     }
481   } else {
482     for(d=recog->gmm->start;d;d=d->next) {
483       gc->is_voice[i] = TRUE;
484       i++;
485     }
486   }
487 
488   /* initialize work area */
489   gc->OP_nstream = gmm->opt.stream_info.num;
490   for(i=0;i<gc->OP_nstream;i++) {
491     gc->OP_veclen_stream[i] = gmm->opt.stream_info.vsize[i];
492   }
493   gmm_gprune_safe_init(gc, gmm, recog->jconf->reject.gmm_gprune_num);
494 
495   /* check if variances are inversed */
496   if (!gmm->variance_inversed) {
497     /* here, inverse all variance values for faster computation */
498     htk_hmm_inverse_variances(gmm);
499     gmm->variance_inversed = TRUE;
500   }
501 
502   return TRUE;
503 }
504 
505 /**
506  * <JA>
507  * GMM�׻��Τ���ν�����Ԥʤ�. �����ϳ��Ϥ��Ȥ˸ƤФ��.
508  *
509  * @param recog [i/o] ������������
510  * </JA>
511  * <EN>
512  * Prepare for the next GMM computation.  This will be called just before
513  * an input begins.
514  *
515  * @param recog [i/o] engine instance
516  * </EN>
517  *
518  * @callgraph
519  * @callergraph
520  */
521 void
gmm_prepare(Recog * recog)522 gmm_prepare(Recog *recog)
523 {
524   HTK_HMM_Data *d;
525   int i;
526 
527   /* initialize score buffer and frame count */
528   i = 0;
529   for(d=recog->gmm->start;d;d=d->next) {
530     recog->gc->gmm_score[i] = 0.0;
531     i++;
532   }
533 #ifdef GMM_VAD
534   for(i=0;i<recog->gc->nframe;i++) recog->gc->rates[i] = 0.0;
535   recog->gc->framep = 0;
536   recog->gc->filled = FALSE;
537   recog->gc->in_voice = FALSE;
538 #endif
539 
540   recog->gc->framecount = 0;
541 
542 #ifdef GMM_VAD_DEBUG
543   printf("GMM_VAD: init\n");
544 #endif
545 }
546 
547 /**
548  * <JA>
549  * Ϳ����줿���ϥ٥��ȥ����Τ���ե졼��ˤĤ��ơ���GMM�Υ�������׻�����
550  * �׻���̤� gmm_score ���ѻ�����.
551  *
552  * GMM_VAD ������ϡ���� VAD Ƚ�ꤹ�뤿��ˡ���� jconf->detect.gmm_margin
553  * �ե졼��ʬ�� VAD ������ �ʲ���GMM�κ��祹���� - ����GMM�κ��祹�����ˤ�
554  * ��¸�����.
555  *
556  * @param recog [i/o] ������������
557  * </JA>
558  * <EN>
559  * Compute output probabilities of all GMM for a given input vector, and
560  * accumulate the results to the gmm_score buffer.
561  *
562  * When GMM_VAD is defined, VAD scores,
563  * "(maximum score of speech GMMs) - (maximum score of noise GMMs)" of
564  * last frames (jconf->detect.gmm_margin) will be stored for later VAD
565  * decision.
566  *
567  * @param recog [i/o] engine instance
568  * </EN>
569  *
570  * @callgraph
571  * @callergraph
572  */
573 void
gmm_proceed(Recog * recog)574 gmm_proceed(Recog *recog)
575 {
576   HTK_HMM_Data *d;
577   GMMCalc *gc;
578   int i;
579   MFCCCalc *mfcc;
580   LOGPROB score;
581 #ifdef GMM_VAD
582   LOGPROB max_n;
583   LOGPROB max_v;
584 #endif
585 
586   mfcc = recog->gmmmfcc;
587   gc = recog->gc;
588 
589   if (!mfcc->valid) return;
590 
591   gc->framecount++;
592 
593 #ifdef GMM_VAD
594   max_n = max_v = LOG_ZERO;
595 #endif
596 
597   i = 0;
598   for(d=recog->gmm->start;d;d=d->next) {
599     score = outprob_state_nocache(gc, mfcc->f, d->s[1], mfcc->param);
600     gc->gmm_score[i] += score;
601 #ifdef GMM_VAD
602     if (gc->is_voice[i]) {
603       if (max_v < score) max_v = score;
604     } else {
605       if (max_n < score) max_n = score;
606     }
607 #endif
608 #ifdef MES
609     jlog("DEBUG: [%s: total=%f avg=%f]\n", d->name, gc->gmm_score[i], gc->gmm_score[i] / (float)gc->framecount);
610 #endif
611     i++;
612   }
613 #ifdef GMM_VAD
614 #ifdef GMM_VAD_DEBUG
615   //printf("GMM_VAD: max_v = %f, max_n = %f, rate = %f\n", max_v, max_n, max_v - max_n, gc->framep);
616 #endif
617   /* set rate of this frame */
618   gc->rates[gc->framep] = max_v - max_n;
619 #ifdef GMM_VAD_DEBUG
620   printf("GMM_VAD: %f\n", max_v - max_n);
621 #endif
622   /* increment current frame pointer */
623   gc->framep++;
624   /* if reached end, go to start point */
625   if (gc->framep >= gc->nframe) {
626     gc->filled = TRUE;
627     gc->framep = 0;
628   }
629 #endif
630 }
631 
632 /**
633  * <JA>
634  * @brief  GMM�η׻���λ������̤���Ϥ���.
635  *
636  * gmm_proceed() �ˤ�ä����Ѥ��줿�ƥե졼�ऴ�ȤΥ��������顤
637  * ���祹������GMM����ꤹ��. ���λ����Ψ�˴�Ť������٤�׻���
638  * �ǽ�Ū�ʷ�̤� result_gmm() �ˤ�äƽ��Ϥ���.
639  *
640  * @param recog [i/o] ������������
641  * </JA>
642  * <EN>
643  * @brief  Finish the GMM computation for an input, and output the result.
644  *
645  * The GMM of the maximum score is finally determined from the accumulated
646  * scores computed by gmm_proceed(), and compute the confidence score of the
647  * maximum GMM using posterior probability.  Then the result will be output
648  * using result_gmm().
649  *
650  * @param recog [i/o] engine instance
651  * </EN>
652  *
653  * @callgraph
654  * @callergraph
655  */
656 void
gmm_end(Recog * recog)657 gmm_end(Recog *recog)
658 {
659   HTK_HMM_INFO *gmm;
660   LOGPROB *score;
661   HTK_HMM_Data *d;
662   LOGPROB maxprob;
663   HTK_HMM_Data *dmax;
664 #ifdef CONFIDENCE_MEASURE
665   LOGPROB sum;
666 #endif
667   int i;
668   int maxid;
669 
670   if (recog->gc->framecount == 0) return;
671 
672   gmm = recog->gmm;
673   score = recog->gc->gmm_score;
674 
675   /* get max score */
676   i = 0;
677   maxprob = LOG_ZERO;
678   dmax = NULL;
679   maxid = 0;
680   for(d=gmm->start;d;d=d->next) {
681     if (maxprob < score[i]) {
682       dmax = d;
683       maxprob = score[i];
684       maxid = i;
685     }
686     i++;
687   }
688   recog->gc->max_d = dmax;
689   recog->gc->max_i = maxid;
690 
691 #ifdef CONFIDENCE_MEASURE
692   /* compute CM */
693   sum = 0.0;
694   i = 0;
695   for(d=gmm->start;d;d=d->next) {
696     //sum += pow(10, recog->jconf->annotate.cm_alpha * (score[i] - maxprob));
697     sum += pow(10, 0.05 * (score[i] - maxprob));
698     i++;
699   }
700   recog->gc->gmm_max_cm = 1.0 / sum;
701 #endif
702 
703   /* output result */
704   callback_exec(CALLBACK_RESULT_GMM, recog);
705 
706 }
707 
708 
709 /**
710  * <JA>
711  * GMM�μ��̷�̡��Ǹ�����Ϥ��������ϤȤ���ͭ���Ǥ��ä���
712  * ̵���Ǥ��ä������֤�.
713  *
714  * @param recog [i/o] ������������
715  *
716  * @return ��̤�GMM��̾���� gmm_reject_cmn_string ���̵����� valid �Ȥ���
717  * TRUE, ����� invalid �Ȥ��� FALSE ���֤�.
718  * </JA>
719  * <EN>
720  * Return whether the last input was valid or invalid, from the result of
721  * GMM computation.
722  *
723  * @param recog [i/o] engine instance
724  *
725  * @return TRUE if input is valid, i.e. the name of maximum GMM is not included
726  * in gmm_reject_cmn_string, or FALSE if input is invalid, i.e. the name is
727  * included in that string.
728  * </EN>
729  *
730  * @callgraph
731  * @callergraph
732  */
733 boolean
gmm_valid_input(Recog * recog)734 gmm_valid_input(Recog *recog)
735 {
736   if (recog->gc->max_d == NULL) return FALSE;
737   if (recog->gc->is_voice[recog->gc->max_i]) {
738     return TRUE;
739   }
740   return FALSE;
741 }
742 
743 /**
744  * <EN>
745  * Free work area used for GMM calculation.
746  * </EN>
747  * <JA>
748  * GMM�׻����Ѥ���������ꥢ��������.
749  * </JA>
750  *
751  * @param recog [i/o] engine instance
752  *
753  * @callgraph
754  * @callergraph
755  *
756  */
757 void
gmm_free(Recog * recog)758 gmm_free(Recog *recog)
759 {
760   if (recog->gc) {
761     free(recog->gc->OP_calced_score);
762     free(recog->gc->OP_calced_id);
763     free(recog->gc->is_voice);
764 #ifdef GMM_VAD
765     free(recog->gc->rates);
766 #endif
767     free(recog->gc->gmm_score);
768     free(recog->gc);
769     recog->gc = NULL;
770   }
771 }
772 
773 #ifdef GMM_VAD
774 
775 /**
776  * <EN>
777  * Compute score of voice activity from the last (jconf->detect.gmm_margin)
778  * frames.  Positive value designates speech, and negative means noise.
779  * </EN>
780  * <JA>
781  * ľ���� (jconf->detect.gmm_margin) �ե졼��ʬ�Υ���������
782  * voice activity �Υ�������׻�����. �����ͤϲ���������ͤϻ�����ɽ��.
783  * </JA>
784  *
785  * @param gc [i/o] work area for GMM calculation
786  * @param mean_ret [out] mean value of last (jconf->detect.gmm_margin) frames
787  * @param var_ret [out] variance of last (jconf->detect.gmm_margin) frames
788  * @param count_ret [out] count of speech frames in last (jconf->detect.gmm_margin) frames
789  *
790  */
791 static void
voice_activity_score(GMMCalc * gc,float * mean_ret,float * var_ret,int * count_ret)792 voice_activity_score(GMMCalc *gc, float *mean_ret, float *var_ret, int *count_ret)
793 {
794   int i, len;
795   LOGPROB mean;
796   LOGPROB var;
797   LOGPROB x;
798   int count;
799 
800   if (!gc->filled) {
801     /* cycle buffer not filled yet */
802     *mean_ret = 0.0;
803     *var_ret = 0.0;
804     *count_ret = 0;
805     return;
806   }
807 
808   if (gc->filled) {
809     len = gc->nframe;
810   } else {
811     len = gc->framep;
812   }
813 
814   mean = 0;
815   count = 0;
816   for(i=0;i<len;i++) {
817     mean += gc->rates[i];
818     if (gc->rates[i] > 0.0) count++;
819   }
820   mean /= (float)len;
821   var = 0.0;
822   for(i=0;i<len;i++) {
823     x = mean - gc->rates[i];
824     var += x * x;
825   }
826   var /= (float)len;
827 
828   *mean_ret = mean;
829   *var_ret = var;
830   *count_ret = count;
831 }
832 
833 /**
834  * <EN>
835  * Check if trigger of speech / noise segment.  If we are in noise segment
836  * and some speech input begins at this frame, recog->gc->up_trigger will
837  * be set to TRUE.  If current is in speech segment and it ended at
838  * this frame, recog->gc->down_trigger will be set to FALSE.
839  * </EN>
840  * <JA>
841  * ����/������֤ζ��ڤ���Τ���. ����ޤǤ�������֤Ǥ��Υե졼���
842  * �����ȥꥬ���Τ����Ȥ���recog->gc->up_trigger �� TRUE �˥��åȤ���. ����
843  * ������֤Ƕ�ֽ�λ���Τ����Ȥ���recog->gc->down_trigger �� TRUE ��
844  * ���åȤ���.
845  * </JA>
846  *
847  * @param recog [i/o] engine instance
848  *
849  * @callgraph
850  * @callergraph
851  */
852 void
gmm_check_trigger(Recog * recog)853 gmm_check_trigger(Recog *recog)
854 {
855   GMMCalc *gc;
856   gc = recog->gc;
857   float mean;
858   float var;
859   int count;
860 
861   gc->up_trigger = gc->down_trigger = FALSE;
862 
863   voice_activity_score(gc, &mean, &var, &count);
864 
865   if (gc->in_voice) {
866     if (mean <= recog->jconf->detect.gmm_downtrigger_thres) {
867       gc->down_trigger = TRUE;
868       gc->in_voice = FALSE;
869     }
870   } else {
871     if (mean >= recog->jconf->detect.gmm_uptrigger_thres) {
872       gc->up_trigger = TRUE;
873       gc->in_voice = TRUE;
874     }
875   }
876 
877 #ifdef GMM_VAD_DEBUG
878   printf("GMM_VAD: %s: %f %f %d", gc->in_voice ? "VOICE" : "NOISE", mean, var, count);
879   if (gc->up_trigger) printf(": BEGIN");
880   if (gc->down_trigger) printf(": END");
881   printf("\n");
882 #endif
883 
884 }
885 #endif /* GMM_VAD */
886 
887 /* end of file */
888