1 /**
2 * @file gmm.c
3 *
4 * <JA>
5 * @brief GMM �ˤ�����ϴ��Ѥ����VAD
6 *
7 * Gaussian Mixture Model (GMM) ����ư���˻��ꤵ�줿��硤Julius/Julian ��
8 * ����ȯ�ä��Ф��ƥե졼�ऴ�Ȥ˥�������������������ѥ������Ф���.
9 * �����GMM�˴�Ť����ϲ�����ȯ�ø��ڤ���Ӵ��Ѥ��Ѥ�����. �ºݤη���
10 * ��1�ѥ���ǧ���������¹Ԥ��ƥꥢ�륿����˹Ԥʤ�졤��1�ѥ���λ��Ʊ����
11 * ��̤����Ϥ����.
12 *
13 * GMM�Υ��������ˤ� Gaussian pruning �� safe algorithm ���Ѥ���졤
14 * �ƥե졼��ˤ����ƾ�� N �Ĥ�����������������褦�˷������.
15 * �������̾��ǧ���Ѳ�����ǥ�ξ��Ȱۤʤꡤľ���ե졼��ν�̾����
16 * �Ѥ��Ƥ��ʤ�.
17 *
18 * GMM_VAD ������ϡ��嵭�����ϴ��Ѥ˲ä��ơ�short-pause segmentation ��
19 * Ʊ�����Ȥˤ��Ѥ��� VAD ���Ԥ���.
20 * </JA>
21 *
22 * <EN>
23 * @brief Input rejection and VAD using GMM
24 *
 * When a Gaussian Mixture Model (GMM) is specified on startup, Julius/Julian
 * computes the frame-wise likelihood of each GMM for the given input and
 * accumulates a score for each GMM.  Input rejection is then decided from
 * those accumulated scores.  The GMM computation runs on-line, concurrently
 * with the 1st pass, so the result is available as soon as the 1st pass
 * ends.
31 *
32 * Gaussian pruning is performed using the safe algorithm in the computation
33 * of GMM scores. In each frame, pruning will be done to fully compute only
34 * the top N Gaussians. The algorithm is slightly simpler than AM computation,
35 * i.e. the score order of the previous frame is not used here.
36 *
37 * When GMM_VAD is defined, a GMM-based VAD will be enabled in addition to
38 * the input rejection, using the scheme of short-pause segmentation.
39 * </EN>
40 *
41 * @author Akinobu LEE
42 * @date Tue Mar 15 05:14:10 2005
43 *
44 * $Revision: 1.4 $
45 *
46 */
47
48 /*
49 * Copyright (c) 2003-2005 Shikano Lab., Nara Institute of Science and Technology
50 * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
51 * All rights reserved
52 */
53
54 #include <julius/julius.h>
55
56 #undef MES
57
58 /**
59 * <JA>
60 * Gaussian�Υ���������Ѥ�Gaussian�ꥹ�ȤΤɤΰ��֤��������٤������֤�.
61 *
62 * @param gc [i/o] GMM���ѥ�����ꥢ
63 * @param score [in] ����������������
64 * @param len [in] ���ߤΥꥹ�Ȥ�Ĺ��
65 *
66 * @return �ꥹ�������������
67 * </JA>
68 * <EN>
69 * Return insertion point where a computed Gaussian score should be
70 * inserted in current list of computed Gaussians.
71 *
72 * @param gc [i/o] work area for GMM calculation
73 * @param score [in] a score to be inserted
74 * @param len [in] current length of the list
75 *
76 * @return index to insert the value at the list.
77 * </EN>
78 */
79 static int
gmm_find_insert_point(GMMCalc * gc,LOGPROB score,int len)80 gmm_find_insert_point(GMMCalc *gc, LOGPROB score, int len)
81 {
82 /* binary search on score */
83 int left = 0;
84 int right = len - 1;
85 int mid;
86
87 while (left < right) {
88 mid = (left + right) / 2;
89 if (gc->OP_calced_score[mid] > score) {
90 left = mid + 1;
91 } else {
92 right = mid;
93 }
94 }
95 return(left);
96 }
97
98 /**
99 * <JA>
100 * ����Gaussian�η���̤���Ѥ�Gaussian�ꥹ�Ȥ˳�Ǽ����.
101 *
102 * @param gc [i/o] GMM���ѥ�����ꥢ
103 * @param id [in] Gaussian �� GMM ��Ǥ��ֹ�
104 * @param score [in] ���� Gaussian �η����줿��������
105 * @param len [in] ���ߤΥꥹ�Ȥ�Ĺ���ʸ��߳�Ǽ����Ƥ��� Gaussian �ο���
106 *
107 * @return ��Ǽ��Υꥹ�Ȥ�Ĺ��.
108 * </JA>
109 * <EN>
110 * Store a Gaussian likelihood to the list of computed Gaussians.
111 *
112 * @param gc [i/o] work area for GMM calculation
113 * @param id [in] id of a Gaussian in the GMM to be stored
114 * @param score [in] the likelihood of the Gaussian to be stored
115 * @param len [in] current list length (= current number of Gaussians in cache)
116 *
117 * @return the current length of list after the storing.
118 * </EN>
119 */
120 static int
gmm_cache_push(GMMCalc * gc,int id,LOGPROB score,int len)121 gmm_cache_push(GMMCalc *gc, int id, LOGPROB score, int len)
122 {
123 int insertp;
124
125 if (len == 0) { /* first one */
126 gc->OP_calced_score[0] = score;
127 gc->OP_calced_id[0] = id;
128 return(1);
129 }
130 if (gc->OP_calced_score[len-1] >= score) { /* bottom */
131 if (len < gc->OP_gprune_num) { /* append to bottom */
132 gc->OP_calced_score[len] = score;
133 gc->OP_calced_id[len] = id;
134 len++;
135 }
136 return len;
137 }
138 if (gc->OP_calced_score[0] < score) {
139 insertp = 0;
140 } else {
141 insertp = gmm_find_insert_point(gc, score, len);
142 }
143 if (len < gc->OP_gprune_num) {
144 memmove(&(gc->OP_calced_score[insertp+1]), &(gc->OP_calced_score[insertp]), sizeof(LOGPROB)*(len - insertp));
145 memmove(&(gc->OP_calced_id[insertp+1]), &(gc->OP_calced_id[insertp]), sizeof(int)*(len - insertp));
146 } else if (insertp < len - 1) {
147 memmove(&(gc->OP_calced_score[insertp+1]), &(gc->OP_calced_score[insertp]), sizeof(LOGPROB)*(len - insertp - 1));
148 memmove(&(gc->OP_calced_id[insertp+1]), &(gc->OP_calced_id[insertp]), sizeof(int)*(len - insertp - 1));
149 }
150 gc->OP_calced_score[insertp] = score;
151 gc->OP_calced_id[insertp] = id;
152 if (len < gc->OP_gprune_num) len++;
153 return(len);
154 }
155
156 /**
157 * <JA>
158 * ���ߤΥե졼������ϥ٥��ȥ���Ф��� Gaussian �ν��ϳ�Ψ�������.
159 * Gaussian pruning �ϹԤʤ�ʤ�.
160 *
161 * @param gc [i/o] GMM���ѥ�����ꥢ
162 * @param binfo [in] Gaussian
163 *
164 * @return ���ϳ�Ψ���п���
165 * </JA>
166 * <EN>
167 * Compute an output probability of a Gaussian for the input vector of
168 * current frame. No Gaussian pruning is performed in this function.
169 *
170 * @param gc [i/o] work area for GMM calculation
171 * @param binfo [in] Gaussian
172 *
173 * @return the log output probability.
174 * </EN>
175 */
176 static LOGPROB
gmm_compute_g_base(GMMCalc * gc,HTK_HMM_Dens * binfo)177 gmm_compute_g_base(GMMCalc *gc, HTK_HMM_Dens *binfo)
178 {
179 VECT tmp, x;
180 VECT *mean;
181 VECT *var;
182 VECT *vec = gc->OP_vec;
183 short veclen = gc->OP_veclen;
184
185 if (binfo == NULL) return(LOG_ZERO);
186 mean = binfo->mean;
187 var = binfo->var->vec;
188 tmp = 0.0;
189 for (; veclen > 0; veclen--) {
190 x = *(vec++) - *(mean++);
191 tmp += x * x * *(var++);
192 }
193 return((tmp + binfo->gconst) * -0.5);
194 }
195
196 /**
197 * <JA>
198 * ���ߤΥե졼������ϥ٥��ȥ���Ф��� Gaussian �ν��ϳ�Ψ�������.
199 * �����ˤϸ��ꤷ�����ͤˤ�� safe pruning ��Ԥʤ�.
200 *
201 * @param gc [i/o] GMM���ѥ�����ꥢ
202 * @param binfo [in] Gaussian
203 * @param thres [in] safe pruning �Τ���λ��ꤷ������
204 *
205 * @return ���ϳ�Ψ���п���
206 * </JA>
207 * <EN>
208 * Compute an output probability of a Gaussian for the input vector of
209 * current frame. Safe pruning is performed in this function.
210 *
211 * @param gc [i/o] work area for GMM calculation
212 * @param binfo [in] Gaussian
213 * @param thres [in] pruning threshold for safe pruning
214 *
215 * @return the log output probability.
216 * </EN>
217 */
218 static LOGPROB
gmm_compute_g_safe(GMMCalc * gc,HTK_HMM_Dens * binfo,LOGPROB thres)219 gmm_compute_g_safe(GMMCalc *gc, HTK_HMM_Dens *binfo, LOGPROB thres)
220 {
221 VECT tmp, x;
222 VECT *mean;
223 VECT *var;
224 VECT *vec = gc->OP_vec;
225 short veclen = gc->OP_veclen;
226 VECT fthres = thres * (-2.0);
227
228 if (binfo == NULL) return(LOG_ZERO);
229 mean = binfo->mean;
230 var = binfo->var->vec;
231 tmp = binfo->gconst;
232 for (; veclen > 0; veclen--) {
233 x = *(vec++) - *(mean++);
234 tmp += x * x * *(var++);
235 if (tmp > fthres) return LOG_ZERO;
236 }
237 return(tmp * -0.5);
238 }
239
240 /**
241 * <JA>
242 * GMM���ˤ����� Gaussian pruning �Τ���Υ�����ꥢ����ݤ���
243 *
244 * @param gc [i/o] GMM���ѥ�����ꥢ
245 * @param hmminfo [in] HMM ��¤��
246 * @param prune_num [in] Gaussian pruning �ˤ����Ʒ������̥�����ʬ�ۿ�
247 * </JA>
248 * <EN>
249 * Allocate work area for Gaussian pruning for GMM calculation.
250 *
251 * @param gc [i/o] work area for GMM calculation
252 * @param hmminfo [in] HMM structure
253 * @param prune_num [in] number of top Gaussians to be computed at the pruning
254 * </EN>
255 */
256 static void
gmm_gprune_safe_init(GMMCalc * gc,HTK_HMM_INFO * hmminfo,int prune_num)257 gmm_gprune_safe_init(GMMCalc *gc, HTK_HMM_INFO *hmminfo, int prune_num)
258 {
259 /* store the pruning num to local area */
260 gc->OP_gprune_num = prune_num;
261 /* maximum Gaussian set size = maximum mixture size * nstream */
262 gc->OP_calced_maxnum = hmminfo->maxmixturenum * gc->OP_nstream;
263 /* allocate memory for storing list of currently computed Gaussian in a frame */
264 gc->OP_calced_score = (LOGPROB *)mymalloc(sizeof(LOGPROB) * gc->OP_calced_maxnum);
265 gc->OP_calced_id = (int *)mymalloc(sizeof(int) * gc->OP_calced_maxnum);
266 }
267
268 /**
269 * <JA>
270 * @brief ������ʬ�۽�����γƥ�����ʬ�ۤθ��ե졼����Ф�����ϳ�Ψ�������.
271 *
272 * Gaussian pruning �ˤ�ꡤ�ºݤˤϾ�� N �ĤΤߤ��ݾڤ�����꤬�Ԥʤ�졤
273 * ���������㤤������ʬ�ۤϷ�����ʤ�.
274 *
275 * ����̤Ϸ��Ѥ�Gaussian�ꥹ�� (OP_calced_score, OP_calced_id) ��
276 * ��Ǽ�����.
277 *
278 * @param gc [i/o] GMM���ѥ�����ꥢ
279 * @param g [in] ������ʬ�۽���
280 * @param gnum [in] @a g ����
281 * </JA>
282 * <EN>
283 * @brief Compute scores for a set of Gaussians with Gaussian pruning for
284 * the current frame.
285 *
286 * Gaussian pruning will be performed to guarantee only the top N Gaussians
287 * to be fully computed. The results will be stored in the list of
288 * computed Gaussians in OP_calced_score and OP_calced_id.
289 *
290 * @param gc [i/o] work area for GMM calculation
291 * @param g [in] set of Gaussians
292 * @param gnum [in] length of @a g
293 * </EN>
294 */
295 static void
gmm_gprune_safe(GMMCalc * gc,HTK_HMM_Dens ** g,int gnum)296 gmm_gprune_safe(GMMCalc *gc, HTK_HMM_Dens **g, int gnum)
297 {
298 int i, num = 0;
299 LOGPROB score, thres;
300
301 thres = LOG_ZERO;
302 for (i = 0; i < gnum; i++) {
303 if (num < gc->OP_gprune_num) {
304 score = gmm_compute_g_base(gc, g[i]);
305 } else {
306 score = gmm_compute_g_safe(gc, g[i], thres);
307 if (score <= thres) continue;
308 }
309 num = gmm_cache_push(gc, i, score, num);
310 thres = gc->OP_calced_score[num-1];
311 }
312 gc->OP_calced_num = num;
313 }
314
315 /**
316 * <JA>
317 * ����GMM���֤θ��ե졼����Ф�����ϳ�Ψ�������.
318 *
319 * @param gc [i/o] GMM���ѥ�����ꥢ
320 * @param state [in] GMM ����
321 *
322 * @return ���ϳ�Ψ���п�������
323 * </JA>
324 * <EN>
325 * Compute the output probability of a GMM state for the current frame.
326 *
327 * @param gc [i/o] work area for GMM calculation
328 * @param state [in] GMM state
329 *
330 * @return the log probability.
331 * </EN>
332 */
333 static LOGPROB
gmm_calc_mix(GMMCalc * gc,HTK_HMM_State * state)334 gmm_calc_mix(GMMCalc *gc, HTK_HMM_State *state)
335 {
336 int i;
337 LOGPROB logprob, logprobsum;
338 int s;
339 PROB stream_weight;
340
341
342 /* compute Gaussian set */
343 logprobsum = 0.0;
344 for(s=0;s<gc->OP_nstream;s++) {
345 /* set stream weight */
346 if (state->w) stream_weight = state->w->weight[s];
347 else stream_weight = 1.0;
348 /* setup storage pointer for this mixture pdf */
349 gc->OP_vec = gc->OP_vec_stream[s];
350 gc->OP_veclen = gc->OP_veclen_stream[s];
351 /* compute output probabilities */
352 gmm_gprune_safe(gc, state->pdf[s]->b, state->pdf[s]->mix_num);
353 /* computed Gaussians will be set in:
354 score ... OP_calced_score[0..OP_calced_num]
355 id ... OP_calced_id[0..OP_calced_num] */
356 /* sum */
357 for(i=0;i<gc->OP_calced_num;i++) {
358 gc->OP_calced_score[i] += state->pdf[s]->bweight[gc->OP_calced_id[i]];
359 }
360 /* add log probs */
361 logprob = addlog_array(gc->OP_calced_score, gc->OP_calced_num);
362 /* if outprob of a stream is zero, skip this stream */
363 if (logprob <= LOG_ZERO) continue;
364 /* sum all the obtained mixture scores */
365 logprobsum += logprob * stream_weight;
366
367 }
368 if (logprobsum == 0.0) return(LOG_ZERO); /* no valid stream */
369 if (logprobsum <= LOG_ZERO) return(LOG_ZERO); /* lowest == LOG_ZERO */
370 return (logprob * INV_LOG_TEN);
371 }
372
373 /**
374 * <JA>
375 * ���Ϥλ���ե졼��ˤ�����GMM���֤Υ����������ᥤ��ؿ�.
376 *
377 * @param gc [i/o] GMM���ѥ�����ꥢ
378 * @param t [in] ������ե졼��
379 * @param stateinfo [in] GMM����
380 * @param param [in] ���ϥ٥��ȥ����
381 *
382 * @return ���ϳ�Ψ���п�������
383 * </JA>
384 * <EN>
385 * Main function to compute the output probability of a GMM state for
386 * the specified input frame.
387 *
388 * @param gc [i/o] work area for GMM calculation
389 * @param t [in] time frame on which the output probability should be computed
390 * @param stateinfo [in] GMM state
391 * @param param [in] input vector sequence
392 *
393 * @return the log output probability.
394 * </EN>
395 */
396 static LOGPROB
outprob_state_nocache(GMMCalc * gc,int t,HTK_HMM_State * stateinfo,HTK_Param * param)397 outprob_state_nocache(GMMCalc *gc, int t, HTK_HMM_State *stateinfo, HTK_Param *param)
398 {
399 int d, i;
400 /* set global values for outprob functions to access them */
401 for(d=0,i=0;i<gc->OP_nstream;i++) {
402 gc->OP_vec_stream[i] = &(param->parvec[t][d]);
403 d += gc->OP_veclen_stream[i];
404 }
405 return(gmm_calc_mix(gc, stateinfo));
406 }
407
408 /************************************************************************/
409 /* global functions */
410
411 /**
412 * <JA>
413 * GMM�η��Τ���ν����. ��ư���˰��٤����ƤФ��.
414 *
415 * @param recog [i/o] ������
416 * </JA>
417 * <EN>
418 * Initialization for computing GMM likelihoods. This will be called
419 * once on startup.
420 *
421 * @param recog [i/o] engine instance
422 * </EN>
423 *
424 * @callgraph
425 * @callergraph
426 *
427 */
428 boolean
gmm_init(Recog * recog)429 gmm_init(Recog *recog)
430 {
431 HTK_HMM_INFO *gmm;
432 HTK_HMM_Data *d;
433 GMMCalc *gc;
434 int i;
435
436 gmm = recog->gmm;
437
438 /* check GMM format */
439 /* tied-mixture GMM is not supported */
440 if (gmm->is_tied_mixture) {
441 jlog("ERROR: gmm_init: tied-mixture GMM is not supported\n");
442 return FALSE;
443 }
444 /* assume 3 state GMM (only one output state) */
445 for(d=gmm->start;d;d=d->next) {
446 if (d->state_num > 3) {
447 jlog("ERROR: gmm_init: more than three states (one output state) defined in GMM [%s]\n", d->name);
448 return FALSE;
449 }
450 }
451
452 /* check if CMN needed */
453
454 /* allocate work area */
455 if (recog->gc == NULL) {
456 gc = (GMMCalc *)mymalloc(sizeof(GMMCalc));
457 recog->gc = gc;
458 } else {
459 gc = recog->gc;
460 }
461
462 /* allocate buffers */
463 gc->gmm_score = (LOGPROB *)mymalloc(sizeof(LOGPROB) * gmm->totalhmmnum);
464
465 #ifdef GMM_VAD
466 gc->nframe = recog->jconf->detect.gmm_margin;
467 gc->rates = (LOGPROB *)mymalloc(sizeof(LOGPROB) * gc->nframe);
468 #endif
469
470 gc->is_voice = (boolean *)mymalloc(sizeof(boolean) * gmm->totalhmmnum);
471 i = 0;
472 if (recog->jconf->reject.gmm_reject_cmn_string) {
473 for(d=recog->gmm->start;d;d=d->next) {
474 if (strstr(recog->jconf->reject.gmm_reject_cmn_string, d->name)) {
475 gc->is_voice[i] = FALSE;
476 } else {
477 gc->is_voice[i] = TRUE;
478 }
479 i++;
480 }
481 } else {
482 for(d=recog->gmm->start;d;d=d->next) {
483 gc->is_voice[i] = TRUE;
484 i++;
485 }
486 }
487
488 /* initialize work area */
489 gc->OP_nstream = gmm->opt.stream_info.num;
490 for(i=0;i<gc->OP_nstream;i++) {
491 gc->OP_veclen_stream[i] = gmm->opt.stream_info.vsize[i];
492 }
493 gmm_gprune_safe_init(gc, gmm, recog->jconf->reject.gmm_gprune_num);
494
495 /* check if variances are inversed */
496 if (!gmm->variance_inversed) {
497 /* here, inverse all variance values for faster computation */
498 htk_hmm_inverse_variances(gmm);
499 gmm->variance_inversed = TRUE;
500 }
501
502 return TRUE;
503 }
504
505 /**
506 * <JA>
507 * GMM���Τ���ν�����Ԥʤ�. �����ϳ��Ϥ��Ȥ˸ƤФ��.
508 *
509 * @param recog [i/o] ������
510 * </JA>
511 * <EN>
512 * Prepare for the next GMM computation. This will be called just before
513 * an input begins.
514 *
515 * @param recog [i/o] engine instance
516 * </EN>
517 *
518 * @callgraph
519 * @callergraph
520 */
521 void
gmm_prepare(Recog * recog)522 gmm_prepare(Recog *recog)
523 {
524 HTK_HMM_Data *d;
525 int i;
526
527 /* initialize score buffer and frame count */
528 i = 0;
529 for(d=recog->gmm->start;d;d=d->next) {
530 recog->gc->gmm_score[i] = 0.0;
531 i++;
532 }
533 #ifdef GMM_VAD
534 for(i=0;i<recog->gc->nframe;i++) recog->gc->rates[i] = 0.0;
535 recog->gc->framep = 0;
536 recog->gc->filled = FALSE;
537 recog->gc->in_voice = FALSE;
538 #endif
539
540 recog->gc->framecount = 0;
541
542 #ifdef GMM_VAD_DEBUG
543 printf("GMM_VAD: init\n");
544 #endif
545 }
546
547 /**
548 * <JA>
549 * Ϳ����줿���ϥ٥��ȥ����Τ���ե졼��ˤĤ��ơ���GMM�Υ������������
550 * ����̤� gmm_score ���ѻ�����.
551 *
552 * GMM_VAD ������ϡ���� VAD Ƚ�ꤹ�뤿��ˡ���� jconf->detect.gmm_margin
553 * �ե졼��ʬ�� VAD ������ �ʲ���GMM�κ��祹���� - ����GMM�κ��祹�����ˤ�
554 * ��¸�����.
555 *
556 * @param recog [i/o] ������
557 * </JA>
558 * <EN>
559 * Compute output probabilities of all GMM for a given input vector, and
560 * accumulate the results to the gmm_score buffer.
561 *
562 * When GMM_VAD is defined, VAD scores,
563 * "(maximum score of speech GMMs) - (maximum score of noise GMMs)" of
564 * last frames (jconf->detect.gmm_margin) will be stored for later VAD
565 * decision.
566 *
567 * @param recog [i/o] engine instance
568 * </EN>
569 *
570 * @callgraph
571 * @callergraph
572 */
573 void
gmm_proceed(Recog * recog)574 gmm_proceed(Recog *recog)
575 {
576 HTK_HMM_Data *d;
577 GMMCalc *gc;
578 int i;
579 MFCCCalc *mfcc;
580 LOGPROB score;
581 #ifdef GMM_VAD
582 LOGPROB max_n;
583 LOGPROB max_v;
584 #endif
585
586 mfcc = recog->gmmmfcc;
587 gc = recog->gc;
588
589 if (!mfcc->valid) return;
590
591 gc->framecount++;
592
593 #ifdef GMM_VAD
594 max_n = max_v = LOG_ZERO;
595 #endif
596
597 i = 0;
598 for(d=recog->gmm->start;d;d=d->next) {
599 score = outprob_state_nocache(gc, mfcc->f, d->s[1], mfcc->param);
600 gc->gmm_score[i] += score;
601 #ifdef GMM_VAD
602 if (gc->is_voice[i]) {
603 if (max_v < score) max_v = score;
604 } else {
605 if (max_n < score) max_n = score;
606 }
607 #endif
608 #ifdef MES
609 jlog("DEBUG: [%s: total=%f avg=%f]\n", d->name, gc->gmm_score[i], gc->gmm_score[i] / (float)gc->framecount);
610 #endif
611 i++;
612 }
613 #ifdef GMM_VAD
614 #ifdef GMM_VAD_DEBUG
615 //printf("GMM_VAD: max_v = %f, max_n = %f, rate = %f\n", max_v, max_n, max_v - max_n, gc->framep);
616 #endif
617 /* set rate of this frame */
618 gc->rates[gc->framep] = max_v - max_n;
619 #ifdef GMM_VAD_DEBUG
620 printf("GMM_VAD: %f\n", max_v - max_n);
621 #endif
622 /* increment current frame pointer */
623 gc->framep++;
624 /* if reached end, go to start point */
625 if (gc->framep >= gc->nframe) {
626 gc->filled = TRUE;
627 gc->framep = 0;
628 }
629 #endif
630 }
631
632 /**
633 * <JA>
634 * @brief GMM�η���λ������̤���Ϥ���.
635 *
636 * gmm_proceed() �ˤ�ä����Ѥ��줿�ƥե졼�ऴ�ȤΥ��������顤
637 * ���祹������GMM����ꤹ��. ���λ����Ψ�˴�Ť������٤����
638 * �ǽ�Ū�ʷ�̤� result_gmm() �ˤ�äƽ��Ϥ���.
639 *
640 * @param recog [i/o] ������
641 * </JA>
642 * <EN>
643 * @brief Finish the GMM computation for an input, and output the result.
644 *
645 * The GMM of the maximum score is finally determined from the accumulated
646 * scores computed by gmm_proceed(), and compute the confidence score of the
647 * maximum GMM using posterior probability. Then the result will be output
648 * using result_gmm().
649 *
650 * @param recog [i/o] engine instance
651 * </EN>
652 *
653 * @callgraph
654 * @callergraph
655 */
656 void
gmm_end(Recog * recog)657 gmm_end(Recog *recog)
658 {
659 HTK_HMM_INFO *gmm;
660 LOGPROB *score;
661 HTK_HMM_Data *d;
662 LOGPROB maxprob;
663 HTK_HMM_Data *dmax;
664 #ifdef CONFIDENCE_MEASURE
665 LOGPROB sum;
666 #endif
667 int i;
668 int maxid;
669
670 if (recog->gc->framecount == 0) return;
671
672 gmm = recog->gmm;
673 score = recog->gc->gmm_score;
674
675 /* get max score */
676 i = 0;
677 maxprob = LOG_ZERO;
678 dmax = NULL;
679 maxid = 0;
680 for(d=gmm->start;d;d=d->next) {
681 if (maxprob < score[i]) {
682 dmax = d;
683 maxprob = score[i];
684 maxid = i;
685 }
686 i++;
687 }
688 recog->gc->max_d = dmax;
689 recog->gc->max_i = maxid;
690
691 #ifdef CONFIDENCE_MEASURE
692 /* compute CM */
693 sum = 0.0;
694 i = 0;
695 for(d=gmm->start;d;d=d->next) {
696 //sum += pow(10, recog->jconf->annotate.cm_alpha * (score[i] - maxprob));
697 sum += pow(10, 0.05 * (score[i] - maxprob));
698 i++;
699 }
700 recog->gc->gmm_max_cm = 1.0 / sum;
701 #endif
702
703 /* output result */
704 callback_exec(CALLBACK_RESULT_GMM, recog);
705
706 }
707
708
709 /**
710 * <JA>
711 * GMM�μ��̷�̡��Ǹ�����Ϥ��������ϤȤ���ͭ���Ǥ��ä���
712 * ̵���Ǥ��ä������֤�.
713 *
714 * @param recog [i/o] ������
715 *
716 * @return ��̤�GMM��̾���� gmm_reject_cmn_string ���̵����� valid �Ȥ���
717 * TRUE, ����� invalid �Ȥ��� FALSE ���֤�.
718 * </JA>
719 * <EN>
720 * Return whether the last input was valid or invalid, from the result of
721 * GMM computation.
722 *
723 * @param recog [i/o] engine instance
724 *
725 * @return TRUE if input is valid, i.e. the name of maximum GMM is not included
726 * in gmm_reject_cmn_string, or FALSE if input is invalid, i.e. the name is
727 * included in that string.
728 * </EN>
729 *
730 * @callgraph
731 * @callergraph
732 */
733 boolean
gmm_valid_input(Recog * recog)734 gmm_valid_input(Recog *recog)
735 {
736 if (recog->gc->max_d == NULL) return FALSE;
737 if (recog->gc->is_voice[recog->gc->max_i]) {
738 return TRUE;
739 }
740 return FALSE;
741 }
742
743 /**
744 * <EN>
745 * Free work area used for GMM calculation.
746 * </EN>
747 * <JA>
748 * GMM�����Ѥ���������ꥢ��������.
749 * </JA>
750 *
751 * @param recog [i/o] engine instance
752 *
753 * @callgraph
754 * @callergraph
755 *
756 */
757 void
gmm_free(Recog * recog)758 gmm_free(Recog *recog)
759 {
760 if (recog->gc) {
761 free(recog->gc->OP_calced_score);
762 free(recog->gc->OP_calced_id);
763 free(recog->gc->is_voice);
764 #ifdef GMM_VAD
765 free(recog->gc->rates);
766 #endif
767 free(recog->gc->gmm_score);
768 free(recog->gc);
769 recog->gc = NULL;
770 }
771 }
772
773 #ifdef GMM_VAD
774
775 /**
776 * <EN>
777 * Compute score of voice activity from the last (jconf->detect.gmm_margin)
778 * frames. Positive value designates speech, and negative means noise.
779 * </EN>
780 * <JA>
781 * ľ���� (jconf->detect.gmm_margin) �ե졼��ʬ�Υ���������
782 * voice activity �Υ������������. �����ͤϲ���������ͤϻ�����ɽ��.
783 * </JA>
784 *
785 * @param gc [i/o] work area for GMM calculation
786 * @param mean_ret [out] mean value of last (jconf->detect.gmm_margin) frames
787 * @param var_ret [out] variance of last (jconf->detect.gmm_margin) frames
788 * @param count_ret [out] count of speech frames in last (jconf->detect.gmm_margin) frames
789 *
790 */
791 static void
voice_activity_score(GMMCalc * gc,float * mean_ret,float * var_ret,int * count_ret)792 voice_activity_score(GMMCalc *gc, float *mean_ret, float *var_ret, int *count_ret)
793 {
794 int i, len;
795 LOGPROB mean;
796 LOGPROB var;
797 LOGPROB x;
798 int count;
799
800 if (!gc->filled) {
801 /* cycle buffer not filled yet */
802 *mean_ret = 0.0;
803 *var_ret = 0.0;
804 *count_ret = 0;
805 return;
806 }
807
808 if (gc->filled) {
809 len = gc->nframe;
810 } else {
811 len = gc->framep;
812 }
813
814 mean = 0;
815 count = 0;
816 for(i=0;i<len;i++) {
817 mean += gc->rates[i];
818 if (gc->rates[i] > 0.0) count++;
819 }
820 mean /= (float)len;
821 var = 0.0;
822 for(i=0;i<len;i++) {
823 x = mean - gc->rates[i];
824 var += x * x;
825 }
826 var /= (float)len;
827
828 *mean_ret = mean;
829 *var_ret = var;
830 *count_ret = count;
831 }
832
833 /**
834 * <EN>
 * Detect a speech / noise segment boundary.  If we are in a noise segment
 * and speech input begins at this frame, recog->gc->up_trigger will be set
 * to TRUE.  If we are in a speech segment and it ends at this frame,
 * recog->gc->down_trigger will be set to TRUE.
839 * </EN>
840 * <JA>
841 * ����/������֤ζ��ڤ���Τ���. ����ޤǤ�������֤Ǥ��Υե졼���
842 * �����ȥꥬ���Τ����Ȥ���recog->gc->up_trigger �� TRUE �˥��åȤ���. ����
843 * ������֤Ƕ�ֽ�λ���Τ����Ȥ���recog->gc->down_trigger �� TRUE ��
844 * ���åȤ���.
845 * </JA>
846 *
847 * @param recog [i/o] engine instance
848 *
849 * @callgraph
850 * @callergraph
851 */
852 void
gmm_check_trigger(Recog * recog)853 gmm_check_trigger(Recog *recog)
854 {
855 GMMCalc *gc;
856 gc = recog->gc;
857 float mean;
858 float var;
859 int count;
860
861 gc->up_trigger = gc->down_trigger = FALSE;
862
863 voice_activity_score(gc, &mean, &var, &count);
864
865 if (gc->in_voice) {
866 if (mean <= recog->jconf->detect.gmm_downtrigger_thres) {
867 gc->down_trigger = TRUE;
868 gc->in_voice = FALSE;
869 }
870 } else {
871 if (mean >= recog->jconf->detect.gmm_uptrigger_thres) {
872 gc->up_trigger = TRUE;
873 gc->in_voice = TRUE;
874 }
875 }
876
877 #ifdef GMM_VAD_DEBUG
878 printf("GMM_VAD: %s: %f %f %d", gc->in_voice ? "VOICE" : "NOISE", mean, var, count);
879 if (gc->up_trigger) printf(": BEGIN");
880 if (gc->down_trigger) printf(": END");
881 printf("\n");
882 #endif
883
884 }
885 #endif /* GMM_VAD */
886
887 /* end of file */
888