1 /**
2 * @file search_bestfirst_v2.c
3 *
4 * <JA>
5 * @brief ��2�ѥ���Viterbi�黻����Ӳ��⥹������ (�̾���)
6 *
7 * �����Ǥϡ���2�ѥ��ˤ�����õ����β����Viterbi�������ι����黻��
8 * ��ñ��ȤΥȥ�ꥹ��³������Ӳ���Υ���������Ԥ��ؿ�����������
9 * ���ޤ�.
10 *
11 * ñ����³����ñ��ֲ��ǴĶ���¸���ϡ����Τ� nextscan ���르�ꥺ����Ѥ��ޤ�.
12 * ���Υե�������������Ƥ���ؿ��ϡ�config.h �ˤ����� PASS2_STRICT_IWCD
13 * �� define �Ǥ���Ȥ��˻��Ѥ���ޤ�. �դ˾嵭�� undef �Ǥ���Ȥ��ϡ�
14 * search_bestfirst_v1.c �δؿ����Ѥ����ޤ�.
15 *
16 * Backscan �Ǥϡ��ǥ����ǥ������٤�Ż뤷�ơ���ñ��Ȥ�������ñ���
17 * ������ñ��ֲ��ǥ���ƥ����Ȥϲ���Ÿ�����ˤ��٤Ƹ�̩�˷�����ޤ�.
18 * Backscan ��Ԥʤ� search_bestfirst_v1.c ��������� POP ���˹Ԥʤ��Τ�
19 * ��٤ơ������Ǥϲ��������λ��������Τʥ�����������뤿�ᡤ
20 * ���������٤Ϲ⤤. ����������������뤹�٤Ƥβ�����Ф���
21 * (���Ȥ������å�������ʤ�����Ǥ��äƤ�)�ȥ饤�ե���κƷ���Ԥʤ����ᡤ
22 * ���̤� backscan ����٤����礷�ޤ�.
23 * </JA>
24 *
25 * <EN>
26 * @brief Viterbi path update and scoring on the second pass (standard version)
27 *
28 * This file has functions for score calculations on the 2nd pass.
29 * It includes Viterbi path update calculation of a hypothesis, calculations
30 * of scores and word trellis connection at word expansion.
31 *
32 * The cross-word triphone will be computed just at word expansion time,
33 * for precise scoring. This is called the "nextscan" algorithm. These
34 * functions are enabled when PASS2_STRICT_IWCD is DEFINED in config.h.
35 * If undefined, the "backscan" functions in search_bestfirst_v1.c will be
36 * used instead.
37 *
38 * Here in the nextscan algorithm, all cross-word context dependencies between
39 * the next word and the source hypothesis are computed as soon as a new hypothesis
40 * is expanded. As the precise cross-word triphone score is applied on
41 * hypothesis generation with no delay, a more accurate search-time score can
42 * be obtained than with the delayed backscan method in search_bestfirst_v1.c.
43 * On the other hand, the computational cost grows considerably because the
44 * forward scores of cross-word triphones are re-calculated for all the
45 * generated hypotheses, even non-promising ones.
46 * </EN>
47 *
48 * @author Akinobu Lee
49 * @date Mon Sep 12 00:58:50 2005
50 *
51 * $Revision: 1.4 $
52 *
53 */
54 /*
55 * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
56 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
57 * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
58 * All rights reserved
59 */
60
61 /* By "fast" setting (default), search_bestfirst_v1.c is used for faster
62 decoding. Please specify option "--enable-setup=standard" or
63 "--enable-strict-iwcd2" at "./configure" to activate this. */
64
65 #include <julius/julius.h>
66
67 #ifdef PASS2_STRICT_IWCD
68
69 #undef TCD ///< Define if want triphone debug messages
70
71
72 /**********************************************************************/
73 /************ ����Ρ��ɤδ������ ************/
74 /************ Basic functions for hypothesis node handling ************/
75 /**********************************************************************/
76
/* Node-stocker statistics are compiled in only when STOCKER_DEBUG is
   defined; it is explicitly disabled here. */
#undef STOCKER_DEBUG

#ifdef STOCKER_DEBUG
/* File-scope statics start at zero without an explicit initializer. */
static int stocked_num;		/* +1 per free_node(), -1 per node re-used by newnode() */
static int reused_num;		/* nodes taken back from the stocker by newnode() */
static int new_num;		/* nodes freshly allocated by newnode() */
static int request_num;		/* total calls to newnode() */
#endif
85
86 /**
87 * <JA>
88 * ����Ρ��ɤ�ºݤ˥���夫���������.
89 *
90 * @param node [in] �������
91 * </JA>
92 * <EN>
93 * Free a hypothesis node actually.
94 *
95 * @param node [in] hypothesis node
96 * </EN>
97 */
98 static void
free_node_exec(NODE * node)99 free_node_exec(NODE *node)
100 {
101 if (node == NULL) return;
102 free(node->g);
103 #ifdef GRAPHOUT_PRECISE_BOUNDARY
104 if (node->region->graphout) {
105 free(node->wordend_frame);
106 free(node->wordend_gscore);
107 }
108 #endif
109 free(node);
110 }
111
112 /**
113 * <JA>
114 * ����Ρ��ɤ����Ѥ�λ���ƥꥵ�������Ѥ˥��ȥå�����
115 *
116 * @param node [in] �������
117 * </JA>
118 * <EN>
119 * Stock an unused hypothesis node for recycle.
120 *
121 * @param node [in] hypothesis node
122 * </EN>
123 * @callgraph
124 * @callergraph
125 */
126 void
free_node(NODE * node)127 free_node(NODE *node)
128 {
129 if (node == NULL) return;
130
131 if (node->region->graphout) {
132 if (node->prevgraph != NULL && node->prevgraph->saved == FALSE) {
133 wordgraph_free(node->prevgraph);
134 }
135 }
136
137 /* save to stocker */
138 node->next = node->region->pass2.stocker_root;
139 node->region->pass2.stocker_root = node;
140
141 #ifdef STOCKER_DEBUG
142 stocked_num++;
143 #endif
144 }
145
146 /**
147 * <JA>
148 * �ꥵ�������ѥΡ��ɳ�Ǽ�ˤ���ˤ���.
149 *
150 * @param s [in] stack decoding work area
151 *
152 * </JA>
153 * <EN>
154 * Clear the node stocker for recycle.
155 *
156 * @param s [in] stack decoding work area
157 *
158 * </EN>
159 * @callgraph
160 * @callergraph
161 */
162 void
clear_stocker(StackDecode * s)163 clear_stocker(StackDecode *s)
164 {
165 NODE *node, *tmp;
166 node = s->stocker_root;
167 while(node) {
168 tmp = node->next;
169 free_node_exec(node);
170 node = tmp;
171 }
172 s->stocker_root = NULL;
173
174 #ifdef STOCKER_DEBUG
175 jlog("DEBUG: %d times requested, %d times newly allocated, %d times reused\n", request_num, new_num, reused_num);
176 stocked_num = 0;
177 reused_num = 0;
178 new_num = 0;
179 request_num = 0;
180 #endif
181 }
182
183 /**
184 * <JA>
185 * ����ԡ�����.
186 *
187 * @param dst [out] ���ԡ���β���
188 * @param src [in] ���ԡ����β���
189 *
190 * @return @a dst ���֤�.
191 * </JA>
192 * <EN>
193 * Copy the content of node to another.
194 *
195 * @param dst [out] target hypothesis
196 * @param src [in] source hypothesis
197 *
198 * @return the value of @a dst.
199 * </EN>
200 * @callgraph
201 * @callergraph
202 */
203 NODE *
cpy_node(NODE * dst,NODE * src)204 cpy_node(NODE *dst, NODE *src)
205 {
206 int peseqlen;
207
208 peseqlen = src->region->peseqlen;
209
210 dst->next = src->next;
211 dst->prev = src->prev;
212 memcpy(dst->g, src->g, sizeof(LOGPROB) * peseqlen);
213 memcpy(dst->seq, src->seq, sizeof(WORD_ID) * MAXSEQNUM);
214 #ifdef CM_SEARCH
215 #ifdef CM_MULTIPLE_ALPHA
216 {
217 int w;
218 for(w=0;w<src->seqnum;w++) {
219 memcpy(dst->cmscore[w], src->cmscore[w], sizeof(LOGPROB) * src->region->config->annotate.cm_alpha_num);
220 }
221 }
222 #else
223 memcpy(dst->cmscore, src->cmscore, sizeof(LOGPROB) * MAXSEQNUM);
224 #endif
225 #endif /* CM_SEARCH */
226 dst->seqnum = src->seqnum;
227 dst->score = src->score;
228 dst->bestt = src->bestt;
229 dst->estimated_next_t = src->estimated_next_t;
230 dst->endflag = src->endflag;
231 dst->state = src->state;
232 dst->tre = src->tre;
233 if (src->region->ccd_flag) {
234 dst->last_ph = src->last_ph;
235 dst->last_ph_sp_attached = src->last_ph_sp_attached;
236 }
237 dst->totallscore = src->totallscore;
238 dst->final_g = src->final_g;
239 #ifdef VISUALIZE
240 dst->popnode = src->popnode;
241 #endif
242
243 if (src->region->graphout) {
244 #ifdef GRAPHOUT_PRECISE_BOUNDARY
245 memcpy(dst->wordend_frame, src->wordend_frame, sizeof(short) * peseqlen);
246 memcpy(dst->wordend_gscore, src->wordend_gscore, sizeof(LOGPROB) * peseqlen);
247 #endif
248 dst->prevgraph = src->prevgraph;
249 dst->lastcontext = src->lastcontext;
250 #ifndef GRAPHOUT_PRECISE_BOUNDARY
251 dst->tail_g_score = src->tail_g_score;
252 #endif
253 }
254 return(dst);
255 }
256
257 /**
258 * <JA>
259 * �����ʲ���Ρ��ɤ����դ���. �⤷��Ǽ�ˤ˰������Ѥ���ʤ��ʤä�
260 * �Ρ��ɤ�������Ϥ��������Ѥ���. �ʤ���п����˳���դ���.
261 *
262 * @param r [in] ǧ������������
263 *
264 * @return �����˳���դ���줿����Ρ��ɤؤΥݥ����֤�.
265 * </JA>
266 * <EN>
267 * Allocate a new hypothesis node. If the node stocker is not empty,
268 * the one in the stocker is re-used. Otherwise, allocate as new.
269 *
270 * @param r [in] recognition process instance
271 *
272 * @return pointer to the newly allocated node.
273 * </EN>
274 * @callgraph
275 * @callergraph
276 */
277 NODE *
newnode(RecogProcess * r)278 newnode(RecogProcess *r)
279 {
280 NODE *tmp;
281 int i;
282 int peseqlen;
283
284 peseqlen = r->peseqlen;
285
286 #ifdef STOCKER_DEBUG
287 request_num++;
288 #endif
289 if ((tmp = r->pass2.stocker_root) != NULL) {
290 /* re-use ones in the stocker */
291 r->pass2.stocker_root = tmp->next;
292 #ifdef STOCKER_DEBUG
293 stocked_num--;
294 reused_num++;
295 #endif
296 } else {
297 /* allocate new */
298 tmp = (NODE *)mymalloc(sizeof(NODE));
299 tmp->g = (LOGPROB *)mymalloc(sizeof(LOGPROB) * peseqlen);
300 #ifdef GRAPHOUT_PRECISE_BOUNDARY
301 if (r->graphout) {
302 tmp->wordend_frame = (short *)mymalloc(sizeof(short) * peseqlen);
303 tmp->wordend_gscore = (LOGPROB *)mymalloc(sizeof(LOGPROB) * peseqlen);
304 }
305 #endif
306 #ifdef STOCKER_DEBUG
307 new_num++;
308 #endif
309 }
310
311 /* clear the data */
312 /*bzero(tmp,sizeof(NODE));*/
313 tmp->next=NULL;
314 tmp->prev=NULL;
315 tmp->last_ph = NULL;
316 tmp->last_ph_sp_attached = FALSE;
317 if (r->ccd_flag) {
318 tmp->totallscore = LOG_ZERO;
319 }
320 tmp->endflag = FALSE;
321 tmp->seqnum = 0;
322 for(i = 0; i < peseqlen; i++) {
323 tmp->g[i] = LOG_ZERO;
324 }
325 tmp->final_g = LOG_ZERO;
326 #ifdef VISUALIZE
327 tmp->popnode = NULL;
328 #endif
329 if (r->graphout) {
330 tmp->prevgraph = NULL;
331 tmp->lastcontext = NULL;
332 }
333
334 tmp->region = r;
335
336 return(tmp);
337 }
338
339
340 /**********************************************************************/
341 /************ �������ȥ�ꥹŸ�������ٷ� ****************/
342 /************ Expand trellis and update forward score *****************/
343 /**********************************************************************/
344
345 /**
346 * <JA>
347 * 1ñ��ʬ�Υȥ�ꥹ���ѤΥ�����ꥢ�����.
348 *
349 * @param r [in] ǧ������������
350 *
351 * </JA>
352 * <EN>
353 * Allocate work area for trellis computation of a word.
354 *
355 * @param r [in] recognition process instance
356 *
357 * </EN>
358 * @callgraph
359 * @callergraph
360 */
361 void
malloc_wordtrellis(RecogProcess * r)362 malloc_wordtrellis(RecogProcess *r)
363 {
364 int maxwn;
365 StackDecode *dwrk;
366
367 maxwn = r->lm->winfo->maxwn + 10; /* CCD�ˤ����ư���θ */
368 dwrk = &(r->pass2);
369
370 dwrk->wordtrellis[0] = (LOGPROB *)mymalloc(sizeof(LOGPROB) * maxwn);
371 dwrk->wordtrellis[1] = (LOGPROB *)mymalloc(sizeof(LOGPROB) * maxwn);
372
373 dwrk->g = (LOGPROB *)mymalloc(sizeof(LOGPROB) * r->peseqlen);
374
375 dwrk->phmmlen_max = r->lm->winfo->maxwlen + 2;
376 dwrk->phmmseq = (HMM_Logical **)mymalloc(sizeof(HMM_Logical *) * dwrk->phmmlen_max);
377 if (r->lm->config->enable_iwsp && r->am->hmminfo->multipath) {
378 dwrk->has_sp = (boolean *)mymalloc(sizeof(boolean) * dwrk->phmmlen_max);
379 } else {
380 dwrk->has_sp = NULL;
381 }
382
383 dwrk->wef = NULL;
384 dwrk->wes = NULL;
385 dwrk->wend_token_frame[0] = NULL;
386 dwrk->wend_token_frame[1] = NULL;
387 dwrk->wend_token_gscore[0] = NULL;
388 dwrk->wend_token_gscore[1] = NULL;
389 #ifdef GRAPHOUT_PRECISE_BOUNDARY
390 if (r->graphout) {
391 dwrk->wef = (short *)mymalloc(sizeof(short) * r->peseqlen);
392 dwrk->wes = (LOGPROB *)mymalloc(sizeof(LOGPROB) * r->peseqlen);
393 dwrk->wend_token_frame[0] = (short *)mymalloc(sizeof(short) * maxwn);
394 dwrk->wend_token_frame[1] = (short *)mymalloc(sizeof(short) * maxwn);
395 dwrk->wend_token_gscore[0] = (LOGPROB *)mymalloc(sizeof(LOGPROB) * maxwn);
396 dwrk->wend_token_gscore[1] = (LOGPROB *)mymalloc(sizeof(LOGPROB) * maxwn);
397 }
398 #endif
399 }
400
401 /**
402 * <JA>
403 * 1ñ��ʬ�Υȥ�ꥹ���ѤΥ�������ꥢ�����
404 *
405 * </JA>
406 * <EN>
407 * Free the work area for trellis computation of a word.
408 *
409 * </EN>
410 * @callgraph
411 * @callergraph
412 */
413 void
free_wordtrellis(StackDecode * dwrk)414 free_wordtrellis(StackDecode *dwrk)
415 {
416 free(dwrk->wordtrellis[0]);
417 free(dwrk->wordtrellis[1]);
418 free(dwrk->g);
419 free(dwrk->phmmseq);
420 if (dwrk->has_sp) {
421 free(dwrk->has_sp);
422 dwrk->has_sp = NULL;
423 }
424 #ifdef GRAPHOUT_PRECISE_BOUNDARY
425 if (dwrk->wef) {
426 free(dwrk->wef);
427 free(dwrk->wes);
428 free(dwrk->wend_token_frame[0]);
429 free(dwrk->wend_token_frame[1]);
430 free(dwrk->wend_token_gscore[0]);
431 free(dwrk->wend_token_gscore[1]);
432 dwrk->wef = NULL;
433 }
434 #endif
435 }
436
437
438 /**********************************************************************/
439 /************ ��������������ٷ� *******************/
440 /************ Compute forward score of a hypothesis *******************/
441 /**********************************************************************/
442
443 /* Ϳ����줿���ǤΤʤ�� phmmseq[0..phmmlen-1]���Ф���viterbi����Ԥ�.
444 g[0..framelen-1] �Υ����������ͤȤ��� g_new[0..framelen-1]�˹����ͤ�����.
445 ���� least_frame �ޤǤ�scan����. */
446 /* Viterbi computation for the given phoneme sequence 'phmmseq[0..phmmlen-1]'
447 with g[0..framelen-1] as initial values. The results are stored in
448 g_new[0..framelen-1]. Scan should not terminate at least it reaches
449 'least_frame'. */
450 /**
451 * <JA>
452 * Ϳ����줿���Ǥ��¤Ӥ��Ф��� Viterbi ����Ԥ�����������������
453 * �����������Ѵؿ�.
454 *
455 * @param g [in] ���ߤλ��֤��Ȥ�������������
456 * @param g_new [out] ������ο��������������������Ǽ����Хåե�
457 * @param phmmseq [in] ����HMM���¤�
458 * @param has_sp [in] short-pause location
459 * @param phmmlen [in] @a phmmseq ����
460 * @param param [in] ���ϥѥ���
461 * @param framelen [in] ���ϥե졼��Ĺ
462 * @param least_frame [in] �ӡ�������������Υե졼����ʾ�� Viterbi������
463 * @param final_g [in] final g scores
464 * @param wordend_frame_src [in] ���ߤ�ñ�콪ü�ե졼��ȡ�����
465 * @param wordend_frame_dst [out] ������ο�����ñ�콪ü�ե졼��ȡ�����
466 * @param wordend_gscore_src [in] ���ߤ�ñ�콪ü�������ȡ�����
467 * @param wordend_gscore_dst [out] ������ο�����ñ�콪ü�������ȡ�����
468 * @param r [in] recognition process instance
469 * </JA>
470 * <EN>
471 * Generic function to perform Viterbi path updates for given phoneme
472 * sequence.
473 *
474 * @param g [in] current forward scores at each input frame
475 * @param g_new [out] buffer to save the resulting score updates
476 * @param phmmseq [in] phoneme sequence to perform Viterbi
477 * @param has_sp [in] short-pause location
478 * @param phmmlen [in] length of @a phmmseq.
479 * @param param [in] input parameter vector
480 * @param framelen [in] input frame length to compute
481 * @param least_frame [in] Least frame length to force viterbi even with beam
482 * @param final_g [in] final g scores
483 * @param wordend_frame_src [in] current word-end frame tokens
484 * @param wordend_frame_dst [out] buffer to store updated word-end frame tokens
485 * @param wordend_gscore_src [in] current word-end score tokens
486 * @param wordend_gscore_dst [out] buffer to store updated word-end score tokens
487 * @param r [in] recognition process instance
488 *
489 * </EN>
490 */
491 static void
do_viterbi(LOGPROB * g,LOGPROB * g_new,HMM_Logical ** phmmseq,boolean * has_sp,int phmmlen,HTK_Param * param,int framelen,int least_frame,LOGPROB * final_g,short * wordend_frame_src,short * wordend_frame_dst,LOGPROB * wordend_gscore_src,LOGPROB * wordend_gscore_dst,RecogProcess * r)492 do_viterbi(LOGPROB *g, LOGPROB *g_new, HMM_Logical **phmmseq, boolean *has_sp, int phmmlen, HTK_Param *param, int framelen, int least_frame, LOGPROB *final_g, short *wordend_frame_src, short *wordend_frame_dst, LOGPROB *wordend_gscore_src, LOGPROB *wordend_gscore_dst, RecogProcess *r) /* has_sp and final_g is for multipath only */
493 {
494 HMM *whmm; /* HMM */
495 int wordhmmnum; /* length of above */
496 int startt; /* scan start frame */
497 LOGPROB tmpmax,tmpscore; /* variables for Viterbi process */
498 A_CELL *ac;
499 int t,i,j;
500 boolean node_exist_p;
501 int tn; ///< Temporal pointer to current buffer
502 int tl; ///< Temporal pointer to previous buffer
503
504 /* store global values to local for rapid access */
505 StackDecode *dwrk;
506 WORD_INFO *winfo;
507 HTK_HMM_INFO *hmminfo;
508 LOGPROB *framemaxscore;
509 #ifdef SCAN_BEAM
510 LOGPROB scan_beam_thres;
511 #endif
512
513 dwrk = &(r->pass2);
514 winfo = r->lm->winfo;
515 hmminfo = r->am->hmminfo;
516 framemaxscore = r->pass2.framemaxscore;
517 #ifdef SCAN_BEAM
518 scan_beam_thres = r->config->pass2.scan_beam_thres;
519 #endif
520
521
522 #ifdef TCD
523 jlog("DEBUG: scan for:");
524 for (i=0;i<phmmlen;i++) {
525 jlog(" %s", phmmseq[i]->name);
526 }
527 jlog("\n");
528 #endif
529
530 /* ñ��HMM���� */
531 /* make word HMM */
532 whmm = new_make_word_hmm(hmminfo, phmmseq, phmmlen, has_sp);
533 if (whmm == NULL) {
534 j_internal_error("Error: failed to make word hmm\n");
535 }
536 wordhmmnum = whmm->len;
537 if (wordhmmnum >= winfo->maxwn + 10) {
538 j_internal_error("do_viterbi: word too long (>%d)\n", winfo->maxwn + 10);
539 }
540
541 /* scan�������� -> startt��*/
542 /* search for the start frame -> set to startt */
543 for(t = framelen-1; t >=0 ; t--) {
544 if (
545 #ifdef SCAN_BEAM
546 g[t] > framemaxscore[t] - scan_beam_thres &&
547 #endif
548 g[t] > LOG_ZERO) {
549 break;
550 }
551 }
552 if (t < 0) { /* no node has score > LOG_ZERO */
553 /* reset all scores and end */
554 for(t=0;t<framelen;t++) {
555 g_new[t] = LOG_ZERO;
556 #ifdef GRAPHOUT_PRECISE_BOUNDARY
557 if (r->graphout) {
558 wordend_frame_dst[t] = -1;
559 wordend_gscore_dst[t] = LOG_ZERO;
560 }
561 #endif
562 }
563 free_hmm(whmm);
564 return;
565 }
566 startt = t;
567
568 /* �������ʹ�[startt+1..framelen-1] �� g_new[] ��ꥻ�å� */
569 /* clear g_new[] for [startt+1..framelen-1] */
570 for(t=framelen-1;t>startt;t--) {
571 g_new[t] = LOG_ZERO;
572 #ifdef GRAPHOUT_PRECISE_BOUNDARY
573 if (r->graphout) {
574 wordend_frame_dst[t] = -1;
575 wordend_gscore_dst[t] = LOG_ZERO;
576 }
577 #endif
578 }
579
580 /*****************/
581 /* viterbi start */
582 /*****************/
583
584 /* set initial swap buffer */
585 tn = 0; tl = 1;
586
587 #ifdef GRAPHOUT_PRECISE_BOUNDARY
588 if (r->graphout) {
589 for(i=0;i<wordhmmnum;i++) {
590 dwrk->wend_token_frame[tn][i] = -1;
591 dwrk->wend_token_gscore[tn][i] = LOG_ZERO;
592 }
593 }
594 #endif
595
596 if (! hmminfo->multipath) {
597 /* ���� [startt] ����ͤ����� */
598 /* initialize scores on frame [startt] */
599 for(i=0;i<wordhmmnum-1;i++) dwrk->wordtrellis[tn][i] = LOG_ZERO;
600 dwrk->wordtrellis[tn][wordhmmnum-1] = g[startt] + outprob(&(r->am->hmmwrk), startt, &(whmm->state[wordhmmnum-1]), param);
601 g_new[startt] = dwrk->wordtrellis[tn][0];
602 #ifdef GRAPHOUT_PRECISE_BOUNDARY
603 if (r->graphout) {
604 dwrk->wend_token_frame[tn][wordhmmnum-1] = wordend_frame_src[startt];
605 dwrk->wend_token_gscore[tn][wordhmmnum-1] = wordend_gscore_src[startt];
606 wordend_frame_dst[startt] = dwrk->wend_token_frame[tn][0];
607 wordend_gscore_dst[startt] = dwrk->wend_token_gscore[tn][0];
608 }
609 #endif
610 }
611
612 /* �ᥤ��롼��: startt ����Ϥޤ� 0 �˸����ä� Viterbi �� */
613 /* main loop: start from [startt], and compute Viterbi toward [0] */
614 for(t = hmminfo->multipath ? startt : startt - 1; t >= 0; t--) {
615
616 /* wordtrellis�Υ�����ꥢ��å� */
617 /* swap workarea of wordtrellis */
618 i = tn; tn = tl; tl = i;
619
620 node_exist_p = FALSE; /* TRUE if there is at least 1 survived node in this frame */
621
622 if (! hmminfo->multipath) {
623
624 /* ü�ΥΡ��� [t][wordhmmnum-1]�ϡ��������� �� g[]�ι⤤���ˤʤ� */
625 /* the edge node [t][wordhmmnum-1] is either internal transitin or g[] */
626 tmpscore = LOG_ZERO;
627 for (ac=whmm->state[wordhmmnum-1].ac;ac;ac=ac->next) {
628 if (tmpscore < dwrk->wordtrellis[tl][ac->arc] + ac->a) {
629 j = ac->arc;
630 tmpscore = dwrk->wordtrellis[tl][ac->arc] + ac->a;
631 }
632 }
633 if (g[t] > tmpscore) {
634 tmpmax = g[t];
635 #ifdef GRAPHOUT_PRECISE_BOUNDARY
636 if (r->graphout) {
637 dwrk->wend_token_frame[tn][wordhmmnum-1] = wordend_frame_src[t];
638 dwrk->wend_token_gscore[tn][wordhmmnum-1] = wordend_gscore_src[t];
639 }
640 #endif
641 } else {
642 tmpmax = tmpscore;
643 #ifdef GRAPHOUT_PRECISE_BOUNDARY
644 if (r->graphout) {
645 dwrk->wend_token_frame[tn][wordhmmnum-1] = dwrk->wend_token_frame[tl][j];
646 dwrk->wend_token_gscore[tn][wordhmmnum-1] = dwrk->wend_token_gscore[tl][j];
647 }
648 #endif
649 }
650
651 /* ü�ΥΡ��ɤΥ���������٥��ץ����å�: ���������ʤ���Ȥ� */
652 /* check if the edge node is within score envelope */
653 if (
654 #ifdef SCAN_BEAM
655 tmpmax <= framemaxscore[t] - scan_beam_thres ||
656 #endif
657 tmpmax <= LOG_ZERO
658 ) {
659 dwrk->wordtrellis[tn][wordhmmnum-1] = LOG_ZERO;
660 #ifdef GRAPHOUT_PRECISE_BOUNDARY
661 if (r->graphout) {
662 dwrk->wend_token_frame[tn][wordhmmnum-1] = -1;
663 dwrk->wend_token_gscore[tn][wordhmmnum-1] = LOG_ZERO;
664 }
665 #endif
666 } else {
667 node_exist_p = TRUE;
668 dwrk->wordtrellis[tn][wordhmmnum-1] = tmpmax + outprob(&(r->am->hmmwrk), t, &(whmm->state[wordhmmnum-1]), param);
669 }
670
671 }
672
673 /* node[wordhmmnum-2..0]�ˤĤ��ƥȥ�ꥹ��Ÿ�� */
674 /* expand trellis for node [t][wordhmmnum-2..0] */
675 for(i=wordhmmnum-2;i>=0;i--) {
676
677 /* ����ѥ��Ⱥ��ॹ���� tmpmax �Ĥ��� */
678 /* find most likely path and the max score 'tmpmax' */
679 tmpmax = LOG_ZERO;
680 for (ac=whmm->state[i].ac;ac;ac=ac->next) {
681 if (hmminfo->multipath) {
682 if (ac->arc == wordhmmnum-1) tmpscore = g[t];
683 else if (t + 1 > startt) tmpscore = LOG_ZERO;
684 else tmpscore = dwrk->wordtrellis[tl][ac->arc];
685 tmpscore += ac->a;
686 } else {
687 tmpscore = dwrk->wordtrellis[tl][ac->arc] + ac->a;
688 }
689 if (tmpmax < tmpscore) {
690 tmpmax = tmpscore;
691 j = ac->arc;
692 }
693 }
694
695 /* ����������٥��ץ����å�: ���������ʤ���Ȥ� */
696 /* check if score of this node is within the score envelope */
697 if (
698 #ifdef SCAN_BEAM
699 tmpmax <= framemaxscore[t] - scan_beam_thres ||
700 #endif
701 tmpmax <= LOG_ZERO
702 ) {
703 /* invalid node */
704 dwrk->wordtrellis[tn][i] = LOG_ZERO;
705 #ifdef GRAPHOUT_PRECISE_BOUNDARY
706 if (r->graphout) {
707 dwrk->wend_token_frame[tn][i] = -1;
708 dwrk->wend_token_gscore[tn][i] = LOG_ZERO;
709 }
710 #endif
711 } else {
712 /* survived node */
713 node_exist_p = TRUE;
714 dwrk->wordtrellis[tn][i] = tmpmax;
715 if (! hmminfo->multipath || i > 0) {
716 dwrk->wordtrellis[tn][i] += outprob(&(r->am->hmmwrk), t, &(whmm->state[i]), param);
717 }
718 #ifdef GRAPHOUT_PRECISE_BOUNDARY
719 if (r->graphout) {
720 if (hmminfo->multipath) {
721 if (j == wordhmmnum-1) {
722 dwrk->wend_token_frame[tn][i] = wordend_frame_src[t];
723 dwrk->wend_token_gscore[tn][i] = wordend_gscore_src[t];
724 } else {
725 dwrk->wend_token_frame[tn][i] = dwrk->wend_token_frame[tl][j];
726 dwrk->wend_token_gscore[tn][i] = dwrk->wend_token_gscore[tl][j];
727 }
728 } else {
729 dwrk->wend_token_frame[tn][i] = dwrk->wend_token_frame[tl][j];
730 dwrk->wend_token_gscore[tn][i] = dwrk->wend_token_gscore[tl][j];
731 }
732 }
733 #endif
734 }
735 } /* end of node loop */
736
737 /* ���� t ��Viterbi����λ. ������������������ g_new[t] �å� */
738 /* Viterbi end for frame [t]. set the new forward score g_new[t] */
739 g_new[t] = dwrk->wordtrellis[tn][0];
740 #ifdef GRAPHOUT_PRECISE_BOUNDARY
741 if (r->graphout) {
742 /* new wordend */
743 wordend_frame_dst[t] = dwrk->wend_token_frame[tn][0];
744 wordend_gscore_dst[t] = dwrk->wend_token_gscore[tn][0];
745 }
746 #endif
747 /* ���ꤵ�줿 least_frame �����ޤ� t ���ʤ�Ǥ��ꡤ���Ĥ��� t �ˤ�����
748 ����������٥��פˤ�ä������Ĥä��Ρ��ɤ���Ĥ�̵���ä����,
749 ���Υե졼��Ƿ����Ǥ��ڤꤽ��ʾ���([0..t-1])�Ϸ����ʤ� */
750 /* if frame 't' already reached the 'least_frame' and no node was
751 survived in this frame (all nodes pruned by score envelope),
752 terminate computation at this frame and do not computer further
753 frame ([0..t-1]). */
754 if (t < least_frame && (!node_exist_p)) {
755 /* crear the rest scores */
756 for (i=t-1;i>=0;i--) {
757 g_new[i] = LOG_ZERO;
758 #ifdef GRAPHOUT_PRECISE_BOUNDARY
759 if (r->graphout) {
760 wordend_frame_dst[i] = -1;
761 wordend_gscore_dst[i] = LOG_ZERO;
762 }
763 #endif
764 }
765 /* terminate loop */
766 break;
767 }
768
769 } /* end of time loop */
770
771 if (hmminfo->multipath) {
772 /* �������������κǽ��ͤ�� (���� 0 ������� 0 �ؤ�����) */
773 /* compute the total forward score (transition from state 0 to frame 0 */
774 if (t < 0) { /* computed till the end */
775 tmpmax = LOG_ZERO;
776 for(ac=whmm->state[0].ac;ac;ac=ac->next) {
777 tmpscore = dwrk->wordtrellis[tn][ac->arc] + ac->a;
778 if (tmpmax < tmpscore) tmpmax = tmpscore;
779 }
780 *final_g = tmpmax;
781 } else {
782 *final_g = LOG_ZERO;
783 }
784 }
785
786 /* free work area */
787 free_hmm(whmm);
788 }
789
790 /**
791 * <JA>
792 * �Ǹ��1���Ǥ��Ф��� Viterbi ����ʤ��.
793 *
794 * @param now [in] Ÿ������ʸ����. �첻�������������������� g[] �ˤ���Ȥ���.
795 * @param new [out] ������������������� g[] �˳�Ǽ�����.
796 * @param lastphone [in] Viterbi����Ԥ�����HMM
797 * @param sp [in] short-pause insertion
798 * @param param [in] ���ϥ٥��ȥ���
799 * @param r [in] ǧ������������
800 * </JA>
801 * <EN>
802 * Proceed Viterbi for the last one phoneme.
803 *
804 * @param now [in] source hypothesis where the forward scores prior to the
805 * last one phone is stored at g[]
806 * @param new [out] the resulting updated forward scores will be saved to g[]
807 * @param lastphone [in] phone HMM for the Viterbi processing
808 * @param sp [in] short-pause insertion
809 * @param param [in] input vectors
810 * @param r [in] recognition process instance
811 * </EN>
812 */
813 static void
do_viterbi_next_word(NODE * now,NODE * new,HMM_Logical * lastphone,boolean sp,HTK_Param * param,RecogProcess * r)814 do_viterbi_next_word(NODE *now, NODE *new, HMM_Logical *lastphone, boolean sp, HTK_Param *param, RecogProcess *r) /* sp is for multipath only */
815 {
816 int t, n;
817 LOGPROB a_value; /* for non multi-path */
818 int peseqlen;
819 boolean multipath;
820 StackDecode *dwrk;
821
822 dwrk = &(r->pass2);
823
824 multipath = r->am->hmminfo->multipath;
825
826 peseqlen = r->peseqlen;
827
828 if (! multipath) {
829
830 /* �⤷Ÿ��������κǸ��ñ��β���Ĺ�� 1 �Ǥ���С����β��Ǥ�
831 ľ���� scan_word �Ƿ�����Ƥ��ʤ�. ���ξ��, now->g[] �˰�����
832 ����ͤ���Ǽ����Ƥ���.
833 �⤷����Ĺ�����ʾ�Ǥ���С�now->g[] �Ϥ��μ����ޤǷ���������
834 �Υ����������äƤ���Τ�,now->g[t] �������ͤ����ꤹ��ɬ�פ����� */
835 /* If the length of last word is 1, it means the last phone was not
836 scanned in the last call of scan_word(). In this case, now->g[]
837 keeps the previous initial value, so start viterbi with the old scores.
838 If the length is more than 1, the now->g[] keeps the values of the
839 scan result till the previous phone, so make initial value
840 considering last transition probability. */
841 if (r->lm->winfo->wlen[now->seq[now->seqnum-1]] > 1) {
842 n = hmm_logical_state_num(lastphone);
843 a_value = (hmm_logical_trans(lastphone))->a[n-2][n-1];
844 for(t=0; t<peseqlen-1; t++) dwrk->g[t] = now->g[t+1] + a_value;
845 dwrk->g[peseqlen-1] = LOG_ZERO;
846 } else {
847 for(t=0; t<peseqlen; t++) dwrk->g[t] = now->g[t];
848 }
849
850 } else {
851
852 for(t=0; t<peseqlen; t++) dwrk->g[t] = now->g[t];
853 dwrk->phmmseq[0] = lastphone;
854 if (r->lm->config->enable_iwsp) dwrk->has_sp[0] = sp;
855
856 }
857
858 do_viterbi(dwrk->g, new->g,
859 multipath ? dwrk->phmmseq : &lastphone,
860 (r->lm->config->enable_iwsp && multipath) ? dwrk->has_sp : NULL,
861 1, param, peseqlen, now->estimated_next_t, &(new->final_g)
862 #ifdef GRAPHOUT_PRECISE_BOUNDARY
863 , now->wordend_frame, new->wordend_frame
864 , now->wordend_gscore, new->wordend_gscore
865 #else
866 , NULL, NULL
867 , NULL, NULL
868 #endif
869 , r
870 );
871
872 #ifdef GRAPHOUT_PRECISE_BOUNDARY
873 if (! multipath) {
874 if (r->graphout) {
875 /* ����� next_word �Ѥ˶��������Ĵ�� */
876 /* proceed word boundary for one step for next_word */
877 new->wordend_frame[r->peseqlen-1] = new->wordend_frame[0];
878 new->wordend_gscore[r->peseqlen-1] = new->wordend_gscore[0];
879 for (t=0;t<r->peseqlen-1;t++) {
880 new->wordend_frame[t] = new->wordend_frame[t+1];
881 new->wordend_gscore[t] = new->wordend_gscore[t+1];
882 }
883 }
884 }
885 #endif
886 }
887
888 /**
889 * <JA>
890 * �Ǹ��1ñ����������ȥ�ꥹ������ơ�ʸ��������������٤�����.
891 *
892 * @param now [i/o] ʸ����
893 * @param param [in] ���ϥѥ�����
894 * @param r [in] ǧ������������
895 * </JA>
896 * <EN>
897 * Compute the forward viterbi for the last word to update forward scores
898 * and ready for word connection.
899 *
900 * @param now [i/o] hypothesis
901 * @param param [in] input parameter vectors
902 * @param r [in] recognition process instance
903 * </EN>
904 * @callgraph
905 * @callergraph
906 */
907 void
scan_word(NODE * now,HTK_Param * param,RecogProcess * r)908 scan_word(NODE *now, HTK_Param *param, RecogProcess *r)
909 {
910 int i,t;
911 WORD_ID word;
912 int phmmlen;
913 HMM_Logical *tailph;
914
915 /* store global values to local for rapid access */
916 WORD_INFO *winfo;
917 HTK_HMM_INFO *hmminfo;
918 int peseqlen;
919 boolean ccd_flag;
920 boolean enable_iwsp; /* multipath */
921 StackDecode *dwrk;
922
923 dwrk = &(r->pass2);
924 winfo = r->lm->winfo;
925 hmminfo = r->am->hmminfo;
926 peseqlen = r->peseqlen;
927 ccd_flag = r->ccd_flag;
928 if (hmminfo->multipath) {
929 enable_iwsp = r->lm->config->enable_iwsp;
930 }
931
932 #ifndef GRAPHOUT_PRECISE_BOUNDARY
933 if (r->graphout) {
934 if (ccd_flag) {
935 now->tail_g_score = now->g[now->bestt];
936 }
937 }
938 #endif
939
940 /* ----------------------- prepare phoneme sequence ------------------ */
941 /* triphone�ʤ���Ƭ��1���ǤϤ����Ǥ��оݳ�(���Ȥ�next_word�Ǥ��) */
942 /* ������1���Ǥϥ���ƥ����Ȥˤ������ä��ִ� */
943 /* with triphone, modify the tail phone of the last word according to the
944 previous word, and do not compute the head phone here (that will be
945 computed later in next_word() */
946 word = now->seq[now->seqnum-1];
947
948 #ifdef TCD
949 jlog("DEBUG: w=");
950 for(i=0;i<winfo->wlen[word];i++) {
951 jlog(" %s",(winfo->wseq[word][i])->name);
952 }
953 if (ccd_flag) {
954 if (now->last_ph != NULL) {
955 jlog(" | %s", (now->last_ph)->name);
956 }
957 }
958 jlog("\n");
959 #endif /* TCD */
960
961 if (ccd_flag) {
962
963 /* the tail triphone of the last word varies by context */
964 if (now->last_ph != NULL) {
965 tailph = get_right_context_HMM(winfo->wseq[word][winfo->wlen[word]-1], now->last_ph->name, hmminfo);
966 if (tailph == NULL) {
967 /* fallback to the original bi/mono-phone */
968 /* error if the original is pseudo phone (not explicitly defined
969 in hmmdefs/hmmlist) */
970 /* exception: word with 1 phone (triphone may exist in the next expansion */
971 if (winfo->wlen[word] > 1 && winfo->wseq[word][winfo->wlen[word]-1]->is_pseudo){
972 error_missing_right_triphone(winfo->wseq[word][winfo->wlen[word]-1], now->last_ph->name);
973 }
974
975 tailph = winfo->wseq[word][winfo->wlen[word]-1];
976 }
977 } else {
978 tailph = winfo->wseq[word][winfo->wlen[word]-1];
979 }
980 /* Ĺ��1��ñ��ϼ���nextword�Ǥ�����Ѳ�����ΤǤ����Ǥ�scan���ʤ� */
981 /* do not scan word if the length is 1, as it further varies in the
982 following next_word() */
983 if (winfo->wlen[word] == 1) {
984 now->last_ph = tailph;
985 if (enable_iwsp && hmminfo->multipath) now->last_ph_sp_attached = TRUE;
986 #ifdef GRAPHOUT_PRECISE_BOUNDARY
987 if (r->graphout) {
988 /* ñ�춭�����¾�������� */
989 /* initialize word boundary propagation info */
990 for (t=0;t<peseqlen;t++) {
991 now->wordend_frame[t] = t;
992 now->wordend_gscore[t] = now->g[t];
993 }
994 }
995 #endif
996 #ifdef TCD
997 jlog("DEBUG: suspended as %s\n", (now->last_ph)->name);
998 #endif
999 return;
1000 }
1001
1002 /* scan�ϰϤβ��������� */
1003 /* prepare HMM of the scan range */
1004 phmmlen = winfo->wlen[word] - 1;
1005 if (phmmlen > dwrk->phmmlen_max) {
1006 j_internal_error("scan_word: num of phonemes in a word exceed phmmlenmax (%d) ?\n", dwrk->phmmlen_max);
1007 }
1008 for (i=0;i<phmmlen-1;i++) {
1009 dwrk->phmmseq[i] = winfo->wseq[word][i+1];
1010 }
1011 dwrk->phmmseq[phmmlen-1] = tailph;
1012 if (enable_iwsp && hmminfo->multipath) {
1013 for (i=0;i<phmmlen-1;i++) dwrk->has_sp[i] = FALSE;
1014 dwrk->has_sp[phmmlen-1] = TRUE;
1015 }
1016
1017 } else { /* ~ccd_flag */
1018
1019 phmmlen = winfo->wlen[word];
1020 for (i=0;i<phmmlen;i++) dwrk->phmmseq[i] = winfo->wseq[word][i];
1021 if (enable_iwsp && hmminfo->multipath) {
1022 for (i=0;i<phmmlen;i++) dwrk->has_sp[i] = FALSE;
1023 dwrk->has_sp[phmmlen-1] = TRUE;
1024 }
1025
1026 }
1027
1028 /* ����g[]�ä������Ƥ��� */
1029 /* temporally keeps the original g[] */
1030 for (t=0;t<peseqlen;t++) dwrk->g[t] = now->g[t];
1031
1032 #ifdef GRAPHOUT_PRECISE_BOUNDARY
1033 if (r->graphout) {
1034 /* ñ�춭�����¾�������� */
1035 /* initialize word boundary propagation info */
1036 for (t=0;t<peseqlen;t++) {
1037 dwrk->wef[t] = t;
1038 dwrk->wes[t] = now->g[t];
1039 }
1040 }
1041 #endif
1042
1043 /* viterbi��¹Ԥ��� g[] ���� now->g[] ������ */
1044 /* do viterbi computation for phmmseq from g[] to now->g[] */
1045 do_viterbi(dwrk->g, now->g, dwrk->phmmseq, (enable_iwsp && hmminfo->multipath) ? dwrk->has_sp : NULL,
1046 phmmlen, param, peseqlen, now->estimated_next_t, &(now->final_g)
1047 #ifdef GRAPHOUT_PRECISE_BOUNDARY
1048 /* ñ�춭������ we[] ���� now->wordend_frame[] ������ */
1049 /* propagate word boundary info from we[] to now->wordend_frame[] */
1050 , dwrk->wef, now->wordend_frame
1051 , dwrk->wes, now->wordend_gscore
1052 #else
1053 , NULL, NULL
1054 , NULL, NULL
1055 #endif
1056 , r
1057 );
1058 #ifdef GRAPHOUT_PRECISE_BOUNDARY
1059 if (! hmminfo->multipath) {
1060 if (r->graphout) {
1061 /* ����� next_word �Ѥ˶��������Ĵ�� */
1062 /* proceed word boundary for one step for next_word */
1063 now->wordend_frame[peseqlen-1] = now->wordend_frame[0];
1064 now->wordend_gscore[peseqlen-1] = now->wordend_gscore[0];
1065 for (t=0;t<peseqlen-1;t++) {
1066 now->wordend_frame[t] = now->wordend_frame[t+1];
1067 now->wordend_gscore[t] = now->wordend_gscore[t+1];
1068 }
1069 }
1070 }
1071 #endif
1072
1073 if (ccd_flag) {
1074 /* �������� now->last_ph �� */
1075 /* update 'now->last_ph' for future scan_word() */
1076 now->last_ph = winfo->wseq[word][0];
1077 if (enable_iwsp && hmminfo->multipath) now->last_ph_sp_attached = FALSE; /* wlen > 1 here */
1078 #ifdef TCD
1079 jlog("DEBUG: last_ph = %s\n", (now->last_ph)->name);
1080 #endif
1081 }
1082 }
1083
1084
1085 /**************************************************************************/
1086 /*** �������Ÿ���ȥҥ塼�ꥹ�ƥ��å���Ҥ������Υ�������� ***/
1087 /*** Expand new hypothesis and compute the total score (with heuristic) ***/
1088 /**************************************************************************/
1089
1090 /**
1091 * <JA>
1092 * Ÿ��������˼�ñ�����³���ƿ������������������. ��ñ���ñ��ȥ�ꥹ���
1093 * ���������������³�����ᡤ���⥹�����������.
1094 *
1095 * @param now [in] Ÿ��������
1096 * @param new [out] �������������줿���⤬��Ǽ�����
1097 * @param nword [in] ��³���뼡ñ��ξ���
1098 * @param param [in] ���ϥѥ�����
1099 * @param r [in] ǧ������������
1100 * </JA>
1101 * <EN>
1102 * Connect a new word to generate a next hypothesis. The optimal connection
1103 * point and new sentence score of the new hypothesis will be estimated by
1104 * looking up the corresponding words on word trellis.
1105 *
1106 * @param now [in] source hypothesis
1107 * @param new [out] pointer to save the newly generated hypothesis
1108 * @param nword [in] next word to be connected
1109 * @param param [in] input parameter vector
1110 * @param r [in] recognition process instance
1111 * </EN>
1112 * @callgraph
1113 * @callergraph
1114 */
1115 void
next_word(NODE * now,NODE * new,NEXTWORD * nword,HTK_Param * param,RecogProcess * r)1116 next_word(NODE *now, NODE *new, NEXTWORD *nword, HTK_Param *param, RecogProcess *r)
1117 {
1118 HMM_Logical *lastphone, *newphone;
1119 LOGPROB *g_src;
1120 int t;
1121 int lastword;
1122 int i;
1123 LOGPROB a_value;
1124 LOGPROB tmpp;
1125 int startt;
1126 int word;
1127 TRELLIS_ATOM *tre;
1128 LOGPROB totalscore;
1129 BACKTRELLIS *backtrellis;
1130 WORD_INFO *winfo;
1131 HTK_HMM_INFO *hmminfo;
1132 int peseqlen;
1133 boolean ccd_flag;
1134 StackDecode *dwrk;
1135
1136 dwrk = &(r->pass2);
1137 backtrellis = r->backtrellis;
1138 winfo = r->lm->winfo;
1139 hmminfo = r->am->hmminfo;
1140 peseqlen = r->peseqlen;
1141 ccd_flag = r->ccd_flag;
1142
1143 word = nword->id;
1144 lastword = now->seq[now->seqnum-1];
1145
1146 /* lastphone (ľ��ñ�����Ƭ����) ����� */
1147 /* prepare lastphone (head phone of previous word) */
1148 if (ccd_flag) {
1149 /* �ǽ����� triphone ����³ñ��˲�碌���Ѳ� */
1150 /* modify triphone of last phone according to the next word */
1151 lastphone = get_left_context_HMM(now->last_ph, winfo->wseq[word][winfo->wlen[word]-1]->name, hmminfo);
1152 if (lastphone == NULL) {
1153 /* fallback to the original bi/mono-phone */
1154 /* error if the original is pseudo phone (not explicitly defined
1155 in hmmdefs/hmmlist) */
1156 /* exception: word with 1 phone (triphone may exist in the next expansion */
1157 if (now->last_ph->is_pseudo){
1158 error_missing_left_triphone(now->last_ph, winfo->wseq[word][winfo->wlen[word]-1]->name);
1159 }
1160 lastphone = now->last_ph;
1161 }
1162 }
1163
1164 /* newphone (��³ñ�����������) ����� */
1165 /* prepare newphone (tail phone of next word) */
1166 if (ccd_flag) {
1167 newphone = get_right_context_HMM(winfo->wseq[word][winfo->wlen[word]-1], now->last_ph->name, hmminfo);
1168 if (newphone == NULL) {
1169 /* fallback to the original bi/mono-phone */
1170 /* error if the original is pseudo phone (not explicitly defined
1171 in hmmdefs/hmmlist) */
1172 /* exception: word with 1 phone (triphone may exist in the next expansion */
1173 if (winfo->wlen[word] > 1 && winfo->wseq[word][winfo->wlen[word]-1]->is_pseudo){
1174 error_missing_right_triphone(winfo->wseq[word][winfo->wlen[word]-1], now->last_ph->name);
1175 }
1176 newphone = winfo->wseq[word][winfo->wlen[word]-1];
1177 }
1178 } else {
1179 newphone = winfo->wseq[word][winfo->wlen[word]-1];
1180 }
1181
1182 /* ñ���¤ӡ�DFA�����ֹ桢���쥹������ new �طѾ������� */
1183 /* inherit and update word sequence, DFA state and total LM score to 'new' */
1184 new->score = LOG_ZERO;
1185 for (i=0;i< now->seqnum;i++){
1186 new->seq[i] = now->seq[i];
1187 #ifdef CM_SEARCH
1188 #ifdef CM_MULTIPLE_ALPHA
1189 memcpy(new->cmscore[i], now->cmscore[i], sizeof(LOGPROB) * r->config->annotate.cm_alpha_num);
1190 #else
1191 new->cmscore[i] = now->cmscore[i];
1192 #endif
1193 #endif /* CM_SEARCH */
1194 }
1195 new->seq[i] = word;
1196 new->seqnum = now->seqnum+1;
1197 new->state = nword->next_state;
1198 new->totallscore = now->totallscore + nword->lscore;
1199 if (ccd_flag) {
1200 /* ��������������Ȥ�����¸ */
1201 /* keep the lastphone for next scan_word() */
1202 new->last_ph = lastphone;
1203 new->last_ph_sp_attached = now->last_ph_sp_attached;
1204 }
1205
1206 if (ccd_flag) {
1207 /* �Ǹ��1����(lastphone)ʬ��scan�������������������� new ����¸ */
1208 /* scan the lastphone and set the updated score to new->g[] */
1209 do_viterbi_next_word(now, new, lastphone,
1210 hmminfo->multipath ? now->last_ph_sp_attached : FALSE,
1211 param, r);
1212 g_src = new->g;
1213 } else {
1214 g_src = now->g;
1215 #ifdef GRAPHOUT_PRECISE_BOUNDARY
1216 if (r->graphout) {
1217 memcpy(new->wordend_frame, now->wordend_frame, sizeof(short)*peseqlen);
1218 memcpy(new->wordend_gscore, now->wordend_gscore, sizeof(LOGPROB)*peseqlen);
1219 }
1220 #endif
1221 }
1222
1223 /* ����� scan_word �������� new->g[] ���ѹ����Ƥ��� */
1224 /* prepare new->g[] for next scan_word() */
1225 if (hmminfo->multipath) {
1226 startt = peseqlen-1;
1227 } else {
1228 startt = peseqlen-2;
1229 }
1230 i = hmm_logical_state_num(newphone);
1231 a_value = (hmm_logical_trans(newphone))->a[i-2][i-1];
1232 if (hmminfo->multipath) {
1233 for(t=0; t <= startt; t++) {
1234 new->g[t] = g_src[t] + nword->lscore;
1235 }
1236 } else {
1237 for(t=0; t <= startt; t++) {
1238 new->g[t] = g_src[t+1] + a_value + nword->lscore;
1239 }
1240 }
1241
1242 /***************************************************************************/
1243 /* ������(�裲�ѥ�),������(�裱�ѥ�)�ȥ�ꥹ����³��������³���Ĥ��� */
1244 /* connect forward/backward trellis to look for the best connection time */
1245 /***************************************************************************/
1246 /*-----------------------------------------------------------------*/
1247 /* ñ��ȥ�ꥹ��õ����, ��ñ��κ�����³����ȯ������ */
1248 /* determine the best connection time of the new word, seeking the word
1249 trellis */
1250 /*-----------------------------------------------------------------*/
1251
1252 if (r->lmtype == LM_DFA && !r->config->pass2.looktrellis_flag) {
1253 /* ���٤ƤΥե졼��ˤ錄�äƺ����õ�� */
1254 /* search for best trellis word throughout all frame */
1255 for(t = startt; t >= 0; t--) {
1256 tre = bt_binsearch_atom(backtrellis, t, (WORD_ID) word);
1257 if (tre == NULL) continue;
1258 totalscore = new->g[t] + tre->backscore;
1259 if (! hmminfo->multipath) {
1260 if (newphone->is_pseudo) {
1261 tmpp = outprob_cd(&(r->am->hmmwrk), t, &(newphone->body.pseudo->stateset[newphone->body.pseudo->state_num-2]), param);
1262 } else {
1263 tmpp = outprob_state(&(r->am->hmmwrk), t, newphone->body.defined->s[newphone->body.defined->state_num-2], param);
1264 }
1265 totalscore += tmpp;
1266 }
1267 if (new->score < totalscore) {
1268 new->score = totalscore;
1269 new->bestt = t;
1270 new->estimated_next_t = tre->begintime - 1;
1271 new->tre = tre;
1272 }
1273 }
1274
1275 return;
1276 }
1277
1278 /* �Ǹ�˻��Ȥ���TRELLIS_ATOM�ν�ü���֤����� */
1279 /* new�ο�����֤ϡ��嵭�Ǻ��Ѥ���TRELLIS_ATOM�λ�ü���� */
1280
1281 /* ����Ÿ��ñ��Υȥ�ꥹ��ν�ü���֤�����Τߥ������
1282 �����Ϣ³����¸�ߤ���ե졼��ˤĤ��ƤΤ߷� */
1283 /* search for best trellis word only around the estimated time */
1284 /* 1. search forward */
1285 for(t = (nword->tre)->endtime; t >= 0; t--) {
1286 tre = bt_binsearch_atom(backtrellis, t, (WORD_ID) word);
1287 if (tre == NULL) break; /* go to 2 if the trellis word disappear */
1288 totalscore = new->g[t] + tre->backscore;
1289 if (! hmminfo->multipath) {
1290 if (newphone->is_pseudo) {
1291 tmpp = outprob_cd(&(r->am->hmmwrk), t, &(newphone->body.pseudo->stateset[newphone->body.pseudo->state_num-2]), param);
1292 } else {
1293 tmpp = outprob_state(&(r->am->hmmwrk), t, newphone->body.defined->s[newphone->body.defined->state_num-2], param);
1294 }
1295 totalscore += tmpp;
1296 }
1297 if (new->score < totalscore) {
1298 new->score = totalscore;
1299 new->bestt = t;
1300 new->estimated_next_t = tre->begintime - 1;
1301 new->tre = tre;
1302 }
1303 }
1304 /* 2. search bckward */
1305 for(t = (nword->tre)->endtime + 1; t <= startt; t++) {
1306 tre = bt_binsearch_atom(backtrellis, t, (WORD_ID) word);
1307 if (tre == NULL) break; /* end if the trellis word disapper */
1308 totalscore = new->g[t] + tre->backscore;
1309 if (! hmminfo->multipath) {
1310 if (newphone->is_pseudo) {
1311 tmpp = outprob_cd(&(r->am->hmmwrk), t, &(newphone->body.pseudo->stateset[newphone->body.pseudo->state_num-2]), param);
1312 } else {
1313 tmpp = outprob_state(&(r->am->hmmwrk), t, newphone->body.defined->s[newphone->body.defined->state_num-2], param);
1314 }
1315 totalscore += tmpp;
1316 }
1317 if (new->score < totalscore) {
1318 new->score = totalscore;
1319 new->bestt = t;
1320 new->estimated_next_t = tre->begintime - 1;
1321 new->tre = tre;
1322 }
1323 }
1324
1325 /* set current LM score */
1326 new->lscore = nword->lscore;
1327
1328 }
1329
1330
1331 /**********************************************************************/
1332 /********** ������������ ****************************/
1333 /********** Generate an initial hypothesis ****************************/
1334 /**********************************************************************/
1335
1336 /**
1337 * <JA>
1338 * Ϳ����줿ñ�줫�����������������.
1339 *
1340 * @param new [out] �������������줿���⤬��Ǽ�����
1341 * @param nword [in] �������ñ��ξ���
1342 * @param param [in] ���ϥѥ�����
1343 * @param r [in] ǧ������������
1344 * </JA>
1345 * <EN>
1346 * Generate an initial hypothesis from given word.
1347 *
1348 * @param new [out] pointer to save the newly generated hypothesis
1349 * @param nword [in] words of the first candidates
1350 * @param param [in] input parameter vector
1351 * @param r [in] recognition process instance
1352 *
1353 * </EN>
1354 * @callgraph
1355 * @callergraph
1356 */
1357 void
start_word(NODE * new,NEXTWORD * nword,HTK_Param * param,RecogProcess * r)1358 start_word(NODE *new, NEXTWORD *nword, HTK_Param *param, RecogProcess *r)
1359 {
1360 HMM_Logical *newphone;
1361 WORD_ID word;
1362 TRELLIS_ATOM *tre = NULL;
1363 LOGPROB tmpp;
1364 int t;
1365
1366 BACKTRELLIS *backtrellis;
1367 WORD_INFO *winfo;
1368
1369 int peseqlen;
1370 boolean ccd_flag;
1371 boolean multipath;
1372
1373 backtrellis = r->backtrellis;
1374 winfo = r->lm->winfo;
1375 peseqlen = r->peseqlen;
1376 ccd_flag = r->ccd_flag;
1377 multipath = r->am->hmminfo->multipath;
1378
1379 /* initialize data */
1380 word = nword->id;
1381 new->score = LOG_ZERO;
1382 new->seqnum = 1;
1383 new->seq[0] = word;
1384
1385 new->state = nword->next_state;
1386 new->totallscore = nword->lscore;
1387
1388 /* set current LM score */
1389 new->lscore = nword->lscore;
1390
1391 /* cross-word triphone need not be handled on startup */
1392 newphone = winfo->wseq[word][winfo->wlen[word]-1];
1393 if (ccd_flag) {
1394 new->last_ph = NULL;
1395 }
1396
1397 if (r->lmtype == LM_PROB) {
1398 new->g[peseqlen-1] = nword->lscore;
1399 } else {
1400 new->g[peseqlen-1] = 0;
1401 }
1402
1403 for (t=peseqlen-1; t>=0; t--) {
1404 tre = bt_binsearch_atom(backtrellis, t, word);
1405 if (tre != NULL) {
1406 if (r->graphout) {
1407 new->bestt = peseqlen-1;
1408 } else {
1409 new->bestt = t;
1410 }
1411 new->score = new->g[peseqlen-1] + tre->backscore;
1412 if (! multipath) {
1413 if (newphone->is_pseudo) {
1414 tmpp = outprob_cd(&(r->am->hmmwrk), peseqlen-1, &(newphone->body.pseudo->stateset[newphone->body.pseudo->state_num-2]), param);
1415 } else {
1416 tmpp = outprob_state(&(r->am->hmmwrk), peseqlen-1, newphone->body.defined->s[newphone->body.defined->state_num-2], param);
1417 }
1418 new->score += tmpp;
1419 }
1420 new->estimated_next_t = tre->begintime - 1;
1421 new->tre = tre;
1422 break;
1423 }
1424 }
1425 if (tre == NULL) { /* no word in backtrellis */
1426 new->score = LOG_ZERO;
1427 }
1428 }
1429
1430 /**
1431 * <JA>
1432 * ��ü��������ü�ޤ�ã����ʸ����κǽ�Ū�ʥ������åȤ���.
1433 *
1434 * @param now [in] ��ü�ޤ�ã��������
1435 * @param new [out] �ǽ�Ū��ʸ����Υ��������Ǽ������ؤΥݥ���
1436 * @param param [in] ���ϥѥ�����
1437 * @param r [in] ǧ������������
1438 * </JA>
1439 * <EN>
1440 * Hypothesis termination: set the final sentence scores of hypothesis
1441 * that has already reached to the end.
1442 *
1443 * @param now [in] hypothesis that has already reached to the end
1444 * @param new [out] pointer to save the final sentence information
1445 * @param param [in] input parameter vectors
1446 * @param r [in] recognition process instance
1447 * </EN>
1448 * @callgraph
1449 * @callergraph
1450 */
1451 void
last_next_word(NODE * now,NODE * new,HTK_Param * param,RecogProcess * r)1452 last_next_word(NODE *now, NODE *new, HTK_Param *param, RecogProcess *r)
1453 {
1454 cpy_node(new, now);
1455 if (r->ccd_flag) {
1456 /* �ǽ�����ʬ�� viterbi ���ƺǽ������������� */
1457 /* scan the last phone and update the final score */
1458 if (r->am->hmminfo->multipath) {
1459 do_viterbi_next_word(now, new, now->last_ph, now->last_ph_sp_attached, param, r);
1460 new->score = new->final_g;
1461 } else {
1462 do_viterbi_next_word(now, new, now->last_ph, FALSE, param, r);
1463 new->score = new->g[0];
1464 }
1465 } else {
1466 if (r->am->hmminfo->multipath) {
1467 new->score = now->final_g;
1468 } else {
1469 new->score = now->g[0];
1470 }
1471 #ifdef GRAPHOUT_PRECISE_BOUNDARY
1472 if (r->graphout) {
1473 /* last boundary has moved to [peseqlen-1] in last scan_word() */
1474 memcpy(new->wordend_frame, now->wordend_frame, sizeof(short)*r->peseqlen);
1475 memcpy(new->wordend_gscore, now->wordend_gscore, sizeof(LOGPROB)*r->peseqlen);
1476 }
1477 #endif
1478 }
1479 }
1480
1481 #endif /* PASS2_STRICT_IWCD */
1482
1483 /* end of file */
1484