1 /**
2  * @file   search_bestfirst_main.c
3  *
4  * <JA>
5  * @brief  ��2�ѥ��������å��ǥ����ǥ���
6  *
7  * Julius ����2�ѥ��Ǥ��륹���å��ǥ����ǥ������르�ꥺ�ब���Ҥ���
8  * �Ƥ��ޤ�. ��1�ѥ��η�̤�ñ��ȥ�ꥹ������ˡ���1�ѥ��Ȥϵո���
9  * �� right-to-left ��õ����Ԥ��ޤ�. ����Υ������ϡ���1�ѥ��Υȥ��
10  * ���Ȥ��Υ�������̤õ�����Υҥ塼�ꥹ�ƥ��å��Ȥ�����³���뤳�Ȥǡ�
11  * ʸ���Τβ��⥹�������θ���ʤ���õ����Ԥ��ޤ�.
12  *
13  * ��ñ�콸��μ����Τ���ˡ�ñ��N-gram�Ǥ� ngram_decode.c ��δؿ�����
14  * ʸˡ�Ǥ� dfa_decode.c �δؿ����Ѥ����ޤ�.
15  *
16  * </JA>
17  *
18  * <EN>
19  * @brief  The second pass: stack decoding
20  *
21  * This file implements search algorithm based on best-first stack
22  * decoding on the 2nd pass.  The search will be performed on backward
23  * (i.e. right-to-left) direction, using the result of 1st pass (word
24  * trellis) as heuristics of unreached area.  Hypothesis are stored
25  * in a global stack, and the best one will be expanded according to
26  * the survived words in the word trellis and language constraint.
27  *
28  * The expanding words will be given by ngram_decode.c for N-gram
29  * based recognition, with their langugage probabilities, or by
30  * dfa_decode.c for grammar-based recognition, with their emitting
31  * DFA state information.
32  *
33  * </EN>
34  *
35  * @author Akinobu Lee
36  * @date   Thu Sep 08 11:51:12 2005
37  *
38  * $Revision: 1.8 $
39  *
40  */
41 /*
42  * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
43  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
44  * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
45  * All rights reserved
46  */
47 
48 #include <julius/julius.h>
49 
50 /* declaration of local functions */
51 static NODE *get_best_from_stack(NODE **start, int *stacknum);
52 static int put_to_stack(NODE *new, NODE **start, NODE **bottom, int *stacknum, int stacksize);
53 static void put_all_in_stack(NODE **start, int *stacknum, WORD_INFO *winfo);
54 static void free_all_nodes(NODE *node);
55 static void put_hypo_woutput(NODE *hypo, WORD_INFO *winfo);
56 static void put_hypo_wname(NODE *hypo, WORD_INFO *winfo);
57 
58 /**********************************************************************/
59 /********** ��ñ���Ǽ�ΰ�γ������          *************************/
60 /********** allocate memory for nextword data *************************/
61 /**********************************************************************/
62 
63 /**
64  * <JA>
65  * ��ñ��γ�Ǽ�ΰ�γ������.
66  * ��ñ�������Ǽ���뤿��� NEXTWORD ����˥�������դ���.
67  *
68  * @param maxlen [out] ��Ǽ��ǽ��ñ���
69  * @param root [out] ����դ��ΰ����Ƭ�ؤΥݥ���
70  * @param max [in] ����դ����ΰ�Υ�����
71  *
72  * @return ����դ���줿��ñ������ؤΥݥ������֤�.
73  * </JA>
74  * <EN>
75  * Allocate memory for next word candidates.
76  * Allocate NEXTWORD array for storing list of candidate next words.
77  *
78  * @param maxlen [out] maximum number of words that can be stored
79  * @param root [out] pointer to the top address of allocated data
80  * @param max [in] number of elementes to be allocated
81  *
82  * @return the newly allocated pointer of NEXTWORD array.
83  * </EN>
84  */
85 static NEXTWORD **
nw_malloc(int * maxlen,NEXTWORD ** root,int max)86 nw_malloc(int *maxlen, NEXTWORD **root, int max)
87 {
88   NEXTWORD *nwtmp;
89   NEXTWORD **nw;
90   int i;
91 
92   nw = (NEXTWORD **)mymalloc(max * sizeof(NEXTWORD *));
93   nwtmp = (NEXTWORD *)mymalloc(max * sizeof(NEXTWORD));
94   for (i=0;i<max; i++) {
95     nw[i] = &(nwtmp[i]);
96   }
97   *maxlen = max;
98   *root = nwtmp;
99   return nw;
100 }
101 
102 /**
103  * <JA>
104  * ��ñ��γ�Ǽ�ΰ�β���.
105  *
106  * @param nw [in] NEXTWORD����
107  * @param root [in] nw_malloc() ��Ϳ����줿�ΰ���Ƭ�ؤΥݥ���
108  * </JA>
109  * <EN>
110  * Free next word candidate area.
111  *
112  * @param nw [in] pointer to NEXTWORD structure to be free.
113  * @param root [in] pointer to the top address of allocated data previously
114  * returned by nw_malloc()
115  * </EN>
116  */
117 static void
nw_free(NEXTWORD ** nw,NEXTWORD * root)118 nw_free(NEXTWORD **nw, NEXTWORD *root)
119 {
120   free(root);
121   free(nw);
122 }
123 
124 /**
125  * <JA>
126  * @brief  ��ñ������Ǽ�Ѥ� NEXTWORD ����Υ����ΰ��ĥ����.
127  *
128  * ���δؿ���õ����˼�ñ����佸�礬��줿�ݤ˸ƤФ졤����ˤ��¿����
129  * ��ñ�������Ǽ�Ǥ���褦 NEXTWORD ����Ȥ� realloc() ����.
130  * �ºݤˤϺǽ�� nw_malloc() �Ǽ����ñ���ʬ�����ΰ����ݤ��Ƥ��ꡤ
131  * ñ��N-gram���ѻ��ϸƤФ�뤳�ȤϤʤ�. ʸˡǧ���Ǥϡ����硼�ȥݡ�����
132  * �����å׽����ˤ����֤ΰۤʤ�����Ʊ����Ÿ������Τǡ�
133  * ��ñ��������ÿ������礭�����Ȥ������ꤦ��.
134  *
135  * @param nwold [i/o] NEXTWORD����
136  * @param maxlen [i/o] �����Ǽ�����Ǽ����ݥ���. ���ߤκ����Ǽ����
137  * ����ƸƤӡ��ؿ���ǿ����˳��ݤ��줿�����ѹ������.
138  * @param root [i/o] �ΰ���Ƭ�ؤΥݥ������Ǽ���륢�ɥ쥹. �ؿ����
139  * ����������.
140  * @param num [in] ��Ĺ����Ĺ��
141  *
142  * @return ��ĥ���줿�����ʼ�ñ������ؤΥݥ������֤�.
143  * </JA>
144  * <EN>
145  * @brief  expand data area of NEXTWORD.
146  *
147  * In DFA mode, the number of nextwords can exceed the vocabulary size when
148  * more than one DFA states are expanded by short-pause skipping.
149  * In such case, the nextword data area should expanded here.
150  *
151  * @param nwold [i/o] NEXTWORD array
152  * @param maxlen [i/o] pointer to the maximum number of words that can be
153  * stored.  The current number should be stored before calling this function,
154  * and the resulting new number will be stored within this function.
155  * @param root [i/o] address to the pointer of the allocated data.  The
156  * value will be updated by reallocation in this function.
157  * @param num [in] size to expand
158  *
159  * @return the newlly re-allocated pointer of NEXTWORD array.
160  * </EN>
161  */
162 static NEXTWORD **
nw_expand(NEXTWORD ** nwold,int * maxlen,NEXTWORD ** root,int num)163 nw_expand(NEXTWORD **nwold, int *maxlen, NEXTWORD **root, int num)
164 {
165   NEXTWORD *nwtmp;
166   NEXTWORD **nw;
167   int i;
168   int nwmaxlen;
169 
170   nwmaxlen = *maxlen + num;
171 
172   nwtmp = (NEXTWORD *)myrealloc(*root, nwmaxlen * sizeof(NEXTWORD));
173   nw = (NEXTWORD **)myrealloc(nwold, nwmaxlen * sizeof(NEXTWORD *));
174   nw[0] = nwtmp;
175   for (i=1;i<nwmaxlen; i++) {
176     nw[i] = &(nwtmp[i]);
177   }
178   *maxlen = nwmaxlen;
179   *root = nwtmp;
180   return nw;
181 }
182 
183 
184 /**********************************************************************/
185 /********** ���⥹���å������         ********************************/
186 /********** Hypothesis stack operation ********************************/
187 /**********************************************************************/
188 
189 /**
190  * <JA>
191  * �����å��ȥåפκ��ಾ�����Ф�.
192  *
193  * @param start [i/o] �����å�����Ƭ�Ρ��ɤؤΥݥ����ʽ��������礢���
194  * @param stacknum [i/o] ���ߤΥ����å��������ؤΥݥ����ʽ����������
195  *
196  * @return ���Ф������ಾ��Υݥ������֤�.
197  * </JA>
198  * <EN>
199  * Pop the best hypothesis from stack.
200  *
201  * @param start [i/o] pointer to stack top node (will be modified if necessary)
202  * @param stacknum [i/o] pointer to the current stack size (will be modified
203  * if necessary)
204  *
205  * @return pointer to the popped hypothesis.
206  * </EN>
207  */
208 static NODE *
get_best_from_stack(NODE ** start,int * stacknum)209 get_best_from_stack(NODE **start, int *stacknum)
210 {
211   NODE *tmp;
212 
213   /* return top */
214   tmp=(*start);
215   if ((*start)!=NULL) {		/* delete it from stack */
216     (*start)=(*start)->next;
217     if ((*start)!=NULL) (*start)->prev=NULL;
218     (*stacknum)--;
219     return(tmp);
220   }
221   else {
222     return(NULL);
223   }
224 }
225 
226 /**
227  * <JA>
228  * ���벾�⤬�����å���˳�Ǽ����뤫�ɤ��������å�����.
229  *
230  * @param new [in] �����å����벾��
231  * @param bottom [in] �����å�����Ρ��ɤؤΥݥ���
232  * @param stacknum [in] �����å��˸��߳�Ǽ����Ƥ���Ρ��ɿ��ؤΥݥ���
233  * @param stacksize [in] �����å��ΥΡ��ɿ��ξ��
234  *
235  * @return �����å��Υ���������¤�ã���Ƥ��ʤ���������������Ρ��ɤ���
236  * �褱��г�Ǽ�����Ȥ��� 0 ������ʳ��Ǥ���г�Ǽ�Ǥ��ʤ��Ȥ��� -1 ��
237  * �֤�.
238  * </JA>
239  * <EN>
240  * Check whether a hypothesis will be stored in the stack.
241  *
242  * @param new [in] hypothesis to be checked
243  * @param bottom [in] pointer to stack bottom node
244  * @param stacknum [in] pointer to current stack size
245  * @param stacksize [in] pointer to maximum stack size limit
246  *
247  * @return 0 if it will be stored in the stack (in case @a stacknum <
248  * @a stacksize or the score of @a new is better than bottom.  Otherwise
249  * returns -1, which means it can not be pushed to the stack.
250  * </EN>
251  */
252 static int
can_put_to_stack(NODE * new,NODE ** bottom,int * stacknum,int stacksize)253 can_put_to_stack(NODE *new, NODE **bottom, int *stacknum, int stacksize)
254 {
255   /* stack size check */
256   if ((*stacknum + 1) > stacksize && (*bottom)->score >= new->score) {
257     /* new node is below the bottom: discard it */
258     return(-1);
259   }
260   return(0);
261 }
262 
263 /**
264  * <JA>
265  * �����å��˿����ʲ�����Ǽ����.
266  * �����å���Υ���������θ�������֤����������.
267  * ��Ǽ�Ǥ��ʤ��ä���硤Ϳ����줿����� free_node() �����.
268  *
269  * @param new [in] �����å����벾��
270  * @param start [i/o] �����å��ΥȥåץΡ��ɤؤΥݥ���
271  * @param bottom [i/o] �����å�����Ρ��ɤؤΥݥ���
272  * @param stacknum [i/o] �����å��˸��߳�Ǽ����Ƥ���Ρ��ɿ��ؤΥݥ���
273  * @param stacksize [in] �����å��ΥΡ��ɿ��ξ��
274  *
275  * @return ��Ǽ�Ǥ���� 0 ���Ǥ��ʤ��ä����� -1 ���֤�.
276  * </JA>
277  * <EN>
278  * Push a new hypothesis into the stack, keeping score order.
279  * If not succeeded, the given new hypothesis will be freed by free_node().
280  *
281  * @param new [in] hypothesis to be checked
282  * @param start [i/o] pointer to stack top node
283  * @param bottom [i/o] pointer to stack bottom node
284  * @param stacknum [i/o] pointer to current stack size
285  * @param stacksize [in] pointer to maximum stack size limit
286  *
287  * @return 0 if succeded, or -1 if failed to push because of number
288  * limitation or too low score.
289  * </EN>
290  */
291 static int
put_to_stack(NODE * new,NODE ** start,NODE ** bottom,int * stacknum,int stacksize)292 put_to_stack(NODE *new, NODE **start, NODE **bottom, int *stacknum, int stacksize)
293 {
294   NODE *tmp;
295 
296   /* stack size is going to increase... */
297   (*stacknum)++;
298 
299   /* stack size check */
300   if ((*stacknum)>stacksize) {
301     /* stack size overflow */
302     (*stacknum)--;
303     if ((*bottom)->score < new->score) {
304       /* new node will be inserted in the stack: free the bottom */
305       tmp=(*bottom);
306       (*bottom)->prev->next=NULL;
307       (*bottom)=(*bottom)->prev;
308       free_node(tmp);
309     } else {
310       /* new node is below the bottom: discard it */
311       free_node(new);
312       return(-1);
313     }
314   }
315 
316   /* insert new node on edge */
317   if ((*start)==NULL) {		/* no node in stack */
318     /* new node is the only node */
319     (*start)=new;
320     (*bottom)=new;
321     new->next=NULL;
322     new->prev=NULL;
323     return(0);
324   }
325   if ((*start)->score <= new->score) {
326     /* insert on the top */
327     new->next = (*start);
328     new->next->prev = new;
329     (*start)=new;
330     new->prev=NULL;
331     return(0);
332   }
333   if ((*bottom)->score >= new->score) {
334     /* insert on the bottom */
335     new->prev = (*bottom);
336     new->prev->next = new;
337     (*bottom)=new;
338     new->next=NULL;
339     return(0);
340   }
341 
342   /* now the new node is between (*start) and (*bottom) */
343   if (((*start)->score + (*bottom)->score) / 2 > new->score) {
344     /* search from bottom */
345     tmp=(*bottom);
346     while(tmp->score < new->score) tmp=tmp->prev;
347     new->prev=tmp;
348     new->next=tmp->next;
349     tmp->next->prev=new;
350     tmp->next=new;
351   } else {
352     /* search from start */
353     tmp=(*start);
354     while(tmp->score > new->score) tmp=tmp->next;
355     new->next=tmp;
356     new->prev=tmp->prev;
357     tmp->prev->next=new;
358     tmp->prev=new;
359   }
360   return(0);
361 }
362 
363 /**
364  * <JA>
365  * �����å�����Ȥ����ƽ��Ϥ���. �����å�����Ȥϼ�����. (�ǥХå���)
366  *
367  * @param start [i/o] �����å��ΥȥåץΡ��ɤؤΥݥ���
368  * @param stacknum [i/o] �����å��˸��߳�Ǽ����Ƥ���Ρ��ɿ��ؤΥݥ���
369  * @param winfo [in] ñ�켭��
370  * </JA>
371  * <EN>
372  * Output all nodes in the stack. All nodes will be lost (for debug).
373  *
374  * @param start [i/o] pointer to stack top node
375  * @param stacknum [i/o] pointer to current stack size
376  * @param winfo [in] word dictionary
377  * </EN>
378  */
379 static void
put_all_in_stack(NODE ** start,int * stacknum,WORD_INFO * winfo)380 put_all_in_stack(NODE **start, int *stacknum, WORD_INFO *winfo)
381 {
382   NODE *ntmp;
383 
384   jlog("DEBUG: hypotheses remained in global stack\n");
385   while ((ntmp = get_best_from_stack(start, stacknum)) != NULL) {
386     jlog("DEBUG: %3d: s=%f",*stacknum, ntmp->score);
387     put_hypo_woutput(ntmp, winfo);
388     free_node(ntmp);
389   }
390 }
391 
392 /**
393  * <JA>
394  * �����å������������������.
395  *
396  * @param start [i/o] �����å��ΥȥåץΡ���
397  * </JA>
398  * <EN>
399  * Free all nodes in a stack.
400  *
401  * @param start [i/o] stack top node
402  * </EN>
403  */
404 static void
free_all_nodes(NODE * start)405 free_all_nodes(NODE *start)
406 {
407   NODE *tmp;
408   NODE *next;
409 
410   tmp=start;
411   while(tmp) {
412     next=tmp->next;
413     free_node(tmp);
414     tmp=next;
415   }
416 }
417 
418 
419 #ifdef CONFIDENCE_MEASURE
420 
421 /**********************************************************************/
422 /********** ñ�쿮���٤η׻� ******************************************/
423 /********** Confidence scoring ****************************************/
424 /**********************************************************************/
425 
426 #ifdef CM_SEARCH
427 /**************************************************************/
428 /**** CM computation method 1(default):                  ******/
429 /****   - Use local posterior probabilities while search ******/
430 /****   - Obtain CM at hypothesis expansion time         ******/
431 /**************************************************************/
432 
433 /**
434  * <JA>
435  * CM�׻��ѤΥѥ�᡼������������. CM�׻���ľ���˸ƤӽФ����.
436  *
437  * @param sd [i/o] ��2�ѥ��ѥ�����ꥢ
438  * @param wnum [in] �����å�������
439  * @param cm_alpha [in] ���Ѥ��륹���������
440  *
441  * </JA>
442  * <EN>
443  * Initialize parameters for confidence scoring (will be called at
444  * each startup of 2nd pass)
445  *
446  * @param sd [i/o] work area for 2nd pass
447  * @param wnum [in] stack size
448  * @param cm_alpha [in] scaling value to use at confidence scoring
449  * </EN>
450  */
451 static void
cm_init(StackDecode * sd,int wnum,LOGPROB cm_alpha,cm_alpha_num)452 cm_init(StackDecode *sd, int wnum, LOGPROB cm_alpha
453 #ifdef CM_MULTIPLE_ALPHA
454 	, cm_alpha_num
455 #endif
456 	)
457 {
458   sd->l_stacksize = wnum;
459   sd->l_start = sd->l_bottom = NULL;
460   sd->l_stacknum = 0;
461   sd->cm_alpha = cm_alpha;
462 #ifdef CM_MULTIPLE_ALPHA
463   if (sd->cmsumlist) {
464     if (sd->cmsumlistlen < cm_alpha_num) {
465       free(sd->cmsumlist);
466       sd->cmsumlist = NULL;
467     }
468   }
469   if (sd->cmsumlist == NULL) {
470     sd->cmsumlist = (LOGPROB *)mymalloc(sizeof(LOGPROB) * cm_alpha_num);
471     sd->cmsumlistlen = cm_alpha_num;
472   }
473 #endif
474 }
475 
476 /**
477  * <JA>
478  * CM�׻��Τ���˥����륹���å���Ÿ���������Ū����¸����.
479  *
480  * @param sd [i/o] ��2�ѥ��ѥ�����ꥢ
481  * @param new [in] Ÿ������
482  * </JA>
483  * <EN>
484  * Store an expanded hypothesis to the local stack for later CM scoring
485  *
486  * @param sd [i/o] work area for 2nd pass
487  * @param new [in] expanded hypothesis
488  * </EN>
489  */
490 static void
cm_store(StackDecode * sd,NODE * new)491 cm_store(StackDecode *sd, NODE *new)
492 {
493   /* store the generated hypo into local stack */
494   put_to_stack(new, &(sd->l_start), &(sd->l_bottom), &(sd->l_stacknum), sd->l_stacksize);
495 }
496 
497 /**
498  * <JA>
499  * CM�׻��Τ���˥����륹���å���β���νи���Ψ�ι�פ����.
500  *
501  * @param sd [i/o] ��2�ѥ��ѥ�����ꥢ
502  *
503  * </JA>
504  * <EN>
505  * Compute sum of probabilities for hypotheses in the local stack for
506  * CM scoring.
507  *
508  * @param sd [i/o] work area for 2nd pass
509  *
510  * </EN>
511  */
512 static void
cm_sum_score(StackDecode * sd,bgn,end,step)513 cm_sum_score(StackDecode *sd
514 #ifdef CM_MULTIPLE_ALPHA
515 	     , bgn, end, step
516 #endif
517 )
518 {
519   NODE *node;
520   LOGPROB sum;
521 #ifdef CM_MULTIPLE_ALPHA
522   LOGPROB a;
523   int j;
524 #endif
525 
526   if (sd->l_start == NULL) return;	/* no hypo */
527   sd->cm_tmpbestscore = sd->l_start->score; /* best hypo is at the top of the stack */
528 
529 #ifdef CM_MULTIPLE_ALPHA
530   for (j = 0, a = bgn; a <= end; a += step) {
531     sum = 0.0;
532     for(node = sd->l_start; node; node = node->next) {
533       sum += pow(10, a * (node->score - sd->cm_tmpbestscore));
534     }
535     sd->cmsumlist[j++] = sum;	/* store sums for each alpha coef. */
536   }
537 #else
538   sum = 0.0;
539   for(node = sd->l_start; node; node = node->next) {
540     sum += pow(10, sd->cm_alpha * (node->score - sd->cm_tmpbestscore));
541   }
542   sd->cm_tmpsum = sum;		/* store sum */
543 #endif
544 
545 }
546 
547 /**
548  * <JA>
549  * Ÿ�����줿����ʸ����ˤĤ��ơ�����Ÿ��ñ��ο����٤������Ψ��
550  * ��Ť��Ʒ׻�����.
551  *
552  * @param sd [i/o] ��2�ѥ��ѥ�����ꥢ
553  * @param node [i/o] Ÿ�����줿����ʸ����
554  * </JA>
555  * <EN>
556  * Compute confidence score of a new word at the end of the given hypothesis,
557  * based on the local posterior probabilities.
558  *
559  * @param sd [i/o] work area for 2nd pass
560  * @param node [i/o] expanded hypothesis
561  * </EN>
562  */
563 static void
cm_set_score(StackDecode * sd,NODE * node,bgn,end,step)564 cm_set_score(StackDecode *sd, NODE *node
565 #ifdef CM_MULTIPLE_ALPHA
566 	     , bgn, end, step
567 #endif
568 	     )
569 {
570 #ifdef CM_MULTIPLE_ALPHA
571   int j;
572   LOGPROB a;
573 #endif
574 
575 #ifdef CM_MULTIPLE_ALPHA
576   for (j = 0, a = bgn; a <= end; a += step) {
577     node->cmscore[node->seqnum-1][j] = pow(10, a * (node->score - sd->cm_tmpbestscore)) / sd->cmsumlist[j];
578     j++;
579   }
580 #else
581   node->cmscore[node->seqnum-1] = pow(10, sd->cm_alpha * (node->score - sd->cm_tmpbestscore)) / sd->cm_tmpsum;
582 #endif
583 }
584 
585 /**
586  * <JA>
587  * CM�׻��ѤΥ����륹���å����鲾�����Ф�.
588  *
589  * @param sd [i/o] ��2�ѥ��ѥ�����ꥢ
590  *
591  * @return ���Ф��줿ʸ������֤�.
592  * </JA>
593  * <EN>
594  * Pop one node from local stack for confidence scoring.
595  *
596  * @param sd [i/o] work area for 2nd pass
597  *
598  * @return the popped hypothesis.
599  * </EN>
600  */
601 static NODE *
cm_get_node(StackDecode * sd)602 cm_get_node(StackDecode *sd)
603 {
604   return(get_best_from_stack(&(sd->l_start), &(sd->l_stacknum)));
605 }
606 
607 #endif /* CM_SEARCH */
608 
609 #ifdef CM_NBEST
610 /*****************************************************************/
611 /**** CM computation method 2: conventional N-best scoring *******/
612 /**** NOTE: enough N-best should be computed (-n 10 ~ -n 100) ****/
613 /*****************************************************************/
614 
615 /**
616  * <JA>
617  * �����å���ˤ���ʸ���䤫��ñ�쿮���٤�׻�����.
618  *
619  * @param sd [i/o] ��2�ѥ��ѥ�����ꥢ
620  * @param start [in] �����å�����Ƭ�Ρ���
621  * @param stacknum [in] �����å�������
622  * @param jconf [in] SEARCH������ѥ�᡼��
623  * </JA>
624  * <EN>
625  * Compute confidence scores from N-best sentence candidates in the
626  * given stack.
627  *
628  * @param sd [i/o] work area for 2nd pass
629  * @param start [in] stack top node
630  * @param stacknum [in] current stack size
631  * @param jconf [in] SEARCH configuration parameters
632  * </EN>
633  */
634 static void
cm_compute_from_nbest(StackDecode * sd,NODE * start,int stacknum,JCONF_SEARCH * jconf)635 cm_compute_from_nbest(StackDecode *sd, NODE *start, int stacknum, JCONF_SEARCH *jconf)
636 {
637   NODE *node;
638   LOGPROB bestscore, sum, s;
639   WORD_ID w;
640   int i;
641   LOGPROB cm_alpha;
642   int j;
643 
644   /* prepare buffer */
645 #ifdef CM_MULTIPLE_ALPHA
646   if (sd->cmsumlist) {
647     if (sd->cmsumlistlen < jconf->annotate.cm_alpha_num) {
648       free(sd->cmsumlist);
649       sd->cmsumlist = NULL;
650     }
651   }
652   if (sd->cmsumlist == NULL) {
653     sd->cmsumlist = (LOGPROB *)mymalloc(sizeof(LOGPROB) * jconf->annotate.cm_alpha_num);
654     sd->cmsumlistlen = cm_alpha_num;
655   }
656 #endif
657   if (sd->sentcm == NULL) {		/* not allocated yet */
658     sd->sentcm = (LOGPROB *)mymalloc(sizeof(LOGPROB)*stacknum);
659     sd->sentnum = stacknum;
660   } else if (sd->sentnum < stacknum) { /* need expanded */
661     sd->sentcm = (LOGPROB *)myrealloc(sentcm, sizeof(LOGPROB)*stacknum);
662     sd->sentnum = stacknum;
663   }
664   if (sd->wordcm == NULL) {
665     sd->wordcm = (LOGPROB *)mymalloc(sizeof(LOGPROB) * winfo->num);
666   }
667 
668   cm_alpha = jconf->annotate.cm_alpha;
669 #ifdef CM_MULTIPLE_ALPHA
670   for (j = 0, cm_alpha = jconf->annotate.cm_alpha_bgn; cm_alpha <= jconf->annotate.cm_alpha_end; cm_alpha += jconf->annotate.cm_alpha_step) {
671 #endif
672     /* clear whole word cm buffer */
673     for(w=0;w<winfo->num;w++) {
674       sd->wordcm[w] = 0.0;
675     }
676     /* get best score */
677     bestscore = start->score;
678     /* compute sum score of all hypothesis */
679     sum = 0.0;
680     for (node = start; node != NULL; node = node->next) {
681       sum += pow(10, cm_alpha * (node->score - bestscore));
682     }
683     /* compute sentence posteriori probabilities */
684     i = 0;
685     for (node = start; node != NULL; node = node->next) {
686       sd->sentcm[i] = pow(10, cm_alpha * (node->score - bestscore)) / sum;
687       i++;
688     }
689     /* compute word posteriori probabilities */
690     i = 0;
691     for (node = start; node != NULL; node = node->next) {
692       for (w=0;w<node->seqnum;w++) {
693 	sd->wordcm[node->seq[w]] += sd->sentcm[i];
694       }
695       i++;
696     }
697     /* store the probabilities to node */
698     for (node = start; node != NULL; node = node->next) {
699       for (w=0;w<node->seqnum;w++) {
700 #ifdef CM_MULTIPLE_ALPHA
701 	node->cmscore[w][j] = sd->wordcm[node->seq[w]];
702 #else
703 	node->cmscore[w] = sd->wordcm[node->seq[w]];
704 #endif
705       }
706     }
707 #ifdef CM_MULTIPLE_ALPHA
708     j++;
709   }
710 #endif
711 }
712 
713 #endif /* CM_NBEST */
714 
715 #endif /* CONFIDENCE_MEASURE */
716 
717 
718 /**********************************************************************/
719 /********** Enveloped best-first search *******************************/
720 /**********************************************************************/
721 
722 /*
723  * 1. Word envelope
724  *
725  * ���β���ӡ�����������: Ÿ�����Ȥʤä�����ο����β���Ĺ(ñ���)
726  * ���Ȥ˥�����Ȥ���. �������ۤ����餽����û������ϰʸ�Ÿ�����ʤ�.
727  *
728  * Introduce a kind of beam width to search tree: count the number of
729  * popped hypotheses per the depth of the hypotheses, and when a count
730  * in a certain depth reaches the threshold, all hypotheses shorter than
731  * the depth will be dropped from candidates.
732  *
733  */
734 
735 /**
736  * <JA>
737  * Word envelope �Ѥ˥���������������.
738  *
739  * @param s [i/o] ��2�ѥ��ѥ�����ꥢ
740  *
741  * </JA>
742  * <EN>
743  * Initialize counters fro word enveloping.
744  *
745  * @param s [i/o] work area for 2nd pass
746  * </EN>
747  */
748 static void
wb_init(StackDecode * s)749 wb_init(StackDecode *s)
750 {
751   int i;
752   for(i=0;i<=MAXSEQNUM;i++) s->hypo_len_count[i] = 0;
753   s->maximum_filled_length = -1;
754 }
755 
756 /**
757  * <JA>
758  * Word envelope ���Ȥ��ơ�Ϳ����줿�����Ÿ�����Ƥ褤���ɤ������֤�.
759  * �ޤ���Word envelope �Υ�������������.
760  *
761  * @param s [i/o] ��2�ѥ��ѥ�����ꥢ
762  * @param now [in] ������Ÿ�����褦�Ȥ��Ƥ��벾��
763  * @param width [in] Ÿ��������Ȥξ����
764  *
765  * @return Ÿ����ǽ��Ÿ��������Ȥ���¤�ã���Ƥ��ʤ��ˤʤ� TRUE,
766  * Ÿ���Բ�ǽ�ʥ�����Ȥ���¤�ã���Ƥ���ˤʤ� FALSE ���֤�.
767  * </JA>
768  * <EN>
769  * Consult the current word envelope to check if word expansion from
770  * the hypothesis node is allowed or not.  Also increment the counter
771  * of word envelope if needed.
772  *
773  * @param s [i/o] work area for 2nd pass
774  * @param now [in] popped hypothesis
775  * @param width [in] maximum limit of expansion count
776  *
777  * @return TRUE if word expansion is allowed (in case word envelope count
778  * of the corresponding hypothesis depth does not reach the limit), or
779  * FALSE if already prohibited.
780  * </EN>
781  */
782 static boolean
wb_ok(StackDecode * s,NODE * now,int width)783 wb_ok(StackDecode *s, NODE *now, int width)
784 {
785   if (now->seqnum <= s->maximum_filled_length) {
786     /* word expansion is not allowed because a word expansion count
787        at deeper level has already reached the limit */
788     return FALSE;
789   } else {
790     /* word expansion is possible.  Increment the word expansion count
791        of the given depth */
792     s->hypo_len_count[now->seqnum]++;
793     if (s->hypo_len_count[now->seqnum] > width) {
794       /* the word expansion count of this level has reached the limit, so
795 	 set the beam-filled depth to this level to inhibit further
796 	 expansion of shorter hypotheses. */
797       if (s->maximum_filled_length < now->seqnum) s->maximum_filled_length = now->seqnum;
798     }
799     return TRUE;
800   }
801 }
802 
803 #ifdef SCAN_BEAM
804 /*
805  * 2. Score envelope
806  *
807  * Viterbi�׻��̤κ︺: ���ϥե졼�ऴ�Ȥκ������� (score envelope) ��
808  * ������ˤ錄�äƵ�Ͽ���Ƥ���. ��������������ٷ׻����ˡ����� envelope
809  * ����������ʾ她�����������Ȥ���Viterbi �ѥ��α黻�����Ǥ���.
810  *
811  * �����Ǥϡ����Ф������⤫��ե졼�ऴ�Ȥ� score envelope ��������
812  * ��ʬ�����Ҥ���Ƥ���. Envelope ���θ���� Viterbi �׻��μºݤ�
813  * scan_word() ���ȤΤ���.
814  *
815  * Reduce computation cost of hypothesis Viterbi processing by setting a
816  * "score envelope" that holds the maximum scores at every frames
817  * throughout the expanded hypotheses.  When calculating Viterbi path
818  * on HMM trellis for updating score of popped hypothesis, Viterbi paths
819  * that goes below a certain range from the score envelope are dropped.
820  *
821  * These functions are for updating the score envelope according to the
822  * popped hypothesis.  For actual Viterbi process with score envelope,
823  * see scan_word().
824  *
825  */
826 
827 /**
828  * <JA>
829  * Score envelope ����������. ��2�ѥ��γ��ϻ��˸ƤФ��.
830  *
831  * @param s [i/o] ��2�ѥ��ѥ�����ꥢ
832  * @param framenum [in] ���ϥե졼��Ĺ
833  * </JA>
834  * <EN>
835  * Initialize score envelope.  This will be called once at the beginning
836  * of 2nd pass.
837  *
838  * @param s [i/o] work area for 2nd pass
839  * @param framenum [in] input frame length
840  * </EN>
841  */
842 static void
envl_init(StackDecode * s,int framenum)843 envl_init(StackDecode *s, int framenum)
844 {
845   int i;
846   for(i=0;i<framenum;i++) s->framemaxscore[i] = LOG_ZERO;
847 }
848 
849 /**
850  * <JA>
851  * ��������������������� score envelope ��������.
852  *
853  * @param s [i/o] ��2�ѥ��ѥ�����ꥢ
854  * @param n [in] ����
855  * @param framenum [in] ���ϥե졼��Ĺ
856  * </JA>
857  * <EN>
858  * Update the score envelope using forward score of the given hypothesis.
859  *
860  * @param s [i/o] work area for 2nd pass
861  * @param n [in] hypothesis
862  * @param framenum [in] input frame length
863  * </EN>
864  */
865 static void
envl_update(StackDecode * s,NODE * n,int framenum)866 envl_update(StackDecode *s, NODE *n, int framenum)
867 {
868   int t;
869   for(t=framenum-1;t>=0;t--) {
870     if (s->framemaxscore[t] < n->g[t]) s->framemaxscore[t] = n->g[t];
871   }
872 }
873 #endif /* SCAN_BEAM */
874 
875 
876 /**********************************************************************/
877 /********** Short pause segmentation **********************************/
878 /**********************************************************************/
879 
880 /**
881  * <JA>
882  * ǧ����̤��顤�������϶�֤�ǧ�����Ϥ���ݤν��ñ��������åȤ���.
883  * Ʃ��줪��Ӳ���ν�ʣ���θ���ƽ��ñ���������ꤵ���.
884  *
885  * @param hypo [in] ���ߤ����϶�֤�ǧ����̤Ȥ��Ƥ�ʸ����
886  * @param r [in] ǧ��������������
887  * </JA>
888  * <EN>
889  * Set the previous word context for the recognition of the next input
890  * segment from the current recognition result.  The initial context word
891  * will be chosen from the current recognition result skipping transparent
892  * word and multiplied words.
893  *
894  * @param hypo [in] sentence candidate as a recognition result of current
895  * input segment
896  * @param r [in] recognition process instance
897  * </EN>
898  *
899  * @callgraph
900  * @callergraph
901  *
902  */
903 void
segment_set_last_nword(NODE * hypo,RecogProcess * r)904 segment_set_last_nword(NODE *hypo, RecogProcess *r)
905 {
906   int i;
907   WORD_ID w;
908 
909   if (r->sp_break_last_nword_allow_override) {
910     for(i=0;i<hypo->seqnum;i++) {
911       w = hypo->seq[i];
912       if (w != r->sp_break_last_word
913 	  && !is_sil(w, r)
914 	  && !r->lm->winfo->is_transparent[w]
915 	  ) {
916 	r->sp_break_last_nword = w;
917 	break;
918       }
919     }
920 #ifdef SP_BREAK_DEBUG
921     printf("sp_break_last_nword=%d[%s]\n", r->sp_break_last_nword, r->lm->winfo->woutput[r->sp_break_last_nword]);
922 #endif
923   } else {
924     r->sp_break_last_nword = WORD_INVALID;
925   }
926 }
927 
928 
929 /**********************************************************************/
930 /********* Debug output of hypothesis while search ********************/
931 /**********************************************************************/
932 
933 /**
934  * <JA>
935  * �ǥХå��Ѥ˲����ñ�����ɽ������.
936  *
937  * @param hypo [in] ����
938  * @param winfo [in] ñ�켭��
939  * </JA>
940  * <EN>
941  * Output word sequence of a hypothesis for debug.
942  *
943  * @param hypo [in] hypothesis
944  * @param winfo [in] word dictionary
945  * </EN>
946  */
947 static void
put_hypo_woutput(NODE * hypo,WORD_INFO * winfo)948 put_hypo_woutput(NODE *hypo, WORD_INFO *winfo)
949 {
950   int i,w;
951 
952   if (hypo != NULL) {
953     for (i=hypo->seqnum-1;i>=0;i--) {
954       w = hypo->seq[i];
955       jlog(" %s", winfo->woutput[w]);
956     }
957   }
958   jlog("\n");
959 }
960 
961 /**
962  * <JA>
963  * �ǥХå��Ѥ˲����ñ��N-gram����ȥ�̾��Julian�Ǥϥ��ƥ����ֹ�ˤ���Ϥ���.
964  *
965  * @param hypo [in] ����
966  * @param winfo [in] ñ�켭��
967  * </JA>
968  * <EN>
969  * Output N-gram entries (or DFA category IDs) of a hypothesis for debug.
970  *
971  * @param hypo [in] hypothesis
972  * @param winfo [in] word dictionary
973  * </EN>
974  */
975 static void
put_hypo_wname(NODE * hypo,WORD_INFO * winfo)976 put_hypo_wname(NODE *hypo, WORD_INFO *winfo)
977 {
978   int i,w;
979 
980   if (hypo != NULL) {
981     for (i=hypo->seqnum-1;i>=0;i--) {
982       w = hypo->seq[i];
983       jlog(" %s", winfo->wname[w]);
984     }
985   }
986   jlog("\n");
987 }
988 
989 /**
990  * <EN>
991  * Save a hypothesis as a recognition result f 2nd pass.
992  * </EN>
993  * <JA>
994  * ��2�ѥ��η�̤Ȥ��Ʋ������¸����.
995  * </JA>
996  *
997  * @param hypo [in] hypothesis to save
998  * @param r [in] recognition process instance
999  *
1000  */
1001 static void
store_result_pass2(NODE * hypo,RecogProcess * r)1002 store_result_pass2(NODE *hypo, RecogProcess *r)
1003 {
1004   int i;
1005   Sentence *s;
1006 
1007   s = &(r->result.sent[r->result.sentnum]);
1008 
1009   s->word_num = hypo->seqnum;
1010   for (i = 0; i < hypo->seqnum; i++) {
1011     s->word[i] = hypo->seq[hypo->seqnum - 1 - i];
1012   }
1013 #ifdef CONFIDENCE_MEASURE
1014   for (i = 0; i < hypo->seqnum; i++) {
1015     s->confidence[i] = hypo->cmscore[hypo->seqnum - 1 - i];
1016   }
1017 #endif
1018 
1019   s->score = hypo->score;
1020   s->score_lm = hypo->totallscore;
1021   s->score_am = hypo->score - hypo->totallscore;
1022   if (r->lmtype == LM_DFA) {
1023     /* output which grammar the hypothesis belongs to on multiple grammar */
1024     /* determine only by the last word */
1025     if (multigram_get_all_num(r->lm) > 0) {
1026       s->gram_id = multigram_get_gram_from_category(r->lm->winfo->wton[hypo->seq[0]], r->lm);
1027     } else {
1028       s->gram_id = 0;
1029     }
1030   }
1031 
1032   r->result.sentnum++;
1033   /* add to tail */
1034 }
1035 
1036 
1037 /**********************************************************************/
1038 /******** Output top 'ncan' hypotheses in a stack and free all ********/
1039 /**********************************************************************/
1040 
1041 /**
1042  * <JA>
1043  * �����å������̤β������Ф���ǧ����̤Ȥ��ƽ��Ϥ���. ����ˡ�
1044  * �����å��˳�Ǽ����Ƥ������Ƥβ�����������.
1045  *
1046  * ����줿ʸ����ϡ����ä����̳�Ǽ�ѤΥ����å��˳�Ǽ�����. õ����
1047  * λ��"-n" �ο�����ʸ���䤬���Ĥ��뤫��õ�������Ǥ����ˤθ塤���Ū
1048  * ������줿ʸ������椫����N�ġ�"-output" �ǻ��ꤵ�줿���ˤβ����
1049  * ���Ϥ���.
1050  *
1051  * ���꤬����Х��饤����Ȥ⤳���Ǽ¹Ԥ���.
1052  *
1053  * @param r_start [i/o] ��̳�Ǽ�ѥ����å�����Ƭ�Ρ��ɤؤΥݥ���
1054  * @param r_bottom [i/o] ��̳�Ǽ�ѥ����å�����Ρ��ɤؤΥݥ���
1055  * @param r_stacknum [i/o] �����å��˳�Ǽ����Ƥ���Ρ��ɿ��ؤΥݥ���
1056  * @param ncan [in] ���Ϥ����̲����
1057  * @param r [in] ǧ��������������
1058  * @param param [in] ���ϥѥ�᡼��
1059  * </JA>
1060  * <EN>
1061  * Output top N-best hypotheses in a stack as a recognition result, and
1062  * free all hypotheses.
1063  *
1064  * The sentence candidates found at the 2nd pass will be pushed to the
1065  * "result stack" instead of immediate output.  After recognition is
1066  * over (in case the number of found sentences reaches the number
1067  * specified by "-n", or search has been terminated in other reason),
1068  * the top N sentence candidates in the stack will be output as a
1069  * final result (where N should be specified by "-output").  After
1070  * then, all the hypotheses in the stack will be freed.
1071  *
1072  * Additionally, forced alignment for the recognized sentence
1073  * will be executed here if required.
1074  *
1075  * @param r_start [i/o] pointer to the top node of the result stack
1076  * @param r_bottom [i/o] pointer to the bottom node of the result stack
1077  * @param r_stacknum [i/o] number of candidates in the current result stack
1078  * @param ncan [in] number of sentence candidates to be output
1079  * @param r [in] recognition process instance
1080  * @param param [in] input parameter
1081  * </EN>
1082  */
1083 static void
result_reorder_and_output(NODE ** r_start,NODE ** r_bottom,int * r_stacknum,int ncan,RecogProcess * r,HTK_Param * param)1084 result_reorder_and_output(NODE **r_start, NODE **r_bottom, int *r_stacknum, int ncan, RecogProcess *r, HTK_Param *param)
1085 {
1086   NODE *now;
1087   int num;
1088 
1089 #ifdef CM_NBEST
1090   /* compute CM from the N-best sentence candidates */
1091   cm_compute_from_nbest(&(r->pass2), *r_start, *r_stacknum, r->config);
1092 #endif
1093   num = 0;
1094   while ((now = get_best_from_stack(r_start,r_stacknum)) != NULL && num < ncan) {
1095     num++;
1096     /* output result */
1097     store_result_pass2(now, r);
1098 
1099     /* if in sp segmentation mode, */
1100     /* set the last context-aware word for next recognition */
1101     if (r->lmtype == LM_PROB && r->config->successive.enabled && num == 1) segment_set_last_nword(now, r);
1102 
1103     free_node(now);
1104   }
1105   /* free the rest */
1106   if (now != NULL) free_node(now);
1107   free_all_nodes(*r_start);
1108 }
1109 
1110 /**
1111  * <EN>
1112  * @brief  Post-process of 2nd pass when no result is obtained.
1113  *
1114  * This is a post-process for the 2nd pass which should be called when
1115  * the 2nd pass has no result.  This will occur when the 2nd pass was
1116  * executed but failed with no sentence candidate, or skipped by
1117  * an option.
1118  *
1119  * When the 2nd argument is set to TRUE, the result of the 1st pass
1120  * will be copied as final result of 2nd pass and the recognition
1121  * status flag is set to SUCCESS.  If FALSE, recognition status will
1122  * be set to FAILED.  On sp-segment decoding, the initial hypothesis
1123  * marker for the next input segment will be set up from the 1st pass
1124  * result also.
1125  *
1126  * @param r [in] recognition process instance
1127  * @param use_1pass_as_final [in] when TRUE the 1st pass result will be used as final recognition result of 2nd pass.
1128  *
1129  * </EN>
1130  * <JA>
1131  * @brief  ��2�ѥ��β������ʤ����ν�λ����
1132  *
1133  * ��2�ѥ������Ԥ���������2�ѥ����¹Ԥ���ʤ�����ξ���
1134  * ǧ����λ������Ԥ���use_1pass_as_final �� TRUE �ΤȤ���
1135  * ��1�ѥ��η�̤���2�ѥ��η�̤Ȥ��ƥ��ԡ����Ƴ�Ǽ����ǧ�������Ȥ��롥
1136  * FALSE����ǧ�����ԤȤ��롥
1137  * �ޤ���sp-segment ���ϡ�����ǧ������Ѥν�������������1�ѥ���
1138  * ��̤���Ԥ���
1139  *
1140  * @param r [in] ǧ��������������
1141  * @param use_1pass_as_final [in] TRUE ����1�ѥ��η�̤���2�ѥ���̤˳�Ǽ����
1142  *
1143  * </JA>
1144  */
1145 void
pass2_finalize_on_no_result(RecogProcess * r,boolean use_1pass_as_final)1146 pass2_finalize_on_no_result(RecogProcess *r, boolean use_1pass_as_final)
1147 {
1148   NODE *now;
1149   int i, j;
1150 
1151   /* õ������ */
1152   /* search failed */
1153 
1154   /* make temporal hypothesis data from the result of previous 1st pass */
1155   now = newnode(r);
1156   for (i=0;i<r->pass1_wnum;i++) {
1157     now->seq[i] = r->pass1_wseq[r->pass1_wnum-1-i];
1158   }
1159   now->seqnum = r->pass1_wnum;
1160   now->score = r->pass1_score;
1161 #ifdef CONFIDENCE_MEASURE
1162   /* fill in null values */
1163 #ifdef CM_MULTIPLE_ALPHA
1164   for(j=0;j<jconf->annotate.cm_alpha_num;j++) {
1165     for(i=0;i<now->seqnum;i++) now->cmscore[i][j] = 0.0;
1166   }
1167 #else
1168   for(i=0;i<now->seqnum;i++) now->cmscore[i] = 0.0;
1169 #endif
1170 #endif /* CONFIDENCE_MEASURE */
1171 
1172   if (r->lmtype == LM_PROB && r->config->successive.enabled) {
1173     /* if in sp segment mode, */
1174     /* find segment restart words from 1st pass result */
1175     segment_set_last_nword(now, r);
1176   }
1177 
1178   if (use_1pass_as_final) {
1179     /* ��1�ѥ��η�̤��Τޤ޽��Ϥ��� */
1180     /* output the result of the previous 1st pass as a final result. */
1181     store_result_pass2(now, r);
1182     r->result.status = J_RESULT_STATUS_SUCCESS;
1183   } else {
1184     /* store output as failure */
1185     r->result.status = J_RESULT_STATUS_FAIL;
1186     //callback_exec(CALLBACK_RESULT, r);
1187   }
1188 
1189   free_node(now);
1190 }
1191 
1192 
1193 /**********************************************************************/
1194 /********* Main stack decoding function *******************************/
1195 /**********************************************************************/
1196 
1197 /**
1198  * <JA>
1199  * ��2õ���ѥ��Ǥ��륹���å��ǥ����ǥ�����Ԥ��ᥤ��ؿ�
1200  *
1201  * �����Τ��� cate_bgn, cate_num ��ñ��N-gram�Ǥ�̵�뤵���.
1202  *
1203  * @param param [in] ���ϥѥ�᡼���٥��ȥ���
1204  * @param r [i/o] ǧ��������������
1205  * @param cate_bgn [in] Ÿ���оݤȤ��٤����ƥ���γ����ֹ�
1206  * @param cate_num [in] Ÿ���оݤȤ��٤����ƥ���ο�
1207  * </JA>
1208  * <EN>
1209  * Main function to perform stack decoding of the 2nd search pass.
1210  *
1211  * The cate_bgn and cate_num (third and fourth argument) will have
1212  * no effect when N-gram is used.
1213  *
1214  * @param param [in] input parameter vector
1215  * @param r [i/o] recognition process instance
1216  * @param cate_bgn [in] category id to allow word expansion from (ignored in Julius)
1217  * @param cate_num [in] num of category to allow word expansion from (ignored in Julius)
1218  * </EN>
1219  *
1220  * @callgraph
1221  * @callergraph
1222  *
1223  */
1224 void
wchmm_fbs(HTK_Param * param,RecogProcess * r,int cate_bgn,int cate_num)1225 wchmm_fbs(HTK_Param *param, RecogProcess *r, int cate_bgn, int cate_num)
1226 {
1227   /* ʸ���⥹���å� */
1228   /* hypothesis stack (double-linked list) */
1229   int stacknum;			/* current stack size */
1230   NODE *start = NULL;		/* top node */
1231   NODE *bottom = NULL;		/* bottom node */
1232 
1233   /* ǧ����̳�Ǽ�����å�(��̤Ϥ����ؤ��ä��������) */
1234   /* result sentence stack (found results will be stored here and then re-ordered) */
1235   int r_stacksize;
1236   int r_stacknum;
1237   NODE *r_start = NULL;
1238   NODE *r_bottom = NULL;
1239 
1240   /* ������ꥢ */
1241   /* work area */
1242   NEXTWORD fornoise; /* Dummy NEXTWORD data for short-pause insertion handling */
1243 
1244   NEXTWORD **nextword, *nwroot;	/* buffer to store predicted words */
1245   int maxnwnum;			/* current allocated number of words in nextword */
1246   int nwnum;			/* current number of words in nextword */
1247   NODE *now, *new;		/* popped current hypo., expanded new hypo. */
1248   NODE *now_noise;	       /* for inserting/deleting noise word */
1249   boolean now_noise_calced;
1250   boolean acc;
1251   int t;
1252   int w,i,j;
1253   LOGPROB last_score = LOG_ZERO;
1254   /* for graph generation */
1255   LOGPROB prev_score;
1256   WordGraph *wordgraph_root = NULL;
1257   boolean merged_p;
1258 #ifdef GRAPHOUT_DYNAMIC
1259   int dynamic_merged_num = 0;
1260   WordGraph *wtmp;
1261   LOGPROB lscore_prev;
1262 #endif
1263 #ifdef GRAPHOUT_SEARCH
1264   int terminate_search_num = 0;
1265 #endif
1266 
1267   /* local temporal parameter */
1268   int stacksize, ncan, maxhypo, peseqlen;
1269   JCONF_SEARCH *jconf;
1270   WORD_INFO *winfo;
1271   NGRAM_INFO *ngram;
1272   DFA_INFO *gdfa;
1273   BACKTRELLIS *backtrellis;
1274   StackDecode *dwrk;
1275 
1276   if (r->lmtype == LM_DFA) {
1277     if (debug2_flag) jlog("DEBUG: only words in these categories will be expanded: %d-%d\n", cate_bgn, cate_bgn + cate_num-1);
1278   }
1279 
1280   /*
1281    * �����
1282    * Initialize
1283    */
1284 
1285   /* just for quick access */
1286   jconf = r->config;
1287   winfo = r->lm->winfo;
1288   if (r->lmtype == LM_PROB) {
1289     ngram = r->lm->ngram;
1290   } else if (r->lmtype == LM_DFA) {
1291     gdfa = r->lm->dfa;
1292   }
1293   backtrellis = r->backtrellis;
1294   dwrk = &(r->pass2);
1295 
1296   stacksize = jconf->pass2.stack_size;
1297   ncan = jconf->pass2.nbest;
1298   maxhypo = jconf->pass2.hypo_overflow;
1299   peseqlen = backtrellis->framelen;
1300 
1301   /* store data for sub routines */
1302   r->peseqlen = backtrellis->framelen;
1303   //recog->ccd_flag = recog->jconf->am.ccd_flag;
1304   /* ͽ¬ñ���Ǽ�ΰ����� */
1305   /* malloc area for word prediction */
1306   /* the initial maximum number of nextwords is the size of vocabulary */
1307   nextword = nw_malloc(&maxnwnum, &nwroot, winfo->num);
1308   /* �������������׻��Ѥ��ΰ����� */
1309   /* malloc are for forward viterbi (scan_word()) */
1310   malloc_wordtrellis(r);		/* scan_word���ΰ� */
1311   /* ���⥹���å������ */
1312   /* initialize hypothesis stack */
1313   start = bottom = NULL;
1314   stacknum = 0;
1315   /* ��̳�Ǽ�����å������ */
1316   /* initialize result stack */
1317   r_stacksize = ncan;
1318   r_start = r_bottom = NULL;
1319   r_stacknum = 0;
1320 
1321   /* ����������� */
1322   /* initialize counter */
1323   dwrk->popctr = 0;
1324   dwrk->genectr = 0;
1325   dwrk->pushctr = 0;
1326   dwrk->finishnum = 0;
1327 
1328 #ifdef CM_SEARCH
1329   /* initialize local stack */
1330   cm_init(dwrk, winfo->num, jconf->annotate.cm_alpha
1331 #ifdef CM_MULTIPLE_ALPHA
1332 	  , jconf->annotate.cm_alpha_num
1333 #endif
1334 	  );
1335 #endif
1336 #ifdef SCAN_BEAM
1337   /* prepare and initialize score envelope */
1338   dwrk->framemaxscore = (LOGPROB *)mymalloc(sizeof(LOGPROB)*peseqlen);
1339   envl_init(dwrk, peseqlen);
1340 #endif /* SCAN_BEAM */
1341 
1342   /* ����٥���õ���Ѥ�ñ��Ĺ��Ÿ���������������� */
1343   /* initialize counters for envelope search */
1344   if (jconf->pass2.enveloped_bestfirst_width >= 0) wb_init(dwrk);
1345 
1346   if (jconf->graph.enabled) {
1347     wordgraph_init(r->wchmm);
1348   }
1349 
1350   /*
1351    * �������(1ñ�줫��ʤ�)����, ʸ���⥹���å��ˤ����
1352    * get a set of initial words from LM function and push them as initial
1353    * hypotheses
1354    */
1355   /* the first words will be stored in nextword[] */
1356   if (r->lmtype == LM_PROB) {
1357     nwnum = ngram_firstwords(nextword, peseqlen, maxnwnum, r);
1358   } else if (r->lmtype == LM_DFA) {
1359     nwnum = dfa_firstwords(nextword, peseqlen, maxnwnum, r);
1360     /* ��줿�顢�Хåե������䤷�ƺƥ����� */
1361     /* If the number of nextwords can exceed the buffer size, expand the
1362        nextword data area */
1363     while (nwnum < 0) {
1364       nextword = nw_expand(nextword, &maxnwnum, &nwroot, winfo->num);
1365       nwnum = dfa_firstwords(nextword, peseqlen, maxnwnum, r);
1366     }
1367   }
1368 
1369   if (debug2_flag) {
1370     jlog("DEBUG: %d words in wordtrellis as first hypothesis\n", nwnum);
1371   }
1372 
1373   /* store them to stack */
1374   for (w = 0; w < nwnum; w++) {
1375     if (r->lmtype == LM_DFA) {
1376       /* limit word hypothesis */
1377       if (! (winfo->wton[nextword[w]->id] >= cate_bgn && winfo->wton[nextword[w]->id] < cate_bgn + cate_num)) {
1378 	continue;
1379       }
1380     }
1381     /* generate new hypothesis */
1382     new = newnode(r);
1383     start_word(new, nextword[w], param, r);
1384     if (r->lmtype == LM_DFA) {
1385       if (new->score <= LOG_ZERO) { /* not on trellis */
1386 	free_node(new);
1387 	continue;
1388       }
1389     }
1390     dwrk->genectr++;
1391 #ifdef CM_SEARCH
1392     /* store the local hypothesis to temporal stack */
1393     cm_store(dwrk, new);
1394 #else
1395     /* put to stack */
1396     if (put_to_stack(new, &start, &bottom, &stacknum, stacksize) != -1) {
1397       dwrk->current = new;
1398       //callback_exec(CALLBACK_DEBUG_PASS2_PUSH, r);
1399       if (jconf->graph.enabled) {
1400 	new->prevgraph = NULL;
1401 	new->lastcontext = NULL;
1402       }
1403       dwrk->pushctr++;
1404     }
1405 #endif
1406   }
1407 #ifdef CM_SEARCH
1408   /* compute score sum */
1409   cm_sum_score(dwrk
1410 #ifdef CM_MULTIPLE_ALPHA
1411 	       , jconf->annotate.cm_alpha_bgn
1412 	       , jconf->annotate.cm_alpha_end
1413 	       , jconf->annotate.cm_alpha_step
1414 #endif
1415 	       );
1416 
1417   /* compute CM and put the generated hypotheses to global stack */
1418   while ((new = cm_get_node(dwrk)) != NULL) {
1419     cm_set_score(dwrk, new
1420 #ifdef CM_MULTIPLE_ALPHA
1421 		 , jconf->annotate.cm_alpha_bgn
1422 		 , jconf->annotate.cm_alpha_end
1423 		 , jconf->annotate.cm_alpha_step
1424 #endif
1425 		 );
1426 #ifdef CM_SEARCH_LIMIT
1427     if (new->cmscore[new->seqnum-1] < jconf->annotate.cm_cut_thres
1428 #ifdef CM_SEARCH_LIMIT_AFTER
1429 	&& dwrk->finishnum > 0
1430 #endif
1431 	) {
1432       free_node(new);
1433       continue;
1434     }
1435 #endif /* CM_SEARCH_LIMIT */
1436 
1437     if (put_to_stack(new, &start, &bottom, &stacknum, stacksize) != -1) {
1438       dwrk->current = new;
1439       //callback_exec(CALLBACK_DEBUG_PASS2_PUSH, r);
1440       if (r->graphout) {
1441 	new->prevgraph = NULL;
1442 	new->lastcontext = NULL;
1443       }
1444       dwrk->pushctr++;
1445     }
1446   }
1447 #endif
1448 
1449   if (debug2_flag) {
1450     jlog("DEBUG: %d pushed\n", dwrk->pushctr);
1451   }
1452 
1453   /********************/
1454   /* main search loop */
1455   /********************/
1456 
1457   for (;;) {
1458 
1459     /* if terminate signal has been received, cancel this input */
1460     /*
1461      * if (recog->process_want_terminate) {
1462      *	 jlog("DEBUG: process terminated by request\n");
1463      *	 break;
1464      * }
1465      */
1466 
1467     /*
1468      * ���⥹���å�����Ǥ⥹�����ι⤤�������Ф�
1469      * pop the top hypothesis from stack
1470      */
1471 #ifdef DEBUG
1472     jlog("DEBUG: get one hypothesis\n");
1473 #endif
1474     now = get_best_from_stack(&start,&stacknum);
1475     if (now == NULL) {  /* stack empty ---> õ����λ*/
1476       jlog("WARNING: %02d %s: hypothesis stack exhausted, terminate search now\n", r->config->id, r->config->name);
1477       jlog("STAT: %02d %s: %d sentences have been found\n", r->config->id, r->config->name, dwrk->finishnum);
1478       break;
1479     }
1480     /* (bogus score check) */
1481     if (now->score <= LOG_ZERO) {
1482       free_node(now);
1483       continue;
1484     }
1485 
1486 
1487     /* ñ�쥰����Ѥ� pop ����� f ������������¸ */
1488     if (r->graphout) {
1489       prev_score = now->score;
1490     }
1491 
1492     /* word envelope �����å� */
1493     /* consult word envelope */
1494     if (jconf->pass2.enveloped_bestfirst_width >= 0) {
1495       if (!wb_ok(dwrk, now, jconf->pass2.enveloped_bestfirst_width)) {
1496 	/* ���β���Ĺ�ˤ�����Ÿ������������߷׿��ϴ������ͤ�ۤ��Ƥ���.
1497 	   ���Τ��ᡤ���β���ϼΤƤ�. */
1498 	/* the number of popped hypotheses at the length already
1499 	   reaches its limit, so the current popped hypothesis should
1500 	   be discarded here with no expansion */
1501 	if (debug2_flag) {
1502 	  jlog("DEBUG: popped but pruned by word envelope:");
1503 	  put_hypo_woutput(now, r->lm->winfo);
1504 	}
1505 	free_node(now);
1506 	continue;
1507       }
1508     }
1509 
1510 #ifdef CM_SEARCH_LIMIT_POP
1511     if (now->cmscore[now->seqnum-1] < jconf->annotate.cm_cut_thres_pop) {
1512       free_node(now);
1513       continue;
1514     }
1515 #endif /* CM_SEARCH_LIMIT_POP */
1516 
1517     dwrk->popctr++;
1518 
1519     /* (for debug) ���Ф�������Ȥ��Υ���������� */
1520     /*             output information of the popped hypothesis to stdout */
1521     if (debug2_flag) {
1522       jlog("DEBUG: --- pop %d:\n", dwrk->popctr);
1523       jlog("DEBUG:  "); put_hypo_woutput(now, r->lm->winfo);
1524       jlog("DEBUG:  "); put_hypo_wname(now, r->lm->winfo);
1525       jlog("DEBUG:  %d words, f=%f, g=%f\n", now->seqnum, now->score, now->g[now->bestt]);
1526       jlog("DEBUG:  last word on trellis: [%d-%d]\n", now->estimated_next_t + 1, now->bestt);
1527     }
1528 
1529     dwrk->current = now;
1530     //callback_exec(CALLBACK_DEBUG_PASS2_POP, r);
1531 
1532     if (r->graphout) {
1533 
1534 #ifdef GRAPHOUT_DYNAMIC
1535       /* merge last word in popped hypo if possible */
1536       wtmp = wordgraph_check_merge(now->prevgraph, &wordgraph_root, now->seq[now->seqnum-1], &merged_p, jconf);
1537       if (wtmp != NULL) {		/* wtmp holds merged word */
1538 	dynamic_merged_num++;
1539 
1540 	lscore_prev = (now->prevgraph) ? now->prevgraph->lscore_tmp : 0.0;
1541 
1542 	if (now->prevgraph != NULL) {
1543 	  if (now->prevgraph->saved) {
1544 	    j_internal_error("wchmm_fbs: already saved??\n");
1545 	  }
1546 	  wordgraph_free(now->prevgraph);
1547 	}
1548 
1549 	if (now->lastcontext != NULL
1550 	    && now->lastcontext != wtmp	/* avoid self loop */
1551 	    ) {
1552 	  wordgraph_check_and_add_leftword(now->lastcontext, wtmp, lscore_prev);
1553 #ifdef GRAPHOUT_SEARCH_CONSIDER_RIGHT
1554 	  if (merged_p) {
1555 	    if (wordgraph_check_and_add_rightword(wtmp, now->lastcontext, lscore_prev) == FALSE) {
1556 	      merged_p = TRUE;
1557 	    } else {
1558 	      merged_p = FALSE;
1559 	    }
1560 	  } else {
1561 	    wordgraph_check_and_add_rightword(wtmp, now->lastcontext, lscore_prev);
1562 	  }
1563 #else
1564 	  wordgraph_check_and_add_rightword(wtmp, now->lastcontext, lscore_prev);
1565 #endif
1566 	}
1567 
1568 	now->prevgraph = wtmp;	/* change word to the merged one */
1569 
1570 	/*printf("last word merged\n");*/
1571 	/* previous still remains at memory here... (will be purged later) */
1572       } else {
1573 	wordgraph_save(now->prevgraph, now->lastcontext, &wordgraph_root);
1574       }
1575 #ifdef GRAPHOUT_SEARCH
1576       /* if recent hypotheses are included in the existing graph, terminate */
1577       if (merged_p && now->endflag == FALSE
1578 #ifdef GRAPHOUT_SEARCH_DELAY_TERMINATION
1579 	  /* Do not apply search termination by graph merging
1580 	     until the first sentence candidate is found. */
1581 	  && (jconf->graph.graphout_search_delay == FALSE || dwrk->finishnum > 0)
1582 #endif
1583 	  ) {
1584 	terminate_search_num++;
1585 	free_node(now);
1586 	continue;
1587       }
1588 #endif
1589 #else  /* ~GRAPHOUT_DYNAMIC */
1590       /* always save */
1591       wordgraph_save(now->prevgraph, now->lastcontext, &wordgraph_root);
1592 #endif /* ~GRAPHOUT_DYNAMIC */
1593     }
1594 
1595     /* ���Ф�������Υ��������� score envelope ���� */
1596     /* update score envelope using the popped hypothesis */
1597     envl_update(dwrk, now, peseqlen);
1598 
1599     /*
1600      * ���Ф�������μ����ե饰������Ω�äƤ���С�
1601      * ���β����õ����λ�Ȥߤʤ�����̤Ȥ��ƽ��Ϥ��Ƽ��Υ롼�פ�.
1602      *
1603      * If the popped hypothesis already reached to the end,
1604      * we can treat it as a recognition result.
1605      */
1606 #ifdef DEBUG
1607     VERMES("endflag check\n");
1608 #endif
1609 
1610     if (now->endflag) {
1611       if (debug2_flag) {
1612 	jlog("DEBUG:  This is a full sentence candidate\n");
1613       }
1614       /* quick, dirty hack */
1615       if (now->score == last_score) {
1616 	free_node(now);
1617 	continue;
1618       } else {
1619 	last_score = now->score;
1620       }
1621 
1622       dwrk->finishnum++;
1623       if (debug2_flag) {
1624 	jlog("DEBUG:  %d-th sentence found\n", dwrk->finishnum);
1625       }
1626 
1627 	/* ������β��⤬����줿���ȥ������ǥ����Ȥ��뤿�ᡤ
1628 	   ���Ū���̤Υ����å��˳�Ǽ���Ƥ��� */
1629 	/* store the result to result stack
1630 	   after search is finished, they will be re-ordered and output */
1631 	put_to_stack(now, &r_start, &r_bottom, &r_stacknum, r_stacksize);
1632 	/* �������ʸ���⤬����줿�ʤ�õ����λ���� */
1633 	/* finish search if specified number of results are found */
1634 	if (dwrk->finishnum >= ncan) {
1635 	  break;
1636 	} else {
1637 	  continue;
1638 	}
1639 
1640     } /* end of now->endflag */
1641 
1642 
1643     /*
1644      * õ�����Ԥ��Ф���.
1645      * ������� maxhypo �ʾ�Ÿ�����줿��, �⤦����ʾ��õ�����ʤ�
1646      *
1647      * detecting search failure:
1648      * if the number of expanded hypotheses reaches maxhypo, giveup further search
1649      */
1650 #ifdef DEBUG
1651     jlog("DEBUG: loop end check\n");
1652 #endif
1653     if (dwrk->popctr >= maxhypo) {
1654       jlog("WARNING: %02d %s: num of popped hypotheses reached the limit (%d)\n", r->config->id, r->config->name, maxhypo);
1655       /* (for debug) õ�����Ի��ˡ������å��˻Ĥä�������Ǥ��Ф� */
1656       /* (for debug) output all hypothesis remaining in the stack */
1657       if (debug2_flag) put_all_in_stack(&start, &stacknum, r->lm->winfo);
1658       free_node(now);
1659       break;			/* end of search */
1660     }
1661     /* ����Ĺ�������ͤ�ۤ����Ȥ������β�����˴����� */
1662     /* check hypothesis word length overflow */
1663     if (now->seqnum >= MAXSEQNUM) {
1664       jlog("ERROR: sentence length exceeded system limit ( > %d)\n", MAXSEQNUM);
1665       free_node(now);
1666       continue;
1667     }
1668 
1669 #ifndef GRAPHOUT_PRECISE_BOUNDARY
1670     if (r->graphout) {
1671       /* if monophone (= no backscan), the tail g score should be kept here */
1672       /* else, updated tail g score will be computed in scan_word()  */
1673       if(!jconf->am.ccd_flag) {
1674 	now->tail_g_score = now->g[now->bestt];
1675       }
1676     }
1677 #endif
1678 
1679     /*
1680      * �������������������롧 �Ǹ��ñ�����ʬ����������������׻�����.
1681      * update forward score: compute forward trellis for the last word
1682      */
1683 #ifdef DEBUG
1684     jlog("DEBUG: scan_word\n");
1685 #endif
1686     scan_word(now, param, r);
1687     if (now->score < LOG_ZERO) { /* another end-of-search detecter */
1688       jlog("WARNING: too low score, ignore: score=%f",now->score);
1689       put_hypo_woutput(now, r->lm->winfo);
1690       free_node(now);
1691       continue;
1692     }
1693 
1694     /*
1695      * ���Ф������⤬ʸ�Ȥ��Ƽ�����ǽ�Ǥ���С�
1696      * �����ե饰��Ω�ƤƤ����å��ˤ���ľ���Ƥ���.
1697      * (���˼��Ф��줿���Ȥʤ�)
1698      *
1699      * if the current popped hypothesis is acceptable, set endflag
1700      * and return it to stack: it will become the recognition result
1701      * when popped again.
1702      */
1703 #ifdef DEBUG
1704     jlog("DEBUG: accept check\n");
1705 #endif
1706 
1707     if (r->lmtype == LM_PROB) {
1708       acc = ngram_acceptable(now, r);
1709     } else if (r->lmtype == LM_DFA) {
1710       acc = dfa_acceptable(now, r);
1711     }
1712     if (acc && now->estimated_next_t <= 5) {
1713       new = newnode(r);
1714       /* new �� now ����Ȥ��ԡ����ơ��ǽ�Ū�ʥ�������׻� */
1715       /* copy content of 'now' to 'new', and compute the final score */
1716       last_next_word(now, new, param, r);
1717       if (debug2_flag) {
1718 	jlog("DEBUG:  This is acceptable as a sentence candidate\n");
1719       }
1720       /* g[] �����ϻ�ü��ã���Ƥ��ʤ���д��� */
1721       /* reject this sentence candidate if g[] does not reach the end */
1722       if (new->score <= LOG_ZERO) {
1723 	if (debug2_flag) {
1724 	  jlog("DEBUG:  But invalid because Viterbi pass does not reach the 0th frame\n");
1725 	}
1726 	free_node(new);
1727 	free_node(now);
1728 	continue;
1729       }
1730       /* �����ե饰��Ω�Ƥ�����ľ�� */
1731       /* set endflag and push again  */
1732       if (debug2_flag) {
1733 	jlog("DEBUG  This hypo itself was pushed with final score=%f\n", new->score);
1734       }
1735       new->endflag = TRUE;
1736       if (put_to_stack(new, &start, &bottom, &stacknum, stacksize) != -1) {
1737 	if (r->graphout) {
1738 	  if (new->score > LOG_ZERO) {
1739 	    new->lastcontext = now->prevgraph;
1740 	    new->prevgraph = wordgraph_assign(new->seq[new->seqnum-1],
1741 					      WORD_INVALID,
1742 					      (new->seqnum >= 2) ? new->seq[new->seqnum-2] : WORD_INVALID,
1743 					      0,
1744 #ifdef GRAPHOUT_PRECISE_BOUNDARY
1745 					      /* wordend are shifted to the last */
1746 #ifdef PASS2_STRICT_IWCD
1747 					      new->wordend_frame[0],
1748 #else
1749 					      now->wordend_frame[0],
1750 #endif
1751 #else
1752 					      now->bestt,
1753 #endif
1754 					      new->score,
1755 					      prev_score,
1756 					      now->g[0],
1757 #ifdef GRAPHOUT_PRECISE_BOUNDARY
1758 #ifdef PASS2_STRICT_IWCD
1759 					      new->wordend_gscore[0],
1760 #else
1761 					      now->wordend_gscore[0],
1762 #endif
1763 #else
1764 					      now->tail_g_score,
1765 #endif
1766 					      now->lscore,
1767 #ifdef CM_SEARCH
1768 					      new->cmscore[new->seqnum-1],
1769 #else
1770 					      LOG_ZERO,
1771 #endif
1772 					      r
1773 					      );
1774 	  } else {
1775 	    new->lastcontext = now->lastcontext;
1776 	    new->prevgraph = now->prevgraph;
1777 	  }
1778 	} /* put_to_stack() != -1 */
1779       }	/* recog->graphout */
1780       /* ���β���Ϥ����ǽ���餺��, �������餵���ñ��Ÿ������ */
1781       /* continue with the 'now' hypothesis, not terminate here */
1782     }
1783 
1784     /*
1785      * ���β��⤫�顤��ñ�콸�����ꤹ��.
1786      * ��ñ�콸���, ���β���ο����ü�ե졼����դ�¸�ߤ���
1787      * �裱�ѥ��Υȥ�ꥹñ�콸��.
1788      *
1789      * N-gram�ξ��ϳ�ñ��� n-gram ��³��Ψ���ޤޤ��.
1790      * DFA �ξ���, ������Ǥ���� DFA �����³��ǽ�ʤ�ΤΤߤ��֤äƤ���
1791      */
1792     /*
1793      * Determine next word set that can connect to this hypothesis.
1794      * They come from the trellis word that has been survived at near the
1795      * beginning of the last word.
1796      *
1797      * In N-gram mode, they also contain N-gram probabilities toward the
1798      * source hypothesis.  In DFA mode, the word set is further reduced
1799      * by the grammatical constraint
1800      */
1801 #ifdef DEBUG
1802     jlog("DEBUG: get next words\n");
1803 #endif
1804     if (r->lmtype == LM_PROB) {
1805       nwnum = ngram_nextwords(now, nextword, maxnwnum, r);
1806     } else if (r->lmtype == LM_DFA) {
1807       nwnum = dfa_nextwords(now, nextword, maxnwnum, r);
1808       /* nextword ����줿�顢�Хåե������䤷�ƺƥ����� */
1809       /* If the number of nextwords can exceed the buffer size, expand the
1810 	 nextword data area */
1811       while (nwnum < 0) {
1812 	nextword = nw_expand(nextword, &maxnwnum, &nwroot, winfo->num);
1813 	nwnum = dfa_nextwords(now, nextword, maxnwnum, r);
1814       }
1815     }
1816     if (debug2_flag) {
1817       jlog("DEBUG:  %d words extracted from wordtrellis\n", nwnum);
1818     }
1819 
1820     /*
1821      * ����ȼ�ñ�콸�礫�鿷����ʸ������������������å��ˤ����.
1822      */
1823     /*
1824      * generate new hypotheses from 'now' and 'nextword',
1825      * and push them to stack
1826      */
1827 #ifdef DEBUG
1828     jlog("DEBUG: generate hypo\n");
1829 #endif
1830 
1831     if (r->lmtype == LM_DFA) {
1832       now_noise_calced = FALSE;	/* TRUE is noise-inserted score has been calculated */
1833     }
1834     i = dwrk->pushctr;		/* store old value */
1835 
1836 #ifdef CM_SEARCH
1837     /* initialize local stack */
1838     cm_init(dwrk, winfo->num, jconf->annotate.cm_alpha
1839 #ifdef CM_MULTIPLE_ALPHA
1840 	    , jconf->annotate.cm_alpha_num
1841 #endif
1842 	    );
1843 #endif
1844 
1845     /* for each nextword, generate a new hypothesis */
1846     for (w = 0; w < nwnum; w++) {
1847       if (r->lmtype == LM_DFA) {
1848 	/* limit word hypothesis */
1849 	if (! (winfo->wton[nextword[w]->id] >= cate_bgn && winfo->wton[nextword[w]->id] < cate_bgn + cate_num)) {
1850 	  continue;
1851 	}
1852       }
1853       new = newnode(r);
1854 
1855       if (r->lmtype == LM_DFA) {
1856 
1857 	if (nextword[w]->can_insert_sp == TRUE) {
1858 	  /* �Υ���������ȥ�ꥹ��������׻��������ޤʤ����Ȥκ����ͤ��� */
1859 	  /* compute hypothesis score with noise inserted */
1860 
1861 	  if (now_noise_calced == FALSE) {
1862 	    /* now �� sp ��Ĥ������� now_noise ����,���Υ�������׻� */
1863 	    /* generate temporal hypothesis 'now_noise' which has short-pause
1864 	       word after the original 'now' */
1865 	    fornoise.id = gdfa->sp_id;
1866 	    now_noise = newnode(r);
1867 	    cpy_node(now_noise, now);
1868 #if 0
1869 	    now_noise_tmp = newnode(r);
1870 	    next_word(now, now_noise_tmp, &fornoise, param, r);
1871 	    scan_word(now_noise_tmp, param, r);
1872 	    for(t=0;t<peseqlen;t++) {
1873 	      now_noise->g[t] = max(now_noise_tmp->g[t], now->g[t]);
1874 	    }
1875 	    free_node(now_noise_tmp);
1876 #else
1877 	    /* expand NOISE only if it exists in backward trellis */
1878 	    /* begin patch by kashima */
1879 	    if (jconf->pass2.looktrellis_flag) {
1880 	      if(!dfa_look_around(&fornoise, now, r)){
1881 		free_node(now_noise);
1882 		free_node(new);
1883 		continue;
1884 	      }
1885 	    }
1886 	    /* end patch by kashima */
1887 
1888 	    /* now_nosie �� ������ g[] ��׻��������� now �� g[] ����Ӥ���
1889 	       �⤤������� */
1890 	    /* compute trellis score g[], and adopt the maximum score
1891 	       for each frame compared with now->g[] */
1892 	    next_word(now, now_noise, &fornoise, param, r);
1893 	    scan_word(now_noise, param, r);
1894 	    for(t=0;t<peseqlen;t++) {
1895 	      now_noise->g[t] = max(now_noise->g[t], now->g[t]);
1896 	    }
1897 	    /* �Υ���������ݤ��θ������������׻������Τǡ�
1898 	       �����ǺǸ�ΥΥ���ñ��� now_noise ����ä� */
1899 	    /* now that score has been computed considering pause insertion,
1900 	       we can delete the last noise word from now_noise here */
1901 	    now_noise->seqnum--;
1902 #endif
1903 	    now_noise_calced = TRUE;
1904 	  }
1905 
1906 	  /* expand word only if it exists in backward trellis */
1907 	  /* begin patch by kashima */
1908 	  if (jconf->pass2.looktrellis_flag) {
1909 	    if(!dfa_look_around(nextword[w], now_noise, r)){
1910 	      free_node(new);
1911 	      continue;
1912 	    }
1913 	  }
1914 	  /* end patch by kashima */
1915 
1916 	  /* ����������' new' �� 'now_noise' �������� */
1917 	  /* generate a new hypothesis 'new' from 'now_noise' */
1918 	  next_word(now_noise, new, nextword[w], param, r);
1919 
1920 	} else {
1921 
1922 	  /* expand word only if it exists in backward trellis */
1923 	  /* begin patch by kashima */
1924 	  if (jconf->pass2.looktrellis_flag) {
1925 	    if(!dfa_look_around(nextword[w], now, r)){
1926 	      free_node(new);
1927 	      continue;
1928 	    }
1929 	  }
1930 	  /* end patch by kashima */
1931 
1932 	  /* ����������' new' �� 'now_noise' �������� */
1933 	  /* generate a new hypothesis 'new' from 'now_noise' */
1934 	  next_word(now, new, nextword[w], param, r);
1935 
1936 	}
1937       }
1938 
1939       if (r->lmtype == LM_PROB) {
1940 
1941 	/* ����������' new' �� 'now_noise' ��������
1942 	   N-gram �ξ��ϥΥ��������̰������ʤ� */
1943 	/* generate a new hypothesis 'new' from 'now'.
1944 	   pause insertion is treated as same as normal words in N-gram mode. */
1945 	next_word(now, new, nextword[w], param, r);
1946 
1947       }
1948 
1949       if (new->score <= LOG_ZERO) { /* not on trellis */
1950 	free_node(new);
1951 	continue;
1952       }
1953 
1954       dwrk->genectr++;
1955 
1956 #ifdef CM_SEARCH
1957       /* store the local hypothesis to temporal stack */
1958       cm_store(dwrk, new);
1959 #else
1960       /* ������������ 'new' �����å�������� */
1961       /* push the generated hypothesis 'new' to stack */
1962 
1963       /* stack overflow */
1964       if (can_put_to_stack(new, &bottom, &stacknum, stacksize) == -1) {
1965 	free_node(new);
1966 	continue;
1967       }
1968 
1969       if (r->graphout) {
1970 	/* assign a word arc to the last fixed word */
1971 	new->lastcontext = now->prevgraph;
1972 	new->prevgraph = wordgraph_assign(new->seq[new->seqnum-2],
1973 					  new->seq[new->seqnum-1],
1974 					  (new->seqnum >= 3) ? new->seq[new->seqnum-3] : WORD_INVALID,
1975 					  new->bestt + 1,
1976 #ifdef GRAPHOUT_PRECISE_BOUNDARY
1977 #ifdef PASS2_STRICT_IWCD
1978 					  /* most up-to-date wordend_gscore is on new, because the last phone of 'now' will be computed at next_word() */
1979 					  new->wordend_frame[new->bestt],
1980 #else
1981 					  now->wordend_frame[new->bestt],
1982 #endif
1983 #else
1984 					  now->bestt,
1985 #endif
1986 					  new->score, prev_score,
1987 #ifdef PASS2_STRICT_IWCD
1988 					  new->g[new->bestt] - new->lscore,
1989 #else
1990 					  now->g[new->bestt+1],
1991 #endif
1992 #ifdef GRAPHOUT_PRECISE_BOUNDARY
1993 #ifdef PASS2_STRICT_IWCD
1994 					  /* most up-to-date wordend_gscore is on new, because the last phone of 'now' will be computed at next_word() */
1995 					  new->wordend_gscore[new->bestt],
1996 #else
1997 					  now->wordend_gscore[new->bestt],
1998 #endif
1999 #else
2000 					  now->tail_g_score,
2001 #endif
2002 					  now->lscore,
2003 #ifdef CM_SEARCH
2004 					  new->cmscore[new->seqnum-2],
2005 #else
2006 					  LOG_ZERO,
2007 #endif
2008 					  r
2009 					  );
2010       }	/* recog->graphout */
2011       put_to_stack(new, &start, &bottom, &stacknum, stacksize);
2012       if (debug2_flag) {
2013 	j = new->seq[new->seqnum-1];
2014 	jlog("DEBUG:  %15s [%15s](id=%5d)(%f) [%d-%d] pushed\n",winfo->wname[j], winfo->woutput[j], j, new->score, new->estimated_next_t + 1, new->bestt);
2015       }
2016       dwrk->current = new;
2017       //callback_exec(CALLBACK_DEBUG_PASS2_PUSH, r);
2018       dwrk->pushctr++;
2019 #endif
2020     } /* end of nextword loop */
2021 
2022 #ifdef CM_SEARCH
2023     /* compute score sum */
2024     cm_sum_score(dwrk
2025 #ifdef CM_MULTIPLE_ALPHA
2026 		 , jconf->annotate.cm_alpha_bgn
2027 		 , jconf->annotate.cm_alpha_end
2028 		 , jconf->annotate.cm_alpha_step
2029 #endif
2030 		 );
2031     /* compute CM and put the generated hypotheses to global stack */
2032     while ((new = cm_get_node(dwrk)) != NULL) {
2033       cm_set_score(dwrk, new
2034 #ifdef CM_MULTIPLE_ALPHA
2035 		   , jconf->annotate.cm_alpha_bgn
2036 		   , jconf->annotate.cm_alpha_end
2037 		   , jconf->annotate.cm_alpha_step
2038 #endif
2039 		   );
2040 #ifdef CM_SEARCH_LIMIT
2041       if (new->cmscore[new->seqnum-1] < jconf->annotate.cm_cut_thres
2042 #ifdef CM_SEARCH_LIMIT_AFTER
2043 	  && dwrk->finishnum > 0
2044 #endif
2045 	  ) {
2046 	free_node(new);
2047 	continue;
2048       }
2049 #endif /* CM_SEARCH_LIMIT */
2050       /*      j = new->seq[new->seqnum-1];
2051 	      printf("  %15s [%15s](id=%5d)(%f) [%d-%d] cm=%f\n",winfo->wname[j], winfo->woutput[j], j, new->score, new->estimated_next_t + 1, new->bestt, new->cmscore[new->seqnum-1]);*/
2052 
2053       /* stack overflow */
2054       if (can_put_to_stack(new, &bottom, &stacknum, stacksize) == -1) {
2055 	free_node(new);
2056 	continue;
2057       }
2058 
2059       if (r->graphout) {
2060 
2061 	/* assign a word arc to the last fixed word */
2062 	new->lastcontext = now->prevgraph;
2063 	new->prevgraph = wordgraph_assign(new->seq[new->seqnum-2],
2064 					  new->seq[new->seqnum-1],
2065 					  (new->seqnum >= 3) ? new->seq[new->seqnum-3] : WORD_INVALID,
2066 					  new->bestt + 1,
2067 #ifdef GRAPHOUT_PRECISE_BOUNDARY
2068 #ifdef PASS2_STRICT_IWCD
2069 					  new->wordend_frame[new->bestt],
2070 #else
2071 					  now->wordend_frame[new->bestt],
2072 #endif
2073 #else
2074 					  now->bestt,
2075 #endif
2076 					  new->score, prev_score,
2077 #ifdef PASS2_STRICT_IWCD
2078 					  new->g[new->bestt] - new->lscore,
2079 #else
2080 					  now->g[new->bestt+1],
2081 #endif
2082 #ifdef GRAPHOUT_PRECISE_BOUNDARY
2083 #ifdef PASS2_STRICT_IWCD
2084 					  new->wordend_gscore[new->bestt],
2085 #else
2086 					  now->wordend_gscore[new->bestt],
2087 #endif
2088 #else
2089 					  now->tail_g_score,
2090 #endif
2091 					  now->lscore,
2092 #ifdef CM_SEARCH
2093 					  new->cmscore[new->seqnum-2],
2094 #else
2095 					  LOG_ZERO,
2096 #endif
2097 					  r
2098 					  );
2099       }	/* recog->graphout */
2100 
2101       put_to_stack(new, &start, &bottom, &stacknum, stacksize);
2102       if (debug2_flag) {
2103 	j = new->seq[new->seqnum-1];
2104 	jlog("DEBUG:  %15s [%15s](id=%5d)(%f) [%d-%d] pushed\n",winfo->wname[j], winfo->woutput[j], j, new->score, new->estimated_next_t + 1, new->bestt);
2105       }
2106       dwrk->current = new;
2107       //callback_exec(CALLBACK_DEBUG_PASS2_PUSH, r);
2108       dwrk->pushctr++;
2109     }
2110 #endif
2111 
2112     if (debug2_flag) {
2113       jlog("DEBUG: %d pushed\n",dwrk->pushctr-i);
2114     }
2115     if (r->lmtype == LM_DFA) {
2116       if (now_noise_calced == TRUE) free_node(now_noise);
2117     }
2118 
2119     /*
2120      * ���Ф��������ΤƤ�
2121      * free the source hypothesis
2122      */
2123     free_node(now);
2124 
2125   }
2126   /***************/
2127   /* End of Loop */
2128   /***************/
2129 
2130   /* output */
2131   if (dwrk->finishnum == 0) {		/* if search failed */
2132 
2133     /* finalize result when no hypothesis was obtained */
2134     if (verbose_flag) {
2135       if (r->config->sw.fallback_pass1_flag) {
2136 	jlog("%02d %s: got no candidates, output 1st pass result as a final result\n", r->config->id, r->config->name);
2137       } else {
2138 	jlog("WARNING: %02d %s: got no candidates, search failed\n", r->config->id, r->config->name);
2139       }
2140     }
2141     pass2_finalize_on_no_result(r, r->config->sw.fallback_pass1_flag);
2142 
2143   } else {			/* if at least 1 candidate found */
2144 
2145     if (debug2_flag) {
2146       jlog("STAT: %02d %s: got %d candidates\n", r->config->id, r->config->name, dwrk->finishnum);
2147     }
2148       /* ��̤Ϥޤ����Ϥ���Ƥ��ʤ��Τǡ�ʸ�����ѥ����å�������Ȥ���
2149 	 �����ǽ��Ϥ��� */
2150       /* As all of the found candidate are in result stack, we sort them
2151 	 and output them here  */
2152       if (debug2_flag) jlog("DEBUG: done\n");
2153       result_reorder_and_output(&r_start, &r_bottom, &r_stacknum, jconf->output.output_hypo_maxnum, r, param);
2154 
2155       r->result.status = J_RESULT_STATUS_SUCCESS;
2156       //callback_exec(CALLBACK_RESULT, r);
2157       //callback_exec(CALLBACK_EVENT_PASS2_END, r);
2158   }
2159 
2160   /* �Ƽ參��������� */
2161   /* output counters */
2162   if (verbose_flag) {
2163     jlog("STAT: %02d %s: %d generated, %d pushed, %d nodes popped in %d\n",
2164 	 r->config->id, r->config->name,
2165 	 dwrk->genectr, dwrk->pushctr, dwrk->popctr, backtrellis->framelen);
2166     jlog_flush();
2167 #ifdef GRAPHOUT_DYNAMIC
2168     if (r->graphout) {
2169       jlog("STAT: %02d %s: graph: %d merged", r->config->id, r->config->name, dynamic_merged_num);
2170 #ifdef GRAPHOUT_SEARCH
2171       jlog("S, %d terminated", terminate_search_num);
2172 #endif
2173       jlog(" in %d\n", dwrk->popctr);
2174     }
2175 #endif
2176   }
2177 
2178   if (dwrk->finishnum > 0 && r->graphout) {
2179     if (verbose_flag) jlog("STAT: ------ wordgraph post-processing begin ------\n");
2180     /* garbage collection and word merging */
2181     /* words with no following word (except end of sentence) will be erased */
2182     wordgraph_purge_leaf_nodes(&wordgraph_root, r);
2183 #ifdef GRAPHOUT_DEPTHCUT
2184     /* perform word graph depth cutting here */
2185     wordgraph_depth_cut(&wordgraph_root, r);
2186 #endif
2187 
2188     /* if GRAPHOUT_PRECISE_BOUNDARY defined,
2189        propagate exact time boundary to the right context.
2190        words of different boundary will be duplicated here.
2191     */
2192     wordgraph_adjust_boundary(&wordgraph_root, r);
2193 
2194     if (jconf->graph.confnet) {
2195       /* CONFUSION NETWORK GENERATION */
2196 
2197       /* old merging functions should be skipped */
2198 
2199       /* finalize: sort and annotate ID */
2200       r->graph_totalwordnum = wordgraph_sort_and_annotate_id(&wordgraph_root, r);
2201       /* check coherence */
2202       wordgraph_check_coherence(wordgraph_root, r);
2203       /* compute graph CM by forward-backward processing */
2204       graph_forward_backward(wordgraph_root, r);
2205       if (verbose_flag) jlog("STAT: ------ wordgraph post-processing end ------\n");
2206 
2207       r->result.wg = wordgraph_root;
2208 
2209       /*
2210        * if (jconf->graph.lattice) {
2211        *   callback_exec(CALLBACK_RESULT_GRAPH, r);
2212        * }
2213        */
2214 
2215       /* parse the graph to extract order relationship */
2216       graph_make_order(wordgraph_root, r);
2217       /* create confusion network */
2218       r->result.confnet = confnet_create(wordgraph_root, r);
2219       /* output confusion network */
2220       //callback_exec(CALLBACK_RESULT_CONFNET, r);
2221       /* free area for order relationship */
2222       graph_free_order(r);
2223       /* free confusion network clusters */
2224       //cn_free_all(&(r->result.confnet));
2225 
2226     } else if (jconf->graph.lattice) {
2227       /* WORD LATTICE POSTPROCESSING */
2228 
2229       /* merge words with the same time and same score */
2230       wordgraph_compaction_thesame(&wordgraph_root);
2231       /* merge words with the same time (skip if "-graphrange -1") */
2232       wordgraph_compaction_exacttime(&wordgraph_root, r);
2233       /* merge words of near time (skip if "-graphrange" value <= 0 */
2234       wordgraph_compaction_neighbor(&wordgraph_root, r);
2235       /* finalize: sort and annotate ID */
2236       r->graph_totalwordnum = wordgraph_sort_and_annotate_id(&wordgraph_root, r);
2237       /* check coherence */
2238       wordgraph_check_coherence(wordgraph_root, r);
2239       /* compute graph CM by forward-backward processing */
2240       graph_forward_backward(wordgraph_root, r);
2241       if (verbose_flag) jlog("STAT: ------ wordgraph post-processing end ------\n");
2242       /* output graph */
2243       r->result.wg = wordgraph_root;
2244       //callback_exec(CALLBACK_RESULT_GRAPH, r);
2245 
2246     } else {
2247       j_internal_error("InternalError: graph generation specified but no output format specified?\n");
2248     }
2249 
2250     /* clear all wordgraph */
2251     //wordgraph_clean(&(r->result.wg));
2252   } /* r->graphout */
2253 
2254 
2255   /* ��λ���� */
2256   /* finalize */
2257   nw_free(nextword, nwroot);
2258   free_all_nodes(start);
2259   free_wordtrellis(dwrk);
2260 #ifdef SCAN_BEAM
2261   free(dwrk->framemaxscore);
2262 #endif
2263   //result_sentence_free(r);
2264   clear_stocker(dwrk);
2265 }
2266 
2267 /* end of file */
2268