/**
 * @file   dfa_decode.c
 *
 * <JA>
 * @brief  Grammar-based next-word prediction (2nd pass)
 *
 * Given a partial sentence hypothesis, determine the set of next words
 * that can be connected to it under the DFA grammar.  In practice, only
 * those words that remain on the word trellis around the estimated
 * word-end frame of the source hypothesis are actually expanded.
 *
 * In the grammar, a short pause is described as a word, and the
 * positions where this short-pause word may appear are specified in the
 * grammar.  Since a pause does not always occur at the specified
 * position in the actual input, word expansion looks one step further:
 * when the next word set contains the short-pause word, the word set
 * that can follow it is also included in the next word set.  Whether a
 * short pause is actually inserted at that point is decided in
 * search_bestfirst_main.c by comparing the scores of both cases.
 *
 * In a recognition process instance that uses a grammar,
 * dfa_firstwords(), dfa_nextwords(), dfa_acceptable() and
 * dfa_eosscore() are called from wchmm_fbs(), the main function of the
 * 2nd pass.  In a recognition process instance that uses an N-gram,
 * the functions in ngram_decode.c are used instead.
 * </JA>
 *
 * <EN>
 * @brief  Grammar-based word prediction (2nd pass)
 *
 * Given a part-of-sentence hypothesis, these functions determine a set
 * of next words allowed to be connected by the grammar.  Actually, only
 * words in the word trellis which exist around the estimated word-end
 * frame will be expanded.
 *
 * When using a DFA grammar, the possible (short) pause insertion points
 * should be explicitly specified in the grammar, by defining a
 * "short-pause word" in the vocabulary and writing its occurrences in
 * the grammar.  Since a short pause will not always appear at the
 * specified point, Julius also considers skipping such a short-pause
 * word when predicting the next words in these functions.  Whether a
 * short pause was actually inserted in the user input will be
 * determined by score in search_bestfirst_main.c.
 *
 * In a recognition process instance with a DFA grammar,
 * dfa_firstwords(), dfa_nextwords(), dfa_acceptable() and
 * dfa_eosscore() will be called from the main search function
 * wchmm_fbs().  When using an N-gram, on the other hand, the
 * corresponding functions in ngram_decode.c will be used instead.
 * </EN>
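 *
 * The outline below is an illustrative sketch, not the actual
 * wchmm_fbs() code, of how these functions cooperate during stack
 * decoding.  It assumes that @a nw is a NEXTWORD array of @a maxnw
 * entries already allocated by the caller and that @a hypo is a partial
 * hypothesis popped from the hypothesis stack:
 *
 * @code
 * int i, nwnum;
 * // seed the stack with words that can begin a sentence
 * nwnum = dfa_firstwords(nw, peseqlen, maxnw, r);
 * // ... later, when expanding a popped partial hypothesis ...
 * if (dfa_acceptable(hypo, r)) {
 *   // hypo is on a final DFA state and may be completed as a sentence
 * }
 * nwnum = dfa_nextwords(hypo, nw, maxnw, r);
 * for (i = 0; i < nwnum; i++) {
 *   if (dfa_look_around(nw[i], hypo, r)) {
 *     // nw[i]->tre now points to a trellis word near the estimated
 *     // connection frame; a new hypothesis can be generated from it
 *   }
 * }
 * @endcode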
 *
 * @author Akinobu LEE
 * @date   Mon Mar  7 15:31:00 2005
 *
 * $Revision: 1.2 $
 *
 */
/*
 * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
 * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
 * All rights reserved
 */

#include <julius/julius.h>

/**
 * <JA>
 * According to the grammar, return the set of words that can appear at
 * the beginning of a sentence, as the initial predicted word set.
 *
 * @param nw [out] pointer to the area to store the next word set
 * @param peseqlen [in] input frame length
 * @param maxnw [in] maximum number of words that can be stored in @a nw
 * @param r [in] recognition process instance
 *
 * @return the number of predicted words, or -1 on error (e.g. overflow)
 * </JA>
 * <EN>
 * Return the initial word set from the grammar.
 *
 * @param nw [out] pointer to hold the resulting next word set
 * @param peseqlen [in] input frame length
 * @param maxnw [in] maximum number of words that can be set in @a nw
 * @param r [in] recognition process instance
 *
 * @return the number of predicted words, or -1 on error.
 * </EN>
 *
 * @callgraph
 * @callergraph
 *
 */
int
dfa_firstwords(NEXTWORD **nw, int peseqlen, int maxnw, RecogProcess *r)
{
  DFA_INFO *dfa;
  DFA_ARC *arc;
  MULTIGRAM *m;
  int s, sb, se;
  int cate, iw, ns;
  int num = 0;

  dfa = r->lm->dfa;

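  /* all grammars attached to this process are merged into one global DFA;
     each active grammar occupies the contiguous state range
     [state_begin, state_begin + state_num), so scan the initial states of
     every active grammar below */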
  for (m = r->lm->grammars; m; m = m->next) {
    if (m->active) {
      sb = m->state_begin;
      se = sb + m->dfa->state_num;
      for (s = sb; s < se; s++) {
        if ((dfa->st[s].status & INITIAL_S) != 0) { /* from an initial state */
          for (arc = dfa->st[s].arc; arc; arc = arc->next) { /* for all arcs */
            cate = arc->label;          /* category ID */
            ns = arc->to_state;         /* next DFA state ID */
            /* all words within the category are expanded */
            for (iw = 0; iw < dfa->term.wnum[cate]; iw++) {
              nw[num]->id = dfa->term.tw[cate][iw]; /* word ID */
              nw[num]->next_state = ns;             /* next state */
              nw[num]->can_insert_sp = FALSE; /* a short pause should not be inserted before this word */
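              /* with a grammar LM, the language score is just the word
                 insertion penalty for the 2nd pass (the -penalty2 option) */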
              nw[num]->lscore = r->config->lmp.penalty2;
              num++;
              if (num >= maxnw) return -1; /* buffer overflow */
            }
          }
        }
      }
    }
  }

  return num;
}

/**
 * <JA>
 * Given a partial sentence hypothesis, return the set of words that can
 * follow it according to the grammar.
 *
 * @param hypo [in] partial sentence hypothesis to be expanded
 * @param nw [out] pointer to the area to store the next word set
 * @param maxnw [in] maximum number of words that can be stored in @a nw
 * @param r [in] recognition process instance
 *
 * @return the number of predicted words, or -1 on error (e.g. overflow)
 * </JA>
 * <EN>
 * Given a part-of-sentence hypothesis, return the next word set defined
 * by the DFA grammar.
 *
 * @param hypo [in] the source part-of-sentence hypothesis
 * @param nw [out] pointer to hold the resulting next word set
 * @param maxnw [in] maximum number of words that can be set in @a nw
 * @param r [in] recognition process instance
 *
 * @return the number of predicted words, or -1 on error.
 * </EN>
 *
 * @callgraph
 * @callergraph
 *
 */
int
dfa_nextwords(NODE *hypo, NEXTWORD **nw, int maxnw, RecogProcess *r)
{
  DFA_INFO *dfa;
  DFA_ARC *arc, *arc2;
  int iw, cate, ns, cate2, ns2;
  int num = 0;

  dfa = r->lm->dfa;

  /* hypo->state: current DFA state ID */
  for (arc = dfa->st[hypo->state].arc; arc; arc = arc->next) { /* for all arcs */
    cate = arc->label;
    ns = arc->to_state;
    if (dfa->is_sp[cate]) {     /* short pause */
      /* expand one step further (not the short-pause word itself) */
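      /* the short-pause word is optional in the actual input: only the words
         that may follow it are predicted here and marked with
         can_insert_sp = TRUE; search_bestfirst_main.c later decides by score
         whether the pause was really inserted */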
      for (arc2 = dfa->st[ns].arc; arc2; arc2 = arc2->next) {
        cate2 = arc2->label;
        ns2 = arc2->to_state;
        for (iw = 0; iw < dfa->term.wnum[cate2]; iw++) {
          nw[num]->id = dfa->term.tw[cate2][iw];
          nw[num]->next_state = ns2;
          nw[num]->can_insert_sp = TRUE;
          nw[num]->lscore = r->config->lmp.penalty2;
          num++;
          if (num >= maxnw) return -1; /* buffer overflow */
        }
      }
    } else {                    /* not a short pause */
      /* all words within the category are expanded */
      for (iw = 0; iw < dfa->term.wnum[cate]; iw++) {
        nw[num]->id = dfa->term.tw[cate][iw];
        nw[num]->next_state = ns;
        nw[num]->can_insert_sp = FALSE;
        nw[num]->lscore = r->config->lmp.penalty2;
        num++;
        if (num >= maxnw) return -1; /* buffer overflow */
      }
    }
  }
  return num;
}


/**
 * <JA>
 * Return whether the partial sentence hypothesis is currently at a
 * final (acceptable) state of the grammar.
 *
 * @param hypo [in] partial sentence hypothesis
 * @param r [in] recognition process instance
 *
 * @return TRUE when it is at an acceptable state, or FALSE if not.
 * </JA>
 * <EN>
 * Return whether the hypothesis is currently on a final state of the
 * grammar.
 *
 * @param hypo [in] sentence hypothesis
 * @param r [in] recognition process instance
 *
 * @return TRUE when on a final state, or FALSE if not acceptable.
 * </EN>
 *
 * @callgraph
 * @callergraph
 *
 */
boolean
dfa_acceptable(NODE *hypo, RecogProcess *r)
{
  if (r->lm->dfa->st[hypo->state].status & ACCEPT_S) {
    return TRUE;
  } else {
    return FALSE;
  }
}

/* patch by kashima */
/**
 * <JA>
 * Check whether the next word candidate exists on the word trellis
 * around the estimated connection time, and if it does, set the pointer
 * to that trellis word.  Since the optimal connection time will be
 * determined later, the trellis word found here need not be the optimal
 * one.
 *
 * @param nword [i/o] next word candidate (the pointer to the
 * corresponding trellis word will be set)
 * @param hypo [in] hypothesis to be expanded
 * @param r [in] recognition process instance
 *
 * @return TRUE if the next word exists on the word trellis around the
 * estimated connection point, or FALSE if not.
 * </JA>
 * <EN>
 * Check whether the given next word exists in the word trellis around
 * the estimated connection time.  If it exists, set the pointer to the
 * corresponding trellis word in the nextword structure.  Since the best
 * connection time will be re-computed later, the word found here need
 * not be the optimal one.
 *
 * @param nword [i/o] next word candidate (pointer to the found trellis
 * word will be set)
 * @param hypo [in] source part-of-sentence hypothesis
 * @param r [in] recognition process instance
 *
 * @return TRUE if the next word exists on the word trellis around the
 * estimated connection point, or FALSE if it does not.
 * </EN>
 *
 * @callgraph
 * @callergraph
 *
 */
boolean
dfa_look_around(NEXTWORD *nword, NODE *hypo, RecogProcess *r)
{
  int t, tm;
  int i;
  WORD_ID w;
  BACKTRELLIS *bt;
  int lookup_range;

  bt = r->backtrellis;
  lookup_range = r->config->pass2.lookup_range;
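  /* lookup_range: number of frames to search before and after the estimated
     connection point (the -lookuprange option) */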

  tm = hypo->estimated_next_t;  /* estimated connection time */

  /* look around frames [tm-lookup_range..tm+lookup_range] */
  /* frames nearer the center are preferred:
     1. the first half (backward)   2. the second half (forward) */
  /* 1. backward */
  for (t = tm; t >= tm - lookup_range; t--) {
    if (t < 0) break;
    for (i = 0; i < bt->num[t]; i++) {
      w = (bt->rw[t][i])->wid;
      if (w == nword->id) {     /* found */
        nword->tre = bt->rw[t][i];
        return TRUE;
      }
    }
  }
  /* 2. forward */
  for (t = tm + 1; t < tm + lookup_range; t++) {
    if (t > bt->framelen - 1) break;
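    /* do not look at or after the connection frame of the source hypothesis */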
    if (t >= hypo->bestt) break;
    for (i = 0; i < bt->num[t]; i++) {
      w = (bt->rw[t][i])->wid;
      if (w == nword->id) {     /* found */
        nword->tre = bt->rw[t][i];
        return TRUE;
      }
    }
  }

  return FALSE;                 /* not found */
}

/* end of file */