1 /**
2 * @file dfa_decode.c
3 *
4 * <JA>
5 * @brief ����ʸˡ�˴�Ť���ñ��ͽ¬���裲�ѥ���
6 *
7 * Ϳ����줿������Ф��ơ�DFA ʸˡ����³��ǽ�ʼ�ñ��ν������ꤹ��.
8 * �������ºݤˤ�, Ÿ���������ͽ¬������ü�ե졼����դ�ñ��ȥ�ꥹ
9 * ��˻ĤäƤ���ñ��Τߤ�Ÿ�������.
10 *
11 * ʸˡ����Ǥϥ��硼�ȥݡ�����ñ��Ȥ��Ƶ��Ҥ������Υ��硼�ȥݡ���ñ���
12 * �и����֤�ʸˡ�ǻ��ꤹ��. ���������ºݤ����ϤǤϤ������ꤷ�����֤�
13 * ɬ������ݡ���������ʤ����ᡤñ��Ÿ���ˤ����Ƥϡ�
14 * ��ñ�콸��˥��硼�ȥݡ�����������ϡ�����ˤ��μ���ñ�콸��ޤǸ���
15 * ��ñ�콸��˴ޤ��. �ºݤˤ����˥��硼�ȥݡ�������������뤫�ɤ����ϡ�
16 * search_bestfirst_main.c ��ξ�ԤΥ���������Ӥ���Ƚ�Ǥ���.
17 *
18 * ʸˡ���Ѥ���ǧ�����������Ǥϡ�dfa_firstwords(), dfa_nextwords(),
19 * dfa_acceptable(), dfa_eosscore() ����2�ѥ��Υᥤ��ؿ� wchmm_fbs() ����
20 * ���Ѥ����. �ʤ� N-gram ���Ѥ���ǧ�����������Ǥϡ�
21 * ����� ngram_decode.c ��δؿ����Ȥ���.
22 * </JA>
23 *
24 * <EN>
25 * @brief Grammar-based word prediction (2nd pass)
26 *
27 * Given a part-of-sentence hypothesis, these function determine a set of next
28 * words allowed to be connected by the grammar. Actually, only words in the
29 * word trellis, which exist around the estimated word-end frame will be
30 * expanded.
31 *
32 * When using DFA grammar, the possible (short) pause insertion point
33 * should be explicitly specified in grammar, by defining "short-pause
34 * word" in vocabulary and write its appearance in grammar. Since a
35 * short pause will not always appear on the specified point, Julius
36 * will consider the skipping of such short pause word for the next
37 * word prediction in these functions. Whether short pause was
38 * actually inserted or not in the user input will be determined by
39 * score in search_bestfirst_main.c.
40 *
41 * In recognition process instance with DFA grammar, dfa_firstwords(),
42 * dfa_nextwords(), dfa_acceptable() and dfa_eosscore() will be called
43 * from main search function wchmm_fbs(). When using N-gram, on the
44 * other hand, the corresponding functions in ngram_decode.c will be
45 * used instead. </EN>
46 *
47 * @author Akinobu LEE
48 * @date Mon Mar 7 15:31:00 2005
49 *
50 * $Revision: 1.2 $
51 *
52 */
53 /*
54 * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
55 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
56 * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
57 * All rights reserved
58 */
59
60 #include <julius/julius.h>
61
62 /**
63 * <JA>
64 * ʸˡ�ˤ������äơ�ʸƬ����³������ñ���ǽ��ͽ¬ñ�췲�Ȥ����֤�.
65 *
66 * @param nw [out] ��ñ�콸��γ�Ǽ��ؤΥݥ���
67 * @param peseqlen [in] ���ϥե졼��Ĺ
68 * @param maxnw [in] @a nw �ε���������Ĺ
69 * @param r [in] ǧ���ץ���������
70 *
71 * @return ͽ¬���줿ñ��� (���������顼���� -1 ���֤�)
72 * </JA>
73 * <EN>
74 * Return initial word set from grammar.
75 *
76 * @param nw [out] pointer to hold the resulting next word set
77 * @param peseqlen [in] input frame length
78 * @param maxnw [in] maximum number of words that can be set in @a nw
79 * @param r [in] recognition process instance
80 *
81 * @return the number of predicted words, or -1 on error.
82 * </EN>
83 *
84 * @callgraph
85 * @callergraph
86 *
87 */
88 int
dfa_firstwords(NEXTWORD ** nw,int peseqlen,int maxnw,RecogProcess * r)89 dfa_firstwords(NEXTWORD **nw, int peseqlen, int maxnw, RecogProcess *r)
90 {
91 DFA_INFO *dfa;
92 DFA_ARC *arc;
93 MULTIGRAM *m;
94 int s, sb, se;
95 int cate, iw, ns;
96 int num = 0;
97
98 dfa = r->lm->dfa;
99
100 for (m = r->lm->grammars; m; m = m->next) {
101 if (m->active) {
102 sb = m->state_begin;
103 se = sb + m->dfa->state_num;
104 for(s=sb;s<se;s++) {
105 if ((dfa->st[s].status & INITIAL_S) != 0) { /* from initial state */
106 for (arc = dfa->st[s].arc; arc; arc = arc->next) { /* for all arc */
107 cate = arc->label; /* category ID */
108 ns = arc->to_state; /* next DFA state ID */
109 /* all words within the category is expanded */
110 for (iw=0;iw<dfa->term.wnum[cate];iw++) {
111 nw[num]->id = dfa->term.tw[cate][iw]; /* word ID */
112 nw[num]->next_state = ns; /* next state */
113 nw[num]->can_insert_sp = FALSE; /* short pause should not inserted before this word */
114 nw[num]->lscore = r->config->lmp.penalty2;
115 num++;
116 if (num >= maxnw) return -1; /* buffer overflow */
117 }
118 }
119 }
120 }
121 }
122 }
123
124 return num;
125 }
126
127 /**
128 * <JA>
129 * ��ʬʸ������Ф��ơ�ʸˡ�˽��äƼ�����³������ñ�췲���֤�.
130 *
131 * @param hypo [in] Ÿ��������ʬʸ����
132 * @param nw [out] ��ñ�콸��γ�Ǽ��ؤΥݥ���
133 * @param maxnw [in] @a nw �ε���������Ĺ
134 * @param r [in] ǧ���ץ���������
135 *
136 * @return ͽ¬���줿ñ��� (���������顼���� -1 ���֤�)
137 * </JA>
138 * <EN>
139 * Given a part-of-sentence hypothesis, returns the next word set defined
140 * by DFA grammar.
141 *
142 * @param hypo [in] the source part-of-sentene hypothesis
143 * @param nw [out] pointer to hold the resulting next word set
144 * @param maxnw [in] maximum number of words that can be set in @a nw
145 * @param r [in] recognition process instance
146 *
147 * @return the number of predicted words, or -1 on error.
148 * </EN>
149 *
150 * @callgraph
151 * @callergraph
152 *
153 */
154 int
dfa_nextwords(NODE * hypo,NEXTWORD ** nw,int maxnw,RecogProcess * r)155 dfa_nextwords(NODE *hypo, NEXTWORD **nw, int maxnw, RecogProcess *r)
156 {
157 DFA_INFO *dfa;
158 DFA_ARC *arc, *arc2;
159 int iw,cate,ns,cate2,ns2;
160 int num = 0;
161
162 dfa = r->lm->dfa;
163
164 /* hypo->state: current DFA state ID */
165 for (arc = dfa->st[hypo->state].arc; arc; arc = arc->next) {/* for all arc */
166 cate = arc->label;
167 ns = arc->to_state;
168 if (dfa->is_sp[cate]) { /* short pause */
169 /* expand one more next (not expand the short pause word itself) */
170 for (arc2 = dfa->st[ns].arc; arc2; arc2 = arc2->next) {
171 cate2 = arc2->label;
172 ns2 = arc2->to_state;
173 for (iw=0;iw<dfa->term.wnum[cate2];iw++) {
174 nw[num]->id = dfa->term.tw[cate2][iw];
175 nw[num]->next_state = ns2;
176 nw[num]->can_insert_sp = TRUE;
177 nw[num]->lscore = r->config->lmp.penalty2;
178 num++;
179 if (num >= maxnw) return -1; /* buffer overflow */
180 }
181 }
182 } else { /* not short pause */
183 /* all words within the category is expanded */
184 for (iw=0;iw<dfa->term.wnum[cate];iw++) {
185 nw[num]->id = dfa->term.tw[cate][iw];
186 nw[num]->next_state = ns;
187 nw[num]->can_insert_sp = FALSE;
188
189 nw[num]->lscore = r->config->lmp.penalty2;
190 num++;
191 if (num >= maxnw) return -1; /* buffer overflow */
192 }
193 }
194 }
195 return num;
196 }
197
198
199 /**
200 * <JA>
201 * ��ʬʸ���⤬ʸˡ��ʸ�Ȥ��ƺǽ�(������ǽ)���֤ˤ��뤫�ɤ������֤�.
202 *
203 * @param hypo [in] ��ʬʸ����
204 * @param r [in] ǧ���ץ���������
205 *
206 * @return ������ǽ���֤ˤ���Ȥ� TRUE �����Բ�ǽ�ʤȤ� FALSE
207 * </JA>
208 * <EN>
209 * Return whether the hypothesis is currently on final state
210 *
211 * @param hypo [in] sentence hypothesis
212 * @param r [in] recognition process instance
213 *
214 * @return TRUE when on final state, or FALSE if not acceptable.
215 * </EN>
216 *
217 * @callgraph
218 * @callergraph
219 *
220 */
221 boolean
dfa_acceptable(NODE * hypo,RecogProcess * r)222 dfa_acceptable(NODE *hypo, RecogProcess *r)
223 {
224 if (r->lm->dfa->st[hypo->state].status & ACCEPT_S) {
225 return TRUE;
226 } else {
227 return FALSE;
228 }
229 }
230
231 /* patch by kashima */
232 /**
233 * <JA>
234 * ��ñ����䤬���ο��ꤵ�줿��³ͽ¬���������ñ��ȥ�ꥹ���
235 * ���뤫�ɤ���������å������⤷����Ф��Υȥ�ꥹñ��ؤΥݥ��å�
236 * ����. �ʤ��������³���Ϥ��ȤǷ�ޤ�Τǡ������ǤϺ�Ŭ�ʥȥ�ꥹñ��
237 * �Ǥʤ��Ƥ褤.
238 *
239 * @param nword [i/o] ��ñ����� (�б�����ȥ�ꥹñ��ؤΥݥ���
240 * ���åȤ����)
241 * @param hypo [in] Ÿ��������
242 * @param r [in] ǧ���ץ���������
243 *
244 * @return ñ��ȥ�ꥹ���ͽ¬�����ն�˼�ñ�줬¸�ߤ���� TRUE��¸��
245 * ���ʤ���� FALSE ���֤�.
246 * </JA>
247 * <EN>
248 * Check if the given nextword exists in the word trellis around the
249 * estimated connection time. If exist, set the pointer to the corresponding
250 * trellis word to the nextword. Since the best connection time will be
251 * re-computed later, it need not to be an optimal one.
252 *
253 * @param nword [i/o] next word candidate (pointer to the found trellis word
254 * will be set)
255 * @param hypo [in] source part-of-sentence hypothesis
256 * @param r [in] recognition process instance
257 *
258 * @return TRUE if the nextword exists on the word trellis around the estimated
259 * connection point, or FALSE if not exist.
260 * </EN>
261 *
262 * @callgraph
263 * @callergraph
264 *
265 */
266 boolean
dfa_look_around(NEXTWORD * nword,NODE * hypo,RecogProcess * r)267 dfa_look_around(NEXTWORD *nword, NODE *hypo, RecogProcess *r)
268 {
269 int t,tm;
270 int i;
271 WORD_ID w;
272 BACKTRELLIS *bt;
273 int lookup_range;
274
275 bt = r->backtrellis;
276 lookup_range = r->config->pass2.lookup_range;
277
278 tm = hypo->estimated_next_t; /* estimated connection time */
279
280 /* look aound [tm-lookup_range..tm+lookup_range] frame */
281 /* near the center is better:
282 1. the first half (backward) 2. the second half (forward) */
283 /* 1. backward */
284 for(t = tm; t >= tm - lookup_range; t--) {
285 if (t < 0) break;
286 for (i=0;i<bt->num[t];i++) {
287 w = (bt->rw[t][i])->wid;
288 if(w == nword->id){ /* found */
289 nword->tre = bt->rw[t][i];
290 return TRUE;
291 }
292 }
293 }
294 /* 2. forward */
295 for(t = tm + 1; t < tm + lookup_range; t++) {
296 if (t > bt->framelen - 1) break;
297 if (t >= hypo->bestt) break;
298 for (i=0;i<bt->num[t];i++) {
299 w = (bt->rw[t][i])->wid;
300 if(w == nword->id){ /* found */
301 nword->tre = bt->rw[t][i];
302 return TRUE;
303 }
304 }
305 }
306
307 return FALSE; /* not found */
308 }
309
310 /* end of file */
311