1 /*
2 * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
3 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
4 * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
5 * All rights reserved
6 */
7
8 /* accept_check --- sentence accept checker for DFA grammar */
9
10 #include <sent/stddefs.h>
11 #include <sent/vocabulary.h>
12 #include <sent/dfa.h>
13 #include <sent/speech.h>
14 #include "common.h"
15
16 #define SPNAME_DEF "sp"
17
18 WORD_INFO *winfo;
19 DFA_INFO *dfa;
20 char **termname;
21 boolean no_term_file;
22
23 boolean verbose_flag = FALSE;
24 boolean term_mode = FALSE;
25
26 #define MAXBUFLEN 4096
27 typedef struct __wtoken__ {
28 WORD_ID wid;
29 struct __wtoken__ *next;
30 } WTOKEN;
31
32 static char buf[MAXBUFLEN];
33 static WTOKEN *wseq[MAXSEQNUM];
34 static int nseq;
35 static int nseq_reached;
36
37 static void
put_wtoken()38 put_wtoken()
39 {
40 int i;
41 WTOKEN *tok;
42
43 printf("wseq:");
44 for (i=0;i<nseq;i++) {
45 printf(" %s", winfo->woutput[wseq[i]->wid]);
46 }
47 printf("\n");
48 printf("cate:");
49 for (i=0;i<nseq;i++) {
50 if (wseq[i]->next != NULL) { /* more than one */
51 printf(" (");
52 } else {
53 printf(" ");
54 }
55 if (no_term_file) {
56 printf("%s", winfo->wname[wseq[i]->wid]);
57 } else {
58 printf("%s", termname[winfo->wton[wseq[i]->wid]]);
59 }
60 if (wseq[i]->next != NULL) { /* more than one */
61 for(tok = wseq[i]->next; tok; tok = tok->next) {
62 if (no_term_file) {
63 printf("|%s", winfo->wname[tok->wid]);
64 } else {
65 printf("|%s", termname[winfo->wton[tok->wid]]);
66 }
67 }
68 printf(")");
69 }
70 }
71 printf("\n");
72 }
73
74 static boolean
get_wtoken()75 get_wtoken()
76 {
77 char *p;
78 int i,it;
79 WTOKEN *new, *prev;
80
81 /* get word sequence from stdin */
82 if (term_mode) {
83 fprintf(stderr, "please input category sequence>");
84 } else {
85 fprintf(stderr, "please input word sequence>");
86 }
87 if (fgets(buf, MAXBUFLEN, stdin) == NULL) {
88 /* if input error, terminate program */
89 exit(0);
90 }
91
92 /* decode string -> wid */
93 nseq = 0;
94 for(p = strtok(buf, " \n"); p; p = strtok(NULL, " \n")) {
95 it = 0;
96 prev = NULL;
97 if (term_mode) {
98 if (no_term_file) {
99 if (atoi(p) >= 0 && atoi(p) < dfa->term_num) {
100 new = (WTOKEN *)mymalloc(sizeof(WTOKEN));
101 if (dfa->term.wnum[atoi(p)] == 0) {
102 printf("rejected at %d: category \"%s\" has no word\n", nseq+1, p);
103 return(FALSE);
104 }
105 new->wid = dfa->term.tw[atoi(p)][0];
106 new->next = prev;
107 prev = new;
108 it++;
109 }
110 } else { /* termname exist */
111 for (i=0;i<dfa->term_num;i++) {
112 if (strmatch(p, termname[i])) {
113 if (dfa->term.wnum[i] == 0) {
114 printf("rejected at %d: category \"%s\" has no word\n", nseq+1, p);
115 return(FALSE);
116 }
117 new = (WTOKEN *)mymalloc(sizeof(WTOKEN));
118 new->wid = dfa->term.tw[i][0];
119 new->next = prev;
120 prev = new;
121 it++;
122 }
123 }
124 }
125 if (prev == NULL) { /* not found */
126 printf("rejected at %d: category \"%s\" not exist\n", nseq+1, p);
127 return(FALSE);
128 }
129 } else { /* normal word mode */
130 for (i=0;i<winfo->num;i++) {
131 if (strmatch(p, winfo->woutput[i])) {
132 new = (WTOKEN *)mymalloc(sizeof(WTOKEN));
133 new->wid = i;
134 new->next = prev;
135 prev = new;
136 it++;
137 }
138 }
139 if (prev == NULL) { /* not found */
140 printf("rejected at %d: word \"%s\" not in voca\n", nseq+1, p);
141 return(FALSE);
142 }
143 }
144 wseq[nseq++] = new;
145 }
146
147 /* output */
148 put_wtoken();
149 return(TRUE);
150 }
151
/* can_accept_recursive(): returns whether the iseq-th input can be accepted at state stateid */
/* (depth-first search) */
/*
 * Verbose-trace helper: print state number `s' as "[s]" on its own
 * line, preceded by l+1 spaces of indentation (none when l < 0).
 */
static void
put_state(int s, int l)
{
  int remaining;

  for (remaining = l; remaining >= 0; remaining--) {
    putchar(' ');
  }
  printf("[%d]\n", s);
}
161
162 static boolean
can_accept_recursive(int stateid,int iseq)163 can_accept_recursive(int stateid, int iseq)
164 {
165 WTOKEN *token;
166 DFA_ARC *arc, *arc2;
167 int cate, cate2, ns, ns2;
168 int i;
169
170 if (verbose_flag) put_state(stateid, iseq);
171
172 if (nseq_reached > iseq) nseq_reached = iseq;
173 if (iseq < 0) { /* reaches last */
174 /* previous call is last word */
175 if (dfa->st[stateid].status | ACCEPT_S) {
176 return TRUE;
177 } else {
178 return FALSE;
179 }
180 }
181
182 for (token = wseq[iseq]; token; token = token->next) {
183 if (verbose_flag) {
184 for(i=0;i<=iseq;i++) printf(" ");
185 if (no_term_file) {
186 printf("%s(%s)\n",winfo->woutput[token->wid], winfo->wname[token->wid]);
187 } else {
188 printf("%s(%s:%s)\n",winfo->woutput[token->wid], termname[winfo->wton[token->wid]], winfo->wname[token->wid]);
189 }
190 }
191 for (arc = dfa->st[stateid].arc; arc; arc = arc->next) {
192 cate = arc->label;
193 ns = arc->to_state;
194 if (dfa->is_sp[cate]) {
195 for (arc2 = dfa->st[ns].arc; arc2; arc2 = arc2->next) {
196 cate2 = arc2->label;
197 ns2 = arc2->to_state;
198 if (cate2 == winfo->wton[token->wid]) { /* found */
199 if (can_accept_recursive(ns2, iseq - 1)) {
200 return TRUE;
201 } else {
202 /* examine next */
203 if (verbose_flag) put_state(stateid, iseq);
204 }
205 }
206 }
207 } else { /* not noise */
208 if (cate == winfo->wton[token->wid]) { /* found */
209 if (can_accept_recursive(ns, iseq - 1)) {
210 return TRUE;
211 } else {
212 /* examine next */
213 if (verbose_flag) put_state(stateid, iseq);
214 }
215 }
216 }
217 }
218 }
219
220 /* not allowed under this node */
221 return FALSE;
222 }
223
224 static void
accept_main()225 accept_main()
226 {
227 int i;
228
229 if (!get_wtoken()) return; /* failed */
230 if (nseq == 0) return;
231
232 nseq_reached = nseq;
233
234 for (i=0;i<dfa->state_num;i++) {
235 if ((dfa->st[i].status & INITIAL_S) != 0) { /* $B=i4|>uBV$+$i(B */
236 if (can_accept_recursive(i, nseq-1)) {
237 printf("accepted\n");
238 return;
239 }
240 }
241 }
242 printf("rejected at %d by DFA\n", nseq_reached + 1);
243 }
244
245
/*
 * Print the command-line help to stderr and terminate the process with
 * exit status 1.  `s' is the program name shown in the usage line.
 * The function never returns; the char * return type only lets it
 * appear inside a conditional expression that yields a string.
 */
static char *
usage(char *s)
{
  static const char *const option_help[] = {
    " -t ... use category symbols instead of words (needs .term)\n",
    " -s string ... specify short-pause model\n",
    " -v ... verbose output\n",
  };
  size_t k;

  fputs("accept_check --- determine acception/rejection of transcription from stdin\n", stderr);
  fprintf(stderr, "usage: %s [-t] [-v] prefix\n",s);
  for (k = 0; k < sizeof(option_help) / sizeof(option_help[0]); k++) {
    fputs(option_help[k], stderr);
  }
  exit(1);
}
256
257 static void
put_dfainfo()258 put_dfainfo()
259 {
260 printf("%d categories, %d words\n",dfa->term_num,winfo->num);
261 printf("DFA has %d nodes and %d arcs\n", dfa->state_num, dfa->arc_num);
262 }
263
main(int argc,char * argv[])264 int main(int argc, char *argv[])
265 {
266 int i, len;
267 char *prefix;
268 char *dfafile, *dictfile, *termfile;
269 char *spname_default = SPNAME_DEF;
270 char *spname = NULL;
271 #define NEXTARG (++i >= argc) ? (char *)usage(argv[0]) : argv[i]
272
273 /* argument */
274 if (argc == 1) usage(argv[0]);
275 for(i=1;i<argc;i++) {
276 if (argv[i][0] == '-') {
277 switch(argv[i][1]) {
278 case 'v': /* verbose output */
279 verbose_flag = TRUE;
280 break;
281 case 't':
282 term_mode = TRUE;
283 break;
284 case 's':
285 if (++i >= argc) {
286 usage(argv[0]);
287 }
288 spname = argv[i];
289 default:
290 fprintf(stderr, "no such option: %s\n",argv[i]);
291 usage(argv[0]);
292 }
293 } else {
294 prefix = argv[i];
295 }
296 }
297 if (spname == NULL) spname = spname_default;
298
299 len = strlen(prefix) + 10;
300 dfafile = (char *)mymalloc(len);
301 dictfile = (char *)mymalloc(len);
302 termfile = (char *)mymalloc(len);
303 strcpy(dfafile, prefix);
304 strcat(dfafile, ".dfa");
305 strcpy(dictfile, prefix);
306 strcat(dictfile, ".dict");
307 strcpy(termfile, prefix);
308 strcat(termfile, ".term");
309
310 /* start init */
311 winfo = word_info_new();
312 init_voca(winfo, dictfile, NULL, TRUE, FALSE);
313 dfa = dfa_info_new();
314 init_dfa(dfa, dfafile);
315 make_dfa_voca_ref(dfa, winfo);
316 termname = (char **)mymalloc(sizeof(char *) * dfa->term_num);
317 init_term(termfile, termname);
318 if (termname[0] == NULL) { /* no .term file */
319 no_term_file = TRUE;
320 } else {
321 no_term_file = FALSE;
322 }
323
324 /* output info */
325 put_dfainfo();
326
327 /* set dfa->sp_id and dfa->is_sp[cid] from name "sp" */
328 {
329 int t, i;
330 WORD_ID w;
331
332 dfa->sp_id = WORD_INVALID;
333 dfa->is_sp = (boolean *)mymalloc(sizeof(boolean) * dfa->term_num);
334 for(t=0;t<dfa->term_num;t++) {
335 dfa->is_sp[t] = FALSE;
336 for(i=0;i<dfa->term.wnum[t]; i++) {
337 w = dfa->term.tw[t][i];
338 if (strcmp(winfo->woutput[w], spname) == 0) {
339 if (dfa->sp_id == WORD_INVALID) dfa->sp_id = w;
340 dfa->is_sp[t] = TRUE;
341 break;
342 }
343 }
344 }
345 }
346 if (verbose_flag) {
347 if (dfa->sp_id != WORD_INVALID) {
348 printf("skippable word for NOISE: %s\t%s\n", winfo->wname[dfa->sp_id], winfo->woutput[dfa->sp_id]);
349 }
350 }
351 printf("----- \n");
352
353 /* main loop */
354 for (;;) {
355 accept_main();
356 }
357
358 free(dfafile);
359 free(dictfile);
360 return 0;
361 }
362