1 /*
2  * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
3  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
4  * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
5  * All rights reserved
6  */
7 
8 /* accept_check --- sentence accept checker for DFA grammar */
9 
10 #include <sent/stddefs.h>
11 #include <sent/vocabulary.h>
12 #include <sent/dfa.h>
13 #include <sent/speech.h>
14 #include "common.h"
15 
16 #define SPNAME_DEF "sp"
17 
18 WORD_INFO *winfo;
19 DFA_INFO *dfa;
20 char **termname;
21 boolean no_term_file;
22 
23 boolean verbose_flag = FALSE;
24 boolean term_mode = FALSE;
25 
26 #define MAXBUFLEN 4096
27 typedef struct __wtoken__ {
28   WORD_ID wid;
29   struct __wtoken__ *next;
30 } WTOKEN;
31 
32 static char buf[MAXBUFLEN];
33 static WTOKEN *wseq[MAXSEQNUM];
34 static int nseq;
35 static int nseq_reached;
36 
37 static void
put_wtoken()38 put_wtoken()
39 {
40   int i;
41   WTOKEN *tok;
42 
43   printf("wseq:");
44   for (i=0;i<nseq;i++) {
45     printf(" %s", winfo->woutput[wseq[i]->wid]);
46   }
47   printf("\n");
48   printf("cate:");
49   for (i=0;i<nseq;i++) {
50     if (wseq[i]->next != NULL) { /* more than one */
51       printf(" (");
52     } else {
53       printf(" ");
54     }
55     if (no_term_file) {
56       printf("%s", winfo->wname[wseq[i]->wid]);
57     } else {
58       printf("%s", termname[winfo->wton[wseq[i]->wid]]);
59     }
60     if (wseq[i]->next != NULL) { /* more than one */
61       for(tok = wseq[i]->next; tok; tok = tok->next) {
62 	if (no_term_file) {
63 	  printf("|%s", winfo->wname[tok->wid]);
64 	} else {
65 	  printf("|%s", termname[winfo->wton[tok->wid]]);
66 	}
67       }
68       printf(")");
69     }
70   }
71   printf("\n");
72 }
73 
74 static boolean
get_wtoken()75 get_wtoken()
76 {
77   char *p;
78   int i,it;
79   WTOKEN *new, *prev;
80 
81   /* get word sequence from stdin */
82   if (term_mode) {
83     fprintf(stderr, "please input category sequence>");
84   } else {
85     fprintf(stderr, "please input word sequence>");
86   }
87   if (fgets(buf, MAXBUFLEN, stdin) == NULL) {
88     /* if input error, terminate program */
89     exit(0);
90   }
91 
92   /* decode string -> wid */
93   nseq = 0;
94   for(p = strtok(buf, " \n"); p; p = strtok(NULL, " \n")) {
95     it = 0;
96     prev = NULL;
97     if (term_mode) {
98       if (no_term_file) {
99 	if (atoi(p) >= 0 && atoi(p) < dfa->term_num) {
100 	  new = (WTOKEN *)mymalloc(sizeof(WTOKEN));
101 	  if (dfa->term.wnum[atoi(p)] == 0) {
102 	    printf("rejected at %d: category \"%s\" has no word\n", nseq+1, p);
103 	    return(FALSE);
104 	  }
105 	  new->wid = dfa->term.tw[atoi(p)][0];
106 	  new->next = prev;
107 	  prev = new;
108 	  it++;
109 	}
110       } else {			/* termname exist */
111 	for (i=0;i<dfa->term_num;i++) {
112 	  if (strmatch(p, termname[i])) {
113 	    if (dfa->term.wnum[i] == 0) {
114 	      printf("rejected at %d: category \"%s\" has no word\n", nseq+1, p);
115 	      return(FALSE);
116 	    }
117 	    new = (WTOKEN *)mymalloc(sizeof(WTOKEN));
118 	    new->wid = dfa->term.tw[i][0];
119 	    new->next = prev;
120 	    prev = new;
121 	    it++;
122 	  }
123 	}
124       }
125       if (prev == NULL) {		/* not found */
126 	printf("rejected at %d: category \"%s\" not exist\n", nseq+1, p);
127 	return(FALSE);
128       }
129     } else {			/* normal word mode */
130       for (i=0;i<winfo->num;i++) {
131 	if (strmatch(p, winfo->woutput[i])) {
132 	  new = (WTOKEN *)mymalloc(sizeof(WTOKEN));
133 	  new->wid = i;
134 	  new->next = prev;
135 	  prev = new;
136 	  it++;
137 	}
138       }
139       if (prev == NULL) {		/* not found */
140 	printf("rejected at %d: word \"%s\" not in voca\n", nseq+1, p);
141 	return(FALSE);
142       }
143     }
144     wseq[nseq++] = new;
145   }
146 
147   /* output */
148   put_wtoken();
149   return(TRUE);
150 }
151 
/* can_accept_recursive(): returns whether the iseq-th input can be accepted at state stateid */
/* (depth-first search) */
/*
 * Trace helper: print state number `s` as "[s]" on its own line,
 * preceded by (l + 1) two-space indentation units.
 */
static void
put_state(int s, int l)
{
  int depth = 0;

  while (depth <= l) {
    fputs("  ", stdout);
    depth++;
  }
  printf("[%d]\n", s);
}
161 
162 static boolean
can_accept_recursive(int stateid,int iseq)163 can_accept_recursive(int stateid, int iseq)
164 {
165   WTOKEN *token;
166   DFA_ARC *arc, *arc2;
167   int cate, cate2, ns, ns2;
168   int i;
169 
170   if (verbose_flag) put_state(stateid, iseq);
171 
172   if (nseq_reached > iseq) nseq_reached = iseq;
173   if (iseq < 0) {		/* reaches last */
174     /* previous call is last word */
175     if (dfa->st[stateid].status | ACCEPT_S) {
176       return TRUE;
177     } else {
178       return FALSE;
179     }
180   }
181 
182   for (token = wseq[iseq]; token; token = token->next) {
183     if (verbose_flag) {
184       for(i=0;i<=iseq;i++) printf("  ");
185       if (no_term_file) {
186 	printf("%s(%s)\n",winfo->woutput[token->wid], winfo->wname[token->wid]);
187       } else {
188 	printf("%s(%s:%s)\n",winfo->woutput[token->wid], termname[winfo->wton[token->wid]], winfo->wname[token->wid]);
189       }
190     }
191     for (arc = dfa->st[stateid].arc; arc; arc = arc->next) {
192       cate = arc->label;
193       ns = arc->to_state;
194       if (dfa->is_sp[cate]) {
195 	for (arc2 = dfa->st[ns].arc; arc2; arc2 = arc2->next) {
196 	  cate2 = arc2->label;
197 	  ns2 = arc2->to_state;
198 	  if (cate2 == winfo->wton[token->wid]) { /* found */
199 	    if (can_accept_recursive(ns2, iseq - 1)) {
200 	      return TRUE;
201 	    } else {
202 	      /* examine next */
203 	      if (verbose_flag) put_state(stateid, iseq);
204 	    }
205 	  }
206 	}
207       } else {			/* not noise */
208 	if (cate == winfo->wton[token->wid]) { /* found */
209 	  if (can_accept_recursive(ns, iseq - 1)) {
210 	    return TRUE;
211 	  } else {
212 	    /* examine next */
213 	      if (verbose_flag) put_state(stateid, iseq);
214 	  }
215 	}
216       }
217     }
218   }
219 
220   /* not allowed under this node */
221   return FALSE;
222 }
223 
224 static void
accept_main()225 accept_main()
226 {
227   int i;
228 
229   if (!get_wtoken()) return; /* failed */
230   if (nseq == 0) return;
231 
232   nseq_reached = nseq;
233 
234   for (i=0;i<dfa->state_num;i++) {
235     if ((dfa->st[i].status & INITIAL_S) != 0) { /* $B=i4|>uBV$+$i(B */
236       if (can_accept_recursive(i, nseq-1)) {
237 	printf("accepted\n");
238 	return;
239       }
240     }
241   }
242   printf("rejected at %d by DFA\n", nseq_reached + 1);
243 }
244 
245 
/*
 * Print the command-line help to stderr and terminate with exit status 1.
 *
 * s  program name shown in the synopsis line
 *
 * Never returns.  The return type is `char *` only so that a call can be
 * used as one branch of the NEXTARG conditional expression in main().
 */
static char *
usage(char *s)
{
  fprintf(stderr, "accept_check --- determine acception/rejection of transcription from stdin\n");
  /* the synopsis now lists -s, which is documented below */
  fprintf(stderr, "usage: %s [-t] [-s string] [-v] prefix\n",s);
  fprintf(stderr, "  -t  ... use category symbols instead of words (needs .term)\n");
  fprintf(stderr, "  -s string ... specify short-pause model\n");
  fprintf(stderr, "  -v  ... verbose output\n");
  exit(1);
  return NULL;                  /* not reached; keeps the non-void function well-formed */
}
256 
257 static void
put_dfainfo()258 put_dfainfo()
259 {
260   printf("%d categories, %d words\n",dfa->term_num,winfo->num);
261   printf("DFA has %d nodes and %d arcs\n", dfa->state_num, dfa->arc_num);
262 }
263 
main(int argc,char * argv[])264 int main(int argc, char *argv[])
265 {
266   int i, len;
267   char *prefix;
268   char *dfafile, *dictfile, *termfile;
269   char *spname_default = SPNAME_DEF;
270   char *spname = NULL;
271 #define NEXTARG (++i >= argc) ? (char *)usage(argv[0]) : argv[i]
272 
273   /* argument */
274   if (argc == 1) usage(argv[0]);
275   for(i=1;i<argc;i++) {
276     if (argv[i][0] == '-') {
277       switch(argv[i][1]) {
278       case 'v':			/* verbose output */
279 	verbose_flag = TRUE;
280 	break;
281       case 't':
282 	term_mode = TRUE;
283 	break;
284       case 's':
285 	if (++i >= argc) {
286 	  usage(argv[0]);
287 	}
288 	spname = argv[i];
289       default:
290 	fprintf(stderr, "no such option: %s\n",argv[i]);
291 	usage(argv[0]);
292       }
293     } else {
294       prefix = argv[i];
295     }
296   }
297   if (spname == NULL) spname = spname_default;
298 
299   len = strlen(prefix) + 10;
300   dfafile = (char *)mymalloc(len);
301   dictfile = (char *)mymalloc(len);
302   termfile = (char *)mymalloc(len);
303   strcpy(dfafile, prefix);
304   strcat(dfafile, ".dfa");
305   strcpy(dictfile, prefix);
306   strcat(dictfile, ".dict");
307   strcpy(termfile, prefix);
308   strcat(termfile, ".term");
309 
310   /* start init */
311   winfo = word_info_new();
312   init_voca(winfo, dictfile, NULL, TRUE, FALSE);
313   dfa = dfa_info_new();
314   init_dfa(dfa, dfafile);
315   make_dfa_voca_ref(dfa, winfo);
316   termname = (char **)mymalloc(sizeof(char *) * dfa->term_num);
317   init_term(termfile, termname);
318   if (termname[0] == NULL) {	/* no .term file */
319     no_term_file = TRUE;
320   } else {
321     no_term_file = FALSE;
322   }
323 
324   /* output info */
325   put_dfainfo();
326 
327   /* set dfa->sp_id and dfa->is_sp[cid] from name "sp" */
328   {
329     int t, i;
330     WORD_ID w;
331 
332     dfa->sp_id = WORD_INVALID;
333     dfa->is_sp = (boolean *)mymalloc(sizeof(boolean) * dfa->term_num);
334     for(t=0;t<dfa->term_num;t++) {
335       dfa->is_sp[t] = FALSE;
336       for(i=0;i<dfa->term.wnum[t]; i++) {
337 	w = dfa->term.tw[t][i];
338 	if (strcmp(winfo->woutput[w], spname) == 0) {
339 	  if (dfa->sp_id == WORD_INVALID) dfa->sp_id = w;
340 	  dfa->is_sp[t] = TRUE;
341 	  break;
342 	}
343       }
344     }
345   }
346   if (verbose_flag) {
347     if (dfa->sp_id != WORD_INVALID) {
348       printf("skippable word for NOISE: %s\t%s\n", winfo->wname[dfa->sp_id], winfo->woutput[dfa->sp_id]);
349     }
350   }
351   printf("----- \n");
352 
353   /* main loop */
354   for (;;) {
355     accept_main();
356   }
357 
358   free(dfafile);
359   free(dictfile);
360   return 0;
361 }
362