1 /*
2  * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
3  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
4  * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
5  * All rights reserved
6  */
7 
/* generate --- generate random sentences accepted by the given grammar */
9 
#include "common.h"
#include "gen_next.h"
#include <stdlib.h>
#if defined(_WIN32) && !defined(__CYGWIN32__)
#include "process.h"
#endif
15 
16 #define MAXHYPO 300
17 
18 WORD_INFO *winfo;
19 DFA_INFO *dfa;
20 char **termname;
21 boolean verbose_flag = FALSE;
22 boolean term_mode = FALSE;
23 boolean no_term_file;
24 
25 NODE *
new_generate()26 new_generate()
27 {
28   NEXTWORD **nw;
29   NODE *now;
30   int i,j,num,selected;
31 
32   /* init */
33   nw = nw_malloc();
34   now = (NODE *)mymalloc(sizeof(NODE));
35   now->endflag = FALSE;
36   now->seqnum = 0;
37 
38   /* set init hypo */
39   if (term_mode) {
40     num = dfa_firstterms(nw);
41   } else {
42     num = dfa_firstwords(nw);
43   }
44 
45   for (;;) {
46     if (verbose_flag) {
47       if (no_term_file) {
48 	for(i=0;i<num;i++)printf("\t-> %s\t%s\n",winfo->wname[nw[i]->id],winfo->woutput[nw[i]->id]);
49       } else {
50 	for(i=0;i<num;i++)printf("\t-> %s\t%s\n",termname[winfo->wton[nw[i]->id]],winfo->woutput[nw[i]->id]);
51       }
52     }
53     /* select random one */
54     if (num == 1) {
55       selected = 0;
56     } else {
57       j = abs(rand()) % num;
58       for(i=0;i<j;i++) {
59 	selected = abs(rand()) % num;
60       }
61     }
62     if (selected >= num) selected = num - 1;
63 
64     now->seq[now->seqnum++] = nw[selected]->id;
65     now->state = nw[selected]->next_state;
66 
67     if (now->seqnum >= MAXSEQNUM) {
68       printf("word num exceeded %d\n", MAXSEQNUM);
69       nw_free(nw);
70       return(now);
71     }
72 
73     /* output */
74     if (verbose_flag) {
75       printf("(%3d) %s\n", now->state, winfo->woutput[now->seq[now->seqnum-1]]);
76     }
77 
78     /* end check */
79     if (dfa_acceptable(now)) break;
80 
81     /* get next words */
82     if (term_mode) {
83       num = dfa_nextterms(now, nw);
84     } else {
85       num = dfa_nextwords(now, nw);
86     }
87   }
88 
89   nw_free(nw);
90   return(now);
91 
92 }
93 
94 static boolean
match_node(NODE * a,NODE * b)95 match_node(NODE *a, NODE *b)
96 {
97   int i;
98 
99   if (a->seqnum != b->seqnum) return(FALSE);
100   for (i=0;i<a->seqnum;i++) {
101     if (a->seq[i] != b->seq[i]) return(FALSE);
102   }
103   return(TRUE);
104 }
105 
106 static void
generate_main(int num)107 generate_main(int num)
108 {
109   NODE *sent;
110   NODE **stock;
111   int i,n,c;
112 
113   /* avoid generating same sentence */
114   stock = (NODE **)mymalloc(sizeof(NODE *)*num);
115   n = 0;
116   c = 0;
117   while (n < num) {
118     sent = new_generate();
119     for (i=0;i<n;i++) {
120       if (match_node(sent, stock[i])) break;
121     }
122     if (i >= n) {		/* no match, store as new */
123       stock[n++] = sent;
124       for (i=sent->seqnum-1;i>=0;i--) {
125 	if (term_mode) {
126 	  if (no_term_file) {
127 	    printf(" %s", winfo->wname[sent->seq[i]]);
128 	  } else {
129 	    printf(" %s", termname[winfo->wton[sent->seq[i]]]);
130 	  }
131 	} else {
132 	  printf(" %s", winfo->woutput[sent->seq[i]]);
133 	}
134       }
135       printf("\n");
136       c = 0;
137     } else {			/* same, ignored */
138       c++;
139       if (c >= MAXHYPO) {
140 	printf("no further sentence in the last %d trial\n", c);
141 	break;
142       }
143       free(sent);
144     }
145   }
146 
147   for(i=0;i<n;i++) free(stock[i]);
148   free(stock);
149 }
150 
151 
/*
 * Print the command line help to stderr and terminate with exit status 1.
 *
 * s: program name shown in the synopsis line.
 *
 * The function never returns; the char * return type is kept only so that
 * a call can appear as an operand of a conditional expression.
 */
static char *
usage(char *s)
{
  fprintf(stderr, "generate --- sentence random generator\n");
  /* synopsis lists every option described below, with its argument */
  fprintf(stderr, "usage: %s [-v] [-t] [-n num] [-s string] prefix\n", s);
  fprintf(stderr, "  -n num  ... generate N sentences (default: 10)\n");
  fprintf(stderr, "  -t      ... use category symbols instead of words (needs .term)\n");
  fprintf(stderr, "  -s string ... specify short-pause model\n");
  fprintf(stderr, "  -v      ... verbose output\n");
  exit(1);
  return NULL;  /* not reached */
}
163 
164 static void
put_dfainfo()165 put_dfainfo()
166 {
167   printf("%d categories, %d words\n",dfa->term_num,winfo->num);
168   printf("DFA has %d nodes and %d arcs\n", dfa->state_num, dfa->arc_num);
169 }
170 
171 
main(int argc,char * argv[])172 int main(int argc, char *argv[])
173 {
174   int i, len;
175   char *prefix = NULL;
176   char *dfafile, *dictfile, *termfile;
177   int gnum = 10;
178   char *spname_default = SPNAME_DEF;
179   char *spname = NULL;
180 #define NEXTARG (++i >= argc) ? (char *)usage(argv[0]) : argv[i]
181 
182   /* argument */
183   for(i=1;i<argc;i++) {
184     if (argv[i][0] == '-') {
185       switch(argv[i][1]) {
186       case 'v':			/* verbose output */
187 	verbose_flag = TRUE;
188 	gnum = 1;
189 	break;
190       case 't':			/* terminal mode */
191 	term_mode = TRUE;
192 	break;
193       case 'n':
194 	gnum = atoi(NEXTARG);
195 	break;
196       case 's':
197 	if (++i >= argc) {
198 	  usage(argv[0]);
199 	}
200 	spname = argv[i];
201 	break;
202       default:
203 	fprintf(stderr, "no such option: %s\n",argv[i]);
204 	usage(argv[0]);
205       }
206     } else {
207       prefix = argv[i];
208     }
209   }
210   if (prefix == NULL) usage(argv[0]);
211 
212   if (spname == NULL) spname = spname_default;
213 
214   len = strlen(prefix) + 10;
215   dfafile = (char *)mymalloc(len);
216   dictfile = (char *)mymalloc(len);
217   termfile = (char *)mymalloc(len);
218   strcpy(dfafile, prefix);
219   strcat(dfafile, ".dfa");
220   strcpy(dictfile, prefix);
221   strcat(dictfile, ".dict");
222   strcpy(termfile, prefix);
223   strcat(termfile, ".term");
224 
225   /* start init */
226   winfo = word_info_new();
227   init_voca(winfo, dictfile, NULL, TRUE, FALSE);
228   dfa = dfa_info_new();
229   init_dfa(dfa, dfafile);
230   make_dfa_voca_ref(dfa, winfo);
231 
232   termname = (char **)mymalloc(sizeof(char *) * dfa->term_num);
233   init_term(termfile, termname);
234   if (termname[0] == NULL) {	/* no .term file */
235     no_term_file = TRUE;
236   } else {
237     no_term_file = FALSE;
238   }
239 
240   /* output info */
241   put_dfainfo();
242 
243   /* set dfa->sp_id and dfa->is_sp[cid] from name "sp" */
244   {
245     int t, i;
246     WORD_ID w;
247 
248     dfa->sp_id = WORD_INVALID;
249     dfa->is_sp = (boolean *)mymalloc(sizeof(boolean) * dfa->term_num);
250     for(t=0;t<dfa->term_num;t++) {
251       dfa->is_sp[t] = FALSE;
252       for(i=0;i<dfa->term.wnum[t]; i++) {
253 	w = dfa->term.tw[t][i];
254 	if (strcmp(winfo->woutput[w], spname) == 0) {
255 	  if (dfa->sp_id == WORD_INVALID) dfa->sp_id = w;
256 	  dfa->is_sp[t] = TRUE;
257 	  break;
258 	}
259       }
260     }
261   }
262   if (verbose_flag) {
263     if (dfa->sp_id != WORD_INVALID) {
264       printf("skippable word for NOISE: %s\t%s\n", winfo->wname[dfa->sp_id], winfo->woutput[dfa->sp_id]);
265     }
266   }
267   printf("----- \n");
268 
269   /* random seed */
270   srand(getpid());
271 
272   /* main loop */
273   generate_main(gnum);
274 
275   free(dfafile);
276   free(dictfile);
277   return 0;
278 }
279