1 /*
2 * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
3 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
4 * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
5 * All rights reserved
6 */
7
8 /* generate --- generate random sentences acceptable by given grammar */
9
#include "common.h"
#include "gen_next.h"
#if defined(_WIN32) && !defined(__CYGWIN32__)
#include "process.h"
#endif
#include <limits.h>
#include <stdlib.h>
15
/* Number of consecutive duplicate sentences after which generation gives up. */
#define MAXHYPO 300

/* Word dictionary; created and loaded in main(). */
WORD_INFO *winfo;
/* Grammar automaton; created and loaded in main(). */
DFA_INFO *dfa;
/* Category names filled by init_term(); holds dfa->term_num entries. */
char **termname;
/* TRUE when -v was given: print candidates and each chosen word. */
boolean verbose_flag = FALSE;
/* TRUE when -t was given: work with category symbols instead of words. */
boolean term_mode = FALSE;
/* TRUE when init_term() left termname[0] NULL (no .term file). */
boolean no_term_file;
24
25 NODE *
new_generate()26 new_generate()
27 {
28 NEXTWORD **nw;
29 NODE *now;
30 int i,j,num,selected;
31
32 /* init */
33 nw = nw_malloc();
34 now = (NODE *)mymalloc(sizeof(NODE));
35 now->endflag = FALSE;
36 now->seqnum = 0;
37
38 /* set init hypo */
39 if (term_mode) {
40 num = dfa_firstterms(nw);
41 } else {
42 num = dfa_firstwords(nw);
43 }
44
45 for (;;) {
46 if (verbose_flag) {
47 if (no_term_file) {
48 for(i=0;i<num;i++)printf("\t-> %s\t%s\n",winfo->wname[nw[i]->id],winfo->woutput[nw[i]->id]);
49 } else {
50 for(i=0;i<num;i++)printf("\t-> %s\t%s\n",termname[winfo->wton[nw[i]->id]],winfo->woutput[nw[i]->id]);
51 }
52 }
53 /* select random one */
54 if (num == 1) {
55 selected = 0;
56 } else {
57 j = abs(rand()) % num;
58 for(i=0;i<j;i++) {
59 selected = abs(rand()) % num;
60 }
61 }
62 if (selected >= num) selected = num - 1;
63
64 now->seq[now->seqnum++] = nw[selected]->id;
65 now->state = nw[selected]->next_state;
66
67 if (now->seqnum >= MAXSEQNUM) {
68 printf("word num exceeded %d\n", MAXSEQNUM);
69 nw_free(nw);
70 return(now);
71 }
72
73 /* output */
74 if (verbose_flag) {
75 printf("(%3d) %s\n", now->state, winfo->woutput[now->seq[now->seqnum-1]]);
76 }
77
78 /* end check */
79 if (dfa_acceptable(now)) break;
80
81 /* get next words */
82 if (term_mode) {
83 num = dfa_nextterms(now, nw);
84 } else {
85 num = dfa_nextwords(now, nw);
86 }
87 }
88
89 nw_free(nw);
90 return(now);
91
92 }
93
94 static boolean
match_node(NODE * a,NODE * b)95 match_node(NODE *a, NODE *b)
96 {
97 int i;
98
99 if (a->seqnum != b->seqnum) return(FALSE);
100 for (i=0;i<a->seqnum;i++) {
101 if (a->seq[i] != b->seq[i]) return(FALSE);
102 }
103 return(TRUE);
104 }
105
106 static void
generate_main(int num)107 generate_main(int num)
108 {
109 NODE *sent;
110 NODE **stock;
111 int i,n,c;
112
113 /* avoid generating same sentence */
114 stock = (NODE **)mymalloc(sizeof(NODE *)*num);
115 n = 0;
116 c = 0;
117 while (n < num) {
118 sent = new_generate();
119 for (i=0;i<n;i++) {
120 if (match_node(sent, stock[i])) break;
121 }
122 if (i >= n) { /* no match, store as new */
123 stock[n++] = sent;
124 for (i=sent->seqnum-1;i>=0;i--) {
125 if (term_mode) {
126 if (no_term_file) {
127 printf(" %s", winfo->wname[sent->seq[i]]);
128 } else {
129 printf(" %s", termname[winfo->wton[sent->seq[i]]]);
130 }
131 } else {
132 printf(" %s", winfo->woutput[sent->seq[i]]);
133 }
134 }
135 printf("\n");
136 c = 0;
137 } else { /* same, ignored */
138 c++;
139 if (c >= MAXHYPO) {
140 printf("no further sentence in the last %d trial\n", c);
141 break;
142 }
143 free(sent);
144 }
145 }
146
147 for(i=0;i<n;i++) free(stock[i]);
148 free(stock);
149 }
150
151
/*
 * Print the command-line help to stderr and terminate with exit status 1.
 *
 * s: program name shown in the synopsis line.
 *
 * The function never returns; it is declared as returning char * so that it
 * can appear as an operand of the NEXTARG conditional expression in main().
 */
static char *
usage(char *s)
{
  fprintf(stderr, "generate --- sentence random generator\n");
  fprintf(stderr, "usage: %s [-v] [-t] [-n num] [-s string] prefix\n", s);
  fprintf(stderr, " -n num ... generate N sentences (default: 10)\n");
  fprintf(stderr, " -t ... use category symbols instead of words (needs .term)\n");
  fprintf(stderr, " -s string ... specify short-pause model\n");
  fprintf(stderr, " -v ... verbose output\n");
  exit(1);
  return NULL;   /* not reached; keeps the non-void function well-formed */
}
163
164 static void
put_dfainfo()165 put_dfainfo()
166 {
167 printf("%d categories, %d words\n",dfa->term_num,winfo->num);
168 printf("DFA has %d nodes and %d arcs\n", dfa->state_num, dfa->arc_num);
169 }
170
171
main(int argc,char * argv[])172 int main(int argc, char *argv[])
173 {
174 int i, len;
175 char *prefix = NULL;
176 char *dfafile, *dictfile, *termfile;
177 int gnum = 10;
178 char *spname_default = SPNAME_DEF;
179 char *spname = NULL;
180 #define NEXTARG (++i >= argc) ? (char *)usage(argv[0]) : argv[i]
181
182 /* argument */
183 for(i=1;i<argc;i++) {
184 if (argv[i][0] == '-') {
185 switch(argv[i][1]) {
186 case 'v': /* verbose output */
187 verbose_flag = TRUE;
188 gnum = 1;
189 break;
190 case 't': /* terminal mode */
191 term_mode = TRUE;
192 break;
193 case 'n':
194 gnum = atoi(NEXTARG);
195 break;
196 case 's':
197 if (++i >= argc) {
198 usage(argv[0]);
199 }
200 spname = argv[i];
201 break;
202 default:
203 fprintf(stderr, "no such option: %s\n",argv[i]);
204 usage(argv[0]);
205 }
206 } else {
207 prefix = argv[i];
208 }
209 }
210 if (prefix == NULL) usage(argv[0]);
211
212 if (spname == NULL) spname = spname_default;
213
214 len = strlen(prefix) + 10;
215 dfafile = (char *)mymalloc(len);
216 dictfile = (char *)mymalloc(len);
217 termfile = (char *)mymalloc(len);
218 strcpy(dfafile, prefix);
219 strcat(dfafile, ".dfa");
220 strcpy(dictfile, prefix);
221 strcat(dictfile, ".dict");
222 strcpy(termfile, prefix);
223 strcat(termfile, ".term");
224
225 /* start init */
226 winfo = word_info_new();
227 init_voca(winfo, dictfile, NULL, TRUE, FALSE);
228 dfa = dfa_info_new();
229 init_dfa(dfa, dfafile);
230 make_dfa_voca_ref(dfa, winfo);
231
232 termname = (char **)mymalloc(sizeof(char *) * dfa->term_num);
233 init_term(termfile, termname);
234 if (termname[0] == NULL) { /* no .term file */
235 no_term_file = TRUE;
236 } else {
237 no_term_file = FALSE;
238 }
239
240 /* output info */
241 put_dfainfo();
242
243 /* set dfa->sp_id and dfa->is_sp[cid] from name "sp" */
244 {
245 int t, i;
246 WORD_ID w;
247
248 dfa->sp_id = WORD_INVALID;
249 dfa->is_sp = (boolean *)mymalloc(sizeof(boolean) * dfa->term_num);
250 for(t=0;t<dfa->term_num;t++) {
251 dfa->is_sp[t] = FALSE;
252 for(i=0;i<dfa->term.wnum[t]; i++) {
253 w = dfa->term.tw[t][i];
254 if (strcmp(winfo->woutput[w], spname) == 0) {
255 if (dfa->sp_id == WORD_INVALID) dfa->sp_id = w;
256 dfa->is_sp[t] = TRUE;
257 break;
258 }
259 }
260 }
261 }
262 if (verbose_flag) {
263 if (dfa->sp_id != WORD_INVALID) {
264 printf("skippable word for NOISE: %s\t%s\n", winfo->wname[dfa->sp_id], winfo->woutput[dfa->sp_id]);
265 }
266 }
267 printf("----- \n");
268
269 /* random seed */
270 srand(getpid());
271
272 /* main loop */
273 generate_main(gnum);
274
275 free(dfafile);
276 free(dictfile);
277 return 0;
278 }
279