1 /* file migmdet.c 23.10.95.
2 * 6/8/98 large scale reorganisation to omit globals, etc.
3 * 13/1/98 changes for the `gen' type replacing char for generators.
4 *
5 * This file contains the function migm_determinize, which makes the
6 * multiple initial state generalised multiplier associated with
7 * a coset automatic group deterministic.
8 * That is, it computes the ordinary deterministic generalised multiplier.
9 */
10
11 #define LABHTSIZE 8192
12
13 #include <stdio.h>
14 #include "defs.h"
15 #include "fsa.h"
16 #include "hash.h"
17 #include "externals.h"
18
19 static fsa *migm_determinize_short(fsa *migmptr, storage_type op_table_type,
20 boolean destroy, char *tempfilename);
21 static fsa *migm_determinize_int(fsa *migmptr, storage_type op_table_type,
22 boolean destroy, char *tempfilename);
23
24 /* The fsa *migmptr must be a migm.
25 * The returned fsa accepts a accepts the same language but is deterministic.
26 */
migm_determinize(fsa * migmptr,storage_type op_table_type,boolean destroy,char * tempfilename)27 fsa *migm_determinize(fsa *migmptr, storage_type op_table_type, boolean destroy,
28 char *tempfilename)
29 {
30 if (kbm_print_level >= 3)
31 printf(" #Calling migm_determinize.\n");
32 if (migmptr->states->size < MAXUSHORT)
33 return migm_determinize_short(migmptr, op_table_type, destroy,
34 tempfilename);
35 else
36 return migm_determinize_int(migmptr, op_table_type, destroy, tempfilename);
37 }
38
migm_determinize_short(fsa * migmptr,storage_type op_table_type,boolean destroy,char * tempfilename)39 static fsa *migm_determinize_short(fsa *migmptr, storage_type op_table_type,
40 boolean destroy, char *tempfilename)
41 {
42 int **table, ne, ngens, nssi, ns, dr, *fsarow, nt, cstate, csi, im, i, j, k,
43 g1, len = 0, nlab, ct;
44 unsigned short *ht_ptr, *ht_chptr, *ht_ptrb, *ht_ptre, *cs_ptr, *cs_ptre,
45 *ptr;
46 gen **w;
47 boolean geninlab[MAXGEN + 1];
48 boolean dense_ip, dense_op;
49 short_hash_table ht, labelht;
50 setToLabelsType *newlabel;
51 fsa *det;
52 srec *labelset;
53 FILE *tempfile;
54
55 if (kbm_print_level >= 3)
56 printf(" #Calling migm_determinize_short.\n");
57 if (!migmptr->flags[MIDFA]) {
58 fprintf(stderr, "Error: migm_determinize only applies to MIDFA's.\n");
59 return 0;
60 }
61 if (migmptr->alphabet->type != PRODUCT || migmptr->alphabet->arity != 2) {
62 fprintf(stderr, "Error in migm_determinize: fsa must be 2-variable.\n");
63 return 0;
64 }
65 if (migmptr->states->type != LABELED ||
66 migmptr->states->labels->type != LISTOFWORDS) {
67 fprintf(stderr, "Error in migm_determinize: states of fsa must be labels "
68 "to lists of words.\n");
69 return 0;
70 }
71
72 ne = migmptr->alphabet->size;
73 ngens = migmptr->alphabet->base->size;
74
75 tmalloc(det, fsa, 1);
76 fsa_init(det);
77 srec_copy(det->alphabet, migmptr->alphabet);
78 det->flags[DFA] = TRUE;
79 det->flags[ACCESSIBLE] = TRUE;
80 det->flags[BFS] = TRUE;
81
82 det->table->table_type = op_table_type;
83 det->table->denserows = 0;
84 det->table->printing_format = op_table_type;
85
86 dense_ip = migmptr->table->table_type == DENSE;
87 dr = migmptr->table->denserows;
88 dense_op = op_table_type == DENSE;
89 table = migmptr->table->table_data_ptr;
90
91 det->num_initial = 1;
92 tmalloc(det->initial, int, 2);
93 det->initial[1] = 1;
94
95 short_hash_init(&ht, FALSE, 0, 0, 0);
96 ht_ptr = ht.current_ptr;
97 nssi = migmptr->num_initial;
98 for (i = 0; i < nssi; i++)
99 ht_ptr[i] = migmptr->initial[i + 1];
100 im = short_hash_locate(&ht, nssi);
101 /* Each state in 'det' will be represented as a subset of the set of states
102 * of *migmptr. The initial state contains the initial states
103 * of *migmptr.
104 * The subsets will be stored as variable-length records in the hash-table,
105 * always in increasing order.
106 */
107 if (im != 1) {
108 fprintf(stderr, "Hash-initialisation problem in migm_determinize.\n");
109 return 0;
110 }
111 if ((tempfile = fopen(tempfilename, "w")) == 0) {
112 fprintf(stderr, "Error: cannot open file %s\n", tempfilename);
113 return 0;
114 }
115 if (dense_op)
116 tmalloc(fsarow, int, ne) else tmalloc(fsarow, int, 2 * ne + 1)
117
118 cstate = 0;
119 if (dense_op)
120 len = ne; /* The length of the fsarow output. */
121 nt = 0; /* Number of transitions in det */
122
123 while (++cstate <= ht.num_recs) {
124 if (kbm_print_level >= 3) {
125 if ((cstate <= 1000 && cstate % 100 == 0) ||
126 (cstate <= 10000 && cstate % 1000 == 0) ||
127 (cstate <= 100000 && cstate % 5000 == 0) || cstate % 50000 == 0)
128 printf(" #cstate = %d; number of states = %d.\n", cstate,
129 ht.num_recs);
130 }
131 cs_ptr = short_hash_rec(&ht, cstate);
132 cs_ptre = short_hash_rec(&ht, cstate) + short_hash_rec_len(&ht, cstate) - 1;
133 if (!dense_op)
134 len = 0;
135
136 for (g1 = 1; g1 <= ne; g1++) {
137 /* Calculate action of generator g1 on state cstate - to get the image,
138 * we simply apply it to each state in the subset of states representing
139 * cstate.
140 */
141 ht_ptrb = ht.current_ptr;
142 ht_ptre = ht_ptrb - 1;
143 ptr = cs_ptr - 1;
144 while (++ptr <= cs_ptre) {
145 csi = target(dense_ip, table, g1, *ptr, dr);
146 if (csi == 0)
147 continue;
148 if (ht_ptrb > ht_ptre || csi > *ht_ptre) {
149 /* We have a new state for the image subset to be added to the end */
150 *(++ht_ptre) = csi;
151 }
152 else {
153 ht_chptr = ht_ptrb;
154 while (*ht_chptr < csi)
155 ht_chptr++;
156 if (csi < *ht_chptr) {
157 /* we have a new state for the image subset to be added in the
158 * middle */
159 ht_ptr = ++ht_ptre;
160 while (ht_ptr > ht_chptr) {
161 *ht_ptr = *(ht_ptr - 1);
162 ht_ptr--;
163 }
164 *ht_ptr = csi;
165 }
166 }
167 }
168 im = short_hash_locate(&ht, ht_ptre - ht_ptrb + 1);
169 if (im == -1)
170 return 0;
171 if (dense_op)
172 fsarow[g1 - 1] = im;
173 else if (im > 0) {
174 fsarow[++len] = g1;
175 fsarow[++len] = im;
176 }
177 if (im > 0)
178 nt++;
179 }
180 if (!dense_op)
181 fsarow[0] = len++;
182 fwrite((void *)fsarow, sizeof(int), (size_t)len, tempfile);
183 }
184 fclose(tempfile);
185
186 ns = det->states->size = ht.num_recs;
187 det->table->numTransitions = nt;
188
189 /* Now we need to work out the labels of the states. These are lists of words
190 * of length <= 1, arising from the lists in the states of *migmptr that
191 * make up the relevant subset of states that is a state of *det.
192 * We need another hash-table for this.
193 */
194 det->states->type = LABELED;
195 tmalloc(det->states->labels, srec, 1);
196 labelset = det->states->labels;
197 labelset->type = LISTOFWORDS;
198 labelset->alphabet_size = migmptr->alphabet->base->size;
199 for (i = 1; i <= ngens; i++) {
200 tmalloc(labelset->alphabet[i], char,
201 stringlen(migmptr->alphabet->base->names[i]) + 1);
202 strcpy(labelset->alphabet[i], migmptr->alphabet->base->names[i]);
203 }
204 tmalloc(det->states->setToLabels, setToLabelsType, ns + 1);
205 newlabel = det->states->setToLabels;
206
207 short_hash_init(&labelht, FALSE, 0, LABHTSIZE, 2 * LABHTSIZE);
208
209 tmalloc(det->is_accepting, boolean, ns + 1);
210 for (i = 1; i <= ns; i++)
211 det->is_accepting[i] = FALSE;
212 for (cstate = 1; cstate <= ns; cstate++) {
213 ht_ptrb = labelht.current_ptr;
214 ht_ptre = ht_ptrb - 1;
215
216 cs_ptr = short_hash_rec(&ht, cstate);
217 cs_ptre = short_hash_rec(&ht, cstate) + short_hash_rec_len(&ht, cstate) - 1;
218 for (i = 0; i <= ngens; i++)
219 geninlab[i] = FALSE; /* geninlab records which generators occur in label*/
220 ptr = cs_ptr - 1;
221 while (++ptr <= cs_ptre) {
222 j = migmptr->states->setToLabels[*ptr];
223 if (j) {
224 w = migmptr->states->labels->wordslist[j];
225 i = 0;
226 while (w[i] != 0) {
227 if (genstrlen(w[i]) <= 1) {
228 det->is_accepting[cstate] = TRUE;
229 geninlab[w[i][0]] = TRUE;
230 }
231 i++;
232 }
233 }
234 }
235 for (k = 0; k <= ngens; k++)
236 if (geninlab[k])
237 *(++ht_ptre) = k == 0 ? ngens + 1 : k;
238 /* that completes calculation of label for cstate */
239 newlabel[cstate] = short_hash_locate(&labelht, ht_ptre - ht_ptrb + 1);
240 if (newlabel[cstate] == -1)
241 return 0;
242 }
243
244 short_hash_clear(&ht);
245 tfree(fsarow);
246
247 /* Finally copy the records from the label hash-table into the set of labels
248 */
249 nlab = labelset->size = labelht.num_recs;
250 if (kbm_print_level >= 3)
251 printf(" #There are %d distinct labels.\n", nlab);
252 tmalloc(labelset->wordslist, gen **, nlab + 1);
253 for (i = 1; i <= nlab; i++) {
254 len = short_hash_rec_len(&labelht, i);
255 tmalloc(labelset->wordslist[i], gen *, len + 1);
256 ht_ptr = short_hash_rec(&labelht, i);
257 for (j = 0; j < len; j++) {
258 if (ht_ptr[j] == ngens + 1) {
259 tmalloc(labelset->wordslist[i][j], gen, 1);
260 labelset->wordslist[i][j][0] = 0;
261 }
262 else {
263 tmalloc(labelset->wordslist[i][j], gen, 2);
264 labelset->wordslist[i][j][0] = ht_ptr[j];
265 labelset->wordslist[i][j][1] = 0;
266 }
267 }
268 labelset->wordslist[i][len] = 0;
269 }
270
271 short_hash_clear(&labelht);
272 ct = 0;
273 for (i = 1; i <= ns; i++)
274 if (det->is_accepting[i])
275 ct++;
276 det->num_accepting = ct;
277 tmalloc(det->accepting, int, ct + 1);
278 ct = 0;
279 for (i = 1; i <= ns; i++)
280 if (det->is_accepting[i])
281 det->accepting[++ct] = i;
282 tfree(det->is_accepting);
283
284 if (destroy)
285 fsa_clear(migmptr);
286
287 /* Now read the transition table back in */
288 tempfile = fopen(tempfilename, "r");
289 compressed_transitions_read(det, tempfile);
290 fclose(tempfile);
291
292 unlink(tempfilename);
293
294 return det;
295 }
296
migm_determinize_int(fsa * migmptr,storage_type op_table_type,boolean destroy,char * tempfilename)297 static fsa *migm_determinize_int(fsa *migmptr, storage_type op_table_type,
298 boolean destroy, char *tempfilename)
299 {
300 fprintf(stderr,
301 "Sorry - migm_determinize is not yet implemented for machines.\n");
302 fprintf(stderr, "with more than 65536 states.\n");
303 return 0;
304 }
305