1 /* file migmdet.c  23.10.95.
2  * 6/8/98 large scale reorganisation to omit globals, etc.
3  * 13/1/98 changes for the `gen' type replacing char for generators.
4  *
5  * This file contains the function migm_determinize, which makes the
6  * multiple initial state generalised multiplier associated with
7  * a coset automatic group deterministic.
8  * That is, it computes the ordinary deterministic generalised multiplier.
9  */
10 
11 #define LABHTSIZE 8192
12 
13 #include <stdio.h>
14 #include "defs.h"
15 #include "fsa.h"
16 #include "hash.h"
17 #include "externals.h"
18 
19 static fsa *migm_determinize_short(fsa *migmptr, storage_type op_table_type,
20                                    boolean destroy, char *tempfilename);
21 static fsa *migm_determinize_int(fsa *migmptr, storage_type op_table_type,
22                                  boolean destroy, char *tempfilename);
23 
24 /* The fsa *migmptr must be a migm.
25  * The returned fsa accepts a accepts the same language but is deterministic.
26  */
migm_determinize(fsa * migmptr,storage_type op_table_type,boolean destroy,char * tempfilename)27 fsa *migm_determinize(fsa *migmptr, storage_type op_table_type, boolean destroy,
28                       char *tempfilename)
29 {
30   if (kbm_print_level >= 3)
31     printf("    #Calling migm_determinize.\n");
32   if (migmptr->states->size < MAXUSHORT)
33     return migm_determinize_short(migmptr, op_table_type, destroy,
34                                   tempfilename);
35   else
36     return migm_determinize_int(migmptr, op_table_type, destroy, tempfilename);
37 }
38 
migm_determinize_short(fsa * migmptr,storage_type op_table_type,boolean destroy,char * tempfilename)39 static fsa *migm_determinize_short(fsa *migmptr, storage_type op_table_type,
40                                    boolean destroy, char *tempfilename)
41 {
42   int **table, ne, ngens, nssi, ns, dr, *fsarow, nt, cstate, csi, im, i, j, k,
43       g1, len = 0, nlab, ct;
44   unsigned short *ht_ptr, *ht_chptr, *ht_ptrb, *ht_ptre, *cs_ptr, *cs_ptre,
45       *ptr;
46   gen **w;
47   boolean geninlab[MAXGEN + 1];
48   boolean dense_ip, dense_op;
49   short_hash_table ht, labelht;
50   setToLabelsType *newlabel;
51   fsa *det;
52   srec *labelset;
53   FILE *tempfile;
54 
55   if (kbm_print_level >= 3)
56     printf("    #Calling migm_determinize_short.\n");
57   if (!migmptr->flags[MIDFA]) {
58     fprintf(stderr, "Error: migm_determinize only applies to MIDFA's.\n");
59     return 0;
60   }
61   if (migmptr->alphabet->type != PRODUCT || migmptr->alphabet->arity != 2) {
62     fprintf(stderr, "Error in migm_determinize: fsa must be 2-variable.\n");
63     return 0;
64   }
65   if (migmptr->states->type != LABELED ||
66       migmptr->states->labels->type != LISTOFWORDS) {
67     fprintf(stderr, "Error in migm_determinize: states of fsa must be labels "
68                     "to lists of words.\n");
69     return 0;
70   }
71 
72   ne = migmptr->alphabet->size;
73   ngens = migmptr->alphabet->base->size;
74 
75   tmalloc(det, fsa, 1);
76   fsa_init(det);
77   srec_copy(det->alphabet, migmptr->alphabet);
78   det->flags[DFA] = TRUE;
79   det->flags[ACCESSIBLE] = TRUE;
80   det->flags[BFS] = TRUE;
81 
82   det->table->table_type = op_table_type;
83   det->table->denserows = 0;
84   det->table->printing_format = op_table_type;
85 
86   dense_ip = migmptr->table->table_type == DENSE;
87   dr = migmptr->table->denserows;
88   dense_op = op_table_type == DENSE;
89   table = migmptr->table->table_data_ptr;
90 
91   det->num_initial = 1;
92   tmalloc(det->initial, int, 2);
93   det->initial[1] = 1;
94 
95   short_hash_init(&ht, FALSE, 0, 0, 0);
96   ht_ptr = ht.current_ptr;
97   nssi = migmptr->num_initial;
98   for (i = 0; i < nssi; i++)
99     ht_ptr[i] = migmptr->initial[i + 1];
100   im = short_hash_locate(&ht, nssi);
101   /* Each state in 'det' will be represented as a subset of the set of states
102    * of *migmptr. The initial state contains the initial states
103    * of *migmptr.
104    * The subsets will be stored as variable-length records in the hash-table,
105    * always in increasing order.
106    */
107   if (im != 1) {
108     fprintf(stderr, "Hash-initialisation problem in migm_determinize.\n");
109     return 0;
110   }
111   if ((tempfile = fopen(tempfilename, "w")) == 0) {
112     fprintf(stderr, "Error: cannot open file %s\n", tempfilename);
113     return 0;
114   }
115   if (dense_op)
116     tmalloc(fsarow, int, ne) else tmalloc(fsarow, int, 2 * ne + 1)
117 
118         cstate = 0;
119   if (dense_op)
120     len = ne; /* The length of the fsarow output. */
121   nt = 0;     /* Number of transitions in det */
122 
123   while (++cstate <= ht.num_recs) {
124     if (kbm_print_level >= 3) {
125       if ((cstate <= 1000 && cstate % 100 == 0) ||
126           (cstate <= 10000 && cstate % 1000 == 0) ||
127           (cstate <= 100000 && cstate % 5000 == 0) || cstate % 50000 == 0)
128         printf("    #cstate = %d;  number of states = %d.\n", cstate,
129                ht.num_recs);
130     }
131     cs_ptr = short_hash_rec(&ht, cstate);
132     cs_ptre = short_hash_rec(&ht, cstate) + short_hash_rec_len(&ht, cstate) - 1;
133     if (!dense_op)
134       len = 0;
135 
136     for (g1 = 1; g1 <= ne; g1++) {
137       /* Calculate action of generator g1 on state cstate  - to get the image,
138        * we simply apply it to each state in the subset of states representing
139        * cstate.
140        */
141       ht_ptrb = ht.current_ptr;
142       ht_ptre = ht_ptrb - 1;
143       ptr = cs_ptr - 1;
144       while (++ptr <= cs_ptre) {
145         csi = target(dense_ip, table, g1, *ptr, dr);
146         if (csi == 0)
147           continue;
148         if (ht_ptrb > ht_ptre || csi > *ht_ptre) {
149           /* We have a new state for the image subset to be added to the end */
150           *(++ht_ptre) = csi;
151         }
152         else {
153           ht_chptr = ht_ptrb;
154           while (*ht_chptr < csi)
155             ht_chptr++;
156           if (csi < *ht_chptr) {
157             /* we have a new state for the image subset to be added in the
158              * middle */
159             ht_ptr = ++ht_ptre;
160             while (ht_ptr > ht_chptr) {
161               *ht_ptr = *(ht_ptr - 1);
162               ht_ptr--;
163             }
164             *ht_ptr = csi;
165           }
166         }
167       }
168       im = short_hash_locate(&ht, ht_ptre - ht_ptrb + 1);
169       if (im == -1)
170         return 0;
171       if (dense_op)
172         fsarow[g1 - 1] = im;
173       else if (im > 0) {
174         fsarow[++len] = g1;
175         fsarow[++len] = im;
176       }
177       if (im > 0)
178         nt++;
179     }
180     if (!dense_op)
181       fsarow[0] = len++;
182     fwrite((void *)fsarow, sizeof(int), (size_t)len, tempfile);
183   }
184   fclose(tempfile);
185 
186   ns = det->states->size = ht.num_recs;
187   det->table->numTransitions = nt;
188 
189   /* Now we need to work out the labels of the states. These are lists of words
190    * of length <= 1, arising from the lists in the states of *migmptr that
191    * make up the relevant subset of states that is a state of *det.
192    * We need another hash-table for this.
193    */
194   det->states->type = LABELED;
195   tmalloc(det->states->labels, srec, 1);
196   labelset = det->states->labels;
197   labelset->type = LISTOFWORDS;
198   labelset->alphabet_size = migmptr->alphabet->base->size;
199   for (i = 1; i <= ngens; i++) {
200     tmalloc(labelset->alphabet[i], char,
201             stringlen(migmptr->alphabet->base->names[i]) + 1);
202     strcpy(labelset->alphabet[i], migmptr->alphabet->base->names[i]);
203   }
204   tmalloc(det->states->setToLabels, setToLabelsType, ns + 1);
205   newlabel = det->states->setToLabels;
206 
207   short_hash_init(&labelht, FALSE, 0, LABHTSIZE, 2 * LABHTSIZE);
208 
209   tmalloc(det->is_accepting, boolean, ns + 1);
210   for (i = 1; i <= ns; i++)
211     det->is_accepting[i] = FALSE;
212   for (cstate = 1; cstate <= ns; cstate++) {
213     ht_ptrb = labelht.current_ptr;
214     ht_ptre = ht_ptrb - 1;
215 
216     cs_ptr = short_hash_rec(&ht, cstate);
217     cs_ptre = short_hash_rec(&ht, cstate) + short_hash_rec_len(&ht, cstate) - 1;
218     for (i = 0; i <= ngens; i++)
219       geninlab[i] = FALSE; /* geninlab records which generators occur in label*/
220     ptr = cs_ptr - 1;
221     while (++ptr <= cs_ptre) {
222       j = migmptr->states->setToLabels[*ptr];
223       if (j) {
224         w = migmptr->states->labels->wordslist[j];
225         i = 0;
226         while (w[i] != 0) {
227           if (genstrlen(w[i]) <= 1) {
228             det->is_accepting[cstate] = TRUE;
229             geninlab[w[i][0]] = TRUE;
230           }
231           i++;
232         }
233       }
234     }
235     for (k = 0; k <= ngens; k++)
236       if (geninlab[k])
237         *(++ht_ptre) = k == 0 ? ngens + 1 : k;
238     /* that completes calculation of label for cstate */
239     newlabel[cstate] = short_hash_locate(&labelht, ht_ptre - ht_ptrb + 1);
240     if (newlabel[cstate] == -1)
241       return 0;
242   }
243 
244   short_hash_clear(&ht);
245   tfree(fsarow);
246 
247   /* Finally copy the records from the label hash-table into the set of labels
248    */
249   nlab = labelset->size = labelht.num_recs;
250   if (kbm_print_level >= 3)
251     printf("    #There are %d distinct labels.\n", nlab);
252   tmalloc(labelset->wordslist, gen **, nlab + 1);
253   for (i = 1; i <= nlab; i++) {
254     len = short_hash_rec_len(&labelht, i);
255     tmalloc(labelset->wordslist[i], gen *, len + 1);
256     ht_ptr = short_hash_rec(&labelht, i);
257     for (j = 0; j < len; j++) {
258       if (ht_ptr[j] == ngens + 1) {
259         tmalloc(labelset->wordslist[i][j], gen, 1);
260         labelset->wordslist[i][j][0] = 0;
261       }
262       else {
263         tmalloc(labelset->wordslist[i][j], gen, 2);
264         labelset->wordslist[i][j][0] = ht_ptr[j];
265         labelset->wordslist[i][j][1] = 0;
266       }
267     }
268     labelset->wordslist[i][len] = 0;
269   }
270 
271   short_hash_clear(&labelht);
272   ct = 0;
273   for (i = 1; i <= ns; i++)
274     if (det->is_accepting[i])
275       ct++;
276   det->num_accepting = ct;
277   tmalloc(det->accepting, int, ct + 1);
278   ct = 0;
279   for (i = 1; i <= ns; i++)
280     if (det->is_accepting[i])
281       det->accepting[++ct] = i;
282   tfree(det->is_accepting);
283 
284   if (destroy)
285     fsa_clear(migmptr);
286 
287   /* Now read the transition table back in */
288   tempfile = fopen(tempfilename, "r");
289   compressed_transitions_read(det, tempfile);
290   fclose(tempfile);
291 
292   unlink(tempfilename);
293 
294   return det;
295 }
296 
migm_determinize_int(fsa * migmptr,storage_type op_table_type,boolean destroy,char * tempfilename)297 static fsa *migm_determinize_int(fsa *migmptr, storage_type op_table_type,
298                                  boolean destroy, char *tempfilename)
299 {
300   fprintf(stderr,
301           "Sorry - migm_determinize is not yet implemented for machines.\n");
302   fprintf(stderr, "with more than 65536 states.\n");
303   return 0;
304 }
305