1 /* file fsaminkb.c  16. 12. 94.
2  * 13/1/98 changes for introduction of `gen' type in place of char for
3  * generators.
4  * This file contains the routines necessary to calculate a 2-variable fsa
5  * accepting precisely the minimal KB-rules of a short-lex automatic group.
6  *
7  * The first routine fsa_minred uses the word-acceptor to construct the
8  * minimal reducible words - i.e. those for which all subwords are
9  * accepted by the word-acceptor - clearly it is enough for the two words
10  * obtained by omitting the first and last letters to be acceptable, so
11  * it is essentially an application of fsa_and.
12  *
13  * The second routine fsa_minkb is a 2-variable machine accepting precisely the
14  * minimal KB-reduction equations - i.e it accepts a pair (w1,w2) if w1 and w2
15  * are equal in G, w1 is accepted by the fsa constructed by fsa_minred, and
16  * w2 is accepted by the word-acceptor. Its construction is as in fsa_triples.
17  *
18  * The third routine, fsa_diff calculates the word-difference machine
19  * associated with an fsa that accepts a collection of equations.
20  * So, for example, it can be used with input the KB-reduction machine,
21  * mentioned in the last paragraph, to calculate the correct diff1 machine,
22  * or with the generalised multiplier to construct the correct diff2 machine.
23  */
24 
25 #include <stdio.h>
26 #include "defs.h"
27 #include "fsa.h"
28 #include "rws.h"
29 #include "hash.h"
30 #include "externals.h"
31 
32 /* Functions defined in this file: */
33 
34 extern int (*reduce_word)(gen *w, reduction_struct *rs_rws);
35 static gen testword[4096]; /* Used for reducing words */
36 
fsa_minred(fsa * waptr,storage_type op_table_type,boolean destroy,char * tempfilename)37 fsa *fsa_minred(fsa *waptr, storage_type op_table_type, boolean destroy,
38                 char *tempfilename)
39 {
40   int **table, ne, nsi, nsi1, ns, dr, *fsarow, nt, cstate, csa, csb, im, i, g,
41       len = 0, ct, *ht_ptr;
42   boolean dense_ip, dense_op;
43   fsa *minred;
44   hash_table ht;
45   FILE *tempfile;
46 
47   if (kbm_print_level >= 3)
48     printf("    #Calling fsa_minred.\n");
49   if (!waptr->flags[DFA]) {
50     fprintf(stderr, "Error: fsa_minred only applies to DFA's.\n");
51     return 0;
52   }
53 
54   tmalloc(minred, fsa, 1);
55   fsa_init(minred);
56   srec_copy(minred->alphabet, waptr->alphabet);
57   minred->flags[DFA] = TRUE;
58   minred->flags[ACCESSIBLE] = TRUE;
59   minred->flags[BFS] = TRUE;
60 
61   ne = waptr->alphabet->size;
62   nsi = waptr->states->size;
63   nsi1 = nsi + 1;
64 
65   table = waptr->table->table_data_ptr;
66 
67   dense_ip = waptr->table->table_type == DENSE;
68   dr = waptr->table->denserows;
69   dense_op = op_table_type == DENSE;
70 
71   minred->states->type = SIMPLE;
72   minred->num_initial = 1;
73   tmalloc(minred->initial, int, 2);
74   minred->initial[1] = 1;
75   minred->table->table_type = op_table_type;
76   minred->table->denserows = 0;
77   minred->table->printing_format = op_table_type;
78 
79   hash_init(&ht, TRUE, 2, 0, 0);
80   ht_ptr = ht.current_ptr;
81   ht_ptr[0] = waptr->initial[1];
82   ht_ptr[1] = waptr->initial[1];
83   im = hash_locate(&ht, 2);
84   if (im != 1) {
85     fprintf(stderr, "Hash-initialisation problem in fsa_minred.\n");
86     return 0;
87   }
88   if ((tempfile = fopen(tempfilename, "w")) == 0) {
89     fprintf(stderr, "Error: cannot open file %s\n", tempfilename);
90     return 0;
91   }
92   if (dense_op)
93     tmalloc(fsarow, int, ne) else tmalloc(fsarow, int, 2 * ne + 1)
94 
95         cstate = 0;
96   if (dense_op)
97     len = ne; /* The length of the fsarow output. */
98   nt = 0;     /* Number of transitions in minred */
99 
100   /* A state of *minred will be essentially a pair of states of *waptr,
101    * with the transitions coming from those of *waptr.
102    * The differences are that the left hand side will accept words  w
103    * which are not accepted by *waptr but whose maximal prefix is,
104    * whereas the right hand side will accept words  w which are not accepted
105    * by *waptr but whose maximal suffix is.
106    * Thus, on the lhs, transitions to 0 in *waptr will go to a new accept-
107    * state nsi1 instead (with no transitions from nsi1) whereas on the rhs
108    * the first transition will be back to the intiial state.
109    * The initial state itself is non-accept.
110    */
111   while (++cstate <= ht.num_recs) {
112     if (kbm_print_level >= 3) {
113       if ((cstate <= 1000 && cstate % 100 == 0) ||
114           (cstate <= 10000 && cstate % 1000 == 0) ||
115           (cstate <= 100000 && cstate % 5000 == 0) || cstate % 50000 == 0)
116         printf("    #cstate = %d;  number of states = %d.\n", cstate,
117                ht.num_recs);
118     }
119     ht_ptr = hash_rec(&ht, cstate);
120     csa = ht_ptr[0];
121     csb = ht_ptr[1];
122     if (!dense_op)
123       len = 0;
124     for (g = 1; g <= ne; g++) {
125       /* Calculate action of generator g on state cstate */
126       ht_ptr = ht.current_ptr;
127       if (csa == 0 || csa == nsi1)
128         ht_ptr[0] = 0;
129       else {
130         ht_ptr[0] = target(dense_ip, table, g, csa, dr);
131         if (ht_ptr[0] == 0)
132           ht_ptr[0] = nsi1;
133       }
134       if (cstate == 1)
135         ht_ptr[1] = csb;
136       else
137         ht_ptr[1] = csb ? target(dense_ip, table, g, csb, dr) : 0;
138       if (ht_ptr[0] == 0 || ht_ptr[1] == 0)
139         im = 0;
140       else
141         im = hash_locate(&ht, 2);
142       if (dense_op)
143         fsarow[g - 1] = im;
144       else if (im > 0) {
145         fsarow[++len] = g;
146         fsarow[++len] = im;
147       }
148       if (im > 0)
149         nt++;
150     }
151     if (!dense_op)
152       fsarow[0] = len++;
153     fwrite((void *)fsarow, sizeof(int), (size_t)len, tempfile);
154   }
155   fclose(tempfile);
156 
157   ns = minred->states->size = ht.num_recs;
158   minred->table->numTransitions = nt;
159   /* Locate the accept states: first count them and then record them. */
160   ct = 0;
161   for (i = 1; i <= ns; i++) {
162     ht_ptr = hash_rec(&ht, i);
163     if (ht_ptr[0] == nsi1)
164       ct++;
165   }
166   minred->num_accepting = ct;
167   if (ct == 1 || ct != ns) {
168     tmalloc(minred->accepting, int, ct + 1);
169     ct = 0;
170     for (i = 1; i <= ns; i++) {
171       ht_ptr = hash_rec(&ht, i);
172       if (ht_ptr[0] == nsi1)
173         minred->accepting[++ct] = i;
174     }
175   }
176   hash_clear(&ht);
177   tfree(fsarow);
178   tfree(waptr->is_accepting);
179 
180   if (destroy)
181     fsa_clear(waptr);
182 
183   /* Now read the transition table back in */
184   tempfile = fopen(tempfilename, "r");
185   compressed_transitions_read(minred, tempfile);
186   fclose(tempfile);
187 
188   unlink(tempfilename);
189 
190   return minred;
191 }
192 
193 /* *waptr is assumed to be the word-acceptor of an automatic group.
194  * and *minredptr the fsa which accepts minimal reducible words
195  * (computed using fsa_minred above).
196  * In particular, all states of *waptr should be accepting.
197  * *diffptr is assumed to be a word-difference machine of the same automatic
198  *  group. Both are assumed to be stored in dense-format.
199  * This routine constructs the fsa of which the states are triples (s1,s2,d),
200  * with s1 and s2 states of *minredptr and *waptr and d a state of *diffptr.
201  * (More precisely, if *waptr has n states, then s2 may also be equal
202  * to n+1, meaning that the end of string symbol has been read on  rhs.
203  * Since lhs is never shorter than rhs in an accpeted pair, the end of
204  * string symbol on the lhs always leads to failure.)
205  * The alphabet is 2-variable with base the alphabet of *waptr
206  * (i.e. the same alphabet as *diffptr).
207  * The alphabet member (g1,g2) maps (s1,s2,d) to (s1^g1,s2^g2,d^(g1,g2))
208  * if all three components are nonzero, and to zero otherwise.
209  * The transition-table of the resulting fsa is output in the usual way to
210  * file tempfilename with table-type specified by op_table_type, before
211  * minimisation.
212  * A state is accept is s1, s2 and d all are (but s2 always is).
213  * Short hash-tables will be used, so this routine won't work if *waptr
214  * or *diffptr has more than 65535 states.
215  *
216  */
fsa_minkb(fsa * minredptr,fsa * waptr,fsa * diffptr,storage_type op_table_type,boolean destroy,char * tempfilename)217 fsa *fsa_minkb(fsa *minredptr, fsa *waptr, fsa *diffptr,
218                storage_type op_table_type, boolean destroy, char *tempfilename)
219 {
220   int **minredtable, **watable, ***difftable, identity, ngens, ngens1, nswa1,
221       ne, ns, *fsarow, nt, cstate, cswa1, cswa2, csdiff, im, i, e, len = 0, ct;
222   unsigned short *ht_ptr;
223   boolean dense_op;
224   fsa *minkbptr;
225   short_hash_table ht;
226   FILE *tempfile;
227   gen g1, g2;
228 
229   if (kbm_print_level >= 3)
230     printf("    #Calling fsa_minkbptr.\n");
231 
232   if (!minredptr->flags[DFA] || !waptr->flags[DFA] || !diffptr->flags[DFA]) {
233     fprintf(stderr, "Error: fsa_minkbptr only applies to DFA's.\n");
234     return 0;
235   }
236   if (minredptr->alphabet->type != IDENTIFIERS ||
237       waptr->alphabet->type != IDENTIFIERS) {
238     fprintf(stderr, "Error in fsa_minkbptr: an fsa has wrong type.\n");
239     return 0;
240   }
241   if (waptr->num_accepting != waptr->states->size) {
242     fprintf(stderr,
243             "Error in fsa_minkbptr: second fsa should be a word-acceptor.\n");
244     return 0;
245   }
246   if (diffptr->alphabet->type != PRODUCT || diffptr->alphabet->arity != 2) {
247     fprintf(stderr, "Error in fsa_minkbptr: third fsa must be 2-variable.\n");
248     return 0;
249   }
250   if (diffptr->states->type != WORDS) {
251     fprintf(stderr,
252             "Error in fsa_minkbptr: third fsa must be word-difference type.\n");
253     return 0;
254   }
255   if (!srec_equal(diffptr->alphabet->base, waptr->alphabet)) {
256     fprintf(stderr, "Error in fsa_minkbptr: fsa's alphabet's don't match.\n");
257     return 0;
258   }
259   if (minredptr->states->size >= MAXUSHORT ||
260       waptr->states->size >= MAXUSHORT || diffptr->states->size >= MAXUSHORT) {
261     fprintf(stderr,
262             "Error in fsa_minkbptr: one of the fsa's has too many states.\n");
263     return 0;
264   }
265 
266   if (fsa_table_dptr_init(diffptr) == -1)
267     return 0;
268 
269   tmalloc(minkbptr, fsa, 1);
270   fsa_init(minkbptr);
271   srec_copy(minkbptr->alphabet, diffptr->alphabet);
272   minkbptr->flags[DFA] = TRUE;
273   minkbptr->flags[ACCESSIBLE] = TRUE;
274   minkbptr->flags[BFS] = TRUE;
275 
276   ngens = waptr->alphabet->size;
277   ngens1 = ngens + 1;
278   ne = diffptr->alphabet->size;
279   nswa1 = waptr->states->size + 1;
280 
281   if (ne != ngens1 * ngens1 - 1) {
282     fprintf(
283         stderr,
284         "Error: in a 2-variable fsa, alphabet size should = ngens^2 - 1.\n");
285     return 0;
286   }
287 
288   minredtable = minredptr->table->table_data_ptr;
289   watable = waptr->table->table_data_ptr;
290   difftable = diffptr->table->table_data_dptr;
291 
292   dense_op = op_table_type == DENSE;
293 
294   minkbptr->states->type = SIMPLE;
295 
296   minkbptr->num_initial = 1;
297   tmalloc(minkbptr->initial, int, 2);
298   minkbptr->initial[1] = 1;
299   minkbptr->table->table_type = op_table_type;
300   minkbptr->table->denserows = 0;
301   minkbptr->table->printing_format = op_table_type;
302 
303   short_hash_init(&ht, TRUE, 3, 0, 0);
304   ht_ptr = ht.current_ptr;
305   ht_ptr[0] = minredptr->initial[1];
306   ht_ptr[1] = waptr->initial[1];
307   ht_ptr[2] = identity = diffptr->initial[1];
308   im = short_hash_locate(&ht, 3);
309   if (im != 1) {
310     fprintf(stderr, "Hash-initialisation problem in fsa_minkbptr.\n");
311     return 0;
312   }
313   if ((tempfile = fopen(tempfilename, "w")) == 0) {
314     fprintf(stderr, "Error: cannot open file %s\n", tempfilename);
315     return 0;
316   }
317   if (dense_op)
318     tmalloc(fsarow, int, ne) else tmalloc(fsarow, int, 2 * ne + 1)
319 
320         cstate = 0;
321   if (dense_op)
322     len = ne; /* The length of the fsarow output. */
323   nt = 0;     /* Number of transitions in minkbptr */
324 
325   while (++cstate <= ht.num_recs) {
326     if (kbm_print_level >= 3) {
327       if ((cstate <= 1000 && cstate % 100 == 0) ||
328           (cstate <= 10000 && cstate % 1000 == 0) ||
329           (cstate <= 100000 && cstate % 5000 == 0) || cstate % 50000 == 0)
330         printf("    #cstate = %d;  number of states = %d.\n", cstate,
331                ht.num_recs);
332     }
333     ht_ptr = short_hash_rec(&ht, cstate);
334     cswa1 = ht_ptr[0];
335     cswa2 = ht_ptr[1];
336     csdiff = ht_ptr[2];
337     if (dense_op)
338       for (i = 0; i < ne; i++)
339         fsarow[i] = 0;
340     else
341       len = 0;
342     e = 0; /* e is the number of the edge corresponding to the pair (g1,g2) */
343     for (g1 = 1; g1 <= ngens; g1++)
344       for (g2 = 1; g2 <= ngens1; g2++) {
345         e++;
346         /* Calculate action of generator-pair (g1,g2) on state cstate - since
347          * the padding symbol cannot occur on the lhs, g1 only goes up to ngens.
348          */
349         ht_ptr = ht.current_ptr;
350         ht_ptr[2] = dense_dtarget(difftable, g1, g2, csdiff);
351         if (ht_ptr[2] == 0)
352           im = 0;
353         else {
354           ht_ptr[0] = dense_target(minredtable, g1, cswa1);
355           if (ht_ptr[0] == 0)
356             im = 0;
357           else {
358             ht_ptr[1] =
359                 g2 == ngens1
360                     ? nswa1
361                     : cswa2 == nswa1 ? 0 : dense_target(watable, g2, cswa2);
362             if (ht_ptr[1] == 0)
363               im = 0;
364             else
365               im = short_hash_locate(&ht, 3);
366           }
367         }
368 
369         if (dense_op)
370           fsarow[e - 1] = im;
371         else if (im > 0) {
372           fsarow[++len] = e;
373           fsarow[++len] = im;
374         }
375         if (im > 0)
376           nt++;
377       } /* for (g1=1;g1<=ngens1; ... */
378     if (!dense_op)
379       fsarow[0] = len++;
380     fwrite((void *)fsarow, sizeof(int), (size_t)len, tempfile);
381   } /*while (++cstate <= ht.num_recs) */
382   fclose(tempfile);
383 
384   ns = minkbptr->states->size = ht.num_recs;
385   minkbptr->table->numTransitions = nt;
386 
387   if (kbm_print_level >= 3)
388     printf("    #Calculated transitions - %d states, %d transitions.\n", ns,
389            nt);
390 
391   /* Now locate the accept states */
392 
393   fsa_set_is_accepting(minredptr);
394   ct = 0;
395   for (i = 1; i <= ns; i++) {
396     ht_ptr = short_hash_rec(&ht, i);
397     if (minredptr->is_accepting[ht_ptr[0]] && ht_ptr[2] == identity)
398       ct++;
399   }
400   minkbptr->num_accepting = ct;
401   if (ct == 1 || ct != ns) {
402     tmalloc(minkbptr->accepting, int, ct + 1);
403     ct = 0;
404     for (i = 1; i <= ns; i++) {
405       ht_ptr = short_hash_rec(&ht, i);
406       if (minredptr->is_accepting[ht_ptr[0]] && ht_ptr[2] == identity)
407         minkbptr->accepting[++ct] = i;
408     }
409   }
410 
411   short_hash_clear(&ht);
412   tfree(minredptr->is_accepting);
413   tfree(fsarow);
414   if (destroy) {
415     fsa_clear(minredptr);
416     fsa_clear(waptr);
417     fsa_clear(diffptr);
418   }
419   /* Now read the transition table back in */
420   tempfile = fopen(tempfilename, "r");
421   compressed_transitions_read(minkbptr, tempfile);
422   fclose(tempfile);
423 
424   unlink(tempfilename);
425 
426   return minkbptr;
427 }
428 
429 /* *fsaptr should be a two-variable automaton that accepts pairs of equations
430  * It must be stored in dense format.
431  * The corresponding word-difference machine is computed and returned.
432  * For example, if fsaptr is minkb as returned by fsa_minkb (above),
433  * then the correct diff1 machine for the shortlex automatic group is computed.
434  */
fsa_diff(fsa * fsaptr,reduction_struct * rs_wd,storage_type op_table_type)435 fsa *fsa_diff(fsa *fsaptr, reduction_struct *rs_wd, storage_type op_table_type)
436 {
437   int i, l, n, ngens, ngens1, ne, ns, g1, g2, fsaptr_state, diff_state,
438       *state_diff, ***fsaptr_table, ***wd_table, **table, fsaptr_im, diff_im,
439       *inv;
440   fsa *diffptr;
441   gen *stw;
442 
443   if (kbm_print_level >= 3)
444     printf("    #Calling fsa_diff.\n");
445   if (!fsaptr->flags[DFA]) {
446     fprintf(stderr, "Error: fsa_diff only applies to DFA's.\n");
447     return 0;
448   }
449 
450   if (fsaptr->alphabet->type != PRODUCT || fsaptr->alphabet->arity != 2) {
451     fprintf(stderr, "Error in fsa_diff: fsa must be 2-variable.\n");
452     return 0;
453   }
454 
455   ne = fsaptr->alphabet->size;
456   ngens = fsaptr->alphabet->base->size;
457   ngens1 = ngens + 1;
458   ns = fsaptr->states->size;
459   if (fsa_table_dptr_init(fsaptr) == -1)
460     return 0;
461   fsaptr_table = fsaptr->table->table_data_dptr;
462 
463   if (ne != ngens1 * ngens1 - 1) {
464     fprintf(
465         stderr,
466         "Error: in a 2-variable fsa, alphabet size should = ngens^2 - 1.\n");
467     return 0;
468   }
469 
470   tmalloc(diffptr, fsa, 1);
471   fsa_init(diffptr);
472   srec_copy(diffptr->alphabet, fsaptr->alphabet);
473 
474   /* Maximum possible number of states of diff is of course ns */
475   diffptr->states->type = WORDS;
476   tmalloc(diffptr->states->words, gen *, ns + 1);
477   diffptr->states->alphabet_size = fsaptr->alphabet->base->size;
478   for (i = 1; i <= fsaptr->alphabet->base->size; i++) {
479     tmalloc(diffptr->states->alphabet[i], char,
480             stringlen(fsaptr->alphabet->base->names[i]) + 1);
481     strcpy(diffptr->states->alphabet[i], fsaptr->alphabet->base->names[i]);
482   }
483   diffptr->states->size = 1;
484   /* Set up first state, which is the empty word. */
485   tmalloc(diffptr->states->words[1], gen, 1);
486   diffptr->states->words[1][0] = 0;
487 
488   diffptr->num_initial = 1;
489   tmalloc(diffptr->initial, int, 2);
490   diffptr->initial[1] = 1;
491   diffptr->num_accepting = 1;
492   tmalloc(diffptr->accepting, int, 2);
493   diffptr->accepting[1] = 1; /* Only the initial state is accepting */
494 
495   diffptr->flags[DFA] = TRUE;
496   diffptr->flags[TRIM] = TRUE;
497 
498   fsa_table_init(diffptr->table, ns, diffptr->alphabet->size);
499   diffptr->table->printing_format = op_table_type;
500   if (fsa_table_dptr_init(diffptr) == -1)
501     return 0;
502   wd_table = diffptr->table->table_data_dptr;
503   table = diffptr->table->table_data_ptr;
504   for (i = 1; i <= ne; i++)
505     table[i][1] = 0;
506 
507   reduce_word = diff_reduce;
508   if (calculate_inverses(&inv, ngens, rs_wd) == -1)
509     return 0;
510 
511   /* Now build the transition-table of diffptr, using that of fsaptr.
512    * Each state of fsaptr maps onto one of *diffptr, with the mapping
513    * stored in the list state_diff.
514    */
515   tmalloc(state_diff, int, ns + 1);
516   for (i = 1; i <= ns; i++)
517     state_diff[i] = 0;
518   state_diff[1] = 1;
519   for (fsaptr_state = 1; fsaptr_state <= ns; fsaptr_state++) {
520     diff_state = state_diff[fsaptr_state];
521     for (g1 = 1; g1 <= ngens1; g1++)
522       for (g2 = 1; g2 <= ngens1; g2++) {
523         if (g1 == ngens1 && g2 == ngens1)
524           continue;
525         fsaptr_im = dense_dtarget(fsaptr_table, g1, g2, fsaptr_state);
526         if (fsaptr_im == 0)
527           continue;
528         diff_im = state_diff[fsaptr_im];
529         if (diff_im == 0) {
530           /* We have to work out what word-difference the state maps onto */
531           stw = diffptr->states->words[diff_state];
532           l = genstrlen(stw);
533           if (g1 == ngens1) {
534             genstrcpy(testword, stw);
535             testword[l] = g2;
536             testword[l + 1] = 0;
537           }
538           else if (g2 == ngens1) {
539             testword[0] = inv[g1];
540             genstrcpy(testword + 1, stw);
541             testword[l + 1] = 0;
542           }
543           else {
544             testword[0] = inv[g1];
545             genstrcpy(testword + 1, stw);
546             testword[l + 1] = g2;
547             testword[l + 2] = 0;
548           }
549           if ((*reduce_word)(testword, rs_wd) == -1)
550             return 0;
551           n = diff_no(diffptr, testword);
552           if (n == 0) {
553             n = ++diffptr->states->size;
554             tmalloc(diffptr->states->words[n], gen, genstrlen(testword) + 1);
555             genstrcpy(diffptr->states->words[n], testword);
556             for (i = 1; i <= ne; i++)
557               table[i][n] = 0;
558           }
559           state_diff[fsaptr_im] = diff_im = n;
560         }
561         set_dense_dtarget(wd_table, g1, g2, diff_state, diff_im);
562       }
563   }
564 
565   tfree(inv);
566   tfree(state_diff);
567   return diffptr;
568 }
569