1 /* gpgeowa.c 15/4/96.
2  * 28/10/98 -n option: output a new 2-variable automaton near_geopairs
3  * together with associated difference-machine near_geodiff,
4  * which accepts all pairs of geodesics which end at distance at most
5  * one apart. This is formed by composing with the generalised
6  * multiplier.
7  * 29/9/98 large-scale re-organisation. Abolish almost all global variables
8  *        and make them components of the rws structure.
9  *
10  * 5/2/98 change generators from type char to type `gen'.
11  * SYNOPSIS:
12  * gpgeowa [-op1 d/s] [-op2 d/s] [-silent] [-v] [-l/-h] [-f] [-diff1/-diff2]
13  *         [-me maxeqns] [-mwd maxwdiffs] [-n] groupname
14  *
15  * Input is from groupname.wa and groupname.diff2 (or groupname.diff1)
16  * Output is to groupname.geowa, groupname.geodiff and groupname.geopairs
17  * Also, a temporary updated diff file for geodesic reduction to
18  * shortlex word representative will be output to groupname.tdiff.
19  *
20  * OPTIONS:
21  * -op1 d/s	output of geowa machine in dense or sparse format -
22  *              dense is default
23  * -op2 d/s	output of geodiff and geopairs machines in dense or sparse
24  *		format - sparse is default
25  * -v		verbose
26  * -silent	no diagnostics
27  * -l/-h	large/huge hash-tables (for big examples)
28  * -f           read the transition table repeatedly from file while mimimizing.
29  *              this avoids storing the large table, but is a little slower.
30  * -diff1/diff2 take input from groupname.diff1 or groupname.diff2
31  *              (diff2 is default). Latter is usually better.
32  * -me  maxeqns  Abort the geodesic word-acceptor checking process after finding
33  * 		maxeqns offending words w -  default is MAXEQNS
34  * -mwd maxwdiffs
35  *              At most maxwdiffs word-differences possible in tdiff machine.
36  * -n		output a new 2-variable automaton near_geopairs
37  *              together with associated difference-machine near_geodiff,
38  *
39  */
40 
41 #include <stdio.h>
42 #include "defs.h"
43 #include "fsa.h"
44 #include "rws.h"
45 #include "definitions.h"
46 #define MAXEQNS 512
47 #define MAXWDIFFS 4096
48 
49 static FILE *rfile, *wfile;
50 
51 static void badusage(void);
52 int (*reduce_word)(gen *w, reduction_struct *rs_rws);
53 
main(int argc,char * argv[])54 int main(int argc, char *argv[])
55 {
56   int arg, i, ct, ngens;
57   char inf1[100], inf2[100], inf3[100], outf1[100], outf2[100], outf3[100],
58       outf4[100], outf5[100], outf6[100], fsaname[100], tempfilename[100];
59   int maxeqns, maxwdiffs, numeqns, old_ndiff, *inv;
60   fsa temp, *waptr, *geowaptr, *geodiffptr, *tdiffptr, *gmptr, *gpp,
61       *geopairsptr, *tgpp;
62   reduction_equation *eqnptr;
63   reduction_struct rs_wd;
64   storage_type op_store1 = DENSE;
65   storage_type op_store2 = SPARSE;
66   boolean diff1_ip = FALSE, readback = TRUE;
67   boolean near_machine = FALSE;
68 
69   setbuf(stdout, (char *)0);
70   setbuf(stderr, (char *)0);
71 
72   maxeqns = MAXEQNS;
73   maxwdiffs = MAXWDIFFS;
74   inf1[0] = '\0';
75   arg = 1;
76   while (argc > arg) {
77     if (strcmp(argv[arg], "-op1") == 0) {
78       arg++;
79       if (arg >= argc)
80         badusage();
81       if (strcmp(argv[arg], "d") == 0)
82         op_store1 = DENSE;
83       else if (strcmp(argv[arg], "s") == 0)
84         op_store1 = SPARSE;
85       else
86         badusage();
87     }
88     else if (strcmp(argv[arg], "-op2") == 0) {
89       arg++;
90       if (arg >= argc)
91         badusage();
92       if (strcmp(argv[arg], "d") == 0)
93         op_store2 = DENSE;
94       else if (strcmp(argv[arg], "s") == 0)
95         op_store2 = SPARSE;
96       else
97         badusage();
98     }
99     else if (strcmp(argv[arg], "-silent") == 0)
100       kbm_print_level = 0;
101     else if (strcmp(argv[arg], "-v") == 0)
102       kbm_print_level = 2;
103     else if (strcmp(argv[arg], "-vv") == 0)
104       kbm_print_level = 3;
105     else if (strcmp(argv[arg], "-l") == 0)
106       kbm_large = TRUE;
107     else if (strcmp(argv[arg], "-h") == 0)
108       kbm_huge = TRUE;
109     else if (strcmp(argv[arg], "-f") == 0)
110       readback = FALSE;
111     else if (strcmp(argv[arg], "-diff1") == 0)
112       diff1_ip = TRUE;
113     else if (strcmp(argv[arg], "-diff2") == 0)
114       diff1_ip = FALSE;
115     else if (strcmp(argv[arg], "-n") == 0)
116       near_machine = TRUE;
117     else if (strcmp(argv[arg], "-me") == 0) {
118       arg++;
119       if (arg >= argc)
120         badusage();
121       maxeqns = atoi(argv[arg]);
122     }
123     else if (strcmp(argv[arg], "-mwd") == 0) {
124       arg++;
125       if (arg >= argc)
126         badusage();
127       maxwdiffs = atoi(argv[arg]);
128     }
129     else {
130       if (argv[arg][0] == '-')
131         badusage();
132       if (strcmp(inf1, "") != 0)
133         badusage();
134       else
135         strcpy(inf1, argv[arg]);
136     }
137     arg++;
138   }
139 
140   if (stringlen(inf1) == 0)
141     badusage();
142 
143   if (diff1_ip) {
144     /* We need to copy the diff1 machine to the file groupname.tdiff  */
145     strcpy(inf2, inf1);
146     strcat(inf2, ".diff1");
147     if ((rfile = fopen(inf2, "r")) == 0) {
148       fprintf(stderr, "Cannot open file %s.\n", inf2);
149       exit(1);
150     }
151     fsa_read(rfile, &temp, DENSE, 0, 0, TRUE, fsaname);
152     fclose(rfile);
153     strcpy(outf4, inf1);
154     strcat(outf4, ".tdiff");
155     base_prefix(fsaname);
156     strcat(fsaname, ".tdiff");
157     wfile = fopen(outf4, "w");
158     fsa_print(wfile, &temp, fsaname);
159     fclose(wfile);
160   }
161 
162   tmalloc(eqnptr, reduction_equation, maxeqns);
163 
164   strcpy(tempfilename, inf1);
165   strcat(tempfilename, "temp_triples_XXX");
166 
167   strcpy(inf2, inf1);
168   strcat(inf2, ".diff2");
169   strcpy(inf3, inf1);
170   strcat(inf3, ".gm");
171 
172   strcpy(outf1, inf1);
173   strcat(outf1, ".geowa");
174   strcpy(outf2, inf1);
175   strcat(outf2, ".geopairs");
176   strcpy(outf3, inf1);
177   strcat(outf3, ".geodiff");
178   strcpy(outf4, inf1);
179   strcat(outf4, ".tdiff");
180   strcpy(outf5, inf1);
181   strcat(outf5, ".near_geopairs");
182   strcpy(outf6, inf1);
183   strcat(outf6, ".near_geodiff");
184 
185   strcat(inf1, ".wa");
186 
187   /* First read in the second-word difference machine for word-reduction */
188   tmalloc(rs_wd.wd_fsa, fsa, 1);
189   if ((rfile = fopen(inf2, "r")) == 0) {
190     fprintf(stderr, "Cannot open file %s.\n", inf2);
191     exit(1);
192   }
193   fsa_read(rfile, rs_wd.wd_fsa, DENSE, 0, 0, TRUE, fsaname);
194   fclose(rfile);
195   ngens = rs_wd.wd_fsa->alphabet->base->size;
196   if (fsa_table_dptr_init(rs_wd.wd_fsa) == -1)
197     exit(1);
198   reduce_word = diff_reduce;
199 
200   if (!diff1_ip) {
201     /* Write this fsa to  the temporary file as first approximation to *tdiffptr
202      */
203     base_prefix(fsaname);
204     strcat(fsaname, ".tdiff");
205     wfile = fopen(outf4, "w");
206     fsa_print(wfile, rs_wd.wd_fsa, fsaname);
207     fclose(wfile);
208   }
209 
210   tmalloc(waptr, fsa, 1);
211   tmalloc(tdiffptr, fsa, 1);
212 
213   /* Now start the loop to calculate the correct geowaptr and tdiffptr */
214   ct = 0;
215   while (1) {
216     ct++;
217     /* Read in word-acceptor. */
218     if ((rfile = fopen(inf1, "r")) == 0) {
219       fprintf(stderr, "Cannot open file %s.\n", inf1);
220       exit(1);
221     }
222     fsa_read(rfile, waptr, DENSE, 0, 0, TRUE, fsaname);
223     fclose(rfile);
224     if (ct > 1) {
225       /* Re-read in tdiffptr */
226       rfile = fopen(outf4, "r");
227       fsa_read(rfile, tdiffptr, DENSE, 0, 0, TRUE, fsaname);
228       fclose(rfile);
229     }
230     else {
231       /* The first approximation to *tdiffptr is the diff2 machine which we
232        * have already read in as *wd_fsa, so we copy it.
233        */
234       fsa_copy(tdiffptr, rs_wd.wd_fsa);
235     }
236     geopairsptr =
237         fsa_geopairs(waptr, tdiffptr, op_store2, TRUE, tempfilename, readback);
238     if (geopairsptr == 0)
239       exit(1);
240     if (kbm_print_level > 1)
241       printf("  #Number of states of geopairs before minimisation = %d.\n",
242              geopairsptr->states->size);
243     if (readback) {
244       if (fsa_minimize(geopairsptr) == -1)
245         exit(1);
246     }
247     else {
248       if (fsa_ip_minimize(geopairsptr) == -1)
249         exit(1);
250     }
251     if (kbm_print_level > 1)
252       printf("  #Number of states of geopairs after minimisation = %d.\n",
253              geopairsptr->states->size);
254 
255     base_prefix(fsaname);
256     strcat(fsaname, ".geopairs");
257     wfile = fopen(outf2, "w");
258     fsa_print(wfile, geopairsptr, fsaname);
259     fclose(wfile);
260     fsa_clear(geopairsptr);
261 
262     /* Next we form the updated *geowaptr. We read *geopairsptr back in again
263      * (since the table format required is usually different).
264      */
265     rfile = fopen(outf2, "r");
266     fsa_read(rfile, geopairsptr, DENSE, 0, 0, TRUE, fsaname);
267     fclose(rfile);
268 
269     geowaptr = fsa_exists(geopairsptr, op_store1, TRUE, tempfilename);
270     if (geowaptr == 0)
271       exit(1);
272     if (kbm_print_level > 1)
273       printf("  #Number of states of geowa before minimisation = %d.\n",
274              geowaptr->states->size);
275 
276     /* We will make the language prefix closed, then output
277         if (geowaptr->states->size > 1) {
278           tfree(geowaptr->accepting);
279           geowaptr->num_accepting=geowaptr->states->size;
280         }
281     */
282 
283     if (fsa_minimize(geowaptr) == -1)
284       exit(1);
285     if (kbm_print_level > 1)
286       printf("  #Number of states of geowa after minimisation = %d.\n",
287              geowaptr->states->size);
288     base_prefix(fsaname);
289     strcat(fsaname, ".geowa");
290     wfile = fopen(outf1, "w");
291     fsa_print(wfile, geowaptr, fsaname);
292     fclose(wfile);
293     fsa_clear(geowaptr);
294 
295     /* Now we read *geowaptr and *tdiffptr back in for the correctness
296      * test on the former.
297      */
298     rfile = fopen(outf1, "r");
299     fsa_read(rfile, geowaptr, DENSE, 0, 0, TRUE, fsaname);
300     fclose(rfile);
301     rfile = fopen(outf4, "r");
302     fsa_read(rfile, tdiffptr, DENSE, 0, maxwdiffs, TRUE, fsaname);
303     fclose(rfile);
304     numeqns = fsa_checkgeowa(geowaptr, tdiffptr, eqnptr, maxeqns);
305     if (numeqns == -1)
306       exit(1);
307     if (numeqns == 0) {
308       if (kbm_print_level > 0)
309         printf("#Geodesic word-acceptor with %d states computed.\n",
310                geowaptr->states->size);
311       fsa_clear(geowaptr);
312       fsa_clear(tdiffptr);
313       break; /* Correctness test passed */
314     }
315     else {
316       fsa_clear(geowaptr);
317       /* We are going to reduce the words returned to their shortlex normal
318        * form, then adjoin the associated word-differences to *tdiffptr.
319        */
320       tfree(geowaptr);
321       tfree(geopairsptr);
322       if (kbm_print_level > 1)
323         printf("  #Updating geodesic word-difference machine.\n");
324       old_ndiff = tdiffptr->states->size;
325       if (calculate_inverses(&inv, ngens, &rs_wd) == -1)
326         exit(1);
327       for (i = 0; i < numeqns; i++) {
328         tmalloc(eqnptr[i].rhs, gen, genstrlen(eqnptr[i].lhs) + 1);
329         genstrcpy(eqnptr[i].rhs, eqnptr[i].lhs);
330         reduce_word(eqnptr[i].rhs, &rs_wd);
331         if (add_wd_fsa(tdiffptr, eqnptr + i, inv, TRUE, &rs_wd) == -1)
332           exit(1);
333         tfree(eqnptr[i].lhs) tfree(eqnptr[i].rhs)
334       }
335       if (make_full_wd_fsa(tdiffptr, inv, old_ndiff + 1, &rs_wd) == -1)
336         exit(1);
337       tfree(inv);
338       if (kbm_print_level > 1)
339         printf("  #Geodesic word-difference machine now has %d states.\n",
340                tdiffptr->states->size);
341       base_prefix(fsaname);
342       strcat(fsaname, ".tdiff");
343       wfile = fopen(outf4, "w");
344       fsa_print(wfile, tdiffptr, fsaname);
345       fclose(wfile);
346       fsa_clear(tdiffptr);
347     }
348   }
349   tfree(waptr);
350   tfree(geowaptr);
351   tfree(tdiffptr);
352 
353   /* The current *geopairsptr accepts (w1,w2) if w1=w2, w1 and w2 are
354    * geodesics, and w2 is accepted by the word-acceptor.
355    * To get it to accept ALL pairs of equal geodesics, we form the
356    * composite of itself with its transpose.
357    */
358   tmalloc(gpp, fsa, 1);
359   tmalloc(tgpp, fsa, 1);
360   rfile = fopen(outf2, "r");
361   fsa_read(rfile, gpp, DENSE, 0, 0, TRUE, fsaname);
362   fclose(rfile);
363   fsa_copy(tgpp, gpp);
364   if (fsa_swap_coords(tgpp) == -1)
365     exit(1);
366   tfree(geopairsptr);
367   geopairsptr =
368       fsa_composite(gpp, tgpp, op_store2, FALSE, tempfilename, readback);
369   if (geopairsptr == 0)
370     exit(1);
371   if (kbm_print_level > 1)
372     printf("  #Number of states of final geopairs before minimisation = %d.\n",
373            geopairsptr->states->size);
374   if (readback) {
375     if (fsa_minimize(geopairsptr) == -1)
376       exit(1);
377   }
378   else {
379     if (fsa_ip_minimize(geopairsptr) == -1)
380       exit(1);
381   }
382   if (kbm_print_level > 1)
383     printf("  #Number of states of final geopairs after minimisation = %d.\n",
384            geopairsptr->states->size);
385   if (kbm_print_level > 0)
386     printf("#Geodesic pairs machine with %d states computed.\n",
387            geopairsptr->states->size);
388 
389   base_prefix(fsaname);
390   strcat(fsaname, ".geopairs");
391   wfile = fopen(outf2, "w");
392   fsa_print(wfile, geopairsptr, fsaname);
393   fclose(wfile);
394   fsa_clear(geopairsptr);
395 
396   /* Now form the geodesic word-difference machine */
397   rfile = fopen(outf2, "r");
398   fsa_read(rfile, geopairsptr, DENSE, 0, 0, TRUE, fsaname);
399   fclose(rfile);
400   geodiffptr = fsa_diff(geopairsptr, &rs_wd, op_store2);
401   fsa_clear(geopairsptr);
402   tfree(geopairsptr);
403 
404   base_prefix(fsaname);
405   strcat(fsaname, ".geodiff");
406   wfile = fopen(outf3, "w");
407   fsa_print(wfile, geodiffptr, fsaname);
408   fclose(wfile);
409   if (kbm_print_level > 0)
410     printf("#Geodesic difference machine with %d states computed.\n",
411            geodiffptr->states->size);
412   fsa_clear(geodiffptr);
413   tfree(geodiffptr);
414   if (!near_machine) {
415     fsa_clear(gpp);
416     tfree(gpp);
417     fsa_clear(tgpp);
418     tfree(tgpp);
419     fsa_clear(rs_wd.wd_fsa);
420     tfree(rs_wd.wd_fsa);
421     tfree(eqnptr);
422     exit(0);
423   }
424 
425   /* Finally, we form near_geopairs, which accepts pairs of geodesics
426    * that finish distance at most one apart. To do this we form the
427    * composite gpp * gm * tgpp.
428    */
429   rfile = fopen(inf3, "r");
430   tmalloc(gmptr, fsa, 1);
431   fsa_read(rfile, gmptr, DENSE, 0, 0, TRUE, fsaname);
432   fclose(rfile);
433   /* first compose gpp with gm */
434   geopairsptr =
435       fsa_composite(gpp, gmptr, op_store2, TRUE, tempfilename, readback);
436   if (geopairsptr == 0)
437     exit(1);
438   tfree(gpp);
439   tfree(gmptr);
440   if (kbm_print_level > 1)
441     printf("  #Number of states of first composite = %d.\n",
442            geopairsptr->states->size);
443   if (readback) {
444     if (fsa_minimize(geopairsptr) == -1)
445       exit(1);
446   }
447   else {
448     if (fsa_ip_minimize(geopairsptr) == -1)
449       exit(1);
450   }
451   if (kbm_print_level > 1)
452     printf("  #Number of states of first composite after minimisation = %d.\n",
453            geopairsptr->states->size);
454 
455   /* write *geopairsptr and re-read to get dense storage type */
456   base_prefix(fsaname);
457   strcat(fsaname, ".near_geopairs");
458   wfile = fopen(outf5, "w");
459   fsa_print(wfile, geopairsptr, fsaname);
460   fclose(wfile);
461   fsa_clear(geopairsptr);
462   rfile = fopen(outf5, "r");
463   fsa_read(rfile, geopairsptr, DENSE, 0, 0, TRUE, fsaname);
464   fclose(rfile);
465 
466   /* now compose result with tggp */
467   gpp =
468       fsa_composite(geopairsptr, tgpp, op_store2, TRUE, tempfilename, readback);
469   tfree(geopairsptr);
470   tfree(tgpp);
471   if (gpp == 0)
472     exit(1);
473   if (kbm_print_level > 1)
474     printf("  #Number of states of near geopairs before minimisation = %d.\n",
475            gpp->states->size);
476 
477   /* write *gpp and re-read to get dense storage type */
478   /*
479     wfile = fopen(outf5,"w");
480     fsa_print(wfile,gpp,fsaname);
481     fclose(wfile);
482     fsa_clear(gpp);
483     rfile=fopen(outf5,"r");
484     fsa_read(rfile,gpp,DENSE,0,0,TRUE,fsaname);
485     fclose(rfile);
486   */
487 
488   /* Form the near geodesic word-difference machine */
489   /*
490     geodiffptr=fsa_diff(gpp,&rs_wd,op_store2);
491     base_prefix(fsaname);
492     strcat(fsaname,".near_geodiff");
493     wfile = fopen(outf6,"w");
494     fsa_print(wfile,geodiffptr,fsaname);
495     fclose(wfile);
496     if (kbm_print_level>0)
497       printf("#Geodesic difference machine with %d states computed.\n",
498               geodiffptr->states->size);
499     fsa_clear(geodiffptr);
500     tfree(geodiffptr);
501   */
502 
503   if (readback) {
504     if (fsa_minimize(gpp) == -1)
505       exit(1);
506   }
507   else {
508     if (fsa_ip_minimize(gpp) == -1)
509       exit(1);
510   }
511   if (kbm_print_level > 1)
512     printf("  #Number of states of near geopairs after minimisation = %d.\n",
513            gpp->states->size);
514   if (kbm_print_level > 0)
515     printf("#Geodesic near pairs machine with %d states computed.\n",
516            gpp->states->size);
517 
518   wfile = fopen(outf5, "w");
519   fsa_print(wfile, gpp, fsaname);
520   fclose(wfile);
521   fsa_clear(gpp);
522   tfree(gpp);
523 
524   fsa_clear(rs_wd.wd_fsa);
525   tfree(rs_wd.wd_fsa);
526   tfree(eqnptr);
527   exit(0);
528 }
529 
badusage(void)530 void badusage(void)
531 {
532   fprintf(stderr,
533           "Usage: gpgeowa [-op1 d/s] [-op2 d/s] [-silent] [-v] [-l/-h] [-f]\n");
534   fprintf(stderr, "          [-diff1/-diff2] [-me maxeqns] [-mwd maxwdiffs] "
535                   "[-n] groupname\n");
536   exit(1);
537 }
538