1 /* file nfa.c  8.9.97.
2  *
3  * 25/9/98 added parameter subsets to nfa_determinize.
4  * 2/10/97 - added version of nfa_determinize for compactly stored
5  * transition tables.
6  * This file contains routines for processing nfa's i.e. non-deterministic
7  * finite state automata.
8  * Currently, the only routine available is nfa_determinize, which returns
9  * an equivalent deterministic automaton, whose states are subsets of the
10  * input fsa.
11  * Currently, the only input format accepted is sparse, where epsilon
12  * transitions are from generator 0.
13  */
14 
15 #include <stdio.h>
16 #include "defs.h"
17 #include "fsa.h"
18 #include "hash.h"
19 #include "externals.h"
20 /* the following three definitions are for use in extracting edge-label
21  * and state in compactly stored pairs.
22  */
23 #define SHIFT 24
24 #define MASKR (unsigned)0xffffff
25 #define MASKL (unsigned)0xff000000
26 
27 /* Functions defined in this file: */
28 static fsa *nfa_determinize_short(fsa *fsaptr, storage_type op_table_type,
29                                   boolean eps_trans, boolean destroy,
30                                   boolean subsets, char *tempfilename);
31 static fsa *nfa_determinize_int(fsa *fsaptr, storage_type op_table_type,
32                                 boolean eps_trans, boolean destroy,
33                                 boolean subsets, char *tempfilename);
34 static fsa *nfa_cdeterminize_short(fsa *fsaptr, storage_type op_table_type,
35                                    boolean destroy, char *tempfilename);
36 static fsa *nfa_cdeterminize_int(fsa *fsaptr, storage_type op_table_type,
37                                  boolean destroy, char *tempfilename);
38 static fsa *nfa_exists_short(fsa *fsaptr, storage_type op_table_type,
39                              boolean destroy, char *tempfilename);
40 static fsa *nfa_exists_int(fsa *fsaptr, storage_type op_table_type,
41                            boolean destroy, char *tempfilename);
42 
43 
44 /* The fsa *fsaptr must be an fsa with sparse table.
45  * The returned fsa accepts the same language but is deterministic.
46  * If eps_trans is false, it is assumed that *fsaptr has no epsilon
47  * transitions.
48  * If subsets is true, then the returned fsa will have state-set of type
49  * "list of integers", where the list represents the subset of the state-set
50  * of *fsaptr that correponds to the state of the returned fsa.
51  */
nfa_determinize(fsa * fsaptr,storage_type op_table_type,boolean eps_trans,boolean destroy,boolean subsets,char * tempfilename)52 fsa *nfa_determinize(fsa *fsaptr, storage_type op_table_type, boolean eps_trans,
53                      boolean destroy, boolean subsets, char *tempfilename)
54 {
55   if (kbm_print_level >= 3)
56     printf("    #Calling nfa_determinize.\n");
57   if (fsaptr->states->size < MAXUSHORT)
58     return nfa_determinize_short(fsaptr, op_table_type, eps_trans, destroy,
59                                  subsets, tempfilename);
60   else
61     return nfa_determinize_int(fsaptr, op_table_type, eps_trans, destroy,
62                                subsets, tempfilename);
63 }
64 
nfa_determinize_short(fsa * fsaptr,storage_type op_table_type,boolean eps_trans,boolean destroy,boolean subsets,char * tempfilename)65 static fsa *nfa_determinize_short(fsa *fsaptr, storage_type op_table_type,
66                                   boolean eps_trans, boolean destroy,
67                                   boolean subsets, char *tempfilename)
68 {
69   int **table, *tableptr, *tableptre, ngens, nssi, ns, *fsarow, nt, cstate, csi,
70       im, i, g1, len = 0, ct, n;
71   unsigned short *ht_ptr, *ht_chptr, *ht_ptrb, *ht_ptre, *cs_ptr, *cs_ptre,
72       *ptr;
73   boolean dense_op;
74   short_hash_table ht;
75   fsa *det;
76   FILE *tempfile;
77 
78   if (kbm_print_level >= 3)
79     printf("    #Calling nfa_determinize_short.\n");
80 
81   tmalloc(det, fsa, 1);
82   fsa_init(det);
83   srec_copy(det->alphabet, fsaptr->alphabet);
84   det->flags[DFA] = TRUE;
85   det->flags[ACCESSIBLE] = TRUE;
86   det->flags[BFS] = TRUE;
87 
88   det->states->type = subsets ? LISTOFINTS : SIMPLE;
89 
90   det->table->table_type = op_table_type;
91   det->table->denserows = 0;
92   det->table->printing_format = op_table_type;
93 
94   if (fsaptr->num_initial == 0) {
95     det->num_initial = 0;
96     det->num_accepting = 0;
97     det->states->size = 0;
98     if (destroy)
99       fsa_clear(fsaptr);
100     return det;
101   }
102   ngens = det->alphabet->size;
103 
104   fsa_set_is_accepting(fsaptr);
105 
106   dense_op = op_table_type == DENSE;
107   table = fsaptr->table->table_data_ptr;
108 
109   det->num_initial = 1;
110   tmalloc(det->initial, int, 2);
111   det->initial[1] = 1;
112 
113   short_hash_init(&ht, FALSE, 0, 0, 0);
114   ht_ptrb = ht.current_ptr;
115   ht_ptre = ht_ptrb - 1;
116   nssi = fsaptr->num_initial;
117   for (i = 0; i < nssi; i++)
118     *(++ht_ptre) = fsaptr->initial[i + 1];
119   /* now perform epsilon closure of initial subset */
120   ptr = ht_ptrb - 1;
121   if (eps_trans)
122     while (++ptr <= ht_ptre) {
123       tableptr = table[*ptr];
124       tableptre = table[*ptr + 1];
125       while (tableptr < tableptre) {
126         if (*tableptr == 0) {
127           csi = *(tableptr + 1);
128           if (csi == 0) {
129             tableptr += 2;
130             continue;
131           }
132           if (csi > *ht_ptre) {
133             /* We have a new state for the image subset to be added to the end
134              */
135             *(++ht_ptre) = csi;
136           }
137           else {
138             ht_chptr = ht_ptrb;
139             while (*ht_chptr < csi)
140               ht_chptr++;
141             if (csi < *ht_chptr) {
142               /* we have a new state for the image subset to be added in the
143                * middle */
144               ht_ptr = ++ht_ptre;
145               while (ht_ptr > ht_chptr) {
146                 *ht_ptr = *(ht_ptr - 1);
147                 ht_ptr--;
148               }
149               *ht_ptr = csi;
150               if (ptr > ht_ptr)
151                 ptr = ht_ptr - 1;
152             }
153           }
154         }
155         tableptr += 2;
156       }
157     }
158   im = short_hash_locate(&ht, ht_ptre - ht_ptrb + 1);
159   /* Each state in 'det' will be represented as a subset of the set of states
160    * of *fsaptr. The initial state contains the initial states
161    * of *fsaptr.
162    * A subset is an accept-state if it contains an accept state of *fsaptr
163    * The subsets will be stored as variable-length records in the hash-table,
164    * always in increasing order.
165    */
166   if (im != 1) {
167     fprintf(stderr, "Hash-initialisation problem in nfa_determinize.\n");
168     return 0;
169   }
170   if ((tempfile = fopen(tempfilename, "w")) == 0) {
171     fprintf(stderr, "Error: cannot open file %s\n", tempfilename);
172     return 0;
173   }
174   if (dense_op)
175     tmalloc(fsarow, int, ngens) else tmalloc(fsarow, int, 2 * ngens + 1)
176 
177         cstate = 0;
178   if (dense_op)
179     len = ngens; /* The length of the fsarow output. */
180   nt = 0;        /* Number of transitions in det */
181 
182   while (++cstate <= ht.num_recs) {
183     if (kbm_print_level >= 3) {
184       if ((cstate <= 1000 && cstate % 100 == 0) ||
185           (cstate <= 10000 && cstate % 1000 == 0) ||
186           (cstate <= 100000 && cstate % 5000 == 0) || cstate % 50000 == 0)
187         printf("    #cstate = %d;  number of states = %d.\n", cstate,
188                ht.num_recs);
189     }
190     cs_ptr = short_hash_rec(&ht, cstate);
191     cs_ptre = short_hash_rec(&ht, cstate) + short_hash_rec_len(&ht, cstate) - 1;
192     if (!dense_op)
193       len = 0;
194 
195     for (g1 = 1; g1 <= ngens; g1++) {
196       /* Calculate action of generator g1 on state cstate  - to get the image,
197        * we simply apply it to each state in the subset of states representing
198        * cstate.
199        */
200       ht_ptrb = ht.current_ptr;
201       ht_ptre = ht_ptrb - 1;
202       ptr = cs_ptr - 1;
203       while (++ptr <= cs_ptre) {
204         tableptr = table[*ptr];
205         tableptre = table[*ptr + 1];
206         while (tableptr < tableptre) {
207           if (*tableptr == g1) {
208             csi = *(tableptr + 1);
209             if (csi == 0) {
210               tableptr += 2;
211               continue;
212             }
213             if (ht_ptrb > ht_ptre || csi > *ht_ptre) {
214               /* We have a new state for the image subset to be added to the end
215                */
216               *(++ht_ptre) = csi;
217             }
218             else {
219               ht_chptr = ht_ptrb;
220               while (*ht_chptr < csi)
221                 ht_chptr++;
222               if (csi < *ht_chptr) {
223                 /* we have a new state for the image subset to be added in the
224                  * middle */
225                 ht_ptr = ++ht_ptre;
226                 while (ht_ptr > ht_chptr) {
227                   *ht_ptr = *(ht_ptr - 1);
228                   ht_ptr--;
229                 }
230                 *ht_ptr = csi;
231               }
232             }
233           }
234           tableptr += 2;
235         }
236       }
237       /* now perform epsilon closure of image subset */
238       ptr = ht_ptrb - 1;
239       if (eps_trans)
240         while (++ptr <= ht_ptre) {
241           tableptr = table[*ptr];
242           tableptre = table[*ptr + 1];
243           while (tableptr < tableptre) {
244             if (*tableptr == 0) {
245               csi = *(tableptr + 1);
246               if (csi == 0) {
247                 tableptr += 2;
248                 continue;
249               }
250               if (csi > *ht_ptre) {
251                 /* We have a new state for the image subset to be added to the
252                  * end */
253                 *(++ht_ptre) = csi;
254               }
255               else {
256                 ht_chptr = ht_ptrb;
257                 while (*ht_chptr < csi)
258                   ht_chptr++;
259                 if (csi < *ht_chptr) {
260                   /* we have a new state for the image subset to be added in the
261                    * middle */
262                   ht_ptr = ++ht_ptre;
263                   while (ht_ptr > ht_chptr) {
264                     *ht_ptr = *(ht_ptr - 1);
265                     ht_ptr--;
266                   }
267                   *ht_ptr = csi;
268                   if (ptr > ht_ptr)
269                     ptr = ht_ptr - 1;
270                 }
271               }
272             }
273             tableptr += 2;
274           }
275         }
276       im = short_hash_locate(&ht, ht_ptre - ht_ptrb + 1);
277       if (dense_op)
278         fsarow[g1 - 1] = im;
279       else if (im > 0) {
280         fsarow[++len] = g1;
281         fsarow[++len] = im;
282       }
283       if (im > 0)
284         nt++;
285     }
286     if (!dense_op)
287       fsarow[0] = len++;
288     fwrite((void *)fsarow, sizeof(int), (size_t)len, tempfile);
289   }
290   fclose(tempfile);
291 
292   ns = det->states->size = ht.num_recs;
293   det->table->numTransitions = nt;
294 
295   /* Locate the accept states. A state is an accept state if and only if
296    * the subset representing it contains an accept state of *fsaptr.
297    * Also, if subsets is true, record the states in the intlist field
298    * of det->states.
299    */
300   tmalloc(det->is_accepting, boolean, ns + 1);
301   for (i = 1; i <= ns; i++)
302     det->is_accepting[i] = FALSE;
303   if (subsets)
304     tmalloc(det->states->intlist, int *, ns + 1);
305   ct = 0;
306   for (cstate = 1; cstate <= ns; cstate++) {
307     len = short_hash_rec_len(&ht, cstate);
308     cs_ptr = short_hash_rec(&ht, cstate);
309     cs_ptre = short_hash_rec(&ht, cstate) + len - 1;
310     /* See if the set contains an accept-state */
311     ptr = cs_ptr - 1;
312     while (++ptr <= cs_ptre)
313       if (fsaptr->is_accepting[*ptr]) {
314         det->is_accepting[cstate] = TRUE;
315         ct++;
316         break;
317       }
318     if (subsets) {
319       tmalloc(det->states->intlist[cstate], int, len + 1);
320       det->states->intlist[cstate][0] = len;
321       n = 0;
322       ptr = cs_ptr - 1;
323       while (++ptr <= cs_ptre)
324         det->states->intlist[cstate][++n] = *ptr;
325     }
326   }
327 
328   det->num_accepting = ct;
329   if (ct == 1 || ct != ns) {
330     tmalloc(det->accepting, int, ct + 1);
331     ct = 0;
332     for (i = 1; i <= ns; i++)
333       if (det->is_accepting[i])
334         det->accepting[++ct] = i;
335   }
336   tfree(fsaptr->is_accepting);
337   tfree(det->is_accepting);
338   short_hash_clear(&ht);
339   tfree(fsarow);
340 
341   if (destroy)
342     fsa_clear(fsaptr);
343 
344   /* Now read the transition table back in */
345   tempfile = fopen(tempfilename, "r");
346   compressed_transitions_read(det, tempfile);
347   fclose(tempfile);
348 
349   unlink(tempfilename);
350 
351   return det;
352 }
353 
nfa_determinize_int(fsa * fsaptr,storage_type op_table_type,boolean eps_trans,boolean destroy,boolean subsets,char * tempfilename)354 static fsa *nfa_determinize_int(fsa *fsaptr, storage_type op_table_type,
355                                 boolean eps_trans, boolean destroy,
356                                 boolean subsets, char *tempfilename)
357 {
358   int **table, *tableptr, *tableptre, ngens, nssi, ns, *fsarow, nt, cstate, csi,
359       im, i, g1, len = 0, ct, n;
360   int *ht_ptr, *ht_chptr, *ht_ptrb, *ht_ptre, *cs_ptr, *cs_ptre, *ptr;
361   boolean dense_op;
362   hash_table ht;
363   fsa *det;
364   FILE *tempfile;
365 
366   if (kbm_print_level >= 3)
367     printf("    #Calling nfa_determinize_int.\n");
368 
369   tmalloc(det, fsa, 1);
370   fsa_init(det);
371   srec_copy(det->alphabet, fsaptr->alphabet);
372   det->flags[DFA] = TRUE;
373   det->flags[ACCESSIBLE] = TRUE;
374   det->flags[BFS] = TRUE;
375 
376   det->states->type = subsets ? LISTOFINTS : SIMPLE;
377 
378   det->table->table_type = op_table_type;
379   det->table->denserows = 0;
380   det->table->printing_format = op_table_type;
381 
382   if (fsaptr->num_initial == 0) {
383     det->num_initial = 0;
384     det->num_accepting = 0;
385     det->states->size = 0;
386     if (destroy)
387       fsa_clear(fsaptr);
388     return det;
389   }
390   ngens = det->alphabet->size;
391 
392   fsa_set_is_accepting(fsaptr);
393 
394   dense_op = op_table_type == DENSE;
395   table = fsaptr->table->table_data_ptr;
396 
397   det->num_initial = 1;
398   tmalloc(det->initial, int, 2);
399   det->initial[1] = 1;
400 
401   hash_init(&ht, FALSE, 0, 0, 0);
402   ht_ptrb = ht.current_ptr;
403   ht_ptre = ht_ptrb - 1;
404   nssi = fsaptr->num_initial;
405   for (i = 0; i < nssi; i++)
406     *(++ht_ptre) = fsaptr->initial[i + 1];
407   /* now perform epsilon closure of initial subset */
408   ptr = ht_ptrb - 1;
409   if (eps_trans)
410     while (++ptr <= ht_ptre) {
411       tableptr = table[*ptr];
412       tableptre = table[*ptr + 1];
413       while (tableptr < tableptre) {
414         if (*tableptr == 0) {
415           csi = *(tableptr + 1);
416           if (csi == 0) {
417             tableptr += 2;
418             continue;
419           }
420           if (csi > *ht_ptre) {
421             /* We have a new state for the image subset to be added to the end
422              */
423             *(++ht_ptre) = csi;
424           }
425           else {
426             ht_chptr = ht_ptrb;
427             while (*ht_chptr < csi)
428               ht_chptr++;
429             if (csi < *ht_chptr) {
430               /* we have a new state for the image subset to be added in the
431                * middle */
432               ht_ptr = ++ht_ptre;
433               while (ht_ptr > ht_chptr) {
434                 *ht_ptr = *(ht_ptr - 1);
435                 ht_ptr--;
436               }
437               *ht_ptr = csi;
438               if (ptr > ht_ptr)
439                 ptr = ht_ptr - 1;
440             }
441           }
442         }
443         tableptr += 2;
444       }
445     }
446   im = hash_locate(&ht, ht_ptre - ht_ptrb + 1);
447   /* Each state in 'det' will be represented as a subset of the set of states
448    * of *fsaptr. The initial state contains the initial states
449    * of *fsaptr.
450    * A subset is an accept-state if it contains an accept state of *fsaptr
451    * The subsets will be stored as variable-length records in the hash-table,
452    * always in increasing order.
453    */
454   if (im != 1) {
455     fprintf(stderr, "Hash-initialisation problem in nfa_determinize.\n");
456     return 0;
457   }
458   if ((tempfile = fopen(tempfilename, "w")) == 0) {
459     fprintf(stderr, "Error: cannot open file %s\n", tempfilename);
460     return 0;
461   }
462   if (dense_op)
463     tmalloc(fsarow, int, ngens) else tmalloc(fsarow, int, 2 * ngens + 1)
464 
465         cstate = 0;
466   if (dense_op)
467     len = ngens; /* The length of the fsarow output. */
468   nt = 0;        /* Number of transitions in det */
469 
470   while (++cstate <= ht.num_recs) {
471     if (kbm_print_level >= 3) {
472       if ((cstate <= 1000 && cstate % 100 == 0) ||
473           (cstate <= 10000 && cstate % 1000 == 0) ||
474           (cstate <= 100000 && cstate % 5000 == 0) || cstate % 50000 == 0)
475         printf("    #cstate = %d;  number of states = %d.\n", cstate,
476                ht.num_recs);
477     }
478     cs_ptr = hash_rec(&ht, cstate);
479     cs_ptre = hash_rec(&ht, cstate) + hash_rec_len(&ht, cstate) - 1;
480     if (!dense_op)
481       len = 0;
482 
483     for (g1 = 1; g1 <= ngens; g1++) {
484       /* Calculate action of generator g1 on state cstate  - to get the image,
485        * we simply apply it to each state in the subset of states representing
486        * cstate.
487        */
488       ht_ptrb = ht.current_ptr;
489       ht_ptre = ht_ptrb - 1;
490       ptr = cs_ptr - 1;
491       while (++ptr <= cs_ptre) {
492         tableptr = table[*ptr];
493         tableptre = table[*ptr + 1];
494         while (tableptr < tableptre) {
495           if (*tableptr == g1) {
496             csi = *(tableptr + 1);
497             if (csi == 0) {
498               tableptr += 2;
499               continue;
500             }
501             if (ht_ptrb > ht_ptre || csi > *ht_ptre) {
502               /* We have a new state for the image subset to be added to the end
503                */
504               *(++ht_ptre) = csi;
505             }
506             else {
507               ht_chptr = ht_ptrb;
508               while (*ht_chptr < csi)
509                 ht_chptr++;
510               if (csi < *ht_chptr) {
511                 /* we have a new state for the image subset to be added in the
512                  * middle */
513                 ht_ptr = ++ht_ptre;
514                 while (ht_ptr > ht_chptr) {
515                   *ht_ptr = *(ht_ptr - 1);
516                   ht_ptr--;
517                 }
518                 *ht_ptr = csi;
519               }
520             }
521           }
522           tableptr += 2;
523         }
524       }
525       /* now perform epsilon closure of image subset */
526       ptr = ht_ptrb - 1;
527       if (eps_trans)
528         while (++ptr <= ht_ptre) {
529           tableptr = table[*ptr];
530           tableptre = table[*ptr + 1];
531           while (tableptr < tableptre) {
532             if (*tableptr == 0) {
533               csi = *(tableptr + 1);
534               if (csi == 0) {
535                 tableptr += 2;
536                 continue;
537               }
538               if (csi > *ht_ptre) {
539                 /* We have a new state for the image subset to be added to the
540                  * end */
541                 *(++ht_ptre) = csi;
542               }
543               else {
544                 ht_chptr = ht_ptrb;
545                 while (*ht_chptr < csi)
546                   ht_chptr++;
547                 if (csi < *ht_chptr) {
548                   /* we have a new state for the image subset to be added in the
549                    * middle */
550                   ht_ptr = ++ht_ptre;
551                   while (ht_ptr > ht_chptr) {
552                     *ht_ptr = *(ht_ptr - 1);
553                     ht_ptr--;
554                   }
555                   *ht_ptr = csi;
556                   if (ptr > ht_ptr)
557                     ptr = ht_ptr - 1;
558                 }
559               }
560             }
561             tableptr += 2;
562           }
563         }
564       im = hash_locate(&ht, ht_ptre - ht_ptrb + 1);
565       if (dense_op)
566         fsarow[g1 - 1] = im;
567       else if (im > 0) {
568         fsarow[++len] = g1;
569         fsarow[++len] = im;
570       }
571       if (im > 0)
572         nt++;
573     }
574     if (!dense_op)
575       fsarow[0] = len++;
576     fwrite((void *)fsarow, sizeof(int), (size_t)len, tempfile);
577   }
578   fclose(tempfile);
579 
580   ns = det->states->size = ht.num_recs;
581   det->table->numTransitions = nt;
582 
583   /* Locate the accept states. A state is an accept state if and only if
584    * the subset representing it contains an accept state of *fsaptr.
585    * Also, if subsets is true, record the states in the intlist field
586    * of det->states.
587    */
588   tmalloc(det->is_accepting, boolean, ns + 1);
589   for (i = 1; i <= ns; i++)
590     det->is_accepting[i] = FALSE;
591   if (subsets)
592     tmalloc(det->states->intlist, int *, ns + 1);
593   ct = 0;
594   for (cstate = 1; cstate <= ns; cstate++) {
595     len = hash_rec_len(&ht, cstate);
596     cs_ptr = hash_rec(&ht, cstate);
597     cs_ptre = hash_rec(&ht, cstate) + len - 1;
598     /* See if the set contains an accept-state */
599     ptr = cs_ptr - 1;
600     while (++ptr <= cs_ptre)
601       if (fsaptr->is_accepting[*ptr]) {
602         det->is_accepting[cstate] = TRUE;
603         ct++;
604         break;
605       }
606     if (subsets) {
607       tmalloc(det->states->intlist[cstate], int, len + 1);
608       det->states->intlist[cstate][0] = len;
609       n = 0;
610       ptr = cs_ptr - 1;
611       while (++ptr <= cs_ptre)
612         det->states->intlist[cstate][++n] = *ptr;
613     }
614   }
615 
616   det->num_accepting = ct;
617   if (ct == 1 || ct != ns) {
618     tmalloc(det->accepting, int, ct + 1);
619     ct = 0;
620     for (i = 1; i <= ns; i++)
621       if (det->is_accepting[i])
622         det->accepting[++ct] = i;
623   }
624   tfree(fsaptr->is_accepting);
625   tfree(det->is_accepting);
626   hash_clear(&ht);
627   tfree(fsarow);
628 
629   if (destroy)
630     fsa_clear(fsaptr);
631 
632   /* Now read the transition table back in */
633   tempfile = fopen(tempfilename, "r");
634   compressed_transitions_read(det, tempfile);
635   fclose(tempfile);
636 
637   unlink(tempfilename);
638 
639   return det;
640 }
641 
642 /* Version of nfa_determinize for transtition tables stored compactly.
643  * FOR THE MOMENT WE ASSUME NO EPSILON TRANSITIONS
644  * The fsa *fsaptr must be an fsa with compact table.
645  * The returned fsa accepts the same language but is deterministic.
646  */
nfa_cdeterminize(fsa * fsaptr,storage_type op_table_type,boolean destroy,char * tempfilename)647 fsa *nfa_cdeterminize(fsa *fsaptr, storage_type op_table_type, boolean destroy,
648                       char *tempfilename)
649 {
650   if (kbm_print_level >= 3)
651     printf("    #Calling nfa_determinize.\n");
652   if (fsaptr->states->size < MAXUSHORT)
653     return nfa_cdeterminize_short(fsaptr, op_table_type, destroy, tempfilename);
654   else
655     return nfa_cdeterminize_int(fsaptr, op_table_type, destroy, tempfilename);
656 }
657 
nfa_cdeterminize_short(fsa * fsaptr,storage_type op_table_type,boolean destroy,char * tempfilename)658 static fsa *nfa_cdeterminize_short(fsa *fsaptr, storage_type op_table_type,
659                                    boolean destroy, char *tempfilename)
660 {
661   int ngens, nssi, ns, *fsarow, nt, cstate, csi, im, i, g1, len = 0, ct;
662   unsigned int **table, *tableptr, *tableptre, g1shift;
663   unsigned short *ht_ptr, *ht_chptr, *ht_ptrb, *ht_ptre, *cs_ptr, *cs_ptre,
664       *ptr;
665   boolean dense_op;
666   short_hash_table ht;
667   fsa *det;
668   FILE *tempfile;
669 
670   if (kbm_print_level >= 3)
671     printf("    #Calling nfa_determinize_short.\n");
672 
673   tmalloc(det, fsa, 1);
674   fsa_init(det);
675   srec_copy(det->alphabet, fsaptr->alphabet);
676   det->flags[DFA] = TRUE;
677   det->flags[ACCESSIBLE] = TRUE;
678   det->flags[BFS] = TRUE;
679 
680   det->states->type = SIMPLE;
681 
682   det->table->table_type = op_table_type;
683   det->table->denserows = 0;
684   det->table->printing_format = op_table_type;
685 
686   if (fsaptr->num_initial == 0) {
687     det->num_initial = 0;
688     det->num_accepting = 0;
689     det->states->size = 0;
690     if (destroy)
691       fsa_clear(fsaptr);
692     return det;
693   }
694   ngens = det->alphabet->size;
695 
696   fsa_set_is_accepting(fsaptr);
697 
698   dense_op = op_table_type == DENSE;
699   table = fsaptr->table->ctable_data_ptr;
700 
701   det->num_initial = 1;
702   tmalloc(det->initial, int, 2);
703   det->initial[1] = 1;
704 
705   short_hash_init(&ht, FALSE, 0, 0, 0);
706   ht_ptrb = ht.current_ptr;
707   ht_ptre = ht_ptrb - 1;
708   nssi = fsaptr->num_initial;
709   for (i = 0; i < nssi; i++)
710     *(++ht_ptre) = fsaptr->initial[i + 1];
711   im = short_hash_locate(&ht, ht_ptre - ht_ptrb + 1);
712   /* Each state in 'det' will be represented as a subset of the set of states
713    * of *fsaptr. The initial state contains the initial states
714    * of *fsaptr.
715    * A subset is an accept-state if it contains an accept state of *fsaptr
716    * The subsets will be stored as variable-length records in the hash-table,
717    * always in increasing order.
718    */
719   if (im != 1) {
720     fprintf(stderr, "Hash-initialisation problem in nfa_determinize.\n");
721     return 0;
722   }
723   if ((tempfile = fopen(tempfilename, "w")) == 0) {
724     fprintf(stderr, "Error: cannot open file %s\n", tempfilename);
725     return 0;
726   }
727   if (dense_op)
728     tmalloc(fsarow, int, ngens) else tmalloc(fsarow, int, 2 * ngens + 1)
729 
730         cstate = 0;
731   if (dense_op)
732     len = ngens; /* The length of the fsarow output. */
733   nt = 0;        /* Number of transitions in det */
734 
735   while (++cstate <= ht.num_recs) {
736     if (kbm_print_level >= 3) {
737       if ((cstate <= 1000 && cstate % 100 == 0) ||
738           (cstate <= 10000 && cstate % 1000 == 0) ||
739           (cstate <= 100000 && cstate % 5000 == 0) || cstate % 50000 == 0)
740         printf("    #cstate = %d;  number of states = %d.\n", cstate,
741                ht.num_recs);
742     }
743     cs_ptr = short_hash_rec(&ht, cstate);
744     cs_ptre = short_hash_rec(&ht, cstate) + short_hash_rec_len(&ht, cstate) - 1;
745     if (!dense_op)
746       len = 0;
747 
748     for (g1 = 1; g1 <= ngens; g1++) {
749       /* Calculate action of generator g1 on state cstate  - to get the image,
750        * we simply apply it to each state in the subset of states representing
751        * cstate.
752        */
753       g1shift = ((unsigned)g1) << SHIFT;
754       ht_ptrb = ht.current_ptr;
755       ht_ptre = ht_ptrb - 1;
756       ptr = cs_ptr - 1;
757       while (++ptr <= cs_ptre) {
758         tableptr = table[*ptr];
759         tableptre = table[*ptr + 1];
760         while (tableptr < tableptre) {
761           if ((*tableptr & MASKL) == g1shift) {
762             csi = *tableptr & MASKR;
763             if (csi == 0)
764               continue;
765             if (ht_ptrb > ht_ptre || csi > *ht_ptre) {
766               /* We have a new state for the image subset to be added to the end
767                */
768               *(++ht_ptre) = csi;
769             }
770             else {
771               ht_chptr = ht_ptrb;
772               while (*ht_chptr < csi)
773                 ht_chptr++;
774               if (csi < *ht_chptr) {
775                 /* we have a new state for the image subset to be added in the
776                  * middle */
777                 ht_ptr = ++ht_ptre;
778                 while (ht_ptr > ht_chptr) {
779                   *ht_ptr = *(ht_ptr - 1);
780                   ht_ptr--;
781                 }
782                 *ht_ptr = csi;
783               }
784             }
785           }
786           tableptr++;
787         }
788       }
789       im = short_hash_locate(&ht, ht_ptre - ht_ptrb + 1);
790       if (dense_op)
791         fsarow[g1 - 1] = im;
792       else if (im > 0) {
793         fsarow[++len] = g1;
794         fsarow[++len] = im;
795       }
796       if (im > 0)
797         nt++;
798     }
799     if (!dense_op)
800       fsarow[0] = len++;
801     fwrite((void *)fsarow, sizeof(int), (size_t)len, tempfile);
802   }
803   fclose(tempfile);
804 
805   ns = det->states->size = ht.num_recs;
806   det->table->numTransitions = nt;
807 
808   /* Locate the accept states. A state is an accept state if and only if
809    * the subset representing it contains an accept state of *fsaptr.
810    */
811   tmalloc(det->is_accepting, boolean, ns + 1);
812   for (i = 1; i <= ns; i++)
813     det->is_accepting[i] = FALSE;
814   ct = 0;
815   for (cstate = 1; cstate <= ns; cstate++) {
816     cs_ptr = short_hash_rec(&ht, cstate);
817     cs_ptre = short_hash_rec(&ht, cstate) + short_hash_rec_len(&ht, cstate) - 1;
818     /* See if the set contains an accept-state */
819     ptr = cs_ptr - 1;
820     while (++ptr <= cs_ptre)
821       if (fsaptr->is_accepting[*ptr]) {
822         det->is_accepting[cstate] = TRUE;
823         ct++;
824         break;
825       }
826   }
827 
828   det->num_accepting = ct;
829   if (ct == 1 || ct != ns) {
830     tmalloc(det->accepting, int, ct + 1);
831     ct = 0;
832     for (i = 1; i <= ns; i++)
833       if (det->is_accepting[i])
834         det->accepting[++ct] = i;
835   }
836   tfree(fsaptr->is_accepting);
837   tfree(det->is_accepting);
838   short_hash_clear(&ht);
839   tfree(fsarow);
840 
841   if (destroy)
842     fsa_clear(fsaptr);
843 
844   /* Now read the transition table back in */
845   tempfile = fopen(tempfilename, "r");
846   compressed_transitions_read(det, tempfile);
847   fclose(tempfile);
848 
849   unlink(tempfilename);
850 
851   return det;
852 }
853 
nfa_cdeterminize_int(fsa * fsaptr,storage_type op_table_type,boolean destroy,char * tempfilename)854 static fsa *nfa_cdeterminize_int(fsa *fsaptr, storage_type op_table_type,
855                                  boolean destroy, char *tempfilename)
856 {
857   int ngens, nssi, ns, *fsarow, nt, cstate, csi, im, i, g1, len = 0, ct;
858   unsigned int **table, *tableptr, *tableptre, g1shift;
859   int *ht_ptr, *ht_chptr, *ht_ptrb, *ht_ptre, *cs_ptr, *cs_ptre, *ptr;
860   boolean dense_op;
861   hash_table ht;
862   fsa *det;
863   FILE *tempfile;
864 
865   if (kbm_print_level >= 3)
866     printf("    #Calling nfa_determinize_short.\n");
867 
868   tmalloc(det, fsa, 1);
869   fsa_init(det);
870   srec_copy(det->alphabet, fsaptr->alphabet);
871   det->flags[DFA] = TRUE;
872   det->flags[ACCESSIBLE] = TRUE;
873   det->flags[BFS] = TRUE;
874 
875   det->states->type = SIMPLE;
876 
877   det->table->table_type = op_table_type;
878   det->table->denserows = 0;
879   det->table->printing_format = op_table_type;
880 
881   if (fsaptr->num_initial == 0) {
882     det->num_initial = 0;
883     det->num_accepting = 0;
884     det->states->size = 0;
885     if (destroy)
886       fsa_clear(fsaptr);
887     return det;
888   }
889   ngens = det->alphabet->size;
890 
891   fsa_set_is_accepting(fsaptr);
892 
893   dense_op = op_table_type == DENSE;
894   table = fsaptr->table->ctable_data_ptr;
895 
896   det->num_initial = 1;
897   tmalloc(det->initial, int, 2);
898   det->initial[1] = 1;
899 
900   hash_init(&ht, FALSE, 0, 0, 0);
901   ht_ptrb = ht.current_ptr;
902   ht_ptre = ht_ptrb - 1;
903   nssi = fsaptr->num_initial;
904   for (i = 0; i < nssi; i++)
905     *(++ht_ptre) = fsaptr->initial[i + 1];
906   im = hash_locate(&ht, ht_ptre - ht_ptrb + 1);
907   /* Each state in 'det' will be represented as a subset of the set of states
908    * of *fsaptr. The initial state contains the initial states
909    * of *fsaptr.
910    * A subset is an accept-state if it contains an accept state of *fsaptr
911    * The subsets will be stored as variable-length records in the hash-table,
912    * always in increasing order.
913    */
914   if (im != 1) {
915     fprintf(stderr, "Hash-initialisation problem in nfa_determinize.\n");
916     return 0;
917   }
918   if ((tempfile = fopen(tempfilename, "w")) == 0) {
919     fprintf(stderr, "Error: cannot open file %s\n", tempfilename);
920     return 0;
921   }
922   if (dense_op)
923     tmalloc(fsarow, int, ngens) else tmalloc(fsarow, int, 2 * ngens + 1)
924 
925         cstate = 0;
926   if (dense_op)
927     len = ngens; /* The length of the fsarow output. */
928   nt = 0;        /* Number of transitions in det */
929 
930   while (++cstate <= ht.num_recs) {
931     if (kbm_print_level >= 3) {
932       if ((cstate <= 1000 && cstate % 100 == 0) ||
933           (cstate <= 10000 && cstate % 1000 == 0) ||
934           (cstate <= 100000 && cstate % 5000 == 0) || cstate % 50000 == 0)
935         printf("    #cstate = %d;  number of states = %d.\n", cstate,
936                ht.num_recs);
937     }
938     cs_ptr = hash_rec(&ht, cstate);
939     cs_ptre = hash_rec(&ht, cstate) + hash_rec_len(&ht, cstate) - 1;
940     if (!dense_op)
941       len = 0;
942 
943     for (g1 = 1; g1 <= ngens; g1++) {
944       /* Calculate action of generator g1 on state cstate  - to get the image,
945        * we simply apply it to each state in the subset of states representing
946        * cstate.
947        */
948       g1shift = ((unsigned)g1) << SHIFT;
949       ht_ptrb = ht.current_ptr;
950       ht_ptre = ht_ptrb - 1;
951       ptr = cs_ptr - 1;
952       while (++ptr <= cs_ptre) {
953         tableptr = table[*ptr];
954         tableptre = table[*ptr + 1];
955         while (tableptr < tableptre) {
956           if ((*tableptr & MASKL) == g1shift) {
957             csi = *tableptr & MASKR;
958             if (csi == 0)
959               continue;
960             if (ht_ptrb > ht_ptre || csi > *ht_ptre) {
961               /* We have a new state for the image subset to be added to the end
962                */
963               *(++ht_ptre) = csi;
964             }
965             else {
966               ht_chptr = ht_ptrb;
967               while (*ht_chptr < csi)
968                 ht_chptr++;
969               if (csi < *ht_chptr) {
970                 /* we have a new state for the image subset to be added in the
971                  * middle */
972                 ht_ptr = ++ht_ptre;
973                 while (ht_ptr > ht_chptr) {
974                   *ht_ptr = *(ht_ptr - 1);
975                   ht_ptr--;
976                 }
977                 *ht_ptr = csi;
978               }
979             }
980           }
981           tableptr++;
982         }
983       }
984       im = hash_locate(&ht, ht_ptre - ht_ptrb + 1);
985       if (dense_op)
986         fsarow[g1 - 1] = im;
987       else if (im > 0) {
988         fsarow[++len] = g1;
989         fsarow[++len] = im;
990       }
991       if (im > 0)
992         nt++;
993     }
994     if (!dense_op)
995       fsarow[0] = len++;
996     fwrite((void *)fsarow, sizeof(int), (size_t)len, tempfile);
997   }
998   fclose(tempfile);
999 
1000   ns = det->states->size = ht.num_recs;
1001   det->table->numTransitions = nt;
1002 
1003   /* Locate the accept states. A state is an accept state if and only if
1004    * the subset representing it contains an accept state of *fsaptr.
1005    */
1006   tmalloc(det->is_accepting, boolean, ns + 1);
1007   for (i = 1; i <= ns; i++)
1008     det->is_accepting[i] = FALSE;
1009   ct = 0;
1010   for (cstate = 1; cstate <= ns; cstate++) {
1011     cs_ptr = hash_rec(&ht, cstate);
1012     cs_ptre = hash_rec(&ht, cstate) + hash_rec_len(&ht, cstate) - 1;
1013     /* See if the set contains an accept-state */
1014     ptr = cs_ptr - 1;
1015     while (++ptr <= cs_ptre)
1016       if (fsaptr->is_accepting[*ptr]) {
1017         det->is_accepting[cstate] = TRUE;
1018         ct++;
1019         break;
1020       }
1021   }
1022 
1023   det->num_accepting = ct;
1024   if (ct == 1 || ct != ns) {
1025     tmalloc(det->accepting, int, ct + 1);
1026     ct = 0;
1027     for (i = 1; i <= ns; i++)
1028       if (det->is_accepting[i])
1029         det->accepting[++ct] = i;
1030   }
1031   tfree(fsaptr->is_accepting);
1032   tfree(det->is_accepting);
1033   hash_clear(&ht);
1034   tfree(fsarow);
1035 
1036   if (destroy)
1037     fsa_clear(fsaptr);
1038 
1039   /* Now read the transition table back in */
1040   tempfile = fopen(tempfilename, "r");
1041   compressed_transitions_read(det, tempfile);
1042   fclose(tempfile);
1043 
1044   unlink(tempfilename);
1045 
1046   return det;
1047 }
1048 
1049 /* *fsaptr must be a 2-variable (non-deterministic) fsa.
1050  * For the moment we assume it has no epsilon transitions.
1051  * The returned fsa is deterministic and accepts a word w_1 iff (w_1,w_2) is
1052  * accepted by *fsaptr, for some w_2 (with the usual padding conventions).
1053  * In fact, nfa_exists calls nfa_exists_int or nfa_exists_short,
1054  * the latter using the short hash table functions.
1055  */
nfa_exists(fsa * fsaptr,storage_type op_table_type,boolean destroy,char * tempfilename)1056 fsa *nfa_exists(fsa *fsaptr, storage_type op_table_type, boolean destroy,
1057                 char *tempfilename)
1058 {
1059   if (kbm_print_level >= 3)
1060     printf("    #Calling nfa_exists.\n");
1061   if (fsaptr->states->size < MAXUSHORT)
1062     return nfa_exists_short(fsaptr, op_table_type, destroy, tempfilename);
1063   else
1064     return nfa_exists_int(fsaptr, op_table_type, destroy, tempfilename);
1065 }
1066 
nfa_exists_short(fsa * fsaptr,storage_type op_table_type,boolean destroy,char * tempfilename)1067 fsa *nfa_exists_short(fsa *fsaptr, storage_type op_table_type, boolean destroy,
1068                       char *tempfilename)
1069 {
1070   int **table, ne, *tableptr, *tableptre, ngens, ns, *fsarow, es, ef, nt,
1071       cstate, csi, im, i, k, g1, len = 0, ct;
1072   unsigned short *ht_ptr, *ht_chptr, *ht_ptrb, *ht_ptre, *cs_ptr, *cs_ptre,
1073       *ptr;
1074   boolean dense_op, got;
1075   short_hash_table ht;
1076   fsa *exists;
1077   FILE *tempfile;
1078 
1079   if (kbm_print_level >= 3)
1080     printf("    #Calling nfa_exists_short.\n");
1081 
1082   if (fsaptr->alphabet->type != PRODUCT || fsaptr->alphabet->arity != 2) {
1083     fprintf(stderr, "Error in nfa_exists: fsa must be 2-variable.\n");
1084     return 0;
1085   }
1086 
1087   tmalloc(exists, fsa, 1);
1088   fsa_init(exists);
1089   srec_copy(exists->alphabet, fsaptr->alphabet->base);
1090   exists->flags[DFA] = TRUE;
1091   exists->flags[ACCESSIBLE] = TRUE;
1092   exists->flags[BFS] = TRUE;
1093 
1094   exists->states->type = SIMPLE;
1095 
1096   exists->table->table_type = op_table_type;
1097   exists->table->denserows = 0;
1098   exists->table->printing_format = op_table_type;
1099 
1100   if (fsaptr->num_initial == 0) {
1101     exists->num_initial = 0;
1102     exists->num_accepting = 0;
1103     exists->states->size = 0;
1104     if (destroy)
1105       fsa_clear(fsaptr);
1106     return exists;
1107   }
1108   ne = fsaptr->alphabet->size;
1109   ngens = exists->alphabet->size;
1110 
1111   if (ne != (ngens + 1) * (ngens + 1) - 1) {
1112     fprintf(stderr, "Error: in a 2-variable fsa, alphabet size should = "
1113                     "(ngens+1)^2 - 1.\n");
1114     return 0;
1115   }
1116 
1117   fsa_set_is_accepting(fsaptr);
1118 
1119   dense_op = op_table_type == DENSE;
1120   table = fsaptr->table->table_data_ptr;
1121 
1122   exists->num_initial = 1;
1123   tmalloc(exists->initial, int, 2);
1124   exists->initial[1] = 1;
1125 
1126   short_hash_init(&ht, FALSE, 0, 0, 0);
1127   ht_ptr = ht.current_ptr;
1128   k = fsaptr->num_initial;
1129   for (i = 1; i <= k; i++)
1130     ht_ptr[i - 1] = fsaptr->initial[i];
1131   im = short_hash_locate(&ht, k);
1132   /* Each state in 'exists' will be represented as a subset of the set of states
1133    * of *fsaptr. The initial state consists of the initial states
1134    * of *fsaptr. A subset is an accept-state if it contains an accept state of
1135    * *fsaptr, or if one can get to an accept-state by applying elements
1136    * ($,x) where $ is the padding symbol.
1137    * The subsets will be stored as variable-length records in the hash-table,
1138    * always in increasing order.
1139    */
1140   if (im != 1) {
1141     fprintf(stderr, "Hash-initialisation problem in nfa_exists.\n");
1142     return 0;
1143   }
1144   if ((tempfile = fopen(tempfilename, "w")) == 0) {
1145     fprintf(stderr, "Error: cannot open file %s\n", tempfilename);
1146     return 0;
1147   }
1148   if (dense_op)
1149     tmalloc(fsarow, int, ngens) else tmalloc(fsarow, int, 2 * ngens + 1)
1150 
1151         cstate = 0;
1152   if (dense_op)
1153     len = ngens; /* The length of the fsarow output. */
1154   nt = 0;        /* Number of transitions in exists */
1155 
1156   while (++cstate <= ht.num_recs) {
1157     if (kbm_print_level >= 3) {
1158       if ((cstate <= 1000 && cstate % 100 == 0) ||
1159           (cstate <= 10000 && cstate % 1000 == 0) ||
1160           (cstate <= 100000 && cstate % 5000 == 0) || cstate % 50000 == 0)
1161         printf("    #cstate = %d;  number of states = %d.\n", cstate,
1162                ht.num_recs);
1163     }
1164     cs_ptr = short_hash_rec(&ht, cstate);
1165     cs_ptre = short_hash_rec(&ht, cstate) + short_hash_rec_len(&ht, cstate) - 1;
1166     if (!dense_op)
1167       len = 0;
1168 
1169     for (g1 = 1; g1 <= ngens; g1++) {
1170       /* Calculate action of generator g1 on state cstate  - to get the image,
1171        * we have to apply (g1,g2) to each element in the subset corresponding to
1172        * cstate, and this for each generator g2 of the base-alphabet (including
1173        * the padding symbol).
1174        */
1175       ht_ptrb = ht.current_ptr;
1176       ht_ptre = ht_ptrb - 1;
1177       ptr = cs_ptr - 1;
1178       es = (g1 - 1) * (ngens + 1) + 1;
1179       ef = g1 * (ngens + 1);
1180       /* As g2 ranges from 1 to ngens+1 in the pair (g1,g2), for fixed g1, the
1181        * corresponding edge number in the fsa ranges from es to ef.
1182        */
1183       while (++ptr <= cs_ptre) {
1184         tableptr = table[*ptr];
1185         tableptre = table[*ptr + 1];
1186         while (tableptr < tableptre) {
1187           if (*tableptr >= es && *tableptr <= ef) {
1188             csi = *(tableptr + 1);
1189             if (csi == 0)
1190               continue;
1191             if (ht_ptrb > ht_ptre || csi > *ht_ptre) {
1192               /* We have a new state for the image subset to be added to the end
1193                */
1194               *(++ht_ptre) = csi;
1195             }
1196             else {
1197               ht_chptr = ht_ptrb;
1198               while (*ht_chptr < csi)
1199                 ht_chptr++;
1200               if (csi < *ht_chptr) {
1201                 /* we have a new state for the image subset to be added in the
1202                  * middle */
1203                 ht_ptr = ++ht_ptre;
1204                 while (ht_ptr > ht_chptr) {
1205                   *ht_ptr = *(ht_ptr - 1);
1206                   ht_ptr--;
1207                 }
1208                 *ht_ptr = csi;
1209               }
1210             }
1211           }
1212           tableptr += 2;
1213         }
1214       }
1215       im = short_hash_locate(&ht, ht_ptre - ht_ptrb + 1);
1216       if (dense_op)
1217         fsarow[g1 - 1] = im;
1218       else if (im > 0) {
1219         fsarow[++len] = g1;
1220         fsarow[++len] = im;
1221       }
1222       if (im > 0)
1223         nt++;
1224     }
1225     if (!dense_op)
1226       fsarow[0] = len++;
1227     fwrite((void *)fsarow, sizeof(int), (size_t)len, tempfile);
1228   }
1229   fclose(tempfile);
1230 
1231   ns = exists->states->size = ht.num_recs;
1232   exists->table->numTransitions = nt;
1233 
1234   /* Locate the accept states. This is slightly cumbersome, since a state
1235    * is an accept state if either the corresponding subset contains an
1236    * accept state of *fsaptr, or we can get from some such state to an
1237    * accept state by applying elements ($,x).
1238    * We will need to use the array exists->is_accepting.
1239    */
1240   tmalloc(exists->is_accepting, boolean, ns + 1);
1241   for (i = 1; i <= ns; i++)
1242     exists->is_accepting[i] = FALSE;
1243   ct = 0;
1244   for (cstate = ns; cstate >= 1; cstate--) {
1245     /* We work backwards through the states, since we wish to add on additional
1246      * elements at the end of the list in the hash-table - this destroys
1247      * later entries, but that doesn't matter, since we have already dealt
1248      * with them.
1249      */
1250     cs_ptr = short_hash_rec(&ht, cstate);
1251     cs_ptre = short_hash_rec(&ht, cstate) + short_hash_rec_len(&ht, cstate) - 1;
1252     /* First see if the set itself contains an accept-state */
1253     ptr = cs_ptr - 1;
1254     while (++ptr <= cs_ptre)
1255       if (fsaptr->is_accepting[*ptr]) {
1256         exists->is_accepting[cstate] = TRUE;
1257         ct++;
1258         break;
1259       }
1260     if (exists->is_accepting[cstate])
1261       continue;
1262     /* Next apply generators ($,x) and see if we get anything new, and
1263      * if it is an accept state.
1264      * Anything new is added to the end of the list - we don't need to
1265      * bother about having them in increasing order this time.
1266      */
1267     es = ngens * (ngens + 1) + 1;
1268     ef = (ngens + 1) * (ngens + 1) - 1;
1269     ptr = cs_ptr - 1;
1270     while (++ptr <= cs_ptre) {
1271       tableptr = table[*ptr];
1272       tableptre = table[*ptr + 1];
1273       while (tableptr < tableptre) {
1274         if (*tableptr >= es && *tableptr <= ef) {
1275           csi = *(tableptr + 1);
1276           if (csi == 0)
1277             continue;
1278           if (fsaptr->is_accepting[csi]) {
1279             exists->is_accepting[cstate] = TRUE;
1280             ct++;
1281             break;
1282           }
1283           /* see if csi is new */
1284           ht_chptr = cs_ptr - 1;
1285           got = FALSE;
1286           while (++ht_chptr <= cs_ptre)
1287             if (csi == *ht_chptr) {
1288               got = TRUE;
1289               break;
1290             }
1291           if (!got)
1292             /* add csi to the end */
1293             *(++cs_ptre) = csi;
1294         }
1295         tableptr += 2;
1296       }
1297       if (exists->is_accepting[cstate])
1298         break;
1299     }
1300   }
1301 
1302   exists->num_accepting = ct;
1303   if (ct == 1 || ct != ns) {
1304     tmalloc(exists->accepting, int, ct + 1);
1305     ct = 0;
1306     for (i = 1; i <= ns; i++)
1307       if (exists->is_accepting[i])
1308         exists->accepting[++ct] = i;
1309   }
1310   tfree(fsaptr->is_accepting);
1311   tfree(exists->is_accepting);
1312   short_hash_clear(&ht);
1313   tfree(fsarow);
1314 
1315   if (destroy)
1316     fsa_clear(fsaptr);
1317 
1318   /* Now read the transition table back in */
1319   tempfile = fopen(tempfilename, "r");
1320   compressed_transitions_read(exists, tempfile);
1321   fclose(tempfile);
1322 
1323   unlink(tempfilename);
1324 
1325   return exists;
1326 }
1327 
nfa_exists_int(fsa * fsaptr,storage_type op_table_type,boolean destroy,char * tempfilename)1328 fsa *nfa_exists_int(fsa *fsaptr, storage_type op_table_type, boolean destroy,
1329                     char *tempfilename)
1330 {
1331   int **table, ne, *tableptr, *tableptre, ngens, ns, *fsarow, es, ef, nt,
1332       cstate, csi, im, i, k, g1, len = 0, ct;
1333   int *ht_ptr, *ht_chptr, *ht_ptrb, *ht_ptre, *cs_ptr, *cs_ptre, *ptr;
1334   boolean dense_op, got;
1335   hash_table ht;
1336   fsa *exists;
1337   FILE *tempfile;
1338 
1339   if (kbm_print_level >= 3)
1340     printf("    #Calling nfa_exists_int.\n");
1341 
1342   if (fsaptr->alphabet->type != PRODUCT || fsaptr->alphabet->arity != 2) {
1343     fprintf(stderr, "Error in nfa_exists: fsa must be 2-variable.\n");
1344     return 0;
1345   }
1346 
1347   tmalloc(exists, fsa, 1);
1348   fsa_init(exists);
1349   srec_copy(exists->alphabet, fsaptr->alphabet->base);
1350   exists->flags[DFA] = TRUE;
1351   exists->flags[ACCESSIBLE] = TRUE;
1352   exists->flags[BFS] = TRUE;
1353 
1354   exists->states->type = SIMPLE;
1355 
1356   exists->table->table_type = op_table_type;
1357   exists->table->denserows = 0;
1358   exists->table->printing_format = op_table_type;
1359 
1360   if (fsaptr->num_initial == 0) {
1361     exists->num_initial = 0;
1362     exists->num_accepting = 0;
1363     exists->states->size = 0;
1364     if (destroy)
1365       fsa_clear(fsaptr);
1366     return exists;
1367   }
1368   ne = fsaptr->alphabet->size;
1369   ngens = exists->alphabet->size;
1370 
1371   if (ne != (ngens + 1) * (ngens + 1) - 1) {
1372     fprintf(stderr, "Error: in a 2-variable fsa, alphabet size should = "
1373                     "(ngens+1)^2 - 1.\n");
1374     return 0;
1375   }
1376 
1377   fsa_set_is_accepting(fsaptr);
1378 
1379   dense_op = op_table_type == DENSE;
1380   table = fsaptr->table->table_data_ptr;
1381 
1382   exists->num_initial = 1;
1383   tmalloc(exists->initial, int, 2);
1384   exists->initial[1] = 1;
1385 
1386   hash_init(&ht, FALSE, 0, 0, 0);
1387   ht_ptr = ht.current_ptr;
1388   k = fsaptr->num_initial;
1389   for (i = 1; i <= k; i++)
1390     ht_ptr[i - 1] = fsaptr->initial[i];
1391   im = hash_locate(&ht, k);
1392   /* Each state in 'exists' will be represented as a subset of the set of states
1393    * of *fsaptr. The initial state consists of the initial states
1394    * of *fsaptr. A subset is an accept-state if it contains an accept state of
1395    * *fsaptr, or if one can get to an accept-state by applying elements
1396    * ($,x) where $ is the padding symbol.
1397    * The subsets will be stored as variable-length records in the hash-table,
1398    * always in increasing order.
1399    */
1400   if (im != 1) {
1401     fprintf(stderr, "Hash-initialisation problem in nfa_exists.\n");
1402     return 0;
1403   }
1404   if ((tempfile = fopen(tempfilename, "w")) == 0) {
1405     fprintf(stderr, "Error: cannot open file %s\n", tempfilename);
1406     return 0;
1407   }
1408   if (dense_op)
1409     tmalloc(fsarow, int, ngens) else tmalloc(fsarow, int, 2 * ngens + 1)
1410 
1411         cstate = 0;
1412   if (dense_op)
1413     len = ngens; /* The length of the fsarow output. */
1414   nt = 0;        /* Number of transitions in exists */
1415 
1416   while (++cstate <= ht.num_recs) {
1417     if (kbm_print_level >= 3) {
1418       if ((cstate <= 1000 && cstate % 100 == 0) ||
1419           (cstate <= 10000 && cstate % 1000 == 0) ||
1420           (cstate <= 100000 && cstate % 5000 == 0) || cstate % 50000 == 0)
1421         printf("    #cstate = %d;  number of states = %d.\n", cstate,
1422                ht.num_recs);
1423     }
1424     cs_ptr = hash_rec(&ht, cstate);
1425     cs_ptre = hash_rec(&ht, cstate) + hash_rec_len(&ht, cstate) - 1;
1426     if (!dense_op)
1427       len = 0;
1428 
1429     for (g1 = 1; g1 <= ngens; g1++) {
1430       /* Calculate action of generator g1 on state cstate  - to get the image,
1431        * we have to apply (g1,g2) to each element in the subset corresponding to
1432        * cstate, and this for each generator g2 of the base-alphabet (including
1433        * the padding symbol).
1434        */
1435       ht_ptrb = ht.current_ptr;
1436       ht_ptre = ht_ptrb - 1;
1437       ptr = cs_ptr - 1;
1438       es = (g1 - 1) * (ngens + 1) + 1;
1439       ef = g1 * (ngens + 1);
1440       /* As g2 ranges from 1 to ngens+1 in the pair (g1,g2), for fixed g1, the
1441        * corresponding edge number in the fsa ranges from es to ef.
1442        */
1443       while (++ptr <= cs_ptre) {
1444         tableptr = table[*ptr];
1445         tableptre = table[*ptr + 1];
1446         while (tableptr < tableptre) {
1447           if (*tableptr >= es && *tableptr <= ef) {
1448             csi = *(tableptr + 1);
1449             if (csi == 0)
1450               continue;
1451             if (ht_ptrb > ht_ptre || csi > *ht_ptre) {
1452               /* We have a new state for the image subset to be added to the end
1453                */
1454               *(++ht_ptre) = csi;
1455             }
1456             else {
1457               ht_chptr = ht_ptrb;
1458               while (*ht_chptr < csi)
1459                 ht_chptr++;
1460               if (csi < *ht_chptr) {
1461                 /* we have a new state for the image subset to be added in the
1462                  * middle */
1463                 ht_ptr = ++ht_ptre;
1464                 while (ht_ptr > ht_chptr) {
1465                   *ht_ptr = *(ht_ptr - 1);
1466                   ht_ptr--;
1467                 }
1468                 *ht_ptr = csi;
1469               }
1470             }
1471           }
1472           tableptr += 2;
1473         }
1474       }
1475       im = hash_locate(&ht, ht_ptre - ht_ptrb + 1);
1476       if (dense_op)
1477         fsarow[g1 - 1] = im;
1478       else if (im > 0) {
1479         fsarow[++len] = g1;
1480         fsarow[++len] = im;
1481       }
1482       if (im > 0)
1483         nt++;
1484     }
1485     if (!dense_op)
1486       fsarow[0] = len++;
1487     fwrite((void *)fsarow, sizeof(int), (size_t)len, tempfile);
1488   }
1489   fclose(tempfile);
1490 
1491   ns = exists->states->size = ht.num_recs;
1492   exists->table->numTransitions = nt;
1493 
1494   /* Locate the accept states. This is slightly cumbersome, since a state
1495    * is an accept state if either the corresponding subset contains an
1496    * accept state of *fsaptr, or we can get from some such state to an
1497    * accept state by applying elements ($,x).
1498    * We will need to use the array exists->is_accepting.
1499    */
1500   tmalloc(exists->is_accepting, boolean, ns + 1);
1501   for (i = 1; i <= ns; i++)
1502     exists->is_accepting[i] = FALSE;
1503   ct = 0;
1504   for (cstate = ns; cstate >= 1; cstate--) {
1505     /* We work backwards through the states, since we wish to add on additional
1506      * elements at the end of the list in the hash-table - this destroys
1507      * later entries, but that doesn't matter, since we have already dealt
1508      * with them.
1509      */
1510     cs_ptr = hash_rec(&ht, cstate);
1511     cs_ptre = hash_rec(&ht, cstate) + hash_rec_len(&ht, cstate) - 1;
1512     /* First see if the set itself contains an accept-state */
1513     ptr = cs_ptr - 1;
1514     while (++ptr <= cs_ptre)
1515       if (fsaptr->is_accepting[*ptr]) {
1516         exists->is_accepting[cstate] = TRUE;
1517         ct++;
1518         break;
1519       }
1520     if (exists->is_accepting[cstate])
1521       continue;
1522     /* Next apply generators ($,x) and see if we get anything new, and
1523      * if it is an accept state.
1524      * Anything new is added to the end of the list - we don't need to
1525      * bother about having them in increasing order this time.
1526      */
1527     es = ngens * (ngens + 1) + 1;
1528     ef = (ngens + 1) * (ngens + 1) - 1;
1529     ptr = cs_ptr - 1;
1530     while (++ptr <= cs_ptre) {
1531       tableptr = table[*ptr];
1532       tableptre = table[*ptr + 1];
1533       while (tableptr < tableptre) {
1534         if (*tableptr >= es && *tableptr <= ef) {
1535           csi = *(tableptr + 1);
1536           if (csi == 0)
1537             continue;
1538           if (fsaptr->is_accepting[csi]) {
1539             exists->is_accepting[cstate] = TRUE;
1540             ct++;
1541             break;
1542           }
1543           /* see if csi is new */
1544           ht_chptr = cs_ptr - 1;
1545           got = FALSE;
1546           while (++ht_chptr <= cs_ptre)
1547             if (csi == *ht_chptr) {
1548               got = TRUE;
1549               break;
1550             }
1551           if (!got)
1552             /* add csi to the end */
1553             *(++cs_ptre) = csi;
1554         }
1555         tableptr += 2;
1556       }
1557       if (exists->is_accepting[cstate])
1558         break;
1559     }
1560   }
1561 
1562   exists->num_accepting = ct;
1563   if (ct == 1 || ct != ns) {
1564     tmalloc(exists->accepting, int, ct + 1);
1565     ct = 0;
1566     for (i = 1; i <= ns; i++)
1567       if (exists->is_accepting[i])
1568         exists->accepting[++ct] = i;
1569   }
1570   tfree(fsaptr->is_accepting);
1571   tfree(exists->is_accepting);
1572   hash_clear(&ht);
1573   tfree(fsarow);
1574 
1575   if (destroy)
1576     fsa_clear(fsaptr);
1577 
1578   /* Now read the transition table back in */
1579   tempfile = fopen(tempfilename, "r");
1580   compressed_transitions_read(exists, tempfile);
1581   fclose(tempfile);
1582 
1583   unlink(tempfilename);
1584 
1585   return exists;
1586 }
1587