1 /* file nfa.c 8.9.97.
2 *
3 * 25/9/98 added parameter subsets to nfa_determinize.
4 * 2/10/97 - added version of nfa_determinize for compactly stored
5 * transition tables.
6 * This file contains routines for processing nfa's i.e. non-deterministic
7 * finite state automata.
8 * Currently, the only routine available is nfa_determinize, which returns
9 * an equivalent deterministic automaton, whose states are subsets of the
10 * input fsa.
11 * Currently, the only input format accepted is sparse, where epsilon
12 * transitions are from generator 0.
13 */
14
15 #include <stdio.h>
16 #include "defs.h"
17 #include "fsa.h"
18 #include "hash.h"
19 #include "externals.h"
20 /* the following three definitions are for use in extracting edge-label
21 * and state in compactly stored pairs.
22 */
23 #define SHIFT 24
24 #define MASKR (unsigned)0xffffff
25 #define MASKL (unsigned)0xff000000
26
27 /* Functions defined in this file: */
28 static fsa *nfa_determinize_short(fsa *fsaptr, storage_type op_table_type,
29 boolean eps_trans, boolean destroy,
30 boolean subsets, char *tempfilename);
31 static fsa *nfa_determinize_int(fsa *fsaptr, storage_type op_table_type,
32 boolean eps_trans, boolean destroy,
33 boolean subsets, char *tempfilename);
34 static fsa *nfa_cdeterminize_short(fsa *fsaptr, storage_type op_table_type,
35 boolean destroy, char *tempfilename);
36 static fsa *nfa_cdeterminize_int(fsa *fsaptr, storage_type op_table_type,
37 boolean destroy, char *tempfilename);
38 static fsa *nfa_exists_short(fsa *fsaptr, storage_type op_table_type,
39 boolean destroy, char *tempfilename);
40 static fsa *nfa_exists_int(fsa *fsaptr, storage_type op_table_type,
41 boolean destroy, char *tempfilename);
42
43
44 /* The fsa *fsaptr must be an fsa with sparse table.
45 * The returned fsa accepts the same language but is deterministic.
46 * If eps_trans is false, it is assumed that *fsaptr has no epsilon
47 * transitions.
48 * If subsets is true, then the returned fsa will have state-set of type
49 * "list of integers", where the list represents the subset of the state-set
50 * of *fsaptr that correponds to the state of the returned fsa.
51 */
nfa_determinize(fsa * fsaptr,storage_type op_table_type,boolean eps_trans,boolean destroy,boolean subsets,char * tempfilename)52 fsa *nfa_determinize(fsa *fsaptr, storage_type op_table_type, boolean eps_trans,
53 boolean destroy, boolean subsets, char *tempfilename)
54 {
55 if (kbm_print_level >= 3)
56 printf(" #Calling nfa_determinize.\n");
57 if (fsaptr->states->size < MAXUSHORT)
58 return nfa_determinize_short(fsaptr, op_table_type, eps_trans, destroy,
59 subsets, tempfilename);
60 else
61 return nfa_determinize_int(fsaptr, op_table_type, eps_trans, destroy,
62 subsets, tempfilename);
63 }
64
nfa_determinize_short(fsa * fsaptr,storage_type op_table_type,boolean eps_trans,boolean destroy,boolean subsets,char * tempfilename)65 static fsa *nfa_determinize_short(fsa *fsaptr, storage_type op_table_type,
66 boolean eps_trans, boolean destroy,
67 boolean subsets, char *tempfilename)
68 {
69 int **table, *tableptr, *tableptre, ngens, nssi, ns, *fsarow, nt, cstate, csi,
70 im, i, g1, len = 0, ct, n;
71 unsigned short *ht_ptr, *ht_chptr, *ht_ptrb, *ht_ptre, *cs_ptr, *cs_ptre,
72 *ptr;
73 boolean dense_op;
74 short_hash_table ht;
75 fsa *det;
76 FILE *tempfile;
77
78 if (kbm_print_level >= 3)
79 printf(" #Calling nfa_determinize_short.\n");
80
81 tmalloc(det, fsa, 1);
82 fsa_init(det);
83 srec_copy(det->alphabet, fsaptr->alphabet);
84 det->flags[DFA] = TRUE;
85 det->flags[ACCESSIBLE] = TRUE;
86 det->flags[BFS] = TRUE;
87
88 det->states->type = subsets ? LISTOFINTS : SIMPLE;
89
90 det->table->table_type = op_table_type;
91 det->table->denserows = 0;
92 det->table->printing_format = op_table_type;
93
94 if (fsaptr->num_initial == 0) {
95 det->num_initial = 0;
96 det->num_accepting = 0;
97 det->states->size = 0;
98 if (destroy)
99 fsa_clear(fsaptr);
100 return det;
101 }
102 ngens = det->alphabet->size;
103
104 fsa_set_is_accepting(fsaptr);
105
106 dense_op = op_table_type == DENSE;
107 table = fsaptr->table->table_data_ptr;
108
109 det->num_initial = 1;
110 tmalloc(det->initial, int, 2);
111 det->initial[1] = 1;
112
113 short_hash_init(&ht, FALSE, 0, 0, 0);
114 ht_ptrb = ht.current_ptr;
115 ht_ptre = ht_ptrb - 1;
116 nssi = fsaptr->num_initial;
117 for (i = 0; i < nssi; i++)
118 *(++ht_ptre) = fsaptr->initial[i + 1];
119 /* now perform epsilon closure of initial subset */
120 ptr = ht_ptrb - 1;
121 if (eps_trans)
122 while (++ptr <= ht_ptre) {
123 tableptr = table[*ptr];
124 tableptre = table[*ptr + 1];
125 while (tableptr < tableptre) {
126 if (*tableptr == 0) {
127 csi = *(tableptr + 1);
128 if (csi == 0) {
129 tableptr += 2;
130 continue;
131 }
132 if (csi > *ht_ptre) {
133 /* We have a new state for the image subset to be added to the end
134 */
135 *(++ht_ptre) = csi;
136 }
137 else {
138 ht_chptr = ht_ptrb;
139 while (*ht_chptr < csi)
140 ht_chptr++;
141 if (csi < *ht_chptr) {
142 /* we have a new state for the image subset to be added in the
143 * middle */
144 ht_ptr = ++ht_ptre;
145 while (ht_ptr > ht_chptr) {
146 *ht_ptr = *(ht_ptr - 1);
147 ht_ptr--;
148 }
149 *ht_ptr = csi;
150 if (ptr > ht_ptr)
151 ptr = ht_ptr - 1;
152 }
153 }
154 }
155 tableptr += 2;
156 }
157 }
158 im = short_hash_locate(&ht, ht_ptre - ht_ptrb + 1);
159 /* Each state in 'det' will be represented as a subset of the set of states
160 * of *fsaptr. The initial state contains the initial states
161 * of *fsaptr.
162 * A subset is an accept-state if it contains an accept state of *fsaptr
163 * The subsets will be stored as variable-length records in the hash-table,
164 * always in increasing order.
165 */
166 if (im != 1) {
167 fprintf(stderr, "Hash-initialisation problem in nfa_determinize.\n");
168 return 0;
169 }
170 if ((tempfile = fopen(tempfilename, "w")) == 0) {
171 fprintf(stderr, "Error: cannot open file %s\n", tempfilename);
172 return 0;
173 }
174 if (dense_op)
175 tmalloc(fsarow, int, ngens) else tmalloc(fsarow, int, 2 * ngens + 1)
176
177 cstate = 0;
178 if (dense_op)
179 len = ngens; /* The length of the fsarow output. */
180 nt = 0; /* Number of transitions in det */
181
182 while (++cstate <= ht.num_recs) {
183 if (kbm_print_level >= 3) {
184 if ((cstate <= 1000 && cstate % 100 == 0) ||
185 (cstate <= 10000 && cstate % 1000 == 0) ||
186 (cstate <= 100000 && cstate % 5000 == 0) || cstate % 50000 == 0)
187 printf(" #cstate = %d; number of states = %d.\n", cstate,
188 ht.num_recs);
189 }
190 cs_ptr = short_hash_rec(&ht, cstate);
191 cs_ptre = short_hash_rec(&ht, cstate) + short_hash_rec_len(&ht, cstate) - 1;
192 if (!dense_op)
193 len = 0;
194
195 for (g1 = 1; g1 <= ngens; g1++) {
196 /* Calculate action of generator g1 on state cstate - to get the image,
197 * we simply apply it to each state in the subset of states representing
198 * cstate.
199 */
200 ht_ptrb = ht.current_ptr;
201 ht_ptre = ht_ptrb - 1;
202 ptr = cs_ptr - 1;
203 while (++ptr <= cs_ptre) {
204 tableptr = table[*ptr];
205 tableptre = table[*ptr + 1];
206 while (tableptr < tableptre) {
207 if (*tableptr == g1) {
208 csi = *(tableptr + 1);
209 if (csi == 0) {
210 tableptr += 2;
211 continue;
212 }
213 if (ht_ptrb > ht_ptre || csi > *ht_ptre) {
214 /* We have a new state for the image subset to be added to the end
215 */
216 *(++ht_ptre) = csi;
217 }
218 else {
219 ht_chptr = ht_ptrb;
220 while (*ht_chptr < csi)
221 ht_chptr++;
222 if (csi < *ht_chptr) {
223 /* we have a new state for the image subset to be added in the
224 * middle */
225 ht_ptr = ++ht_ptre;
226 while (ht_ptr > ht_chptr) {
227 *ht_ptr = *(ht_ptr - 1);
228 ht_ptr--;
229 }
230 *ht_ptr = csi;
231 }
232 }
233 }
234 tableptr += 2;
235 }
236 }
237 /* now perform epsilon closure of image subset */
238 ptr = ht_ptrb - 1;
239 if (eps_trans)
240 while (++ptr <= ht_ptre) {
241 tableptr = table[*ptr];
242 tableptre = table[*ptr + 1];
243 while (tableptr < tableptre) {
244 if (*tableptr == 0) {
245 csi = *(tableptr + 1);
246 if (csi == 0) {
247 tableptr += 2;
248 continue;
249 }
250 if (csi > *ht_ptre) {
251 /* We have a new state for the image subset to be added to the
252 * end */
253 *(++ht_ptre) = csi;
254 }
255 else {
256 ht_chptr = ht_ptrb;
257 while (*ht_chptr < csi)
258 ht_chptr++;
259 if (csi < *ht_chptr) {
260 /* we have a new state for the image subset to be added in the
261 * middle */
262 ht_ptr = ++ht_ptre;
263 while (ht_ptr > ht_chptr) {
264 *ht_ptr = *(ht_ptr - 1);
265 ht_ptr--;
266 }
267 *ht_ptr = csi;
268 if (ptr > ht_ptr)
269 ptr = ht_ptr - 1;
270 }
271 }
272 }
273 tableptr += 2;
274 }
275 }
276 im = short_hash_locate(&ht, ht_ptre - ht_ptrb + 1);
277 if (dense_op)
278 fsarow[g1 - 1] = im;
279 else if (im > 0) {
280 fsarow[++len] = g1;
281 fsarow[++len] = im;
282 }
283 if (im > 0)
284 nt++;
285 }
286 if (!dense_op)
287 fsarow[0] = len++;
288 fwrite((void *)fsarow, sizeof(int), (size_t)len, tempfile);
289 }
290 fclose(tempfile);
291
292 ns = det->states->size = ht.num_recs;
293 det->table->numTransitions = nt;
294
295 /* Locate the accept states. A state is an accept state if and only if
296 * the subset representing it contains an accept state of *fsaptr.
297 * Also, if subsets is true, record the states in the intlist field
298 * of det->states.
299 */
300 tmalloc(det->is_accepting, boolean, ns + 1);
301 for (i = 1; i <= ns; i++)
302 det->is_accepting[i] = FALSE;
303 if (subsets)
304 tmalloc(det->states->intlist, int *, ns + 1);
305 ct = 0;
306 for (cstate = 1; cstate <= ns; cstate++) {
307 len = short_hash_rec_len(&ht, cstate);
308 cs_ptr = short_hash_rec(&ht, cstate);
309 cs_ptre = short_hash_rec(&ht, cstate) + len - 1;
310 /* See if the set contains an accept-state */
311 ptr = cs_ptr - 1;
312 while (++ptr <= cs_ptre)
313 if (fsaptr->is_accepting[*ptr]) {
314 det->is_accepting[cstate] = TRUE;
315 ct++;
316 break;
317 }
318 if (subsets) {
319 tmalloc(det->states->intlist[cstate], int, len + 1);
320 det->states->intlist[cstate][0] = len;
321 n = 0;
322 ptr = cs_ptr - 1;
323 while (++ptr <= cs_ptre)
324 det->states->intlist[cstate][++n] = *ptr;
325 }
326 }
327
328 det->num_accepting = ct;
329 if (ct == 1 || ct != ns) {
330 tmalloc(det->accepting, int, ct + 1);
331 ct = 0;
332 for (i = 1; i <= ns; i++)
333 if (det->is_accepting[i])
334 det->accepting[++ct] = i;
335 }
336 tfree(fsaptr->is_accepting);
337 tfree(det->is_accepting);
338 short_hash_clear(&ht);
339 tfree(fsarow);
340
341 if (destroy)
342 fsa_clear(fsaptr);
343
344 /* Now read the transition table back in */
345 tempfile = fopen(tempfilename, "r");
346 compressed_transitions_read(det, tempfile);
347 fclose(tempfile);
348
349 unlink(tempfilename);
350
351 return det;
352 }
353
nfa_determinize_int(fsa * fsaptr,storage_type op_table_type,boolean eps_trans,boolean destroy,boolean subsets,char * tempfilename)354 static fsa *nfa_determinize_int(fsa *fsaptr, storage_type op_table_type,
355 boolean eps_trans, boolean destroy,
356 boolean subsets, char *tempfilename)
357 {
358 int **table, *tableptr, *tableptre, ngens, nssi, ns, *fsarow, nt, cstate, csi,
359 im, i, g1, len = 0, ct, n;
360 int *ht_ptr, *ht_chptr, *ht_ptrb, *ht_ptre, *cs_ptr, *cs_ptre, *ptr;
361 boolean dense_op;
362 hash_table ht;
363 fsa *det;
364 FILE *tempfile;
365
366 if (kbm_print_level >= 3)
367 printf(" #Calling nfa_determinize_int.\n");
368
369 tmalloc(det, fsa, 1);
370 fsa_init(det);
371 srec_copy(det->alphabet, fsaptr->alphabet);
372 det->flags[DFA] = TRUE;
373 det->flags[ACCESSIBLE] = TRUE;
374 det->flags[BFS] = TRUE;
375
376 det->states->type = subsets ? LISTOFINTS : SIMPLE;
377
378 det->table->table_type = op_table_type;
379 det->table->denserows = 0;
380 det->table->printing_format = op_table_type;
381
382 if (fsaptr->num_initial == 0) {
383 det->num_initial = 0;
384 det->num_accepting = 0;
385 det->states->size = 0;
386 if (destroy)
387 fsa_clear(fsaptr);
388 return det;
389 }
390 ngens = det->alphabet->size;
391
392 fsa_set_is_accepting(fsaptr);
393
394 dense_op = op_table_type == DENSE;
395 table = fsaptr->table->table_data_ptr;
396
397 det->num_initial = 1;
398 tmalloc(det->initial, int, 2);
399 det->initial[1] = 1;
400
401 hash_init(&ht, FALSE, 0, 0, 0);
402 ht_ptrb = ht.current_ptr;
403 ht_ptre = ht_ptrb - 1;
404 nssi = fsaptr->num_initial;
405 for (i = 0; i < nssi; i++)
406 *(++ht_ptre) = fsaptr->initial[i + 1];
407 /* now perform epsilon closure of initial subset */
408 ptr = ht_ptrb - 1;
409 if (eps_trans)
410 while (++ptr <= ht_ptre) {
411 tableptr = table[*ptr];
412 tableptre = table[*ptr + 1];
413 while (tableptr < tableptre) {
414 if (*tableptr == 0) {
415 csi = *(tableptr + 1);
416 if (csi == 0) {
417 tableptr += 2;
418 continue;
419 }
420 if (csi > *ht_ptre) {
421 /* We have a new state for the image subset to be added to the end
422 */
423 *(++ht_ptre) = csi;
424 }
425 else {
426 ht_chptr = ht_ptrb;
427 while (*ht_chptr < csi)
428 ht_chptr++;
429 if (csi < *ht_chptr) {
430 /* we have a new state for the image subset to be added in the
431 * middle */
432 ht_ptr = ++ht_ptre;
433 while (ht_ptr > ht_chptr) {
434 *ht_ptr = *(ht_ptr - 1);
435 ht_ptr--;
436 }
437 *ht_ptr = csi;
438 if (ptr > ht_ptr)
439 ptr = ht_ptr - 1;
440 }
441 }
442 }
443 tableptr += 2;
444 }
445 }
446 im = hash_locate(&ht, ht_ptre - ht_ptrb + 1);
447 /* Each state in 'det' will be represented as a subset of the set of states
448 * of *fsaptr. The initial state contains the initial states
449 * of *fsaptr.
450 * A subset is an accept-state if it contains an accept state of *fsaptr
451 * The subsets will be stored as variable-length records in the hash-table,
452 * always in increasing order.
453 */
454 if (im != 1) {
455 fprintf(stderr, "Hash-initialisation problem in nfa_determinize.\n");
456 return 0;
457 }
458 if ((tempfile = fopen(tempfilename, "w")) == 0) {
459 fprintf(stderr, "Error: cannot open file %s\n", tempfilename);
460 return 0;
461 }
462 if (dense_op)
463 tmalloc(fsarow, int, ngens) else tmalloc(fsarow, int, 2 * ngens + 1)
464
465 cstate = 0;
466 if (dense_op)
467 len = ngens; /* The length of the fsarow output. */
468 nt = 0; /* Number of transitions in det */
469
470 while (++cstate <= ht.num_recs) {
471 if (kbm_print_level >= 3) {
472 if ((cstate <= 1000 && cstate % 100 == 0) ||
473 (cstate <= 10000 && cstate % 1000 == 0) ||
474 (cstate <= 100000 && cstate % 5000 == 0) || cstate % 50000 == 0)
475 printf(" #cstate = %d; number of states = %d.\n", cstate,
476 ht.num_recs);
477 }
478 cs_ptr = hash_rec(&ht, cstate);
479 cs_ptre = hash_rec(&ht, cstate) + hash_rec_len(&ht, cstate) - 1;
480 if (!dense_op)
481 len = 0;
482
483 for (g1 = 1; g1 <= ngens; g1++) {
484 /* Calculate action of generator g1 on state cstate - to get the image,
485 * we simply apply it to each state in the subset of states representing
486 * cstate.
487 */
488 ht_ptrb = ht.current_ptr;
489 ht_ptre = ht_ptrb - 1;
490 ptr = cs_ptr - 1;
491 while (++ptr <= cs_ptre) {
492 tableptr = table[*ptr];
493 tableptre = table[*ptr + 1];
494 while (tableptr < tableptre) {
495 if (*tableptr == g1) {
496 csi = *(tableptr + 1);
497 if (csi == 0) {
498 tableptr += 2;
499 continue;
500 }
501 if (ht_ptrb > ht_ptre || csi > *ht_ptre) {
502 /* We have a new state for the image subset to be added to the end
503 */
504 *(++ht_ptre) = csi;
505 }
506 else {
507 ht_chptr = ht_ptrb;
508 while (*ht_chptr < csi)
509 ht_chptr++;
510 if (csi < *ht_chptr) {
511 /* we have a new state for the image subset to be added in the
512 * middle */
513 ht_ptr = ++ht_ptre;
514 while (ht_ptr > ht_chptr) {
515 *ht_ptr = *(ht_ptr - 1);
516 ht_ptr--;
517 }
518 *ht_ptr = csi;
519 }
520 }
521 }
522 tableptr += 2;
523 }
524 }
525 /* now perform epsilon closure of image subset */
526 ptr = ht_ptrb - 1;
527 if (eps_trans)
528 while (++ptr <= ht_ptre) {
529 tableptr = table[*ptr];
530 tableptre = table[*ptr + 1];
531 while (tableptr < tableptre) {
532 if (*tableptr == 0) {
533 csi = *(tableptr + 1);
534 if (csi == 0) {
535 tableptr += 2;
536 continue;
537 }
538 if (csi > *ht_ptre) {
539 /* We have a new state for the image subset to be added to the
540 * end */
541 *(++ht_ptre) = csi;
542 }
543 else {
544 ht_chptr = ht_ptrb;
545 while (*ht_chptr < csi)
546 ht_chptr++;
547 if (csi < *ht_chptr) {
548 /* we have a new state for the image subset to be added in the
549 * middle */
550 ht_ptr = ++ht_ptre;
551 while (ht_ptr > ht_chptr) {
552 *ht_ptr = *(ht_ptr - 1);
553 ht_ptr--;
554 }
555 *ht_ptr = csi;
556 if (ptr > ht_ptr)
557 ptr = ht_ptr - 1;
558 }
559 }
560 }
561 tableptr += 2;
562 }
563 }
564 im = hash_locate(&ht, ht_ptre - ht_ptrb + 1);
565 if (dense_op)
566 fsarow[g1 - 1] = im;
567 else if (im > 0) {
568 fsarow[++len] = g1;
569 fsarow[++len] = im;
570 }
571 if (im > 0)
572 nt++;
573 }
574 if (!dense_op)
575 fsarow[0] = len++;
576 fwrite((void *)fsarow, sizeof(int), (size_t)len, tempfile);
577 }
578 fclose(tempfile);
579
580 ns = det->states->size = ht.num_recs;
581 det->table->numTransitions = nt;
582
583 /* Locate the accept states. A state is an accept state if and only if
584 * the subset representing it contains an accept state of *fsaptr.
585 * Also, if subsets is true, record the states in the intlist field
586 * of det->states.
587 */
588 tmalloc(det->is_accepting, boolean, ns + 1);
589 for (i = 1; i <= ns; i++)
590 det->is_accepting[i] = FALSE;
591 if (subsets)
592 tmalloc(det->states->intlist, int *, ns + 1);
593 ct = 0;
594 for (cstate = 1; cstate <= ns; cstate++) {
595 len = hash_rec_len(&ht, cstate);
596 cs_ptr = hash_rec(&ht, cstate);
597 cs_ptre = hash_rec(&ht, cstate) + len - 1;
598 /* See if the set contains an accept-state */
599 ptr = cs_ptr - 1;
600 while (++ptr <= cs_ptre)
601 if (fsaptr->is_accepting[*ptr]) {
602 det->is_accepting[cstate] = TRUE;
603 ct++;
604 break;
605 }
606 if (subsets) {
607 tmalloc(det->states->intlist[cstate], int, len + 1);
608 det->states->intlist[cstate][0] = len;
609 n = 0;
610 ptr = cs_ptr - 1;
611 while (++ptr <= cs_ptre)
612 det->states->intlist[cstate][++n] = *ptr;
613 }
614 }
615
616 det->num_accepting = ct;
617 if (ct == 1 || ct != ns) {
618 tmalloc(det->accepting, int, ct + 1);
619 ct = 0;
620 for (i = 1; i <= ns; i++)
621 if (det->is_accepting[i])
622 det->accepting[++ct] = i;
623 }
624 tfree(fsaptr->is_accepting);
625 tfree(det->is_accepting);
626 hash_clear(&ht);
627 tfree(fsarow);
628
629 if (destroy)
630 fsa_clear(fsaptr);
631
632 /* Now read the transition table back in */
633 tempfile = fopen(tempfilename, "r");
634 compressed_transitions_read(det, tempfile);
635 fclose(tempfile);
636
637 unlink(tempfilename);
638
639 return det;
640 }
641
642 /* Version of nfa_determinize for transtition tables stored compactly.
643 * FOR THE MOMENT WE ASSUME NO EPSILON TRANSITIONS
644 * The fsa *fsaptr must be an fsa with compact table.
645 * The returned fsa accepts the same language but is deterministic.
646 */
nfa_cdeterminize(fsa * fsaptr,storage_type op_table_type,boolean destroy,char * tempfilename)647 fsa *nfa_cdeterminize(fsa *fsaptr, storage_type op_table_type, boolean destroy,
648 char *tempfilename)
649 {
650 if (kbm_print_level >= 3)
651 printf(" #Calling nfa_determinize.\n");
652 if (fsaptr->states->size < MAXUSHORT)
653 return nfa_cdeterminize_short(fsaptr, op_table_type, destroy, tempfilename);
654 else
655 return nfa_cdeterminize_int(fsaptr, op_table_type, destroy, tempfilename);
656 }
657
nfa_cdeterminize_short(fsa * fsaptr,storage_type op_table_type,boolean destroy,char * tempfilename)658 static fsa *nfa_cdeterminize_short(fsa *fsaptr, storage_type op_table_type,
659 boolean destroy, char *tempfilename)
660 {
661 int ngens, nssi, ns, *fsarow, nt, cstate, csi, im, i, g1, len = 0, ct;
662 unsigned int **table, *tableptr, *tableptre, g1shift;
663 unsigned short *ht_ptr, *ht_chptr, *ht_ptrb, *ht_ptre, *cs_ptr, *cs_ptre,
664 *ptr;
665 boolean dense_op;
666 short_hash_table ht;
667 fsa *det;
668 FILE *tempfile;
669
670 if (kbm_print_level >= 3)
671 printf(" #Calling nfa_determinize_short.\n");
672
673 tmalloc(det, fsa, 1);
674 fsa_init(det);
675 srec_copy(det->alphabet, fsaptr->alphabet);
676 det->flags[DFA] = TRUE;
677 det->flags[ACCESSIBLE] = TRUE;
678 det->flags[BFS] = TRUE;
679
680 det->states->type = SIMPLE;
681
682 det->table->table_type = op_table_type;
683 det->table->denserows = 0;
684 det->table->printing_format = op_table_type;
685
686 if (fsaptr->num_initial == 0) {
687 det->num_initial = 0;
688 det->num_accepting = 0;
689 det->states->size = 0;
690 if (destroy)
691 fsa_clear(fsaptr);
692 return det;
693 }
694 ngens = det->alphabet->size;
695
696 fsa_set_is_accepting(fsaptr);
697
698 dense_op = op_table_type == DENSE;
699 table = fsaptr->table->ctable_data_ptr;
700
701 det->num_initial = 1;
702 tmalloc(det->initial, int, 2);
703 det->initial[1] = 1;
704
705 short_hash_init(&ht, FALSE, 0, 0, 0);
706 ht_ptrb = ht.current_ptr;
707 ht_ptre = ht_ptrb - 1;
708 nssi = fsaptr->num_initial;
709 for (i = 0; i < nssi; i++)
710 *(++ht_ptre) = fsaptr->initial[i + 1];
711 im = short_hash_locate(&ht, ht_ptre - ht_ptrb + 1);
712 /* Each state in 'det' will be represented as a subset of the set of states
713 * of *fsaptr. The initial state contains the initial states
714 * of *fsaptr.
715 * A subset is an accept-state if it contains an accept state of *fsaptr
716 * The subsets will be stored as variable-length records in the hash-table,
717 * always in increasing order.
718 */
719 if (im != 1) {
720 fprintf(stderr, "Hash-initialisation problem in nfa_determinize.\n");
721 return 0;
722 }
723 if ((tempfile = fopen(tempfilename, "w")) == 0) {
724 fprintf(stderr, "Error: cannot open file %s\n", tempfilename);
725 return 0;
726 }
727 if (dense_op)
728 tmalloc(fsarow, int, ngens) else tmalloc(fsarow, int, 2 * ngens + 1)
729
730 cstate = 0;
731 if (dense_op)
732 len = ngens; /* The length of the fsarow output. */
733 nt = 0; /* Number of transitions in det */
734
735 while (++cstate <= ht.num_recs) {
736 if (kbm_print_level >= 3) {
737 if ((cstate <= 1000 && cstate % 100 == 0) ||
738 (cstate <= 10000 && cstate % 1000 == 0) ||
739 (cstate <= 100000 && cstate % 5000 == 0) || cstate % 50000 == 0)
740 printf(" #cstate = %d; number of states = %d.\n", cstate,
741 ht.num_recs);
742 }
743 cs_ptr = short_hash_rec(&ht, cstate);
744 cs_ptre = short_hash_rec(&ht, cstate) + short_hash_rec_len(&ht, cstate) - 1;
745 if (!dense_op)
746 len = 0;
747
748 for (g1 = 1; g1 <= ngens; g1++) {
749 /* Calculate action of generator g1 on state cstate - to get the image,
750 * we simply apply it to each state in the subset of states representing
751 * cstate.
752 */
753 g1shift = ((unsigned)g1) << SHIFT;
754 ht_ptrb = ht.current_ptr;
755 ht_ptre = ht_ptrb - 1;
756 ptr = cs_ptr - 1;
757 while (++ptr <= cs_ptre) {
758 tableptr = table[*ptr];
759 tableptre = table[*ptr + 1];
760 while (tableptr < tableptre) {
761 if ((*tableptr & MASKL) == g1shift) {
762 csi = *tableptr & MASKR;
763 if (csi == 0)
764 continue;
765 if (ht_ptrb > ht_ptre || csi > *ht_ptre) {
766 /* We have a new state for the image subset to be added to the end
767 */
768 *(++ht_ptre) = csi;
769 }
770 else {
771 ht_chptr = ht_ptrb;
772 while (*ht_chptr < csi)
773 ht_chptr++;
774 if (csi < *ht_chptr) {
775 /* we have a new state for the image subset to be added in the
776 * middle */
777 ht_ptr = ++ht_ptre;
778 while (ht_ptr > ht_chptr) {
779 *ht_ptr = *(ht_ptr - 1);
780 ht_ptr--;
781 }
782 *ht_ptr = csi;
783 }
784 }
785 }
786 tableptr++;
787 }
788 }
789 im = short_hash_locate(&ht, ht_ptre - ht_ptrb + 1);
790 if (dense_op)
791 fsarow[g1 - 1] = im;
792 else if (im > 0) {
793 fsarow[++len] = g1;
794 fsarow[++len] = im;
795 }
796 if (im > 0)
797 nt++;
798 }
799 if (!dense_op)
800 fsarow[0] = len++;
801 fwrite((void *)fsarow, sizeof(int), (size_t)len, tempfile);
802 }
803 fclose(tempfile);
804
805 ns = det->states->size = ht.num_recs;
806 det->table->numTransitions = nt;
807
808 /* Locate the accept states. A state is an accept state if and only if
809 * the subset representing it contains an accept state of *fsaptr.
810 */
811 tmalloc(det->is_accepting, boolean, ns + 1);
812 for (i = 1; i <= ns; i++)
813 det->is_accepting[i] = FALSE;
814 ct = 0;
815 for (cstate = 1; cstate <= ns; cstate++) {
816 cs_ptr = short_hash_rec(&ht, cstate);
817 cs_ptre = short_hash_rec(&ht, cstate) + short_hash_rec_len(&ht, cstate) - 1;
818 /* See if the set contains an accept-state */
819 ptr = cs_ptr - 1;
820 while (++ptr <= cs_ptre)
821 if (fsaptr->is_accepting[*ptr]) {
822 det->is_accepting[cstate] = TRUE;
823 ct++;
824 break;
825 }
826 }
827
828 det->num_accepting = ct;
829 if (ct == 1 || ct != ns) {
830 tmalloc(det->accepting, int, ct + 1);
831 ct = 0;
832 for (i = 1; i <= ns; i++)
833 if (det->is_accepting[i])
834 det->accepting[++ct] = i;
835 }
836 tfree(fsaptr->is_accepting);
837 tfree(det->is_accepting);
838 short_hash_clear(&ht);
839 tfree(fsarow);
840
841 if (destroy)
842 fsa_clear(fsaptr);
843
844 /* Now read the transition table back in */
845 tempfile = fopen(tempfilename, "r");
846 compressed_transitions_read(det, tempfile);
847 fclose(tempfile);
848
849 unlink(tempfilename);
850
851 return det;
852 }
853
nfa_cdeterminize_int(fsa * fsaptr,storage_type op_table_type,boolean destroy,char * tempfilename)854 static fsa *nfa_cdeterminize_int(fsa *fsaptr, storage_type op_table_type,
855 boolean destroy, char *tempfilename)
856 {
857 int ngens, nssi, ns, *fsarow, nt, cstate, csi, im, i, g1, len = 0, ct;
858 unsigned int **table, *tableptr, *tableptre, g1shift;
859 int *ht_ptr, *ht_chptr, *ht_ptrb, *ht_ptre, *cs_ptr, *cs_ptre, *ptr;
860 boolean dense_op;
861 hash_table ht;
862 fsa *det;
863 FILE *tempfile;
864
865 if (kbm_print_level >= 3)
866 printf(" #Calling nfa_determinize_short.\n");
867
868 tmalloc(det, fsa, 1);
869 fsa_init(det);
870 srec_copy(det->alphabet, fsaptr->alphabet);
871 det->flags[DFA] = TRUE;
872 det->flags[ACCESSIBLE] = TRUE;
873 det->flags[BFS] = TRUE;
874
875 det->states->type = SIMPLE;
876
877 det->table->table_type = op_table_type;
878 det->table->denserows = 0;
879 det->table->printing_format = op_table_type;
880
881 if (fsaptr->num_initial == 0) {
882 det->num_initial = 0;
883 det->num_accepting = 0;
884 det->states->size = 0;
885 if (destroy)
886 fsa_clear(fsaptr);
887 return det;
888 }
889 ngens = det->alphabet->size;
890
891 fsa_set_is_accepting(fsaptr);
892
893 dense_op = op_table_type == DENSE;
894 table = fsaptr->table->ctable_data_ptr;
895
896 det->num_initial = 1;
897 tmalloc(det->initial, int, 2);
898 det->initial[1] = 1;
899
900 hash_init(&ht, FALSE, 0, 0, 0);
901 ht_ptrb = ht.current_ptr;
902 ht_ptre = ht_ptrb - 1;
903 nssi = fsaptr->num_initial;
904 for (i = 0; i < nssi; i++)
905 *(++ht_ptre) = fsaptr->initial[i + 1];
906 im = hash_locate(&ht, ht_ptre - ht_ptrb + 1);
907 /* Each state in 'det' will be represented as a subset of the set of states
908 * of *fsaptr. The initial state contains the initial states
909 * of *fsaptr.
910 * A subset is an accept-state if it contains an accept state of *fsaptr
911 * The subsets will be stored as variable-length records in the hash-table,
912 * always in increasing order.
913 */
914 if (im != 1) {
915 fprintf(stderr, "Hash-initialisation problem in nfa_determinize.\n");
916 return 0;
917 }
918 if ((tempfile = fopen(tempfilename, "w")) == 0) {
919 fprintf(stderr, "Error: cannot open file %s\n", tempfilename);
920 return 0;
921 }
922 if (dense_op)
923 tmalloc(fsarow, int, ngens) else tmalloc(fsarow, int, 2 * ngens + 1)
924
925 cstate = 0;
926 if (dense_op)
927 len = ngens; /* The length of the fsarow output. */
928 nt = 0; /* Number of transitions in det */
929
930 while (++cstate <= ht.num_recs) {
931 if (kbm_print_level >= 3) {
932 if ((cstate <= 1000 && cstate % 100 == 0) ||
933 (cstate <= 10000 && cstate % 1000 == 0) ||
934 (cstate <= 100000 && cstate % 5000 == 0) || cstate % 50000 == 0)
935 printf(" #cstate = %d; number of states = %d.\n", cstate,
936 ht.num_recs);
937 }
938 cs_ptr = hash_rec(&ht, cstate);
939 cs_ptre = hash_rec(&ht, cstate) + hash_rec_len(&ht, cstate) - 1;
940 if (!dense_op)
941 len = 0;
942
943 for (g1 = 1; g1 <= ngens; g1++) {
944 /* Calculate action of generator g1 on state cstate - to get the image,
945 * we simply apply it to each state in the subset of states representing
946 * cstate.
947 */
948 g1shift = ((unsigned)g1) << SHIFT;
949 ht_ptrb = ht.current_ptr;
950 ht_ptre = ht_ptrb - 1;
951 ptr = cs_ptr - 1;
952 while (++ptr <= cs_ptre) {
953 tableptr = table[*ptr];
954 tableptre = table[*ptr + 1];
955 while (tableptr < tableptre) {
956 if ((*tableptr & MASKL) == g1shift) {
957 csi = *tableptr & MASKR;
958 if (csi == 0)
959 continue;
960 if (ht_ptrb > ht_ptre || csi > *ht_ptre) {
961 /* We have a new state for the image subset to be added to the end
962 */
963 *(++ht_ptre) = csi;
964 }
965 else {
966 ht_chptr = ht_ptrb;
967 while (*ht_chptr < csi)
968 ht_chptr++;
969 if (csi < *ht_chptr) {
970 /* we have a new state for the image subset to be added in the
971 * middle */
972 ht_ptr = ++ht_ptre;
973 while (ht_ptr > ht_chptr) {
974 *ht_ptr = *(ht_ptr - 1);
975 ht_ptr--;
976 }
977 *ht_ptr = csi;
978 }
979 }
980 }
981 tableptr++;
982 }
983 }
984 im = hash_locate(&ht, ht_ptre - ht_ptrb + 1);
985 if (dense_op)
986 fsarow[g1 - 1] = im;
987 else if (im > 0) {
988 fsarow[++len] = g1;
989 fsarow[++len] = im;
990 }
991 if (im > 0)
992 nt++;
993 }
994 if (!dense_op)
995 fsarow[0] = len++;
996 fwrite((void *)fsarow, sizeof(int), (size_t)len, tempfile);
997 }
998 fclose(tempfile);
999
1000 ns = det->states->size = ht.num_recs;
1001 det->table->numTransitions = nt;
1002
1003 /* Locate the accept states. A state is an accept state if and only if
1004 * the subset representing it contains an accept state of *fsaptr.
1005 */
1006 tmalloc(det->is_accepting, boolean, ns + 1);
1007 for (i = 1; i <= ns; i++)
1008 det->is_accepting[i] = FALSE;
1009 ct = 0;
1010 for (cstate = 1; cstate <= ns; cstate++) {
1011 cs_ptr = hash_rec(&ht, cstate);
1012 cs_ptre = hash_rec(&ht, cstate) + hash_rec_len(&ht, cstate) - 1;
1013 /* See if the set contains an accept-state */
1014 ptr = cs_ptr - 1;
1015 while (++ptr <= cs_ptre)
1016 if (fsaptr->is_accepting[*ptr]) {
1017 det->is_accepting[cstate] = TRUE;
1018 ct++;
1019 break;
1020 }
1021 }
1022
1023 det->num_accepting = ct;
1024 if (ct == 1 || ct != ns) {
1025 tmalloc(det->accepting, int, ct + 1);
1026 ct = 0;
1027 for (i = 1; i <= ns; i++)
1028 if (det->is_accepting[i])
1029 det->accepting[++ct] = i;
1030 }
1031 tfree(fsaptr->is_accepting);
1032 tfree(det->is_accepting);
1033 hash_clear(&ht);
1034 tfree(fsarow);
1035
1036 if (destroy)
1037 fsa_clear(fsaptr);
1038
1039 /* Now read the transition table back in */
1040 tempfile = fopen(tempfilename, "r");
1041 compressed_transitions_read(det, tempfile);
1042 fclose(tempfile);
1043
1044 unlink(tempfilename);
1045
1046 return det;
1047 }
1048
1049 /* *fsaptr must be a 2-variable (non-deterministic) fsa.
1050 * For the moment we assume it has no epsilon transitions.
1051 * The returned fsa is deterministic and accepts a word w_1 iff (w_1,w_2) is
1052 * accepted by *fsaptr, for some w_2 (with the usual padding conventions).
1053 * In fact, nfa_exists calls nfa_exists_int or nfa_exists_short,
1054 * the latter using the short hash table functions.
1055 */
nfa_exists(fsa * fsaptr,storage_type op_table_type,boolean destroy,char * tempfilename)1056 fsa *nfa_exists(fsa *fsaptr, storage_type op_table_type, boolean destroy,
1057 char *tempfilename)
1058 {
1059 if (kbm_print_level >= 3)
1060 printf(" #Calling nfa_exists.\n");
1061 if (fsaptr->states->size < MAXUSHORT)
1062 return nfa_exists_short(fsaptr, op_table_type, destroy, tempfilename);
1063 else
1064 return nfa_exists_int(fsaptr, op_table_type, destroy, tempfilename);
1065 }
1066
nfa_exists_short(fsa * fsaptr,storage_type op_table_type,boolean destroy,char * tempfilename)1067 fsa *nfa_exists_short(fsa *fsaptr, storage_type op_table_type, boolean destroy,
1068 char *tempfilename)
1069 {
1070 int **table, ne, *tableptr, *tableptre, ngens, ns, *fsarow, es, ef, nt,
1071 cstate, csi, im, i, k, g1, len = 0, ct;
1072 unsigned short *ht_ptr, *ht_chptr, *ht_ptrb, *ht_ptre, *cs_ptr, *cs_ptre,
1073 *ptr;
1074 boolean dense_op, got;
1075 short_hash_table ht;
1076 fsa *exists;
1077 FILE *tempfile;
1078
1079 if (kbm_print_level >= 3)
1080 printf(" #Calling nfa_exists_short.\n");
1081
1082 if (fsaptr->alphabet->type != PRODUCT || fsaptr->alphabet->arity != 2) {
1083 fprintf(stderr, "Error in nfa_exists: fsa must be 2-variable.\n");
1084 return 0;
1085 }
1086
1087 tmalloc(exists, fsa, 1);
1088 fsa_init(exists);
1089 srec_copy(exists->alphabet, fsaptr->alphabet->base);
1090 exists->flags[DFA] = TRUE;
1091 exists->flags[ACCESSIBLE] = TRUE;
1092 exists->flags[BFS] = TRUE;
1093
1094 exists->states->type = SIMPLE;
1095
1096 exists->table->table_type = op_table_type;
1097 exists->table->denserows = 0;
1098 exists->table->printing_format = op_table_type;
1099
1100 if (fsaptr->num_initial == 0) {
1101 exists->num_initial = 0;
1102 exists->num_accepting = 0;
1103 exists->states->size = 0;
1104 if (destroy)
1105 fsa_clear(fsaptr);
1106 return exists;
1107 }
1108 ne = fsaptr->alphabet->size;
1109 ngens = exists->alphabet->size;
1110
1111 if (ne != (ngens + 1) * (ngens + 1) - 1) {
1112 fprintf(stderr, "Error: in a 2-variable fsa, alphabet size should = "
1113 "(ngens+1)^2 - 1.\n");
1114 return 0;
1115 }
1116
1117 fsa_set_is_accepting(fsaptr);
1118
1119 dense_op = op_table_type == DENSE;
1120 table = fsaptr->table->table_data_ptr;
1121
1122 exists->num_initial = 1;
1123 tmalloc(exists->initial, int, 2);
1124 exists->initial[1] = 1;
1125
1126 short_hash_init(&ht, FALSE, 0, 0, 0);
1127 ht_ptr = ht.current_ptr;
1128 k = fsaptr->num_initial;
1129 for (i = 1; i <= k; i++)
1130 ht_ptr[i - 1] = fsaptr->initial[i];
1131 im = short_hash_locate(&ht, k);
1132 /* Each state in 'exists' will be represented as a subset of the set of states
1133 * of *fsaptr. The initial state consists of the initial states
1134 * of *fsaptr. A subset is an accept-state if it contains an accept state of
1135 * *fsaptr, or if one can get to an accept-state by applying elements
1136 * ($,x) where $ is the padding symbol.
1137 * The subsets will be stored as variable-length records in the hash-table,
1138 * always in increasing order.
1139 */
1140 if (im != 1) {
1141 fprintf(stderr, "Hash-initialisation problem in nfa_exists.\n");
1142 return 0;
1143 }
1144 if ((tempfile = fopen(tempfilename, "w")) == 0) {
1145 fprintf(stderr, "Error: cannot open file %s\n", tempfilename);
1146 return 0;
1147 }
1148 if (dense_op)
1149 tmalloc(fsarow, int, ngens) else tmalloc(fsarow, int, 2 * ngens + 1)
1150
1151 cstate = 0;
1152 if (dense_op)
1153 len = ngens; /* The length of the fsarow output. */
1154 nt = 0; /* Number of transitions in exists */
1155
1156 while (++cstate <= ht.num_recs) {
1157 if (kbm_print_level >= 3) {
1158 if ((cstate <= 1000 && cstate % 100 == 0) ||
1159 (cstate <= 10000 && cstate % 1000 == 0) ||
1160 (cstate <= 100000 && cstate % 5000 == 0) || cstate % 50000 == 0)
1161 printf(" #cstate = %d; number of states = %d.\n", cstate,
1162 ht.num_recs);
1163 }
1164 cs_ptr = short_hash_rec(&ht, cstate);
1165 cs_ptre = short_hash_rec(&ht, cstate) + short_hash_rec_len(&ht, cstate) - 1;
1166 if (!dense_op)
1167 len = 0;
1168
1169 for (g1 = 1; g1 <= ngens; g1++) {
1170 /* Calculate action of generator g1 on state cstate - to get the image,
1171 * we have to apply (g1,g2) to each element in the subset corresponding to
1172 * cstate, and this for each generator g2 of the base-alphabet (including
1173 * the padding symbol).
1174 */
1175 ht_ptrb = ht.current_ptr;
1176 ht_ptre = ht_ptrb - 1;
1177 ptr = cs_ptr - 1;
1178 es = (g1 - 1) * (ngens + 1) + 1;
1179 ef = g1 * (ngens + 1);
1180 /* As g2 ranges from 1 to ngens+1 in the pair (g1,g2), for fixed g1, the
1181 * corresponding edge number in the fsa ranges from es to ef.
1182 */
1183 while (++ptr <= cs_ptre) {
1184 tableptr = table[*ptr];
1185 tableptre = table[*ptr + 1];
1186 while (tableptr < tableptre) {
1187 if (*tableptr >= es && *tableptr <= ef) {
1188 csi = *(tableptr + 1);
1189 if (csi == 0)
1190 continue;
1191 if (ht_ptrb > ht_ptre || csi > *ht_ptre) {
1192 /* We have a new state for the image subset to be added to the end
1193 */
1194 *(++ht_ptre) = csi;
1195 }
1196 else {
1197 ht_chptr = ht_ptrb;
1198 while (*ht_chptr < csi)
1199 ht_chptr++;
1200 if (csi < *ht_chptr) {
1201 /* we have a new state for the image subset to be added in the
1202 * middle */
1203 ht_ptr = ++ht_ptre;
1204 while (ht_ptr > ht_chptr) {
1205 *ht_ptr = *(ht_ptr - 1);
1206 ht_ptr--;
1207 }
1208 *ht_ptr = csi;
1209 }
1210 }
1211 }
1212 tableptr += 2;
1213 }
1214 }
1215 im = short_hash_locate(&ht, ht_ptre - ht_ptrb + 1);
1216 if (dense_op)
1217 fsarow[g1 - 1] = im;
1218 else if (im > 0) {
1219 fsarow[++len] = g1;
1220 fsarow[++len] = im;
1221 }
1222 if (im > 0)
1223 nt++;
1224 }
1225 if (!dense_op)
1226 fsarow[0] = len++;
1227 fwrite((void *)fsarow, sizeof(int), (size_t)len, tempfile);
1228 }
1229 fclose(tempfile);
1230
1231 ns = exists->states->size = ht.num_recs;
1232 exists->table->numTransitions = nt;
1233
1234 /* Locate the accept states. This is slightly cumbersome, since a state
1235 * is an accept state if either the corresponding subset contains an
1236 * accept state of *fsaptr, or we can get from some such state to an
1237 * accept state by applying elements ($,x).
1238 * We will need to use the array exists->is_accepting.
1239 */
1240 tmalloc(exists->is_accepting, boolean, ns + 1);
1241 for (i = 1; i <= ns; i++)
1242 exists->is_accepting[i] = FALSE;
1243 ct = 0;
1244 for (cstate = ns; cstate >= 1; cstate--) {
1245 /* We work backwards through the states, since we wish to add on additional
1246 * elements at the end of the list in the hash-table - this destroys
1247 * later entries, but that doesn't matter, since we have already dealt
1248 * with them.
1249 */
1250 cs_ptr = short_hash_rec(&ht, cstate);
1251 cs_ptre = short_hash_rec(&ht, cstate) + short_hash_rec_len(&ht, cstate) - 1;
1252 /* First see if the set itself contains an accept-state */
1253 ptr = cs_ptr - 1;
1254 while (++ptr <= cs_ptre)
1255 if (fsaptr->is_accepting[*ptr]) {
1256 exists->is_accepting[cstate] = TRUE;
1257 ct++;
1258 break;
1259 }
1260 if (exists->is_accepting[cstate])
1261 continue;
1262 /* Next apply generators ($,x) and see if we get anything new, and
1263 * if it is an accept state.
1264 * Anything new is added to the end of the list - we don't need to
1265 * bother about having them in increasing order this time.
1266 */
1267 es = ngens * (ngens + 1) + 1;
1268 ef = (ngens + 1) * (ngens + 1) - 1;
1269 ptr = cs_ptr - 1;
1270 while (++ptr <= cs_ptre) {
1271 tableptr = table[*ptr];
1272 tableptre = table[*ptr + 1];
1273 while (tableptr < tableptre) {
1274 if (*tableptr >= es && *tableptr <= ef) {
1275 csi = *(tableptr + 1);
1276 if (csi == 0)
1277 continue;
1278 if (fsaptr->is_accepting[csi]) {
1279 exists->is_accepting[cstate] = TRUE;
1280 ct++;
1281 break;
1282 }
1283 /* see if csi is new */
1284 ht_chptr = cs_ptr - 1;
1285 got = FALSE;
1286 while (++ht_chptr <= cs_ptre)
1287 if (csi == *ht_chptr) {
1288 got = TRUE;
1289 break;
1290 }
1291 if (!got)
1292 /* add csi to the end */
1293 *(++cs_ptre) = csi;
1294 }
1295 tableptr += 2;
1296 }
1297 if (exists->is_accepting[cstate])
1298 break;
1299 }
1300 }
1301
1302 exists->num_accepting = ct;
1303 if (ct == 1 || ct != ns) {
1304 tmalloc(exists->accepting, int, ct + 1);
1305 ct = 0;
1306 for (i = 1; i <= ns; i++)
1307 if (exists->is_accepting[i])
1308 exists->accepting[++ct] = i;
1309 }
1310 tfree(fsaptr->is_accepting);
1311 tfree(exists->is_accepting);
1312 short_hash_clear(&ht);
1313 tfree(fsarow);
1314
1315 if (destroy)
1316 fsa_clear(fsaptr);
1317
1318 /* Now read the transition table back in */
1319 tempfile = fopen(tempfilename, "r");
1320 compressed_transitions_read(exists, tempfile);
1321 fclose(tempfile);
1322
1323 unlink(tempfilename);
1324
1325 return exists;
1326 }
1327
nfa_exists_int(fsa * fsaptr,storage_type op_table_type,boolean destroy,char * tempfilename)1328 fsa *nfa_exists_int(fsa *fsaptr, storage_type op_table_type, boolean destroy,
1329 char *tempfilename)
1330 {
1331 int **table, ne, *tableptr, *tableptre, ngens, ns, *fsarow, es, ef, nt,
1332 cstate, csi, im, i, k, g1, len = 0, ct;
1333 int *ht_ptr, *ht_chptr, *ht_ptrb, *ht_ptre, *cs_ptr, *cs_ptre, *ptr;
1334 boolean dense_op, got;
1335 hash_table ht;
1336 fsa *exists;
1337 FILE *tempfile;
1338
1339 if (kbm_print_level >= 3)
1340 printf(" #Calling nfa_exists_int.\n");
1341
1342 if (fsaptr->alphabet->type != PRODUCT || fsaptr->alphabet->arity != 2) {
1343 fprintf(stderr, "Error in nfa_exists: fsa must be 2-variable.\n");
1344 return 0;
1345 }
1346
1347 tmalloc(exists, fsa, 1);
1348 fsa_init(exists);
1349 srec_copy(exists->alphabet, fsaptr->alphabet->base);
1350 exists->flags[DFA] = TRUE;
1351 exists->flags[ACCESSIBLE] = TRUE;
1352 exists->flags[BFS] = TRUE;
1353
1354 exists->states->type = SIMPLE;
1355
1356 exists->table->table_type = op_table_type;
1357 exists->table->denserows = 0;
1358 exists->table->printing_format = op_table_type;
1359
1360 if (fsaptr->num_initial == 0) {
1361 exists->num_initial = 0;
1362 exists->num_accepting = 0;
1363 exists->states->size = 0;
1364 if (destroy)
1365 fsa_clear(fsaptr);
1366 return exists;
1367 }
1368 ne = fsaptr->alphabet->size;
1369 ngens = exists->alphabet->size;
1370
1371 if (ne != (ngens + 1) * (ngens + 1) - 1) {
1372 fprintf(stderr, "Error: in a 2-variable fsa, alphabet size should = "
1373 "(ngens+1)^2 - 1.\n");
1374 return 0;
1375 }
1376
1377 fsa_set_is_accepting(fsaptr);
1378
1379 dense_op = op_table_type == DENSE;
1380 table = fsaptr->table->table_data_ptr;
1381
1382 exists->num_initial = 1;
1383 tmalloc(exists->initial, int, 2);
1384 exists->initial[1] = 1;
1385
1386 hash_init(&ht, FALSE, 0, 0, 0);
1387 ht_ptr = ht.current_ptr;
1388 k = fsaptr->num_initial;
1389 for (i = 1; i <= k; i++)
1390 ht_ptr[i - 1] = fsaptr->initial[i];
1391 im = hash_locate(&ht, k);
1392 /* Each state in 'exists' will be represented as a subset of the set of states
1393 * of *fsaptr. The initial state consists of the initial states
1394 * of *fsaptr. A subset is an accept-state if it contains an accept state of
1395 * *fsaptr, or if one can get to an accept-state by applying elements
1396 * ($,x) where $ is the padding symbol.
1397 * The subsets will be stored as variable-length records in the hash-table,
1398 * always in increasing order.
1399 */
1400 if (im != 1) {
1401 fprintf(stderr, "Hash-initialisation problem in nfa_exists.\n");
1402 return 0;
1403 }
1404 if ((tempfile = fopen(tempfilename, "w")) == 0) {
1405 fprintf(stderr, "Error: cannot open file %s\n", tempfilename);
1406 return 0;
1407 }
1408 if (dense_op)
1409 tmalloc(fsarow, int, ngens) else tmalloc(fsarow, int, 2 * ngens + 1)
1410
1411 cstate = 0;
1412 if (dense_op)
1413 len = ngens; /* The length of the fsarow output. */
1414 nt = 0; /* Number of transitions in exists */
1415
1416 while (++cstate <= ht.num_recs) {
1417 if (kbm_print_level >= 3) {
1418 if ((cstate <= 1000 && cstate % 100 == 0) ||
1419 (cstate <= 10000 && cstate % 1000 == 0) ||
1420 (cstate <= 100000 && cstate % 5000 == 0) || cstate % 50000 == 0)
1421 printf(" #cstate = %d; number of states = %d.\n", cstate,
1422 ht.num_recs);
1423 }
1424 cs_ptr = hash_rec(&ht, cstate);
1425 cs_ptre = hash_rec(&ht, cstate) + hash_rec_len(&ht, cstate) - 1;
1426 if (!dense_op)
1427 len = 0;
1428
1429 for (g1 = 1; g1 <= ngens; g1++) {
1430 /* Calculate action of generator g1 on state cstate - to get the image,
1431 * we have to apply (g1,g2) to each element in the subset corresponding to
1432 * cstate, and this for each generator g2 of the base-alphabet (including
1433 * the padding symbol).
1434 */
1435 ht_ptrb = ht.current_ptr;
1436 ht_ptre = ht_ptrb - 1;
1437 ptr = cs_ptr - 1;
1438 es = (g1 - 1) * (ngens + 1) + 1;
1439 ef = g1 * (ngens + 1);
1440 /* As g2 ranges from 1 to ngens+1 in the pair (g1,g2), for fixed g1, the
1441 * corresponding edge number in the fsa ranges from es to ef.
1442 */
1443 while (++ptr <= cs_ptre) {
1444 tableptr = table[*ptr];
1445 tableptre = table[*ptr + 1];
1446 while (tableptr < tableptre) {
1447 if (*tableptr >= es && *tableptr <= ef) {
1448 csi = *(tableptr + 1);
1449 if (csi == 0)
1450 continue;
1451 if (ht_ptrb > ht_ptre || csi > *ht_ptre) {
1452 /* We have a new state for the image subset to be added to the end
1453 */
1454 *(++ht_ptre) = csi;
1455 }
1456 else {
1457 ht_chptr = ht_ptrb;
1458 while (*ht_chptr < csi)
1459 ht_chptr++;
1460 if (csi < *ht_chptr) {
1461 /* we have a new state for the image subset to be added in the
1462 * middle */
1463 ht_ptr = ++ht_ptre;
1464 while (ht_ptr > ht_chptr) {
1465 *ht_ptr = *(ht_ptr - 1);
1466 ht_ptr--;
1467 }
1468 *ht_ptr = csi;
1469 }
1470 }
1471 }
1472 tableptr += 2;
1473 }
1474 }
1475 im = hash_locate(&ht, ht_ptre - ht_ptrb + 1);
1476 if (dense_op)
1477 fsarow[g1 - 1] = im;
1478 else if (im > 0) {
1479 fsarow[++len] = g1;
1480 fsarow[++len] = im;
1481 }
1482 if (im > 0)
1483 nt++;
1484 }
1485 if (!dense_op)
1486 fsarow[0] = len++;
1487 fwrite((void *)fsarow, sizeof(int), (size_t)len, tempfile);
1488 }
1489 fclose(tempfile);
1490
1491 ns = exists->states->size = ht.num_recs;
1492 exists->table->numTransitions = nt;
1493
1494 /* Locate the accept states. This is slightly cumbersome, since a state
1495 * is an accept state if either the corresponding subset contains an
1496 * accept state of *fsaptr, or we can get from some such state to an
1497 * accept state by applying elements ($,x).
1498 * We will need to use the array exists->is_accepting.
1499 */
1500 tmalloc(exists->is_accepting, boolean, ns + 1);
1501 for (i = 1; i <= ns; i++)
1502 exists->is_accepting[i] = FALSE;
1503 ct = 0;
1504 for (cstate = ns; cstate >= 1; cstate--) {
1505 /* We work backwards through the states, since we wish to add on additional
1506 * elements at the end of the list in the hash-table - this destroys
1507 * later entries, but that doesn't matter, since we have already dealt
1508 * with them.
1509 */
1510 cs_ptr = hash_rec(&ht, cstate);
1511 cs_ptre = hash_rec(&ht, cstate) + hash_rec_len(&ht, cstate) - 1;
1512 /* First see if the set itself contains an accept-state */
1513 ptr = cs_ptr - 1;
1514 while (++ptr <= cs_ptre)
1515 if (fsaptr->is_accepting[*ptr]) {
1516 exists->is_accepting[cstate] = TRUE;
1517 ct++;
1518 break;
1519 }
1520 if (exists->is_accepting[cstate])
1521 continue;
1522 /* Next apply generators ($,x) and see if we get anything new, and
1523 * if it is an accept state.
1524 * Anything new is added to the end of the list - we don't need to
1525 * bother about having them in increasing order this time.
1526 */
1527 es = ngens * (ngens + 1) + 1;
1528 ef = (ngens + 1) * (ngens + 1) - 1;
1529 ptr = cs_ptr - 1;
1530 while (++ptr <= cs_ptre) {
1531 tableptr = table[*ptr];
1532 tableptre = table[*ptr + 1];
1533 while (tableptr < tableptre) {
1534 if (*tableptr >= es && *tableptr <= ef) {
1535 csi = *(tableptr + 1);
1536 if (csi == 0)
1537 continue;
1538 if (fsaptr->is_accepting[csi]) {
1539 exists->is_accepting[cstate] = TRUE;
1540 ct++;
1541 break;
1542 }
1543 /* see if csi is new */
1544 ht_chptr = cs_ptr - 1;
1545 got = FALSE;
1546 while (++ht_chptr <= cs_ptre)
1547 if (csi == *ht_chptr) {
1548 got = TRUE;
1549 break;
1550 }
1551 if (!got)
1552 /* add csi to the end */
1553 *(++cs_ptre) = csi;
1554 }
1555 tableptr += 2;
1556 }
1557 if (exists->is_accepting[cstate])
1558 break;
1559 }
1560 }
1561
1562 exists->num_accepting = ct;
1563 if (ct == 1 || ct != ns) {
1564 tmalloc(exists->accepting, int, ct + 1);
1565 ct = 0;
1566 for (i = 1; i <= ns; i++)
1567 if (exists->is_accepting[i])
1568 exists->accepting[++ct] = i;
1569 }
1570 tfree(fsaptr->is_accepting);
1571 tfree(exists->is_accepting);
1572 hash_clear(&ht);
1573 tfree(fsarow);
1574
1575 if (destroy)
1576 fsa_clear(fsaptr);
1577
1578 /* Now read the transition table back in */
1579 tempfile = fopen(tempfilename, "r");
1580 compressed_transitions_read(exists, tempfile);
1581 fclose(tempfile);
1582
1583 unlink(tempfilename);
1584
1585 return exists;
1586 }
1587