1 /* file fsaipmin.c - 12. 12. 94.
2  * This file contains the routines fsa_ip_minimize and fsa_ip_labeled_minimize.
3  * They are based on the minimization functions in fsa.c, but instead of
4  * holding the whole of the transtion table of the fsa to be minimized in
5  * store, they read it in from file with each iteration.
6  * The reading part is as in "compressed_transitions_read".
7  */
8 
9 #include <stdlib.h>
10 #include "defs.h"
11 #include "fsa.h"
12 #include "hash.h"
13 #include "externals.h"
14 
15 /* Functions defined in this file */
16 
17 /* Minimize the fsa *fsaptr, of which transitions are stored externally */
fsa_ip_minimize(fsa * fsaptr)18 int fsa_ip_minimize(fsa *fsaptr)
19 {
20   int *block_numa, *block_numb, *block_swap, i, j, k, l, len, *ptr, *ptr2,
21       *ptr2e, *ht_ptr, ne, ns_orig, *fsarow, **table, ns_final, ns_new,
22       num_iterations;
23   hash_table ht;
24   boolean fixed;
25   FILE *rfile;
26 
27   if (fsaptr->table->table_type == SPARSE && fsaptr->table->denserows > 0) {
28     fprintf(stderr, "Sorry: fsa_minimize unavailable for sparse storage with "
29                     "dense rows.\n");
30     return -1;
31   }
32 
33   if (kbm_print_level >= 3)
34     printf("    #Calling fsa_ip_minimize.\n");
35   if (!fsaptr->flags[DFA]) {
36     fprintf(stderr, "Error: fsa_minimize only applies to DFA's.\n");
37     return -1;
38   }
39   if (!fsaptr->flags[ACCESSIBLE]) {
40     fprintf(stderr, "Error: fsa in fsa_labeled_minimize must be accessible.\n");
41     return -1;
42   }
43 
44   ne = fsaptr->alphabet->size;
45   ns_orig = fsaptr->states->size;
46   if (ns_orig == 0) {
47     fsaptr->flags[TRIM] = TRUE;
48     fsaptr->flags[MINIMIZED] = TRUE;
49     return 0;
50   }
51 
52   /* First throw away any existing structure on the state-set. */
53   srec_clear(fsaptr->states);
54   fsaptr->states->type = SIMPLE;
55 
56   tmalloc(block_numa, int, ns_orig + 1);
57   tmalloc(block_numb, int, ns_orig + 1);
58   /* Start with block_numa equal to the accept/reject division
59    * Remember that state/block number 0 is always failure with no hope.
60    */
61   if (fsaptr->num_accepting == ns_orig) {
62     block_numa[0] = 0;
63     for (i = 1; i <= ns_orig; i++)
64       block_numa[i] = 1;
65   }
66   else {
67     for (i = 0; i <= ns_orig; i++)
68       block_numa[i] = 0;
69     for (i = 1; i <= fsaptr->num_accepting; i++)
70       block_numa[fsaptr->accepting[i]] = 1;
71   }
72 
73   fixed = fsaptr->table->table_type == DENSE;
74 
75   if (fixed)
76     tmalloc(fsarow, int, ne + 1) else tmalloc(fsarow, int, 2 * ne)
77 
78         ns_new = 1;
79   num_iterations = 0;
80   /* The main refinement loop follows. */
81   do {
82     if ((rfile = fopen(fsaptr->table->filename, "r")) == 0) {
83       fprintf(stderr, "#Cannot open file %s.\n", fsaptr->table->filename);
84       return -1;
85     }
86     num_iterations++;
87     ns_final = ns_new;
88     /* Turn off excessive printing at this point */
89     j = kbm_print_level;
90     kbm_print_level = 1;
91     hash_init(&ht, fixed, ne + 1, 0, 0);
92     kbm_print_level = j;
93     if (kbm_print_level >= 3)
94       printf("    #Iterating - number of state categories = %d.\n", ns_new);
95     block_numb[0] = 0;
96     for (i = 1; i <= ns_orig; i++) {
97       /* Insert the encoded form of the transitions from state i into the
98        * hashtable preceded by the current block number of i.
99        */
100       ptr = ht.current_ptr;
101       *ptr = block_numa[i];
102       if (fixed) {
103         fread((void *)(fsarow + 1), sizeof(int), (size_t)ne, rfile);
104         for (j = 1; j <= ne; j++)
105           ptr[j] = block_numa[fsarow[j]];
106         l = ne + 1;
107       }
108       else {
109         fread((void *)&len, sizeof(int), (size_t)1, rfile);
110         if (len > 0)
111           fread((void *)fsarow, sizeof(int), (size_t)len, rfile);
112         l = 0;
113         for (j = 1; j < len; j += 2) {
114           k = block_numa[fsarow[j]];
115           if (k > 0) {
116             ptr[++l] = fsarow[j - 1];
117             ptr[++l] = k;
118           }
119         }
120         if (l > 0 || *ptr > 0)
121           l++;
122         /* For technical reasons, we want the zero record to be empty */
123       }
124       block_numb[i] = hash_locate(&ht, l);
125     }
126     fclose(rfile);
127     ns_new = ht.num_recs;
128     block_swap = block_numa;
129     block_numa = block_numb;
130     block_numb = block_swap;
131     if (ns_new > ns_final)
132       hash_clear(&ht);
133   } while (ns_new > ns_final);
134 
135   tfree(fsarow);
136 
137   /* At this stage, either ns_final = ns_new, or the fsa has empty accepted
138    * language, ns_new=0 and ns_final=1.
139    */
140 
141   fsaptr->flags[TRIM] = TRUE;
142   fsaptr->flags[MINIMIZED] = TRUE;
143 
144   if (ns_new == 0) {
145     /* This is the awkward case of no states - always causes problems! */
146     fsaptr->states->size = 0;
147     fsaptr->num_initial = 0;
148     tfree(fsaptr->initial);
149     fsaptr->num_accepting = 0;
150     tfree(fsaptr->accepting);
151     unlink(fsaptr->table->filename);
152     tfree(fsaptr->table->filename);
153   }
154   else {
155     /* Re-define the fsa fields  */
156     fsaptr->states->size = ns_final;
157 
158     fsaptr->initial[1] = block_numa[fsaptr->initial[1]];
159 
160     if (fsaptr->num_accepting == ns_orig) {
161       fsaptr->num_accepting = ns_final;
162       if (ns_final == 1) {
163         tmalloc(fsaptr->accepting, int, 2);
164         fsaptr->accepting[1] = 1;
165       }
166     }
167     else {
168       tmalloc(fsaptr->is_accepting, boolean, ns_final + 1);
169       for (i = 1; i <= ns_final; i++)
170         fsaptr->is_accepting[i] = FALSE;
171       for (i = 1; i <= fsaptr->num_accepting; i++)
172         fsaptr->is_accepting[block_numa[fsaptr->accepting[i]]] = TRUE;
173       fsaptr->num_accepting = 0;
174       for (i = 1; i <= ns_final; i++)
175         if (fsaptr->is_accepting[i])
176           fsaptr->num_accepting++;
177       tfree(fsaptr->accepting);
178       tmalloc(fsaptr->accepting, int, fsaptr->num_accepting + 1);
179       j = 0;
180       for (i = 1; i <= ns_final; i++)
181         if (fsaptr->is_accepting[i])
182           fsaptr->accepting[++j] = i;
183       tfree(fsaptr->is_accepting);
184     }
185 
186     /* Finally copy the transition table data from the hash-table back to the
187      * fsa */
188     unlink(fsaptr->table->filename);
189     tfree(fsaptr->table->filename);
190     if (fixed) {
191       fsa_table_init(fsaptr->table, ns_final, ne);
192       table = fsaptr->table->table_data_ptr;
193       for (i = 1; i <= ns_final; i++) {
194         ht_ptr = hash_rec(&ht, i);
195         for (j = 1; j <= ne; j++)
196           table[j][i] = ht_ptr[j];
197       }
198     }
199     else {
200       tmalloc(fsaptr->table->table_data_ptr, int *, ns_final + 2);
201       tmalloc(fsaptr->table->table_data_ptr[0], int, ht.tot_space - ns_final);
202       table = fsaptr->table->table_data_ptr;
203       table[1] = ptr = table[0];
204       for (i = 1; i <= ns_final; i++) {
205         ht_ptr = hash_rec(&ht, i);
206         ptr2 = ht_ptr + 1;
207         ptr2e = ht_ptr + hash_rec_len(&ht, i) - 1;
208         while (ptr2 <= ptr2e)
209           *(ptr++) = *(ptr2++);
210         table[i + 1] = ptr;
211       }
212     }
213   }
214   hash_clear(&ht);
215   tfree(block_numa);
216   tfree(block_numb);
217   if (kbm_print_level >= 3)
218     printf("    #Number of iterations = %d.\n", num_iterations);
219   return 0;
220 }
221 
222 /* This is the minimization function for fsa's which misght have more than
223  * two categories of states.
224  * We use the labeled set-record type to identify the categories, so *fsaptr
225  * must have state-set of labeled type.
226  * Minimize the fsa *fsaptr, of which transitions are stored externally.
227  */
fsa_ip_labeled_minimize(fsa * fsaptr)228 int fsa_ip_labeled_minimize(fsa *fsaptr)
229 {
230   int *block_numa, *block_numb, *block_swap, i, j, k, l, len, *ptr, *ptr2,
231       *ptr2e, *ht_ptr, ne, ns_orig, *fsarow, **table, ns_final, ns_new,
232       num_iterations;
233   hash_table ht;
234   boolean fixed, *occurs;
235   FILE *rfile;
236 
237   if (fsaptr->table->table_type == SPARSE && fsaptr->table->denserows > 0) {
238     fprintf(stderr, "Sorry: fsa_minimize unavailable for sparse storage with "
239                     "dense rows.\n");
240     return -1;
241   }
242   if (kbm_print_level >= 3)
243     printf("    #Calling fsa_ip_labeled_minimize.\n");
244   if (!fsaptr->flags[DFA]) {
245     fprintf(stderr, "Error: fsa_labeled_minimize only applies to DFA's.\n");
246     return -1;
247   }
248   if (!fsaptr->flags[ACCESSIBLE]) {
249     fprintf(stderr, "Error: fsa in fsa_labeled_minimize must be accessible.\n");
250     return -1;
251   }
252 
253   if (fsaptr->states->type != LABELED) {
254     fprintf(
255         stderr,
256         "Error: in fsa_labeled_minimize state=set must have type labeled.\n");
257     return -1;
258   }
259 
260   ne = fsaptr->alphabet->size;
261   ns_orig = fsaptr->states->size;
262   if (ns_orig == 0)
263     return 0;
264 
265   /* Start with block_numa equal to the subdivision defined by the labeling.  */
266   tmalloc(occurs, boolean, fsaptr->states->labels->size + 1);
267   for (i = 1; i <= fsaptr->states->labels->size; i++)
268     occurs[i] = FALSE;
269   tmalloc(block_numa, int, ns_orig + 1);
270   tmalloc(block_numb, int, ns_orig + 1);
271   ns_new = 0;
272   block_numa[0] = 0; /* The zero state is always regarded as having label 0 */
273   for (i = 1; i <= ns_orig; i++) {
274     j = fsaptr->states->setToLabels[i];
275     if (j > 0 && !occurs[j]) {
276       occurs[j] = TRUE;
277       ns_new++;
278     }
279     block_numa[i] = j;
280   }
281   tfree(occurs);
282   if (ns_new == 0)
283     ns_new = 1; /* a patch for the case when there are no labels */
284 
285   fixed = fsaptr->table->table_type == DENSE;
286 
287   if (fixed)
288     tmalloc(fsarow, int, ne + 1) else tmalloc(fsarow, int, 2 * ne)
289 
290         num_iterations = 0;
291   /* The main refinement loop follows. */
292   do {
293     if ((rfile = fopen(fsaptr->table->filename, "r")) == 0) {
294       fprintf(stderr, "#Cannot open file %s.\n", fsaptr->table->filename);
295       return -1;
296     }
297     num_iterations++;
298     ns_final = ns_new;
299     /* Turn off excessive printing at this point */
300     j = kbm_print_level;
301     kbm_print_level = 1;
302     hash_init(&ht, fixed, ne + 1, 0, 0);
303     kbm_print_level = j;
304     if (kbm_print_level >= 3)
305       printf("    #Iterating - number of state categories = %d.\n", ns_new);
306     block_numb[0] = 0;
307     for (i = 1; i <= ns_orig; i++) {
308       /* Insert the encoded form of the transitions from state i into the
309        * hashtable preceded by the current block number of i. First read in the
310        * transitions for this row from the file.
311        */
312       ptr = ht.current_ptr;
313       *ptr = block_numa[i];
314       if (fixed) {
315         fread((void *)(fsarow + 1), sizeof(int), (size_t)ne, rfile);
316         for (j = 1; j <= ne; j++)
317           ptr[j] = block_numa[fsarow[j]];
318         l = ne + 1;
319       }
320       else {
321         fread((void *)&len, sizeof(int), (size_t)1, rfile);
322         if (len > 0)
323           fread((void *)fsarow, sizeof(int), (size_t)len, rfile);
324         l = 0;
325         for (j = 1; j < len; j += 2) {
326           k = block_numa[fsarow[j]];
327           if (k > 0) {
328             ptr[++l] = fsarow[j - 1];
329             ptr[++l] = k;
330           }
331         }
332         if (l > 0 || *ptr > 0)
333           l++;
334         /* For technical reasons, we want the zero record to be empty */
335       }
336       block_numb[i] = hash_locate(&ht, l);
337     }
338     fclose(rfile);
339     ns_new = ht.num_recs;
340     block_swap = block_numa;
341     block_numa = block_numb;
342     block_numb = block_swap;
343     if (ns_new > ns_final)
344       hash_clear(&ht);
345   } while (ns_new > ns_final);
346 
347   tfree(fsarow);
348 
349   /* At this stage, either ns_final = ns_new, or the fsa has empty accepted
350    * language, ns_new=0 and ns_final=1.
351    */
352 
353   fsaptr->flags[ACCESSIBLE] = TRUE;
354 
355   if (ns_new == 0) {
356     /* This is the awkward case of no states - always causes problems! */
357     fsaptr->states->size = 0;
358     fsaptr->num_initial = 0;
359     tfree(fsaptr->initial);
360     fsaptr->num_accepting = 0;
361     tfree(fsaptr->accepting);
362     unlink(fsaptr->table->filename);
363     tfree(fsaptr->table->filename);
364   }
365   else {
366     /* Re-define the fsa fields  */
367     fsaptr->states->size = ns_final;
368 
369     fsaptr->initial[1] = block_numa[fsaptr->initial[1]];
370 
371     for (i = 1; i <= ns_orig; i++)
372       block_numb[block_numa[i]] = fsaptr->states->setToLabels[i];
373     for (i = 1; i <= ns_final; i++)
374       fsaptr->states->setToLabels[i] = block_numb[i];
375 
376     if (fsaptr->num_accepting == ns_orig) {
377       fsaptr->num_accepting = ns_final;
378       if (ns_final == 1) {
379         tmalloc(fsaptr->accepting, int, 2);
380         fsaptr->accepting[1] = 1;
381       }
382     }
383     else {
384       tmalloc(fsaptr->is_accepting, boolean, ns_final + 1);
385       for (i = 1; i <= ns_final; i++)
386         fsaptr->is_accepting[i] = FALSE;
387       for (i = 1; i <= fsaptr->num_accepting; i++)
388         fsaptr->is_accepting[block_numa[fsaptr->accepting[i]]] = TRUE;
389       fsaptr->num_accepting = 0;
390       for (i = 1; i <= ns_final; i++)
391         if (fsaptr->is_accepting[i])
392           fsaptr->num_accepting++;
393       tfree(fsaptr->accepting);
394       tmalloc(fsaptr->accepting, int, fsaptr->num_accepting + 1);
395       j = 0;
396       for (i = 1; i <= ns_final; i++)
397         if (fsaptr->is_accepting[i])
398           fsaptr->accepting[++j] = i;
399       tfree(fsaptr->is_accepting);
400     }
401 
402     /* Finally copy the transition table data from the hash-table back to the
403      * fsa */
404     unlink(fsaptr->table->filename);
405     tfree(fsaptr->table->filename);
406     if (fixed) {
407       fsa_table_init(fsaptr->table, ns_final, ne);
408       table = fsaptr->table->table_data_ptr;
409       for (i = 1; i <= ns_final; i++) {
410         ht_ptr = hash_rec(&ht, i);
411         for (j = 1; j <= ne; j++)
412           table[j][i] = ht_ptr[j];
413       }
414     }
415     else {
416       tmalloc(fsaptr->table->table_data_ptr, int *, ns_final + 2);
417       tmalloc(fsaptr->table->table_data_ptr[0], int, ht.tot_space - ns_final);
418       table = fsaptr->table->table_data_ptr;
419       table[1] = ptr = table[0];
420       for (i = 1; i <= ns_final; i++) {
421         ht_ptr = hash_rec(&ht, i);
422         ptr2 = ht_ptr + 1;
423         ptr2e = ht_ptr + hash_rec_len(&ht, i) - 1;
424         while (ptr2 <= ptr2e)
425           *(ptr++) = *(ptr2++);
426         table[i + 1] = ptr;
427       }
428     }
429   }
430   hash_clear(&ht);
431   tfree(block_numa);
432   tfree(block_numb);
433   if (kbm_print_level >= 3)
434     printf("    #Number of iterations = %d.\n", num_iterations);
435   return 0;
436 }
437