1 /* file fsaipmin.c - 12. 12. 94.
2 * This file contains the routines fsa_ip_minimize and fsa_ip_labeled_minimize.
3 * They are based on the minimization functions in fsa.c, but instead of
4 * holding the whole of the transtion table of the fsa to be minimized in
5 * store, they read it in from file with each iteration.
6 * The reading part is as in "compressed_transitions_read".
7 */
8
9 #include <stdlib.h>
10 #include "defs.h"
11 #include "fsa.h"
12 #include "hash.h"
13 #include "externals.h"
14
15 /* Functions defined in this file */
16
17 /* Minimize the fsa *fsaptr, of which transitions are stored externally */
fsa_ip_minimize(fsa * fsaptr)18 int fsa_ip_minimize(fsa *fsaptr)
19 {
20 int *block_numa, *block_numb, *block_swap, i, j, k, l, len, *ptr, *ptr2,
21 *ptr2e, *ht_ptr, ne, ns_orig, *fsarow, **table, ns_final, ns_new,
22 num_iterations;
23 hash_table ht;
24 boolean fixed;
25 FILE *rfile;
26
27 if (fsaptr->table->table_type == SPARSE && fsaptr->table->denserows > 0) {
28 fprintf(stderr, "Sorry: fsa_minimize unavailable for sparse storage with "
29 "dense rows.\n");
30 return -1;
31 }
32
33 if (kbm_print_level >= 3)
34 printf(" #Calling fsa_ip_minimize.\n");
35 if (!fsaptr->flags[DFA]) {
36 fprintf(stderr, "Error: fsa_minimize only applies to DFA's.\n");
37 return -1;
38 }
39 if (!fsaptr->flags[ACCESSIBLE]) {
40 fprintf(stderr, "Error: fsa in fsa_labeled_minimize must be accessible.\n");
41 return -1;
42 }
43
44 ne = fsaptr->alphabet->size;
45 ns_orig = fsaptr->states->size;
46 if (ns_orig == 0) {
47 fsaptr->flags[TRIM] = TRUE;
48 fsaptr->flags[MINIMIZED] = TRUE;
49 return 0;
50 }
51
52 /* First throw away any existing structure on the state-set. */
53 srec_clear(fsaptr->states);
54 fsaptr->states->type = SIMPLE;
55
56 tmalloc(block_numa, int, ns_orig + 1);
57 tmalloc(block_numb, int, ns_orig + 1);
58 /* Start with block_numa equal to the accept/reject division
59 * Remember that state/block number 0 is always failure with no hope.
60 */
61 if (fsaptr->num_accepting == ns_orig) {
62 block_numa[0] = 0;
63 for (i = 1; i <= ns_orig; i++)
64 block_numa[i] = 1;
65 }
66 else {
67 for (i = 0; i <= ns_orig; i++)
68 block_numa[i] = 0;
69 for (i = 1; i <= fsaptr->num_accepting; i++)
70 block_numa[fsaptr->accepting[i]] = 1;
71 }
72
73 fixed = fsaptr->table->table_type == DENSE;
74
75 if (fixed)
76 tmalloc(fsarow, int, ne + 1) else tmalloc(fsarow, int, 2 * ne)
77
78 ns_new = 1;
79 num_iterations = 0;
80 /* The main refinement loop follows. */
81 do {
82 if ((rfile = fopen(fsaptr->table->filename, "r")) == 0) {
83 fprintf(stderr, "#Cannot open file %s.\n", fsaptr->table->filename);
84 return -1;
85 }
86 num_iterations++;
87 ns_final = ns_new;
88 /* Turn off excessive printing at this point */
89 j = kbm_print_level;
90 kbm_print_level = 1;
91 hash_init(&ht, fixed, ne + 1, 0, 0);
92 kbm_print_level = j;
93 if (kbm_print_level >= 3)
94 printf(" #Iterating - number of state categories = %d.\n", ns_new);
95 block_numb[0] = 0;
96 for (i = 1; i <= ns_orig; i++) {
97 /* Insert the encoded form of the transitions from state i into the
98 * hashtable preceded by the current block number of i.
99 */
100 ptr = ht.current_ptr;
101 *ptr = block_numa[i];
102 if (fixed) {
103 fread((void *)(fsarow + 1), sizeof(int), (size_t)ne, rfile);
104 for (j = 1; j <= ne; j++)
105 ptr[j] = block_numa[fsarow[j]];
106 l = ne + 1;
107 }
108 else {
109 fread((void *)&len, sizeof(int), (size_t)1, rfile);
110 if (len > 0)
111 fread((void *)fsarow, sizeof(int), (size_t)len, rfile);
112 l = 0;
113 for (j = 1; j < len; j += 2) {
114 k = block_numa[fsarow[j]];
115 if (k > 0) {
116 ptr[++l] = fsarow[j - 1];
117 ptr[++l] = k;
118 }
119 }
120 if (l > 0 || *ptr > 0)
121 l++;
122 /* For technical reasons, we want the zero record to be empty */
123 }
124 block_numb[i] = hash_locate(&ht, l);
125 }
126 fclose(rfile);
127 ns_new = ht.num_recs;
128 block_swap = block_numa;
129 block_numa = block_numb;
130 block_numb = block_swap;
131 if (ns_new > ns_final)
132 hash_clear(&ht);
133 } while (ns_new > ns_final);
134
135 tfree(fsarow);
136
137 /* At this stage, either ns_final = ns_new, or the fsa has empty accepted
138 * language, ns_new=0 and ns_final=1.
139 */
140
141 fsaptr->flags[TRIM] = TRUE;
142 fsaptr->flags[MINIMIZED] = TRUE;
143
144 if (ns_new == 0) {
145 /* This is the awkward case of no states - always causes problems! */
146 fsaptr->states->size = 0;
147 fsaptr->num_initial = 0;
148 tfree(fsaptr->initial);
149 fsaptr->num_accepting = 0;
150 tfree(fsaptr->accepting);
151 unlink(fsaptr->table->filename);
152 tfree(fsaptr->table->filename);
153 }
154 else {
155 /* Re-define the fsa fields */
156 fsaptr->states->size = ns_final;
157
158 fsaptr->initial[1] = block_numa[fsaptr->initial[1]];
159
160 if (fsaptr->num_accepting == ns_orig) {
161 fsaptr->num_accepting = ns_final;
162 if (ns_final == 1) {
163 tmalloc(fsaptr->accepting, int, 2);
164 fsaptr->accepting[1] = 1;
165 }
166 }
167 else {
168 tmalloc(fsaptr->is_accepting, boolean, ns_final + 1);
169 for (i = 1; i <= ns_final; i++)
170 fsaptr->is_accepting[i] = FALSE;
171 for (i = 1; i <= fsaptr->num_accepting; i++)
172 fsaptr->is_accepting[block_numa[fsaptr->accepting[i]]] = TRUE;
173 fsaptr->num_accepting = 0;
174 for (i = 1; i <= ns_final; i++)
175 if (fsaptr->is_accepting[i])
176 fsaptr->num_accepting++;
177 tfree(fsaptr->accepting);
178 tmalloc(fsaptr->accepting, int, fsaptr->num_accepting + 1);
179 j = 0;
180 for (i = 1; i <= ns_final; i++)
181 if (fsaptr->is_accepting[i])
182 fsaptr->accepting[++j] = i;
183 tfree(fsaptr->is_accepting);
184 }
185
186 /* Finally copy the transition table data from the hash-table back to the
187 * fsa */
188 unlink(fsaptr->table->filename);
189 tfree(fsaptr->table->filename);
190 if (fixed) {
191 fsa_table_init(fsaptr->table, ns_final, ne);
192 table = fsaptr->table->table_data_ptr;
193 for (i = 1; i <= ns_final; i++) {
194 ht_ptr = hash_rec(&ht, i);
195 for (j = 1; j <= ne; j++)
196 table[j][i] = ht_ptr[j];
197 }
198 }
199 else {
200 tmalloc(fsaptr->table->table_data_ptr, int *, ns_final + 2);
201 tmalloc(fsaptr->table->table_data_ptr[0], int, ht.tot_space - ns_final);
202 table = fsaptr->table->table_data_ptr;
203 table[1] = ptr = table[0];
204 for (i = 1; i <= ns_final; i++) {
205 ht_ptr = hash_rec(&ht, i);
206 ptr2 = ht_ptr + 1;
207 ptr2e = ht_ptr + hash_rec_len(&ht, i) - 1;
208 while (ptr2 <= ptr2e)
209 *(ptr++) = *(ptr2++);
210 table[i + 1] = ptr;
211 }
212 }
213 }
214 hash_clear(&ht);
215 tfree(block_numa);
216 tfree(block_numb);
217 if (kbm_print_level >= 3)
218 printf(" #Number of iterations = %d.\n", num_iterations);
219 return 0;
220 }
221
222 /* This is the minimization function for fsa's which misght have more than
223 * two categories of states.
224 * We use the labeled set-record type to identify the categories, so *fsaptr
225 * must have state-set of labeled type.
226 * Minimize the fsa *fsaptr, of which transitions are stored externally.
227 */
fsa_ip_labeled_minimize(fsa * fsaptr)228 int fsa_ip_labeled_minimize(fsa *fsaptr)
229 {
230 int *block_numa, *block_numb, *block_swap, i, j, k, l, len, *ptr, *ptr2,
231 *ptr2e, *ht_ptr, ne, ns_orig, *fsarow, **table, ns_final, ns_new,
232 num_iterations;
233 hash_table ht;
234 boolean fixed, *occurs;
235 FILE *rfile;
236
237 if (fsaptr->table->table_type == SPARSE && fsaptr->table->denserows > 0) {
238 fprintf(stderr, "Sorry: fsa_minimize unavailable for sparse storage with "
239 "dense rows.\n");
240 return -1;
241 }
242 if (kbm_print_level >= 3)
243 printf(" #Calling fsa_ip_labeled_minimize.\n");
244 if (!fsaptr->flags[DFA]) {
245 fprintf(stderr, "Error: fsa_labeled_minimize only applies to DFA's.\n");
246 return -1;
247 }
248 if (!fsaptr->flags[ACCESSIBLE]) {
249 fprintf(stderr, "Error: fsa in fsa_labeled_minimize must be accessible.\n");
250 return -1;
251 }
252
253 if (fsaptr->states->type != LABELED) {
254 fprintf(
255 stderr,
256 "Error: in fsa_labeled_minimize state=set must have type labeled.\n");
257 return -1;
258 }
259
260 ne = fsaptr->alphabet->size;
261 ns_orig = fsaptr->states->size;
262 if (ns_orig == 0)
263 return 0;
264
265 /* Start with block_numa equal to the subdivision defined by the labeling. */
266 tmalloc(occurs, boolean, fsaptr->states->labels->size + 1);
267 for (i = 1; i <= fsaptr->states->labels->size; i++)
268 occurs[i] = FALSE;
269 tmalloc(block_numa, int, ns_orig + 1);
270 tmalloc(block_numb, int, ns_orig + 1);
271 ns_new = 0;
272 block_numa[0] = 0; /* The zero state is always regarded as having label 0 */
273 for (i = 1; i <= ns_orig; i++) {
274 j = fsaptr->states->setToLabels[i];
275 if (j > 0 && !occurs[j]) {
276 occurs[j] = TRUE;
277 ns_new++;
278 }
279 block_numa[i] = j;
280 }
281 tfree(occurs);
282 if (ns_new == 0)
283 ns_new = 1; /* a patch for the case when there are no labels */
284
285 fixed = fsaptr->table->table_type == DENSE;
286
287 if (fixed)
288 tmalloc(fsarow, int, ne + 1) else tmalloc(fsarow, int, 2 * ne)
289
290 num_iterations = 0;
291 /* The main refinement loop follows. */
292 do {
293 if ((rfile = fopen(fsaptr->table->filename, "r")) == 0) {
294 fprintf(stderr, "#Cannot open file %s.\n", fsaptr->table->filename);
295 return -1;
296 }
297 num_iterations++;
298 ns_final = ns_new;
299 /* Turn off excessive printing at this point */
300 j = kbm_print_level;
301 kbm_print_level = 1;
302 hash_init(&ht, fixed, ne + 1, 0, 0);
303 kbm_print_level = j;
304 if (kbm_print_level >= 3)
305 printf(" #Iterating - number of state categories = %d.\n", ns_new);
306 block_numb[0] = 0;
307 for (i = 1; i <= ns_orig; i++) {
308 /* Insert the encoded form of the transitions from state i into the
309 * hashtable preceded by the current block number of i. First read in the
310 * transitions for this row from the file.
311 */
312 ptr = ht.current_ptr;
313 *ptr = block_numa[i];
314 if (fixed) {
315 fread((void *)(fsarow + 1), sizeof(int), (size_t)ne, rfile);
316 for (j = 1; j <= ne; j++)
317 ptr[j] = block_numa[fsarow[j]];
318 l = ne + 1;
319 }
320 else {
321 fread((void *)&len, sizeof(int), (size_t)1, rfile);
322 if (len > 0)
323 fread((void *)fsarow, sizeof(int), (size_t)len, rfile);
324 l = 0;
325 for (j = 1; j < len; j += 2) {
326 k = block_numa[fsarow[j]];
327 if (k > 0) {
328 ptr[++l] = fsarow[j - 1];
329 ptr[++l] = k;
330 }
331 }
332 if (l > 0 || *ptr > 0)
333 l++;
334 /* For technical reasons, we want the zero record to be empty */
335 }
336 block_numb[i] = hash_locate(&ht, l);
337 }
338 fclose(rfile);
339 ns_new = ht.num_recs;
340 block_swap = block_numa;
341 block_numa = block_numb;
342 block_numb = block_swap;
343 if (ns_new > ns_final)
344 hash_clear(&ht);
345 } while (ns_new > ns_final);
346
347 tfree(fsarow);
348
349 /* At this stage, either ns_final = ns_new, or the fsa has empty accepted
350 * language, ns_new=0 and ns_final=1.
351 */
352
353 fsaptr->flags[ACCESSIBLE] = TRUE;
354
355 if (ns_new == 0) {
356 /* This is the awkward case of no states - always causes problems! */
357 fsaptr->states->size = 0;
358 fsaptr->num_initial = 0;
359 tfree(fsaptr->initial);
360 fsaptr->num_accepting = 0;
361 tfree(fsaptr->accepting);
362 unlink(fsaptr->table->filename);
363 tfree(fsaptr->table->filename);
364 }
365 else {
366 /* Re-define the fsa fields */
367 fsaptr->states->size = ns_final;
368
369 fsaptr->initial[1] = block_numa[fsaptr->initial[1]];
370
371 for (i = 1; i <= ns_orig; i++)
372 block_numb[block_numa[i]] = fsaptr->states->setToLabels[i];
373 for (i = 1; i <= ns_final; i++)
374 fsaptr->states->setToLabels[i] = block_numb[i];
375
376 if (fsaptr->num_accepting == ns_orig) {
377 fsaptr->num_accepting = ns_final;
378 if (ns_final == 1) {
379 tmalloc(fsaptr->accepting, int, 2);
380 fsaptr->accepting[1] = 1;
381 }
382 }
383 else {
384 tmalloc(fsaptr->is_accepting, boolean, ns_final + 1);
385 for (i = 1; i <= ns_final; i++)
386 fsaptr->is_accepting[i] = FALSE;
387 for (i = 1; i <= fsaptr->num_accepting; i++)
388 fsaptr->is_accepting[block_numa[fsaptr->accepting[i]]] = TRUE;
389 fsaptr->num_accepting = 0;
390 for (i = 1; i <= ns_final; i++)
391 if (fsaptr->is_accepting[i])
392 fsaptr->num_accepting++;
393 tfree(fsaptr->accepting);
394 tmalloc(fsaptr->accepting, int, fsaptr->num_accepting + 1);
395 j = 0;
396 for (i = 1; i <= ns_final; i++)
397 if (fsaptr->is_accepting[i])
398 fsaptr->accepting[++j] = i;
399 tfree(fsaptr->is_accepting);
400 }
401
402 /* Finally copy the transition table data from the hash-table back to the
403 * fsa */
404 unlink(fsaptr->table->filename);
405 tfree(fsaptr->table->filename);
406 if (fixed) {
407 fsa_table_init(fsaptr->table, ns_final, ne);
408 table = fsaptr->table->table_data_ptr;
409 for (i = 1; i <= ns_final; i++) {
410 ht_ptr = hash_rec(&ht, i);
411 for (j = 1; j <= ne; j++)
412 table[j][i] = ht_ptr[j];
413 }
414 }
415 else {
416 tmalloc(fsaptr->table->table_data_ptr, int *, ns_final + 2);
417 tmalloc(fsaptr->table->table_data_ptr[0], int, ht.tot_space - ns_final);
418 table = fsaptr->table->table_data_ptr;
419 table[1] = ptr = table[0];
420 for (i = 1; i <= ns_final; i++) {
421 ht_ptr = hash_rec(&ht, i);
422 ptr2 = ht_ptr + 1;
423 ptr2e = ht_ptr + hash_rec_len(&ht, i) - 1;
424 while (ptr2 <= ptr2e)
425 *(ptr++) = *(ptr2++);
426 table[i + 1] = ptr;
427 }
428 }
429 }
430 hash_clear(&ht);
431 tfree(block_numa);
432 tfree(block_numb);
433 if (kbm_print_level >= 3)
434 printf(" #Number of iterations = %d.\n", num_iterations);
435 return 0;
436 }
437