1 /* file fsa.h  16.9.94.
2  * This header file for finite state automata contains the definitions
3  * of the fsa (finite state automaton) and srec (set record) structures.
4  * and macros for accessing and setting entries in the transition table.
5  *
6  * 9/1/98.  New version to allow more than 127 generators.
7  *          Generators have type `gen' instead of char, where here we
8  *          define `gen' to be unsigned short.
9  * 2/10/97. Added the compact storage type for fsa tables.
10  * 16.8.96. Added field comment_state_numbers to table_struc structure.
11  * 7. 5.96. Added srec_type "list of integers" (LISTOFINTS)
12  * 13.9.95. Added srec_type "list of words" (LISTOFWORDS)
13  * 25.7.95. Added type "MIDFA" of fsa - this is a fsa that would be
14  *	deterministic if it were not for the fact that it may have
15  *	multiple initial states. Used for word-difference machines in
16  *      coset automatic groups.
17  *	Can still have densely stored table.
18  * 5. 12. 94. - added sparse table storage with some rows stored densely.
19  *            - designed macros for all accessing of table entries.
20  * 25. 11. 94. - added filename component to transition table structure,
21  *               for possible external storage of table (probably won't be
22  *               used in the end).
23  * 17.11.94. - implemented labeled format
24  */
25 
26 #ifndef KBMAG_FSA_H
27 #define KBMAG_FSA_H
28 
29 typedef enum {
30   SIMPLE,
31   IDENTIFIERS,
32   WORDS,
33   LISTOFWORDS,
34   LISTOFINTS,
35   STRINGS,
36   LABELED,
37   PRODUCT
38 } srec_type;
39 extern char *kbm_type_names[];
40 typedef enum { DENSE, SPARSE, COMPACT } storage_type;
41 typedef enum {
42   DFA,
43   NFA,
44   MIDFA,
45   MINIMIZED,
46   BFS,
47   ACCESSIBLE,
48   TRIM,
49   RWS
50 } kbm_flag_strings;
51 #define num_kbm_flag_strings 8
52 extern char *kbm_flag_names[];
53 /* definition of kbm_type_names and kbm_flag_names in fsa.c */
54 
55 typedef short setToLabelsType;
56 /* for srec field member - may wish to change this */
57 typedef struct srec {
58   /* This mirrors the set record type defined in the GAP format.
59    * We won't require the `format' field here, since names will always be
60    * stored as a simple list.
61    * In the `word' type, the names will be stored as strings of `gen's,
62    * and the base component should be of type identifiers, where number i
63    * in the string corresponds to the name of identifier number i in the
64    * base.names list.
65    * A list of integer is stored as a simple list preceded by its length.
66    */
67   srec_type type;
68   int size;
69   char **names;     /* Used with identifiers or strings */
70   gen **words;      /* The type gen is used with words  */
71   gen ***wordslist; /* For list of words type  - if wordslist[i] contains
72                      * n words, these will be stored in wordslist[i][j]
73                      * for 0<=j<n, with wordslist[i][n] set to 0.
74                      */
75   int **intlist;    /* For list of integers type - if intlist[i] is a list of
76                      * length n, then the integers will be stored as intlist[i][j]
77                      * for 1<=j<=n, with intlist[i][0]=n.
78                      */
79   int arity;        /* for product type */
80   char padding; /* for product type - the printing char. for padding symbol */
81   char *alphabet[MAXGEN + 1];   /* names of base-alphabet for words */
82   int alphabet_size;            /* length of base-alphabet for words */
83   struct srec *base;            /* for product type */
84   struct srec *labels;          /* for labeled type */
85   setToLabelsType *setToLabels; /* for labeled type */
86 } srec;
87 
88 typedef struct {
89   /* This is used to store the transition table of a finite state automaton.
90    * The data structure depends on the table_type.
91    * Currently, there are two main types, dense and sparse
92    * dense is the simplest and fastest - entries are stored in a 2-dimensional
93    * array structure, with rows corresponding to generators. It can be
94    * expensive in terms of space.
95    * Dense format can only be used for deterministic machines (the
96    * dense nondeterministic type is not yet implemented).
97    * In sparse storage, the transitions from a given state are stored in
98    * a sequence of memory locations, with each transition taking up two
99    * locations, the first a generator and the second the target of the
100    * transition from  the given state with that generator as label.
101    * There is a pointer to this sequence of locations for each state.
102    * sparse format is not suitable if new transitions need to be added
103    * from a given state once the structure has been set up.
104    * If the storage type is sparse, but the "dense_rows" field is a positive
105    * integer n, then the transitions for the first n states are stord in dense
106    * format, and the remainder in sparse format. This compromise hopefully
107    * allows reasonably fast access without using too much space - this is based
108    * on the assumption that there will be most transitions for the states with
109    * the smallest numbers. The default target mechanism is not yet implemented -
110    * if read from an input file, the information will be inserted into the
111    * table. and dense format will be used. If the filename pointer is nonzero,
112    * then the transition table is not present in the file at all - it is stored
113    * externally in unformatted form in the file *filename (possibly with other
114    * information). 2/10/97 - a third storage type, compact is added. This is as
115    * in sparse, but each (generator,state) pair is stored in a single 4-byte
116    * word, with the generator stored in the first byte. (So we have a maximum of
117    * 256 edge-label.)
118    */
119   char *filename;
120   storage_type table_type;
121   storage_type printing_format;
122   /* This specifies the format for printing, which may not be the same as
123    * that for storing - again dense only applicable to dfa's.
124    */
125   boolean comment_state_numbers;
126   /* If this is set true, then when printing the table, the entry
127    * corresponding to each state is followed by a comment giving the
128    * number of that state. This is to increase readability of the table.
129    */
130   int numTransitions; /* The number of nonzero transitions in the table.
131                        * Used mainly when reading in.
132                        */
133   int maxstates;      /* The maximum number of states for which space has been
134                        * allocated
135                        * Only really meaningful for dense storage type.
136                        */
137   int denserows; /* Only applies in sparse storage type - the number of states
138                   * for which the transitions are store densely.
139                   */
140   int **table_data_ptr;
141   /* This field is used for setting and accessing the transitions.
142    * Its usage will depend on the storage type.
143    * Let k be the target of the transition from state i with letter j.
144    * For type dense, k will be accessed as table_data_ptr[j][i].
145    * For type sparse, table_data_ptr[i] points to a location in table_data
146    * which is the beginning of the sequence of transitions from state i.
147    * NOTE THIS FUNDAMENTAL DIFFERENCE - for sparse type, the 'i' is  a
148    * state number, but for dense type, it would be an edge number.
149    * In sparse type, for i<=denserows, table_data_ptr[i][j] = k.
150    *
151    * In all cases, table_data_ptr[0] will point at the beginning of the
152    * data.
153    */
154   int ***table_data_dptr;
155   /* table_data_dptr  is only used for dense storage of 2-variable
156    * automata, when it is defined to make table_data_dptr[i1][i2][j]
157    * the target of the transition from the state j with edge-label
158    * the pair (i1,i2) - this is purely for speed of access.
159    * NOTE This last field should be set whenever required using the
160    * function fsa_table_dptr_init().
161    */
162   unsigned int **ctable_data_ptr;
163   /* For the compact form of data storage */
164 } table_struc;
165 
166 typedef struct {
167   /* This mirrors the finite state automaton type in the GAP format.
168    */
169   srec *states;
170   srec *alphabet;
171   int num_initial; /* The number of initial states.
172                     * The list itself is set whenever this is nonzero.
173                     */
174   int *initial;
175   boolean *is_initial; /* this array is only set when required.
176                         * It should be deallocated immediately after use.
177                         */
178   int num_accepting;   /* The number os accepting states. If this number is
179                         * equal to 0 or states->size>1, then the list itself
180                         * accepting need not be set.
181                         */
182   int *accepting;
183   boolean *is_accepting;  /* this array is only set when required.
184                            * It should be deallocated immediately after use.
185                            */
186   boolean *is_accessible; /* this array is only set when required.
187                            * It should be deallocated immediately after use.
188                            */
189   boolean flags[num_kbm_flag_strings];
190   /* set to TRUE if corresponding flag is set.
191    * the entries correspond to flag_list
192    */
193   table_struc *table;
194 } fsa;
195 
196 
197 /* The following macros should be used for accessing and setting transition
198  * targets outside of the file fsa.c.
199  * They have been designed to provide a stable access, whilst still being
200  * reasonably fast - because fast access of table entries is extremely
201  * time-critical in many applications.
202  * "dense" is intended to be a boolean set to true if the storage type of
203  * the table is dense, and false if sparse.
204  * "table" should be the table_data_ptr field in either case.
205  * "denserows" should be set equal to the denserows component of the table.
206  * "e" is the edge-number and "s" the state number for which the target
207  * state is required.
208  * For sparse format, the function sparse_target (in fsa.c) is called.
209  * Use dense_target when the storage type is known to be dense.
210  */
211 #define target(dense, table, e, s, denserows)                                  \
212   (dense ? table[e][s]                                                         \
213          : s <= denserows ? table[s][e]                                        \
214                           : sparse_target(e, table[s], table[s + 1]))
215 #define dense_target(table, e, s) (table[e][s])
216 #define set_dense_target(table, e, s, t) (table[e][s] = t)
217 
218 /* The next two are for accessing and setting targets for 2-variable
219  * fsa's stored in dense type.
220  * "dtable" should be set equal to the table_data_dptr field.
221  */
222 #define dense_dtarget(dtable, g1, g2, s) (dtable[g1][g2][s])
223 #define set_dense_dtarget(dtable, g1, g2, s, t) (dtable[g1][g2][s] = t)
224 
225 
226 /* function prototypes */
227 extern fsa *fsa_and_not(fsa *fsaptr1, fsa *fsaptr2, storage_type op_table_type,
228                         boolean destroy, char *tempfilename);
229 extern fsa *fsa_and(fsa *fsaptr1, fsa *fsaptr2, storage_type op_table_type,
230                     boolean destroy, char *tempfilename);
231 extern fsa *fsa_concat(fsa *fsaptr1, fsa *fsaptr2, boolean destroy);
232 extern fsa *fsa_exists(fsa *fsaptr, storage_type op_table_type, boolean destroy,
233                        char *tempfilename);
234 extern fsa *fsa_laband(fsa *fsaptr1, fsa *fsaptr2, storage_type op_table_type,
235                        boolean destroy, char *tempfilename);
236 extern fsa *fsa_not(fsa *fsaptr, storage_type op_table_type);
237 extern fsa *fsa_or(fsa *fsaptr1, fsa *fsaptr2, storage_type op_table_type,
238                    boolean destroy, char *tempfilename);
239 extern fsa *fsa_star(fsa *fsaptr, boolean destroy);
240 
241 
242 extern fsa *fsa_composite(fsa *mult1ptr, fsa *mult2ptr,
243                           storage_type op_table_type, boolean destroy,
244                           char *compfilename, boolean readback);
245 extern fsa *fsa_genmult2(fsa *genmultptr, storage_type op_table_type,
246                          boolean destroy, char *genmult2filename,
247                          boolean readback);
248 extern fsa *fsa_geopairs(fsa *waptr, fsa *diffptr, storage_type op_table_type,
249                          boolean destroy, char *tempfilename, boolean readback);
250 extern fsa *fsa_micomposite(fsa *mult1ptr, fsa *mult2ptr,
251                             storage_type op_table_type, boolean destroy,
252                             char *compfilename, boolean readback);
253 extern fsa *fsa_migm2(fsa *migmptr, storage_type op_table_type, boolean destroy,
254                       char *migm2filename, boolean readback, char *prefix);
255 extern fsa *fsa_minkb(fsa *minredptr, fsa *waptr, fsa *diffptr,
256                       storage_type op_table_type, boolean destroy,
257                       char *tempfilename);
258 extern fsa *fsa_minred(fsa *waptr, storage_type op_table_type, boolean destroy,
259                        char *tempfilename);
260 extern fsa *fsa_mireverse(fsa *fsaptr, storage_type op_table_type,
261                           boolean destroy, char *tempfilename);
262 extern fsa *fsa_reverse(fsa *fsaptr, storage_type op_table_type,
263                         boolean destroy, boolean subsets, char *tempfilename);
264 extern fsa *fsa_submult(fsa *subwaptr, fsa *multptr, storage_type op_table_type,
265                         boolean destroy, char *tempfilename, boolean readback);
266 extern fsa *fsa_wa_cos(fsa *fsaptr, storage_type op_table_type, boolean destroy,
267                        char *tempfilename);
268 extern fsa *fsa_wa(fsa *fsaptr, storage_type op_table_type, boolean destroy,
269                    char *tempfilename);
270 extern fsa *midfa_determinize(fsa *fsaptr, storage_type op_table_type,
271                               boolean destroy, char *tempfilename);
272 extern fsa *migm_determinize(fsa *migmptr, storage_type op_table_type,
273                              boolean destroy, char *tempfilename);
274 extern fsa *nfa_determinize(fsa *fsaptr, storage_type op_table_type,
275                             boolean eps_trans, boolean destroy, boolean subsets,
276                             char *tempfilename);
277 
278 extern boolean fsa_equal(fsa *fsaptr1, fsa *fsaptr2);
279 
280 extern int fsa_bfs(fsa *fsaptr);
281 extern int fsa_clear_rws(fsa *fsaptr);
282 extern int fsa_count(fsa *fsaptr);
283 extern int fsa_delete_state(fsa *fsaptr, int stateno);
284 extern int fsa_enumerate(FILE *wfile, fsa *fsaptr, int min, int max,
285                          boolean putcomma, int stateno);
286 extern int fsa_growth(FILE *wfile, fsa *fsaptr, unsigned *primes, char *var);
287 extern int fsa_ip_labeled_minimize(fsa *fsaptr);
288 extern int fsa_ip_minimize(fsa *fsaptr);
289 extern int fsa_labeled_minimize(fsa *fsaptr);
290 extern int fsa_minimize(fsa *fsaptr);
291 extern int fsa_swap_coords(fsa *fsaptr);
292 extern int fsa_table_dptr_init(fsa *fsaptr);
293 
294 extern void fsa_clear(fsa *fsaptr);
295 extern void fsa_copy(fsa *fsaptr1, fsa *fsaptr2);
296 extern void fsa_init(fsa *fsaptr);
297 extern void fsa_print(FILE *wfile, fsa *fsaptr, char *name);
298 extern void fsa_read(FILE *rfile, fsa *fsaptr, storage_type table_storage_type,
299                      int dr, int maxstates, boolean assignment, char *name);
300 extern void fsa_set_is_accepting(fsa *fsaptr);
301 extern void fsa_set_is_accessible(fsa *fsaptr);
302 extern void fsa_set_is_initial(fsa *fsaptr);
303 extern void fsa_table_init(table_struc *tableptr, int maxstates, int ne);
304 
305 extern int words_and_not(fsa *fsaptr1, fsa *fsaptr2, gen **words, int maxwords);
306 
307 extern int sparse_target(int g, int *p1, int *p2);
308 
309 int diff_no(fsa *wd_fsaptr, gen *w);
310 
311 void compressed_transitions_read(fsa *fsaptr, FILE *rfile);
312 
313 int fsa_makemult(fsa *genmultptr, int g);
314 int fsa_makemult2(fsa *genmult2ptr, int g1, int g2);
315 
316 int fsa_mimakemult(fsa *migmptr, int g, char *prefix);
317 int fsa_mimakemult2(fsa *migm2ptr, int g1, int g2, char *prefix);
318 
319 int mimult_minimize(fsa *fsaptr);
320 
321 #endif
322