1 /* $Id$
2 // Copyright (c) 2001,2002                        RIPE NCC
3 //
4 // All Rights Reserved
5 //
6 // Permission to use, copy, modify, and distribute this software and its
7 // documentation for any purpose and without fee is hereby granted,
8 // provided that the above copyright notice appear in all copies and that
9 // both that copyright notice and this permission notice appear in
10 // supporting documentation, and that the name of the author not be
11 // used in advertising or publicity pertaining to distribution of the
12 // software without specific, written prior permission.
13 //
14 // THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
15 // ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS; IN NO EVENT SHALL
16 // AUTHOR BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
17 // DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
18 // AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
19 // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
20 //
21  */
22 /*
23  * RSd release 0.1:
24  * Copyright (c) 1995 University of Southern California and/or
25  * the International Business Machines Corporation.
26  * All rights reserved.
27 //    Permission is hereby granted, free of charge, to any person obtaining a copy
28 //    of this software and associated documentation files (the "Software"), to deal
29 //    in the Software without restriction, including without limitation the rights
30 //    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
31 //    copies of the Software, and to permit persons to whom the Software is
32 //    furnished to do so, subject to the following conditions:
33 //
34 //    The above copyright notice and this permission notice shall be included in
35 //    all copies or substantial portions of the Software.
36 //
37 //    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
38 //    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
39 //    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
40 //    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
41 //    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
42 //    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
43 //    THE SOFTWARE.
44  */
45 
46 #ifndef RE2DFA
47 #define RE2DFA
48 
49 #include "config.h"
50 
51 #define MAX_AS ~(unsigned int)0-1
52 
53 /*
54  * The following is a compile time limit on the number of NFA states
55  * in an AS path regular expression. We use the fact that most expressions
56  * of interest compile to an NFA with a small number of states. We assign
57  * each NFA state a bit in a bit mask; then we can represent the target
58  * states of an NFA transition as a bitmap.
59  */
60 
61 /* Change this in the compile options file */
62 #ifndef RD_MAXASPSTATES
63 //#define RD_MAXASPSTATES 64		/* Multiples of bytes */
64 #define RD_MAXASPSTATES 8000		/* Multiples of bytes */
65 #endif /* RD_MAXASPSTATES */
66 
67 #define RD_NBBY 8		/* Number of bits per byte */
68 #define RD_ASPSIZE	(RD_MAXASPSTATES/RD_NBBY)
69 #define STATES_MASK(x)	unsigned char (x)[RD_ASPSIZE]
70 
71 /*
72  * Data structures for converting from regular expressions to DFA.
73  * We use doubly linked lists for the list of states, arcs and final
74  * states, since we frequently perform element insertion/deletion
75  * and list splicing.
76  */
77 
78 /*
79  * Doubly linked list data structure for all lists.
80  */
81 
82 typedef struct _rd_dq {
83     struct _rd_dq	*rq_forw;	/* Pointer to forw element */
84     struct _rd_dq	*rq_back;	/* Pointer to back element */
85     void		*rq_self;	/* Pointer to encaps data struct */
86 } rd_dq;
87 
88 /*
89  * The basic finite machine data structure. It contains a singly
90  * linked list of states, and a linked list of start and final states.
91  * This structure is used to represent both the NFA and the DFA.
92  */
93 
94 typedef struct _rd_fm {
95     struct _rd_state 	*rf_start;	/* Start state of FM */
96     rd_dq 		rf_states;	/* States of the FM */
97     rd_dq		rf_final;	/* NFA's final states - unused in DFA */
98 
99    /* Cengiz added these fields to support ^AS1 and AS1$ like expressions */
100     char                bolexp;         /* if set m is starts with ^ */
101     char                eolexp;         /* if set m is ends in $ */
102 } rd_fm;
103 
104 /*
105  * Each state itself contains a linked list of the arcs that
106  * emanate from it. It also contains linked list anchor structs.
107  *
108  * Notes:
109  *	The rs_bit field is used in three different ways:
110  *		- for an NFA state, it denotes the assigned bit number.
111  *		- for a un-minimized DFA, it is used to number states to
112  *			implement Myhill-Nerode
113  *		- used for printing DFA.
114  *	The rs_flags field is used in two different ways:
115  *		- during Myhill-Nerode, it marks delete-able states.
116  *		- in the final DFA, it marks final and reject states.
117  */
118 
119 typedef struct _rd_state {
120     rd_dq		rs_arcs;	/* Arcs out of this state */
121     rd_dq		rs_states;	/* Queue block for states */
122     rd_dq		rs_final;	/* Queue block for final states */
123     union {
124 	struct {
125 	    unsigned short rsus_bit;	/* See Notes above */
126 	    unsigned short rsus_flags;	/* See Notes above */
127 	} rsu_s;
128 	STATES_MASK(rsu_mask);		/* If DFA state, which NFA state set */
129     } rs_rsu;
130 
131 #define rs_bit rs_rsu.rsu_s.rsus_bit
132 #define rs_flags rs_rsu.rsu_s.rsus_flags
133 #define rs_mask rs_rsu.rsu_mask
134 
135    /* Cengiz added this field to make rd_duplicate_dfa more efficient */
136    struct _rd_state *new_address;
137 } rd_state;
138 
139 /*
140  * Possible flags for state.
141  */
142 
143 #define RSF_DELETE	0x1		/* This state can be deleted */
144 #define RSF_FINAL	0x2		/* Final state */
145 #define RSF_REJECT	0x4		/* Reject state */
146 #define RSF_MARKED	0x8		/* marked as reachable */
147 
148 /*
149  * Each arc contains the AS number range for which it is to be
150  * traversed and a pointer to the state at which the arc terminates.
151  *
152  * Note: ra_low and ra_high MUST be unsigned int (we use this fact in
153  * rd_complete_ranges and rd_complete_arcs).
154  */
155 
156 typedef struct _rd_arc {
157     unsigned int	ra_low;		/* Low end of range */
158     unsigned int	ra_high;	/* High end of range */
159     rd_dq		ra_arcs;	/* Queue block for arcs */
160     union {
161 	struct _rd_state	*rau_to;	/* target state of DFA arc */
162 	STATES_MASK(rau_mask);			/* target states of NFA arc */
163     } ra_rau;
164 
165 #define ra_to 		ra_rau.rau_to
166 #define ra_mask 	ra_rau.rau_mask
167 } rd_arc;
168 
169 /*
170  * Data structures used for Myhill-Nerode minimization.
171  */
172 
173 /*
174  * This is a single element of the 2-dimensional array. This contains
175  * a bit to indicate if the corresponding pair of states is distinguishable
176  * (in the Myhill-Nerode sense) and a pointer to a list of state pairs
177  * whose fate is tied to this pair of states.
178  */
179 
180 typedef struct _rd_notsame {
181     int			rt_mark;	/* 1 if pair is distinguishable */
182     struct _rd_pair	*rt_pair;	/* List of state pairs */
183 } rd_notsame;
184 
185 /*
186  * This struct contains pairs of states in the rt_pair linked list.
187  * During construction of this list, we maintain the invariant that
188  * rp_first's bit number is always less than that of rp_second.
189  */
190 
191 typedef struct _rd_pair {
192     struct _rd_state	*rp_first;	/* Lower numbered state in pair */
193     struct _rd_state	*rp_second;	/* Higher numbered state in pair */
194     struct _rd_pair	*rp_next;	/* Next in list of states */
195 } rd_pair;
196 
197 /*
198  * Externs.
199  */
200 
201 extern void rd_init();
202 
203 extern rd_arc *rd_alloc_range(unsigned int, unsigned int);
204 extern rd_dq  *rd_alloc_range_list(rd_arc *);
205 extern rd_dq  *rd_alloc_range_list_empty();
206 extern void rd_complement_range_list(rd_dq *);
207 extern void rd_insert_range(rd_dq *, rd_arc *);
208 
209 extern rd_fm *rd_singleton(rd_dq *);
210 extern rd_fm *rd_alternate(rd_fm *, rd_fm *);
211 extern rd_fm *rd_concatenate(rd_fm *, rd_fm *);
212 extern rd_fm *rd_zero_or_more(rd_fm *);
213 extern rd_fm *rd_one_or_more(rd_fm *);
214 extern rd_fm *rd_zero_or_one(rd_fm *);
215 
216 extern void rd_minimize(rd_fm *);
217 extern rd_fm *rd_ntod(rd_fm *);
218 
219 extern void rd_print_nfa(rd_fm *);
220 extern void rd_print_dfa(rd_fm *);
221 
222 /*
223  * Macros for doubly-linked list insertion, deletion and other operations.
224  * For uniformity, ALL queue operations take rd_dq pointers as arguments.
225  */
226 
227 #define RDQ_SET_EMPTY(q)  						\
228 {									\
229     (q)->rq_forw = ((q)->rq_back = (q)); 				\
230 }
231 
232 #define RDQ_ISEMPTY(q)	  						\
233     (((q)->rq_forw == (q)) && ((q)->rq_back == (q)))
234 
235 #define RDQ_INIT(q, r)	{						\
236     RDQ_SET_EMPTY((q));							\
237     (q)->rq_self = (r);							\
238 }
239 
240 #define RDQ_INSERT_AFTER(q, e) {					\
241     (e)->rq_forw = (q)->rq_forw;					\
242     (e)->rq_back = (q);							\
243     (q)->rq_forw->rq_back = (e);					\
244     (q)->rq_forw = (e);							\
245 }
246 
247 #define RDQ_INSERT_BEFORE(q, e) {					\
248     (e)->rq_back = (q)->rq_back;					\
249     (e)->rq_forw = (q);							\
250     (q)->rq_back->rq_forw = (e);					\
251     (q)->rq_back = (e);							\
252 }
253 
254 #define RDQ_UNLINK(q) {							\
255     (q)->rq_forw->rq_back = (q)->rq_back;				\
256     (q)->rq_back->rq_forw = (q)->rq_forw;				\
257     RDQ_SET_EMPTY((q));							\
258 }
259 
260 #define RDQ_UNLINK_LIST(h,j) {						\
261     rd_dq *XXrq;							\
262     XXrq = (h)->rq_forw;						\
263     while (XXrq->rq_self != (j)) {					\
264         RDQ_UNLINK(XXrq);						\
265         XXrq = (h)->rq_forw;						\
266     }									\
267 }
268 
269 #define RDQ_UNLINK_LIST_FREE_ELMS(h,j) {				\
270     rd_dq *XXrq;							\
271     XXrq = (h)->rq_forw;						\
272     while (XXrq->rq_self != (j)) {	                                \
273         RDQ_UNLINK(XXrq);                 				\
274 	free(XXrq->rq_self);                                            \
275         XXrq = (h)->rq_forw;						\
276     }									\
277 }
278 
279 /*
280  * Splices two lists together, whose list heads are given by q1 and q2.
281  * The resulting head is q1, and q2 is returned as the empty list.
282  */
283 
284 #define RDQ_SPLICE(q1, q2) {						\
285     if (!RDQ_ISEMPTY(q2)) {						\
286         (q2)->rq_forw->rq_back = (q1)->rq_back;				\
287         (q1)->rq_back->rq_forw = (q2)->rq_forw;				\
288         (q2)->rq_back->rq_forw = (q1);					\
289         (q1)->rq_back = (q2)->rq_back;					\
290         RDQ_SET_EMPTY((q2));						\
291     }									\
292 }
293 
294 /*
295  * Macros for looping through a doubly linked list, starting from the head.
296  * Given a head, element pointer and an element type.
297  */
298 
299 #define RDQ_LIST_START(h,j,e,t)	{					\
300     rd_dq *XXrq;							\
301     for (XXrq = (h)->rq_forw; XXrq->rq_self != (j); XXrq = XXrq->rq_forw) {\
302         (e) = (t *) XXrq->rq_self;					\
303 
304 #define RDQ_LIST_END(h,j,e,t)	} }
305 
306 /*
307  * Sets the element "e" to the head of the list, or NULL if the list
308  * is empty. This is used to create while loops when using RDQ_LIST_START
309  * may not be appropriate (e.g. when we need to do some processing on
310  * a list element and then delete each element of the list).
311  */
312 
313 #define RDQ_LIST_HEAD(q,e,t)						\
314     ((e) = (RDQ_ISEMPTY(q)) ? NULL : (t *) ((q)->rq_forw->rq_self))
315 
316 
317 
318 /*
319  * Macros for machine related predicates.
320  */
321 
322 #define RD_IS_FINAL(s)	      					\
323     (!RDQ_ISEMPTY(&((s)->rs_final)))
324 
325 #define RD_ACCEPTS_EMPTY_STRING(f)				\
326     (RD_IS_FINAL((f)->rf_start))
327 
328 
329 
330 
331 /*
332  * Cengiz Alaettinoglu's additions
333  */
334 
335 extern int rd_isEmpty_dfa(rd_fm *fm);
336 extern int rd_is_universal_dfa(rd_fm *fm);
337 extern rd_fm *rd_empty_string_machine();
338 extern rd_fm *rd_empty_set_machine();
339 extern rd_fm *rd_empty_set_dfa();
340 extern rd_fm *rd_duplicate_dfa(rd_fm *fm);
341 extern void rd_complement_dfa(rd_fm *fm);
342 extern void rd_free_nfa(rd_fm *fm); /* works for dfa and nfa */
343 #define rd_free_dfa rd_free_nfa
344 extern rd_fm *rd_intersect_dfa(rd_fm *fm1, rd_fm *fm2);
345 extern int rd_equal_dfa(rd_fm *fm1, rd_fm *fm2);
346 extern void rd_dton(rd_fm *fm);
347 extern rd_fm *rd_make_bol(rd_fm *fm);
348 extern rd_fm *rd_make_eol(rd_fm *fm);
349 
350 #endif /* RE2DFA */
351