1 /* $Id$ 2 // Copyright (c) 2001,2002 RIPE NCC 3 // 4 // All Rights Reserved 5 // 6 // Permission to use, copy, modify, and distribute this software and its 7 // documentation for any purpose and without fee is hereby granted, 8 // provided that the above copyright notice appear in all copies and that 9 // both that copyright notice and this permission notice appear in 10 // supporting documentation, and that the name of the author not be 11 // used in advertising or publicity pertaining to distribution of the 12 // software without specific, written prior permission. 13 // 14 // THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING 15 // ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS; IN NO EVENT SHALL 16 // AUTHOR BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY 17 // DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN 18 // AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 19 // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 20 // 21 */ 22 /* 23 * RSd release 0.1: 24 * Copyright (c) 1995 University of Southern California and/or 25 * the International Business Machines Corporation. 26 * All rights reserved. 27 // Permission is hereby granted, free of charge, to any person obtaining a copy 28 // of this software and associated documentation files (the "Software"), to deal 29 // in the Software without restriction, including without limitation the rights 30 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 31 // copies of the Software, and to permit persons to whom the Software is 32 // furnished to do so, subject to the following conditions: 33 // 34 // The above copyright notice and this permission notice shall be included in 35 // all copies or substantial portions of the Software. 36 // 37 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 38 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 39 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 40 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 41 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 42 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 43 // THE SOFTWARE. 44 */ 45 46 #ifndef RE2DFA 47 #define RE2DFA 48 49 #include "config.h" 50 51 #define MAX_AS ~(unsigned int)0-1 52 53 /* 54 * The following is a compile time limit on the number of NFA states 55 * in an AS path regular expression. We use the fact that most expressions 56 * of interest compile to an NFA with a small number of states. We assign 57 * each NFA state a bit in a bit mask; then we can represent the target 58 * states of an NFA transition as a bitmap. 59 */ 60 61 /* Change this in the compile options file */ 62 #ifndef RD_MAXASPSTATES 63 //#define RD_MAXASPSTATES 64 /* Multiples of bytes */ 64 #define RD_MAXASPSTATES 8000 /* Multiples of bytes */ 65 #endif /* RD_MAXASPSTATES */ 66 67 #define RD_NBBY 8 /* Number of bits per byte */ 68 #define RD_ASPSIZE (RD_MAXASPSTATES/RD_NBBY) 69 #define STATES_MASK(x) unsigned char (x)[RD_ASPSIZE] 70 71 /* 72 * Data structures for converting from regular expressions to DFA. 73 * We use doubly linked lists for the list of states, arcs and final 74 * states, since we frequently perform element insertion/deletion 75 * and list splicing. 76 */ 77 78 /* 79 * Doubly linked list data structure for all lists. 80 */ 81 82 typedef struct _rd_dq { 83 struct _rd_dq *rq_forw; /* Pointer to forw element */ 84 struct _rd_dq *rq_back; /* Pointer to back element */ 85 void *rq_self; /* Pointer to encaps data struct */ 86 } rd_dq; 87 88 /* 89 * The basic finite machine data structure. It contains a singly 90 * linked list of states, and a linked list of start and final states. 91 * This structure is used to represent both the NFA and the DFA. 92 */ 93 94 typedef struct _rd_fm { 95 struct _rd_state *rf_start; /* Start state of FM */ 96 rd_dq rf_states; /* States of the FM */ 97 rd_dq rf_final; /* NFA's final states - unused in DFA */ 98 99 /* Cengiz added these fields to support ^AS1 and AS1$ like expressions */ 100 char bolexp; /* if set m is starts with ^ */ 101 char eolexp; /* if set m is ends in $ */ 102 } rd_fm; 103 104 /* 105 * Each state itself contains a linked list of the arcs that 106 * emanate from it. It also contains linked list anchor structs. 107 * 108 * Notes: 109 * The rs_bit field is used in three different ways: 110 * - for an NFA state, it denotes the assigned bit number. 111 * - for a un-minimized DFA, it is used to number states to 112 * implement Myhill-Nerode 113 * - used for printing DFA. 114 * The rs_flags field is used in two different ways: 115 * - during Myhill-Nerode, it marks delete-able states. 116 * - in the final DFA, it marks final and reject states. 117 */ 118 119 typedef struct _rd_state { 120 rd_dq rs_arcs; /* Arcs out of this state */ 121 rd_dq rs_states; /* Queue block for states */ 122 rd_dq rs_final; /* Queue block for final states */ 123 union { 124 struct { 125 unsigned short rsus_bit; /* See Notes above */ 126 unsigned short rsus_flags; /* See Notes above */ 127 } rsu_s; 128 STATES_MASK(rsu_mask); /* If DFA state, which NFA state set */ 129 } rs_rsu; 130 131 #define rs_bit rs_rsu.rsu_s.rsus_bit 132 #define rs_flags rs_rsu.rsu_s.rsus_flags 133 #define rs_mask rs_rsu.rsu_mask 134 135 /* Cengiz added this field to make rd_duplicate_dfa more efficient */ 136 struct _rd_state *new_address; 137 } rd_state; 138 139 /* 140 * Possible flags for state. 141 */ 142 143 #define RSF_DELETE 0x1 /* This state can be deleted */ 144 #define RSF_FINAL 0x2 /* Final state */ 145 #define RSF_REJECT 0x4 /* Reject state */ 146 #define RSF_MARKED 0x8 /* marked as reachable */ 147 148 /* 149 * Each arc contains the AS number range for which it is to be 150 * traversed and a pointer to the state at which the arc terminates. 151 * 152 * Note: ra_low and ra_high MUST be unsigned int (we use this fact in 153 * rd_complete_ranges and rd_complete_arcs). 154 */ 155 156 typedef struct _rd_arc { 157 unsigned int ra_low; /* Low end of range */ 158 unsigned int ra_high; /* High end of range */ 159 rd_dq ra_arcs; /* Queue block for arcs */ 160 union { 161 struct _rd_state *rau_to; /* target state of DFA arc */ 162 STATES_MASK(rau_mask); /* target states of NFA arc */ 163 } ra_rau; 164 165 #define ra_to ra_rau.rau_to 166 #define ra_mask ra_rau.rau_mask 167 } rd_arc; 168 169 /* 170 * Data structures used for Myhill-Nerode minimization. 171 */ 172 173 /* 174 * This is a single element of the 2-dimensional array. This contains 175 * a bit to indicate if the corresponding pair of states is distinguishable 176 * (in the Myhill-Nerode sense) and a pointer to a list of state pairs 177 * whose fate is tied to this pair of states. 178 */ 179 180 typedef struct _rd_notsame { 181 int rt_mark; /* 1 if pair is distinguishable */ 182 struct _rd_pair *rt_pair; /* List of state pairs */ 183 } rd_notsame; 184 185 /* 186 * This struct contains pairs of states in the rt_pair linked list. 187 * During construction of this list, we maintain the invariant that 188 * rp_first's bit number is always less than that of rp_second. 189 */ 190 191 typedef struct _rd_pair { 192 struct _rd_state *rp_first; /* Lower numbered state in pair */ 193 struct _rd_state *rp_second; /* Higher numbered state in pair */ 194 struct _rd_pair *rp_next; /* Next in list of states */ 195 } rd_pair; 196 197 /* 198 * Externs. 199 */ 200 201 extern void rd_init(); 202 203 extern rd_arc *rd_alloc_range(unsigned int, unsigned int); 204 extern rd_dq *rd_alloc_range_list(rd_arc *); 205 extern rd_dq *rd_alloc_range_list_empty(); 206 extern void rd_complement_range_list(rd_dq *); 207 extern void rd_insert_range(rd_dq *, rd_arc *); 208 209 extern rd_fm *rd_singleton(rd_dq *); 210 extern rd_fm *rd_alternate(rd_fm *, rd_fm *); 211 extern rd_fm *rd_concatenate(rd_fm *, rd_fm *); 212 extern rd_fm *rd_zero_or_more(rd_fm *); 213 extern rd_fm *rd_one_or_more(rd_fm *); 214 extern rd_fm *rd_zero_or_one(rd_fm *); 215 216 extern void rd_minimize(rd_fm *); 217 extern rd_fm *rd_ntod(rd_fm *); 218 219 extern void rd_print_nfa(rd_fm *); 220 extern void rd_print_dfa(rd_fm *); 221 222 /* 223 * Macros for doubly-linked list insertion, deletion and other operations. 224 * For uniformity, ALL queue operations take rd_dq pointers as arguments. 225 */ 226 227 #define RDQ_SET_EMPTY(q) \ 228 { \ 229 (q)->rq_forw = ((q)->rq_back = (q)); \ 230 } 231 232 #define RDQ_ISEMPTY(q) \ 233 (((q)->rq_forw == (q)) && ((q)->rq_back == (q))) 234 235 #define RDQ_INIT(q, r) { \ 236 RDQ_SET_EMPTY((q)); \ 237 (q)->rq_self = (r); \ 238 } 239 240 #define RDQ_INSERT_AFTER(q, e) { \ 241 (e)->rq_forw = (q)->rq_forw; \ 242 (e)->rq_back = (q); \ 243 (q)->rq_forw->rq_back = (e); \ 244 (q)->rq_forw = (e); \ 245 } 246 247 #define RDQ_INSERT_BEFORE(q, e) { \ 248 (e)->rq_back = (q)->rq_back; \ 249 (e)->rq_forw = (q); \ 250 (q)->rq_back->rq_forw = (e); \ 251 (q)->rq_back = (e); \ 252 } 253 254 #define RDQ_UNLINK(q) { \ 255 (q)->rq_forw->rq_back = (q)->rq_back; \ 256 (q)->rq_back->rq_forw = (q)->rq_forw; \ 257 RDQ_SET_EMPTY((q)); \ 258 } 259 260 #define RDQ_UNLINK_LIST(h,j) { \ 261 rd_dq *XXrq; \ 262 XXrq = (h)->rq_forw; \ 263 while (XXrq->rq_self != (j)) { \ 264 RDQ_UNLINK(XXrq); \ 265 XXrq = (h)->rq_forw; \ 266 } \ 267 } 268 269 #define RDQ_UNLINK_LIST_FREE_ELMS(h,j) { \ 270 rd_dq *XXrq; \ 271 XXrq = (h)->rq_forw; \ 272 while (XXrq->rq_self != (j)) { \ 273 RDQ_UNLINK(XXrq); \ 274 free(XXrq->rq_self); \ 275 XXrq = (h)->rq_forw; \ 276 } \ 277 } 278 279 /* 280 * Splices two lists together, whose list heads are given by q1 and q2. 281 * The resulting head is q1, and q2 is returned as the empty list. 282 */ 283 284 #define RDQ_SPLICE(q1, q2) { \ 285 if (!RDQ_ISEMPTY(q2)) { \ 286 (q2)->rq_forw->rq_back = (q1)->rq_back; \ 287 (q1)->rq_back->rq_forw = (q2)->rq_forw; \ 288 (q2)->rq_back->rq_forw = (q1); \ 289 (q1)->rq_back = (q2)->rq_back; \ 290 RDQ_SET_EMPTY((q2)); \ 291 } \ 292 } 293 294 /* 295 * Macros for looping through a doubly linked list, starting from the head. 296 * Given a head, element pointer and an element type. 297 */ 298 299 #define RDQ_LIST_START(h,j,e,t) { \ 300 rd_dq *XXrq; \ 301 for (XXrq = (h)->rq_forw; XXrq->rq_self != (j); XXrq = XXrq->rq_forw) {\ 302 (e) = (t *) XXrq->rq_self; \ 303 304 #define RDQ_LIST_END(h,j,e,t) } } 305 306 /* 307 * Sets the element "e" to the head of the list, or NULL if the list 308 * is empty. This is used to create while loops when using RDQ_LIST_START 309 * may not be appropriate (e.g. when we need to do some processing on 310 * a list element and then delete each element of the list). 311 */ 312 313 #define RDQ_LIST_HEAD(q,e,t) \ 314 ((e) = (RDQ_ISEMPTY(q)) ? NULL : (t *) ((q)->rq_forw->rq_self)) 315 316 317 318 /* 319 * Macros for machine related predicates. 320 */ 321 322 #define RD_IS_FINAL(s) \ 323 (!RDQ_ISEMPTY(&((s)->rs_final))) 324 325 #define RD_ACCEPTS_EMPTY_STRING(f) \ 326 (RD_IS_FINAL((f)->rf_start)) 327 328 329 330 331 /* 332 * Cengiz Alaettinoglu's additions 333 */ 334 335 extern int rd_isEmpty_dfa(rd_fm *fm); 336 extern int rd_is_universal_dfa(rd_fm *fm); 337 extern rd_fm *rd_empty_string_machine(); 338 extern rd_fm *rd_empty_set_machine(); 339 extern rd_fm *rd_empty_set_dfa(); 340 extern rd_fm *rd_duplicate_dfa(rd_fm *fm); 341 extern void rd_complement_dfa(rd_fm *fm); 342 extern void rd_free_nfa(rd_fm *fm); /* works for dfa and nfa */ 343 #define rd_free_dfa rd_free_nfa 344 extern rd_fm *rd_intersect_dfa(rd_fm *fm1, rd_fm *fm2); 345 extern int rd_equal_dfa(rd_fm *fm1, rd_fm *fm2); 346 extern void rd_dton(rd_fm *fm); 347 extern rd_fm *rd_make_bol(rd_fm *fm); 348 extern rd_fm *rd_make_eol(rd_fm *fm); 349 350 #endif /* RE2DFA */ 351