1 /* super.h - decls for lazilly constructed DFAs 2 * 3 **************************************************************** 4 * Copyright (C) 1998, 2000 Thomas Lord 5 * 6 * See the file "COPYING" for further information about 7 * the copyright and warranty status of this work. 8 */ 9 10 11 12 #ifndef INCLUDE__RX__SUPER_H 13 #define INCLUDE__RX__SUPER_H 14 15 /* #define RX_LARGE_TABLES */ 16 17 struct rx_superset; 18 struct rx_super_edge; 19 struct rx_superstate; 20 21 #include "hackerlab/mem/alloc-limits.h" 22 #include "hackerlab/bitsets/bitset.h" 23 #include "hackerlab/hash/hashtree.h" 24 #include "hackerlab/rx/nfa.h" 25 26 27 28 /************************************************************************ 29 *(h1 "Superstate Data Structures") 30 * 31 * 32 * One state of the DFA is called a "superstate" because it 33 * represents the composition of one or more NFA states. The 34 * functions in this chapter build superstates and edges between 35 * them. 36 * 37 * A DFA can be prohibitively large -- too large to keep in memory all 38 * at once. So these functions help you to build only the superstates 39 * you actually need. While you keep those states locked, they remain 40 * in memory; when unlocked, they can be discarded and recycled. 41 * 42 * As you fill the superstate cache with DFA states you visit often, 43 * you'll want to follow transitions between those DFA states as 44 * quickly as possible. For that reason, the data structures in this 45 * chapter are slightly complicated: to heavily optimize the case when 46 * you are tracing through the DFA graph entirely out of the 47 * superstate cache. In the optimized case, you can advance a DFA 48 * through one state transition with only 10-20 machine instructions. 49 * That number of instructions, multiplied by the length of the input 50 * string, is the entire overhead of a DFA comparison running out 51 * of the cache! 52 */ 53 54 /************************************************************************ 55 *(h2 "DFA State Transition Tables") 56 * 57 * A regular expression DFA is represented by a collection of 58 * transition tables, one per DFA state. If the character set has `N' 59 * characters, the transition tables have `N' entries. 60 * 61 * Each entry in a transition table is one instruction for a virtual 62 * machine. 63 */ 64 65 /*(c #s"struct rx_inx" :category type) 66 * struct rx_inx; 67 * 68 * This structure holds one instruction for a virtual machine that 69 * interprets the DFA. For example, a structure of this type is used 70 * to represent one entry in the transition table of a DFA state. 71 * 72 * This structure type is padded to a power-of-2 bytes because in some 73 * critical code, we dispatch by indexing an array of these 74 * structures. If that indexing can be accomplished by just a shift 75 * of the index, we're much happier than if it requires a full-fledged 76 * multiplication. 77 * 78 * An instruction includes an opcode and two operands. The opcode is 79 * stored in the field `inx' and the operands are stored in the fields 80 * `data' and `data_2'. The field `data' is only used by one 81 * instruction (`rx_next_char'). For all other instructions, `data' 82 * is set to 0. This is used to speed up dispatching: the field 83 * `data' is fetched and tested before the field `inx'. If data is 84 * not 0, there is no need to fetch the field `inx' at all -- we 85 * already know it contains `rx_next_char' and we already have the 86 * primary operand to that instruction in a register. 87 * 88 * In the case of a `rx_next_char' instruction, the primary operand is 89 * a pointer to the transition table of the destination DFA state. 90 * The secondary operand is the state label of the destination state. 91 * Thus, the state transition part of a DFA matching loop is something 92 * like: 93 * 94 * while (1) 95 * { 96 * // Presume that the current transition table entry 97 * // for the next input character (`input_char') contains 98 * // the virtual instruction `rx_next_char'. 99 * // 100 * // Fetch the operands for that instruction: 101 * // 102 * next_state_table 103 * = (struct rx_inx *)current_state_table[input_char].data; 104 * next_state_label 105 * = (long)current_state_table[input_char].data_2; 106 * 107 * // Now check, is it really a `rx_next_char' instruction? 108 * // 109 * if (!next_state_table) 110 * { 111 * // Primary operand was 0 -- not really `rx_next_char'. 112 * // 113 * ... handle a cache miss or illegal input character 114 * ... patch up the values of `next_state_table' and 115 * ... `next_state_label' 116 * } 117 * 118 * // Advance to the next DFA state: 119 * // 120 * current_state_table = next_state_table; 121 * current_state_label = next_state_label; 122 * 123 * // Test whether or not it is a final state: 124 * // 125 * if (current_state_label) 126 * { 127 * // we have reached a final state in the DFA 128 * ... perhaps return or perhaps continue to look for 129 * ... a longer match. 130 * } 131 * 132 * // Process further input. 133 * // 134 * input_char = read_next_char (); 135 * } 136 * 137 * 138 * Here are the details of the strucure type: 139 * 140 insert*/ 141 struct rx_inx 142 { 143 void * data; 144 /* The primary operand of this instruction. For efficient 145 indexing, this must be the first field of the structure.*/ 146 147 void * data_2; 148 /* The secondary operand of this instruction.*/ 149 150 void * inx; 151 /* The opcode for this instruction. This field is declared 152 `void *' to force it to occupy exactly one word. In fact it 153 holds a value of type `enum rx_opcode'.*/ 154 155 struct rx_inx * next_same_edge; 156 /* A list of instruction frames, anchored at e->inx_list, 157 of instructions built for the same edge. */ 158 }; 159 /*end-insert 160 */ 161 162 163 /*(c #s"enum rx_opcode" :category type) 164 * enum rx_opcode; 165 * 166 * This type represents opcodes for a regular expression DFA virtual 167 * machine. 168 * 169 insert*/ 170 enum rx_opcode 171 { 172 rx_cache_miss = 0, /* must be 0 */ 173 /* An `rx_cache_miss' instruction means that a transition 174 has been reached whose edge or destination state is not 175 currently in the DFA cache. 176 177 If the edge is missing from the cache, then `data' and 178 `data_2' are 0. If the edge is in the cache, but the 179 destination state is either not in the cache or is unknown to 180 the edge, then `data' is 0 and `data_2' points to the edge.*/ 181 182 183 rx_next_char = rx_cache_miss + 1, 184 /* An `rx_next_char' instruction means that a transition has been 185 reached to a DFA state that is known to be in the DFA state 186 cache. 187 188 `data' points to the transition table of the destination state 189 (the `transitions' field of a `struct rx_superstate'). 190 191 `data_2' is set to the state label of the destination state. 192 193 If the `data' field of a `struct rx_inx' is not 0, then the 194 `inx' field is guaranteed to be `rx_next_char'. If the 195 insruction is `rx_next_char', a DFA interpreter is almost 196 certain to need the value in `data'. A DFA interpreter 197 should optimize this instruction above all others. This 198 suggests that interpreters should dispatch first on the value 199 in `data' (on whether or not it is 0), and then (if `data' is 200 0) on the instruction in field `inx'.*/ 201 202 203 rx_backtrack = rx_next_char + 1, 204 /* An `rx_backtrack' instruction means that there is no 205 transition defined for an input character. If the DFA is 206 being used for regular expression comparison, this means that 207 the input does not match.*/ 208 209 rx_huge_char = rx_backtrack + 1, 210 /* An `rx_huge_char' instruction means that the the character 211 is a UTF-16 high surrogate. */ 212 213 214 rx_bogus_utf8 = rx_backtrack + 1, 215 /* Second, third or fourth byte of a UTF-8 character. 216 */ 217 218 rx_2byte_utf8 = rx_bogus_utf8 + 1, 219 /* First byte of a 2-byte UTF-8 character. 220 */ 221 222 rx_3byte_utf8 = rx_2byte_utf8 + 1, 223 /* First byte of a 3-byte UTF-8 character. 224 */ 225 226 rx_4byte_utf8 = rx_3byte_utf8 + 1, 227 /* First byte of a 4-byte UTF-8 character. 228 */ 229 230 rx_num_instructions = rx_4byte_utf8 + 1, 231 }; 232 /*end-insert 233 */ 234 235 /************************************************************************ 236 *(h2 "DFA States") 237 * 238 */ 239 240 /*(c #s"struct rx_superstate" :category type) 241 * struct rx_superstate; 242 * 243 * This structure holds one state in a regular expression DFA. 244 * 245 insert*/ 246 struct rx_super_transition; /* no such structure */ 247 typedef struct rx_super_transition ** rx_transition_table; 248 249 struct rx_superstate 250 { 251 struct rx_superset * members; 252 /* A list of NFA states: the NFA state set corresponding to this 253 DFA state. No two DFA states have the same set of members. */ 254 255 struct rx_super_edge * outgoing_edges; 256 /* A list of outgoing DFA edges, linked by `next_same_present'. 257 At any time, some edges for this state may be missing from the 258 list (because they are missing from the DFA cache). To build 259 a complete DFA, it is necessary to traverse the complete 260 transition table of every state, keeping locks on each state 261 encountered so that none are flushed from the DFA cache. (Be 262 advised: DFA graphs can be quite large.)*/ 263 264 265 struct rx_super_edge * incoming_edges; 266 /* A queue of incoming DFA edges, linked by `next_same_dest' and 267 `prev_same_dest'. Again, those edges not in the DFA cache 268 are missing from this list. Also missing from this list are 269 edges that are in the cache, but that do not yet know their 270 destination state.*/ 271 272 273 /* Superstate Locks and Superstate Cache Management 274 * 275 * Every superstate has a lock count -- a kind of reference 276 * count. While your code holds a pointer to a superstate, that 277 * superstate should have a non-0 lock count unless you are 278 * certain that none of the functions that allocate storage in 279 * the DFA cache will be called. You can use `rx_lock_superstate' 280 * and `rx_unlock_superstate' to manipulate this reference count. 281 * 282 * The gory details of memory management for DFA states is 283 * interesting and relevant to programmers who want to obtain the 284 * best performance from this library. Many of those details are 285 * explained here. 286 * 287 * Upon creation, superstates are given a lock count of 0 and are 288 * added to the back of a queue of live superstates. When a 289 * cache-hit yields an already existing superstate, it is moved 290 * back to back of that queue. Thus, the states most recently 291 * returned from the DFA cache tend to be at the back of the queue 292 * of live states. 293 * 294 * While a state has a lock count of 0, it is a candidate to be 295 * made semifree and then truly free. This can happen any time the 296 * old state reaches the _front_ of the queue of live states and 297 * new states or edges are allocated in the DFA cache (such as 298 * during a call to `rx_handle_cache_miss'). When this occurs, 299 * incoming transitions to the state are modified to cause cache 300 * misses, the state is moved from the queue of live states to the 301 * back of the queue of semifree states, and the state is a 302 * candidate to become truly free. To convert a state which is 303 * semifree back to a live state, use `rx_refresh_this_superstate'. 304 * 305 * When a state becomes semifree, there are two possibilities of 306 * what will happen next. One possibility is that Rx will reclaim 307 * the state's storage. This will happen if the state reaches the 308 * _front_ of the queue of semifree states at a time when Rx detects 309 * that the DFA cache is over-full. 310 * 311 * The other possibility is that a semifree state will be 312 * referenced by a DFA transition which results in a cache miss. 313 * The cache miss handler reverts the semifree state to a live 314 * state. Restoring the state this way is less expensive than 315 * rebuilding the state from scratch and gives us the information 316 * that the state is useful enough to keep in the cache. In 317 * effect, we use the cache misses that restore semifree states to 318 * sort the two queues of DFA states, moving popular states to the 319 * back of those queues, and letting unpopular states drift to the 320 * front of the live queue, then through the semifree queue, and 321 * finally into being recycled. 322 * 323 * There is a (soft) upper bound on the amount of memory Rx will use 324 * for caching superstates. While that upper bound is exceeded, Rx 325 * tries to free some superstates which have a lock count of 0, 326 * starting with the head of the queue of semifree states. 327 */ 328 int locks; /* A reference count. */ 329 int is_semifree; /* Is this state live or semifree? */ 330 331 struct rx_superstate * next_recyclable; 332 struct rx_superstate * prev_recyclable; 333 /* While a DFA state is live, these links place the state on 334 a queue of all live DFA states. 335 336 While the state is semifree, these links place the state on 337 queue of all semifree DFA states.*/ 338 339 340 341 /* At run-time, a matcher follows transitions from one superstate 342 * to the next. At times, a destination state may be missing from 343 * the superstate cache or from a particular instruction frame. In 344 * those cases, a destination superset (set of NFA states) is 345 * computed, and that is used as a key to search the cache of DFA 346 * states. If a cache entry is found (and valid) then the 347 * superstate is missing only from the instruction frame, which is 348 * then filled in, and the transition resumed normally. If no 349 * cache entry is found, a new superstate is constructed for that 350 * superset. 351 * 352 * The following field (`rx_id') is used when validating cache 353 * entries in the superstate cache keyed on supersets. If a cache 354 * search for a particular DFA state yields this state, the result 355 * is only valid if this field agrees with the `rx_id' field of the 356 * NFA. 357 * 358 * This field is necessary because of the possibility of the 359 * following (unlikely but not impossible) sequence of events: 360 * 361 * The caller allocates an NFA (call it "NFA A"). 362 * 363 * The caller builds DFA states based on NFA A. 364 * 365 * The caller frees NFA A. The DFA states continue to exist 366 * until flushed from the DFA cache from lack of use. 367 * 368 * The caller allocates a new NFA ("NFA B") which happens 369 * to use the same region of the heap as NFA A. 370 * By coincidence, some of the NFA states sets for NFA A 371 * and NFA B hash to the same value. Consequently, when 372 * looking for a DFA state built for NFA B, the cache probe 373 * might yield a stale DFA state built for NFA A. 374 * 375 * But luckilly, NFA A and NFA B are guaranteed to have 376 * different values for the field `rx_id'. By comparing 377 * the `rx_id' field of NFA B to the superstate yielded by 378 * the cache probe, we can know to invalidate that cache 379 * entry. 380 */ 381 int rx_id; 382 383 /* Superstates of a given NFA are numbered sequentially. The 384 * sequence number counter is the global variable 385 * `rx_superstate_counter' which programs are free to modify. 386 * 387 * Sequence numbers are useful for applications which manipulate the 388 * DFA graph in ways other than simply following transition tables. 389 */ 390 int seq; 391 392 /* The following field is used by `rx_mkstatic', which builds a 393 * complete DFA by examining superstates. 394 */ 395 long table_id; 396 397 int storage_unit_size; /* see nfa.h */ 398 int has_huge_table; /* yes for Unicode */ 399 400 #ifdef RX_LARGE_TABLES 401 struct rx_inx *** huge_char_transitions; 402 #else 403 struct rx_inx **** huge_char_transitions; 404 #endif 405 406 /* struct rx_inx transitions[1]; */ 407 struct rx_super_transition * transitions[1]; 408 }; 409 /*end-insert 410 */ 411 412 413 #define rx_transitions_to_suprestate(T) ((struct rx_superstate *) \ 414 ((char *)(T) \ 415 - ((unsigned long) \ 416 ((struct rx_superstate *)0)->transitions))) 417 418 419 #define rx_transition8(T,N) (((struct rx_inx *)(T)) + (N)) 420 #define rx_subset_transitions8(T,N) (((struct rx_inx *)(T)) + bitset_subset_offset(N)) 421 422 #ifdef RX_LARGE_TABLES 423 424 # define rx_page1_index16(N) ((N) >> 8) 425 # define rx_page2_index16(N) ((N) & 0xff) 426 # define rx_page2_subset_index16(N) bitset_subset_offset (rx_page2_index16(N)) 427 # define rx_page1_16(T) ((struct rx_inx **)(T)) 428 # define rx_page2_16(T,N) rx_page1_16 (T)[rx_page1_index16(N)] 429 430 # define rx_transition16(T,N) (rx_page2_16 ((T), (N)) + rx_page2_index16 (N)) 431 # define rx_subset_transitions16(T,N) (rx_page2_16 ((T), (N)) + rx_page2_subset_index16 (N)) 432 433 434 # define rx_page1_size21 32 435 # define rx_page2_size21 256 436 # define rx_page1_index21(N) ((N) >> 16) 437 # define rx_page2_index21(N) (((N) >> 8) & 0xff) 438 # define rx_page3_index21(N) ((N) & 0xff) 439 # define rx_page3_subset_index21(N) bitset_subset_offset (rx_page3_index21 (N)) 440 # define rx_page2_21(T,N) ((T)[rx_page1_index21 (N)]) 441 # define rx_page3_21(T,N) rx_page2_21 ((T), (N))[rx_page2_index21 (N)] 442 443 # define rx_transition21(T,N) (rx_page3_21 ((T), (N)) + rx_page3_index21 (N)) 444 # define rx_subset_transition21(T,N) (rx_page3_21 ((T), (N)) + rx_page3_subset_index21 (N)) 445 446 447 #else /* !defined(RX_LARGE_TABLES) */ 448 449 450 # if (bits_per_subset == 32) 451 452 # define rx_page2_size (8) 453 # define rx_page3_size bits_per_subset 454 # define rx_page1_index16(N) ((N) >> 8) 455 # define rx_page2_index16(N) (((N) >> 5) & 0x7) 456 # define rx_page3_index16(N) ((N) & 0x1f) 457 458 # define rx_page1_size21 256 459 # define rx_page2_size21 32 460 # define rx_page3_size21 (8) 461 # define rx_page4_size21 bits_per_subset 462 # define rx_page1_index21(N) ((N) >> 13) 463 # define rx_page2_index21(N) (((N) >> 8) & 0x1f) 464 # define rx_page3_index21(N) (((N) >> 5) & 0x7) 465 # define rx_page4_index21(N) ((N) & 0x1f) 466 467 468 # elif (bits_per_subset == 64) 469 470 # define rx_page2_size (4) 471 # define rx_page3_size bits_per_subset 472 # define rx_page1_index16(N) ((N) >> 8) 473 # define rx_page2_index16(N) (((N) >> 6) & 0x3) 474 # define rx_page3_index16(N) ((N) & 0x3f) 475 476 # define rx_page1_size21 256 477 # define rx_page2_size21 32 478 # define rx_page3_size21 (8) 479 # define rx_page4_size21 bits_per_subset 480 # define rx_page1_index21(N) ((N) >> 13) 481 # define rx_page2_index21(N) (((N) >> 8) & 0x1f) 482 # define rx_page3_index21(N) (((N) >> 6) & 0x3) 483 # define rx_page4_index21(N) ((N) & 0x3f) 484 485 # else 486 487 # error "odd bits_per_subset in hackerlab/rx/super.h" 488 489 # endif 490 491 # define rx_page1_16(T) ((struct rx_inx ***)(T)) 492 # define rx_page2_16(T,N) rx_page1_16 (T)[rx_page1_index16(N)] 493 # define rx_page3_16(T,N) rx_page2_16 (T,N)[rx_page2_index16(N)] 494 # define rx_page3_subset_index16(N) bitset_subset_offset (rx_page3_index16(N)) 495 496 # define rx_transition16(T,N) (rx_page3_16 ((T), (N)) + rx_page3_index16 (N)) 497 # define rx_subset_transitions16(T,N) (rx_page3_16 ((T), (N)) + rx_page3_subset_index16 (N)) 498 499 500 # define rx_page1_21(T) (T) 501 # define rx_page2_21(T,N) rx_page1_21(T)[rx_page1_index21(N)] 502 # define rx_page3_21(T,N) rx_page2_21((T),(N))[rx_page2_index21(N)] 503 # define rx_page4_21(T,N) rx_page3_21((T),(N))[rx_page3_index21(N)] 504 # define rx_page4_subset_index21(N) bitset_subset_offset (rx_page4_index21 (N)) 505 506 # define rx_transition21(T,N) (rx_page4_21 ((T), (N)) + rx_page4_index21 (N)) 507 # define rx_subset_transition21(T,N) (rx_page4_21 ((T), (N)) + rx_page4_subset_index21 (N)) 508 509 #endif 510 511 512 513 514 515 516 517 /*(c rx_lock_superstate :category macro) 518 * #define rx_lock_superstate(RX_NFA, SUPERSTATE) 519 * 520 * Increment the reference count of a DFA state. 521 */ 522 #define rx_lock_superstate(R,S) ((S)->locks++) 523 524 /*(c rx_unlock_superstate :category macro) 525 * #define rx_unlock_superstate(RX_NFA, SUPERSTATE) 526 * 527 * Decrement the reference count of a DFA state. 528 */ 529 #define rx_unlock_superstate(R,S) (--(S)->locks) 530 531 /*(c rx_superstate_transition_table :category macro) 532 * #define rx_superstate_transition_table(RX_NFA, SUPERSTATE) 533 * 534 * As a function: 535 * 536 * extern struct rx_inx * 537 * rx_superstate_transition_table (struct rx_nfa * rx, 538 * struct rx_superstate * state); 539 * 540 * Return the DFA state transition table that corresponds to a 541 * particular superstate 542 */ 543 #define rx_superstate_transition_table(RX_NFA, SUPERSTATE) \ 544 &((SUPERSTATE)->transitions) 545 546 /*(c rx_transition_table_superstate :category macro) 547 * #define rx_transition_table_superstate(RX_NFA, TABLE) 548 * 549 * As a function: 550 * 551 * extern struct rx_superstate * 552 * rx_superstate_transition_table (struct rx_nfa * rx, 553 * struct rx_inx * table); 554 * 555 * Return the superstate that corresponds to a particular 556 * DFA state transition table. 557 */ 558 #define rx_transition_table_superstate(RX_NFA, TABLE) \ 559 ((struct rx_superstate *)\ 560 ( ((char *)(TABLE)) - (char *)(&((struct rx_superstate *)0)->transitions))) 561 562 563 564 /*(c rx_superstate_counter :category variable) 565 * extern int rx_superstate_counter; 566 * 567 * As DFA states are allocated, they are assigned sequential 568 * numbers. The number of the next DFA state is stored in this 569 * variable. 570 */ 571 extern int rx_superstate_counter; 572 573 574 575 /************************************************************************ 576 *(h2 "DFA Edges") 577 * 578 */ 579 580 /*(c #s"struct rx_super_edge" :category type) 581 * 582 * This structure holds one edge of a regular expression DFA. 583 * 584 * All DFA edges are character set edges -- they are labeled by 585 * a non-empty set of characters. No two of the character sets 586 * of edges originating in a common DFA state intersect. 587 * 588 insert*/ 589 struct rx_super_edge 590 { 591 bits cset; 592 /* The character set of this edge.*/ 593 594 struct rx_superstate * present; 595 /* The DFA source state of this edge. If the edge exists, this 596 state is guaranteed to be in the cache.*/ 597 598 struct rx_superstate * future; 599 /* The DFA destination state of this edge. This state may or may 600 not be in the DFA cache, and if it is in the cache, this edge 601 may or may not "know" about it. If the state is in the cache 602 and this edge "knows" about it, then this field points to the 603 state. Otherwise, this field contains 0.*/ 604 605 606 struct rx_inx * inx_list; 607 /* A list of instruction frames, linked by next_same_edge, 608 built for this edge. */ 609 610 611 struct rx_super_edge * next_same_present; 612 /* This points to the next edge in a list of DFA edges sharing a 613 common starting state. The list starts at 614 `present->outgoing_edges'.*/ 615 616 struct rx_super_edge * next_same_dest; 617 struct rx_super_edge * prev_same_dest; 618 /* These link this edge into a queue of edges sharing a 619 common DFA ending state. These fields are only valid if 620 `future' is not 0. In that case, this edge is stored on 621 the queue `future->incoming_edges'.*/ 622 623 int is_backtrack; 624 }; 625 /*end-insert 626 */ 627 628 629 630 631 /************************************************************************ 632 *(h2 "Sets of NFA States") 633 * 634 * Every state of a DFA corresponds to a non-empty set of NFA states. 635 * No two DFA states share the same set of NFA states. 636 * 637 */ 638 639 /*(c #s"struct rx_superset" :category type) 640 * struct rx_superset; 641 * 642 * This structure holds a set of NFA states and is used to represent 643 * the set of states that comprise a single DFA state. In xref:"The 644 * NFA Data Types" we encountered another representation for sets of 645 * NFA states, used for computing epsilon closures of NFA states. 646 * Unfortunately, the requirements of computing epsilon closures and 647 * of computing DFA states are too different, so two representations 648 * are needed. 649 * 650 * For equal `car' and `cdr' (in the sense of `=='), there is at most 651 * one of these structures. 652 * 653 * The empty set is *not* represented by `(struct rx_superset *)0' 654 * but instead by a superset structure with `car' and `cdr' equal to 655 * 0. Thus, the last element of every `struct rx_superste' list 656 * (linked by `cdr' pointers) is a structure with `car' and `cdr' 0. 657 * (This simplifies hash-consing sets of these structures). 658 * 659 insert*/ 660 struct rx_superset 661 { 662 struct rx_nfa_state * car; 663 /* This points to the first NFA state in the set. The 664 list of states is kept sorted by id number, from 665 least to greatest. 666 667 Note that if the NFA has been freed but the superset has not, 668 then this pointer is invalid. This condition can be detected 669 by comparing the `id' field of this structure with the `rx_id' 670 field of the NFA over which this set was supposedly built. If 671 the two numbers agree, then the `car' pointer is valid. If 672 the two numbers disagree, then this set was actually built for 673 an NFA that no longer exists: it is a stale entry in the cache 674 of supersets.*/ 675 676 struct rx_superset * cdr; 677 /* This points to the rest of this set.*/ 678 679 long state_label; 680 /* If all of the state labels of the NFA states in this set are 681 0, then this field is 0. Otherwise, this field is the NFA 682 state label with the smallest magnitude.*/ 683 684 int has_cset_edges; 685 /* This is the logical-or (`||') of the same field of the `cdr', 686 and the same-named field of the `car'. If it is 0, then this 687 superset corresponds to a dead-end DFA state (no outgoing 688 edges).*/ 689 690 /* Cache management data. 691 */ 692 int refs; /* A reference count. */ 693 int id; /* See `car', above. */ 694 struct hashtree_item hash_item; /* For hash-consing lists */ 695 696 697 struct rx_nfa_state * nfa_state; 698 /* If this superset is discovered to be the eclosure of a 699 single NFA state by `rx_nfa_state_to_superstate', this 700 field points to that NFA state. 701 702 The state points back to this set in the field 703 `superstate_set'. 704 705 If the NFA is subsequently freed, this field is reset to 0.*/ 706 707 struct rx_superstate * superstate[2]; 708 /* If the DFA states for this superset exists (in the cache of DFA 709 states), then this field points to those DFA state. 710 711 superstate[0] for 1-byte storage unit size, 712 superstate[1] for 2-byte storage unit size. 713 */ 714 715 }; 716 /*end-insert 717 */ 718 719 720 extern alloc_limits rx__dfa_alloc_limits; 721 722 723 /* automatically generated __STDC__ prototypes */ 724 extern void rx__init_dfa_alloc_limits (void); 725 extern size_t rx_dfa_cache_failure_pt (void); 726 extern void rx__dfa_cache_statistics (size_t * threshold, 727 size_t * failure_pt, 728 size_t * in_use, 729 size_t * high_water_mark, 730 int * hits, 731 int * misses, 732 int * total_hits, 733 int * total_misses); 734 extern void rx_set_dfa_cache_failure_pt (size_t n); 735 extern int rx__really_free_superstate (void); 736 extern struct rx_superstate * rx_nfa_state_to_superstate (struct rx_nfa * rx, 737 struct rx_nfa_state * nfa_state, 738 int storage_unit_size); 739 extern void rx_clear_superstate_table_ids (struct rx_nfa * nfa); 740 extern struct rx_inx * rx_handle_cache_miss (struct rx_nfa * rx, 741 struct rx_superstate * super, 742 unsigned int chr, 743 void * data_2) ; 744 #endif /* INCLUDE__RX__SUPER_H */ 745