/* super.c - lazily constructed DFAs
2 *
3 ****************************************************************
4 * Copyright (C) 1998, 2000 Thomas Lord
5 *
6 * See the file "COPYING" for further information about
7 * the copyright and warranty status of this work.
8 */
9
10
11
12 #include "hackerlab/bugs/panic.h"
13 #include "hackerlab/mem/mem.h"
14 #include "hackerlab/rx/bits-tree-rules.h"
15 #include "hackerlab/rx/dfa-cache.h"
16 #include "hackerlab/rx/super.h"
17
18
19 /* __STDC__ prototypes for static functions */
20 static void free_some_dfa_memory (void * ign, size_t needed);
21 static int rx_dfa_cache_prepare (size_t amt);
22 static void * rx_dfa_cache_malloc (size_t size);
23 static void * rx_dfa_cache_soft_malloc (size_t size);
24 static void rx_dfa_cache_free (void * mem);
25 static struct rx_superset * rx_protect_superset (struct rx_nfa * rx, struct rx_superset * set);
26 static void rx_release_superset (struct rx_superset *set);
27 static struct rx_superset * rx_superset_cons (struct rx_nfa * rx,
28 struct rx_nfa_state *car,
29 struct rx_superset *cdr);
30 static struct hashtree_item * superset_allocator (void * val, struct hashtree_rules * rules);
31 static struct rx_superset * rx_superstate_eclosure_union (struct rx_nfa * rx,
32 struct rx_superset *set,
33 struct rx_nfa_state_set *ecl) ;
34 static int supersetcmp (void * va, void * vb, struct hashtree_rules * rules);
35 static struct hashtree * super_hash_allocator (struct hashtree_rules * rules);
36 static void super_hash_liberator (struct hashtree * hash, struct hashtree_rules * rules);
37 static void superset_hash_item_liberator (struct hashtree_item * it,
38 struct hashtree_rules * rules);
39 static void install_cache_miss_transition (struct rx_super_edge * e);
40 static void semifree_superstate ();
41 static void refresh_semifree_superstate (struct rx_superstate * super);
42 static void rx_refresh_this_superstate (struct rx_superstate * superstate);
43 static struct rx_superstate * rx_superstate (struct rx_nfa *rx,
44 struct rx_superset *set,
45 int storage_unit_size);
46 static int solve_destination (struct rx_nfa * rx,
47 struct rx_inx * inx_out,
48 struct rx_super_edge * e,
49 int storage_unit_size);
50 static int compute_super_edge_cset (struct rx_nfa *rx,
51 bits csetout,
52 struct rx_superstate * superstate,
53 unsigned int chr);
54 static struct rx_super_edge * rx_super_edge (struct rx_nfa *rx, struct rx_superstate *super, bits cset);
55 static int install_partial_transition (struct rx_superstate *super,
56 struct rx_inx *answer,
57 struct rx_super_edge * edge,
58 bitset_subset set,
59 int chr);
60
61
62 #define COPIES_32 \
63 X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
64
65
66 #define COPIES_64 \
67 X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, \
68 X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
69
70 #define COPIES_256 \
71 X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, \
72 X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, \
73 X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, \
74 X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, \
75 X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, \
76 X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, \
77 X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, \
78 X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
79
80 #if rx_page1_size21 == 256
81 # define COPIES_PAGE1_21_SIZE COPIES_256
82 #elif rx_page1_size21 == 32
83 # define COPIES_PAGE1_21_SIZE COPIES_32
84 #else
85 # error you broke fragile code
86 #endif
87
88 #if rx_page2_size21 == 256
89 # define COPIES_PAGE2_21_SIZE COPIES_256
90 #elif rx_page2_size21 == 32
91 # define COPIES_PAGE2_21_SIZE COPIES_32
92 #else
93 # error you broke fragile code
94 #endif
95
96
97 #if bits_per_subset == 32
98 # define COPIES_BPS COPIES_32
99 # define COPIES_PAGE2_SIZE X, X, X, X, X, X, X, X
100 # define COPIES_PAGE3_21_SIZE X, X, X, X, X, X, X, X
101 #elif bits_per_subset == 64
102 # define COPIES_BPS COPIES_64
103 # define COPIES_PAGE2_SIZE X, X, X, X
104 # define COPIES_PAGE3_21_SIZE X, X, X, X
105 #else
106 # error "odd bits_per_subset in super.c"
107 #endif
108
109
110 /* Default value for leaf nodes of multi-level transition tables:
111 */
112 #undef X
113 #define X {0, 0, (void *)rx_cache_miss, 0}
114 static struct rx_inx rx_shared_cache_miss_page[256] = {COPIES_256};
115
116 /* Fixed backtrack page for leaf nodes of multi-level transition tables:
117 */
118 #undef X
119 #define X {0, 0, (void *)rx_backtrack, 0}
120 static struct rx_inx rx_shared_backtrack_page[256] = {COPIES_256};
121
122 /* Fixed value for high-surrogate leaf nodes of 16-bit tables:
123 */
124 #undef X
125 #define X {0, 0, (void *)rx_huge_char, 0}
126 static struct rx_inx rx_huge_char_page[256] = {COPIES_256};
127
128 #ifdef RX_LARGE_TABLES
129
130 /* Default value for page2 of 21-bit tables:
131 */
132 #undef X
133 #define X rx_shared_cache_miss_page
134 static struct rx_inx * rx_huge_char_second_level_page[rx_page2_size21] = { COPIES_256 };
135
136 /* Default value for huge_char_transitions:
137 */
138 #undef X
139 #define X rx_huge_char_second_level_page
140 static struct rx_inx ** rx_default_huge_char_transitions[rx_page1_size21] = { COPIES_32 };
141
142 #else /* ndef RX_LARGE_TABLES */
143
144 #undef X
145 #define X rx_shared_cache_miss_page
146 static struct rx_inx * rx_default_small_table_page2[rx_page2_size] = { COPIES_PAGE2_SIZE };
147
148 #undef X
149 #define X rx_shared_backtrack_page
150 static struct rx_inx * rx_small_table_backtrack_page2[rx_page2_size] = { COPIES_PAGE2_SIZE };
151
152 #undef X
153 #define X rx_huge_char_page
154 static struct rx_inx * rx_huge_char_page2[rx_page2_size] = { COPIES_PAGE2_SIZE };
155
156
157 #undef X
158 #define X rx_shared_cache_miss_page
159 static struct rx_inx * rx_huge_char_third_level_page[rx_page3_size21] = { COPIES_PAGE3_21_SIZE };
160
161 #undef X
162 #define X rx_huge_char_third_level_page
163 static struct rx_inx ** rx_huge_char_second_level_page[rx_page2_size21] = { COPIES_PAGE2_21_SIZE };
164
165 #undef X
166 #define X rx_huge_char_second_level_page
167 static struct rx_inx *** rx_default_huge_char_transitions[rx_page1_size21] = { COPIES_PAGE1_21_SIZE };
168
169 #endif
170
171
172 /************************************************************************
173 *(h0 "The Superstate DFA"
174 * :subtitle "rx/super.c"
175 * :includes ("rx/super.h"))
176 *
177 * It is well known that there is a mapping from non-deterministic
178 * finite-state automata to equivalent deterministic finite-state
179 * automata (a good text book about compiler construction explains
180 * this). That mapping is important to applications which use regular
 * expressions for efficient pattern matching. When a regular
 * expression comparison is implemented using a single pass with a
 * non-deterministic automaton with `K' states, the comparison
 * algorithm may run for a number of steps which is proportional to
 * `K*N' for a length `N' input string (where each step is relatively
 * expensive). When implemented using backtracking, `N**K'
 * (relatively inexpensive) steps may be required. On the other hand,
 * when regular expression comparison is implemented using a
 * deterministic automaton, the algorithm requires a number of
 * instructions which is linear in the length of the input string
 * (order `N' (inexpensive) steps).
192 *
193 * The catch is that, using brute force, it is expensive (in time and
194 * space) to build a DFA from a typical regular expression NFA. An
195 * NFA with `K' states can yield a DFA with order `2^K' states.
196 *
197 * The trick is that, during most matches using typical regexps, only
198 * a small portion of the total potential number of DFA states are
199 * actually needed -- time and space can be saved by building DFA
200 * states on-demand. During any match, only two DFA states are needed
201 * at a time -- if the total number of DFA states encountered during a
202 * match is very large, space can be saved (at the expense of time) by
203 * discarding less frequently used DFA states and reconstructing them
204 * on demand.
205 *
206 * The functions in this chapter do the trick: they build a DFA on
207 * demand, keeping a cache of frequently used states, and discarding
208 * infrequently used states if the cache grows too large.
209 *
210 * As a result, DFA-style matching using the functions here provide
211 * matching in order `N' (inexpensive) steps for most patterns (by
212 * acting like a DFA match). In the worst case (when the cache is not
213 * effective), these functions provide matching in order `N*K'
214 * (relatively expensive) steps (by acting like a one-pass NFA match).
215 *
216 */
217 /*(menu)
218 */
219
220 /*(h1 "The DFA Virtual Machine")
221 *
222 * The view taken by the design of Rx is that a DFA built from a
223 * regexp is a kind of program which can be interpreted by a
224 * virtual machine.
225 *
226 * The basic operation of this machine is that it has one register: a
227 * DFA state. In each cycle, it reads one input character and
228 * tries to advance to the next DFA state by looking that character up
229 * in the transition table for the current state.
230 *
231 * The machine's outputs are the state label of the current DFA state,
232 * a flag that indicates whether or not the current state has outgoing
233 * edges (whether or not any input characters are valid), and a signal
234 * that occurs when there is no transition for an input character.
235 *
236 * Implementing this virtual machine is complicated by the fact that
237 * its program, the DFA, is not constructed all at once and might not
238 * exist in its entirety at any one time. The virtual machine interpreter
239 * has to be able to handle the case when the next state reached after
240 * some input character hasn't been built yet or has been flushed
241 * from the DFA cache.
242 */
243
244
245
246 /*(include-documentation "super.h")
247 */
248
249 /************************************************************************
250 * The Rx DFA Cache
251 *
252 * The DFA state cache is kept in a structure referenced through
253 * the global pointer `rx_default_cache'. In the future, this
254 * may be exposed as an interface to the library: allowing separate
255 * caches for regexps with different priorities.
256 */
257
258 struct rx_cache;
259
260 struct rx_cache
261 {
262 struct hashtree_rules superset_hashtree_rules;
263
264 struct rx_superstate * lru_superstate;
265 /*
266 * The least recently used live superstate. This is the next
267 * superstate that will become semifree. Superstates are linked on
268 * this queue by `next_recyclable' and `prev_recyclable'.
269 */
270
271 struct rx_superstate * semifree_superstate;
272 /*
273 * The least recently used semifree superstate. This is the next
274 * superstate that will become completely freed. Superstates are
275 * linked on this queue by `next_recyclable' and `prev_recyclable'.
276 */
277
278 /* Allocation counts. The number of various kinds of objects
279 * currently allocated. This is used for debugging and for tuning
280 * the rate at which states are semifreed.
281 */
282 int hash_tables;
283 int supersets;
284 int super_edges;
285 int superstates;
286 int semifree_superstates;
287
288 /* The ratio of cache hits to cache misses when searching for
289 * superstates. This ratio is weighted in favor of recent
290 * cache probes.
291 */
292 int hits;
293 int misses;
294 int total_hits;
295 int total_misses;
296
297 /* A hash table mapping `car' and `cdr' to NFA states.
298 */
299 struct hashtree superset_table;
300 };
301
302 static struct rx_cache * rx_default_cache;
303
304
305 int rx_superstate_counter = 0;
306
307
308
309 /************************************************************************
310 * DFA Cache Allocation
311 *
312 * These malloc-like functions add or subtract from the `bytes_used' field
313 * of the DFA state cache. They discard states from an over-full cache.
314 */
315
316 #ifndef RX_DEFAULT_DFA_CACHE_SIZE
317 /* This is an upper bound on the number of bytes that may (normally)
318 * be allocated for DFA states. If this threshold would be exceeded,
319 * Rx tries to flush some DFA states from the cache.
320 */
321 #define RX_DEFAULT_DFA_CACHE_SIZE (sizeof (void *) * (1 << 16))
322 #endif
323
324 alloc_limits rx__dfa_alloc_limits;
325
326
327 static void
free_some_dfa_memory(void * ign,size_t needed)328 free_some_dfa_memory (void * ign, size_t needed)
329 {
330 while ( (lim_in_use (rx__dfa_alloc_limits) + needed > lim_threshold (rx__dfa_alloc_limits))
331 && (0 <= rx__really_free_superstate ()))
332 ;
333 }
334
335
336 void
rx__init_dfa_alloc_limits(void)337 rx__init_dfa_alloc_limits (void)
338 {
339 static int init = 1;
340
341 if (init)
342 {
343 rx__dfa_alloc_limits = make_alloc_limits ("Rx DFA cache",
344 RX_DEFAULT_DFA_CACHE_SIZE,
345 0,
346 0,
347 free_some_dfa_memory,
348 0);
349 init = 0;
350 }
351 }
352
353
354 size_t
rx_dfa_cache_failure_pt(void)355 rx_dfa_cache_failure_pt (void)
356 {
357 rx__init_dfa_alloc_limits ();
358 return lim_failure_pt (rx__dfa_alloc_limits);
359 }
360
361 void
rx__dfa_cache_statistics(size_t * threshold,size_t * failure_pt,size_t * in_use,size_t * high_water_mark,int * hits,int * misses,int * total_hits,int * total_misses)362 rx__dfa_cache_statistics (size_t * threshold,
363 size_t * failure_pt,
364 size_t * in_use,
365 size_t * high_water_mark,
366 int * hits,
367 int * misses,
368 int * total_hits,
369 int * total_misses)
370 {
371 rx__init_dfa_alloc_limits ();
372 if (threshold)
373 *threshold = rx_dfa_cache_threshold ();
374 if (failure_pt)
375 *failure_pt = rx_dfa_cache_failure_pt ();
376 if (in_use)
377 *in_use = rx_dfa_cache_in_use ();
378 if (high_water_mark)
379 *high_water_mark = rx_dfa_cache_high_water_mark ();
380 if (hits)
381 *hits = rx_default_cache->hits;
382 if (misses)
383 *misses = rx_default_cache->misses;
384 if (total_hits)
385 *total_hits = rx_default_cache->total_hits;
386 if (total_misses)
387 *total_misses = rx_default_cache->total_misses;
388 }
389
390
391 void
rx_set_dfa_cache_failure_pt(size_t n)392 rx_set_dfa_cache_failure_pt (size_t n)
393 {
394 rx__init_dfa_alloc_limits ();
395 lim_set_failure_pt (rx__dfa_alloc_limits, n);
396 }
397
398
399 static int
rx_dfa_cache_prepare(size_t amt)400 rx_dfa_cache_prepare (size_t amt)
401 {
402 rx__init_dfa_alloc_limits ();
403 return lim_prepare (rx__dfa_alloc_limits, amt);
404 }
405
406 /* static char * rx_dfa_cache_malloc (int size);
407 *
408 * Allocate memory for the DFA state cache. If this allocation would
409 * overflow the cache then this function first tries to reduce the cache
410 * size by flushing old DFA statess.
411 */
412 static void *
rx_dfa_cache_malloc(size_t size)413 rx_dfa_cache_malloc (size_t size)
414 {
415 void * answer;
416
417 rx__init_dfa_alloc_limits ();
418 answer = lim_malloc (rx__dfa_alloc_limits, size);
419 return answer;
420 }
421
422
423 static void *
rx_dfa_cache_soft_malloc(size_t size)424 rx_dfa_cache_soft_malloc (size_t size)
425 {
426 void * answer;
427
428 rx__init_dfa_alloc_limits ();
429 answer = lim_soft_malloc (rx__dfa_alloc_limits, size);
430 return answer;
431 }
432
433
434 /* static void rx_dfa_cache_free (char * mem);
435 *
436 * Free memory for the DFA state cache.
437 */
438 static void
rx_dfa_cache_free(void * mem)439 rx_dfa_cache_free (void * mem)
440 {
441 lim_free (rx__dfa_alloc_limits, mem);
442 }
443
444
445 /************************************************************************
446 * The NFA State Set Cache
447 *
448 * Each DFA state points to a set of NFA states. That set is stored
449 * in the NFA state set cache which is part of the DFA state cache.
450 *
451 * These are the hash-table hook functions for the NFA state set
452 * cache.
453 */
454
455
456 /* (c rx_protect_superset)
457 * struct rx_superset * rx_protect_superset (struct rx_nfa * rx, struct rx_superset * set);
458 *
459 * Increment the reference count of a superset. See xref:"struct
460 * rx_superset" and xref:"rx_release_superset".
461 */
462 static struct rx_superset *
rx_protect_superset(struct rx_nfa * rx,struct rx_superset * set)463 rx_protect_superset (struct rx_nfa * rx, struct rx_superset * set)
464 {
465 if (set)
466 ++set->refs;
467 return set;
468 }
469
470
471 /* (c rx_release_superset)
472 * void rx_release_superset (struct rx_superset *set);
473 *
474 * Decrement the reference count on a superset. If it becomes 0,
475 * then free storage used by the set.
476 */
477 static void
rx_release_superset(struct rx_superset * set)478 rx_release_superset (struct rx_superset *set)
479 {
480 if (set && !--set->refs)
481 {
482 if (set->nfa_state)
483 set->nfa_state->superstate_set = 0;
484 rx_release_superset (set->cdr);
485 hashtree_delete (&set->hash_item, &rx_default_cache->superset_hashtree_rules);
486 rx_dfa_cache_free ((char *)set);
487 --rx_default_cache->supersets;
488 }
489 }
490
491
492 /* This adds an element to a superstate set. These sets are lists, such
493 * that lists with == elements are ==.
494 *
495 *
496 * on entry:
497 * CDR has at least one reference which is taken over by this
498 * function (becomes the reference of the set returned to its cdr)
499 *
500 * on exit:
501 * set returned has one reference which is for the caller.
502 */
503
504 static struct rx_superset *
rx_superset_cons(struct rx_nfa * rx,struct rx_nfa_state * car,struct rx_superset * cdr)505 rx_superset_cons (struct rx_nfa * rx,
506 struct rx_nfa_state *car,
507 struct rx_superset *cdr)
508 {
509 struct rx_cache * cache = rx_default_cache;
510 if (!car && !cdr)
511 return 0;
512 {
513 struct rx_superset template;
514 struct hashtree_item * hit;
515 struct rx_superset * answer;
516
517 template.car = car;
518 template.cdr = cdr;
519 template.id = rx->rx_id;
520
521 rx_dfa_cache_prepare (4 * sizeof (struct hashtree *) + sizeof (struct rx_superset));
522 hit = hashtree_store (&cache->superset_table,
523 (unsigned long)car ^ car->id ^ (unsigned long)cdr,
524 (void *)&template,
525 &cache->superset_hashtree_rules);
526 if (!hit)
527 return 0;
528 answer = (struct rx_superset *)hit->key;
529 rx_protect_superset (rx, answer);
530 if (answer->refs != 1)
531 rx_release_superset (cdr);
532 return answer;
533 }
534 }
535
536
537 #define rx_abs(A) (((A) > 0) ? (A) : -(A))
538
539 /*
540 * static int superset_allocator (int * errn,
541 * struct hashtree_item ** retv,
542 * void * val,
543 * struct hashtree_rules * rules);
544 *
545 * Hash tree allocation function for supersets.
546 *
547 * on entry:
548 * CDR of `val' at least one reference which is taken over by this
549 * function (becomes the reference of the set returned to its cdr)
550 *
551 * on exit:
552 * Set returned has a reference count of 0.
553 */
554 static struct hashtree_item *
superset_allocator(void * val,struct hashtree_rules * rules)555 superset_allocator (void * val, struct hashtree_rules * rules)
556 {
557 struct rx_cache * cache;
558 struct rx_superset * template;
559 struct rx_superset * newset;
560
561 cache = ((struct rx_cache *)
562 ((char *)rules
563 - (unsigned long)(&((struct rx_cache *)0)->superset_hashtree_rules)));
564 template = (struct rx_superset *)val;
565 newset = ((struct rx_superset *)
566 rx_dfa_cache_soft_malloc (sizeof (*template)));
567 if (!newset)
568 return 0;
569 ++cache->supersets;
570
571 {
572 long cdrfinal;
573 long cdredges;
574
575 cdrfinal = (template->cdr ? template->cdr->state_label : 0);
576 cdredges = (template->cdr ? template->cdr->has_cset_edges : 0);
577
578 if (!template->car->state_label)
579 newset->state_label = cdrfinal;
580 else if (rx_abs (template->car->state_label) < rx_abs (cdrfinal))
581 newset->state_label = template->car->state_label;
582 else if (template->car->state_label > 0)
583 newset->state_label = template->car->state_label;
584 else
585 newset->state_label = cdrfinal;
586
587 newset->has_cset_edges = (template->car->has_cset_edges || cdredges);
588 }
589
590 newset->refs = 0;
591 newset->id = template->id;
592 newset->car = template->car;
593 newset->cdr = template->cdr;
594 newset->superstate[0] = 0;
595 newset->superstate[1] = 0;
596 newset->nfa_state = 0;
597 newset->hash_item.key = (void *)newset;
598 newset->hash_item.binding = 0;
599 return &newset->hash_item;
600 }
601
602
603 /* This computes a union of two NFA state sets. The sets do not have the
604 * same representation though. One is a RX_SUPERSET structure (part
605 * of the superstate NFA) and the other is an NFA_STATE_SET (part of the NFA).
606 *
607 * On exit, `set' has at least one reference which is for the caller.
608 */
609 static struct rx_superset *
rx_superstate_eclosure_union(struct rx_nfa * rx,struct rx_superset * set,struct rx_nfa_state_set * ecl)610 rx_superstate_eclosure_union (struct rx_nfa * rx,
611 struct rx_superset *set,
612 struct rx_nfa_state_set *ecl)
613 {
614 if (!ecl)
615 {
616 rx_protect_superset (rx, set);
617 return set;
618 }
619
620 if (!set)
621 return rx_superset_cons (rx, ecl->car,
622 rx_superstate_eclosure_union (rx, 0, ecl->cdr));
623
624 if (set->car == ecl->car)
625 return rx_superstate_eclosure_union (rx, set, ecl->cdr);
626
627 {
628 struct rx_superset * tail;
629 struct rx_nfa_state * first;
630
631 if (set->car->id < ecl->car->id)
632 {
633 tail = rx_superstate_eclosure_union (rx, set->cdr, ecl);
634 if (!tail)
635 return 0;
636 first = set->car;
637 }
638 else
639 {
640 tail = rx_superstate_eclosure_union (rx, set, ecl->cdr);
641 if (!tail)
642 return 0;
643 first = ecl->car;
644 }
645
646 {
647 struct rx_superset * answer;
648 answer = rx_superset_cons (rx, first, tail);
649 if (!answer)
650 {
651 rx_release_superset (tail);
652 return 0;
653 }
654 return answer;
655 }
656 }
657 }
658
659
660 static int
supersetcmp(void * va,void * vb,struct hashtree_rules * rules)661 supersetcmp (void * va, void * vb, struct hashtree_rules * rules)
662 {
663 struct rx_superset * a = (struct rx_superset *)va;
664 struct rx_superset * b = (struct rx_superset *)vb;
665
666 a = (struct rx_superset *)va;
667 b = (struct rx_superset *)vb;
668 return ( (a == b)
669 || ( a
670 && b
671 && (a->id == b->id)
672 && (a->car == b->car)
673 && (a->cdr == b->cdr)));
674 }
675
676
677 static struct hashtree *
super_hash_allocator(struct hashtree_rules * rules)678 super_hash_allocator (struct hashtree_rules * rules)
679 {
680 struct rx_cache * cache;
681 struct hashtree * it;
682
683 cache = ((struct rx_cache *)
684 ((char *)rules
685 - (unsigned long)(&((struct rx_cache *)0)->superset_hashtree_rules)));
686 it = ((struct hashtree *)
687 rx_dfa_cache_soft_malloc (sizeof (struct hashtree)));
688 if (!it)
689 return 0;
690 ++cache->hash_tables;
691 return it;
692 }
693
694
695 static void
super_hash_liberator(struct hashtree * hash,struct hashtree_rules * rules)696 super_hash_liberator (struct hashtree * hash, struct hashtree_rules * rules)
697 {
698 rx_dfa_cache_free ((char *)hash);
699 --rx_default_cache->hash_tables;
700 }
701
702 static void
superset_hash_item_liberator(struct hashtree_item * it,struct hashtree_rules * rules)703 superset_hash_item_liberator (struct hashtree_item * it,
704 struct hashtree_rules * rules)
705 {
706 }
707
708
709 /************************************************************************
710 * The Default DFA State Cache
711 *
712 *
713 *
714 */
715
716 static struct rx_cache default_cache =
717 {
718 {
719 supersetcmp,
720 super_hash_allocator,
721 super_hash_liberator,
722 superset_allocator,
723 superset_hash_item_liberator,
724 },
725 0, /* lru_superstate */
726 0, /* semifree_superstate */
727 0, /* hash_tables */
728 0, /* super_edges */
729 0, /* supersets */
730 0, /* superstates */
731 0, /* semifree_superstates */
732 0, /* hits */
733 0, /* misses */
734 0, /* total hits */
735 0, /* total misses */
736 { /* hash table */
737 0,
738 0,
739 0,
740 {0}
741 }
742 };
743
744
745 static struct rx_cache * rx_default_cache = &default_cache;
746
747 /************************************************************************
748 * Making DFA States Semifree and Restoring Them to Live State
749 *
750 */
751
752 /* static void install_cache_miss_transition (struct rx_super_edge * e);
753 *
754 * Install the instruction `answer' in the transition table of `super'
755 * for all instructions built for `e'.
756 */
757 static void
install_cache_miss_transition(struct rx_super_edge * e)758 install_cache_miss_transition (struct rx_super_edge * e)
759 {
760 struct rx_inx * transitions;
761
762 transitions = e->inx_list;
763 while (transitions)
764 {
765 transitions->inx = (void *)rx_cache_miss;
766 transitions->data = 0;
767 transitions->data_2 = (void *)e;
768 transitions = transitions->next_same_edge;
769 }
770
771 e->inx_list = 0;
772 }
773
774
775 /* static void semifree_superstate ();
776 *
777 * Select the most recently used live superstate that is not locked and
778 * make it semifree.
779 *
780 * This function makes all incoming instruction frames cache miss
781 * instructions. It moves the state from the `lru_superstate' queue
782 * of the cache to the `semifree_superstate' queue.
783 */
784 static void
semifree_superstate()785 semifree_superstate ()
786 {
787 struct rx_cache * cache = rx_default_cache;
788 int disqualified;
789
790 disqualified = cache->semifree_superstates;
791 if (disqualified == cache->superstates)
792 return;
793
794 while (cache->lru_superstate->locks)
795 {
796 cache->lru_superstate = cache->lru_superstate->next_recyclable;
797 ++disqualified;
798 if (disqualified == cache->superstates)
799 return;
800 }
801
802 {
803 struct rx_superstate * it;
804
805 it = cache->lru_superstate;
806 it->next_recyclable->prev_recyclable = it->prev_recyclable;
807 it->prev_recyclable->next_recyclable = it->next_recyclable;
808
809 cache->lru_superstate = (it == it->next_recyclable
810 ? 0
811 : it->next_recyclable);
812
813 if (!cache->semifree_superstate)
814 {
815 cache->semifree_superstate = it;
816 it->next_recyclable = it;
817 it->prev_recyclable = it;
818 }
819 else
820 {
821 it->prev_recyclable = cache->semifree_superstate->prev_recyclable;
822 it->next_recyclable = cache->semifree_superstate;
823 it->prev_recyclable->next_recyclable = it;
824 it->next_recyclable->prev_recyclable = it;
825 }
826 {
827 struct rx_super_edge *e;
828
829 it->is_semifree = 1;
830 ++cache->semifree_superstates;
831
832 e = it->incoming_edges;
833 if (e)
834 {
835 e->prev_same_dest->next_same_dest = 0;
836 while (e)
837 {
838 install_cache_miss_transition (e);
839 e = e->next_same_dest;
840 }
841 e = it->incoming_edges;
842 e->prev_same_dest->next_same_dest = e;
843 }
844 }
845 }
846 }
847
848
849 /* static void refresh_semifree_superstate (struct rx_superstate * super);
850 *
851 * Make `super', which may or may not be in a semifree state, a live
852 * superstate.
853 *
854 * This moves the state to the back of the `lru_superstate' queue.
855 */
856 static void
refresh_semifree_superstate(struct rx_superstate * super)857 refresh_semifree_superstate (struct rx_superstate * super)
858 {
859 struct rx_cache * cache = rx_default_cache;
860
861 if (cache->semifree_superstate == super)
862 cache->semifree_superstate = (super->prev_recyclable == super
863 ? 0
864 : super->prev_recyclable);
865 else if (cache->lru_superstate == super)
866 cache->lru_superstate = (super->prev_recyclable == super
867 ? 0
868 : super->prev_recyclable);
869
870 super->next_recyclable->prev_recyclable = super->prev_recyclable;
871 super->prev_recyclable->next_recyclable = super->next_recyclable;
872
873 if (!cache->lru_superstate)
874 (cache->lru_superstate
875 = super->next_recyclable
876 = super->prev_recyclable
877 = super);
878 else
879 {
880 super->next_recyclable = cache->lru_superstate;
881 super->prev_recyclable = cache->lru_superstate->prev_recyclable;
882 super->next_recyclable->prev_recyclable = super;
883 super->prev_recyclable->next_recyclable = super;
884 }
885 if (super->is_semifree)
886 {
887 super->is_semifree = 0;
888 --cache->semifree_superstates;
889 }
890 }
891
892
893
894 /* void rx_refresh_this_superstate (struct rx_superstate * superstate);
895 *
896 * If this state is semifree, pass it to `refresh_semifree_superstate'.
897 * Otherwise, move this state to the rear of the `lru_superstate' queue.
898 */
899 static void
rx_refresh_this_superstate(struct rx_superstate * superstate)900 rx_refresh_this_superstate (struct rx_superstate * superstate)
901 {
902 struct rx_cache * cache = rx_default_cache;
903 if (superstate->is_semifree)
904 refresh_semifree_superstate (superstate);
905 else if (cache->lru_superstate == superstate)
906 cache->lru_superstate = superstate->next_recyclable;
907 else if (superstate != cache->lru_superstate->prev_recyclable)
908 {
909 superstate->next_recyclable->prev_recyclable
910 = superstate->prev_recyclable;
911 superstate->prev_recyclable->next_recyclable
912 = superstate->next_recyclable;
913 superstate->next_recyclable = cache->lru_superstate;
914 superstate->prev_recyclable = cache->lru_superstate->prev_recyclable;
915 superstate->next_recyclable->prev_recyclable = superstate;
916 superstate->prev_recyclable->next_recyclable = superstate;
917 }
918 }
919
920
921 /************************************************************************
922 * Really Freeing DFA States
923 *
924 * After becoming semifree, a DFA state may eventually be freed for
925 * real -- meaning that the memory it occupies is recycled.
926 *
927 */
928
929 /* This tries to add a new superstate to the superstate freelist.
930 * It might, as a result, free some edge pieces or hash tables.
931 * If nothing can be freed because too many locks are being held, fail.
932 */
933
934 /* int rx__really_free_superstate ();
935 *
936 * Attempt to reduce memory usage of the DFA state cache by truly
937 * freeing one superstate. Return -1 if no state could be freed, 0
938 * otherwise.
939 */
940 int
rx__really_free_superstate(void)941 rx__really_free_superstate (void)
942 {
943 struct rx_cache * cache = rx_default_cache;
944 struct rx_superstate * it;
945
946 if (!cache->superstates)
947 return -1;
948
949 /* If the rate of cache misses is high, we semifree superstates at a
950 * rate proportional to our need to truly free states, but greater.
951 * That way the total amount of work done freeing states remains
952 * proportional to the work needed, but a spike in demand for memory
953 * will cause a larger spike in the amount of effort put into
954 * rescuing the most desirable states in the cache.
955 *
956 * For example, as this is written, when the cache miss rate is
957 * high, states are semifreed at a rate 3x the rate at which they
958 * are truly freed. If, after a period of running mostly out of the
959 * cache, we suddenly have to free 1/3 of the states in the cache to
960 * make room for new states, the remaining 2/3 of the old states
961 * will all have been marked semifree and thus the most useful of
962 * those states will have a high probability of being rescued even
963 * if the demand for memory continues. They will have a fighting
964 * chance against those new states which were needed once but which
965 * aren't otherwise very useful.
966 *
967 * We semifree states at this accelerated rate when the cache miss
968 * rate is high because in that case, we expect the cache miss rate
969 * to remain high, and so we expect the high demand on memory to
970 * continue.
971 *
972 * When the cache miss rate is low, we look at the number of
973 * semifree states compared to the total number of states. If that
974 * ratio is low (there aren't many semifree states), then we again
975 * semifree states at an accelerated rate. If that ratio is high
976 * (there are quite a few semifree states), then we only semifree
977 * one state for every state truly freed. The idea is that a low
978 * cache miss rate predicts a low demand on memory. When demand on
979 * memory is low, we want to maintain a constant sized queue of
980 * semifree states which is a fraction of the total number of
981 * states. The existence of that queue helps us from throwing away
982 * useful DFA states while demand on memory is low while the fixed
983 * size of that queue means that the overhead of maintaining it is a
984 * small constant multiple of the time spent truly freeing states.
985 */
986 if ( (cache->misses < cache->hits)
987 || (3 * cache->semifree_superstates >= cache->superstates))
988 semifree_superstate (cache);
989 else
990 {
991 semifree_superstate (cache);
992 semifree_superstate (cache);
993 semifree_superstate (cache);
994 }
995
996
997 /* At this point, if no states are semifree, then all states are
998 * locked and no states can be freed.
999 */
1000 if (!cache->semifree_superstate)
1001 return -1;
1002
1003 /* Remove the oldest semifree state from the queue of all semifree
1004 * states. Correct the counters that count DFA states.
1005 */
1006 {
1007 it = cache->semifree_superstate;
1008 it->next_recyclable->prev_recyclable = it->prev_recyclable;
1009 it->prev_recyclable->next_recyclable = it->next_recyclable;
1010 cache->semifree_superstate = ((it == it->next_recyclable)
1011 ? 0
1012 : it->next_recyclable);
1013 --cache->semifree_superstates;
1014 --cache->superstates;
1015 }
1016
1017
1018 /* Unlink the queue of incoming edges and clear the `future' field
1019 * of those edges which otherwise points to this state.
1020 */
1021 if (it->incoming_edges)
1022 {
1023 struct rx_super_edge * e;
1024 it->incoming_edges->prev_same_dest->next_same_dest = 0;
1025 e = it->incoming_edges;
1026 while (e)
1027 {
1028 struct rx_super_edge * et;
1029 et = e;
1030 e = e->next_same_dest;
1031 et->prev_same_dest = et->next_same_dest = 0;
1032 et->future = 0;
1033 }
1034 }
1035
1036
1037 /* Free all outgoing edges and remove them from the `incoming_edges'
1038 * queue of their destination states.
1039 */
1040 {
1041 struct rx_super_edge *tc;
1042 tc = it->outgoing_edges;
1043 while (tc)
1044 {
1045 struct rx_super_edge *tct;
1046
1047 tct = tc;
1048 tc = tc->next_same_present;
1049
1050 if (tct->future)
1051 {
1052 if (tct->future->incoming_edges == tct)
1053 {
1054 tct->future->incoming_edges = tct->next_same_dest;
1055 if (tct->future->incoming_edges == tct)
1056 tct->future->incoming_edges = 0;
1057 }
1058 tct->next_same_dest->prev_same_dest = tct->prev_same_dest;
1059 tct->prev_same_dest->next_same_dest = tct->next_same_dest;
1060 }
1061 bits_free (tct->cset);
1062 rx_dfa_cache_free ((char *)tct);
1063 --cache->super_edges;
1064 }
1065 }
1066
1067 #ifdef RX_LARGE_TABLES
1068 {
1069 /* free second level transition tables */
1070 if (it->storage_unit_size == 2)
1071 {
1072 int x;
1073
1074 for (x = 0; x < 256; ++x)
1075 {
1076 if ( (((struct rx_inx **)it->transitions)[x] != rx_shared_cache_miss_page)
1077 && (((struct rx_inx **)it->transitions)[x] != rx_shared_backtrack_page)
1078 && (((struct rx_inx **)it->transitions)[x] != rx_huge_char_page))
1079 rx_dfa_cache_free ((void *)((struct rx_inx **)it->transitions)[x]);
1080 }
1081 }
1082 }
1083 #else
1084 {
1085 /* free second level transition tables */
1086 if (it->storage_unit_size == 2)
1087 {
1088 int x;
1089
1090 for (x = 0; x < 256; ++x)
1091 {
1092 if ( (((struct rx_inx ***)it->transitions)[x] != rx_default_small_table_page2)
1093 && (((struct rx_inx ***)it->transitions)[x] != rx_small_table_backtrack_page2)
1094 && (((struct rx_inx ***)it->transitions)[x] != rx_huge_char_page2))
1095 {
1096 int y;
1097 struct rx_inx ** page2;
1098
1099 page2 = ((struct rx_inx ***)it->transitions)[x];
1100 for (y = 0; y < rx_page2_size; ++y)
1101 {
1102 if (page2[y] != rx_shared_cache_miss_page)
1103 {
1104 rx_dfa_cache_free ((void *)page2[y]);
1105 }
1106 }
1107 rx_dfa_cache_free ((void *)page2);
1108 }
1109 }
1110 }
1111 }
1112 #endif
1113
1114 if (it->huge_char_transitions != rx_default_huge_char_transitions)
1115 {
1116 int x;
1117
1118 #ifdef RX_LARGE_TABLES
1119 for (x = 0; x < rx_page1_size21; ++x)
1120 {
1121 struct rx_inx ** sub;
1122
1123 sub = it->huge_char_transitions[x];
1124 if (sub != rx_huge_char_second_level_page)
1125 {
1126 int z;
1127
1128 for (z = 0; z < rx_page2_size21; ++z)
1129 {
1130 struct rx_inx * leaf;
1131
1132 leaf = sub[z];
1133 if (leaf != rx_shared_cache_miss_page)
1134 rx_dfa_cache_free ((void *)leaf);
1135 }
1136 rx_dfa_cache_free ((void *)sub);
1137 }
1138 }
1139 #else
1140 for (x = 0; x < rx_page1_size21; ++x)
1141 {
1142 struct rx_inx *** sub;
1143
1144 sub = it->huge_char_transitions[x];
1145 if (sub != rx_huge_char_second_level_page)
1146 {
1147 int z;
1148
1149 for (z = 0; z < rx_page2_size21; ++z)
1150 {
1151 struct rx_inx ** subsub;
1152
1153 subsub = sub[z];
1154
1155 if (subsub != rx_huge_char_third_level_page)
1156 {
1157 int zz;
1158
1159 for (zz = 0; zz < rx_page3_size21; ++zz)
1160 {
1161 struct rx_inx * leaf;
1162
1163 leaf = subsub[zz];
1164 if (leaf != rx_shared_cache_miss_page)
1165 rx_dfa_cache_free ((void *)leaf);
1166 }
1167 rx_dfa_cache_free ((void *)subsub);
1168 }
1169 }
1170 rx_dfa_cache_free ((void *)sub);
1171 }
1172 }
1173 #endif
1174 rx_dfa_cache_free ((void *)it->huge_char_transitions);
1175 }
1176
1177 /* Clear the pointer to this state in the NFA set of this state.
1178 */
1179 if (it->members->superstate[0] == it)
1180 it->members->superstate[0] = 0;
1181 if (it->members->superstate[1] == it)
1182 it->members->superstate[1] = 0;
1183
1184 /* Release the NFA set.
1185 */
1186 rx_release_superset (it->members);
1187
1188 /* Free the memory of this DFA state.
1189 */
1190 rx_dfa_cache_free ((char *)it);
1191
1192 return 0;
1193 }
1194
1195
1196 /************************************************************************
1197 *(h1 "Building DFA States")
1198 *
1199 *
1200 *
1201 */
1202
1203 /* struct rx_superstate * rx_superstate (struct rx_nfa *rx,
1204 * struct rx_superset *set,
1205 * int storage_unit_size);
1206 *
1207 * Retrieve a DFA state from the cache or construct a new state for a
1208 * give NFA state set.
1209 *
1210 * This may cause old states to be flushed from the DFA state cache.
1211 */
static struct rx_superstate *
rx_superstate (struct rx_nfa *rx,
	       struct rx_superset *set,
	       int storage_unit_size)
{
  struct rx_cache * cache;
  struct rx_superstate * superstate;
  int has_huge_table;

  /* Only two character-set sizes are supported: 8-bit (256) and
   * 21-bit (Unicode, 1 << 21).  Anything else is a caller bug.
   */
  has_huge_table = 0;

  if (rx->local_cset_size == 256)
    has_huge_table = 0;
  else if (rx->local_cset_size == (1 << 21))
    has_huge_table = 1;
  else
    panic ("odd sized cset in rx_superstate");

  cache = rx_default_cache;
  superstate = 0;

  /* Does the superstate already exist in the cache? */
  if (set->superstate[storage_unit_size - 1])
    {
      ++cache->hits;
      ++cache->total_hits;
      /* Keep hits+misses bounded at 256 by halving both, so the
       * counters track a recent window rather than all of history.
       */
      while ((cache->hits + cache->misses) > 256)
	{
	  cache->hits >>= 1;
	  cache->misses >>= 1;
	}

      superstate = set->superstate[storage_unit_size - 1];
      rx_refresh_this_superstate (superstate);
      return superstate;
    }


  /* This point reached only for cache misses. */
  ++cache->misses;
  ++cache->total_misses;
  while ((cache->hits + cache->misses) > 256)
    {
      cache->hits >>= 1;
      cache->misses >>= 1;
    }


  if (!has_huge_table)
    {
      /* 8-bit cset: the state is followed by a flat 256-entry
       * transition table, initially all zero (cache-miss frames).
       */
      int superstate_size;
      superstate_size = (sizeof (*superstate) + (sizeof (struct rx_inx) * 256));
      superstate = ((struct rx_superstate *) rx_dfa_cache_malloc (superstate_size));

      if (!superstate)
	return 0;

      superstate->storage_unit_size = storage_unit_size;
      superstate->has_huge_table = 0;

      mem_set0 ((void *)&superstate->transitions, sizeof (rx_shared_cache_miss_page));
      /* mem_move ((void *)&superstate->transitions, (void *)rx_shared_cache_miss_page, sizeof (rx_shared_cache_miss_page)); */
    }
  else
    {
      int x;
      int superstate_size;

      superstate_size = 0;

      /* For huge csets the first-level table holds 256 entries:
       * page pointers when scanning 16-bit units, instruction frames
       * (one per lead byte) when scanning UTF-8 bytes.
       */
      if (storage_unit_size == 2)
	superstate_size = (sizeof (*superstate) + (sizeof (struct rx_inx *) * 256));
      else if (storage_unit_size == 1)
	superstate_size = (sizeof (*superstate) + (sizeof (struct rx_inx) * 256));
      else
	panic ("odd storage unit size in rx_superstate");

      superstate = ((struct rx_superstate *) rx_dfa_cache_malloc (superstate_size));
      if (!superstate)
	return 0;

      superstate->storage_unit_size = storage_unit_size;
      superstate->has_huge_table = 1;

      if (storage_unit_size == 2)
	{
	  /* 16-bit units: point every first-level slot at a shared
	   * read-only page (cache-miss, backtrack, or huge-char
	   * dispatch for the 0xd8..0xdb surrogate range).  Private
	   * copies are made lazily on a cache miss.
	   */
#ifdef RX_LARGE_TABLES
	  {
	    for (x = 0; x < 0xd8; ++x)
	      ((struct rx_inx **)superstate->transitions)[x] = rx_shared_cache_miss_page;
	    for (x = 0xd8; x < 0xdc; ++x)
	      ((struct rx_inx **)superstate->transitions)[x] = rx_huge_char_page;
	    for (x = 0xdc; x < 0xe0; ++x)
	      ((struct rx_inx **)superstate->transitions)[x] = rx_shared_backtrack_page;
	    for (x = 0xe0; x < 256; ++x)
	      ((struct rx_inx **)superstate->transitions)[x] = rx_shared_cache_miss_page;
	  }
#else
	  {
	    for (x = 0; x < 0xd8; ++x)
	      ((struct rx_inx ***)superstate->transitions)[x] = rx_default_small_table_page2;
	    for (x = 0xd8; x < 0xdc; ++x)
	      ((struct rx_inx ***)superstate->transitions)[x] = rx_huge_char_page2;
	    for (x = 0xdc; x < 0xe0; ++x)
	      ((struct rx_inx ***)superstate->transitions)[x] = rx_small_table_backtrack_page2;
	    for (x = 0xe0; x < 256; ++x)
	      ((struct rx_inx ***)superstate->transitions)[x] = rx_default_small_table_page2;
	  }
#endif
	}
      else if (storage_unit_size == 1)
	{
	  /* UTF-8 byte scanning: 0x00..0x7f start as cache-miss
	   * frames; each multi-byte lead-byte range gets a decoding
	   * instruction, invalid lead bytes get `rx_bogus_utf8'.
	   */
	  mem_move ((void *)&superstate->transitions, (void *)rx_shared_cache_miss_page, 128 * sizeof (struct rx_inx));

	  /* no valid multi-byte encodings begin with 80..c1
	   */
	  for (x = 0x80; x < 0xc2; ++x)
	    {
	      ((struct rx_inx *)&superstate->transitions)[x].data = (void *)0;
	      ((struct rx_inx *)&superstate->transitions)[x].data_2 = (void *)0;
	      ((struct rx_inx *)&superstate->transitions)[x].inx = (void *)rx_bogus_utf8;
	      ((struct rx_inx *)&superstate->transitions)[x].next_same_edge = (struct rx_inx *)0;
	    }
	  /* valid 2 byte encodings begin with c2..df
	   */
	  for (x = 0xc2; x < 0xe0; ++x)
	    {
	      ((struct rx_inx *)&superstate->transitions)[x].data = (void *)0;
	      ((struct rx_inx *)&superstate->transitions)[x].data_2 = (void *)0;
	      ((struct rx_inx *)&superstate->transitions)[x].inx = (void *)rx_2byte_utf8;
	      ((struct rx_inx *)&superstate->transitions)[x].next_same_edge = (struct rx_inx *)0;
	    }
	  /* valid 3 byte encodings begin with e0..ef
	   */
	  for (x = 0xe0; x < 0xf0; ++x)
	    {
	      ((struct rx_inx *)&superstate->transitions)[x].data = (void *)0;
	      ((struct rx_inx *)&superstate->transitions)[x].data_2 = (void *)0;
	      ((struct rx_inx *)&superstate->transitions)[x].inx = (void *)rx_3byte_utf8;
	      ((struct rx_inx *)&superstate->transitions)[x].next_same_edge = (struct rx_inx *)0;
	    }
	  /* valid 4 byte encodings begin with f0..f4
	   */
	  for (x = 0xf0; x < 0xf5; ++x)
	    {
	      ((struct rx_inx *)&superstate->transitions)[x].data = (void *)0;
	      ((struct rx_inx *)&superstate->transitions)[x].data_2 = (void *)0;
	      ((struct rx_inx *)&superstate->transitions)[x].inx = (void *)rx_4byte_utf8;
	      ((struct rx_inx *)&superstate->transitions)[x].next_same_edge = (struct rx_inx *)0;
	    }
	  /* no valid multi-byte encodings begin with f5..ff
	   */
	  for (x = 0xf5; x <= 0xff; ++x)
	    {
	      ((struct rx_inx *)&superstate->transitions)[x].data = (void *)0;
	      ((struct rx_inx *)&superstate->transitions)[x].data_2 = (void *)0;
	      ((struct rx_inx *)&superstate->transitions)[x].inx = (void *)rx_bogus_utf8;
	      ((struct rx_inx *)&superstate->transitions)[x].next_same_edge = (struct rx_inx *)0;
	    }
	}
      else
	panic ("odd storage unit size in rx_superstate");
    }

  /* Start with the shared (copy-on-write) table for chars > 0xffff. */
  superstate->huge_char_transitions = rx_default_huge_char_transitions;
  ++cache->superstates;
  superstate->seq = rx_superstate_counter++;
  superstate->table_id = -1;

  /* Insert the new state into the LRU ring of live states. */
  if (!cache->lru_superstate)
    (cache->lru_superstate
     = superstate->next_recyclable
     = superstate->prev_recyclable
     = superstate);
  else
    {
      superstate->next_recyclable = cache->lru_superstate;
      superstate->prev_recyclable = cache->lru_superstate->prev_recyclable;
      (  superstate->prev_recyclable->next_recyclable
       = superstate->next_recyclable->prev_recyclable
       = superstate);
    }
  superstate->outgoing_edges = 0;
  superstate->incoming_edges = 0;
  superstate->members = set;
  /* Cache the state in its NFA set and hold a reference to the set. */
  set->superstate[storage_unit_size - 1] = superstate;
  rx_protect_superset (rx, set);
  superstate->is_semifree = 0;
  superstate->rx_id = rx->rx_id;
  superstate->locks = 0;
  return superstate;
}
1404
1405
1406 /*(c rx_nfa_state_to_superstate)
1407 * struct rx_superstate *
1408 * rx_nfa_state_to_superstate (struct rx_nfa * rx,
1409 * struct rx_nfa_state * nfa_state,
1410 * int storage_unit_size);
1411 *
1412 * Construct the DFA state whose NFA state set is the epsilon closure
1413 * of `nfa_state' in the NFA `rx'. Return that superstate, with no
1414 * new locks applied to it.
1415 */
1416 struct rx_superstate *
rx_nfa_state_to_superstate(struct rx_nfa * rx,struct rx_nfa_state * nfa_state,int storage_unit_size)1417 rx_nfa_state_to_superstate (struct rx_nfa * rx,
1418 struct rx_nfa_state * nfa_state,
1419 int storage_unit_size)
1420 {
1421 struct rx_nfa_state_set * epsilon_closure;
1422 struct rx_superset * contents;
1423 struct rx_superstate * answer;
1424
1425 /* Compute the superstate set for this NFA state and
1426 * aquire a reference count for it.
1427 *
1428 * Ensure that a reference to this set is cached in
1429 * the NFA state.
1430 */
1431 if (nfa_state->superstate_set)
1432 {
1433 contents = nfa_state->superstate_set;
1434 rx_protect_superset (rx, contents);
1435 }
1436 else
1437 {
1438 epsilon_closure = rx_state_closure (rx, nfa_state);
1439 if (!epsilon_closure)
1440 return 0;
1441
1442 contents = rx_superstate_eclosure_union (rx,
1443 0,
1444 epsilon_closure);
1445 if (!contents)
1446 return 0;
1447
1448 /* rx_superstate_eclosure_union gives us a reference count
1449 */
1450 nfa_state->superstate_set = contents;
1451 contents->nfa_state = nfa_state;
1452 }
1453
1454 /* Aquire a superstate for this superstate_set and drop
1455 * the reference count on the set.
1456 */
1457 if ( contents->superstate[storage_unit_size - 1]
1458 && (contents->superstate[storage_unit_size - 1]->rx_id == rx->rx_id))
1459 {
1460 answer = contents->superstate[storage_unit_size - 1];
1461 /* Treat this as a cache hit that contributes longevity
1462 * to the cache entry and that turns a semifree state
1463 * into a live state:
1464 */
1465 rx_refresh_this_superstate (answer);
1466 rx_release_superset (contents);
1467 return answer;
1468 }
1469 else
1470 {
1471 answer = rx_superstate (rx, contents, storage_unit_size);
1472 rx_release_superset (contents);
1473 return answer; /* might be 0 */
1474 }
1475 }
1476
1477
1478
1479
1480 /* void rx_clear_superstate_table_ids (struct rx_nfa * nfa);
1481 *
1482 * SHOULD BE VOID?
1483 */
1484 void
rx_clear_superstate_table_ids(struct rx_nfa * nfa)1485 rx_clear_superstate_table_ids (struct rx_nfa * nfa)
1486 {
1487 struct rx_superstate * qs[2];
1488 int x;
1489
1490 qs[0] = rx_default_cache->semifree_superstate;
1491 qs[1] = rx_default_cache->lru_superstate;
1492 for (x = 0; x < 2; ++x)
1493 {
1494 struct rx_superstate * q;
1495 struct rx_superstate * it;
1496
1497 q = qs[x];
1498 it = q;
1499 if (q)
1500 do
1501 {
1502 it->table_id = -1;
1503 it = it->next_recyclable;
1504 }
1505 while (it != q);
1506 }
1507 }
1508
1509
1510 /************************************************************************
1511 * Low Level Support for Handling Cache Misses in the Dfa State Cache
1512 *
1513 *
1514 */
1515
1516 /*(c solve_destination)
1517 * static void solve_destination (struct rx_nfa * rx,
1518 * struct rx_inx * inx_out,
1519 * struct rx_super_edge * e,
1520 * int storage_unit_size);
1521 *
1522 * Compute the destination state for DFA edge `e'.
1523 *
1524 * As a side effect, set the `future' field of `e'.
1525 *
1526 * Return an `rx_next_char' instruction frame in `inx_out'. The
1527 * `data' field of the instruction will point to the destination
1528 * state. This instruction can be copied into transition table
1529 * entries for edge `e'.
1530 */
/* Compute the destination of DFA edge `e', filling `inx_out' with the
 * instruction frame to install (an `rx_next_char' pointing at the
 * destination state, or `rx_backtrack' if no NFA transition matches).
 * Returns 0 on success, -1 on allocation failure (ESPACE).
 *
 * Side effects on success: sets `e->future' and links `e' into the
 * destination state's `incoming_edges' ring; may set `e->is_backtrack'.
 */
static int
solve_destination (struct rx_nfa * rx,
		   struct rx_inx * inx_out,
		   struct rx_super_edge * e,
		   int storage_unit_size)
{
  struct rx_superset * nfa_state;
  struct rx_superset * solution;
  struct rx_superstate *dest;

  /* An edge already known to have no destination. */
  if (e->is_backtrack)
    {
      inx_out->inx = (void *) rx_backtrack;
      inx_out->data = 0;
      inx_out->data_2 = 0;
      return 0;
    }

  solution = 0;

  /* Iterate over all NFA states in the state set of this superstate. */
  for (nfa_state = e->present->members; nfa_state; nfa_state = nfa_state->cdr)
    {
      struct rx_nfa_edge * ne;

      /* Iterate over all edges of each NFA state. */
      for (ne = nfa_state->car->edges; ne; ne = ne->next)

	/* If we find an edge that is labeled with
	 * the characters we are solving for.....
	 */
	if ((ne->type == ne_cset) && bits_is_subset (ne->cset, e->cset))
	  {
	    struct rx_nfa_state * n;
	    struct rx_superset * old_sol;

	    n = ne->dest;
	    old_sol = solution;
	    {
	      struct rx_nfa_state_set * set;
	      set = rx_state_closure (rx, n);

	      if (!set)
		{
		  /* ESPACE: drop the reference to the partial answer. */
		  rx_release_superset (old_sol);
		  return -1;
		}

	      /* Accumulate the closure of the edge's destination.
	       * This returns a new set (with a reference for us);
	       * the old partial answer is released below.
	       */
	      solution = rx_superstate_eclosure_union (rx, solution, set);

	      if (!solution)
		{
		  rx_release_superset (old_sol);
		  return -1;
		}
	    }
	    rx_release_superset (old_sol);
	  }
    }

  /* No NFA edge matched: record that this edge backtracks so we
   * never have to solve it again.
   */
  if (solution == 0)
    {
      e->is_backtrack = 1;
      inx_out->inx = (void *) rx_backtrack;
      inx_out->data = 0;
      inx_out->data_2 = 0;
      return 0;
    }

  /* Find or build the destination superstate; it holds its own
   * reference to `solution', so we can drop ours.
   */
  dest = rx_superstate (rx, solution, storage_unit_size);
  rx_release_superset (solution);
  if (!dest)
    return -1;
  e->future = dest;

  /* Link `e' into the destination's circular ring of incoming edges. */
  if (!dest->incoming_edges)
    {
      dest->incoming_edges = e;
      e->next_same_dest = e->prev_same_dest = e;
    }
  else
    {
      e->next_same_dest = dest->incoming_edges;
      e->prev_same_dest = dest->incoming_edges->prev_same_dest;
      e->next_same_dest->prev_same_dest = e;
      e->prev_same_dest->next_same_dest = e;
    }
  inx_out->data = (void *)&dest->transitions;
  inx_out->data_2 = (void *) dest->members->state_label;
  inx_out->inx = (void *) rx_next_char;
  return 0;
}
1623
1624
1625 /* static int compute_super_edge_cset (struct rx_nfa *rx,
1626 * bits csetout,
1627 * struct rx_superstate * superstate,
1628 * t_uchar chr);
1629 *
1630 * For DFA state `superstate' and character `chr', compute a character
1631 * set which is the label of the DFA edge defining transitions out of
1632 * `superstate' on input character `chr'.
1633 *
1634 * This character set can then be used to compute the NFA state set of
1635 * the destination state of the transition. See `solve_destination'.
1636 *
1637 * 0 == backtrack
1638 * n > 0 == n_nfa_edges
1639 * n < 0 == ESPACE
1640 */
1641 static int
compute_super_edge_cset(struct rx_nfa * rx,bits csetout,struct rx_superstate * superstate,unsigned int chr)1642 compute_super_edge_cset (struct rx_nfa *rx,
1643 bits csetout,
1644 struct rx_superstate * superstate,
1645 unsigned int chr)
1646 {
1647 int n_nfa_edges;
1648 struct rx_superset * stateset;
1649
1650 stateset = superstate->members;
1651
1652 if (bits_fill (csetout))
1653 return -1;
1654
1655 n_nfa_edges = 0;
1656 while (stateset)
1657 {
1658 struct rx_nfa_edge *e;
1659
1660 for (e = stateset->car->edges; e; e = e->next)
1661 if (e->type == ne_cset)
1662 {
1663 if (!bits_is_member (e->cset, chr))
1664 {
1665 if (bits_difference (csetout, e->cset))
1666 return -1;
1667 }
1668 else
1669 {
1670 if (bits_intersection (csetout, e->cset))
1671 return -1;
1672 }
1673 ++n_nfa_edges;
1674 }
1675 stateset = stateset->cdr;
1676 }
1677 return n_nfa_edges;
1678 }
1679
1680
1681 /* static struct rx_super_edge * rx_super_edge (struct rx_nfa *rx, struct rx_superstate *super, bits cset);
1682 *
1683 * Construct a new DFA edge, originating in state `super', and
1684 * defining transitions for characters in `cset'.
1685 */
1686 static struct rx_super_edge *
rx_super_edge(struct rx_nfa * rx,struct rx_superstate * super,bits cset)1687 rx_super_edge (struct rx_nfa *rx, struct rx_superstate *super, bits cset)
1688 {
1689 struct rx_super_edge *tc;
1690
1691 tc = ((struct rx_super_edge *) rx_dfa_cache_malloc (sizeof (struct rx_super_edge)));
1692 if (!tc)
1693 return 0;
1694 mem_set0 ((void *)tc, sizeof (struct rx_super_edge));
1695 ++rx_default_cache->super_edges;
1696 tc->next_same_present = super->outgoing_edges;
1697 super->outgoing_edges = tc;
1698 tc->cset = cset;
1699 tc->present = super;
1700 tc->inx_list = 0;
1701 tc->is_backtrack = 0;
1702 return tc;
1703 }
1704
1705
1706
1707 /* static void install_partial_transition (struct rx_superstate *super,
1708 * struct rx_inx *answer,
1709 * struct rx_super_edge * edge,
1710 * bitset_subset set,
1711 * int offset);
1712 *
1713 * Fill in entries in the transition table of `super'.
1714 *
1715 * Entry `N' is filled if `N' is in the range:
1716 *
1717 * offset .. offset + bits_per_subset
1718 *
1719 * and:
1720 *
1721 * set & (1 << (N - offset))
1722 *
1723 * is not 0.
1724 *
1725 * Filled entries are filled with a copy of `*answer'.
1726 *
1727 * This function is useful when handling a cache miss. It is
1728 * expensive to fill in the entire transition table and often, only a
1729 * few entries in a narrow range (such as lower case letters) will
1730 * every be needed. When a missing entry is discovered, we fill in
1731 * only a narrow region of the table around the missing entry.
1732 */
/* Returns 0 on success, -1 on allocation failure.  See the comment
 * block above for the meaning of `set' and `chr'.  Shared read-only
 * pages are replaced by private copies (copy-on-write) on the way to
 * the leaf table that holds `chr''s entry.
 */
static int
install_partial_transition (struct rx_superstate *super,
			    struct rx_inx *answer,
			    struct rx_super_edge * edge,
			    bitset_subset set,
			    int chr)
{
  int x;
  bitset_subset pos;
  struct rx_inx * inxs;

  /* Locate (materializing as needed) the leaf instruction array
   * containing the entry for `chr'.
   */
  if (!super->has_huge_table || ((super->storage_unit_size == 1) && (chr < 0x80)))
    inxs = rx_subset_transitions8 (super->transitions, chr);
  else
    {
      if ((super->storage_unit_size == 1) || (chr > 0xffff))
	{
	  /* The character lives in the 21-bit `huge_char_transitions'
	   * tree.  Copy-on-write each shared level on the path down.
	   */

#ifdef RX_LARGE_TABLES
	  {
	    struct rx_inx ** page2_21;
	    struct rx_inx * page3_21;
	    if (super->huge_char_transitions == rx_default_huge_char_transitions)
	      {
		struct rx_inx *** ht;
		ht = (struct rx_inx ***)rx_dfa_cache_malloc (rx_page1_size21 * sizeof (struct rx_inx **));
		if (!ht)
		  return -1;
		super->huge_char_transitions = ht;
		mem_move ((t_uchar *)super->huge_char_transitions, (t_uchar *)rx_default_huge_char_transitions, sizeof (rx_default_huge_char_transitions));
	      }

	    page2_21 = rx_page2_21 (super->huge_char_transitions, chr);

	    if (page2_21 == rx_huge_char_second_level_page)
	      {
		page2_21 = (struct rx_inx **)rx_dfa_cache_malloc (rx_page2_size21 * sizeof (struct rx_inx *));
		if (!page2_21)
		  return -1;
		mem_move ((t_uchar *)page2_21, (t_uchar *)rx_huge_char_second_level_page, rx_page2_size21 * sizeof (struct rx_inx *));
		rx_page2_21 (super->huge_char_transitions, chr) = page2_21;
	      }

	    page3_21 = rx_page3_21 (super->huge_char_transitions, chr);

	    if (page3_21 == rx_shared_cache_miss_page)
	      {
		page3_21 = (struct rx_inx *)rx_dfa_cache_malloc (256 * sizeof (struct rx_inx));
		if (!page3_21)
		  return -1;
		mem_move ((t_uchar *)page3_21, (t_uchar *)rx_shared_cache_miss_page, 256 * sizeof (struct rx_inx));
		rx_page3_21(super->huge_char_transitions, chr) = page3_21;
	      }
	  }
#else
	  {
	    struct rx_inx *** page2_21;
	    struct rx_inx ** page3_21;
	    struct rx_inx * page4_21;

	    if (super->huge_char_transitions == rx_default_huge_char_transitions)
	      {
		struct rx_inx **** ht;
		ht = (struct rx_inx ****)rx_dfa_cache_malloc (rx_page1_size21 * sizeof (struct rx_inx ***));
		if (!ht)
		  return -1;
		super->huge_char_transitions = ht;
		mem_move ((t_uchar *)super->huge_char_transitions, (t_uchar *)rx_default_huge_char_transitions, sizeof (rx_default_huge_char_transitions));
	      }

	    page2_21 = rx_page2_21 (super->huge_char_transitions, chr);

	    if (page2_21 == rx_huge_char_second_level_page)
	      {
		page2_21 = (struct rx_inx ***)rx_dfa_cache_malloc (rx_page2_size21 * sizeof (struct rx_inx **));
		if (!page2_21)
		  return -1;
		mem_move ((t_uchar *)page2_21, (t_uchar *)rx_huge_char_second_level_page, rx_page2_size21 * sizeof (struct rx_inx **));
		rx_page2_21 (super->huge_char_transitions, chr) = page2_21;
	      }

	    page3_21 = rx_page3_21 (super->huge_char_transitions, chr);

	    if (page3_21 == rx_huge_char_third_level_page)
	      {
		page3_21 = (struct rx_inx **)rx_dfa_cache_malloc (rx_page3_size21 * sizeof (struct rx_inx *));
		if (!page3_21)
		  return -1;
		mem_move ((t_uchar *)page3_21, (t_uchar *)rx_huge_char_third_level_page, rx_page3_size21 * sizeof (struct rx_inx *));
		rx_page3_21(super->huge_char_transitions, chr) = page3_21;
	      }

	    page4_21 = rx_page4_21 (super->huge_char_transitions, chr);
	    if (page4_21 == rx_shared_cache_miss_page)
	      {
		page4_21 = (struct rx_inx *)rx_dfa_cache_malloc (rx_page4_size21 * sizeof (struct rx_inx));
		if (!page4_21)
		  return -1;
		mem_move ((t_uchar *)page4_21, (t_uchar *)rx_shared_cache_miss_page, rx_page4_size21 * sizeof (struct rx_inx));
		rx_page4_21 (super->huge_char_transitions, chr) = page4_21;
	      }
	  }
#endif
	  inxs = rx_subset_transition21 (super->huge_char_transitions, chr);
	}
      else
	{
	  /* (storage_unit_size == 2) && (chr <= 0xffff):
	   * the 16-bit table attached to the state itself.
	   */
#ifdef RX_LARGE_TABLES
	  {
	    struct rx_inx * page2_16;

	    page2_16 = rx_page2_16 (super->transitions, chr);
	    if (page2_16 == rx_shared_cache_miss_page)
	      {
		page2_16 = (struct rx_inx *)rx_dfa_cache_malloc (256 * sizeof (struct rx_inx));
		if (!page2_16)
		  return -1;
		mem_move ((t_uchar *)page2_16, (t_uchar *)rx_shared_cache_miss_page, 256 * sizeof (struct rx_inx));
		rx_page2_16(super->transitions, chr) = page2_16;
	      }

	    inxs = rx_subset_transitions16 (super->transitions, chr);
	  }
#else

	  {
	    struct rx_inx ** page2_16;
	    struct rx_inx * page3_16;

	    page2_16 = rx_page2_16 (super->transitions, chr);
	    if (page2_16 == rx_default_small_table_page2)
	      {
		page2_16 = (struct rx_inx **)rx_dfa_cache_malloc (rx_page2_size * sizeof (struct rx_inx *));
		if (!page2_16)
		  return -1;
		mem_move ((t_uchar *)page2_16, (t_uchar *)rx_default_small_table_page2, rx_page2_size * sizeof (struct rx_inx *));
		rx_page2_16(super->transitions, chr) = page2_16;
	      }

	    page3_16 = rx_page3_16 (super->transitions, chr);
	    if (page3_16 == rx_shared_cache_miss_page)
	      {
		page3_16 = (struct rx_inx *)rx_dfa_cache_malloc (rx_page3_size * sizeof (struct rx_inx));
		if (!page3_16)
		  return -1;
		mem_move ((t_uchar *)page3_16, (t_uchar *)rx_shared_cache_miss_page, rx_page3_size * sizeof (struct rx_inx));
		rx_page3_16(super->transitions, chr) = page3_16;
	      }

	    inxs = rx_subset_transitions16 (super->transitions, chr);
	  }

#endif
	}
    }


  /* Copy `*answer' into every entry selected by `set', threading each
   * installed frame onto the edge's `inx_list' so they can all be
   * invalidated together later.
   */
  for (x = 0, pos = 1; x < bits_per_subset; ++x, pos <<= 1)
    {
      if (set & pos)
	{
	  inxs[x] = *answer;
	  inxs[x].next_same_edge = edge->inx_list;
	  edge->inx_list = &inxs[x];
	}
    }
  return 0;
}
1903
1904
1905
1906
1907 /************************************************************************
1908 *(h1 "DFA State Transition Cache Misses")
1909 *
1910 *
1911 */
1912
1913
1914 /*(c rx_handle_cache_miss)
1915 * struct rx_inx * rx_handle_cache_miss (struct rx_nfa * rx,
1916 * struct rx_superstate * super,
1917 * t_uchar chr,
1918 * void * data_2);
1919 *
1920 * Recover from an `rx_cache_miss' instruction.
1921 *
1922 * `rx' and `super' are the NFA and superstate. `chr' is the input
1923 * character for which the transition table of `super' contains
1924 * an `rx_cache_miss' instruction.
1925 *
1926 * `data_2' is the `data_2' field of the `rx_cache_miss' instruction
1927 * that caused us to call `rx_handle_cache_miss'.
1928 *
1929 * We return a pointer to another instruction (perhaps the corrected
1930 * transition table entry) which replaces the cache miss instruction.
1931 * The caller of this function can continue by re-dispatching on the
1932 * new instruction (which is permitted to be another cache miss
1933 * instruction).
1934 */
1935 struct rx_inx *
rx_handle_cache_miss(struct rx_nfa * rx,struct rx_superstate * super,unsigned int chr,void * data_2)1936 rx_handle_cache_miss (struct rx_nfa * rx,
1937 struct rx_superstate * super,
1938 unsigned int chr,
1939 void * data_2)
1940 {
1941 struct rx_super_edge * e;
1942
1943 /* There are three kinds of cache miss.
1944 *
1945 * The first occurs when a transition is taken that has never been
1946 * computed during the lifetime of the source superstate. In this
1947 * case, no DFA edge exists for this transition. The cache miss is
1948 * handled by calling `compute_super_edge_cset' and building a new
1949 * DFA edge. If `compute_super_edge_cset' tells us that no
1950 * transition is defined, we fill in the transition table with
1951 * `rx_backtrack' instructions and return one of those.
1952 *
1953 * The second kind of cache miss occurs when we have a DFA edge
1954 * (pointed to by `data_2') but the destination superstate of that
1955 * edge isn't known. `solve_destination' is used to construct the
1956 * destination superstate or find it in the DFA cache. We return a
1957 * `rx_next_char' instruction.
1958 *
1959 * Finally, the third kind of cache miss occurs when the destination
1960 * superstate of a transition is known but is or was in a `semi-free
1961 * state'. That case is handled by `refresh_semifree_superstate'.
1962 * We return an `rx_next_char' instruction.
1963 *
1964 */
1965
1966
1967
1968 /* If the `rx_cache_miss' instruction contained a pointer to an
1969 * existing DFA edge, that pointer was passed to us as `data_2'.
1970 */
1971 e = data_2;
1972
1973 retry:
1974
1975 if (!e)
1976 {
1977 /* A missing DFA edge. Look for it in the cache.
1978 */
1979 for (e = super->outgoing_edges; e; e = e->next_same_present)
1980 if (bits_is_member (e->cset, chr))
1981 goto retry;
1982
1983 /* A DFA edge missing from the cache. Try to build it now.
1984 */
1985 {
1986 bits trcset;
1987
1988 rx_lock_superstate (rx, super);
1989
1990 if (super->has_huge_table)
1991 trcset = bits_alloc (rx__dfa_alloc_limits, uni_bits_tree_rule);
1992 else
1993 trcset = bits_alloc (rx__dfa_alloc_limits, rx_8bit_bits_tree_rule);
1994
1995 if (!trcset)
1996 {
1997 rx_unlock_superstate (rx, super);
1998 return 0;
1999 }
2000
2001 {
2002 int nfa_edges;
2003
2004 nfa_edges = compute_super_edge_cset (rx, trcset, super, chr);
2005 if (nfa_edges < 0)
2006 {
2007 rx_unlock_superstate (rx, super);
2008 return 0;
2009 }
2010 #if 0
2011 else if (!nfa_edges)
2012 {
2013 static struct rx_inx backtrack = { 0, 0, (void *)rx_backtrack, 0 };
2014 rx_unlock_superstate (rx, super);
2015 bits_free (trcset);
2016 return &backtrack;
2017 }
2018 #endif
2019 else
2020 {
2021 e = rx_super_edge (rx, super, trcset);
2022 rx_unlock_superstate (rx, super);
2023 if (!e)
2024 return 0;
2025 goto retry;
2026 }
2027 }
2028 }
2029 }
2030 else if (e->future)
2031 {
2032 struct rx_inx inx;
2033
2034 /* We know the superstate edge and its destination state, but
2035 * the instruction frame contained a stale `rx_cache_miss'
2036 * instruction (the instruction itself, not the state, was the
2037 * stale part of the cache). We must now replace the
2038 * instruction with an `rx_next_char' instruction.
2039 *
 * It's possible that the destination state is in a semifree
2041 * state and must be refreshed.
2042 */
2043 refresh_semifree_superstate (e->future);
2044 rx_lock_superstate (rx, super);
2045
2046 inx.data = (void *)&e->future->transitions;
2047 inx.data_2 = (void *)e->future->members->state_label;
2048 inx.inx = (void *)rx_next_char;
2049
2050 {
2051 bitset_subset s;
2052 if ( bits_get_subset (&s, e->cset, chr)
2053 || install_partial_transition (super, &inx, e, s, chr))
2054 {
2055 rx_unlock_superstate (rx, super);
2056 return 0;
2057 }
2058 }
2059 rx_unlock_superstate (rx, super);
2060
2061 if (!super->has_huge_table || ((super->storage_unit_size == 1) && (chr < 0x80)))
2062 return rx_transition8 (&super->transitions, chr);
2063 else if ((super->storage_unit_size == 2) && (chr < (1 << 16)))
2064 return rx_transition16 (&super->transitions, chr);
2065 else
2066 return rx_transition21 (super->huge_char_transitions, chr);
2067 }
2068 else
2069 {
2070 struct rx_inx inx;
2071
2072 /* A DFA edge with no known destination state. We must now
2073 * compute the destination state and fill in transition table
2074 * entries with `rx_next_char' instructions.
2075 */
2076 rx_lock_superstate (rx, super);
2077 {
2078 bitset_subset s;
2079 struct rx_superstate * destination;
2080
2081 if (solve_destination (rx, &inx, e, super->storage_unit_size))
2082 {
2083 rx_unlock_superstate (rx, super);
2084 return 0;
2085 }
2086
2087 if (inx.inx == (void *)rx_next_char)
2088 {
2089 destination = rx_transitions_to_suprestate ((rx_transition_table)(inx.data));
2090 rx_lock_superstate (rx, destination);
2091 }
2092 else
2093 destination = 0;
2094
2095 if (bits_get_subset (&s, e->cset, chr)
2096 || install_partial_transition (super, &inx, e, s, chr))
2097 {
2098 if (destination)
2099 rx_unlock_superstate (rx, destination);
2100 rx_unlock_superstate (rx, super);
2101 return 0;
2102 }
2103 if (destination)
2104 rx_unlock_superstate (rx, destination);
2105 }
2106 rx_unlock_superstate (rx, super);
2107 if (!super->has_huge_table || ((super->storage_unit_size == 1) && (chr < 0x80)))
2108 return rx_transition8 (&super->transitions, chr);
2109 else if ((super->storage_unit_size == 2) && (chr < (1 << 16)))
2110 return rx_transition16 (&super->transitions, chr);
2111 else
2112 return rx_transition21 (super->huge_char_transitions, chr);
2113 }
2114 }
2115
2116
2117 /************************************************************************
2118 * DFA Cache Debugging Tools
2119 *
2120 * I sometimes use this code to debug this file.
2121 */
2122
2123 #if 0
2124 static int
2125 qlen (struct rx_superstate * q)
2126 {
2127 int count = 1;
2128 struct rx_superstate * it;
2129 if (!q)
2130 return 0;
2131 for (it = q->next_recyclable; it != q; it = it->next_recyclable)
2132 ++count;
2133 return count;
2134 }
2135
2136
2137 static void
2138 check_cache ()
2139 {
2140 struct rx_cache * cache = rx_default_cache;
2141 int total = cache->superstates;
2142 int semi = cache->semifree_superstates;
2143
2144
2145 total = cache->superstates;
2146 semi = cache->semifree_superstates;
2147
2148 if (semi != qlen (cache->semifree_superstate))
2149 panic ("failed cache check in rx");
2150
2151 if ((total - semi) != qlen (cache->lru_superstate))
2152 panic ("failed cache check in rx (2)");
2153 }
2154
2155
2156 #endif
2157