/* super.c - lazily constructed DFAs
2  *
3  ****************************************************************
4  * Copyright (C) 1998, 2000 Thomas Lord
5  *
6  * See the file "COPYING" for further information about
7  * the copyright and warranty status of this work.
8  */
9 
10 
11 
12 #include "hackerlab/bugs/panic.h"
13 #include "hackerlab/mem/mem.h"
14 #include "hackerlab/rx/bits-tree-rules.h"
15 #include "hackerlab/rx/dfa-cache.h"
16 #include "hackerlab/rx/super.h"
17 
18 
19 /* __STDC__ prototypes for static functions */
20 static void free_some_dfa_memory (void * ign, size_t needed);
21 static int rx_dfa_cache_prepare (size_t amt);
22 static void * rx_dfa_cache_malloc (size_t size);
23 static void * rx_dfa_cache_soft_malloc (size_t size);
24 static void rx_dfa_cache_free (void * mem);
25 static struct rx_superset * rx_protect_superset (struct rx_nfa * rx, struct rx_superset * set);
26 static void rx_release_superset (struct rx_superset *set);
27 static struct rx_superset * rx_superset_cons (struct rx_nfa * rx,
28 					      struct rx_nfa_state *car,
29 					      struct rx_superset *cdr);
30 static struct hashtree_item * superset_allocator (void * val, struct hashtree_rules * rules);
31 static struct rx_superset * rx_superstate_eclosure_union (struct rx_nfa * rx,
32 							  struct rx_superset *set,
33 							  struct rx_nfa_state_set *ecl) ;
34 static int supersetcmp (void * va, void * vb, struct hashtree_rules * rules);
35 static struct hashtree * super_hash_allocator (struct hashtree_rules * rules);
36 static void super_hash_liberator (struct hashtree * hash, struct hashtree_rules * rules);
37 static void superset_hash_item_liberator (struct hashtree_item * it,
38 					  struct hashtree_rules * rules);
39 static void install_cache_miss_transition (struct rx_super_edge * e);
40 static void semifree_superstate ();
41 static void refresh_semifree_superstate (struct rx_superstate * super);
42 static void rx_refresh_this_superstate (struct rx_superstate * superstate);
43 static struct rx_superstate * rx_superstate (struct rx_nfa *rx,
44 					     struct rx_superset *set,
45 					     int storage_unit_size);
46 static int solve_destination (struct rx_nfa * rx,
47 			      struct rx_inx * inx_out,
48 			      struct rx_super_edge * e,
49 			      int storage_unit_size);
50 static int compute_super_edge_cset (struct rx_nfa *rx,
51 				    bits csetout,
52 				    struct rx_superstate * superstate,
53 				    unsigned int chr);
54 static struct rx_super_edge * rx_super_edge (struct rx_nfa *rx, struct rx_superstate *super, bits cset);
55 static int install_partial_transition  (struct rx_superstate *super,
56 					struct rx_inx *answer,
57 					struct rx_super_edge * edge,
58 					bitset_subset set,
59 					int chr);
60 
61 
62 #define COPIES_32	\
63   X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
64 
65 
66 #define COPIES_64	\
67   X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, \
68   X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
69 
70 #define COPIES_256 \
71   X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, \
72   X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, \
73   X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, \
74   X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, \
75   X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, \
76   X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, \
77   X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, \
78   X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
79 
80 #if rx_page1_size21 == 256
81 #  define COPIES_PAGE1_21_SIZE  COPIES_256
82 #elif rx_page1_size21 == 32
83 #  define COPIES_PAGE1_21_SIZE  COPIES_32
84 #else
85 #  error you broke fragile code
86 #endif
87 
88 #if rx_page2_size21 == 256
89 #  define COPIES_PAGE2_21_SIZE  COPIES_256
90 #elif rx_page2_size21 == 32
91 #  define COPIES_PAGE2_21_SIZE  COPIES_32
92 #else
93 #  error you broke fragile code
94 #endif
95 
96 
97 #if bits_per_subset == 32
98 #  define COPIES_BPS COPIES_32
99 #  define COPIES_PAGE2_SIZE X, X, X, X, X, X, X, X
100 #  define COPIES_PAGE3_21_SIZE X, X, X, X, X, X, X, X
101 #elif bits_per_subset == 64
102 #  define COPIES_BPS COPIES_64
103 #  define COPIES_PAGE2_SIZE X, X, X, X
104 #  define COPIES_PAGE3_21_SIZE X, X, X, X
105 #else
106 #  error "odd bits_per_subset in super.c"
107 #endif
108 
109 
/* Shared leaf and interior pages for the multi-level transition
 * tables.  Each array is filled in by expanding the `X' macro through
 * one of the COPIES_* macros defined above.
 */

/* Default value for leaf nodes of multi-level transition tables:
 */
#undef X
#define X {0, 0, (void *)rx_cache_miss, 0}
static struct rx_inx rx_shared_cache_miss_page[256] = {COPIES_256};

/* Fixed backtrack page for leaf nodes of multi-level transition tables:
 */
#undef X
#define X {0, 0, (void *)rx_backtrack, 0}
static struct rx_inx rx_shared_backtrack_page[256] = {COPIES_256};

/* Fixed value for high-surrogate leaf nodes of 16-bit tables:
 */
#undef X
#define X {0, 0, (void *)rx_huge_char, 0}
static struct rx_inx rx_huge_char_page[256] = {COPIES_256};

#ifdef RX_LARGE_TABLES

/* Default value for page2 of 21-bit tables:
 *
 * NOTE(review): these two arrays are initialized with COPIES_256 and
 * COPIES_32 directly, which is only correct if RX_LARGE_TABLES implies
 * rx_page2_size21 == 256 and rx_page1_size21 == 32 -- confirm against
 * rx/bits-tree-rules.h.
 */
#undef X
#define X rx_shared_cache_miss_page
static struct rx_inx * rx_huge_char_second_level_page[rx_page2_size21] = { COPIES_256 };

/* Default value for huge_char_transitions:
 */
#undef X
#define X rx_huge_char_second_level_page
static struct rx_inx ** rx_default_huge_char_transitions[rx_page1_size21] = { COPIES_32 };

#else /* ndef RX_LARGE_TABLES */

/* Small-table layout: one more level of indirection.  The page sizes
 * here come from the COPIES_* macros selected above according to
 * bits_per_subset and the rx_page*_size* constants.
 */
#undef X
#define X rx_shared_cache_miss_page
static struct rx_inx * rx_default_small_table_page2[rx_page2_size] = { COPIES_PAGE2_SIZE };

#undef X
#define X rx_shared_backtrack_page
static struct rx_inx * rx_small_table_backtrack_page2[rx_page2_size] = { COPIES_PAGE2_SIZE };

#undef X
#define X rx_huge_char_page
static struct rx_inx * rx_huge_char_page2[rx_page2_size] = { COPIES_PAGE2_SIZE };


#undef X
#define X rx_shared_cache_miss_page
static struct rx_inx * rx_huge_char_third_level_page[rx_page3_size21] = { COPIES_PAGE3_21_SIZE };

#undef X
#define X rx_huge_char_third_level_page
static struct rx_inx ** rx_huge_char_second_level_page[rx_page2_size21] = { COPIES_PAGE2_21_SIZE };

#undef X
#define X rx_huge_char_second_level_page
static struct rx_inx *** rx_default_huge_char_transitions[rx_page1_size21] = { COPIES_PAGE1_21_SIZE };

#endif
170 
171 
172 /************************************************************************
173  *(h0 "The Superstate DFA"
174  *    :subtitle "rx/super.c"
175  *    :includes ("rx/super.h"))
176  *
177  * It is well known that there is a mapping from non-deterministic
178  * finite-state automata to equivalent deterministic finite-state
179  * automata (a good text book about compiler construction explains
180  * this). That mapping is important to applications which use regular
181  * expressions for efficient pattern matching.  When a regular
182  * expression comparison is implemented using a single pass with a
183  * non-deterministic automata with `K' states, the comparison
184  * algorithm may run for a number of steps which is proportional to
185  * `K*N' for a length `N' input string (where each step is relatively
186  * expensive).  When implemented using backtracking, `N**K'
187  * (relatively inexpensive) steps may be required.  On the other hand,
188  * when regular expression comparison is implemented using a
189  * deterministic automata, the algorithm requires a number of
190  * instructions which is linear in the length of the input string
191  * (order `N' (inexpensive) steps).
192  *
193  * The catch is that, using brute force, it is expensive (in time and
194  * space) to build a DFA from a typical regular expression NFA.  An
195  * NFA with `K' states can yield a DFA with order `2^K' states.
196  *
197  * The trick is that, during most matches using typical regexps, only
198  * a small portion of the total potential number of DFA states are
199  * actually needed -- time and space can be saved by building DFA
200  * states on-demand.  During any match, only two DFA states are needed
201  * at a time -- if the total number of DFA states encountered during a
202  * match is very large, space can be saved (at the expense of time) by
203  * discarding less frequently used DFA states and reconstructing them
204  * on demand.
205  *
206  * The functions in this chapter do the trick: they build a DFA on
207  * demand, keeping a cache of frequently used states, and discarding
208  * infrequently used states if the cache grows too large.
209  *
210  * As a result, DFA-style matching using the functions here provide
211  * matching in order `N' (inexpensive) steps for most patterns (by
212  * acting like a DFA match).  In the worst case (when the cache is not
213  * effective), these functions provide matching in order `N*K'
214  * (relatively expensive) steps (by acting like a one-pass NFA match).
215  *
216  */
217 /*(menu)
218  */
219 
220 /*(h1 "The DFA Virtual Machine")
221  *
222  * The view taken by the design of Rx is that a DFA built from a
223  * regexp is a kind of program which can be interpreted by a
224  * virtual machine.
225  *
226  * The basic operation of this machine is that it has one register: a
227  * DFA state.  In each cycle, it reads one input character and
228  * tries to advance to the next DFA state by looking that character up
229  * in the transition table for the current state.
230  *
231  * The machine's outputs are the state label of the current DFA state,
232  * a flag that indicates whether or not the current state has outgoing
233  * edges (whether or not any input characters are valid), and a signal
234  * that occurs when there is no transition for an input character.
235  *
236  * Implementing this virtual machine is complicated by the fact that
237  * its program, the DFA, is not constructed all at once and might not
238  * exist in its entirety at any one time.  The virtual machine interpreter
239  * has to be able to handle the case when the next state reached after
240  * some input character hasn't been built yet or has been flushed
241  * from the DFA cache.
242  */
243 
244 
245 
246 /*(include-documentation "super.h")
247  */
248 
249 /************************************************************************
250  * The Rx DFA Cache
251  *
252  * The DFA state cache is kept in a structure referenced through
253  * the global pointer `rx_default_cache'.  In the future, this
254  * may be exposed as an interface to the library: allowing separate
255  * caches for regexps with different priorities.
256  */
257 
/* A DFA state cache: all of the bookkeeping for the lazily
 * constructed superstate DFA.  Exactly one instance currently exists
 * (`default_cache', below), always referenced through
 * `rx_default_cache'.
 */
struct rx_cache;

struct rx_cache
{
  /* Hook functions (compare/alloc/free) for the NFA state set cache.
   * The hashtree callbacks recover the containing `struct rx_cache'
   * from the address of this member.
   */
  struct hashtree_rules superset_hashtree_rules;

  struct rx_superstate * lru_superstate;
  /*
   * The least recently used live superstate.  This is the next
   * superstate that will become semifree.  Superstates are linked on
   * this queue by `next_recyclable' and `prev_recyclable'.
   */

  struct rx_superstate * semifree_superstate;
  /*
   * The least recently used semifree superstate.  This is the next
   * superstate that will become completely freed.  Superstates are
   * linked on this queue by `next_recyclable' and `prev_recyclable'.
   */

  /* Allocation counts.  The number of various kinds of objects
   * currently allocated.  This is used for debugging and for tuning
   * the rate at which states are semifreed.
   *
   * NOTE(review): the declaration order of these fields must agree
   * with the positional initializer of `default_cache' below.
   */
  int hash_tables;
  int supersets;
  int super_edges;
  int superstates;
  int semifree_superstates;

  /* The ratio of cache hits to cache misses when searching for
   * superstates.  This ratio is weighted in favor of recent
   * cache probes.
   */
  int hits;
  int misses;
  int total_hits;
  int total_misses;

  /* A hash table mapping (`car', `cdr') pairs to supersets
   * (sets of NFA states).
   */
  struct hashtree superset_table;
};
301 
/* The cache that every function in this file operates on.  Tentative
 * definition here; initialized to `&default_cache' further below.
 */
static struct rx_cache * rx_default_cache;


/* Monotonic counter; presumably used to number superstates as they are
 * created (the uses are outside this part of the file -- verify).
 */
int rx_superstate_counter = 0;
306 
307 
308 
309 /************************************************************************
310  * DFA Cache Allocation
311  *
312  * These malloc-like functions add or subtract from the `bytes_used' field
313  * of the DFA state cache.  They discard states from an over-full cache.
314  */
315 
316 #ifndef RX_DEFAULT_DFA_CACHE_SIZE
317 /* This is an upper bound on the number of bytes that may (normally)
318  * be allocated for DFA states.  If this threshold would be exceeded,
319  * Rx tries to flush some DFA states from the cache.
320  */
321 #define RX_DEFAULT_DFA_CACHE_SIZE (sizeof (void *) * (1 << 16))
322 #endif
323 
324 alloc_limits rx__dfa_alloc_limits;
325 
326 
327 static void
free_some_dfa_memory(void * ign,size_t needed)328 free_some_dfa_memory (void * ign, size_t needed)
329 {
330   while (   (lim_in_use (rx__dfa_alloc_limits) + needed > lim_threshold (rx__dfa_alloc_limits))
331 	 && (0 <= rx__really_free_superstate ()))
332     ;
333 }
334 
335 
336 void
rx__init_dfa_alloc_limits(void)337 rx__init_dfa_alloc_limits (void)
338 {
339   static int init = 1;
340 
341   if (init)
342     {
343       rx__dfa_alloc_limits = make_alloc_limits ("Rx DFA cache",
344 						RX_DEFAULT_DFA_CACHE_SIZE,
345 						0,
346 						0,
347 						free_some_dfa_memory,
348 						0);
349       init = 0;
350     }
351 }
352 
353 
/* size_t rx_dfa_cache_failure_pt (void);
 *
 * Return the allocation failure point of the DFA state cache,
 * initializing the cache limits first if necessary.
 */
size_t
rx_dfa_cache_failure_pt (void)
{
  rx__init_dfa_alloc_limits ();
  return lim_failure_pt (rx__dfa_alloc_limits);
}
360 
/* void rx__dfa_cache_statistics (...);
 *
 * Report DFA cache geometry and hit/miss counters.  Any out-parameter
 * may be 0, in which case that statistic is skipped.
 */
void
rx__dfa_cache_statistics (size_t * threshold,
			 size_t * failure_pt,
			 size_t * in_use,
			 size_t * high_water_mark,
			 int * hits,
			 int * misses,
			 int * total_hits,
			 int * total_misses)
{
  rx__init_dfa_alloc_limits ();
  if (threshold)
    *threshold = rx_dfa_cache_threshold ();
  if (failure_pt)
    *failure_pt = rx_dfa_cache_failure_pt ();
  if (in_use)
    *in_use = rx_dfa_cache_in_use ();
  if (high_water_mark)
    *high_water_mark = rx_dfa_cache_high_water_mark ();
  if (hits)
    *hits = rx_default_cache->hits;
  if (misses)
    *misses = rx_default_cache->misses;
  if (total_hits)
    *total_hits = rx_default_cache->total_hits;
  if (total_misses)
    *total_misses = rx_default_cache->total_misses;
}
389 
390 
/* void rx_set_dfa_cache_failure_pt (size_t n);
 *
 * Set the allocation failure point of the DFA state cache to `n'
 * bytes, initializing the cache limits first if necessary.
 */
void
rx_set_dfa_cache_failure_pt (size_t n)
{
  rx__init_dfa_alloc_limits ();
  lim_set_failure_pt (rx__dfa_alloc_limits, n);
}
397 
398 
/* static int rx_dfa_cache_prepare (size_t amt);
 *
 * Pre-flight a future allocation of `amt' bytes against the DFA cache
 * limits (may trigger the reclamation hook).  Returns the result of
 * `lim_prepare'.
 */
static int
rx_dfa_cache_prepare (size_t amt)
{
  rx__init_dfa_alloc_limits ();
  return lim_prepare (rx__dfa_alloc_limits, amt);
}
405 
/* static void * rx_dfa_cache_malloc (size_t size);
 *
 * Allocate memory for the DFA state cache.  If this allocation would
 * overflow the cache then this function first tries to reduce the cache
 * size by flushing old DFA states.
 */
412 static void *
rx_dfa_cache_malloc(size_t size)413 rx_dfa_cache_malloc (size_t size)
414 {
415   void * answer;
416 
417   rx__init_dfa_alloc_limits ();
418   answer = lim_malloc (rx__dfa_alloc_limits, size);
419   return answer;
420 }
421 
422 
423 static void *
rx_dfa_cache_soft_malloc(size_t size)424 rx_dfa_cache_soft_malloc (size_t size)
425 {
426   void * answer;
427 
428   rx__init_dfa_alloc_limits ();
429   answer = lim_soft_malloc (rx__dfa_alloc_limits, size);
430   return answer;
431 }
432 
433 
/* static void rx_dfa_cache_free (void * mem);
 *
 * Free memory for the DFA state cache.
 */
/* Return `mem' to the DFA state cache, crediting the cache's
 * allocation limits.
 */
static void
rx_dfa_cache_free (void * mem)
{
  lim_free (rx__dfa_alloc_limits, mem);
}
443 
444 
445 /************************************************************************
446  * The NFA State Set Cache
447  *
448  * Each DFA state points to a set of NFA states.  That set is stored
449  * in the NFA state set cache which is part of the DFA state cache.
450  *
451  * These are the hash-table hook functions for the NFA state set
452  * cache.
453  */
454 
455 
456 /*  (c rx_protect_superset)
457  * struct rx_superset * rx_protect_superset (struct rx_nfa * rx, struct rx_superset * set);
458  *
459  * Increment the reference count of a superset.  See xref:"struct
460  * rx_superset" and xref:"rx_release_superset".
461  */
462 static struct rx_superset *
rx_protect_superset(struct rx_nfa * rx,struct rx_superset * set)463 rx_protect_superset (struct rx_nfa * rx, struct rx_superset * set)
464 {
465   if (set)
466     ++set->refs;
467   return set;
468 }
469 
470 
471 /*  (c rx_release_superset)
472  * void rx_release_superset (struct rx_superset *set);
473  *
474  * Decrement the reference count on a superset.  If it becomes 0,
475  * then free storage used by the set.
476  */
static void
rx_release_superset (struct rx_superset *set)
{
  /* Only act when the last reference is being dropped. */
  if (set && !--set->refs)
    {
      /* Break the NFA state's cached pointer to this set, if any. */
      if (set->nfa_state)
	set->nfa_state->superstate_set = 0;
      /* Recursively drop the reference this cons cell held on its cdr. */
      rx_release_superset (set->cdr);
      /* Remove the set from the superset cache, then free it. */
      hashtree_delete (&set->hash_item, &rx_default_cache->superset_hashtree_rules);
      rx_dfa_cache_free ((char *)set);
      --rx_default_cache->supersets;
    }
}
490 
491 
492 /* This adds an element to a superstate set.  These sets are lists, such
493  * that lists with == elements are ==.
494  *
495  *
496  * on entry:
497  *   	CDR has at least one reference which is taken over by this
498  * 	function (becomes the reference of the set returned to its cdr)
499  *
500  * on exit:
501  * 	set returned has one reference which is for the caller.
502  */
503 
static struct rx_superset *
rx_superset_cons (struct rx_nfa * rx,
		  struct rx_nfa_state *car,
		  struct rx_superset *cdr)
{
  struct rx_cache * cache = rx_default_cache;
  if (!car && !cdr)
    return 0;
  {
    struct rx_superset template;
    struct hashtree_item * hit;
    struct rx_superset * answer;

    /* Probe (or extend) the superset cache using a stack-allocated
     * key; `superset_allocator' copies it into the cache on a miss.
     *
     * NOTE(review): the hash expression below dereferences `car', so
     * `car' must be non-null whenever `cdr' is -- confirm with callers.
     */
    template.car = car;
    template.cdr = cdr;
    template.id = rx->rx_id;

    rx_dfa_cache_prepare (4 * sizeof (struct hashtree *) + sizeof (struct rx_superset));
    hit = hashtree_store (&cache->superset_table,
			  (unsigned long)car ^ car->id ^ (unsigned long)cdr,
			  (void *)&template,
			  &cache->superset_hashtree_rules);
    if (!hit)
      return 0;
    answer = (struct rx_superset *)hit->key;
    /* This reference is the one returned to the caller. */
    rx_protect_superset (rx, answer);
    /* On a cache hit (the set already existed, refs was > 0 before
     * the protect), the set already holds its own reference on `cdr';
     * drop the reference the caller passed in.
     */
    if (answer->refs != 1)
      rx_release_superset (cdr);
    return answer;
  }
}
535 
536 
537 #define rx_abs(A) (((A) > 0) ? (A) : -(A))
538 
/*
 * static struct hashtree_item * superset_allocator (void * val,
 *                                                   struct hashtree_rules * rules);
 *
 * Hash tree allocation function for supersets.
 *
 * on entry:
 *   	CDR of `val' has at least one reference which is taken over by this
 * 	function (becomes the reference of the set returned to its cdr)
 *
 * on exit:
 * 	Set returned has a reference count of 0.
 */
static struct hashtree_item *
superset_allocator (void * val, struct hashtree_rules * rules)
{
  struct rx_cache * cache;
  struct rx_superset * template;
  struct rx_superset * newset;

  /* Recover the containing cache from the address of its embedded
   * rules member (hand-rolled `offsetof' arithmetic).
   */
  cache = ((struct rx_cache *)
	   ((char *)rules
	    - (unsigned long)(&((struct rx_cache *)0)->superset_hashtree_rules)));
  template = (struct rx_superset *)val;
  newset = ((struct rx_superset *)
	    rx_dfa_cache_soft_malloc (sizeof (*template)));
  if (!newset)
    return 0;
  ++cache->supersets;

  {
    long cdrfinal;
    long cdredges;

    cdrfinal = (template->cdr ? template->cdr->state_label : 0);
    cdredges = (template->cdr ? template->cdr->has_cset_edges : 0);

    /* Combine state labels: 0 from the car means "use the cdr's
     * label"; otherwise take the car's label when it has the smaller
     * magnitude or is positive, else fall back to the cdr's label.
     */
    if (!template->car->state_label)
      newset->state_label = cdrfinal;
    else if (rx_abs (template->car->state_label) < rx_abs (cdrfinal))
      newset->state_label = template->car->state_label;
    else if (template->car->state_label > 0)
      newset->state_label = template->car->state_label;
    else
      newset->state_label = cdrfinal;

    /* The set has outgoing character-set edges if any member does. */
    newset->has_cset_edges = (template->car->has_cset_edges || cdredges);
  }

  /* New sets start with no references; the caller takes ownership. */
  newset->refs = 0;
  newset->id = template->id;
  newset->car = template->car;
  newset->cdr = template->cdr;
  newset->superstate[0] = 0;
  newset->superstate[1] = 0;
  newset->nfa_state = 0;
  /* The hash item is embedded in the set and keys on the set itself. */
  newset->hash_item.key = (void *)newset;
  newset->hash_item.binding = 0;
  return &newset->hash_item;
}
601 
602 
603 /* This computes a union of two NFA state sets.  The sets do not have the
604  * same representation though.  One is a RX_SUPERSET structure (part
605  * of the superstate NFA) and the other is an NFA_STATE_SET (part of the NFA).
606  *
607  * On exit, `set' has at least one reference which is for the caller.
608  */
static struct rx_superset *
rx_superstate_eclosure_union (struct rx_nfa * rx,
			      struct rx_superset *set,
			      struct rx_nfa_state_set *ecl)
{
  /* Base case: nothing to add; return `set' with a fresh reference. */
  if (!ecl)
    {
      rx_protect_superset (rx, set);
      return set;
    }

  /* Base case: `set' is empty; build a superset holding `ecl'. */
  if (!set)
    return rx_superset_cons (rx, ecl->car,
			     rx_superstate_eclosure_union (rx, 0, ecl->cdr));

  /* Common head: skip the duplicate element of `ecl'. */
  if (set->car == ecl->car)
    return rx_superstate_eclosure_union (rx, set, ecl->cdr);

  {
    struct rx_superset * tail;
    struct rx_nfa_state * first;

    /* Merge step: both lists are merged on NFA state `id' (they are
     * presumably kept ordered by id -- the comparison below relies on
     * it).  Recurse on the rest, keeping the state with the smaller
     * id as the head of the result.
     */
    if (set->car->id < ecl->car->id)
      {
	tail = rx_superstate_eclosure_union (rx, set->cdr, ecl);
	if (!tail)
	  return 0;
	first = set->car;
      }
    else
      {
	tail = rx_superstate_eclosure_union (rx, set, ecl->cdr);
	if (!tail)
	  return 0;
	first = ecl->car;
      }

    {
      struct rx_superset * answer;
      answer = rx_superset_cons (rx, first, tail);
      if (!answer)
	{
	  /* Allocation failed: give back the reference held on `tail'. */
	  rx_release_superset (tail);
	  return 0;
	}
      return answer;
    }
  }
}
658 
659 
660 static int
supersetcmp(void * va,void * vb,struct hashtree_rules * rules)661 supersetcmp (void * va, void * vb, struct hashtree_rules * rules)
662 {
663   struct rx_superset * a = (struct rx_superset *)va;
664   struct rx_superset * b = (struct rx_superset *)vb;
665 
666   a = (struct rx_superset *)va;
667   b = (struct rx_superset *)vb;
668   return (   (a == b)
669 	  || (   a
670 	      && b
671 	      && (a->id == b->id)
672 	      && (a->car == b->car)
673 	      && (a->cdr == b->cdr)));
674 }
675 
676 
/* Hash-tree hook: allocate a hash table node for the NFA state set
 * cache, charging the DFA cache limits (soft allocation: no state
 * flushing) and counting it in the cache.  Returns 0 on failure.
 */
static struct hashtree *
super_hash_allocator (struct hashtree_rules * rules)
{
  struct rx_cache * cache;
  struct hashtree * it;

  /* Recover the containing cache from the address of its embedded
   * rules member (hand-rolled `offsetof' arithmetic).
   */
  cache = ((struct rx_cache *)
	   ((char *)rules
	    - (unsigned long)(&((struct rx_cache *)0)->superset_hashtree_rules)));
  it = ((struct hashtree *)
 	rx_dfa_cache_soft_malloc (sizeof (struct hashtree)));
  if (!it)
    return 0;
  ++cache->hash_tables;
  return it;
}
693 
694 
695 static void
super_hash_liberator(struct hashtree * hash,struct hashtree_rules * rules)696 super_hash_liberator (struct hashtree * hash, struct hashtree_rules * rules)
697 {
698   rx_dfa_cache_free ((char *)hash);
699   --rx_default_cache->hash_tables;
700 }
701 
/* Hash-tree hook for freeing hash items.  Intentionally empty:
 * superset hash items are embedded in `struct rx_superset' and are
 * reclaimed together with the set (see `rx_release_superset').
 */
static void
superset_hash_item_liberator (struct hashtree_item * it,
			      struct hashtree_rules * rules)
{
}
707 
708 
709 /************************************************************************
710  * The Default DFA State Cache
711  *
712  *
713  *
714  */
715 
/* The one and only DFA state cache.  This is a positional
 * initializer: the comments label each field in the declaration order
 * of `struct rx_cache' (the original labels listed `super_edges' and
 * `supersets' in the wrong order; all values are 0, so behavior was
 * unaffected).
 */
static struct rx_cache default_cache =
{
  {
    supersetcmp,
    super_hash_allocator,
    super_hash_liberator,
    superset_allocator,
    superset_hash_item_liberator,
  },
  0,				/* lru_superstate */
  0,				/* semifree_superstate */
  0,				/* hash_tables */
  0,				/* supersets */
  0,				/* super_edges */
  0,				/* superstates */
  0,				/* semifree_superstates */
  0,				/* hits */
  0,				/* misses */
  0,				/* total_hits */
  0,				/* total_misses */
  {				/* superset_table */
    0,
    0,
    0,
    {0}
  }
};


static struct rx_cache * rx_default_cache = &default_cache;
746 
747 /************************************************************************
748  * Making DFA States Semifree and Restoring Them to Live State
749  *
750  */
751 
752 /* static void install_cache_miss_transition (struct rx_super_edge * e);
753  *
754  * Install the instruction `answer' in the transition table of `super'
755  * for all instructions built for `e'.
756  */
757 static void
install_cache_miss_transition(struct rx_super_edge * e)758 install_cache_miss_transition (struct rx_super_edge * e)
759 {
760   struct rx_inx * transitions;
761 
762   transitions = e->inx_list;
763   while (transitions)
764     {
765       transitions->inx = (void *)rx_cache_miss;
766       transitions->data = 0;
767       transitions->data_2 = (void *)e;
768       transitions = transitions->next_same_edge;
769     }
770 
771   e->inx_list = 0;
772 }
773 
774 
775 /* static void semifree_superstate ();
776  *
777  * Select the most recently used live superstate that is not locked and
778  * make it semifree.
779  *
780  * This function makes all incoming instruction frames cache miss
781  * instructions.  It moves the state from the `lru_superstate' queue
782  * of the cache to the `semifree_superstate' queue.
783  */
784 static void
semifree_superstate()785 semifree_superstate ()
786 {
787   struct rx_cache * cache = rx_default_cache;
788   int disqualified;
789 
790   disqualified = cache->semifree_superstates;
791   if (disqualified == cache->superstates)
792     return;
793 
794   while (cache->lru_superstate->locks)
795     {
796       cache->lru_superstate = cache->lru_superstate->next_recyclable;
797       ++disqualified;
798       if (disqualified == cache->superstates)
799 	return;
800     }
801 
802   {
803     struct rx_superstate * it;
804 
805     it = cache->lru_superstate;
806     it->next_recyclable->prev_recyclable = it->prev_recyclable;
807     it->prev_recyclable->next_recyclable = it->next_recyclable;
808 
809     cache->lru_superstate = (it == it->next_recyclable
810 			     ? 0
811 			     : it->next_recyclable);
812 
813     if (!cache->semifree_superstate)
814       {
815 	cache->semifree_superstate = it;
816 	it->next_recyclable = it;
817 	it->prev_recyclable = it;
818       }
819     else
820       {
821 	it->prev_recyclable = cache->semifree_superstate->prev_recyclable;
822 	it->next_recyclable = cache->semifree_superstate;
823 	it->prev_recyclable->next_recyclable = it;
824 	it->next_recyclable->prev_recyclable = it;
825       }
826     {
827       struct rx_super_edge *e;
828 
829       it->is_semifree = 1;
830       ++cache->semifree_superstates;
831 
832       e = it->incoming_edges;
833       if (e)
834 	{
835 	  e->prev_same_dest->next_same_dest = 0;
836 	  while (e)
837 	    {
838 	      install_cache_miss_transition (e);
839 	      e = e->next_same_dest;
840 	    }
841 	  e = it->incoming_edges;
842 	  e->prev_same_dest->next_same_dest = e;
843 	}
844     }
845   }
846 }
847 
848 
849 /* static void refresh_semifree_superstate (struct rx_superstate * super);
850  *
851  * Make `super', which may or may not be in a semifree state, a live
852  * superstate.
853  *
854  * This moves the state to the back of the `lru_superstate' queue.
855  */
static void
refresh_semifree_superstate (struct rx_superstate * super)
{
  struct rx_cache * cache = rx_default_cache;

  /* If `super' is the head of either circular queue, move the head
   * off it (or clear the head when `super' is the only element).
   */
  if (cache->semifree_superstate == super)
    cache->semifree_superstate = (super->prev_recyclable == super
				  ? 0
				  : super->prev_recyclable);
  else if (cache->lru_superstate == super)
    cache->lru_superstate = (super->prev_recyclable == super
			     ? 0
			     : super->prev_recyclable);

  /* Unlink `super' from whichever queue it is on. */
  super->next_recyclable->prev_recyclable = super->prev_recyclable;
  super->prev_recyclable->next_recyclable = super->next_recyclable;

  /* Re-insert `super' just before the lru head, i.e. at the rear of
   * the lru queue (most recently used position).
   */
  if (!cache->lru_superstate)
    (cache->lru_superstate
     = super->next_recyclable
     = super->prev_recyclable
     = super);
  else
    {
      super->next_recyclable = cache->lru_superstate;
      super->prev_recyclable = cache->lru_superstate->prev_recyclable;
      super->next_recyclable->prev_recyclable = super;
      super->prev_recyclable->next_recyclable = super;
    }
  /* The state is live again; keep the semifree count in step. */
  if (super->is_semifree)
    {
      super->is_semifree = 0;
      --cache->semifree_superstates;
    }
}
891 
892 
893 
894 /* void rx_refresh_this_superstate (struct rx_superstate * superstate);
895  *
896  * If this state is semifree, pass it to `refresh_semifree_superstate'.
897  * Otherwise, move this state to the rear of the `lru_superstate' queue.
898  */
static void
rx_refresh_this_superstate (struct rx_superstate * superstate)
{
  struct rx_cache * cache = rx_default_cache;
  /* Semifree states are revived and moved to the rear of the lru
   * queue by `refresh_semifree_superstate'.
   */
  if (superstate->is_semifree)
    refresh_semifree_superstate (superstate);
  else if (cache->lru_superstate == superstate)
    /* Front of the queue: advancing the head one step leaves this
     * state at the rear of the circular queue -- no relinking needed.
     */
    cache->lru_superstate = superstate->next_recyclable;
  else if (superstate != cache->lru_superstate->prev_recyclable)
    {
      /* Not already at the rear: splice out of the queue and re-link
       * just before the head (the most recently used position).
       */
      superstate->next_recyclable->prev_recyclable
	= superstate->prev_recyclable;
      superstate->prev_recyclable->next_recyclable
	= superstate->next_recyclable;
      superstate->next_recyclable = cache->lru_superstate;
      superstate->prev_recyclable = cache->lru_superstate->prev_recyclable;
      superstate->next_recyclable->prev_recyclable = superstate;
      superstate->prev_recyclable->next_recyclable = superstate;
    }
}
919 
920 
921 /************************************************************************
922  * Really Freeing DFA States
923  *
924  * After becoming semifree, a DFA state may eventually be freed for
925  * real -- meaning that the memory it occupies is recycled.
926  *
927  */
928 
/* This tries to truly free one superstate (recycling its memory).
 * It might, as a result, free some edge pieces or hash tables.
 * If nothing can be freed because too many locks are being held, fail.
 */

/* int rx__really_free_superstate ();
 *
 * Attempt to reduce memory usage of the DFA state cache by truly
 * freeing one superstate.  Return -1 if no state could be freed, 0
 * otherwise.
 */
int
rx__really_free_superstate (void)
{
  struct rx_cache * cache = rx_default_cache;
  struct rx_superstate * it;

  /* An empty cache: there is nothing to free. */
  if (!cache->superstates)
    return -1;

  /* If the rate of cache misses is high, we semifree superstates at a
   * rate proportional to our need to truly free states, but greater.
   * That way the total amount of work done freeing states remains
   * proportional to the work needed, but a spike in demand for memory
   * will cause a larger spike in the amount of effort put into
   * rescuing the most desirable states in the cache.
   *
   * For example, as this is written, when the cache miss rate is
   * high, states are semifreed at a rate 3x the rate at which they
   * are truly freed.  If, after a period of running mostly out of the
   * cache, we suddenly have to free 1/3 of the states in the cache to
   * make room for new states, the remaining 2/3 of the old states
   * will all have been marked semifree and thus the most useful of
   * those states will have a high probability of being rescued even
   * if the demand for memory continues.  They will have a fighting
   * chance against those new states which were needed once but which
   * aren't otherwise very useful.
   *
   * We semifree states at this accelerated rate when the cache miss
   * rate is high because in that case, we expect the cache miss rate
   * to remain high, and so we expect the high demand on memory to
   * continue.
   *
   * When the cache miss rate is low, we look at the number of
   * semifree states compared to the total number of states.  If that
   * ratio is low (there aren't many semifree states), then we again
   * semifree states at an accelerated rate.  If that ratio is high
   * (there are quite a few semifree states), then we only semifree
   * one state for every state truly freed.  The idea is that a low
   * cache miss rate predicts a low demand on memory.  When demand on
   * memory is low, we want to maintain a constant sized queue of
   * semifree states which is a fraction of the total number of
   * states.  The existence of that queue helps us from throwing away
   * useful DFA states while demand on memory is low while the fixed
   * size of that queue means that the overhead of maintaining it is a
   * small constant multiple of the time spent truly freeing states.
   */
  if (   (cache->misses < cache->hits)
      || (3 * cache->semifree_superstates >= cache->superstates))
    semifree_superstate (cache);
  else
    {
      /* Accelerated rate: 3 semifrees per true free (see above). */
      semifree_superstate (cache);
      semifree_superstate (cache);
      semifree_superstate (cache);
    }


  /* At this point, if no states are semifree, then all states are
   * locked and no states can be freed.
   */
  if (!cache->semifree_superstate)
    return -1;

  /* Remove the oldest semifree state from the queue of all semifree
   * states.  Correct the counters that count DFA states.
   * (`semifree_superstate' is a circular list whose head is the
   * oldest entry; an empty queue is represented by 0.)
   */
  {
    it = cache->semifree_superstate;
    it->next_recyclable->prev_recyclable = it->prev_recyclable;
    it->prev_recyclable->next_recyclable = it->next_recyclable;
    cache->semifree_superstate = ((it == it->next_recyclable)
				  ? 0
				  : it->next_recyclable);
    --cache->semifree_superstates;
    --cache->superstates;
  }


  /* Unlink the queue of incoming edges and clear the `future' field
   * of those edges which otherwise points to this state.
   */
  if (it->incoming_edges)
    {
      struct rx_super_edge * e;
      /* Break the circular list so the walk below terminates. */
      it->incoming_edges->prev_same_dest->next_same_dest = 0;
      e = it->incoming_edges;
      while (e)
	{
	  struct rx_super_edge * et;
	  et = e;
	  e = e->next_same_dest;
	  et->prev_same_dest = et->next_same_dest = 0;
	  et->future = 0;
	}
    }


  /* Free all outgoing edges and remove them from the `incoming_edges'
   * queue of their destination states.
   */
  {
    struct rx_super_edge *tc;
    tc = it->outgoing_edges;
    while (tc)
      {
	struct rx_super_edge *tct;

	tct = tc;
	tc = tc->next_same_present;

	/* `future' is 0 for edges whose destination was never solved
	 * (or was freed above); such edges are in no incoming queue.
	 */
	if (tct->future)
	  {
	    if (tct->future->incoming_edges == tct)
	      {
		tct->future->incoming_edges = tct->next_same_dest;
		/* Singleton circular queue: this was the only edge. */
		if (tct->future->incoming_edges == tct)
		  tct->future->incoming_edges = 0;
	      }
	    tct->next_same_dest->prev_same_dest = tct->prev_same_dest;
	    tct->prev_same_dest->next_same_dest = tct->next_same_dest;
	  }
	bits_free (tct->cset);
	rx_dfa_cache_free ((char *)tct);
	--cache->super_edges;
      }
  }

#ifdef RX_LARGE_TABLES
  {
    /* free second level transition tables */
    if (it->storage_unit_size == 2)
      {
	int x;

	for (x = 0; x < 256; ++x)
	  {
	    /* Shared read-only pages are never freed. */
	    if (   (((struct rx_inx **)it->transitions)[x] != rx_shared_cache_miss_page)
		&& (((struct rx_inx **)it->transitions)[x] != rx_shared_backtrack_page)
		&& (((struct rx_inx **)it->transitions)[x] != rx_huge_char_page))
	      rx_dfa_cache_free ((void *)((struct rx_inx **)it->transitions)[x]);
	  }
      }
  }
#else
  {
    /* free second level transition tables */
    if (it->storage_unit_size == 2)
      {
	int x;

	for (x = 0; x < 256; ++x)
	  {
	    /* Shared second-level pages are never freed. */
	    if (   (((struct rx_inx ***)it->transitions)[x] != rx_default_small_table_page2)
		&& (((struct rx_inx ***)it->transitions)[x] != rx_small_table_backtrack_page2)
		&& (((struct rx_inx ***)it->transitions)[x] != rx_huge_char_page2))
	      {
		int y;
		struct rx_inx ** page2;

		page2 = ((struct rx_inx ***)it->transitions)[x];
		for (y = 0; y < rx_page2_size; ++y)
		  {
		    if (page2[y] != rx_shared_cache_miss_page)
		      {
			rx_dfa_cache_free ((void *)page2[y]);
		      }
		  }
		rx_dfa_cache_free ((void *)page2);
	      }
	  }
      }
  }
#endif

  /* Free any privately-allocated pages of the multi-level table for
   * characters above 0xffff; the shared default pages stay.
   */
  if (it->huge_char_transitions != rx_default_huge_char_transitions)
    {
      int x;

#ifdef RX_LARGE_TABLES
      for (x = 0; x < rx_page1_size21; ++x)
	{
	  struct rx_inx ** sub;

	  sub = it->huge_char_transitions[x];
	  if (sub != rx_huge_char_second_level_page)
	    {
	      int z;

	      for (z = 0; z < rx_page2_size21; ++z)
		{
		  struct rx_inx * leaf;

		  leaf = sub[z];
		  if (leaf != rx_shared_cache_miss_page)
		    rx_dfa_cache_free ((void *)leaf);
		}
	      rx_dfa_cache_free ((void *)sub);
	    }
	}
#else
      for (x = 0; x < rx_page1_size21; ++x)
	{
	  struct rx_inx *** sub;

	  sub = it->huge_char_transitions[x];
	  if (sub != rx_huge_char_second_level_page)
	    {
	      int z;

	      for (z = 0; z < rx_page2_size21; ++z)
		{
		  struct rx_inx ** subsub;

		  subsub = sub[z];

		  if (subsub != rx_huge_char_third_level_page)
		    {
		      int zz;

		      for (zz = 0; zz < rx_page3_size21; ++zz)
			{
			  struct rx_inx * leaf;

			  leaf = subsub[zz];
			  if (leaf != rx_shared_cache_miss_page)
			    rx_dfa_cache_free ((void *)leaf);
			}
		      rx_dfa_cache_free ((void *)subsub);
		    }
		}
	      rx_dfa_cache_free ((void *)sub);
	    }
	}
#endif
      rx_dfa_cache_free ((void *)it->huge_char_transitions);
    }

  /* Clear the pointer to this state in the NFA set of this state.
   * (One slot per storage unit size: [0] for 1-byte, [1] for 2-byte.)
   */
  if (it->members->superstate[0] == it)
    it->members->superstate[0] = 0;
  if (it->members->superstate[1] == it)
    it->members->superstate[1] = 0;

  /* Release the NFA set.
   */
  rx_release_superset (it->members);

  /* Free the memory of this DFA state.
   */
  rx_dfa_cache_free ((char *)it);

  return 0;
}
1194 
1195 
1196 /************************************************************************
1197  *(h1 "Building DFA States")
1198  *
1199  *
1200  *
1201  */
1202 
/* struct rx_superstate * rx_superstate (struct rx_nfa *rx,
 *					 struct rx_superset *set,
 *					 int storage_unit_size);
 *
 * Retrieve a DFA state from the cache or construct a new state for a
 * given NFA state set.
 *
 * This may cause old states to be flushed from the DFA state cache.
 *
 * Return 0 if the necessary memory could not be allocated.
 */
static struct rx_superstate *
rx_superstate (struct rx_nfa *rx,
	       struct rx_superset *set,
	       int storage_unit_size)
{
  struct rx_cache * cache;
  struct rx_superstate * superstate;
  int has_huge_table;

  has_huge_table = 0;

  /* Only 8-bit (256) and 21-bit (1 << 21) character sets are
   * supported; the latter needs the multi-level "huge" tables.
   */
  if (rx->local_cset_size == 256)
    has_huge_table = 0;
  else if (rx->local_cset_size == (1 << 21))
    has_huge_table = 1;
  else
    panic ("odd sized cset in rx_superstate");

  cache = rx_default_cache;
  superstate = 0;

  /* Does the superstate already exist in the cache? */
  if (set->superstate[storage_unit_size - 1])
    {
      /* Record the hit.  The hit/miss counters are periodically
       * halved so that they track the recent rate, not the
       * all-time rate.
       */
      ++cache->hits;
      ++cache->total_hits;
      while ((cache->hits + cache->misses) > 256)
	{
	  cache->hits >>= 1;
	  cache->misses >>= 1;
	}

      superstate = set->superstate[storage_unit_size - 1];
      rx_refresh_this_superstate (superstate);
      return superstate;
    }


  /* This point reached only for cache misses. */
  ++cache->misses;
  ++cache->total_misses;
  while ((cache->hits + cache->misses) > 256)
    {
      cache->hits >>= 1;
      cache->misses >>= 1;
    }


  if (!has_huge_table)
    {
      /* 8-bit cset: a single flat table of 256 instruction frames,
       * allocated inline after the superstate structure.
       */
      int superstate_size;
      superstate_size = (sizeof (*superstate) + (sizeof (struct rx_inx) * 256));
      superstate = ((struct rx_superstate *) rx_dfa_cache_malloc (superstate_size));

      if (!superstate)
	return 0;

      superstate->storage_unit_size = storage_unit_size;
      superstate->has_huge_table = 0;

      /* NOTE(review): only the first sizeof (rx_shared_cache_miss_page)
       * bytes of the table are zeroed here, not all 256 entries;
       * presumably an all-zero frame denotes a cache miss — confirm.
       */
      mem_set0 ((void *)&superstate->transitions, sizeof (rx_shared_cache_miss_page));
      /* mem_move ((void *)&superstate->transitions, (void *)rx_shared_cache_miss_page, sizeof (rx_shared_cache_miss_page)); */
    }
  else
    {
      int x;
      int superstate_size;

      superstate_size = 0;

      /* For 2-byte storage units the first level holds 256 page
       * pointers; for 1-byte units it holds 256 instruction frames.
       */
      if (storage_unit_size == 2)
	superstate_size = (sizeof (*superstate) + (sizeof (struct rx_inx *) * 256));
      else if (storage_unit_size == 1)
	superstate_size = (sizeof (*superstate) + (sizeof (struct rx_inx) * 256));
      else
	panic ("odd storage unit size in rx_superstate");

      superstate = ((struct rx_superstate *) rx_dfa_cache_malloc (superstate_size));
      if (!superstate)
	return 0;

      superstate->storage_unit_size = storage_unit_size;
      superstate->has_huge_table = 1;

      if (storage_unit_size == 2)
	{
	  /* Point every first-level slot at a shared page: cache-miss
	   * pages for ordinary values, huge-char pages for the
	   * surrogate range 0xd8..0xdb, backtrack for 0xdc..0xdf.
	   */
#ifdef RX_LARGE_TABLES
	  {
	    for (x = 0; x < 0xd8; ++x)
	      ((struct rx_inx **)superstate->transitions)[x] = rx_shared_cache_miss_page;
	    for (x = 0xd8; x < 0xdc; ++x)
	      ((struct rx_inx **)superstate->transitions)[x] = rx_huge_char_page;
	    for (x = 0xdc; x < 0xe0; ++x)
	      ((struct rx_inx **)superstate->transitions)[x] = rx_shared_backtrack_page;
	    for (x = 0xe0; x < 256; ++x)
	      ((struct rx_inx **)superstate->transitions)[x] = rx_shared_cache_miss_page;
	  }
#else
	  {
	    for (x = 0; x < 0xd8; ++x)
	      ((struct rx_inx ***)superstate->transitions)[x] = rx_default_small_table_page2;
	    for (x = 0xd8; x < 0xdc; ++x)
	      ((struct rx_inx ***)superstate->transitions)[x] = rx_huge_char_page2;
	    for (x = 0xdc; x < 0xe0; ++x)
	      ((struct rx_inx ***)superstate->transitions)[x] = rx_small_table_backtrack_page2;
	    for (x = 0xe0; x < 256; ++x)
	      ((struct rx_inx ***)superstate->transitions)[x] = rx_default_small_table_page2;
	  }
#endif
	}
      else if (storage_unit_size == 1)
	{
	  /* 1-byte (UTF-8) units: ASCII entries start as cache
	   * misses; lead bytes dispatch on sequence length.
	   */
	  mem_move ((void *)&superstate->transitions, (void *)rx_shared_cache_miss_page, 128 * sizeof (struct rx_inx));

	  /* no valid multi-byte encodings begin with 80..c1
	   */
	  for (x = 0x80; x < 0xc2; ++x)
	    {
	      ((struct rx_inx *)&superstate->transitions)[x].data = (void *)0;
	      ((struct rx_inx *)&superstate->transitions)[x].data_2 = (void *)0;
	      ((struct rx_inx *)&superstate->transitions)[x].inx = (void *)rx_bogus_utf8;
	      ((struct rx_inx *)&superstate->transitions)[x].next_same_edge = (struct rx_inx *)0;
	    }
	  /* valid 2 byte encodings begin with c2..df
	   */
	  for (x = 0xc2; x < 0xe0; ++x)
	    {
	      ((struct rx_inx *)&superstate->transitions)[x].data = (void *)0;
	      ((struct rx_inx *)&superstate->transitions)[x].data_2 = (void *)0;
	      ((struct rx_inx *)&superstate->transitions)[x].inx = (void *)rx_2byte_utf8;
	      ((struct rx_inx *)&superstate->transitions)[x].next_same_edge = (struct rx_inx *)0;
	    }
	  /* valid 3 byte encodings begin with e0..ef
	   */
	  for (x = 0xe0; x < 0xf0; ++x)
	    {
	      ((struct rx_inx *)&superstate->transitions)[x].data = (void *)0;
	      ((struct rx_inx *)&superstate->transitions)[x].data_2 = (void *)0;
	      ((struct rx_inx *)&superstate->transitions)[x].inx = (void *)rx_3byte_utf8;
	      ((struct rx_inx *)&superstate->transitions)[x].next_same_edge = (struct rx_inx *)0;
	    }
	  /* valid 4 byte encodings begin with f0..f4
	   */
	  for (x = 0xf0; x < 0xf5; ++x)
	    {
	      ((struct rx_inx *)&superstate->transitions)[x].data = (void *)0;
	      ((struct rx_inx *)&superstate->transitions)[x].data_2 = (void *)0;
	      ((struct rx_inx *)&superstate->transitions)[x].inx = (void *)rx_4byte_utf8;
	      ((struct rx_inx *)&superstate->transitions)[x].next_same_edge = (struct rx_inx *)0;
	    }
	  /* no valid multi-byte encodings begin with f5..ff
	   */
	  for (x = 0xf5; x <= 0xff; ++x)
	    {
	      ((struct rx_inx *)&superstate->transitions)[x].data = (void *)0;
	      ((struct rx_inx *)&superstate->transitions)[x].data_2 = (void *)0;
	      ((struct rx_inx *)&superstate->transitions)[x].inx = (void *)rx_bogus_utf8;
	      ((struct rx_inx *)&superstate->transitions)[x].next_same_edge = (struct rx_inx *)0;
	    }
	}
      else
	panic ("odd storage unit size in rx_superstate");
    }

  /* Start with the shared (empty) huge-char table; a private copy is
   * made lazily in `install_partial_transition'.
   */
  superstate->huge_char_transitions = rx_default_huge_char_transitions;
  ++cache->superstates;
  superstate->seq = rx_superstate_counter++;
  superstate->table_id = -1;

  /* Insert the new state at the rear of the LRU queue. */
  if (!cache->lru_superstate)
    (cache->lru_superstate
     = superstate->next_recyclable
     = superstate->prev_recyclable
     = superstate);
  else
    {
      superstate->next_recyclable = cache->lru_superstate;
      superstate->prev_recyclable = cache->lru_superstate->prev_recyclable;
      (  superstate->prev_recyclable->next_recyclable
       = superstate->next_recyclable->prev_recyclable
       = superstate);
    }
  superstate->outgoing_edges = 0;
  superstate->incoming_edges = 0;
  superstate->members = set;
  /* Cache the state in its NFA set and protect the set from being
   * reclaimed while the state exists.
   */
  set->superstate[storage_unit_size - 1] = superstate;
  rx_protect_superset (rx, set);
  superstate->is_semifree = 0;
  superstate->rx_id = rx->rx_id;
  superstate->locks = 0;
  return superstate;
}
1404 
1405 
/*(c rx_nfa_state_to_superstate)
 * struct rx_superstate *
 * rx_nfa_state_to_superstate (struct rx_nfa * rx,
 *			       struct rx_nfa_state * nfa_state,
 * 			       int storage_unit_size);
 *
 * Construct the DFA state whose NFA state set is the epsilon closure
 * of `nfa_state' in the NFA `rx'.  Return that superstate, with no
 * new locks applied to it.
 *
 * Return 0 if the closure or the superstate could not be built
 * (e.g. for lack of memory).
 */
struct rx_superstate *
rx_nfa_state_to_superstate (struct rx_nfa * rx,
			    struct rx_nfa_state * nfa_state,
			    int storage_unit_size)
{
  struct rx_nfa_state_set * epsilon_closure;
  struct rx_superset * contents;
  struct rx_superstate * answer;

  /* Compute the superstate set for this NFA state and
   * acquire a reference count for it.
   *
   * Ensure that a reference to this set is cached in
   * the NFA state.
   */
  if (nfa_state->superstate_set)
    {
      /* Already computed once; just take a new reference. */
      contents = nfa_state->superstate_set;
      rx_protect_superset (rx, contents);
    }
  else
    {
      epsilon_closure = rx_state_closure (rx, nfa_state);
      if (!epsilon_closure)
	return 0;

      contents = rx_superstate_eclosure_union (rx,
					       0,
					       epsilon_closure);
      if (!contents)
	return 0;

      /* rx_superstate_eclosure_union gives us a reference count
       */
      nfa_state->superstate_set = contents;
      contents->nfa_state = nfa_state;
    }

  /* Acquire a superstate for this superstate_set and drop
   * the reference count on the set.
   *
   * A cached superstate is reused only if it was built for this same
   * NFA (its `rx_id' matches).
   */
  if (   contents->superstate[storage_unit_size - 1]
      && (contents->superstate[storage_unit_size - 1]->rx_id == rx->rx_id))
    {
      answer = contents->superstate[storage_unit_size - 1];
      /* Treat this as a cache hit that contributes longevity
       * to the cache entry and that turns a semifree state
       * into a live state:
       */
      rx_refresh_this_superstate (answer);
      rx_release_superset (contents);
      return answer;
    }
  else
    {
      answer = rx_superstate (rx, contents, storage_unit_size);
      rx_release_superset (contents);
      return answer;		/* might be 0 */
    }
}
1476 
1477 
1478 
1479 
/* void rx_clear_superstate_table_ids (struct rx_nfa * nfa);
 *
 * Reset the `table_id' of every superstate in the cache (both the
 * live and the semifree queue) to -1.  The `nfa' argument is unused.
 */
1484 void
rx_clear_superstate_table_ids(struct rx_nfa * nfa)1485 rx_clear_superstate_table_ids (struct rx_nfa * nfa)
1486 {
1487   struct rx_superstate * qs[2];
1488   int x;
1489 
1490   qs[0] = rx_default_cache->semifree_superstate;
1491   qs[1] = rx_default_cache->lru_superstate;
1492   for (x = 0; x < 2; ++x)
1493     {
1494       struct rx_superstate * q;
1495       struct rx_superstate * it;
1496 
1497       q = qs[x];
1498       it = q;
1499       if (q)
1500 	do
1501 	  {
1502 	    it->table_id = -1;
1503 	    it = it->next_recyclable;
1504 	  }
1505 	while (it != q);
1506     }
1507 }
1508 
1509 
1510 /************************************************************************
1511  * Low Level Support for Handling Cache Misses in the Dfa State Cache
1512  *
1513  *
1514  */
1515 
/*(c solve_destination)
 * static int solve_destination (struct rx_nfa * rx,
 *                               struct rx_inx * inx_out,
 *                               struct rx_super_edge * e,
 *                               int storage_unit_size);
 *
 * Compute the destination state for DFA edge `e'.
 *
 * As a side effect, set the `future' field of `e' and link `e' onto
 * the destination state's `incoming_edges' queue.
 *
 * Return an `rx_next_char' instruction frame in `inx_out'.  The
 * `data' field of the instruction will point to the destination
 * state.  This instruction can be copied into transition table
 * entries for edge `e'.  If the edge leads nowhere, an
 * `rx_backtrack' instruction is returned instead and `e' is marked
 * `is_backtrack'.
 *
 * Return 0 upon success, -1 if a needed closure or superstate could
 * not be constructed.
 */
static int
solve_destination (struct rx_nfa * rx,
		   struct rx_inx * inx_out,
		   struct rx_super_edge * e,
		   int storage_unit_size)
{
  struct rx_superset * nfa_state;
  struct rx_superset * solution;
  struct rx_superstate *dest;

  /* A previously-solved dead edge: answer `backtrack' immediately. */
  if (e->is_backtrack)
    {
      inx_out->inx = (void *) rx_backtrack;
      inx_out->data = 0;
      inx_out->data_2 = 0;
      return 0;
    }

  solution = 0;

  /* Iterate over all NFA states in the state set of this superstate. */
  for (nfa_state = e->present->members; nfa_state; nfa_state = nfa_state->cdr)
    {
      struct rx_nfa_edge * ne;

      /* Iterate over all edges of each NFA state. */
      for (ne = nfa_state->car->edges; ne; ne = ne->next)

        /* If we find an edge that is labeled with
	 * the characters we are solving for.....
	 */
	if ((ne->type == ne_cset) && bits_is_subset (ne->cset, e->cset))
	  {
	    struct rx_nfa_state * n;
	    struct rx_superset * old_sol;

	    n = ne->dest;
	    old_sol = solution;
	    {
	      struct rx_nfa_state_set * set;
	      set = rx_state_closure (rx, n);

	      if (!set)
		{
		  rx_release_superset (old_sol);
		  return -1;
		}

	      /* Fold the closure of the edge's destination into the
	       * accumulated solution set.
	       */
	      solution = rx_superstate_eclosure_union (rx, solution, set);

	      if (!solution)
		{
		  rx_release_superset (old_sol);
		  return -1;
		}
	    }
	    /* The union holds its own reference; drop ours on the
	     * previous partial solution.
	     */
	    rx_release_superset (old_sol);
	  }
    }

  /* No NFA edges match: this DFA edge can only backtrack.  Record
   * that fact so future misses take the fast path above.
   */
  if (solution == 0)
    {
      e->is_backtrack = 1;
      inx_out->inx = (void *) rx_backtrack;
      inx_out->data = 0;
      inx_out->data_2 = 0;
      return 0;
    }

  dest = rx_superstate (rx, solution, storage_unit_size);
  rx_release_superset (solution);
  if (!dest)
    return -1;
  e->future = dest;

  /* Link `e' into the (circular) incoming-edges queue of `dest'. */
  if (!dest->incoming_edges)
    {
      dest->incoming_edges = e;
      e->next_same_dest = e->prev_same_dest = e;
    }
  else
    {
      e->next_same_dest = dest->incoming_edges;
      e->prev_same_dest = dest->incoming_edges->prev_same_dest;
      e->next_same_dest->prev_same_dest = e;
      e->prev_same_dest->next_same_dest = e;
    }
  inx_out->data = (void *)&dest->transitions;
  inx_out->data_2 = (void *) dest->members->state_label;
  inx_out->inx = (void *) rx_next_char;
  return 0;
}
1623 
1624 
/* static int compute_super_edge_cset (struct rx_nfa *rx,
 *				       bits csetout,
 *				       struct rx_superstate * superstate,
 *				       unsigned int chr);
 *
 * For DFA state `superstate' and character `chr', compute a character
 * set which is the label of the DFA edge defining transitions out of
 * `superstate' on input character `chr'.
 *
 * This character set can then be used to compute the NFA state set of
 * the destination state of the transition.  See `solve_destination'.
 *
 * 0 == backtrack
 * n > 0 == n_nfa_edges
 * n < 0 == ESPACE
 */
1641 static int
compute_super_edge_cset(struct rx_nfa * rx,bits csetout,struct rx_superstate * superstate,unsigned int chr)1642 compute_super_edge_cset (struct rx_nfa *rx,
1643 			 bits csetout,
1644 			 struct rx_superstate * superstate,
1645 			 unsigned int chr)
1646 {
1647   int n_nfa_edges;
1648   struct rx_superset * stateset;
1649 
1650   stateset = superstate->members;
1651 
1652   if (bits_fill (csetout))
1653     return -1;
1654 
1655   n_nfa_edges = 0;
1656   while (stateset)
1657     {
1658       struct rx_nfa_edge *e;
1659 
1660       for (e = stateset->car->edges; e; e = e->next)
1661 	if (e->type == ne_cset)
1662 	  {
1663 	    if (!bits_is_member (e->cset, chr))
1664 	      {
1665 		if (bits_difference (csetout, e->cset))
1666 		  return -1;
1667 	      }
1668 	    else
1669 	      {
1670 		if (bits_intersection (csetout, e->cset))
1671 		  return -1;
1672 	      }
1673 	    ++n_nfa_edges;
1674 	  }
1675       stateset = stateset->cdr;
1676     }
1677   return n_nfa_edges;
1678 }
1679 
1680 
1681 /* static struct rx_super_edge * rx_super_edge (struct rx_nfa *rx, struct rx_superstate *super, bits cset);
1682  *
1683  * Construct a new DFA edge, originating in state `super', and
1684  * defining transitions for characters in `cset'.
1685  */
1686 static struct rx_super_edge *
rx_super_edge(struct rx_nfa * rx,struct rx_superstate * super,bits cset)1687 rx_super_edge (struct rx_nfa *rx, struct rx_superstate *super, bits cset)
1688 {
1689   struct rx_super_edge *tc;
1690 
1691   tc = ((struct rx_super_edge *) rx_dfa_cache_malloc (sizeof (struct rx_super_edge)));
1692   if (!tc)
1693     return 0;
1694   mem_set0 ((void *)tc, sizeof (struct rx_super_edge));
1695   ++rx_default_cache->super_edges;
1696   tc->next_same_present = super->outgoing_edges;
1697   super->outgoing_edges = tc;
1698   tc->cset = cset;
1699   tc->present = super;
1700   tc->inx_list = 0;
1701   tc->is_backtrack = 0;
1702   return tc;
1703 }
1704 
1705 
1706 
/* static int install_partial_transition (struct rx_superstate *super,
 *					  struct rx_inx *answer,
 *					  struct rx_super_edge * edge,
 *					  bitset_subset set,
 *					  int chr);
 *
 * Fill in entries in the transition table of `super' for the
 * subset-aligned block of characters containing `chr'.  Within that
 * block of `bits_per_subset' entries, entry `x' is filled if bit `x'
 * of `set' is 1.
 *
 * Filled entries are filled with a copy of `*answer' and linked onto
 * `edge->inx_list'.
 *
 * This function is useful when handling a cache miss.  It is
 * expensive to fill in the entire transition table and often, only a
 * few entries in a narrow range (such as lower case letters) will
 * ever be needed.  When a missing entry is discovered, we fill in
 * only a narrow region of the table around the missing entry.
 *
 * Return 0 upon success, -1 if a needed table page could not be
 * allocated from the DFA cache.
 */
static int
install_partial_transition  (struct rx_superstate *super,
			     struct rx_inx *answer,
			     struct rx_super_edge * edge,
			     bitset_subset set,
			     int chr)
{
  int x;
  bitset_subset pos;
  struct rx_inx * inxs;

  /* Locate (faulting in pages as needed) the block of transition
   * table entries containing `chr'.
   */
  if (!super->has_huge_table || ((super->storage_unit_size == 1) && (chr < 0x80)))
    inxs = rx_subset_transitions8 (super->transitions, chr);
  else
    {
      if ((super->storage_unit_size == 1) || (chr > 0xffff))
	{
	  /* Characters above 0xffff (or any non-ASCII character in
	   * 1-byte mode) live in the multi-level 21-bit table.  Each
	   * level that is still a shared default page is replaced by
	   * a private copy before descending.
	   */

#ifdef RX_LARGE_TABLES
	  {
	    struct rx_inx ** page2_21;
	    struct rx_inx * page3_21;
	    if (super->huge_char_transitions == rx_default_huge_char_transitions)
	      {
		struct rx_inx *** ht;
		ht = (struct rx_inx ***)rx_dfa_cache_malloc (rx_page1_size21 * sizeof (struct rx_inx **));
		if (!ht)
		  return -1;
		super->huge_char_transitions = ht;
		mem_move ((t_uchar *)super->huge_char_transitions, (t_uchar *)rx_default_huge_char_transitions, sizeof (rx_default_huge_char_transitions));
	      }

	    page2_21 = rx_page2_21 (super->huge_char_transitions, chr);

	    if (page2_21 == rx_huge_char_second_level_page)
	      {
		page2_21 = (struct rx_inx **)rx_dfa_cache_malloc (rx_page2_size21 * sizeof (struct rx_inx *));
		if (!page2_21)
		  return -1;
		mem_move ((t_uchar *)page2_21, (t_uchar *)rx_huge_char_second_level_page, rx_page2_size21 * sizeof (struct rx_inx *));
		rx_page2_21 (super->huge_char_transitions, chr) = page2_21;
	      }

	    page3_21 = rx_page3_21 (super->huge_char_transitions, chr);

	    if (page3_21 == rx_shared_cache_miss_page)
	      {
		page3_21 = (struct rx_inx *)rx_dfa_cache_malloc (256 * sizeof (struct rx_inx));
		if (!page3_21)
		  return -1;
		mem_move ((t_uchar *)page3_21, (t_uchar *)rx_shared_cache_miss_page, 256 * sizeof (struct rx_inx));
		rx_page3_21(super->huge_char_transitions, chr) = page3_21;
	      }
	  }
#else
	  {
	    struct rx_inx ***  page2_21;
	    struct rx_inx ** page3_21;
	    struct rx_inx * page4_21;

	    if (super->huge_char_transitions == rx_default_huge_char_transitions)
	      {
		struct rx_inx **** ht;
		ht = (struct rx_inx ****)rx_dfa_cache_malloc (rx_page1_size21 * sizeof (struct rx_inx ***));
		if (!ht)
		  return -1;
		super->huge_char_transitions = ht;
		mem_move ((t_uchar *)super->huge_char_transitions, (t_uchar *)rx_default_huge_char_transitions, sizeof (rx_default_huge_char_transitions));
	      }

	    page2_21 = rx_page2_21 (super->huge_char_transitions, chr);

	    if (page2_21 == rx_huge_char_second_level_page)
	      {
		page2_21 = (struct rx_inx ***)rx_dfa_cache_malloc (rx_page2_size21 * sizeof (struct rx_inx **));
		if (!page2_21)
		  return -1;
		mem_move ((t_uchar *)page2_21, (t_uchar *)rx_huge_char_second_level_page, rx_page2_size21 * sizeof (struct rx_inx **));
		rx_page2_21 (super->huge_char_transitions, chr) = page2_21;
	      }

	    page3_21 = rx_page3_21 (super->huge_char_transitions, chr);

	    if (page3_21 == rx_huge_char_third_level_page)
	      {
		page3_21 = (struct rx_inx **)rx_dfa_cache_malloc (rx_page3_size21 * sizeof (struct rx_inx *));
		if (!page3_21)
		  return -1;
		mem_move ((t_uchar *)page3_21, (t_uchar *)rx_huge_char_third_level_page, rx_page3_size21 * sizeof (struct rx_inx *));
		rx_page3_21(super->huge_char_transitions, chr) = page3_21;
	      }

	    page4_21 = rx_page4_21 (super->huge_char_transitions, chr);
	    if (page4_21 == rx_shared_cache_miss_page)
	      {
		page4_21 = (struct rx_inx *)rx_dfa_cache_malloc (rx_page4_size21 * sizeof (struct rx_inx));
		if (!page4_21)
		  return -1;
		mem_move ((t_uchar *)page4_21, (t_uchar *)rx_shared_cache_miss_page, rx_page4_size21 * sizeof (struct rx_inx));
		rx_page4_21 (super->huge_char_transitions, chr) = page4_21;
	      }
	  }
#endif
	  inxs = rx_subset_transition21 (super->huge_char_transitions, chr);
	}
      else
	{
	  /* (storage_unit_size == 2) && (chr <= 0xffff): the 16-bit
	   * table attached directly to the superstate.
	   */
#ifdef RX_LARGE_TABLES
	  {
	    struct rx_inx * page2_16;

	    page2_16 = rx_page2_16 (super->transitions, chr);
	    if (page2_16 == rx_shared_cache_miss_page)
	      {
		page2_16 = (struct rx_inx *)rx_dfa_cache_malloc (256 * sizeof (struct rx_inx));
		if (!page2_16)
		  return -1;
		mem_move ((t_uchar *)page2_16, (t_uchar *)rx_shared_cache_miss_page, 256 * sizeof (struct rx_inx));
		rx_page2_16(super->transitions, chr) = page2_16;
	      }

	    inxs = rx_subset_transitions16 (super->transitions, chr);
	  }
#else

	  {
	    struct rx_inx ** page2_16;
	    struct rx_inx * page3_16;

	    page2_16 = rx_page2_16 (super->transitions, chr);
	    if (page2_16 == rx_default_small_table_page2)
	      {
		page2_16 = (struct rx_inx **)rx_dfa_cache_malloc (rx_page2_size * sizeof (struct rx_inx *));
		if (!page2_16)
		  return -1;
		mem_move ((t_uchar *)page2_16, (t_uchar *)rx_default_small_table_page2, rx_page2_size * sizeof (struct rx_inx *));
		rx_page2_16(super->transitions, chr) = page2_16;
	      }

	    page3_16 = rx_page3_16 (super->transitions, chr);
	    if (page3_16 == rx_shared_cache_miss_page)
	      {
		page3_16 = (struct rx_inx *)rx_dfa_cache_malloc (rx_page3_size * sizeof (struct rx_inx));
		if (!page3_16)
		  return -1;
		mem_move ((t_uchar *)page3_16, (t_uchar *)rx_shared_cache_miss_page, rx_page3_size * sizeof (struct rx_inx));
		rx_page3_16(super->transitions, chr) = page3_16;
	      }

	    inxs = rx_subset_transitions16 (super->transitions, chr);
	  }

#endif
	}
    }


  /* Copy `*answer' into every entry selected by `set', threading the
   * filled entries onto `edge->inx_list' so they can be found (and
   * invalidated) later via the edge.
   */
  for (x = 0, pos = 1; x < bits_per_subset; ++x, pos <<= 1)
    {
      if (set & pos)
	{
	  inxs[x] = *answer;
	  inxs[x].next_same_edge = edge->inx_list;
	  edge->inx_list = &inxs[x];
	}
    }
  return 0;
}
1903 
1904 
1905 
1906 
1907 /************************************************************************
1908  *(h1 "DFA State Transition Cache Misses")
1909  *
1910  *
1911  */
1912 
1913 
1914 /*(c rx_handle_cache_miss)
1915  * struct rx_inx * rx_handle_cache_miss (struct rx_nfa * rx,
1916  *					 struct rx_superstate * super,
1917  *					 t_uchar chr,
1918  *					 void * data_2);
1919  *
1920  * Recover from an `rx_cache_miss' instruction.
1921  *
1922  * `rx' and `super' are the NFA and superstate.  `chr' is the input
1923  * character for which the transition table of `super' contains
1924  * an `rx_cache_miss' instruction.
1925  *
1926  * `data_2' is the `data_2' field of the `rx_cache_miss' instruction
1927  * that caused us to call `rx_handle_cache_miss'.
1928  *
1929  * We return a pointer to another instruction (perhaps the corrected
1930  * transition table entry) which replaces the cache miss instruction.
1931  * The caller of this function can continue by re-dispatching on the
1932  * new instruction (which is permitted to be another cache miss
1933  * instruction).
1934  */
struct rx_inx *
rx_handle_cache_miss (struct rx_nfa * rx,
		      struct rx_superstate * super,
		      unsigned int chr,
		      void * data_2)
{
  struct rx_super_edge * e;

  /* There are three kinds of cache miss.
   *
   * The first occurs when a transition is taken that has never been
   * computed during the lifetime of the source superstate.  In this
   * case, no DFA edge exists for this transition.  The cache miss is
   * handled by calling `compute_super_edge_cset' and building a new
   * DFA edge.  If `compute_super_edge_cset' tells us that no
   * transition is defined, we fill in the transition table with
   * `rx_backtrack' instructions and return one of those.
   *
   * The second kind of cache miss occurs when we have a DFA edge
   * (pointed to by `data_2') but the destination superstate of that
   * edge isn't known.  `solve_destination' is used to construct the
   * destination superstate or find it in the DFA cache.  We return a
   * `rx_next_char' instruction.
   *
   * Finally, the third kind of cache miss occurs when the destination
   * superstate of a transition is known but is or was in a `semi-free
   * state'.  That case is handled by `refresh_semifree_superstate'.
   * We return an `rx_next_char' instruction.
   *
   * Every failure path below (allocation failure in the DFA cache,
   * or a failed `compute_super_edge_cset'/`solve_destination')
   * returns 0, so callers must be prepared for a 0 return.
   */



  /* If the `rx_cache_miss' instruction contained a pointer to an
   * existing DFA edge, that pointer was passed to us as `data_2'.
   */
  e = data_2;

 retry:

  if (!e)
    {
      /* A missing DFA edge.  Look for it in the cache.
       * If an edge whose character set contains `chr' is found, jump
       * back to `retry' with `e' non-0; falling out of the loop
       * leaves `e == 0' and we build a new edge below.
       */
      for (e = super->outgoing_edges; e; e = e->next_same_present)
	if (bits_is_member (e->cset, chr))
	  goto retry;

      /* A DFA edge missing from the cache.  Try to build it now.
       */
      {
	bits trcset;

	/* Hold a lock on `super' so it can't be reclaimed from the
	 * DFA cache while we build the edge.
	 */
	rx_lock_superstate (rx, super);

	/* A state with a huge (Unicode) transition table needs a
	 * full-range cset; otherwise an 8-bit cset suffices.
	 */
	if (super->has_huge_table)
	  trcset = bits_alloc (rx__dfa_alloc_limits, uni_bits_tree_rule);
	else
	  trcset = bits_alloc (rx__dfa_alloc_limits, rx_8bit_bits_tree_rule);

	if (!trcset)
	  {
	    rx_unlock_superstate (rx, super);
	    return 0;
	  }

	{
	  int nfa_edges;

	  /* Fill `trcset' with the set of characters that share this
	   * transition; a negative return signals failure.
	   */
	  nfa_edges = compute_super_edge_cset (rx, trcset, super, chr);
	  if (nfa_edges < 0)
	    {
	      rx_unlock_superstate (rx, super);
	      return 0;
	    }
#if 0
	  else if (!nfa_edges)
	    {
	      static struct rx_inx backtrack = { 0, 0, (void *)rx_backtrack, 0 };
	      rx_unlock_superstate (rx, super);
	      bits_free (trcset);
	      return &backtrack;
	    }
#endif
	  else
	    {
	      /* Build the DFA edge (ownership of `trcset' apparently
	       * passes to `rx_super_edge' -- note it is not freed
	       * here on success or failure; confirm against that
	       * function's contract).  Then re-dispatch: on the next
	       * pass `e' is non-0 and one of the branches below
	       * finishes the job.
	       */
	      e = rx_super_edge (rx, super, trcset);
	      rx_unlock_superstate (rx, super);
	      if (!e)
		return 0;
	      goto retry;
	    }
	}
      }
    }
  else if (e->future)
    {
      struct rx_inx inx;

      /* We know the superstate edge and its destination state, but
       * the instruction frame contained a stale `rx_cache_miss'
       * instruction (the instruction itself, not the state, was the
       * stale part of the cache).  We must now replace the
       * instruction with an `rx_next_char' instruction.
       *
       * Its possible that the destination state is in a semifree
       * state and must be refreshed.
       */
      refresh_semifree_superstate (e->future);
      rx_lock_superstate (rx, super);

      /* Build the corrected `rx_next_char' instruction frame:
       * `data' points at the destination's transition table and
       * `data_2' carries the destination's state label.
       */
      inx.data = (void *)&e->future->transitions;
      inx.data_2 = (void *)e->future->members->state_label;
      inx.inx = (void *)rx_next_char;

      {
	bitset_subset s;
	/* Copy the new instruction into every table slot covered by
	 * the subset of `e->cset' that contains `chr'.  Both calls
	 * return non-0 on failure.
	 */
	if (   bits_get_subset (&s, e->cset, chr)
	    || install_partial_transition (super, &inx, e, s, chr))
	  {
	    rx_unlock_superstate (rx, super);
	    return 0;
	  }
      }
      rx_unlock_superstate (rx, super);

      /* Re-fetch the (now corrected) table entry for `chr', picking
       * the 8/16/21-bit lookup that matches this state's table
       * representation.
       */
      if (!super->has_huge_table || ((super->storage_unit_size == 1) && (chr < 0x80)))
	return rx_transition8 (&super->transitions, chr);
      else if ((super->storage_unit_size == 2) && (chr < (1 << 16)))
	return rx_transition16 (&super->transitions, chr);
      else
	return rx_transition21 (super->huge_char_transitions, chr);
    }
  else
    {
      struct rx_inx inx;

      /* A DFA edge with no known destination state.  We must now
       * compute the destination state and fill in transition table
       * entries with `rx_next_char' instructions.
       */
      rx_lock_superstate (rx, super);
      {
	bitset_subset s;
	struct rx_superstate * destination;

	if (solve_destination (rx, &inx, e, super->storage_unit_size))
	  {
	    rx_unlock_superstate (rx, super);
	    return 0;
	  }

	/* If `solve_destination' produced a real `rx_next_char'
	 * instruction, lock the destination state so it can't be
	 * reclaimed while we install the transitions.
	 */
	if (inx.inx == (void *)rx_next_char)
	  {
	    destination = rx_transitions_to_suprestate ((rx_transition_table)(inx.data));
	    rx_lock_superstate (rx, destination);
	  }
	else
	  destination = 0;

	if (bits_get_subset (&s, e->cset, chr)
	    || install_partial_transition (super, &inx, e, s, chr))
	  {
	    if (destination)
	      rx_unlock_superstate (rx, destination);
	    rx_unlock_superstate (rx, super);
	    return 0;
	  }
	if (destination)
	  rx_unlock_superstate (rx, destination);
      }
      rx_unlock_superstate (rx, super);
      /* As above: return the freshly installed table entry for `chr'.
       */
      if (!super->has_huge_table || ((super->storage_unit_size == 1) && (chr < 0x80)))
	return rx_transition8 (&super->transitions, chr);
      else if ((super->storage_unit_size == 2) && (chr < (1 << 16)))
	return rx_transition16 (&super->transitions, chr);
      else
	return rx_transition21 (super->huge_char_transitions, chr);
    }
}
2115 
2116 
2117 /************************************************************************
2118  * DFA Cache Debugging Tools
2119  *
2120  * I sometimes use this code to debug this file.
2121  */
2122 
2123 #if 0
2124 static int
2125 qlen (struct rx_superstate * q)
2126 {
2127   int count = 1;
2128   struct rx_superstate * it;
2129   if (!q)
2130     return 0;
2131   for (it = q->next_recyclable; it != q; it = it->next_recyclable)
2132     ++count;
2133   return count;
2134 }
2135 
2136 
2137 static void
2138 check_cache ()
2139 {
2140   struct rx_cache * cache = rx_default_cache;
2141   int total = cache->superstates;
2142   int semi = cache->semifree_superstates;
2143 
2144 
2145   total = cache->superstates;
2146   semi = cache->semifree_superstates;
2147 
2148   if (semi != qlen (cache->semifree_superstate))
2149     panic ("failed cache check in rx");
2150 
2151   if ((total - semi) != qlen (cache->lru_superstate))
2152     panic ("failed cache check in rx (2)");
2153 }
2154 
2155 
2156 #endif
2157