1 /* tag: Tom Lord Tue Dec  4 14:41:37 2001 (nfa-cache.c)
2  */
3 /* nfa-cache.c -
4  *
5  ****************************************************************
6  * Copyright (C) 2000 Tom Lord
7  *
8  * See the file "COPYING" for further information about
9  * the copyright and warranty status of this work.
10  */
11 
12 
13 #include "hackerlab/rx/nfa.h"
14 #include "hackerlab/rx/nfa-cache.h"
15 
16 
17 /************************************************************************
18  *(h1 "Tuning the NFA Cache Size"
19  *    :includes ("hackerlab/rx/nfa-cache.h"))
20  *
21  * |NFA cache|
22  * |cache (NFA)|
23  * |NFA|
24  * |non-deterministic finite automata|
25  * When Rx compiles a regexp or regular expression, it builds a tree
26  * structure that describes the syntax of the expression.  Later, some
27  * or all of the tree is converted to a graph, representing a
28  * non-deterministic finite automata (NFA).
29  *
30  * Rx maintains a cache so that whenever two expressions have equivalent
31  * tree structure, they are likely to share a single NFA.  This cache
32  * speeds up the processing of regexps by avoiding redundant NFA construction.
33  *
34  * Note that the NFA cache can preserve NFA beyond the lifetime of a
35  * single compiled expression.  If an expression is compiled, then
36  * matched, then freed, then recompiled, the recompiled expression
37  * will sometimes re-use the NFA cached from the first compile.
38  *
39  * |deterministic finite automata|
40  * |DFA|
41  * During matching, NFA are incrementally converted to deterministic
42  * automata (DFA).  Another cache is kept of DFA fragments (see
43  * xref:"Tuning the DFA Cache Size").  Here again, the NFA cache speeds up
44  * processing: when a single NFA is re-used, the DFA cache is made
45  * more effective.
46  *
47  * This chapter presents functions which are used to monitor and tune
48  * the performance of the NFA cache.  Be sure to also read
49  * xref:"The Impact of NFA and DFA Cache Sizes".
50  */
51 
52 
53 /************************************************************************
54  *(h2 "The NFA Cache Replacement Strategy")
55  *
56  * |cache replacement strategy (NFA)|
57  * |NFA cache replacement strategy|
58  * NFA cache entries are approximately sorted from most to least
59  * recently used.  When cache space is exhausted, the least recently
60  * used entries are discarded.
61  *
62  */
63 
64 
65 
66 
67 #ifndef RX_DEFAULT_NFA_CACHE_SIZE
68 #define RX_DEFAULT_NFA_CACHE_SIZE (sizeof (void *) * (1 << 16))
69 #endif
70 
71 static alloc_limits nfa_alloc_limits;
72 
73 
74 
75 void
rx_free_some_nfa_memory(void * ign,size_t needed)76 rx_free_some_nfa_memory (void * ign, size_t needed)
77 {
78   while (   (lim_in_use (nfa_alloc_limits) + needed > lim_threshold (nfa_alloc_limits))
79 	 && (0 <= rx__really_free_unfa ()))
80     ;
81 }
82 
83 
84 static void
init_nfa_alloc_limits(void)85 init_nfa_alloc_limits (void)
86 {
87   static int init = 1;
88 
89   if (init)
90     {
91       nfa_alloc_limits = make_alloc_limits ("Rx NFA cache",
92 					    RX_DEFAULT_NFA_CACHE_SIZE,
93 					    0,
94 					    0,
95 					    rx_free_some_nfa_memory,
96 					    0);
97       init = 0;
98     }
99 }
100 
101 alloc_limits
rx_nfa_cache_limits(void)102 rx_nfa_cache_limits (void)
103 {
104   init_nfa_alloc_limits ();
105   return nfa_alloc_limits;
106 }
107 
108 
109 /************************************************************************
110  *(h2 "The Advisory NFA Cache Limit")
111  *
112  * |NFA cache limit|
113  * |cache limit (NFA)|
114  * |advisory limit|
115  * |NFA cache threshold|
116  * |cache threshold (NFA)|
117  * The size of the NFA cache is regulated by an advisory limit called
118  * the "cache threshold".  The threshold is a size (expressed in bytes)
119  * which represents an ideal limit on the amount of memory used by the
120  * NFA cache.
121  *
122  * If an allocation within the NFA cache would cause the total amount
123  * of memory used by the cache to exceed the threshold, Rx attempts to
124  * discard sufficient cache entries to avoid exceeding the threshold.
125  * This is not always possible.  When necessary for correct operation,
126  * Rx will exceed the cache threshold: usually by a small amount;
127  * rarely by a large amount.  (That is why the threshold is called an
128  * ^advisory^ limit.)
129  *
130  * The default threshold is `1MB'.
131  */
132 
133 
134 /*(c rx_set_nfa_cache_threshold)
135  * void rx_set_nfa_cache_threshold (size_t n);
136  *
137  * Set the advisory NFA cache limit to `n'.
138  */
139 void
rx_set_nfa_cache_threshold(size_t n)140 rx_set_nfa_cache_threshold (size_t n)
141 {
142   init_nfa_alloc_limits ();
143   lim_set_threshold (nfa_alloc_limits, n);
144 }
145 
146 
147 /*(c rx_nfa_cache_threshold)
148  * size_t rx_nfa_cache_threshold (void);
149  *
150  * Return the current NFA cache limit.
151  */
152 size_t
rx_nfa_cache_threshold(void)153 rx_nfa_cache_threshold (void)
154 {
155   init_nfa_alloc_limits ();
156   return lim_threshold (nfa_alloc_limits);
157 }
158 
159 
160 /* Not yet documented. */
161 
162 void
rx_set_nfa_cache_failure_pt(size_t n)163 rx_set_nfa_cache_failure_pt (size_t n)
164 {
165   init_nfa_alloc_limits ();
166   lim_set_failure_pt (nfa_alloc_limits, n);
167 }
168 
169 
170 size_t
rx_nfa_cache_failure_pt(void)171 rx_nfa_cache_failure_pt (void)
172 {
173   init_nfa_alloc_limits ();
174   return lim_failure_pt (nfa_alloc_limits);
175 }
176 
177 
178 /************************************************************************
179  *(h2 "NFA Cache Statistics")
180  *
181  * |NFA cache statistics|
182  * |cache statistics (NFA)|
183  * These functions report statistics about the NFA cache.
184  *
185  */
186 
187 
188 /*(c rx_nfa_cache_in_use)
189  * size_t rx_nfa_cache_in_use (void);
190  *
191  * Return the amount of memory currently in use by the NFA cache.
192  */
193 size_t
rx_nfa_cache_in_use(void)194 rx_nfa_cache_in_use (void)
195 {
196   init_nfa_alloc_limits ();
197   return lim_in_use (nfa_alloc_limits);
198 }
199 
200 
201 /*(c rx_nfa_cache_high_water_mark)
202  * size_t rx_nfa_cache_high_water_mark (void);
203  *
204  * Return the largest amount of memory ever used at one time by the
205  * NFA cache.
206  */
207 size_t
rx_nfa_cache_high_water_mark(void)208 rx_nfa_cache_high_water_mark (void)
209 {
210   init_nfa_alloc_limits ();
211   return lim_high_water_mark (nfa_alloc_limits);
212 }
213 
214 
215 
216 /*(c rx_nfa_cache_statistics)
217  * void rx_nfa_cache_statistics (size_t * threshold,
218  *                               size_t * ign,
219  *                               size_t * in_use,
220  *                               size_t * high_water_mark,
221  *                               int * hits,
222  *                               int * misses,
223  *                               int * ign2);
224  *
225  * Return statistics about the effectiveness of the NFA cache.
226  *
227  * All parameters are used to return values.  Any parameter may be 0.
228  *
229  * `threshold' returns the NFA cache threshold.
230  *
231  * `ign' is reserved for future use and should be ignored.
232  *
233  * `in_use' returns the number of bytes currently used by the NFA cache.
234  *
235  * `high_water_mark' returns the largest number of bytes ever used by the
236  *  NFA cache.
237  *
238  * `hits' returns the number of cache hits that have occured within
239  * the NFA cache.
240  *
241  * `misses' returns the number of cache misses that have occured within
242  * the NFA cache.
243  *
244  * `ign2' is reserved for future use and should be ignored.
245  */
246 void
rx_nfa_cache_statistics(size_t * threshold,size_t * ign,size_t * in_use,size_t * high_water_mark,int * hits,int * misses,int * ign2)247 rx_nfa_cache_statistics (size_t * threshold,
248 			 size_t * ign,
249 			 size_t * in_use,
250 			 size_t * high_water_mark,
251 			 int * hits,
252 			 int * misses,
253 			 int * ign2)
254 {
255   rx__nfa_cache_statistics (threshold,
256 			    ign,
257 			    in_use,
258 			    high_water_mark,
259 			    hits,
260 			    misses,
261 			    ign2);
262 }
263 
264 
265 
266 /************************************************************************
267  *(h2 "Flushing the NFA Cache")
268  *
269  * |cache flushing (NFA)|
270  * |NFA cache flushing|
271  */
272 
273 
274 /*(c rx_flush_nfa_cache)
275  * size_t rx_flush_nfa_cache (void);
276  *
277  * Attempt to flush all entries from the NFA cache.  If there exist
278  * compiled regexps (that have not been freed), it may not be possible
279  * to entirely empty the NFA cache.
280  *
281  * Return the number of bytes still allocated to the NFA cache after
282  * the flush.
283  */
284 size_t
rx_flush_nfa_cache(void)285 rx_flush_nfa_cache (void)
286 {
287   while (0 <= rx__really_free_unfa ())
288     ;
289   return rx_nfa_cache_in_use ();
290 }
291 
292 
293 
294 
295 void *
rx_nfa_cache_malloc(size_t size)296 rx_nfa_cache_malloc (size_t size)
297 {
298   void * answer;
299 
300   init_nfa_alloc_limits ();
301   answer = lim_malloc (nfa_alloc_limits, size);
302   return answer;
303 }
304 
305 
306 void *
rx_nfa_cache_soft_malloc(size_t size)307 rx_nfa_cache_soft_malloc (size_t size)
308 {
309   void * answer;
310 
311   init_nfa_alloc_limits ();
312   answer = lim_soft_malloc (nfa_alloc_limits, size);
313   return answer;
314 }
315 
316 
317 void *
rx_nfa_cache_realloc(void * prev,size_t size)318 rx_nfa_cache_realloc (void * prev, size_t size)
319 {
320   void * answer;
321 
322   init_nfa_alloc_limits ();
323   answer = lim_realloc (nfa_alloc_limits, prev, size);
324   return answer;
325 }
326 
327 
328 /* static void rx_cache_free (int size, char * mem);
329  *
330  * Free memory for the NFA state cache.
331  */
332 void
rx_nfa_cache_free(void * mem)333 rx_nfa_cache_free (void * mem)
334 {
335   lim_free (nfa_alloc_limits, mem);
336 }
337 
338 
339