1 /* tag: Tom Lord Tue Dec 4 14:41:37 2001 (nfa-cache.c)
2 */
3 /* nfa-cache.c -
4 *
5 ****************************************************************
6 * Copyright (C) 2000 Tom Lord
7 *
8 * See the file "COPYING" for further information about
9 * the copyright and warranty status of this work.
10 */
11
12
13 #include "hackerlab/rx/nfa.h"
14 #include "hackerlab/rx/nfa-cache.h"
15
16
17 /************************************************************************
18 *(h1 "Tuning the NFA Cache Size"
19 * :includes ("hackerlab/rx/nfa-cache.h"))
20 *
21 * |NFA cache|
22 * |cache (NFA)|
23 * |NFA|
24 * |non-deterministic finite automata|
25 * When Rx compiles a regexp or regular expression, it builds a tree
26 * structure that describes the syntax of the expression. Later, some
27 * or all of the tree is converted to a graph, representing a
28 * non-deterministic finite automata (NFA).
29 *
30 * Rx maintains a cache so that whenever two expressions have equivalent
31 * tree structure, they are likely to share a single NFA. This cache
32 * speeds up the processing of regexps by avoiding redundant NFA construction.
33 *
34 * Note that the NFA cache can preserve NFA beyond the lifetime of a
35 * single compiled expression. If an expression is compiled, then
36 * matched, then freed, then recompiled, the recompiled expression
37 * will sometimes re-use the NFA cached from the first compile.
38 *
39 * |deterministic finite automata|
40 * |DFA|
41 * During matching, NFA are incrementally converted to deterministic
42 * automata (DFA). Another cache is kept of DFA fragments (see
43 * xref:"Tuning the DFA Cache Size"). Here again, the NFA cache speeds up
44 * processing: when a single NFA is re-used, the DFA cache is made
45 * more effective.
46 *
47 * This chapter presents functions which are used to monitor and tune
48 * the performance of the NFA cache. Be sure to also read
49 * xref:"The Impact of NFA and DFA Cache Sizes".
50 */
51
52
53 /************************************************************************
54 *(h2 "The NFA Cache Replacement Strategy")
55 *
56 * |cache replacement strategy (NFA)|
57 * |NFA cache replacement strategy|
58 * NFA cache entries are approximately sorted from most to least
59 * recently used. When cache space is exhausted, the least recently
60 * used entries are discarded.
61 *
62 */
63
64
65
66
67 #ifndef RX_DEFAULT_NFA_CACHE_SIZE
68 #define RX_DEFAULT_NFA_CACHE_SIZE (sizeof (void *) * (1 << 16))
69 #endif
70
71 static alloc_limits nfa_alloc_limits;
72
73
74
75 void
rx_free_some_nfa_memory(void * ign,size_t needed)76 rx_free_some_nfa_memory (void * ign, size_t needed)
77 {
78 while ( (lim_in_use (nfa_alloc_limits) + needed > lim_threshold (nfa_alloc_limits))
79 && (0 <= rx__really_free_unfa ()))
80 ;
81 }
82
83
84 static void
init_nfa_alloc_limits(void)85 init_nfa_alloc_limits (void)
86 {
87 static int init = 1;
88
89 if (init)
90 {
91 nfa_alloc_limits = make_alloc_limits ("Rx NFA cache",
92 RX_DEFAULT_NFA_CACHE_SIZE,
93 0,
94 0,
95 rx_free_some_nfa_memory,
96 0);
97 init = 0;
98 }
99 }
100
101 alloc_limits
rx_nfa_cache_limits(void)102 rx_nfa_cache_limits (void)
103 {
104 init_nfa_alloc_limits ();
105 return nfa_alloc_limits;
106 }
107
108
109 /************************************************************************
110 *(h2 "The Advisory NFA Cache Limit")
111 *
112 * |NFA cache limit|
113 * |cache limit (NFA)|
114 * |advisory limit|
115 * |NFA cache threshold|
116 * |cache threshold (NFA)|
117 * The size of the NFA cache is regulated by an advisory limit called
118 * the "cache threshold". The threshold is a size (expressed in bytes)
119 * which represents an ideal limit on the amount of memory used by the
120 * NFA cache.
121 *
122 * If an allocation within the NFA cache would cause the total amount
123 * of memory used by the cache to exceed the threshold, Rx attempts to
124 * discard sufficient cache entries to avoid exceeding the threshold.
125 * This is not always possible. When necessary for correct operation,
126 * Rx will exceed the cache threshold: usually by a small amount;
127 * rarely by a large amount. (That is why the threshold is called an
128 * ^advisory^ limit.)
129 *
130 * The default threshold is `1MB'.
131 */
132
133
134 /*(c rx_set_nfa_cache_threshold)
135 * void rx_set_nfa_cache_threshold (size_t n);
136 *
137 * Set the advisory NFA cache limit to `n'.
138 */
139 void
rx_set_nfa_cache_threshold(size_t n)140 rx_set_nfa_cache_threshold (size_t n)
141 {
142 init_nfa_alloc_limits ();
143 lim_set_threshold (nfa_alloc_limits, n);
144 }
145
146
147 /*(c rx_nfa_cache_threshold)
148 * size_t rx_nfa_cache_threshold (void);
149 *
150 * Return the current NFA cache limit.
151 */
152 size_t
rx_nfa_cache_threshold(void)153 rx_nfa_cache_threshold (void)
154 {
155 init_nfa_alloc_limits ();
156 return lim_threshold (nfa_alloc_limits);
157 }
158
159
160 /* Not yet documented. */
161
162 void
rx_set_nfa_cache_failure_pt(size_t n)163 rx_set_nfa_cache_failure_pt (size_t n)
164 {
165 init_nfa_alloc_limits ();
166 lim_set_failure_pt (nfa_alloc_limits, n);
167 }
168
169
170 size_t
rx_nfa_cache_failure_pt(void)171 rx_nfa_cache_failure_pt (void)
172 {
173 init_nfa_alloc_limits ();
174 return lim_failure_pt (nfa_alloc_limits);
175 }
176
177
178 /************************************************************************
179 *(h2 "NFA Cache Statistics")
180 *
181 * |NFA cache statistics|
182 * |cache statistics (NFA)|
183 * These functions report statistics about the NFA cache.
184 *
185 */
186
187
188 /*(c rx_nfa_cache_in_use)
189 * size_t rx_nfa_cache_in_use (void);
190 *
191 * Return the amount of memory currently in use by the NFA cache.
192 */
193 size_t
rx_nfa_cache_in_use(void)194 rx_nfa_cache_in_use (void)
195 {
196 init_nfa_alloc_limits ();
197 return lim_in_use (nfa_alloc_limits);
198 }
199
200
201 /*(c rx_nfa_cache_high_water_mark)
202 * size_t rx_nfa_cache_high_water_mark (void);
203 *
204 * Return the largest amount of memory ever used at one time by the
205 * NFA cache.
206 */
207 size_t
rx_nfa_cache_high_water_mark(void)208 rx_nfa_cache_high_water_mark (void)
209 {
210 init_nfa_alloc_limits ();
211 return lim_high_water_mark (nfa_alloc_limits);
212 }
213
214
215
216 /*(c rx_nfa_cache_statistics)
217 * void rx_nfa_cache_statistics (size_t * threshold,
218 * size_t * ign,
219 * size_t * in_use,
220 * size_t * high_water_mark,
221 * int * hits,
222 * int * misses,
223 * int * ign2);
224 *
225 * Return statistics about the effectiveness of the NFA cache.
226 *
227 * All parameters are used to return values. Any parameter may be 0.
228 *
229 * `threshold' returns the NFA cache threshold.
230 *
231 * `ign' is reserved for future use and should be ignored.
232 *
233 * `in_use' returns the number of bytes currently used by the NFA cache.
234 *
235 * `high_water_mark' returns the largest number of bytes ever used by the
236 * NFA cache.
237 *
238 * `hits' returns the number of cache hits that have occured within
239 * the NFA cache.
240 *
241 * `misses' returns the number of cache misses that have occured within
242 * the NFA cache.
243 *
244 * `ign2' is reserved for future use and should be ignored.
245 */
246 void
rx_nfa_cache_statistics(size_t * threshold,size_t * ign,size_t * in_use,size_t * high_water_mark,int * hits,int * misses,int * ign2)247 rx_nfa_cache_statistics (size_t * threshold,
248 size_t * ign,
249 size_t * in_use,
250 size_t * high_water_mark,
251 int * hits,
252 int * misses,
253 int * ign2)
254 {
255 rx__nfa_cache_statistics (threshold,
256 ign,
257 in_use,
258 high_water_mark,
259 hits,
260 misses,
261 ign2);
262 }
263
264
265
266 /************************************************************************
267 *(h2 "Flushing the NFA Cache")
268 *
269 * |cache flushing (NFA)|
270 * |NFA cache flushing|
271 */
272
273
274 /*(c rx_flush_nfa_cache)
275 * size_t rx_flush_nfa_cache (void);
276 *
277 * Attempt to flush all entries from the NFA cache. If there exist
278 * compiled regexps (that have not been freed), it may not be possible
279 * to entirely empty the NFA cache.
280 *
281 * Return the number of bytes still allocated to the NFA cache after
282 * the flush.
283 */
284 size_t
rx_flush_nfa_cache(void)285 rx_flush_nfa_cache (void)
286 {
287 while (0 <= rx__really_free_unfa ())
288 ;
289 return rx_nfa_cache_in_use ();
290 }
291
292
293
294
295 void *
rx_nfa_cache_malloc(size_t size)296 rx_nfa_cache_malloc (size_t size)
297 {
298 void * answer;
299
300 init_nfa_alloc_limits ();
301 answer = lim_malloc (nfa_alloc_limits, size);
302 return answer;
303 }
304
305
306 void *
rx_nfa_cache_soft_malloc(size_t size)307 rx_nfa_cache_soft_malloc (size_t size)
308 {
309 void * answer;
310
311 init_nfa_alloc_limits ();
312 answer = lim_soft_malloc (nfa_alloc_limits, size);
313 return answer;
314 }
315
316
317 void *
rx_nfa_cache_realloc(void * prev,size_t size)318 rx_nfa_cache_realloc (void * prev, size_t size)
319 {
320 void * answer;
321
322 init_nfa_alloc_limits ();
323 answer = lim_realloc (nfa_alloc_limits, prev, size);
324 return answer;
325 }
326
327
328 /* static void rx_cache_free (int size, char * mem);
329 *
330 * Free memory for the NFA state cache.
331 */
332 void
rx_nfa_cache_free(void * mem)333 rx_nfa_cache_free (void * mem)
334 {
335 lim_free (nfa_alloc_limits, mem);
336 }
337
338
339