1 /*
2 bench.c - simple regex benchmark program
3
4 This software is released under a BSD-style license.
5 See the file LICENSE for details and copyright.
6
7 */
8
9 #ifdef HAVE_CONFIG_H
10 #include <config.h>
11 #endif /* HAVE_CONFIG_H */
12
13 #include <stdio.h>
14 #include <stdlib.h>
15 #ifdef HAVE_GETOPT_H
16 #include <getopt.h>
17 #endif /* HAVE_GETOPT_H */
18 #include <time.h>
19 #include <unistd.h>
20 #include <math.h>
21 #include <sys/types.h>
22
23 #if 0
24 #include <hackerlab/rx-posix/regex.h>
25 #else
26 #include <regex.h>
27 #endif
28
29 /* T distribution for alpha = 0.025 (for 95% confidence). XXX - is
30 this correct? */
31 double t_distribution[] = {
32 12.71,
33 4.303,
34 3.182,
35 2.776,
36 2.571,
37 2.447,
38 2.365,
39 2.306,
40 2.262,
41 2.228,
42 2.201,
43 2.179,
44 2.160,
45 2.145,
46 2.131,
47 2.120,
48 2.110,
49 2.101,
50 2.093,
51 2.086,
52 2.080,
53 2.074,
54 2.069,
55 2.064,
56 2.060,
57 2.056,
58 2.052,
59 2.048,
60 2.045,
61 2.042
62 };
63
64 void
stats(double * sample_data,int samples,int len)65 stats(double *sample_data, int samples, int len)
66 {
67 double mean, tmp1, tmp2, variance, stddev, error, percent;
68 int i;
69
70 mean = 0;
71 for (i = 0; i < samples; i++)
72 mean += sample_data[i];
73 mean = mean/i;
74 printf("# mean: %.5f\n", mean);
75
76 tmp1 = 0;
77 for (i = 0; i < samples; i++) {
78 tmp2 = sample_data[i] - mean;
79 tmp1 += tmp2*tmp2;
80 }
81 if (samples > 1)
82 variance = tmp1 / (samples-1);
83 else
84 variance = 0;
85 stddev = sqrt(variance);
86 printf("# variance: %.16f\n", variance);
87 printf("# standard deviation: %.16f\n", stddev);
88
89 error = t_distribution[samples-1] * stddev / sqrt(samples);
90 if (mean != 0)
91 percent = 100*error/mean;
92 else
93 percent = 0;
94 printf("# error: �%.16f (�%.4f%%)\n", error, percent);
95
96 printf("%d\t%.5f\t%.5f\n", len, mean, error);
97
98 fflush(stdout);
99 }
100
101 void
run_tests(int len,int samples,double * sample_data,int repeats,regex_t * reobj,char * str,char * tmpbuf)102 run_tests(int len, int samples, double *sample_data, int repeats,
103 regex_t *reobj, char *str, char *tmpbuf)
104 {
105 int i, j, errcode;
106 clock_t c1, c2;
107 regmatch_t pmatch[10];
108
109
110 printf("# len = %d\n", len);
111 fflush(stdout);
112 for (i = 0; i < samples; i++) {
113 c1 = clock();
114 for (j = 0; j < repeats; j++)
115 if ((errcode = tre_regexec(reobj, str, 10, pmatch, 0))) {
116 tre_regerror(errcode, reobj, tmpbuf, 255);
117 printf("error: %s\n", tmpbuf);
118 }
119 c2 = clock();
120
121 sample_data[i] = (double)(c2-c1)/(CLOCKS_PER_SEC*repeats);
122
123 printf("# sample: %.5f sec, clocks: %ld\n",
124 (double)(c2-c1)/(CLOCKS_PER_SEC*repeats),
125 (long)(c2-c1));
126 fflush(stdout);
127 }
128 fflush(stdout);
129
130 for (i = 0; i < 10; i += 2) {
131 printf("# pmatch[%d].rm_so = %d\n", i/2, (int)pmatch[i/2].rm_so);
132 printf("# pmatch[%d].rm_eo = %d\n", i/2, (int)pmatch[i/2].rm_eo);
133 }
134 }
135
136
137 int
main(int argc,char ** argv)138 main(int argc, char **argv)
139 {
140 regex_t reobj;
141 char *str;
142 char tmpbuf[256];
143 int i, j;
144 int max_len = 1024*1024*10;
145 int steps = 20;
146 int repeats = 10;
147 int samples = 20;
148 int len;
149 clock_t c1, c2;
150 int opt;
151 double sample_data[30];
152
153 int test_id = -1;
154
155 while ((opt = getopt(argc, argv, "r:l:s:j:t:")) != -1) {
156 switch (opt) {
157 case 't':
158 test_id = atoi(optarg);
159 break;
160 case 'l':
161 max_len = atoi(optarg);
162 break;
163 case 'j':
164 steps = atoi(optarg);
165 break;
166 case 's':
167 samples = atoi(optarg);
168 break;
169 case 'r':
170 repeats = atoi(optarg);
171 break;
172 default:
173 printf("P�lli.\n");
174 return 1;
175 }
176 }
177
178 /* XXX - Check that the correct results are returned. For example, GNU
179 regex-0.12 returns incorrect results for very long strings in
180 test number 1. */
181
182 switch (test_id) {
183 case 0:
184 printf("# pattern: \"a*\"\n");
185 printf("# string: \"aaaaaa...\"\n");
186 len = 0;
187 tre_regcomp(&reobj, "a*", REG_EXTENDED);
188 while (len <= max_len) {
189
190 str = malloc(sizeof(char) * (len+1));
191 for (i = 0; i < len; i++)
192 str[i] = 'a';
193 str[len-1] = '\0';
194
195 run_tests(len, samples, sample_data, repeats, &reobj, str, tmpbuf);
196 stats(sample_data, samples, len);
197 len = len + (max_len/steps);
198 free(str);
199 }
200 break;
201
202
203 case 1:
204 printf("# pattern: \"(a)*\"\n");
205 printf("# string: \"aaaaaa...\"\n");
206 len = 0;
207 tre_regcomp(&reobj, "(a)*", REG_EXTENDED);
208 while (len <= max_len) {
209
210 str = malloc(sizeof(char) * (len+1));
211 for (i = 0; i < len; i++)
212 str[i] = 'a';
213 str[len-1] = '\0';
214
215 run_tests(len, samples, sample_data, repeats, &reobj, str, tmpbuf);
216 stats(sample_data, samples, len);
217 len = len + (max_len/steps);
218 free(str);
219 }
220 break;
221
222
223 case 2:
224 printf("# pattern: \"(a*)\"\n");
225 printf("# string: \"aaaaaa...\"\n"); len = 0;
226 tre_regcomp(&reobj, "(a*)", REG_EXTENDED);
227 while (len <= max_len) {
228
229 str = malloc(sizeof(char) * (len+1));
230 for (i = 0; i < len; i++)
231 str[i] = 'a';
232 str[len-1] = '\0';
233
234 run_tests(len, samples, sample_data, repeats, &reobj, str, tmpbuf);
235 stats(sample_data, samples, len);
236 len = len + (max_len/steps);
237 free(str);
238 }
239 break;
240
241 case 3:
242 printf("# pattern: \"(a*)*|b*\"\n");
243 printf("# string: \"aaaaaa...b\"\n");
244 len = 0;
245 tre_regcomp(&reobj, "(a*)*|b*", REG_EXTENDED);
246 while (len <= max_len) {
247 str = malloc(sizeof(char) * (len+1));
248 for (i = 0; i < len-1; i++)
249 str[i] = 'a';
250 if (len > 0)
251 str[len-1] = 'b';
252 str[len] = '\0';
253
254 run_tests(len, samples, sample_data, repeats, &reobj, str, tmpbuf);
255 stats(sample_data, samples, len);
256 len = len + (max_len/steps);
257 free(str);
258 }
259 break;
260
261 case 4:
262 printf("# pattern: \"(a|a|a|...|a)\"\n");
263 printf("# string: \"aaaaaa...\"\n");
264 len = 1024*1024;
265 str = malloc(sizeof(char) * (len+1));
266 for (i = 0; i < len-1; i++)
267 str[i] = 'a';
268 str[len] = '\0';
269 len = 0;
270 while (len <= max_len) {
271 tmpbuf[0] = '(';
272 for (i = 1; i < (len*2); i++) {
273 tmpbuf[i] = 'a';
274 if (i < len*2-2) {
275 i++;
276 tmpbuf[i] = '|';
277 }
278 }
279 printf("# i = %d\n", i);
280 tmpbuf[i] = ')';
281 tmpbuf[i+1] = '*';
282 tmpbuf[i+2] = '\0';
283 printf("# pat = %s\n", tmpbuf);
284 tre_regcomp(&reobj, tmpbuf, REG_EXTENDED);
285
286 run_tests(len, samples, sample_data, repeats, &reobj, str, tmpbuf);
287 stats(sample_data, samples, len);
288 len = len + (max_len/steps);
289 tre_regfree(&reobj);
290 }
291 free(str);
292 break;
293
294 case 5:
295 printf("# pattern: \"foobar\"\n");
296 printf("# string: \"aaaaaa...foobar\"\n");
297 len = 0;
298 tre_regcomp(&reobj, "foobar", REG_EXTENDED);
299 while (len <= max_len) {
300 str = malloc(sizeof(char) * (len+7));
301 for (i = 0; i < len; i++) {
302 if (i*i % 3)
303 str[i] = 'a';
304 else
305 str[i] = 'a';
306 }
307 str[len+0] = 'f';
308 str[len+1] = 'o';
309 str[len+2] = 'o';
310 str[len+3] = 'b';
311 str[len+4] = 'a';
312 str[len+5] = 'r';
313 str[len+6] = '\0';
314
315 run_tests(len, samples, sample_data, repeats, &reobj, str, tmpbuf);
316 stats(sample_data, samples, len);
317 len = len + (max_len/steps);
318 free(str);
319 }
320 break;
321
322
323 case 6:
324 printf("# pattern: \"a*foobar\"\n");
325 printf("# string: \"aaaaaa...foobar\"\n");
326 len = 0;
327 tre_regcomp(&reobj, "a*foobar", REG_EXTENDED);
328 while (len <= max_len) {
329 str = malloc(sizeof(char) * (len+7));
330 for (i = 0; i < len; i++) {
331 str[i] = 'a';
332 }
333 str[len+0] = 'f';
334 str[len+1] = 'o';
335 str[len+2] = 'o';
336 str[len+3] = 'b';
337 str[len+4] = 'a';
338 str[len+5] = 'r';
339 str[len+6] = '\0';
340
341 run_tests(len, samples, sample_data, repeats, &reobj, str, tmpbuf);
342 stats(sample_data, samples, len);
343 len = len + (max_len/steps);
344 free(str);
345 }
346 break;
347
348
349 case 7:
350 printf("# pattern: \"(a)*foobar\"\n");
351 printf("# string: \"aaaaabbaaab...foobar\"\n");
352 len = 0;
353 tre_regcomp(&reobj, "(a)*foobar", REG_EXTENDED);
354 while (len <= max_len) {
355 str = malloc(sizeof(char) * (len+7));
356 for (i = 0; i < len; i++) {
357 /* Without this GNU regex won't find a match! */
358 if (i*(i-1) % 3)
359 str[i] = 'b';
360 else
361 str[i] = 'a';
362 }
363 str[len+0] = 'f';
364 str[len+1] = 'o';
365 str[len+2] = 'o';
366 str[len+3] = 'b';
367 str[len+4] = 'a';
368 str[len+5] = 'r';
369 str[len+6] = '\0';
370
371 run_tests(len, samples, sample_data, repeats, &reobj, str, tmpbuf);
372 stats(sample_data, samples, len);
373 len = len + (max_len/steps);
374 free(str);
375 }
376 break;
377
378
379 case 8:
380 printf("# pattern: \"(a|b)*foobar\"\n");
381 printf("# string: \"aaaaabbaaab...foobar\"\n");
382 len = 0;
383 tre_regcomp(&reobj, "(a|b)*foobar", REG_EXTENDED);
384 while (len <= max_len) {
385 str = malloc(sizeof(char) * (len+7));
386 for (i = 0; i < len; i++) {
387 if (i*(i-1) % 3)
388 str[i] = 'b';
389 else
390 str[i] = 'a';
391 /* Without this GNU regex won't find a match! */
392 if (i % (1024*1024*10 - 100))
393 str[i] = 'f';
394 }
395 str[len+0] = 'f';
396 str[len+1] = 'o';
397 str[len+2] = 'o';
398 str[len+3] = 'b';
399 str[len+4] = 'a';
400 str[len+5] = 'r';
401 str[len+6] = '\0';
402
403 run_tests(len, samples, sample_data, repeats, &reobj, str, tmpbuf);
404 stats(sample_data, samples, len);
405 len = len + (max_len/steps);
406 free(str);
407 }
408 break;
409
410
411 case 9:
412 printf("# pattern: hand-coded a*\n");
413 printf("# string: \"aaaaaa...\"\n");
414 len = 0;
415 while (len <= max_len) {
416 printf("# len = %d\n", len);
417
418 str = malloc(sizeof(char)*(len+1));
419 for (i = 0; i < len; i++)
420 str[i] = 'a';
421 str[len-1] = '\0';
422
423 for (i = 0; i < samples; i++) {
424 c1 = clock();
425 for (j = 0; j < repeats; j++) {
426 char *s;
427 int l;
428
429 s = str;
430 l = 0;
431
432
433 while (s != '\0') {
434 if (*s == 'a') {
435 s++;
436 l++;
437 } else
438 break;
439 }
440 }
441 c2 = clock();
442 sample_data[i] = (double)(c2-c1)/(CLOCKS_PER_SEC*repeats);
443
444 printf("# sample: %.5f sec, clocks: %ld\n",
445 (double)(c2-c1)/(CLOCKS_PER_SEC*repeats),
446 (long)(c2-c1));
447 fflush(stdout);
448 }
449 fflush(stdout);
450
451 stats(sample_data, samples, len);
452 len = len + (max_len/steps);
453 free(str);
454 }
455 break;
456
457
458 default:
459 printf("Pelle.\n");
460 return 1;
461 }
462
463 tre_regfree(&reobj);
464
465 return 0;
466 }
467