1 /*
2 * test_regset.c --- test for regset API
3 * Copyright (c) 2019 K.Kosako
4 */
5 #include <stdlib.h>
6 #include <stdio.h>
7 #include <string.h>
8 #include <time.h>
9
10 #include "oniguruma.h"
11
/* Running totals reported in the RESULT line printed by main(). */
static int nsucc = 0;   /* checks whose outcome was the expected one */
static int nfail = 0;   /* checks whose outcome differed from the expected one */
static int nerror = 0;  /* unexpected Oniguruma errors */
15
16
17 static int
make_regset(int line_no,int n,char * pat[],OnigRegSet ** rset,int error_no)18 make_regset(int line_no, int n, char* pat[], OnigRegSet** rset, int error_no)
19 {
20 int r;
21 int i;
22 OnigRegSet* set;
23 regex_t* reg;
24 OnigErrorInfo einfo;
25
26 *rset = NULL;
27 r = onig_regset_new(&set, 0, NULL);
28 if (r != 0) return r;
29
30 for (i = 0; i < n; i++) {
31 r = onig_new(®, (UChar* )pat[i], (UChar* )(pat[i] + strlen(pat[i])),
32 ONIG_OPTION_DEFAULT, ONIG_ENCODING_UTF8, ONIG_SYNTAX_DEFAULT,
33 &einfo);
34 if (r != 0) {
35 char s[ONIG_MAX_ERROR_MESSAGE_LEN];
36
37 if (error_no == 0) {
38 onig_error_code_to_str((UChar* )s, r, &einfo);
39 fprintf(stderr, "ERROR: %d: %s /%s/\n", line_no, s, pat[i]);
40 nerror++;
41 }
42 else {
43 if (r == error_no) {
44 fprintf(stdout, "OK(ERROR): %d: /%s/ %d\n", line_no, pat[i], r);
45 nsucc++;
46 }
47 else {
48 fprintf(stdout, "FAIL(ERROR): %d: /%s/ %d, %d\n",
49 line_no, pat[i], error_no, r);
50 nfail++;
51 }
52 }
53 onig_regset_free(set);
54 return r;
55 }
56
57 r = onig_regset_add(set, reg);
58 if (r != 0) {
59 onig_regset_free(set);
60 fprintf(stderr, "ERROR: %d: onig_regset_add(): /%s/\n", line_no, pat[i]);
61 nerror++;
62 return r;
63 }
64 }
65
66 *rset = set;
67 return 0;
68 }
69
/* Convert the interval between two clock() readings into seconds. */
static double
get_sec(clock_t start, clock_t end)
{
  return (double )(end - start) / CLOCKS_PER_SEC;
}
78
/* Use clock() for timing, because clock_gettime() is not available on Windows or on old Unix systems. */
80
81 static int
time_test(int repeat,int n,char * ps[],char * s,char * end,double * rt_set,double * rt_reg)82 time_test(int repeat, int n, char* ps[], char* s, char* end, double* rt_set, double* rt_reg)
83 {
84 int r;
85 int i;
86 int match_pos;
87 OnigRegSet* set;
88 clock_t ts1, ts2;
89 double t_set, t_reg;
90
91 r = make_regset(0, n, ps, &set, 0);
92 if (r != 0) return r;
93
94 ts1 = clock();
95 for (i = 0; i < repeat; i++) {
96 r = onig_regset_search(set, (UChar* )s, (UChar* )end, (UChar* )s, (UChar* )end,
97 ONIG_REGSET_POSITION_LEAD, ONIG_OPTION_NONE, &match_pos);
98 if (r < 0) {
99 fprintf(stderr, "FAIL onig_regset_search(POSITION_LEAD): %d\n", r);
100 onig_regset_free(set);
101 return r;
102 }
103 }
104
105 ts2 = clock();
106 t_set = get_sec(ts1, ts2);
107
108 ts1 = clock();
109 for (i = 0; i < repeat; i++) {
110 r = onig_regset_search(set, (UChar* )s, (UChar* )end, (UChar* )s, (UChar* )end,
111 ONIG_REGSET_REGEX_LEAD, ONIG_OPTION_NONE, &match_pos);
112 if (r < 0) {
113 fprintf(stderr, "FAIL onig_regset_search(REGEX_LEAD): %d\n", r);
114 onig_regset_free(set);
115 return r;
116 }
117 }
118
119 ts2 = clock();
120 t_reg = get_sec(ts1, ts2);
121
122 onig_regset_free(set);
123
124 *rt_set = t_set;
125 *rt_reg = t_reg;
126 return 0;
127 }
128
/*
 * Copy the n pointers of ps[] into cps[] and shuffle the copy in place with
 * the Fisher-Yates algorithm, drawing indices from rand().  ps[] itself is
 * not modified.  The caller provides cps[] with room for n entries.
 */
static void
fisher_yates_shuffle(int n, char* ps[], char* cps[])
{
  int i;

  for (i = 0; i < n; i++)
    cps[i] = ps[i];

  /* Walk down from the last slot, swapping each one with a randomly chosen
     slot in [0, i]. */
  for (i = n - 1; i > 0; i--) {
    int x = rand() % (i + 1);
    char* tmp = cps[i];

    cps[i] = cps[x];
    cps[x] = tmp;
  }
}
145
/*
 * Compare the two regset lead modes on the text [s, end).
 *
 * Runs time_test() n times, each time on a freshly shuffled copy of the n
 * patterns in ps[] and with 100 / n repetitions, then prints the accumulated
 * times in milliseconds.  Nothing is printed if n is not positive, if memory
 * cannot be allocated, or if any time_test() call fails.
 */
static void
time_compare(int n, char* ps[], char* s, char* end)
{
  int r;
  int i;
  int repeat;
  double t_set, t_reg;
  double total_set, total_reg;
  char** cps;

  /* An empty pattern list has nothing to time, and would divide by zero
     when the repeat count is computed below. */
  if (n <= 0) return ;

  cps = (char** )malloc(sizeof(char*) * n);
  if (cps == 0) return ;

  repeat = 100 / n;
  total_set = total_reg = 0.0;
  for (i = 0; i < n; i++) {
    fisher_yates_shuffle(n, ps, cps);
    r = time_test(repeat, n, cps, s, end, &t_set, &t_reg);
    if (r != 0) {
      free(cps);
      return ;
    }
    total_set += t_set;
    total_reg += t_reg;
  }

  free(cps);

  fprintf(stdout, "POS lead: %6.2lfmsec. REG lead: %6.2lfmsec.\n",
          total_set * 1000.0, total_reg * 1000.0);
}
177
178
/* Lead mode that xx() passes to onig_regset_search(); main() reassigns it
   between the two groups of checks. */
static OnigRegSetLead XX_LEAD = ONIG_REGSET_POSITION_LEAD;
180
181 static void
xx(int line_no,int n,char * ps[],char * s,int from,int to,int mem,int not,int error_no)182 xx(int line_no, int n, char* ps[], char* s, int from, int to, int mem, int not, int error_no)
183 {
184 int r;
185 int match_pos;
186 int match_index;
187 OnigRegSet* set;
188 char *end;
189
190 r = make_regset(line_no, n, ps, &set, error_no);
191 if (r != 0) return ;
192
193 end = s + strlen(s);
194
195 r = onig_regset_search(set, (UChar* )s, (UChar* )end, (UChar* )s, (UChar* )end,
196 XX_LEAD, ONIG_OPTION_NONE, &match_pos);
197 if (r < 0) {
198 if (r == ONIG_MISMATCH) {
199 if (not) {
200 fprintf(stdout, "OK(N): %d\n", line_no);
201 nsucc++;
202 }
203 else {
204 fprintf(stdout, "FAIL: %d\n", line_no);
205 nfail++;
206 }
207 }
208 else {
209 if (error_no == 0) {
210 char buf[ONIG_MAX_ERROR_MESSAGE_LEN];
211 onig_error_code_to_str((UChar* )buf, r);
212 fprintf(stderr, "ERROR: %d: %s\n", line_no, buf);
213 nerror++;
214 }
215 else {
216 if (r == error_no) {
217 fprintf(stdout, "OK(ERROR): %d: %d\n", line_no, r);
218 nsucc++;
219 }
220 else {
221 fprintf(stdout, "FAIL ERROR NO: %d: %d, %d\n", line_no, error_no, r);
222 nfail++;
223 }
224 }
225 }
226 }
227 else {
228 if (not) {
229 fprintf(stdout, "FAIL(N): %d\n", line_no);
230 nfail++;
231 }
232 else {
233 OnigRegion* region;
234
235 match_index = r;
236 region = onig_regset_get_region(set, match_index);
237 if (region == 0) {
238 fprintf(stderr, "ERROR: %d: can't get region.\n", line_no);
239 nerror++;
240 onig_regset_free(set);
241 return ;
242 }
243
244 if (region->beg[mem] == from && region->end[mem] == to) {
245 fprintf(stdout, "OK: %d\n", line_no);
246 nsucc++;
247 }
248 else {
249 char buf[1000];
250 int len;
251 len = region->end[mem] - region->beg[mem];
252 strncpy(buf, s + region->beg[mem], len);
253 buf[len] = '\0';
254 fprintf(stdout, "FAIL: %d: %d-%d : %d-%d (%s)\n", line_no,
255 from, to, region->beg[mem], region->end[mem], buf);
256 nfail++;
257 }
258 }
259 }
260
261 onig_regset_free(set);
262 }
263
/* Expect a match whose whole-match range (group 0) is [from, to). */
static void
x2(int line_no, int n, char* ps[], char* s, int from, int to)
{
  const int whole_match = 0;  /* capture group to compare */
  const int expect_miss = 0;  /* a match is expected */
  const int expect_err  = 0;  /* no error is expected */

  xx(line_no, n, ps, s, from, to, whole_match, expect_miss, expect_err);
}
269
/* Expect a match in which capture group `mem` covers the range [from, to). */
static void
x3(int line_no, int n, char* ps[], char* s, int from, int to, int mem)
{
  const int expect_miss = 0;  /* a match is expected */
  const int expect_err  = 0;  /* no error is expected */

  xx(line_no, n, ps, s, from, to, mem, expect_miss, expect_err);
}
275
/* Expect that none of the patterns matches anywhere in s. */
static void
n(int line_no, int n, char* ps[], char* s)
{
  const int unused      = 0;  /* from / to / mem are not compared on a miss */
  const int expect_miss = 1;  /* a mismatch is the expected result */
  const int expect_err  = 0;  /* no error is expected */

  xx(line_no, n, ps, s, unused, unused, unused, expect_miss, expect_err);
}
281
/* Element count of a true array (not a pointer).  Fully parenthesized so the
   result composes correctly inside larger expressions such as 100 / ASIZE(a). */
#define ASIZE(a)              (sizeof(a)/sizeof((a)[0]))

/* Check helpers that tag each check with the source line of the call site. */
#define X2(ps,s,from,to)      x2(__LINE__,ASIZE(ps),ps,s,from,to)
#define X3(ps,s,from,to,mem)  x3(__LINE__,ASIZE(ps),ps,s,from,to,mem)
#define N(ps,s)               n(__LINE__,ASIZE(ps),ps,s)
/* Mismatch check against an empty regset (no patterns at all). */
#define NZERO(s)              n(__LINE__,0,(char** )0,s)
287
288 #ifndef _WIN32
289
290 /* getdelim() doesn't exist in Windows */
291
292 static int
get_all_content_of_file(char * path,char ** rs,char ** rend)293 get_all_content_of_file(char* path, char** rs, char** rend)
294 {
295 ssize_t len;
296 size_t n;
297 char* line;
298 FILE* fp;
299
300 fp = fopen(path, "r");
301 if (fp == 0) return -1;
302
303 n = 0;
304 line = NULL;
305 len = getdelim(&line, &n, EOF, fp);
306 fclose(fp);
307 if (len < 0) return -2;
308
309 *rs = line;
310 *rend = line + len;
311 return 0;
312 }
313 #endif
314
315
/* Text file that main() tries to load for the file-based checks and timings. */
#define TEXT_PATH  "kofu-utf8.txt"

/* --- How to get kofu.txt and produce kofu-utf8.txt ---
   $ wget https://www.aozora.gr.jp/cards/000148/files/774_ruby_1640.zip
   $ unzip 774_ruby_1640.zip
   $ nkf -Lu -w8 kofu.txt > kofu-utf8.txt
     (converts the encoding to UTF-8 with BOM and the line terminators to Unix form)
*/
324
/* Pattern sets used by main().  Each array is one regset. */

/* Plain literal plus two single-group patterns; used by the in-memory checks. */
static char* p1[] = {
  "abc",
  "(bca)",
  "(cab)"
};

/* Short literals; used by the in-memory checks, the file checks and the timings. */
static char* p2[] = {
  "小説",
  "9",
  "夏目漱石",
};

/* Patterns anchored with '^'; used by the file checks and the timings. */
static char* p3[] = {
  "^いる。",
  "^校正",
  "^底本",
  "^ 翌日",
};

/* Bracketed runs of 5 to 20 characters; used by the file checks and the timings. */
static char* p4[] = {
  "《[^》]{5}》",
  "《[^》]{6}》",
  "《[^》]{7}》",
  "《[^》]{8}》",
  "《[^》]{9}》",
  "《[^》]{10}》",
  "《[^》]{11}》",
  "《[^》]{12}》",
  "《[^》]{13}》",
  "《[^》]{14}》",
  "《[^》]{15}》",
  "《[^》]{16}》",
  "《[^》]{17}》",
  "《[^》]{18}》",
  "《[^》]{19}》",
  "《[^》]{20}》",
};

/* Literal-only set; used only by the timings. */
static char* p5[] = {
  "小室圭",
  "bbbbbb",
  "ドナルド・トランプ",
  "筑摩書房",
  "松原",
  "aaaaaaaaa",
  "bbbbbbbbb",
  "ccccc",
  "ddddddddddd",
  "eee",
  "ffffffffffff",
  "gggggggggg",
  "hhhhhhhhhhhhhh",
  "iiiiiii",
};

/* One long anchored repeat followed by literals; used only by the timings. */
static char* p6[] = {
  "^.{1000,}",
  "松原",
  "小室圭",
  "ドナルド・トランプ",
  "筑摩書房",
};

/* One digit-run pattern per decimal digit; used by the in-memory checks. */
static char* p7[] = {
  "0+", "1+", "2+", "3+", "4+", "5+", "6+", "7+", "8+", "9+",
};

/* Literal plus a pattern that can match the empty string; checked against ""
   in the POSITION_LEAD group only. */
static char* p8[] = {"a", ".*"};
393
394 extern int
main(int argc,char * argv[])395 main(int argc, char* argv[])
396 {
397 #ifndef _WIN32
398 int file_exist;
399 #endif
400 int r;
401 char *s, *end;
402 OnigEncoding use_encs[1];
403
404 use_encs[0] = ONIG_ENCODING_UTF8;
405 onig_initialize(use_encs, sizeof(use_encs)/sizeof(use_encs[0]));
406
407 srand(12345);
408
409 XX_LEAD = ONIG_REGSET_POSITION_LEAD;
410
411 NZERO(" abab bccab ca");
412 X2(p1, " abab bccab ca", 8, 11);
413 X3(p1, " abab bccab ca", 8, 11, 1);
414 N(p2, " XXXX AAA 1223 012345678bbb");
415 X2(p2, "0123456789", 9, 10);
416 X2(p7, "abcde 555 qwert", 6, 9);
417 X2(p8, "", 0, 0);
418
419 XX_LEAD = ONIG_REGSET_REGEX_LEAD;
420
421 NZERO(" abab bccab ca");
422 X2(p1, " abab bccab ca", 8, 11);
423 X3(p1, " abab bccab ca", 8, 11, 1);
424 N(p2, " XXXX AAA 1223 012345678bbb");
425 X2(p2, "0123456789", 9, 10);
426 X2(p7, "abcde 555 qwert", 6, 9);
427
428 #ifndef _WIN32
429 r = get_all_content_of_file(TEXT_PATH, &s, &end);
430 if (r == 0) {
431 fprintf(stdout, "FILE: %s, size: %d\n", TEXT_PATH, (int )(end - s));
432 file_exist = 1;
433 }
434 else {
435 fprintf(stdout, "Ignore %s\n", TEXT_PATH);
436 file_exist = 0;
437 }
438
439 if (file_exist != 0) {
440 X2(p2, s, 10, 22);
441 X2(p3, s, 496079, 496088);
442 X2(p4, s, 1294, 1315);
443 }
444 #endif
445
446 fprintf(stdout,
447 "\nRESULT SUCC: %4d, FAIL: %d, ERROR: %d (by Oniguruma %s)\n",
448 nsucc, nfail, nerror, onig_version());
449
450 #ifndef _WIN32
451 if (file_exist != 0) {
452 fprintf(stdout, "\n");
453 time_compare(ASIZE(p2), p2, s, end);
454 time_compare(ASIZE(p3), p3, s, end);
455 time_compare(ASIZE(p4), p4, s, end);
456 time_compare(ASIZE(p5), p5, s, end);
457 time_compare(ASIZE(p6), p6, s, end);
458 fprintf(stdout, "\n");
459 free(s);
460 }
461 #endif
462
463 onig_end();
464
465 return ((nfail == 0 && nerror == 0) ? 0 : -1);
466 }
467