1 /*
2  * test_regset.c  --- test for regset API
3  * Copyright (c) 2019  K.Kosako
4  */
5 #include <stdlib.h>
6 #include <stdio.h>
7 #include <string.h>
8 #include <time.h>
9 
10 #include "oniguruma.h"
11 
/*
 * Test tallies: nsucc counts "OK" results, nfail counts "FAIL" results and
 * nerror counts "ERROR" results.  main() prints all three and derives the
 * exit status from nfail and nerror.
 */
static int nsucc = 0, nfail = 0, nerror = 0;
15 
16 
17 static int
make_regset(int line_no,int n,char * pat[],OnigRegSet ** rset,int error_no)18 make_regset(int line_no, int n, char* pat[], OnigRegSet** rset, int error_no)
19 {
20   int r;
21   int i;
22   OnigRegSet* set;
23   regex_t* reg;
24   OnigErrorInfo einfo;
25 
26   *rset = NULL;
27   r = onig_regset_new(&set, 0, NULL);
28   if (r != 0) return r;
29 
30   for (i = 0; i < n; i++) {
31     r = onig_new(&reg, (UChar* )pat[i], (UChar* )(pat[i] + strlen(pat[i])),
32                  ONIG_OPTION_DEFAULT, ONIG_ENCODING_UTF8, ONIG_SYNTAX_DEFAULT,
33                  &einfo);
34     if (r != 0) {
35       char s[ONIG_MAX_ERROR_MESSAGE_LEN];
36 
37       if (error_no == 0) {
38         onig_error_code_to_str((UChar* )s, r, &einfo);
39         fprintf(stderr, "ERROR: %d: %s  /%s/\n", line_no, s, pat[i]);
40         nerror++;
41       }
42       else {
43         if (r == error_no) {
44           fprintf(stdout, "OK(ERROR): %d: /%s/ %d\n", line_no, pat[i], r);
45           nsucc++;
46         }
47         else {
48           fprintf(stdout, "FAIL(ERROR): %d: /%s/ %d, %d\n",
49                   line_no, pat[i], error_no, r);
50           nfail++;
51         }
52       }
53       onig_regset_free(set);
54       return r;
55     }
56 
57     r = onig_regset_add(set, reg);
58     if (r != 0) {
59       onig_regset_free(set);
60       fprintf(stderr, "ERROR: %d: onig_regset_add(): /%s/\n", line_no, pat[i]);
61       nerror++;
62       return r;
63     }
64   }
65 
66   *rset = set;
67   return 0;
68 }
69 
/* Convert the interval between two clock() samples into seconds. */
static double
get_sec(clock_t start, clock_t end)
{
  clock_t ticks = end - start;

  return (double )ticks / CLOCKS_PER_SEC;
}
78 
/* Use clock() because clock_gettime() is not available on Windows or on older Unix systems. */
80 
81 static int
time_test(int repeat,int n,char * ps[],char * s,char * end,double * rt_set,double * rt_reg)82 time_test(int repeat, int n, char* ps[], char* s, char* end, double* rt_set, double* rt_reg)
83 {
84   int r;
85   int i;
86   int match_pos;
87   OnigRegSet* set;
88   clock_t ts1, ts2;
89   double t_set, t_reg;
90 
91   r = make_regset(0, n, ps, &set, 0);
92   if (r != 0) return r;
93 
94   ts1 = clock();
95   for (i = 0; i < repeat; i++) {
96     r = onig_regset_search(set, (UChar* )s, (UChar* )end, (UChar* )s, (UChar* )end,
97                            ONIG_REGSET_POSITION_LEAD, ONIG_OPTION_NONE, &match_pos);
98     if (r < 0) {
99       fprintf(stderr, "FAIL onig_regset_search(POSITION_LEAD): %d\n", r);
100       onig_regset_free(set);
101       return r;
102     }
103   }
104 
105   ts2 = clock();
106   t_set = get_sec(ts1, ts2);
107 
108   ts1 = clock();
109   for (i = 0; i < repeat; i++) {
110     r = onig_regset_search(set, (UChar* )s, (UChar* )end, (UChar* )s, (UChar* )end,
111                            ONIG_REGSET_REGEX_LEAD, ONIG_OPTION_NONE, &match_pos);
112     if (r < 0) {
113       fprintf(stderr, "FAIL onig_regset_search(REGEX_LEAD): %d\n", r);
114       onig_regset_free(set);
115       return r;
116     }
117   }
118 
119   ts2 = clock();
120   t_reg = get_sec(ts1, ts2);
121 
122   onig_regset_free(set);
123 
124   *rt_set = t_set;
125   *rt_reg = t_reg;
126   return 0;
127 }
128 
/*
 * Copy the n pointers of ps[] into cps[] and then shuffle cps[] in place
 * with the Fisher-Yates algorithm, drawing indices from rand().
 * ps[] itself is not modified.  n <= 0 leaves cps[] untouched.
 */
static void
fisher_yates_shuffle(int n, char* ps[], char* cps[])
{
/* Random index in [0, n].  The argument is parenthesized so expressions are safe. */
#define GET_RAND(n)  (rand() % ((n) + 1))
/* Swap two char* lvalues.  do/while(0) makes it behave as a single statement. */
#define SWAP(a,b)    do { char* tmp = (a); (a) = (b); (b) = tmp; } while (0)

  int i;

  for (i = 0; i < n; i++)
    cps[i] = ps[i];

  /* walk down from the last slot, swapping each with a random slot <= i */
  for (i = n - 1; i > 0; i--) {
    int x = GET_RAND(i);
    SWAP(cps[i], cps[x]);
  }
}
145 
/*
 * Compare the two regset lead modes on the text [s, end).
 *
 * Runs n rounds; each round shuffles the n patterns of ps[] into a scratch
 * array and calls time_test() with repeat = 100 / n.  The accumulated times
 * are printed in milliseconds.  Nothing is printed if n <= 0, if the
 * scratch array cannot be allocated, or if any round fails.
 *
 * Note: for n > 100 the integer division makes repeat 0, so no searches
 * are actually timed.
 */
static void
time_compare(int n, char* ps[], char* s, char* end)
{
  int r;
  int i;
  int repeat;
  double t_set, t_reg;
  double total_set, total_reg;
  char** cps;

  /* nothing to measure; also avoids 100 / 0 and a zero-sized allocation */
  if (n <= 0) return ;

  cps = (char** )malloc(sizeof(*cps) * (size_t )n);
  if (cps == NULL) return ;

  repeat = 100 / n;
  total_set = total_reg = 0.0;
  for (i = 0; i < n; i++) {
    fisher_yates_shuffle(n, ps, cps);
    r = time_test(repeat, n, cps, s, end, &t_set, &t_reg);
    if (r != 0) {
      free(cps);
      return ;
    }
    total_set += t_set;
    total_reg += t_reg;
  }

  free(cps);

  fprintf(stdout, "POS lead: %6.2lfmsec.  REG lead: %6.2lfmsec.\n",
          total_set * 1000.0, total_reg * 1000.0);
}
177 
178 
179 static OnigRegSetLead XX_LEAD = ONIG_REGSET_POSITION_LEAD;
180 
181 static void
xx(int line_no,int n,char * ps[],char * s,int from,int to,int mem,int not,int error_no)182 xx(int line_no, int n, char* ps[], char* s, int from, int to, int mem, int not, int error_no)
183 {
184   int r;
185   int match_pos;
186   int match_index;
187   OnigRegSet* set;
188   char *end;
189 
190   r = make_regset(line_no, n, ps, &set, error_no);
191   if (r != 0) return ;
192 
193   end = s + strlen(s);
194 
195   r = onig_regset_search(set, (UChar* )s, (UChar* )end, (UChar* )s, (UChar* )end,
196                          XX_LEAD, ONIG_OPTION_NONE, &match_pos);
197   if (r < 0) {
198     if (r == ONIG_MISMATCH) {
199       if (not) {
200         fprintf(stdout, "OK(N): %d\n", line_no);
201         nsucc++;
202       }
203       else {
204         fprintf(stdout, "FAIL: %d\n", line_no);
205         nfail++;
206       }
207     }
208     else {
209       if (error_no == 0) {
210         char buf[ONIG_MAX_ERROR_MESSAGE_LEN];
211         onig_error_code_to_str((UChar* )buf, r);
212         fprintf(stderr, "ERROR: %d: %s\n", line_no, buf);
213         nerror++;
214       }
215       else {
216         if (r == error_no) {
217           fprintf(stdout, "OK(ERROR): %d: %d\n", line_no, r);
218           nsucc++;
219         }
220         else {
221           fprintf(stdout, "FAIL ERROR NO: %d: %d, %d\n", line_no, error_no, r);
222           nfail++;
223         }
224       }
225     }
226   }
227   else {
228     if (not) {
229       fprintf(stdout, "FAIL(N): %d\n", line_no);
230       nfail++;
231     }
232     else {
233       OnigRegion* region;
234 
235       match_index = r;
236       region = onig_regset_get_region(set, match_index);
237       if (region == 0) {
238         fprintf(stderr, "ERROR: %d: can't get region.\n", line_no);
239         nerror++;
240         onig_regset_free(set);
241         return ;
242       }
243 
244       if (region->beg[mem] == from && region->end[mem] == to) {
245         fprintf(stdout, "OK: %d\n", line_no);
246         nsucc++;
247       }
248       else {
249         char buf[1000];
250         int len;
251         len = region->end[mem] - region->beg[mem];
252         strncpy(buf, s + region->beg[mem], len);
253         buf[len] = '\0';
254         fprintf(stdout, "FAIL: %d: %d-%d : %d-%d (%s)\n", line_no,
255                 from, to, region->beg[mem], region->end[mem], buf);
256         nfail++;
257       }
258     }
259   }
260 
261   onig_regset_free(set);
262 }
263 
/* Expect a match whose group 0 has beg == from and end == to. */
static void
x2(int line_no, int n, char* ps[], char* s, int from, int to)
{
  const int group        = 0;   /* whole match */
  const int expect_miss  = 0;   /* a match is expected */
  const int expect_error = 0;   /* no error is expected */

  xx(line_no, n, ps, s, from, to, group, expect_miss, expect_error);
}
269 
/* Expect a match in which capture group `mem` has beg == from and end == to. */
static void
x3(int line_no, int n, char* ps[], char* s, int from, int to, int mem)
{
  const int expect_miss  = 0;   /* a match is expected */
  const int expect_error = 0;   /* no error is expected */

  xx(line_no, n, ps, s, from, to, mem, expect_miss, expect_error);
}
275 
/* Expect that none of the patterns matches s. */
static void
n(int line_no, int n, char* ps[], char* s)
{
  const int unused_pos   = 0;   /* from/to are not examined on a mismatch */
  const int group        = 0;
  const int expect_miss  = 1;   /* a mismatch is the expected outcome */
  const int expect_error = 0;   /* no error is expected */

  xx(line_no, n, ps, s, unused_pos, unused_pos, group, expect_miss, expect_error);
}
281 
/* Element count of a real array (not a pointer).  The expansion is fully
   parenthesized so it stays correct inside larger expressions such as
   100 / ASIZE(a). */
#define ASIZE(a)              (sizeof(a)/sizeof((a)[0]))
/* Test shorthands: pass the current source line and the pattern count. */
#define X2(ps,s,from,to)      x2(__LINE__,ASIZE(ps),(ps),(s),(from),(to))
#define X3(ps,s,from,to,mem)  x3(__LINE__,ASIZE(ps),(ps),(s),(from),(to),(mem))
#define N(ps,s)                n(__LINE__,ASIZE(ps),(ps),(s))
/* Mismatch test against an empty regset (no patterns at all). */
#define NZERO(s)               n(__LINE__,0,(char** )0,(s))
287 
288 #ifndef _WIN32
289 
/*
 * Read the whole file at `path` into a freshly allocated, NUL-terminated
 * buffer.  Only standard C I/O is used (no getdelim()); the surrounding
 * non-Windows guard is kept because the callers are guarded the same way.
 *
 *   rs   : receives the start of the buffer; the caller releases it
 *          with free().
 *   rend : receives the address one past the last byte read (where the
 *          terminating NUL is stored).
 *
 * Returns 0 on success, -1 if the file cannot be opened, and -2 if it is
 * empty, cannot be read, or memory runs out.  *rs and *rend are written
 * only on success.
 */
static int
get_all_content_of_file(char* path, char** rs, char** rend)
{
  size_t cap;
  size_t len;
  char* buf;
  FILE* fp;

  fp = fopen(path, "r");
  if (fp == NULL) return -1;

  cap = 4096;
  len = 0;
  buf = (char* )malloc(cap);
  if (buf == NULL) {
    fclose(fp);
    return -2;
  }

  for (;;) {
    size_t got;

    /* always keep one spare byte for the terminating NUL */
    if (cap - len <= 1) {
      char* grown;

      if (cap > (size_t )-1 / 2) {   /* doubling would overflow size_t */
        free(buf);
        fclose(fp);
        return -2;
      }
      cap *= 2;
      grown = (char* )realloc(buf, cap);
      if (grown == NULL) {
        free(buf);
        fclose(fp);
        return -2;
      }
      buf = grown;
    }

    got = fread(buf + len, 1, cap - len - 1, fp);
    if (got == 0) break;   /* end of file or read error */
    len += got;
  }

  if (ferror(fp) != 0 || len == 0) {
    fclose(fp);
    free(buf);
    return -2;
  }
  fclose(fp);

  buf[len] = '\0';
  *rs   = buf;
  *rend = buf + len;
  return 0;
}
313 #endif
314 
315 
316 #define TEXT_PATH    "kofu-utf8.txt"
317 
/* --- How to obtain kofu-utf8.txt ---
   $ wget https://www.aozora.gr.jp/cards/000148/files/774_ruby_1640.zip
   $ unzip 774_ruby_1640.zip
   $ nkf -Lu -w8 kofu.txt > kofu-utf8.txt
     (converts the text to UTF-8 with a BOM and Unix-style line endings)
*/
324 
/* Pattern sets used by the tests and timing runs in main(). */

/* ASCII literals; the 2nd and 3rd carry a capture group */
static char* p1[] = { "abc", "(bca)", "(cab)" };

/* short literals */
static char* p2[] = {
  "小説",
  "9",
  "夏目漱石",
};

/* patterns anchored with ^ */
static char* p3[] = {
  "^いる。",
  "^校正",
  "^底本",
  "^ 翌日",
};

/* bracketed runs of exactly 5 .. 20 characters */
static char* p4[] = {
  "《[^》]{5}》",
  "《[^》]{6}》",
  "《[^》]{7}》",
  "《[^》]{8}》",
  "《[^》]{9}》",
  "《[^》]{10}》",
  "《[^》]{11}》",
  "《[^》]{12}》",
  "《[^》]{13}》",
  "《[^》]{14}》",
  "《[^》]{15}》",
  "《[^》]{16}》",
  "《[^》]{17}》",
  "《[^》]{18}》",
  "《[^》]{19}》",
  "《[^》]{20}》",
};

/* a mix of Japanese and ASCII literals (timing only) */
static char* p5[] = {
  "小室圭",
  "bbbbbb",
  "ドナルド・トランプ",
  "筑摩書房",
  "松原",
  "aaaaaaaaa",
  "bbbbbbbbb",
  "ccccc",
  "ddddddddddd",
  "eee",
  "ffffffffffff",
  "gggggggggg",
  "hhhhhhhhhhhhhh",
  "iiiiiii",
};

/* one long anchored repeat followed by literals (timing only) */
static char* p6[] = {
  "^.{1000,}",
  "松原",
  "小室圭",
  "ドナルド・トランプ",
  "筑摩書房",
};

/* one-or-more of each decimal digit */
static char* p7[] = {
  "0+",
  "1+",
  "2+",
  "3+",
  "4+",
  "5+",
  "6+",
  "7+",
  "8+",
  "9+",
};

/* a literal plus a pattern that can match the empty string */
static char* p8[] = {
  "a",
  ".*",
};
393 
394 extern int
main(int argc,char * argv[])395 main(int argc, char* argv[])
396 {
397 #ifndef _WIN32
398   int file_exist;
399 #endif
400   int r;
401   char *s, *end;
402   OnigEncoding use_encs[1];
403 
404   use_encs[0] = ONIG_ENCODING_UTF8;
405   onig_initialize(use_encs, sizeof(use_encs)/sizeof(use_encs[0]));
406 
407   srand(12345);
408 
409   XX_LEAD = ONIG_REGSET_POSITION_LEAD;
410 
411   NZERO(" abab bccab ca");
412   X2(p1, " abab bccab ca", 8, 11);
413   X3(p1, " abab bccab ca", 8, 11, 1);
414   N(p2, " XXXX AAA 1223 012345678bbb");
415   X2(p2, "0123456789", 9, 10);
416   X2(p7, "abcde 555 qwert", 6, 9);
417   X2(p8, "", 0, 0);
418 
419   XX_LEAD = ONIG_REGSET_REGEX_LEAD;
420 
421   NZERO(" abab bccab ca");
422   X2(p1, " abab bccab ca", 8, 11);
423   X3(p1, " abab bccab ca", 8, 11, 1);
424   N(p2, " XXXX AAA 1223 012345678bbb");
425   X2(p2, "0123456789", 9, 10);
426   X2(p7, "abcde 555 qwert", 6, 9);
427 
428 #ifndef _WIN32
429   r = get_all_content_of_file(TEXT_PATH, &s, &end);
430   if (r == 0) {
431     fprintf(stdout, "FILE: %s, size: %d\n", TEXT_PATH, (int )(end - s));
432     file_exist = 1;
433   }
434   else {
435     fprintf(stdout, "Ignore %s\n", TEXT_PATH);
436     file_exist = 0;
437   }
438 
439   if (file_exist != 0) {
440     X2(p2, s, 10, 22);
441     X2(p3, s, 496079, 496088);
442     X2(p4, s, 1294, 1315);
443   }
444 #endif
445 
446   fprintf(stdout,
447           "\nRESULT   SUCC: %4d,  FAIL: %d,  ERROR: %d      (by Oniguruma %s)\n",
448           nsucc, nfail, nerror, onig_version());
449 
450 #ifndef _WIN32
451   if (file_exist != 0) {
452     fprintf(stdout, "\n");
453     time_compare(ASIZE(p2), p2, s, end);
454     time_compare(ASIZE(p3), p3, s, end);
455     time_compare(ASIZE(p4), p4, s, end);
456     time_compare(ASIZE(p5), p5, s, end);
457     time_compare(ASIZE(p6), p6, s, end);
458     fprintf(stdout, "\n");
459     free(s);
460   }
461 #endif
462 
463   onig_end();
464 
465   return ((nfail == 0 && nerror == 0) ? 0 : -1);
466 }
467