1 /*
2 * regset.c
3 * Copyright (c) 2019 K.Kosako
4 */
5 #include <stdio.h>
6 #include <unistd.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <sys/types.h>
10 #include <sys/stat.h>
11 #include <fcntl.h>
12 #include <time.h>
13
14 #include "oniguruma.h"
15
16
/* Retry limit passed to onig_set_retry_limit_in_search() at the start of each exec() run. */
#define RETRY_LIMIT 5000

#ifdef STANDALONE
/* Uncomment to additionally time each regex of the set individually
   (enables check_each_regex_search_time()). */
//#define CHECK_EACH_REGEX_SEARCH_TIME
#endif

/* Size of the regex_t* array that exec() uses to build the initial set. */
#define MAX_REG_NUM 256

typedef unsigned char uint8_t;
/* Encoding used by the harness; assigned in LLVMFuzzerTestOneInput(). */
static OnigEncoding ENC;
27
/* Write the current local time to fp in the form "MM/DD HH:MM:SS"
   (no trailing newline). */
static void
output_current_time(FILE* fp)
{
  char stamp[64];
  time_t now = time(NULL);
  struct tm* local = localtime(&now);

  strftime(stamp, sizeof(stamp), "%m/%d %H:%M:%S", local);
  fputs(stamp, fp);
}
39
40 #ifdef CHECK_EACH_REGEX_SEARCH_TIME
/* Return the time elapsed from *ts to *te, in seconds.
   The result is negative when te lies before ts. */
static double
get_sec(struct timespec* ts, struct timespec* te)
{
  double whole_sec = te->tv_sec - ts->tv_sec;
  double frac_sec  = (double )(te->tv_nsec - ts->tv_nsec) / 1000000000.0;

  return whole_sec + frac_sec;
}
50
51 static int
check_each_regex_search_time(OnigRegSet * set,unsigned char * str,unsigned char * end)52 check_each_regex_search_time(OnigRegSet* set, unsigned char* str, unsigned char* end)
53 {
54 int n;
55 int i;
56 int r;
57 OnigRegion* region;
58
59 n = onig_regset_number_of_regex(set);
60 region = onig_region_new();
61
62 for (i = 0; i < n; i++) {
63 regex_t* reg;
64 unsigned char* start;
65 unsigned char* range;
66 struct timespec ts1, ts2;
67 double t;
68
69 reg = onig_regset_get_regex(set, i);
70 start = str;
71 range = end;
72
73 clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts1);
74
75 r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
76
77 clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts2);
78 t = get_sec(&ts1, &ts2);
79
80 fprintf(stdout, "regex search time %d: %6.2lfmsec.\n", i, t * 1000.0);
81 }
82
83 onig_region_free(region, 1);
84 return 0;
85 }
86 #endif
87
88 static int
search(OnigRegSet * set,OnigRegSetLead lead,unsigned char * str,unsigned char * end)89 search(OnigRegSet* set, OnigRegSetLead lead, unsigned char* str, unsigned char* end)
90 {
91 int r;
92 int match_pos;
93 unsigned char *start, *range;
94
95 start = str;
96 range = end;
97 r = onig_regset_search(set, str, end, start, range, lead,
98 ONIG_OPTION_NONE, &match_pos);
99 if (r >= 0) {
100 #ifdef STANDALONE
101 int i;
102 int match_index;
103 OnigRegion* region;
104
105 match_index = r;
106 fprintf(stdout, "match reg index: %d, pos: %d (%s)\n",
107 match_index, match_pos, ONIGENC_NAME(ENC));
108 region = onig_regset_get_region(set, match_index);
109 if (region == 0) {
110 fprintf(stdout, "ERROR: can't get region.\n");
111 return -1;
112 }
113
114 for (i = 0; i < region->num_regs; i++) {
115 fprintf(stdout, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]);
116 }
117 #endif
118 }
119 else if (r == ONIG_MISMATCH) {
120 #ifdef STANDALONE
121 fprintf(stdout, "search fail (%s)\n", ONIGENC_NAME(ENC));
122 #endif
123 }
124 else { /* error */
125 #ifdef STANDALONE
126 char s[ONIG_MAX_ERROR_MESSAGE_LEN];
127
128 onig_error_code_to_str((UChar* )s, r);
129 fprintf(stdout, "ERROR: %s\n", s);
130 fprintf(stdout, " (%s)\n", ONIGENC_NAME(ENC));
131 #endif
132 return -1;
133 }
134
135 return 0;
136 }
137
/* Run statistics; LLVMFuzzerTestOneInput() prints them periodically. */
static long INPUT_COUNT;          /* calls to LLVMFuzzerTestOneInput() */
static long EXEC_COUNT;           /* calls to exec() */
static long EXEC_COUNT_INTERVAL;  /* exec() calls since the last statistics line (reset to 0 after printing) */
static long REGEX_SUCCESS_COUNT;  /* exec() runs in which every pattern compiled and was put into the set */
static long VALID_STRING_COUNT;   /* exec() runs whose subject passed onigenc_is_valid_mbc_string() */
143
144 static int
exec(OnigEncoding enc,int reg_num,int init_reg_num,UChar * pat[],UChar * pat_end[],OnigRegSetLead lead,UChar * str,UChar * end)145 exec(OnigEncoding enc, int reg_num, int init_reg_num,
146 UChar* pat[], UChar* pat_end[],
147 OnigRegSetLead lead, UChar* str, UChar* end)
148 {
149 int r;
150 int i, j;
151 OnigRegSet* set;
152 regex_t* reg;
153 OnigOptionType options;
154 OnigErrorInfo einfo;
155 regex_t* regs[MAX_REG_NUM];
156
157 EXEC_COUNT++;
158 EXEC_COUNT_INTERVAL++;
159
160 options = (EXEC_COUNT % 4 == 0) ? ONIG_OPTION_IGNORECASE : ONIG_OPTION_NONE;
161
162 onig_initialize(&enc, 1);
163 onig_set_retry_limit_in_search(RETRY_LIMIT);
164
165 for (i = 0; i < init_reg_num; i++) {
166 r = onig_new(®s[i], pat[i], pat_end[i], options, ENC,
167 ONIG_SYNTAX_DEFAULT, &einfo);
168 if (r != 0) {
169 #ifdef STANDALONE
170 char s[ONIG_MAX_ERROR_MESSAGE_LEN];
171
172 onig_error_code_to_str((UChar* )s, r, &einfo);
173 fprintf(stdout, "ERROR: index: %d, %s\n", i, s);
174 #endif
175
176 for (j = 0; j < i; j++) onig_free(regs[j]);
177
178 onig_end();
179
180 if (r == ONIGERR_PARSER_BUG ||
181 r == ONIGERR_STACK_BUG ||
182 r == ONIGERR_UNDEFINED_BYTECODE ||
183 r == ONIGERR_UNEXPECTED_BYTECODE) {
184 return -2;
185 }
186 else
187 return -1;
188 }
189 }
190
191 r = onig_regset_new(&set, init_reg_num, regs);
192 if (r != 0) {
193 for (i = 0; i < init_reg_num; i++) {
194 onig_free(regs[i]);
195 }
196 onig_end();
197 return -1;
198 }
199
200 for (i = init_reg_num; i < reg_num; i++) {
201 r = onig_new(®, pat[i], pat_end[i], options, ENC,
202 ONIG_SYNTAX_DEFAULT, &einfo);
203 if (r != 0) {
204 #ifdef STANDALONE
205 char s[ONIG_MAX_ERROR_MESSAGE_LEN];
206
207 onig_error_code_to_str((UChar* )s, r, &einfo);
208 fprintf(stdout, "ERROR: index: %d, %s\n", i, s);
209 #endif
210 onig_regset_free(set);
211 onig_end();
212
213 if (r == ONIGERR_PARSER_BUG ||
214 r == ONIGERR_STACK_BUG ||
215 r == ONIGERR_UNDEFINED_BYTECODE ||
216 r == ONIGERR_UNEXPECTED_BYTECODE) {
217 return -2;
218 }
219 else
220 return -1;
221 }
222
223 r = onig_regset_add(set, reg);
224 if (r != 0) {
225 onig_regset_free(set);
226 onig_end();
227 fprintf(stdout, "ERROR: onig_regset_add(): %d\n", i);
228 return r;
229 }
230 }
231
232 REGEX_SUCCESS_COUNT++;
233
234 if (onigenc_is_valid_mbc_string(enc, str, end) != 0) {
235 VALID_STRING_COUNT++;
236 r = search(set, lead, str, end);
237 #ifdef CHECK_EACH_REGEX_SEARCH_TIME
238 r = check_each_regex_search_time(set, str, end);
239 #endif
240 }
241
242 onig_regset_free(set);
243 onig_end();
244 return 0;
245 }
246
/* Upper bound on the number of input bytes used for each pattern. */
#define MAX_PATTERN_SIZE 30
/* Leading input bytes read as control values (regex count, initial regex count, lead selector). */
#define NUM_CONTROL_BYTES 3

/* A statistics line is printed when EXEC_COUNT_INTERVAL reaches this value. */
#define EXEC_PRINT_INTERVAL 2000000

/* Largest reg_num and init_reg_num seen so far; shown in the statistics line. */
static int MaxRegNum;
static int MaxInitRegNum;
254
255 extern int
LLVMFuzzerTestOneInput(const uint8_t * Data,size_t Size)256 LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size)
257 {
258 int r, i;
259 int pattern_size;
260 unsigned char *str_null_end;
261 size_t remaining_size;
262 unsigned char *data;
263 unsigned int reg_num;
264 unsigned int init_reg_num;
265 unsigned char* pat[256];
266 unsigned char* pat_end[256];
267 int len;
268 unsigned int lead_num;
269 OnigRegSetLead lead;
270
271 INPUT_COUNT++;
272
273 if (Size < NUM_CONTROL_BYTES) return 0;
274
275 remaining_size = Size;
276 data = (unsigned char* )(Data);
277
278 reg_num = data[0];
279 data++;
280 remaining_size--;
281
282 init_reg_num = data[0];
283 data++;
284 remaining_size--;
285
286 lead_num = data[0];
287 data++;
288 remaining_size--;
289 lead = (lead_num % 2 == 0 ? ONIG_REGSET_POSITION_LEAD : ONIG_REGSET_REGEX_LEAD);
290
291 if (remaining_size < reg_num * 2) {
292 reg_num = reg_num % 15; // zero is OK.
293 }
294
295 init_reg_num %= (reg_num + 1);
296
297 if (MaxRegNum < reg_num)
298 MaxRegNum = reg_num;
299
300 if (MaxInitRegNum < init_reg_num)
301 MaxInitRegNum = init_reg_num;
302
303 if (reg_num == 0)
304 pattern_size = 1;
305 else
306 pattern_size = remaining_size / (reg_num * 2);
307
308 if (pattern_size > MAX_PATTERN_SIZE)
309 pattern_size = MAX_PATTERN_SIZE;
310
311 len = pattern_size * reg_num;
312 if (len == 0) len = 1;
313
314 for (i = 0; i < reg_num; i++) {
315 pat[i] = (unsigned char* )malloc(pattern_size);
316 memcpy(pat[i], data, pattern_size);
317 pat_end[i] = pat[i] + pattern_size;
318 data += pattern_size;
319 remaining_size -= pattern_size;
320 }
321
322 unsigned char *str = (unsigned char*)malloc(remaining_size != 0 ? remaining_size : 1);
323 memcpy(str, data, remaining_size);
324 str_null_end = str + remaining_size;
325
326 #ifdef STANDALONE
327 fprintf(stdout, "reg num: %d, pattern size: %d, lead: %s\n",
328 reg_num, pattern_size,
329 lead == ONIG_REGSET_POSITION_LEAD ? "position" : "regex");
330
331 if (reg_num != 0) {
332 unsigned char* p;
333 i = 0;
334 p = pat[0];
335 while (p < pat_end[0]) {
336 fprintf(stdout, " 0x%02x", (int )*p++);
337 i++;
338 if (i % 8 == 0) fprintf(stdout, "\n");
339 }
340 fprintf(stdout, "\n");
341 }
342 #endif
343
344 ENC = ONIG_ENCODING_UTF8;
345
346 r = exec(ENC, reg_num, init_reg_num, pat, pat_end, lead, str, str_null_end);
347
348 for (i = 0; i < reg_num; i++) {
349 free(pat[i]);
350 }
351 free(str);
352
353 if (r == -2) {
354 //output_data("parser-bug", Data, Size);
355 exit(-2);
356 }
357
358 if (EXEC_COUNT_INTERVAL == EXEC_PRINT_INTERVAL) {
359 float fexec, freg, fvalid;
360
361 fexec = (float )EXEC_COUNT / INPUT_COUNT;
362 freg = (float )REGEX_SUCCESS_COUNT / INPUT_COUNT;
363 fvalid = (float )VALID_STRING_COUNT / INPUT_COUNT;
364
365 output_current_time(stdout);
366 fprintf(stdout, ": %ld: EXEC:%.2f, REG:%.2f, VALID:%.2f MAX REG:%d-%d\n",
367 EXEC_COUNT, fexec, freg, fvalid, MaxRegNum, MaxInitRegNum);
368
369 EXEC_COUNT_INTERVAL = 0;
370 }
371 else if (EXEC_COUNT == 1) {
372 output_current_time(stdout);
373 fprintf(stdout, ": ------------ START ------------\n");
374 }
375
376 return r;
377 }
378
379 #ifdef STANDALONE
380
main(int argc,char * argv[])381 extern int main(int argc, char* argv[])
382 {
383 size_t n;
384 uint8_t Data[10000];
385
386 n = read(0, Data, sizeof(Data));
387 fprintf(stdout, "n: %ld\n", n);
388 LLVMFuzzerTestOneInput(Data, n);
389
390 return 0;
391 }
392 #endif /* STANDALONE */
393