1 /**********************************************************************
2 regexec.c - Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5 * Copyright (c) 2002-2020 K.Kosako
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #ifndef ONIG_NO_PRINT
31 #ifndef NEED_TO_INCLUDE_STDIO
32 #define NEED_TO_INCLUDE_STDIO
33 #endif
34 #endif
35
36 #include "regint.h"
37
/* Word-character test for the character at s: mode 0 selects
   ONIGENC_IS_MBC_WORD, any other mode selects ONIGENC_IS_MBC_WORD_ASCII. */
#define IS_MBC_WORD_ASCII_MODE(enc,s,end,mode) \
  ((mode) == 0 ? ONIGENC_IS_MBC_WORD(enc,s,end) : ONIGENC_IS_MBC_WORD_ASCII(enc,s,end))

#ifdef USE_CRNL_AS_LINE_TERMINATOR
/* True when the character at p has code 13 (CR) and the character that
   follows it is a newline in enc.  The pointer argument is parenthesized
   before the addition so that an expression argument (for example
   "c ? a : b") still advances the intended pointer. */
#define ONIGENC_IS_MBC_CRNL(enc,p,end) \
  (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
   ONIGENC_IS_MBC_NEWLINE(enc,((p)+enclen(enc,(p))),end))
#endif

#define CHECK_INTERRUPT_IN_MATCH

/* Start position of capture group idx.  When the group's bit is set in
   reg->push_mem_start, mem_start_stk[idx] holds a stack index and the
   position is read from that stack entry; otherwise mem_start_stk[idx]
   holds the position itself.  Expects mem_start_stk and STACK_AT to be
   usable at the expansion site. */
#define STACK_MEM_START(reg, idx) \
  (MEM_STATUS_AT((reg)->push_mem_start, (idx)) != 0 ? \
   STACK_AT(mem_start_stk[idx].i)->u.mem.pstr : mem_start_stk[idx].s)

/* End position of capture group idx; same scheme as STACK_MEM_START but
   keyed on reg->push_mem_end and mem_end_stk. */
#define STACK_MEM_END(reg, idx) \
  (MEM_STATUS_AT((reg)->push_mem_end, (idx)) != 0 ? \
   STACK_AT(mem_end_stk[idx].i)->u.mem.pstr : mem_end_stk[idx].s)
56
57 static int forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start, UChar* range, UChar** low, UChar** high);
58
59 static int
60 search_in_range(regex_t* reg, const UChar* str, const UChar* end, const UChar* start, const UChar* range, /* match range */ const UChar* data_range, /* subject string range */ OnigRegion* region, OnigOptionType option, OnigMatchParam* mp);
61
62
#ifdef USE_CALLOUT
/* Per-callout storage: a fixed array of ONIG_CALLOUT_DATA_SLOT_NUM typed
   value slots plus the call counter recorded alongside them. */
typedef struct {
  int last_match_at_call_counter;
  struct {
    OnigType  type;
    OnigValue val;
  } slot[ONIG_CALLOUT_DATA_SLOT_NUM];
} CalloutData;
#endif
72
/* Body of the OnigMatchParam object.  Its fields are written by the
   onig_set_*_of_match_param() functions in this file; the retry-limit and
   callout fields exist only when the matching build option is enabled. */
struct OnigMatchParamStruct {
  unsigned int    match_stack_limit;
#ifdef USE_RETRY_LIMIT
  unsigned long   retry_limit_in_match;
  unsigned long   retry_limit_in_search;
#endif
#ifdef USE_CALLOUT
  OnigCalloutFunc progress_callout_of_contents;
  OnigCalloutFunc retraction_callout_of_contents;
  int             match_at_call_counter;
  void*           callout_user_data;
  CalloutData*    callout_data;
  int             callout_data_alloc_num;
#endif
};
88
89 extern int
onig_set_match_stack_limit_size_of_match_param(OnigMatchParam * param,unsigned int limit)90 onig_set_match_stack_limit_size_of_match_param(OnigMatchParam* param,
91 unsigned int limit)
92 {
93 param->match_stack_limit = limit;
94 return ONIG_NORMAL;
95 }
96
97 extern int
onig_set_retry_limit_in_match_of_match_param(OnigMatchParam * param,unsigned long limit)98 onig_set_retry_limit_in_match_of_match_param(OnigMatchParam* param,
99 unsigned long limit)
100 {
101 #ifdef USE_RETRY_LIMIT
102 param->retry_limit_in_match = limit;
103 return ONIG_NORMAL;
104 #else
105 return ONIG_NO_SUPPORT_CONFIG;
106 #endif
107 }
108
109 extern int
onig_set_retry_limit_in_search_of_match_param(OnigMatchParam * param,unsigned long limit)110 onig_set_retry_limit_in_search_of_match_param(OnigMatchParam* param,
111 unsigned long limit)
112 {
113 #ifdef USE_RETRY_LIMIT
114 param->retry_limit_in_search = limit;
115 return ONIG_NORMAL;
116 #else
117 return ONIG_NO_SUPPORT_CONFIG;
118 #endif
119 }
120
121 extern int
onig_set_progress_callout_of_match_param(OnigMatchParam * param,OnigCalloutFunc f)122 onig_set_progress_callout_of_match_param(OnigMatchParam* param, OnigCalloutFunc f)
123 {
124 #ifdef USE_CALLOUT
125 param->progress_callout_of_contents = f;
126 return ONIG_NORMAL;
127 #else
128 return ONIG_NO_SUPPORT_CONFIG;
129 #endif
130 }
131
132 extern int
onig_set_retraction_callout_of_match_param(OnigMatchParam * param,OnigCalloutFunc f)133 onig_set_retraction_callout_of_match_param(OnigMatchParam* param, OnigCalloutFunc f)
134 {
135 #ifdef USE_CALLOUT
136 param->retraction_callout_of_contents = f;
137 return ONIG_NORMAL;
138 #else
139 return ONIG_NO_SUPPORT_CONFIG;
140 #endif
141 }
142
143 extern int
onig_set_callout_user_data_of_match_param(OnigMatchParam * param,void * user_data)144 onig_set_callout_user_data_of_match_param(OnigMatchParam* param, void* user_data)
145 {
146 #ifdef USE_CALLOUT
147 param->callout_user_data = user_data;
148 return ONIG_NORMAL;
149 #else
150 return ONIG_NO_SUPPORT_CONFIG;
151 #endif
152 }
153
154
155 typedef struct {
156 void* stack_p;
157 int stack_n;
158 OnigOptionType options;
159 OnigRegion* region;
160 int ptr_num;
161 const UChar* start; /* search start position (for \G: BEGIN_POSITION) */
162 unsigned int match_stack_limit;
163 #ifdef USE_RETRY_LIMIT
164 unsigned long retry_limit_in_match;
165 unsigned long retry_limit_in_search;
166 unsigned long retry_limit_in_search_counter;
167 #endif
168 OnigMatchParam* mp;
169 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
170 int best_len; /* for ONIG_OPTION_FIND_LONGEST */
171 UChar* best_s;
172 #endif
173 #ifdef USE_CALL
174 unsigned long subexp_call_in_search_counter;
175 #endif
176 } MatchArg;
177
178
179 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
180
181 /* arguments type */
/* Kinds of operand an operation can carry (debug builds only). */
typedef enum {
  ARG_SPECIAL = -1,
  ARG_NON     =  0,
  ARG_RELADDR =  1,
  ARG_ABSADDR =  2,
  ARG_LENGTH  =  3,
  ARG_MEMNUM  =  4,
  ARG_OPTION  =  5,
  ARG_MODE    =  6
} OpArgType;
192
/* One row of the opcode-to-name table used by the debug printers. */
typedef struct {
  short int opcode;  /* OP_* value; a negative value terminates the table */
  char*     name;    /* printable name of the opcode */
} OpInfoType;
197
198 static OpInfoType OpInfo[] = {
199 { OP_FINISH, "finish"},
200 { OP_END, "end"},
201 { OP_STR_1, "str_1"},
202 { OP_STR_2, "str_2"},
203 { OP_STR_3, "str_3"},
204 { OP_STR_4, "str_4"},
205 { OP_STR_5, "str_5"},
206 { OP_STR_N, "str_n"},
207 { OP_STR_MB2N1, "str_mb2-n1"},
208 { OP_STR_MB2N2, "str_mb2-n2"},
209 { OP_STR_MB2N3, "str_mb2-n3"},
210 { OP_STR_MB2N, "str_mb2-n"},
211 { OP_STR_MB3N, "str_mb3n"},
212 { OP_STR_MBN, "str_mbn"},
213 { OP_CCLASS, "cclass"},
214 { OP_CCLASS_MB, "cclass-mb"},
215 { OP_CCLASS_MIX, "cclass-mix"},
216 { OP_CCLASS_NOT, "cclass-not"},
217 { OP_CCLASS_MB_NOT, "cclass-mb-not"},
218 { OP_CCLASS_MIX_NOT, "cclass-mix-not"},
219 { OP_ANYCHAR, "anychar"},
220 { OP_ANYCHAR_ML, "anychar-ml"},
221 { OP_ANYCHAR_STAR, "anychar*"},
222 { OP_ANYCHAR_ML_STAR, "anychar-ml*"},
223 { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next"},
224 { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next"},
225 { OP_WORD, "word"},
226 { OP_WORD_ASCII, "word-ascii"},
227 { OP_NO_WORD, "not-word"},
228 { OP_NO_WORD_ASCII, "not-word-ascii"},
229 { OP_WORD_BOUNDARY, "word-boundary"},
230 { OP_NO_WORD_BOUNDARY, "not-word-boundary"},
231 { OP_WORD_BEGIN, "word-begin"},
232 { OP_WORD_END, "word-end"},
233 { OP_TEXT_SEGMENT_BOUNDARY, "text-segment-boundary"},
234 { OP_BEGIN_BUF, "begin-buf"},
235 { OP_END_BUF, "end-buf"},
236 { OP_BEGIN_LINE, "begin-line"},
237 { OP_END_LINE, "end-line"},
238 { OP_SEMI_END_BUF, "semi-end-buf"},
239 { OP_CHECK_POSITION, "check-position"},
240 { OP_BACKREF1, "backref1"},
241 { OP_BACKREF2, "backref2"},
242 { OP_BACKREF_N, "backref-n"},
243 { OP_BACKREF_N_IC, "backref-n-ic"},
244 { OP_BACKREF_MULTI, "backref_multi"},
245 { OP_BACKREF_MULTI_IC, "backref_multi-ic"},
246 { OP_BACKREF_WITH_LEVEL, "backref_with_level"},
247 { OP_BACKREF_WITH_LEVEL_IC, "backref_with_level-c"},
248 { OP_BACKREF_CHECK, "backref_check"},
249 { OP_BACKREF_CHECK_WITH_LEVEL, "backref_check_with_level"},
250 { OP_MEM_START_PUSH, "mem-start-push"},
251 { OP_MEM_START, "mem-start"},
252 { OP_MEM_END_PUSH, "mem-end-push"},
253 #ifdef USE_CALL
254 { OP_MEM_END_PUSH_REC, "mem-end-push-rec"},
255 #endif
256 { OP_MEM_END, "mem-end"},
257 #ifdef USE_CALL
258 { OP_MEM_END_REC, "mem-end-rec"},
259 #endif
260 { OP_FAIL, "fail"},
261 { OP_JUMP, "jump"},
262 { OP_PUSH, "push"},
263 { OP_PUSH_SUPER, "push-super"},
264 { OP_POP, "pop"},
265 { OP_POP_TO_MARK, "pop-to-mark"},
266 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
267 { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1"},
268 #endif
269 { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next"},
270 { OP_REPEAT, "repeat"},
271 { OP_REPEAT_NG, "repeat-ng"},
272 { OP_REPEAT_INC, "repeat-inc"},
273 { OP_REPEAT_INC_NG, "repeat-inc-ng"},
274 { OP_EMPTY_CHECK_START, "empty-check-start"},
275 { OP_EMPTY_CHECK_END, "empty-check-end"},
276 { OP_EMPTY_CHECK_END_MEMST, "empty-check-end-memst"},
277 #ifdef USE_CALL
278 { OP_EMPTY_CHECK_END_MEMST_PUSH,"empty-check-end-memst-push"},
279 #endif
280 { OP_MOVE, "move"},
281 { OP_STEP_BACK_START, "step-back-start"},
282 { OP_STEP_BACK_NEXT, "step-back-next"},
283 { OP_CUT_TO_MARK, "cut-to-mark"},
284 { OP_MARK, "mark"},
285 { OP_SAVE_VAL, "save-val"},
286 { OP_UPDATE_VAR, "update-var"},
287 #ifdef USE_CALL
288 { OP_CALL, "call"},
289 { OP_RETURN, "return"},
290 #endif
291 #ifdef USE_CALLOUT
292 { OP_CALLOUT_CONTENTS, "callout-contents"},
293 { OP_CALLOUT_NAME, "callout-name"},
294 #endif
295 { -1, ""}
296 };
297
298 static char*
op2name(int opcode)299 op2name(int opcode)
300 {
301 int i;
302
303 for (i = 0; OpInfo[i].opcode >= 0; i++) {
304 if (opcode == OpInfo[i].opcode) return OpInfo[i].name;
305 }
306
307 return "";
308 }
309
/* Writes the separator that follows an opcode name in the listing. */
static void
p_after_op(FILE* f)
{
  static const char separator[] = " ";

  fputs(separator, f);
}
315
316 static void
p_string(FILE * f,int len,UChar * s)317 p_string(FILE* f, int len, UChar* s)
318 {
319 while (len-- > 0) { fputc(*s++, f); }
320 }
321
322 static void
p_len_string(FILE * f,LengthType len,int mb_len,UChar * s)323 p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
324 {
325 int x = len * mb_len;
326
327 fprintf(f, "len:%d ", len);
328 while (x-- > 0) { fputc(*s++, f); }
329 }
330
331 static void
p_rel_addr(FILE * f,RelAddrType rel_addr,Operation * p,Operation * start)332 p_rel_addr(FILE* f, RelAddrType rel_addr, Operation* p, Operation* start)
333 {
334 char* flag;
335 char* space1;
336 char* space2;
337 RelAddrType curr;
338 AbsAddrType abs_addr;
339
340 curr = (RelAddrType )(p - start);
341 abs_addr = curr + rel_addr;
342
343 flag = rel_addr < 0 ? "" : "+";
344 space1 = rel_addr < 10 ? " " : "";
345 space2 = abs_addr < 10 ? " " : "";
346
347 fprintf(f, "%s%s%d => %s%d", space1, flag, rel_addr, space2, abs_addr);
348 }
349
350 static int
bitset_on_num(BitSetRef bs)351 bitset_on_num(BitSetRef bs)
352 {
353 int i, n;
354
355 n = 0;
356 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
357 if (BITSET_AT(bs, i)) n++;
358 }
359
360 return n;
361 }
362
363
/* Opcode of the operation at position index: read from the separate ocs
   array in direct-threaded builds, otherwise from the operation itself. */
#ifdef USE_DIRECT_THREADED_CODE
#define GET_OPCODE(reg,index)  (reg)->ocs[index]
#else
#define GET_OPCODE(reg,index)  (reg)->ops[index].opcode
#endif
369
370 static void
print_compiled_byte_code(FILE * f,regex_t * reg,int index,Operation * start,OnigEncoding enc)371 print_compiled_byte_code(FILE* f, regex_t* reg, int index,
372 Operation* start, OnigEncoding enc)
373 {
374 static char* SaveTypeNames[] = {
375 "KEEP",
376 "S",
377 "RIGHT_RANGE"
378 };
379
380 static char* UpdateVarTypeNames[] = {
381 "KEEP_FROM_STACK_LAST",
382 "S_FROM_STACK",
383 "RIGHT_RANGE_FROM_STACK",
384 "RIGHT_RANGE_FROM_S_STACK",
385 "RIGHT_RANGE_TO_S",
386 "RIGHT_RANGE_INIT"
387 };
388
389 int i, n;
390 RelAddrType addr;
391 LengthType len;
392 MemNumType mem;
393 OnigCodePoint code;
394 ModeType mode;
395 UChar *q;
396 Operation* p;
397 enum OpCode opcode;
398
399 p = reg->ops + index;
400
401 opcode = GET_OPCODE(reg, index);
402
403 fprintf(f, "%s", op2name(opcode));
404 p_after_op(f);
405
406 switch (opcode) {
407 case OP_STR_1:
408 p_string(f, 1, p->exact.s); break;
409 case OP_STR_2:
410 p_string(f, 2, p->exact.s); break;
411 case OP_STR_3:
412 p_string(f, 3, p->exact.s); break;
413 case OP_STR_4:
414 p_string(f, 4, p->exact.s); break;
415 case OP_STR_5:
416 p_string(f, 5, p->exact.s); break;
417 case OP_STR_N:
418 len = p->exact_n.n;
419 p_string(f, len, p->exact_n.s); break;
420 case OP_STR_MB2N1:
421 p_string(f, 2, p->exact.s); break;
422 case OP_STR_MB2N2:
423 p_string(f, 4, p->exact.s); break;
424 case OP_STR_MB2N3:
425 p_string(f, 3, p->exact.s); break;
426 case OP_STR_MB2N:
427 len = p->exact_n.n;
428 p_len_string(f, len, 2, p->exact_n.s); break;
429 case OP_STR_MB3N:
430 len = p->exact_n.n;
431 p_len_string(f, len, 3, p->exact_n.s); break;
432 case OP_STR_MBN:
433 {
434 int mb_len;
435
436 mb_len = p->exact_len_n.len;
437 len = p->exact_len_n.n;
438 q = p->exact_len_n.s;
439 fprintf(f, "mblen:%d len:%d ", mb_len, len);
440 n = len * mb_len;
441 while (n-- > 0) { fputc(*q++, f); }
442 }
443 break;
444
445 case OP_CCLASS:
446 case OP_CCLASS_NOT:
447 n = bitset_on_num(p->cclass.bsp);
448 fprintf(f, "n:%d", n);
449 break;
450 case OP_CCLASS_MB:
451 case OP_CCLASS_MB_NOT:
452 {
453 OnigCodePoint ncode;
454 OnigCodePoint* codes;
455
456 codes = (OnigCodePoint* )p->cclass_mb.mb;
457 GET_CODE_POINT(ncode, codes);
458 codes++;
459 GET_CODE_POINT(code, codes);
460 fprintf(f, "n:%d code:0x%x", ncode, code);
461 }
462 break;
463 case OP_CCLASS_MIX:
464 case OP_CCLASS_MIX_NOT:
465 {
466 OnigCodePoint ncode;
467 OnigCodePoint* codes;
468
469 codes = (OnigCodePoint* )p->cclass_mix.mb;
470 n = bitset_on_num(p->cclass_mix.bsp);
471
472 GET_CODE_POINT(ncode, codes);
473 codes++;
474 GET_CODE_POINT(code, codes);
475 fprintf(f, "nsg:%d code:%u nmb:%u", n, code, ncode);
476 }
477 break;
478
479 case OP_ANYCHAR_STAR_PEEK_NEXT:
480 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
481 p_string(f, 1, &(p->anychar_star_peek_next.c));
482 break;
483
484 case OP_WORD_BOUNDARY:
485 case OP_NO_WORD_BOUNDARY:
486 case OP_WORD_BEGIN:
487 case OP_WORD_END:
488 mode = p->word_boundary.mode;
489 fprintf(f, "mode:%d", mode);
490 break;
491
492 case OP_BACKREF_N:
493 case OP_BACKREF_N_IC:
494 mem = p->backref_n.n1;
495 fprintf(f, "n:%d", mem);
496 break;
497 case OP_BACKREF_MULTI_IC:
498 case OP_BACKREF_MULTI:
499 case OP_BACKREF_CHECK:
500 n = p->backref_general.num;
501 fprintf(f, "n:%d ", n);
502 for (i = 0; i < n; i++) {
503 mem = (n == 1) ? p->backref_general.n1 : p->backref_general.ns[i];
504 if (i > 0) fputs(", ", f);
505 fprintf(f, "%d", mem);
506 }
507 break;
508 case OP_BACKREF_WITH_LEVEL:
509 case OP_BACKREF_WITH_LEVEL_IC:
510 case OP_BACKREF_CHECK_WITH_LEVEL:
511 {
512 LengthType level;
513
514 level = p->backref_general.nest_level;
515 fprintf(f, "level:%d ", level);
516 n = p->backref_general.num;
517 for (i = 0; i < n; i++) {
518 mem = (n == 1) ? p->backref_general.n1 : p->backref_general.ns[i];
519 if (i > 0) fputs(", ", f);
520 fprintf(f, "%d", mem);
521 }
522 }
523 break;
524
525 case OP_MEM_START:
526 case OP_MEM_START_PUSH:
527 mem = p->memory_start.num;
528 fprintf(f, "mem:%d", mem);
529 break;
530
531 case OP_MEM_END:
532 case OP_MEM_END_PUSH:
533 #ifdef USE_CALL
534 case OP_MEM_END_REC:
535 case OP_MEM_END_PUSH_REC:
536 #endif
537 mem = p->memory_end.num;
538 fprintf(f, "mem:%d", mem);
539 break;
540
541 case OP_JUMP:
542 addr = p->jump.addr;
543 p_rel_addr(f, addr, p, start);
544 break;
545
546 case OP_PUSH:
547 case OP_PUSH_SUPER:
548 addr = p->push.addr;
549 p_rel_addr(f, addr, p, start);
550 break;
551
552 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
553 case OP_PUSH_OR_JUMP_EXACT1:
554 addr = p->push_or_jump_exact1.addr;
555 p_rel_addr(f, addr, p, start);
556 fprintf(f, " c:");
557 p_string(f, 1, &(p->push_or_jump_exact1.c));
558 break;
559 #endif
560
561 case OP_PUSH_IF_PEEK_NEXT:
562 addr = p->push_if_peek_next.addr;
563 p_rel_addr(f, addr, p, start);
564 fprintf(f, " c:");
565 p_string(f, 1, &(p->push_if_peek_next.c));
566 break;
567
568 case OP_REPEAT:
569 case OP_REPEAT_NG:
570 mem = p->repeat.id;
571 addr = p->repeat.addr;
572 fprintf(f, "id:%d ", mem);
573 p_rel_addr(f, addr, p, start);
574 break;
575
576 case OP_REPEAT_INC:
577 case OP_REPEAT_INC_NG:
578 mem = p->repeat.id;
579 fprintf(f, "id:%d", mem);
580 break;
581
582 case OP_EMPTY_CHECK_START:
583 mem = p->empty_check_start.mem;
584 fprintf(f, "id:%d", mem);
585 break;
586 case OP_EMPTY_CHECK_END:
587 case OP_EMPTY_CHECK_END_MEMST:
588 #ifdef USE_CALL
589 case OP_EMPTY_CHECK_END_MEMST_PUSH:
590 #endif
591 mem = p->empty_check_end.mem;
592 fprintf(f, "id:%d", mem);
593 break;
594
595 #ifdef USE_CALL
596 case OP_CALL:
597 addr = p->call.addr;
598 fprintf(f, "=> %d", addr);
599 break;
600 #endif
601
602 case OP_MOVE:
603 fprintf(f, "n:%d", p->move.n);
604 break;
605
606 case OP_STEP_BACK_START:
607 addr = p->step_back_start.addr;
608 fprintf(f, "init:%d rem:%d ",
609 p->step_back_start.initial,
610 p->step_back_start.remaining);
611 p_rel_addr(f, addr, p, start);
612 break;
613
614 case OP_POP_TO_MARK:
615 mem = p->pop_to_mark.id;
616 fprintf(f, "id:%d", mem);
617 break;
618
619 case OP_CUT_TO_MARK:
620 {
621 int restore;
622
623 mem = p->cut_to_mark.id;
624 restore = p->cut_to_mark.restore_pos;
625 fprintf(f, "id:%d restore:%d", mem, restore);
626 }
627 break;
628
629 case OP_MARK:
630 {
631 int save;
632
633 mem = p->mark.id;
634 save = p->mark.save_pos;
635 fprintf(f, "id:%d save:%d", mem, save);
636 }
637 break;
638
639 case OP_SAVE_VAL:
640 {
641 SaveType type;
642
643 type = p->save_val.type;
644 mem = p->save_val.id;
645 fprintf(f, "%s id:%d", SaveTypeNames[type], mem);
646 }
647 break;
648
649 case OP_UPDATE_VAR:
650 {
651 UpdateVarType type;
652 int clear;
653
654 type = p->update_var.type;
655 mem = p->update_var.id;
656 clear = p->update_var.clear;
657 fprintf(f, "%s id:%d", UpdateVarTypeNames[type], mem);
658 if (type == UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK ||
659 type == UPDATE_VAR_RIGHT_RANGE_FROM_STACK)
660 fprintf(f, " clear:%d", clear);
661 }
662 break;
663
664 #ifdef USE_CALLOUT
665 case OP_CALLOUT_CONTENTS:
666 mem = p->callout_contents.num;
667 fprintf(f, "num:%d", mem);
668 break;
669
670 case OP_CALLOUT_NAME:
671 {
672 int id;
673
674 id = p->callout_name.id;
675 mem = p->callout_name.num;
676 fprintf(f, "id:%d num:%d", id, mem);
677 }
678 break;
679 #endif
680
681 case OP_TEXT_SEGMENT_BOUNDARY:
682 if (p->text_segment_boundary.not != 0)
683 fprintf(f, " not");
684 break;
685
686 case OP_CHECK_POSITION:
687 switch (p->check_position.type) {
688 case CHECK_POSITION_SEARCH_START:
689 fprintf(f, "search-start"); break;
690 case CHECK_POSITION_CURRENT_RIGHT_RANGE:
691 fprintf(f, "current-right-range"); break;
692 default:
693 break;
694 };
695 break;
696
697 case OP_FINISH:
698 case OP_END:
699 case OP_ANYCHAR:
700 case OP_ANYCHAR_ML:
701 case OP_ANYCHAR_STAR:
702 case OP_ANYCHAR_ML_STAR:
703 case OP_WORD:
704 case OP_WORD_ASCII:
705 case OP_NO_WORD:
706 case OP_NO_WORD_ASCII:
707 case OP_BEGIN_BUF:
708 case OP_END_BUF:
709 case OP_BEGIN_LINE:
710 case OP_END_LINE:
711 case OP_SEMI_END_BUF:
712 case OP_BACKREF1:
713 case OP_BACKREF2:
714 case OP_FAIL:
715 case OP_POP:
716 case OP_STEP_BACK_NEXT:
717 #ifdef USE_CALL
718 case OP_RETURN:
719 #endif
720 break;
721
722 default:
723 fprintf(DBGFP, "print_compiled_byte_code: undefined code %d\n", opcode);
724 break;
725 }
726 }
727 #endif /* defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH) */
728
729 #ifdef ONIG_DEBUG_COMPILE
730 extern void
onig_print_compiled_byte_code_list(FILE * f,regex_t * reg)731 onig_print_compiled_byte_code_list(FILE* f, regex_t* reg)
732 {
733 Operation* bp;
734 Operation* start = reg->ops;
735 Operation* end = reg->ops + reg->ops_used;
736
737 fprintf(f, "push_mem_start: 0x%x, push_mem_end: 0x%x\n",
738 reg->push_mem_start, reg->push_mem_end);
739 fprintf(f, "code-length: %d\n", reg->ops_used);
740
741 bp = start;
742 while (bp < end) {
743 int pos = bp - start;
744
745 fprintf(f, "%4d: ", pos);
746 print_compiled_byte_code(f, reg, pos, start, reg->enc);
747 fprintf(f, "\n");
748 bp++;
749 }
750 fprintf(f, "\n");
751 }
752 #endif
753
754
755 #ifdef USE_CAPTURE_HISTORY
756 static void history_tree_free(OnigCaptureTreeNode* node);
757
758 static void
history_tree_clear(OnigCaptureTreeNode * node)759 history_tree_clear(OnigCaptureTreeNode* node)
760 {
761 int i;
762
763 if (IS_NULL(node)) return ;
764
765 for (i = 0; i < node->num_childs; i++) {
766 if (IS_NOT_NULL(node->childs[i])) {
767 history_tree_free(node->childs[i]);
768 }
769 }
770 for (i = 0; i < node->allocated; i++) {
771 node->childs[i] = (OnigCaptureTreeNode* )0;
772 }
773 node->num_childs = 0;
774 node->beg = ONIG_REGION_NOTPOS;
775 node->end = ONIG_REGION_NOTPOS;
776 node->group = -1;
777 }
778
779 static void
history_tree_free(OnigCaptureTreeNode * node)780 history_tree_free(OnigCaptureTreeNode* node)
781 {
782 history_tree_clear(node);
783 if (IS_NOT_NULL(node->childs)) xfree(node->childs);
784
785 xfree(node);
786 }
787
788 static void
history_root_free(OnigRegion * r)789 history_root_free(OnigRegion* r)
790 {
791 if (IS_NULL(r->history_root)) return ;
792
793 history_tree_free(r->history_root);
794 r->history_root = (OnigCaptureTreeNode* )0;
795 }
796
797 static OnigCaptureTreeNode*
history_node_new(void)798 history_node_new(void)
799 {
800 OnigCaptureTreeNode* node;
801
802 node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode));
803 CHECK_NULL_RETURN(node);
804
805 node->childs = (OnigCaptureTreeNode** )0;
806 node->allocated = 0;
807 node->num_childs = 0;
808 node->group = -1;
809 node->beg = ONIG_REGION_NOTPOS;
810 node->end = ONIG_REGION_NOTPOS;
811
812 return node;
813 }
814
815 static int
history_tree_add_child(OnigCaptureTreeNode * parent,OnigCaptureTreeNode * child)816 history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
817 {
818 #define HISTORY_TREE_INIT_ALLOC_SIZE 8
819
820 if (parent->num_childs >= parent->allocated) {
821 int n, i;
822
823 if (IS_NULL(parent->childs)) {
824 n = HISTORY_TREE_INIT_ALLOC_SIZE;
825 parent->childs =
826 (OnigCaptureTreeNode** )xmalloc(sizeof(parent->childs[0]) * n);
827 }
828 else {
829 n = parent->allocated * 2;
830 parent->childs =
831 (OnigCaptureTreeNode** )xrealloc(parent->childs,
832 sizeof(parent->childs[0]) * n);
833 }
834 CHECK_NULL_RETURN_MEMERR(parent->childs);
835 for (i = parent->allocated; i < n; i++) {
836 parent->childs[i] = (OnigCaptureTreeNode* )0;
837 }
838 parent->allocated = n;
839 }
840
841 parent->childs[parent->num_childs] = child;
842 parent->num_childs++;
843 return 0;
844 }
845
846 static OnigCaptureTreeNode*
history_tree_clone(OnigCaptureTreeNode * node)847 history_tree_clone(OnigCaptureTreeNode* node)
848 {
849 int i;
850 OnigCaptureTreeNode *clone, *child;
851
852 clone = history_node_new();
853 CHECK_NULL_RETURN(clone);
854
855 clone->beg = node->beg;
856 clone->end = node->end;
857 for (i = 0; i < node->num_childs; i++) {
858 child = history_tree_clone(node->childs[i]);
859 if (IS_NULL(child)) {
860 history_tree_free(clone);
861 return (OnigCaptureTreeNode* )0;
862 }
863 history_tree_add_child(clone, child);
864 }
865
866 return clone;
867 }
868
869 extern OnigCaptureTreeNode*
onig_get_capture_tree(OnigRegion * region)870 onig_get_capture_tree(OnigRegion* region)
871 {
872 return region->history_root;
873 }
874 #endif /* USE_CAPTURE_HISTORY */
875
876 extern void
onig_region_clear(OnigRegion * region)877 onig_region_clear(OnigRegion* region)
878 {
879 int i;
880
881 for (i = 0; i < region->num_regs; i++) {
882 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
883 }
884 #ifdef USE_CAPTURE_HISTORY
885 history_root_free(region);
886 #endif
887 }
888
889 extern int
onig_region_resize(OnigRegion * region,int n)890 onig_region_resize(OnigRegion* region, int n)
891 {
892 region->num_regs = n;
893
894 if (n < ONIG_NREGION)
895 n = ONIG_NREGION;
896
897 if (region->allocated == 0) {
898 region->beg = (int* )xmalloc(n * sizeof(int));
899 region->end = (int* )xmalloc(n * sizeof(int));
900
901 if (region->beg == 0 || region->end == 0)
902 return ONIGERR_MEMORY;
903
904 region->allocated = n;
905 }
906 else if (region->allocated < n) {
907 region->beg = (int* )xrealloc(region->beg, n * sizeof(int));
908 region->end = (int* )xrealloc(region->end, n * sizeof(int));
909
910 if (region->beg == 0 || region->end == 0)
911 return ONIGERR_MEMORY;
912
913 region->allocated = n;
914 }
915
916 return 0;
917 }
918
919 static int
onig_region_resize_clear(OnigRegion * region,int n)920 onig_region_resize_clear(OnigRegion* region, int n)
921 {
922 int r;
923
924 r = onig_region_resize(region, n);
925 if (r != 0) return r;
926 onig_region_clear(region);
927 return 0;
928 }
929
930 extern int
onig_region_set(OnigRegion * region,int at,int beg,int end)931 onig_region_set(OnigRegion* region, int at, int beg, int end)
932 {
933 if (at < 0) return ONIGERR_INVALID_ARGUMENT;
934
935 if (at >= region->allocated) {
936 int r = onig_region_resize(region, at + 1);
937 if (r < 0) return r;
938 }
939
940 region->beg[at] = beg;
941 region->end[at] = end;
942 return 0;
943 }
944
945 extern void
onig_region_init(OnigRegion * region)946 onig_region_init(OnigRegion* region)
947 {
948 region->num_regs = 0;
949 region->allocated = 0;
950 region->beg = (int* )0;
951 region->end = (int* )0;
952 region->history_root = (OnigCaptureTreeNode* )0;
953 }
954
955 extern OnigRegion*
onig_region_new(void)956 onig_region_new(void)
957 {
958 OnigRegion* r;
959
960 r = (OnigRegion* )xmalloc(sizeof(OnigRegion));
961 CHECK_NULL_RETURN(r);
962 onig_region_init(r);
963 return r;
964 }
965
966 extern void
onig_region_free(OnigRegion * r,int free_self)967 onig_region_free(OnigRegion* r, int free_self)
968 {
969 if (r != 0) {
970 if (r->allocated > 0) {
971 if (r->beg) xfree(r->beg);
972 if (r->end) xfree(r->end);
973 r->allocated = 0;
974 }
975 #ifdef USE_CAPTURE_HISTORY
976 history_root_free(r);
977 #endif
978 if (free_self) xfree(r);
979 }
980 }
981
982 extern void
onig_region_copy(OnigRegion * to,OnigRegion * from)983 onig_region_copy(OnigRegion* to, OnigRegion* from)
984 {
985 #define RREGC_SIZE (sizeof(int) * from->num_regs)
986 int i;
987
988 if (to == from) return;
989
990 if (to->allocated == 0) {
991 if (from->num_regs > 0) {
992 to->beg = (int* )xmalloc(RREGC_SIZE);
993 if (IS_NULL(to->beg)) return;
994 to->end = (int* )xmalloc(RREGC_SIZE);
995 if (IS_NULL(to->end)) return;
996 to->allocated = from->num_regs;
997 }
998 }
999 else if (to->allocated < from->num_regs) {
1000 to->beg = (int* )xrealloc(to->beg, RREGC_SIZE);
1001 if (IS_NULL(to->beg)) return;
1002 to->end = (int* )xrealloc(to->end, RREGC_SIZE);
1003 if (IS_NULL(to->end)) return;
1004 to->allocated = from->num_regs;
1005 }
1006
1007 for (i = 0; i < from->num_regs; i++) {
1008 to->beg[i] = from->beg[i];
1009 to->end[i] = from->end[i];
1010 }
1011 to->num_regs = from->num_regs;
1012
1013 #ifdef USE_CAPTURE_HISTORY
1014 history_root_free(to);
1015
1016 if (IS_NOT_NULL(from->history_root)) {
1017 to->history_root = history_tree_clone(from->history_root);
1018 }
1019 #endif
1020 }
1021
#ifdef USE_CALLOUT
/* Fills the OnigCalloutArgs object named by `args` and calls
   func(&args, user), leaving the return value in `result`.
   Besides its parameters the macro reads these names from the expansion
   site: reg, str, end, sstart, right_range, s, retry_in_match_counter,
   msa, stk_base, stk, mem_start_stk and mem_end_stk. */
#define CALLOUT_BODY(func, ain, aname_id, anum, user, args, result) do { \
  args.in            = (ain);\
  args.name_id       = (aname_id);\
  args.num           = anum;\
  args.regex         = reg;\
  args.string        = str;\
  args.string_end    = end;\
  args.start         = sstart;\
  args.right_range   = right_range;\
  args.current       = s;\
  args.retry_in_match_counter = retry_in_match_counter;\
  args.msa           = msa;\
  args.stk_base      = stk_base;\
  args.stk           = stk;\
  args.mem_start_stk = mem_start_stk;\
  args.mem_end_stk   = mem_end_stk;\
  result = (func)(&args, user);\
} while (0)

/* Runs func as a retraction callout.  ONIG_CALLOUT_FAIL and
   ONIG_CALLOUT_SUCCESS simply fall through; any other result is stored in
   best_len (positive values are first replaced by
   ONIGERR_INVALID_ARGUMENT) and control jumps to the match_at_end label of
   the enclosing function. */
#define RETRACTION_CALLOUT(func, aname_id, anum, user) do {\
  int result;\
  OnigCalloutArgs args;\
  CALLOUT_BODY(func, ONIG_CALLOUT_IN_RETRACTION, aname_id, anum, user, args, result);\
  if (result != ONIG_CALLOUT_FAIL && result != ONIG_CALLOUT_SUCCESS) {\
    if (result > 0) {\
      result = ONIGERR_INVALID_ARGUMENT;\
    }\
    best_len = result;\
    goto match_at_end;\
  }\
} while(0)
#endif
1060
1061
/** stack **/
/* Type tags stored in StackType.type, followed by the masks used to test
   them.  Several tags are built from the mask bits below, so the numeric
   values must stay exactly as they are. */
#define STK_ALT_FLAG               0x0001

/* stack type */
/* used by normal-POP */
#define STK_SUPER_ALT              STK_ALT_FLAG
#define STK_ALT                    (0x0002 | STK_ALT_FLAG)

/* handled by normal-POP */
#define STK_MEM_START              0x0010
#define STK_MEM_END                0x8030
#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
#define STK_REPEAT_INC             (0x0040 | STK_MASK_POP_HANDLED)
#else
#define STK_REPEAT_INC             0x0040
#endif
#ifdef USE_CALLOUT
#define STK_CALLOUT                0x0070
#endif

/* avoided by normal-POP */
#define STK_VOID                   0x0000  /* for fill a blank */
#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
#define STK_EMPTY_CHECK_START      (0x3000 | STK_MASK_POP_HANDLED)
#else
#define STK_EMPTY_CHECK_START      0x3000
#endif
#define STK_EMPTY_CHECK_END        0x5000  /* for recursive call */
#define STK_MEM_END_MARK           0x8100
#define STK_CALL_FRAME             (0x0400 | STK_MASK_POP_HANDLED)
#define STK_RETURN                 (0x0500 | STK_MASK_POP_HANDLED)
#define STK_SAVE_VAL               0x0600
#define STK_MARK                   0x0704

/* stack type check mask */
#define STK_MASK_POP_USED          STK_ALT_FLAG
#define STK_MASK_POP_HANDLED       0x0010
#define STK_MASK_POP_HANDLED_TIL   (STK_MASK_POP_HANDLED | 0x0004)
#define STK_MASK_TO_VOID_TARGET    0x100e
#define STK_MASK_MEM_END_OR_MARK   0x8000  /* MEM_END or MEM_END_MARK */
1102
1103 typedef ptrdiff_t StackIndex;
1104
1105 #define INVALID_STACK_INDEX ((StackIndex )-1)
1106
1107 typedef union {
1108 StackIndex i;
1109 UChar* s;
1110 } StkPtrType;
1111
1112
1113 typedef struct _StackType {
1114 unsigned int type;
1115 int zid;
1116 union {
1117 struct {
1118 Operation* pcode; /* byte code position */
1119 UChar* pstr; /* string position */
1120 } state;
1121 struct {
1122 int count;
1123 #ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
1124 StackIndex prev_index; /* index of stack */
1125 #endif
1126 } repeat_inc;
1127 struct {
1128 UChar *pstr; /* start/end position */
1129 /* Following information is set, if this stack type is MEM-START */
1130 StkPtrType prev_start; /* prev. info (for backtrack "(...)*" ) */
1131 StkPtrType prev_end; /* prev. info (for backtrack "(...)*" ) */
1132 } mem;
1133 struct {
1134 UChar *pstr; /* start position */
1135 #ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
1136 StackIndex prev_index; /* index of stack */
1137 #endif
1138 } empty_check;
1139 #ifdef USE_CALL
1140 struct {
1141 Operation *ret_addr; /* byte code position */
1142 UChar *pstr; /* string position */
1143 } call_frame;
1144 #endif
1145 struct {
1146 enum SaveType type;
1147 UChar* v;
1148 UChar* v2;
1149 } val;
1150 #ifdef USE_CALLOUT
1151 struct {
1152 int num;
1153 OnigCalloutFunc func;
1154 } callout;
1155 #endif
1156 } u;
1157 } StackType;
1158
#ifdef USE_CALLOUT

/* Argument block handed to a callout function.  CALLOUT_BODY fills every
   field immediately before the call. */
struct OnigCalloutArgsStruct {
  OnigCalloutIn    in;
  int              name_id;   /* name id or ONIG_NON_NAME_ID */
  int              num;
  OnigRegex        regex;
  const OnigUChar* string;
  const OnigUChar* string_end;
  const OnigUChar* start;
  const OnigUChar* right_range;
  const OnigUChar* current;   /* current matching position */
  unsigned long    retry_in_match_counter;

  /* invisible to users */
  MatchArg*   msa;
  StackType*  stk_base;
  StackType*  stk;
  StkPtrType* mem_start_stk;
  StkPtrType* mem_end_stk;
};

#endif
1182
#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR

/* Number of pointer-sized slots reserved in front of the stack entries:
   one per repeat, one per empty check, and two arrays of (num_mem + 1)
   capture slots (start and end). */
#define PTR_NUM_SIZE(reg)  ((reg)->num_repeat + (reg)->num_empty_check + ((reg)->num_mem + 1) * 2)
/* Re-derive the slot-array pointers from alloc_base.  Must be run after
   alloc_base changes (STACK_ENSURE does so after stack_double()).
   Uses the caller's locals: alloc_base, reg, num_mem and the four
   *_stk pointers. */
#define UPDATE_FOR_STACK_REALLOC do{\
  repeat_stk      = (StackIndex* )alloc_base;\
  empty_check_stk = (StackIndex* )(repeat_stk + reg->num_repeat);\
  mem_start_stk   = (StkPtrType* )(empty_check_stk + reg->num_empty_check);\
  mem_end_stk     = mem_start_stk + num_mem + 1;\
} while(0)

/* Remember the previous "latest STK_REPEAT_INC" index in the entry being
   pushed, then make the entry being pushed the latest one. */
#define SAVE_REPEAT_STK_VAR(sid)     stk->u.repeat_inc.prev_index = repeat_stk[sid]
#define LOAD_TO_REPEAT_STK_VAR(sid)  repeat_stk[sid] = GET_STACK_INDEX(stk)
/* `else if` arm for the pop macros: restore repeat_stk[] from the entry. */
#define POP_REPEAT_INC  else if (stk->type == STK_REPEAT_INC) {repeat_stk[stk->zid] = stk->u.repeat_inc.prev_index;}

/* Same scheme for STK_EMPTY_CHECK_START entries. */
#define SAVE_EMPTY_CHECK_STK_VAR(sid)     stk->u.empty_check.prev_index = empty_check_stk[sid]
#define LOAD_TO_EMPTY_CHECK_STK_VAR(sid)  empty_check_stk[sid] = GET_STACK_INDEX(stk)
#define POP_EMPTY_CHECK_START  else if (stk->type == STK_EMPTY_CHECK_START) {empty_check_stk[stk->zid] = stk->u.empty_check.prev_index;}

#else

/* Without the local-variable optimisation only the capture slots exist. */
#define PTR_NUM_SIZE(reg)  (((reg)->num_mem + 1) * 2)
#define UPDATE_FOR_STACK_REALLOC do{\
  mem_start_stk = (StkPtrType* )alloc_base;\
  mem_end_stk   = mem_start_stk + num_mem + 1;\
} while(0)

/* The bookkeeping hooks expand to nothing in this configuration. */
#define SAVE_REPEAT_STK_VAR(sid)
#define LOAD_TO_REPEAT_STK_VAR(sid)
#define POP_REPEAT_INC

#define SAVE_EMPTY_CHECK_STK_VAR(sid)
#define LOAD_TO_EMPTY_CHECK_STK_VAR(sid)
#define POP_EMPTY_CHECK_START

#endif /* USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR */
1218
#ifdef USE_RETRY_LIMIT
/* Copy the retry limits from the match param (mpv) into the MatchArg and
   reset the per-search retry counter.  Expands to several statements that
   each end in `;`; it is used inside the do/while of MATCH_ARG_INIT. */
#define RETRY_IN_MATCH_ARG_INIT(msa,mpv) \
  (msa).retry_limit_in_match  = (mpv)->retry_limit_in_match;\
  (msa).retry_limit_in_search = (mpv)->retry_limit_in_search;\
  (msa).retry_limit_in_search_counter = 0;
#else
#define RETRY_IN_MATCH_ARG_INIT(msa,mpv)
#endif
1227
#if defined(USE_CALL)
/* Reset the per-search subexp-call counter (mpv is not used). */
#define SUBEXP_CALL_IN_MATCH_ARG_INIT(msa,mpv) \
  (msa).subexp_call_in_search_counter = 0;

/* `else if` arms for the pop macros: popping a STK_RETURN entry increments
   the caller's subexp_call_nest_counter, popping a STK_CALL_FRAME entry
   decrements it. */
#define POP_CALL  else if (stk->type == STK_RETURN) {subexp_call_nest_counter++;} else if (stk->type == STK_CALL_FRAME) {subexp_call_nest_counter--;}
#else
#define SUBEXP_CALL_IN_MATCH_ARG_INIT(msa,mpv)
#define POP_CALL
#endif
1237
/*
 * Initialise a MatchArg object (msa, passed as an lvalue, not a pointer).
 * stack_p starts as a null pointer, so the first STACK_INIT allocates a
 * fresh stack; ptr_num is fixed from the regex by PTR_NUM_SIZE.  The first
 * variant additionally resets best_len to ONIG_MISMATCH.
 */
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mpv) do { \
  (msa).stack_p  = (void* )0;\
  (msa).options  = (arg_option);\
  (msa).region   = (arg_region);\
  (msa).start    = (arg_start);\
  (msa).match_stack_limit  = (mpv)->match_stack_limit;\
  RETRY_IN_MATCH_ARG_INIT(msa,mpv)\
  SUBEXP_CALL_IN_MATCH_ARG_INIT(msa,mpv)\
  (msa).mp = mpv;\
  (msa).best_len = ONIG_MISMATCH;\
  (msa).ptr_num  = PTR_NUM_SIZE(reg);\
} while(0)
#else
#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mpv) do { \
  (msa).stack_p  = (void* )0;\
  (msa).options  = (arg_option);\
  (msa).region   = (arg_region);\
  (msa).start    = (arg_start);\
  (msa).match_stack_limit  = (mpv)->match_stack_limit;\
  RETRY_IN_MATCH_ARG_INIT(msa,mpv)\
  SUBEXP_CALL_IN_MATCH_ARG_INIT(msa,mpv)\
  (msa).mp = mpv;\
  (msa).ptr_num  = PTR_NUM_SIZE(reg);\
} while(0)
#endif
1264
/* Release the stack buffer saved in msa, if any.
   NOTE(review): expands to a bare `if` without braces or do/while, so an
   `else` written right after an invocation would bind to this `if`. */
#define MATCH_ARG_FREE(msa)  if ((msa).stack_p) xfree((msa).stack_p)


/* STACK_INIT takes the first buffer from xalloca() only while
   msa->ptr_num does not exceed this value; above it, xmalloc() is used. */
#define ALLOCA_PTR_NUM_LIMIT   50
1269
/*
 * Set up the caller's stack locals (is_alloca, alloc_base, stk_base, stk,
 * stk_end) for a match.  Three cases:
 *   - msa->stack_p is set: reuse that buffer with msa->stack_n entries;
 *   - ptr_num > ALLOCA_PTR_NUM_LIMIT: xmalloc() a new buffer;
 *   - otherwise: take the buffer from xalloca() and set is_alloca = 1.
 * The buffer layout is ptr_num StkPtrType-sized slots followed by the
 * StackType entries.  CHECK_NULL_RETURN_MEMERR returns from the enclosing
 * function when the allocation failed.
 * NOTE(review): the definition ends in `while(0);` (with a semicolon), so
 * `STACK_INIT(n);` expands to an extra empty statement.
 */
#define STACK_INIT(stack_num)  do {\
  if (msa->stack_p) {\
    is_alloca  = 0;\
    alloc_base = msa->stack_p;\
    stk_base   = (StackType* )(alloc_base\
                 + (sizeof(StkPtrType) * msa->ptr_num));\
    stk        = stk_base;\
    stk_end    = stk_base + msa->stack_n;\
  }\
  else if (msa->ptr_num > ALLOCA_PTR_NUM_LIMIT) {\
    is_alloca  = 0;\
    alloc_base = (char* )xmalloc(sizeof(StkPtrType) * msa->ptr_num\
                  + sizeof(StackType) * (stack_num));\
    CHECK_NULL_RETURN_MEMERR(alloc_base);\
    stk_base   = (StackType* )(alloc_base\
                 + (sizeof(StkPtrType) * msa->ptr_num));\
    stk        = stk_base;\
    stk_end    = stk_base + (stack_num);\
  }\
  else {\
    is_alloca  = 1;\
    alloc_base = (char* )xalloca(sizeof(StkPtrType) * msa->ptr_num\
                 + sizeof(StackType) * (stack_num));\
    CHECK_NULL_RETURN_MEMERR(alloc_base);\
    stk_base   = (StackType* )(alloc_base\
                 + (sizeof(StkPtrType) * msa->ptr_num));\
    stk        = stk_base;\
    stk_end    = stk_base + (stack_num);\
  }\
} while(0);
1300
1301
/*
 * Hand the current stack buffer over to msa so that a later STACK_INIT
 * can reuse it.  Records the entry count in msa->stack_n.  A buffer that
 * came from xalloca() (is_alloca != 0) is copied into a new xmalloc()
 * block first; otherwise the pointer itself is stored.  Reads the caller's
 * stk_base and stk_end; returns from the enclosing function via
 * CHECK_NULL_RETURN_MEMERR when the copy cannot be allocated.
 */
#define STACK_SAVE(msa,is_alloca,alloc_base) do{\
  (msa)->stack_n = (int )(stk_end - stk_base);\
  if ((is_alloca) != 0) {\
    size_t size = sizeof(StkPtrType) * (msa)->ptr_num\
                + sizeof(StackType) * (msa)->stack_n;\
    (msa)->stack_p = xmalloc(size);\
    CHECK_NULL_RETURN_MEMERR((msa)->stack_p);\
    xmemcpy((msa)->stack_p, (alloc_base), size);\
  }\
  else {\
    (msa)->stack_p = (alloc_base);\
  };\
} while(0)
1315
1316 static unsigned int MatchStackLimit = DEFAULT_MATCH_STACK_LIMIT_SIZE;
1317
1318 extern unsigned int
onig_get_match_stack_limit_size(void)1319 onig_get_match_stack_limit_size(void)
1320 {
1321 return MatchStackLimit;
1322 }
1323
1324 extern int
onig_set_match_stack_limit_size(unsigned int size)1325 onig_set_match_stack_limit_size(unsigned int size)
1326 {
1327 MatchStackLimit = size;
1328 return 0;
1329 }
1330
#ifdef USE_RETRY_LIMIT

/* File-wide defaults copied into each OnigMatchParam by
   onig_initialize_match_param(). */
static unsigned long RetryLimitInMatch  = DEFAULT_RETRY_LIMIT_IN_MATCH;
static unsigned long RetryLimitInSearch = DEFAULT_RETRY_LIMIT_IN_SEARCH;

/*
 * Count one retry and abort the match when the caller's local
 * retry_limit_in_match is exceeded.  The error code is chosen by comparing
 * the counter with msa->retry_limit_in_match: above it the in-match error
 * is reported, otherwise the in-search error.
 * Uses the caller's locals retry_in_match_counter, retry_limit_in_match
 * and msa.
 */
#define CHECK_RETRY_LIMIT_IN_MATCH  do {\
  if (++retry_in_match_counter > retry_limit_in_match) {\
    MATCH_AT_ERROR_RETURN(retry_in_match_counter > msa->retry_limit_in_match ? ONIGERR_RETRY_LIMIT_IN_MATCH_OVER : ONIGERR_RETRY_LIMIT_IN_SEARCH_OVER); \
  }\
} while (0)

#else

#define CHECK_RETRY_LIMIT_IN_MATCH

#endif /* USE_RETRY_LIMIT */
1347
/* Return the default retry limit for a single match, or 0 when the
   library was built without USE_RETRY_LIMIT. */
extern unsigned long
onig_get_retry_limit_in_match(void)
{
#ifndef USE_RETRY_LIMIT
  return 0;
#else
  return RetryLimitInMatch;
#endif
}
1357
1358 extern int
onig_set_retry_limit_in_match(unsigned long n)1359 onig_set_retry_limit_in_match(unsigned long n)
1360 {
1361 #ifdef USE_RETRY_LIMIT
1362 RetryLimitInMatch = n;
1363 return 0;
1364 #else
1365 return ONIG_NO_SUPPORT_CONFIG;
1366 #endif
1367 }
1368
/* Return the default retry limit for a whole search, or 0 when the
   library was built without USE_RETRY_LIMIT. */
extern unsigned long
onig_get_retry_limit_in_search(void)
{
#ifndef USE_RETRY_LIMIT
  return 0;
#else
  return RetryLimitInSearch;
#endif
}
1378
1379 extern int
onig_set_retry_limit_in_search(unsigned long n)1380 onig_set_retry_limit_in_search(unsigned long n)
1381 {
1382 #ifdef USE_RETRY_LIMIT
1383 RetryLimitInSearch = n;
1384 return 0;
1385 #else
1386 return ONIG_NO_SUPPORT_CONFIG;
1387 #endif
1388 }
1389
1390 #ifdef USE_CALL
1391 static unsigned long SubexpCallLimitInSearch = DEFAULT_SUBEXP_CALL_LIMIT_IN_SEARCH;
1392
1393 extern unsigned long
onig_get_subexp_call_limit_in_search(void)1394 onig_get_subexp_call_limit_in_search(void)
1395 {
1396 return SubexpCallLimitInSearch;
1397 }
1398
1399 extern int
onig_set_subexp_call_limit_in_search(unsigned long n)1400 onig_set_subexp_call_limit_in_search(unsigned long n)
1401 {
1402 SubexpCallLimitInSearch = n;
1403 return 0;
1404 }
1405
1406 #endif
1407
1408 #ifdef USE_CALLOUT
/* Callout functions that onig_initialize_match_param() installs into every
   new OnigMatchParam.  No initializer is given here, so both start out as
   null pointers (static storage). */
static OnigCalloutFunc DefaultProgressCallout;
static OnigCalloutFunc DefaultRetractionCallout;
1411 #endif
1412
1413 extern OnigMatchParam*
onig_new_match_param(void)1414 onig_new_match_param(void)
1415 {
1416 OnigMatchParam* p;
1417
1418 p = (OnigMatchParam* )xmalloc(sizeof(*p));
1419 if (IS_NOT_NULL(p)) {
1420 onig_initialize_match_param(p);
1421 }
1422
1423 return p;
1424 }
1425
1426 extern void
onig_free_match_param_content(OnigMatchParam * p)1427 onig_free_match_param_content(OnigMatchParam* p)
1428 {
1429 #ifdef USE_CALLOUT
1430 if (IS_NOT_NULL(p->callout_data)) {
1431 xfree(p->callout_data);
1432 p->callout_data = 0;
1433 }
1434 #endif
1435 }
1436
1437 extern void
onig_free_match_param(OnigMatchParam * p)1438 onig_free_match_param(OnigMatchParam* p)
1439 {
1440 if (IS_NOT_NULL(p)) {
1441 onig_free_match_param_content(p);
1442 xfree(p);
1443 }
1444 }
1445
1446 extern int
onig_initialize_match_param(OnigMatchParam * mp)1447 onig_initialize_match_param(OnigMatchParam* mp)
1448 {
1449 mp->match_stack_limit = MatchStackLimit;
1450 #ifdef USE_RETRY_LIMIT
1451 mp->retry_limit_in_match = RetryLimitInMatch;
1452 mp->retry_limit_in_search = RetryLimitInSearch;
1453 #endif
1454
1455 #ifdef USE_CALLOUT
1456 mp->progress_callout_of_contents = DefaultProgressCallout;
1457 mp->retraction_callout_of_contents = DefaultRetractionCallout;
1458 mp->match_at_call_counter = 0;
1459 mp->callout_user_data = 0;
1460 mp->callout_data = 0;
1461 mp->callout_data_alloc_num = 0;
1462 #endif
1463
1464 return ONIG_NORMAL;
1465 }
1466
1467 #ifdef USE_CALLOUT
1468
1469 static int
adjust_match_param(regex_t * reg,OnigMatchParam * mp)1470 adjust_match_param(regex_t* reg, OnigMatchParam* mp)
1471 {
1472 RegexExt* ext = reg->extp;
1473
1474 mp->match_at_call_counter = 0;
1475
1476 if (IS_NULL(ext) || ext->callout_num == 0) return ONIG_NORMAL;
1477
1478 if (ext->callout_num > mp->callout_data_alloc_num) {
1479 CalloutData* d;
1480 size_t n = ext->callout_num * sizeof(*d);
1481 if (IS_NOT_NULL(mp->callout_data))
1482 d = (CalloutData* )xrealloc(mp->callout_data, n);
1483 else
1484 d = (CalloutData* )xmalloc(n);
1485 CHECK_NULL_RETURN_MEMERR(d);
1486
1487 mp->callout_data = d;
1488 mp->callout_data_alloc_num = ext->callout_num;
1489 }
1490
1491 xmemset(mp->callout_data, 0, mp->callout_data_alloc_num * sizeof(CalloutData));
1492 return ONIG_NORMAL;
1493 }
1494
/* Run adjust_match_param() and return its result from the enclosing
   function on failure.  Assigns to the caller's local `r`.  Expands to two
   statements without do/while, so it must not be used as the sole body of
   an unbraced if/else. */
#define ADJUST_MATCH_PARAM(reg, mp) \
  r = adjust_match_param(reg, mp);\
  if (r != ONIG_NORMAL) return r;

/* Address of the CalloutData for callout number `num`; numbers are
   1-based, so num == 1 is element 0.  No range check is performed. */
#define CALLOUT_DATA_AT_NUM(mp, num)  ((mp)->callout_data + ((num) - 1))
1500
1501 extern int
onig_check_callout_data_and_clear_old_values(OnigCalloutArgs * args)1502 onig_check_callout_data_and_clear_old_values(OnigCalloutArgs* args)
1503 {
1504 OnigMatchParam* mp;
1505 int num;
1506 CalloutData* d;
1507
1508 mp = args->msa->mp;
1509 num = args->num;
1510
1511 d = CALLOUT_DATA_AT_NUM(mp, num);
1512 if (d->last_match_at_call_counter != mp->match_at_call_counter) {
1513 xmemset(d, 0, sizeof(*d));
1514 d->last_match_at_call_counter = mp->match_at_call_counter;
1515 return d->last_match_at_call_counter;
1516 }
1517
1518 return 0;
1519 }
1520
1521 extern int
onig_get_callout_data_dont_clear_old(regex_t * reg,OnigMatchParam * mp,int callout_num,int slot,OnigType * type,OnigValue * val)1522 onig_get_callout_data_dont_clear_old(regex_t* reg, OnigMatchParam* mp,
1523 int callout_num, int slot,
1524 OnigType* type, OnigValue* val)
1525 {
1526 OnigType t;
1527 CalloutData* d;
1528
1529 if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT;
1530
1531 d = CALLOUT_DATA_AT_NUM(mp, callout_num);
1532 t = d->slot[slot].type;
1533 if (IS_NOT_NULL(type)) *type = t;
1534 if (IS_NOT_NULL(val)) *val = d->slot[slot].val;
1535 return (t == ONIG_TYPE_VOID ? 1 : ONIG_NORMAL);
1536 }
1537
1538 extern int
onig_get_callout_data_by_callout_args_self_dont_clear_old(OnigCalloutArgs * args,int slot,OnigType * type,OnigValue * val)1539 onig_get_callout_data_by_callout_args_self_dont_clear_old(OnigCalloutArgs* args,
1540 int slot, OnigType* type,
1541 OnigValue* val)
1542 {
1543 return onig_get_callout_data_dont_clear_old(args->regex, args->msa->mp,
1544 args->num, slot, type, val);
1545 }
1546
1547 extern int
onig_get_callout_data(regex_t * reg,OnigMatchParam * mp,int callout_num,int slot,OnigType * type,OnigValue * val)1548 onig_get_callout_data(regex_t* reg, OnigMatchParam* mp,
1549 int callout_num, int slot,
1550 OnigType* type, OnigValue* val)
1551 {
1552 OnigType t;
1553 CalloutData* d;
1554
1555 if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT;
1556
1557 d = CALLOUT_DATA_AT_NUM(mp, callout_num);
1558 if (d->last_match_at_call_counter != mp->match_at_call_counter) {
1559 xmemset(d, 0, sizeof(*d));
1560 d->last_match_at_call_counter = mp->match_at_call_counter;
1561 }
1562
1563 t = d->slot[slot].type;
1564 if (IS_NOT_NULL(type)) *type = t;
1565 if (IS_NOT_NULL(val)) *val = d->slot[slot].val;
1566 return (t == ONIG_TYPE_VOID ? 1 : ONIG_NORMAL);
1567 }
1568
1569 extern int
onig_get_callout_data_by_tag(regex_t * reg,OnigMatchParam * mp,const UChar * tag,const UChar * tag_end,int slot,OnigType * type,OnigValue * val)1570 onig_get_callout_data_by_tag(regex_t* reg, OnigMatchParam* mp,
1571 const UChar* tag, const UChar* tag_end, int slot,
1572 OnigType* type, OnigValue* val)
1573 {
1574 int num;
1575
1576 num = onig_get_callout_num_by_tag(reg, tag, tag_end);
1577 if (num < 0) return num;
1578 if (num == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME;
1579
1580 return onig_get_callout_data(reg, mp, num, slot, type, val);
1581 }
1582
1583 extern int
onig_get_callout_data_by_callout_args(OnigCalloutArgs * args,int callout_num,int slot,OnigType * type,OnigValue * val)1584 onig_get_callout_data_by_callout_args(OnigCalloutArgs* args,
1585 int callout_num, int slot,
1586 OnigType* type, OnigValue* val)
1587 {
1588 return onig_get_callout_data(args->regex, args->msa->mp, callout_num, slot,
1589 type, val);
1590 }
1591
1592 extern int
onig_get_callout_data_by_callout_args_self(OnigCalloutArgs * args,int slot,OnigType * type,OnigValue * val)1593 onig_get_callout_data_by_callout_args_self(OnigCalloutArgs* args,
1594 int slot, OnigType* type, OnigValue* val)
1595 {
1596 return onig_get_callout_data(args->regex, args->msa->mp, args->num, slot,
1597 type, val);
1598 }
1599
1600 extern int
onig_set_callout_data(regex_t * reg,OnigMatchParam * mp,int callout_num,int slot,OnigType type,OnigValue * val)1601 onig_set_callout_data(regex_t* reg, OnigMatchParam* mp,
1602 int callout_num, int slot,
1603 OnigType type, OnigValue* val)
1604 {
1605 CalloutData* d;
1606
1607 if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT;
1608
1609 d = CALLOUT_DATA_AT_NUM(mp, callout_num);
1610 d->slot[slot].type = type;
1611 d->slot[slot].val = *val;
1612 d->last_match_at_call_counter = mp->match_at_call_counter;
1613
1614 return ONIG_NORMAL;
1615 }
1616
1617 extern int
onig_set_callout_data_by_tag(regex_t * reg,OnigMatchParam * mp,const UChar * tag,const UChar * tag_end,int slot,OnigType type,OnigValue * val)1618 onig_set_callout_data_by_tag(regex_t* reg, OnigMatchParam* mp,
1619 const UChar* tag, const UChar* tag_end, int slot,
1620 OnigType type, OnigValue* val)
1621 {
1622 int num;
1623
1624 num = onig_get_callout_num_by_tag(reg, tag, tag_end);
1625 if (num < 0) return num;
1626 if (num == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME;
1627
1628 return onig_set_callout_data(reg, mp, num, slot, type, val);
1629 }
1630
1631 extern int
onig_set_callout_data_by_callout_args(OnigCalloutArgs * args,int callout_num,int slot,OnigType type,OnigValue * val)1632 onig_set_callout_data_by_callout_args(OnigCalloutArgs* args,
1633 int callout_num, int slot,
1634 OnigType type, OnigValue* val)
1635 {
1636 return onig_set_callout_data(args->regex, args->msa->mp, callout_num, slot,
1637 type, val);
1638 }
1639
1640 extern int
onig_set_callout_data_by_callout_args_self(OnigCalloutArgs * args,int slot,OnigType type,OnigValue * val)1641 onig_set_callout_data_by_callout_args_self(OnigCalloutArgs* args,
1642 int slot, OnigType type, OnigValue* val)
1643 {
1644 return onig_set_callout_data(args->regex, args->msa->mp, args->num, slot,
1645 type, val);
1646 }
1647
1648 #else
/* Without USE_CALLOUT there is nothing to adjust; expands to nothing. */
#define ADJUST_MATCH_PARAM(reg, mp)
1650 #endif /* USE_CALLOUT */
1651
1652
1653 static int
stack_double(int * is_alloca,char ** arg_alloc_base,StackType ** arg_stk_base,StackType ** arg_stk_end,StackType ** arg_stk,MatchArg * msa)1654 stack_double(int* is_alloca, char** arg_alloc_base,
1655 StackType** arg_stk_base, StackType** arg_stk_end,
1656 StackType** arg_stk, MatchArg* msa)
1657 {
1658 unsigned int n;
1659 int used;
1660 size_t size;
1661 size_t new_size;
1662 char* alloc_base;
1663 char* new_alloc_base;
1664 StackType *stk_base, *stk_end, *stk;
1665
1666 alloc_base = *arg_alloc_base;
1667 stk_base = *arg_stk_base;
1668 stk_end = *arg_stk_end;
1669 stk = *arg_stk;
1670
1671 n = (unsigned int )(stk_end - stk_base);
1672 size = sizeof(StkPtrType) * msa->ptr_num + sizeof(StackType) * n;
1673 n *= 2;
1674 new_size = sizeof(StkPtrType) * msa->ptr_num + sizeof(StackType) * n;
1675 if (*is_alloca != 0) {
1676 new_alloc_base = (char* )xmalloc(new_size);
1677 if (IS_NULL(new_alloc_base)) {
1678 STACK_SAVE(msa, *is_alloca, alloc_base);
1679 return ONIGERR_MEMORY;
1680 }
1681 xmemcpy(new_alloc_base, alloc_base, size);
1682 *is_alloca = 0;
1683 }
1684 else {
1685 if (msa->match_stack_limit != 0 && n > msa->match_stack_limit) {
1686 if ((unsigned int )(stk_end - stk_base) == msa->match_stack_limit) {
1687 STACK_SAVE(msa, *is_alloca, alloc_base);
1688 return ONIGERR_MATCH_STACK_LIMIT_OVER;
1689 }
1690 else
1691 n = msa->match_stack_limit;
1692 }
1693 new_alloc_base = (char* )xrealloc(alloc_base, new_size);
1694 if (IS_NULL(new_alloc_base)) {
1695 STACK_SAVE(msa, *is_alloca, alloc_base);
1696 return ONIGERR_MEMORY;
1697 }
1698 }
1699
1700 alloc_base = new_alloc_base;
1701 used = (int )(stk - stk_base);
1702 *arg_alloc_base = alloc_base;
1703 *arg_stk_base = (StackType* )(alloc_base
1704 + (sizeof(StkPtrType) * msa->ptr_num));
1705 *arg_stk = *arg_stk_base + used;
1706 *arg_stk_end = *arg_stk_base + n;
1707 return 0;
1708 }
1709
/* Make room for `n` more entries: when fewer than `n` are free, call
   stack_double() once, return its error code from the enclosing function
   on failure, and refresh the slot-array pointers.
   Only one doubling is performed per invocation. */
#define STACK_ENSURE(n) do {\
    if ((int )(stk_end - stk) < (n)) {\
    int r = stack_double(&is_alloca, &alloc_base, &stk_base, &stk_end, &stk, msa);\
    if (r != 0) return r;\
    UPDATE_FOR_STACK_REALLOC;\
  }\
} while(0)

/* Convert between a StackIndex and an entry pointer, relative to the
   caller's stk_base. */
#define STACK_AT(index)        (stk_base + (index))
#define GET_STACK_INDEX(stk)   ((stk) - stk_base)
1720
/*
 * Basic push macros.  Each fills the entry at `stk` and then invokes
 * STACK_INC (defined outside this section; presumably advances stk).
 * All of them except STACK_PUSH_ENSURED call STACK_ENSURE(1) first.
 */

/* Push an entry that carries only a type. */
#define STACK_PUSH_TYPE(stack_type) do {\
  STACK_ENSURE(1);\
  stk->type = (stack_type);\
  STACK_INC;\
} while(0)

/* True when the entry's type has a STK_MASK_TO_VOID_TARGET bit set. */
#define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)

/* Push a state entry: byte-code position `pat` and string position `s`. */
#define STACK_PUSH(stack_type,pat,s) do {\
  STACK_ENSURE(1);\
  stk->type = (stack_type);\
  stk->u.state.pcode = (pat);\
  stk->u.state.pstr  = (s);\
  STACK_INC;\
} while(0)

/* Same as STACK_PUSH, and also records `id` in zid. */
#define STACK_PUSH_WITH_ZID(stack_type,pat,s,id) do {\
  STACK_ENSURE(1);\
  stk->type = (stack_type);\
  stk->zid  = (int )(id);\
  stk->u.state.pcode = (pat);\
  stk->u.state.pstr  = (s);\
  STACK_INC;\
} while(0)

/* Push type and byte-code position only, without a capacity check; the
   caller must already have ensured room.  pstr is left unset. */
#define STACK_PUSH_ENSURED(stack_type,pat) do {\
  stk->type = (stack_type);\
  stk->u.state.pcode = (pat);\
  STACK_INC;\
} while(0)
1751
/* Push a state entry without a capacity check.  Only the
   ONIG_DEBUG_MATCH build also stores the caller's `s` in pstr. */
#ifdef ONIG_DEBUG_MATCH
#define STACK_PUSH_BOTTOM(stack_type,pat) do {\
  stk->type = (stack_type);\
  stk->u.state.pcode = (pat);\
  stk->u.state.pstr  = s;\
  STACK_INC;\
} while (0)
#else
#define STACK_PUSH_BOTTOM(stack_type,pat) do {\
  stk->type = (stack_type);\
  stk->u.state.pcode = (pat);\
  STACK_INC;\
} while (0)
#endif

/* Shorthands for the alternative-branch entry types. */
#define STACK_PUSH_ALT(pat,s)             STACK_PUSH(STK_ALT,pat,s)
#define STACK_PUSH_SUPER_ALT(pat,s)       STACK_PUSH(STK_SUPER_ALT,pat,s)
#define STACK_PUSH_ALT_WITH_ZID(pat,s,id) STACK_PUSH_WITH_ZID(STK_ALT,pat,s,id)
1770
/* Disabled code: it refers to a `u.repeat` member that StackType does not
   declare. */
#if 0
#define STACK_PUSH_REPEAT(sid, pat) do {\
  STACK_ENSURE(1);\
  stk->type = STK_REPEAT;\
  stk->zid  = (sid);\
  stk->u.repeat.pcode = (pat);\
  STACK_INC;\
} while(0)
#endif

/* Push a repeat-counter entry for repeat `sid` with count `ct`.  With
   USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR the previous repeat_stk[sid] is
   saved in the entry and repeat_stk[sid] is pointed at the new entry. */
#define STACK_PUSH_REPEAT_INC(sid, ct) do {\
  STACK_ENSURE(1);\
  stk->type = STK_REPEAT_INC;\
  stk->zid  = (sid);\
  stk->u.repeat_inc.count = (ct);\
  SAVE_REPEAT_STK_VAR(sid);\
  LOAD_TO_REPEAT_STK_VAR(sid);\
  STACK_INC;\
} while(0)
1790
/* Push the start of capture `mnum` at string position `s`.  The current
   start/end slots are saved in the entry, the start slot is pointed at
   the new entry and the end slot is invalidated. */
#define STACK_PUSH_MEM_START(mnum, s) do {\
  STACK_ENSURE(1);\
  stk->type = STK_MEM_START;\
  stk->zid  = (mnum);\
  stk->u.mem.pstr       = (s);\
  stk->u.mem.prev_start = mem_start_stk[mnum];\
  stk->u.mem.prev_end   = mem_end_stk[mnum];\
  mem_start_stk[mnum].i = GET_STACK_INDEX(stk);\
  mem_end_stk[mnum].i   = INVALID_STACK_INDEX;\
  STACK_INC;\
} while(0)

/* Push the end of capture `mnum` at string position `s`.  The current
   slots are saved in the entry and the end slot is pointed at it; the
   start slot is left as it is. */
#define STACK_PUSH_MEM_END(mnum, s) do {\
  STACK_ENSURE(1);\
  stk->type = STK_MEM_END;\
  stk->zid  = (mnum);\
  stk->u.mem.pstr       = (s);\
  stk->u.mem.prev_start = mem_start_stk[mnum];\
  stk->u.mem.prev_end   = mem_end_stk[mnum];\
  mem_end_stk[mnum].i   = GET_STACK_INDEX(stk);\
  STACK_INC;\
} while(0)

/* Push a marker for capture `mnum` that carries no position; the slots
   are not touched.  STACK_GET_MEM_START counts it like an end entry. */
#define STACK_PUSH_MEM_END_MARK(mnum) do {\
  STACK_ENSURE(1);\
  stk->type = STK_MEM_END_MARK;\
  stk->zid  = (mnum);\
  STACK_INC;\
} while(0)
1820
/* Walk down from the top of the stack and leave `k` on the STK_MEM_START
   entry of capture `mnum` that is not closed by an end/end-mark entry
   above it (`level` counts those closers).  When no such entry exists the
   loop ends with k == stk_base and k is not validated. */
#define STACK_GET_MEM_START(mnum, k) do {\
  int level = 0;\
  k = stk;\
  while (k > stk_base) {\
    k--;\
    if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
      && k->zid == (mnum)) {\
      level++;\
    }\
    else if (k->type == STK_MEM_START && k->zid == (mnum)) {\
      if (level == 0) break;\
      level--;\
    }\
  }\
} while(0)
1836
/*
 * Scan upward from entry `k` (an lvalue; it is advanced) to the top of the
 * stack and report the outermost start/end pair of capture `mnum`:
 * `start` receives the position of the first STK_MEM_START seen at nesting
 * level 0, `end` the position of the STK_MEM_END that brings the nesting
 * level back to 0.  On success `k` is left on that end entry.  When no
 * closing entry is found, `end` is not assigned and `k` equals stk.
 *
 * The capture number of an entry is stored in `zid`; the `mem` member of
 * StackType has no `num` field.
 */
#define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
  int level = 0;\
  while (k < stk) {\
    if (k->type == STK_MEM_START && k->zid == (mnum)) {\
      if (level == 0) (start) = k->u.mem.pstr;\
      level++;\
    }\
    else if (k->type == STK_MEM_END && k->zid == (mnum)) {\
      level--;\
      if (level == 0) {\
        (end) = k->u.mem.pstr;\
        break;\
      }\
    }\
    k++;\
  }\
} while(0)
1854
/* Push the start of empty-check `cnum`, remembering string position `s`.
   With USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR the previous
   empty_check_stk[cnum] is saved in the entry and the slot is pointed at
   the new entry. */
#define STACK_PUSH_EMPTY_CHECK_START(cnum, s) do {\
  STACK_ENSURE(1);\
  stk->type = STK_EMPTY_CHECK_START;\
  stk->zid  = (cnum);\
  stk->u.empty_check.pstr = (s);\
  SAVE_EMPTY_CHECK_STK_VAR(cnum);\
  LOAD_TO_EMPTY_CHECK_STK_VAR(cnum);\
  STACK_INC;\
} while(0)

/* Push the end marker of empty-check `cnum` (no position is stored). */
#define STACK_PUSH_EMPTY_CHECK_END(cnum) do {\
  STACK_ENSURE(1);\
  stk->type = STK_EMPTY_CHECK_END;\
  stk->zid  = (cnum);\
  STACK_INC;\
} while(0)
1871
/* Push a call frame holding return address `pat`.  The call_frame member
   exists only when USE_CALL is defined; the pstr field is not set here. */
#define STACK_PUSH_CALL_FRAME(pat) do {\
  STACK_ENSURE(1);\
  stk->type = STK_CALL_FRAME;\
  stk->u.call_frame.ret_addr = (pat);\
  STACK_INC;\
} while(0)

/* Push a return marker (type only). */
#define STACK_PUSH_RETURN do {\
  STACK_ENSURE(1);\
  stk->type = STK_RETURN;\
  STACK_INC;\
} while(0)

/* Push a mark with id `sid`; see STACK_POP_TO_MARK and
   STACK_TO_VOID_TO_MARK for how marks are consumed. */
#define STACK_PUSH_MARK(sid) do {\
  STACK_ENSURE(1);\
  stk->type = STK_MARK;\
  stk->zid  = (sid);\
  STACK_INC;\
} while(0)

/* Push a mark with id `sid` that also stores `s` in u.val.v. */
#define STACK_PUSH_MARK_WITH_POS(sid, s) do {\
  STACK_ENSURE(1);\
  stk->type = STK_MARK;\
  stk->zid  = (sid);\
  stk->u.val.v = (UChar* )(s);\
  STACK_INC;\
} while(0)
1899
/* Push a saved value `sval` of kind `stype` under id `sid`.
   u.val.v2 is not set. */
#define STACK_PUSH_SAVE_VAL(sid, stype, sval) do {\
  STACK_ENSURE(1);\
  stk->type = STK_SAVE_VAL;\
  stk->zid  = (sid);\
  stk->u.val.type = (stype);\
  stk->u.val.v    = (UChar* )(sval);\
  STACK_INC;\
} while(0)

/* As written here, this expands to exactly the same code as
   STACK_PUSH_SAVE_VAL. */
#define STACK_PUSH_SAVE_VAL_WITH_SPREV(sid, stype, sval) do {\
  STACK_ENSURE(1);\
  stk->type = STK_SAVE_VAL;\
  stk->zid  = (sid);\
  stk->u.val.type = (stype);\
  stk->u.val.v    = (UChar* )(sval);\
  STACK_INC;\
} while(0)
1917
/* Assign to `sval` the value of the topmost STK_SAVE_VAL entry of kind
   `stype`, regardless of id.  `sval` is left untouched when none exists. */
#define STACK_GET_SAVE_VAL_TYPE_LAST(stype, sval) do {\
  StackType *k = stk;\
  while (k > stk_base) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST"); \
    if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)) {\
      (sval) = k->u.val.v;\
      break;\
    }\
  }\
} while (0)

/* Assign to `sval` the value of the topmost STK_SAVE_VAL entry with kind
   `stype` and id `sid` that is found while `level` is 0.  Walking down,
   a STK_RETURN entry raises `level` and a STK_CALL_FRAME entry lowers it.
   When `clear` is non-zero the entry found is turned into STK_VOID.
   `sval` is left untouched when nothing is found. */
#define STACK_GET_SAVE_VAL_TYPE_LAST_ID(stype, sid, sval, clear) do {\
  int level = 0;\
  StackType *k = stk;\
  while (k > stk_base) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID"); \
    if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\
        && k->zid == (sid)) {\
      if (level == 0) {\
        (sval) = k->u.val.v;\
        if (clear != 0) k->type = STK_VOID;\
        break;\
      }\
    }\
    else if (k->type == STK_CALL_FRAME)\
      level--;\
    else if (k->type == STK_RETURN)\
      level++;\
  }\
} while (0)

/* Same search as STACK_GET_SAVE_VAL_TYPE_LAST_ID without the `clear`
   option.  The debug label passed to STACK_BASE_CHECK is the one of that
   macro. */
#define STACK_GET_SAVE_VAL_TYPE_LAST_ID_WITH_SPREV(stype, sid, sval) do { \
  int level = 0;\
  StackType *k = stk;\
  while (k > stk_base) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID"); \
    if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\
        && k->zid == (sid)) {\
      if (level == 0) {\
        (sval) = k->u.val.v;\
        break;\
      }\
    }\
    else if (k->type == STK_CALL_FRAME)\
      level--;\
    else if (k->type == STK_RETURN)\
      level++;\
  }\
} while (0)
1970
/* Push a callout entry for a callout of contents: zid is set to
   ONIG_NON_NAME_ID, `anum` is the callout number and `func` the function
   that POP_CALLOUT_CASE invokes when the entry is popped. */
#define STACK_PUSH_CALLOUT_CONTENTS(anum, func) do {\
  STACK_ENSURE(1);\
  stk->type = STK_CALLOUT;\
  stk->zid  = ONIG_NON_NAME_ID;\
  stk->u.callout.num = (anum);\
  stk->u.callout.func = (func);\
  STACK_INC;\
} while(0)

/* Push a callout entry for a named callout; `aid` is stored in zid. */
#define STACK_PUSH_CALLOUT_NAME(aid, anum, func) do {\
  STACK_ENSURE(1);\
  stk->type = STK_CALLOUT;\
  stk->zid  = (aid);\
  stk->u.callout.num = (anum);\
  stk->u.callout.func = (func);\
  STACK_INC;\
} while(0)
1988
/* Debug-only underflow guard: when `p` is below stk_base, print the
   location label `at` and abort the match with ONIGERR_STACK_BUG.
   Expands to nothing unless ONIG_DEBUG is defined. */
#ifdef ONIG_DEBUG
#define STACK_BASE_CHECK(p, at) \
  if ((p) < stk_base) {\
    fprintf(DBGFP, "at %s\n", at);\
    MATCH_AT_ERROR_RETURN(ONIGERR_STACK_BUG);\
  }
#else
#define STACK_BASE_CHECK(p, at)
#endif

/* Drop the top entry without looking at it. */
#define STACK_POP_ONE do {\
  stk--;\
  STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
} while(0)
2003
2004
/* `else if` arm for STACK_POP: when a STK_CALLOUT entry is popped, invoke
   RETRACTION_CALLOUT (defined outside this section) with the function,
   name id and callout number stored in the entry and the user data of the
   match param.  Expands to nothing without USE_CALLOUT. */
#ifdef USE_CALLOUT
#define POP_CALLOUT_CASE \
  else if (stk->type == STK_CALLOUT) {\
    RETRACTION_CALLOUT(stk->u.callout.func, stk->zid, stk->u.callout.num, msa->mp->callout_user_data);\
  }
#else
#define POP_CALLOUT_CASE
#endif
2013
/*
 * Pop entries until one whose type has a STK_MASK_POP_USED bit is on top
 * (stk is left pointing at that entry).  How much state is restored from
 * the entries skipped on the way depends on the caller's pop_level:
 *   STACK_POP_LEVEL_FREE      : nothing is restored;
 *   STACK_POP_LEVEL_MEM_START : capture slots are restored from
 *                               STK_MEM_START entries;
 *   any other value           : for entries with a STK_MASK_POP_HANDLED
 *                               bit, capture slots are restored from
 *                               STK_MEM_START and STK_MEM_END entries and
 *                               the POP_REPEAT_INC, POP_EMPTY_CHECK_START,
 *                               POP_CALL and POP_CALLOUT_CASE arms run.
 */
#define STACK_POP  do {\
  switch (pop_level) {\
  case STACK_POP_LEVEL_FREE:\
    while (1) {\
      stk--;\
      STACK_BASE_CHECK(stk, "STACK_POP"); \
      if ((stk->type & STK_MASK_POP_USED) != 0)  break;\
    }\
    break;\
  case STACK_POP_LEVEL_MEM_START:\
    while (1) {\
      stk--;\
      STACK_BASE_CHECK(stk, "STACK_POP 2"); \
      if ((stk->type & STK_MASK_POP_USED) != 0)  break;\
      else if (stk->type == STK_MEM_START) {\
        mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
        mem_end_stk[stk->zid]   = stk->u.mem.prev_end;\
      }\
    }\
    break;\
  default:\
    while (1) {\
      stk--;\
      STACK_BASE_CHECK(stk, "STACK_POP 3"); \
      if ((stk->type & STK_MASK_POP_USED) != 0)  break;\
      else if ((stk->type & STK_MASK_POP_HANDLED) != 0) {\
        if (stk->type == STK_MEM_START) {\
          mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
          mem_end_stk[stk->zid]   = stk->u.mem.prev_end;\
        }\
        else if (stk->type == STK_MEM_END) {\
          mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
          mem_end_stk[stk->zid]   = stk->u.mem.prev_end;\
        }\
        POP_REPEAT_INC \
        POP_EMPTY_CHECK_START \
        POP_CALL \
        POP_CALLOUT_CASE\
      }\
    }\
    break;\
  }\
} while(0)
2057
/* Pop entries until the STK_MARK entry with id `sid` is on top (stk is
   left pointing at it).  Marks with another id are skipped.  From the
   other skipped entries with a STK_MASK_POP_HANDLED_TIL bit, capture
   slots, repeat/empty-check slots and the call nest counter are restored;
   callouts are deliberately not invoked (see the comment in the body). */
#define STACK_POP_TO_MARK(sid) do {\
  while (1) {\
    stk--;\
    STACK_BASE_CHECK(stk, "STACK_POP_TO_MARK");\
    if ((stk->type & STK_MASK_POP_HANDLED_TIL) != 0) {\
      if (stk->type == STK_MARK) {\
        if (stk->zid == (sid)) break;\
      }\
      else {\
        if (stk->type == STK_MEM_START) {\
          mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
          mem_end_stk[stk->zid]   = stk->u.mem.prev_end;\
        }\
        else if (stk->type == STK_MEM_END) {\
          mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
          mem_end_stk[stk->zid]   = stk->u.mem.prev_end;\
        }\
        POP_REPEAT_INC \
        POP_EMPTY_CHECK_START \
        POP_CALL \
        /* Don't call callout here because negation of total success by (?!..) (?<!..) */\
      }\
    }\
  }\
} while(0)
2083
2084
/* Pop entries until one of type `til_type` is on top (stk is left
   pointing at it), restoring state from skipped entries in the same way
   as STACK_POP_TO_MARK.  `aname` is the label given to STACK_BASE_CHECK.
   The stop test is made only on entries whose type has a
   STK_MASK_POP_HANDLED_TIL bit. */
#define POP_TIL_BODY(aname, til_type) do {\
  while (1) {\
    stk--;\
    STACK_BASE_CHECK(stk, (aname));\
    if ((stk->type & STK_MASK_POP_HANDLED_TIL) != 0) {\
      if (stk->type == (til_type)) break;\
      else {\
        if (stk->type == STK_MEM_START) {\
          mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
          mem_end_stk[stk->zid]   = stk->u.mem.prev_end;\
        }\
        else if (stk->type == STK_MEM_END) {\
          mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
          mem_end_stk[stk->zid]   = stk->u.mem.prev_end;\
        }\
        POP_REPEAT_INC \
        POP_EMPTY_CHECK_START \
        POP_CALL \
        /* Don't call callout here because negation of total success by (?!..) (?<!..) */\
      }\
    }\
  }\
} while(0)
2108
2109
/* Walk down from the top and turn every "to-void target" entry into
   STK_VOID, up to and including the STK_MARK entry with id `sid`.  Marks
   with another id are left intact.  stk itself is not moved; `k` is left
   on the voided mark. */
#define STACK_TO_VOID_TO_MARK(k,sid) do {\
  k = stk;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_TO_VOID_TO_MARK");\
    if (IS_TO_VOID_TARGET(k)) {\
      if (k->type == STK_MARK) {\
        if (k->zid == (sid)) {\
          k->type = STK_VOID;\
          break;\
        } /* don't void different id mark */ \
      }\
      else\
        k->type = STK_VOID;\
    }\
  }\
} while(0)
2127
/* Walk down from the top and leave `k` on the nearest
   STK_EMPTY_CHECK_START entry with id `sid`.  The loop has no lower bound
   of its own; such an entry must exist. */
#define EMPTY_CHECK_START_SEARCH(sid, k) do {\
  k = stk;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "EMPTY_CHECK_START_SEARCH"); \
    if (k->type == STK_EMPTY_CHECK_START) {\
      if (k->zid == (sid)) break;\
    }\
  }\
} while(0)

#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR

/* Locate the start entry of empty-check `sid`: read it directly from
   empty_check_stk[] when the regex has no subexp calls
   (reg->num_call == 0), otherwise search the stack. */
#define GET_EMPTY_CHECK_START(sid, k) do {\
  if (reg->num_call == 0) {\
    k = STACK_AT(empty_check_stk[sid]);\
  }\
  else {\
    EMPTY_CHECK_START_SEARCH(sid, k);\
  }\
} while(0)
#else

#define GET_EMPTY_CHECK_START(sid, k)  EMPTY_CHECK_START_SEARCH(sid, k)

#endif


/* Set `isnull` to 1 when string position `s` equals the position stored
   at the start of empty-check `sid`, else to 0. */
#define STACK_EMPTY_CHECK(isnull, sid, s) do {\
  StackType* k;\
  GET_EMPTY_CHECK_START(sid, k);\
  (isnull) = (k->u.empty_check.pstr == (s));\
} while(0)
2161
/* For a STK_MEM_START entry `k`, set `addr` to the end position that was
   current for the capture before the entry was pushed, or to 0 when there
   was none.  The saved value is read as a stack index when the capture's
   bit is set in reg->push_mem_end, and as a direct string pointer
   otherwise. */
#define STACK_MEM_START_GET_PREV_END_ADDR(k /* STK_MEM_START*/, reg, addr) do {\
  if (k->u.mem.prev_end.i == INVALID_STACK_INDEX) {\
    (addr) = 0;\
  }\
  else {\
    if (MEM_STATUS_AT((reg)->push_mem_end, k->zid))\
      (addr) = STACK_AT(k->u.mem.prev_end.i)->u.mem.pstr;\
    else\
      (addr) = k->u.mem.prev_end.s;\
  }\
} while (0)
2173
2174 #ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
/*
 * Empty check that also inspects captures.  Result in `isnull`:
 *    0 : the position moved since the start of empty-check `sid`, or a
 *        capture flagged in reg->empty_status_mem had no previous end, or
 *        its previous start position differs from its previous end;
 *    1 : the position did not move and no flagged capture says otherwise;
 *   -1 : as for 1, but some flagged capture's previous end differs from
 *        `s` ("empty, but position changed").
 * The scan runs from the start entry up to the top of the stack and reads
 * prev_start as a stack index.
 */
#define STACK_EMPTY_CHECK_MEM(isnull, sid, s, reg) do {\
  StackType* k;\
  GET_EMPTY_CHECK_START(sid, k);\
  if (k->u.empty_check.pstr != (s)) {\
    (isnull) = 0;\
  }\
  else {\
    UChar* endp;\
    (isnull) = 1;\
    while (k < stk) {\
      if (k->type == STK_MEM_START &&\
        MEM_STATUS_LIMIT_AT((reg)->empty_status_mem, k->zid)) {\
        STACK_MEM_START_GET_PREV_END_ADDR(k, reg, endp);\
        if (endp == 0) {\
          (isnull) = 0; break;\
        }\
        else if (STACK_AT(k->u.mem.prev_start.i)->u.mem.pstr != endp) {\
          (isnull) = 0; break;\
        }\
        else if (endp != s) {\
          (isnull) = -1; /* empty, but position changed */ \
        }\
      }\
      k++;\
    }\
  }\
} while(0)
2202
/*
 * Variant of STACK_EMPTY_CHECK_MEM that always searches the stack and
 * tracks nesting of empty-check `sid`.  Walking down, a
 * STK_EMPTY_CHECK_END with this id raises `level` and a
 * STK_EMPTY_CHECK_START with this id lowers it; the start entry met at
 * level 0 is the one compared with `s`.  The result values in `isnull`
 * are the same as for STACK_EMPTY_CHECK_MEM (0, 1 or -1).
 * NOTE(review): the upward scan begins on the matching start entry itself,
 * so `level` is incremented before any STK_MEM_START entry is examined and
 * captures are only inspected once `level` has returned to 0 -- confirm
 * that this is intended.
 */
#define STACK_EMPTY_CHECK_MEM_REC(isnull,sid,s,reg) do {\
  int level = 0;\
  StackType* k = stk;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEM_REC");\
    if (k->type == STK_EMPTY_CHECK_START) {\
      if (k->zid == (sid)) {\
        if (level == 0) {\
          if (k->u.empty_check.pstr != (s)) {\
            (isnull) = 0;\
            break;\
          }\
          else {\
            UChar* endp;\
            (isnull) = 1;\
            while (k < stk) {\
              if (k->type == STK_MEM_START) {\
                if (level == 0 && \
                  MEM_STATUS_LIMIT_AT((reg)->empty_status_mem, k->zid) !=0) {\
                  STACK_MEM_START_GET_PREV_END_ADDR(k, reg, endp);\
                  if (endp == 0) {\
                    (isnull) = 0; break;\
                  }\
                  else if (STACK_AT(k->u.mem.prev_start.i)->u.mem.pstr != endp) { \
                    (isnull) = 0; break;\
                  }\
                  else if (endp != s) {\
                    (isnull) = -1; /* empty, but position changed */\
                  }\
                }\
              }\
              else if (k->type == STK_EMPTY_CHECK_START) {\
                if (k->zid == (sid)) level++;\
              }\
              else if (k->type == STK_EMPTY_CHECK_END) {\
                if (k->zid == (sid)) level--;\
              }\
              k++;\
            }\
            break;\
          }\
        }\
        else {\
          level--;\
        }\
      }\
    }\
    else if (k->type == STK_EMPTY_CHECK_END) {\
      if (k->zid == (sid)) level++;\
    }\
  }\
} while(0)
2256 #else
/*
 * Position-only empty check (used when captures are not examined).
 * Walks down from `stk`: every STK_EMPTY_CHECK_END raises `level`; an
 * STK_EMPTY_CHECK_START lowers it unless it carries `id` and is seen at
 * level 0, in which case `isnull` becomes 1 when its saved position equals
 * `s` (0 otherwise) and the walk stops.
 */
#define STACK_EMPTY_CHECK_REC(isnull,id,s) do {\
  int level = 0;\
  StackType* k = stk;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_REC"); \
    if (k->type == STK_EMPTY_CHECK_START) {\
      if (k->u.empty_check.num == (id) && level == 0) {\
        (isnull) = (k->u.empty_check.pstr == (s));\
        break;\
      }\
      level--;\
    }\
    else if (k->type == STK_EMPTY_CHECK_END) {\
      level++;\
    }\
  }\
} while(0)
2277 #endif /* USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT */
2278
/*
 * Find the repeat counter for `sid` by walking down from `stk`.
 * The first STK_REPEAT_INC entry with that id supplies `c`.  When an
 * STK_RETURN entry is met, the walk continues downward past entries until
 * the STK_CALL_FRAME that balances it (nested RETURN/CALL_FRAME pairs are
 * counted with `level`).
 */
#define STACK_GET_REPEAT_COUNT_SEARCH(sid, c) do {\
  StackType* k = stk;\
  for (;;) {\
    (k)--;\
    STACK_BASE_CHECK(k, "STACK_GET_REPEAT_COUNT_SEARCH");\
    if ((k)->type == STK_REPEAT_INC) {\
      if ((k)->zid != (sid)) continue;\
      (c) = (k)->u.repeat_inc.count;\
      break;\
    }\
    if ((k)->type != STK_RETURN) continue;\
    {\
      int level = -1;\
      do {\
        (k)--;\
        if ((k)->type == STK_CALL_FRAME) level++;\
        else if ((k)->type == STK_RETURN) level--;\
      } while (level != 0);\
    }\
  }\
} while(0)
2303
#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR

/*
 * Read the repeat counter for `sid` into `c`.
 * When reg->num_call is non-zero the stack is searched; otherwise the entry
 * is reached directly through the index cached in repeat_stk[sid].
 */
#define STACK_GET_REPEAT_COUNT(sid, c) do {\
  if (reg->num_call != 0) {\
    STACK_GET_REPEAT_COUNT_SEARCH(sid, c);\
  }\
  else {\
    (c) = (STACK_AT(repeat_stk[sid]))->u.repeat_inc.count;\
  }\
} while(0)
#else
/* No cached index available: always search the stack. */
#define STACK_GET_REPEAT_COUNT(sid, c) STACK_GET_REPEAT_COUNT_SEARCH(sid, c)
#endif
2317
2318 #ifdef USE_CALL
/*
 * Store in `addr` the ret_addr of the call frame to return to.
 * Walks down from `stk`; each STK_RETURN raises `level`, each
 * STK_CALL_FRAME met at a non-zero level lowers it, and the first
 * STK_CALL_FRAME met at level 0 supplies the address.
 */
#define STACK_RETURN(addr)  do {\
  int level = 0;\
  StackType* k = stk;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_RETURN"); \
    if (k->type == STK_CALL_FRAME) {\
      if (level != 0) {\
        level--;\
      }\
      else {\
        (addr) = k->u.call_frame.ret_addr;\
        break;\
      }\
    }\
    else if (k->type == STK_RETURN) {\
      level++;\
    }\
  }\
} while(0)
2336
/*
 * Same walk as STACK_RETURN, but the caller supplies the cursor `k`, which
 * is left pointing at the STK_CALL_FRAME entry whose ret_addr was stored
 * in `addr`.
 */
#define GET_STACK_RETURN_CALL(k,addr) do {\
  int level = 0;\
  k = stk;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "GET_STACK_RETURN_CALL");\
    if (k->type == STK_CALL_FRAME) {\
      if (level != 0) {\
        level--;\
      }\
      else {\
        (addr) = k->u.call_frame.ret_addr;\
        break;\
      }\
    }\
    else if (k->type == STK_RETURN) {\
      level++;\
    }\
  }\
} while(0)
2354 #endif
2355
2356
/*
 * Compare `len` bytes at s1 and s2; jump to the enclosing `fail` label on
 * the first difference.  All three arguments must be modifiable lvalues:
 * s1 and s2 are advanced past every byte compared and len is counted down
 * (it ends at -1 after a full match).
 */
#define STRING_CMP(s1,s2,len) do {\
  for (;;) {\
    if (len-- <= 0) break;\
    if (*s1++ != *s2++) goto fail;\
  }\
} while(0)
2362
/*
 * Case-insensitive comparison through string_cmp_ic(), using the `encode`
 * variable of the enclosing scope; jumps to `fail` when it reports a
 * mismatch (returns 0).
 */
#define STRING_CMP_IC(case_fold_flag,s1,ps2,len) do {\
  if (! string_cmp_ic(encode, case_fold_flag, s1, ps2, len)) {\
    goto fail;\
  }\
} while(0)
2367
/*
 * Case-insensitive comparison of the mblen bytes at s1 with the mblen bytes
 * at *ps2.  Both sides are case-folded one step at a time with
 * ONIGENC_MBC_CASE_FOLD and the folded bytes are compared.
 *
 * Returns 1 on a match and stores the advanced second pointer in *ps2;
 * returns 0 on a mismatch, leaving *ps2 untouched.
 */
static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
                         UChar* s1, UChar** ps2, int mblen)
{
  UChar fold1[ONIGENC_MBC_CASE_FOLD_MAXLEN];
  UChar fold2[ONIGENC_MBC_CASE_FOLD_MAXLEN];
  UChar* s2   = *ps2;
  UChar* end1 = s1 + mblen;
  UChar* end2 = s2 + mblen;

  while (s1 < end1) {
    int i;
    int n1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, end1, fold1);
    int n2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, end2, fold2);

    if (n1 != n2) return 0;

    for (i = 0; i < n1; i++) {
      if (fold1[i] != fold2[i]) return 0;
    }

    /* Second side exhausted: it is a match only if the first side is too. */
    if (s2 >= end2) {
      if (s1 < end1) return 0;
      break;
    }
  }

  *ps2 = s2;
  return 1;
}
2399
/*
 * Like STRING_CMP, but reports the outcome in `is_fail` (1 on the first
 * differing byte, 0 when all `len` bytes match) instead of jumping.
 * s1, s2 and len must be modifiable lvalues; they are advanced / counted
 * down exactly as far as the comparison got.
 */
#define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
  for (is_fail = 0; len-- > 0; ) {\
    if (*s1++ == *s2++) continue;\
    is_fail = 1;\
    break;\
  }\
} while(0)
2408
/*
 * Case-insensitive counterpart of STRING_CMP_VALUE: is_fail becomes 1 when
 * string_cmp_ic() returns 0 and 0 otherwise.  Uses the `encode` variable of
 * the enclosing scope.
 */
#define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,is_fail) do {\
  is_fail = (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) ? 1 : 0;\
} while(0)
2415
2416
/*
 * Position helpers.  They read the variables str, end, s and right_range
 * of the enclosing scope.
 */
#define IS_EMPTY_STR           (str == end)
#define ON_STR_BEGIN(s)        ((s) == str)
#define ON_STR_END(s)          ((s) == end)
/* At least one byte is available before right_range. */
#define DATA_ENSURE_CHECK1     (s < right_range)
/* At least n bytes are available before right_range.  Written as a pointer
   difference (like DATA_ENSURE) so that no pointer beyond the end of the
   buffer is ever formed. */
#define DATA_ENSURE_CHECK(n)   (right_range - s >= (n))
/* Jump to `fail` unless n bytes are available.  This expands to a bare `if`;
   use it only as a complete statement. */
#define DATA_ENSURE(n)         if (right_range - s < (n)) goto fail

/* Load right_range from the in_right_range argument, dropping const. */
#define INIT_RIGHT_RANGE    right_range = (UChar* )in_right_range
2425
2426 #ifdef USE_CAPTURE_HISTORY
2427 static int
make_capture_history_tree(OnigCaptureTreeNode * node,StackType ** kp,StackType * stk_top,UChar * str,regex_t * reg)2428 make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp,
2429 StackType* stk_top, UChar* str, regex_t* reg)
2430 {
2431 int n, r;
2432 OnigCaptureTreeNode* child;
2433 StackType* k = *kp;
2434
2435 while (k < stk_top) {
2436 if (k->type == STK_MEM_START) {
2437 n = k->zid;
2438 if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
2439 MEM_STATUS_AT(reg->capture_history, n) != 0) {
2440 child = history_node_new();
2441 CHECK_NULL_RETURN_MEMERR(child);
2442 child->group = n;
2443 child->beg = (int )(k->u.mem.pstr - str);
2444 r = history_tree_add_child(node, child);
2445 if (r != 0) return r;
2446 *kp = (k + 1);
2447 r = make_capture_history_tree(child, kp, stk_top, str, reg);
2448 if (r != 0) return r;
2449
2450 k = *kp;
2451 child->end = (int )(k->u.mem.pstr - str);
2452 }
2453 }
2454 else if (k->type == STK_MEM_END) {
2455 if (k->zid == node->group) {
2456 node->end = (int )(k->u.mem.pstr - str);
2457 *kp = k;
2458 return 0;
2459 }
2460 }
2461 k++;
2462 }
2463
2464 return 1; /* 1: root node ending. */
2465 }
2466 #endif
2467
2468 #ifdef USE_BACKREF_WITH_LEVEL
/* Return 1 if `mem` occurs among the first `num` entries of memp, else 0. */
static int mem_is_in_memp(int mem, int num, MemNumType* memp)
{
  while (num-- > 0) {
    if ((int )*memp++ == mem) return 1;
  }
  return 0;
}
2478
2479 static int
backref_match_at_nested_level(regex_t * reg,StackType * top,StackType * stk_base,int ignore_case,int case_fold_flag,int nest,int mem_num,MemNumType * memp,UChar ** s,const UChar * send)2480 backref_match_at_nested_level(regex_t* reg,
2481 StackType* top, StackType* stk_base,
2482 int ignore_case, int case_fold_flag,
2483 int nest, int mem_num, MemNumType* memp,
2484 UChar** s, const UChar* send)
2485 {
2486 UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
2487 int level;
2488 StackType* k;
2489
2490 level = 0;
2491 k = top;
2492 k--;
2493 while (k >= stk_base) {
2494 if (k->type == STK_CALL_FRAME) {
2495 level--;
2496 }
2497 else if (k->type == STK_RETURN) {
2498 level++;
2499 }
2500 else if (level == nest) {
2501 if (k->type == STK_MEM_START) {
2502 if (mem_is_in_memp(k->zid, mem_num, memp)) {
2503 pstart = k->u.mem.pstr;
2504 if (IS_NOT_NULL(pend)) {
2505 if (pend - pstart > send - *s) return 0; /* or goto next_mem; */
2506 p = pstart;
2507 ss = *s;
2508
2509 if (ignore_case != 0) {
2510 if (string_cmp_ic(reg->enc, case_fold_flag,
2511 pstart, &ss, (int )(pend - pstart)) == 0)
2512 return 0; /* or goto next_mem; */
2513 }
2514 else {
2515 while (p < pend) {
2516 if (*p++ != *ss++) return 0; /* or goto next_mem; */
2517 }
2518 }
2519
2520 *s = ss;
2521 return 1;
2522 }
2523 }
2524 }
2525 else if (k->type == STK_MEM_END) {
2526 if (mem_is_in_memp(k->zid, mem_num, memp)) {
2527 pend = k->u.mem.pstr;
2528 }
2529 }
2530 }
2531 k--;
2532 }
2533
2534 return 0;
2535 }
2536
2537 static int
backref_check_at_nested_level(regex_t * reg,StackType * top,StackType * stk_base,int nest,int mem_num,MemNumType * memp)2538 backref_check_at_nested_level(regex_t* reg,
2539 StackType* top, StackType* stk_base,
2540 int nest, int mem_num, MemNumType* memp)
2541 {
2542 int level;
2543 StackType* k;
2544
2545 level = 0;
2546 k = top;
2547 k--;
2548 while (k >= stk_base) {
2549 if (k->type == STK_CALL_FRAME) {
2550 level--;
2551 }
2552 else if (k->type == STK_RETURN) {
2553 level++;
2554 }
2555 else if (level == nest) {
2556 if (k->type == STK_MEM_END) {
2557 if (mem_is_in_memp(k->zid, mem_num, memp)) {
2558 return 1;
2559 }
2560 }
2561 }
2562 k--;
2563 }
2564
2565 return 0;
2566 }
2567 #endif /* USE_BACKREF_WITH_LEVEL */
2568
/* File-wide setting, initialised to DEFAULT_SUBEXP_CALL_MAX_NEST_LEVEL.
   Presumably the maximum nesting level allowed for subexp calls (per its
   name) -- its readers and writers are outside this part of the file. */
static int SubexpCallMaxNestLevel = DEFAULT_SUBEXP_CALL_MAX_NEST_LEVEL;
2570
2571 #ifdef ONIG_DEBUG_STATISTICS
2572
#ifdef USE_TIMEOFDAY

/* ts / te: timestamps taken by SOP_IN and SOP_OUT respectively. */
static struct timeval ts, te;
#define GETTIME(t)        gettimeofday(&(t), (struct timezone* )0)
/* Difference te - ts expressed in microseconds. */
#define TIMEDIFF(te,ts)   (((te).tv_usec - (ts).tv_usec) + \
                           (((te).tv_sec - (ts).tv_sec)*1000000))
#else

/* ts / te: timestamps taken by SOP_IN and SOP_OUT respectively. */
static struct tms ts, te;
#define GETTIME(t)         times(&(t))
/* Difference of the tms_utime fields of te and ts. */
#define TIMEDIFF(te,ts)   ((te).tms_utime - (ts).tms_utime)

#endif /* USE_TIMEOFDAY */

/* Per-opcode tables, indexed by opcode value. */
static int OpCounter[256];          /* times each opcode was entered (SOP_IN) */
static int OpPrevCounter[256];      /* times each opcode directly preceded OpPrevTarget */
static unsigned long OpTime[256];   /* accumulated TIMEDIFF per opcode (SOP_OUT) */
static int OpCurr = OP_FINISH;      /* opcode most recently entered */
static int OpPrevTarget = OP_FAIL;  /* opcode whose predecessors are counted */
static int MaxStackDepth = 0;       /* largest stk - stk_base seen by STACK_INC */
2593
/*
 * Statistics hook run on entry to an opcode: if this opcode is OpPrevTarget,
 * credit the previously entered opcode in OpPrevCounter; then record the
 * opcode as current, bump its entry count and take the start timestamp.
 */
#define SOP_IN(opcode) do {\
  if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
  OpCurr = opcode;\
  OpCounter[opcode]++;\
  GETTIME(ts);\
} while(0)

/*
 * Statistics hook run on leaving an opcode: take the end timestamp and add
 * the elapsed time to the current opcode's total.
 */
#define SOP_OUT do {\
  GETTIME(te);\
  OpTime[OpCurr] += TIMEDIFF(te, ts);\
} while(0)
2605
2606 extern void
onig_statistics_init(void)2607 onig_statistics_init(void)
2608 {
2609 int i;
2610 for (i = 0; i < 256; i++) {
2611 OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0;
2612 }
2613 MaxStackDepth = 0;
2614 }
2615
2616 extern int
onig_print_statistics(FILE * f)2617 onig_print_statistics(FILE* f)
2618 {
2619 int r;
2620 int i;
2621
2622 r = fprintf(f, " count prev time\n");
2623 if (r < 0) return -1;
2624
2625 for (i = 0; OpInfo[i].opcode >= 0; i++) {
2626 r = fprintf(f, "%8d: %8d: %10ld: %s\n",
2627 OpCounter[i], OpPrevCounter[i], OpTime[i], OpInfo[i].name);
2628 if (r < 0) return -1;
2629 }
2630 r = fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
2631 if (r < 0) return -1;
2632
2633 return 0;
2634 }
2635
/* Statistics build: advance the stack pointer and keep MaxStackDepth equal
   to the largest stk - stk_base observed so far. */
#define STACK_INC do {\
  stk++;\
  if (MaxStackDepth < stk - stk_base) {\
    MaxStackDepth = stk - stk_base;\
  }\
} while(0)
2641
2642 #else
/* Statistics disabled: STACK_INC only advances the stack pointer and the
   SOP_IN / SOP_OUT hooks expand to nothing. */
#define STACK_INC     stk++

#define SOP_IN(opcode)
#define SOP_OUT
2647 #endif
2648
2649
/* Match-region layout used when results are reported through the POSIX API:
   match_at() casts msa->region to posix_regmatch_t* when the POSIX-region
   option is set. */
typedef int regoff_t;

typedef struct {
  regoff_t  rm_so;  /* start offset of the match, relative to str */
  regoff_t  rm_eo;  /* end offset of the match, relative to str */
} posix_regmatch_t;
2657
2658
2659
#ifdef USE_THREADED_CODE

/* Threaded dispatch: every opcode body starts at a label L_<name> and
   control is transferred with `goto *address`. */
#define BYTECODE_INTERPRETER_START      GOTO_OP;
#define BYTECODE_INTERPRETER_END
/* Opens an opcode body.  MATCH_DEBUG_OUT supplies its own trailing ';'. */
#define CASE_OP(x)   L_##x: SOP_IN(OP_##x); MATCH_DEBUG_OUT(0)
#define DEFAULT_OP   /* L_DEFAULT: */
#define NEXT_OP      JUMP_OP
#define JUMP_OP      GOTO_OP
#ifdef USE_DIRECT_THREADED_CODE
/* The label address is stored in the operation itself. */
#define GOTO_OP      goto *(p->opaddr)
#else
/* The label address is looked up by opcode. */
#define GOTO_OP      goto *opcode_to_label[p->opcode]
#endif
#define BREAK_OP     /* Nothing */

#else

/* Switch dispatch: an endless loop around `switch (p->opcode)`.
   MATCH_DEBUG_OUT supplies its own trailing ';'. */
#define BYTECODE_INTERPRETER_START \
  while (1) {\
  MATCH_DEBUG_OUT(0)\
  switch (p->opcode) {
#define BYTECODE_INTERPRETER_END  } }
#define CASE_OP(x)   case OP_##x: SOP_IN(OP_##x);
#define DEFAULT_OP   default:
/* Leave the switch; the enclosing loop then dispatches on the new p. */
#define NEXT_OP      break
#define JUMP_OP      GOTO_OP
/* Restart the loop; the `break` after `continue` is never reached. */
#define GOTO_OP      continue; break
#define BREAK_OP     break

#endif /* USE_THREADED_CODE */
2690
/* Step p to the next operation. */
#define INC_OP                       p++
/* Opcode epilogues: each runs the SOP_OUT statistics hook first and then
   the dispatch macro named on its right-hand side. */
#define JUMP_OUT_WITH_SPREV_SET      SOP_OUT; NEXT_OP
#define JUMP_OUT                     SOP_OUT; JUMP_OP
#define BREAK_OUT                    SOP_OUT; BREAK_OP
/* Like JUMP_OUT, with CHECK_INTERRUPT_IN_MATCH run before dispatching. */
#define CHECK_INTERRUPT_JUMP_OUT     SOP_OUT; CHECK_INTERRUPT_IN_MATCH; JUMP_OP
2696
2697
#ifdef ONIG_DEBUG_MATCH
/*
 * Trace one interpreter step to DBGFP.  The line shows: the running step
 * counter, the current stack index, the offset of s in the subject (-1 when
 * s is null), up to 7 characters of the subject starting at s (followed by
 * "..." when more remain), padding, and then either "----: finish" or the
 * index and disassembly of the operation at p - offset.  For OP_RETURN the
 * call frame that would be returned to and its return address are appended.
 *
 * Relies on variables of the enclosing function (s, str, end, p, stk, stkp,
 * i, counter, reg, encode, FinishCode) and overwrites i and stkp.
 *
 * The definition deliberately ends in "while(0);" -- CASE_OP and
 * BYTECODE_INTERPRETER_START invoke it without a trailing semicolon.
 */
#define MATCH_DEBUG_OUT(offset) do {\
  Operation *xp;\
  UChar *q, *bp, buf[50];\
  int len, spos;\
  spos = IS_NOT_NULL(s) ? (int )(s - str) : -1;\
  xp = p - (offset);\
  fprintf(DBGFP, "%7u: %7ld: %4d> \"",\
          counter, GET_STACK_INDEX(stk), spos);\
  counter++;\
  bp = buf;\
  if (IS_NOT_NULL(s)) {\
    for (i = 0, q = s; i < 7 && q < end; i++) {\
      len = enclen(encode, q);\
      while (len-- > 0) *bp++ = *q++;\
    }\
    if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; }\
    else { xmemcpy(bp, "\"", 1); bp += 1; }\
  }\
  else {\
    xmemcpy(bp, "\"", 1); bp += 1;\
  }\
  *bp = 0;\
  fputs((char* )buf, DBGFP);\
  for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', DBGFP);\
  if (xp == FinishCode)\
    fprintf(DBGFP, "----: finish");\
  else {\
    int index;\
    enum OpCode zopcode;\
    Operation* addr;\
    index = (int )(xp - reg->ops);\
    fprintf(DBGFP, "%4d: ", index);\
    print_compiled_byte_code(DBGFP, reg, index, reg->ops, encode); \
    zopcode = GET_OPCODE(reg, index);\
    if (zopcode == OP_RETURN) {\
      GET_STACK_RETURN_CALL(stkp, addr);\
      fprintf(DBGFP, " f:%ld -> %d", \
        GET_STACK_INDEX(stkp), (int )(addr - reg->ops));\
    }\
  }\
  fprintf(DBGFP, "\n");\
} while(0);
#else
/* Tracing disabled: expands to nothing. */
#define MATCH_DEBUG_OUT(offset)
#endif
2744
/* Abort the match with an error: store err_code in best_len and jump to the
   match_at_end label of the enclosing function. */
#define MATCH_AT_ERROR_RETURN(err_code) do {\
  best_len = err_code; goto match_at_end;\
} while(0)
2748
/*
 * Debug dump of the match counters to DBGFP: `title`, the offset of sstart
 * in the subject, the retry counter, the per-search subexp-call counter and
 * then every entry of subexp_call_counters[].  Reads sstart, str,
 * retry_in_match_counter, msa and subexp_call_counters from the enclosing
 * function.
 *
 * The pointer difference is cast to long so that it matches its %ld
 * conversion on platforms where ptrdiff_t is not long.
 */
#define MATCH_COUNTER_OUT(title) do {\
  int i;\
  fprintf(DBGFP, "%s (%ld): retry limit: %8lu, subexp_call: %8lu\n", (title), (long )(sstart - str), retry_in_match_counter, msa->subexp_call_in_search_counter); \
  fprintf(DBGFP, " ");\
  for (i = 0; i < MAX_SUBEXP_CALL_COUNTERS; i++) {\
    fprintf(DBGFP, " %6lu", subexp_call_counters[i]);\
  }\
  fprintf(DBGFP, "\n");\
  fflush(DBGFP);\
} while (0)
2759
2760
/* Match the data in [str, end) starting at position sstart. */
2762 static int
match_at(regex_t * reg,const UChar * str,const UChar * end,const UChar * in_right_range,const UChar * sstart,MatchArg * msa)2763 match_at(regex_t* reg, const UChar* str, const UChar* end,
2764 const UChar* in_right_range, const UChar* sstart,
2765 MatchArg* msa)
2766 {
2767
2768 #if defined(USE_DIRECT_THREADED_CODE)
2769 static Operation FinishCode[] = { { .opaddr=&&L_FINISH } };
2770 #else
2771 static Operation FinishCode[] = { { OP_FINISH } };
2772 #endif
2773
2774 #ifdef USE_THREADED_CODE
2775 static const void *opcode_to_label[] = {
2776 &&L_FINISH,
2777 &&L_END,
2778 &&L_STR_1,
2779 &&L_STR_2,
2780 &&L_STR_3,
2781 &&L_STR_4,
2782 &&L_STR_5,
2783 &&L_STR_N,
2784 &&L_STR_MB2N1,
2785 &&L_STR_MB2N2,
2786 &&L_STR_MB2N3,
2787 &&L_STR_MB2N,
2788 &&L_STR_MB3N,
2789 &&L_STR_MBN,
2790 &&L_CCLASS,
2791 &&L_CCLASS_MB,
2792 &&L_CCLASS_MIX,
2793 &&L_CCLASS_NOT,
2794 &&L_CCLASS_MB_NOT,
2795 &&L_CCLASS_MIX_NOT,
2796 &&L_ANYCHAR,
2797 &&L_ANYCHAR_ML,
2798 &&L_ANYCHAR_STAR,
2799 &&L_ANYCHAR_ML_STAR,
2800 &&L_ANYCHAR_STAR_PEEK_NEXT,
2801 &&L_ANYCHAR_ML_STAR_PEEK_NEXT,
2802 &&L_WORD,
2803 &&L_WORD_ASCII,
2804 &&L_NO_WORD,
2805 &&L_NO_WORD_ASCII,
2806 &&L_WORD_BOUNDARY,
2807 &&L_NO_WORD_BOUNDARY,
2808 &&L_WORD_BEGIN,
2809 &&L_WORD_END,
2810 &&L_TEXT_SEGMENT_BOUNDARY,
2811 &&L_BEGIN_BUF,
2812 &&L_END_BUF,
2813 &&L_BEGIN_LINE,
2814 &&L_END_LINE,
2815 &&L_SEMI_END_BUF,
2816 &&L_CHECK_POSITION,
2817 &&L_BACKREF1,
2818 &&L_BACKREF2,
2819 &&L_BACKREF_N,
2820 &&L_BACKREF_N_IC,
2821 &&L_BACKREF_MULTI,
2822 &&L_BACKREF_MULTI_IC,
2823 #ifdef USE_BACKREF_WITH_LEVEL
2824 &&L_BACKREF_WITH_LEVEL,
2825 &&L_BACKREF_WITH_LEVEL_IC,
2826 #endif
2827 &&L_BACKREF_CHECK,
2828 #ifdef USE_BACKREF_WITH_LEVEL
2829 &&L_BACKREF_CHECK_WITH_LEVEL,
2830 #endif
2831 &&L_MEM_START,
2832 &&L_MEM_START_PUSH,
2833 &&L_MEM_END_PUSH,
2834 #ifdef USE_CALL
2835 &&L_MEM_END_PUSH_REC,
2836 #endif
2837 &&L_MEM_END,
2838 #ifdef USE_CALL
2839 &&L_MEM_END_REC,
2840 #endif
2841 &&L_FAIL,
2842 &&L_JUMP,
2843 &&L_PUSH,
2844 &&L_PUSH_SUPER,
2845 &&L_POP,
2846 &&L_POP_TO_MARK,
2847 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
2848 &&L_PUSH_OR_JUMP_EXACT1,
2849 #endif
2850 &&L_PUSH_IF_PEEK_NEXT,
2851 &&L_REPEAT,
2852 &&L_REPEAT_NG,
2853 &&L_REPEAT_INC,
2854 &&L_REPEAT_INC_NG,
2855 &&L_EMPTY_CHECK_START,
2856 &&L_EMPTY_CHECK_END,
2857 &&L_EMPTY_CHECK_END_MEMST,
2858 #ifdef USE_CALL
2859 &&L_EMPTY_CHECK_END_MEMST_PUSH,
2860 #endif
2861 &&L_MOVE,
2862 &&L_STEP_BACK_START,
2863 &&L_STEP_BACK_NEXT,
2864 &&L_CUT_TO_MARK,
2865 &&L_MARK,
2866 &&L_SAVE_VAL,
2867 &&L_UPDATE_VAR,
2868 #ifdef USE_CALL
2869 &&L_CALL,
2870 &&L_RETURN,
2871 #endif
2872 #ifdef USE_CALLOUT
2873 &&L_CALLOUT_CONTENTS,
2874 &&L_CALLOUT_NAME,
2875 #endif
2876 };
2877 #endif
2878
2879 int i, n, num_mem, best_len, pop_level;
2880 LengthType tlen, tlen2;
2881 MemNumType mem;
2882 RelAddrType addr;
2883 UChar *s, *ps;
2884 UChar *right_range;
2885 int is_alloca;
2886 char *alloc_base;
2887 StackType *stk_base, *stk, *stk_end;
2888 StackType *stkp; /* used as any purpose. */
2889 StkPtrType *mem_start_stk, *mem_end_stk;
2890 UChar* keep;
2891
2892 #ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
2893 StackIndex *repeat_stk;
2894 StackIndex *empty_check_stk;
2895 #endif
2896 #ifdef USE_RETRY_LIMIT
2897 unsigned long retry_limit_in_match;
2898 unsigned long retry_in_match_counter;
2899 #endif
2900 #ifdef USE_CALLOUT
2901 int of;
2902 #endif
2903 #ifdef ONIG_DEBUG_MATCH_COUNTER
2904 #define MAX_SUBEXP_CALL_COUNTERS 9
2905 unsigned long subexp_call_counters[MAX_SUBEXP_CALL_COUNTERS];
2906 #endif
2907
2908 Operation* p = reg->ops;
2909 OnigOptionType option = reg->options;
2910 OnigEncoding encode = reg->enc;
2911 OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
2912
2913 #ifdef USE_CALL
2914 unsigned long subexp_call_nest_counter = 0;
2915 #endif
2916
2917 #ifdef ONIG_DEBUG_MATCH
2918 static unsigned int counter = 1;
2919 #endif
2920
2921 #ifdef ONIG_DEBUG_MATCH_COUNTER
2922 for (i = 0; i < MAX_SUBEXP_CALL_COUNTERS; i++) {
2923 subexp_call_counters[i] = 0;
2924 }
2925 #endif
2926
2927 #ifdef USE_DIRECT_THREADED_CODE
2928 if (IS_NULL(msa)) {
2929 for (i = 0; i < reg->ops_used; i++) {
2930 const void* addr;
2931 addr = opcode_to_label[reg->ocs[i]];
2932 p->opaddr = addr;
2933 p++;
2934 }
2935 return ONIG_NORMAL;
2936 }
2937 #endif
2938
2939 #ifdef USE_CALLOUT
2940 msa->mp->match_at_call_counter++;
2941 #endif
2942
2943 #ifdef USE_RETRY_LIMIT
2944 retry_limit_in_match = msa->retry_limit_in_match;
2945 if (msa->retry_limit_in_search != 0) {
2946 unsigned long rem = msa->retry_limit_in_search
2947 - msa->retry_limit_in_search_counter;
2948 if (rem < retry_limit_in_match)
2949 retry_limit_in_match = rem;
2950 }
2951 #endif
2952
2953 pop_level = reg->stack_pop_level;
2954 num_mem = reg->num_mem;
2955 STACK_INIT(INIT_MATCH_STACK_SIZE);
2956 UPDATE_FOR_STACK_REALLOC;
2957 for (i = 1; i <= num_mem; i++) {
2958 mem_start_stk[i].i = mem_end_stk[i].i = INVALID_STACK_INDEX;
2959 }
2960
2961 #ifdef ONIG_DEBUG_MATCH
2962 fprintf(DBGFP, "match_at: str: %p, end: %p, start: %p\n", str, end, sstart);
2963 fprintf(DBGFP, "size: %d, start offset: %d\n",
2964 (int )(end - str), (int )(sstart - str));
2965 #endif
2966
2967 best_len = ONIG_MISMATCH;
2968 keep = s = (UChar* )sstart;
2969 STACK_PUSH_BOTTOM(STK_ALT, FinishCode); /* bottom stack */
2970 INIT_RIGHT_RANGE;
2971
2972 #ifdef USE_RETRY_LIMIT
2973 retry_in_match_counter = 0;
2974 #endif
2975
2976 BYTECODE_INTERPRETER_START {
2977 CASE_OP(END)
2978 n = (int )(s - sstart);
2979 if (n > best_len) {
2980 OnigRegion* region;
2981 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
2982 if (OPTON_FIND_LONGEST(option)) {
2983 if (n > msa->best_len) {
2984 msa->best_len = n;
2985 msa->best_s = (UChar* )sstart;
2986 goto set_region;
2987 }
2988 else
2989 goto end_best_len;
2990 }
2991 #endif
2992 best_len = n;
2993
2994 set_region:
2995 region = msa->region;
2996 if (region) {
2997 if (keep > s) keep = s;
2998
2999 #ifdef USE_POSIX_API
3000 if (OPTON_POSIX_REGION(msa->options)) {
3001 posix_regmatch_t* rmt = (posix_regmatch_t* )region;
3002
3003 rmt[0].rm_so = (regoff_t )(keep - str);
3004 rmt[0].rm_eo = (regoff_t )(s - str);
3005 for (i = 1; i <= num_mem; i++) {
3006 if (mem_end_stk[i].i != INVALID_STACK_INDEX) {
3007 rmt[i].rm_so = (regoff_t )(STACK_MEM_START(reg, i) - str);
3008 rmt[i].rm_eo = (regoff_t )(STACK_MEM_END(reg, i) - str);
3009 }
3010 else {
3011 rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS;
3012 }
3013 }
3014 }
3015 else {
3016 #endif /* USE_POSIX_API */
3017 region->beg[0] = (int )(keep - str);
3018 region->end[0] = (int )(s - str);
3019 for (i = 1; i <= num_mem; i++) {
3020 if (mem_end_stk[i].i != INVALID_STACK_INDEX) {
3021 region->beg[i] = (int )(STACK_MEM_START(reg, i) - str);
3022 region->end[i] = (int )(STACK_MEM_END(reg, i) - str);
3023 }
3024 else {
3025 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
3026 }
3027 }
3028
3029 #ifdef USE_CAPTURE_HISTORY
3030 if (reg->capture_history != 0) {
3031 int r;
3032 OnigCaptureTreeNode* node;
3033
3034 if (IS_NULL(region->history_root)) {
3035 region->history_root = node = history_node_new();
3036 CHECK_NULL_RETURN_MEMERR(node);
3037 }
3038 else {
3039 node = region->history_root;
3040 history_tree_clear(node);
3041 }
3042
3043 node->group = 0;
3044 node->beg = (int )(keep - str);
3045 node->end = (int )(s - str);
3046
3047 stkp = stk_base;
3048 r = make_capture_history_tree(region->history_root, &stkp,
3049 stk, (UChar* )str, reg);
3050 if (r < 0) MATCH_AT_ERROR_RETURN(r);
3051 }
3052 #endif /* USE_CAPTURE_HISTORY */
3053 #ifdef USE_POSIX_API
3054 } /* else OPTON_POSIX_REGION() */
3055 #endif
3056 } /* if (region) */
3057 } /* n > best_len */
3058
3059 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
3060 end_best_len:
3061 #endif
3062 SOP_OUT;
3063
3064 if (OPTON_FIND_CONDITION(option)) {
3065 if (OPTON_FIND_NOT_EMPTY(option) && s == sstart) {
3066 best_len = ONIG_MISMATCH;
3067 goto fail; /* for retry */
3068 }
3069 if (OPTON_FIND_LONGEST(option)) {
3070 if (s >= in_right_range && msa->best_s == sstart)
3071 best_len = msa->best_len;
3072 else
3073 goto fail; /* for retry */
3074 }
3075 }
3076
3077 /* default behavior: return first-matching result. */
3078 goto match_at_end;
3079
3080 CASE_OP(STR_1)
3081 DATA_ENSURE(1);
3082 ps = p->exact.s;
3083 if (*ps != *s) goto fail;
3084 s++;
3085 INC_OP;
3086 JUMP_OUT_WITH_SPREV_SET;
3087
3088 CASE_OP(STR_2)
3089 DATA_ENSURE(2);
3090 ps = p->exact.s;
3091 if (*ps != *s) goto fail;
3092 ps++; s++;
3093 if (*ps != *s) goto fail;
3094 s++;
3095 INC_OP;
3096 JUMP_OUT;
3097
3098 CASE_OP(STR_3)
3099 DATA_ENSURE(3);
3100 ps = p->exact.s;
3101 if (*ps != *s) goto fail;
3102 ps++; s++;
3103 if (*ps != *s) goto fail;
3104 ps++; s++;
3105 if (*ps != *s) goto fail;
3106 s++;
3107 INC_OP;
3108 JUMP_OUT;
3109
3110 CASE_OP(STR_4)
3111 DATA_ENSURE(4);
3112 ps = p->exact.s;
3113 if (*ps != *s) goto fail;
3114 ps++; s++;
3115 if (*ps != *s) goto fail;
3116 ps++; s++;
3117 if (*ps != *s) goto fail;
3118 ps++; s++;
3119 if (*ps != *s) goto fail;
3120 s++;
3121 INC_OP;
3122 JUMP_OUT;
3123
3124 CASE_OP(STR_5)
3125 DATA_ENSURE(5);
3126 ps = p->exact.s;
3127 if (*ps != *s) goto fail;
3128 ps++; s++;
3129 if (*ps != *s) goto fail;
3130 ps++; s++;
3131 if (*ps != *s) goto fail;
3132 ps++; s++;
3133 if (*ps != *s) goto fail;
3134 ps++; s++;
3135 if (*ps != *s) goto fail;
3136 s++;
3137 INC_OP;
3138 JUMP_OUT;
3139
3140 CASE_OP(STR_N)
3141 tlen = p->exact_n.n;
3142 DATA_ENSURE(tlen);
3143 ps = p->exact_n.s;
3144 while (tlen-- > 0) {
3145 if (*ps++ != *s++) goto fail;
3146 }
3147 INC_OP;
3148 JUMP_OUT;
3149
3150 CASE_OP(STR_MB2N1)
3151 DATA_ENSURE(2);
3152 ps = p->exact.s;
3153 if (*ps != *s) goto fail;
3154 ps++; s++;
3155 if (*ps != *s) goto fail;
3156 s++;
3157 INC_OP;
3158 JUMP_OUT_WITH_SPREV_SET;
3159
3160 CASE_OP(STR_MB2N2)
3161 DATA_ENSURE(4);
3162 ps = p->exact.s;
3163 if (*ps != *s) goto fail;
3164 ps++; s++;
3165 if (*ps != *s) goto fail;
3166 ps++; s++;
3167 if (*ps != *s) goto fail;
3168 ps++; s++;
3169 if (*ps != *s) goto fail;
3170 s++;
3171 INC_OP;
3172 JUMP_OUT;
3173
3174 CASE_OP(STR_MB2N3)
3175 DATA_ENSURE(6);
3176 ps = p->exact.s;
3177 if (*ps != *s) goto fail;
3178 ps++; s++;
3179 if (*ps != *s) goto fail;
3180 ps++; s++;
3181 if (*ps != *s) goto fail;
3182 ps++; s++;
3183 if (*ps != *s) goto fail;
3184 ps++; s++;
3185 if (*ps != *s) goto fail;
3186 ps++; s++;
3187 if (*ps != *s) goto fail;
3188 ps++; s++;
3189 INC_OP;
3190 JUMP_OUT;
3191
3192 CASE_OP(STR_MB2N)
3193 tlen = p->exact_n.n;
3194 DATA_ENSURE(tlen * 2);
3195 ps = p->exact_n.s;
3196 while (tlen-- > 0) {
3197 if (*ps != *s) goto fail;
3198 ps++; s++;
3199 if (*ps != *s) goto fail;
3200 ps++; s++;
3201 }
3202 INC_OP;
3203 JUMP_OUT;
3204
3205 CASE_OP(STR_MB3N)
3206 tlen = p->exact_n.n;
3207 DATA_ENSURE(tlen * 3);
3208 ps = p->exact_n.s;
3209 while (tlen-- > 0) {
3210 if (*ps != *s) goto fail;
3211 ps++; s++;
3212 if (*ps != *s) goto fail;
3213 ps++; s++;
3214 if (*ps != *s) goto fail;
3215 ps++; s++;
3216 }
3217 INC_OP;
3218 JUMP_OUT;
3219
3220 CASE_OP(STR_MBN)
3221 tlen = p->exact_len_n.len; /* mb byte len */
3222 tlen2 = p->exact_len_n.n; /* number of chars */
3223 tlen2 *= tlen;
3224 DATA_ENSURE(tlen2);
3225 ps = p->exact_len_n.s;
3226 while (tlen2-- > 0) {
3227 if (*ps != *s) goto fail;
3228 ps++; s++;
3229 }
3230 INC_OP;
3231 JUMP_OUT;
3232
3233 CASE_OP(CCLASS)
3234 DATA_ENSURE(1);
3235 if (BITSET_AT(p->cclass.bsp, *s) == 0) goto fail;
3236 if (ONIGENC_IS_MBC_HEAD(encode, s)) goto fail;
3237 s++;
3238 INC_OP;
3239 JUMP_OUT_WITH_SPREV_SET;
3240
3241 CASE_OP(CCLASS_MB)
3242 DATA_ENSURE(1);
3243 if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail;
3244
3245 cclass_mb:
3246 {
3247 OnigCodePoint code;
3248 UChar *ss;
3249 int mb_len;
3250
3251 mb_len = enclen(encode, s);
3252 DATA_ENSURE(mb_len);
3253 ss = s;
3254 s += mb_len;
3255 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
3256 if (! onig_is_in_code_range(p->cclass_mb.mb, code)) goto fail;
3257 }
3258 INC_OP;
3259 JUMP_OUT_WITH_SPREV_SET;
3260
3261 CASE_OP(CCLASS_MIX)
3262 DATA_ENSURE(1);
3263 if (ONIGENC_IS_MBC_HEAD(encode, s)) {
3264 goto cclass_mb;
3265 }
3266 else {
3267 if (BITSET_AT(p->cclass_mix.bsp, *s) == 0)
3268 goto fail;
3269
3270 s++;
3271 }
3272 INC_OP;
3273 JUMP_OUT_WITH_SPREV_SET;
3274
3275 CASE_OP(CCLASS_NOT)
3276 DATA_ENSURE(1);
3277 if (BITSET_AT(p->cclass.bsp, *s) != 0) goto fail;
3278 s += enclen(encode, s);
3279 INC_OP;
3280 JUMP_OUT_WITH_SPREV_SET;
3281
3282 CASE_OP(CCLASS_MB_NOT)
3283 DATA_ENSURE(1);
3284 if (! ONIGENC_IS_MBC_HEAD(encode, s)) {
3285 s++;
3286 goto cc_mb_not_success;
3287 }
3288
3289 cclass_mb_not:
3290 {
3291 OnigCodePoint code;
3292 UChar *ss;
3293 int mb_len = enclen(encode, s);
3294
3295 if (! DATA_ENSURE_CHECK(mb_len)) {
3296 DATA_ENSURE(1);
3297 s = (UChar* )end;
3298 goto cc_mb_not_success;
3299 }
3300
3301 ss = s;
3302 s += mb_len;
3303 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
3304 if (onig_is_in_code_range(p->cclass_mb.mb, code)) goto fail;
3305 }
3306
3307 cc_mb_not_success:
3308 INC_OP;
3309 JUMP_OUT_WITH_SPREV_SET;
3310
3311 CASE_OP(CCLASS_MIX_NOT)
3312 DATA_ENSURE(1);
3313 if (ONIGENC_IS_MBC_HEAD(encode, s)) {
3314 goto cclass_mb_not;
3315 }
3316 else {
3317 if (BITSET_AT(p->cclass_mix.bsp, *s) != 0)
3318 goto fail;
3319
3320 s++;
3321 }
3322 INC_OP;
3323 JUMP_OUT_WITH_SPREV_SET;
3324
3325 CASE_OP(ANYCHAR)
3326 DATA_ENSURE(1);
3327 n = enclen(encode, s);
3328 DATA_ENSURE(n);
3329 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
3330 s += n;
3331 INC_OP;
3332 JUMP_OUT_WITH_SPREV_SET;
3333
3334 CASE_OP(ANYCHAR_ML)
3335 DATA_ENSURE(1);
3336 n = enclen(encode, s);
3337 DATA_ENSURE(n);
3338 s += n;
3339 INC_OP;
3340 JUMP_OUT_WITH_SPREV_SET;
3341
3342 CASE_OP(ANYCHAR_STAR)
3343 INC_OP;
3344 while (DATA_ENSURE_CHECK1) {
3345 STACK_PUSH_ALT(p, s);
3346 n = enclen(encode, s);
3347 DATA_ENSURE(n);
3348 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
3349 s += n;
3350 }
3351 JUMP_OUT;
3352
3353 CASE_OP(ANYCHAR_ML_STAR)
3354 INC_OP;
3355 while (DATA_ENSURE_CHECK1) {
3356 STACK_PUSH_ALT(p, s);
3357 n = enclen(encode, s);
3358 if (n > 1) {
3359 DATA_ENSURE(n);
3360 s += n;
3361 }
3362 else {
3363 s++;
3364 }
3365 }
3366 JUMP_OUT;
3367
3368 CASE_OP(ANYCHAR_STAR_PEEK_NEXT)
3369 {
3370 UChar c;
3371
3372 c = p->anychar_star_peek_next.c;
3373 INC_OP;
3374 while (DATA_ENSURE_CHECK1) {
3375 if (c == *s) {
3376 STACK_PUSH_ALT(p, s);
3377 }
3378 n = enclen(encode, s);
3379 DATA_ENSURE(n);
3380 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
3381 s += n;
3382 }
3383 }
3384 JUMP_OUT;
3385
3386 CASE_OP(ANYCHAR_ML_STAR_PEEK_NEXT)
3387 {
3388 UChar c;
3389
3390 c = p->anychar_star_peek_next.c;
3391 INC_OP;
3392 while (DATA_ENSURE_CHECK1) {
3393 if (c == *s) {
3394 STACK_PUSH_ALT(p, s);
3395 }
3396 n = enclen(encode, s);
3397 if (n > 1) {
3398 DATA_ENSURE(n);
3399 s += n;
3400 }
3401 else {
3402 s++;
3403 }
3404 }
3405 }
3406 JUMP_OUT;
3407
3408 CASE_OP(WORD)
3409 DATA_ENSURE(1);
3410 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
3411 goto fail;
3412
3413 s += enclen(encode, s);
3414 INC_OP;
3415 JUMP_OUT_WITH_SPREV_SET;
3416
3417 CASE_OP(WORD_ASCII)
3418 DATA_ENSURE(1);
3419 if (! ONIGENC_IS_MBC_WORD_ASCII(encode, s, end))
3420 goto fail;
3421
3422 s += enclen(encode, s);
3423 INC_OP;
3424 JUMP_OUT_WITH_SPREV_SET;
3425
3426 CASE_OP(NO_WORD)
3427 DATA_ENSURE(1);
3428 if (ONIGENC_IS_MBC_WORD(encode, s, end))
3429 goto fail;
3430
3431 s += enclen(encode, s);
3432 INC_OP;
3433 JUMP_OUT_WITH_SPREV_SET;
3434
3435 CASE_OP(NO_WORD_ASCII)
3436 DATA_ENSURE(1);
3437 if (ONIGENC_IS_MBC_WORD_ASCII(encode, s, end))
3438 goto fail;
3439
3440 s += enclen(encode, s);
3441 INC_OP;
3442 JUMP_OUT_WITH_SPREV_SET;
3443
3444 CASE_OP(WORD_BOUNDARY)
3445 {
3446 ModeType mode;
3447
3448 mode = p->word_boundary.mode;
3449 if (ON_STR_BEGIN(s)) {
3450 DATA_ENSURE(1);
3451 if (! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode))
3452 goto fail;
3453 }
3454 else {
3455 UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
3456 if (ON_STR_END(s)) {
3457 if (! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
3458 goto fail;
3459 }
3460 else {
3461 if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)
3462 == IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
3463 goto fail;
3464 }
3465 }
3466 }
3467 INC_OP;
3468 JUMP_OUT;
3469
3470 CASE_OP(NO_WORD_BOUNDARY)
3471 {
3472 ModeType mode;
3473
3474 mode = p->word_boundary.mode;
3475 if (ON_STR_BEGIN(s)) {
3476 if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode))
3477 goto fail;
3478 }
3479 else {
3480 UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
3481 if (ON_STR_END(s)) {
3482 if (IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
3483 goto fail;
3484 }
3485 else {
3486 if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)
3487 != IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
3488 goto fail;
3489 }
3490 }
3491 }
3492 INC_OP;
3493 JUMP_OUT;
3494
3495 #ifdef USE_WORD_BEGIN_END
3496 CASE_OP(WORD_BEGIN)
3497 {
3498 ModeType mode;
3499
3500 mode = p->word_boundary.mode;
3501 if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) {
3502 UChar* sprev;
3503 if (ON_STR_BEGIN(s)) {
3504 INC_OP;
3505 JUMP_OUT;
3506 }
3507 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
3508 if (! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {
3509 INC_OP;
3510 JUMP_OUT;
3511 }
3512 }
3513 }
3514 goto fail;
3515
3516 CASE_OP(WORD_END)
3517 {
3518 ModeType mode;
3519
3520 mode = p->word_boundary.mode;
3521 if (! ON_STR_BEGIN(s)) {
3522 UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
3523 if (IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {
3524 if (ON_STR_END(s) || ! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) {
3525 INC_OP;
3526 JUMP_OUT;
3527 }
3528 }
3529 }
3530 }
3531 goto fail;
3532 #endif
3533
3534 CASE_OP(TEXT_SEGMENT_BOUNDARY)
3535 {
3536 int is_break;
3537 UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
3538
3539 switch (p->text_segment_boundary.type) {
3540 case EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
3541 is_break = onigenc_egcb_is_break_position(encode, s, sprev, str, end);
3542 break;
3543 #ifdef USE_UNICODE_WORD_BREAK
3544 case WORD_BOUNDARY:
3545 is_break = onigenc_wb_is_break_position(encode, s, sprev, str, end);
3546 break;
3547 #endif
3548 default:
3549 MATCH_AT_ERROR_RETURN(ONIGERR_UNDEFINED_BYTECODE);
3550 break;
3551 }
3552
3553 if (p->text_segment_boundary.not != 0)
3554 is_break = ! is_break;
3555
3556 if (is_break != 0) {
3557 INC_OP;
3558 JUMP_OUT;
3559 }
3560 else {
3561 goto fail;
3562 }
3563 }
3564
3565 CASE_OP(BEGIN_BUF)
3566 if (! ON_STR_BEGIN(s)) goto fail;
3567 if (OPTON_NOTBOL(msa->options)) goto fail;
3568 if (OPTON_NOT_BEGIN_STRING(msa->options)) goto fail;
3569
3570 INC_OP;
3571 JUMP_OUT;
3572
3573 CASE_OP(END_BUF)
3574 if (! ON_STR_END(s)) goto fail;
3575 if (OPTON_NOTEOL(msa->options)) goto fail;
3576 if (OPTON_NOT_END_STRING(msa->options)) goto fail;
3577
3578 INC_OP;
3579 JUMP_OUT;
3580
3581 CASE_OP(BEGIN_LINE)
3582 if (ON_STR_BEGIN(s)) {
3583 if (OPTON_NOTBOL(msa->options)) goto fail;
3584 INC_OP;
3585 JUMP_OUT;
3586 }
3587 else if (! ON_STR_END(s)) {
3588 UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
3589 if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
3590 INC_OP;
3591 JUMP_OUT;
3592 }
3593 }
3594 goto fail;
3595
3596 CASE_OP(END_LINE)
3597 if (ON_STR_END(s)) {
3598 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3599 UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
3600 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
3601 #endif
3602 if (OPTON_NOTEOL(msa->options)) goto fail;
3603 INC_OP;
3604 JUMP_OUT;
3605 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3606 }
3607 #endif
3608 }
3609 else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) {
3610 INC_OP;
3611 JUMP_OUT;
3612 }
3613 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3614 else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
3615 INC_OP;
3616 JUMP_OUT;
3617 }
3618 #endif
3619 goto fail;
3620
3621 CASE_OP(SEMI_END_BUF)
3622 if (ON_STR_END(s)) {
3623 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3624 UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
3625 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
3626 #endif
3627 if (OPTON_NOTEOL(msa->options)) goto fail;
3628 if (OPTON_NOT_END_STRING(msa->options)) goto fail;
3629 INC_OP;
3630 JUMP_OUT;
3631 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3632 }
3633 #endif
3634 }
3635 else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) &&
3636 ON_STR_END(s + enclen(encode, s))) {
3637 if (OPTON_NOTEOL(msa->options)) goto fail;
3638 if (OPTON_NOT_END_STRING(msa->options)) goto fail;
3639 INC_OP;
3640 JUMP_OUT;
3641 }
3642 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3643 else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
3644 UChar* ss = s + enclen(encode, s);
3645 ss += enclen(encode, ss);
3646 if (ON_STR_END(ss)) {
3647 if (OPTON_NOTEOL(msa->options)) goto fail;
3648 if (OPTON_NOT_END_STRING(msa->options)) goto fail;
3649 INC_OP;
3650 JUMP_OUT;
3651 }
3652 }
3653 #endif
3654 goto fail;
3655
3656 CASE_OP(CHECK_POSITION)
3657 switch (p->check_position.type) {
3658 case CHECK_POSITION_SEARCH_START:
3659 if (s != msa->start) goto fail;
3660 if (OPTON_NOT_BEGIN_POSITION(msa->options)) goto fail;
3661 break;
3662 case CHECK_POSITION_CURRENT_RIGHT_RANGE:
3663 if (s != right_range) goto fail;
3664 break;
3665 default:
3666 break;
3667 }
3668 INC_OP;
3669 JUMP_OUT;
3670
3671 CASE_OP(MEM_START_PUSH)
3672 mem = p->memory_start.num;
3673 STACK_PUSH_MEM_START(mem, s);
3674 INC_OP;
3675 JUMP_OUT;
3676
3677 CASE_OP(MEM_START)
3678 mem = p->memory_start.num;
3679 mem_start_stk[mem].s = s;
3680 INC_OP;
3681 JUMP_OUT;
3682
3683 CASE_OP(MEM_END_PUSH)
3684 mem = p->memory_end.num;
3685 STACK_PUSH_MEM_END(mem, s);
3686 INC_OP;
3687 JUMP_OUT;
3688
3689 CASE_OP(MEM_END)
3690 mem = p->memory_end.num;
3691 mem_end_stk[mem].s = s;
3692 INC_OP;
3693 JUMP_OUT;
3694
3695 #ifdef USE_CALL
3696 CASE_OP(MEM_END_PUSH_REC)
3697 {
3698 StackIndex si;
3699
3700 mem = p->memory_end.num;
3701 STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
3702 si = GET_STACK_INDEX(stkp);
3703 STACK_PUSH_MEM_END(mem, s);
3704 mem_start_stk[mem].i = si;
3705 INC_OP;
3706 JUMP_OUT;
3707 }
3708
3709 CASE_OP(MEM_END_REC)
3710 mem = p->memory_end.num;
3711 mem_end_stk[mem].s = s;
3712 STACK_GET_MEM_START(mem, stkp);
3713
3714 if (MEM_STATUS_AT(reg->push_mem_start, mem))
3715 mem_start_stk[mem].i = GET_STACK_INDEX(stkp);
3716 else
3717 mem_start_stk[mem].s = stkp->u.mem.pstr;
3718
3719 STACK_PUSH_MEM_END_MARK(mem);
3720 INC_OP;
3721 JUMP_OUT;
3722 #endif
3723
3724 CASE_OP(BACKREF1)
3725 mem = 1;
3726 goto backref;
3727
3728 CASE_OP(BACKREF2)
3729 mem = 2;
3730 goto backref;
3731
3732 CASE_OP(BACKREF_N)
3733 mem = p->backref_n.n1;
3734 backref:
3735 {
3736 UChar *pstart, *pend;
3737
3738 if (mem_end_stk[mem].i == INVALID_STACK_INDEX) goto fail;
3739 if (mem_start_stk[mem].i == INVALID_STACK_INDEX) goto fail;
3740
3741 pstart = STACK_MEM_START(reg, mem);
3742 pend = STACK_MEM_END(reg, mem);
3743 n = (int )(pend - pstart);
3744 if (n != 0) {
3745 DATA_ENSURE(n);
3746 STRING_CMP(s, pstart, n);
3747 }
3748 }
3749 INC_OP;
3750 JUMP_OUT;
3751
3752 CASE_OP(BACKREF_N_IC)
3753 mem = p->backref_n.n1;
3754 {
3755 UChar *pstart, *pend;
3756
3757 if (mem_end_stk[mem].i == INVALID_STACK_INDEX) goto fail;
3758 if (mem_start_stk[mem].i == INVALID_STACK_INDEX) goto fail;
3759
3760 pstart = STACK_MEM_START(reg, mem);
3761 pend = STACK_MEM_END(reg, mem);
3762 n = (int )(pend - pstart);
3763 if (n != 0) {
3764 DATA_ENSURE(n);
3765 STRING_CMP_IC(case_fold_flag, pstart, &s, n);
3766 }
3767 }
3768 INC_OP;
3769 JUMP_OUT;
3770
3771 CASE_OP(BACKREF_MULTI)
3772 {
3773 int is_fail;
3774 UChar *pstart, *pend, *swork;
3775
3776 tlen = p->backref_general.num;
3777 for (i = 0; i < tlen; i++) {
3778 mem = tlen == 1 ? p->backref_general.n1 : p->backref_general.ns[i];
3779
3780 if (mem_end_stk[mem].i == INVALID_STACK_INDEX) continue;
3781 if (mem_start_stk[mem].i == INVALID_STACK_INDEX) continue;
3782
3783 pstart = STACK_MEM_START(reg, mem);
3784 pend = STACK_MEM_END(reg, mem);
3785 n = (int )(pend - pstart);
3786 if (n != 0) {
3787 DATA_ENSURE(n);
3788 swork = s;
3789 STRING_CMP_VALUE(swork, pstart, n, is_fail);
3790 if (is_fail) continue;
3791 s = swork;
3792 }
3793 break; /* success */
3794 }
3795 if (i == tlen) goto fail;
3796 }
3797 INC_OP;
3798 JUMP_OUT;
3799
3800 CASE_OP(BACKREF_MULTI_IC)
3801 {
3802 int is_fail;
3803 UChar *pstart, *pend, *swork;
3804
3805 tlen = p->backref_general.num;
3806 for (i = 0; i < tlen; i++) {
3807 mem = tlen == 1 ? p->backref_general.n1 : p->backref_general.ns[i];
3808
3809 if (mem_end_stk[mem].i == INVALID_STACK_INDEX) continue;
3810 if (mem_start_stk[mem].i == INVALID_STACK_INDEX) continue;
3811
3812 pstart = STACK_MEM_START(reg, mem);
3813 pend = STACK_MEM_END(reg, mem);
3814 n = (int )(pend - pstart);
3815 if (n != 0) {
3816 DATA_ENSURE(n);
3817 swork = s;
3818 STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail);
3819 if (is_fail) continue;
3820 s = swork;
3821 }
3822 break; /* success */
3823 }
3824 if (i == tlen) goto fail;
3825 }
3826 INC_OP;
3827 JUMP_OUT;
3828
3829 #ifdef USE_BACKREF_WITH_LEVEL
3830 CASE_OP(BACKREF_WITH_LEVEL_IC)
3831 n = 1; /* ignore case */
3832 goto backref_with_level;
3833 CASE_OP(BACKREF_WITH_LEVEL)
3834 {
3835 int level;
3836 MemNumType* mems;
3837
3838 n = 0;
3839 backref_with_level:
3840 level = p->backref_general.nest_level;
3841 tlen = p->backref_general.num;
3842 mems = tlen == 1 ? &(p->backref_general.n1) : p->backref_general.ns;
3843
3844 if (! backref_match_at_nested_level(reg, stk, stk_base, n,
3845 case_fold_flag, level, (int )tlen, mems, &s, end)) {
3846 goto fail;
3847 }
3848 }
3849 INC_OP;
3850 JUMP_OUT;
3851 #endif
3852
3853 CASE_OP(BACKREF_CHECK)
3854 {
3855 MemNumType* mems;
3856
3857 tlen = p->backref_general.num;
3858 mems = tlen == 1 ? &(p->backref_general.n1) : p->backref_general.ns;
3859
3860 for (i = 0; i < tlen; i++) {
3861 mem = mems[i];
3862 if (mem_end_stk[mem].i == INVALID_STACK_INDEX) continue;
3863 if (mem_start_stk[mem].i == INVALID_STACK_INDEX) continue;
3864 break; /* success */
3865 }
3866 if (i == tlen) goto fail;
3867 }
3868 INC_OP;
3869 JUMP_OUT;
3870
3871 #ifdef USE_BACKREF_WITH_LEVEL
3872 CASE_OP(BACKREF_CHECK_WITH_LEVEL)
3873 {
3874 LengthType level;
3875 MemNumType* mems;
3876
3877 level = p->backref_general.nest_level;
3878 tlen = p->backref_general.num;
3879 mems = tlen == 1 ? &(p->backref_general.n1) : p->backref_general.ns;
3880
3881 if (backref_check_at_nested_level(reg, stk, stk_base,
3882 (int )level, (int )tlen, mems) == 0)
3883 goto fail;
3884 }
3885 INC_OP;
3886 JUMP_OUT;
3887 #endif
3888
3889 CASE_OP(EMPTY_CHECK_START)
3890 mem = p->empty_check_start.mem; /* mem: null check id */
3891 STACK_PUSH_EMPTY_CHECK_START(mem, s);
3892 INC_OP;
3893 JUMP_OUT;
3894
3895 CASE_OP(EMPTY_CHECK_END)
3896 {
3897 int is_empty;
3898
3899 mem = p->empty_check_end.mem; /* mem: null check id */
3900 STACK_EMPTY_CHECK(is_empty, mem, s);
3901 INC_OP;
3902 if (is_empty) {
3903 #ifdef ONIG_DEBUG_MATCH
3904 fprintf(DBGFP, "EMPTY_CHECK_END: skip id:%d, s:%p\n", (int )mem, s);
3905 #endif
3906 empty_check_found:
3907 /* empty loop founded, skip next instruction */
3908 #if defined(ONIG_DEBUG) && !defined(USE_DIRECT_THREADED_CODE)
3909 switch (p->opcode) {
3910 case OP_JUMP:
3911 case OP_PUSH:
3912 case OP_REPEAT_INC:
3913 case OP_REPEAT_INC_NG:
3914 INC_OP;
3915 break;
3916 default:
3917 MATCH_AT_ERROR_RETURN(ONIGERR_UNEXPECTED_BYTECODE);
3918 break;
3919 }
3920 #else
3921 INC_OP;
3922 #endif
3923 }
3924 }
3925 JUMP_OUT;
3926
3927 #ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
3928 CASE_OP(EMPTY_CHECK_END_MEMST)
3929 {
3930 int is_empty;
3931
3932 mem = p->empty_check_end.mem; /* mem: null check id */
3933 STACK_EMPTY_CHECK_MEM(is_empty, mem, s, reg);
3934 INC_OP;
3935 if (is_empty) {
3936 #ifdef ONIG_DEBUG_MATCH
3937 fprintf(DBGFP, "EMPTY_CHECK_END_MEM: skip id:%d, s:%p\n", (int)mem, s);
3938 #endif
3939 if (is_empty == -1) goto fail;
3940 goto empty_check_found;
3941 }
3942 }
3943 JUMP_OUT;
3944 #endif
3945
3946 #ifdef USE_CALL
3947 CASE_OP(EMPTY_CHECK_END_MEMST_PUSH)
3948 {
3949 int is_empty;
3950
3951 mem = p->empty_check_end.mem; /* mem: null check id */
3952 #ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
3953 STACK_EMPTY_CHECK_MEM_REC(is_empty, mem, s, reg);
3954 #else
3955 STACK_EMPTY_CHECK_REC(is_empty, mem, s);
3956 #endif
3957 INC_OP;
3958 if (is_empty) {
3959 #ifdef ONIG_DEBUG_MATCH
3960 fprintf(DBGFP, "EMPTY_CHECK_END_MEM_PUSH: skip id:%d, s:%p\n",
3961 (int )mem, s);
3962 #endif
3963 if (is_empty == -1) goto fail;
3964 goto empty_check_found;
3965 }
3966 else {
3967 STACK_PUSH_EMPTY_CHECK_END(mem);
3968 }
3969 }
3970 JUMP_OUT;
3971 #endif
3972
3973 CASE_OP(JUMP)
3974 addr = p->jump.addr;
3975 p += addr;
3976 CHECK_INTERRUPT_JUMP_OUT;
3977
3978 CASE_OP(PUSH)
3979 addr = p->push.addr;
3980 STACK_PUSH_ALT(p + addr, s);
3981 INC_OP;
3982 JUMP_OUT;
3983
3984 CASE_OP(PUSH_SUPER)
3985 addr = p->push.addr;
3986 STACK_PUSH_SUPER_ALT(p + addr, s);
3987 INC_OP;
3988 JUMP_OUT;
3989
3990 CASE_OP(POP)
3991 STACK_POP_ONE;
3992 INC_OP;
3993 JUMP_OUT;
3994
3995 CASE_OP(POP_TO_MARK)
3996 STACK_POP_TO_MARK(p->pop_to_mark.id);
3997 INC_OP;
3998 JUMP_OUT;
3999
4000 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
4001 CASE_OP(PUSH_OR_JUMP_EXACT1)
4002 {
4003 UChar c;
4004
4005 addr = p->push_or_jump_exact1.addr;
4006 c = p->push_or_jump_exact1.c;
4007 if (DATA_ENSURE_CHECK1 && c == *s) {
4008 STACK_PUSH_ALT(p + addr, s);
4009 INC_OP;
4010 JUMP_OUT;
4011 }
4012 }
4013 p += addr;
4014 JUMP_OUT;
4015 #endif
4016
4017 CASE_OP(PUSH_IF_PEEK_NEXT)
4018 {
4019 UChar c;
4020
4021 addr = p->push_if_peek_next.addr;
4022 c = p->push_if_peek_next.c;
4023 if (DATA_ENSURE_CHECK1 && c == *s) {
4024 STACK_PUSH_ALT(p + addr, s);
4025 }
4026 }
4027 INC_OP;
4028 JUMP_OUT;
4029
4030 CASE_OP(REPEAT)
4031 mem = p->repeat.id; /* mem: OP_REPEAT ID */
4032 addr = p->repeat.addr;
4033
4034 STACK_PUSH_REPEAT_INC(mem, 0);
4035 if (reg->repeat_range[mem].lower == 0) {
4036 STACK_PUSH_ALT(p + addr, s);
4037 }
4038 INC_OP;
4039 JUMP_OUT;
4040
4041 CASE_OP(REPEAT_NG)
4042 mem = p->repeat.id; /* mem: OP_REPEAT ID */
4043 addr = p->repeat.addr;
4044
4045 STACK_PUSH_REPEAT_INC(mem, 0);
4046 if (reg->repeat_range[mem].lower == 0) {
4047 STACK_PUSH_ALT(p + 1, s);
4048 p += addr;
4049 }
4050 else
4051 INC_OP;
4052 JUMP_OUT;
4053
4054 CASE_OP(REPEAT_INC)
4055 mem = p->repeat_inc.id; /* mem: OP_REPEAT ID */
4056 STACK_GET_REPEAT_COUNT(mem, n);
4057 n++;
4058 if (n >= reg->repeat_range[mem].upper) {
4059 /* end of repeat. Nothing to do. */
4060 INC_OP;
4061 }
4062 else if (n >= reg->repeat_range[mem].lower) {
4063 INC_OP;
4064 STACK_PUSH_ALT(p, s);
4065 p = reg->repeat_range[mem].u.pcode;
4066 }
4067 else {
4068 p = reg->repeat_range[mem].u.pcode;
4069 }
4070 STACK_PUSH_REPEAT_INC(mem, n);
4071 CHECK_INTERRUPT_JUMP_OUT;
4072
4073 CASE_OP(REPEAT_INC_NG)
4074 mem = p->repeat_inc.id; /* mem: OP_REPEAT ID */
4075 STACK_GET_REPEAT_COUNT(mem, n);
4076 n++;
4077 STACK_PUSH_REPEAT_INC(mem, n);
4078 if (n == reg->repeat_range[mem].upper) {
4079 INC_OP;
4080 }
4081 else {
4082 if (n >= reg->repeat_range[mem].lower) {
4083 STACK_PUSH_ALT(reg->repeat_range[mem].u.pcode, s);
4084 INC_OP;
4085 }
4086 else {
4087 p = reg->repeat_range[mem].u.pcode;
4088 }
4089 }
4090 CHECK_INTERRUPT_JUMP_OUT;
4091
4092 #ifdef USE_CALL
4093 CASE_OP(CALL)
4094 if (subexp_call_nest_counter == SubexpCallMaxNestLevel)
4095 goto fail;
4096 subexp_call_nest_counter++;
4097
4098 if (SubexpCallLimitInSearch != 0) {
4099 msa->subexp_call_in_search_counter++;
4100 #ifdef ONIG_DEBUG_MATCH_COUNTER
4101 if (p->call.called_mem < MAX_SUBEXP_CALL_COUNTERS)
4102 subexp_call_counters[p->call.called_mem]++;
4103 if (msa->subexp_call_in_search_counter % 1000 == 0)
4104 MATCH_COUNTER_OUT("CALL");
4105 #endif
4106 if (msa->subexp_call_in_search_counter >
4107 SubexpCallLimitInSearch) {
4108 MATCH_AT_ERROR_RETURN(ONIGERR_SUBEXP_CALL_LIMIT_IN_SEARCH_OVER);
4109 }
4110 }
4111
4112 addr = p->call.addr;
4113 INC_OP; STACK_PUSH_CALL_FRAME(p);
4114 p = reg->ops + addr;
4115
4116 JUMP_OUT;
4117
4118 CASE_OP(RETURN)
4119 STACK_RETURN(p);
4120 STACK_PUSH_RETURN;
4121 subexp_call_nest_counter--;
4122 JUMP_OUT;
4123 #endif
4124
4125 CASE_OP(MOVE)
4126 if (p->move.n < 0) {
4127 s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, -p->move.n);
4128 if (IS_NULL(s)) goto fail;
4129 }
4130 else {
4131 int len;
4132
4133 for (tlen = p->move.n; tlen > 0; tlen--) {
4134 len = enclen(encode, s);
4135 s += len;
4136 if (s > end) goto fail;
4137 if (s == end) {
4138 if (tlen != 1) goto fail;
4139 else break;
4140 }
4141 }
4142 }
4143 INC_OP;
4144 JUMP_OUT;
4145
4146 CASE_OP(STEP_BACK_START)
4147 tlen = p->step_back_start.initial;
4148 if (tlen != 0) {
4149 s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
4150 if (IS_NULL(s)) goto fail;
4151 }
4152 if (p->step_back_start.remaining != 0) {
4153 STACK_PUSH_ALT_WITH_ZID(p + 1, s, p->step_back_start.remaining);
4154 p += p->step_back_start.addr;
4155 }
4156 else
4157 INC_OP;
4158 JUMP_OUT;
4159
4160 CASE_OP(STEP_BACK_NEXT)
4161 tlen = (LengthType )stk->zid; /* remaining count */
4162 if (tlen != INFINITE_LEN) tlen--;
4163 s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, 1);
4164 if (IS_NULL(s)) goto fail;
4165 if (tlen != 0) {
4166 STACK_PUSH_ALT_WITH_ZID(p, s, (int )tlen);
4167 }
4168 INC_OP;
4169 JUMP_OUT;
4170
4171 CASE_OP(CUT_TO_MARK)
4172 mem = p->cut_to_mark.id; /* mem: mark id */
4173 STACK_TO_VOID_TO_MARK(stkp, mem);
4174 if (p->cut_to_mark.restore_pos != 0) {
4175 s = stkp->u.val.v;
4176 }
4177 INC_OP;
4178 JUMP_OUT;
4179
4180 CASE_OP(MARK)
4181 mem = p->mark.id; /* mem: mark id */
4182 if (p->mark.save_pos != 0)
4183 STACK_PUSH_MARK_WITH_POS(mem, s);
4184 else
4185 STACK_PUSH_MARK(mem);
4186
4187 INC_OP;
4188 JUMP_OUT;
4189
4190 CASE_OP(SAVE_VAL)
4191 {
4192 SaveType type;
4193
4194 type = p->save_val.type;
4195 mem = p->save_val.id; /* mem: save id */
4196 switch ((enum SaveType )type) {
4197 case SAVE_KEEP:
4198 STACK_PUSH_SAVE_VAL(mem, type, s);
4199 break;
4200
4201 case SAVE_S:
4202 STACK_PUSH_SAVE_VAL_WITH_SPREV(mem, type, s);
4203 break;
4204
4205 case SAVE_RIGHT_RANGE:
4206 STACK_PUSH_SAVE_VAL(mem, SAVE_RIGHT_RANGE, right_range);
4207 break;
4208 }
4209 }
4210 INC_OP;
4211 JUMP_OUT;
4212
4213 CASE_OP(UPDATE_VAR)
4214 {
4215 UpdateVarType type;
4216 enum SaveType save_type;
4217
4218 type = p->update_var.type;
4219
4220 switch ((enum UpdateVarType )type) {
4221 case UPDATE_VAR_KEEP_FROM_STACK_LAST:
4222 STACK_GET_SAVE_VAL_TYPE_LAST(SAVE_KEEP, keep);
4223 break;
4224 case UPDATE_VAR_S_FROM_STACK:
4225 mem = p->update_var.id; /* mem: save id */
4226 STACK_GET_SAVE_VAL_TYPE_LAST_ID_WITH_SPREV(SAVE_S, mem, s);
4227 break;
4228 case UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK:
4229 save_type = SAVE_S;
4230 goto get_save_val_type_last_id;
4231 break;
4232 case UPDATE_VAR_RIGHT_RANGE_FROM_STACK:
4233 save_type = SAVE_RIGHT_RANGE;
4234 get_save_val_type_last_id:
4235 mem = p->update_var.id; /* mem: save id */
4236 STACK_GET_SAVE_VAL_TYPE_LAST_ID(save_type, mem, right_range, p->update_var.clear);
4237 break;
4238 case UPDATE_VAR_RIGHT_RANGE_TO_S:
4239 right_range = s;
4240 break;
4241 case UPDATE_VAR_RIGHT_RANGE_INIT:
4242 INIT_RIGHT_RANGE;
4243 break;
4244 }
4245 }
4246 INC_OP;
4247 JUMP_OUT;
4248
4249 #ifdef USE_CALLOUT
4250 CASE_OP(CALLOUT_CONTENTS)
4251 of = ONIG_CALLOUT_OF_CONTENTS;
4252 mem = p->callout_contents.num;
4253 goto callout_common_entry;
4254 BREAK_OUT;
4255
4256 CASE_OP(CALLOUT_NAME)
4257 {
4258 int call_result;
4259 int name_id;
4260 int in;
4261 CalloutListEntry* e;
4262 OnigCalloutFunc func;
4263 OnigCalloutArgs args;
4264
4265 of = ONIG_CALLOUT_OF_NAME;
4266 mem = p->callout_name.num;
4267
4268 callout_common_entry:
4269 e = onig_reg_callout_list_at(reg, mem);
4270 in = e->in;
4271 if (of == ONIG_CALLOUT_OF_NAME) {
4272 name_id = p->callout_name.id;
4273 func = onig_get_callout_start_func(reg, mem);
4274 }
4275 else {
4276 name_id = ONIG_NON_NAME_ID;
4277 func = msa->mp->progress_callout_of_contents;
4278 }
4279
4280 if (IS_NOT_NULL(func) && (in & ONIG_CALLOUT_IN_PROGRESS) != 0) {
4281 CALLOUT_BODY(func, ONIG_CALLOUT_IN_PROGRESS, name_id,
4282 (int )mem, msa->mp->callout_user_data, args, call_result);
4283 switch (call_result) {
4284 case ONIG_CALLOUT_FAIL:
4285 goto fail;
4286 break;
4287 case ONIG_CALLOUT_SUCCESS:
4288 goto retraction_callout2;
4289 break;
4290 default: /* error code */
4291 if (call_result > 0) {
4292 call_result = ONIGERR_INVALID_ARGUMENT;
4293 }
4294 best_len = call_result;
4295 goto match_at_end;
4296 break;
4297 }
4298 }
4299 else {
4300 retraction_callout2:
4301 if ((in & ONIG_CALLOUT_IN_RETRACTION) != 0) {
4302 if (of == ONIG_CALLOUT_OF_NAME) {
4303 if (IS_NOT_NULL(func)) {
4304 STACK_PUSH_CALLOUT_NAME(name_id, mem, func);
4305 }
4306 }
4307 else {
4308 func = msa->mp->retraction_callout_of_contents;
4309 if (IS_NOT_NULL(func)) {
4310 STACK_PUSH_CALLOUT_CONTENTS(mem, func);
4311 }
4312 }
4313 }
4314 }
4315 }
4316 INC_OP;
4317 JUMP_OUT;
4318 #endif
4319
4320 CASE_OP(FINISH)
4321 goto match_at_end;
4322
4323 #ifdef ONIG_DEBUG_STATISTICS
4324 fail:
4325 SOP_OUT;
4326 goto fail2;
4327 #endif
4328 CASE_OP(FAIL)
4329 #ifdef ONIG_DEBUG_STATISTICS
4330 fail2:
4331 #else
4332 fail:
4333 #endif
4334 STACK_POP;
4335 p = stk->u.state.pcode;
4336 s = stk->u.state.pstr;
4337 CHECK_RETRY_LIMIT_IN_MATCH;
4338 JUMP_OUT;
4339
4340 DEFAULT_OP
4341 MATCH_AT_ERROR_RETURN(ONIGERR_UNDEFINED_BYTECODE);
4342
4343 } BYTECODE_INTERPRETER_END;
4344
4345 match_at_end:
4346 if (msa->retry_limit_in_search != 0) {
4347 msa->retry_limit_in_search_counter += retry_in_match_counter;
4348 }
4349
4350 #ifdef ONIG_DEBUG_MATCH_COUNTER
4351 MATCH_COUNTER_OUT("END");
4352 #endif
4353
4354 STACK_SAVE(msa, is_alloca, alloc_base);
4355 return best_len;
4356 }
4357
4358
4359 #ifdef USE_REGSET
4360
/* One member of a regset: a compiled regex paired with the region that
   receives its capture positions.  The search code NULL-checks `region`
   before touching it, so a member may have no region. */
typedef struct {
  regex_t* reg;       /* compiled pattern */
  OnigRegion* region; /* capture region used when searching with reg */
} RR;
4365
/* A set of regexes that are searched together over one subject string. */
struct OnigRegSetStruct {
  RR* rs;            /* array of members; entries 0 .. n-1 are searched */
  int n;             /* number of members iterated by the search functions */
  int alloc;         /* presumably the allocated capacity of rs -- not used
                        in the search code here; confirm against the
                        regset construction code */
  OnigEncoding enc;  /* encoding used to step through / validate the subject */
  int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
  OnigLen anc_dmin; /* (SEMI_)END_BUF anchor distance */
  OnigLen anc_dmax; /* (SEMI_)END_BUF anchor distance */
  int all_low_high;  /* non-zero: the position-lead search may use its
                        low/high window loop (also requires a long range) */
  int anychar_inf;   /* non-zero: the position-lead search tracks whether
                        the previous character was a newline */
};
4377
/* Per-regex state kept by the position-lead regset search. */
enum SearchRangeStatus {
  SRS_DEAD = 0,      /* regex is skipped for the rest of the search */
  SRS_LOW_HIGH = 1,  /* regex is only tried inside the [low, high) window
                        reported by forward_search() */
  SRS_ALL_RANGE = 2  /* regex is tried without consulting a window */
};
4383
/* Candidate window for one regex during the position-lead regset search. */
typedef struct {
  int state; /* value of enum SearchRangeStatus */
  UChar* low;        /* positions before low are skipped for this regex */
  UChar* high;       /* once the position reaches high the window is
                        recomputed with forward_search() */
  UChar* sch_range;  /* search limit handed to forward_search() when the
                        window is recomputed (set only for SRS_LOW_HIGH) */
} SearchRange;
4390
/* Try regex `reg` at position `s` with match_at(), using the MatchArg
   msas[i].  A result >= 0 jumps to label `match`; a result other than
   ONIG_MISMATCH that is negative jumps to label `finish` (error).  On
   ONIG_MISMATCH control simply falls through.
   The expansion site must provide the variables r, reg, str, end, s,
   msas and i, and the labels `match` and `finish`. */
#define REGSET_MATCH_AND_RETURN_CHECK(upper_range) \
  r = match_at(reg, str, end, (upper_range), s, msas + i); \
  if (r != ONIG_MISMATCH) {\
    if (r >= 0) {\
      goto match;\
    }\
    else goto finish; /* error */ \
  }
4399
4400 static inline int
regset_search_body_position_lead(OnigRegSet * set,const UChar * str,const UChar * end,const UChar * start,const UChar * range,const UChar * orig_range,OnigOptionType option,MatchArg * msas,int * rmatch_pos)4401 regset_search_body_position_lead(OnigRegSet* set,
4402 const UChar* str, const UChar* end,
4403 const UChar* start, const UChar* range, /* match start range */
4404 const UChar* orig_range, /* data range */
4405 OnigOptionType option, MatchArg* msas, int* rmatch_pos)
4406 {
4407 int r, n, i;
4408 UChar *s;
4409 UChar *low, *high;
4410 UChar* sch_range;
4411 regex_t* reg;
4412 OnigEncoding enc;
4413 SearchRange* sr;
4414
4415 n = set->n;
4416 enc = set->enc;
4417 s = (UChar* )start;
4418
4419 sr = (SearchRange* )xmalloc(sizeof(*sr) * n);
4420 CHECK_NULL_RETURN_MEMERR(sr);
4421
4422 for (i = 0; i < n; i++) {
4423 reg = set->rs[i].reg;
4424
4425 sr[i].state = SRS_DEAD;
4426 if (reg->optimize != OPTIMIZE_NONE) {
4427 if (reg->dist_max != INFINITE_LEN) {
4428 if (end - range > reg->dist_max)
4429 sch_range = (UChar* )range + reg->dist_max;
4430 else
4431 sch_range = (UChar* )end;
4432
4433 if (forward_search(reg, str, end, s, sch_range, &low, &high)) {
4434 sr[i].state = SRS_LOW_HIGH;
4435 sr[i].low = low;
4436 sr[i].high = high;
4437 sr[i].sch_range = sch_range;
4438 }
4439 }
4440 else {
4441 sch_range = (UChar* )end;
4442 if (forward_search(reg, str, end, s, sch_range, &low, &high)) {
4443 goto total_active;
4444 }
4445 }
4446 }
4447 else {
4448 total_active:
4449 sr[i].state = SRS_ALL_RANGE;
4450 sr[i].low = s;
4451 sr[i].high = (UChar* )range;
4452 }
4453 }
4454
4455 #define ACTIVATE_ALL_LOW_HIGH_SEARCH_THRESHOLD_LEN 500
4456
4457 if (set->all_low_high != 0
4458 && range - start > ACTIVATE_ALL_LOW_HIGH_SEARCH_THRESHOLD_LEN) {
4459 do {
4460 int try_count = 0;
4461 for (i = 0; i < n; i++) {
4462 if (sr[i].state == SRS_DEAD) continue;
4463
4464 if (s < sr[i].low) continue;
4465 if (s >= sr[i].high) {
4466 if (forward_search(set->rs[i].reg, str, end, s, sr[i].sch_range,
4467 &low, &high) != 0) {
4468 sr[i].low = low;
4469 sr[i].high = high;
4470 if (s < low) continue;
4471 }
4472 else {
4473 sr[i].state = SRS_DEAD;
4474 continue;
4475 }
4476 }
4477
4478 reg = set->rs[i].reg;
4479 REGSET_MATCH_AND_RETURN_CHECK(orig_range);
4480 try_count++;
4481 } /* for (i) */
4482
4483 if (s >= range) break;
4484
4485 if (try_count == 0) {
4486 low = (UChar* )range;
4487 for (i = 0; i < n; i++) {
4488 if (sr[i].state == SRS_LOW_HIGH && low > sr[i].low) {
4489 low = sr[i].low;
4490 }
4491 }
4492 if (low == range) break;
4493
4494 s = low;
4495 }
4496 else {
4497 s += enclen(enc, s);
4498 }
4499 } while (1);
4500 }
4501 else {
4502 int prev_is_newline = 1;
4503 do {
4504 for (i = 0; i < n; i++) {
4505 if (sr[i].state == SRS_DEAD) continue;
4506 if (sr[i].state == SRS_LOW_HIGH) {
4507 if (s < sr[i].low) continue;
4508 if (s >= sr[i].high) {
4509 if (forward_search(set->rs[i].reg, str, end, s, sr[i].sch_range,
4510 &low, &high) != 0) {
4511 sr[i].low = low;
4512 sr[i].high = high;
4513 if (s < low) continue;
4514 }
4515 else {
4516 sr[i].state = SRS_DEAD;
4517 continue;
4518 }
4519 }
4520 }
4521
4522 reg = set->rs[i].reg;
4523 if ((reg->anchor & ANCR_ANYCHAR_INF) == 0 || prev_is_newline != 0) {
4524 REGSET_MATCH_AND_RETURN_CHECK(orig_range);
4525 }
4526 }
4527
4528 if (s >= range) break;
4529
4530 if (set->anychar_inf != 0)
4531 prev_is_newline = ONIGENC_IS_MBC_NEWLINE(set->enc, s, end);
4532
4533 s += enclen(enc, s);
4534 } while (1);
4535 }
4536
4537 xfree(sr);
4538 return ONIG_MISMATCH;
4539
4540 finish:
4541 xfree(sr);
4542 return r;
4543
4544 match:
4545 xfree(sr);
4546 *rmatch_pos = (int )(s - str);
4547 return i;
4548 }
4549
4550 static inline int
regset_search_body_regex_lead(OnigRegSet * set,const UChar * str,const UChar * end,const UChar * start,const UChar * orig_range,OnigRegSetLead lead,OnigOptionType option,OnigMatchParam * mps[],int * rmatch_pos)4551 regset_search_body_regex_lead(OnigRegSet* set,
4552 const UChar* str, const UChar* end,
4553 const UChar* start, const UChar* orig_range, OnigRegSetLead lead,
4554 OnigOptionType option, OnigMatchParam* mps[], int* rmatch_pos)
4555 {
4556 int r;
4557 int i;
4558 int n;
4559 int match_index;
4560 const UChar* ep;
4561 regex_t* reg;
4562 OnigRegion* region;
4563
4564 n = set->n;
4565
4566 match_index = ONIG_MISMATCH;
4567 ep = orig_range;
4568 for (i = 0; i < n; i++) {
4569 reg = set->rs[i].reg;
4570 region = set->rs[i].region;
4571 r = search_in_range(reg, str, end, start, ep, orig_range, region, option, mps[i]);
4572 if (r > 0) {
4573 if (str + r < ep) {
4574 match_index = i;
4575 *rmatch_pos = r;
4576 if (lead == ONIG_REGSET_PRIORITY_TO_REGEX_ORDER)
4577 break;
4578
4579 ep = str + r;
4580 }
4581 }
4582 else if (r == 0) {
4583 match_index = i;
4584 *rmatch_pos = r;
4585 break;
4586 }
4587 }
4588
4589 return match_index;
4590 }
4591
4592 extern int
onig_regset_search_with_param(OnigRegSet * set,const UChar * str,const UChar * end,const UChar * start,const UChar * range,OnigRegSetLead lead,OnigOptionType option,OnigMatchParam * mps[],int * rmatch_pos)4593 onig_regset_search_with_param(OnigRegSet* set,
4594 const UChar* str, const UChar* end,
4595 const UChar* start, const UChar* range,
4596 OnigRegSetLead lead, OnigOptionType option, OnigMatchParam* mps[],
4597 int* rmatch_pos)
4598 {
4599 int r;
4600 int i;
4601 UChar *s;
4602 regex_t* reg;
4603 OnigEncoding enc;
4604 OnigRegion* region;
4605 MatchArg* msas;
4606 const UChar *orig_start = start;
4607 const UChar *orig_range = range;
4608
4609 if (set->n == 0)
4610 return ONIG_MISMATCH;
4611
4612 if (OPTON_POSIX_REGION(option))
4613 return ONIGERR_INVALID_ARGUMENT;
4614
4615 r = 0;
4616 enc = set->enc;
4617 msas = (MatchArg* )NULL;
4618
4619 for (i = 0; i < set->n; i++) {
4620 reg = set->rs[i].reg;
4621 region = set->rs[i].region;
4622 ADJUST_MATCH_PARAM(reg, mps[i]);
4623 if (IS_NOT_NULL(region)) {
4624 r = onig_region_resize_clear(region, reg->num_mem + 1);
4625 if (r != 0) goto finish_no_msa;
4626 }
4627 }
4628
4629 if (start > end || start < str) goto mismatch_no_msa;
4630 if (str < end) {
4631 /* forward search only */
4632 if (range < start)
4633 return ONIGERR_INVALID_ARGUMENT;
4634 }
4635
4636 if (OPTON_CHECK_VALIDITY_OF_STRING(option)) {
4637 if (! ONIGENC_IS_VALID_MBC_STRING(enc, str, end)) {
4638 r = ONIGERR_INVALID_WIDE_CHAR_VALUE;
4639 goto finish_no_msa;
4640 }
4641 }
4642
4643 if (set->anchor != OPTIMIZE_NONE && str < end) {
4644 UChar *min_semi_end, *max_semi_end;
4645
4646 if ((set->anchor & ANCR_BEGIN_POSITION) != 0) {
4647 /* search start-position only */
4648 begin_position:
4649 range = start + 1;
4650 }
4651 else if ((set->anchor & ANCR_BEGIN_BUF) != 0) {
4652 /* search str-position only */
4653 if (start != str) goto mismatch_no_msa;
4654 range = str + 1;
4655 }
4656 else if ((set->anchor & ANCR_END_BUF) != 0) {
4657 min_semi_end = max_semi_end = (UChar* )end;
4658
4659 end_buf:
4660 if ((OnigLen )(max_semi_end - str) < set->anc_dmin)
4661 goto mismatch_no_msa;
4662
4663 if ((OnigLen )(min_semi_end - start) > set->anc_dmax) {
4664 start = min_semi_end - set->anc_dmax;
4665 if (start < end)
4666 start = onigenc_get_right_adjust_char_head(enc, str, start);
4667 }
4668 if ((OnigLen )(max_semi_end - (range - 1)) < set->anc_dmin) {
4669 range = max_semi_end - set->anc_dmin + 1;
4670 }
4671 if (start > range) goto mismatch_no_msa;
4672 }
4673 else if ((set->anchor & ANCR_SEMI_END_BUF) != 0) {
4674 UChar* pre_end = ONIGENC_STEP_BACK(enc, str, end, 1);
4675
4676 max_semi_end = (UChar* )end;
4677 if (ONIGENC_IS_MBC_NEWLINE(enc, pre_end, end)) {
4678 min_semi_end = pre_end;
4679
4680 #ifdef USE_CRNL_AS_LINE_TERMINATOR
4681 pre_end = ONIGENC_STEP_BACK(enc, str, pre_end, 1);
4682 if (IS_NOT_NULL(pre_end) &&
4683 ONIGENC_IS_MBC_CRNL(enc, pre_end, end)) {
4684 min_semi_end = pre_end;
4685 }
4686 #endif
4687 if (min_semi_end > str && start <= min_semi_end) {
4688 goto end_buf;
4689 }
4690 }
4691 else {
4692 min_semi_end = (UChar* )end;
4693 goto end_buf;
4694 }
4695 }
4696 else if ((set->anchor & ANCR_ANYCHAR_INF_ML) != 0) {
4697 goto begin_position;
4698 }
4699 }
4700 else if (str == end) { /* empty string */
4701 start = end = str;
4702 s = (UChar* )start;
4703
4704 msas = (MatchArg* )xmalloc(sizeof(*msas) * set->n);
4705 CHECK_NULL_RETURN_MEMERR(msas);
4706 for (i = 0; i < set->n; i++) {
4707 reg = set->rs[i].reg;
4708 MATCH_ARG_INIT(msas[i], reg, option, set->rs[i].region, start, mps[i]);
4709 }
4710 for (i = 0; i < set->n; i++) {
4711 reg = set->rs[i].reg;
4712 if (reg->threshold_len == 0) {
4713 /* REGSET_MATCH_AND_RETURN_CHECK(end); */
4714 /* Can't use REGSET_MATCH_AND_RETURN_CHECK()
4715 because r must be set regex index (i)
4716 */
4717 r = match_at(reg, str, end, end, s, msas + i);
4718 if (r != ONIG_MISMATCH) {
4719 if (r >= 0) {
4720 r = i;
4721 goto match;
4722 }
4723 else goto finish; /* error */
4724 }
4725 }
4726 }
4727
4728 goto mismatch;
4729 }
4730
4731 if (lead == ONIG_REGSET_POSITION_LEAD) {
4732 msas = (MatchArg* )xmalloc(sizeof(*msas) * set->n);
4733 CHECK_NULL_RETURN_MEMERR(msas);
4734
4735 for (i = 0; i < set->n; i++) {
4736 MATCH_ARG_INIT(msas[i], set->rs[i].reg, option, set->rs[i].region,
4737 orig_start, mps[i]);
4738 }
4739
4740 r = regset_search_body_position_lead(set, str, end, start, range,
4741 orig_range, option, msas, rmatch_pos);
4742 }
4743 else {
4744 r = regset_search_body_regex_lead(set, str, end, start, orig_range,
4745 lead, option, mps, rmatch_pos);
4746 }
4747 if (r < 0) goto finish;
4748 else goto match2;
4749
4750 mismatch:
4751 r = ONIG_MISMATCH;
4752 finish:
4753 for (i = 0; i < set->n; i++) {
4754 if (IS_NOT_NULL(msas))
4755 MATCH_ARG_FREE(msas[i]);
4756 if (OPTON_FIND_NOT_EMPTY(set->rs[i].reg->options) &&
4757 IS_NOT_NULL(set->rs[i].region)) {
4758 onig_region_clear(set->rs[i].region);
4759 }
4760 }
4761 if (IS_NOT_NULL(msas)) xfree(msas);
4762 return r;
4763
4764 mismatch_no_msa:
4765 r = ONIG_MISMATCH;
4766 finish_no_msa:
4767 return r;
4768
4769 match:
4770 *rmatch_pos = (int )(s - str);
4771 match2:
4772 for (i = 0; i < set->n; i++) {
4773 if (IS_NOT_NULL(msas))
4774 MATCH_ARG_FREE(msas[i]);
4775 if (OPTON_FIND_NOT_EMPTY(set->rs[i].reg->options) &&
4776 IS_NOT_NULL(set->rs[i].region)) {
4777 onig_region_clear(set->rs[i].region);
4778 }
4779 }
4780 if (IS_NOT_NULL(msas)) xfree(msas);
4781 return r; /* regex index */
4782 }
4783
4784 extern int
onig_regset_search(OnigRegSet * set,const UChar * str,const UChar * end,const UChar * start,const UChar * range,OnigRegSetLead lead,OnigOptionType option,int * rmatch_pos)4785 onig_regset_search(OnigRegSet* set, const UChar* str, const UChar* end,
4786 const UChar* start, const UChar* range,
4787 OnigRegSetLead lead, OnigOptionType option, int* rmatch_pos)
4788 {
4789 int r;
4790 int i;
4791 OnigMatchParam* mp;
4792 OnigMatchParam** mps;
4793
4794 mps = (OnigMatchParam** )xmalloc((sizeof(OnigMatchParam*) + sizeof(OnigMatchParam)) * set->n);
4795 CHECK_NULL_RETURN_MEMERR(mps);
4796
4797 mp = (OnigMatchParam* )(mps + set->n);
4798
4799 for (i = 0; i < set->n; i++) {
4800 onig_initialize_match_param(mp + i);
4801 mps[i] = mp + i;
4802 }
4803
4804 r = onig_regset_search_with_param(set, str, end, start, range, lead, option, mps,
4805 rmatch_pos);
4806 for (i = 0; i < set->n; i++)
4807 onig_free_match_param_content(mp + i);
4808
4809 xfree(mps);
4810
4811 return r;
4812 }
4813
4814 #endif /* USE_REGSET */
4815
4816
4817 static UChar*
slow_search(OnigEncoding enc,UChar * target,UChar * target_end,const UChar * text,const UChar * text_end,UChar * text_range)4818 slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
4819 const UChar* text, const UChar* text_end, UChar* text_range)
4820 {
4821 UChar *t, *p, *s, *end;
4822
4823 end = (UChar* )text_end;
4824 end -= target_end - target - 1;
4825 if (end > text_range)
4826 end = text_range;
4827
4828 s = (UChar* )text;
4829
4830 while (s < end) {
4831 if (*s == *target) {
4832 p = s + 1;
4833 t = target + 1;
4834 while (t < target_end) {
4835 if (*t != *p++)
4836 break;
4837 t++;
4838 }
4839 if (t == target_end)
4840 return s;
4841 }
4842 s += enclen(enc, s);
4843 }
4844
4845 return (UChar* )NULL;
4846 }
4847
4848 static UChar*
slow_search_backward(OnigEncoding enc,UChar * target,UChar * target_end,const UChar * text,const UChar * adjust_text,const UChar * text_end,const UChar * text_start)4849 slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
4850 const UChar* text, const UChar* adjust_text,
4851 const UChar* text_end, const UChar* text_start)
4852 {
4853 UChar *t, *p, *s;
4854
4855 s = (UChar* )text_end;
4856 s -= (target_end - target);
4857 if (s > text_start)
4858 s = (UChar* )text_start;
4859 else
4860 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
4861
4862 while (PTR_GE(s, text)) {
4863 if (*s == *target) {
4864 p = s + 1;
4865 t = target + 1;
4866 while (t < target_end) {
4867 if (*t != *p++)
4868 break;
4869 t++;
4870 }
4871 if (t == target_end)
4872 return s;
4873 }
4874 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);
4875 }
4876
4877 return (UChar* )NULL;
4878 }
4879
4880 static UChar*
sunday_quick_search_step_forward(regex_t * reg,const UChar * target,const UChar * target_end,const UChar * text,const UChar * text_end,const UChar * text_range)4881 sunday_quick_search_step_forward(regex_t* reg,
4882 const UChar* target, const UChar* target_end,
4883 const UChar* text, const UChar* text_end,
4884 const UChar* text_range)
4885 {
4886 const UChar *s, *se, *t, *p, *end;
4887 const UChar *tail;
4888 int skip, tlen1;
4889 int map_offset;
4890 OnigEncoding enc;
4891
4892 #ifdef ONIG_DEBUG_SEARCH
4893 fprintf(DBGFP,
4894 "sunday_quick_search_step_forward: text: %p, text_end: %p, text_range: %p\n",
4895 text, text_end, text_range);
4896 #endif
4897
4898 enc = reg->enc;
4899
4900 tail = target_end - 1;
4901 tlen1 = (int )(tail - target);
4902 end = text_range;
4903 if (tlen1 > text_end - end)
4904 end = text_end - tlen1;
4905
4906 map_offset = reg->map_offset;
4907 s = text;
4908
4909 while (s < end) {
4910 p = se = s + tlen1;
4911 t = tail;
4912 while (*p == *t) {
4913 if (t == target) return (UChar* )s;
4914 p--; t--;
4915 }
4916 if (se + map_offset >= text_end) break;
4917 skip = reg->map[*(se + map_offset)];
4918 #if 0
4919 t = s;
4920 do {
4921 s += enclen(enc, s);
4922 } while ((s - t) < skip && s < end);
4923 #else
4924 s += skip;
4925 if (s < end)
4926 s = onigenc_get_right_adjust_char_head(enc, text, s);
4927 #endif
4928 }
4929
4930 return (UChar* )NULL;
4931 }
4932
4933 static UChar*
sunday_quick_search(regex_t * reg,const UChar * target,const UChar * target_end,const UChar * text,const UChar * text_end,const UChar * text_range)4934 sunday_quick_search(regex_t* reg, const UChar* target, const UChar* target_end,
4935 const UChar* text, const UChar* text_end,
4936 const UChar* text_range)
4937 {
4938 const UChar *s, *t, *p, *end;
4939 const UChar *tail;
4940 int map_offset;
4941 ptrdiff_t target_len;
4942
4943 map_offset = reg->map_offset;
4944 tail = target_end - 1;
4945 target_len = target_end - target;
4946
4947 if (target_len > text_end - text_range) {
4948 end = text_end;
4949 if (target_len > text_end - text)
4950 return (UChar* )NULL;
4951 }
4952 else {
4953 end = text_range + target_len;
4954 }
4955
4956 s = text + target_len - 1;
4957
4958 #ifdef USE_STRICT_POINTER_ADDRESS
4959 if (s < end) {
4960 while (TRUE) {
4961 p = s;
4962 t = tail;
4963 while (*p == *t) {
4964 if (t == target) return (UChar* )p;
4965 p--; t--;
4966 }
4967 if (text_end - s <= map_offset) break;
4968 if (reg->map[*(s + map_offset)] >= end - s) break;
4969 s += reg->map[*(s + map_offset)];
4970 }
4971 }
4972 #else
4973 while (s < end) {
4974 p = s;
4975 t = tail;
4976 while (*p == *t) {
4977 if (t == target) return (UChar* )p;
4978 p--; t--;
4979 }
4980 if (text_end - s <= map_offset) break;
4981 s += reg->map[*(s + map_offset)];
4982 }
4983 #endif
4984
4985 return (UChar* )NULL;
4986 }
4987
4988 static UChar*
map_search(OnigEncoding enc,UChar map[],const UChar * text,const UChar * text_range)4989 map_search(OnigEncoding enc, UChar map[],
4990 const UChar* text, const UChar* text_range)
4991 {
4992 const UChar *s = text;
4993
4994 while (s < text_range) {
4995 if (map[*s]) return (UChar* )s;
4996
4997 s += enclen(enc, s);
4998 }
4999 return (UChar* )NULL;
5000 }
5001
5002 static UChar*
map_search_backward(OnigEncoding enc,UChar map[],const UChar * text,const UChar * adjust_text,const UChar * text_start)5003 map_search_backward(OnigEncoding enc, UChar map[],
5004 const UChar* text, const UChar* adjust_text,
5005 const UChar* text_start)
5006 {
5007 const UChar *s = text_start;
5008
5009 while (PTR_GE(s, text)) {
5010 if (map[*s]) return (UChar* )s;
5011
5012 s = onigenc_get_prev_char_head(enc, adjust_text, s);
5013 }
5014 return (UChar* )NULL;
5015 }
5016 extern int
onig_match(regex_t * reg,const UChar * str,const UChar * end,const UChar * at,OnigRegion * region,OnigOptionType option)5017 onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at,
5018 OnigRegion* region, OnigOptionType option)
5019 {
5020 int r;
5021 OnigMatchParam mp;
5022
5023 onig_initialize_match_param(&mp);
5024 r = onig_match_with_param(reg, str, end, at, region, option, &mp);
5025 onig_free_match_param_content(&mp);
5026 return r;
5027 }
5028
5029 extern int
onig_match_with_param(regex_t * reg,const UChar * str,const UChar * end,const UChar * at,OnigRegion * region,OnigOptionType option,OnigMatchParam * mp)5030 onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end,
5031 const UChar* at, OnigRegion* region, OnigOptionType option,
5032 OnigMatchParam* mp)
5033 {
5034 int r;
5035 MatchArg msa;
5036
5037 #ifndef USE_POSIX_API
5038 if (OPTON_POSIX_REGION(option)) return ONIGERR_INVALID_ARGUMENT;
5039 #endif
5040
5041 ADJUST_MATCH_PARAM(reg, mp);
5042 MATCH_ARG_INIT(msa, reg, option, region, at, mp);
5043 if (region
5044 #ifdef USE_POSIX_API
5045 && !OPTON_POSIX_REGION(option)
5046 #endif
5047 ) {
5048 r = onig_region_resize_clear(region, reg->num_mem + 1);
5049 }
5050 else
5051 r = 0;
5052
5053 if (r == 0) {
5054 if (OPTON_CHECK_VALIDITY_OF_STRING(option)) {
5055 if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end)) {
5056 r = ONIGERR_INVALID_WIDE_CHAR_VALUE;
5057 goto end;
5058 }
5059 }
5060
5061 r = match_at(reg, str, end, end, at, &msa);
5062 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
5063 if (OPTON_FIND_LONGEST(option) && r == ONIG_MISMATCH) {
5064 if (msa.best_len >= 0) {
5065 r = msa.best_len;
5066 }
5067 }
5068 #endif
5069 }
5070
5071 end:
5072 MATCH_ARG_FREE(msa);
5073 return r;
5074 }
5075
5076 static int
forward_search(regex_t * reg,const UChar * str,const UChar * end,UChar * start,UChar * range,UChar ** low,UChar ** high)5077 forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start,
5078 UChar* range, UChar** low, UChar** high)
5079 {
5080 UChar *p, *pprev = (UChar* )NULL;
5081
5082 #ifdef ONIG_DEBUG_SEARCH
5083 fprintf(DBGFP, "forward_search: str: %p, end: %p, start: %p, range: %p\n",
5084 str, end, start, range);
5085 #endif
5086
5087 p = start;
5088 if (reg->dist_min != 0) {
5089 if (end - p <= reg->dist_min)
5090 return 0; /* fail */
5091
5092 if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
5093 p += reg->dist_min;
5094 }
5095 else {
5096 UChar *q = p + reg->dist_min;
5097 while (p < q) p += enclen(reg->enc, p);
5098 }
5099 }
5100
5101 retry:
5102 switch (reg->optimize) {
5103 case OPTIMIZE_STR:
5104 p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
5105 break;
5106
5107 case OPTIMIZE_STR_FAST:
5108 p = sunday_quick_search(reg, reg->exact, reg->exact_end, p, end, range);
5109 break;
5110
5111 case OPTIMIZE_STR_FAST_STEP_FORWARD:
5112 p = sunday_quick_search_step_forward(reg, reg->exact, reg->exact_end,
5113 p, end, range);
5114 break;
5115
5116 case OPTIMIZE_MAP:
5117 p = map_search(reg->enc, reg->map, p, range);
5118 break;
5119 }
5120
5121 if (p && p < range) {
5122 if (p - start < reg->dist_min) {
5123 retry_gate:
5124 pprev = p;
5125 p += enclen(reg->enc, p);
5126 goto retry;
5127 }
5128
5129 if (reg->sub_anchor) {
5130 UChar* prev;
5131
5132 switch (reg->sub_anchor) {
5133 case ANCR_BEGIN_LINE:
5134 if (!ON_STR_BEGIN(p)) {
5135 prev = onigenc_get_prev_char_head(reg->enc, (pprev ? pprev : str), p);
5136 if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
5137 goto retry_gate;
5138 }
5139 break;
5140
5141 case ANCR_END_LINE:
5142 if (ON_STR_END(p)) {
5143 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
5144 prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
5145 (pprev ? pprev : str), p);
5146 if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
5147 goto retry_gate;
5148 #endif
5149 }
5150 else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
5151 #ifdef USE_CRNL_AS_LINE_TERMINATOR
5152 && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
5153 #endif
5154 )
5155 goto retry_gate;
5156
5157 break;
5158 }
5159 }
5160
5161 if (reg->dist_max == 0) {
5162 *low = p;
5163 *high = p;
5164 }
5165 else {
5166 if (reg->dist_max != INFINITE_LEN) {
5167 if (p - str < reg->dist_max) {
5168 *low = (UChar* )str;
5169 }
5170 else {
5171 *low = p - reg->dist_max;
5172 if (*low > start) {
5173 *low = onigenc_get_right_adjust_char_head(reg->enc, start, *low);
5174 }
5175 }
5176 }
5177 /* no needs to adjust *high, *high is used as range check only */
5178 if (p - str < reg->dist_min)
5179 *high = (UChar* )str;
5180 else
5181 *high = p - reg->dist_min;
5182 }
5183
5184 #ifdef ONIG_DEBUG_SEARCH
5185 fprintf(DBGFP,
5186 "forward_search success: low: %d, high: %d, dmin: %u, dmax: %u\n",
5187 (int )(*low - str), (int )(*high - str),
5188 reg->dist_min, reg->dist_max);
5189 #endif
5190 return 1; /* success */
5191 }
5192
5193 return 0; /* fail */
5194 }
5195
5196
5197 static int
backward_search(regex_t * reg,const UChar * str,const UChar * end,UChar * s,const UChar * range,UChar * adjrange,UChar ** low,UChar ** high)5198 backward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
5199 const UChar* range, UChar* adjrange, UChar** low, UChar** high)
5200 {
5201 UChar *p;
5202
5203 p = s;
5204
5205 retry:
5206 switch (reg->optimize) {
5207 case OPTIMIZE_STR:
5208 exact_method:
5209 p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
5210 range, adjrange, end, p);
5211 break;
5212
5213 case OPTIMIZE_STR_FAST:
5214 case OPTIMIZE_STR_FAST_STEP_FORWARD:
5215 goto exact_method;
5216 break;
5217
5218 case OPTIMIZE_MAP:
5219 p = map_search_backward(reg->enc, reg->map, range, adjrange, p);
5220 break;
5221 }
5222
5223 if (p) {
5224 if (reg->sub_anchor) {
5225 UChar* prev;
5226
5227 switch (reg->sub_anchor) {
5228 case ANCR_BEGIN_LINE:
5229 if (!ON_STR_BEGIN(p)) {
5230 prev = onigenc_get_prev_char_head(reg->enc, str, p);
5231 if (IS_NOT_NULL(prev) && !ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
5232 p = prev;
5233 goto retry;
5234 }
5235 }
5236 break;
5237
5238 case ANCR_END_LINE:
5239 if (ON_STR_END(p)) {
5240 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
5241 prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
5242 if (IS_NULL(prev)) goto fail;
5243 if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
5244 p = prev;
5245 goto retry;
5246 }
5247 #endif
5248 }
5249 else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
5250 #ifdef USE_CRNL_AS_LINE_TERMINATOR
5251 && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
5252 #endif
5253 ) {
5254 p = onigenc_get_prev_char_head(reg->enc, adjrange, p);
5255 if (IS_NULL(p)) goto fail;
5256 goto retry;
5257 }
5258 break;
5259 }
5260 }
5261
5262 if (reg->dist_max != INFINITE_LEN) {
5263 if (p - str < reg->dist_max)
5264 *low = (UChar* )str;
5265 else
5266 *low = p - reg->dist_max;
5267
5268 if (reg->dist_min != 0) {
5269 if (p - str < reg->dist_min)
5270 *high = (UChar* )str;
5271 else
5272 *high = p - reg->dist_min;
5273 }
5274 else {
5275 *high = p;
5276 }
5277
5278 *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high);
5279 }
5280
5281 #ifdef ONIG_DEBUG_SEARCH
5282 fprintf(DBGFP, "backward_search: low: %d, high: %d\n",
5283 (int )(*low - str), (int )(*high - str));
5284 #endif
5285 return 1; /* success */
5286 }
5287
5288 fail:
5289 #ifdef ONIG_DEBUG_SEARCH
5290 fprintf(DBGFP, "backward_search: fail.\n");
5291 #endif
5292 return 0; /* fail */
5293 }
5294
5295
5296 extern int
onig_search(regex_t * reg,const UChar * str,const UChar * end,const UChar * start,const UChar * range,OnigRegion * region,OnigOptionType option)5297 onig_search(regex_t* reg, const UChar* str, const UChar* end,
5298 const UChar* start, const UChar* range, OnigRegion* region,
5299 OnigOptionType option)
5300 {
5301 int r;
5302 OnigMatchParam mp;
5303 const UChar* data_range;
5304
5305 onig_initialize_match_param(&mp);
5306
5307 /* The following is an expanded code of onig_search_with_param() */
5308 if (range > start)
5309 data_range = range;
5310 else
5311 data_range = end;
5312
5313 r = search_in_range(reg, str, end, start, range, data_range, region,
5314 option, &mp);
5315
5316 onig_free_match_param_content(&mp);
5317 return r;
5318
5319 }
5320
5321 static int
search_in_range(regex_t * reg,const UChar * str,const UChar * end,const UChar * start,const UChar * range,const UChar * data_range,OnigRegion * region,OnigOptionType option,OnigMatchParam * mp)5322 search_in_range(regex_t* reg, const UChar* str, const UChar* end,
5323 const UChar* start, const UChar* range, /* match start range */
5324 const UChar* data_range, /* subject string range */
5325 OnigRegion* region,
5326 OnigOptionType option, OnigMatchParam* mp)
5327 {
5328 int r;
5329 UChar *s;
5330 MatchArg msa;
5331 const UChar *orig_start = start;
5332
5333 #ifdef ONIG_DEBUG_SEARCH
5334 fprintf(DBGFP,
5335 "onig_search (entry point): str: %p, end: %d, start: %d, range: %d\n",
5336 str, (int )(end - str), (int )(start - str), (int )(range - str));
5337 #endif
5338
5339 ADJUST_MATCH_PARAM(reg, mp);
5340
5341 #ifndef USE_POSIX_API
5342 if (OPTON_POSIX_REGION(option)) {
5343 r = ONIGERR_INVALID_ARGUMENT;
5344 goto finish_no_msa;
5345 }
5346 #endif
5347
5348 if (region
5349 #ifdef USE_POSIX_API
5350 && ! OPTON_POSIX_REGION(option)
5351 #endif
5352 ) {
5353 r = onig_region_resize_clear(region, reg->num_mem + 1);
5354 if (r != 0) goto finish_no_msa;
5355 }
5356
5357 if (start > end || start < str) goto mismatch_no_msa;
5358
5359 if (OPTON_CHECK_VALIDITY_OF_STRING(option)) {
5360 if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end)) {
5361 r = ONIGERR_INVALID_WIDE_CHAR_VALUE;
5362 goto finish_no_msa;
5363 }
5364 }
5365
5366
5367 #define MATCH_AND_RETURN_CHECK(upper_range) \
5368 r = match_at(reg, str, end, (upper_range), s, &msa);\
5369 if (r != ONIG_MISMATCH) {\
5370 if (r >= 0) {\
5371 goto match;\
5372 }\
5373 else goto finish; /* error */ \
5374 }
5375
5376
5377 /* anchor optimize: resume search range */
5378 if (reg->anchor != 0 && str < end) {
5379 UChar *min_semi_end, *max_semi_end;
5380
5381 if (reg->anchor & ANCR_BEGIN_POSITION) {
5382 /* search start-position only */
5383 begin_position:
5384 if (range > start)
5385 range = start + 1;
5386 else
5387 range = start;
5388 }
5389 else if (reg->anchor & ANCR_BEGIN_BUF) {
5390 /* search str-position only */
5391 if (range > start) {
5392 if (start != str) goto mismatch_no_msa;
5393 range = str + 1;
5394 }
5395 else {
5396 if (range <= str) {
5397 start = str;
5398 range = str;
5399 }
5400 else
5401 goto mismatch_no_msa;
5402 }
5403 }
5404 else if (reg->anchor & ANCR_END_BUF) {
5405 min_semi_end = max_semi_end = (UChar* )end;
5406
5407 end_buf:
5408 if ((OnigLen )(max_semi_end - str) < reg->anc_dist_min)
5409 goto mismatch_no_msa;
5410
5411 if (range > start) {
5412 if (reg->anc_dist_max != INFINITE_LEN &&
5413 min_semi_end - start > reg->anc_dist_max) {
5414 start = min_semi_end - reg->anc_dist_max;
5415 if (start < end)
5416 start = onigenc_get_right_adjust_char_head(reg->enc, str, start);
5417 }
5418 if (max_semi_end - (range - 1) < reg->anc_dist_min) {
5419 if (max_semi_end - str + 1 < reg->anc_dist_min)
5420 goto mismatch_no_msa;
5421 else
5422 range = max_semi_end - reg->anc_dist_min + 1;
5423 }
5424
5425 if (start > range) goto mismatch_no_msa;
5426 /* If start == range, match with empty at end.
5427 Backward search is used. */
5428 }
5429 else {
5430 if (reg->anc_dist_max != INFINITE_LEN &&
5431 min_semi_end - range > reg->anc_dist_max) {
5432 range = min_semi_end - reg->anc_dist_max;
5433 }
5434 if (max_semi_end - start < reg->anc_dist_min) {
5435 if (max_semi_end - str < reg->anc_dist_min)
5436 goto mismatch_no_msa;
5437 else {
5438 start = max_semi_end - reg->anc_dist_min;
5439 start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start);
5440 }
5441 }
5442 if (range > start) goto mismatch_no_msa;
5443 }
5444 }
5445 else if (reg->anchor & ANCR_SEMI_END_BUF) {
5446 UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, 1);
5447
5448 max_semi_end = (UChar* )end;
5449 if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
5450 min_semi_end = pre_end;
5451
5452 #ifdef USE_CRNL_AS_LINE_TERMINATOR
5453 pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, 1);
5454 if (IS_NOT_NULL(pre_end) &&
5455 ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
5456 min_semi_end = pre_end;
5457 }
5458 #endif
5459 if (min_semi_end > str && start <= min_semi_end) {
5460 goto end_buf;
5461 }
5462 }
5463 else {
5464 min_semi_end = (UChar* )end;
5465 goto end_buf;
5466 }
5467 }
5468 else if ((reg->anchor & ANCR_ANYCHAR_INF_ML) && range > start) {
5469 goto begin_position;
5470 }
5471 }
5472 else if (str == end) { /* empty string */
5473 static const UChar* address_for_empty_string = (UChar* )"";
5474
5475 #ifdef ONIG_DEBUG_SEARCH
5476 fprintf(DBGFP, "onig_search: empty string.\n");
5477 #endif
5478
5479 if (reg->threshold_len == 0) {
5480 start = end = str = address_for_empty_string;
5481 s = (UChar* )start;
5482
5483 MATCH_ARG_INIT(msa, reg, option, region, start, mp);
5484 MATCH_AND_RETURN_CHECK(end);
5485 goto mismatch;
5486 }
5487 goto mismatch_no_msa;
5488 }
5489
5490 #ifdef ONIG_DEBUG_SEARCH
5491 fprintf(DBGFP, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
5492 (int )(end - str), (int )(start - str), (int )(range - str));
5493 #endif
5494
5495 MATCH_ARG_INIT(msa, reg, option, region, orig_start, mp);
5496
5497 s = (UChar* )start;
5498 if (range > start) { /* forward search */
5499 if (reg->optimize != OPTIMIZE_NONE) {
5500 UChar *sch_range, *low, *high;
5501
5502 if (reg->dist_max != 0) {
5503 if (reg->dist_max == INFINITE_LEN)
5504 sch_range = (UChar* )end;
5505 else {
5506 if ((end - range) < reg->dist_max)
5507 sch_range = (UChar* )end;
5508 else {
5509 sch_range = (UChar* )range + reg->dist_max;
5510 }
5511 }
5512 }
5513 else
5514 sch_range = (UChar* )range;
5515
5516 if ((end - start) < reg->threshold_len)
5517 goto mismatch;
5518
5519 if (reg->dist_max != INFINITE_LEN) {
5520 do {
5521 if (! forward_search(reg, str, end, s, sch_range, &low, &high))
5522 goto mismatch;
5523 if (s < low) {
5524 s = low;
5525 }
5526 while (s <= high) {
5527 MATCH_AND_RETURN_CHECK(data_range);
5528 s += enclen(reg->enc, s);
5529 }
5530 } while (s < range);
5531 goto mismatch;
5532 }
5533 else { /* check only. */
5534 if (! forward_search(reg, str, end, s, sch_range, &low, &high))
5535 goto mismatch;
5536
5537 if ((reg->anchor & ANCR_ANYCHAR_INF) != 0 &&
5538 (reg->anchor & (ANCR_LOOK_BEHIND | ANCR_PREC_READ_NOT)) == 0) {
5539 do {
5540 UChar* prev;
5541
5542 MATCH_AND_RETURN_CHECK(data_range);
5543 prev = s;
5544 s += enclen(reg->enc, s);
5545
5546 while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) {
5547 prev = s;
5548 s += enclen(reg->enc, s);
5549 }
5550 } while (s < range);
5551 goto mismatch;
5552 }
5553 }
5554 }
5555
5556 do {
5557 MATCH_AND_RETURN_CHECK(data_range);
5558 s += enclen(reg->enc, s);
5559 } while (s < range);
5560
5561 if (s == range) { /* because empty match with /$/. */
5562 MATCH_AND_RETURN_CHECK(data_range);
5563 }
5564 }
5565 else { /* backward search */
5566 if (range < str) goto mismatch;
5567
5568 if (orig_start < end)
5569 orig_start += enclen(reg->enc, orig_start); /* is upper range */
5570
5571 if (reg->optimize != OPTIMIZE_NONE) {
5572 UChar *low, *high, *adjrange, *sch_start;
5573 const UChar *min_range;
5574
5575 if ((end - range) < reg->threshold_len) goto mismatch;
5576
5577 if (range < end)
5578 adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range);
5579 else
5580 adjrange = (UChar* )end;
5581
5582 if (end - range > reg->dist_min)
5583 min_range = range + reg->dist_min;
5584 else
5585 min_range = end;
5586
5587 if (reg->dist_max != INFINITE_LEN) {
5588 do {
5589 if (end - s > reg->dist_max)
5590 sch_start = s + reg->dist_max;
5591 else {
5592 sch_start = onigenc_get_prev_char_head(reg->enc, str, end);
5593 }
5594
5595 if (backward_search(reg, str, end, sch_start, min_range, adjrange,
5596 &low, &high) <= 0)
5597 goto mismatch;
5598
5599 if (s > high)
5600 s = high;
5601
5602 while (PTR_GE(s, low)) {
5603 MATCH_AND_RETURN_CHECK(orig_start);
5604 s = onigenc_get_prev_char_head(reg->enc, str, s);
5605 }
5606 } while (PTR_GE(s, range));
5607 goto mismatch;
5608 }
5609 else { /* check only. */
5610 sch_start = onigenc_get_prev_char_head(reg->enc, str, end);
5611
5612 if (backward_search(reg, str, end, sch_start, min_range, adjrange,
5613 &low, &high) <= 0) goto mismatch;
5614 }
5615 }
5616
5617 do {
5618 MATCH_AND_RETURN_CHECK(orig_start);
5619 s = onigenc_get_prev_char_head(reg->enc, str, s);
5620 } while (PTR_GE(s, range));
5621 }
5622
5623 mismatch:
5624 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
5625 if (OPTON_FIND_LONGEST(reg->options)) {
5626 if (msa.best_len >= 0) {
5627 s = msa.best_s;
5628 goto match;
5629 }
5630 }
5631 #endif
5632 r = ONIG_MISMATCH;
5633
5634 finish:
5635 MATCH_ARG_FREE(msa);
5636
5637 /* If result is mismatch and no FIND_NOT_EMPTY option,
5638 then the region is not set in match_at(). */
5639 if (OPTON_FIND_NOT_EMPTY(reg->options) && region
5640 #ifdef USE_POSIX_API
5641 && !OPTON_POSIX_REGION(option)
5642 #endif
5643 ) {
5644 onig_region_clear(region);
5645 }
5646
5647 #ifdef ONIG_DEBUG
5648 if (r != ONIG_MISMATCH)
5649 fprintf(DBGFP, "onig_search: error %d\n", r);
5650 #endif
5651 return r;
5652
5653 mismatch_no_msa:
5654 r = ONIG_MISMATCH;
5655 finish_no_msa:
5656 #ifdef ONIG_DEBUG
5657 if (r != ONIG_MISMATCH)
5658 fprintf(DBGFP, "onig_search: error %d\n", r);
5659 #endif
5660 return r;
5661
5662 match:
5663 MATCH_ARG_FREE(msa);
5664 return (int )(s - str);
5665 }
5666
5667 extern int
onig_search_with_param(regex_t * reg,const UChar * str,const UChar * end,const UChar * start,const UChar * range,OnigRegion * region,OnigOptionType option,OnigMatchParam * mp)5668 onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,
5669 const UChar* start, const UChar* range, OnigRegion* region,
5670 OnigOptionType option, OnigMatchParam* mp)
5671 {
5672 const UChar* data_range;
5673
5674 if (range > start)
5675 data_range = range;
5676 else
5677 data_range = end;
5678
5679 return search_in_range(reg, str, end, start, range, data_range, region,
5680 option, mp);
5681 }
5682
5683 extern int
onig_scan(regex_t * reg,const UChar * str,const UChar * end,OnigRegion * region,OnigOptionType option,int (* scan_callback)(int,int,OnigRegion *,void *),void * callback_arg)5684 onig_scan(regex_t* reg, const UChar* str, const UChar* end,
5685 OnigRegion* region, OnigOptionType option,
5686 int (*scan_callback)(int, int, OnigRegion*, void*),
5687 void* callback_arg)
5688 {
5689 int r;
5690 int n;
5691 int rs;
5692 const UChar* start;
5693
5694 if (OPTON_CHECK_VALIDITY_OF_STRING(option)) {
5695 if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end))
5696 return ONIGERR_INVALID_WIDE_CHAR_VALUE;
5697
5698 ONIG_OPTION_OFF(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING);
5699 }
5700
5701 n = 0;
5702 start = str;
5703 while (1) {
5704 r = onig_search(reg, str, end, start, end, region, option);
5705 if (r >= 0) {
5706 rs = scan_callback(n, r, region, callback_arg);
5707 n++;
5708 if (rs != 0)
5709 return rs;
5710
5711 if (region->end[0] == start - str) {
5712 if (start >= end) break;
5713 start += enclen(reg->enc, start);
5714 }
5715 else
5716 start = str + region->end[0];
5717
5718 if (start > end)
5719 break;
5720 }
5721 else if (r == ONIG_MISMATCH) {
5722 break;
5723 }
5724 else { /* error */
5725 return r;
5726 }
5727 }
5728
5729 return n;
5730 }
5731
5732 extern int
onig_get_subexp_call_max_nest_level(void)5733 onig_get_subexp_call_max_nest_level(void)
5734 {
5735 return SubexpCallMaxNestLevel;
5736 }
5737
5738 extern int
onig_set_subexp_call_max_nest_level(int level)5739 onig_set_subexp_call_max_nest_level(int level)
5740 {
5741 SubexpCallMaxNestLevel = level;
5742 return 0;
5743 }
5744
5745 extern OnigEncoding
onig_get_encoding(regex_t * reg)5746 onig_get_encoding(regex_t* reg)
5747 {
5748 return reg->enc;
5749 }
5750
5751 extern OnigOptionType
onig_get_options(regex_t * reg)5752 onig_get_options(regex_t* reg)
5753 {
5754 return reg->options;
5755 }
5756
5757 extern OnigCaseFoldType
onig_get_case_fold_flag(regex_t * reg)5758 onig_get_case_fold_flag(regex_t* reg)
5759 {
5760 return reg->case_fold_flag;
5761 }
5762
5763 extern OnigSyntaxType*
onig_get_syntax(regex_t * reg)5764 onig_get_syntax(regex_t* reg)
5765 {
5766 return reg->syntax;
5767 }
5768
5769 extern int
onig_number_of_captures(regex_t * reg)5770 onig_number_of_captures(regex_t* reg)
5771 {
5772 return reg->num_mem;
5773 }
5774
5775 extern int
onig_number_of_capture_histories(regex_t * reg)5776 onig_number_of_capture_histories(regex_t* reg)
5777 {
5778 #ifdef USE_CAPTURE_HISTORY
5779 int i, n;
5780
5781 n = 0;
5782 for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
5783 if (MEM_STATUS_AT(reg->capture_history, i) != 0)
5784 n++;
5785 }
5786 return n;
5787 #else
5788 return 0;
5789 #endif
5790 }
5791
5792 extern void
onig_copy_encoding(OnigEncoding to,OnigEncoding from)5793 onig_copy_encoding(OnigEncoding to, OnigEncoding from)
5794 {
5795 *to = *from;
5796 }
5797
5798 #ifdef USE_REGSET
5799
5800 extern int
onig_regset_new(OnigRegSet ** rset,int n,regex_t * regs[])5801 onig_regset_new(OnigRegSet** rset, int n, regex_t* regs[])
5802 {
5803 #define REGSET_INITIAL_ALLOC_SIZE 10
5804
5805 int i;
5806 int r;
5807 int alloc;
5808 OnigRegSet* set;
5809 RR* rs;
5810
5811 *rset = 0;
5812
5813 set = (OnigRegSet* )xmalloc(sizeof(*set));
5814 CHECK_NULL_RETURN_MEMERR(set);
5815
5816 alloc = n > REGSET_INITIAL_ALLOC_SIZE ? n : REGSET_INITIAL_ALLOC_SIZE;
5817 rs = (RR* )xmalloc(sizeof(set->rs[0]) * alloc);
5818 if (IS_NULL(rs)) {
5819 xfree(set);
5820 return ONIGERR_MEMORY;
5821 }
5822
5823 set->rs = rs;
5824 set->n = 0;
5825 set->alloc = alloc;
5826
5827 for (i = 0; i < n; i++) {
5828 regex_t* reg = regs[i];
5829
5830 r = onig_regset_add(set, reg);
5831 if (r != 0) {
5832 for (i = 0; i < set->n; i++) {
5833 OnigRegion* region = set->rs[i].region;
5834 if (IS_NOT_NULL(region))
5835 onig_region_free(region, 1);
5836 }
5837 xfree(set->rs);
5838 xfree(set);
5839 return r;
5840 }
5841 }
5842
5843 *rset = set;
5844 return 0;
5845 }
5846
5847 static void
update_regset_by_reg(OnigRegSet * set,regex_t * reg)5848 update_regset_by_reg(OnigRegSet* set, regex_t* reg)
5849 {
5850 if (set->n == 1) {
5851 set->enc = reg->enc;
5852 set->anchor = reg->anchor;
5853 set->anc_dmin = reg->anc_dist_min;
5854 set->anc_dmax = reg->anc_dist_max;
5855 set->all_low_high =
5856 (reg->optimize == OPTIMIZE_NONE || reg->dist_max == INFINITE_LEN) ? 0 : 1;
5857 set->anychar_inf = (reg->anchor & ANCR_ANYCHAR_INF) != 0 ? 1 : 0;
5858 }
5859 else {
5860 int anchor;
5861
5862 anchor = set->anchor & reg->anchor;
5863 if (anchor != 0) {
5864 OnigLen anc_dmin;
5865 OnigLen anc_dmax;
5866
5867 anc_dmin = set->anc_dmin;
5868 anc_dmax = set->anc_dmax;
5869 if (anc_dmin > reg->anc_dist_min) anc_dmin = reg->anc_dist_min;
5870 if (anc_dmax < reg->anc_dist_max) anc_dmax = reg->anc_dist_max;
5871 set->anc_dmin = anc_dmin;
5872 set->anc_dmax = anc_dmax;
5873 }
5874
5875 set->anchor = anchor;
5876
5877 if (reg->optimize == OPTIMIZE_NONE || reg->dist_max == INFINITE_LEN)
5878 set->all_low_high = 0;
5879
5880 if ((reg->anchor & ANCR_ANYCHAR_INF) != 0)
5881 set->anychar_inf = 1;
5882 }
5883 }
5884
5885 extern int
onig_regset_add(OnigRegSet * set,regex_t * reg)5886 onig_regset_add(OnigRegSet* set, regex_t* reg)
5887 {
5888 OnigRegion* region;
5889
5890 if (OPTON_FIND_LONGEST(reg->options))
5891 return ONIGERR_INVALID_ARGUMENT;
5892
5893 if (set->n != 0 && reg->enc != set->enc)
5894 return ONIGERR_INVALID_ARGUMENT;
5895
5896 if (set->n >= set->alloc) {
5897 RR* nrs;
5898 int new_alloc;
5899
5900 new_alloc = set->alloc * 2;
5901 nrs = (RR* )xrealloc(set->rs, sizeof(set->rs[0]) * new_alloc);
5902 CHECK_NULL_RETURN_MEMERR(nrs);
5903
5904 set->rs = nrs;
5905 set->alloc = new_alloc;
5906 }
5907
5908 region = onig_region_new();
5909 CHECK_NULL_RETURN_MEMERR(region);
5910
5911 set->rs[set->n].reg = reg;
5912 set->rs[set->n].region = region;
5913 set->n++;
5914
5915 update_regset_by_reg(set, reg);
5916 return 0;
5917 }
5918
5919 extern int
onig_regset_replace(OnigRegSet * set,int at,regex_t * reg)5920 onig_regset_replace(OnigRegSet* set, int at, regex_t* reg)
5921 {
5922 int i;
5923
5924 if (at < 0 || at >= set->n)
5925 return ONIGERR_INVALID_ARGUMENT;
5926
5927 if (IS_NULL(reg)) {
5928 onig_region_free(set->rs[at].region, 1);
5929 for (i = at; i < set->n - 1; i++) {
5930 set->rs[i].reg = set->rs[i+1].reg;
5931 set->rs[i].region = set->rs[i+1].region;
5932 }
5933 set->n--;
5934 }
5935 else {
5936 if (OPTON_FIND_LONGEST(reg->options))
5937 return ONIGERR_INVALID_ARGUMENT;
5938
5939 if (set->n > 1 && reg->enc != set->enc)
5940 return ONIGERR_INVALID_ARGUMENT;
5941
5942 set->rs[at].reg = reg;
5943 }
5944
5945 for (i = 0; i < set->n; i++)
5946 update_regset_by_reg(set, set->rs[i].reg);
5947
5948 return 0;
5949 }
5950
5951 extern void
onig_regset_free(OnigRegSet * set)5952 onig_regset_free(OnigRegSet* set)
5953 {
5954 int i;
5955
5956 for (i = 0; i < set->n; i++) {
5957 regex_t* reg;
5958 OnigRegion* region;
5959
5960 reg = set->rs[i].reg;
5961 region = set->rs[i].region;
5962 onig_free(reg);
5963 if (IS_NOT_NULL(region))
5964 onig_region_free(region, 1);
5965 }
5966
5967 xfree(set->rs);
5968 xfree(set);
5969 }
5970
5971 extern int
onig_regset_number_of_regex(OnigRegSet * set)5972 onig_regset_number_of_regex(OnigRegSet* set)
5973 {
5974 return set->n;
5975 }
5976
5977 extern regex_t*
onig_regset_get_regex(OnigRegSet * set,int at)5978 onig_regset_get_regex(OnigRegSet* set, int at)
5979 {
5980 if (at < 0 || at >= set->n)
5981 return (regex_t* )0;
5982
5983 return set->rs[at].reg;
5984 }
5985
5986 extern OnigRegion*
onig_regset_get_region(OnigRegSet * set,int at)5987 onig_regset_get_region(OnigRegSet* set, int at)
5988 {
5989 if (at < 0 || at >= set->n)
5990 return (OnigRegion* )0;
5991
5992 return set->rs[at].region;
5993 }
5994
5995 #endif /* USE_REGSET */
5996
5997
5998 #ifdef USE_DIRECT_THREADED_CODE
5999 extern int
onig_init_for_match_at(regex_t * reg)6000 onig_init_for_match_at(regex_t* reg)
6001 {
6002 return match_at(reg, (const UChar* )NULL, (const UChar* )NULL,
6003 (const UChar* )NULL, (const UChar* )NULL,
6004 (MatchArg* )NULL);
6005 }
6006 #endif
6007
6008
6009 /* for callout functions */
6010
6011 #ifdef USE_CALLOUT
6012
6013 extern OnigCalloutFunc
onig_get_progress_callout(void)6014 onig_get_progress_callout(void)
6015 {
6016 return DefaultProgressCallout;
6017 }
6018
6019 extern int
onig_set_progress_callout(OnigCalloutFunc f)6020 onig_set_progress_callout(OnigCalloutFunc f)
6021 {
6022 DefaultProgressCallout = f;
6023 return ONIG_NORMAL;
6024 }
6025
6026 extern OnigCalloutFunc
onig_get_retraction_callout(void)6027 onig_get_retraction_callout(void)
6028 {
6029 return DefaultRetractionCallout;
6030 }
6031
6032 extern int
onig_set_retraction_callout(OnigCalloutFunc f)6033 onig_set_retraction_callout(OnigCalloutFunc f)
6034 {
6035 DefaultRetractionCallout = f;
6036 return ONIG_NORMAL;
6037 }
6038
6039 extern int
onig_get_callout_num_by_callout_args(OnigCalloutArgs * args)6040 onig_get_callout_num_by_callout_args(OnigCalloutArgs* args)
6041 {
6042 return args->num;
6043 }
6044
6045 extern OnigCalloutIn
onig_get_callout_in_by_callout_args(OnigCalloutArgs * args)6046 onig_get_callout_in_by_callout_args(OnigCalloutArgs* args)
6047 {
6048 return args->in;
6049 }
6050
6051 extern int
onig_get_name_id_by_callout_args(OnigCalloutArgs * args)6052 onig_get_name_id_by_callout_args(OnigCalloutArgs* args)
6053 {
6054 return args->name_id;
6055 }
6056
6057 extern const UChar*
onig_get_contents_by_callout_args(OnigCalloutArgs * args)6058 onig_get_contents_by_callout_args(OnigCalloutArgs* args)
6059 {
6060 int num;
6061 CalloutListEntry* e;
6062
6063 num = args->num;
6064 e = onig_reg_callout_list_at(args->regex, num);
6065 if (IS_NULL(e)) return 0;
6066 if (e->of == ONIG_CALLOUT_OF_CONTENTS) {
6067 return e->u.content.start;
6068 }
6069
6070 return 0;
6071 }
6072
6073 extern const UChar*
onig_get_contents_end_by_callout_args(OnigCalloutArgs * args)6074 onig_get_contents_end_by_callout_args(OnigCalloutArgs* args)
6075 {
6076 int num;
6077 CalloutListEntry* e;
6078
6079 num = args->num;
6080 e = onig_reg_callout_list_at(args->regex, num);
6081 if (IS_NULL(e)) return 0;
6082 if (e->of == ONIG_CALLOUT_OF_CONTENTS) {
6083 return e->u.content.end;
6084 }
6085
6086 return 0;
6087 }
6088
6089 extern int
onig_get_args_num_by_callout_args(OnigCalloutArgs * args)6090 onig_get_args_num_by_callout_args(OnigCalloutArgs* args)
6091 {
6092 int num;
6093 CalloutListEntry* e;
6094
6095 num = args->num;
6096 e = onig_reg_callout_list_at(args->regex, num);
6097 if (IS_NULL(e)) return ONIGERR_INVALID_ARGUMENT;
6098 if (e->of == ONIG_CALLOUT_OF_NAME) {
6099 return e->u.arg.num;
6100 }
6101
6102 return ONIGERR_INVALID_ARGUMENT;
6103 }
6104
6105 extern int
onig_get_passed_args_num_by_callout_args(OnigCalloutArgs * args)6106 onig_get_passed_args_num_by_callout_args(OnigCalloutArgs* args)
6107 {
6108 int num;
6109 CalloutListEntry* e;
6110
6111 num = args->num;
6112 e = onig_reg_callout_list_at(args->regex, num);
6113 if (IS_NULL(e)) return ONIGERR_INVALID_ARGUMENT;
6114 if (e->of == ONIG_CALLOUT_OF_NAME) {
6115 return e->u.arg.passed_num;
6116 }
6117
6118 return ONIGERR_INVALID_ARGUMENT;
6119 }
6120
6121 extern int
onig_get_arg_by_callout_args(OnigCalloutArgs * args,int index,OnigType * type,OnigValue * val)6122 onig_get_arg_by_callout_args(OnigCalloutArgs* args, int index,
6123 OnigType* type, OnigValue* val)
6124 {
6125 int num;
6126 CalloutListEntry* e;
6127
6128 num = args->num;
6129 e = onig_reg_callout_list_at(args->regex, num);
6130 if (IS_NULL(e)) return ONIGERR_INVALID_ARGUMENT;
6131 if (e->of == ONIG_CALLOUT_OF_NAME) {
6132 if (IS_NOT_NULL(type)) *type = e->u.arg.types[index];
6133 if (IS_NOT_NULL(val)) *val = e->u.arg.vals[index];
6134 return ONIG_NORMAL;
6135 }
6136
6137 return ONIGERR_INVALID_ARGUMENT;
6138 }
6139
6140 extern const UChar*
onig_get_string_by_callout_args(OnigCalloutArgs * args)6141 onig_get_string_by_callout_args(OnigCalloutArgs* args)
6142 {
6143 return args->string;
6144 }
6145
6146 extern const UChar*
onig_get_string_end_by_callout_args(OnigCalloutArgs * args)6147 onig_get_string_end_by_callout_args(OnigCalloutArgs* args)
6148 {
6149 return args->string_end;
6150 }
6151
6152 extern const UChar*
onig_get_start_by_callout_args(OnigCalloutArgs * args)6153 onig_get_start_by_callout_args(OnigCalloutArgs* args)
6154 {
6155 return args->start;
6156 }
6157
6158 extern const UChar*
onig_get_right_range_by_callout_args(OnigCalloutArgs * args)6159 onig_get_right_range_by_callout_args(OnigCalloutArgs* args)
6160 {
6161 return args->right_range;
6162 }
6163
6164 extern const UChar*
onig_get_current_by_callout_args(OnigCalloutArgs * args)6165 onig_get_current_by_callout_args(OnigCalloutArgs* args)
6166 {
6167 return args->current;
6168 }
6169
6170 extern OnigRegex
onig_get_regex_by_callout_args(OnigCalloutArgs * args)6171 onig_get_regex_by_callout_args(OnigCalloutArgs* args)
6172 {
6173 return args->regex;
6174 }
6175
6176 extern unsigned long
onig_get_retry_counter_by_callout_args(OnigCalloutArgs * args)6177 onig_get_retry_counter_by_callout_args(OnigCalloutArgs* args)
6178 {
6179 return args->retry_in_match_counter;
6180 }
6181
6182
6183 extern int
onig_get_capture_range_in_callout(OnigCalloutArgs * a,int mem_num,int * begin,int * end)6184 onig_get_capture_range_in_callout(OnigCalloutArgs* a, int mem_num, int* begin, int* end)
6185 {
6186 OnigRegex reg;
6187 const UChar* str;
6188 StackType* stk_base;
6189 int i;
6190 StkPtrType* mem_start_stk;
6191 StkPtrType* mem_end_stk;
6192
6193 i = mem_num;
6194 reg = a->regex;
6195 str = a->string;
6196 stk_base = a->stk_base;
6197 mem_start_stk = a->mem_start_stk;
6198 mem_end_stk = a->mem_end_stk;
6199
6200 if (i > 0) {
6201 if (a->mem_end_stk[i].i != INVALID_STACK_INDEX) {
6202 *begin = (int )(STACK_MEM_START(reg, i) - str);
6203 *end = (int )(STACK_MEM_END(reg, i) - str);
6204 }
6205 else {
6206 *begin = *end = ONIG_REGION_NOTPOS;
6207 }
6208 }
6209 else
6210 return ONIGERR_INVALID_ARGUMENT;
6211
6212 return ONIG_NORMAL;
6213 }
6214
6215 extern int
onig_get_used_stack_size_in_callout(OnigCalloutArgs * a,int * used_num,int * used_bytes)6216 onig_get_used_stack_size_in_callout(OnigCalloutArgs* a, int* used_num, int* used_bytes)
6217 {
6218 int n;
6219
6220 n = (int )(a->stk - a->stk_base);
6221
6222 if (used_num != 0)
6223 *used_num = n;
6224
6225 if (used_bytes != 0)
6226 *used_bytes = n * sizeof(StackType);
6227
6228 return ONIG_NORMAL;
6229 }
6230
6231
6232 /* builtin callout functions */
6233
6234 extern int
onig_builtin_fail(OnigCalloutArgs * args ARG_UNUSED,void * user_data ARG_UNUSED)6235 onig_builtin_fail(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUSED)
6236 {
6237 return ONIG_CALLOUT_FAIL;
6238 }
6239
6240 extern int
onig_builtin_mismatch(OnigCalloutArgs * args ARG_UNUSED,void * user_data ARG_UNUSED)6241 onig_builtin_mismatch(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUSED)
6242 {
6243 return ONIG_MISMATCH;
6244 }
6245
6246 extern int
onig_builtin_error(OnigCalloutArgs * args,void * user_data ARG_UNUSED)6247 onig_builtin_error(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
6248 {
6249 int r;
6250 int n;
6251 OnigValue val;
6252
6253 r = onig_get_arg_by_callout_args(args, 0, 0, &val);
6254 if (r != ONIG_NORMAL) return r;
6255
6256 n = (int )val.l;
6257 if (n >= 0) {
6258 n = ONIGERR_INVALID_CALLOUT_BODY;
6259 }
6260 else if (onig_is_error_code_needs_param(n)) {
6261 n = ONIGERR_INVALID_CALLOUT_BODY;
6262 }
6263
6264 return n;
6265 }
6266
6267 extern int
onig_builtin_count(OnigCalloutArgs * args,void * user_data)6268 onig_builtin_count(OnigCalloutArgs* args, void* user_data)
6269 {
6270 (void )onig_check_callout_data_and_clear_old_values(args);
6271
6272 return onig_builtin_total_count(args, user_data);
6273 }
6274
6275 extern int
onig_builtin_total_count(OnigCalloutArgs * args,void * user_data ARG_UNUSED)6276 onig_builtin_total_count(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
6277 {
6278 int r;
6279 int slot;
6280 OnigType type;
6281 OnigValue val;
6282 OnigValue aval;
6283 OnigCodePoint count_type;
6284
6285 r = onig_get_arg_by_callout_args(args, 0, &type, &aval);
6286 if (r != ONIG_NORMAL) return r;
6287
6288 count_type = aval.c;
6289 if (count_type != '>' && count_type != 'X' && count_type != '<')
6290 return ONIGERR_INVALID_CALLOUT_ARG;
6291
6292 r = onig_get_callout_data_by_callout_args_self_dont_clear_old(args, 0,
6293 &type, &val);
6294 if (r < ONIG_NORMAL)
6295 return r;
6296 else if (r > ONIG_NORMAL) {
6297 /* type == void: initial state */
6298 val.l = 0;
6299 }
6300
6301 if (args->in == ONIG_CALLOUT_IN_RETRACTION) {
6302 slot = 2;
6303 if (count_type == '<')
6304 val.l++;
6305 else if (count_type == 'X')
6306 val.l--;
6307 }
6308 else {
6309 slot = 1;
6310 if (count_type != '<')
6311 val.l++;
6312 }
6313
6314 r = onig_set_callout_data_by_callout_args_self(args, 0, ONIG_TYPE_LONG, &val);
6315 if (r != ONIG_NORMAL) return r;
6316
6317 /* slot 1: in progress counter, slot 2: in retraction counter */
6318 r = onig_get_callout_data_by_callout_args_self_dont_clear_old(args, slot,
6319 &type, &val);
6320 if (r < ONIG_NORMAL)
6321 return r;
6322 else if (r > ONIG_NORMAL) {
6323 val.l = 0;
6324 }
6325
6326 val.l++;
6327 r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val);
6328 if (r != ONIG_NORMAL) return r;
6329
6330 return ONIG_CALLOUT_SUCCESS;
6331 }
6332
6333 extern int
onig_builtin_max(OnigCalloutArgs * args,void * user_data ARG_UNUSED)6334 onig_builtin_max(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
6335 {
6336 int r;
6337 int slot;
6338 long max_val;
6339 OnigCodePoint count_type;
6340 OnigType type;
6341 OnigValue val;
6342 OnigValue aval;
6343
6344 (void )onig_check_callout_data_and_clear_old_values(args);
6345
6346 slot = 0;
6347 r = onig_get_callout_data_by_callout_args_self(args, slot, &type, &val);
6348 if (r < ONIG_NORMAL)
6349 return r;
6350 else if (r > ONIG_NORMAL) {
6351 /* type == void: initial state */
6352 type = ONIG_TYPE_LONG;
6353 val.l = 0;
6354 }
6355
6356 r = onig_get_arg_by_callout_args(args, 0, &type, &aval);
6357 if (r != ONIG_NORMAL) return r;
6358 if (type == ONIG_TYPE_TAG) {
6359 r = onig_get_callout_data_by_callout_args(args, aval.tag, 0, &type, &aval);
6360 if (r < ONIG_NORMAL) return r;
6361 else if (r > ONIG_NORMAL)
6362 max_val = 0L;
6363 else
6364 max_val = aval.l;
6365 }
6366 else { /* LONG */
6367 max_val = aval.l;
6368 }
6369
6370 r = onig_get_arg_by_callout_args(args, 1, &type, &aval);
6371 if (r != ONIG_NORMAL) return r;
6372
6373 count_type = aval.c;
6374 if (count_type != '>' && count_type != 'X' && count_type != '<')
6375 return ONIGERR_INVALID_CALLOUT_ARG;
6376
6377 if (args->in == ONIG_CALLOUT_IN_RETRACTION) {
6378 if (count_type == '<') {
6379 if (val.l >= max_val) return ONIG_CALLOUT_FAIL;
6380 val.l++;
6381 }
6382 else if (count_type == 'X')
6383 val.l--;
6384 }
6385 else {
6386 if (count_type != '<') {
6387 if (val.l >= max_val) return ONIG_CALLOUT_FAIL;
6388 val.l++;
6389 }
6390 }
6391
6392 r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val);
6393 if (r != ONIG_NORMAL) return r;
6394
6395 return ONIG_CALLOUT_SUCCESS;
6396 }
6397
/*
 * Comparison operators understood by onig_builtin_cmp().
 * The numeric values are written out because the selected operator is
 * stored in callout data as a long and cast back to this enum later.
 */
enum OP_CMP {
  OP_EQ = 0,  /* "==" */
  OP_NE = 1,  /* "!=" */
  OP_LT = 2,  /* "<"  */
  OP_GT = 3,  /* ">"  */
  OP_LE = 4,  /* "<=" */
  OP_GE = 5   /* ">=" */
};
6406
6407 extern int
onig_builtin_cmp(OnigCalloutArgs * args,void * user_data ARG_UNUSED)6408 onig_builtin_cmp(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
6409 {
6410 int r;
6411 int slot;
6412 long lv;
6413 long rv;
6414 OnigType type;
6415 OnigValue val;
6416 regex_t* reg;
6417 enum OP_CMP op;
6418
6419 reg = args->regex;
6420
6421 r = onig_get_arg_by_callout_args(args, 0, &type, &val);
6422 if (r != ONIG_NORMAL) return r;
6423
6424 if (type == ONIG_TYPE_TAG) {
6425 r = onig_get_callout_data_by_callout_args(args, val.tag, 0, &type, &val);
6426 if (r < ONIG_NORMAL) return r;
6427 else if (r > ONIG_NORMAL)
6428 lv = 0L;
6429 else
6430 lv = val.l;
6431 }
6432 else { /* ONIG_TYPE_LONG */
6433 lv = val.l;
6434 }
6435
6436 r = onig_get_arg_by_callout_args(args, 2, &type, &val);
6437 if (r != ONIG_NORMAL) return r;
6438
6439 if (type == ONIG_TYPE_TAG) {
6440 r = onig_get_callout_data_by_callout_args(args, val.tag, 0, &type, &val);
6441 if (r < ONIG_NORMAL) return r;
6442 else if (r > ONIG_NORMAL)
6443 rv = 0L;
6444 else
6445 rv = val.l;
6446 }
6447 else { /* ONIG_TYPE_LONG */
6448 rv = val.l;
6449 }
6450
6451 slot = 0;
6452 r = onig_get_callout_data_by_callout_args_self(args, slot, &type, &val);
6453 if (r < ONIG_NORMAL)
6454 return r;
6455 else if (r > ONIG_NORMAL) {
6456 /* type == void: initial state */
6457 OnigCodePoint c1, c2;
6458 UChar* p;
6459
6460 r = onig_get_arg_by_callout_args(args, 1, &type, &val);
6461 if (r != ONIG_NORMAL) return r;
6462
6463 p = val.s.start;
6464 c1 = ONIGENC_MBC_TO_CODE(reg->enc, p, val.s.end);
6465 p += ONIGENC_MBC_ENC_LEN(reg->enc, p);
6466 if (p < val.s.end) {
6467 c2 = ONIGENC_MBC_TO_CODE(reg->enc, p, val.s.end);
6468 p += ONIGENC_MBC_ENC_LEN(reg->enc, p);
6469 if (p != val.s.end) return ONIGERR_INVALID_CALLOUT_ARG;
6470 }
6471 else
6472 c2 = 0;
6473
6474 switch (c1) {
6475 case '=':
6476 if (c2 != '=') return ONIGERR_INVALID_CALLOUT_ARG;
6477 op = OP_EQ;
6478 break;
6479 case '!':
6480 if (c2 != '=') return ONIGERR_INVALID_CALLOUT_ARG;
6481 op = OP_NE;
6482 break;
6483 case '<':
6484 if (c2 == '=') op = OP_LE;
6485 else if (c2 == 0) op = OP_LT;
6486 else return ONIGERR_INVALID_CALLOUT_ARG;
6487 break;
6488 case '>':
6489 if (c2 == '=') op = OP_GE;
6490 else if (c2 == 0) op = OP_GT;
6491 else return ONIGERR_INVALID_CALLOUT_ARG;
6492 break;
6493 default:
6494 return ONIGERR_INVALID_CALLOUT_ARG;
6495 break;
6496 }
6497 val.l = (long )op;
6498 r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val);
6499 if (r != ONIG_NORMAL) return r;
6500 }
6501 else {
6502 op = (enum OP_CMP )val.l;
6503 }
6504
6505 switch (op) {
6506 case OP_EQ: r = (lv == rv); break;
6507 case OP_NE: r = (lv != rv); break;
6508 case OP_LT: r = (lv < rv); break;
6509 case OP_GT: r = (lv > rv); break;
6510 case OP_LE: r = (lv <= rv); break;
6511 case OP_GE: r = (lv >= rv); break;
6512 }
6513
6514 return r == 0 ? ONIG_CALLOUT_FAIL : ONIG_CALLOUT_SUCCESS;
6515 }
6516
6517
6518 #ifndef ONIG_NO_PRINT
6519
6520 static FILE* OutFp;
6521
6522 /* name start with "onig_" for macros. */
6523 static int
onig_builtin_monitor(OnigCalloutArgs * args,void * user_data)6524 onig_builtin_monitor(OnigCalloutArgs* args, void* user_data)
6525 {
6526 int r;
6527 int num;
6528 size_t tag_len;
6529 const UChar* start;
6530 const UChar* right;
6531 const UChar* current;
6532 const UChar* string;
6533 const UChar* strend;
6534 const UChar* tag_start;
6535 const UChar* tag_end;
6536 regex_t* reg;
6537 OnigCalloutIn in;
6538 OnigType type;
6539 OnigValue val;
6540 char buf[20];
6541 FILE* fp;
6542
6543 fp = OutFp;
6544
6545 r = onig_get_arg_by_callout_args(args, 0, &type, &val);
6546 if (r != ONIG_NORMAL) return r;
6547
6548 in = onig_get_callout_in_by_callout_args(args);
6549 if (in == ONIG_CALLOUT_IN_PROGRESS) {
6550 if (val.c == '<')
6551 return ONIG_CALLOUT_SUCCESS;
6552 }
6553 else {
6554 if (val.c != 'X' && val.c != '<')
6555 return ONIG_CALLOUT_SUCCESS;
6556 }
6557
6558 num = onig_get_callout_num_by_callout_args(args);
6559 start = onig_get_start_by_callout_args(args);
6560 right = onig_get_right_range_by_callout_args(args);
6561 current = onig_get_current_by_callout_args(args);
6562 string = onig_get_string_by_callout_args(args);
6563 strend = onig_get_string_end_by_callout_args(args);
6564 reg = onig_get_regex_by_callout_args(args);
6565 tag_start = onig_get_callout_tag_start(reg, num);
6566 tag_end = onig_get_callout_tag_end(reg, num);
6567
6568 if (tag_start == 0)
6569 xsnprintf(buf, sizeof(buf), "#%d", num);
6570 else {
6571 /* CAUTION: tag string is not terminated with NULL. */
6572 int i;
6573
6574 tag_len = tag_end - tag_start;
6575 if (tag_len >= sizeof(buf)) tag_len = sizeof(buf) - 1;
6576 for (i = 0; i < tag_len; i++) buf[i] = tag_start[i];
6577 buf[tag_len] = '\0';
6578 }
6579
6580 fprintf(fp, "ONIG-MONITOR: %-4s %s at: %d [%d - %d] len: %d\n",
6581 buf,
6582 in == ONIG_CALLOUT_IN_PROGRESS ? "=>" : "<=",
6583 (int )(current - string),
6584 (int )(start - string),
6585 (int )(right - string),
6586 (int )(strend - string));
6587 fflush(fp);
6588
6589 return ONIG_CALLOUT_SUCCESS;
6590 }
6591
6592 extern int
onig_setup_builtin_monitors_by_ascii_encoded_name(void * fp)6593 onig_setup_builtin_monitors_by_ascii_encoded_name(void* fp /* FILE* */)
6594 {
6595 int id;
6596 char* name;
6597 OnigEncoding enc;
6598 unsigned int ts[4];
6599 OnigValue opts[4];
6600
6601 if (IS_NOT_NULL(fp))
6602 OutFp = (FILE* )fp;
6603 else
6604 OutFp = stdout;
6605
6606 enc = ONIG_ENCODING_ASCII;
6607
6608 name = "MON";
6609 ts[0] = ONIG_TYPE_CHAR;
6610 opts[0].c = '>';
6611 BC_B_O(name, monitor, 1, ts, 1, opts);
6612
6613 return ONIG_NORMAL;
6614 }
6615
6616 #endif /* ONIG_NO_PRINT */
6617
6618 #endif /* USE_CALLOUT */
6619