1 /**********************************************************************
2 regexec.c - Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5 * Copyright (c) 2002-2021 K.Kosako
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #ifndef ONIG_NO_PRINT
31 #ifndef NEED_TO_INCLUDE_STDIO
32 #define NEED_TO_INCLUDE_STDIO
33 #endif
34 #endif
35
36 #include "regint.h"
37
/* Word-character test for the character at s.  mode == 0 selects the
   encoding's regular word test, any other mode the ASCII-only test. */
#define IS_MBC_WORD_ASCII_MODE(enc,s,end,mode) \
  ((mode) == 0 \
     ? ONIGENC_IS_MBC_WORD(enc,s,end) \
     : ONIGENC_IS_MBC_WORD_ASCII(enc,s,end))

#ifdef USE_CRNL_AS_LINE_TERMINATOR
/* True when the character at p has code 13 and the character that
   immediately follows it is a newline for the encoding. */
#define ONIGENC_IS_MBC_CRNL(enc,p,end) \
  (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 \
   && ONIGENC_IS_MBC_NEWLINE(enc,(p+enclen(enc,p)),end))
#endif

/* Defined with an empty replacement list.
   NOTE(review): presumably tested with #ifdef outside this section. */
#define CHECK_INTERRUPT_IN_MATCH

/* Start position of capture group idx.  When the group's bit is set in
   reg->push_mem_start the position is read from the stack entry whose index
   is stored in mem_start_stk[idx]; otherwise mem_start_stk[idx] holds the
   pointer itself.  Uses mem_start_stk and STACK_AT from the expansion site. */
#define STACK_MEM_START(reg, idx) \
  (MEM_STATUS_AT((reg)->push_mem_start, (idx)) != 0 \
     ? STACK_AT(mem_start_stk[idx].i)->u.mem.pstr \
     : mem_start_stk[idx].s)

/* End position of capture group idx; same scheme as STACK_MEM_START but
   driven by reg->push_mem_end and mem_end_stk. */
#define STACK_MEM_END(reg, idx) \
  (MEM_STATUS_AT((reg)->push_mem_end, (idx)) != 0 \
     ? STACK_AT(mem_end_stk[idx].i)->u.mem.pstr \
     : mem_end_stk[idx].s)

/* Cast applied to distance values: size_t under MSVC, nothing elsewhere. */
#ifndef _MSC_VER
#define DIST_CAST(d)   (d)
#else
#define DIST_CAST(d)   (size_t )(d)
#endif
62
63
64 static int forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start, UChar* range, UChar** low, UChar** high);
65
66 static int
67 search_in_range(regex_t* reg, const UChar* str, const UChar* end, const UChar* start, const UChar* range, /* match range */ const UChar* data_range, /* subject string range */ OnigRegion* region, OnigOptionType option, OnigMatchParam* mp);
68
69
#ifdef USE_CALLOUT
/* Per-callout data record: a call-counter stamp plus a fixed array of
   typed value slots (ONIG_CALLOUT_DATA_SLOT_NUM entries). */
typedef struct {
  int last_match_at_call_counter;
  struct {
    OnigType  type;   /* type tag of val */
    OnigValue val;    /* slot value */
  } slot[ONIG_CALLOUT_DATA_SLOT_NUM];
} CalloutData;
#endif
79
/* Per-call matching parameters.  The fields are written by the
   onig_set_*_of_match_param() setters in this file. */
struct OnigMatchParamStruct {
  unsigned int    match_stack_limit;
#ifdef USE_RETRY_LIMIT
  unsigned long   retry_limit_in_match;
  unsigned long   retry_limit_in_search;
#endif

  void*           callout_user_data;  /* used in callback each match */
#ifdef USE_CALLOUT
  OnigCalloutFunc progress_callout_of_contents;
  OnigCalloutFunc retraction_callout_of_contents;
  int             match_at_call_counter;
  CalloutData*    callout_data;
  int             callout_data_alloc_num;
#endif
};
96
97 extern int
onig_set_match_stack_limit_size_of_match_param(OnigMatchParam * param,unsigned int limit)98 onig_set_match_stack_limit_size_of_match_param(OnigMatchParam* param,
99 unsigned int limit)
100 {
101 param->match_stack_limit = limit;
102 return ONIG_NORMAL;
103 }
104
105 extern int
onig_set_retry_limit_in_match_of_match_param(OnigMatchParam * param,unsigned long limit)106 onig_set_retry_limit_in_match_of_match_param(OnigMatchParam* param,
107 unsigned long limit)
108 {
109 #ifdef USE_RETRY_LIMIT
110 param->retry_limit_in_match = limit;
111 return ONIG_NORMAL;
112 #else
113 return ONIG_NO_SUPPORT_CONFIG;
114 #endif
115 }
116
117 extern int
onig_set_retry_limit_in_search_of_match_param(OnigMatchParam * param,unsigned long limit)118 onig_set_retry_limit_in_search_of_match_param(OnigMatchParam* param,
119 unsigned long limit)
120 {
121 #ifdef USE_RETRY_LIMIT
122 param->retry_limit_in_search = limit;
123 return ONIG_NORMAL;
124 #else
125 return ONIG_NO_SUPPORT_CONFIG;
126 #endif
127 }
128
129 extern int
onig_set_progress_callout_of_match_param(OnigMatchParam * param,OnigCalloutFunc f)130 onig_set_progress_callout_of_match_param(OnigMatchParam* param, OnigCalloutFunc f)
131 {
132 #ifdef USE_CALLOUT
133 param->progress_callout_of_contents = f;
134 return ONIG_NORMAL;
135 #else
136 return ONIG_NO_SUPPORT_CONFIG;
137 #endif
138 }
139
140 extern int
onig_set_retraction_callout_of_match_param(OnigMatchParam * param,OnigCalloutFunc f)141 onig_set_retraction_callout_of_match_param(OnigMatchParam* param, OnigCalloutFunc f)
142 {
143 #ifdef USE_CALLOUT
144 param->retraction_callout_of_contents = f;
145 return ONIG_NORMAL;
146 #else
147 return ONIG_NO_SUPPORT_CONFIG;
148 #endif
149 }
150
151 extern int
onig_set_callout_user_data_of_match_param(OnigMatchParam * param,void * user_data)152 onig_set_callout_user_data_of_match_param(OnigMatchParam* param, void* user_data)
153 {
154 param->callout_user_data = user_data;
155 return ONIG_NORMAL;
156 }
157
158
159 typedef struct {
160 void* stack_p;
161 int stack_n;
162 OnigOptionType options;
163 OnigRegion* region;
164 int ptr_num;
165 const UChar* start; /* search start position (for \G: BEGIN_POSITION) */
166 unsigned int match_stack_limit;
167 #ifdef USE_RETRY_LIMIT
168 unsigned long retry_limit_in_match;
169 unsigned long retry_limit_in_search;
170 unsigned long retry_limit_in_search_counter;
171 #endif
172 OnigMatchParam* mp;
173 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
174 int best_len; /* for ONIG_OPTION_FIND_LONGEST */
175 UChar* best_s;
176 #endif
177 #ifdef USE_CALL
178 unsigned long subexp_call_in_search_counter;
179 #endif
180 } MatchArg;
181
182
183 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
184
/* arguments type: kind tags for opcode operands (debug builds only) */
typedef enum {
  ARG_SPECIAL = -1,
  ARG_NON     =  0,
  ARG_RELADDR =  1,
  ARG_ABSADDR =  2,
  ARG_LENGTH  =  3,
  ARG_MEMNUM  =  4,
  ARG_OPTION  =  5,
  ARG_MODE    =  6
} OpArgType;
196
/* One row of the opcode name table: opcode value and printable name. */
typedef struct {
  short int opcode;
  char*     name;
} OpInfoType;
201
202 static OpInfoType OpInfo[] = {
203 { OP_FINISH, "finish"},
204 { OP_END, "end"},
205 { OP_STR_1, "str_1"},
206 { OP_STR_2, "str_2"},
207 { OP_STR_3, "str_3"},
208 { OP_STR_4, "str_4"},
209 { OP_STR_5, "str_5"},
210 { OP_STR_N, "str_n"},
211 { OP_STR_MB2N1, "str_mb2-n1"},
212 { OP_STR_MB2N2, "str_mb2-n2"},
213 { OP_STR_MB2N3, "str_mb2-n3"},
214 { OP_STR_MB2N, "str_mb2-n"},
215 { OP_STR_MB3N, "str_mb3n"},
216 { OP_STR_MBN, "str_mbn"},
217 { OP_CCLASS, "cclass"},
218 { OP_CCLASS_MB, "cclass-mb"},
219 { OP_CCLASS_MIX, "cclass-mix"},
220 { OP_CCLASS_NOT, "cclass-not"},
221 { OP_CCLASS_MB_NOT, "cclass-mb-not"},
222 { OP_CCLASS_MIX_NOT, "cclass-mix-not"},
223 { OP_ANYCHAR, "anychar"},
224 { OP_ANYCHAR_ML, "anychar-ml"},
225 { OP_ANYCHAR_STAR, "anychar*"},
226 { OP_ANYCHAR_ML_STAR, "anychar-ml*"},
227 { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next"},
228 { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next"},
229 { OP_WORD, "word"},
230 { OP_WORD_ASCII, "word-ascii"},
231 { OP_NO_WORD, "not-word"},
232 { OP_NO_WORD_ASCII, "not-word-ascii"},
233 { OP_WORD_BOUNDARY, "word-boundary"},
234 { OP_NO_WORD_BOUNDARY, "not-word-boundary"},
235 { OP_WORD_BEGIN, "word-begin"},
236 { OP_WORD_END, "word-end"},
237 { OP_TEXT_SEGMENT_BOUNDARY, "text-segment-boundary"},
238 { OP_BEGIN_BUF, "begin-buf"},
239 { OP_END_BUF, "end-buf"},
240 { OP_BEGIN_LINE, "begin-line"},
241 { OP_END_LINE, "end-line"},
242 { OP_SEMI_END_BUF, "semi-end-buf"},
243 { OP_CHECK_POSITION, "check-position"},
244 { OP_BACKREF1, "backref1"},
245 { OP_BACKREF2, "backref2"},
246 { OP_BACKREF_N, "backref-n"},
247 { OP_BACKREF_N_IC, "backref-n-ic"},
248 { OP_BACKREF_MULTI, "backref_multi"},
249 { OP_BACKREF_MULTI_IC, "backref_multi-ic"},
250 { OP_BACKREF_WITH_LEVEL, "backref_with_level"},
251 { OP_BACKREF_WITH_LEVEL_IC, "backref_with_level-c"},
252 { OP_BACKREF_CHECK, "backref_check"},
253 { OP_BACKREF_CHECK_WITH_LEVEL, "backref_check_with_level"},
254 { OP_MEM_START_PUSH, "mem-start-push"},
255 { OP_MEM_START, "mem-start"},
256 { OP_MEM_END_PUSH, "mem-end-push"},
257 #ifdef USE_CALL
258 { OP_MEM_END_PUSH_REC, "mem-end-push-rec"},
259 #endif
260 { OP_MEM_END, "mem-end"},
261 #ifdef USE_CALL
262 { OP_MEM_END_REC, "mem-end-rec"},
263 #endif
264 { OP_FAIL, "fail"},
265 { OP_JUMP, "jump"},
266 { OP_PUSH, "push"},
267 { OP_PUSH_SUPER, "push-super"},
268 { OP_POP, "pop"},
269 { OP_POP_TO_MARK, "pop-to-mark"},
270 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
271 { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1"},
272 #endif
273 { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next"},
274 { OP_REPEAT, "repeat"},
275 { OP_REPEAT_NG, "repeat-ng"},
276 { OP_REPEAT_INC, "repeat-inc"},
277 { OP_REPEAT_INC_NG, "repeat-inc-ng"},
278 { OP_EMPTY_CHECK_START, "empty-check-start"},
279 { OP_EMPTY_CHECK_END, "empty-check-end"},
280 { OP_EMPTY_CHECK_END_MEMST, "empty-check-end-memst"},
281 #ifdef USE_CALL
282 { OP_EMPTY_CHECK_END_MEMST_PUSH,"empty-check-end-memst-push"},
283 #endif
284 { OP_MOVE, "move"},
285 { OP_STEP_BACK_START, "step-back-start"},
286 { OP_STEP_BACK_NEXT, "step-back-next"},
287 { OP_CUT_TO_MARK, "cut-to-mark"},
288 { OP_MARK, "mark"},
289 { OP_SAVE_VAL, "save-val"},
290 { OP_UPDATE_VAR, "update-var"},
291 #ifdef USE_CALL
292 { OP_CALL, "call"},
293 { OP_RETURN, "return"},
294 #endif
295 #ifdef USE_CALLOUT
296 { OP_CALLOUT_CONTENTS, "callout-contents"},
297 { OP_CALLOUT_NAME, "callout-name"},
298 #endif
299 { -1, ""}
300 };
301
302 static char*
op2name(int opcode)303 op2name(int opcode)
304 {
305 int i;
306
307 for (i = 0; OpInfo[i].opcode >= 0; i++) {
308 if (opcode == OpInfo[i].opcode) return OpInfo[i].name;
309 }
310
311 return "";
312 }
313
/* Write the separator that follows an opcode name to `f`. */
static void
p_after_op(FILE* f)
{
  static const char separator[] = " ";

  fputs(separator, f);
}
319
320 static void
p_string(FILE * f,int len,UChar * s)321 p_string(FILE* f, int len, UChar* s)
322 {
323 while (len-- > 0) { fputc(*s++, f); }
324 }
325
326 static void
p_len_string(FILE * f,LengthType len,int mb_len,UChar * s)327 p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
328 {
329 int x = len * mb_len;
330
331 fprintf(f, "len:%d ", len);
332 while (x-- > 0) { fputc(*s++, f); }
333 }
334
335 static void
p_rel_addr(FILE * f,RelAddrType rel_addr,Operation * p,Operation * start)336 p_rel_addr(FILE* f, RelAddrType rel_addr, Operation* p, Operation* start)
337 {
338 char* flag;
339 char* space1;
340 char* space2;
341 RelAddrType curr;
342 AbsAddrType abs_addr;
343
344 curr = (RelAddrType )(p - start);
345 abs_addr = curr + rel_addr;
346
347 flag = rel_addr < 0 ? "" : "+";
348 space1 = rel_addr < 10 ? " " : "";
349 space2 = abs_addr < 10 ? " " : "";
350
351 fprintf(f, "%s%s%d => %s%d", space1, flag, rel_addr, space2, abs_addr);
352 }
353
354 static int
bitset_on_num(BitSetRef bs)355 bitset_on_num(BitSetRef bs)
356 {
357 int i, n;
358
359 n = 0;
360 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
361 if (BITSET_AT(bs, i)) n++;
362 }
363
364 return n;
365 }
366
367
/* Opcode of operation `index` of `reg`: read from the separate ocs[] array
   under USE_DIRECT_THREADED_CODE, otherwise from the operation itself. */
#ifndef USE_DIRECT_THREADED_CODE
#define GET_OPCODE(reg,index)  (reg)->ops[index].opcode
#else
#define GET_OPCODE(reg,index)  (reg)->ocs[index]
#endif
373
374 static void
print_compiled_byte_code(FILE * f,regex_t * reg,int index,Operation * start,OnigEncoding enc)375 print_compiled_byte_code(FILE* f, regex_t* reg, int index,
376 Operation* start, OnigEncoding enc)
377 {
378 static char* SaveTypeNames[] = {
379 "KEEP",
380 "S",
381 "RIGHT_RANGE"
382 };
383
384 static char* UpdateVarTypeNames[] = {
385 "KEEP_FROM_STACK_LAST",
386 "S_FROM_STACK",
387 "RIGHT_RANGE_FROM_STACK",
388 "RIGHT_RANGE_FROM_S_STACK",
389 "RIGHT_RANGE_TO_S",
390 "RIGHT_RANGE_INIT"
391 };
392
393 int i, n;
394 RelAddrType addr;
395 LengthType len;
396 MemNumType mem;
397 OnigCodePoint code;
398 ModeType mode;
399 UChar *q;
400 Operation* p;
401 enum OpCode opcode;
402
403 p = reg->ops + index;
404
405 opcode = GET_OPCODE(reg, index);
406
407 fprintf(f, "%s", op2name(opcode));
408 p_after_op(f);
409
410 switch (opcode) {
411 case OP_STR_1:
412 p_string(f, 1, p->exact.s); break;
413 case OP_STR_2:
414 p_string(f, 2, p->exact.s); break;
415 case OP_STR_3:
416 p_string(f, 3, p->exact.s); break;
417 case OP_STR_4:
418 p_string(f, 4, p->exact.s); break;
419 case OP_STR_5:
420 p_string(f, 5, p->exact.s); break;
421 case OP_STR_N:
422 len = p->exact_n.n;
423 p_string(f, len, p->exact_n.s); break;
424 case OP_STR_MB2N1:
425 p_string(f, 2, p->exact.s); break;
426 case OP_STR_MB2N2:
427 p_string(f, 4, p->exact.s); break;
428 case OP_STR_MB2N3:
429 p_string(f, 3, p->exact.s); break;
430 case OP_STR_MB2N:
431 len = p->exact_n.n;
432 p_len_string(f, len, 2, p->exact_n.s); break;
433 case OP_STR_MB3N:
434 len = p->exact_n.n;
435 p_len_string(f, len, 3, p->exact_n.s); break;
436 case OP_STR_MBN:
437 {
438 int mb_len;
439
440 mb_len = p->exact_len_n.len;
441 len = p->exact_len_n.n;
442 q = p->exact_len_n.s;
443 fprintf(f, "mblen:%d len:%d ", mb_len, len);
444 n = len * mb_len;
445 while (n-- > 0) { fputc(*q++, f); }
446 }
447 break;
448
449 case OP_CCLASS:
450 case OP_CCLASS_NOT:
451 n = bitset_on_num(p->cclass.bsp);
452 fprintf(f, "n:%d", n);
453 break;
454 case OP_CCLASS_MB:
455 case OP_CCLASS_MB_NOT:
456 {
457 OnigCodePoint ncode;
458 OnigCodePoint* codes;
459
460 codes = (OnigCodePoint* )p->cclass_mb.mb;
461 GET_CODE_POINT(ncode, codes);
462 codes++;
463 GET_CODE_POINT(code, codes);
464 fprintf(f, "n:%d code:0x%x", ncode, code);
465 }
466 break;
467 case OP_CCLASS_MIX:
468 case OP_CCLASS_MIX_NOT:
469 {
470 OnigCodePoint ncode;
471 OnigCodePoint* codes;
472
473 codes = (OnigCodePoint* )p->cclass_mix.mb;
474 n = bitset_on_num(p->cclass_mix.bsp);
475
476 GET_CODE_POINT(ncode, codes);
477 codes++;
478 GET_CODE_POINT(code, codes);
479 fprintf(f, "nsg:%d code:%u nmb:%u", n, code, ncode);
480 }
481 break;
482
483 case OP_ANYCHAR_STAR_PEEK_NEXT:
484 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
485 p_string(f, 1, &(p->anychar_star_peek_next.c));
486 break;
487
488 case OP_WORD_BOUNDARY:
489 case OP_NO_WORD_BOUNDARY:
490 case OP_WORD_BEGIN:
491 case OP_WORD_END:
492 mode = p->word_boundary.mode;
493 fprintf(f, "mode:%d", mode);
494 break;
495
496 case OP_BACKREF_N:
497 case OP_BACKREF_N_IC:
498 mem = p->backref_n.n1;
499 fprintf(f, "n:%d", mem);
500 break;
501 case OP_BACKREF_MULTI_IC:
502 case OP_BACKREF_MULTI:
503 case OP_BACKREF_CHECK:
504 n = p->backref_general.num;
505 fprintf(f, "n:%d ", n);
506 for (i = 0; i < n; i++) {
507 mem = (n == 1) ? p->backref_general.n1 : p->backref_general.ns[i];
508 if (i > 0) fputs(", ", f);
509 fprintf(f, "%d", mem);
510 }
511 break;
512 case OP_BACKREF_WITH_LEVEL:
513 case OP_BACKREF_WITH_LEVEL_IC:
514 case OP_BACKREF_CHECK_WITH_LEVEL:
515 {
516 LengthType level;
517
518 level = p->backref_general.nest_level;
519 fprintf(f, "level:%d ", level);
520 n = p->backref_general.num;
521 for (i = 0; i < n; i++) {
522 mem = (n == 1) ? p->backref_general.n1 : p->backref_general.ns[i];
523 if (i > 0) fputs(", ", f);
524 fprintf(f, "%d", mem);
525 }
526 }
527 break;
528
529 case OP_MEM_START:
530 case OP_MEM_START_PUSH:
531 mem = p->memory_start.num;
532 fprintf(f, "mem:%d", mem);
533 break;
534
535 case OP_MEM_END:
536 case OP_MEM_END_PUSH:
537 #ifdef USE_CALL
538 case OP_MEM_END_REC:
539 case OP_MEM_END_PUSH_REC:
540 #endif
541 mem = p->memory_end.num;
542 fprintf(f, "mem:%d", mem);
543 break;
544
545 case OP_JUMP:
546 addr = p->jump.addr;
547 p_rel_addr(f, addr, p, start);
548 break;
549
550 case OP_PUSH:
551 case OP_PUSH_SUPER:
552 addr = p->push.addr;
553 p_rel_addr(f, addr, p, start);
554 break;
555
556 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
557 case OP_PUSH_OR_JUMP_EXACT1:
558 addr = p->push_or_jump_exact1.addr;
559 p_rel_addr(f, addr, p, start);
560 fprintf(f, " c:");
561 p_string(f, 1, &(p->push_or_jump_exact1.c));
562 break;
563 #endif
564
565 case OP_PUSH_IF_PEEK_NEXT:
566 addr = p->push_if_peek_next.addr;
567 p_rel_addr(f, addr, p, start);
568 fprintf(f, " c:");
569 p_string(f, 1, &(p->push_if_peek_next.c));
570 break;
571
572 case OP_REPEAT:
573 case OP_REPEAT_NG:
574 mem = p->repeat.id;
575 addr = p->repeat.addr;
576 fprintf(f, "id:%d ", mem);
577 p_rel_addr(f, addr, p, start);
578 break;
579
580 case OP_REPEAT_INC:
581 case OP_REPEAT_INC_NG:
582 mem = p->repeat.id;
583 fprintf(f, "id:%d", mem);
584 break;
585
586 case OP_EMPTY_CHECK_START:
587 mem = p->empty_check_start.mem;
588 fprintf(f, "id:%d", mem);
589 break;
590 case OP_EMPTY_CHECK_END:
591 case OP_EMPTY_CHECK_END_MEMST:
592 #ifdef USE_CALL
593 case OP_EMPTY_CHECK_END_MEMST_PUSH:
594 #endif
595 mem = p->empty_check_end.mem;
596 fprintf(f, "id:%d", mem);
597 break;
598
599 #ifdef USE_CALL
600 case OP_CALL:
601 addr = p->call.addr;
602 fprintf(f, "=> %d", addr);
603 break;
604 #endif
605
606 case OP_MOVE:
607 fprintf(f, "n:%d", p->move.n);
608 break;
609
610 case OP_STEP_BACK_START:
611 addr = p->step_back_start.addr;
612 fprintf(f, "init:%d rem:%d ",
613 p->step_back_start.initial,
614 p->step_back_start.remaining);
615 p_rel_addr(f, addr, p, start);
616 break;
617
618 case OP_POP_TO_MARK:
619 mem = p->pop_to_mark.id;
620 fprintf(f, "id:%d", mem);
621 break;
622
623 case OP_CUT_TO_MARK:
624 {
625 int restore;
626
627 mem = p->cut_to_mark.id;
628 restore = p->cut_to_mark.restore_pos;
629 fprintf(f, "id:%d restore:%d", mem, restore);
630 }
631 break;
632
633 case OP_MARK:
634 {
635 int save;
636
637 mem = p->mark.id;
638 save = p->mark.save_pos;
639 fprintf(f, "id:%d save:%d", mem, save);
640 }
641 break;
642
643 case OP_SAVE_VAL:
644 {
645 SaveType type;
646
647 type = p->save_val.type;
648 mem = p->save_val.id;
649 fprintf(f, "%s id:%d", SaveTypeNames[type], mem);
650 }
651 break;
652
653 case OP_UPDATE_VAR:
654 {
655 UpdateVarType type;
656 int clear;
657
658 type = p->update_var.type;
659 mem = p->update_var.id;
660 clear = p->update_var.clear;
661 fprintf(f, "%s id:%d", UpdateVarTypeNames[type], mem);
662 if (type == UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK ||
663 type == UPDATE_VAR_RIGHT_RANGE_FROM_STACK)
664 fprintf(f, " clear:%d", clear);
665 }
666 break;
667
668 #ifdef USE_CALLOUT
669 case OP_CALLOUT_CONTENTS:
670 mem = p->callout_contents.num;
671 fprintf(f, "num:%d", mem);
672 break;
673
674 case OP_CALLOUT_NAME:
675 {
676 int id;
677
678 id = p->callout_name.id;
679 mem = p->callout_name.num;
680 fprintf(f, "id:%d num:%d", id, mem);
681 }
682 break;
683 #endif
684
685 case OP_TEXT_SEGMENT_BOUNDARY:
686 if (p->text_segment_boundary.not != 0)
687 fprintf(f, " not");
688 break;
689
690 case OP_CHECK_POSITION:
691 switch (p->check_position.type) {
692 case CHECK_POSITION_SEARCH_START:
693 fprintf(f, "search-start"); break;
694 case CHECK_POSITION_CURRENT_RIGHT_RANGE:
695 fprintf(f, "current-right-range"); break;
696 default:
697 break;
698 };
699 break;
700
701 case OP_FINISH:
702 case OP_END:
703 case OP_ANYCHAR:
704 case OP_ANYCHAR_ML:
705 case OP_ANYCHAR_STAR:
706 case OP_ANYCHAR_ML_STAR:
707 case OP_WORD:
708 case OP_WORD_ASCII:
709 case OP_NO_WORD:
710 case OP_NO_WORD_ASCII:
711 case OP_BEGIN_BUF:
712 case OP_END_BUF:
713 case OP_BEGIN_LINE:
714 case OP_END_LINE:
715 case OP_SEMI_END_BUF:
716 case OP_BACKREF1:
717 case OP_BACKREF2:
718 case OP_FAIL:
719 case OP_POP:
720 case OP_STEP_BACK_NEXT:
721 #ifdef USE_CALL
722 case OP_RETURN:
723 #endif
724 break;
725
726 default:
727 fprintf(DBGFP, "print_compiled_byte_code: undefined code %d\n", opcode);
728 break;
729 }
730 }
731 #endif /* defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH) */
732
733 #ifdef ONIG_DEBUG_COMPILE
734 extern void
onig_print_compiled_byte_code_list(FILE * f,regex_t * reg)735 onig_print_compiled_byte_code_list(FILE* f, regex_t* reg)
736 {
737 Operation* bp;
738 Operation* start = reg->ops;
739 Operation* end = reg->ops + reg->ops_used;
740
741 fprintf(f, "push_mem_start: 0x%x, push_mem_end: 0x%x\n",
742 reg->push_mem_start, reg->push_mem_end);
743 fprintf(f, "code-length: %d\n", reg->ops_used);
744
745 bp = start;
746 while (bp < end) {
747 int pos = bp - start;
748
749 fprintf(f, "%4d: ", pos);
750 print_compiled_byte_code(f, reg, pos, start, reg->enc);
751 fprintf(f, "\n");
752 bp++;
753 }
754 fprintf(f, "\n");
755 }
756 #endif
757
758
759 #ifdef USE_CAPTURE_HISTORY
760 static void history_tree_free(OnigCaptureTreeNode* node);
761
762 static void
history_tree_clear(OnigCaptureTreeNode * node)763 history_tree_clear(OnigCaptureTreeNode* node)
764 {
765 int i;
766
767 if (IS_NULL(node)) return ;
768
769 for (i = 0; i < node->num_childs; i++) {
770 if (IS_NOT_NULL(node->childs[i])) {
771 history_tree_free(node->childs[i]);
772 }
773 }
774 for (i = 0; i < node->allocated; i++) {
775 node->childs[i] = (OnigCaptureTreeNode* )0;
776 }
777 node->num_childs = 0;
778 node->beg = ONIG_REGION_NOTPOS;
779 node->end = ONIG_REGION_NOTPOS;
780 node->group = -1;
781 }
782
783 static void
history_tree_free(OnigCaptureTreeNode * node)784 history_tree_free(OnigCaptureTreeNode* node)
785 {
786 history_tree_clear(node);
787 if (IS_NOT_NULL(node->childs)) xfree(node->childs);
788
789 xfree(node);
790 }
791
792 static void
history_root_free(OnigRegion * r)793 history_root_free(OnigRegion* r)
794 {
795 if (IS_NULL(r->history_root)) return ;
796
797 history_tree_free(r->history_root);
798 r->history_root = (OnigCaptureTreeNode* )0;
799 }
800
801 static OnigCaptureTreeNode*
history_node_new(void)802 history_node_new(void)
803 {
804 OnigCaptureTreeNode* node;
805
806 node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode));
807 CHECK_NULL_RETURN(node);
808
809 node->childs = (OnigCaptureTreeNode** )0;
810 node->allocated = 0;
811 node->num_childs = 0;
812 node->group = -1;
813 node->beg = ONIG_REGION_NOTPOS;
814 node->end = ONIG_REGION_NOTPOS;
815
816 return node;
817 }
818
819 static int
history_tree_add_child(OnigCaptureTreeNode * parent,OnigCaptureTreeNode * child)820 history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
821 {
822 #define HISTORY_TREE_INIT_ALLOC_SIZE 8
823
824 if (parent->num_childs >= parent->allocated) {
825 int n, i;
826
827 if (IS_NULL(parent->childs)) {
828 n = HISTORY_TREE_INIT_ALLOC_SIZE;
829 parent->childs =
830 (OnigCaptureTreeNode** )xmalloc(sizeof(parent->childs[0]) * n);
831 }
832 else {
833 n = parent->allocated * 2;
834 parent->childs =
835 (OnigCaptureTreeNode** )xrealloc(parent->childs,
836 sizeof(parent->childs[0]) * n);
837 }
838 CHECK_NULL_RETURN_MEMERR(parent->childs);
839 for (i = parent->allocated; i < n; i++) {
840 parent->childs[i] = (OnigCaptureTreeNode* )0;
841 }
842 parent->allocated = n;
843 }
844
845 parent->childs[parent->num_childs] = child;
846 parent->num_childs++;
847 return 0;
848 }
849
850 static OnigCaptureTreeNode*
history_tree_clone(OnigCaptureTreeNode * node)851 history_tree_clone(OnigCaptureTreeNode* node)
852 {
853 int i;
854 OnigCaptureTreeNode *clone, *child;
855
856 clone = history_node_new();
857 CHECK_NULL_RETURN(clone);
858
859 clone->beg = node->beg;
860 clone->end = node->end;
861 for (i = 0; i < node->num_childs; i++) {
862 child = history_tree_clone(node->childs[i]);
863 if (IS_NULL(child)) {
864 history_tree_free(clone);
865 return (OnigCaptureTreeNode* )0;
866 }
867 history_tree_add_child(clone, child);
868 }
869
870 return clone;
871 }
872
873 extern OnigCaptureTreeNode*
onig_get_capture_tree(OnigRegion * region)874 onig_get_capture_tree(OnigRegion* region)
875 {
876 return region->history_root;
877 }
878 #endif /* USE_CAPTURE_HISTORY */
879
880
881 static OnigCallbackEachMatchFunc CallbackEachMatch;
882
883 extern OnigCallbackEachMatchFunc
onig_get_callback_each_match(void)884 onig_get_callback_each_match(void)
885 {
886 return CallbackEachMatch;
887 }
888
889 extern int
onig_set_callback_each_match(OnigCallbackEachMatchFunc f)890 onig_set_callback_each_match(OnigCallbackEachMatchFunc f)
891 {
892 CallbackEachMatch = f;
893 return ONIG_NORMAL;
894 }
895
896
897 extern void
onig_region_clear(OnigRegion * region)898 onig_region_clear(OnigRegion* region)
899 {
900 int i;
901
902 for (i = 0; i < region->num_regs; i++) {
903 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
904 }
905 #ifdef USE_CAPTURE_HISTORY
906 history_root_free(region);
907 #endif
908 }
909
910 extern int
onig_region_resize(OnigRegion * region,int n)911 onig_region_resize(OnigRegion* region, int n)
912 {
913 region->num_regs = n;
914
915 if (n < ONIG_NREGION)
916 n = ONIG_NREGION;
917
918 if (region->allocated == 0) {
919 region->beg = (int* )xmalloc(n * sizeof(int));
920 region->end = (int* )xmalloc(n * sizeof(int));
921
922 if (region->beg == 0 || region->end == 0)
923 return ONIGERR_MEMORY;
924
925 region->allocated = n;
926 }
927 else if (region->allocated < n) {
928 region->beg = (int* )xrealloc(region->beg, n * sizeof(int));
929 region->end = (int* )xrealloc(region->end, n * sizeof(int));
930
931 if (region->beg == 0 || region->end == 0)
932 return ONIGERR_MEMORY;
933
934 region->allocated = n;
935 }
936
937 return 0;
938 }
939
940 static int
onig_region_resize_clear(OnigRegion * region,int n)941 onig_region_resize_clear(OnigRegion* region, int n)
942 {
943 int r;
944
945 r = onig_region_resize(region, n);
946 if (r != 0) return r;
947 onig_region_clear(region);
948 return 0;
949 }
950
951 extern int
onig_region_set(OnigRegion * region,int at,int beg,int end)952 onig_region_set(OnigRegion* region, int at, int beg, int end)
953 {
954 if (at < 0) return ONIGERR_INVALID_ARGUMENT;
955
956 if (at >= region->allocated) {
957 int r = onig_region_resize(region, at + 1);
958 if (r < 0) return r;
959 }
960
961 region->beg[at] = beg;
962 region->end[at] = end;
963 return 0;
964 }
965
966 extern void
onig_region_init(OnigRegion * region)967 onig_region_init(OnigRegion* region)
968 {
969 region->num_regs = 0;
970 region->allocated = 0;
971 region->beg = (int* )0;
972 region->end = (int* )0;
973 region->history_root = (OnigCaptureTreeNode* )0;
974 }
975
976 extern OnigRegion*
onig_region_new(void)977 onig_region_new(void)
978 {
979 OnigRegion* r;
980
981 r = (OnigRegion* )xmalloc(sizeof(OnigRegion));
982 CHECK_NULL_RETURN(r);
983 onig_region_init(r);
984 return r;
985 }
986
987 extern void
onig_region_free(OnigRegion * r,int free_self)988 onig_region_free(OnigRegion* r, int free_self)
989 {
990 if (r != 0) {
991 if (r->allocated > 0) {
992 if (r->beg) xfree(r->beg);
993 if (r->end) xfree(r->end);
994 r->allocated = 0;
995 }
996 #ifdef USE_CAPTURE_HISTORY
997 history_root_free(r);
998 #endif
999 if (free_self) xfree(r);
1000 }
1001 }
1002
1003 extern void
onig_region_copy(OnigRegion * to,OnigRegion * from)1004 onig_region_copy(OnigRegion* to, OnigRegion* from)
1005 {
1006 #define RREGC_SIZE (sizeof(int) * from->num_regs)
1007 int i;
1008
1009 if (to == from) return;
1010
1011 if (to->allocated == 0) {
1012 if (from->num_regs > 0) {
1013 to->beg = (int* )xmalloc(RREGC_SIZE);
1014 if (IS_NULL(to->beg)) return;
1015 to->end = (int* )xmalloc(RREGC_SIZE);
1016 if (IS_NULL(to->end)) return;
1017 to->allocated = from->num_regs;
1018 }
1019 }
1020 else if (to->allocated < from->num_regs) {
1021 to->beg = (int* )xrealloc(to->beg, RREGC_SIZE);
1022 if (IS_NULL(to->beg)) return;
1023 to->end = (int* )xrealloc(to->end, RREGC_SIZE);
1024 if (IS_NULL(to->end)) return;
1025 to->allocated = from->num_regs;
1026 }
1027
1028 for (i = 0; i < from->num_regs; i++) {
1029 to->beg[i] = from->beg[i];
1030 to->end[i] = from->end[i];
1031 }
1032 to->num_regs = from->num_regs;
1033
1034 #ifdef USE_CAPTURE_HISTORY
1035 history_root_free(to);
1036
1037 if (IS_NOT_NULL(from->history_root)) {
1038 to->history_root = history_tree_clone(from->history_root);
1039 }
1040 #endif
1041 }
1042
#ifdef USE_CALLOUT
/* Fill `args` with the current matcher state and invoke `func(&args, user)`,
   storing its return value in `result`.  Besides its parameters the macro
   reads these names from the expansion site: reg, str, end, sstart,
   right_range, s, retry_in_match_counter, msa, stk_base, stk,
   mem_start_stk and mem_end_stk. */
#define CALLOUT_BODY(func, ain, aname_id, anum, user, args, result) do { \
  args.in            = (ain);\
  args.name_id       = (aname_id);\
  args.num           = anum;\
  args.regex         = reg;\
  args.string        = str;\
  args.string_end    = end;\
  args.start         = sstart;\
  args.right_range   = right_range;\
  args.current       = s;\
  args.retry_in_match_counter = retry_in_match_counter;\
  args.msa           = msa;\
  args.stk_base      = stk_base;\
  args.stk           = stk;\
  args.mem_start_stk = mem_start_stk;\
  args.mem_end_stk   = mem_end_stk;\
  result = (func)(&args, user);\
} while (0)

/* Run `func` as a retraction callout.  ONIG_CALLOUT_FAIL and
   ONIG_CALLOUT_SUCCESS fall through.  Any other result ends the match:
   a positive value is replaced by ONIGERR_INVALID_ARGUMENT, the value is
   stored in best_len, and control jumps to the match_at_end label (both
   must exist at the expansion site). */
#define RETRACTION_CALLOUT(func, aname_id, anum, user) do {\
  int result;\
  OnigCalloutArgs args;\
  CALLOUT_BODY(func, ONIG_CALLOUT_IN_RETRACTION, aname_id, anum, user, args, result);\
  switch (result) {\
  case ONIG_CALLOUT_FAIL:\
  case ONIG_CALLOUT_SUCCESS:\
    break;\
  default:\
    if (result > 0) {\
      result = ONIGERR_INVALID_ARGUMENT;\
    }\
    best_len = result;\
    goto match_at_end;\
    break;\
  }\
} while(0)
#endif
1081
1082
/** stack **/
/* Bit shared by both "alternative" entry types (see STK_MASK_POP_USED). */
#define STK_ALT_FLAG               0x0001

/* stack type */
/* used by normal-POP */
#define STK_SUPER_ALT              STK_ALT_FLAG
#define STK_ALT                    (0x0002 | STK_ALT_FLAG)

/* handled by normal-POP */
#define STK_MEM_START              0x0010
#define STK_MEM_END                0x8030
#ifndef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
#define STK_REPEAT_INC             0x0040
#else
#define STK_REPEAT_INC             (0x0040 | STK_MASK_POP_HANDLED)
#endif
#ifdef USE_CALLOUT
#define STK_CALLOUT                0x0070
#endif

/* avoided by normal-POP */
#define STK_VOID                   0x0000  /* for fill a blank */
#ifndef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
#define STK_EMPTY_CHECK_START      0x3000
#else
#define STK_EMPTY_CHECK_START      (0x3000 | STK_MASK_POP_HANDLED)
#endif
#define STK_EMPTY_CHECK_END        0x5000  /* for recursive call */
#define STK_MEM_END_MARK           0x8100
#define STK_CALL_FRAME             (0x0400 | STK_MASK_POP_HANDLED)
#define STK_RETURN                 (0x0500 | STK_MASK_POP_HANDLED)
#define STK_SAVE_VAL               0x0600
#define STK_MARK                   0x0704

/* stack type check mask */
#define STK_MASK_POP_USED          STK_ALT_FLAG
#define STK_MASK_POP_HANDLED       0x0010
#define STK_MASK_POP_HANDLED_TIL   (STK_MASK_POP_HANDLED | 0x0004)
#define STK_MASK_TO_VOID_TARGET    0x100e
#define STK_MASK_MEM_END_OR_MARK   0x8000  /* MEM_END or MEM_END_MARK */
1123
1124 typedef ptrdiff_t StackIndex;
1125
1126 #define INVALID_STACK_INDEX ((StackIndex )-1)
1127
1128 typedef union {
1129 StackIndex i;
1130 UChar* s;
1131 } StkPtrType;
1132
1133
1134 typedef struct _StackType {
1135 unsigned int type;
1136 int zid;
1137 union {
1138 struct {
1139 Operation* pcode; /* byte code position */
1140 UChar* pstr; /* string position */
1141 } state;
1142 struct {
1143 int count;
1144 #ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
1145 StackIndex prev_index; /* index of stack */
1146 #endif
1147 } repeat_inc;
1148 struct {
1149 UChar *pstr; /* start/end position */
1150 /* Following information is set, if this stack type is MEM-START */
1151 StkPtrType prev_start; /* prev. info (for backtrack "(...)*" ) */
1152 StkPtrType prev_end; /* prev. info (for backtrack "(...)*" ) */
1153 } mem;
1154 struct {
1155 UChar *pstr; /* start position */
1156 #ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
1157 StackIndex prev_index; /* index of stack */
1158 #endif
1159 } empty_check;
1160 #ifdef USE_CALL
1161 struct {
1162 Operation *ret_addr; /* byte code position */
1163 UChar *pstr; /* string position */
1164 } call_frame;
1165 #endif
1166 struct {
1167 enum SaveType type;
1168 UChar* v;
1169 UChar* v2;
1170 } val;
1171 #ifdef USE_CALLOUT
1172 struct {
1173 int num;
1174 OnigCalloutFunc func;
1175 } callout;
1176 #endif
1177 } u;
1178 } StackType;
1179
#ifdef USE_CALLOUT

/*
 * Argument bundle handed to callout functions.
 * The members before the "invisible to users" marker describe the current
 * match; the members after it are internal matcher state.
 */
struct OnigCalloutArgsStruct {
  OnigCalloutIn    in;
  int              name_id;   /* name id or ONIG_NON_NAME_ID */
  int              num;
  OnigRegex        regex;
  const OnigUChar* string;
  const OnigUChar* string_end;
  const OnigUChar* start;
  const OnigUChar* right_range;
  const OnigUChar* current;  /* current matching position */
  unsigned long    retry_in_match_counter;

  /* invisible to users */
  MatchArg*   msa;            /* msa->mp is the OnigMatchParam used by the callout-data API */
  StackType*  stk_base;
  StackType*  stk;
  StkPtrType* mem_start_stk;
  StkPtrType* mem_end_stk;
};

#endif
1203
#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR

/* Number of slots reserved in front of the stack entries: one per repeat,
   one per empty-check and two arrays of (num_mem + 1) capture slots.
   STACK_INIT sizes each slot as sizeof(StkPtrType). */
#define PTR_NUM_SIZE(reg)  ((reg)->num_repeat + (reg)->num_empty_check + ((reg)->num_mem + 1) * 2)
/* Re-derive the slot-array pointers from alloc_base (needed whenever the
   buffer may have moved).  Uses alloc_base, reg, num_mem and the *_stk
   variables of the expanding scope. */
#define UPDATE_FOR_STACK_REALLOC do{\
  repeat_stk      = (StackIndex* )alloc_base;\
  empty_check_stk = (StackIndex* )(repeat_stk + reg->num_repeat);\
  mem_start_stk   = (StkPtrType* )(empty_check_stk + reg->num_empty_check);\
  mem_end_stk     = mem_start_stk + num_mem + 1;\
} while(0)

/* Remember the previous repeat_stk[sid] in the entry being pushed, then
   point repeat_stk[sid] at that entry. */
#define SAVE_REPEAT_STK_VAR(sid) stk->u.repeat_inc.prev_index = repeat_stk[sid]
#define LOAD_TO_REPEAT_STK_VAR(sid)  repeat_stk[sid] = GET_STACK_INDEX(stk)
/* "else if" arm spliced into the pop loops: restore repeat_stk[zid] when a
   STK_REPEAT_INC entry is popped. */
#define POP_REPEAT_INC  else if (stk->type == STK_REPEAT_INC) {repeat_stk[stk->zid] = stk->u.repeat_inc.prev_index;}

/* Same save/load/restore protocol for empty_check_stk. */
#define SAVE_EMPTY_CHECK_STK_VAR(sid) stk->u.empty_check.prev_index = empty_check_stk[sid]
#define LOAD_TO_EMPTY_CHECK_STK_VAR(sid)  empty_check_stk[sid] = GET_STACK_INDEX(stk)
#define POP_EMPTY_CHECK_START  else if (stk->type == STK_EMPTY_CHECK_START) {empty_check_stk[stk->zid] = stk->u.empty_check.prev_index;}

#else

/* Without the local-variable optimization only the two capture slot
   arrays are reserved, and the repeat / empty-check helpers expand to
   nothing. */
#define PTR_NUM_SIZE(reg)  (((reg)->num_mem + 1) * 2)
#define UPDATE_FOR_STACK_REALLOC do{\
  mem_start_stk = (StkPtrType* )alloc_base;\
  mem_end_stk   = mem_start_stk + num_mem + 1;\
} while(0)

#define SAVE_REPEAT_STK_VAR(sid)
#define LOAD_TO_REPEAT_STK_VAR(sid)
#define POP_REPEAT_INC

#define SAVE_EMPTY_CHECK_STK_VAR(sid)
#define LOAD_TO_EMPTY_CHECK_STK_VAR(sid)
#define POP_EMPTY_CHECK_START

#endif /* USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR */
1239
#ifdef USE_RETRY_LIMIT
/* Copy the retry limits from the match param into msa and reset the
   per-search retry counter.  Expands to bare statements that already end
   in ';', so it must be used inside a braced block (as MATCH_ARG_INIT
   does) and without a trailing semicolon. */
#define RETRY_IN_MATCH_ARG_INIT(msa,mpv) \
  (msa).retry_limit_in_match  = (mpv)->retry_limit_in_match;\
  (msa).retry_limit_in_search = (mpv)->retry_limit_in_search;\
  (msa).retry_limit_in_search_counter = 0;
#else
#define RETRY_IN_MATCH_ARG_INIT(msa,mpv)
#endif

#if defined(USE_CALL)
/* Reset the per-search subexp-call counter (mpv is not used). */
#define SUBEXP_CALL_IN_MATCH_ARG_INIT(msa,mpv) \
  (msa).subexp_call_in_search_counter = 0;

/* "else if" arms spliced into the pop loops: popping a STK_RETURN entry
   increments subexp_call_nest_counter, popping a STK_CALL_FRAME entry
   decrements it. */
#define POP_CALL  else if (stk->type == STK_RETURN) {subexp_call_nest_counter++;} else if (stk->type == STK_CALL_FRAME) {subexp_call_nest_counter--;}
#else
#define SUBEXP_CALL_IN_MATCH_ARG_INIT(msa,mpv)
#define POP_CALL
#endif
1258
/*
 * Initialize a MatchArg (passed by value name, not pointer) for one
 * match/search call: no saved stack yet, options merged with the regex's
 * own options, limits copied from the match param, and ptr_num set to the
 * number of slots the regex needs in front of the stack.
 * The USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE variant additionally resets
 * best_len to ONIG_MISMATCH; otherwise the two variants are the same.
 */
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mpv) do { \
  (msa).stack_p  = (void* )0;\
  (msa).options  = (arg_option)|(reg)->options;\
  (msa).region   = (arg_region);\
  (msa).start    = (arg_start);\
  (msa).match_stack_limit  = (mpv)->match_stack_limit;\
  RETRY_IN_MATCH_ARG_INIT(msa,mpv)\
  SUBEXP_CALL_IN_MATCH_ARG_INIT(msa,mpv)\
  (msa).mp = mpv;\
  (msa).best_len = ONIG_MISMATCH;\
  (msa).ptr_num  = PTR_NUM_SIZE(reg);\
} while(0)
#else
#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mpv) do { \
  (msa).stack_p  = (void* )0;\
  (msa).options  = (arg_option)|(reg)->options;\
  (msa).region   = (arg_region);\
  (msa).start    = (arg_start);\
  (msa).match_stack_limit  = (mpv)->match_stack_limit;\
  RETRY_IN_MATCH_ARG_INIT(msa,mpv)\
  SUBEXP_CALL_IN_MATCH_ARG_INIT(msa,mpv)\
  (msa).mp = mpv;\
  (msa).ptr_num  = PTR_NUM_SIZE(reg);\
} while(0)
#endif
1285
/* Release the stack buffer saved in msa (if any).
   Wrapped in do { } while(0) so the macro is a single statement: a bare
   `if` would capture the `else` of an enclosing unbraced if/else. */
#define MATCH_ARG_FREE(msa)  do {\
  if ((msa).stack_p) xfree((msa).stack_p);\
} while(0)
1287
1288
/* Regexes needing more than this many pointer slots get their initial
   buffer from xmalloc instead of xalloca (see STACK_INIT). */
#define ALLOCA_PTR_NUM_LIMIT   50

/*
 * Set up alloc_base / stk_base / stk / stk_end / is_alloca in the
 * expanding scope.  The buffer layout is msa->ptr_num StkPtrType slots
 * followed by the StackType entries.
 *   - msa->stack_p set : reuse the saved buffer with msa->stack_n entries.
 *   - many ptr slots   : xmalloc a buffer for stack_num entries.
 *   - otherwise        : xalloca a buffer for stack_num entries.
 * Allocation failure leaves the enclosing function through
 * CHECK_NULL_RETURN_MEMERR.
 * NOTE(review): the definition ends in `while(0);` -- with the usual
 * `STACK_INIT(n);` call this yields an extra empty statement and makes
 * the macro unusable as the sole body of an unbraced if/else.  Confirm
 * every caller before dropping the semicolon.
 */
#define STACK_INIT(stack_num)  do {\
  if (msa->stack_p) {\
    is_alloca  = 0;\
    alloc_base = msa->stack_p;\
    stk_base   = (StackType* )(alloc_base\
                 + (sizeof(StkPtrType) * msa->ptr_num));\
    stk        = stk_base;\
    stk_end    = stk_base + msa->stack_n;\
  }\
  else if (msa->ptr_num > ALLOCA_PTR_NUM_LIMIT) {\
    is_alloca  = 0;\
    alloc_base = (char* )xmalloc(sizeof(StkPtrType) * msa->ptr_num\
                  + sizeof(StackType) * (stack_num));\
    CHECK_NULL_RETURN_MEMERR(alloc_base);\
    stk_base   = (StackType* )(alloc_base\
                 + (sizeof(StkPtrType) * msa->ptr_num));\
    stk        = stk_base;\
    stk_end    = stk_base + (stack_num);\
  }\
  else {\
    is_alloca  = 1;\
    alloc_base = (char* )xalloca(sizeof(StkPtrType) * msa->ptr_num\
                 + sizeof(StackType) * (stack_num));\
    CHECK_NULL_RETURN_MEMERR(alloc_base);\
    stk_base   = (StackType* )(alloc_base\
                 + (sizeof(StkPtrType) * msa->ptr_num));\
    stk        = stk_base;\
    stk_end    = stk_base + (stack_num);\
  }\
} while(0);
1321
1322
/*
 * Hand the current stack buffer over to msa so that a later STACK_INIT can
 * reuse it.  Stores the entry count (stk_end - stk_base, taken from the
 * expanding scope) in msa->stack_n.  A buffer that came from alloca is
 * copied into a fresh xmalloc block; otherwise msa simply keeps the
 * alloc_base pointer.  If the copy cannot be allocated the enclosing
 * function is left through CHECK_NULL_RETURN_MEMERR.
 */
#define STACK_SAVE(msa,is_alloca,alloc_base) do{\
  (msa)->stack_n = (int )(stk_end - stk_base);\
  if ((is_alloca) != 0) {\
    size_t size = sizeof(StkPtrType) * (msa)->ptr_num\
                + sizeof(StackType) * (msa)->stack_n;\
    (msa)->stack_p = xmalloc(size);\
    CHECK_NULL_RETURN_MEMERR((msa)->stack_p);\
    xmemcpy((msa)->stack_p, (alloc_base), size);\
  }\
  else {\
    (msa)->stack_p = (alloc_base);\
  };\
} while(0)
1336
1337 static unsigned int MatchStackLimit = DEFAULT_MATCH_STACK_LIMIT_SIZE;
1338
1339 extern unsigned int
onig_get_match_stack_limit_size(void)1340 onig_get_match_stack_limit_size(void)
1341 {
1342 return MatchStackLimit;
1343 }
1344
1345 extern int
onig_set_match_stack_limit_size(unsigned int size)1346 onig_set_match_stack_limit_size(unsigned int size)
1347 {
1348 MatchStackLimit = size;
1349 return 0;
1350 }
1351
#ifdef USE_RETRY_LIMIT

/* File-level defaults for the retry limits; copied into each
   OnigMatchParam by onig_initialize_match_param(). */
static unsigned long RetryLimitInMatch  = DEFAULT_RETRY_LIMIT_IN_MATCH;
static unsigned long RetryLimitInSearch = DEFAULT_RETRY_LIMIT_IN_SEARCH;

/* Count one retry.  When the counter passes the local
   retry_limit_in_match, abort through MATCH_AT_ERROR_RETURN; the error
   code is ..._IN_MATCH_OVER if the counter also exceeds
   msa->retry_limit_in_match, otherwise ..._IN_SEARCH_OVER.
   Uses retry_in_match_counter, retry_limit_in_match and msa from the
   expanding scope. */
#define CHECK_RETRY_LIMIT_IN_MATCH  do {\
  if (++retry_in_match_counter > retry_limit_in_match) {\
    MATCH_AT_ERROR_RETURN(retry_in_match_counter > msa->retry_limit_in_match ? ONIGERR_RETRY_LIMIT_IN_MATCH_OVER : ONIGERR_RETRY_LIMIT_IN_SEARCH_OVER); \
  }\
} while (0)

#else

/* Retry limiting compiled out: the check expands to nothing. */
#define CHECK_RETRY_LIMIT_IN_MATCH

#endif /* USE_RETRY_LIMIT */
1368
/* Return the default per-match retry limit, or 0 when the library is
   built without USE_RETRY_LIMIT. */
extern unsigned long onig_get_retry_limit_in_match(void)
{
#ifndef USE_RETRY_LIMIT
  return 0;
#else
  return RetryLimitInMatch;
#endif
}
1378
1379 extern int
onig_set_retry_limit_in_match(unsigned long n)1380 onig_set_retry_limit_in_match(unsigned long n)
1381 {
1382 #ifdef USE_RETRY_LIMIT
1383 RetryLimitInMatch = n;
1384 return 0;
1385 #else
1386 return ONIG_NO_SUPPORT_CONFIG;
1387 #endif
1388 }
1389
/* Return the default per-search retry limit, or 0 when the library is
   built without USE_RETRY_LIMIT. */
extern unsigned long onig_get_retry_limit_in_search(void)
{
#ifndef USE_RETRY_LIMIT
  return 0;
#else
  return RetryLimitInSearch;
#endif
}
1399
1400 extern int
onig_set_retry_limit_in_search(unsigned long n)1401 onig_set_retry_limit_in_search(unsigned long n)
1402 {
1403 #ifdef USE_RETRY_LIMIT
1404 RetryLimitInSearch = n;
1405 return 0;
1406 #else
1407 return ONIG_NO_SUPPORT_CONFIG;
1408 #endif
1409 }
1410
#ifdef USE_CALL
/* File-level limit on subexp calls per search. */
static unsigned long SubexpCallLimitInSearch = DEFAULT_SUBEXP_CALL_LIMIT_IN_SEARCH;

/* Return the current subexp-call limit per search. */
extern unsigned long onig_get_subexp_call_limit_in_search(void)
{
  unsigned long limit = SubexpCallLimitInSearch;

  return limit;
}

/* Replace the subexp-call limit per search with `n`.  Always returns 0. */
extern int onig_set_subexp_call_limit_in_search(unsigned long n)
{
  const int ok = 0;

  SubexpCallLimitInSearch = n;
  return ok;
}

#endif
1428
1429
#ifdef USE_CALLOUT
/* Default callout functions that onig_initialize_match_param() copies into
   every OnigMatchParam.  Being file-scope statics without initializers,
   they start out as null. */
static OnigCalloutFunc DefaultProgressCallout;
static OnigCalloutFunc DefaultRetractionCallout;
#endif
1434
1435 extern OnigMatchParam*
onig_new_match_param(void)1436 onig_new_match_param(void)
1437 {
1438 OnigMatchParam* p;
1439
1440 p = (OnigMatchParam* )xmalloc(sizeof(*p));
1441 if (IS_NOT_NULL(p)) {
1442 onig_initialize_match_param(p);
1443 }
1444
1445 return p;
1446 }
1447
1448 extern void
onig_free_match_param_content(OnigMatchParam * p)1449 onig_free_match_param_content(OnigMatchParam* p)
1450 {
1451 #ifdef USE_CALLOUT
1452 if (IS_NOT_NULL(p->callout_data)) {
1453 xfree(p->callout_data);
1454 p->callout_data = 0;
1455 }
1456 #endif
1457 }
1458
1459 extern void
onig_free_match_param(OnigMatchParam * p)1460 onig_free_match_param(OnigMatchParam* p)
1461 {
1462 if (IS_NOT_NULL(p)) {
1463 onig_free_match_param_content(p);
1464 xfree(p);
1465 }
1466 }
1467
1468 extern int
onig_initialize_match_param(OnigMatchParam * mp)1469 onig_initialize_match_param(OnigMatchParam* mp)
1470 {
1471 mp->match_stack_limit = MatchStackLimit;
1472 #ifdef USE_RETRY_LIMIT
1473 mp->retry_limit_in_match = RetryLimitInMatch;
1474 mp->retry_limit_in_search = RetryLimitInSearch;
1475 #endif
1476
1477 mp->callout_user_data = 0;
1478
1479 #ifdef USE_CALLOUT
1480 mp->progress_callout_of_contents = DefaultProgressCallout;
1481 mp->retraction_callout_of_contents = DefaultRetractionCallout;
1482 mp->match_at_call_counter = 0;
1483 mp->callout_data = 0;
1484 mp->callout_data_alloc_num = 0;
1485 #endif
1486
1487 return ONIG_NORMAL;
1488 }
1489
1490 #ifdef USE_CALLOUT
1491
1492 static int
adjust_match_param(regex_t * reg,OnigMatchParam * mp)1493 adjust_match_param(regex_t* reg, OnigMatchParam* mp)
1494 {
1495 RegexExt* ext = reg->extp;
1496
1497 mp->match_at_call_counter = 0;
1498
1499 if (IS_NULL(ext) || ext->callout_num == 0) return ONIG_NORMAL;
1500
1501 if (ext->callout_num > mp->callout_data_alloc_num) {
1502 CalloutData* d;
1503 size_t n = ext->callout_num * sizeof(*d);
1504 if (IS_NOT_NULL(mp->callout_data))
1505 d = (CalloutData* )xrealloc(mp->callout_data, n);
1506 else
1507 d = (CalloutData* )xmalloc(n);
1508 CHECK_NULL_RETURN_MEMERR(d);
1509
1510 mp->callout_data = d;
1511 mp->callout_data_alloc_num = ext->callout_num;
1512 }
1513
1514 xmemset(mp->callout_data, 0, mp->callout_data_alloc_num * sizeof(CalloutData));
1515 return ONIG_NORMAL;
1516 }
1517
/* Run adjust_match_param() and return its result from the enclosing
   function on failure.  Assigns to a variable `r` of the expanding scope
   and expands to two bare statements, so it is not safe as the body of an
   unbraced if/else. */
#define ADJUST_MATCH_PARAM(reg, mp) \
  r = adjust_match_param(reg, mp);\
  if (r != ONIG_NORMAL) return r;

/* Callout numbers are 1-based: entry `num` lives at index num - 1.
   No range check is performed here. */
#define CALLOUT_DATA_AT_NUM(mp, num)  ((mp)->callout_data + ((num) - 1))
1523
1524 extern int
onig_check_callout_data_and_clear_old_values(OnigCalloutArgs * args)1525 onig_check_callout_data_and_clear_old_values(OnigCalloutArgs* args)
1526 {
1527 OnigMatchParam* mp;
1528 int num;
1529 CalloutData* d;
1530
1531 mp = args->msa->mp;
1532 num = args->num;
1533
1534 d = CALLOUT_DATA_AT_NUM(mp, num);
1535 if (d->last_match_at_call_counter != mp->match_at_call_counter) {
1536 xmemset(d, 0, sizeof(*d));
1537 d->last_match_at_call_counter = mp->match_at_call_counter;
1538 return d->last_match_at_call_counter;
1539 }
1540
1541 return 0;
1542 }
1543
1544 extern int
onig_get_callout_data_dont_clear_old(regex_t * reg,OnigMatchParam * mp,int callout_num,int slot,OnigType * type,OnigValue * val)1545 onig_get_callout_data_dont_clear_old(regex_t* reg, OnigMatchParam* mp,
1546 int callout_num, int slot,
1547 OnigType* type, OnigValue* val)
1548 {
1549 OnigType t;
1550 CalloutData* d;
1551
1552 if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT;
1553
1554 d = CALLOUT_DATA_AT_NUM(mp, callout_num);
1555 t = d->slot[slot].type;
1556 if (IS_NOT_NULL(type)) *type = t;
1557 if (IS_NOT_NULL(val)) *val = d->slot[slot].val;
1558 return (t == ONIG_TYPE_VOID ? ONIG_VALUE_IS_NOT_SET : ONIG_NORMAL);
1559 }
1560
1561 extern int
onig_get_callout_data_by_tag_dont_clear_old(regex_t * reg,OnigMatchParam * mp,const UChar * tag,const UChar * tag_end,int slot,OnigType * type,OnigValue * val)1562 onig_get_callout_data_by_tag_dont_clear_old(regex_t* reg,
1563 OnigMatchParam* mp, const UChar* tag, const UChar* tag_end, int slot,
1564 OnigType* type, OnigValue* val)
1565 {
1566 int num;
1567
1568 num = onig_get_callout_num_by_tag(reg, tag, tag_end);
1569 if (num < 0) return num;
1570 if (num == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME;
1571
1572 return onig_get_callout_data_dont_clear_old(reg, mp, num, slot, type, val);
1573 }
1574
1575 extern int
onig_get_callout_data_by_callout_args_self_dont_clear_old(OnigCalloutArgs * args,int slot,OnigType * type,OnigValue * val)1576 onig_get_callout_data_by_callout_args_self_dont_clear_old(
1577 OnigCalloutArgs* args, int slot, OnigType* type, OnigValue* val)
1578 {
1579 return onig_get_callout_data_dont_clear_old(args->regex, args->msa->mp,
1580 args->num, slot, type, val);
1581 }
1582
1583 extern int
onig_get_callout_data(regex_t * reg,OnigMatchParam * mp,int callout_num,int slot,OnigType * type,OnigValue * val)1584 onig_get_callout_data(regex_t* reg, OnigMatchParam* mp,
1585 int callout_num, int slot,
1586 OnigType* type, OnigValue* val)
1587 {
1588 OnigType t;
1589 CalloutData* d;
1590
1591 if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT;
1592
1593 d = CALLOUT_DATA_AT_NUM(mp, callout_num);
1594 if (d->last_match_at_call_counter != mp->match_at_call_counter) {
1595 xmemset(d, 0, sizeof(*d));
1596 d->last_match_at_call_counter = mp->match_at_call_counter;
1597 }
1598
1599 t = d->slot[slot].type;
1600 if (IS_NOT_NULL(type)) *type = t;
1601 if (IS_NOT_NULL(val)) *val = d->slot[slot].val;
1602 return (t == ONIG_TYPE_VOID ? ONIG_VALUE_IS_NOT_SET : ONIG_NORMAL);
1603 }
1604
1605 extern int
onig_get_callout_data_by_tag(regex_t * reg,OnigMatchParam * mp,const UChar * tag,const UChar * tag_end,int slot,OnigType * type,OnigValue * val)1606 onig_get_callout_data_by_tag(regex_t* reg, OnigMatchParam* mp,
1607 const UChar* tag, const UChar* tag_end, int slot,
1608 OnigType* type, OnigValue* val)
1609 {
1610 int num;
1611
1612 num = onig_get_callout_num_by_tag(reg, tag, tag_end);
1613 if (num < 0) return num;
1614 if (num == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME;
1615
1616 return onig_get_callout_data(reg, mp, num, slot, type, val);
1617 }
1618
1619 extern int
onig_get_callout_data_by_callout_args(OnigCalloutArgs * args,int callout_num,int slot,OnigType * type,OnigValue * val)1620 onig_get_callout_data_by_callout_args(OnigCalloutArgs* args,
1621 int callout_num, int slot,
1622 OnigType* type, OnigValue* val)
1623 {
1624 return onig_get_callout_data(args->regex, args->msa->mp, callout_num, slot,
1625 type, val);
1626 }
1627
1628 extern int
onig_get_callout_data_by_callout_args_self(OnigCalloutArgs * args,int slot,OnigType * type,OnigValue * val)1629 onig_get_callout_data_by_callout_args_self(OnigCalloutArgs* args,
1630 int slot, OnigType* type, OnigValue* val)
1631 {
1632 return onig_get_callout_data(args->regex, args->msa->mp, args->num, slot,
1633 type, val);
1634 }
1635
1636 extern int
onig_set_callout_data(regex_t * reg,OnigMatchParam * mp,int callout_num,int slot,OnigType type,OnigValue * val)1637 onig_set_callout_data(regex_t* reg, OnigMatchParam* mp,
1638 int callout_num, int slot,
1639 OnigType type, OnigValue* val)
1640 {
1641 CalloutData* d;
1642
1643 if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT;
1644
1645 d = CALLOUT_DATA_AT_NUM(mp, callout_num);
1646 d->slot[slot].type = type;
1647 d->slot[slot].val = *val;
1648 d->last_match_at_call_counter = mp->match_at_call_counter;
1649
1650 return ONIG_NORMAL;
1651 }
1652
1653 extern int
onig_set_callout_data_by_tag(regex_t * reg,OnigMatchParam * mp,const UChar * tag,const UChar * tag_end,int slot,OnigType type,OnigValue * val)1654 onig_set_callout_data_by_tag(regex_t* reg, OnigMatchParam* mp,
1655 const UChar* tag, const UChar* tag_end, int slot,
1656 OnigType type, OnigValue* val)
1657 {
1658 int num;
1659
1660 num = onig_get_callout_num_by_tag(reg, tag, tag_end);
1661 if (num < 0) return num;
1662 if (num == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME;
1663
1664 return onig_set_callout_data(reg, mp, num, slot, type, val);
1665 }
1666
1667 extern int
onig_set_callout_data_by_callout_args(OnigCalloutArgs * args,int callout_num,int slot,OnigType type,OnigValue * val)1668 onig_set_callout_data_by_callout_args(OnigCalloutArgs* args,
1669 int callout_num, int slot,
1670 OnigType type, OnigValue* val)
1671 {
1672 return onig_set_callout_data(args->regex, args->msa->mp, callout_num, slot,
1673 type, val);
1674 }
1675
1676 extern int
onig_set_callout_data_by_callout_args_self(OnigCalloutArgs * args,int slot,OnigType type,OnigValue * val)1677 onig_set_callout_data_by_callout_args_self(OnigCalloutArgs* args,
1678 int slot, OnigType type, OnigValue* val)
1679 {
1680 return onig_set_callout_data(args->regex, args->msa->mp, args->num, slot,
1681 type, val);
1682 }
1683
1684 #else
/* Callouts compiled out: there is no per-match callout state to adjust. */
#define ADJUST_MATCH_PARAM(reg, mp)
1686 #endif /* USE_CALLOUT */
1687
1688
1689 static int
stack_double(int * is_alloca,char ** arg_alloc_base,StackType ** arg_stk_base,StackType ** arg_stk_end,StackType ** arg_stk,MatchArg * msa)1690 stack_double(int* is_alloca, char** arg_alloc_base,
1691 StackType** arg_stk_base, StackType** arg_stk_end,
1692 StackType** arg_stk, MatchArg* msa)
1693 {
1694 unsigned int n;
1695 int used;
1696 size_t size;
1697 size_t new_size;
1698 char* alloc_base;
1699 char* new_alloc_base;
1700 StackType *stk_base, *stk_end, *stk;
1701
1702 alloc_base = *arg_alloc_base;
1703 stk_base = *arg_stk_base;
1704 stk_end = *arg_stk_end;
1705 stk = *arg_stk;
1706
1707 n = (unsigned int )(stk_end - stk_base);
1708 size = sizeof(StkPtrType) * msa->ptr_num + sizeof(StackType) * n;
1709 n *= 2;
1710 new_size = sizeof(StkPtrType) * msa->ptr_num + sizeof(StackType) * n;
1711 if (*is_alloca != 0) {
1712 new_alloc_base = (char* )xmalloc(new_size);
1713 if (IS_NULL(new_alloc_base)) {
1714 STACK_SAVE(msa, *is_alloca, alloc_base);
1715 return ONIGERR_MEMORY;
1716 }
1717 xmemcpy(new_alloc_base, alloc_base, size);
1718 *is_alloca = 0;
1719 }
1720 else {
1721 if (msa->match_stack_limit != 0 && n > msa->match_stack_limit) {
1722 if ((unsigned int )(stk_end - stk_base) == msa->match_stack_limit) {
1723 STACK_SAVE(msa, *is_alloca, alloc_base);
1724 return ONIGERR_MATCH_STACK_LIMIT_OVER;
1725 }
1726 else
1727 n = msa->match_stack_limit;
1728 }
1729 new_alloc_base = (char* )xrealloc(alloc_base, new_size);
1730 if (IS_NULL(new_alloc_base)) {
1731 STACK_SAVE(msa, *is_alloca, alloc_base);
1732 return ONIGERR_MEMORY;
1733 }
1734 }
1735
1736 alloc_base = new_alloc_base;
1737 used = (int )(stk - stk_base);
1738 *arg_alloc_base = alloc_base;
1739 *arg_stk_base = (StackType* )(alloc_base
1740 + (sizeof(StkPtrType) * msa->ptr_num));
1741 *arg_stk = *arg_stk_base + used;
1742 *arg_stk_end = *arg_stk_base + n;
1743 return 0;
1744 }
1745
/* Make room for n more entries: if fewer than n remain, call
   stack_double() once, return its error code from the enclosing function
   on failure, and refresh the pointers that live in the (possibly moved)
   buffer.  Uses is_alloca, alloc_base, stk_base, stk_end, stk and msa of
   the expanding scope. */
#define STACK_ENSURE(n) do {\
    if ((int )(stk_end - stk) < (n)) {\
      int r = stack_double(&is_alloca, &alloc_base, &stk_base, &stk_end, &stk, msa);\
      if (r != 0) return r;\
      UPDATE_FOR_STACK_REALLOC;\
    }\
} while(0)

/* Conversions between a stack index and an entry pointer, relative to
   stk_base. */
#define STACK_AT(index)        (stk_base + (index))
#define GET_STACK_INDEX(stk)   ((stk) - stk_base)
1756
/* Push an entry that carries only a type.  Like the other push macros it
   finishes with STACK_INC (defined elsewhere; presumably advances stk). */
#define STACK_PUSH_TYPE(stack_type) do {\
  STACK_ENSURE(1);\
  stk->type = (stack_type);\
  STACK_INC;\
} while(0)

/* True when the entry's type has any STK_MASK_TO_VOID_TARGET bit set. */
#define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)

/* Push a state entry: byte code position `pat` and string position `s`. */
#define STACK_PUSH(stack_type,pat,s) do {\
    STACK_ENSURE(1);\
    stk->type = (stack_type);\
    stk->u.state.pcode     = (pat);\
    stk->u.state.pstr      = (s);\
    STACK_INC;\
  } while(0)

/* Same as STACK_PUSH, additionally recording `id` in zid. */
#define STACK_PUSH_WITH_ZID(stack_type,pat,s,id) do {\
    STACK_ENSURE(1);\
    stk->type = (stack_type);\
    stk->zid  = (int )(id);\
    stk->u.state.pcode     = (pat);\
    stk->u.state.pstr      = (s);\
    STACK_INC;\
  } while(0)

/* Push without calling STACK_ENSURE and without setting pstr; presumably
   the caller has already guaranteed room for the entry. */
#define STACK_PUSH_ENSURED(stack_type,pat) do {\
  stk->type = (stack_type);\
  stk->u.state.pcode = (pat);\
  STACK_INC;\
} while(0)

/* Push without calling STACK_ENSURE.  The ONIG_DEBUG_MATCH build also
   records the current string position `s` from the expanding scope. */
#ifdef ONIG_DEBUG_MATCH
#define STACK_PUSH_BOTTOM(stack_type,pat) do {\
  stk->type = (stack_type);\
  stk->u.state.pcode = (pat);\
  stk->u.state.pstr  = s;\
  STACK_INC;\
} while (0)
#else
#define STACK_PUSH_BOTTOM(stack_type,pat) do {\
  stk->type = (stack_type);\
  stk->u.state.pcode = (pat);\
  STACK_INC;\
} while (0)
#endif

/* Shorthands for pushing alternative (backtrack point) entries. */
#define STACK_PUSH_ALT(pat,s)             STACK_PUSH(STK_ALT,pat,s)
#define STACK_PUSH_SUPER_ALT(pat,s)       STACK_PUSH(STK_SUPER_ALT,pat,s)
#define STACK_PUSH_ALT_WITH_ZID(pat,s,id) STACK_PUSH_WITH_ZID(STK_ALT,pat,s,id)
1806
/* Disabled code.  It refers to stk->u.repeat, a member the StackType
   union defined in this file does not have. */
#if 0
#define STACK_PUSH_REPEAT(sid, pat) do {\
  STACK_ENSURE(1);\
  stk->type = STK_REPEAT;\
  stk->zid  = (sid);\
  stk->u.repeat.pcode = (pat);\
  STACK_INC;\
} while(0)
#endif

/* Push the repeat counter `ct` for repeat `sid`; with
   USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR the entry also saves the previous
   repeat_stk[sid] and becomes the new repeat_stk[sid]. */
#define STACK_PUSH_REPEAT_INC(sid, ct) do {\
  STACK_ENSURE(1);\
  stk->type = STK_REPEAT_INC;\
  stk->zid  = (sid);\
  stk->u.repeat_inc.count = (ct);\
  SAVE_REPEAT_STK_VAR(sid);\
  LOAD_TO_REPEAT_STK_VAR(sid);\
  STACK_INC;\
} while(0)

/* Push the start of capture group `mnum` at string position `s`.  The
   previous start/end slots are saved in the entry, the start slot is
   pointed at this entry and the end slot is invalidated. */
#define STACK_PUSH_MEM_START(mnum, s) do {\
  STACK_ENSURE(1);\
  stk->type = STK_MEM_START;\
  stk->zid  = (mnum);\
  stk->u.mem.pstr       = (s);\
  stk->u.mem.prev_start = mem_start_stk[mnum];\
  stk->u.mem.prev_end   = mem_end_stk[mnum];\
  mem_start_stk[mnum].i = GET_STACK_INDEX(stk);\
  mem_end_stk[mnum].i   = INVALID_STACK_INDEX;\
  STACK_INC;\
} while(0)

/* Push the end of capture group `mnum` at string position `s`.  The
   previous start/end slots are saved in the entry and the end slot is
   pointed at this entry. */
#define STACK_PUSH_MEM_END(mnum, s) do {\
  STACK_ENSURE(1);\
  stk->type = STK_MEM_END;\
  stk->zid  = (mnum);\
  stk->u.mem.pstr       = (s);\
  stk->u.mem.prev_start = mem_start_stk[mnum];\
  stk->u.mem.prev_end   = mem_end_stk[mnum];\
  mem_end_stk[mnum].i   = GET_STACK_INDEX(stk);\
  STACK_INC;\
} while(0)

/* Push a marker entry for group `mnum`; it carries no position and does
   not touch the capture slots. */
#define STACK_PUSH_MEM_END_MARK(mnum) do {\
  STACK_ENSURE(1);\
  stk->type = STK_MEM_END_MARK;\
  stk->zid  = (mnum);\
  STACK_INC;\
} while(0)
1856
/* Walk down from the stack top to the STK_MEM_START entry of group `mnum`
   that is not paired with a later end entry: each MEM_END / MEM_END_MARK
   entry of the group seen on the way raises a nesting level, and each
   MEM_START entry of the group lowers it until level 0 is hit.
   `k` receives the entry; if none is found the loop stops with
   k == stk_base. */
#define STACK_GET_MEM_START(mnum, k) do {\
  int level = 0;\
  k = stk;\
  while (k > stk_base) {\
    k--;\
    if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
      && k->zid == (mnum)) {\
      level++;\
    }\
    else if (k->type == STK_MEM_START && k->zid == (mnum)) {\
      if (level == 0) break;\
      level--;\
    }\
  }\
} while(0)
1872
/* Scan upward from entry `k` (which is advanced) to the stack top for the
   outermost start/end pair of capture group `mnum`: `start` receives the
   position of the first STK_MEM_START of the group, `end` the position of
   the STK_MEM_END that brings the nesting level back to 0.  If no such end
   entry exists, `end` is left untouched.
   The group number is kept in the entry's zid field; the mem struct has no
   `num` member, so the former `k->u.mem.num` could not compile. */
#define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
  int level = 0;\
  while (k < stk) {\
    if (k->type == STK_MEM_START && k->zid == (mnum)) {\
      if (level == 0) (start) = k->u.mem.pstr;\
      level++;\
    }\
    else if (k->type == STK_MEM_END && k->zid == (mnum)) {\
      level--;\
      if (level == 0) {\
        (end) = k->u.mem.pstr;\
        break;\
      }\
    }\
    k++;\
  }\
} while(0)
1890
/* Push the start of empty-check `cnum` at string position `s`; with
   USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR the entry also saves the previous
   empty_check_stk[cnum] and becomes the new empty_check_stk[cnum]. */
#define STACK_PUSH_EMPTY_CHECK_START(cnum, s) do {\
  STACK_ENSURE(1);\
  stk->type = STK_EMPTY_CHECK_START;\
  stk->zid  = (cnum);\
  stk->u.empty_check.pstr = (s);\
  SAVE_EMPTY_CHECK_STK_VAR(cnum);\
  LOAD_TO_EMPTY_CHECK_STK_VAR(cnum);\
  STACK_INC;\
} while(0)

/* Push the end marker of empty-check `cnum` (no position recorded). */
#define STACK_PUSH_EMPTY_CHECK_END(cnum) do {\
  STACK_ENSURE(1);\
  stk->type = STK_EMPTY_CHECK_END;\
  stk->zid  = (cnum);\
  STACK_INC;\
} while(0)

/* Push a call frame holding the return address `pat`.  Uses
   u.call_frame, which exists only when USE_CALL is defined. */
#define STACK_PUSH_CALL_FRAME(pat) do {\
  STACK_ENSURE(1);\
  stk->type = STK_CALL_FRAME;\
  stk->u.call_frame.ret_addr = (pat);\
  STACK_INC;\
} while(0)

/* Push a STK_RETURN entry (type only). */
#define STACK_PUSH_RETURN do {\
  STACK_ENSURE(1);\
  stk->type = STK_RETURN;\
  STACK_INC;\
} while(0)

/* Push a mark with id `sid`. */
#define STACK_PUSH_MARK(sid) do {\
  STACK_ENSURE(1);\
  stk->type = STK_MARK;\
  stk->zid  = (sid);\
  STACK_INC;\
} while(0)

/* Push a mark with id `sid` that also stores position `s` in u.val.v. */
#define STACK_PUSH_MARK_WITH_POS(sid, s) do {\
  STACK_ENSURE(1);\
  stk->type = STK_MARK;\
  stk->zid  = (sid);\
  stk->u.val.v = (UChar* )(s);\
  STACK_INC;\
} while(0)
1935
/* Push a saved value `sval` of kind `stype` under id `sid`. */
#define STACK_PUSH_SAVE_VAL(sid, stype, sval) do {\
  STACK_ENSURE(1);\
  stk->type = STK_SAVE_VAL;\
  stk->zid  = (sid);\
  stk->u.val.type = (stype);\
  stk->u.val.v    = (UChar* )(sval);\
  STACK_INC;\
} while(0)

/* Currently expands to exactly the same code as STACK_PUSH_SAVE_VAL. */
#define STACK_PUSH_SAVE_VAL_WITH_SPREV(sid, stype, sval) do {\
  STACK_ENSURE(1);\
  stk->type = STK_SAVE_VAL;\
  stk->zid  = (sid);\
  stk->u.val.type = (stype);\
  stk->u.val.v    = (UChar* )(sval);\
  STACK_INC;\
} while(0)

/* Fetch into `sval` the most recently pushed saved value of kind `stype`,
   whatever its id.  `sval` is left untouched when none is found. */
#define STACK_GET_SAVE_VAL_TYPE_LAST(stype, sval) do {\
  StackType *k = stk;\
  while (k > stk_base) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST"); \
    if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)) {\
      (sval) = k->u.val.v;\
      break;\
    }\
  }\
} while (0)

/* Fetch into `sval` the most recent saved value of kind `stype` and id
   `sid` that is found while the call nesting level is 0.  The level is
   adjusted by STK_RETURN (+1) and STK_CALL_FRAME (-1) entries passed on
   the way down.  When `clear` is non-zero the found entry is turned into
   STK_VOID.  `sval` is left untouched when nothing is found. */
#define STACK_GET_SAVE_VAL_TYPE_LAST_ID(stype, sid, sval, clear) do {\
  int level = 0;\
  StackType *k = stk;\
  while (k > stk_base) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID"); \
    if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\
        && k->zid == (sid)) {\
      if (level == 0) {\
        (sval) = k->u.val.v;\
        if (clear != 0) k->type = STK_VOID;\
        break;\
      }\
    }\
    else if (k->type == STK_CALL_FRAME)\
      level--;\
    else if (k->type == STK_RETURN)\
      level++;\
  }\
} while (0)

/* Same search as STACK_GET_SAVE_VAL_TYPE_LAST_ID, without the option to
   void the found entry. */
#define STACK_GET_SAVE_VAL_TYPE_LAST_ID_WITH_SPREV(stype, sid, sval) do { \
  int level = 0;\
  StackType *k = stk;\
  while (k > stk_base) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID"); \
    if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\
        && k->zid == (sid)) {\
      if (level == 0) {\
        (sval) = k->u.val.v;\
        break;\
      }\
    }\
    else if (k->type == STK_CALL_FRAME)\
      level--;\
    else if (k->type == STK_RETURN)\
      level++;\
  }\
} while (0)
2006
/* Push a callout entry for a callout of contents: callout number `anum`
   and function `func`, with zid set to ONIG_NON_NAME_ID.  Uses u.callout,
   which exists only when USE_CALLOUT is defined. */
#define STACK_PUSH_CALLOUT_CONTENTS(anum, func) do {\
  STACK_ENSURE(1);\
  stk->type = STK_CALLOUT;\
  stk->zid  = ONIG_NON_NAME_ID;\
  stk->u.callout.num = (anum);\
  stk->u.callout.func = (func);\
  STACK_INC;\
} while(0)

/* Push a callout entry for a callout of name: name id `aid`, callout
   number `anum` and function `func`. */
#define STACK_PUSH_CALLOUT_NAME(aid, anum, func) do {\
  STACK_ENSURE(1);\
  stk->type = STK_CALLOUT;\
  stk->zid  = (aid);\
  stk->u.callout.num = (anum);\
  stk->u.callout.func = (func);\
  STACK_INC;\
} while(0)
2024
/* Debug-only guard: when pointer `p` has moved below stk_base, print the
   location tag `at` and abort the match with ONIGERR_STACK_BUG.  Expands
   to nothing in non-debug builds. */
#ifdef ONIG_DEBUG
#define STACK_BASE_CHECK(p, at) \
  if ((p) < stk_base) {\
    fprintf(DBGFP, "at %s\n", at);\
    MATCH_AT_ERROR_RETURN(ONIGERR_STACK_BUG);\
  }
#else
#define STACK_BASE_CHECK(p, at)
#endif

/* Drop the top entry without any bookkeeping. */
#define STACK_POP_ONE do {\
  stk--;\
  STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
} while(0)
2039
2040
/* "else if" arm spliced into STACK_POP: when a STK_CALLOUT entry is
   popped, run RETRACTION_CALLOUT with the entry's function, name id and
   number plus the match param's user data.  Empty without USE_CALLOUT. */
#ifdef USE_CALLOUT
#define POP_CALLOUT_CASE \
  else if (stk->type == STK_CALLOUT) {\
    RETRACTION_CALLOUT(stk->u.callout.func, stk->zid, stk->u.callout.num, msa->mp->callout_user_data);\
  }
#else
#define POP_CALLOUT_CASE
#endif
2049
/*
 * Pop entries until one whose type matches STK_MASK_POP_USED is on top
 * (stk is left pointing at it).  How much bookkeeping is undone on the way
 * depends on pop_level (a variable of the expanding scope):
 *   STACK_POP_LEVEL_FREE      : nothing is restored.
 *   STACK_POP_LEVEL_MEM_START : capture slots are restored from popped
 *                               STK_MEM_START entries.
 *   any other level           : for entries with STK_MASK_POP_HANDLED,
 *                               capture slots are restored from MEM_START
 *                               and MEM_END entries, and the spliced
 *                               POP_REPEAT_INC / POP_EMPTY_CHECK_START /
 *                               POP_CALL / POP_CALLOUT_CASE arms run.
 * There is no lower bound check outside ONIG_DEBUG builds, so a matching
 * entry must exist below the top.
 */
#define STACK_POP  do {\
  switch (pop_level) {\
  case STACK_POP_LEVEL_FREE:\
    while (1) {\
      stk--;\
      STACK_BASE_CHECK(stk, "STACK_POP"); \
      if ((stk->type & STK_MASK_POP_USED) != 0)  break;\
    }\
    break;\
  case STACK_POP_LEVEL_MEM_START:\
    while (1) {\
      stk--;\
      STACK_BASE_CHECK(stk, "STACK_POP 2"); \
      if ((stk->type & STK_MASK_POP_USED) != 0)  break;\
      else if (stk->type == STK_MEM_START) {\
        mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
        mem_end_stk[stk->zid]   = stk->u.mem.prev_end;\
      }\
    }\
    break;\
  default:\
    while (1) {\
      stk--;\
      STACK_BASE_CHECK(stk, "STACK_POP 3"); \
      if ((stk->type & STK_MASK_POP_USED) != 0)  break;\
      else if ((stk->type & STK_MASK_POP_HANDLED) != 0) {\
        if (stk->type == STK_MEM_START) {\
          mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
          mem_end_stk[stk->zid]   = stk->u.mem.prev_end;\
        }\
        else if (stk->type == STK_MEM_END) {\
          mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
          mem_end_stk[stk->zid]   = stk->u.mem.prev_end;\
        }\
        POP_REPEAT_INC \
        POP_EMPTY_CHECK_START \
        POP_CALL \
        POP_CALLOUT_CASE\
      }\
    }\
    break;\
  }\
} while(0)
2093
/* Pop entries until the STK_MARK entry with id `sid` is on top (stk is
   left pointing at it).  Marks with other ids are skipped.  For other
   entries matching STK_MASK_POP_HANDLED_TIL the capture slots and the
   repeat / empty-check / call bookkeeping are restored; retraction
   callouts are deliberately not run (see the comment inside). */
#define STACK_POP_TO_MARK(sid)  do {\
  while (1) {\
    stk--;\
    STACK_BASE_CHECK(stk, "STACK_POP_TO_MARK");\
    if ((stk->type & STK_MASK_POP_HANDLED_TIL) != 0) {\
      if (stk->type == STK_MARK) {\
        if (stk->zid == (sid)) break;\
      }\
      else {\
        if (stk->type == STK_MEM_START) {\
          mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
          mem_end_stk[stk->zid]   = stk->u.mem.prev_end;\
        }\
        else if (stk->type == STK_MEM_END) {\
          mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
          mem_end_stk[stk->zid]   = stk->u.mem.prev_end;\
        }\
        POP_REPEAT_INC \
        POP_EMPTY_CHECK_START \
        POP_CALL \
        /* Don't call callout here because negation of total success by (?!..) (?<!..) */\
      }\
    }\
  }\
} while(0)
2119
2120
/* Pop entries until one of type `til_type` is on top (stk is left pointing
   at it).  Only entries matching STK_MASK_POP_HANDLED_TIL are examined, so
   til_type must carry one of those bits to ever be found.  Other matching
   entries have their capture slots and repeat / empty-check / call
   bookkeeping restored; retraction callouts are deliberately not run.
   `aname` is the location tag for the debug-only base check. */
#define POP_TIL_BODY(aname, til_type) do {\
  while (1) {\
    stk--;\
    STACK_BASE_CHECK(stk, (aname));\
    if ((stk->type & STK_MASK_POP_HANDLED_TIL) != 0) {\
      if (stk->type == (til_type)) break;\
      else {\
        if (stk->type == STK_MEM_START) {\
          mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
          mem_end_stk[stk->zid]   = stk->u.mem.prev_end;\
        }\
        else if (stk->type == STK_MEM_END) {\
          mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
          mem_end_stk[stk->zid]   = stk->u.mem.prev_end;\
        }\
        POP_REPEAT_INC \
        POP_EMPTY_CHECK_START \
        POP_CALL \
        /* Don't call callout here because negation of total success by (?!..) (?<!..) */\
      }\
    }\
  }\
} while(0)
2144
2145
/* Without popping anything, walk down from the stack top and turn every
   entry matching STK_MASK_TO_VOID_TARGET into STK_VOID, up to and
   including the STK_MARK entry with id `sid`.  Marks with other ids are
   left intact.  `k` is a caller-supplied scratch pointer. */
#define STACK_TO_VOID_TO_MARK(k,sid) do {\
  k = stk;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_TO_VOID_TO_MARK");\
    if (IS_TO_VOID_TARGET(k)) {\
      if (k->type == STK_MARK) {\
        if (k->zid == (sid)) {\
          k->type = STK_VOID;\
          break;\
        } /* don't void different id mark */ \
      }\
      else\
        k->type = STK_VOID;\
    }\
  }\
} while(0)
2163
/* Linear search down from the stack top for the nearest
   STK_EMPTY_CHECK_START entry with id `sid`; `k` receives it.  There is
   no lower bound check outside ONIG_DEBUG builds. */
#define EMPTY_CHECK_START_SEARCH(sid, k) do {\
  k = stk;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "EMPTY_CHECK_START_SEARCH"); \
    if (k->type == STK_EMPTY_CHECK_START) {\
      if (k->zid == (sid)) break;\
    }\
  }\
} while(0)

#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR

/* Locate the start entry of empty-check `sid`: directly through
   empty_check_stk[sid] when the regex has no calls (reg->num_call == 0),
   otherwise by searching the stack. */
#define GET_EMPTY_CHECK_START(sid, k) do {\
  if (reg->num_call == 0) {\
    k = STACK_AT(empty_check_stk[sid]);\
  }\
  else {\
    EMPTY_CHECK_START_SEARCH(sid, k);\
  }\
} while(0)
#else

#define GET_EMPTY_CHECK_START(sid, k)  EMPTY_CHECK_START_SEARCH(sid, k)

#endif


/* Set `isnull` to 1 when the string position `s` equals the position
   recorded at the start of empty-check `sid`, else to 0. */
#define STACK_EMPTY_CHECK(isnull, sid, s) do {\
  StackType* k;\
  GET_EMPTY_CHECK_START(sid, k);\
  (isnull) = (k->u.empty_check.pstr == (s));\
} while(0)
2197
/* From a STK_MEM_START entry `k`, compute the string address of the
   group's previous end position into `addr`: 0 when no previous end was
   recorded; otherwise, for groups flagged in reg->push_mem_end the saved
   value is a stack index that is followed to the entry's pstr, and for
   other groups it is the string pointer itself. */
#define STACK_MEM_START_GET_PREV_END_ADDR(k /* STK_MEM_START*/, reg, addr) do {\
  if (k->u.mem.prev_end.i == INVALID_STACK_INDEX) {\
    (addr) = 0;\
  }\
  else {\
    if (MEM_STATUS_AT((reg)->push_mem_end, k->zid))\
      (addr) = STACK_AT(k->u.mem.prev_end.i)->u.mem.pstr;\
    else\
      (addr) = k->u.mem.prev_end.s;\
  }\
} while (0)
2209
2210 #ifdef USE_RIGID_CHECK_CAPTURES_IN_EMPTY_REPEAT
/*
 * Empty-loop check that also looks at capture groups.
 *
 * isnull is set to 0 when the string position `s` differs from the one
 * recorded at the start of empty-check `sid`.  Otherwise isnull starts as
 * 1 and the entries above the start entry are scanned from the top down:
 * for every STK_MEM_END whose group is selected in empty_status_mem, the
 * group's STK_MEM_START between the start entry and that end entry is
 * looked up and its saved previous start/end positions are compared with
 * the current ones; when the comparison below fails, control jumps to the
 * not-empty label and isnull becomes 0.  A group that passes is removed
 * from the working mask, and the scan stops once the mask is empty.
 *
 * The `reg` parameter is not referenced by the body.  The macro defines a
 * label, so it can be expanded only once per function.
 */
#define STACK_EMPTY_CHECK_MEM(isnull, sid, empty_status_mem, s, reg) do {\
  StackType* klow;\
  GET_EMPTY_CHECK_START(sid, klow);\
  if (klow->u.empty_check.pstr != (s)) {\
  stack_empty_check_mem_not_empty:\
    (isnull) = 0;\
  }\
  else {\
    StackType *k, *kk;\
    MemStatusType ms = (empty_status_mem);\
    (isnull) = 1;\
    k = stk;\
    while (k > klow) {\
      k--;\
      if (k->type == STK_MEM_END && MEM_STATUS_LIMIT_AT(ms, k->zid)) {\
        kk = klow;\
        while (kk < k) {\
          if (kk->type == STK_MEM_START && kk->zid == k->zid) {\
            if (kk->u.mem.prev_end.i == INVALID_STACK_INDEX || \
                ((STACK_AT(kk->u.mem.prev_end.i)->u.mem.pstr != k->u.mem.pstr || STACK_AT(kk->u.mem.prev_start.i)->u.mem.pstr != STACK_AT(k->u.mem.prev_start.i)->u.mem.pstr) && (STACK_AT(k->u.mem.prev_start.i)->u.mem.pstr != k->u.mem.pstr || STACK_AT(kk->u.mem.prev_start.i)->u.mem.pstr != STACK_AT(kk->u.mem.prev_end.i)->u.mem.pstr))) {\
              goto stack_empty_check_mem_not_empty;\
            }\
            else {\
              ms &= ~((MemStatusType )1 << k->zid);\
              break;\
            }\
          }\
          kk++;\
        }\
        if (ms == 0) break;\
      }\
    }\
  }\
} while(0)
2245
/* Variant of STACK_EMPTY_CHECK_MEM that tracks nesting of empty-check
 * entries with the same sid.
 *
 * klow walks down from stk.  An STK_EMPTY_CHECK_END entry for sid raises
 * level; an STK_EMPTY_CHECK_START entry for sid at a non-zero level lowers
 * it.  The STK_EMPTY_CHECK_START entry for sid met at level 0 is the one
 * that is used:
 *   - if its saved position differs from s, isnull is 0;
 *   - otherwise isnull is 1, and when empty_status_mem is non-zero the
 *     entries between klow and stk are scanned top-down.  Only STK_MEM_END
 *     entries seen while level is 0 and selected by
 *     MEM_STATUS_LIMIT_AT(ms, zid) are examined, using the same
 *     previous-position comparison as STACK_EMPTY_CHECK_MEM; a reported
 *     difference jumps to the not-empty label (isnull = 0).
 * level is reset to 0 after each inner search for a matching STK_MEM_START.
 *
 * The reg argument is not referenced by this expansion.
 * The macro defines a label, so it can be expanded only once per function.
 */
#define STACK_EMPTY_CHECK_MEM_REC(isnull,sid,empty_status_mem,s,reg) do {\
  int level = 0;\
  StackType* klow = stk;\
  while (1) {\
    klow--;\
    STACK_BASE_CHECK(klow, "STACK_EMPTY_CHECK_MEM_REC");\
    if (klow->type == STK_EMPTY_CHECK_START) {\
      if (klow->zid == (sid)) {\
        if (level == 0) {\
          if (klow->u.empty_check.pstr != (s)) {\
            stack_empty_check_mem_rec_not_empty:\
            (isnull) = 0;\
            break;\
          }\
          else {\
            StackType *k, *kk;\
            MemStatusType ms;\
            (isnull) = 1;\
            if ((empty_status_mem) == 0) break;\
            ms = (empty_status_mem);\
            k = stk;\
            while (k > klow) {\
              k--;\
              if (k->type == STK_MEM_END) {\
                if (level == 0 && MEM_STATUS_LIMIT_AT(ms, k->zid)) {\
                  kk = klow;\
                  kk++;\
                  while (kk < k) {\
                    if (kk->type == STK_MEM_START && kk->zid == k->zid) {\
                      if (kk->u.mem.prev_end.i == INVALID_STACK_INDEX || \
                          ((STACK_AT(kk->u.mem.prev_end.i)->u.mem.pstr != k->u.mem.pstr || STACK_AT(kk->u.mem.prev_start.i)->u.mem.pstr != STACK_AT(k->u.mem.prev_start.i)->u.mem.pstr) && (STACK_AT(k->u.mem.prev_start.i)->u.mem.pstr != k->u.mem.pstr || STACK_AT(kk->u.mem.prev_start.i)->u.mem.pstr != STACK_AT(kk->u.mem.prev_end.i)->u.mem.pstr))) {\
                        goto stack_empty_check_mem_rec_not_empty;\
                      }\
                      else {\
                        ms &= ~((MemStatusType )1 << k->zid);\
                        break;\
                      }\
                    }\
                    else if (kk->type == STK_EMPTY_CHECK_START) {\
                      if (kk->zid == (sid)) level++;\
                    }\
                    else if (kk->type == STK_EMPTY_CHECK_END) {\
                      if (kk->zid == (sid)) level--;\
                    }\
                    kk++;\
                  }\
                  level = 0;\
                  if (ms == 0) break;\
                }\
              }\
              else if (k->type == STK_EMPTY_CHECK_START) {\
                if (k->zid == (sid)) level++;\
              }\
              else if (k->type == STK_EMPTY_CHECK_END) {\
                if (k->zid == (sid)) level--;\
              }\
            }\
            break;\
          }\
        }\
        else {\
          level--;\
        }\
      }\
    }\
    else if (klow->type == STK_EMPTY_CHECK_END) {\
      if (klow->zid == (sid)) level++;\
    }\
  }\
} while(0)
2316 #else
/* Nesting-aware empty check used when USE_RIGID_CHECK_CAPTURES_IN_EMPTY_REPEAT
 * is not defined.
 *
 * k walks down from stk.  Every STK_EMPTY_CHECK_END entry raises level and
 * every STK_EMPTY_CHECK_START entry lowers it, except the start entry whose
 * u.empty_check.num equals id and which is met at level 0: that entry ends
 * the walk and isnull is set to whether its saved position equals s.
 *
 * NOTE(review): this branch compares u.empty_check.num, while the other
 * empty-check macros in this file compare zid -- confirm the field still
 * exists before enabling this configuration.
 */
#define STACK_EMPTY_CHECK_REC(isnull,id,s) do {\
  int level = 0;\
  StackType* k = stk;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_REC"); \
    if (k->type == STK_EMPTY_CHECK_START) {\
      if (k->u.empty_check.num == (id)) {\
        if (level == 0) {\
          (isnull) = (k->u.empty_check.pstr == (s));\
          break;\
        }\
      }\
      level--;\
    }\
    else if (k->type == STK_EMPTY_CHECK_END) {\
      level++;\
    }\
  }\
} while(0)
2337 #endif /* USE_RIGID_CHECK_CAPTURES_IN_EMPTY_REPEAT */
2338
/* Search downward from stk for the STK_REPEAT_INC entry whose zid equals
   sid and copy its count into c.  When an STK_RETURN entry is met, the walk
   first skips down to the STK_CALL_FRAME entry that balances it (nested
   STK_RETURN / STK_CALL_FRAME pairs are counted with level). */
#define STACK_GET_REPEAT_COUNT_SEARCH(sid, c) do {\
  StackType* k = stk;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_GET_REPEAT_COUNT_SEARCH");\
    if (k->type == STK_REPEAT_INC) {\
      if (k->zid == (sid)) {\
        (c) = k->u.repeat_inc.count;\
        break;\
      }\
    }\
    else if (k->type == STK_RETURN) {\
      int level = -1;\
      do {\
        k--;\
        if (k->type == STK_CALL_FRAME)\
          level++;\
        else if (k->type == STK_RETURN)\
          level--;\
      } while (level != 0);\
    }\
  }\
} while(0)
2363
#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR

/* Load into c the repeat count for sid.
   If reg->num_call is non-zero the stack is searched; otherwise the count
   is read from the entry indexed by repeat_stk[sid]. */
#define STACK_GET_REPEAT_COUNT(sid, c) do {\
  if (reg->num_call != 0) {\
    STACK_GET_REPEAT_COUNT_SEARCH(sid, c);\
  }\
  else {\
    (c) = (STACK_AT(repeat_stk[sid]))->u.repeat_inc.count;\
  }\
} while(0)
#else
/* Without the local index table the count is always searched for. */
#define STACK_GET_REPEAT_COUNT(sid, c) STACK_GET_REPEAT_COUNT_SEARCH(sid, c)
#endif
2377
2378 #ifdef USE_CALL
/* Find the call frame to return to and store its ret_addr in addr.
   Walking down from stk, each STK_RETURN entry raises level and each
   STK_CALL_FRAME entry met at a non-zero level lowers it; the
   STK_CALL_FRAME entry met at level 0 is the one whose ret_addr is used. */
#define STACK_RETURN(addr) do {\
  StackType* k;\
  int level;\
  for (k = stk, level = 0; ; ) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_RETURN"); \
    if (k->type == STK_RETURN) {\
      level++;\
    }\
    else if (k->type == STK_CALL_FRAME) {\
      if (level == 0) break;\
      level--;\
    }\
  }\
  (addr) = k->u.call_frame.ret_addr;\
} while(0)
2396
/* Same search as STACK_RETURN, but the walking pointer is the caller's
   lvalue k, so on exit k also identifies the STK_CALL_FRAME entry whose
   ret_addr was stored in addr. */
#define GET_STACK_RETURN_CALL(k,addr) do {\
  int level;\
  for (k = stk, level = 0; ; ) {\
    k--;\
    STACK_BASE_CHECK(k, "GET_STACK_RETURN_CALL");\
    if (k->type == STK_RETURN) {\
      level++;\
    }\
    else if (k->type == STK_CALL_FRAME) {\
      if (level == 0) break;\
      level--;\
    }\
  }\
  (addr) = k->u.call_frame.ret_addr;\
} while(0)
2414 #endif
2415
2416
/* Compare len bytes at s1 and s2; jump to the enclosing function's `fail`
   label at the first difference.
   s1, s2 and len must be modifiable lvalues: both pointers are advanced for
   every byte compared (including the mismatching one) and len is
   decremented as the loop runs. */
#define STRING_CMP(s1,s2,len) do {\
  while (len-- > 0) {\
    if (*s1++ != *s2++) goto fail;\
  }\
} while(0)
2422
/* Case-folded comparison through string_cmp_ic() using the local `encode`;
   jumps to `fail` when string_cmp_ic() returns 0.  On success *ps2 has been
   advanced by string_cmp_ic(). */
#define STRING_CMP_IC(case_fold_flag,s1,ps2,len) do {\
  if (! string_cmp_ic(encode, case_fold_flag, s1, ps2, len)) \
    goto fail; \
} while(0)
2427
/*
 * Compare mblen bytes starting at s1 with mblen bytes starting at *ps2,
 * one case-folded character at a time.
 *
 * Each iteration folds the next character of both inputs with
 * ONIGENC_MBC_CASE_FOLD and requires the folded byte sequences to be
 * identical in length and content.
 *
 * Returns 1 on equality and stores the advanced second pointer in *ps2;
 * returns 0 on any difference (including the second input being used up
 * before the first), leaving *ps2 untouched.
 */
static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
                         UChar* s1, UChar** ps2, int mblen)
{
  UChar fold1[ONIGENC_MBC_CASE_FOLD_MAXLEN];
  UChar fold2[ONIGENC_MBC_CASE_FOLD_MAXLEN];
  UChar *limit1, *limit2, *cur2;
  int n1, n2, i;

  cur2   = *ps2;
  limit1 = s1 + mblen;
  limit2 = cur2 + mblen;

  while (s1 < limit1) {
    n1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, limit1, fold1);
    n2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &cur2, limit2, fold2);
    if (n1 != n2) return 0;

    for (i = 0; i < n1; i++) {
      if (fold1[i] != fold2[i]) return 0;
    }

    if (cur2 >= limit2) {
      /* second input consumed: equal only if the first one is, too */
      if (s1 < limit1) return 0;
      break;
    }
  }

  *ps2 = cur2;
  return 1;
}
2459
/* Like STRING_CMP, but reports the outcome in is_fail (1 on the first
   differing byte, 0 if all len bytes are equal) instead of jumping.
   s1, s2 and len must be modifiable lvalues; they are advanced/decremented
   by the loop. */
#define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
  is_fail = 0;\
  while (len-- > 0) {\
    if (*s1++ != *s2++) {\
      is_fail = 1; break;\
    }\
  }\
} while(0)
2468
/* Case-folded counterpart of STRING_CMP_VALUE: is_fail is 1 when
   string_cmp_ic() returns 0, and 0 otherwise. */
#define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,is_fail) do {\
  is_fail = (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) ? 1 : 0; \
} while(0)
2475
2476
/* Subject-range helpers.  They refer to the locals str, end, s,
   right_range and in_right_range of the function they are expanded in. */

/* True when the subject string has no bytes. */
#define IS_EMPTY_STR (str == end)
/* True when s is at the first / one-past-last byte of the subject. */
#define ON_STR_BEGIN(s) ((s) == str)
#define ON_STR_END(s) ((s) == end)
/* True when at least one byte is available before right_range. */
#define DATA_ENSURE_CHECK1 (s < right_range)
/* True when at least n bytes are available before right_range.
   Written as a pointer difference (like DATA_ENSURE) so that no pointer
   beyond one-past-the-end is ever formed for a large n. */
#define DATA_ENSURE_CHECK(n) (right_range - s >= (n))
/* Jump to `fail` unless n bytes are available.  Expands to a bare `if`
   statement: use it as a full statement followed by ';' and never as the
   unbraced body of an if that has an else. */
#define DATA_ENSURE(n) if (right_range - s < (n)) goto fail

/* Reset right_range to the caller-supplied limit. */
#define INIT_RIGHT_RANGE right_range = (UChar* )in_right_range
2485
2486 #ifdef USE_CAPTURE_HISTORY
2487 static int
make_capture_history_tree(OnigCaptureTreeNode * node,StackType ** kp,StackType * stk_top,UChar * str,regex_t * reg)2488 make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp,
2489 StackType* stk_top, UChar* str, regex_t* reg)
2490 {
2491 int n, r;
2492 OnigCaptureTreeNode* child;
2493 StackType* k = *kp;
2494
2495 while (k < stk_top) {
2496 if (k->type == STK_MEM_START) {
2497 n = k->zid;
2498 if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
2499 MEM_STATUS_AT(reg->capture_history, n) != 0) {
2500 child = history_node_new();
2501 CHECK_NULL_RETURN_MEMERR(child);
2502 child->group = n;
2503 child->beg = (int )(k->u.mem.pstr - str);
2504 r = history_tree_add_child(node, child);
2505 if (r != 0) return r;
2506 *kp = (k + 1);
2507 r = make_capture_history_tree(child, kp, stk_top, str, reg);
2508 if (r != 0) return r;
2509
2510 k = *kp;
2511 child->end = (int )(k->u.mem.pstr - str);
2512 }
2513 }
2514 else if (k->type == STK_MEM_END) {
2515 if (k->zid == node->group) {
2516 node->end = (int )(k->u.mem.pstr - str);
2517 *kp = k;
2518 return 0;
2519 }
2520 }
2521 k++;
2522 }
2523
2524 return 1; /* 1: root node ending. */
2525 }
2526 #endif
2527
2528 #ifdef USE_BACKREF_WITH_LEVEL
/* Return 1 if `mem` equals one of the first `num` elements of memp,
   0 otherwise (also when num is not positive). */
static int mem_is_in_memp(int mem, int num, MemNumType* memp)
{
  int idx = 0;

  while (idx < num) {
    if ((int )memp[idx] == mem)
      return 1;
    idx++;
  }

  return 0;
}
2538
2539 static int
backref_match_at_nested_level(regex_t * reg,StackType * top,StackType * stk_base,int ignore_case,int case_fold_flag,int nest,int mem_num,MemNumType * memp,UChar ** s,const UChar * send)2540 backref_match_at_nested_level(regex_t* reg,
2541 StackType* top, StackType* stk_base,
2542 int ignore_case, int case_fold_flag,
2543 int nest, int mem_num, MemNumType* memp,
2544 UChar** s, const UChar* send)
2545 {
2546 UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
2547 int level;
2548 StackType* k;
2549
2550 level = 0;
2551 k = top;
2552 k--;
2553 while (k >= stk_base) {
2554 if (k->type == STK_CALL_FRAME) {
2555 level--;
2556 }
2557 else if (k->type == STK_RETURN) {
2558 level++;
2559 }
2560 else if (level == nest) {
2561 if (k->type == STK_MEM_START) {
2562 if (mem_is_in_memp(k->zid, mem_num, memp)) {
2563 pstart = k->u.mem.pstr;
2564 if (IS_NOT_NULL(pend)) {
2565 if (pend - pstart > send - *s) return 0; /* or goto next_mem; */
2566 p = pstart;
2567 ss = *s;
2568
2569 if (ignore_case != 0) {
2570 if (string_cmp_ic(reg->enc, case_fold_flag,
2571 pstart, &ss, (int )(pend - pstart)) == 0)
2572 return 0; /* or goto next_mem; */
2573 }
2574 else {
2575 while (p < pend) {
2576 if (*p++ != *ss++) return 0; /* or goto next_mem; */
2577 }
2578 }
2579
2580 *s = ss;
2581 return 1;
2582 }
2583 }
2584 }
2585 else if (k->type == STK_MEM_END) {
2586 if (mem_is_in_memp(k->zid, mem_num, memp)) {
2587 pend = k->u.mem.pstr;
2588 }
2589 }
2590 }
2591 k--;
2592 }
2593
2594 return 0;
2595 }
2596
2597 static int
backref_check_at_nested_level(regex_t * reg,StackType * top,StackType * stk_base,int nest,int mem_num,MemNumType * memp)2598 backref_check_at_nested_level(regex_t* reg,
2599 StackType* top, StackType* stk_base,
2600 int nest, int mem_num, MemNumType* memp)
2601 {
2602 int level;
2603 StackType* k;
2604
2605 level = 0;
2606 k = top;
2607 k--;
2608 while (k >= stk_base) {
2609 if (k->type == STK_CALL_FRAME) {
2610 level--;
2611 }
2612 else if (k->type == STK_RETURN) {
2613 level++;
2614 }
2615 else if (level == nest) {
2616 if (k->type == STK_MEM_END) {
2617 if (mem_is_in_memp(k->zid, mem_num, memp)) {
2618 return 1;
2619 }
2620 }
2621 }
2622 k--;
2623 }
2624
2625 return 0;
2626 }
2627 #endif /* USE_BACKREF_WITH_LEVEL */
2628
/* File-wide limit initialised from DEFAULT_SUBEXP_CALL_MAX_NEST_LEVEL.
   NOTE(review): presumably the maximum nesting depth allowed for
   subexpression calls during matching -- confirm against its users. */
static int SubexpCallMaxNestLevel = DEFAULT_SUBEXP_CALL_MAX_NEST_LEVEL;
2630
2631 #ifdef ONIG_DEBUG_STATISTICS
2632
#ifdef USE_TIMEOFDAY

/* Timestamps taken by SOP_IN (ts) and SOP_OUT (te) via gettimeofday().
   TIMEDIFF yields te - ts in microseconds. */
static struct timeval ts, te;
#define GETTIME(t) gettimeofday(&(t), (struct timezone* )0)
#define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \
                         (((te).tv_sec - (ts).tv_sec)*1000000))
#else

/* Fallback based on times(): TIMEDIFF is the difference of the tms_utime
   fields of the two samples. */
static struct tms ts, te;
#define GETTIME(t) times(&(t))
#define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime)

#endif /* USE_TIMEOFDAY */
2646
/* Per-opcode statistics, indexed by opcode value (256 slots each). */
static int OpCounter[256];         /* incremented by SOP_IN for each opcode entered */
static int OpPrevCounter[256];     /* incremented (at index OpCurr) when the opcode
                                      being entered equals OpPrevTarget */
static unsigned long OpTime[256];  /* TIMEDIFF totals accumulated by SOP_OUT */
static int OpCurr = OP_FINISH;     /* opcode most recently passed to SOP_IN */
static int OpPrevTarget = OP_FAIL; /* opcode whose predecessors OpPrevCounter tallies */
static int MaxStackDepth = 0;      /* largest stk - stk_base seen by STACK_INC */
2653
/* Record entry into `opcode`.
   The OpPrevCounter update must come first: it is indexed by OpCurr, which
   still names the previously entered opcode at that point.  Then OpCurr is
   replaced, the opcode's counter is bumped and the start time is sampled. */
#define SOP_IN(opcode) do {\
  if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
  OpCurr = opcode;\
  OpCounter[opcode]++;\
  GETTIME(ts);\
} while(0)
2660
/* Record exit from the current opcode: sample the end time and add the
   elapsed TIMEDIFF(te, ts) to OpTime[OpCurr]. */
#define SOP_OUT do {\
  GETTIME(te);\
  OpTime[OpCurr] += TIMEDIFF(te, ts);\
} while(0)
2665
2666 extern void
onig_statistics_init(void)2667 onig_statistics_init(void)
2668 {
2669 int i;
2670 for (i = 0; i < 256; i++) {
2671 OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0;
2672 }
2673 MaxStackDepth = 0;
2674 }
2675
2676 extern int
onig_print_statistics(FILE * f)2677 onig_print_statistics(FILE* f)
2678 {
2679 int r;
2680 int i;
2681
2682 r = fprintf(f, " count prev time\n");
2683 if (r < 0) return -1;
2684
2685 for (i = 0; OpInfo[i].opcode >= 0; i++) {
2686 r = fprintf(f, "%8d: %8d: %10ld: %s\n",
2687 OpCounter[i], OpPrevCounter[i], OpTime[i], OpInfo[i].name);
2688 if (r < 0) return -1;
2689 }
2690 r = fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
2691 if (r < 0) return -1;
2692
2693 return 0;
2694 }
2695
/* Statistics build: advance stk and remember the deepest stack seen
   (distance of stk from stk_base) in MaxStackDepth. */
#define STACK_INC do {\
  stk++;\
  if (MaxStackDepth < stk - stk_base) \
    MaxStackDepth = stk - stk_base;\
} while(0)
2701
2702 #else
/* Without ONIG_DEBUG_STATISTICS: STACK_INC only advances stk, and the
   SOP_IN / SOP_OUT hooks expand to nothing. */
#define STACK_INC stk++

#define SOP_IN(opcode)
#define SOP_OUT
2707 #endif
2708
2709
/* Match region layout of the POSIX API.
   match_at() casts msa->region to an array of posix_regmatch_t when
   OPTON_POSIX_REGION(options) is set and stores offsets relative to the
   start of the subject string: element 0 for the whole match, element i
   for group i (ONIG_REGION_NOTPOS in both fields if the group has no end
   recorded). */
typedef int regoff_t;

typedef struct {
  regoff_t rm_so;  /* start offset */
  regoff_t rm_eo;  /* end offset */
} posix_regmatch_t;
2717
2718
2719
/* Dispatch macros for the bytecode interpreter in match_at().
 *
 * USE_THREADED_CODE: every opcode handler is a label L_<op> and dispatch is
 * a computed goto (`goto *expr`, a compiler extension).  With
 * USE_DIRECT_THREADED_CODE the target address is read from p->opaddr;
 * otherwise it is looked up in opcode_to_label[] by p->opcode.
 * NEXT_OP and JUMP_OP both dispatch immediately; BREAK_OP is empty.
 *
 * Otherwise: the interpreter is `while (1) { switch (p->opcode) { ... } }`.
 * NEXT_OP and BREAK_OP leave the switch with `break`, so the loop iterates;
 * JUMP_OP/GOTO_OP use `continue` to restart the loop directly (the `break`
 * that follows it in GOTO_OP is never reached).
 */
#ifdef USE_THREADED_CODE

#define BYTECODE_INTERPRETER_START GOTO_OP;
#define BYTECODE_INTERPRETER_END
#define CASE_OP(x) L_##x: SOP_IN(OP_##x); MATCH_DEBUG_OUT(0)
#define DEFAULT_OP /* L_DEFAULT: */
#define NEXT_OP JUMP_OP
#define JUMP_OP GOTO_OP
#ifdef USE_DIRECT_THREADED_CODE
#define GOTO_OP goto *(p->opaddr)
#else
#define GOTO_OP goto *opcode_to_label[p->opcode]
#endif
#define BREAK_OP /* Nothing */

#else

#define BYTECODE_INTERPRETER_START \
  while (1) {\
  MATCH_DEBUG_OUT(0)\
  switch (p->opcode) {
#define BYTECODE_INTERPRETER_END } }
#define CASE_OP(x) case OP_##x: SOP_IN(OP_##x);
#define DEFAULT_OP default:
#define NEXT_OP break
#define JUMP_OP GOTO_OP
#define GOTO_OP continue; break
#define BREAK_OP break

#endif /* USE_THREADED_CODE */
2750
/* Helpers used at the end of opcode handlers.  Each *_OUT form runs SOP_OUT
   (statistics hook) before transferring control. */
#define INC_OP p++                               /* advance to the next operation */
#define JUMP_OUT_WITH_SPREV_SET SOP_OUT; NEXT_OP /* expands to the NEXT_OP form of dispatch */
#define JUMP_OUT SOP_OUT; JUMP_OP                /* dispatch on the operation at p */
#define BREAK_OUT SOP_OUT; BREAK_OP
/* as JUMP_OUT, but runs CHECK_INTERRUPT_IN_MATCH before dispatching */
#define CHECK_INTERRUPT_JUMP_OUT SOP_OUT; CHECK_INTERRUPT_IN_MATCH; JUMP_OP
2756
2757
#ifdef ONIG_DEBUG_MATCH
/* Trace one interpreter step to DBGFP (ONIG_DEBUG_MATCH builds only).
 *
 * Prints the step counter, the stack index of stk, the offset of s from str
 * (-1 if s is null), then up to 7 characters of the subject starting at s
 * in double quotes ("..." is appended when more text follows), padded with
 * spaces to 20 columns.  After that comes either "----: finish" when the
 * operation is FinishCode, or the operation's index in reg->ops followed by
 * its disassembly; for OP_RETURN the call frame found by
 * GET_STACK_RETURN_CALL and its return index are printed as well.
 *
 * `offset` is subtracted from p to select the operation to print.
 * The macro uses the enclosing function's locals (i, s, str, end, p, stk,
 * stkp, reg, encode, counter).
 * The expansion already ends with ';' -- CASE_OP and
 * BYTECODE_INTERPRETER_START invoke it without a trailing semicolon.
 */
#define MATCH_DEBUG_OUT(offset) do {\
  Operation *xp;\
  UChar *q, *bp, buf[50];\
  int len, spos;\
  spos = IS_NOT_NULL(s) ? (int )(s - str) : -1;\
  xp = p - (offset);\
  fprintf(DBGFP, "%7u: %7ld: %4d> \"",\
          counter, GET_STACK_INDEX(stk), spos);\
  counter++;\
  bp = buf;\
  if (IS_NOT_NULL(s)) {\
    for (i = 0, q = s; i < 7 && q < end; i++) {\
      len = enclen(encode, q);\
      while (len-- > 0) *bp++ = *q++;\
    }\
    if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; }\
    else { xmemcpy(bp, "\"", 1); bp += 1; }\
  }\
  else {\
    xmemcpy(bp, "\"", 1); bp += 1;\
  }\
  *bp = 0;\
  fputs((char* )buf, DBGFP);\
  for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', DBGFP);\
  if (xp == FinishCode)\
    fprintf(DBGFP, "----: finish");\
  else {\
    int index;\
    enum OpCode zopcode;\
    Operation* addr;\
    index = (int )(xp - reg->ops);\
    fprintf(DBGFP, "%4d: ", index);\
    print_compiled_byte_code(DBGFP, reg, index, reg->ops, encode); \
    zopcode = GET_OPCODE(reg, index);\
    if (zopcode == OP_RETURN) {\
      GET_STACK_RETURN_CALL(stkp, addr);\
      fprintf(DBGFP, " f:%ld -> %d", \
        GET_STACK_INDEX(stkp), (int )(addr - reg->ops));\
    }\
  }\
  fprintf(DBGFP, "\n");\
} while(0);
#else
/* Tracing disabled: expands to nothing. */
#define MATCH_DEBUG_OUT(offset)
#endif
2804
/* Leave match_at() with an error: store err_code in best_len and jump to
   the match_at_end label. */
#define MATCH_AT_ERROR_RETURN(err_code) do {\
  best_len = (err_code);\
  goto match_at_end;\
} while(0)
2808
/* Debug dump of the match counters to DBGFP: a title line with the start
   offset (sstart - str), retry_in_match_counter and
   msa->subexp_call_in_search_counter, followed by the
   MAX_SUBEXP_CALL_COUNTERS entries of subexp_call_counters[].
   The start offset is a pointer difference, so it is converted to long
   explicitly to match the %ld conversion on every platform. */
#define MATCH_COUNTER_OUT(title) do {\
  int i;\
  fprintf(DBGFP, "%s (%ld): retry limit: %8lu, subexp_call: %8lu\n", (title), (long )(sstart - str), retry_in_match_counter, msa->subexp_call_in_search_counter); \
  fprintf(DBGFP, " ");\
  for (i = 0; i < MAX_SUBEXP_CALL_COUNTERS; i++) {\
    fprintf(DBGFP, " %6lu", subexp_call_counters[i]);\
  }\
  fprintf(DBGFP, "\n");\
  fflush(DBGFP);\
} while (0)
2819
2820
2821 /* match data(str - end) from position (sstart). */
2822 static int
match_at(regex_t * reg,const UChar * str,const UChar * end,const UChar * in_right_range,const UChar * sstart,MatchArg * msa)2823 match_at(regex_t* reg, const UChar* str, const UChar* end,
2824 const UChar* in_right_range, const UChar* sstart,
2825 MatchArg* msa)
2826 {
2827
2828 #if defined(USE_DIRECT_THREADED_CODE)
2829 static Operation FinishCode[] = { { .opaddr=&&L_FINISH } };
2830 #else
2831 static Operation FinishCode[] = { { OP_FINISH } };
2832 #endif
2833
2834 #ifdef USE_THREADED_CODE
2835 static const void *opcode_to_label[] = {
2836 &&L_FINISH,
2837 &&L_END,
2838 &&L_STR_1,
2839 &&L_STR_2,
2840 &&L_STR_3,
2841 &&L_STR_4,
2842 &&L_STR_5,
2843 &&L_STR_N,
2844 &&L_STR_MB2N1,
2845 &&L_STR_MB2N2,
2846 &&L_STR_MB2N3,
2847 &&L_STR_MB2N,
2848 &&L_STR_MB3N,
2849 &&L_STR_MBN,
2850 &&L_CCLASS,
2851 &&L_CCLASS_MB,
2852 &&L_CCLASS_MIX,
2853 &&L_CCLASS_NOT,
2854 &&L_CCLASS_MB_NOT,
2855 &&L_CCLASS_MIX_NOT,
2856 &&L_ANYCHAR,
2857 &&L_ANYCHAR_ML,
2858 &&L_ANYCHAR_STAR,
2859 &&L_ANYCHAR_ML_STAR,
2860 &&L_ANYCHAR_STAR_PEEK_NEXT,
2861 &&L_ANYCHAR_ML_STAR_PEEK_NEXT,
2862 &&L_WORD,
2863 &&L_WORD_ASCII,
2864 &&L_NO_WORD,
2865 &&L_NO_WORD_ASCII,
2866 &&L_WORD_BOUNDARY,
2867 &&L_NO_WORD_BOUNDARY,
2868 &&L_WORD_BEGIN,
2869 &&L_WORD_END,
2870 &&L_TEXT_SEGMENT_BOUNDARY,
2871 &&L_BEGIN_BUF,
2872 &&L_END_BUF,
2873 &&L_BEGIN_LINE,
2874 &&L_END_LINE,
2875 &&L_SEMI_END_BUF,
2876 &&L_CHECK_POSITION,
2877 &&L_BACKREF1,
2878 &&L_BACKREF2,
2879 &&L_BACKREF_N,
2880 &&L_BACKREF_N_IC,
2881 &&L_BACKREF_MULTI,
2882 &&L_BACKREF_MULTI_IC,
2883 #ifdef USE_BACKREF_WITH_LEVEL
2884 &&L_BACKREF_WITH_LEVEL,
2885 &&L_BACKREF_WITH_LEVEL_IC,
2886 #endif
2887 &&L_BACKREF_CHECK,
2888 #ifdef USE_BACKREF_WITH_LEVEL
2889 &&L_BACKREF_CHECK_WITH_LEVEL,
2890 #endif
2891 &&L_MEM_START,
2892 &&L_MEM_START_PUSH,
2893 &&L_MEM_END_PUSH,
2894 #ifdef USE_CALL
2895 &&L_MEM_END_PUSH_REC,
2896 #endif
2897 &&L_MEM_END,
2898 #ifdef USE_CALL
2899 &&L_MEM_END_REC,
2900 #endif
2901 &&L_FAIL,
2902 &&L_JUMP,
2903 &&L_PUSH,
2904 &&L_PUSH_SUPER,
2905 &&L_POP,
2906 &&L_POP_TO_MARK,
2907 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
2908 &&L_PUSH_OR_JUMP_EXACT1,
2909 #endif
2910 &&L_PUSH_IF_PEEK_NEXT,
2911 &&L_REPEAT,
2912 &&L_REPEAT_NG,
2913 &&L_REPEAT_INC,
2914 &&L_REPEAT_INC_NG,
2915 &&L_EMPTY_CHECK_START,
2916 &&L_EMPTY_CHECK_END,
2917 &&L_EMPTY_CHECK_END_MEMST,
2918 #ifdef USE_CALL
2919 &&L_EMPTY_CHECK_END_MEMST_PUSH,
2920 #endif
2921 &&L_MOVE,
2922 &&L_STEP_BACK_START,
2923 &&L_STEP_BACK_NEXT,
2924 &&L_CUT_TO_MARK,
2925 &&L_MARK,
2926 &&L_SAVE_VAL,
2927 &&L_UPDATE_VAR,
2928 #ifdef USE_CALL
2929 &&L_CALL,
2930 &&L_RETURN,
2931 #endif
2932 #ifdef USE_CALLOUT
2933 &&L_CALLOUT_CONTENTS,
2934 &&L_CALLOUT_NAME,
2935 #endif
2936 };
2937 #endif
2938
2939 int i, n, num_mem, best_len, pop_level;
2940 LengthType tlen, tlen2;
2941 MemNumType mem;
2942 RelAddrType addr;
2943 UChar *s, *ps;
2944 UChar *right_range;
2945 int is_alloca;
2946 char *alloc_base;
2947 StackType *stk_base, *stk, *stk_end;
2948 StackType *stkp; /* used as any purpose. */
2949 StkPtrType *mem_start_stk, *mem_end_stk;
2950 UChar* keep;
2951 OnigRegion* region;
2952
2953 #ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
2954 StackIndex *repeat_stk;
2955 StackIndex *empty_check_stk;
2956 #endif
2957 #ifdef USE_RETRY_LIMIT
2958 unsigned long retry_limit_in_match;
2959 unsigned long retry_in_match_counter;
2960 #endif
2961 #ifdef USE_CALLOUT
2962 int of;
2963 #endif
2964 #ifdef ONIG_DEBUG_MATCH_COUNTER
2965 #define MAX_SUBEXP_CALL_COUNTERS 9
2966 unsigned long subexp_call_counters[MAX_SUBEXP_CALL_COUNTERS];
2967 #endif
2968
2969 OnigOptionType options;
2970 Operation* p = reg->ops;
2971 OnigEncoding encode = reg->enc;
2972 OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
2973
2974 #ifdef USE_CALL
2975 unsigned long subexp_call_nest_counter = 0;
2976 #endif
2977
2978 #ifdef ONIG_DEBUG_MATCH
2979 static unsigned int counter = 1;
2980 #endif
2981
2982 #ifdef ONIG_DEBUG_MATCH_COUNTER
2983 for (i = 0; i < MAX_SUBEXP_CALL_COUNTERS; i++) {
2984 subexp_call_counters[i] = 0;
2985 }
2986 #endif
2987
2988 #ifdef USE_DIRECT_THREADED_CODE
2989 if (IS_NULL(msa)) {
2990 for (i = 0; i < reg->ops_used; i++) {
2991 const void* addr;
2992 addr = opcode_to_label[reg->ocs[i]];
2993 p->opaddr = addr;
2994 p++;
2995 }
2996 return ONIG_NORMAL;
2997 }
2998 #endif
2999
3000 options = msa->options;
3001
3002 #ifdef USE_CALLOUT
3003 msa->mp->match_at_call_counter++;
3004 #endif
3005
3006 #ifdef USE_RETRY_LIMIT
3007 retry_limit_in_match = msa->retry_limit_in_match;
3008 if (msa->retry_limit_in_search != 0) {
3009 unsigned long rem = msa->retry_limit_in_search
3010 - msa->retry_limit_in_search_counter;
3011 if (rem < retry_limit_in_match)
3012 retry_limit_in_match = rem;
3013 }
3014 #endif
3015
3016 pop_level = reg->stack_pop_level;
3017 num_mem = reg->num_mem;
3018 STACK_INIT(INIT_MATCH_STACK_SIZE);
3019 UPDATE_FOR_STACK_REALLOC;
3020 for (i = 1; i <= num_mem; i++) {
3021 mem_start_stk[i].i = mem_end_stk[i].i = INVALID_STACK_INDEX;
3022 }
3023
3024 #ifdef ONIG_DEBUG_MATCH
3025 fprintf(DBGFP, "match_at: str: %p, end: %p, start: %p\n", str, end, sstart);
3026 fprintf(DBGFP, "size: %d, start offset: %d\n",
3027 (int )(end - str), (int )(sstart - str));
3028 #endif
3029
3030 best_len = ONIG_MISMATCH;
3031 keep = s = (UChar* )sstart;
3032 STACK_PUSH_BOTTOM(STK_ALT, FinishCode); /* bottom stack */
3033 INIT_RIGHT_RANGE;
3034
3035 #ifdef USE_RETRY_LIMIT
3036 retry_in_match_counter = 0;
3037 #endif
3038
3039 BYTECODE_INTERPRETER_START {
3040 CASE_OP(END)
3041 n = (int )(s - sstart);
3042 if (n == 0 && OPTON_FIND_NOT_EMPTY(options)) {
3043 best_len = ONIG_MISMATCH;
3044 goto fail; /* for retry */
3045 }
3046
3047 if (n > best_len) {
3048 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
3049 if (OPTON_FIND_LONGEST(options)) {
3050 if (n > msa->best_len) {
3051 msa->best_len = n;
3052 msa->best_s = (UChar* )sstart;
3053 }
3054 else {
3055 if (s >= in_right_range && msa->best_s == sstart) {
3056 best_len = msa->best_len; /* end of find */
3057 }
3058 else {
3059 SOP_OUT;
3060 goto fail; /* for retry */
3061 }
3062 }
3063 }
3064 else {
3065 best_len = n;
3066 }
3067 #else
3068 best_len = n;
3069 #endif
3070 }
3071
3072 /* set region */
3073 region = msa->region;
3074 if (region) {
3075 if (keep > s) keep = s;
3076
3077 #ifdef USE_POSIX_API
3078 if (OPTON_POSIX_REGION(options)) {
3079 posix_regmatch_t* rmt = (posix_regmatch_t* )region;
3080
3081 rmt[0].rm_so = (regoff_t )(keep - str);
3082 rmt[0].rm_eo = (regoff_t )(s - str);
3083 for (i = 1; i <= num_mem; i++) {
3084 if (mem_end_stk[i].i != INVALID_STACK_INDEX) {
3085 rmt[i].rm_so = (regoff_t )(STACK_MEM_START(reg, i) - str);
3086 rmt[i].rm_eo = (regoff_t )(STACK_MEM_END(reg, i) - str);
3087 }
3088 else {
3089 rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS;
3090 }
3091 }
3092 }
3093 else {
3094 #endif /* USE_POSIX_API */
3095 region->beg[0] = (int )(keep - str);
3096 region->end[0] = (int )(s - str);
3097 for (i = 1; i <= num_mem; i++) {
3098 if (mem_end_stk[i].i != INVALID_STACK_INDEX) {
3099 region->beg[i] = (int )(STACK_MEM_START(reg, i) - str);
3100 region->end[i] = (int )(STACK_MEM_END(reg, i) - str);
3101 }
3102 else {
3103 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
3104 }
3105 }
3106
3107 #ifdef USE_CAPTURE_HISTORY
3108 if (reg->capture_history != 0) {
3109 OnigCaptureTreeNode* node;
3110
3111 if (IS_NULL(region->history_root)) {
3112 region->history_root = node = history_node_new();
3113 CHECK_NULL_RETURN_MEMERR(node);
3114 }
3115 else {
3116 node = region->history_root;
3117 history_tree_clear(node);
3118 }
3119
3120 node->group = 0;
3121 node->beg = (int )(keep - str);
3122 node->end = (int )(s - str);
3123
3124 stkp = stk_base;
3125 i = make_capture_history_tree(region->history_root, &stkp,
3126 stk, (UChar* )str, reg);
3127 if (i < 0) MATCH_AT_ERROR_RETURN(i);
3128 }
3129 #endif /* USE_CAPTURE_HISTORY */
3130 #ifdef USE_POSIX_API
3131 } /* else OPTON_POSIX_REGION() */
3132 #endif
3133 } /* if (region) */
3134
3135 SOP_OUT;
3136
3137 if (OPTON_CALLBACK_EACH_MATCH(options) &&
3138 IS_NOT_NULL(CallbackEachMatch)) {
3139 i = CallbackEachMatch(str, end, sstart, region,
3140 msa->mp->callout_user_data);
3141 if (i < 0) MATCH_AT_ERROR_RETURN(i);
3142
3143 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
3144 if (! OPTON_FIND_LONGEST(options))
3145 #endif
3146 best_len = ONIG_MISMATCH;
3147
3148 goto fail;
3149 }
3150
3151 /* default behavior: return first-matching result. */
3152 goto match_at_end;
3153
3154 CASE_OP(STR_1)
3155 DATA_ENSURE(1);
3156 ps = p->exact.s;
3157 if (*ps != *s) goto fail;
3158 s++;
3159 INC_OP;
3160 JUMP_OUT_WITH_SPREV_SET;
3161
3162 CASE_OP(STR_2)
3163 DATA_ENSURE(2);
3164 ps = p->exact.s;
3165 if (*ps != *s) goto fail;
3166 ps++; s++;
3167 if (*ps != *s) goto fail;
3168 s++;
3169 INC_OP;
3170 JUMP_OUT;
3171
3172 CASE_OP(STR_3)
3173 DATA_ENSURE(3);
3174 ps = p->exact.s;
3175 if (*ps != *s) goto fail;
3176 ps++; s++;
3177 if (*ps != *s) goto fail;
3178 ps++; s++;
3179 if (*ps != *s) goto fail;
3180 s++;
3181 INC_OP;
3182 JUMP_OUT;
3183
3184 CASE_OP(STR_4)
3185 DATA_ENSURE(4);
3186 ps = p->exact.s;
3187 if (*ps != *s) goto fail;
3188 ps++; s++;
3189 if (*ps != *s) goto fail;
3190 ps++; s++;
3191 if (*ps != *s) goto fail;
3192 ps++; s++;
3193 if (*ps != *s) goto fail;
3194 s++;
3195 INC_OP;
3196 JUMP_OUT;
3197
3198 CASE_OP(STR_5)
3199 DATA_ENSURE(5);
3200 ps = p->exact.s;
3201 if (*ps != *s) goto fail;
3202 ps++; s++;
3203 if (*ps != *s) goto fail;
3204 ps++; s++;
3205 if (*ps != *s) goto fail;
3206 ps++; s++;
3207 if (*ps != *s) goto fail;
3208 ps++; s++;
3209 if (*ps != *s) goto fail;
3210 s++;
3211 INC_OP;
3212 JUMP_OUT;
3213
3214 CASE_OP(STR_N)
3215 tlen = p->exact_n.n;
3216 DATA_ENSURE(tlen);
3217 ps = p->exact_n.s;
3218 while (tlen-- > 0) {
3219 if (*ps++ != *s++) goto fail;
3220 }
3221 INC_OP;
3222 JUMP_OUT;
3223
3224 CASE_OP(STR_MB2N1)
3225 DATA_ENSURE(2);
3226 ps = p->exact.s;
3227 if (*ps != *s) goto fail;
3228 ps++; s++;
3229 if (*ps != *s) goto fail;
3230 s++;
3231 INC_OP;
3232 JUMP_OUT_WITH_SPREV_SET;
3233
3234 CASE_OP(STR_MB2N2)
3235 DATA_ENSURE(4);
3236 ps = p->exact.s;
3237 if (*ps != *s) goto fail;
3238 ps++; s++;
3239 if (*ps != *s) goto fail;
3240 ps++; s++;
3241 if (*ps != *s) goto fail;
3242 ps++; s++;
3243 if (*ps != *s) goto fail;
3244 s++;
3245 INC_OP;
3246 JUMP_OUT;
3247
3248 CASE_OP(STR_MB2N3)
3249 DATA_ENSURE(6);
3250 ps = p->exact.s;
3251 if (*ps != *s) goto fail;
3252 ps++; s++;
3253 if (*ps != *s) goto fail;
3254 ps++; s++;
3255 if (*ps != *s) goto fail;
3256 ps++; s++;
3257 if (*ps != *s) goto fail;
3258 ps++; s++;
3259 if (*ps != *s) goto fail;
3260 ps++; s++;
3261 if (*ps != *s) goto fail;
3262 ps++; s++;
3263 INC_OP;
3264 JUMP_OUT;
3265
3266 CASE_OP(STR_MB2N)
3267 tlen = p->exact_n.n;
3268 DATA_ENSURE(tlen * 2);
3269 ps = p->exact_n.s;
3270 while (tlen-- > 0) {
3271 if (*ps != *s) goto fail;
3272 ps++; s++;
3273 if (*ps != *s) goto fail;
3274 ps++; s++;
3275 }
3276 INC_OP;
3277 JUMP_OUT;
3278
3279 CASE_OP(STR_MB3N)
3280 tlen = p->exact_n.n;
3281 DATA_ENSURE(tlen * 3);
3282 ps = p->exact_n.s;
3283 while (tlen-- > 0) {
3284 if (*ps != *s) goto fail;
3285 ps++; s++;
3286 if (*ps != *s) goto fail;
3287 ps++; s++;
3288 if (*ps != *s) goto fail;
3289 ps++; s++;
3290 }
3291 INC_OP;
3292 JUMP_OUT;
3293
3294 CASE_OP(STR_MBN)
3295 tlen = p->exact_len_n.len; /* mb byte len */
3296 tlen2 = p->exact_len_n.n; /* number of chars */
3297 tlen2 *= tlen;
3298 DATA_ENSURE(tlen2);
3299 ps = p->exact_len_n.s;
3300 while (tlen2-- > 0) {
3301 if (*ps != *s) goto fail;
3302 ps++; s++;
3303 }
3304 INC_OP;
3305 JUMP_OUT;
3306
3307 CASE_OP(CCLASS)
3308 DATA_ENSURE(1);
3309 if (BITSET_AT(p->cclass.bsp, *s) == 0) goto fail;
3310 if (ONIGENC_IS_MBC_HEAD(encode, s)) goto fail;
3311 s++;
3312 INC_OP;
3313 JUMP_OUT_WITH_SPREV_SET;
3314
3315 CASE_OP(CCLASS_MB)
3316 DATA_ENSURE(1);
3317 if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail;
3318
3319 cclass_mb:
3320 {
3321 OnigCodePoint code;
3322 UChar *ss;
3323 int mb_len;
3324
3325 mb_len = enclen(encode, s);
3326 DATA_ENSURE(mb_len);
3327 ss = s;
3328 s += mb_len;
3329 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
3330 if (! onig_is_in_code_range(p->cclass_mb.mb, code)) goto fail;
3331 }
3332 INC_OP;
3333 JUMP_OUT_WITH_SPREV_SET;
3334
3335 CASE_OP(CCLASS_MIX)
3336 DATA_ENSURE(1);
3337 if (ONIGENC_IS_MBC_HEAD(encode, s)) {
3338 goto cclass_mb;
3339 }
3340 else {
3341 if (BITSET_AT(p->cclass_mix.bsp, *s) == 0)
3342 goto fail;
3343
3344 s++;
3345 }
3346 INC_OP;
3347 JUMP_OUT_WITH_SPREV_SET;
3348
3349 CASE_OP(CCLASS_NOT)
3350 DATA_ENSURE(1);
3351 if (BITSET_AT(p->cclass.bsp, *s) != 0) goto fail;
3352 s += enclen(encode, s);
3353 INC_OP;
3354 JUMP_OUT_WITH_SPREV_SET;
3355
3356 CASE_OP(CCLASS_MB_NOT)
3357 DATA_ENSURE(1);
3358 if (! ONIGENC_IS_MBC_HEAD(encode, s)) {
3359 s++;
3360 goto cc_mb_not_success;
3361 }
3362
3363 cclass_mb_not:
3364 {
3365 OnigCodePoint code;
3366 UChar *ss;
3367 int mb_len = enclen(encode, s);
3368
3369 if (! DATA_ENSURE_CHECK(mb_len)) {
3370 DATA_ENSURE(1);
3371 s = (UChar* )end;
3372 goto cc_mb_not_success;
3373 }
3374
3375 ss = s;
3376 s += mb_len;
3377 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
3378 if (onig_is_in_code_range(p->cclass_mb.mb, code)) goto fail;
3379 }
3380
3381 cc_mb_not_success:
3382 INC_OP;
3383 JUMP_OUT_WITH_SPREV_SET;
3384
3385 CASE_OP(CCLASS_MIX_NOT)
3386 DATA_ENSURE(1);
3387 if (ONIGENC_IS_MBC_HEAD(encode, s)) {
3388 goto cclass_mb_not;
3389 }
3390 else {
3391 if (BITSET_AT(p->cclass_mix.bsp, *s) != 0)
3392 goto fail;
3393
3394 s++;
3395 }
3396 INC_OP;
3397 JUMP_OUT_WITH_SPREV_SET;
3398
3399 CASE_OP(ANYCHAR)
3400 DATA_ENSURE(1);
3401 n = enclen(encode, s);
3402 DATA_ENSURE(n);
3403 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
3404 s += n;
3405 INC_OP;
3406 JUMP_OUT_WITH_SPREV_SET;
3407
3408 CASE_OP(ANYCHAR_ML)
3409 DATA_ENSURE(1);
3410 n = enclen(encode, s);
3411 DATA_ENSURE(n);
3412 s += n;
3413 INC_OP;
3414 JUMP_OUT_WITH_SPREV_SET;
3415
3416 CASE_OP(ANYCHAR_STAR)
3417 INC_OP;
3418 while (DATA_ENSURE_CHECK1) {
3419 STACK_PUSH_ALT(p, s);
3420 n = enclen(encode, s);
3421 DATA_ENSURE(n);
3422 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
3423 s += n;
3424 }
3425 JUMP_OUT;
3426
3427 CASE_OP(ANYCHAR_ML_STAR)
3428 INC_OP;
3429 while (DATA_ENSURE_CHECK1) {
3430 STACK_PUSH_ALT(p, s);
3431 n = enclen(encode, s);
3432 if (n > 1) {
3433 DATA_ENSURE(n);
3434 s += n;
3435 }
3436 else {
3437 s++;
3438 }
3439 }
3440 JUMP_OUT;
3441
3442 CASE_OP(ANYCHAR_STAR_PEEK_NEXT)
3443 {
3444 UChar c;
3445
3446 c = p->anychar_star_peek_next.c;
3447 INC_OP;
3448 while (DATA_ENSURE_CHECK1) {
3449 if (c == *s) {
3450 STACK_PUSH_ALT(p, s);
3451 }
3452 n = enclen(encode, s);
3453 DATA_ENSURE(n);
3454 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
3455 s += n;
3456 }
3457 }
3458 JUMP_OUT;
3459
3460 CASE_OP(ANYCHAR_ML_STAR_PEEK_NEXT)
3461 {
3462 UChar c;
3463
3464 c = p->anychar_star_peek_next.c;
3465 INC_OP;
3466 while (DATA_ENSURE_CHECK1) {
3467 if (c == *s) {
3468 STACK_PUSH_ALT(p, s);
3469 }
3470 n = enclen(encode, s);
3471 if (n > 1) {
3472 DATA_ENSURE(n);
3473 s += n;
3474 }
3475 else {
3476 s++;
3477 }
3478 }
3479 }
3480 JUMP_OUT;
3481
3482 CASE_OP(WORD)
3483 DATA_ENSURE(1);
3484 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
3485 goto fail;
3486
3487 s += enclen(encode, s);
3488 INC_OP;
3489 JUMP_OUT_WITH_SPREV_SET;
3490
3491 CASE_OP(WORD_ASCII)
3492 DATA_ENSURE(1);
3493 if (! ONIGENC_IS_MBC_WORD_ASCII(encode, s, end))
3494 goto fail;
3495
3496 s += enclen(encode, s);
3497 INC_OP;
3498 JUMP_OUT_WITH_SPREV_SET;
3499
3500 CASE_OP(NO_WORD)
3501 DATA_ENSURE(1);
3502 if (ONIGENC_IS_MBC_WORD(encode, s, end))
3503 goto fail;
3504
3505 s += enclen(encode, s);
3506 INC_OP;
3507 JUMP_OUT_WITH_SPREV_SET;
3508
3509 CASE_OP(NO_WORD_ASCII)
3510 DATA_ENSURE(1);
3511 if (ONIGENC_IS_MBC_WORD_ASCII(encode, s, end))
3512 goto fail;
3513
3514 s += enclen(encode, s);
3515 INC_OP;
3516 JUMP_OUT_WITH_SPREV_SET;
3517
3518 CASE_OP(WORD_BOUNDARY)
3519 {
3520 ModeType mode;
3521
3522 mode = p->word_boundary.mode;
3523 if (ON_STR_BEGIN(s)) {
3524 DATA_ENSURE(1);
3525 if (! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode))
3526 goto fail;
3527 }
3528 else {
3529 UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
3530 if (ON_STR_END(s)) {
3531 if (! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
3532 goto fail;
3533 }
3534 else {
3535 if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)
3536 == IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
3537 goto fail;
3538 }
3539 }
3540 }
3541 INC_OP;
3542 JUMP_OUT;
3543
3544 CASE_OP(NO_WORD_BOUNDARY)
3545 {
3546 ModeType mode;
3547
3548 mode = p->word_boundary.mode;
3549 if (ON_STR_BEGIN(s)) {
3550 if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode))
3551 goto fail;
3552 }
3553 else {
3554 UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
3555 if (ON_STR_END(s)) {
3556 if (IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
3557 goto fail;
3558 }
3559 else {
3560 if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)
3561 != IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
3562 goto fail;
3563 }
3564 }
3565 }
3566 INC_OP;
3567 JUMP_OUT;
3568
3569 #ifdef USE_WORD_BEGIN_END
3570 CASE_OP(WORD_BEGIN)
3571 {
3572 ModeType mode;
3573
3574 mode = p->word_boundary.mode;
3575 if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) {
3576 UChar* sprev;
3577 if (ON_STR_BEGIN(s)) {
3578 INC_OP;
3579 JUMP_OUT;
3580 }
3581 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
3582 if (! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {
3583 INC_OP;
3584 JUMP_OUT;
3585 }
3586 }
3587 }
3588 goto fail;
3589
3590 CASE_OP(WORD_END)
3591 {
3592 ModeType mode;
3593
3594 mode = p->word_boundary.mode;
3595 if (! ON_STR_BEGIN(s)) {
3596 UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
3597 if (IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {
3598 if (ON_STR_END(s) || ! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) {
3599 INC_OP;
3600 JUMP_OUT;
3601 }
3602 }
3603 }
3604 }
3605 goto fail;
3606 #endif
3607
3608 CASE_OP(TEXT_SEGMENT_BOUNDARY)
3609 {
3610 int is_break;
3611 UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
3612
3613 switch (p->text_segment_boundary.type) {
3614 case EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
3615 is_break = onigenc_egcb_is_break_position(encode, s, sprev, str, end);
3616 break;
3617 #ifdef USE_UNICODE_WORD_BREAK
3618 case WORD_BOUNDARY:
3619 is_break = onigenc_wb_is_break_position(encode, s, sprev, str, end);
3620 break;
3621 #endif
3622 default:
3623 MATCH_AT_ERROR_RETURN(ONIGERR_UNDEFINED_BYTECODE);
3624 break;
3625 }
3626
3627 if (p->text_segment_boundary.not != 0)
3628 is_break = ! is_break;
3629
3630 if (is_break != 0) {
3631 INC_OP;
3632 JUMP_OUT;
3633 }
3634 else {
3635 goto fail;
3636 }
3637 }
3638
3639 CASE_OP(BEGIN_BUF)
3640 if (! ON_STR_BEGIN(s)) goto fail;
3641 if (OPTON_NOTBOL(options)) goto fail;
3642 if (OPTON_NOT_BEGIN_STRING(options)) goto fail;
3643
3644 INC_OP;
3645 JUMP_OUT;
3646
3647 CASE_OP(END_BUF)
3648 if (! ON_STR_END(s)) goto fail;
3649 if (OPTON_NOTEOL(options)) goto fail;
3650 if (OPTON_NOT_END_STRING(options)) goto fail;
3651
3652 INC_OP;
3653 JUMP_OUT;
3654
3655 CASE_OP(BEGIN_LINE)
3656 if (ON_STR_BEGIN(s)) {
3657 if (OPTON_NOTBOL(options)) goto fail;
3658 INC_OP;
3659 JUMP_OUT;
3660 }
3661 else if (! ON_STR_END(s)) {
3662 UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
3663 if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
3664 INC_OP;
3665 JUMP_OUT;
3666 }
3667 }
3668 goto fail;
3669
3670 CASE_OP(END_LINE)
3671 if (ON_STR_END(s)) {
3672 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3673 UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
3674 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
3675 #endif
3676 if (OPTON_NOTEOL(options)) goto fail;
3677 INC_OP;
3678 JUMP_OUT;
3679 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3680 }
3681 #endif
3682 }
3683 else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) {
3684 INC_OP;
3685 JUMP_OUT;
3686 }
3687 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3688 else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
3689 INC_OP;
3690 JUMP_OUT;
3691 }
3692 #endif
3693 goto fail;
3694
3695 CASE_OP(SEMI_END_BUF)
3696 if (ON_STR_END(s)) {
3697 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3698 UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
3699 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
3700 #endif
3701 if (OPTON_NOTEOL(options)) goto fail;
3702 if (OPTON_NOT_END_STRING(options)) goto fail;
3703 INC_OP;
3704 JUMP_OUT;
3705 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3706 }
3707 #endif
3708 }
3709 else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) &&
3710 ON_STR_END(s + enclen(encode, s))) {
3711 if (OPTON_NOTEOL(options)) goto fail;
3712 if (OPTON_NOT_END_STRING(options)) goto fail;
3713 INC_OP;
3714 JUMP_OUT;
3715 }
3716 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3717 else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
3718 UChar* ss = s + enclen(encode, s);
3719 ss += enclen(encode, ss);
3720 if (ON_STR_END(ss)) {
3721 if (OPTON_NOTEOL(options)) goto fail;
3722 if (OPTON_NOT_END_STRING(options)) goto fail;
3723 INC_OP;
3724 JUMP_OUT;
3725 }
3726 }
3727 #endif
3728 goto fail;
3729
3730 CASE_OP(CHECK_POSITION)
3731 switch (p->check_position.type) {
3732 case CHECK_POSITION_SEARCH_START:
3733 if (s != msa->start) goto fail;
3734 if (OPTON_NOT_BEGIN_POSITION(options)) goto fail;
3735 break;
3736 case CHECK_POSITION_CURRENT_RIGHT_RANGE:
3737 if (s != right_range) goto fail;
3738 break;
3739 default:
3740 break;
3741 }
3742 INC_OP;
3743 JUMP_OUT;
3744
3745 CASE_OP(MEM_START_PUSH)
3746 mem = p->memory_start.num;
3747 STACK_PUSH_MEM_START(mem, s);
3748 INC_OP;
3749 JUMP_OUT;
3750
3751 CASE_OP(MEM_START)
3752 mem = p->memory_start.num;
3753 mem_start_stk[mem].s = s;
3754 INC_OP;
3755 JUMP_OUT;
3756
3757 CASE_OP(MEM_END_PUSH)
3758 mem = p->memory_end.num;
3759 STACK_PUSH_MEM_END(mem, s);
3760 INC_OP;
3761 JUMP_OUT;
3762
3763 CASE_OP(MEM_END)
3764 mem = p->memory_end.num;
3765 mem_end_stk[mem].s = s;
3766 INC_OP;
3767 JUMP_OUT;
3768
3769 #ifdef USE_CALL
3770 CASE_OP(MEM_END_PUSH_REC)
3771 {
3772 StackIndex si;
3773
3774 mem = p->memory_end.num;
3775 STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
3776 si = GET_STACK_INDEX(stkp);
3777 STACK_PUSH_MEM_END(mem, s);
3778 mem_start_stk[mem].i = si;
3779 INC_OP;
3780 JUMP_OUT;
3781 }
3782
3783 CASE_OP(MEM_END_REC)
3784 mem = p->memory_end.num;
3785 mem_end_stk[mem].s = s;
3786 STACK_GET_MEM_START(mem, stkp);
3787
3788 if (MEM_STATUS_AT(reg->push_mem_start, mem))
3789 mem_start_stk[mem].i = GET_STACK_INDEX(stkp);
3790 else
3791 mem_start_stk[mem].s = stkp->u.mem.pstr;
3792
3793 STACK_PUSH_MEM_END_MARK(mem);
3794 INC_OP;
3795 JUMP_OUT;
3796 #endif
3797
3798 CASE_OP(BACKREF1)
3799 mem = 1;
3800 goto backref;
3801
3802 CASE_OP(BACKREF2)
3803 mem = 2;
3804 goto backref;
3805
3806 CASE_OP(BACKREF_N)
3807 mem = p->backref_n.n1;
3808 backref:
3809 {
3810 UChar *pstart, *pend;
3811
3812 if (mem_end_stk[mem].i == INVALID_STACK_INDEX) goto fail;
3813 if (mem_start_stk[mem].i == INVALID_STACK_INDEX) goto fail;
3814
3815 pstart = STACK_MEM_START(reg, mem);
3816 pend = STACK_MEM_END(reg, mem);
3817 n = (int )(pend - pstart);
3818 if (n != 0) {
3819 DATA_ENSURE(n);
3820 STRING_CMP(s, pstart, n);
3821 }
3822 }
3823 INC_OP;
3824 JUMP_OUT;
3825
3826 CASE_OP(BACKREF_N_IC)
3827 mem = p->backref_n.n1;
3828 {
3829 UChar *pstart, *pend;
3830
3831 if (mem_end_stk[mem].i == INVALID_STACK_INDEX) goto fail;
3832 if (mem_start_stk[mem].i == INVALID_STACK_INDEX) goto fail;
3833
3834 pstart = STACK_MEM_START(reg, mem);
3835 pend = STACK_MEM_END(reg, mem);
3836 n = (int )(pend - pstart);
3837 if (n != 0) {
3838 DATA_ENSURE(n);
3839 STRING_CMP_IC(case_fold_flag, pstart, &s, n);
3840 }
3841 }
3842 INC_OP;
3843 JUMP_OUT;
3844
3845 CASE_OP(BACKREF_MULTI)
3846 {
3847 int is_fail;
3848 UChar *pstart, *pend, *swork;
3849
3850 tlen = p->backref_general.num;
3851 for (i = 0; i < tlen; i++) {
3852 mem = tlen == 1 ? p->backref_general.n1 : p->backref_general.ns[i];
3853
3854 if (mem_end_stk[mem].i == INVALID_STACK_INDEX) continue;
3855 if (mem_start_stk[mem].i == INVALID_STACK_INDEX) continue;
3856
3857 pstart = STACK_MEM_START(reg, mem);
3858 pend = STACK_MEM_END(reg, mem);
3859 n = (int )(pend - pstart);
3860 if (n != 0) {
3861 DATA_ENSURE(n);
3862 swork = s;
3863 STRING_CMP_VALUE(swork, pstart, n, is_fail);
3864 if (is_fail) continue;
3865 s = swork;
3866 }
3867 break; /* success */
3868 }
3869 if (i == tlen) goto fail;
3870 }
3871 INC_OP;
3872 JUMP_OUT;
3873
3874 CASE_OP(BACKREF_MULTI_IC)
3875 {
3876 int is_fail;
3877 UChar *pstart, *pend, *swork;
3878
3879 tlen = p->backref_general.num;
3880 for (i = 0; i < tlen; i++) {
3881 mem = tlen == 1 ? p->backref_general.n1 : p->backref_general.ns[i];
3882
3883 if (mem_end_stk[mem].i == INVALID_STACK_INDEX) continue;
3884 if (mem_start_stk[mem].i == INVALID_STACK_INDEX) continue;
3885
3886 pstart = STACK_MEM_START(reg, mem);
3887 pend = STACK_MEM_END(reg, mem);
3888 n = (int )(pend - pstart);
3889 if (n != 0) {
3890 DATA_ENSURE(n);
3891 swork = s;
3892 STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail);
3893 if (is_fail) continue;
3894 s = swork;
3895 }
3896 break; /* success */
3897 }
3898 if (i == tlen) goto fail;
3899 }
3900 INC_OP;
3901 JUMP_OUT;
3902
3903 #ifdef USE_BACKREF_WITH_LEVEL
3904 CASE_OP(BACKREF_WITH_LEVEL_IC)
3905 n = 1; /* ignore case */
3906 goto backref_with_level;
3907 CASE_OP(BACKREF_WITH_LEVEL)
3908 {
3909 int level;
3910 MemNumType* mems;
3911
3912 n = 0;
3913 backref_with_level:
3914 level = p->backref_general.nest_level;
3915 tlen = p->backref_general.num;
3916 mems = tlen == 1 ? &(p->backref_general.n1) : p->backref_general.ns;
3917
3918 if (! backref_match_at_nested_level(reg, stk, stk_base, n,
3919 case_fold_flag, level, (int )tlen, mems, &s, end)) {
3920 goto fail;
3921 }
3922 }
3923 INC_OP;
3924 JUMP_OUT;
3925 #endif
3926
3927 CASE_OP(BACKREF_CHECK)
3928 {
3929 MemNumType* mems;
3930
3931 tlen = p->backref_general.num;
3932 mems = tlen == 1 ? &(p->backref_general.n1) : p->backref_general.ns;
3933
3934 for (i = 0; i < tlen; i++) {
3935 mem = mems[i];
3936 if (mem_end_stk[mem].i == INVALID_STACK_INDEX) continue;
3937 if (mem_start_stk[mem].i == INVALID_STACK_INDEX) continue;
3938 break; /* success */
3939 }
3940 if (i == tlen) goto fail;
3941 }
3942 INC_OP;
3943 JUMP_OUT;
3944
3945 #ifdef USE_BACKREF_WITH_LEVEL
3946 CASE_OP(BACKREF_CHECK_WITH_LEVEL)
3947 {
3948 LengthType level;
3949 MemNumType* mems;
3950
3951 level = p->backref_general.nest_level;
3952 tlen = p->backref_general.num;
3953 mems = tlen == 1 ? &(p->backref_general.n1) : p->backref_general.ns;
3954
3955 if (backref_check_at_nested_level(reg, stk, stk_base,
3956 (int )level, (int )tlen, mems) == 0)
3957 goto fail;
3958 }
3959 INC_OP;
3960 JUMP_OUT;
3961 #endif
3962
3963 CASE_OP(EMPTY_CHECK_START)
3964 mem = p->empty_check_start.mem; /* mem: null check id */
3965 STACK_PUSH_EMPTY_CHECK_START(mem, s);
3966 INC_OP;
3967 JUMP_OUT;
3968
3969 CASE_OP(EMPTY_CHECK_END)
3970 {
3971 int is_empty;
3972
3973 mem = p->empty_check_end.mem; /* mem: null check id */
3974 STACK_EMPTY_CHECK(is_empty, mem, s);
3975 INC_OP;
3976 if (is_empty) {
3977 #ifdef ONIG_DEBUG_MATCH
3978 fprintf(DBGFP, "EMPTY_CHECK_END: skip id:%d, s:%p\n", (int )mem, s);
3979 #endif
3980 empty_check_found:
/* empty loop found, skip next instruction */
3982 #if defined(ONIG_DEBUG) && !defined(USE_DIRECT_THREADED_CODE)
3983 switch (p->opcode) {
3984 case OP_JUMP:
3985 case OP_PUSH:
3986 case OP_REPEAT_INC:
3987 case OP_REPEAT_INC_NG:
3988 INC_OP;
3989 break;
3990 default:
3991 MATCH_AT_ERROR_RETURN(ONIGERR_UNEXPECTED_BYTECODE);
3992 break;
3993 }
3994 #else
3995 INC_OP;
3996 #endif
3997 }
3998 }
3999 JUMP_OUT;
4000
4001 #ifdef USE_RIGID_CHECK_CAPTURES_IN_EMPTY_REPEAT
4002 CASE_OP(EMPTY_CHECK_END_MEMST)
4003 {
4004 int is_empty;
4005
4006 mem = p->empty_check_end.mem; /* mem: null check id */
4007 STACK_EMPTY_CHECK_MEM(is_empty, mem, p->empty_check_end.empty_status_mem, s, reg);
4008 INC_OP;
4009 if (is_empty) {
4010 #ifdef ONIG_DEBUG_MATCH
4011 fprintf(DBGFP, "EMPTY_CHECK_END_MEM: skip id:%d, s:%p\n", (int)mem, s);
4012 #endif
4013 if (is_empty == -1) goto fail;
4014 goto empty_check_found;
4015 }
4016 }
4017 JUMP_OUT;
4018 #endif
4019
4020 #ifdef USE_CALL
4021 CASE_OP(EMPTY_CHECK_END_MEMST_PUSH)
4022 {
4023 int is_empty;
4024
4025 mem = p->empty_check_end.mem; /* mem: null check id */
4026 #ifdef USE_RIGID_CHECK_CAPTURES_IN_EMPTY_REPEAT
4027 STACK_EMPTY_CHECK_MEM_REC(is_empty, mem, p->empty_check_end.empty_status_mem, s, reg);
4028 #else
4029 STACK_EMPTY_CHECK_REC(is_empty, mem, s);
4030 #endif
4031 INC_OP;
4032 if (is_empty) {
4033 #ifdef ONIG_DEBUG_MATCH
4034 fprintf(DBGFP, "EMPTY_CHECK_END_MEM_PUSH: skip id:%d, s:%p\n",
4035 (int )mem, s);
4036 #endif
4037 if (is_empty == -1) goto fail;
4038 goto empty_check_found;
4039 }
4040 else {
4041 STACK_PUSH_EMPTY_CHECK_END(mem);
4042 }
4043 }
4044 JUMP_OUT;
4045 #endif
4046
4047 CASE_OP(JUMP)
4048 addr = p->jump.addr;
4049 p += addr;
4050 CHECK_INTERRUPT_JUMP_OUT;
4051
4052 CASE_OP(PUSH)
4053 addr = p->push.addr;
4054 STACK_PUSH_ALT(p + addr, s);
4055 INC_OP;
4056 JUMP_OUT;
4057
4058 CASE_OP(PUSH_SUPER)
4059 addr = p->push.addr;
4060 STACK_PUSH_SUPER_ALT(p + addr, s);
4061 INC_OP;
4062 JUMP_OUT;
4063
4064 CASE_OP(POP)
4065 STACK_POP_ONE;
4066 INC_OP;
4067 JUMP_OUT;
4068
4069 CASE_OP(POP_TO_MARK)
4070 STACK_POP_TO_MARK(p->pop_to_mark.id);
4071 INC_OP;
4072 JUMP_OUT;
4073
4074 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
4075 CASE_OP(PUSH_OR_JUMP_EXACT1)
4076 {
4077 UChar c;
4078
4079 addr = p->push_or_jump_exact1.addr;
4080 c = p->push_or_jump_exact1.c;
4081 if (DATA_ENSURE_CHECK1 && c == *s) {
4082 STACK_PUSH_ALT(p + addr, s);
4083 INC_OP;
4084 JUMP_OUT;
4085 }
4086 }
4087 p += addr;
4088 JUMP_OUT;
4089 #endif
4090
4091 CASE_OP(PUSH_IF_PEEK_NEXT)
4092 {
4093 UChar c;
4094
4095 addr = p->push_if_peek_next.addr;
4096 c = p->push_if_peek_next.c;
4097 if (DATA_ENSURE_CHECK1 && c == *s) {
4098 STACK_PUSH_ALT(p + addr, s);
4099 }
4100 }
4101 INC_OP;
4102 JUMP_OUT;
4103
4104 CASE_OP(REPEAT)
4105 mem = p->repeat.id; /* mem: OP_REPEAT ID */
4106 addr = p->repeat.addr;
4107
4108 STACK_PUSH_REPEAT_INC(mem, 0);
4109 if (reg->repeat_range[mem].lower == 0) {
4110 STACK_PUSH_ALT(p + addr, s);
4111 }
4112 INC_OP;
4113 JUMP_OUT;
4114
4115 CASE_OP(REPEAT_NG)
4116 mem = p->repeat.id; /* mem: OP_REPEAT ID */
4117 addr = p->repeat.addr;
4118
4119 STACK_PUSH_REPEAT_INC(mem, 0);
4120 if (reg->repeat_range[mem].lower == 0) {
4121 STACK_PUSH_ALT(p + 1, s);
4122 p += addr;
4123 }
4124 else
4125 INC_OP;
4126 JUMP_OUT;
4127
4128 CASE_OP(REPEAT_INC)
4129 mem = p->repeat_inc.id; /* mem: OP_REPEAT ID */
4130 STACK_GET_REPEAT_COUNT(mem, n);
4131 n++;
4132 if (n >= reg->repeat_range[mem].upper) {
4133 /* end of repeat. Nothing to do. */
4134 INC_OP;
4135 }
4136 else if (n >= reg->repeat_range[mem].lower) {
4137 INC_OP;
4138 STACK_PUSH_ALT(p, s);
4139 p = reg->repeat_range[mem].u.pcode;
4140 }
4141 else {
4142 p = reg->repeat_range[mem].u.pcode;
4143 }
4144 STACK_PUSH_REPEAT_INC(mem, n);
4145 CHECK_INTERRUPT_JUMP_OUT;
4146
4147 CASE_OP(REPEAT_INC_NG)
4148 mem = p->repeat_inc.id; /* mem: OP_REPEAT ID */
4149 STACK_GET_REPEAT_COUNT(mem, n);
4150 n++;
4151 STACK_PUSH_REPEAT_INC(mem, n);
4152 if (n == reg->repeat_range[mem].upper) {
4153 INC_OP;
4154 }
4155 else {
4156 if (n >= reg->repeat_range[mem].lower) {
4157 STACK_PUSH_ALT(reg->repeat_range[mem].u.pcode, s);
4158 INC_OP;
4159 }
4160 else {
4161 p = reg->repeat_range[mem].u.pcode;
4162 }
4163 }
4164 CHECK_INTERRUPT_JUMP_OUT;
4165
4166 #ifdef USE_CALL
4167 CASE_OP(CALL)
4168 if (subexp_call_nest_counter == SubexpCallMaxNestLevel)
4169 goto fail;
4170 subexp_call_nest_counter++;
4171
4172 if (SubexpCallLimitInSearch != 0) {
4173 msa->subexp_call_in_search_counter++;
4174 #ifdef ONIG_DEBUG_MATCH_COUNTER
4175 if (p->call.called_mem < MAX_SUBEXP_CALL_COUNTERS)
4176 subexp_call_counters[p->call.called_mem]++;
4177 if (msa->subexp_call_in_search_counter % 1000 == 0)
4178 MATCH_COUNTER_OUT("CALL");
4179 #endif
4180 if (msa->subexp_call_in_search_counter >
4181 SubexpCallLimitInSearch) {
4182 MATCH_AT_ERROR_RETURN(ONIGERR_SUBEXP_CALL_LIMIT_IN_SEARCH_OVER);
4183 }
4184 }
4185
4186 #ifdef ONIG_DEBUG_CALL
4187 fprintf(DBGFP, "CALL: id:%d, at:%ld, level:%lu\n", p->call.called_mem, s - str, subexp_call_nest_counter);
4188 #endif
4189 addr = p->call.addr;
4190 INC_OP; STACK_PUSH_CALL_FRAME(p);
4191 p = reg->ops + addr;
4192
4193 JUMP_OUT;
4194
4195 CASE_OP(RETURN)
4196 STACK_RETURN(p);
4197 STACK_PUSH_RETURN;
4198 subexp_call_nest_counter--;
4199 JUMP_OUT;
4200 #endif
4201
4202 CASE_OP(MOVE)
4203 if (p->move.n < 0) {
4204 s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, -p->move.n);
4205 if (IS_NULL(s)) goto fail;
4206 }
4207 else {
4208 int len;
4209
4210 for (tlen = p->move.n; tlen > 0; tlen--) {
4211 len = enclen(encode, s);
4212 s += len;
4213 if (s > end) goto fail;
4214 if (s == end) {
4215 if (tlen != 1) goto fail;
4216 else break;
4217 }
4218 }
4219 }
4220 INC_OP;
4221 JUMP_OUT;
4222
4223 CASE_OP(STEP_BACK_START)
4224 tlen = p->step_back_start.initial;
4225 if (tlen != 0) {
4226 s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
4227 if (IS_NULL(s)) goto fail;
4228 }
4229 if (p->step_back_start.remaining != 0) {
4230 STACK_PUSH_ALT_WITH_ZID(p + 1, s, p->step_back_start.remaining);
4231 p += p->step_back_start.addr;
4232 }
4233 else
4234 INC_OP;
4235 JUMP_OUT;
4236
4237 CASE_OP(STEP_BACK_NEXT)
4238 tlen = (LengthType )stk->zid; /* remaining count */
4239 if (tlen != INFINITE_LEN) tlen--;
4240 s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, 1);
4241 if (IS_NULL(s)) goto fail;
4242 if (tlen != 0) {
4243 STACK_PUSH_ALT_WITH_ZID(p, s, (int )tlen);
4244 }
4245 INC_OP;
4246 JUMP_OUT;
4247
4248 CASE_OP(CUT_TO_MARK)
4249 mem = p->cut_to_mark.id; /* mem: mark id */
4250 STACK_TO_VOID_TO_MARK(stkp, mem);
4251 if (p->cut_to_mark.restore_pos != 0) {
4252 s = stkp->u.val.v;
4253 }
4254 INC_OP;
4255 JUMP_OUT;
4256
4257 CASE_OP(MARK)
4258 mem = p->mark.id; /* mem: mark id */
4259 if (p->mark.save_pos != 0)
4260 STACK_PUSH_MARK_WITH_POS(mem, s);
4261 else
4262 STACK_PUSH_MARK(mem);
4263
4264 INC_OP;
4265 JUMP_OUT;
4266
4267 CASE_OP(SAVE_VAL)
4268 {
4269 SaveType type;
4270
4271 type = p->save_val.type;
4272 mem = p->save_val.id; /* mem: save id */
4273 switch ((enum SaveType )type) {
4274 case SAVE_KEEP:
4275 STACK_PUSH_SAVE_VAL(mem, type, s);
4276 break;
4277
4278 case SAVE_S:
4279 STACK_PUSH_SAVE_VAL_WITH_SPREV(mem, type, s);
4280 break;
4281
4282 case SAVE_RIGHT_RANGE:
4283 STACK_PUSH_SAVE_VAL(mem, SAVE_RIGHT_RANGE, right_range);
4284 break;
4285 }
4286 }
4287 INC_OP;
4288 JUMP_OUT;
4289
4290 CASE_OP(UPDATE_VAR)
4291 {
4292 UpdateVarType type;
4293 enum SaveType save_type;
4294
4295 type = p->update_var.type;
4296
4297 switch ((enum UpdateVarType )type) {
4298 case UPDATE_VAR_KEEP_FROM_STACK_LAST:
4299 STACK_GET_SAVE_VAL_TYPE_LAST(SAVE_KEEP, keep);
4300 break;
4301 case UPDATE_VAR_S_FROM_STACK:
4302 mem = p->update_var.id; /* mem: save id */
4303 STACK_GET_SAVE_VAL_TYPE_LAST_ID_WITH_SPREV(SAVE_S, mem, s);
4304 break;
4305 case UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK:
4306 save_type = SAVE_S;
4307 goto get_save_val_type_last_id;
4308 break;
4309 case UPDATE_VAR_RIGHT_RANGE_FROM_STACK:
4310 save_type = SAVE_RIGHT_RANGE;
4311 get_save_val_type_last_id:
4312 mem = p->update_var.id; /* mem: save id */
4313 STACK_GET_SAVE_VAL_TYPE_LAST_ID(save_type, mem, right_range, p->update_var.clear);
4314 break;
4315 case UPDATE_VAR_RIGHT_RANGE_TO_S:
4316 right_range = s;
4317 break;
4318 case UPDATE_VAR_RIGHT_RANGE_INIT:
4319 INIT_RIGHT_RANGE;
4320 break;
4321 }
4322 }
4323 INC_OP;
4324 JUMP_OUT;
4325
4326 #ifdef USE_CALLOUT
4327 CASE_OP(CALLOUT_CONTENTS)
4328 of = ONIG_CALLOUT_OF_CONTENTS;
4329 mem = p->callout_contents.num;
4330 goto callout_common_entry;
4331 BREAK_OUT;
4332
4333 CASE_OP(CALLOUT_NAME)
4334 {
4335 int call_result;
4336 int name_id;
4337 int in;
4338 CalloutListEntry* e;
4339 OnigCalloutFunc func;
4340 OnigCalloutArgs args;
4341
4342 of = ONIG_CALLOUT_OF_NAME;
4343 mem = p->callout_name.num;
4344
4345 callout_common_entry:
4346 e = onig_reg_callout_list_at(reg, mem);
4347 in = e->in;
4348 if (of == ONIG_CALLOUT_OF_NAME) {
4349 name_id = p->callout_name.id;
4350 func = onig_get_callout_start_func(reg, mem);
4351 }
4352 else {
4353 name_id = ONIG_NON_NAME_ID;
4354 func = msa->mp->progress_callout_of_contents;
4355 }
4356
4357 if (IS_NOT_NULL(func) && (in & ONIG_CALLOUT_IN_PROGRESS) != 0) {
4358 CALLOUT_BODY(func, ONIG_CALLOUT_IN_PROGRESS, name_id,
4359 (int )mem, msa->mp->callout_user_data, args, call_result);
4360 switch (call_result) {
4361 case ONIG_CALLOUT_FAIL:
4362 goto fail;
4363 break;
4364 case ONIG_CALLOUT_SUCCESS:
4365 goto retraction_callout2;
4366 break;
4367 default: /* error code */
4368 if (call_result > 0) {
4369 call_result = ONIGERR_INVALID_ARGUMENT;
4370 }
4371 best_len = call_result;
4372 goto match_at_end;
4373 break;
4374 }
4375 }
4376 else {
4377 retraction_callout2:
4378 if ((in & ONIG_CALLOUT_IN_RETRACTION) != 0) {
4379 if (of == ONIG_CALLOUT_OF_NAME) {
4380 if (IS_NOT_NULL(func)) {
4381 STACK_PUSH_CALLOUT_NAME(name_id, mem, func);
4382 }
4383 }
4384 else {
4385 func = msa->mp->retraction_callout_of_contents;
4386 if (IS_NOT_NULL(func)) {
4387 STACK_PUSH_CALLOUT_CONTENTS(mem, func);
4388 }
4389 }
4390 }
4391 }
4392 }
4393 INC_OP;
4394 JUMP_OUT;
4395 #endif
4396
4397 CASE_OP(FINISH)
4398 goto match_at_end;
4399
4400 #ifdef ONIG_DEBUG_STATISTICS
4401 fail:
4402 SOP_OUT;
4403 goto fail2;
4404 #endif
4405 CASE_OP(FAIL)
4406 #ifdef ONIG_DEBUG_STATISTICS
4407 fail2:
4408 #else
4409 fail:
4410 #endif
4411 STACK_POP;
4412 p = stk->u.state.pcode;
4413 s = stk->u.state.pstr;
4414 CHECK_RETRY_LIMIT_IN_MATCH;
4415 JUMP_OUT;
4416
4417 DEFAULT_OP
4418 MATCH_AT_ERROR_RETURN(ONIGERR_UNDEFINED_BYTECODE);
4419
4420 } BYTECODE_INTERPRETER_END;
4421
4422 match_at_end:
4423 if (msa->retry_limit_in_search != 0) {
4424 msa->retry_limit_in_search_counter += retry_in_match_counter;
4425 }
4426
4427 #ifdef ONIG_DEBUG_MATCH_COUNTER
4428 MATCH_COUNTER_OUT("END");
4429 #endif
4430
4431 STACK_SAVE(msa, is_alloca, alloc_base);
4432 return best_len;
4433 }
4434
4435
4436 #ifdef USE_REGSET
4437
/* One regset entry: a compiled regex together with the region handed to the
   matcher for it. */
typedef struct {
  regex_t* reg;        /* compiled regex */
  OnigRegion* region;  /* region for this regex; callers test it for NULL before use */
} RR;
4442
/* A set of regexes that are searched together over one subject string. */
struct OnigRegSetStruct {
  RR* rs;            /* entries; rs[0] .. rs[n-1] are iterated by the search code */
  int n;             /* number of entries in rs that are in use */
  int alloc;         /* presumably the allocated capacity of rs -- TODO confirm */
  OnigEncoding enc;  /* encoding used to step through the subject string */
  int anchor;        /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
  OnigLen anc_dmin;  /* (SEMI_)END_BUF anchor distance */
  OnigLen anc_dmax;  /* (SEMI_)END_BUF anchor distance */
  int all_low_high;  /* non-zero: position-lead search may use its low/high
                        skipping loop when the search range is long enough */
  int anychar_inf;   /* non-zero: position-lead search tracks whether the
                        previous character was a newline */
};
4454
/* Per-regex activity state used by the position-lead regset search. */
enum SearchRangeStatus {
  SRS_DEAD = 0,      /* skipped at every position (forward_search found nothing) */
  SRS_LOW_HIGH = 1,  /* match attempts are limited by the entry's low/high window */
  SRS_ALL_RANGE = 2  /* match attempts are not filtered by a low/high window */
};
4460
/* Per-regex search window maintained by the position-lead regset search. */
typedef struct {
  int state; /* value of enum SearchRangeStatus */
  UChar* low;        /* positions before low are skipped for this regex */
  UChar* high;       /* reaching high triggers a new forward_search for the window */
  UChar* sch_range;  /* range limit passed to forward_search when refreshing;
                        assigned only for SRS_LOW_HIGH entries */
} SearchRange;
4467
/*
 * Try regex `reg` at position `s` and leave the enclosing search loop on
 * anything other than a plain mismatch.
 *
 * Relies on these names being in scope at the point of use:
 *   variables: r, reg, str, end, s, msas, i
 *   labels:    match  (taken when match_at() returns a value >= 0)
 *              finish (taken when match_at() returns a negative value
 *                      other than ONIG_MISMATCH)
 * On ONIG_MISMATCH control simply continues after the macro.
 */
#define REGSET_MATCH_AND_RETURN_CHECK(upper_range) \
  r = match_at(reg, str, end, (upper_range), s, msas + i); \
  if (r != ONIG_MISMATCH) { \
    if (r < 0) goto finish;  /* error */ \
    goto match; \
  }
4476
4477 static inline int
regset_search_body_position_lead(OnigRegSet * set,const UChar * str,const UChar * end,const UChar * start,const UChar * range,const UChar * orig_range,OnigOptionType option,MatchArg * msas,int * rmatch_pos)4478 regset_search_body_position_lead(OnigRegSet* set,
4479 const UChar* str, const UChar* end,
4480 const UChar* start, const UChar* range, /* match start range */
4481 const UChar* orig_range, /* data range */
4482 OnigOptionType option, MatchArg* msas, int* rmatch_pos)
4483 {
4484 int r, n, i;
4485 UChar *s;
4486 UChar *low, *high;
4487 UChar* sch_range;
4488 regex_t* reg;
4489 OnigEncoding enc;
4490 SearchRange* sr;
4491
4492 n = set->n;
4493 enc = set->enc;
4494 s = (UChar* )start;
4495
4496 sr = (SearchRange* )xmalloc(sizeof(*sr) * n);
4497 CHECK_NULL_RETURN_MEMERR(sr);
4498
4499 for (i = 0; i < n; i++) {
4500 reg = set->rs[i].reg;
4501
4502 sr[i].state = SRS_DEAD;
4503 if (reg->optimize != OPTIMIZE_NONE) {
4504 if (reg->dist_max != INFINITE_LEN) {
4505 if (DIST_CAST(end - range) > reg->dist_max)
4506 sch_range = (UChar* )range + reg->dist_max;
4507 else
4508 sch_range = (UChar* )end;
4509
4510 if (forward_search(reg, str, end, s, sch_range, &low, &high)) {
4511 sr[i].state = SRS_LOW_HIGH;
4512 sr[i].low = low;
4513 sr[i].high = high;
4514 sr[i].sch_range = sch_range;
4515 }
4516 }
4517 else {
4518 sch_range = (UChar* )end;
4519 if (forward_search(reg, str, end, s, sch_range, &low, &high)) {
4520 goto total_active;
4521 }
4522 }
4523 }
4524 else {
4525 total_active:
4526 sr[i].state = SRS_ALL_RANGE;
4527 sr[i].low = s;
4528 sr[i].high = (UChar* )range;
4529 }
4530 }
4531
4532 #define ACTIVATE_ALL_LOW_HIGH_SEARCH_THRESHOLD_LEN 500
4533
4534 if (set->all_low_high != 0
4535 && range - start > ACTIVATE_ALL_LOW_HIGH_SEARCH_THRESHOLD_LEN) {
4536 do {
4537 int try_count = 0;
4538 for (i = 0; i < n; i++) {
4539 if (sr[i].state == SRS_DEAD) continue;
4540
4541 if (s < sr[i].low) continue;
4542 if (s >= sr[i].high) {
4543 if (forward_search(set->rs[i].reg, str, end, s, sr[i].sch_range,
4544 &low, &high) != 0) {
4545 sr[i].low = low;
4546 sr[i].high = high;
4547 if (s < low) continue;
4548 }
4549 else {
4550 sr[i].state = SRS_DEAD;
4551 continue;
4552 }
4553 }
4554
4555 reg = set->rs[i].reg;
4556 REGSET_MATCH_AND_RETURN_CHECK(orig_range);
4557 try_count++;
4558 } /* for (i) */
4559
4560 if (s >= range) break;
4561
4562 if (try_count == 0) {
4563 low = (UChar* )range;
4564 for (i = 0; i < n; i++) {
4565 if (sr[i].state == SRS_LOW_HIGH && low > sr[i].low) {
4566 low = sr[i].low;
4567 }
4568 }
4569 if (low == range) break;
4570
4571 s = low;
4572 }
4573 else {
4574 s += enclen(enc, s);
4575 }
4576 } while (1);
4577 }
4578 else {
4579 int prev_is_newline = 1;
4580 do {
4581 for (i = 0; i < n; i++) {
4582 if (sr[i].state == SRS_DEAD) continue;
4583 if (sr[i].state == SRS_LOW_HIGH) {
4584 if (s < sr[i].low) continue;
4585 if (s >= sr[i].high) {
4586 if (forward_search(set->rs[i].reg, str, end, s, sr[i].sch_range,
4587 &low, &high) != 0) {
4588 sr[i].low = low;
4589 sr[i].high = high;
4590 if (s < low) continue;
4591 }
4592 else {
4593 sr[i].state = SRS_DEAD;
4594 continue;
4595 }
4596 }
4597 }
4598
4599 reg = set->rs[i].reg;
4600 if ((reg->anchor & ANCR_ANYCHAR_INF) == 0 || prev_is_newline != 0) {
4601 REGSET_MATCH_AND_RETURN_CHECK(orig_range);
4602 }
4603 }
4604
4605 if (s >= range) break;
4606
4607 if (set->anychar_inf != 0)
4608 prev_is_newline = ONIGENC_IS_MBC_NEWLINE(set->enc, s, end);
4609
4610 s += enclen(enc, s);
4611 } while (1);
4612 }
4613
4614 xfree(sr);
4615 return ONIG_MISMATCH;
4616
4617 finish:
4618 xfree(sr);
4619 return r;
4620
4621 match:
4622 xfree(sr);
4623 *rmatch_pos = (int )(s - str);
4624 return i;
4625 }
4626
4627 static inline int
regset_search_body_regex_lead(OnigRegSet * set,const UChar * str,const UChar * end,const UChar * start,const UChar * orig_range,OnigRegSetLead lead,OnigOptionType option,OnigMatchParam * mps[],int * rmatch_pos)4628 regset_search_body_regex_lead(OnigRegSet* set,
4629 const UChar* str, const UChar* end,
4630 const UChar* start, const UChar* orig_range, OnigRegSetLead lead,
4631 OnigOptionType option, OnigMatchParam* mps[], int* rmatch_pos)
4632 {
4633 int r;
4634 int i;
4635 int n;
4636 int match_index;
4637 const UChar* ep;
4638 regex_t* reg;
4639 OnigRegion* region;
4640
4641 n = set->n;
4642
4643 match_index = ONIG_MISMATCH;
4644 ep = orig_range;
4645 for (i = 0; i < n; i++) {
4646 reg = set->rs[i].reg;
4647 region = set->rs[i].region;
4648 r = search_in_range(reg, str, end, start, ep, orig_range, region, option, mps[i]);
4649 if (r > 0) {
4650 if (str + r < ep) {
4651 match_index = i;
4652 *rmatch_pos = r;
4653 if (lead == ONIG_REGSET_PRIORITY_TO_REGEX_ORDER)
4654 break;
4655
4656 ep = str + r;
4657 }
4658 }
4659 else if (r == 0) {
4660 match_index = i;
4661 *rmatch_pos = r;
4662 break;
4663 }
4664 }
4665
4666 return match_index;
4667 }
4668
4669 extern int
onig_regset_search_with_param(OnigRegSet * set,const UChar * str,const UChar * end,const UChar * start,const UChar * range,OnigRegSetLead lead,OnigOptionType option,OnigMatchParam * mps[],int * rmatch_pos)4670 onig_regset_search_with_param(OnigRegSet* set,
4671 const UChar* str, const UChar* end,
4672 const UChar* start, const UChar* range,
4673 OnigRegSetLead lead, OnigOptionType option, OnigMatchParam* mps[],
4674 int* rmatch_pos)
4675 {
4676 int r;
4677 int i;
4678 UChar *s;
4679 regex_t* reg;
4680 OnigEncoding enc;
4681 OnigRegion* region;
4682 MatchArg* msas;
4683 const UChar *orig_start = start;
4684 const UChar *orig_range = range;
4685
4686 if (set->n == 0)
4687 return ONIG_MISMATCH;
4688
4689 if (OPTON_POSIX_REGION(option) || OPTON_CALLBACK_EACH_MATCH(option))
4690 return ONIGERR_INVALID_ARGUMENT;
4691
4692 r = 0;
4693 enc = set->enc;
4694 msas = (MatchArg* )NULL;
4695
4696 for (i = 0; i < set->n; i++) {
4697 reg = set->rs[i].reg;
4698 region = set->rs[i].region;
4699 ADJUST_MATCH_PARAM(reg, mps[i]);
4700 if (IS_NOT_NULL(region)) {
4701 r = onig_region_resize_clear(region, reg->num_mem + 1);
4702 if (r != 0) goto finish_no_msa;
4703 }
4704 }
4705
4706 if (start > end || start < str) goto mismatch_no_msa;
4707 if (str < end) {
4708 /* forward search only */
4709 if (range < start)
4710 return ONIGERR_INVALID_ARGUMENT;
4711 }
4712
4713 if (OPTON_CHECK_VALIDITY_OF_STRING(option)) {
4714 if (! ONIGENC_IS_VALID_MBC_STRING(enc, str, end)) {
4715 r = ONIGERR_INVALID_WIDE_CHAR_VALUE;
4716 goto finish_no_msa;
4717 }
4718 }
4719
4720 if (set->anchor != OPTIMIZE_NONE && str < end) {
4721 UChar *min_semi_end, *max_semi_end;
4722
4723 if ((set->anchor & ANCR_BEGIN_POSITION) != 0) {
4724 /* search start-position only */
4725 begin_position:
4726 range = start + 1;
4727 }
4728 else if ((set->anchor & ANCR_BEGIN_BUF) != 0) {
4729 /* search str-position only */
4730 if (start != str) goto mismatch_no_msa;
4731 range = str + 1;
4732 }
4733 else if ((set->anchor & ANCR_END_BUF) != 0) {
4734 min_semi_end = max_semi_end = (UChar* )end;
4735
4736 end_buf:
4737 if ((OnigLen )(max_semi_end - str) < set->anc_dmin)
4738 goto mismatch_no_msa;
4739
4740 if ((OnigLen )(min_semi_end - start) > set->anc_dmax) {
4741 start = min_semi_end - set->anc_dmax;
4742 if (start < end)
4743 start = onigenc_get_right_adjust_char_head(enc, str, start);
4744 }
4745 if ((OnigLen )(max_semi_end - (range - 1)) < set->anc_dmin) {
4746 range = max_semi_end - set->anc_dmin + 1;
4747 }
4748 if (start > range) goto mismatch_no_msa;
4749 }
4750 else if ((set->anchor & ANCR_SEMI_END_BUF) != 0) {
4751 UChar* pre_end = ONIGENC_STEP_BACK(enc, str, end, 1);
4752
4753 max_semi_end = (UChar* )end;
4754 if (ONIGENC_IS_MBC_NEWLINE(enc, pre_end, end)) {
4755 min_semi_end = pre_end;
4756
4757 #ifdef USE_CRNL_AS_LINE_TERMINATOR
4758 pre_end = ONIGENC_STEP_BACK(enc, str, pre_end, 1);
4759 if (IS_NOT_NULL(pre_end) &&
4760 ONIGENC_IS_MBC_CRNL(enc, pre_end, end)) {
4761 min_semi_end = pre_end;
4762 }
4763 #endif
4764 if (min_semi_end > str && start <= min_semi_end) {
4765 goto end_buf;
4766 }
4767 }
4768 else {
4769 min_semi_end = (UChar* )end;
4770 goto end_buf;
4771 }
4772 }
4773 else if ((set->anchor & ANCR_ANYCHAR_INF_ML) != 0) {
4774 goto begin_position;
4775 }
4776 }
4777 else if (str == end) { /* empty string */
4778 start = end = str;
4779 s = (UChar* )start;
4780
4781 msas = (MatchArg* )xmalloc(sizeof(*msas) * set->n);
4782 CHECK_NULL_RETURN_MEMERR(msas);
4783 for (i = 0; i < set->n; i++) {
4784 reg = set->rs[i].reg;
4785 MATCH_ARG_INIT(msas[i], reg, option, set->rs[i].region, start, mps[i]);
4786 }
4787 for (i = 0; i < set->n; i++) {
4788 reg = set->rs[i].reg;
4789 if (reg->threshold_len == 0) {
4790 /* REGSET_MATCH_AND_RETURN_CHECK(end); */
4791 /* Can't use REGSET_MATCH_AND_RETURN_CHECK()
4792 because r must be set regex index (i)
4793 */
4794 r = match_at(reg, str, end, end, s, msas + i);
4795 if (r != ONIG_MISMATCH) {
4796 if (r >= 0) {
4797 r = i;
4798 goto match;
4799 }
4800 else goto finish; /* error */
4801 }
4802 }
4803 }
4804
4805 goto mismatch;
4806 }
4807
4808 if (lead == ONIG_REGSET_POSITION_LEAD) {
4809 msas = (MatchArg* )xmalloc(sizeof(*msas) * set->n);
4810 CHECK_NULL_RETURN_MEMERR(msas);
4811
4812 for (i = 0; i < set->n; i++) {
4813 MATCH_ARG_INIT(msas[i], set->rs[i].reg, option, set->rs[i].region,
4814 orig_start, mps[i]);
4815 }
4816
4817 r = regset_search_body_position_lead(set, str, end, start, range,
4818 orig_range, option, msas, rmatch_pos);
4819 }
4820 else {
4821 r = regset_search_body_regex_lead(set, str, end, start, orig_range,
4822 lead, option, mps, rmatch_pos);
4823 }
4824 if (r < 0) goto finish;
4825 else goto match2;
4826
4827 mismatch:
4828 r = ONIG_MISMATCH;
4829 finish:
4830 for (i = 0; i < set->n; i++) {
4831 if (IS_NOT_NULL(msas))
4832 MATCH_ARG_FREE(msas[i]);
4833 if (OPTON_FIND_NOT_EMPTY(set->rs[i].reg->options) &&
4834 IS_NOT_NULL(set->rs[i].region)) {
4835 onig_region_clear(set->rs[i].region);
4836 }
4837 }
4838 if (IS_NOT_NULL(msas)) xfree(msas);
4839 return r;
4840
4841 mismatch_no_msa:
4842 r = ONIG_MISMATCH;
4843 finish_no_msa:
4844 return r;
4845
4846 match:
4847 *rmatch_pos = (int )(s - str);
4848 match2:
4849 for (i = 0; i < set->n; i++) {
4850 if (IS_NOT_NULL(msas))
4851 MATCH_ARG_FREE(msas[i]);
4852 if (OPTON_FIND_NOT_EMPTY(set->rs[i].reg->options) &&
4853 IS_NOT_NULL(set->rs[i].region)) {
4854 onig_region_clear(set->rs[i].region);
4855 }
4856 }
4857 if (IS_NOT_NULL(msas)) xfree(msas);
4858 return r; /* regex index */
4859 }
4860
4861 extern int
onig_regset_search(OnigRegSet * set,const UChar * str,const UChar * end,const UChar * start,const UChar * range,OnigRegSetLead lead,OnigOptionType option,int * rmatch_pos)4862 onig_regset_search(OnigRegSet* set, const UChar* str, const UChar* end,
4863 const UChar* start, const UChar* range,
4864 OnigRegSetLead lead, OnigOptionType option, int* rmatch_pos)
4865 {
4866 int r;
4867 int i;
4868 OnigMatchParam* mp;
4869 OnigMatchParam** mps;
4870
4871 mps = (OnigMatchParam** )xmalloc((sizeof(OnigMatchParam*) + sizeof(OnigMatchParam)) * set->n);
4872 CHECK_NULL_RETURN_MEMERR(mps);
4873
4874 mp = (OnigMatchParam* )(mps + set->n);
4875
4876 for (i = 0; i < set->n; i++) {
4877 onig_initialize_match_param(mp + i);
4878 mps[i] = mp + i;
4879 }
4880
4881 r = onig_regset_search_with_param(set, str, end, start, range, lead, option, mps,
4882 rmatch_pos);
4883 for (i = 0; i < set->n; i++)
4884 onig_free_match_param_content(mp + i);
4885
4886 xfree(mps);
4887
4888 return r;
4889 }
4890
4891 #endif /* USE_REGSET */
4892
4893
4894 static UChar*
slow_search(OnigEncoding enc,UChar * target,UChar * target_end,const UChar * text,const UChar * text_end,UChar * text_range)4895 slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
4896 const UChar* text, const UChar* text_end, UChar* text_range)
4897 {
4898 UChar *t, *p, *s, *end;
4899
4900 end = (UChar* )text_end;
4901 end -= target_end - target - 1;
4902 if (end > text_range)
4903 end = text_range;
4904
4905 s = (UChar* )text;
4906
4907 while (s < end) {
4908 if (*s == *target) {
4909 p = s + 1;
4910 t = target + 1;
4911 while (t < target_end) {
4912 if (*t != *p++)
4913 break;
4914 t++;
4915 }
4916 if (t == target_end)
4917 return s;
4918 }
4919 s += enclen(enc, s);
4920 }
4921
4922 return (UChar* )NULL;
4923 }
4924
4925 static UChar*
slow_search_backward(OnigEncoding enc,UChar * target,UChar * target_end,const UChar * text,const UChar * adjust_text,const UChar * text_end,const UChar * text_start)4926 slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
4927 const UChar* text, const UChar* adjust_text,
4928 const UChar* text_end, const UChar* text_start)
4929 {
4930 UChar *t, *p, *s;
4931
4932 s = (UChar* )text_end;
4933 s -= (target_end - target);
4934 if (s > text_start)
4935 s = (UChar* )text_start;
4936 else
4937 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
4938
4939 while (PTR_GE(s, text)) {
4940 if (*s == *target) {
4941 p = s + 1;
4942 t = target + 1;
4943 while (t < target_end) {
4944 if (*t != *p++)
4945 break;
4946 t++;
4947 }
4948 if (t == target_end)
4949 return s;
4950 }
4951 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);
4952 }
4953
4954 return (UChar* )NULL;
4955 }
4956
4957 static UChar*
sunday_quick_search_step_forward(regex_t * reg,const UChar * target,const UChar * target_end,const UChar * text,const UChar * text_end,const UChar * text_range)4958 sunday_quick_search_step_forward(regex_t* reg,
4959 const UChar* target, const UChar* target_end,
4960 const UChar* text, const UChar* text_end,
4961 const UChar* text_range)
4962 {
4963 const UChar *s, *se, *t, *p, *end;
4964 const UChar *tail, *next;
4965 int skip, tlen1;
4966 int map_offset;
4967 OnigEncoding enc;
4968
4969 #ifdef ONIG_DEBUG_SEARCH
4970 fprintf(DBGFP,
4971 "sunday_quick_search_step_forward: text: %p, text_end: %p, text_range: %p\n",
4972 text, text_end, text_range);
4973 #endif
4974
4975 enc = reg->enc;
4976
4977 tail = target_end - 1;
4978 tlen1 = (int )(tail - target);
4979 end = text_range;
4980 if (tlen1 > text_end - end)
4981 end = text_end - tlen1;
4982
4983 map_offset = reg->map_offset;
4984 s = text;
4985
4986 while (s < end) {
4987 p = se = s + tlen1;
4988 t = tail;
4989 while (*p == *t) {
4990 if (t == target) return (UChar* )s;
4991 p--; t--;
4992 }
4993 if (se + map_offset >= text_end) break;
4994 skip = reg->map[*(se + map_offset)];
4995 #if 0
4996 t = s;
4997 do {
4998 s += enclen(enc, s);
4999 } while ((s - t) < skip && s < end);
5000 #else
5001 next = s + skip;
5002 if (next < end)
5003 s = onigenc_get_right_adjust_char_head(enc, s, next);
5004 else
5005 break;
5006 #endif
5007 }
5008
5009 return (UChar* )NULL;
5010 }
5011
5012 static UChar*
sunday_quick_search(regex_t * reg,const UChar * target,const UChar * target_end,const UChar * text,const UChar * text_end,const UChar * text_range)5013 sunday_quick_search(regex_t* reg, const UChar* target, const UChar* target_end,
5014 const UChar* text, const UChar* text_end,
5015 const UChar* text_range)
5016 {
5017 const UChar *s, *t, *p, *end;
5018 const UChar *tail;
5019 int map_offset;
5020 ptrdiff_t target_len;
5021
5022 map_offset = reg->map_offset;
5023 tail = target_end - 1;
5024 target_len = target_end - target;
5025
5026 if (target_len > text_end - text_range) {
5027 end = text_end;
5028 if (target_len > text_end - text)
5029 return (UChar* )NULL;
5030 }
5031 else {
5032 end = text_range + target_len;
5033 }
5034
5035 s = text + target_len - 1;
5036
5037 #ifdef USE_STRICT_POINTER_ADDRESS
5038 if (s < end) {
5039 while (TRUE) {
5040 p = s;
5041 t = tail;
5042 while (*p == *t) {
5043 if (t == target) return (UChar* )p;
5044 p--; t--;
5045 }
5046 if (text_end - s <= map_offset) break;
5047 if (reg->map[*(s + map_offset)] >= end - s) break;
5048 s += reg->map[*(s + map_offset)];
5049 }
5050 }
5051 #else
5052 while (s < end) {
5053 p = s;
5054 t = tail;
5055 while (*p == *t) {
5056 if (t == target) return (UChar* )p;
5057 p--; t--;
5058 }
5059 if (text_end - s <= map_offset) break;
5060 s += reg->map[*(s + map_offset)];
5061 }
5062 #endif
5063
5064 return (UChar* )NULL;
5065 }
5066
5067 static UChar*
map_search(OnigEncoding enc,UChar map[],const UChar * text,const UChar * text_range)5068 map_search(OnigEncoding enc, UChar map[],
5069 const UChar* text, const UChar* text_range)
5070 {
5071 const UChar *s = text;
5072
5073 while (s < text_range) {
5074 if (map[*s]) return (UChar* )s;
5075
5076 s += enclen(enc, s);
5077 }
5078 return (UChar* )NULL;
5079 }
5080
5081 static UChar*
map_search_backward(OnigEncoding enc,UChar map[],const UChar * text,const UChar * adjust_text,const UChar * text_start)5082 map_search_backward(OnigEncoding enc, UChar map[],
5083 const UChar* text, const UChar* adjust_text,
5084 const UChar* text_start)
5085 {
5086 const UChar *s = text_start;
5087
5088 while (PTR_GE(s, text)) {
5089 if (map[*s]) return (UChar* )s;
5090
5091 s = onigenc_get_prev_char_head(enc, adjust_text, s);
5092 }
5093 return (UChar* )NULL;
5094 }
5095 extern int
onig_match(regex_t * reg,const UChar * str,const UChar * end,const UChar * at,OnigRegion * region,OnigOptionType option)5096 onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at,
5097 OnigRegion* region, OnigOptionType option)
5098 {
5099 int r;
5100 OnigMatchParam mp;
5101
5102 onig_initialize_match_param(&mp);
5103 r = onig_match_with_param(reg, str, end, at, region, option, &mp);
5104 onig_free_match_param_content(&mp);
5105 return r;
5106 }
5107
5108 extern int
onig_match_with_param(regex_t * reg,const UChar * str,const UChar * end,const UChar * at,OnigRegion * region,OnigOptionType option,OnigMatchParam * mp)5109 onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end,
5110 const UChar* at, OnigRegion* region, OnigOptionType option,
5111 OnigMatchParam* mp)
5112 {
5113 int r;
5114 MatchArg msa;
5115
5116 #ifndef USE_POSIX_API
5117 if (OPTON_POSIX_REGION(option)) return ONIGERR_INVALID_ARGUMENT;
5118 #endif
5119
5120 ADJUST_MATCH_PARAM(reg, mp);
5121 MATCH_ARG_INIT(msa, reg, option, region, at, mp);
5122 if (region
5123 #ifdef USE_POSIX_API
5124 && !OPTON_POSIX_REGION(option)
5125 #endif
5126 ) {
5127 r = onig_region_resize_clear(region, reg->num_mem + 1);
5128 }
5129 else
5130 r = 0;
5131
5132 if (r == 0) {
5133 if (OPTON_CHECK_VALIDITY_OF_STRING(option)) {
5134 if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end)) {
5135 r = ONIGERR_INVALID_WIDE_CHAR_VALUE;
5136 goto end;
5137 }
5138 }
5139
5140 r = match_at(reg, str, end, end, at, &msa);
5141 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
5142 if (OPTON_FIND_LONGEST(option) && r == ONIG_MISMATCH) {
5143 if (msa.best_len >= 0) {
5144 r = msa.best_len;
5145 }
5146 }
5147 #endif
5148 }
5149
5150 end:
5151 MATCH_ARG_FREE(msa);
5152 return r;
5153 }
5154
5155 static int
forward_search(regex_t * reg,const UChar * str,const UChar * end,UChar * start,UChar * range,UChar ** low,UChar ** high)5156 forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start,
5157 UChar* range, UChar** low, UChar** high)
5158 {
5159 UChar *p, *pprev = (UChar* )NULL;
5160
5161 #ifdef ONIG_DEBUG_SEARCH
5162 fprintf(DBGFP, "forward_search: str: %p, end: %p, start: %p, range: %p\n",
5163 str, end, start, range);
5164 #endif
5165
5166 p = start;
5167 if (reg->dist_min != 0) {
5168 if (DIST_CAST(end - p) <= reg->dist_min)
5169 return 0; /* fail */
5170
5171 if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
5172 p += reg->dist_min;
5173 }
5174 else {
5175 UChar *q = p + reg->dist_min;
5176 while (p < q) p += enclen(reg->enc, p);
5177 }
5178 }
5179
5180 retry:
5181 switch (reg->optimize) {
5182 case OPTIMIZE_STR:
5183 p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
5184 break;
5185
5186 case OPTIMIZE_STR_FAST:
5187 p = sunday_quick_search(reg, reg->exact, reg->exact_end, p, end, range);
5188 break;
5189
5190 case OPTIMIZE_STR_FAST_STEP_FORWARD:
5191 p = sunday_quick_search_step_forward(reg, reg->exact, reg->exact_end,
5192 p, end, range);
5193 break;
5194
5195 case OPTIMIZE_MAP:
5196 p = map_search(reg->enc, reg->map, p, range);
5197 break;
5198 }
5199
5200 if (p && p < range) {
5201 if (DIST_CAST(p - start) < reg->dist_min) {
5202 retry_gate:
5203 pprev = p;
5204 p += enclen(reg->enc, p);
5205 goto retry;
5206 }
5207
5208 if (reg->sub_anchor) {
5209 UChar* prev;
5210
5211 switch (reg->sub_anchor) {
5212 case ANCR_BEGIN_LINE:
5213 if (!ON_STR_BEGIN(p)) {
5214 prev = onigenc_get_prev_char_head(reg->enc, (pprev ? pprev : str), p);
5215 if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
5216 goto retry_gate;
5217 }
5218 break;
5219
5220 case ANCR_END_LINE:
5221 if (ON_STR_END(p)) {
5222 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
5223 prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
5224 (pprev ? pprev : str), p);
5225 if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
5226 goto retry_gate;
5227 #endif
5228 }
5229 else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
5230 #ifdef USE_CRNL_AS_LINE_TERMINATOR
5231 && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
5232 #endif
5233 )
5234 goto retry_gate;
5235
5236 break;
5237 }
5238 }
5239
5240 if (reg->dist_max == 0) {
5241 *low = p;
5242 *high = p;
5243 }
5244 else {
5245 if (reg->dist_max != INFINITE_LEN) {
5246 if (DIST_CAST(p - str) < reg->dist_max) {
5247 *low = (UChar* )str;
5248 }
5249 else {
5250 *low = p - reg->dist_max;
5251 if (*low > start) {
5252 *low = onigenc_get_right_adjust_char_head(reg->enc, start, *low);
5253 }
5254 }
5255 }
5256 /* no needs to adjust *high, *high is used as range check only */
5257 if (DIST_CAST(p - str) < reg->dist_min)
5258 *high = (UChar* )str;
5259 else
5260 *high = p - reg->dist_min;
5261 }
5262
5263 #ifdef ONIG_DEBUG_SEARCH
5264 fprintf(DBGFP,
5265 "forward_search success: low: %d, high: %d, dmin: %u, dmax: %u\n",
5266 (int )(*low - str), (int )(*high - str),
5267 reg->dist_min, reg->dist_max);
5268 #endif
5269 return 1; /* success */
5270 }
5271
5272 return 0; /* fail */
5273 }
5274
5275
5276 static int
backward_search(regex_t * reg,const UChar * str,const UChar * end,UChar * s,const UChar * range,UChar * adjrange,UChar ** low,UChar ** high)5277 backward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
5278 const UChar* range, UChar* adjrange, UChar** low, UChar** high)
5279 {
5280 UChar *p;
5281
5282 p = s;
5283
5284 retry:
5285 switch (reg->optimize) {
5286 case OPTIMIZE_STR:
5287 exact_method:
5288 p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
5289 range, adjrange, end, p);
5290 break;
5291
5292 case OPTIMIZE_STR_FAST:
5293 case OPTIMIZE_STR_FAST_STEP_FORWARD:
5294 goto exact_method;
5295 break;
5296
5297 case OPTIMIZE_MAP:
5298 p = map_search_backward(reg->enc, reg->map, range, adjrange, p);
5299 break;
5300 }
5301
5302 if (p) {
5303 if (reg->sub_anchor) {
5304 UChar* prev;
5305
5306 switch (reg->sub_anchor) {
5307 case ANCR_BEGIN_LINE:
5308 if (!ON_STR_BEGIN(p)) {
5309 prev = onigenc_get_prev_char_head(reg->enc, str, p);
5310 if (IS_NOT_NULL(prev) && !ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
5311 p = prev;
5312 goto retry;
5313 }
5314 }
5315 break;
5316
5317 case ANCR_END_LINE:
5318 if (ON_STR_END(p)) {
5319 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
5320 prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
5321 if (IS_NULL(prev)) goto fail;
5322 if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
5323 p = prev;
5324 goto retry;
5325 }
5326 #endif
5327 }
5328 else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
5329 #ifdef USE_CRNL_AS_LINE_TERMINATOR
5330 && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
5331 #endif
5332 ) {
5333 p = onigenc_get_prev_char_head(reg->enc, adjrange, p);
5334 if (IS_NULL(p)) goto fail;
5335 goto retry;
5336 }
5337 break;
5338 }
5339 }
5340
5341 if (reg->dist_max != INFINITE_LEN) {
5342 if (DIST_CAST(p - str) < reg->dist_max)
5343 *low = (UChar* )str;
5344 else
5345 *low = p - reg->dist_max;
5346
5347 if (reg->dist_min != 0) {
5348 if (DIST_CAST(p - str) < reg->dist_min)
5349 *high = (UChar* )str;
5350 else
5351 *high = p - reg->dist_min;
5352 }
5353 else {
5354 *high = p;
5355 }
5356
5357 *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high);
5358 }
5359
5360 #ifdef ONIG_DEBUG_SEARCH
5361 fprintf(DBGFP, "backward_search: low: %d, high: %d\n",
5362 (int )(*low - str), (int )(*high - str));
5363 #endif
5364 return 1; /* success */
5365 }
5366
5367 fail:
5368 #ifdef ONIG_DEBUG_SEARCH
5369 fprintf(DBGFP, "backward_search: fail.\n");
5370 #endif
5371 return 0; /* fail */
5372 }
5373
5374
5375 extern int
onig_search(regex_t * reg,const UChar * str,const UChar * end,const UChar * start,const UChar * range,OnigRegion * region,OnigOptionType option)5376 onig_search(regex_t* reg, const UChar* str, const UChar* end,
5377 const UChar* start, const UChar* range, OnigRegion* region,
5378 OnigOptionType option)
5379 {
5380 int r;
5381 OnigMatchParam mp;
5382 const UChar* data_range;
5383
5384 onig_initialize_match_param(&mp);
5385
5386 /* The following is an expanded code of onig_search_with_param() */
5387 if (range > start)
5388 data_range = range;
5389 else
5390 data_range = end;
5391
5392 r = search_in_range(reg, str, end, start, range, data_range, region,
5393 option, &mp);
5394
5395 onig_free_match_param_content(&mp);
5396 return r;
5397
5398 }
5399
/*
 * search_in_range: search driver called by onig_search() and
 * onig_search_with_param().
 *
 *   reg        - regex object; its anchor, optimize, dist_min/dist_max,
 *                anc_dist_min/anc_dist_max and threshold_len fields steer
 *                the search below.
 *   str, end   - bounds of the subject string.
 *   start      - first position at which a match is attempted.
 *   range      - bound for match start positions; range > start selects
 *                the forward search, otherwise the backward search runs.
 *   data_range - handed to match_at() as its upper range during the
 *                forward search.
 *   region     - optional capture region; when used it is resized for
 *                reg->num_mem + 1 entries and cleared before searching.
 *   option     - search-time options.
 *   mp         - match parameters, passed on to MATCH_ARG_INIT.
 *
 * Returns the offset (s - str) of the match start on success,
 * ONIG_MISMATCH when nothing matched, or an error code
 * (ONIGERR_INVALID_ARGUMENT, ONIGERR_INVALID_WIDE_CHAR_VALUE, or whatever
 * onig_region_resize_clear() / match_at() reported).
 *
 * Exit labels: `mismatch`, `finish` and `match` release msa through
 * MATCH_ARG_FREE and may only be used after MATCH_ARG_INIT;
 * `mismatch_no_msa` and `finish_no_msa` are the exits for earlier failures.
 */
5400 static int
search_in_range(regex_t * reg,const UChar * str,const UChar * end,const UChar * start,const UChar * range,const UChar * data_range,OnigRegion * region,OnigOptionType option,OnigMatchParam * mp)5401 search_in_range(regex_t* reg, const UChar* str, const UChar* end,
5402 const UChar* start, const UChar* range, /* match start range */
5403 const UChar* data_range, /* subject string range */
5404 OnigRegion* region,
5405 OnigOptionType option, OnigMatchParam* mp)
5406 {
5407 int r;
5408 UChar *s;
5409 MatchArg msa;
5410 const UChar *orig_start = start;
5411
5412 #ifdef ONIG_DEBUG_SEARCH
5413 fprintf(DBGFP,
5414 "onig_search (entry point): str: %p, end: %d, start: %d, range: %d\n",
5415 str, (int )(end - str), (int )(start - str), (int )(range - str));
5416 #endif
5417
5418 ADJUST_MATCH_PARAM(reg, mp);
5419
/* Without USE_POSIX_API the POSIX_REGION option is rejected outright. */
5420 #ifndef USE_POSIX_API
5421 if (OPTON_POSIX_REGION(option)) {
5422 r = ONIGERR_INVALID_ARGUMENT;
5423 goto finish_no_msa;
5424 }
5425 #endif
5426
5427 if (region
5428 #ifdef USE_POSIX_API
5429 && ! OPTON_POSIX_REGION(option)
5430 #endif
5431 ) {
5432 r = onig_region_resize_clear(region, reg->num_mem + 1);
5433 if (r != 0) goto finish_no_msa;
5434 }
5435
/* A start position outside [str, end] is reported as a mismatch. */
5436 if (start > end || start < str) goto mismatch_no_msa;
5437
5438 if (OPTON_CHECK_VALIDITY_OF_STRING(option)) {
5439 if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end)) {
5440 r = ONIGERR_INVALID_WIDE_CHAR_VALUE;
5441 goto finish_no_msa;
5442 }
5443 }
5444
5445
/* MATCH_AND_RETURN_CHECK: attempt a match at the current s.  A result >= 0
   jumps to `match`, any other result except ONIG_MISMATCH jumps to
   `finish`; on ONIG_MISMATCH execution simply continues.  The macro relies
   on the locals r, s and msa and on the labels of this function. */
5446 #define MATCH_AND_RETURN_CHECK(upper_range) \
5447 r = match_at(reg, str, end, (upper_range), s, &msa);\
5448 if (r != ONIG_MISMATCH) {\
5449 if (r >= 0) {\
5450 goto match;\
5451 }\
5452 else goto finish; /* error */ \
5453 }
5454
5455
/* The anchor-based narrowing of start/range below is applied only to a
   non-empty subject (str < end). */
5456 /* anchor optimize: resume search range */
5457 if (reg->anchor != 0 && str < end) {
5458 UChar *min_semi_end, *max_semi_end;
5459
5460 if (reg->anchor & ANCR_BEGIN_POSITION) {
5461 /* search start-position only */
5462 begin_position:
/* Forward direction: range becomes start + 1.  Backward direction: range
   collapses onto start. */
5463 if (range > start)
5464 range = start + 1;
5465 else
5466 range = start;
5467 }
5468 else if (reg->anchor & ANCR_BEGIN_BUF) {
5469 /* search str-position only */
5470 if (range > start) {
5471 if (start != str) goto mismatch_no_msa;
5472 range = str + 1;
5473 }
5474 else {
5475 if (range <= str) {
5476 start = str;
5477 range = str;
5478 }
5479 else
5480 goto mismatch_no_msa;
5481 }
5482 }
5483 else if (reg->anchor & ANCR_END_BUF) {
5484 min_semi_end = max_semi_end = (UChar* )end;
5485
/* end_buf is also entered from the ANCR_SEMI_END_BUF branch below.  It
   clamps start and range using anc_dist_min / anc_dist_max measured back
   from min_semi_end / max_semi_end. */
5486 end_buf:
5487 if ((OnigLen )(max_semi_end - str) < reg->anc_dist_min)
5488 goto mismatch_no_msa;
5489
5490 if (range > start) {
5491 if (reg->anc_dist_max != INFINITE_LEN &&
5492 DIST_CAST(min_semi_end - start) > reg->anc_dist_max) {
5493 start = min_semi_end - reg->anc_dist_max;
5494 if (start < end)
5495 start = onigenc_get_right_adjust_char_head(reg->enc, str, start);
5496 }
5497 if (DIST_CAST(max_semi_end - (range - 1)) < reg->anc_dist_min) {
5498 if (DIST_CAST(max_semi_end - str + 1) < reg->anc_dist_min)
5499 goto mismatch_no_msa;
5500 else
5501 range = max_semi_end - reg->anc_dist_min + 1;
5502 }
5503
5504 if (start > range) goto mismatch_no_msa;
5505 /* If start == range, match with empty at end.
5506 Backward search is used. */
5507 }
5508 else {
5509 if (reg->anc_dist_max != INFINITE_LEN &&
5510 DIST_CAST(min_semi_end - range) > reg->anc_dist_max) {
5511 range = min_semi_end - reg->anc_dist_max;
5512 }
5513 if (DIST_CAST(max_semi_end - start) < reg->anc_dist_min) {
5514 if (DIST_CAST(max_semi_end - str) < reg->anc_dist_min)
5515 goto mismatch_no_msa;
5516 else {
5517 start = max_semi_end - reg->anc_dist_min;
5518 start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start);
5519 }
5520 }
5521 if (range > start) goto mismatch_no_msa;
5522 }
5523 }
5524 else if (reg->anchor & ANCR_SEMI_END_BUF) {
5525 UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, 1);
5526
/* When the last character is a newline, min_semi_end is placed in front of
   it (and in front of a CR-NL pair when USE_CRNL_AS_LINE_TERMINATOR is
   defined); otherwise both bounds are the subject end. */
5527 max_semi_end = (UChar* )end;
5528 if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
5529 min_semi_end = pre_end;
5530
5531 #ifdef USE_CRNL_AS_LINE_TERMINATOR
5532 pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, 1);
5533 if (IS_NOT_NULL(pre_end) &&
5534 ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
5535 min_semi_end = pre_end;
5536 }
5537 #endif
5538 if (min_semi_end > str && start <= min_semi_end) {
5539 goto end_buf;
5540 }
5541 }
5542 else {
5543 min_semi_end = (UChar* )end;
5544 goto end_buf;
5545 }
5546 }
5547 else if ((reg->anchor & ANCR_ANYCHAR_INF_ML) && range > start) {
5548 goto begin_position;
5549 }
5550 }
5551 else if (str == end) { /* empty string */
/* The subject pointers are redirected to a static empty string before the
   single match attempt, which is made only if reg->threshold_len is 0. */
5552 static const UChar* address_for_empty_string = (UChar* )"";
5553
5554 #ifdef ONIG_DEBUG_SEARCH
5555 fprintf(DBGFP, "onig_search: empty string.\n");
5556 #endif
5557
5558 if (reg->threshold_len == 0) {
5559 start = end = str = address_for_empty_string;
5560 s = (UChar* )start;
5561
5562 MATCH_ARG_INIT(msa, reg, option, region, start, mp);
5563 MATCH_AND_RETURN_CHECK(end);
5564 goto mismatch;
5565 }
5566 goto mismatch_no_msa;
5567 }
5568
5569 #ifdef ONIG_DEBUG_SEARCH
5570 fprintf(DBGFP, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
5571 (int )(end - str), (int )(start - str), (int )(range - str));
5572 #endif
5573
/* msa is initialized with the caller's original start (orig_start), not
   the possibly clamped one.  From here on, exits go through `mismatch`,
   `finish` or `match` so that MATCH_ARG_FREE runs. */
5574 MATCH_ARG_INIT(msa, reg, option, region, orig_start, mp);
5575
5576 s = (UChar* )start;
5577 if (range > start) { /* forward search */
5578 if (reg->optimize != OPTIMIZE_NONE) {
5579 UChar *sch_range, *low, *high;
5580
/* sch_range is the limit handed to forward_search(): range itself when
   dist_max is 0, otherwise range + dist_max capped at end (end when
   dist_max is INFINITE_LEN). */
5581 if (reg->dist_max != 0) {
5582 if (reg->dist_max == INFINITE_LEN)
5583 sch_range = (UChar* )end;
5584 else {
5585 if (DIST_CAST(end - range) < reg->dist_max)
5586 sch_range = (UChar* )end;
5587 else {
5588 sch_range = (UChar* )range + reg->dist_max;
5589 }
5590 }
5591 }
5592 else
5593 sch_range = (UChar* )range;
5594
5595 if ((end - start) < reg->threshold_len)
5596 goto mismatch;
5597
5598 if (reg->dist_max != INFINITE_LEN) {
/* Finite dist_max: forward_search() yields a [low, high] window and every
   character position of it (from max(s, low)) is tried. */
5599 do {
5600 if (! forward_search(reg, str, end, s, sch_range, &low, &high))
5601 goto mismatch;
5602 if (s < low) {
5603 s = low;
5604 }
5605 while (s <= high) {
5606 MATCH_AND_RETURN_CHECK(data_range);
5607 s += enclen(reg->enc, s);
5608 }
5609 } while (s < range);
5610 goto mismatch;
5611 }
5612 else { /* check only. */
/* Infinite dist_max: forward_search() serves as a yes/no pre-check only;
   low and high are not used afterwards. */
5613 if (! forward_search(reg, str, end, s, sch_range, &low, &high))
5614 goto mismatch;
5615
5616 if ((reg->anchor & ANCR_ANYCHAR_INF) != 0 &&
5617 (reg->anchor & (ANCR_LOOK_BEHIND | ANCR_PREC_READ_NOT)) == 0) {
/* After a failed attempt, s is advanced until the previously passed
   character is a newline or s reaches range. */
5618 do {
5619 UChar* prev;
5620
5621 MATCH_AND_RETURN_CHECK(data_range);
5622 prev = s;
5623 s += enclen(reg->enc, s);
5624
5625 while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) {
5626 prev = s;
5627 s += enclen(reg->enc, s);
5628 }
5629 } while (s < range);
5630 goto mismatch;
5631 }
5632 }
5633 }
5634
/* Generic forward scan: one match attempt per character position. */
5635 do {
5636 MATCH_AND_RETURN_CHECK(data_range);
5637 s += enclen(reg->enc, s);
5638 } while (s < range);
5639
5640 if (s == range) { /* because empty match with /$/. */
5641 MATCH_AND_RETURN_CHECK(data_range);
5642 }
5643 }
5644 else { /* backward search */
5645 if (range < str) goto mismatch;
5646
5647 if (orig_start < end)
5648 orig_start += enclen(reg->enc, orig_start); /* is upper range */
5649
5650 if (reg->optimize != OPTIMIZE_NONE) {
5651 UChar *low, *high, *adjrange, *sch_start;
5652 const UChar *min_range;
5653
5654 if ((end - range) < reg->threshold_len) goto mismatch;
5655
5656 if (range < end)
5657 adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range);
5658 else
5659 adjrange = (UChar* )end;
5660
/* min_range is the lower limit handed to backward_search():
   range + dist_min, or end when that would not lie before end. */
5661 if (DIST_CAST(end - range) > reg->dist_min)
5662 min_range = range + reg->dist_min;
5663 else
5664 min_range = end;
5665
5666 if (reg->dist_max != INFINITE_LEN) {
5667 do {
5668 if (DIST_CAST(end - s) > reg->dist_max)
5669 sch_start = s + reg->dist_max;
5670 else {
5671 sch_start = onigenc_get_prev_char_head(reg->enc, str, end);
5672 }
5673
5674 if (backward_search(reg, str, end, sch_start, min_range, adjrange,
5675 &low, &high) <= 0)
5676 goto mismatch;
5677
5678 if (s > high)
5679 s = high;
5680
5681 while (PTR_GE(s, low)) {
5682 MATCH_AND_RETURN_CHECK(orig_start);
5683 s = onigenc_get_prev_char_head(reg->enc, str, s);
5684 }
5685 } while (PTR_GE(s, range));
5686 goto mismatch;
5687 }
5688 else { /* check only. */
5689 sch_start = onigenc_get_prev_char_head(reg->enc, str, end);
5690
5691 if (backward_search(reg, str, end, sch_start, min_range, adjrange,
5692 &low, &high) <= 0) goto mismatch;
5693 }
5694 }
5695
/* Generic backward scan: one match attempt per character position, walking
   down to range. */
5696 do {
5697 MATCH_AND_RETURN_CHECK(orig_start);
5698 s = onigenc_get_prev_char_head(reg->enc, str, s);
5699 } while (PTR_GE(s, range));
5700 }
5701
5702 mismatch:
/* With FIND_LONGEST, a candidate recorded in msa (best_len >= 0) turns the
   mismatch into a match at msa.best_s. */
5703 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
5704 if (OPTON_FIND_LONGEST(reg->options)) {
5705 if (msa.best_len >= 0) {
5706 s = msa.best_s;
5707 goto match;
5708 }
5709 }
5710 #endif
5711 r = ONIG_MISMATCH;
5712
5713 finish:
5714 MATCH_ARG_FREE(msa);
5715
5716 /* If result is mismatch and no FIND_NOT_EMPTY option,
5717 then the region is not set in match_at(). */
5718 if (OPTON_FIND_NOT_EMPTY(reg->options) && region
5719 #ifdef USE_POSIX_API
5720 && !OPTON_POSIX_REGION(option)
5721 #endif
5722 ) {
5723 onig_region_clear(region);
5724 }
5725
5726 #ifdef ONIG_DEBUG
5727 if (r != ONIG_MISMATCH)
5728 fprintf(DBGFP, "onig_search: error %d\n", r);
5729 #endif
5730 return r;
5731
/* Exits taken before MATCH_ARG_INIT: msa is not touched here. */
5732 mismatch_no_msa:
5733 r = ONIG_MISMATCH;
5734 finish_no_msa:
5735 #ifdef ONIG_DEBUG
5736 if (r != ONIG_MISMATCH)
5737 fprintf(DBGFP, "onig_search: error %d\n", r);
5738 #endif
5739 return r;
5740
/* Success: release msa and report the match start as an offset from str. */
5741 match:
5742 MATCH_ARG_FREE(msa);
5743 return (int )(s - str);
5744 }
5745
5746 extern int
onig_search_with_param(regex_t * reg,const UChar * str,const UChar * end,const UChar * start,const UChar * range,OnigRegion * region,OnigOptionType option,OnigMatchParam * mp)5747 onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,
5748 const UChar* start, const UChar* range, OnigRegion* region,
5749 OnigOptionType option, OnigMatchParam* mp)
5750 {
5751 const UChar* data_range;
5752
5753 if (range > start)
5754 data_range = range;
5755 else
5756 data_range = end;
5757
5758 return search_in_range(reg, str, end, start, range, data_range, region,
5759 option, mp);
5760 }
5761
5762 extern int
onig_scan(regex_t * reg,const UChar * str,const UChar * end,OnigRegion * region,OnigOptionType option,int (* scan_callback)(int,int,OnigRegion *,void *),void * callback_arg)5763 onig_scan(regex_t* reg, const UChar* str, const UChar* end,
5764 OnigRegion* region, OnigOptionType option,
5765 int (*scan_callback)(int, int, OnigRegion*, void*),
5766 void* callback_arg)
5767 {
5768 int r;
5769 int n;
5770 int rs;
5771 const UChar* start;
5772
5773 if (OPTON_CHECK_VALIDITY_OF_STRING(option)) {
5774 if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end))
5775 return ONIGERR_INVALID_WIDE_CHAR_VALUE;
5776
5777 ONIG_OPTION_OFF(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING);
5778 }
5779
5780 n = 0;
5781 start = str;
5782 while (1) {
5783 r = onig_search(reg, str, end, start, end, region, option);
5784 if (r >= 0) {
5785 rs = scan_callback(n, r, region, callback_arg);
5786 n++;
5787 if (rs != 0)
5788 return rs;
5789
5790 if (region->end[0] == start - str) {
5791 if (start >= end) break;
5792 start += enclen(reg->enc, start);
5793 }
5794 else
5795 start = str + region->end[0];
5796
5797 if (start > end)
5798 break;
5799 }
5800 else if (r == ONIG_MISMATCH) {
5801 break;
5802 }
5803 else { /* error */
5804 return r;
5805 }
5806 }
5807
5808 return n;
5809 }
5810
5811 extern int
onig_get_subexp_call_max_nest_level(void)5812 onig_get_subexp_call_max_nest_level(void)
5813 {
5814 return SubexpCallMaxNestLevel;
5815 }
5816
5817 extern int
onig_set_subexp_call_max_nest_level(int level)5818 onig_set_subexp_call_max_nest_level(int level)
5819 {
5820 SubexpCallMaxNestLevel = level;
5821 return 0;
5822 }
5823
5824 extern OnigEncoding
onig_get_encoding(regex_t * reg)5825 onig_get_encoding(regex_t* reg)
5826 {
5827 return reg->enc;
5828 }
5829
5830 extern OnigOptionType
onig_get_options(regex_t * reg)5831 onig_get_options(regex_t* reg)
5832 {
5833 return reg->options;
5834 }
5835
5836 extern OnigCaseFoldType
onig_get_case_fold_flag(regex_t * reg)5837 onig_get_case_fold_flag(regex_t* reg)
5838 {
5839 return reg->case_fold_flag;
5840 }
5841
5842 extern OnigSyntaxType*
onig_get_syntax(regex_t * reg)5843 onig_get_syntax(regex_t* reg)
5844 {
5845 return reg->syntax;
5846 }
5847
5848 extern int
onig_number_of_captures(regex_t * reg)5849 onig_number_of_captures(regex_t* reg)
5850 {
5851 return reg->num_mem;
5852 }
5853
5854 extern int
onig_number_of_capture_histories(regex_t * reg)5855 onig_number_of_capture_histories(regex_t* reg)
5856 {
5857 #ifdef USE_CAPTURE_HISTORY
5858 int i, n;
5859
5860 n = 0;
5861 for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
5862 if (MEM_STATUS_AT(reg->capture_history, i) != 0)
5863 n++;
5864 }
5865 return n;
5866 #else
5867 return 0;
5868 #endif
5869 }
5870
5871 extern void
onig_copy_encoding(OnigEncoding to,OnigEncoding from)5872 onig_copy_encoding(OnigEncoding to, OnigEncoding from)
5873 {
5874 *to = *from;
5875 }
5876
5877 #ifdef USE_REGSET
5878
5879 extern int
onig_regset_new(OnigRegSet ** rset,int n,regex_t * regs[])5880 onig_regset_new(OnigRegSet** rset, int n, regex_t* regs[])
5881 {
5882 #define REGSET_INITIAL_ALLOC_SIZE 10
5883
5884 int i;
5885 int r;
5886 int alloc;
5887 OnigRegSet* set;
5888 RR* rs;
5889
5890 *rset = 0;
5891
5892 set = (OnigRegSet* )xmalloc(sizeof(*set));
5893 CHECK_NULL_RETURN_MEMERR(set);
5894
5895 alloc = n > REGSET_INITIAL_ALLOC_SIZE ? n : REGSET_INITIAL_ALLOC_SIZE;
5896 rs = (RR* )xmalloc(sizeof(set->rs[0]) * alloc);
5897 if (IS_NULL(rs)) {
5898 xfree(set);
5899 return ONIGERR_MEMORY;
5900 }
5901
5902 set->rs = rs;
5903 set->n = 0;
5904 set->alloc = alloc;
5905
5906 for (i = 0; i < n; i++) {
5907 regex_t* reg = regs[i];
5908
5909 r = onig_regset_add(set, reg);
5910 if (r != 0) {
5911 for (i = 0; i < set->n; i++) {
5912 OnigRegion* region = set->rs[i].region;
5913 if (IS_NOT_NULL(region))
5914 onig_region_free(region, 1);
5915 }
5916 xfree(set->rs);
5917 xfree(set);
5918 return r;
5919 }
5920 }
5921
5922 *rset = set;
5923 return 0;
5924 }
5925
5926 static void
update_regset_by_reg(OnigRegSet * set,regex_t * reg)5927 update_regset_by_reg(OnigRegSet* set, regex_t* reg)
5928 {
5929 if (set->n == 1) {
5930 set->enc = reg->enc;
5931 set->anchor = reg->anchor;
5932 set->anc_dmin = reg->anc_dist_min;
5933 set->anc_dmax = reg->anc_dist_max;
5934 set->all_low_high =
5935 (reg->optimize == OPTIMIZE_NONE || reg->dist_max == INFINITE_LEN) ? 0 : 1;
5936 set->anychar_inf = (reg->anchor & ANCR_ANYCHAR_INF) != 0 ? 1 : 0;
5937 }
5938 else {
5939 int anchor;
5940
5941 anchor = set->anchor & reg->anchor;
5942 if (anchor != 0) {
5943 OnigLen anc_dmin;
5944 OnigLen anc_dmax;
5945
5946 anc_dmin = set->anc_dmin;
5947 anc_dmax = set->anc_dmax;
5948 if (anc_dmin > reg->anc_dist_min) anc_dmin = reg->anc_dist_min;
5949 if (anc_dmax < reg->anc_dist_max) anc_dmax = reg->anc_dist_max;
5950 set->anc_dmin = anc_dmin;
5951 set->anc_dmax = anc_dmax;
5952 }
5953
5954 set->anchor = anchor;
5955
5956 if (reg->optimize == OPTIMIZE_NONE || reg->dist_max == INFINITE_LEN)
5957 set->all_low_high = 0;
5958
5959 if ((reg->anchor & ANCR_ANYCHAR_INF) != 0)
5960 set->anychar_inf = 1;
5961 }
5962 }
5963
5964 extern int
onig_regset_add(OnigRegSet * set,regex_t * reg)5965 onig_regset_add(OnigRegSet* set, regex_t* reg)
5966 {
5967 OnigRegion* region;
5968
5969 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
5970 if (OPTON_FIND_LONGEST(reg->options))
5971 return ONIGERR_INVALID_ARGUMENT;
5972 #endif
5973
5974 if (set->n != 0 && reg->enc != set->enc)
5975 return ONIGERR_INVALID_ARGUMENT;
5976
5977 if (set->n >= set->alloc) {
5978 RR* nrs;
5979 int new_alloc;
5980
5981 new_alloc = set->alloc * 2;
5982 nrs = (RR* )xrealloc(set->rs, sizeof(set->rs[0]) * new_alloc);
5983 CHECK_NULL_RETURN_MEMERR(nrs);
5984
5985 set->rs = nrs;
5986 set->alloc = new_alloc;
5987 }
5988
5989 region = onig_region_new();
5990 CHECK_NULL_RETURN_MEMERR(region);
5991
5992 set->rs[set->n].reg = reg;
5993 set->rs[set->n].region = region;
5994 set->n++;
5995
5996 update_regset_by_reg(set, reg);
5997 return 0;
5998 }
5999
6000 extern int
onig_regset_replace(OnigRegSet * set,int at,regex_t * reg)6001 onig_regset_replace(OnigRegSet* set, int at, regex_t* reg)
6002 {
6003 int i;
6004
6005 if (at < 0 || at >= set->n)
6006 return ONIGERR_INVALID_ARGUMENT;
6007
6008 if (IS_NULL(reg)) {
6009 onig_region_free(set->rs[at].region, 1);
6010 for (i = at; i < set->n - 1; i++) {
6011 set->rs[i].reg = set->rs[i+1].reg;
6012 set->rs[i].region = set->rs[i+1].region;
6013 }
6014 set->n--;
6015 }
6016 else {
6017 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
6018 if (OPTON_FIND_LONGEST(reg->options))
6019 return ONIGERR_INVALID_ARGUMENT;
6020 #endif
6021
6022 if (set->n > 1 && reg->enc != set->enc)
6023 return ONIGERR_INVALID_ARGUMENT;
6024
6025 set->rs[at].reg = reg;
6026 }
6027
6028 for (i = 0; i < set->n; i++)
6029 update_regset_by_reg(set, set->rs[i].reg);
6030
6031 return 0;
6032 }
6033
6034 extern void
onig_regset_free(OnigRegSet * set)6035 onig_regset_free(OnigRegSet* set)
6036 {
6037 int i;
6038
6039 for (i = 0; i < set->n; i++) {
6040 regex_t* reg;
6041 OnigRegion* region;
6042
6043 reg = set->rs[i].reg;
6044 region = set->rs[i].region;
6045 onig_free(reg);
6046 if (IS_NOT_NULL(region))
6047 onig_region_free(region, 1);
6048 }
6049
6050 xfree(set->rs);
6051 xfree(set);
6052 }
6053
6054 extern int
onig_regset_number_of_regex(OnigRegSet * set)6055 onig_regset_number_of_regex(OnigRegSet* set)
6056 {
6057 return set->n;
6058 }
6059
6060 extern regex_t*
onig_regset_get_regex(OnigRegSet * set,int at)6061 onig_regset_get_regex(OnigRegSet* set, int at)
6062 {
6063 if (at < 0 || at >= set->n)
6064 return (regex_t* )0;
6065
6066 return set->rs[at].reg;
6067 }
6068
6069 extern OnigRegion*
onig_regset_get_region(OnigRegSet * set,int at)6070 onig_regset_get_region(OnigRegSet* set, int at)
6071 {
6072 if (at < 0 || at >= set->n)
6073 return (OnigRegion* )0;
6074
6075 return set->rs[at].region;
6076 }
6077
6078 #endif /* USE_REGSET */
6079
6080
6081 #ifdef USE_DIRECT_THREADED_CODE
6082 extern int
onig_init_for_match_at(regex_t * reg)6083 onig_init_for_match_at(regex_t* reg)
6084 {
6085 return match_at(reg, (const UChar* )NULL, (const UChar* )NULL,
6086 (const UChar* )NULL, (const UChar* )NULL,
6087 (MatchArg* )NULL);
6088 }
6089 #endif
6090
6091
6092 /* for callout functions */
6093
6094 #ifdef USE_CALLOUT
6095
6096 extern OnigCalloutFunc
onig_get_progress_callout(void)6097 onig_get_progress_callout(void)
6098 {
6099 return DefaultProgressCallout;
6100 }
6101
6102 extern int
onig_set_progress_callout(OnigCalloutFunc f)6103 onig_set_progress_callout(OnigCalloutFunc f)
6104 {
6105 DefaultProgressCallout = f;
6106 return ONIG_NORMAL;
6107 }
6108
6109 extern OnigCalloutFunc
onig_get_retraction_callout(void)6110 onig_get_retraction_callout(void)
6111 {
6112 return DefaultRetractionCallout;
6113 }
6114
6115 extern int
onig_set_retraction_callout(OnigCalloutFunc f)6116 onig_set_retraction_callout(OnigCalloutFunc f)
6117 {
6118 DefaultRetractionCallout = f;
6119 return ONIG_NORMAL;
6120 }
6121
6122 extern int
onig_get_callout_num_by_callout_args(OnigCalloutArgs * args)6123 onig_get_callout_num_by_callout_args(OnigCalloutArgs* args)
6124 {
6125 return args->num;
6126 }
6127
6128 extern OnigCalloutIn
onig_get_callout_in_by_callout_args(OnigCalloutArgs * args)6129 onig_get_callout_in_by_callout_args(OnigCalloutArgs* args)
6130 {
6131 return args->in;
6132 }
6133
6134 extern int
onig_get_name_id_by_callout_args(OnigCalloutArgs * args)6135 onig_get_name_id_by_callout_args(OnigCalloutArgs* args)
6136 {
6137 return args->name_id;
6138 }
6139
6140 extern const UChar*
onig_get_contents_by_callout_args(OnigCalloutArgs * args)6141 onig_get_contents_by_callout_args(OnigCalloutArgs* args)
6142 {
6143 int num;
6144 CalloutListEntry* e;
6145
6146 num = args->num;
6147 e = onig_reg_callout_list_at(args->regex, num);
6148 if (IS_NULL(e)) return 0;
6149 if (e->of == ONIG_CALLOUT_OF_CONTENTS) {
6150 return e->u.content.start;
6151 }
6152
6153 return 0;
6154 }
6155
6156 extern const UChar*
onig_get_contents_end_by_callout_args(OnigCalloutArgs * args)6157 onig_get_contents_end_by_callout_args(OnigCalloutArgs* args)
6158 {
6159 int num;
6160 CalloutListEntry* e;
6161
6162 num = args->num;
6163 e = onig_reg_callout_list_at(args->regex, num);
6164 if (IS_NULL(e)) return 0;
6165 if (e->of == ONIG_CALLOUT_OF_CONTENTS) {
6166 return e->u.content.end;
6167 }
6168
6169 return 0;
6170 }
6171
6172 extern int
onig_get_args_num_by_callout_args(OnigCalloutArgs * args)6173 onig_get_args_num_by_callout_args(OnigCalloutArgs* args)
6174 {
6175 int num;
6176 CalloutListEntry* e;
6177
6178 num = args->num;
6179 e = onig_reg_callout_list_at(args->regex, num);
6180 if (IS_NULL(e)) return ONIGERR_INVALID_ARGUMENT;
6181 if (e->of == ONIG_CALLOUT_OF_NAME) {
6182 return e->u.arg.num;
6183 }
6184
6185 return ONIGERR_INVALID_ARGUMENT;
6186 }
6187
6188 extern int
onig_get_passed_args_num_by_callout_args(OnigCalloutArgs * args)6189 onig_get_passed_args_num_by_callout_args(OnigCalloutArgs* args)
6190 {
6191 int num;
6192 CalloutListEntry* e;
6193
6194 num = args->num;
6195 e = onig_reg_callout_list_at(args->regex, num);
6196 if (IS_NULL(e)) return ONIGERR_INVALID_ARGUMENT;
6197 if (e->of == ONIG_CALLOUT_OF_NAME) {
6198 return e->u.arg.passed_num;
6199 }
6200
6201 return ONIGERR_INVALID_ARGUMENT;
6202 }
6203
6204 extern int
onig_get_arg_by_callout_args(OnigCalloutArgs * args,int index,OnigType * type,OnigValue * val)6205 onig_get_arg_by_callout_args(OnigCalloutArgs* args, int index,
6206 OnigType* type, OnigValue* val)
6207 {
6208 int num;
6209 CalloutListEntry* e;
6210
6211 num = args->num;
6212 e = onig_reg_callout_list_at(args->regex, num);
6213 if (IS_NULL(e)) return ONIGERR_INVALID_ARGUMENT;
6214 if (e->of == ONIG_CALLOUT_OF_NAME) {
6215 if (IS_NOT_NULL(type)) *type = e->u.arg.types[index];
6216 if (IS_NOT_NULL(val)) *val = e->u.arg.vals[index];
6217 return ONIG_NORMAL;
6218 }
6219
6220 return ONIGERR_INVALID_ARGUMENT;
6221 }
6222
6223 extern const UChar*
onig_get_string_by_callout_args(OnigCalloutArgs * args)6224 onig_get_string_by_callout_args(OnigCalloutArgs* args)
6225 {
6226 return args->string;
6227 }
6228
6229 extern const UChar*
onig_get_string_end_by_callout_args(OnigCalloutArgs * args)6230 onig_get_string_end_by_callout_args(OnigCalloutArgs* args)
6231 {
6232 return args->string_end;
6233 }
6234
6235 extern const UChar*
onig_get_start_by_callout_args(OnigCalloutArgs * args)6236 onig_get_start_by_callout_args(OnigCalloutArgs* args)
6237 {
6238 return args->start;
6239 }
6240
6241 extern const UChar*
onig_get_right_range_by_callout_args(OnigCalloutArgs * args)6242 onig_get_right_range_by_callout_args(OnigCalloutArgs* args)
6243 {
6244 return args->right_range;
6245 }
6246
6247 extern const UChar*
onig_get_current_by_callout_args(OnigCalloutArgs * args)6248 onig_get_current_by_callout_args(OnigCalloutArgs* args)
6249 {
6250 return args->current;
6251 }
6252
6253 extern OnigRegex
onig_get_regex_by_callout_args(OnigCalloutArgs * args)6254 onig_get_regex_by_callout_args(OnigCalloutArgs* args)
6255 {
6256 return args->regex;
6257 }
6258
6259 extern unsigned long
onig_get_retry_counter_by_callout_args(OnigCalloutArgs * args)6260 onig_get_retry_counter_by_callout_args(OnigCalloutArgs* args)
6261 {
6262 return args->retry_in_match_counter;
6263 }
6264
6265
6266 extern int
onig_get_capture_range_in_callout(OnigCalloutArgs * a,int mem_num,int * begin,int * end)6267 onig_get_capture_range_in_callout(OnigCalloutArgs* a, int mem_num, int* begin, int* end)
6268 {
6269 OnigRegex reg;
6270 const UChar* str;
6271 StackType* stk_base;
6272 int i;
6273 StkPtrType* mem_start_stk;
6274 StkPtrType* mem_end_stk;
6275
6276 i = mem_num;
6277 reg = a->regex;
6278 str = a->string;
6279 stk_base = a->stk_base;
6280 mem_start_stk = a->mem_start_stk;
6281 mem_end_stk = a->mem_end_stk;
6282
6283 if (i > 0) {
6284 if (a->mem_end_stk[i].i != INVALID_STACK_INDEX) {
6285 *begin = (int )(STACK_MEM_START(reg, i) - str);
6286 *end = (int )(STACK_MEM_END(reg, i) - str);
6287 }
6288 else {
6289 *begin = *end = ONIG_REGION_NOTPOS;
6290 }
6291 }
6292 else
6293 return ONIGERR_INVALID_ARGUMENT;
6294
6295 return ONIG_NORMAL;
6296 }
6297
6298 extern int
onig_get_used_stack_size_in_callout(OnigCalloutArgs * a,int * used_num,int * used_bytes)6299 onig_get_used_stack_size_in_callout(OnigCalloutArgs* a, int* used_num, int* used_bytes)
6300 {
6301 int n;
6302
6303 n = (int )(a->stk - a->stk_base);
6304
6305 if (used_num != 0)
6306 *used_num = n;
6307
6308 if (used_bytes != 0)
6309 *used_bytes = n * sizeof(StackType);
6310
6311 return ONIG_NORMAL;
6312 }
6313
6314
6315 /* builtin callout functions */
6316
6317 extern int
onig_builtin_fail(OnigCalloutArgs * args ARG_UNUSED,void * user_data ARG_UNUSED)6318 onig_builtin_fail(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUSED)
6319 {
6320 return ONIG_CALLOUT_FAIL;
6321 }
6322
6323 extern int
onig_builtin_mismatch(OnigCalloutArgs * args ARG_UNUSED,void * user_data ARG_UNUSED)6324 onig_builtin_mismatch(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUSED)
6325 {
6326 return ONIG_MISMATCH;
6327 }
6328
6329 extern int
onig_builtin_error(OnigCalloutArgs * args,void * user_data ARG_UNUSED)6330 onig_builtin_error(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
6331 {
6332 int r;
6333 int n;
6334 OnigValue val;
6335
6336 r = onig_get_arg_by_callout_args(args, 0, 0, &val);
6337 if (r != ONIG_NORMAL) return r;
6338
6339 n = (int )val.l;
6340 if (n >= 0) {
6341 n = ONIGERR_INVALID_CALLOUT_BODY;
6342 }
6343 else if (onig_is_error_code_needs_param(n)) {
6344 n = ONIGERR_INVALID_CALLOUT_BODY;
6345 }
6346
6347 return n;
6348 }
6349
6350 extern int
onig_builtin_count(OnigCalloutArgs * args,void * user_data)6351 onig_builtin_count(OnigCalloutArgs* args, void* user_data)
6352 {
6353 (void )onig_check_callout_data_and_clear_old_values(args);
6354
6355 return onig_builtin_total_count(args, user_data);
6356 }
6357
6358 extern int
onig_builtin_total_count(OnigCalloutArgs * args,void * user_data ARG_UNUSED)6359 onig_builtin_total_count(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
6360 {
6361 int r;
6362 int slot;
6363 OnigType type;
6364 OnigValue val;
6365 OnigValue aval;
6366 OnigCodePoint count_type;
6367
6368 r = onig_get_arg_by_callout_args(args, 0, &type, &aval);
6369 if (r != ONIG_NORMAL) return r;
6370
6371 count_type = aval.c;
6372 if (count_type != '>' && count_type != 'X' && count_type != '<')
6373 return ONIGERR_INVALID_CALLOUT_ARG;
6374
6375 r = onig_get_callout_data_by_callout_args_self_dont_clear_old(args, 0,
6376 &type, &val);
6377 if (r < ONIG_NORMAL)
6378 return r;
6379 else if (r > ONIG_NORMAL) {
6380 /* type == void: initial state */
6381 val.l = 0;
6382 }
6383
6384 if (args->in == ONIG_CALLOUT_IN_RETRACTION) {
6385 slot = 2;
6386 if (count_type == '<')
6387 val.l++;
6388 else if (count_type == 'X')
6389 val.l--;
6390 }
6391 else {
6392 slot = 1;
6393 if (count_type != '<')
6394 val.l++;
6395 }
6396
6397 r = onig_set_callout_data_by_callout_args_self(args, 0, ONIG_TYPE_LONG, &val);
6398 if (r != ONIG_NORMAL) return r;
6399
6400 /* slot 1: in progress counter, slot 2: in retraction counter */
6401 r = onig_get_callout_data_by_callout_args_self_dont_clear_old(args, slot,
6402 &type, &val);
6403 if (r < ONIG_NORMAL)
6404 return r;
6405 else if (r > ONIG_NORMAL) {
6406 val.l = 0;
6407 }
6408
6409 val.l++;
6410 r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val);
6411 if (r != ONIG_NORMAL) return r;
6412
6413 return ONIG_CALLOUT_SUCCESS;
6414 }
6415
6416 extern int
onig_builtin_max(OnigCalloutArgs * args,void * user_data ARG_UNUSED)6417 onig_builtin_max(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
6418 {
6419 int r;
6420 int slot;
6421 long max_val;
6422 OnigCodePoint count_type;
6423 OnigType type;
6424 OnigValue val;
6425 OnigValue aval;
6426
6427 (void )onig_check_callout_data_and_clear_old_values(args);
6428
6429 slot = 0;
6430 r = onig_get_callout_data_by_callout_args_self(args, slot, &type, &val);
6431 if (r < ONIG_NORMAL)
6432 return r;
6433 else if (r > ONIG_NORMAL) {
6434 /* type == void: initial state */
6435 type = ONIG_TYPE_LONG;
6436 val.l = 0;
6437 }
6438
6439 r = onig_get_arg_by_callout_args(args, 0, &type, &aval);
6440 if (r != ONIG_NORMAL) return r;
6441 if (type == ONIG_TYPE_TAG) {
6442 r = onig_get_callout_data_by_callout_args(args, aval.tag, 0, &type, &aval);
6443 if (r < ONIG_NORMAL) return r;
6444 else if (r > ONIG_NORMAL)
6445 max_val = 0L;
6446 else
6447 max_val = aval.l;
6448 }
6449 else { /* LONG */
6450 max_val = aval.l;
6451 }
6452
6453 r = onig_get_arg_by_callout_args(args, 1, &type, &aval);
6454 if (r != ONIG_NORMAL) return r;
6455
6456 count_type = aval.c;
6457 if (count_type != '>' && count_type != 'X' && count_type != '<')
6458 return ONIGERR_INVALID_CALLOUT_ARG;
6459
6460 if (args->in == ONIG_CALLOUT_IN_RETRACTION) {
6461 if (count_type == '<') {
6462 if (val.l >= max_val) return ONIG_CALLOUT_FAIL;
6463 val.l++;
6464 }
6465 else if (count_type == 'X')
6466 val.l--;
6467 }
6468 else {
6469 if (count_type != '<') {
6470 if (val.l >= max_val) return ONIG_CALLOUT_FAIL;
6471 val.l++;
6472 }
6473 }
6474
6475 r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val);
6476 if (r != ONIG_NORMAL) return r;
6477
6478 return ONIG_CALLOUT_SUCCESS;
6479 }
6480
/* Comparison operators recognised by onig_builtin_cmp(). */
enum OP_CMP {
  OP_EQ = 0,  /* "==" */
  OP_NE,      /* "!=" */
  OP_LT,      /* "<"  */
  OP_GT,      /* ">"  */
  OP_LE,      /* "<=" */
  OP_GE       /* ">=" */
};
6489
6490 extern int
onig_builtin_cmp(OnigCalloutArgs * args,void * user_data ARG_UNUSED)6491 onig_builtin_cmp(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
6492 {
6493 int r;
6494 int slot;
6495 long lv;
6496 long rv;
6497 OnigType type;
6498 OnigValue val;
6499 regex_t* reg;
6500 enum OP_CMP op;
6501
6502 reg = args->regex;
6503
6504 r = onig_get_arg_by_callout_args(args, 0, &type, &val);
6505 if (r != ONIG_NORMAL) return r;
6506
6507 if (type == ONIG_TYPE_TAG) {
6508 r = onig_get_callout_data_by_callout_args(args, val.tag, 0, &type, &val);
6509 if (r < ONIG_NORMAL) return r;
6510 else if (r > ONIG_NORMAL)
6511 lv = 0L;
6512 else
6513 lv = val.l;
6514 }
6515 else { /* ONIG_TYPE_LONG */
6516 lv = val.l;
6517 }
6518
6519 r = onig_get_arg_by_callout_args(args, 2, &type, &val);
6520 if (r != ONIG_NORMAL) return r;
6521
6522 if (type == ONIG_TYPE_TAG) {
6523 r = onig_get_callout_data_by_callout_args(args, val.tag, 0, &type, &val);
6524 if (r < ONIG_NORMAL) return r;
6525 else if (r > ONIG_NORMAL)
6526 rv = 0L;
6527 else
6528 rv = val.l;
6529 }
6530 else { /* ONIG_TYPE_LONG */
6531 rv = val.l;
6532 }
6533
6534 slot = 0;
6535 r = onig_get_callout_data_by_callout_args_self(args, slot, &type, &val);
6536 if (r < ONIG_NORMAL)
6537 return r;
6538 else if (r > ONIG_NORMAL) {
6539 /* type == void: initial state */
6540 OnigCodePoint c1, c2;
6541 UChar* p;
6542
6543 r = onig_get_arg_by_callout_args(args, 1, &type, &val);
6544 if (r != ONIG_NORMAL) return r;
6545
6546 p = val.s.start;
6547 c1 = ONIGENC_MBC_TO_CODE(reg->enc, p, val.s.end);
6548 p += ONIGENC_MBC_ENC_LEN(reg->enc, p);
6549 if (p < val.s.end) {
6550 c2 = ONIGENC_MBC_TO_CODE(reg->enc, p, val.s.end);
6551 p += ONIGENC_MBC_ENC_LEN(reg->enc, p);
6552 if (p != val.s.end) return ONIGERR_INVALID_CALLOUT_ARG;
6553 }
6554 else
6555 c2 = 0;
6556
6557 switch (c1) {
6558 case '=':
6559 if (c2 != '=') return ONIGERR_INVALID_CALLOUT_ARG;
6560 op = OP_EQ;
6561 break;
6562 case '!':
6563 if (c2 != '=') return ONIGERR_INVALID_CALLOUT_ARG;
6564 op = OP_NE;
6565 break;
6566 case '<':
6567 if (c2 == '=') op = OP_LE;
6568 else if (c2 == 0) op = OP_LT;
6569 else return ONIGERR_INVALID_CALLOUT_ARG;
6570 break;
6571 case '>':
6572 if (c2 == '=') op = OP_GE;
6573 else if (c2 == 0) op = OP_GT;
6574 else return ONIGERR_INVALID_CALLOUT_ARG;
6575 break;
6576 default:
6577 return ONIGERR_INVALID_CALLOUT_ARG;
6578 break;
6579 }
6580 val.l = (long )op;
6581 r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val);
6582 if (r != ONIG_NORMAL) return r;
6583 }
6584 else {
6585 op = (enum OP_CMP )val.l;
6586 }
6587
6588 switch (op) {
6589 case OP_EQ: r = (lv == rv); break;
6590 case OP_NE: r = (lv != rv); break;
6591 case OP_LT: r = (lv < rv); break;
6592 case OP_GT: r = (lv > rv); break;
6593 case OP_LE: r = (lv <= rv); break;
6594 case OP_GE: r = (lv >= rv); break;
6595 }
6596
6597 return r == 0 ? ONIG_CALLOUT_FAIL : ONIG_CALLOUT_SUCCESS;
6598 }
6599
6600
6601 #ifndef ONIG_NO_PRINT
6602
/* Output stream written to by onig_builtin_monitor(). */
static FILE* OutFp = NULL;
6604
/* The name starts with "onig_" for use with macros. */
6606 static int
onig_builtin_monitor(OnigCalloutArgs * args,void * user_data)6607 onig_builtin_monitor(OnigCalloutArgs* args, void* user_data)
6608 {
6609 int r;
6610 int num;
6611 size_t tag_len;
6612 const UChar* start;
6613 const UChar* right;
6614 const UChar* current;
6615 const UChar* string;
6616 const UChar* strend;
6617 const UChar* tag_start;
6618 const UChar* tag_end;
6619 regex_t* reg;
6620 OnigCalloutIn in;
6621 OnigType type;
6622 OnigValue val;
6623 char buf[20];
6624 FILE* fp;
6625
6626 fp = OutFp;
6627
6628 r = onig_get_arg_by_callout_args(args, 0, &type, &val);
6629 if (r != ONIG_NORMAL) return r;
6630
6631 in = onig_get_callout_in_by_callout_args(args);
6632 if (in == ONIG_CALLOUT_IN_PROGRESS) {
6633 if (val.c == '<')
6634 return ONIG_CALLOUT_SUCCESS;
6635 }
6636 else {
6637 if (val.c != 'X' && val.c != '<')
6638 return ONIG_CALLOUT_SUCCESS;
6639 }
6640
6641 num = onig_get_callout_num_by_callout_args(args);
6642 start = onig_get_start_by_callout_args(args);
6643 right = onig_get_right_range_by_callout_args(args);
6644 current = onig_get_current_by_callout_args(args);
6645 string = onig_get_string_by_callout_args(args);
6646 strend = onig_get_string_end_by_callout_args(args);
6647 reg = onig_get_regex_by_callout_args(args);
6648 tag_start = onig_get_callout_tag_start(reg, num);
6649 tag_end = onig_get_callout_tag_end(reg, num);
6650
6651 if (tag_start == 0)
6652 xsnprintf(buf, sizeof(buf), "#%d", num);
6653 else {
6654 /* CAUTION: tag string is not terminated with NULL. */
6655 int i;
6656
6657 tag_len = tag_end - tag_start;
6658 if (tag_len >= sizeof(buf)) tag_len = sizeof(buf) - 1;
6659 for (i = 0; i < (int )tag_len; i++) buf[i] = tag_start[i];
6660 buf[tag_len] = '\0';
6661 }
6662
6663 fprintf(fp, "ONIG-MONITOR: %-4s %s at: %d [%d - %d] len: %d\n",
6664 buf,
6665 in == ONIG_CALLOUT_IN_PROGRESS ? "=>" : "<=",
6666 (int )(current - string),
6667 (int )(start - string),
6668 (int )(right - string),
6669 (int )(strend - string));
6670 fflush(fp);
6671
6672 return ONIG_CALLOUT_SUCCESS;
6673 }
6674
6675 extern int
onig_setup_builtin_monitors_by_ascii_encoded_name(void * fp)6676 onig_setup_builtin_monitors_by_ascii_encoded_name(void* fp /* FILE* */)
6677 {
6678 int id;
6679 char* name;
6680 OnigEncoding enc;
6681 unsigned int ts[4];
6682 OnigValue opts[4];
6683
6684 if (IS_NOT_NULL(fp))
6685 OutFp = (FILE* )fp;
6686 else
6687 OutFp = stdout;
6688
6689 enc = ONIG_ENCODING_ASCII;
6690
6691 name = "MON";
6692 ts[0] = ONIG_TYPE_CHAR;
6693 opts[0].c = '>';
6694 BC_B_O(name, monitor, 1, ts, 1, opts);
6695
6696 return ONIG_NORMAL;
6697 }
6698
6699 #endif /* ONIG_NO_PRINT */
6700
6701 #endif /* USE_CALLOUT */
6702