1 /**********************************************************************
2   regexec.c -  Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5  * Copyright (c) 2002-2020  K.Kosako
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #ifndef ONIG_NO_PRINT
31 #ifndef NEED_TO_INCLUDE_STDIO
32 #define NEED_TO_INCLUDE_STDIO
33 #endif
34 #endif
35 
36 #include "regint.h"
37 
/* Word-character test for the character at s.  A mode of 0 selects
   ONIGENC_IS_MBC_WORD; any other mode selects ONIGENC_IS_MBC_WORD_ASCII. */
#define IS_MBC_WORD_ASCII_MODE(enc,s,end,mode) \
  ((mode) == 0 ? ONIGENC_IS_MBC_WORD(enc,s,end) : ONIGENC_IS_MBC_WORD_ASCII(enc,s,end))

#ifdef USE_CRNL_AS_LINE_TERMINATOR
/* True when the character at p has code 13 (CR) and the character that
   immediately follows it is a newline according to the encoding.
   Note: p is evaluated more than once and is not parenthesized in the
   pointer arithmetic, so pass a simple pointer expression. */
#define ONIGENC_IS_MBC_CRNL(enc,p,end) \
  (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
   ONIGENC_IS_MBC_NEWLINE(enc,(p+enclen(enc,p)),end))
#endif
46 
/* Feature switch with no value.
   NOTE(review): presumably enables interrupt checks inside the matcher; the
   code that tests this macro is outside this section -- confirm. */
#define CHECK_INTERRUPT_IN_MATCH

/* Start position recorded for capture group idx.
   If the group's bit is set in reg->push_mem_start, the slot holds a stack
   index and the position is read from that stack entry (u.mem.pstr);
   otherwise the slot holds the string pointer directly.
   Expects mem_start_stk (and whatever STACK_AT needs) at the expansion site. */
#define STACK_MEM_START(reg, idx) \
  (MEM_STATUS_AT((reg)->push_mem_start, (idx)) != 0 ? \
   STACK_AT(mem_start_stk[idx].i)->u.mem.pstr : mem_start_stk[idx].s)

/* End position recorded for capture group idx; same scheme as
   STACK_MEM_START, keyed on reg->push_mem_end and mem_end_stk. */
#define STACK_MEM_END(reg, idx) \
  (MEM_STATUS_AT((reg)->push_mem_end, (idx)) != 0 ? \
   STACK_AT(mem_end_stk[idx].i)->u.mem.pstr : mem_end_stk[idx].s)
56 
/* Forward declarations of file-local search routines; their definitions are
   not part of this section.
   NOTE(review): low/high look like output parameters -- confirm at the definition. */
static int forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start, UChar* range, UChar** low, UChar** high);

static int
search_in_range(regex_t* reg, const UChar* str, const UChar* end, const UChar* start, const UChar* range, /* match range */ const UChar* data_range, /* subject string range */ OnigRegion* region, OnigOptionType option, OnigMatchParam* mp);
61 
62 
#ifdef USE_CALLOUT
/* Per-callout data record: a counter plus ONIG_CALLOUT_DATA_SLOT_NUM typed
   value slots.  OnigMatchParamStruct holds a pointer to an array of these
   (callout_data / callout_data_alloc_num).
   NOTE(review): last_match_at_call_counter presumably remembers the
   match_at_call_counter value of the last access -- confirm at the users. */
typedef struct {
  int last_match_at_call_counter;
  struct {
    OnigType  type;   /* type tag for val */
    OnigValue val;    /* stored value */
  } slot[ONIG_CALLOUT_DATA_SLOT_NUM];
} CalloutData;
#endif
72 
/* Per-match tunables supplied by the caller.  The onig_set_*_of_match_param()
   functions in this file write these fields one at a time.  Fields guarded
   by USE_RETRY_LIMIT / USE_CALLOUT exist only in builds with that feature. */
struct OnigMatchParamStruct {
  unsigned int    match_stack_limit;        /* set by onig_set_match_stack_limit_size_of_match_param() */
#ifdef USE_RETRY_LIMIT
  unsigned long   retry_limit_in_match;     /* set by onig_set_retry_limit_in_match_of_match_param() */
  unsigned long   retry_limit_in_search;    /* set by onig_set_retry_limit_in_search_of_match_param() */
#endif
#ifdef USE_CALLOUT
  OnigCalloutFunc progress_callout_of_contents;    /* set by onig_set_progress_callout_of_match_param() */
  OnigCalloutFunc retraction_callout_of_contents;  /* set by onig_set_retraction_callout_of_match_param() */
  int             match_at_call_counter;    /* NOTE(review): maintained outside this section */
  void*           callout_user_data;        /* set by onig_set_callout_user_data_of_match_param() */
  CalloutData*    callout_data;             /* array of CalloutData records */
  int             callout_data_alloc_num;   /* NOTE(review): presumably the allocated length of callout_data -- confirm */
#endif
};
88 
89 extern int
onig_set_match_stack_limit_size_of_match_param(OnigMatchParam * param,unsigned int limit)90 onig_set_match_stack_limit_size_of_match_param(OnigMatchParam* param,
91                                                unsigned int limit)
92 {
93   param->match_stack_limit = limit;
94   return ONIG_NORMAL;
95 }
96 
97 extern int
onig_set_retry_limit_in_match_of_match_param(OnigMatchParam * param,unsigned long limit)98 onig_set_retry_limit_in_match_of_match_param(OnigMatchParam* param,
99                                              unsigned long limit)
100 {
101 #ifdef USE_RETRY_LIMIT
102   param->retry_limit_in_match = limit;
103   return ONIG_NORMAL;
104 #else
105   return ONIG_NO_SUPPORT_CONFIG;
106 #endif
107 }
108 
109 extern int
onig_set_retry_limit_in_search_of_match_param(OnigMatchParam * param,unsigned long limit)110 onig_set_retry_limit_in_search_of_match_param(OnigMatchParam* param,
111                                               unsigned long limit)
112 {
113 #ifdef USE_RETRY_LIMIT
114   param->retry_limit_in_search = limit;
115   return ONIG_NORMAL;
116 #else
117   return ONIG_NO_SUPPORT_CONFIG;
118 #endif
119 }
120 
121 extern int
onig_set_progress_callout_of_match_param(OnigMatchParam * param,OnigCalloutFunc f)122 onig_set_progress_callout_of_match_param(OnigMatchParam* param, OnigCalloutFunc f)
123 {
124 #ifdef USE_CALLOUT
125   param->progress_callout_of_contents = f;
126   return ONIG_NORMAL;
127 #else
128   return ONIG_NO_SUPPORT_CONFIG;
129 #endif
130 }
131 
132 extern int
onig_set_retraction_callout_of_match_param(OnigMatchParam * param,OnigCalloutFunc f)133 onig_set_retraction_callout_of_match_param(OnigMatchParam* param, OnigCalloutFunc f)
134 {
135 #ifdef USE_CALLOUT
136   param->retraction_callout_of_contents = f;
137   return ONIG_NORMAL;
138 #else
139   return ONIG_NO_SUPPORT_CONFIG;
140 #endif
141 }
142 
143 extern int
onig_set_callout_user_data_of_match_param(OnigMatchParam * param,void * user_data)144 onig_set_callout_user_data_of_match_param(OnigMatchParam* param, void* user_data)
145 {
146 #ifdef USE_CALLOUT
147   param->callout_user_data = user_data;
148   return ONIG_NORMAL;
149 #else
150   return ONIG_NO_SUPPORT_CONFIG;
151 #endif
152 }
153 
154 
/* Working state handed to the matcher for one match/search call.
   NOTE(review): the code that fills in and consumes this structure is outside
   this section; field notes below that go beyond the original comments are
   hedged accordingly. */
typedef struct {
  void* stack_p;                 /* NOTE(review): presumably the match stack buffer -- confirm */
  int   stack_n;                 /* NOTE(review): presumably its size in entries -- confirm */
  OnigOptionType options;
  OnigRegion*    region;
  int            ptr_num;
  const UChar*   start;   /* search start position (for \G: BEGIN_POSITION) */
  unsigned int   match_stack_limit;
#ifdef USE_RETRY_LIMIT
  unsigned long  retry_limit_in_match;
  unsigned long  retry_limit_in_search;
  unsigned long  retry_limit_in_search_counter;
#endif
  OnigMatchParam* mp;
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
  int    best_len;      /* for ONIG_OPTION_FIND_LONGEST */
  UChar* best_s;
#endif
#ifdef USE_CALL
  unsigned long  subexp_call_in_search_counter;
#endif
} MatchArg;
177 
178 
179 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
180 
/* arguments type: kinds of operand an opcode can carry (debug builds only;
   this enum is compiled under ONIG_DEBUG_COMPILE / ONIG_DEBUG_MATCH). */
typedef enum {
  ARG_SPECIAL = -1,
  ARG_NON     =  0,
  ARG_RELADDR =  1,
  ARG_ABSADDR =  2,
  ARG_LENGTH  =  3,
  ARG_MEMNUM  =  4,
  ARG_OPTION  =  5,
  ARG_MODE    =  6
} OpArgType;
192 
/* One row of the opcode-name table (OpInfo) used by the debug printer. */
typedef struct {
  short int opcode;   /* opcode value; a negative value terminates the table */
  char*     name;     /* printable name for the opcode */
} OpInfoType;
197 
198 static OpInfoType OpInfo[] = {
199   { OP_FINISH,         "finish"},
200   { OP_END,            "end"},
201   { OP_STR_1,          "str_1"},
202   { OP_STR_2,          "str_2"},
203   { OP_STR_3,          "str_3"},
204   { OP_STR_4,          "str_4"},
205   { OP_STR_5,          "str_5"},
206   { OP_STR_N,          "str_n"},
207   { OP_STR_MB2N1,      "str_mb2-n1"},
208   { OP_STR_MB2N2,      "str_mb2-n2"},
209   { OP_STR_MB2N3,      "str_mb2-n3"},
210   { OP_STR_MB2N,       "str_mb2-n"},
211   { OP_STR_MB3N,       "str_mb3n"},
212   { OP_STR_MBN,        "str_mbn"},
213   { OP_CCLASS,         "cclass"},
214   { OP_CCLASS_MB,      "cclass-mb"},
215   { OP_CCLASS_MIX,     "cclass-mix"},
216   { OP_CCLASS_NOT,     "cclass-not"},
217   { OP_CCLASS_MB_NOT,  "cclass-mb-not"},
218   { OP_CCLASS_MIX_NOT, "cclass-mix-not"},
219   { OP_ANYCHAR,               "anychar"},
220   { OP_ANYCHAR_ML,            "anychar-ml"},
221   { OP_ANYCHAR_STAR,          "anychar*"},
222   { OP_ANYCHAR_ML_STAR,       "anychar-ml*"},
223   { OP_ANYCHAR_STAR_PEEK_NEXT,    "anychar*-peek-next"},
224   { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next"},
225   { OP_WORD,                  "word"},
226   { OP_WORD_ASCII,            "word-ascii"},
227   { OP_NO_WORD,               "not-word"},
228   { OP_NO_WORD_ASCII,         "not-word-ascii"},
229   { OP_WORD_BOUNDARY,         "word-boundary"},
230   { OP_NO_WORD_BOUNDARY,      "not-word-boundary"},
231   { OP_WORD_BEGIN,            "word-begin"},
232   { OP_WORD_END,              "word-end"},
233   { OP_TEXT_SEGMENT_BOUNDARY, "text-segment-boundary"},
234   { OP_BEGIN_BUF,             "begin-buf"},
235   { OP_END_BUF,               "end-buf"},
236   { OP_BEGIN_LINE,            "begin-line"},
237   { OP_END_LINE,              "end-line"},
238   { OP_SEMI_END_BUF,          "semi-end-buf"},
239   { OP_CHECK_POSITION,        "check-position"},
240   { OP_BACKREF1,              "backref1"},
241   { OP_BACKREF2,              "backref2"},
242   { OP_BACKREF_N,             "backref-n"},
243   { OP_BACKREF_N_IC,          "backref-n-ic"},
244   { OP_BACKREF_MULTI,         "backref_multi"},
245   { OP_BACKREF_MULTI_IC,      "backref_multi-ic"},
246   { OP_BACKREF_WITH_LEVEL,    "backref_with_level"},
247   { OP_BACKREF_WITH_LEVEL_IC, "backref_with_level-c"},
248   { OP_BACKREF_CHECK,         "backref_check"},
249   { OP_BACKREF_CHECK_WITH_LEVEL, "backref_check_with_level"},
250   { OP_MEM_START_PUSH,        "mem-start-push"},
251   { OP_MEM_START,             "mem-start"},
252   { OP_MEM_END_PUSH,          "mem-end-push"},
253 #ifdef USE_CALL
254   { OP_MEM_END_PUSH_REC,      "mem-end-push-rec"},
255 #endif
256   { OP_MEM_END,               "mem-end"},
257 #ifdef USE_CALL
258   { OP_MEM_END_REC,           "mem-end-rec"},
259 #endif
260   { OP_FAIL,                  "fail"},
261   { OP_JUMP,                  "jump"},
262   { OP_PUSH,                  "push"},
263   { OP_PUSH_SUPER,            "push-super"},
264   { OP_POP,                   "pop"},
265   { OP_POP_TO_MARK,           "pop-to-mark"},
266 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
267   { OP_PUSH_OR_JUMP_EXACT1,   "push-or-jump-e1"},
268 #endif
269   { OP_PUSH_IF_PEEK_NEXT,     "push-if-peek-next"},
270   { OP_REPEAT,                "repeat"},
271   { OP_REPEAT_NG,             "repeat-ng"},
272   { OP_REPEAT_INC,            "repeat-inc"},
273   { OP_REPEAT_INC_NG,         "repeat-inc-ng"},
274   { OP_EMPTY_CHECK_START,     "empty-check-start"},
275   { OP_EMPTY_CHECK_END,       "empty-check-end"},
276   { OP_EMPTY_CHECK_END_MEMST, "empty-check-end-memst"},
277 #ifdef USE_CALL
278   { OP_EMPTY_CHECK_END_MEMST_PUSH,"empty-check-end-memst-push"},
279 #endif
280   { OP_MOVE,                  "move"},
281   { OP_STEP_BACK_START,       "step-back-start"},
282   { OP_STEP_BACK_NEXT,        "step-back-next"},
283   { OP_CUT_TO_MARK,           "cut-to-mark"},
284   { OP_MARK,                  "mark"},
285   { OP_SAVE_VAL,              "save-val"},
286   { OP_UPDATE_VAR,            "update-var"},
287 #ifdef USE_CALL
288   { OP_CALL,                  "call"},
289   { OP_RETURN,                "return"},
290 #endif
291 #ifdef USE_CALLOUT
292   { OP_CALLOUT_CONTENTS,      "callout-contents"},
293   { OP_CALLOUT_NAME,          "callout-name"},
294 #endif
295   { -1, ""}
296 };
297 
298 static char*
op2name(int opcode)299 op2name(int opcode)
300 {
301   int i;
302 
303   for (i = 0; OpInfo[i].opcode >= 0; i++) {
304     if (opcode == OpInfo[i].opcode) return OpInfo[i].name;
305   }
306 
307   return "";
308 }
309 
/* Write the two-space separator that follows an opcode name to f. */
static void
p_after_op(FILE* f)
{
  static const char separator[] = "  ";

  fputs(separator, f);
}
315 
316 static void
p_string(FILE * f,int len,UChar * s)317 p_string(FILE* f, int len, UChar* s)
318 {
319   while (len-- > 0) { fputc(*s++, f); }
320 }
321 
322 static void
p_len_string(FILE * f,LengthType len,int mb_len,UChar * s)323 p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
324 {
325   int x = len * mb_len;
326 
327   fprintf(f, "len:%d ", len);
328   while (x-- > 0) { fputc(*s++, f); }
329 }
330 
331 static void
p_rel_addr(FILE * f,RelAddrType rel_addr,Operation * p,Operation * start)332 p_rel_addr(FILE* f, RelAddrType rel_addr, Operation* p, Operation* start)
333 {
334   char* flag;
335   char* space1;
336   char* space2;
337   RelAddrType curr;
338   AbsAddrType abs_addr;
339 
340   curr = (RelAddrType )(p - start);
341   abs_addr = curr + rel_addr;
342 
343   flag   = rel_addr <  0 ? ""  : "+";
344   space1 = rel_addr < 10 ? " " : "";
345   space2 = abs_addr < 10 ? " " : "";
346 
347   fprintf(f, "%s%s%d => %s%d", space1, flag, rel_addr, space2, abs_addr);
348 }
349 
350 static int
bitset_on_num(BitSetRef bs)351 bitset_on_num(BitSetRef bs)
352 {
353   int i, n;
354 
355   n = 0;
356   for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
357     if (BITSET_AT(bs, i)) n++;
358   }
359 
360   return n;
361 }
362 
363 
/* Opcode of the operation at position index.  With direct-threaded code the
   opcode is read from the separate reg->ocs array; otherwise it is the
   opcode member of the operation itself. */
#ifdef USE_DIRECT_THREADED_CODE
#define GET_OPCODE(reg,index)  (reg)->ocs[index]
#else
#define GET_OPCODE(reg,index)  (reg)->ops[index].opcode
#endif
369 
370 static void
print_compiled_byte_code(FILE * f,regex_t * reg,int index,Operation * start,OnigEncoding enc)371 print_compiled_byte_code(FILE* f, regex_t* reg, int index,
372                          Operation* start, OnigEncoding enc)
373 {
374   static char* SaveTypeNames[] = {
375     "KEEP",
376     "S",
377     "RIGHT_RANGE"
378   };
379 
380   static char* UpdateVarTypeNames[] = {
381     "KEEP_FROM_STACK_LAST",
382     "S_FROM_STACK",
383     "RIGHT_RANGE_FROM_STACK",
384     "RIGHT_RANGE_FROM_S_STACK",
385     "RIGHT_RANGE_TO_S",
386     "RIGHT_RANGE_INIT"
387   };
388 
389   int i, n;
390   RelAddrType addr;
391   LengthType  len;
392   MemNumType  mem;
393   OnigCodePoint code;
394   ModeType mode;
395   UChar *q;
396   Operation* p;
397   enum OpCode opcode;
398 
399   p = reg->ops + index;
400 
401   opcode = GET_OPCODE(reg, index);
402 
403   fprintf(f, "%s", op2name(opcode));
404   p_after_op(f);
405 
406   switch (opcode) {
407   case OP_STR_1:
408     p_string(f, 1, p->exact.s); break;
409   case OP_STR_2:
410     p_string(f, 2, p->exact.s); break;
411   case OP_STR_3:
412     p_string(f, 3, p->exact.s); break;
413   case OP_STR_4:
414     p_string(f, 4, p->exact.s); break;
415   case OP_STR_5:
416     p_string(f, 5, p->exact.s); break;
417   case OP_STR_N:
418     len = p->exact_n.n;
419     p_string(f, len, p->exact_n.s); break;
420   case OP_STR_MB2N1:
421     p_string(f, 2, p->exact.s); break;
422   case OP_STR_MB2N2:
423     p_string(f, 4, p->exact.s); break;
424   case OP_STR_MB2N3:
425     p_string(f, 3, p->exact.s); break;
426   case OP_STR_MB2N:
427     len = p->exact_n.n;
428     p_len_string(f, len, 2, p->exact_n.s); break;
429   case OP_STR_MB3N:
430     len = p->exact_n.n;
431     p_len_string(f, len, 3, p->exact_n.s); break;
432   case OP_STR_MBN:
433     {
434       int mb_len;
435 
436       mb_len = p->exact_len_n.len;
437       len    = p->exact_len_n.n;
438       q      = p->exact_len_n.s;
439       fprintf(f, "mblen:%d len:%d ", mb_len, len);
440       n = len * mb_len;
441       while (n-- > 0) { fputc(*q++, f); }
442     }
443     break;
444 
445   case OP_CCLASS:
446   case OP_CCLASS_NOT:
447     n = bitset_on_num(p->cclass.bsp);
448     fprintf(f, "n:%d", n);
449     break;
450   case OP_CCLASS_MB:
451   case OP_CCLASS_MB_NOT:
452     {
453       OnigCodePoint ncode;
454       OnigCodePoint* codes;
455 
456       codes = (OnigCodePoint* )p->cclass_mb.mb;
457       GET_CODE_POINT(ncode, codes);
458       codes++;
459       GET_CODE_POINT(code, codes);
460       fprintf(f, "n:%d code:0x%x", ncode, code);
461     }
462     break;
463   case OP_CCLASS_MIX:
464   case OP_CCLASS_MIX_NOT:
465     {
466       OnigCodePoint ncode;
467       OnigCodePoint* codes;
468 
469       codes = (OnigCodePoint* )p->cclass_mix.mb;
470       n = bitset_on_num(p->cclass_mix.bsp);
471 
472       GET_CODE_POINT(ncode, codes);
473       codes++;
474       GET_CODE_POINT(code, codes);
475       fprintf(f, "nsg:%d code:%u nmb:%u", n, code, ncode);
476     }
477     break;
478 
479   case OP_ANYCHAR_STAR_PEEK_NEXT:
480   case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
481     p_string(f, 1, &(p->anychar_star_peek_next.c));
482     break;
483 
484   case OP_WORD_BOUNDARY:
485   case OP_NO_WORD_BOUNDARY:
486   case OP_WORD_BEGIN:
487   case OP_WORD_END:
488     mode = p->word_boundary.mode;
489     fprintf(f, "mode:%d", mode);
490     break;
491 
492   case OP_BACKREF_N:
493   case OP_BACKREF_N_IC:
494     mem = p->backref_n.n1;
495     fprintf(f, "n:%d", mem);
496     break;
497   case OP_BACKREF_MULTI_IC:
498   case OP_BACKREF_MULTI:
499   case OP_BACKREF_CHECK:
500     n = p->backref_general.num;
501     fprintf(f, "n:%d ", n);
502     for (i = 0; i < n; i++) {
503       mem = (n == 1) ? p->backref_general.n1 : p->backref_general.ns[i];
504       if (i > 0) fputs(", ", f);
505       fprintf(f, "%d", mem);
506     }
507     break;
508   case OP_BACKREF_WITH_LEVEL:
509   case OP_BACKREF_WITH_LEVEL_IC:
510   case OP_BACKREF_CHECK_WITH_LEVEL:
511     {
512       LengthType level;
513 
514       level = p->backref_general.nest_level;
515       fprintf(f, "level:%d ", level);
516       n = p->backref_general.num;
517       for (i = 0; i < n; i++) {
518         mem = (n == 1) ? p->backref_general.n1 : p->backref_general.ns[i];
519         if (i > 0) fputs(", ", f);
520         fprintf(f, "%d", mem);
521       }
522     }
523     break;
524 
525   case OP_MEM_START:
526   case OP_MEM_START_PUSH:
527     mem = p->memory_start.num;
528     fprintf(f, "mem:%d", mem);
529     break;
530 
531   case OP_MEM_END:
532   case OP_MEM_END_PUSH:
533 #ifdef USE_CALL
534   case OP_MEM_END_REC:
535   case OP_MEM_END_PUSH_REC:
536 #endif
537     mem = p->memory_end.num;
538     fprintf(f, "mem:%d", mem);
539     break;
540 
541   case OP_JUMP:
542     addr = p->jump.addr;
543     p_rel_addr(f, addr, p, start);
544     break;
545 
546   case OP_PUSH:
547   case OP_PUSH_SUPER:
548     addr = p->push.addr;
549     p_rel_addr(f, addr, p, start);
550     break;
551 
552 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
553   case OP_PUSH_OR_JUMP_EXACT1:
554     addr = p->push_or_jump_exact1.addr;
555     p_rel_addr(f, addr, p, start);
556     fprintf(f, " c:");
557     p_string(f, 1, &(p->push_or_jump_exact1.c));
558     break;
559 #endif
560 
561   case OP_PUSH_IF_PEEK_NEXT:
562     addr = p->push_if_peek_next.addr;
563     p_rel_addr(f, addr, p, start);
564     fprintf(f, " c:");
565     p_string(f, 1, &(p->push_if_peek_next.c));
566     break;
567 
568   case OP_REPEAT:
569   case OP_REPEAT_NG:
570     mem = p->repeat.id;
571     addr = p->repeat.addr;
572     fprintf(f, "id:%d ", mem);
573     p_rel_addr(f, addr, p, start);
574     break;
575 
576   case OP_REPEAT_INC:
577   case OP_REPEAT_INC_NG:
578     mem = p->repeat.id;
579     fprintf(f, "id:%d", mem);
580     break;
581 
582   case OP_EMPTY_CHECK_START:
583     mem = p->empty_check_start.mem;
584     fprintf(f, "id:%d", mem);
585     break;
586   case OP_EMPTY_CHECK_END:
587   case OP_EMPTY_CHECK_END_MEMST:
588 #ifdef USE_CALL
589   case OP_EMPTY_CHECK_END_MEMST_PUSH:
590 #endif
591     mem = p->empty_check_end.mem;
592     fprintf(f, "id:%d", mem);
593     break;
594 
595 #ifdef USE_CALL
596   case OP_CALL:
597     addr = p->call.addr;
598     fprintf(f, "=> %d", addr);
599     break;
600 #endif
601 
602   case OP_MOVE:
603     fprintf(f, "n:%d", p->move.n);
604     break;
605 
606   case OP_STEP_BACK_START:
607     addr = p->step_back_start.addr;
608     fprintf(f, "init:%d rem:%d ",
609             p->step_back_start.initial,
610             p->step_back_start.remaining);
611     p_rel_addr(f, addr, p, start);
612     break;
613 
614   case OP_POP_TO_MARK:
615     mem = p->pop_to_mark.id;
616     fprintf(f, "id:%d", mem);
617     break;
618 
619   case OP_CUT_TO_MARK:
620     {
621       int restore;
622 
623       mem     = p->cut_to_mark.id;
624       restore = p->cut_to_mark.restore_pos;
625       fprintf(f, "id:%d restore:%d", mem, restore);
626     }
627     break;
628 
629   case OP_MARK:
630     {
631       int save;
632 
633       mem  = p->mark.id;
634       save = p->mark.save_pos;
635       fprintf(f, "id:%d save:%d", mem, save);
636     }
637     break;
638 
639   case OP_SAVE_VAL:
640     {
641       SaveType type;
642 
643       type = p->save_val.type;
644       mem  = p->save_val.id;
645       fprintf(f, "%s id:%d", SaveTypeNames[type], mem);
646     }
647     break;
648 
649   case OP_UPDATE_VAR:
650     {
651       UpdateVarType type;
652       int clear;
653 
654       type = p->update_var.type;
655       mem  = p->update_var.id;
656       clear = p->update_var.clear;
657       fprintf(f, "%s id:%d", UpdateVarTypeNames[type], mem);
658       if (type == UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK ||
659           type ==  UPDATE_VAR_RIGHT_RANGE_FROM_STACK)
660         fprintf(f, " clear:%d", clear);
661     }
662     break;
663 
664 #ifdef USE_CALLOUT
665   case OP_CALLOUT_CONTENTS:
666     mem = p->callout_contents.num;
667     fprintf(f, "num:%d", mem);
668     break;
669 
670   case OP_CALLOUT_NAME:
671     {
672       int id;
673 
674       id  = p->callout_name.id;
675       mem = p->callout_name.num;
676       fprintf(f, "id:%d num:%d", id, mem);
677     }
678     break;
679 #endif
680 
681   case OP_TEXT_SEGMENT_BOUNDARY:
682     if (p->text_segment_boundary.not != 0)
683       fprintf(f, " not");
684     break;
685 
686   case OP_CHECK_POSITION:
687     switch (p->check_position.type) {
688     case CHECK_POSITION_SEARCH_START:
689       fprintf(f, "search-start"); break;
690     case CHECK_POSITION_CURRENT_RIGHT_RANGE:
691       fprintf(f, "current-right-range"); break;
692     default:
693       break;
694     };
695     break;
696 
697   case OP_FINISH:
698   case OP_END:
699   case OP_ANYCHAR:
700   case OP_ANYCHAR_ML:
701   case OP_ANYCHAR_STAR:
702   case OP_ANYCHAR_ML_STAR:
703   case OP_WORD:
704   case OP_WORD_ASCII:
705   case OP_NO_WORD:
706   case OP_NO_WORD_ASCII:
707   case OP_BEGIN_BUF:
708   case OP_END_BUF:
709   case OP_BEGIN_LINE:
710   case OP_END_LINE:
711   case OP_SEMI_END_BUF:
712   case OP_BACKREF1:
713   case OP_BACKREF2:
714   case OP_FAIL:
715   case OP_POP:
716   case OP_STEP_BACK_NEXT:
717 #ifdef USE_CALL
718   case OP_RETURN:
719 #endif
720     break;
721 
722   default:
723     fprintf(DBGFP, "print_compiled_byte_code: undefined code %d\n", opcode);
724     break;
725   }
726 }
727 #endif /* defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH) */
728 
729 #ifdef ONIG_DEBUG_COMPILE
730 extern void
onig_print_compiled_byte_code_list(FILE * f,regex_t * reg)731 onig_print_compiled_byte_code_list(FILE* f, regex_t* reg)
732 {
733   Operation* bp;
734   Operation* start = reg->ops;
735   Operation* end   = reg->ops + reg->ops_used;
736 
737   fprintf(f, "push_mem_start: 0x%x, push_mem_end: 0x%x\n",
738           reg->push_mem_start, reg->push_mem_end);
739   fprintf(f, "code-length: %d\n", reg->ops_used);
740 
741   bp = start;
742   while (bp < end) {
743     int pos = bp - start;
744 
745     fprintf(f, "%4d: ", pos);
746     print_compiled_byte_code(f, reg, pos, start, reg->enc);
747     fprintf(f, "\n");
748     bp++;
749   }
750   fprintf(f, "\n");
751 }
752 #endif
753 
754 
755 #ifdef USE_CAPTURE_HISTORY
/* Declared ahead of its definition because history_tree_clear() and
   history_tree_free() call each other. */
static void history_tree_free(OnigCaptureTreeNode* node);
757 
758 static void
history_tree_clear(OnigCaptureTreeNode * node)759 history_tree_clear(OnigCaptureTreeNode* node)
760 {
761   int i;
762 
763   if (IS_NULL(node)) return ;
764 
765   for (i = 0; i < node->num_childs; i++) {
766     if (IS_NOT_NULL(node->childs[i])) {
767       history_tree_free(node->childs[i]);
768     }
769   }
770   for (i = 0; i < node->allocated; i++) {
771     node->childs[i] = (OnigCaptureTreeNode* )0;
772   }
773   node->num_childs = 0;
774   node->beg = ONIG_REGION_NOTPOS;
775   node->end = ONIG_REGION_NOTPOS;
776   node->group = -1;
777 }
778 
779 static void
history_tree_free(OnigCaptureTreeNode * node)780 history_tree_free(OnigCaptureTreeNode* node)
781 {
782   history_tree_clear(node);
783   if (IS_NOT_NULL(node->childs)) xfree(node->childs);
784 
785   xfree(node);
786 }
787 
788 static void
history_root_free(OnigRegion * r)789 history_root_free(OnigRegion* r)
790 {
791   if (IS_NULL(r->history_root)) return ;
792 
793   history_tree_free(r->history_root);
794   r->history_root = (OnigCaptureTreeNode* )0;
795 }
796 
797 static OnigCaptureTreeNode*
history_node_new(void)798 history_node_new(void)
799 {
800   OnigCaptureTreeNode* node;
801 
802   node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode));
803   CHECK_NULL_RETURN(node);
804 
805   node->childs     = (OnigCaptureTreeNode** )0;
806   node->allocated  =  0;
807   node->num_childs =  0;
808   node->group      = -1;
809   node->beg        = ONIG_REGION_NOTPOS;
810   node->end        = ONIG_REGION_NOTPOS;
811 
812   return node;
813 }
814 
815 static int
history_tree_add_child(OnigCaptureTreeNode * parent,OnigCaptureTreeNode * child)816 history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
817 {
818 #define HISTORY_TREE_INIT_ALLOC_SIZE  8
819 
820   if (parent->num_childs >= parent->allocated) {
821     int n, i;
822 
823     if (IS_NULL(parent->childs)) {
824       n = HISTORY_TREE_INIT_ALLOC_SIZE;
825       parent->childs =
826         (OnigCaptureTreeNode** )xmalloc(sizeof(parent->childs[0]) * n);
827     }
828     else {
829       n = parent->allocated * 2;
830       parent->childs =
831         (OnigCaptureTreeNode** )xrealloc(parent->childs,
832                                          sizeof(parent->childs[0]) * n);
833     }
834     CHECK_NULL_RETURN_MEMERR(parent->childs);
835     for (i = parent->allocated; i < n; i++) {
836       parent->childs[i] = (OnigCaptureTreeNode* )0;
837     }
838     parent->allocated = n;
839   }
840 
841   parent->childs[parent->num_childs] = child;
842   parent->num_childs++;
843   return 0;
844 }
845 
846 static OnigCaptureTreeNode*
history_tree_clone(OnigCaptureTreeNode * node)847 history_tree_clone(OnigCaptureTreeNode* node)
848 {
849   int i;
850   OnigCaptureTreeNode *clone, *child;
851 
852   clone = history_node_new();
853   CHECK_NULL_RETURN(clone);
854 
855   clone->beg = node->beg;
856   clone->end = node->end;
857   for (i = 0; i < node->num_childs; i++) {
858     child = history_tree_clone(node->childs[i]);
859     if (IS_NULL(child)) {
860       history_tree_free(clone);
861       return (OnigCaptureTreeNode* )0;
862     }
863     history_tree_add_child(clone, child);
864   }
865 
866   return clone;
867 }
868 
869 extern  OnigCaptureTreeNode*
onig_get_capture_tree(OnigRegion * region)870 onig_get_capture_tree(OnigRegion* region)
871 {
872   return region->history_root;
873 }
874 #endif /* USE_CAPTURE_HISTORY */
875 
876 extern void
onig_region_clear(OnigRegion * region)877 onig_region_clear(OnigRegion* region)
878 {
879   int i;
880 
881   for (i = 0; i < region->num_regs; i++) {
882     region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
883   }
884 #ifdef USE_CAPTURE_HISTORY
885   history_root_free(region);
886 #endif
887 }
888 
889 extern int
onig_region_resize(OnigRegion * region,int n)890 onig_region_resize(OnigRegion* region, int n)
891 {
892   region->num_regs = n;
893 
894   if (n < ONIG_NREGION)
895     n = ONIG_NREGION;
896 
897   if (region->allocated == 0) {
898     region->beg = (int* )xmalloc(n * sizeof(int));
899     region->end = (int* )xmalloc(n * sizeof(int));
900 
901     if (region->beg == 0 || region->end == 0)
902       return ONIGERR_MEMORY;
903 
904     region->allocated = n;
905   }
906   else if (region->allocated < n) {
907     region->beg = (int* )xrealloc(region->beg, n * sizeof(int));
908     region->end = (int* )xrealloc(region->end, n * sizeof(int));
909 
910     if (region->beg == 0 || region->end == 0)
911       return ONIGERR_MEMORY;
912 
913     region->allocated = n;
914   }
915 
916   return 0;
917 }
918 
919 static int
onig_region_resize_clear(OnigRegion * region,int n)920 onig_region_resize_clear(OnigRegion* region, int n)
921 {
922   int r;
923 
924   r = onig_region_resize(region, n);
925   if (r != 0) return r;
926   onig_region_clear(region);
927   return 0;
928 }
929 
930 extern int
onig_region_set(OnigRegion * region,int at,int beg,int end)931 onig_region_set(OnigRegion* region, int at, int beg, int end)
932 {
933   if (at < 0) return ONIGERR_INVALID_ARGUMENT;
934 
935   if (at >= region->allocated) {
936     int r = onig_region_resize(region, at + 1);
937     if (r < 0) return r;
938   }
939 
940   region->beg[at] = beg;
941   region->end[at] = end;
942   return 0;
943 }
944 
945 extern void
onig_region_init(OnigRegion * region)946 onig_region_init(OnigRegion* region)
947 {
948   region->num_regs     = 0;
949   region->allocated    = 0;
950   region->beg          = (int* )0;
951   region->end          = (int* )0;
952   region->history_root = (OnigCaptureTreeNode* )0;
953 }
954 
955 extern OnigRegion*
onig_region_new(void)956 onig_region_new(void)
957 {
958   OnigRegion* r;
959 
960   r = (OnigRegion* )xmalloc(sizeof(OnigRegion));
961   CHECK_NULL_RETURN(r);
962   onig_region_init(r);
963   return r;
964 }
965 
966 extern void
onig_region_free(OnigRegion * r,int free_self)967 onig_region_free(OnigRegion* r, int free_self)
968 {
969   if (r != 0) {
970     if (r->allocated > 0) {
971       if (r->beg) xfree(r->beg);
972       if (r->end) xfree(r->end);
973       r->allocated = 0;
974     }
975 #ifdef USE_CAPTURE_HISTORY
976     history_root_free(r);
977 #endif
978     if (free_self) xfree(r);
979   }
980 }
981 
982 extern void
onig_region_copy(OnigRegion * to,OnigRegion * from)983 onig_region_copy(OnigRegion* to, OnigRegion* from)
984 {
985 #define RREGC_SIZE   (sizeof(int) * from->num_regs)
986   int i;
987 
988   if (to == from) return;
989 
990   if (to->allocated == 0) {
991     if (from->num_regs > 0) {
992       to->beg = (int* )xmalloc(RREGC_SIZE);
993       if (IS_NULL(to->beg)) return;
994       to->end = (int* )xmalloc(RREGC_SIZE);
995       if (IS_NULL(to->end)) return;
996       to->allocated = from->num_regs;
997     }
998   }
999   else if (to->allocated < from->num_regs) {
1000     to->beg = (int* )xrealloc(to->beg, RREGC_SIZE);
1001     if (IS_NULL(to->beg)) return;
1002     to->end = (int* )xrealloc(to->end, RREGC_SIZE);
1003     if (IS_NULL(to->end)) return;
1004     to->allocated = from->num_regs;
1005   }
1006 
1007   for (i = 0; i < from->num_regs; i++) {
1008     to->beg[i] = from->beg[i];
1009     to->end[i] = from->end[i];
1010   }
1011   to->num_regs = from->num_regs;
1012 
1013 #ifdef USE_CAPTURE_HISTORY
1014   history_root_free(to);
1015 
1016   if (IS_NOT_NULL(from->history_root)) {
1017     to->history_root = history_tree_clone(from->history_root);
1018   }
1019 #endif
1020 }
1021 
#ifdef USE_CALLOUT
/* Fill an OnigCalloutArgs-style struct `args` and invoke `func(&args, user)`,
   storing its return value in `result`.
   Besides its parameters the macro reads these names from the expansion
   site: reg, str, end, sstart, right_range, s, retry_in_match_counter, msa,
   stk_base, stk, mem_start_stk and mem_end_stk. */
#define CALLOUT_BODY(func, ain, aname_id, anum, user, args, result) do { \
  args.in            = (ain);\
  args.name_id       = (aname_id);\
  args.num           = anum;\
  args.regex         = reg;\
  args.string        = str;\
  args.string_end    = end;\
  args.start         = sstart;\
  args.right_range   = right_range;\
  args.current       = s;\
  args.retry_in_match_counter = retry_in_match_counter;\
  args.msa           = msa;\
  args.stk_base      = stk_base;\
  args.stk           = stk;\
  args.mem_start_stk = mem_start_stk;\
  args.mem_end_stk   = mem_end_stk;\
  result = (func)(&args, user);\
} while (0)

/* Run a callout with args.in set to ONIG_CALLOUT_IN_RETRACTION.
   ONIG_CALLOUT_FAIL and ONIG_CALLOUT_SUCCESS let execution continue after
   the macro.  Any other result aborts: a positive value is first replaced by
   ONIGERR_INVALID_ARGUMENT, the value is stored in best_len, and control
   jumps to the match_at_end label (both must exist at the expansion site).
   The `break` after the goto is never reached. */
#define RETRACTION_CALLOUT(func, aname_id, anum, user) do {\
  int result;\
  OnigCalloutArgs args;\
  CALLOUT_BODY(func, ONIG_CALLOUT_IN_RETRACTION, aname_id, anum, user, args, result);\
  switch (result) {\
  case ONIG_CALLOUT_FAIL:\
  case ONIG_CALLOUT_SUCCESS:\
    break;\
  default:\
    if (result > 0) {\
      result = ONIGERR_INVALID_ARGUMENT;\
    }\
    best_len = result;\
    goto match_at_end;\
    break;\
  }\
} while(0)
#endif
1060 
1061 
/** stack **/
/* Bit 0 of an entry type.  STK_MASK_POP_USED equals this bit, and every loop
   in STACK_POP stops at the first entry that has it set. */
#define STK_ALT_FLAG               0x0001

/* stack type */
/* used by normal-POP */
#define STK_SUPER_ALT             STK_ALT_FLAG
#define STK_ALT                   (0x0002 | STK_ALT_FLAG)

/* handled by normal-POP */
/* MEM_START, MEM_END and CALLOUT contain the STK_MASK_POP_HANDLED bit
   (0x0010); REPEAT_INC gets it only when
   USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR is defined.  STK_MASK_POP_HANDLED is
   defined further down, which works because macros expand at the point of
   use. */
#define STK_MEM_START              0x0010
#define STK_MEM_END                0x8030
#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
#define STK_REPEAT_INC             (0x0040 | STK_MASK_POP_HANDLED)
#else
#define STK_REPEAT_INC             0x0040
#endif
#ifdef USE_CALLOUT
#define STK_CALLOUT                0x0070
#endif

/* avoided by normal-POP */
/* Note: the types below that are OR-ed with STK_MASK_POP_HANDLED are still
   examined by the default branch of STACK_POP (see POP_EMPTY_CHECK_START and
   POP_CALL). */
#define STK_VOID                   0x0000  /* type given to an entry that has been blanked out */
#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
#define STK_EMPTY_CHECK_START      (0x3000 | STK_MASK_POP_HANDLED)
#else
#define STK_EMPTY_CHECK_START      0x3000
#endif
#define STK_EMPTY_CHECK_END        0x5000  /* for recursive call */
#define STK_MEM_END_MARK           0x8100
#define STK_CALL_FRAME             (0x0400 | STK_MASK_POP_HANDLED)
#define STK_RETURN                 (0x0500 | STK_MASK_POP_HANDLED)
#define STK_SAVE_VAL               0x0600
#define STK_MARK                   0x0704

/* stack type check mask */
#define STK_MASK_POP_USED          STK_ALT_FLAG
#define STK_MASK_POP_HANDLED       0x0010
#define STK_MASK_POP_HANDLED_TIL   (STK_MASK_POP_HANDLED | 0x0004)
#define STK_MASK_TO_VOID_TARGET    0x100e  /* tested by IS_TO_VOID_TARGET() */
#define STK_MASK_MEM_END_OR_MARK   0x8000  /* MEM_END or MEM_END_MARK */
1102 
/* Position of an entry relative to stk_base (see GET_STACK_INDEX / STACK_AT). */
typedef ptrdiff_t StackIndex;

/* Sentinel stored in mem_end_stk[n].i by STACK_PUSH_MEM_START. */
#define INVALID_STACK_INDEX   ((StackIndex )-1)

/* One slot of mem_start_stk[] / mem_end_stk[]: holds either a stack index
   or a string position. */
typedef union {
  StackIndex i;
  UChar*     s;
} StkPtrType;
1111 
1112 
/*
 * One entry of the matcher's backtrack stack.
 *
 * `type` is one of the STK_* values; `zid` carries the id passed to the push
 * macro (group number, repeat id, empty-check id, mark id, callout name id).
 * The union member in use depends on `type`.
 */
typedef struct _StackType {
  unsigned int type;
  int zid;
  union {
    /* written by STACK_PUSH and its variants */
    struct {
      Operation* pcode;     /* byte code position */
      UChar*     pstr;      /* string position */
    } state;
    /* written by STACK_PUSH_REPEAT_INC */
    struct {
      int        count;
#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
      StackIndex prev_index;  /* index of stack */
#endif
    } repeat_inc;
    /* written by STACK_PUSH_MEM_START and STACK_PUSH_MEM_END */
    struct {
      UChar *pstr;       /* start/end position */
      /* Following information is set, if this stack type is MEM-START */
      StkPtrType prev_start;  /* prev. info (for backtrack  "(...)*" ) */
      StkPtrType prev_end;    /* prev. info (for backtrack  "(...)*" ) */
    } mem;
    /* written by STACK_PUSH_EMPTY_CHECK_START */
    struct {
      UChar *pstr;            /* start position */
#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
      StackIndex prev_index;  /* index of stack */
#endif
    } empty_check;
#ifdef USE_CALL
    /* written by STACK_PUSH_CALL_FRAME (only ret_addr is set there) */
    struct {
      Operation *ret_addr; /* byte code position */
      UChar *pstr;         /* string position */
    } call_frame;
#endif
    /* written by STACK_PUSH_SAVE_VAL; STACK_PUSH_MARK_WITH_POS sets only v */
    struct {
      enum SaveType type;
      UChar* v;
      UChar* v2;
    } val;
#ifdef USE_CALLOUT
    /* written by STACK_PUSH_CALLOUT_CONTENTS / STACK_PUSH_CALLOUT_NAME */
    struct {
      int num;
      OnigCalloutFunc func;
    } callout;
#endif
  } u;
} StackType;
1158 
1159 #ifdef USE_CALLOUT
1160 
/*
 * Argument bundle handed to a callout function.  All fields are filled in
 * by CALLOUT_BODY immediately before the callout is invoked.
 */
struct OnigCalloutArgsStruct {
  OnigCalloutIn    in;
  int              name_id;   /* name id or ONIG_NON_NAME_ID */
  int              num;
  OnigRegex        regex;
  const OnigUChar* string;
  const OnigUChar* string_end;
  const OnigUChar* start;
  const OnigUChar* right_range;
  const OnigUChar* current;  /* current matching position */
  unsigned long    retry_in_match_counter;

  /* invisible to users */
  MatchArg*   msa;
  StackType*  stk_base;
  StackType*  stk;
  StkPtrType* mem_start_stk;
  StkPtrType* mem_end_stk;
};
1180 
1181 #endif
1182 
#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR

/* Number of slots kept in front of the stack entries (MATCH_ARG_INIT stores
   it in msa.ptr_num): one per repeat, one per empty check, and two arrays of
   num_mem + 1 slots for group start/end. */
#define PTR_NUM_SIZE(reg)  ((reg)->num_repeat + (reg)->num_empty_check + ((reg)->num_mem + 1) * 2)
/* Re-derive the four arrays from alloc_base.  Invoked by STACK_ENSURE after
   stack_double() succeeds.  Needs reg and num_mem in scope. */
#define UPDATE_FOR_STACK_REALLOC do{\
  repeat_stk      = (StackIndex* )alloc_base;\
  empty_check_stk = (StackIndex* )(repeat_stk + reg->num_repeat);\
  mem_start_stk   = (StkPtrType* )(empty_check_stk + reg->num_empty_check);\
  mem_end_stk     = mem_start_stk + num_mem + 1;\
} while(0)

/* SAVE_* stores the previous index for `sid` in the entry being pushed,
   LOAD_TO_* makes the array point at that entry, and POP_* (an "else if"
   fragment spliced into STACK_POP) restores the previous index. */
#define SAVE_REPEAT_STK_VAR(sid) stk->u.repeat_inc.prev_index = repeat_stk[sid]
#define LOAD_TO_REPEAT_STK_VAR(sid)  repeat_stk[sid] = GET_STACK_INDEX(stk)
#define POP_REPEAT_INC  else if (stk->type == STK_REPEAT_INC) {repeat_stk[stk->zid] = stk->u.repeat_inc.prev_index;}

#define SAVE_EMPTY_CHECK_STK_VAR(sid) stk->u.empty_check.prev_index = empty_check_stk[sid]
#define LOAD_TO_EMPTY_CHECK_STK_VAR(sid)  empty_check_stk[sid] = GET_STACK_INDEX(stk)
#define POP_EMPTY_CHECK_START  else if (stk->type == STK_EMPTY_CHECK_START) {empty_check_stk[stk->zid] = stk->u.empty_check.prev_index;}

#else

/* Without the local-variable arrays only the two group start/end arrays
   precede the stack entries. */
#define PTR_NUM_SIZE(reg)  (((reg)->num_mem + 1) * 2)
#define UPDATE_FOR_STACK_REALLOC do{\
  mem_start_stk = (StkPtrType* )alloc_base;\
  mem_end_stk   = mem_start_stk + num_mem + 1;\
} while(0)

/* The repeat / empty-check bookkeeping expands to nothing in this build. */
#define SAVE_REPEAT_STK_VAR(sid)
#define LOAD_TO_REPEAT_STK_VAR(sid)
#define POP_REPEAT_INC

#define SAVE_EMPTY_CHECK_STK_VAR(sid)
#define LOAD_TO_EMPTY_CHECK_STK_VAR(sid)
#define POP_EMPTY_CHECK_START

#endif /* USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR */
1218 
#ifdef USE_RETRY_LIMIT
/* Copy both retry limits from the match parameter `mpv` into `msa` and
   reset the per-search retry counter.  Expands to three statements, each
   ending in ';' (it is used without a trailing semicolon inside
   MATCH_ARG_INIT). */
#define RETRY_IN_MATCH_ARG_INIT(msa,mpv) \
  (msa).retry_limit_in_match  = (mpv)->retry_limit_in_match;\
  (msa).retry_limit_in_search = (mpv)->retry_limit_in_search;\
  (msa).retry_limit_in_search_counter = 0;
#else
#define RETRY_IN_MATCH_ARG_INIT(msa,mpv)
#endif
1227 
#if defined(USE_CALL)
/* Reset the per-search subexp call counter in `msa` (`mpv` is unused). */
#define SUBEXP_CALL_IN_MATCH_ARG_INIT(msa,mpv) \
  (msa).subexp_call_in_search_counter = 0;

/* "else if" fragment spliced into STACK_POP: popping a STK_RETURN entry
   increments subexp_call_nest_counter, popping a STK_CALL_FRAME entry
   decrements it. */
#define POP_CALL  else if (stk->type == STK_RETURN) {subexp_call_nest_counter++;} else if (stk->type == STK_CALL_FRAME) {subexp_call_nest_counter--;}
#else
#define SUBEXP_CALL_IN_MATCH_ARG_INIT(msa,mpv)
#define POP_CALL
#endif
1237 
/*
 * MATCH_ARG_INIT: initialise the MatchArg object `msa` (an lvalue, not a
 * pointer) for one match/search.
 *
 * stack_p starts out null (no saved stack), options/region/start come from
 * the arguments, the stack limit and retry limits come from the match
 * parameter `mpv`, and ptr_num is computed from `reg` with PTR_NUM_SIZE.
 * The USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE variant additionally sets
 * best_len to ONIG_MISMATCH.
 */
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mpv) do { \
  (msa).stack_p  = (void* )0;\
  (msa).options  = (arg_option);\
  (msa).region   = (arg_region);\
  (msa).start    = (arg_start);\
  (msa).match_stack_limit  = (mpv)->match_stack_limit;\
  RETRY_IN_MATCH_ARG_INIT(msa,mpv)\
  SUBEXP_CALL_IN_MATCH_ARG_INIT(msa,mpv)\
  (msa).mp = mpv;\
  (msa).best_len = ONIG_MISMATCH;\
  (msa).ptr_num  = PTR_NUM_SIZE(reg);\
} while(0)
#else
#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mpv) do { \
  (msa).stack_p  = (void* )0;\
  (msa).options  = (arg_option);\
  (msa).region   = (arg_region);\
  (msa).start    = (arg_start);\
  (msa).match_stack_limit  = (mpv)->match_stack_limit;\
  RETRY_IN_MATCH_ARG_INIT(msa,mpv)\
  SUBEXP_CALL_IN_MATCH_ARG_INIT(msa,mpv)\
  (msa).mp = mpv;\
  (msa).ptr_num  = PTR_NUM_SIZE(reg);\
} while(0)
#endif
1264 
/*
 * MATCH_ARG_FREE: release the stack buffer saved in `msa` (an lvalue), if
 * there is one.
 *
 * Wrapped in do { } while(0) so the macro behaves as a single statement:
 * with a bare "if", an "else" written after "if (c) MATCH_ARG_FREE(msa);"
 * would silently attach to the macro's own "if".
 */
#define MATCH_ARG_FREE(msa)  do {\
  if ((msa).stack_p) xfree((msa).stack_p);\
} while(0)
1266 
1267 
/* STACK_INIT uses xalloca only while msa->ptr_num does not exceed this
   value; above it the initial stack comes from xmalloc. */
#define ALLOCA_PTR_NUM_LIMIT   50
1269 
/*
 * STACK_INIT(stack_num): set up alloc_base, stk_base, stk, stk_end and
 * is_alloca in the enclosing function (all must be declared there, along
 * with msa).
 *
 *  - msa->stack_p non-null: reuse that buffer; capacity is msa->stack_n.
 *  - msa->ptr_num > ALLOCA_PTR_NUM_LIMIT: xmalloc a fresh buffer for
 *    stack_num entries.
 *  - otherwise: xalloca a fresh buffer and set is_alloca = 1.
 *
 * In every case sizeof(StkPtrType) * msa->ptr_num bytes are left in front
 * of stk_base.  A failed allocation goes through CHECK_NULL_RETURN_MEMERR
 * (defined elsewhere; presumably returns ONIGERR_MEMORY from the caller).
 *
 * NOTE(review): the definition ends in "while(0);" -- because of the extra
 * semicolon, "if (c) STACK_INIT(n); else ..." would not compile.  Left
 * unchanged here because the call sites are not visible in this section.
 */
#define STACK_INIT(stack_num)  do {\
  if (msa->stack_p) {\
    is_alloca  = 0;\
    alloc_base = msa->stack_p;\
    stk_base   = (StackType* )(alloc_base\
                 + (sizeof(StkPtrType) * msa->ptr_num));\
    stk        = stk_base;\
    stk_end    = stk_base + msa->stack_n;\
  }\
  else if (msa->ptr_num > ALLOCA_PTR_NUM_LIMIT) {\
    is_alloca  = 0;\
    alloc_base = (char* )xmalloc(sizeof(StkPtrType) * msa->ptr_num\
                  + sizeof(StackType) * (stack_num));\
    CHECK_NULL_RETURN_MEMERR(alloc_base);\
    stk_base   = (StackType* )(alloc_base\
                 + (sizeof(StkPtrType) * msa->ptr_num));\
    stk        = stk_base;\
    stk_end    = stk_base + (stack_num);\
  }\
  else {\
    is_alloca  = 1;\
    alloc_base = (char* )xalloca(sizeof(StkPtrType) * msa->ptr_num\
                 + sizeof(StackType) * (stack_num));\
    CHECK_NULL_RETURN_MEMERR(alloc_base);\
    stk_base   = (StackType* )(alloc_base\
                 + (sizeof(StkPtrType) * msa->ptr_num));\
    stk        = stk_base;\
    stk_end    = stk_base + (stack_num);\
  }\
} while(0);
1300 
1301 
/*
 * STACK_SAVE: record the current stack in `msa` (a pointer) so that a later
 * STACK_INIT can reuse it.
 *
 * msa->stack_n becomes stk_end - stk_base; note that stk_end and stk_base
 * are taken from the enclosing scope, not from the macro arguments.  When
 * the buffer is alloca memory (is_alloca != 0) its contents are copied into
 * a new xmalloc block; otherwise alloc_base itself is stored in
 * msa->stack_p.  A failed xmalloc goes through CHECK_NULL_RETURN_MEMERR,
 * so this macro can return from the enclosing function.
 */
#define STACK_SAVE(msa,is_alloca,alloc_base) do{\
  (msa)->stack_n = (int )(stk_end - stk_base);\
  if ((is_alloca) != 0) {\
    size_t size = sizeof(StkPtrType) * (msa)->ptr_num\
                + sizeof(StackType) * (msa)->stack_n;\
    (msa)->stack_p = xmalloc(size);\
    CHECK_NULL_RETURN_MEMERR((msa)->stack_p);\
    xmemcpy((msa)->stack_p, (alloc_base), size);\
  }\
  else {\
    (msa)->stack_p = (alloc_base);\
  };\
} while(0)
1315 
/* Process-wide default copied into OnigMatchParam.match_stack_limit by
   onig_initialize_match_param(). */
static unsigned int MatchStackLimit = DEFAULT_MATCH_STACK_LIMIT_SIZE;
1317 
1318 extern unsigned int
onig_get_match_stack_limit_size(void)1319 onig_get_match_stack_limit_size(void)
1320 {
1321   return MatchStackLimit;
1322 }
1323 
1324 extern int
onig_set_match_stack_limit_size(unsigned int size)1325 onig_set_match_stack_limit_size(unsigned int size)
1326 {
1327   MatchStackLimit = size;
1328   return 0;
1329 }
1330 
#ifdef USE_RETRY_LIMIT

/* Process-wide defaults; onig_initialize_match_param() copies them into
   OnigMatchParam.retry_limit_in_match / retry_limit_in_search. */
static unsigned long RetryLimitInMatch  = DEFAULT_RETRY_LIMIT_IN_MATCH;
static unsigned long RetryLimitInSearch = DEFAULT_RETRY_LIMIT_IN_SEARCH;

/*
 * Increment retry_in_match_counter and, once it exceeds the local
 * retry_limit_in_match, leave through MATCH_AT_ERROR_RETURN.  The error is
 * ONIGERR_RETRY_LIMIT_IN_MATCH_OVER when the counter also exceeds
 * msa->retry_limit_in_match, otherwise ONIGERR_RETRY_LIMIT_IN_SEARCH_OVER.
 * Needs retry_in_match_counter, retry_limit_in_match and msa in scope.
 */
#define CHECK_RETRY_LIMIT_IN_MATCH  do {\
  if (++retry_in_match_counter > retry_limit_in_match) {\
    MATCH_AT_ERROR_RETURN(retry_in_match_counter > msa->retry_limit_in_match ? ONIGERR_RETRY_LIMIT_IN_MATCH_OVER : ONIGERR_RETRY_LIMIT_IN_SEARCH_OVER); \
  }\
} while (0)

#else

/* Expands to nothing when retry limits are compiled out. */
#define CHECK_RETRY_LIMIT_IN_MATCH

#endif /* USE_RETRY_LIMIT */
1347 
/*
 * Return the process-wide default retry limit for a single match.
 * Builds without USE_RETRY_LIMIT always report 0.
 */
extern unsigned long
onig_get_retry_limit_in_match(void)
{
  unsigned long limit = 0;

#ifdef USE_RETRY_LIMIT
  limit = RetryLimitInMatch;
#endif
  return limit;
}
1357 
1358 extern int
onig_set_retry_limit_in_match(unsigned long n)1359 onig_set_retry_limit_in_match(unsigned long n)
1360 {
1361 #ifdef USE_RETRY_LIMIT
1362   RetryLimitInMatch = n;
1363   return 0;
1364 #else
1365   return ONIG_NO_SUPPORT_CONFIG;
1366 #endif
1367 }
1368 
/*
 * Return the process-wide default retry limit for a whole search.
 * Builds without USE_RETRY_LIMIT always report 0.
 */
extern unsigned long
onig_get_retry_limit_in_search(void)
{
  unsigned long limit = 0;

#ifdef USE_RETRY_LIMIT
  limit = RetryLimitInSearch;
#endif
  return limit;
}
1378 
1379 extern int
onig_set_retry_limit_in_search(unsigned long n)1380 onig_set_retry_limit_in_search(unsigned long n)
1381 {
1382 #ifdef USE_RETRY_LIMIT
1383   RetryLimitInSearch = n;
1384   return 0;
1385 #else
1386   return ONIG_NO_SUPPORT_CONFIG;
1387 #endif
1388 }
1389 
1390 #ifdef USE_CALL
/* Process-wide subexp call limit, read and written through
   onig_get/set_subexp_call_limit_in_search(). */
static unsigned long SubexpCallLimitInSearch = DEFAULT_SUBEXP_CALL_LIMIT_IN_SEARCH;
1392 
1393 extern unsigned long
onig_get_subexp_call_limit_in_search(void)1394 onig_get_subexp_call_limit_in_search(void)
1395 {
1396   return SubexpCallLimitInSearch;
1397 }
1398 
1399 extern int
onig_set_subexp_call_limit_in_search(unsigned long n)1400 onig_set_subexp_call_limit_in_search(unsigned long n)
1401 {
1402   SubexpCallLimitInSearch = n;
1403   return 0;
1404 }
1405 
1406 #endif
1407 
1408 #ifdef USE_CALLOUT
/* Defaults copied into OnigMatchParam.progress_callout_of_contents and
   .retraction_callout_of_contents by onig_initialize_match_param().
   No initialiser is given, so they start out zero-initialised. */
static OnigCalloutFunc DefaultProgressCallout;
static OnigCalloutFunc DefaultRetractionCallout;
1411 #endif
1412 
1413 extern OnigMatchParam*
onig_new_match_param(void)1414 onig_new_match_param(void)
1415 {
1416   OnigMatchParam* p;
1417 
1418   p = (OnigMatchParam* )xmalloc(sizeof(*p));
1419   if (IS_NOT_NULL(p)) {
1420     onig_initialize_match_param(p);
1421   }
1422 
1423   return p;
1424 }
1425 
1426 extern void
onig_free_match_param_content(OnigMatchParam * p)1427 onig_free_match_param_content(OnigMatchParam* p)
1428 {
1429 #ifdef USE_CALLOUT
1430   if (IS_NOT_NULL(p->callout_data)) {
1431     xfree(p->callout_data);
1432     p->callout_data = 0;
1433   }
1434 #endif
1435 }
1436 
1437 extern void
onig_free_match_param(OnigMatchParam * p)1438 onig_free_match_param(OnigMatchParam* p)
1439 {
1440   if (IS_NOT_NULL(p)) {
1441     onig_free_match_param_content(p);
1442     xfree(p);
1443   }
1444 }
1445 
1446 extern int
onig_initialize_match_param(OnigMatchParam * mp)1447 onig_initialize_match_param(OnigMatchParam* mp)
1448 {
1449   mp->match_stack_limit  = MatchStackLimit;
1450 #ifdef USE_RETRY_LIMIT
1451   mp->retry_limit_in_match  = RetryLimitInMatch;
1452   mp->retry_limit_in_search = RetryLimitInSearch;
1453 #endif
1454 
1455 #ifdef USE_CALLOUT
1456   mp->progress_callout_of_contents   = DefaultProgressCallout;
1457   mp->retraction_callout_of_contents = DefaultRetractionCallout;
1458   mp->match_at_call_counter  = 0;
1459   mp->callout_user_data      = 0;
1460   mp->callout_data           = 0;
1461   mp->callout_data_alloc_num = 0;
1462 #endif
1463 
1464   return ONIG_NORMAL;
1465 }
1466 
1467 #ifdef USE_CALLOUT
1468 
1469 static int
adjust_match_param(regex_t * reg,OnigMatchParam * mp)1470 adjust_match_param(regex_t* reg, OnigMatchParam* mp)
1471 {
1472   RegexExt* ext = reg->extp;
1473 
1474   mp->match_at_call_counter = 0;
1475 
1476   if (IS_NULL(ext) || ext->callout_num == 0) return ONIG_NORMAL;
1477 
1478   if (ext->callout_num > mp->callout_data_alloc_num) {
1479     CalloutData* d;
1480     size_t n = ext->callout_num * sizeof(*d);
1481     if (IS_NOT_NULL(mp->callout_data))
1482       d = (CalloutData* )xrealloc(mp->callout_data, n);
1483     else
1484       d = (CalloutData* )xmalloc(n);
1485     CHECK_NULL_RETURN_MEMERR(d);
1486 
1487     mp->callout_data = d;
1488     mp->callout_data_alloc_num = ext->callout_num;
1489   }
1490 
1491   xmemset(mp->callout_data, 0, mp->callout_data_alloc_num * sizeof(CalloutData));
1492   return ONIG_NORMAL;
1493 }
1494 
/* Run adjust_match_param() and return its result from the enclosing
   function on failure.  Expands to two statements and needs an int `r` in
   scope, so it must not be used as the unbraced body of an if/else. */
#define ADJUST_MATCH_PARAM(reg, mp) \
  r = adjust_match_param(reg, mp);\
  if (r != ONIG_NORMAL) return r;

/* Address of the CalloutData for callout number `num`; numbers are 1-based.
   No range check is performed here. */
#define CALLOUT_DATA_AT_NUM(mp, num)  ((mp)->callout_data + ((num) - 1))
1500 
1501 extern int
onig_check_callout_data_and_clear_old_values(OnigCalloutArgs * args)1502 onig_check_callout_data_and_clear_old_values(OnigCalloutArgs* args)
1503 {
1504   OnigMatchParam* mp;
1505   int num;
1506   CalloutData* d;
1507 
1508   mp  = args->msa->mp;
1509   num = args->num;
1510 
1511   d = CALLOUT_DATA_AT_NUM(mp, num);
1512   if (d->last_match_at_call_counter != mp->match_at_call_counter) {
1513     xmemset(d, 0, sizeof(*d));
1514     d->last_match_at_call_counter = mp->match_at_call_counter;
1515     return d->last_match_at_call_counter;
1516   }
1517 
1518   return 0;
1519 }
1520 
1521 extern int
onig_get_callout_data_dont_clear_old(regex_t * reg,OnigMatchParam * mp,int callout_num,int slot,OnigType * type,OnigValue * val)1522 onig_get_callout_data_dont_clear_old(regex_t* reg, OnigMatchParam* mp,
1523                                      int callout_num, int slot,
1524                                      OnigType* type, OnigValue* val)
1525 {
1526   OnigType t;
1527   CalloutData* d;
1528 
1529   if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT;
1530 
1531   d = CALLOUT_DATA_AT_NUM(mp, callout_num);
1532   t = d->slot[slot].type;
1533   if (IS_NOT_NULL(type)) *type = t;
1534   if (IS_NOT_NULL(val))  *val  = d->slot[slot].val;
1535   return (t == ONIG_TYPE_VOID ? 1 : ONIG_NORMAL);
1536 }
1537 
1538 extern int
onig_get_callout_data_by_callout_args_self_dont_clear_old(OnigCalloutArgs * args,int slot,OnigType * type,OnigValue * val)1539 onig_get_callout_data_by_callout_args_self_dont_clear_old(OnigCalloutArgs* args,
1540                                                           int slot, OnigType* type,
1541                                                           OnigValue* val)
1542 {
1543   return onig_get_callout_data_dont_clear_old(args->regex, args->msa->mp,
1544                                               args->num, slot, type, val);
1545 }
1546 
1547 extern int
onig_get_callout_data(regex_t * reg,OnigMatchParam * mp,int callout_num,int slot,OnigType * type,OnigValue * val)1548 onig_get_callout_data(regex_t* reg, OnigMatchParam* mp,
1549                       int callout_num, int slot,
1550                       OnigType* type, OnigValue* val)
1551 {
1552   OnigType t;
1553   CalloutData* d;
1554 
1555   if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT;
1556 
1557   d = CALLOUT_DATA_AT_NUM(mp, callout_num);
1558   if (d->last_match_at_call_counter != mp->match_at_call_counter) {
1559     xmemset(d, 0, sizeof(*d));
1560     d->last_match_at_call_counter = mp->match_at_call_counter;
1561   }
1562 
1563   t = d->slot[slot].type;
1564   if (IS_NOT_NULL(type)) *type = t;
1565   if (IS_NOT_NULL(val))  *val  = d->slot[slot].val;
1566   return (t == ONIG_TYPE_VOID ? 1 : ONIG_NORMAL);
1567 }
1568 
1569 extern int
onig_get_callout_data_by_tag(regex_t * reg,OnigMatchParam * mp,const UChar * tag,const UChar * tag_end,int slot,OnigType * type,OnigValue * val)1570 onig_get_callout_data_by_tag(regex_t* reg, OnigMatchParam* mp,
1571                              const UChar* tag, const UChar* tag_end, int slot,
1572                              OnigType* type, OnigValue* val)
1573 {
1574   int num;
1575 
1576   num = onig_get_callout_num_by_tag(reg, tag, tag_end);
1577   if (num < 0)  return num;
1578   if (num == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME;
1579 
1580   return onig_get_callout_data(reg, mp, num, slot, type, val);
1581 }
1582 
1583 extern int
onig_get_callout_data_by_callout_args(OnigCalloutArgs * args,int callout_num,int slot,OnigType * type,OnigValue * val)1584 onig_get_callout_data_by_callout_args(OnigCalloutArgs* args,
1585                                       int callout_num, int slot,
1586                                       OnigType* type, OnigValue* val)
1587 {
1588   return onig_get_callout_data(args->regex, args->msa->mp, callout_num, slot,
1589                                type, val);
1590 }
1591 
1592 extern int
onig_get_callout_data_by_callout_args_self(OnigCalloutArgs * args,int slot,OnigType * type,OnigValue * val)1593 onig_get_callout_data_by_callout_args_self(OnigCalloutArgs* args,
1594                                            int slot, OnigType* type, OnigValue* val)
1595 {
1596   return onig_get_callout_data(args->regex, args->msa->mp, args->num, slot,
1597                                type, val);
1598 }
1599 
1600 extern int
onig_set_callout_data(regex_t * reg,OnigMatchParam * mp,int callout_num,int slot,OnigType type,OnigValue * val)1601 onig_set_callout_data(regex_t* reg, OnigMatchParam* mp,
1602                       int callout_num, int slot,
1603                       OnigType type, OnigValue* val)
1604 {
1605   CalloutData* d;
1606 
1607   if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT;
1608 
1609   d = CALLOUT_DATA_AT_NUM(mp, callout_num);
1610   d->slot[slot].type = type;
1611   d->slot[slot].val  = *val;
1612   d->last_match_at_call_counter = mp->match_at_call_counter;
1613 
1614   return ONIG_NORMAL;
1615 }
1616 
1617 extern int
onig_set_callout_data_by_tag(regex_t * reg,OnigMatchParam * mp,const UChar * tag,const UChar * tag_end,int slot,OnigType type,OnigValue * val)1618 onig_set_callout_data_by_tag(regex_t* reg, OnigMatchParam* mp,
1619                              const UChar* tag, const UChar* tag_end, int slot,
1620                              OnigType type, OnigValue* val)
1621 {
1622   int num;
1623 
1624   num = onig_get_callout_num_by_tag(reg, tag, tag_end);
1625   if (num < 0)  return num;
1626   if (num == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME;
1627 
1628   return onig_set_callout_data(reg, mp, num, slot, type, val);
1629 }
1630 
1631 extern int
onig_set_callout_data_by_callout_args(OnigCalloutArgs * args,int callout_num,int slot,OnigType type,OnigValue * val)1632 onig_set_callout_data_by_callout_args(OnigCalloutArgs* args,
1633                                       int callout_num, int slot,
1634                                       OnigType type, OnigValue* val)
1635 {
1636   return onig_set_callout_data(args->regex, args->msa->mp, callout_num, slot,
1637                                type, val);
1638 }
1639 
1640 extern int
onig_set_callout_data_by_callout_args_self(OnigCalloutArgs * args,int slot,OnigType type,OnigValue * val)1641 onig_set_callout_data_by_callout_args_self(OnigCalloutArgs* args,
1642                                            int slot, OnigType type, OnigValue* val)
1643 {
1644   return onig_set_callout_data(args->regex, args->msa->mp, args->num, slot,
1645                                type, val);
1646 }
1647 
1648 #else
/* Without USE_CALLOUT there is no callout data to prepare. */
#define ADJUST_MATCH_PARAM(reg, mp)
1650 #endif /* USE_CALLOUT */
1651 
1652 
/*
 * Grow the backtrack stack, normally to twice its current capacity.
 *
 * is_alloca       in/out: non-zero while *arg_alloc_base is alloca memory;
 *                 cleared once the data has been moved to an xmalloc block.
 * arg_alloc_base  in/out: start of the whole allocation.
 * arg_stk_base    in/out: first stack entry.
 * arg_stk_end     in/out: one past the last usable entry.
 * arg_stk         in/out: current stack top.
 * msa             supplies ptr_num and match_stack_limit; on failure the
 *                 current stack is recorded in it through STACK_SAVE.
 *
 * Returns 0 on success, ONIGERR_MEMORY when an allocation fails, or
 * ONIGERR_MATCH_STACK_LIMIT_OVER when the capacity already equals
 * msa->match_stack_limit.  On failure the in/out arguments other than msa
 * are left untouched.
 */
static int
stack_double(int* is_alloca, char** arg_alloc_base,
             StackType** arg_stk_base, StackType** arg_stk_end,
             StackType** arg_stk, MatchArg* msa)
{
  unsigned int n;
  int used;
  size_t size;
  size_t new_size;
  char* alloc_base;
  char* new_alloc_base;
  /* STACK_SAVE reads stk_end and stk_base by name, so these locals must
     keep exactly these names. */
  StackType *stk_base, *stk_end, *stk;

  alloc_base = *arg_alloc_base;
  stk_base = *arg_stk_base;
  stk_end  = *arg_stk_end;
  stk      = *arg_stk;

  /* size: bytes currently in use by the allocation; new_size: bytes for the
     doubled capacity.  Both include the ptr_num slots in front of the stack. */
  n = (unsigned int )(stk_end - stk_base);
  size = sizeof(StkPtrType) * msa->ptr_num + sizeof(StackType) * n;
  n *= 2;
  new_size = sizeof(StkPtrType) * msa->ptr_num + sizeof(StackType) * n;
  if (*is_alloca != 0) {
    /* alloca memory cannot be resized: move to the heap.  The stack limit
       is not consulted on this path. */
    new_alloc_base = (char* )xmalloc(new_size);
    if (IS_NULL(new_alloc_base)) {
      /* STACK_SAVE may itself return from this function when its own
         allocation fails. */
      STACK_SAVE(msa, *is_alloca, alloc_base);
      return ONIGERR_MEMORY;
    }
    xmemcpy(new_alloc_base, alloc_base, size);
    *is_alloca = 0;
  }
  else {
    /* a limit of 0 disables the check */
    if (msa->match_stack_limit != 0 && n > msa->match_stack_limit) {
      if ((unsigned int )(stk_end - stk_base) == msa->match_stack_limit) {
        STACK_SAVE(msa, *is_alloca, alloc_base);
        return ONIGERR_MATCH_STACK_LIMIT_OVER;
      }
      else
        n = msa->match_stack_limit;  /* clamp the entry count; new_size was
                                        computed before this and is not
                                        reduced */
    }
    new_alloc_base = (char* )xrealloc(alloc_base, new_size);
    if (IS_NULL(new_alloc_base)) {
      STACK_SAVE(msa, *is_alloca, alloc_base);
      return ONIGERR_MEMORY;
    }
  }

  /* publish the new buffer; the top keeps its distance from the base */
  alloc_base = new_alloc_base;
  used = (int )(stk - stk_base);
  *arg_alloc_base = alloc_base;
  *arg_stk_base   = (StackType* )(alloc_base
                                  + (sizeof(StkPtrType) * msa->ptr_num));
  *arg_stk      = *arg_stk_base + used;
  *arg_stk_end  = *arg_stk_base + n;
  return 0;
}
1709 
/*
 * STACK_ENSURE(n): if fewer than `n` entries remain between stk and
 * stk_end, call stack_double() once and refresh the derived arrays with
 * UPDATE_FOR_STACK_REALLOC.  A non-zero result from stack_double() is
 * returned from the enclosing function.  Needs is_alloca, alloc_base,
 * stk_base, stk_end, stk and msa in scope.
 */
#define STACK_ENSURE(n) do {\
    if ((int )(stk_end - stk) < (n)) {\
    int r = stack_double(&is_alloca, &alloc_base, &stk_base, &stk_end, &stk, msa);\
    if (r != 0) return r;\
    UPDATE_FOR_STACK_REALLOC;\
  }\
} while(0)
1717 
/* Conversions between an entry pointer and its index relative to stk_base;
   the two macros are inverses of each other. */
#define STACK_AT(index)        (stk_base + (index))
#define GET_STACK_INDEX(stk)   ((stk) - stk_base)
1720 
/* Push an entry that carries only a type.  STACK_INC is defined elsewhere
   (presumably it advances stk to the next entry). */
#define STACK_PUSH_TYPE(stack_type) do {\
  STACK_ENSURE(1);\
  stk->type = (stack_type);\
  STACK_INC;\
} while(0)

/* True when the entry's type has any bit of STK_MASK_TO_VOID_TARGET set. */
#define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
1728 
/* Push a state entry: type, byte code position `pat` and string position
   `s`.  zid is left untouched. */
#define STACK_PUSH(stack_type,pat,s) do {\
  STACK_ENSURE(1);\
  stk->type = (stack_type);\
  stk->u.state.pcode     = (pat);\
  stk->u.state.pstr      = (s);\
  STACK_INC;\
} while(0)

/* Same as STACK_PUSH, and additionally stores `id` (cast to int) in zid. */
#define STACK_PUSH_WITH_ZID(stack_type,pat,s,id) do {\
  STACK_ENSURE(1);\
  stk->type = (stack_type);\
  stk->zid  = (int )(id);\
  stk->u.state.pcode     = (pat);\
  stk->u.state.pstr      = (s);\
  STACK_INC;\
} while(0)

/* Push type and byte code position only.  Does not call STACK_ENSURE, so
   the caller must already have room for one entry; pstr is not written. */
#define STACK_PUSH_ENSURED(stack_type,pat) do {\
  stk->type = (stack_type);\
  stk->u.state.pcode = (pat);\
  STACK_INC;\
} while(0)
1751 
/* Push type and byte code position without calling STACK_ENSURE.  The
   ONIG_DEBUG_MATCH build also records the current string position `s`
   (taken from the enclosing scope). */
#ifdef ONIG_DEBUG_MATCH
#define STACK_PUSH_BOTTOM(stack_type,pat) do {\
  stk->type = (stack_type);\
  stk->u.state.pcode = (pat);\
  stk->u.state.pstr      = s;\
  STACK_INC;\
} while (0)
#else
#define STACK_PUSH_BOTTOM(stack_type,pat) do {\
  stk->type = (stack_type);\
  stk->u.state.pcode = (pat);\
  STACK_INC;\
} while (0)
#endif
1766 
/* Shorthands that push a state entry of type STK_ALT / STK_SUPER_ALT. */
#define STACK_PUSH_ALT(pat,s)       STACK_PUSH(STK_ALT,pat,s)
#define STACK_PUSH_SUPER_ALT(pat,s) STACK_PUSH(STK_SUPER_ALT,pat,s)
#define STACK_PUSH_ALT_WITH_ZID(pat,s,id) STACK_PUSH_WITH_ZID(STK_ALT,pat,s,id)
1770 
/* Disabled code: it refers to STK_REPEAT and to a u.repeat member, neither
   of which appears among the stack types or StackType members defined
   above. */
#if 0
#define STACK_PUSH_REPEAT(sid, pat) do {\
  STACK_ENSURE(1);\
  stk->type = STK_REPEAT;\
  stk->zid  = (sid);\
  stk->u.repeat.pcode = (pat);\
  STACK_INC;\
} while(0)
#endif
1780 
/* Push a STK_REPEAT_INC entry for repeat `sid` holding the count `ct`.
   When USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR is defined, the previous
   repeat_stk[sid] is saved in the entry and repeat_stk[sid] is pointed at
   it; otherwise those two steps expand to nothing. */
#define STACK_PUSH_REPEAT_INC(sid, ct) do {\
  STACK_ENSURE(1);\
  stk->type = STK_REPEAT_INC;\
  stk->zid  = (sid);\
  stk->u.repeat_inc.count = (ct);\
  SAVE_REPEAT_STK_VAR(sid);\
  LOAD_TO_REPEAT_STK_VAR(sid);\
  STACK_INC;\
} while(0)
1790 
/* Push a STK_MEM_START entry for group `mnum` at string position `s`.
   The previous mem_start_stk/mem_end_stk slots are saved in the entry (so
   STACK_POP can restore them); then the start slot is set to this entry's
   index and the end slot to INVALID_STACK_INDEX. */
#define STACK_PUSH_MEM_START(mnum, s) do {\
  STACK_ENSURE(1);\
  stk->type = STK_MEM_START;\
  stk->zid  = (mnum);\
  stk->u.mem.pstr       = (s);\
  stk->u.mem.prev_start = mem_start_stk[mnum];\
  stk->u.mem.prev_end   = mem_end_stk[mnum];\
  mem_start_stk[mnum].i = GET_STACK_INDEX(stk);\
  mem_end_stk[mnum].i   = INVALID_STACK_INDEX;\
  STACK_INC;\
} while(0)
1802 
/* Push a STK_MEM_END entry for group `mnum` at string position `s`.
   The previous start/end slots are saved in the entry and the end slot is
   set to this entry's index; the start slot is left unchanged. */
#define STACK_PUSH_MEM_END(mnum, s) do {\
  STACK_ENSURE(1);\
  stk->type = STK_MEM_END;\
  stk->zid  = (mnum);\
  stk->u.mem.pstr       = (s);\
  stk->u.mem.prev_start = mem_start_stk[mnum];\
  stk->u.mem.prev_end   = mem_end_stk[mnum];\
  mem_end_stk[mnum].i   = GET_STACK_INDEX(stk);\
  STACK_INC;\
} while(0)
1813 
/* Push a STK_MEM_END_MARK entry for group `mnum`; only type and zid are
   written.  STACK_GET_MEM_START counts it like a MEM_END entry. */
#define STACK_PUSH_MEM_END_MARK(mnum) do {\
  STACK_ENSURE(1);\
  stk->type = STK_MEM_END_MARK;\
  stk->zid  = (mnum);\
  STACK_INC;\
} while(0)
1820 
/*
 * STACK_GET_MEM_START(mnum, k): walk from the stack top towards stk_base
 * and leave `k` on the STK_MEM_START entry of group `mnum` that is not
 * paired with a MEM_END / MEM_END_MARK entry of the same group seen on the
 * way (`level` counts those).  If no such entry exists the loop ends with
 * k == stk_base; the caller has to tell the two outcomes apart.
 */
#define STACK_GET_MEM_START(mnum, k) do {\
  int level = 0;\
  k = stk;\
  while (k > stk_base) {\
    k--;\
    if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
      && k->zid == (mnum)) {\
      level++;\
    }\
    else if (k->type == STK_MEM_START && k->zid == (mnum)) {\
      if (level == 0) break;\
      level--;\
    }\
  }\
} while(0)
1836 
/*
 * STACK_GET_MEM_RANGE(k, mnum, start, end): scan upwards from entry `k` to
 * the stack top for the outermost START/END pair of group `mnum`.
 *
 * `start` receives the position of the first STK_MEM_START entry found,
 * `end` the position of the STK_MEM_END entry that closes it; nested pairs
 * of the same group are skipped by counting `level`.  `k` is advanced as a
 * side effect and is left on the closing entry, or equal to stk when no
 * complete pair was found (in which case `end` is not assigned).
 *
 * The group number of an entry lives in its zid field (that is where
 * STACK_PUSH_MEM_START / STACK_PUSH_MEM_END store it); the mem struct has
 * no "num" member.
 */
#define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
  int level = 0;\
  while (k < stk) {\
    if (k->type == STK_MEM_START && k->zid == (mnum)) {\
      if (level == 0) (start) = k->u.mem.pstr;\
      level++;\
    }\
    else if (k->type == STK_MEM_END && k->zid == (mnum)) {\
      level--;\
      if (level == 0) {\
        (end) = k->u.mem.pstr;\
        break;\
      }\
    }\
    k++;\
  }\
} while(0)
1854 
/* Push a STK_EMPTY_CHECK_START entry for empty check `cnum` at string
   position `s`.  When USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR is defined, the
   previous empty_check_stk[cnum] is saved in the entry and the slot is
   pointed at it; otherwise those two steps expand to nothing. */
#define STACK_PUSH_EMPTY_CHECK_START(cnum, s) do {\
  STACK_ENSURE(1);\
  stk->type = STK_EMPTY_CHECK_START;\
  stk->zid  = (cnum);\
  stk->u.empty_check.pstr = (s);\
  SAVE_EMPTY_CHECK_STK_VAR(cnum);\
  LOAD_TO_EMPTY_CHECK_STK_VAR(cnum);\
  STACK_INC;\
} while(0)

/* Push a STK_EMPTY_CHECK_END entry for empty check `cnum`; only type and
   zid are written. */
#define STACK_PUSH_EMPTY_CHECK_END(cnum) do {\
  STACK_ENSURE(1);\
  stk->type = STK_EMPTY_CHECK_END;\
  stk->zid  = (cnum);\
  STACK_INC;\
} while(0)
1871 
/* Push a STK_CALL_FRAME entry whose return address is `pat`.  The
   call_frame member exists only when USE_CALL is defined. */
#define STACK_PUSH_CALL_FRAME(pat) do {\
  STACK_ENSURE(1);\
  stk->type = STK_CALL_FRAME;\
  stk->u.call_frame.ret_addr = (pat);\
  STACK_INC;\
} while(0)

/* Push a STK_RETURN entry; only the type is written. */
#define STACK_PUSH_RETURN do {\
  STACK_ENSURE(1);\
  stk->type = STK_RETURN;\
  STACK_INC;\
} while(0)
1884 
/* Push a STK_MARK entry with id `sid`; only type and zid are written. */
#define STACK_PUSH_MARK(sid) do {\
  STACK_ENSURE(1);\
  stk->type = STK_MARK;\
  stk->zid  = (sid);\
  STACK_INC;\
} while(0)

/* Push a STK_MARK entry with id `sid` that also records the position `s`
   in u.val.v. */
#define STACK_PUSH_MARK_WITH_POS(sid, s) do {\
  STACK_ENSURE(1);\
  stk->type = STK_MARK;\
  stk->zid  = (sid);\
  stk->u.val.v  = (UChar* )(s);\
  STACK_INC;\
} while(0)
1899 
/* Push a STK_SAVE_VAL entry: id `sid`, save type `stype` and value `sval`
   (stored in u.val.v; u.val.v2 is not written). */
#define STACK_PUSH_SAVE_VAL(sid, stype, sval) do {\
  STACK_ENSURE(1);\
  stk->type = STK_SAVE_VAL;\
  stk->zid  = (sid);\
  stk->u.val.type = (stype);\
  stk->u.val.v    = (UChar* )(sval);\
  STACK_INC;\
} while(0)

/* Currently expands to exactly the same code as STACK_PUSH_SAVE_VAL. */
#define STACK_PUSH_SAVE_VAL_WITH_SPREV(sid, stype, sval) do {\
  STACK_ENSURE(1);\
  stk->type = STK_SAVE_VAL;\
  stk->zid  = (sid);\
  stk->u.val.type = (stype);\
  stk->u.val.v    = (UChar* )(sval);\
  STACK_INC;\
} while(0)
1917 
/* Search from the stack top downwards for the most recent STK_SAVE_VAL
   entry of save type `stype`, whatever its id, and copy its value into
   `sval`.  `sval` is left unchanged when no such entry exists. */
#define STACK_GET_SAVE_VAL_TYPE_LAST(stype, sval) do {\
  StackType *k = stk;\
  while (k > stk_base) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST"); \
    if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)) {\
      (sval) = k->u.val.v;\
      break;\
    }\
  }\
} while (0)
1929 
/*
 * Search from the stack top downwards for the most recent STK_SAVE_VAL
 * entry with save type `stype` and id `sid`, and copy its value into
 * `sval`.
 *
 * `level` is raised by each STK_RETURN entry and lowered by each
 * STK_CALL_FRAME entry passed on the way; a matching entry is accepted only
 * while level is 0.  When `clear` is non-zero the accepted entry is turned
 * into STK_VOID.  `sval` is left unchanged when nothing is found.
 */
#define STACK_GET_SAVE_VAL_TYPE_LAST_ID(stype, sid, sval, clear) do {\
  int level = 0;\
  StackType *k = stk;\
  while (k > stk_base) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID"); \
    if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\
        && k->zid == (sid)) {\
      if (level == 0) {\
        (sval) = k->u.val.v;\
        if (clear != 0) k->type = STK_VOID;\
        break;\
      }\
    }\
    else if (k->type == STK_CALL_FRAME)\
      level--;\
    else if (k->type == STK_RETURN)\
      level++;\
  }\
} while (0)
1950 
/*
 * Same search as STACK_GET_SAVE_VAL_TYPE_LAST_ID, but the accepted entry is
 * never cleared.
 *
 * NOTE(review): the label passed to STACK_BASE_CHECK is
 * "STACK_GET_SAVE_VAL_TYPE_LAST_ID", i.e. the name of the sibling macro, so
 * debug output cannot tell the two apart.
 */
#define STACK_GET_SAVE_VAL_TYPE_LAST_ID_WITH_SPREV(stype, sid, sval) do { \
  int level = 0;\
  StackType *k = stk;\
  while (k > stk_base) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID"); \
    if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\
        && k->zid == (sid)) {\
      if (level == 0) {\
        (sval) = k->u.val.v;\
        break;\
      }\
    }\
    else if (k->type == STK_CALL_FRAME)\
      level--;\
    else if (k->type == STK_RETURN)\
      level++;\
  }\
} while (0)
1970 
/* Push a STK_CALLOUT entry for callout number `anum` with function `func`
   and zid = ONIG_NON_NAME_ID.  STK_CALLOUT and the callout member exist
   only when USE_CALLOUT is defined. */
#define STACK_PUSH_CALLOUT_CONTENTS(anum, func) do {\
  STACK_ENSURE(1);\
  stk->type = STK_CALLOUT;\
  stk->zid  = ONIG_NON_NAME_ID;\
  stk->u.callout.num = (anum);\
  stk->u.callout.func = (func);\
  STACK_INC;\
} while(0)

/* Same as STACK_PUSH_CALLOUT_CONTENTS, but zid is set to the name id
   `aid`. */
#define STACK_PUSH_CALLOUT_NAME(aid, anum, func) do {\
  STACK_ENSURE(1);\
  stk->type = STK_CALLOUT;\
  stk->zid  = (aid);\
  stk->u.callout.num = (anum);\
  stk->u.callout.func = (func);\
  STACK_INC;\
} while(0)
1988 
/* Debug-only guard: when `p` has moved below stk_base, print the location
   label `at` to DBGFP and leave through
   MATCH_AT_ERROR_RETURN(ONIGERR_STACK_BUG).  The expansion is a bare "if"
   block, not wrapped in do/while.  Without ONIG_DEBUG it expands to
   nothing. */
#ifdef ONIG_DEBUG
#define STACK_BASE_CHECK(p, at) \
  if ((p) < stk_base) {\
    fprintf(DBGFP, "at %s\n", at);\
    MATCH_AT_ERROR_RETURN(ONIGERR_STACK_BUG);\
  }
#else
#define STACK_BASE_CHECK(p, at)
#endif
1998 
/* Discard exactly one entry: step stk down by one, then run the base check. */
#define STACK_POP_ONE do {\
  --stk;\
  STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
} while(0)
2003 
2004 
/*
 * POP_CALLOUT_CASE: an `else if` arm that STACK_POP splices into its chain
 * of handled entry types.
 *
 * With USE_CALLOUT defined, a popped STK_CALLOUT entry invokes
 * RETRACTION_CALLOUT with the entry's function, id, number and
 * msa->mp->callout_user_data.  Without USE_CALLOUT it expands to nothing.
 */
#ifdef USE_CALLOUT
#define POP_CALLOUT_CASE \
  else if (stk->type == STK_CALLOUT) {\
    RETRACTION_CALLOUT(stk->u.callout.func, stk->zid, stk->u.callout.num, msa->mp->callout_user_data);\
  }
#else
#define POP_CALLOUT_CASE
#endif
2013 
/*
 * STACK_POP: pop entries until stk points at an entry whose type has a
 * STK_MASK_POP_USED bit set.  How much bookkeeping is undone on the way
 * depends on pop_level:
 *
 *   STACK_POP_LEVEL_FREE      - nothing is restored; entries are skipped.
 *   STACK_POP_LEVEL_MEM_START - each skipped STK_MEM_START entry restores
 *                               mem_start_stk[]/mem_end_stk[] for its id
 *                               from the saved prev_start/prev_end.
 *   default                   - for every skipped entry matching
 *                               STK_MASK_POP_HANDLED: STK_MEM_START and
 *                               STK_MEM_END restore the capture slots as
 *                               above, and the POP_REPEAT_INC,
 *                               POP_EMPTY_CHECK_START, POP_CALL and
 *                               POP_CALLOUT_CASE fragments (defined
 *                               elsewhere; spliced in as further branches)
 *                               handle their own entry types.
 */
#define STACK_POP  do {\
  switch (pop_level) {\
  case STACK_POP_LEVEL_FREE:\
    while (1) {\
      stk--;\
      STACK_BASE_CHECK(stk, "STACK_POP"); \
      if ((stk->type & STK_MASK_POP_USED) != 0)  break;\
    }\
    break;\
  case STACK_POP_LEVEL_MEM_START:\
    while (1) {\
      stk--;\
      STACK_BASE_CHECK(stk, "STACK_POP 2"); \
      if ((stk->type & STK_MASK_POP_USED) != 0)  break;\
      else if (stk->type == STK_MEM_START) {\
        mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
        mem_end_stk[stk->zid]   = stk->u.mem.prev_end;\
      }\
    }\
    break;\
  default:\
    while (1) {\
      stk--;\
      STACK_BASE_CHECK(stk, "STACK_POP 3"); \
      if ((stk->type & STK_MASK_POP_USED) != 0)  break;\
      else if ((stk->type & STK_MASK_POP_HANDLED) != 0) {\
        if (stk->type == STK_MEM_START) {\
          mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
          mem_end_stk[stk->zid]   = stk->u.mem.prev_end;\
        }\
        else if (stk->type == STK_MEM_END) {\
          mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
          mem_end_stk[stk->zid]   = stk->u.mem.prev_end;\
        }\
        POP_REPEAT_INC \
        POP_EMPTY_CHECK_START \
        POP_CALL \
        POP_CALLOUT_CASE\
      }\
    }\
    break;\
  }\
} while(0)
2057 
/*
 * STACK_POP_TO_MARK(sid): pop entries until stk points at the STK_MARK
 * entry whose id equals `sid`.
 *
 * Only entries matching STK_MASK_POP_HANDLED_TIL are looked at.  STK_MARK
 * entries with a different id are skipped; STK_MEM_START / STK_MEM_END
 * entries restore mem_start_stk[]/mem_end_stk[] for their id from the saved
 * prev_start/prev_end; the POP_REPEAT_INC, POP_EMPTY_CHECK_START and
 * POP_CALL fragments (defined elsewhere) handle their own entry types.
 * Callouts are intentionally not invoked here (see the comment in the body).
 */
#define STACK_POP_TO_MARK(sid) do {\
  while (1) {\
    stk--;\
    STACK_BASE_CHECK(stk, "STACK_POP_TO_MARK");\
    if ((stk->type & STK_MASK_POP_HANDLED_TIL) != 0) {\
      if (stk->type == STK_MARK) {\
        if (stk->zid == (sid)) break;\
      }\
      else {\
        if (stk->type == STK_MEM_START) {\
          mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
          mem_end_stk[stk->zid]   = stk->u.mem.prev_end;\
        }\
        else if (stk->type == STK_MEM_END) {\
          mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
          mem_end_stk[stk->zid]   = stk->u.mem.prev_end;\
        }\
        POP_REPEAT_INC \
        POP_EMPTY_CHECK_START \
        POP_CALL \
        /* Don't call callout here because negation of total success by (?!..) (?<!..) */\
      }\
    }\
  }\
} while(0)
2083 
2084 
/*
 * POP_TIL_BODY(aname, til_type): pop entries until stk points at the first
 * entry whose type equals `til_type`.  `aname` is the label handed to
 * STACK_BASE_CHECK for debug reports.
 *
 * Only entries matching STK_MASK_POP_HANDLED_TIL are looked at.  On the
 * way down, STK_MEM_START / STK_MEM_END entries restore
 * mem_start_stk[]/mem_end_stk[] for their id from the saved
 * prev_start/prev_end; the POP_REPEAT_INC, POP_EMPTY_CHECK_START and
 * POP_CALL fragments (defined elsewhere) handle their own entry types.
 * Callouts are intentionally not invoked here (see the comment in the body).
 */
#define POP_TIL_BODY(aname, til_type) do {\
  while (1) {\
    stk--;\
    STACK_BASE_CHECK(stk, (aname));\
    if ((stk->type & STK_MASK_POP_HANDLED_TIL) != 0) {\
      if (stk->type == (til_type)) break;\
      else {\
        if (stk->type == STK_MEM_START) {\
          mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
          mem_end_stk[stk->zid]   = stk->u.mem.prev_end;\
        }\
        else if (stk->type == STK_MEM_END) {\
          mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
          mem_end_stk[stk->zid]   = stk->u.mem.prev_end;\
        }\
        POP_REPEAT_INC \
        POP_EMPTY_CHECK_START \
        POP_CALL \
        /* Don't call callout here because negation of total success by (?!..) (?<!..) */\
      }\
    }\
  }\
} while(0)
2108 
2109 
/*
 * STACK_TO_VOID_TO_MARK(k, sid): neutralize entries in place instead of
 * popping them.  `k` is a caller-supplied StackType* variable used as the
 * cursor; stk itself is not modified.
 *
 * Starting just below stk and walking down, every entry for which
 * IS_TO_VOID_TARGET() holds is rewritten to STK_VOID, except STK_MARK
 * entries carrying a different id, which are kept.  The walk ends after
 * the STK_MARK entry with id `sid` has itself been voided.
 */
#define STACK_TO_VOID_TO_MARK(k,sid) do {\
  k = stk;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_TO_VOID_TO_MARK");\
    if (IS_TO_VOID_TARGET(k)) {\
      if (k->type == STK_MARK) {\
        if (k->zid == (sid)) {\
          k->type = STK_VOID;\
          break;\
        } /* don't void different id mark */ \
      }\
      else\
        k->type = STK_VOID;\
    }\
  }\
} while(0)
2127 
/*
 * EMPTY_CHECK_START_SEARCH(sid, k): set the caller-supplied cursor `k` to
 * the nearest STK_EMPTY_CHECK_START entry below stk whose id equals `sid`.
 * The loop has no exit other than finding that entry (apart from the
 * debug-only base check).
 */
#define EMPTY_CHECK_START_SEARCH(sid, k) do {\
  k = stk;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "EMPTY_CHECK_START_SEARCH"); \
    if (k->type == STK_EMPTY_CHECK_START) {\
      if (k->zid == (sid)) break;\
    }\
  }\
} while(0)
2138 
/*
 * GET_EMPTY_CHECK_START(sid, k): point `k` at the STK_EMPTY_CHECK_START
 * entry for id `sid`.
 *
 * With USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR and a pattern without calls
 * (reg->num_call == 0) the entry is looked up directly through
 * empty_check_stk[sid]; in every other case the stack is searched with
 * EMPTY_CHECK_START_SEARCH.
 */
#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR

#define GET_EMPTY_CHECK_START(sid, k) do {\
  if (reg->num_call == 0) {\
    k = STACK_AT(empty_check_stk[sid]);\
  }\
  else {\
    EMPTY_CHECK_START_SEARCH(sid, k);\
  }\
} while(0)
#else

#define GET_EMPTY_CHECK_START(sid, k)  EMPTY_CHECK_START_SEARCH(sid, k)

#endif
2154 
2155 
/*
 * STACK_EMPTY_CHECK(isnull, sid, s): set `isnull` to non-zero when the
 * string position recorded in the STK_EMPTY_CHECK_START entry for id `sid`
 * equals the current position `s`, and to 0 otherwise.
 */
#define STACK_EMPTY_CHECK(isnull, sid, s) do {\
  StackType* k;\
  GET_EMPTY_CHECK_START(sid, k);\
  (isnull) = (k->u.empty_check.pstr == (s));\
} while(0)
2161 
/*
 * STACK_MEM_START_GET_PREV_END_ADDR(k, reg, addr): for the STK_MEM_START
 * entry `k`, store into `addr` the end address saved in k->u.mem.prev_end.
 *
 *   - prev_end.i == INVALID_STACK_INDEX : addr = 0 (no previous end).
 *   - group flagged in reg->push_mem_end : prev_end.i is a stack index and
 *     the address is read from that entry's u.mem.pstr.
 *   - otherwise                          : prev_end.s is the address itself.
 */
#define STACK_MEM_START_GET_PREV_END_ADDR(k /* STK_MEM_START*/, reg, addr) do {\
  if (k->u.mem.prev_end.i == INVALID_STACK_INDEX) {\
    (addr) = 0;\
  }\
  else {\
    if (MEM_STATUS_AT((reg)->push_mem_end, k->zid))\
      (addr) = STACK_AT(k->u.mem.prev_end.i)->u.mem.pstr;\
    else\
      (addr) = k->u.mem.prev_end.s;\
  }\
} while (0)
2173 
2174 #ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
/*
 * STACK_EMPTY_CHECK_MEM(isnull, sid, s, reg): empty-loop check that also
 * inspects capture groups.
 *
 * Result in `isnull`:
 *    0  the position recorded at the check start differs from `s`, or one
 *       of the inspected groups has no previous end / has a previous start
 *       that differs from its previous end;
 *    1  the position is unchanged and no inspected group says otherwise;
 *   -1  as for 1, but some inspected group's previous end differs from `s`
 *       ("empty, but position changed").
 *
 * Inspected groups are the STK_MEM_START entries between the check-start
 * entry and stk whose id is flagged in reg->empty_status_mem
 * (MEM_STATUS_LIMIT_AT).
 */
#define STACK_EMPTY_CHECK_MEM(isnull, sid, s, reg) do {\
  StackType* k;\
  GET_EMPTY_CHECK_START(sid, k);\
  if (k->u.empty_check.pstr != (s)) {\
    (isnull) = 0;\
  }\
  else {\
    UChar* endp;\
    (isnull) = 1;\
    while (k < stk) {\
      if (k->type == STK_MEM_START &&\
        MEM_STATUS_LIMIT_AT((reg)->empty_status_mem, k->zid)) {\
        STACK_MEM_START_GET_PREV_END_ADDR(k, reg, endp);\
        if (endp == 0) {\
          (isnull) = 0; break;\
        }\
        else if (STACK_AT(k->u.mem.prev_start.i)->u.mem.pstr != endp) {\
          (isnull) = 0; break;\
        }\
        else if (endp != s) {\
          (isnull) = -1; /* empty, but position changed */ \
        }\
      }\
      k++;\
    }\
  }\
} while(0)
2202 
/*
 * STACK_EMPTY_CHECK_MEM_REC(isnull, sid, s, reg): variant of
 * STACK_EMPTY_CHECK_MEM that always searches the stack and keeps a nesting
 * level for check entries with the same id.
 *
 * Downward search: a STK_EMPTY_CHECK_END with id `sid` raises `level`, a
 * STK_EMPTY_CHECK_START with id `sid` found at a non-zero level lowers it;
 * the START found at level 0 is the one that is evaluated.
 *
 * Evaluation (same result codes as STACK_EMPTY_CHECK_MEM: 0, 1 or -1):
 * if the recorded position differs from `s` the result is 0.  Otherwise
 * the entries from that START up to stk are scanned forward; START/END
 * entries with id `sid` adjust `level` again, and only STK_MEM_START
 * entries seen at level 0 whose id is flagged in reg->empty_status_mem are
 * inspected.
 */
#define STACK_EMPTY_CHECK_MEM_REC(isnull,sid,s,reg) do {\
  int level = 0;\
  StackType* k = stk;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEM_REC");\
    if (k->type == STK_EMPTY_CHECK_START) {\
      if (k->zid == (sid)) {\
        if (level == 0) {\
          if (k->u.empty_check.pstr != (s)) {\
            (isnull) = 0;\
            break;\
          }\
          else {\
            UChar* endp;\
            (isnull) = 1;\
            while (k < stk) {\
              if (k->type == STK_MEM_START) {\
                if (level == 0 && \
                  MEM_STATUS_LIMIT_AT((reg)->empty_status_mem, k->zid) !=0) {\
                  STACK_MEM_START_GET_PREV_END_ADDR(k, reg, endp);\
                  if (endp == 0) {\
                    (isnull) = 0; break;\
                  }\
                  else if (STACK_AT(k->u.mem.prev_start.i)->u.mem.pstr != endp) { \
                    (isnull) = 0; break;\
                  }\
                  else if (endp != s) {\
                    (isnull) = -1; /* empty, but position changed */\
                  }\
                }\
              }\
              else if (k->type == STK_EMPTY_CHECK_START) {\
                if (k->zid == (sid)) level++;\
              }\
              else if (k->type == STK_EMPTY_CHECK_END) {\
                if (k->zid == (sid)) level--;\
              }\
              k++;\
            }\
            break;\
          }\
        }\
        else {\
          level--;\
        }\
      }\
    }\
    else if (k->type == STK_EMPTY_CHECK_END) {\
      if (k->zid == (sid)) level++;\
    }\
  }\
} while(0)
2256 #else
/*
 * STACK_EMPTY_CHECK_REC(isnull, id, s): level-aware empty-loop check used
 * when USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT is not defined.
 *
 * Walks down from stk.  Every STK_EMPTY_CHECK_END raises `level`; every
 * STK_EMPTY_CHECK_START lowers it, except that a START whose number equals
 * `id` and is seen at level 0 ends the search and sets `isnull` to whether
 * its recorded position equals `s`.
 *
 * NOTE(review): this macro identifies the entry through
 * k->u.empty_check.num, whereas the other empty-check macros in this file
 * compare k->zid -- confirm that the `num` member still exists and that
 * this branch still builds.
 */
#define STACK_EMPTY_CHECK_REC(isnull,id,s) do {\
  int level = 0;\
  StackType* k = stk;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_REC"); \
    if (k->type == STK_EMPTY_CHECK_START) {\
      if (k->u.empty_check.num == (id)) {\
        if (level == 0) {\
          (isnull) = (k->u.empty_check.pstr == (s));\
          break;\
        }\
      }\
      level--;\
    }\
    else if (k->type == STK_EMPTY_CHECK_END) {\
      level++;\
    }\
  }\
} while(0)
2277 #endif /* USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT */
2278 
/*
 * STACK_GET_REPEAT_COUNT_SEARCH(sid, c): search the stack downward from stk
 * for the nearest STK_REPEAT_INC entry with id `sid` and store its count
 * in `c`.
 *
 * When a STK_RETURN entry is met, everything down to its balancing
 * STK_CALL_FRAME is skipped (nested RETURN/CALL_FRAME pairs are counted
 * with `level`), so repeat entries inside that span are not considered.
 *
 * Both loops run STACK_BASE_CHECK after every step down, so that in debug
 * builds an unbalanced STK_RETURN is reported as a stack bug instead of
 * walking below stk_base unchecked.
 */
#define STACK_GET_REPEAT_COUNT_SEARCH(sid, c) do {\
  StackType* k = stk;\
  while (1) {\
    (k)--;\
    STACK_BASE_CHECK(k, "STACK_GET_REPEAT_COUNT_SEARCH");\
    if ((k)->type == STK_REPEAT_INC) {\
      if ((k)->zid == (sid)) {\
        (c) = (k)->u.repeat_inc.count;\
        break;\
      }\
    }\
    else if ((k)->type == STK_RETURN) {\
      int level = -1;\
      while (1) {\
        (k)--;\
        STACK_BASE_CHECK(k, "STACK_GET_REPEAT_COUNT_SEARCH");\
        if ((k)->type == STK_CALL_FRAME) {\
          level++;\
          if (level == 0) break;\
        }\
        else if ((k)->type == STK_RETURN) level--;\
      }\
    }\
  }\
} while(0)
2303 
/*
 * STACK_GET_REPEAT_COUNT(sid, c): store into `c` the current count of the
 * repeat with id `sid`.
 *
 * With USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR and a pattern without calls
 * (reg->num_call == 0) the entry is read directly through repeat_stk[sid];
 * in every other case the stack is searched with
 * STACK_GET_REPEAT_COUNT_SEARCH.
 */
#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR

#define STACK_GET_REPEAT_COUNT(sid, c) do {\
  if (reg->num_call == 0) {\
    (c) = (STACK_AT(repeat_stk[sid]))->u.repeat_inc.count;\
  }\
  else {\
    STACK_GET_REPEAT_COUNT_SEARCH(sid, c);\
  }\
} while(0)
#else
#define STACK_GET_REPEAT_COUNT(sid, c) STACK_GET_REPEAT_COUNT_SEARCH(sid, c)
#endif
2317 
2318 #ifdef USE_CALL
/*
 * STACK_RETURN(addr): find the STK_CALL_FRAME entry that belongs to the
 * current call and store its return address in `addr`.
 *
 * Walking down from stk, each STK_RETURN raises `level` and each
 * STK_CALL_FRAME met at a non-zero level lowers it; the first
 * STK_CALL_FRAME met at level 0 supplies the address.
 */
#define STACK_RETURN(addr)  do {\
  int level = 0;\
  StackType* k = stk;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_RETURN"); \
    if (k->type == STK_CALL_FRAME) {\
      if (level == 0) {\
        (addr) = k->u.call_frame.ret_addr;\
        break;\
      }\
      else level--;\
    }\
    else if (k->type == STK_RETURN)\
      level++;\
  }\
} while(0)
2336 
/*
 * GET_STACK_RETURN_CALL(k, addr): same search as STACK_RETURN, but the
 * cursor `k` is supplied by the caller, so on exit `k` points at the
 * STK_CALL_FRAME entry that was found and `addr` holds its return address.
 */
#define GET_STACK_RETURN_CALL(k,addr) do {\
  int level = 0;\
  k = stk;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "GET_STACK_RETURN_CALL");\
    if (k->type == STK_CALL_FRAME) {\
      if (level == 0) {\
        (addr) = k->u.call_frame.ret_addr;\
        break;\
      }\
      else level--;\
    }\
    else if (k->type == STK_RETURN)\
      level++;\
  }\
} while(0)
2354 #endif
2355 
2356 
/*
 * STRING_CMP(s1, s2, len): compare `len` bytes at s1 and s2 and jump to the
 * `fail` label at the first difference.
 *
 * All three arguments must be modifiable lvalues: s1 and s2 are advanced
 * past every byte examined (including a mismatching one) and len is counted
 * down.
 */
#define STRING_CMP(s1,s2,len) do {\
  while (len-- > 0) {\
    if (*s1++ != *s2++) goto fail;\
  }\
} while(0)
2362 
/*
 * STRING_CMP_IC(case_fold_flag, s1, ps2, len): case-insensitive comparison
 * via string_cmp_ic() using the enclosing scope's `encode`; jumps to the
 * `fail` label when string_cmp_ic() returns 0.  `ps2` is a pointer to the
 * second position, which string_cmp_ic() updates on success.
 */
#define STRING_CMP_IC(case_fold_flag,s1,ps2,len) do {\
  if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \
    goto fail; \
} while(0)
2367 
/*
 * Case-insensitive comparison of two byte ranges of length mblen.
 *
 * The first range starts at s1, the second at *ps2.  Both are consumed one
 * case-folded unit at a time with ONIGENC_MBC_CASE_FOLD; the folded byte
 * sequences have to agree in length and content.
 *
 * Returns 1 when the ranges are equal under case folding, in which case
 * *ps2 is moved to where the scan of the second range stopped.
 * Returns 0 on any difference; *ps2 is then left unchanged.
 */
static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
                         UChar* s1, UChar** ps2, int mblen)
{
  UChar folded1[ONIGENC_MBC_CASE_FOLD_MAXLEN];
  UChar folded2[ONIGENC_MBC_CASE_FOLD_MAXLEN];
  UChar* cur2   = *ps2;
  UChar* limit1 = s1 + mblen;
  UChar* limit2 = cur2 + mblen;
  int n1, n2, i;

  while (s1 < limit1) {
    n1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, limit1, folded1);
    n2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &cur2, limit2, folded2);
    if (n1 != n2) return 0;

    for (i = 0; i < n1; i++) {
      if (folded1[i] != folded2[i]) return 0;
    }

    /* second range used up: equal only if the first one is used up too */
    if (cur2 >= limit2) {
      if (s1 < limit1) return 0;
      break;
    }
  }

  *ps2 = cur2;
  return 1;
}
2399 
/*
 * STRING_CMP_VALUE(s1, s2, len, is_fail): compare `len` bytes at s1 and s2
 * and report the outcome in `is_fail` (1 on the first difference, 0 when
 * all bytes agree) instead of jumping.
 *
 * s1, s2 and len must be modifiable lvalues: the pointers are advanced past
 * every byte examined (including a mismatching one) and len is counted
 * down.
 */
#define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
  for (is_fail = 0; len-- > 0; ) {\
    if (*s1++ == *s2++) continue;\
    is_fail = 1;\
    break;\
  }\
} while(0)
2408 
/*
 * STRING_CMP_VALUE_IC(case_fold_flag, s1, ps2, len, is_fail):
 * case-insensitive comparison via string_cmp_ic() using the enclosing
 * scope's `encode`.  `is_fail` becomes 1 when string_cmp_ic() returns 0
 * and 0 otherwise.
 */
#define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,is_fail) do {\
  is_fail = (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) ? 1 : 0; \
} while(0)
2415 
2416 
/*
 * Position/range helpers.  They read the enclosing function's locals
 * `str`, `end`, `s`, `right_range` and `in_right_range`.
 *
 *   IS_EMPTY_STR          subject string has zero length
 *   ON_STR_BEGIN(s)       s is at the start of the subject
 *   ON_STR_END(s)         s is at the end of the subject
 *   DATA_ENSURE_CHECK1    at least one byte is left before right_range
 *   DATA_ENSURE_CHECK(n)  at least n bytes are left before right_range
 *   DATA_ENSURE(n)        jump to `fail` unless n bytes are left
 *                         (a bare `if`; use as a stand-alone statement)
 *   INIT_RIGHT_RANGE      reset right_range from in_right_range
 */
#define IS_EMPTY_STR           (str == end)
#define ON_STR_BEGIN(s)        ((s) == str)
#define ON_STR_END(s)          ((s) == end)
#define DATA_ENSURE_CHECK1     (s < right_range)
#define DATA_ENSURE_CHECK(n)   (s + (n) <= right_range)
#define DATA_ENSURE(n)         if (right_range - s < (n)) goto fail

#define INIT_RIGHT_RANGE    right_range = (UChar* )in_right_range
2425 
2426 #ifdef USE_CAPTURE_HISTORY
2427 static int
make_capture_history_tree(OnigCaptureTreeNode * node,StackType ** kp,StackType * stk_top,UChar * str,regex_t * reg)2428 make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp,
2429                           StackType* stk_top, UChar* str, regex_t* reg)
2430 {
2431   int n, r;
2432   OnigCaptureTreeNode* child;
2433   StackType* k = *kp;
2434 
2435   while (k < stk_top) {
2436     if (k->type == STK_MEM_START) {
2437       n = k->zid;
2438       if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
2439           MEM_STATUS_AT(reg->capture_history, n) != 0) {
2440         child = history_node_new();
2441         CHECK_NULL_RETURN_MEMERR(child);
2442         child->group = n;
2443         child->beg = (int )(k->u.mem.pstr - str);
2444         r = history_tree_add_child(node, child);
2445         if (r != 0) return r;
2446         *kp = (k + 1);
2447         r = make_capture_history_tree(child, kp, stk_top, str, reg);
2448         if (r != 0) return r;
2449 
2450         k = *kp;
2451         child->end = (int )(k->u.mem.pstr - str);
2452       }
2453     }
2454     else if (k->type == STK_MEM_END) {
2455       if (k->zid == node->group) {
2456         node->end = (int )(k->u.mem.pstr - str);
2457         *kp = k;
2458         return 0;
2459       }
2460     }
2461     k++;
2462   }
2463 
2464   return 1; /* 1: root node ending. */
2465 }
2466 #endif
2467 
2468 #ifdef USE_BACKREF_WITH_LEVEL
/*
 * Membership test: returns 1 when group number `mem` occurs among the first
 * `num` elements of `memp`, 0 otherwise (including when num <= 0).
 */
static int mem_is_in_memp(int mem, int num, MemNumType* memp)
{
  int remaining;

  for (remaining = num; remaining > 0; remaining--, memp++) {
    if ((int )*memp == mem) return 1;
  }
  return 0;
}
2478 
2479 static int
backref_match_at_nested_level(regex_t * reg,StackType * top,StackType * stk_base,int ignore_case,int case_fold_flag,int nest,int mem_num,MemNumType * memp,UChar ** s,const UChar * send)2480 backref_match_at_nested_level(regex_t* reg,
2481                               StackType* top, StackType* stk_base,
2482                               int ignore_case, int case_fold_flag,
2483                               int nest, int mem_num, MemNumType* memp,
2484                               UChar** s, const UChar* send)
2485 {
2486   UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
2487   int level;
2488   StackType* k;
2489 
2490   level = 0;
2491   k = top;
2492   k--;
2493   while (k >= stk_base) {
2494     if (k->type == STK_CALL_FRAME) {
2495       level--;
2496     }
2497     else if (k->type == STK_RETURN) {
2498       level++;
2499     }
2500     else if (level == nest) {
2501       if (k->type == STK_MEM_START) {
2502         if (mem_is_in_memp(k->zid, mem_num, memp)) {
2503           pstart = k->u.mem.pstr;
2504           if (IS_NOT_NULL(pend)) {
2505             if (pend - pstart > send - *s) return 0; /* or goto next_mem; */
2506             p  = pstart;
2507             ss = *s;
2508 
2509             if (ignore_case != 0) {
2510               if (string_cmp_ic(reg->enc, case_fold_flag,
2511                                 pstart, &ss, (int )(pend - pstart)) == 0)
2512                 return 0; /* or goto next_mem; */
2513             }
2514             else {
2515               while (p < pend) {
2516                 if (*p++ != *ss++) return 0; /* or goto next_mem; */
2517               }
2518             }
2519 
2520             *s = ss;
2521             return 1;
2522           }
2523         }
2524       }
2525       else if (k->type == STK_MEM_END) {
2526         if (mem_is_in_memp(k->zid, mem_num, memp)) {
2527           pend = k->u.mem.pstr;
2528         }
2529       }
2530     }
2531     k--;
2532   }
2533 
2534   return 0;
2535 }
2536 
2537 static int
backref_check_at_nested_level(regex_t * reg,StackType * top,StackType * stk_base,int nest,int mem_num,MemNumType * memp)2538 backref_check_at_nested_level(regex_t* reg,
2539                               StackType* top, StackType* stk_base,
2540                               int nest, int mem_num, MemNumType* memp)
2541 {
2542   int level;
2543   StackType* k;
2544 
2545   level = 0;
2546   k = top;
2547   k--;
2548   while (k >= stk_base) {
2549     if (k->type == STK_CALL_FRAME) {
2550       level--;
2551     }
2552     else if (k->type == STK_RETURN) {
2553       level++;
2554     }
2555     else if (level == nest) {
2556       if (k->type == STK_MEM_END) {
2557         if (mem_is_in_memp(k->zid, mem_num, memp)) {
2558           return 1;
2559         }
2560       }
2561     }
2562     k--;
2563   }
2564 
2565   return 0;
2566 }
2567 #endif /* USE_BACKREF_WITH_LEVEL */
2568 
/* File-scope setting, initialized to DEFAULT_SUBEXP_CALL_MAX_NEST_LEVEL.
   Presumably the maximum nesting depth allowed for subexp calls; its users
   are elsewhere in the file -- confirm there. */
static int SubexpCallMaxNestLevel = DEFAULT_SUBEXP_CALL_MAX_NEST_LEVEL;
2570 
2571 #ifdef ONIG_DEBUG_STATISTICS
2572 
/*
 * Time source for the opcode statistics.
 *
 * With USE_TIMEOFDAY: gettimeofday() samples; TIMEDIFF yields the
 * difference in microseconds.
 * Otherwise: times() samples; TIMEDIFF yields the difference of the
 * tms_utime fields.
 * `ts` and `te` are the shared start/end samples used by SOP_IN/SOP_OUT.
 */
#ifdef USE_TIMEOFDAY

static struct timeval ts, te;
#define GETTIME(t)        gettimeofday(&(t), (struct timezone* )0)
#define TIMEDIFF(te,ts)   (((te).tv_usec - (ts).tv_usec) + \
                           (((te).tv_sec - (ts).tv_sec)*1000000))
#else

static struct tms ts, te;
#define GETTIME(t)         times(&(t))
#define TIMEDIFF(te,ts)   ((te).tms_utime - (ts).tms_utime)

#endif /* USE_TIMEOFDAY */
2586 
/*
 * Statistics state (file scope, shared by every match).
 *
 *   OpCounter[op]     how often opcode `op` was entered
 *   OpPrevCounter[op] how often `op` was the opcode executed immediately
 *                     before an OpPrevTarget opcode
 *   OpTime[op]        accumulated TIMEDIFF spent in `op`
 *   OpCurr            opcode currently being timed
 *   OpPrevTarget      opcode whose predecessors are counted
 *   MaxStackDepth     largest stack depth seen by STACK_INC
 */
static int OpCounter[256];
static int OpPrevCounter[256];
static unsigned long OpTime[256];
static int OpCurr = OP_FINISH;
static int OpPrevTarget = OP_FAIL;
static int MaxStackDepth = 0;

/* Enter an opcode: update the predecessor and entry counters, make it the
   current opcode and take the start time sample. */
#define SOP_IN(opcode) do {\
  if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
  OpCurr = opcode;\
  OpCounter[opcode]++;\
  GETTIME(ts);\
} while(0)

/* Leave an opcode: take the end time sample and add the elapsed time to
   the current opcode's total. */
#define SOP_OUT do {\
  GETTIME(te);\
  OpTime[OpCurr] += TIMEDIFF(te, ts);\
} while(0)
2605 
2606 extern void
onig_statistics_init(void)2607 onig_statistics_init(void)
2608 {
2609   int i;
2610   for (i = 0; i < 256; i++) {
2611     OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0;
2612   }
2613   MaxStackDepth = 0;
2614 }
2615 
2616 extern int
onig_print_statistics(FILE * f)2617 onig_print_statistics(FILE* f)
2618 {
2619   int r;
2620   int i;
2621 
2622   r = fprintf(f, "   count      prev        time\n");
2623   if (r < 0) return -1;
2624 
2625   for (i = 0; OpInfo[i].opcode >= 0; i++) {
2626     r = fprintf(f, "%8d: %8d: %10ld: %s\n",
2627                 OpCounter[i], OpPrevCounter[i], OpTime[i], OpInfo[i].name);
2628     if (r < 0) return -1;
2629   }
2630   r = fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
2631   if (r < 0) return -1;
2632 
2633   return 0;
2634 }
2635 
/*
 * STACK_INC: advance stk by one entry.  In statistics builds it also
 * records the largest depth (stk - stk_base) seen so far in MaxStackDepth.
 * In non-statistics builds SOP_IN and SOP_OUT expand to nothing.
 */
#define STACK_INC do {\
  stk++;\
  if (stk - stk_base > MaxStackDepth) \
    MaxStackDepth = stk - stk_base;\
} while(0)

#else
#define STACK_INC     stk++

#define SOP_IN(opcode)
#define SOP_OUT
#endif
2648 
2649 
/* matching region of POSIX API */
/* Local definitions of the offset type and the start/end offset pair;
   match_at() casts msa->region to posix_regmatch_t* when
   OPTON_POSIX_REGION is set. */
typedef int regoff_t;

typedef struct {
  regoff_t  rm_so;  /* start offset of the match (from str) */
  regoff_t  rm_eo;  /* end offset of the match (from str) */
} posix_regmatch_t;
2657 
2658 
2659 
/*
 * Opcode dispatch for the interpreter loop in match_at().
 *
 * USE_THREADED_CODE: every opcode body is a label (L_<name>) reached with a
 * computed goto -- through p->opaddr when USE_DIRECT_THREADED_CODE is
 * defined, otherwise through the opcode_to_label[] table indexed by
 * p->opcode.
 *
 * Otherwise: the bodies are the cases of a `switch (p->opcode)` inside a
 * `while (1)` loop; NEXT_OP leaves the switch with `break` and GOTO_OP
 * restarts the loop with `continue`.
 */
#ifdef USE_THREADED_CODE

#define BYTECODE_INTERPRETER_START      GOTO_OP;
#define BYTECODE_INTERPRETER_END
#define CASE_OP(x)   L_##x: SOP_IN(OP_##x); MATCH_DEBUG_OUT(0)
#define DEFAULT_OP   /* L_DEFAULT: */
#define NEXT_OP      JUMP_OP
#define JUMP_OP      GOTO_OP
#ifdef USE_DIRECT_THREADED_CODE
#define GOTO_OP      goto *(p->opaddr)
#else
#define GOTO_OP      goto *opcode_to_label[p->opcode]
#endif
#define BREAK_OP     /* Nothing */

#else

#define BYTECODE_INTERPRETER_START \
  while (1) {\
  MATCH_DEBUG_OUT(0)\
  switch (p->opcode) {
#define BYTECODE_INTERPRETER_END  } }
#define CASE_OP(x)   case OP_##x: SOP_IN(OP_##x);
#define DEFAULT_OP   default:
#define NEXT_OP      break
#define JUMP_OP      GOTO_OP
#define GOTO_OP      continue; break
#define BREAK_OP     break

#endif /* USE_THREADED_CODE */
2690 
/*
 * Opcode epilogues.  INC_OP steps p to the next operation.  The *_OUT
 * forms first close the statistics sample (SOP_OUT) and then dispatch:
 *   JUMP_OUT_WITH_SPREV_SET   via NEXT_OP
 *   JUMP_OUT                  via JUMP_OP
 *   BREAK_OUT                 via BREAK_OP
 *   CHECK_INTERRUPT_JUMP_OUT  runs CHECK_INTERRUPT_IN_MATCH, then JUMP_OP
 * Each expands to more than one statement, so they must not be used as the
 * unbraced body of an if/else.
 */
#define INC_OP       p++
#define JUMP_OUT_WITH_SPREV_SET   SOP_OUT; NEXT_OP
#define JUMP_OUT                  SOP_OUT; JUMP_OP
#define BREAK_OUT                 SOP_OUT; BREAK_OP
#define CHECK_INTERRUPT_JUMP_OUT  SOP_OUT; CHECK_INTERRUPT_IN_MATCH; JUMP_OP
2696 
2697 
/*
 * MATCH_DEBUG_OUT(offset): with ONIG_DEBUG_MATCH defined, print one trace
 * line to DBGFP for the operation at p - offset; otherwise expands to
 * nothing.
 *
 * The line shows the running step counter, the stack index of stk, the
 * offset of `s` in the subject (-1 when s is NULL), up to seven characters
 * of the subject starting at `s` (followed by "..." when more remain),
 * padding, and then either "finish" for FinishCode or the operation's index
 * and disassembly.  For OP_RETURN the call frame that would be returned to
 * is shown as well.
 *
 * It uses the enclosing function's `i`, `stkp` and `counter`.  The
 * expansion already ends in a semicolon.
 */
#ifdef ONIG_DEBUG_MATCH
#define MATCH_DEBUG_OUT(offset) do {\
      Operation *xp;\
      UChar *q, *bp, buf[50];\
      int len, spos;\
      spos = IS_NOT_NULL(s) ? (int )(s - str) : -1;\
      xp = p - (offset);\
      fprintf(DBGFP, "%7u: %7ld: %4d> \"",\
              counter, GET_STACK_INDEX(stk), spos);\
      counter++;\
      bp = buf;\
      if (IS_NOT_NULL(s)) {\
        for (i = 0, q = s; i < 7 && q < end; i++) {\
          len = enclen(encode, q);\
          while (len-- > 0) *bp++ = *q++;\
        }\
        if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; }\
        else         { xmemcpy(bp, "\"",    1); bp += 1; }\
      }\
      else {\
        xmemcpy(bp, "\"", 1); bp += 1;\
      }\
      *bp = 0;\
      fputs((char* )buf, DBGFP);\
      for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', DBGFP);\
      if (xp == FinishCode)\
        fprintf(DBGFP, "----: finish");\
      else {\
        int index;\
        enum OpCode zopcode;\
        Operation* addr;\
        index = (int )(xp - reg->ops);\
        fprintf(DBGFP, "%4d: ", index);\
        print_compiled_byte_code(DBGFP, reg, index, reg->ops, encode); \
        zopcode = GET_OPCODE(reg, index);\
        if (zopcode == OP_RETURN) {\
          GET_STACK_RETURN_CALL(stkp, addr);\
          fprintf(DBGFP, " f:%ld -> %d", \
            GET_STACK_INDEX(stkp), (int )(addr - reg->ops));\
        }\
      }\
      fprintf(DBGFP, "\n");\
  } while(0);
#else
#define MATCH_DEBUG_OUT(offset)
#endif
2744 
/* Abort the current match_at() run: store err_code in best_len (the value
   the function reports) and jump to the match_at_end label. */
#define MATCH_AT_ERROR_RETURN(err_code) do {\
  best_len = err_code; goto match_at_end;\
} while(0)
2748 
/*
 * MATCH_COUNTER_OUT(title): debug dump to DBGFP of the match counters --
 * the title, the start offset of this attempt in the subject, the retry
 * counter and the per-search subexp-call counter, followed by the
 * MAX_SUBEXP_CALL_COUNTERS entries of subexp_call_counters[] -- and a
 * flush of DBGFP.
 *
 * The start offset is a pointer difference (ptrdiff_t); it is cast to long
 * so that it matches the %ld conversion on platforms where ptrdiff_t is
 * wider than long.
 */
#define MATCH_COUNTER_OUT(title) do {\
  int i;\
  fprintf(DBGFP, "%s (%ld): retry limit: %8lu, subexp_call: %8lu\n", (title), (long )(sstart - str), retry_in_match_counter, msa->subexp_call_in_search_counter); \
  fprintf(DBGFP, "      ");\
  for (i = 0; i < MAX_SUBEXP_CALL_COUNTERS; i++) {\
    fprintf(DBGFP, " %6lu", subexp_call_counters[i]);\
  }\
  fprintf(DBGFP, "\n");\
  fflush(DBGFP);\
} while (0)
2759 
2760 
2761 /* match data(str - end) from position (sstart). */
2762 static int
match_at(regex_t * reg,const UChar * str,const UChar * end,const UChar * in_right_range,const UChar * sstart,MatchArg * msa)2763 match_at(regex_t* reg, const UChar* str, const UChar* end,
2764          const UChar* in_right_range, const UChar* sstart,
2765          MatchArg* msa)
2766 {
2767 
2768 #if defined(USE_DIRECT_THREADED_CODE)
2769   static Operation FinishCode[] = { { .opaddr=&&L_FINISH } };
2770 #else
2771   static Operation FinishCode[] = { { OP_FINISH } };
2772 #endif
2773 
2774 #ifdef USE_THREADED_CODE
2775   static const void *opcode_to_label[] = {
2776   &&L_FINISH,
2777   &&L_END,
2778   &&L_STR_1,
2779   &&L_STR_2,
2780   &&L_STR_3,
2781   &&L_STR_4,
2782   &&L_STR_5,
2783   &&L_STR_N,
2784   &&L_STR_MB2N1,
2785   &&L_STR_MB2N2,
2786   &&L_STR_MB2N3,
2787   &&L_STR_MB2N,
2788   &&L_STR_MB3N,
2789   &&L_STR_MBN,
2790   &&L_CCLASS,
2791   &&L_CCLASS_MB,
2792   &&L_CCLASS_MIX,
2793   &&L_CCLASS_NOT,
2794   &&L_CCLASS_MB_NOT,
2795   &&L_CCLASS_MIX_NOT,
2796   &&L_ANYCHAR,
2797   &&L_ANYCHAR_ML,
2798   &&L_ANYCHAR_STAR,
2799   &&L_ANYCHAR_ML_STAR,
2800   &&L_ANYCHAR_STAR_PEEK_NEXT,
2801   &&L_ANYCHAR_ML_STAR_PEEK_NEXT,
2802   &&L_WORD,
2803   &&L_WORD_ASCII,
2804   &&L_NO_WORD,
2805   &&L_NO_WORD_ASCII,
2806   &&L_WORD_BOUNDARY,
2807   &&L_NO_WORD_BOUNDARY,
2808   &&L_WORD_BEGIN,
2809   &&L_WORD_END,
2810   &&L_TEXT_SEGMENT_BOUNDARY,
2811   &&L_BEGIN_BUF,
2812   &&L_END_BUF,
2813   &&L_BEGIN_LINE,
2814   &&L_END_LINE,
2815   &&L_SEMI_END_BUF,
2816   &&L_CHECK_POSITION,
2817   &&L_BACKREF1,
2818   &&L_BACKREF2,
2819   &&L_BACKREF_N,
2820   &&L_BACKREF_N_IC,
2821   &&L_BACKREF_MULTI,
2822   &&L_BACKREF_MULTI_IC,
2823 #ifdef USE_BACKREF_WITH_LEVEL
2824   &&L_BACKREF_WITH_LEVEL,
2825   &&L_BACKREF_WITH_LEVEL_IC,
2826 #endif
2827   &&L_BACKREF_CHECK,
2828 #ifdef USE_BACKREF_WITH_LEVEL
2829   &&L_BACKREF_CHECK_WITH_LEVEL,
2830 #endif
2831   &&L_MEM_START,
2832   &&L_MEM_START_PUSH,
2833   &&L_MEM_END_PUSH,
2834 #ifdef USE_CALL
2835   &&L_MEM_END_PUSH_REC,
2836 #endif
2837   &&L_MEM_END,
2838 #ifdef USE_CALL
2839   &&L_MEM_END_REC,
2840 #endif
2841   &&L_FAIL,
2842   &&L_JUMP,
2843   &&L_PUSH,
2844   &&L_PUSH_SUPER,
2845   &&L_POP,
2846   &&L_POP_TO_MARK,
2847 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
2848   &&L_PUSH_OR_JUMP_EXACT1,
2849 #endif
2850   &&L_PUSH_IF_PEEK_NEXT,
2851   &&L_REPEAT,
2852   &&L_REPEAT_NG,
2853   &&L_REPEAT_INC,
2854   &&L_REPEAT_INC_NG,
2855   &&L_EMPTY_CHECK_START,
2856   &&L_EMPTY_CHECK_END,
2857   &&L_EMPTY_CHECK_END_MEMST,
2858 #ifdef USE_CALL
2859   &&L_EMPTY_CHECK_END_MEMST_PUSH,
2860 #endif
2861   &&L_MOVE,
2862   &&L_STEP_BACK_START,
2863   &&L_STEP_BACK_NEXT,
2864   &&L_CUT_TO_MARK,
2865   &&L_MARK,
2866   &&L_SAVE_VAL,
2867   &&L_UPDATE_VAR,
2868 #ifdef USE_CALL
2869   &&L_CALL,
2870   &&L_RETURN,
2871 #endif
2872 #ifdef USE_CALLOUT
2873   &&L_CALLOUT_CONTENTS,
2874   &&L_CALLOUT_NAME,
2875 #endif
2876   };
2877 #endif
2878 
2879   int i, n, num_mem, best_len, pop_level;
2880   LengthType tlen, tlen2;
2881   MemNumType mem;
2882   RelAddrType addr;
2883   UChar *s, *ps;
2884   UChar *right_range;
2885   int is_alloca;
2886   char *alloc_base;
2887   StackType *stk_base, *stk, *stk_end;
2888   StackType *stkp; /* used as any purpose. */
2889   StkPtrType *mem_start_stk, *mem_end_stk;
2890   UChar* keep;
2891 
2892 #ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
2893   StackIndex *repeat_stk;
2894   StackIndex *empty_check_stk;
2895 #endif
2896 #ifdef USE_RETRY_LIMIT
2897   unsigned long retry_limit_in_match;
2898   unsigned long retry_in_match_counter;
2899 #endif
2900 #ifdef USE_CALLOUT
2901   int of;
2902 #endif
2903 #ifdef ONIG_DEBUG_MATCH_COUNTER
2904 #define MAX_SUBEXP_CALL_COUNTERS  9
2905   unsigned long subexp_call_counters[MAX_SUBEXP_CALL_COUNTERS];
2906 #endif
2907 
2908   Operation* p = reg->ops;
2909   OnigOptionType option = reg->options;
2910   OnigEncoding encode = reg->enc;
2911   OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
2912 
2913 #ifdef USE_CALL
2914   unsigned long subexp_call_nest_counter = 0;
2915 #endif
2916 
2917 #ifdef ONIG_DEBUG_MATCH
2918   static unsigned int counter = 1;
2919 #endif
2920 
2921 #ifdef ONIG_DEBUG_MATCH_COUNTER
2922   for (i = 0; i < MAX_SUBEXP_CALL_COUNTERS; i++) {
2923     subexp_call_counters[i] = 0;
2924   }
2925 #endif
2926 
2927 #ifdef USE_DIRECT_THREADED_CODE
2928   if (IS_NULL(msa)) {
2929     for (i = 0; i < reg->ops_used; i++) {
2930        const void* addr;
2931        addr = opcode_to_label[reg->ocs[i]];
2932        p->opaddr = addr;
2933        p++;
2934     }
2935     return ONIG_NORMAL;
2936   }
2937 #endif
2938 
2939 #ifdef USE_CALLOUT
2940   msa->mp->match_at_call_counter++;
2941 #endif
2942 
2943 #ifdef USE_RETRY_LIMIT
2944   retry_limit_in_match = msa->retry_limit_in_match;
2945   if (msa->retry_limit_in_search != 0) {
2946     unsigned long rem = msa->retry_limit_in_search
2947                       - msa->retry_limit_in_search_counter;
2948     if (rem < retry_limit_in_match)
2949       retry_limit_in_match = rem;
2950   }
2951 #endif
2952 
2953   pop_level = reg->stack_pop_level;
2954   num_mem = reg->num_mem;
2955   STACK_INIT(INIT_MATCH_STACK_SIZE);
2956   UPDATE_FOR_STACK_REALLOC;
2957   for (i = 1; i <= num_mem; i++) {
2958     mem_start_stk[i].i = mem_end_stk[i].i = INVALID_STACK_INDEX;
2959   }
2960 
2961 #ifdef ONIG_DEBUG_MATCH
2962   fprintf(DBGFP, "match_at: str: %p, end: %p, start: %p\n", str, end, sstart);
2963   fprintf(DBGFP, "size: %d, start offset: %d\n",
2964           (int )(end - str), (int )(sstart - str));
2965 #endif
2966 
2967   best_len = ONIG_MISMATCH;
2968   keep = s = (UChar* )sstart;
2969   STACK_PUSH_BOTTOM(STK_ALT, FinishCode);  /* bottom stack */
2970   INIT_RIGHT_RANGE;
2971 
2972 #ifdef USE_RETRY_LIMIT
2973   retry_in_match_counter = 0;
2974 #endif
2975 
2976   BYTECODE_INTERPRETER_START {
2977     CASE_OP(END)
2978       n = (int )(s - sstart);
2979       if (n > best_len) {
2980         OnigRegion* region;
2981 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
2982         if (OPTON_FIND_LONGEST(option)) {
2983           if (n > msa->best_len) {
2984             msa->best_len = n;
2985             msa->best_s   = (UChar* )sstart;
2986             goto set_region;
2987           }
2988           else
2989             goto end_best_len;
2990         }
2991 #endif
2992         best_len = n;
2993 
2994       set_region:
2995         region = msa->region;
2996         if (region) {
2997           if (keep > s) keep = s;
2998 
2999 #ifdef USE_POSIX_API
3000           if (OPTON_POSIX_REGION(msa->options)) {
3001             posix_regmatch_t* rmt = (posix_regmatch_t* )region;
3002 
3003             rmt[0].rm_so = (regoff_t )(keep - str);
3004             rmt[0].rm_eo = (regoff_t )(s    - str);
3005             for (i = 1; i <= num_mem; i++) {
3006               if (mem_end_stk[i].i != INVALID_STACK_INDEX) {
3007                 rmt[i].rm_so = (regoff_t )(STACK_MEM_START(reg, i) - str);
3008                 rmt[i].rm_eo = (regoff_t )(STACK_MEM_END(reg, i)   - str);
3009               }
3010               else {
3011                 rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS;
3012               }
3013             }
3014           }
3015           else {
3016 #endif /* USE_POSIX_API */
3017             region->beg[0] = (int )(keep - str);
3018             region->end[0] = (int )(s    - str);
3019             for (i = 1; i <= num_mem; i++) {
3020               if (mem_end_stk[i].i != INVALID_STACK_INDEX) {
3021                 region->beg[i] = (int )(STACK_MEM_START(reg, i) - str);
3022                 region->end[i] = (int )(STACK_MEM_END(reg, i)   - str);
3023               }
3024               else {
3025                 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
3026               }
3027             }
3028 
3029 #ifdef USE_CAPTURE_HISTORY
3030             if (reg->capture_history != 0) {
3031               int r;
3032               OnigCaptureTreeNode* node;
3033 
3034               if (IS_NULL(region->history_root)) {
3035                 region->history_root = node = history_node_new();
3036                 CHECK_NULL_RETURN_MEMERR(node);
3037               }
3038               else {
3039                 node = region->history_root;
3040                 history_tree_clear(node);
3041               }
3042 
3043               node->group = 0;
3044               node->beg   = (int )(keep - str);
3045               node->end   = (int )(s    - str);
3046 
3047               stkp = stk_base;
3048               r = make_capture_history_tree(region->history_root, &stkp,
3049                                             stk, (UChar* )str, reg);
3050               if (r < 0) MATCH_AT_ERROR_RETURN(r);
3051             }
3052 #endif /* USE_CAPTURE_HISTORY */
3053 #ifdef USE_POSIX_API
3054           } /* else OPTON_POSIX_REGION() */
3055 #endif
3056         } /* if (region) */
3057       } /* n > best_len */
3058 
3059 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
3060     end_best_len:
3061 #endif
3062       SOP_OUT;
3063 
3064       if (OPTON_FIND_CONDITION(option)) {
3065         if (OPTON_FIND_NOT_EMPTY(option) && s == sstart) {
3066           best_len = ONIG_MISMATCH;
3067           goto fail; /* for retry */
3068         }
3069         if (OPTON_FIND_LONGEST(option)) {
3070           if (s >= in_right_range && msa->best_s == sstart)
3071             best_len = msa->best_len;
3072           else
3073             goto fail; /* for retry */
3074         }
3075       }
3076 
3077       /* default behavior: return first-matching result. */
3078       goto match_at_end;
3079 
3080     CASE_OP(STR_1)
3081       DATA_ENSURE(1);
3082       ps = p->exact.s;
3083       if (*ps != *s) goto fail;
3084       s++;
3085       INC_OP;
3086       JUMP_OUT_WITH_SPREV_SET;
3087 
3088     CASE_OP(STR_2)
3089       DATA_ENSURE(2);
3090       ps = p->exact.s;
3091       if (*ps != *s) goto fail;
3092       ps++; s++;
3093       if (*ps != *s) goto fail;
3094       s++;
3095       INC_OP;
3096       JUMP_OUT;
3097 
3098     CASE_OP(STR_3)
3099       DATA_ENSURE(3);
3100       ps = p->exact.s;
3101       if (*ps != *s) goto fail;
3102       ps++; s++;
3103       if (*ps != *s) goto fail;
3104       ps++; s++;
3105       if (*ps != *s) goto fail;
3106       s++;
3107       INC_OP;
3108       JUMP_OUT;
3109 
3110     CASE_OP(STR_4)
3111       DATA_ENSURE(4);
3112       ps = p->exact.s;
3113       if (*ps != *s) goto fail;
3114       ps++; s++;
3115       if (*ps != *s) goto fail;
3116       ps++; s++;
3117       if (*ps != *s) goto fail;
3118       ps++; s++;
3119       if (*ps != *s) goto fail;
3120       s++;
3121       INC_OP;
3122       JUMP_OUT;
3123 
3124     CASE_OP(STR_5)
3125       DATA_ENSURE(5);
3126       ps = p->exact.s;
3127       if (*ps != *s) goto fail;
3128       ps++; s++;
3129       if (*ps != *s) goto fail;
3130       ps++; s++;
3131       if (*ps != *s) goto fail;
3132       ps++; s++;
3133       if (*ps != *s) goto fail;
3134       ps++; s++;
3135       if (*ps != *s) goto fail;
3136       s++;
3137       INC_OP;
3138       JUMP_OUT;
3139 
3140     CASE_OP(STR_N)
3141       tlen = p->exact_n.n;
3142       DATA_ENSURE(tlen);
3143       ps = p->exact_n.s;
3144       while (tlen-- > 0) {
3145         if (*ps++ != *s++) goto fail;
3146       }
3147       INC_OP;
3148       JUMP_OUT;
3149 
3150     CASE_OP(STR_MB2N1)
3151       DATA_ENSURE(2);
3152       ps = p->exact.s;
3153       if (*ps != *s) goto fail;
3154       ps++; s++;
3155       if (*ps != *s) goto fail;
3156       s++;
3157       INC_OP;
3158       JUMP_OUT_WITH_SPREV_SET;
3159 
3160     CASE_OP(STR_MB2N2)
3161       DATA_ENSURE(4);
3162       ps = p->exact.s;
3163       if (*ps != *s) goto fail;
3164       ps++; s++;
3165       if (*ps != *s) goto fail;
3166       ps++; s++;
3167       if (*ps != *s) goto fail;
3168       ps++; s++;
3169       if (*ps != *s) goto fail;
3170       s++;
3171       INC_OP;
3172       JUMP_OUT;
3173 
3174     CASE_OP(STR_MB2N3)
3175       DATA_ENSURE(6);
3176       ps = p->exact.s;
3177       if (*ps != *s) goto fail;
3178       ps++; s++;
3179       if (*ps != *s) goto fail;
3180       ps++; s++;
3181       if (*ps != *s) goto fail;
3182       ps++; s++;
3183       if (*ps != *s) goto fail;
3184       ps++; s++;
3185       if (*ps != *s) goto fail;
3186       ps++; s++;
3187       if (*ps != *s) goto fail;
3188       ps++; s++;
3189       INC_OP;
3190       JUMP_OUT;
3191 
3192     CASE_OP(STR_MB2N)
3193       tlen = p->exact_n.n;
3194       DATA_ENSURE(tlen * 2);
3195       ps = p->exact_n.s;
3196       while (tlen-- > 0) {
3197         if (*ps != *s) goto fail;
3198         ps++; s++;
3199         if (*ps != *s) goto fail;
3200         ps++; s++;
3201       }
3202       INC_OP;
3203       JUMP_OUT;
3204 
3205     CASE_OP(STR_MB3N)
3206       tlen = p->exact_n.n;
3207       DATA_ENSURE(tlen * 3);
3208       ps = p->exact_n.s;
3209       while (tlen-- > 0) {
3210         if (*ps != *s) goto fail;
3211         ps++; s++;
3212         if (*ps != *s) goto fail;
3213         ps++; s++;
3214         if (*ps != *s) goto fail;
3215         ps++; s++;
3216       }
3217       INC_OP;
3218       JUMP_OUT;
3219 
3220     CASE_OP(STR_MBN)
3221       tlen  = p->exact_len_n.len; /* mb byte len */
3222       tlen2 = p->exact_len_n.n;   /* number of chars */
3223       tlen2 *= tlen;
3224       DATA_ENSURE(tlen2);
3225       ps = p->exact_len_n.s;
3226       while (tlen2-- > 0) {
3227         if (*ps != *s) goto fail;
3228         ps++; s++;
3229       }
3230       INC_OP;
3231       JUMP_OUT;
3232 
3233     CASE_OP(CCLASS)
3234       DATA_ENSURE(1);
3235       if (BITSET_AT(p->cclass.bsp, *s) == 0) goto fail;
3236       if (ONIGENC_IS_MBC_HEAD(encode, s)) goto fail;
3237       s++;
3238       INC_OP;
3239       JUMP_OUT_WITH_SPREV_SET;
3240 
3241     CASE_OP(CCLASS_MB)
3242       DATA_ENSURE(1);
3243       if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail;
3244 
3245     cclass_mb:
3246       {
3247         OnigCodePoint code;
3248         UChar *ss;
3249         int mb_len;
3250 
3251         mb_len = enclen(encode, s);
3252         DATA_ENSURE(mb_len);
3253         ss = s;
3254         s += mb_len;
3255         code = ONIGENC_MBC_TO_CODE(encode, ss, s);
3256         if (! onig_is_in_code_range(p->cclass_mb.mb, code)) goto fail;
3257       }
3258       INC_OP;
3259       JUMP_OUT_WITH_SPREV_SET;
3260 
3261     CASE_OP(CCLASS_MIX)
3262       DATA_ENSURE(1);
3263       if (ONIGENC_IS_MBC_HEAD(encode, s)) {
3264         goto cclass_mb;
3265       }
3266       else {
3267         if (BITSET_AT(p->cclass_mix.bsp, *s) == 0)
3268           goto fail;
3269 
3270         s++;
3271       }
3272       INC_OP;
3273       JUMP_OUT_WITH_SPREV_SET;
3274 
3275     CASE_OP(CCLASS_NOT)
3276       DATA_ENSURE(1);
3277       if (BITSET_AT(p->cclass.bsp, *s) != 0) goto fail;
3278       s += enclen(encode, s);
3279       INC_OP;
3280       JUMP_OUT_WITH_SPREV_SET;
3281 
3282     CASE_OP(CCLASS_MB_NOT)
           /*
            * Negated multibyte character class.
            * A byte that is not a multibyte lead byte is consumed and accepted
            * right away.  Otherwise the character is decoded below and the
            * handler fails when its code point lies in p->cclass_mb.mb.
            */
3283       DATA_ENSURE(1);
3284       if (! ONIGENC_IS_MBC_HEAD(encode, s)) {
3285         s++;
3286         goto cc_mb_not_success;
3287       }
3288 
           /* Also entered from CASE_OP(CCLASS_MIX_NOT) when s is on a lead byte. */
3289     cclass_mb_not:
3290       {
3291         OnigCodePoint code;
3292         UChar *ss;
3293         int mb_len = enclen(encode, s);
3294 
             /*
              * DATA_ENSURE_CHECK(mb_len) is false: presumably fewer than mb_len
              * bytes remain (TODO confirm against the macro definition).
              * After DATA_ENSURE(1), the rest of the input up to `end` is
              * consumed and the negated class is treated as matched.
              */
3295         if (! DATA_ENSURE_CHECK(mb_len)) {
3296           DATA_ENSURE(1);
3297           s = (UChar* )end;
3298           goto cc_mb_not_success;
3299         }
3300 
             /* Decode the character at ss; s already points past it. */
3301         ss = s;
3302         s += mb_len;
3303         code = ONIGENC_MBC_TO_CODE(encode, ss, s);
3304         if (onig_is_in_code_range(p->cclass_mb.mb, code)) goto fail;
3305       }
3306 
           /* Common success exit for every accepting path above. */
3307     cc_mb_not_success:
3308       INC_OP;
3309       JUMP_OUT_WITH_SPREV_SET;
3310 
3311     CASE_OP(CCLASS_MIX_NOT)
3312       DATA_ENSURE(1);
3313       if (ONIGENC_IS_MBC_HEAD(encode, s)) {
3314         goto cclass_mb_not;
3315       }
3316       else {
3317         if (BITSET_AT(p->cclass_mix.bsp, *s) != 0)
3318           goto fail;
3319 
3320         s++;
3321       }
3322       INC_OP;
3323       JUMP_OUT_WITH_SPREV_SET;
3324 
3325     CASE_OP(ANYCHAR)
3326       DATA_ENSURE(1);
3327       n = enclen(encode, s);
3328       DATA_ENSURE(n);
3329       if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
3330       s += n;
3331       INC_OP;
3332       JUMP_OUT_WITH_SPREV_SET;
3333 
3334     CASE_OP(ANYCHAR_ML)
3335       DATA_ENSURE(1);
3336       n = enclen(encode, s);
3337       DATA_ENSURE(n);
3338       s += n;
3339       INC_OP;
3340       JUMP_OUT_WITH_SPREV_SET;
3341 
3342     CASE_OP(ANYCHAR_STAR)
3343       INC_OP;
3344       while (DATA_ENSURE_CHECK1) {
3345         STACK_PUSH_ALT(p, s);
3346         n = enclen(encode, s);
3347         DATA_ENSURE(n);
3348         if (ONIGENC_IS_MBC_NEWLINE(encode, s, end))  goto fail;
3349         s += n;
3350       }
3351       JUMP_OUT;
3352 
3353     CASE_OP(ANYCHAR_ML_STAR)
3354       INC_OP;
3355       while (DATA_ENSURE_CHECK1) {
3356         STACK_PUSH_ALT(p, s);
3357         n = enclen(encode, s);
3358         if (n > 1) {
3359           DATA_ENSURE(n);
3360           s += n;
3361         }
3362         else {
3363           s++;
3364         }
3365       }
3366       JUMP_OUT;
3367 
3368     CASE_OP(ANYCHAR_STAR_PEEK_NEXT)
3369       {
3370         UChar c;
3371 
3372         c = p->anychar_star_peek_next.c;
3373         INC_OP;
3374         while (DATA_ENSURE_CHECK1) {
3375           if (c == *s) {
3376             STACK_PUSH_ALT(p, s);
3377           }
3378           n = enclen(encode, s);
3379           DATA_ENSURE(n);
3380           if (ONIGENC_IS_MBC_NEWLINE(encode, s, end))  goto fail;
3381           s += n;
3382         }
3383       }
3384       JUMP_OUT;
3385 
3386     CASE_OP(ANYCHAR_ML_STAR_PEEK_NEXT)
3387       {
3388         UChar c;
3389 
3390         c = p->anychar_star_peek_next.c;
3391         INC_OP;
3392         while (DATA_ENSURE_CHECK1) {
3393           if (c == *s) {
3394             STACK_PUSH_ALT(p, s);
3395           }
3396           n = enclen(encode, s);
3397           if (n > 1) {
3398             DATA_ENSURE(n);
3399             s += n;
3400           }
3401           else {
3402             s++;
3403           }
3404         }
3405       }
3406       JUMP_OUT;
3407 
3408     CASE_OP(WORD)
3409       DATA_ENSURE(1);
3410       if (! ONIGENC_IS_MBC_WORD(encode, s, end))
3411         goto fail;
3412 
3413       s += enclen(encode, s);
3414       INC_OP;
3415       JUMP_OUT_WITH_SPREV_SET;
3416 
3417     CASE_OP(WORD_ASCII)
3418       DATA_ENSURE(1);
3419       if (! ONIGENC_IS_MBC_WORD_ASCII(encode, s, end))
3420         goto fail;
3421 
3422       s += enclen(encode, s);
3423       INC_OP;
3424       JUMP_OUT_WITH_SPREV_SET;
3425 
3426     CASE_OP(NO_WORD)
3427       DATA_ENSURE(1);
3428       if (ONIGENC_IS_MBC_WORD(encode, s, end))
3429         goto fail;
3430 
3431       s += enclen(encode, s);
3432       INC_OP;
3433       JUMP_OUT_WITH_SPREV_SET;
3434 
3435     CASE_OP(NO_WORD_ASCII)
3436       DATA_ENSURE(1);
3437       if (ONIGENC_IS_MBC_WORD_ASCII(encode, s, end))
3438         goto fail;
3439 
3440       s += enclen(encode, s);
3441       INC_OP;
3442       JUMP_OUT_WITH_SPREV_SET;
3443 
3444     CASE_OP(WORD_BOUNDARY)
           /*
            * Word-boundary assertion; s is not advanced.
            * mode comes from the instruction: per IS_MBC_WORD_ASCII_MODE,
            * mode == 0 uses ONIGENC_IS_MBC_WORD and any other value uses
            * ONIGENC_IS_MBC_WORD_ASCII.
            */
3445       {
3446         ModeType mode;
3447 
3448         mode = p->word_boundary.mode;
3449         if (ON_STR_BEGIN(s)) {
               /* At the string start: the character at s must be a word char. */
3450           DATA_ENSURE(1);
3451           if (! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode))
3452             goto fail;
3453         }
3454         else {
3455           UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
3456           if (ON_STR_END(s)) {
                 /* At the string end: the preceding character must be a word char. */
3457             if (! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
3458               goto fail;
3459           }
3460           else {
                 /* In the middle: fail when both sides have the same word-ness. */
3461             if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)
3462                 == IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
3463               goto fail;
3464           }
3465         }
3466       }
3467       INC_OP;
3468       JUMP_OUT;
3469 
3470     CASE_OP(NO_WORD_BOUNDARY)
3471       {
3472         ModeType mode;
3473 
3474         mode = p->word_boundary.mode;
3475         if (ON_STR_BEGIN(s)) {
3476           if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode))
3477             goto fail;
3478         }
3479         else {
3480           UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
3481           if (ON_STR_END(s)) {
3482             if (IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
3483               goto fail;
3484           }
3485           else {
3486             if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)
3487                 != IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
3488               goto fail;
3489           }
3490         }
3491       }
3492       INC_OP;
3493       JUMP_OUT;
3494 
3495 #ifdef USE_WORD_BEGIN_END
3496     CASE_OP(WORD_BEGIN)
3497       {
3498         ModeType mode;
3499 
3500         mode = p->word_boundary.mode;
3501         if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) {
3502           UChar* sprev;
3503           if (ON_STR_BEGIN(s)) {
3504             INC_OP;
3505             JUMP_OUT;
3506           }
3507           sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
3508           if (! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {
3509             INC_OP;
3510             JUMP_OUT;
3511           }
3512         }
3513       }
3514       goto fail;
3515 
3516     CASE_OP(WORD_END)
3517       {
3518         ModeType mode;
3519 
3520         mode = p->word_boundary.mode;
3521         if (! ON_STR_BEGIN(s)) {
3522           UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
3523           if (IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {
3524             if (ON_STR_END(s) || ! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) {
3525               INC_OP;
3526               JUMP_OUT;
3527             }
3528           }
3529         }
3530       }
3531       goto fail;
3532 #endif
3533 
3534     CASE_OP(TEXT_SEGMENT_BOUNDARY)
3535       {
3536         int is_break;
3537         UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
3538 
3539         switch (p->text_segment_boundary.type) {
3540         case EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
3541           is_break = onigenc_egcb_is_break_position(encode, s, sprev, str, end);
3542           break;
3543 #ifdef USE_UNICODE_WORD_BREAK
3544         case WORD_BOUNDARY:
3545           is_break = onigenc_wb_is_break_position(encode, s, sprev, str, end);
3546           break;
3547 #endif
3548         default:
3549           MATCH_AT_ERROR_RETURN(ONIGERR_UNDEFINED_BYTECODE);
3550           break;
3551         }
3552 
3553         if (p->text_segment_boundary.not != 0)
3554           is_break = ! is_break;
3555 
3556         if (is_break != 0) {
3557           INC_OP;
3558           JUMP_OUT;
3559         }
3560         else {
3561           goto fail;
3562         }
3563       }
3564 
3565     CASE_OP(BEGIN_BUF)
3566       if (! ON_STR_BEGIN(s)) goto fail;
3567       if (OPTON_NOTBOL(msa->options)) goto fail;
3568       if (OPTON_NOT_BEGIN_STRING(msa->options)) goto fail;
3569 
3570       INC_OP;
3571       JUMP_OUT;
3572 
3573     CASE_OP(END_BUF)
3574       if (! ON_STR_END(s)) goto fail;
3575       if (OPTON_NOTEOL(msa->options)) goto fail;
3576       if (OPTON_NOT_END_STRING(msa->options)) goto fail;
3577 
3578       INC_OP;
3579       JUMP_OUT;
3580 
3581     CASE_OP(BEGIN_LINE)
3582       if (ON_STR_BEGIN(s)) {
3583         if (OPTON_NOTBOL(msa->options)) goto fail;
3584         INC_OP;
3585         JUMP_OUT;
3586       }
3587       else if (! ON_STR_END(s)) {
3588         UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
3589         if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
3590           INC_OP;
3591           JUMP_OUT;
3592         }
3593       }
3594       goto fail;
3595 
3596     CASE_OP(END_LINE)
3597       if (ON_STR_END(s)) {
3598 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3599         UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
3600         if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
3601 #endif
3602           if (OPTON_NOTEOL(msa->options)) goto fail;
3603           INC_OP;
3604           JUMP_OUT;
3605 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3606         }
3607 #endif
3608       }
3609       else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) {
3610         INC_OP;
3611         JUMP_OUT;
3612       }
3613 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3614       else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
3615         INC_OP;
3616         JUMP_OUT;
3617       }
3618 #endif
3619       goto fail;
3620 
3621     CASE_OP(SEMI_END_BUF)
3622       if (ON_STR_END(s)) {
3623 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3624         UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
3625         if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
3626 #endif
3627           if (OPTON_NOTEOL(msa->options)) goto fail;
3628           if (OPTON_NOT_END_STRING(msa->options)) goto fail;
3629           INC_OP;
3630           JUMP_OUT;
3631 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3632         }
3633 #endif
3634       }
3635       else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) &&
3636                ON_STR_END(s + enclen(encode, s))) {
3637         if (OPTON_NOTEOL(msa->options)) goto fail;
3638         if (OPTON_NOT_END_STRING(msa->options)) goto fail;
3639         INC_OP;
3640         JUMP_OUT;
3641       }
3642 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3643       else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
3644         UChar* ss = s + enclen(encode, s);
3645         ss += enclen(encode, ss);
3646         if (ON_STR_END(ss)) {
3647           if (OPTON_NOTEOL(msa->options)) goto fail;
3648           if (OPTON_NOT_END_STRING(msa->options)) goto fail;
3649           INC_OP;
3650           JUMP_OUT;
3651         }
3652       }
3653 #endif
3654       goto fail;
3655 
3656     CASE_OP(CHECK_POSITION)
3657       switch (p->check_position.type) {
3658       case CHECK_POSITION_SEARCH_START:
3659         if (s != msa->start) goto fail;
3660         if (OPTON_NOT_BEGIN_POSITION(msa->options)) goto fail;
3661         break;
3662       case CHECK_POSITION_CURRENT_RIGHT_RANGE:
3663         if (s != right_range) goto fail;
3664         break;
3665       default:
3666         break;
3667       }
3668       INC_OP;
3669       JUMP_OUT;
3670 
3671     CASE_OP(MEM_START_PUSH)
3672       mem = p->memory_start.num;
3673       STACK_PUSH_MEM_START(mem, s);
3674       INC_OP;
3675       JUMP_OUT;
3676 
3677     CASE_OP(MEM_START)
3678       mem = p->memory_start.num;
3679       mem_start_stk[mem].s = s;
3680       INC_OP;
3681       JUMP_OUT;
3682 
3683     CASE_OP(MEM_END_PUSH)
3684       mem = p->memory_end.num;
3685       STACK_PUSH_MEM_END(mem, s);
3686       INC_OP;
3687       JUMP_OUT;
3688 
3689     CASE_OP(MEM_END)
3690       mem = p->memory_end.num;
3691       mem_end_stk[mem].s = s;
3692       INC_OP;
3693       JUMP_OUT;
3694 
3695 #ifdef USE_CALL
3696     CASE_OP(MEM_END_PUSH_REC)
3697       {
3698         StackIndex si;
3699 
3700         mem = p->memory_end.num;
3701         STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
3702         si = GET_STACK_INDEX(stkp);
3703         STACK_PUSH_MEM_END(mem, s);
3704         mem_start_stk[mem].i = si;
3705         INC_OP;
3706         JUMP_OUT;
3707       }
3708 
3709     CASE_OP(MEM_END_REC)
3710       mem = p->memory_end.num;
3711       mem_end_stk[mem].s = s;
3712       STACK_GET_MEM_START(mem, stkp);
3713 
3714       if (MEM_STATUS_AT(reg->push_mem_start, mem))
3715         mem_start_stk[mem].i = GET_STACK_INDEX(stkp);
3716       else
3717         mem_start_stk[mem].s = stkp->u.mem.pstr;
3718 
3719       STACK_PUSH_MEM_END_MARK(mem);
3720       INC_OP;
3721       JUMP_OUT;
3722 #endif
3723 
3724     CASE_OP(BACKREF1)
3725       mem = 1;
3726       goto backref;
3727 
3728     CASE_OP(BACKREF2)
3729       mem = 2;
3730       goto backref;
3731 
3732     CASE_OP(BACKREF_N)
3733       mem = p->backref_n.n1;
3734     backref:
3735       {
3736         UChar *pstart, *pend;
3737 
3738         if (mem_end_stk[mem].i   == INVALID_STACK_INDEX) goto fail;
3739         if (mem_start_stk[mem].i == INVALID_STACK_INDEX) goto fail;
3740 
3741         pstart = STACK_MEM_START(reg, mem);
3742         pend   = STACK_MEM_END(reg, mem);
3743         n = (int )(pend - pstart);
3744         if (n != 0) {
3745           DATA_ENSURE(n);
3746           STRING_CMP(s, pstart, n);
3747         }
3748       }
3749       INC_OP;
3750       JUMP_OUT;
3751 
3752     CASE_OP(BACKREF_N_IC)
3753       mem = p->backref_n.n1;
3754       {
3755         UChar *pstart, *pend;
3756 
3757         if (mem_end_stk[mem].i   == INVALID_STACK_INDEX) goto fail;
3758         if (mem_start_stk[mem].i == INVALID_STACK_INDEX) goto fail;
3759 
3760         pstart = STACK_MEM_START(reg, mem);
3761         pend   = STACK_MEM_END(reg, mem);
3762         n = (int )(pend - pstart);
3763         if (n != 0) {
3764           DATA_ENSURE(n);
3765           STRING_CMP_IC(case_fold_flag, pstart, &s, n);
3766         }
3767       }
3768       INC_OP;
3769       JUMP_OUT;
3770 
3771     CASE_OP(BACKREF_MULTI)
           /*
            * Back-reference with several candidate groups.
            * Candidates are tried in order; the first one that has both a valid
            * start and end entry and whose captured text equals the input at s
            * wins, and s is advanced past it.  An empty capture (n == 0)
            * succeeds without consuming input.  If every candidate is skipped
            * or mismatches, the handler fails.
            */
3772       {
3773         int is_fail;
3774         UChar *pstart, *pend, *swork;
3775 
3776         tlen = p->backref_general.num;
3777         for (i = 0; i < tlen; i++) {
               /* A single candidate is stored inline in n1, several in ns[]. */
3778           mem = tlen == 1 ? p->backref_general.n1 : p->backref_general.ns[i];
3779 
               /* Skip candidates whose start or end index is invalid. */
3780           if (mem_end_stk[mem].i   == INVALID_STACK_INDEX) continue;
3781           if (mem_start_stk[mem].i == INVALID_STACK_INDEX) continue;
3782 
3783           pstart = STACK_MEM_START(reg, mem);
3784           pend   = STACK_MEM_END(reg, mem);
3785           n = (int )(pend - pstart);
3786           if (n != 0) {
3787             DATA_ENSURE(n);
                 /* Compare on a scratch pointer so s is only moved on success. */
3788             swork = s;
3789             STRING_CMP_VALUE(swork, pstart, n, is_fail);
3790             if (is_fail) continue;
3791             s = swork;
3792           }
3793           break; /* success */
3794         }
             /* Loop ran to completion: no candidate matched. */
3795         if (i == tlen) goto fail;
3796       }
3797       INC_OP;
3798       JUMP_OUT;
3799 
3800     CASE_OP(BACKREF_MULTI_IC)
3801       {
3802         int is_fail;
3803         UChar *pstart, *pend, *swork;
3804 
3805         tlen = p->backref_general.num;
3806         for (i = 0; i < tlen; i++) {
3807           mem = tlen == 1 ? p->backref_general.n1 : p->backref_general.ns[i];
3808 
3809           if (mem_end_stk[mem].i   == INVALID_STACK_INDEX) continue;
3810           if (mem_start_stk[mem].i == INVALID_STACK_INDEX) continue;
3811 
3812           pstart = STACK_MEM_START(reg, mem);
3813           pend   = STACK_MEM_END(reg, mem);
3814           n = (int )(pend - pstart);
3815           if (n != 0) {
3816             DATA_ENSURE(n);
3817             swork = s;
3818             STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail);
3819             if (is_fail) continue;
3820             s = swork;
3821           }
3822           break; /* success */
3823         }
3824         if (i == tlen) goto fail;
3825       }
3826       INC_OP;
3827       JUMP_OUT;
3828 
3829 #ifdef USE_BACKREF_WITH_LEVEL
3830     CASE_OP(BACKREF_WITH_LEVEL_IC)
3831       n = 1; /* ignore case */
3832       goto backref_with_level;
3833     CASE_OP(BACKREF_WITH_LEVEL)
3834       {
3835         int level;
3836         MemNumType* mems;
3837 
3838         n = 0;
3839       backref_with_level:
3840         level = p->backref_general.nest_level;
3841         tlen  = p->backref_general.num;
3842         mems = tlen == 1 ? &(p->backref_general.n1) : p->backref_general.ns;
3843 
3844         if (! backref_match_at_nested_level(reg, stk, stk_base, n,
3845                       case_fold_flag, level, (int )tlen, mems, &s, end)) {
3846           goto fail;
3847         }
3848       }
3849       INC_OP;
3850       JUMP_OUT;
3851 #endif
3852 
3853     CASE_OP(BACKREF_CHECK)
3854       {
3855         MemNumType* mems;
3856 
3857         tlen  = p->backref_general.num;
3858         mems = tlen == 1 ? &(p->backref_general.n1) : p->backref_general.ns;
3859 
3860         for (i = 0; i < tlen; i++) {
3861           mem = mems[i];
3862           if (mem_end_stk[mem].i   == INVALID_STACK_INDEX) continue;
3863           if (mem_start_stk[mem].i == INVALID_STACK_INDEX) continue;
3864           break; /* success */
3865         }
3866         if (i == tlen) goto fail;
3867       }
3868       INC_OP;
3869       JUMP_OUT;
3870 
3871 #ifdef USE_BACKREF_WITH_LEVEL
3872     CASE_OP(BACKREF_CHECK_WITH_LEVEL)
3873       {
3874         LengthType level;
3875         MemNumType* mems;
3876 
3877         level = p->backref_general.nest_level;
3878         tlen  = p->backref_general.num;
3879         mems = tlen == 1 ? &(p->backref_general.n1) : p->backref_general.ns;
3880 
3881         if (backref_check_at_nested_level(reg, stk, stk_base,
3882                                           (int )level, (int )tlen, mems) == 0)
3883           goto fail;
3884       }
3885       INC_OP;
3886       JUMP_OUT;
3887 #endif
3888 
3889     CASE_OP(EMPTY_CHECK_START)
3890       mem = p->empty_check_start.mem;   /* mem: null check id */
3891       STACK_PUSH_EMPTY_CHECK_START(mem, s);
3892       INC_OP;
3893       JUMP_OUT;
3894 
3895     CASE_OP(EMPTY_CHECK_END)
           /*
            * End of an empty-loop check region identified by
            * p->empty_check_end.mem.  STACK_EMPTY_CHECK stores its verdict in
            * is_empty; when it is non-zero the instruction that follows this
            * one is skipped as well.
            */
3896       {
3897         int is_empty;
3898 
3899         mem = p->empty_check_end.mem;  /* mem: null check id */
3900         STACK_EMPTY_CHECK(is_empty, mem, s);
3901         INC_OP;
3902         if (is_empty) {
3903 #ifdef ONIG_DEBUG_MATCH
3904           fprintf(DBGFP, "EMPTY_CHECK_END: skip  id:%d, s:%p\n", (int )mem, s);
3905 #endif
               /* Shared with the EMPTY_CHECK_END_MEMST* handlers, which jump here. */
3906         empty_check_found:
3907           /* empty loop found, skip next instruction */
3908 #if defined(ONIG_DEBUG) && !defined(USE_DIRECT_THREADED_CODE)
               /* Debug builds verify that the skipped opcode is one of these four. */
3909           switch (p->opcode) {
3910           case OP_JUMP:
3911           case OP_PUSH:
3912           case OP_REPEAT_INC:
3913           case OP_REPEAT_INC_NG:
3914             INC_OP;
3915             break;
3916           default:
3917             MATCH_AT_ERROR_RETURN(ONIGERR_UNEXPECTED_BYTECODE);
3918             break;
3919           }
3920 #else
3921           INC_OP;
3922 #endif
3923         }
3924       }
3925       JUMP_OUT;
3926 
3927 #ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
3928     CASE_OP(EMPTY_CHECK_END_MEMST)
3929       {
3930         int is_empty;
3931 
3932         mem = p->empty_check_end.mem;  /* mem: null check id */
3933         STACK_EMPTY_CHECK_MEM(is_empty, mem, s, reg);
3934         INC_OP;
3935         if (is_empty) {
3936 #ifdef ONIG_DEBUG_MATCH
3937           fprintf(DBGFP, "EMPTY_CHECK_END_MEM: skip  id:%d, s:%p\n", (int)mem, s);
3938 #endif
3939           if (is_empty == -1) goto fail;
3940           goto empty_check_found;
3941         }
3942       }
3943       JUMP_OUT;
3944 #endif
3945 
3946 #ifdef USE_CALL
3947     CASE_OP(EMPTY_CHECK_END_MEMST_PUSH)
3948       {
3949         int is_empty;
3950 
3951         mem = p->empty_check_end.mem;  /* mem: null check id */
3952 #ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
3953         STACK_EMPTY_CHECK_MEM_REC(is_empty, mem, s, reg);
3954 #else
3955         STACK_EMPTY_CHECK_REC(is_empty, mem, s);
3956 #endif
3957         INC_OP;
3958         if (is_empty) {
3959 #ifdef ONIG_DEBUG_MATCH
3960           fprintf(DBGFP, "EMPTY_CHECK_END_MEM_PUSH: skip  id:%d, s:%p\n",
3961                   (int )mem, s);
3962 #endif
3963           if (is_empty == -1) goto fail;
3964           goto empty_check_found;
3965         }
3966         else {
3967           STACK_PUSH_EMPTY_CHECK_END(mem);
3968         }
3969       }
3970       JUMP_OUT;
3971 #endif
3972 
3973     CASE_OP(JUMP)
3974       addr = p->jump.addr;
3975       p += addr;
3976       CHECK_INTERRUPT_JUMP_OUT;
3977 
3978     CASE_OP(PUSH)
3979       addr = p->push.addr;
3980       STACK_PUSH_ALT(p + addr, s);
3981       INC_OP;
3982       JUMP_OUT;
3983 
3984     CASE_OP(PUSH_SUPER)
3985       addr = p->push.addr;
3986       STACK_PUSH_SUPER_ALT(p + addr, s);
3987       INC_OP;
3988       JUMP_OUT;
3989 
3990     CASE_OP(POP)
3991       STACK_POP_ONE;
3992       INC_OP;
3993       JUMP_OUT;
3994 
3995     CASE_OP(POP_TO_MARK)
3996       STACK_POP_TO_MARK(p->pop_to_mark.id);
3997       INC_OP;
3998       JUMP_OUT;
3999 
4000  #ifdef USE_OP_PUSH_OR_JUMP_EXACT
4001     CASE_OP(PUSH_OR_JUMP_EXACT1)
4002       {
4003         UChar c;
4004 
4005         addr = p->push_or_jump_exact1.addr;
4006         c    = p->push_or_jump_exact1.c;
4007         if (DATA_ENSURE_CHECK1 && c == *s) {
4008           STACK_PUSH_ALT(p + addr, s);
4009           INC_OP;
4010           JUMP_OUT;
4011         }
4012       }
4013       p += addr;
4014       JUMP_OUT;
4015 #endif
4016 
4017     CASE_OP(PUSH_IF_PEEK_NEXT)
4018       {
4019         UChar c;
4020 
4021         addr = p->push_if_peek_next.addr;
4022         c    = p->push_if_peek_next.c;
4023         if (DATA_ENSURE_CHECK1 && c == *s) {
4024           STACK_PUSH_ALT(p + addr, s);
4025         }
4026       }
4027       INC_OP;
4028       JUMP_OUT;
4029 
4030     CASE_OP(REPEAT)
4031       mem  = p->repeat.id;  /* mem: OP_REPEAT ID */
4032       addr = p->repeat.addr;
4033 
4034       STACK_PUSH_REPEAT_INC(mem, 0);
4035       if (reg->repeat_range[mem].lower == 0) {
4036         STACK_PUSH_ALT(p + addr, s);
4037       }
4038       INC_OP;
4039       JUMP_OUT;
4040 
4041     CASE_OP(REPEAT_NG)
4042       mem  = p->repeat.id;  /* mem: OP_REPEAT ID */
4043       addr = p->repeat.addr;
4044 
4045       STACK_PUSH_REPEAT_INC(mem, 0);
4046       if (reg->repeat_range[mem].lower == 0) {
4047         STACK_PUSH_ALT(p + 1, s);
4048         p += addr;
4049       }
4050       else
4051         INC_OP;
4052       JUMP_OUT;
4053 
4054     CASE_OP(REPEAT_INC)
           /*
            * End of one pass through a counted repeat.
            * The count for this repeat id is read from the stack and
            * incremented, then compared with the repeat's bounds:
            *   n >= upper : fall through to the next instruction;
            *   n >= lower : push the next instruction as an alternative and
            *                jump back to the body (body is tried first);
            *   otherwise  : jump back to the body unconditionally.
            * The new count is pushed afterwards in every case.
            */
4055       mem  = p->repeat_inc.id;  /* mem: OP_REPEAT ID */
4056       STACK_GET_REPEAT_COUNT(mem, n);
4057       n++;
4058       if (n >= reg->repeat_range[mem].upper) {
4059         /* end of repeat. Nothing to do. */
4060         INC_OP;
4061       }
4062       else if (n >= reg->repeat_range[mem].lower) {
             /* INC_OP first so that the pushed alternative is the loop exit. */
4063         INC_OP;
4064         STACK_PUSH_ALT(p, s);
4065         p = reg->repeat_range[mem].u.pcode;
4066       }
4067       else {
4068         p = reg->repeat_range[mem].u.pcode;
4069       }
4070       STACK_PUSH_REPEAT_INC(mem, n);
4071       CHECK_INTERRUPT_JUMP_OUT;
4072 
4073     CASE_OP(REPEAT_INC_NG)
           /*
            * Counterpart of REPEAT_INC with the opposite preference
            * (presumably "NG" = non-greedy; the name is not defined here).
            * The incremented count is pushed first, then:
            *   n == upper : fall through to the next instruction;
            *   n >= lower : push the body as an alternative and fall through
            *                (the loop exit is tried first);
            *   otherwise  : jump back to the body unconditionally.
            */
4074       mem = p->repeat_inc.id;  /* mem: OP_REPEAT ID */
4075       STACK_GET_REPEAT_COUNT(mem, n);
4076       n++;
4077       STACK_PUSH_REPEAT_INC(mem, n);
4078       if (n == reg->repeat_range[mem].upper) {
4079         INC_OP;
4080       }
4081       else {
4082         if (n >= reg->repeat_range[mem].lower) {
4083           STACK_PUSH_ALT(reg->repeat_range[mem].u.pcode, s);
4084           INC_OP;
4085         }
4086         else {
4087           p = reg->repeat_range[mem].u.pcode;
4088         }
4089       }
4090       CHECK_INTERRUPT_JUMP_OUT;
4091 
4092 #ifdef USE_CALL
4093     CASE_OP(CALL)
4094       if (subexp_call_nest_counter == SubexpCallMaxNestLevel)
4095         goto fail;
4096       subexp_call_nest_counter++;
4097 
4098       if (SubexpCallLimitInSearch != 0) {
4099         msa->subexp_call_in_search_counter++;
4100 #ifdef ONIG_DEBUG_MATCH_COUNTER
4101         if (p->call.called_mem < MAX_SUBEXP_CALL_COUNTERS)
4102           subexp_call_counters[p->call.called_mem]++;
4103         if (msa->subexp_call_in_search_counter % 1000 == 0)
4104           MATCH_COUNTER_OUT("CALL");
4105 #endif
4106         if (msa->subexp_call_in_search_counter >
4107             SubexpCallLimitInSearch) {
4108           MATCH_AT_ERROR_RETURN(ONIGERR_SUBEXP_CALL_LIMIT_IN_SEARCH_OVER);
4109         }
4110       }
4111 
4112       addr = p->call.addr;
4113       INC_OP; STACK_PUSH_CALL_FRAME(p);
4114       p = reg->ops + addr;
4115 
4116       JUMP_OUT;
4117 
4118     CASE_OP(RETURN)
4119       STACK_RETURN(p);
4120       STACK_PUSH_RETURN;
4121       subexp_call_nest_counter--;
4122       JUMP_OUT;
4123 #endif
4124 
4125     CASE_OP(MOVE)
4126       if (p->move.n < 0) {
4127         s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, -p->move.n);
4128         if (IS_NULL(s)) goto fail;
4129       }
4130       else {
4131         int len;
4132 
4133         for (tlen = p->move.n; tlen > 0; tlen--) {
4134           len = enclen(encode, s);
4135           s += len;
4136           if (s > end) goto fail;
4137           if (s == end) {
4138             if (tlen != 1) goto fail;
4139             else           break;
4140           }
4141         }
4142       }
4143       INC_OP;
4144       JUMP_OUT;
4145 
4146     CASE_OP(STEP_BACK_START)
4147       tlen = p->step_back_start.initial;
4148       if (tlen != 0) {
4149         s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
4150         if (IS_NULL(s)) goto fail;
4151       }
4152       if (p->step_back_start.remaining != 0) {
4153         STACK_PUSH_ALT_WITH_ZID(p + 1, s, p->step_back_start.remaining);
4154         p += p->step_back_start.addr;
4155       }
4156       else
4157         INC_OP;
4158       JUMP_OUT;
4159 
4160     CASE_OP(STEP_BACK_NEXT)
4161       tlen = (LengthType )stk->zid; /* remaining count */
4162       if (tlen != INFINITE_LEN) tlen--;
4163       s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, 1);
4164       if (IS_NULL(s)) goto fail;
4165       if (tlen != 0) {
4166         STACK_PUSH_ALT_WITH_ZID(p, s, (int )tlen);
4167       }
4168       INC_OP;
4169       JUMP_OUT;
4170 
4171     CASE_OP(CUT_TO_MARK)
4172       mem  = p->cut_to_mark.id; /* mem: mark id */
4173       STACK_TO_VOID_TO_MARK(stkp, mem);
4174       if (p->cut_to_mark.restore_pos != 0) {
4175         s = stkp->u.val.v;
4176       }
4177       INC_OP;
4178       JUMP_OUT;
4179 
4180     CASE_OP(MARK)
4181       mem  = p->mark.id; /* mem: mark id */
4182       if (p->mark.save_pos != 0)
4183         STACK_PUSH_MARK_WITH_POS(mem, s);
4184       else
4185         STACK_PUSH_MARK(mem);
4186 
4187       INC_OP;
4188       JUMP_OUT;
4189 
4190     CASE_OP(SAVE_VAL)
4191       {
4192         SaveType type;
4193 
4194         type = p->save_val.type;
4195         mem  = p->save_val.id; /* mem: save id */
4196         switch ((enum SaveType )type) {
4197         case SAVE_KEEP:
4198           STACK_PUSH_SAVE_VAL(mem, type, s);
4199           break;
4200 
4201         case SAVE_S:
4202           STACK_PUSH_SAVE_VAL_WITH_SPREV(mem, type, s);
4203           break;
4204 
4205         case SAVE_RIGHT_RANGE:
4206           STACK_PUSH_SAVE_VAL(mem, SAVE_RIGHT_RANGE, right_range);
4207           break;
4208         }
4209       }
4210       INC_OP;
4211       JUMP_OUT;
4212 
4213     CASE_OP(UPDATE_VAR)
4214       {
4215         UpdateVarType type;
4216         enum SaveType save_type;
4217 
4218         type = p->update_var.type;
4219 
4220         switch ((enum UpdateVarType )type) {
4221         case UPDATE_VAR_KEEP_FROM_STACK_LAST:
4222           STACK_GET_SAVE_VAL_TYPE_LAST(SAVE_KEEP, keep);
4223           break;
4224         case UPDATE_VAR_S_FROM_STACK:
4225           mem = p->update_var.id; /* mem: save id */
4226           STACK_GET_SAVE_VAL_TYPE_LAST_ID_WITH_SPREV(SAVE_S, mem, s);
4227           break;
4228         case UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK:
4229           save_type = SAVE_S;
4230           goto get_save_val_type_last_id;
4231           break;
4232         case UPDATE_VAR_RIGHT_RANGE_FROM_STACK:
4233           save_type = SAVE_RIGHT_RANGE;
4234         get_save_val_type_last_id:
4235           mem = p->update_var.id; /* mem: save id */
4236           STACK_GET_SAVE_VAL_TYPE_LAST_ID(save_type, mem, right_range, p->update_var.clear);
4237           break;
4238         case UPDATE_VAR_RIGHT_RANGE_TO_S:
4239           right_range = s;
4240           break;
4241         case UPDATE_VAR_RIGHT_RANGE_INIT:
4242           INIT_RIGHT_RANGE;
4243           break;
4244         }
4245       }
4246       INC_OP;
4247       JUMP_OUT;
4248 
4249 #ifdef USE_CALLOUT
4250     CASE_OP(CALLOUT_CONTENTS)
4251       of = ONIG_CALLOUT_OF_CONTENTS;
4252       mem = p->callout_contents.num;
4253       goto callout_common_entry;
4254       BREAK_OUT;
4255 
4256     CASE_OP(CALLOUT_NAME)
4257       {
4258         int call_result;
4259         int name_id;
4260         int in;
4261         CalloutListEntry* e;
4262         OnigCalloutFunc func;
4263         OnigCalloutArgs args;
4264 
4265         of  = ONIG_CALLOUT_OF_NAME;
4266         mem = p->callout_name.num;
4267 
4268       callout_common_entry:
4269         e = onig_reg_callout_list_at(reg, mem);
4270         in = e->in;
4271         if (of == ONIG_CALLOUT_OF_NAME) {
4272           name_id = p->callout_name.id;
4273           func = onig_get_callout_start_func(reg, mem);
4274         }
4275         else {
4276           name_id = ONIG_NON_NAME_ID;
4277           func = msa->mp->progress_callout_of_contents;
4278         }
4279 
4280         if (IS_NOT_NULL(func) && (in & ONIG_CALLOUT_IN_PROGRESS) != 0) {
4281           CALLOUT_BODY(func, ONIG_CALLOUT_IN_PROGRESS, name_id,
4282                        (int )mem, msa->mp->callout_user_data, args, call_result);
4283           switch (call_result) {
4284           case ONIG_CALLOUT_FAIL:
4285             goto fail;
4286             break;
4287           case ONIG_CALLOUT_SUCCESS:
4288             goto retraction_callout2;
4289             break;
4290           default: /* error code */
4291             if (call_result > 0) {
4292               call_result = ONIGERR_INVALID_ARGUMENT;
4293             }
4294             best_len = call_result;
4295             goto match_at_end;
4296             break;
4297           }
4298         }
4299         else {
4300         retraction_callout2:
4301           if ((in & ONIG_CALLOUT_IN_RETRACTION) != 0) {
4302             if (of == ONIG_CALLOUT_OF_NAME) {
4303               if (IS_NOT_NULL(func)) {
4304                 STACK_PUSH_CALLOUT_NAME(name_id, mem, func);
4305               }
4306             }
4307             else {
4308               func = msa->mp->retraction_callout_of_contents;
4309               if (IS_NOT_NULL(func)) {
4310                 STACK_PUSH_CALLOUT_CONTENTS(mem, func);
4311               }
4312             }
4313           }
4314         }
4315       }
4316       INC_OP;
4317       JUMP_OUT;
4318 #endif
4319 
4320     CASE_OP(FINISH)
4321       goto match_at_end;
4322 
4323 #ifdef ONIG_DEBUG_STATISTICS
4324     fail:
4325       SOP_OUT;
4326       goto fail2;
4327 #endif
4328     CASE_OP(FAIL)
4329 #ifdef ONIG_DEBUG_STATISTICS
4330     fail2:
4331 #else
4332     fail:
4333 #endif
4334       STACK_POP;
4335       p = stk->u.state.pcode;
4336       s = stk->u.state.pstr;
4337       CHECK_RETRY_LIMIT_IN_MATCH;
4338       JUMP_OUT;
4339 
4340     DEFAULT_OP
4341       MATCH_AT_ERROR_RETURN(ONIGERR_UNDEFINED_BYTECODE);
4342 
4343   } BYTECODE_INTERPRETER_END;
4344 
4345  match_at_end:
4346   if (msa->retry_limit_in_search != 0) {
4347     msa->retry_limit_in_search_counter += retry_in_match_counter;
4348   }
4349 
4350 #ifdef ONIG_DEBUG_MATCH_COUNTER
4351   MATCH_COUNTER_OUT("END");
4352 #endif
4353 
4354   STACK_SAVE(msa, is_alloca, alloc_base);
4355   return best_len;
4356 }
4357 
4358 
4359 #ifdef USE_REGSET
4360 
/* One regset entry: a compiled regex paired with the region handed to the
   matcher for that regex.  The region may be NULL; users of this struct
   test it with IS_NOT_NULL() before touching it. */
typedef struct {
  regex_t*    reg;
  OnigRegion* region;
} RR;
4365 
/* A set of regexes that are searched together over one subject string. */
struct OnigRegSetStruct {
  RR*          rs;          /* entries; rs[0] .. rs[n-1] are iterated by the searches */
  int          n;           /* number of entries in rs */
  int          alloc;       /* presumably the allocated capacity of rs -- TODO confirm */
  OnigEncoding enc;         /* encoding used to step through the subject string */
  int          anchor;      /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
  OnigLen      anc_dmin;    /* (SEMI_)END_BUF anchor distance */
  OnigLen      anc_dmax;    /* (SEMI_)END_BUF anchor distance */
  int          all_low_high; /* non-zero: position-lead search may use its low/high-only loop */
  int          anychar_inf;  /* non-zero: position-lead search tracks "previous char is newline" */
};
4377 
/* Per-regex state kept by the position-lead regset search. */
enum SearchRangeStatus {
  SRS_DEAD      = 0,  /* skipped at every position */
  SRS_LOW_HIGH  = 1,  /* attempts are gated by a low/high window from forward_search() */
  SRS_ALL_RANGE = 2   /* no window: eligible at every position */
};
4383 
/* Search window of one regex during a position-lead regset search. */
typedef struct {
  int    state;  /* value of enum SearchRangeStatus */
  UChar* low;        /* positions before low are skipped */
  UChar* high;       /* reaching high triggers a new forward_search() */
  UChar* sch_range;  /* range limit passed to forward_search() when the window is refreshed */
} SearchRange;
4390 
/*
 * Try the current regex at the current position and leave the enclosing
 * function on a match or on an error.
 *
 * This expands to plain statements (it is not wrapped in do/while) and
 * depends on names at the expansion site: the variables r, reg, str, end,
 * s, msas and i, and the labels `match` and `finish`.
 *   r >= 0                      -> goto match
 *   r <  0, not ONIG_MISMATCH   -> goto finish (error)
 *   r == ONIG_MISMATCH          -> fall through
 */
#define REGSET_MATCH_AND_RETURN_CHECK(upper_range) \
  r = match_at(reg, str, end, (upper_range), s, msas + i); \
  if (r != ONIG_MISMATCH) {\
    if (r >= 0) {\
      goto match;\
    }\
    else goto finish; /* error */ \
  }
4399 
/*
 * Position-lead regset search: step through candidate positions from
 * `start` up to `range` and, at each position, try the regexes of `set`
 * in index order with match_at().
 *
 *   set        regex set to search with
 *   str, end   bounds of the subject string
 *   start      first candidate position
 *   range      limit of candidate positions (match start range)
 *   orig_range upper range passed to match_at() (data range)
 *   option     not referenced in this body
 *   msas       one MatchArg per regex; msas[i] is used with set->rs[i]
 *   rmatch_pos on a match, receives the match position as an offset from str
 *
 * Returns the index of the regex that matched, ONIG_MISMATCH when no regex
 * matched, or the negative value produced by match_at() on error.  A failed
 * allocation of the per-regex table returns through
 * CHECK_NULL_RETURN_MEMERR().
 */
static inline int
regset_search_body_position_lead(OnigRegSet* set,
           const UChar* str, const UChar* end,
           const UChar* start, const UChar* range, /* match start range */
           const UChar* orig_range, /* data range */
           OnigOptionType option, MatchArg* msas, int* rmatch_pos)
{
  int r, n, i;
  UChar *s;
  UChar *low, *high;
  UChar* sch_range;
  regex_t* reg;
  OnigEncoding enc;
  SearchRange* sr;

  n   = set->n;
  enc = set->enc;
  s = (UChar* )start;

  /* one search window per regex; released on every exit path below */
  sr = (SearchRange* )xmalloc(sizeof(*sr) * n);
  CHECK_NULL_RETURN_MEMERR(sr);

  /* Classify every regex: DEAD, LOW_HIGH (windowed) or ALL_RANGE. */
  for (i = 0; i < n; i++) {
    reg = set->rs[i].reg;

    sr[i].state = SRS_DEAD;
    if (reg->optimize != OPTIMIZE_NONE) {
      if (reg->dist_max != INFINITE_LEN) {
        /* bounded distance: limit the forward search to range + dist_max,
           clamped to the end of the string */
        if (end - range > reg->dist_max)
          sch_range = (UChar* )range + reg->dist_max;
        else
          sch_range = (UChar* )end;

        /* a failed forward search leaves the entry DEAD */
        if (forward_search(reg, str, end, s, sch_range, &low, &high)) {
          sr[i].state = SRS_LOW_HIGH;
          sr[i].low  = low;
          sr[i].high = high;
          sr[i].sch_range = sch_range;
        }
      }
      else {
        /* unbounded distance: the forward search only decides between
           DEAD and ALL_RANGE; low/high are not kept */
        sch_range = (UChar* )end;
        if (forward_search(reg, str, end, s, sch_range, &low, &high)) {
          goto total_active;
        }
      }
    }
    else {
    total_active:  /* also entered by the goto above */
      sr[i].state    = SRS_ALL_RANGE;
      sr[i].low      = s;
      sr[i].high     = (UChar* )range;
      /* sr[i].sch_range is left unset for ALL_RANGE entries */
    }
  }

#define ACTIVATE_ALL_LOW_HIGH_SEARCH_THRESHOLD_LEN   500

  /* Low/high-only loop: used when the set is flagged all_low_high and the
     candidate range is longer than the threshold.
     NOTE(review): this loop reads sr[i].sch_range for every non-DEAD entry,
     so it presumably relies on all_low_high implying that no entry is
     ALL_RANGE -- confirm where all_low_high is computed. */
  if (set->all_low_high != 0
      && range - start > ACTIVATE_ALL_LOW_HIGH_SEARCH_THRESHOLD_LEN) {
    do {
      int try_count = 0;  /* match attempts made at this position */
      for (i = 0; i < n; i++) {
        if (sr[i].state == SRS_DEAD) continue;

        if (s <  sr[i].low) continue;
        if (s >= sr[i].high) {
          /* window exhausted: look for the next one from s */
          if (forward_search(set->rs[i].reg, str, end, s, sr[i].sch_range,
                             &low, &high) != 0) {
            sr[i].low      = low;
            sr[i].high     = high;
            if (s < low) continue;
          }
          else {
            sr[i].state = SRS_DEAD;
            continue;
          }
        }

        reg = set->rs[i].reg;
        REGSET_MATCH_AND_RETURN_CHECK(orig_range);
        try_count++;
      } /* for (i) */

      if (s >= range) break;

      if (try_count == 0) {
        /* nothing was tried here: jump to the smallest `low` among the
           LOW_HIGH entries, or stop when none lies before range */
        low = (UChar* )range;
        for (i = 0; i < n; i++) {
          if (sr[i].state == SRS_LOW_HIGH && low > sr[i].low) {
            low = sr[i].low;
          }
        }
        if (low == range) break;

        s = low;
      }
      else {
        s += enclen(enc, s);
      }
    } while (1);
  }
  else {
    /* General loop: advance one character at a time. */
    int prev_is_newline = 1;  /* starts as 1, so the first position is always eligible */
    do {
      for (i = 0; i < n; i++) {
        if (sr[i].state == SRS_DEAD) continue;
        if (sr[i].state == SRS_LOW_HIGH) {
          if (s <  sr[i].low) continue;
          if (s >= sr[i].high) {
            /* window exhausted: look for the next one from s */
            if (forward_search(set->rs[i].reg, str, end, s, sr[i].sch_range,
                               &low, &high) != 0) {
              sr[i].low      = low;
              sr[i].high     = high;
              if (s < low) continue;
            }
            else {
              sr[i].state = SRS_DEAD;
              continue;
            }
          }
        }

        reg = set->rs[i].reg;
        /* regexes flagged ANCR_ANYCHAR_INF are only tried when the previous
           character was a newline (or at the first position) */
        if ((reg->anchor & ANCR_ANYCHAR_INF) == 0 || prev_is_newline != 0) {
          REGSET_MATCH_AND_RETURN_CHECK(orig_range);
        }
      }

      if (s >= range) break;

      /* newline state is only maintained when the set needs it */
      if (set->anychar_inf != 0)
        prev_is_newline = ONIGENC_IS_MBC_NEWLINE(set->enc, s, end);

      s += enclen(enc, s);
    } while (1);
  }

  xfree(sr);
  return ONIG_MISMATCH;

 finish:  /* error from match_at(); r holds the error code */
  xfree(sr);
  return r;

 match:   /* regex i matched at s */
  xfree(sr);
  *rmatch_pos = (int )(s - str);
  return i;
}
4549 
4550 static inline int
regset_search_body_regex_lead(OnigRegSet * set,const UChar * str,const UChar * end,const UChar * start,const UChar * orig_range,OnigRegSetLead lead,OnigOptionType option,OnigMatchParam * mps[],int * rmatch_pos)4551 regset_search_body_regex_lead(OnigRegSet* set,
4552               const UChar* str, const UChar* end,
4553               const UChar* start, const UChar* orig_range, OnigRegSetLead lead,
4554               OnigOptionType option, OnigMatchParam* mps[], int* rmatch_pos)
4555 {
4556   int r;
4557   int i;
4558   int n;
4559   int match_index;
4560   const UChar* ep;
4561   regex_t* reg;
4562   OnigRegion* region;
4563 
4564   n = set->n;
4565 
4566   match_index = ONIG_MISMATCH;
4567   ep = orig_range;
4568   for (i = 0; i < n; i++) {
4569     reg    = set->rs[i].reg;
4570     region = set->rs[i].region;
4571     r = search_in_range(reg, str, end, start, ep, orig_range, region, option, mps[i]);
4572     if (r > 0) {
4573       if (str + r < ep) {
4574         match_index = i;
4575         *rmatch_pos = r;
4576         if (lead == ONIG_REGSET_PRIORITY_TO_REGEX_ORDER)
4577           break;
4578 
4579         ep = str + r;
4580       }
4581     }
4582     else if (r == 0) {
4583       match_index = i;
4584       *rmatch_pos = r;
4585       break;
4586     }
4587   }
4588 
4589   return match_index;
4590 }
4591 
/*
 * Search a string with every regex of `set`, using one OnigMatchParam per
 * regex (mps[i] is used with set->rs[i]).
 *
 *   str, end    bounds of the subject string
 *   start       search start position
 *   range       search range limit; only forward search is supported
 *   lead        ONIG_REGSET_POSITION_LEAD selects the position-lead body,
 *               any other value the regex-lead body
 *   option      search options; a POSIX-region option is rejected
 *   rmatch_pos  on a match, receives the match position as an offset from str
 *
 * Returns the index of the regex that matched, ONIG_MISMATCH when nothing
 * matched (including an empty set), or a negative error code such as
 * ONIGERR_INVALID_ARGUMENT or ONIGERR_INVALID_WIDE_CHAR_VALUE.
 */
extern int
onig_regset_search_with_param(OnigRegSet* set,
           const UChar* str, const UChar* end,
           const UChar* start, const UChar* range,
           OnigRegSetLead lead, OnigOptionType option, OnigMatchParam* mps[],
           int* rmatch_pos)
{
  int r;
  int i;
  UChar *s;
  regex_t* reg;
  OnigEncoding enc;
  OnigRegion* region;
  MatchArg* msas;
  /* the anchor handling below may move start/range; keep the caller's values */
  const UChar *orig_start = start;
  const UChar *orig_range = range;

  if (set->n == 0)
    return ONIG_MISMATCH;

  if (OPTON_POSIX_REGION(option))
    return ONIGERR_INVALID_ARGUMENT;

  r = 0;
  enc = set->enc;
  /* msas stays NULL until a branch allocates it; the *_no_msa exits are
     used while that is still the case */
  msas = (MatchArg* )NULL;

  /* adjust each match param and prepare each non-NULL region */
  for (i = 0; i < set->n; i++) {
    reg    = set->rs[i].reg;
    region = set->rs[i].region;
    ADJUST_MATCH_PARAM(reg, mps[i]);
    if (IS_NOT_NULL(region)) {
      r = onig_region_resize_clear(region, reg->num_mem + 1);
      if (r != 0) goto finish_no_msa;
    }
  }

  if (start > end || start < str) goto mismatch_no_msa;
  if (str < end) {
    /* forward search only */
    if (range < start)
      return ONIGERR_INVALID_ARGUMENT;
  }

  if (OPTON_CHECK_VALIDITY_OF_STRING(option)) {
    if (! ONIGENC_IS_VALID_MBC_STRING(enc, str, end)) {
      r = ONIGERR_INVALID_WIDE_CHAR_VALUE;
      goto finish_no_msa;
    }
  }

  /* Anchor handling for a non-empty string: narrow start/range, or give up
     early, based on the anchor information stored in the set. */
  if (set->anchor != OPTIMIZE_NONE && str < end) {
    UChar *min_semi_end, *max_semi_end;

    if ((set->anchor & ANCR_BEGIN_POSITION) != 0) {
      /* search start-position only */
    begin_position:  /* also entered from the ANCR_ANYCHAR_INF_ML branch */
      range = start + 1;
    }
    else if ((set->anchor & ANCR_BEGIN_BUF) != 0) {
      /* search str-position only */
      if (start != str) goto mismatch_no_msa;
      range = str + 1;
    }
    else if ((set->anchor & ANCR_END_BUF) != 0) {
      min_semi_end = max_semi_end = (UChar* )end;

    end_buf:  /* also entered from the ANCR_SEMI_END_BUF branch */
      /* string shorter than the minimum anchor distance: no match possible */
      if ((OnigLen )(max_semi_end - str) < set->anc_dmin)
        goto mismatch_no_msa;

      /* start is too far from the end anchor: move it forward, then
         re-align it to a character head */
      if ((OnigLen )(min_semi_end - start) > set->anc_dmax) {
        start = min_semi_end - set->anc_dmax;
        if (start < end)
          start = onigenc_get_right_adjust_char_head(enc, str, start);
      }
      /* range reaches closer to the end than anc_dmin allows: pull it back */
      if ((OnigLen )(max_semi_end - (range - 1)) < set->anc_dmin) {
        range = max_semi_end - set->anc_dmin + 1;
      }
      if (start > range) goto mismatch_no_msa;
    }
    else if ((set->anchor & ANCR_SEMI_END_BUF) != 0) {
      UChar* pre_end = ONIGENC_STEP_BACK(enc, str, end, 1);

      max_semi_end = (UChar* )end;
      if (ONIGENC_IS_MBC_NEWLINE(enc, pre_end, end)) {
        /* string ends with a newline: the semi-end may sit before it */
        min_semi_end = pre_end;

#ifdef USE_CRNL_AS_LINE_TERMINATOR
        pre_end = ONIGENC_STEP_BACK(enc, str, pre_end, 1);
        if (IS_NOT_NULL(pre_end) &&
            ONIGENC_IS_MBC_CRNL(enc, pre_end, end)) {
          min_semi_end = pre_end;
        }
#endif
        /* when this test fails, start/range are left as given */
        if (min_semi_end > str && start <= min_semi_end) {
          goto end_buf;
        }
      }
      else {
        min_semi_end = (UChar* )end;
        goto end_buf;
      }
    }
    else if ((set->anchor & ANCR_ANYCHAR_INF_ML) != 0) {
      goto begin_position;
    }
  }
  else if (str == end) { /* empty string */
    start = end = str;
    s = (UChar* )start;

    msas = (MatchArg* )xmalloc(sizeof(*msas) * set->n);
    CHECK_NULL_RETURN_MEMERR(msas);
    for (i = 0; i < set->n; i++) {
      reg = set->rs[i].reg;
      MATCH_ARG_INIT(msas[i], reg, option, set->rs[i].region, start, mps[i]);
    }
    /* only regexes with threshold_len == 0 are tried on the empty string */
    for (i = 0; i < set->n; i++) {
      reg = set->rs[i].reg;
      if (reg->threshold_len == 0) {
        /* REGSET_MATCH_AND_RETURN_CHECK(end); */
        /* Can't use REGSET_MATCH_AND_RETURN_CHECK()
           because r must be set regex index (i)
        */
        r = match_at(reg, str, end, end, s, msas + i);
        if (r != ONIG_MISMATCH) {
          if (r >= 0) {
            r = i;
            goto match;
          }
          else goto finish; /* error */
        }
      }
    }

    goto mismatch;
  }

  /* Main search over a non-empty string. */
  if (lead == ONIG_REGSET_POSITION_LEAD) {
    msas = (MatchArg* )xmalloc(sizeof(*msas) * set->n);
    CHECK_NULL_RETURN_MEMERR(msas);

    /* the match args are initialized with the caller's original start */
    for (i = 0; i < set->n; i++) {
      MATCH_ARG_INIT(msas[i], set->rs[i].reg, option, set->rs[i].region,
                     orig_start, mps[i]);
    }

    r = regset_search_body_position_lead(set, str, end, start, range,
                                         orig_range, option, msas, rmatch_pos);
  }
  else {
    /* the regex-lead body takes the match params directly; msas stays NULL */
    r = regset_search_body_regex_lead(set, str, end, start, orig_range,
                                      lead, option, mps, rmatch_pos);
  }
  /* both bodies store *rmatch_pos themselves when they return an index */
  if (r < 0) goto finish;
  else       goto match2;

 mismatch:
  r = ONIG_MISMATCH;
 finish:
  /* release the match args (if any) and clear the regions of regexes for
     which OPTON_FIND_NOT_EMPTY holds */
  for (i = 0; i < set->n; i++) {
    if (IS_NOT_NULL(msas))
      MATCH_ARG_FREE(msas[i]);
    if (OPTON_FIND_NOT_EMPTY(set->rs[i].reg->options) &&
        IS_NOT_NULL(set->rs[i].region)) {
      onig_region_clear(set->rs[i].region);
    }
  }
  if (IS_NOT_NULL(msas)) xfree(msas);
  return r;

  /* exits used before msas has been allocated: nothing to release */
 mismatch_no_msa:
  r = ONIG_MISMATCH;
 finish_no_msa:
  return r;

 match:
  *rmatch_pos = (int )(s - str);
 match2:
  /* same cleanup as at `finish` */
  for (i = 0; i < set->n; i++) {
    if (IS_NOT_NULL(msas))
      MATCH_ARG_FREE(msas[i]);
    if (OPTON_FIND_NOT_EMPTY(set->rs[i].reg->options) &&
        IS_NOT_NULL(set->rs[i].region)) {
      onig_region_clear(set->rs[i].region);
    }
  }
  if (IS_NOT_NULL(msas)) xfree(msas);
  return r; /* regex index */
}
4783 
4784 extern int
onig_regset_search(OnigRegSet * set,const UChar * str,const UChar * end,const UChar * start,const UChar * range,OnigRegSetLead lead,OnigOptionType option,int * rmatch_pos)4785 onig_regset_search(OnigRegSet* set, const UChar* str, const UChar* end,
4786                    const UChar* start, const UChar* range,
4787                    OnigRegSetLead lead, OnigOptionType option, int* rmatch_pos)
4788 {
4789   int r;
4790   int i;
4791   OnigMatchParam* mp;
4792   OnigMatchParam** mps;
4793 
4794   mps = (OnigMatchParam** )xmalloc((sizeof(OnigMatchParam*) + sizeof(OnigMatchParam)) * set->n);
4795   CHECK_NULL_RETURN_MEMERR(mps);
4796 
4797   mp = (OnigMatchParam* )(mps + set->n);
4798 
4799   for (i = 0; i < set->n; i++) {
4800     onig_initialize_match_param(mp + i);
4801     mps[i] = mp + i;
4802   }
4803 
4804   r = onig_regset_search_with_param(set, str, end, start, range, lead, option, mps,
4805                                     rmatch_pos);
4806   for (i = 0; i < set->n; i++)
4807     onig_free_match_param_content(mp + i);
4808 
4809   xfree(mps);
4810 
4811   return r;
4812 }
4813 
4814 #endif /* USE_REGSET */
4815 
4816 
4817 static UChar*
slow_search(OnigEncoding enc,UChar * target,UChar * target_end,const UChar * text,const UChar * text_end,UChar * text_range)4818 slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
4819             const UChar* text, const UChar* text_end, UChar* text_range)
4820 {
4821   UChar *t, *p, *s, *end;
4822 
4823   end = (UChar* )text_end;
4824   end -= target_end - target - 1;
4825   if (end > text_range)
4826     end = text_range;
4827 
4828   s = (UChar* )text;
4829 
4830   while (s < end) {
4831     if (*s == *target) {
4832       p = s + 1;
4833       t = target + 1;
4834       while (t < target_end) {
4835         if (*t != *p++)
4836           break;
4837         t++;
4838       }
4839       if (t == target_end)
4840         return s;
4841     }
4842     s += enclen(enc, s);
4843   }
4844 
4845   return (UChar* )NULL;
4846 }
4847 
4848 static UChar*
slow_search_backward(OnigEncoding enc,UChar * target,UChar * target_end,const UChar * text,const UChar * adjust_text,const UChar * text_end,const UChar * text_start)4849 slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
4850                      const UChar* text, const UChar* adjust_text,
4851                      const UChar* text_end, const UChar* text_start)
4852 {
4853   UChar *t, *p, *s;
4854 
4855   s = (UChar* )text_end;
4856   s -= (target_end - target);
4857   if (s > text_start)
4858     s = (UChar* )text_start;
4859   else
4860     s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
4861 
4862   while (PTR_GE(s, text)) {
4863     if (*s == *target) {
4864       p = s + 1;
4865       t = target + 1;
4866       while (t < target_end) {
4867         if (*t != *p++)
4868           break;
4869         t++;
4870       }
4871       if (t == target_end)
4872         return s;
4873     }
4874     s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);
4875   }
4876 
4877   return (UChar* )NULL;
4878 }
4879 
4880 static UChar*
sunday_quick_search_step_forward(regex_t * reg,const UChar * target,const UChar * target_end,const UChar * text,const UChar * text_end,const UChar * text_range)4881 sunday_quick_search_step_forward(regex_t* reg,
4882                                  const UChar* target, const UChar* target_end,
4883                                  const UChar* text, const UChar* text_end,
4884                                  const UChar* text_range)
4885 {
4886   const UChar *s, *se, *t, *p, *end;
4887   const UChar *tail;
4888   int skip, tlen1;
4889   int map_offset;
4890   OnigEncoding enc;
4891 
4892 #ifdef ONIG_DEBUG_SEARCH
4893   fprintf(DBGFP,
4894   "sunday_quick_search_step_forward: text: %p, text_end: %p, text_range: %p\n",
4895           text, text_end, text_range);
4896 #endif
4897 
4898   enc = reg->enc;
4899 
4900   tail = target_end - 1;
4901   tlen1 = (int )(tail - target);
4902   end = text_range;
4903   if (tlen1 > text_end - end)
4904     end = text_end - tlen1;
4905 
4906   map_offset = reg->map_offset;
4907   s = text;
4908 
4909   while (s < end) {
4910     p = se = s + tlen1;
4911     t = tail;
4912     while (*p == *t) {
4913       if (t == target) return (UChar* )s;
4914       p--; t--;
4915     }
4916     if (se + map_offset >= text_end) break;
4917     skip = reg->map[*(se + map_offset)];
4918 #if 0
4919     t = s;
4920     do {
4921       s += enclen(enc, s);
4922     } while ((s - t) < skip && s < end);
4923 #else
4924     s += skip;
4925     if (s < end)
4926       s = onigenc_get_right_adjust_char_head(enc, text, s);
4927 #endif
4928   }
4929 
4930   return (UChar* )NULL;
4931 }
4932 
4933 static UChar*
sunday_quick_search(regex_t * reg,const UChar * target,const UChar * target_end,const UChar * text,const UChar * text_end,const UChar * text_range)4934 sunday_quick_search(regex_t* reg, const UChar* target, const UChar* target_end,
4935                     const UChar* text, const UChar* text_end,
4936                     const UChar* text_range)
4937 {
4938   const UChar *s, *t, *p, *end;
4939   const UChar *tail;
4940   int map_offset;
4941   ptrdiff_t target_len;
4942 
4943   map_offset = reg->map_offset;
4944   tail = target_end - 1;
4945   target_len = target_end - target;
4946 
4947   if (target_len > text_end - text_range) {
4948     end = text_end;
4949     if (target_len > text_end - text)
4950       return (UChar* )NULL;
4951   }
4952   else {
4953     end = text_range + target_len;
4954   }
4955 
4956   s = text + target_len - 1;
4957 
4958 #ifdef USE_STRICT_POINTER_ADDRESS
4959   if (s < end) {
4960     while (TRUE) {
4961       p = s;
4962       t = tail;
4963       while (*p == *t) {
4964         if (t == target) return (UChar* )p;
4965         p--; t--;
4966       }
4967       if (text_end - s <= map_offset) break;
4968       if (reg->map[*(s + map_offset)] >= end - s) break;
4969       s += reg->map[*(s + map_offset)];
4970     }
4971   }
4972 #else
4973   while (s < end) {
4974     p = s;
4975     t = tail;
4976     while (*p == *t) {
4977       if (t == target) return (UChar* )p;
4978       p--; t--;
4979     }
4980     if (text_end - s <= map_offset) break;
4981     s += reg->map[*(s + map_offset)];
4982   }
4983 #endif
4984 
4985   return (UChar* )NULL;
4986 }
4987 
4988 static UChar*
map_search(OnigEncoding enc,UChar map[],const UChar * text,const UChar * text_range)4989 map_search(OnigEncoding enc, UChar map[],
4990            const UChar* text, const UChar* text_range)
4991 {
4992   const UChar *s = text;
4993 
4994   while (s < text_range) {
4995     if (map[*s]) return (UChar* )s;
4996 
4997     s += enclen(enc, s);
4998   }
4999   return (UChar* )NULL;
5000 }
5001 
5002 static UChar*
map_search_backward(OnigEncoding enc,UChar map[],const UChar * text,const UChar * adjust_text,const UChar * text_start)5003 map_search_backward(OnigEncoding enc, UChar map[],
5004                     const UChar* text, const UChar* adjust_text,
5005                     const UChar* text_start)
5006 {
5007   const UChar *s = text_start;
5008 
5009   while (PTR_GE(s, text)) {
5010     if (map[*s]) return (UChar* )s;
5011 
5012     s = onigenc_get_prev_char_head(enc, adjust_text, s);
5013   }
5014   return (UChar* )NULL;
5015 }
5016 extern int
onig_match(regex_t * reg,const UChar * str,const UChar * end,const UChar * at,OnigRegion * region,OnigOptionType option)5017 onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at,
5018            OnigRegion* region, OnigOptionType option)
5019 {
5020   int r;
5021   OnigMatchParam mp;
5022 
5023   onig_initialize_match_param(&mp);
5024   r = onig_match_with_param(reg, str, end, at, region, option, &mp);
5025   onig_free_match_param_content(&mp);
5026   return r;
5027 }
5028 
/*
 * Match `reg` against the string str..end at position `at`, with explicit
 * match parameters.
 *
 *   region  receives the match region; may be NULL
 *   option  match options; without USE_POSIX_API a POSIX-region option is
 *           rejected with ONIGERR_INVALID_ARGUMENT
 *   mp      match parameters for this call
 *
 * Returns the result of match_at(), or an error code: the non-zero result
 * of onig_region_resize_clear(), or ONIGERR_INVALID_WIDE_CHAR_VALUE when
 * string validation is requested and fails.
 */
extern int
onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end,
                      const UChar* at, OnigRegion* region, OnigOptionType option,
                      OnigMatchParam* mp)
{
  int r;
  MatchArg msa;

#ifndef USE_POSIX_API
  if (OPTON_POSIX_REGION(option)) return ONIGERR_INVALID_ARGUMENT;
#endif

  ADJUST_MATCH_PARAM(reg, mp);
  MATCH_ARG_INIT(msa, reg, option, region, at, mp);
  /* resize/clear the region only when one was given and, with
     USE_POSIX_API, the POSIX-region option is not set */
  if (region
#ifdef USE_POSIX_API
      && !OPTON_POSIX_REGION(option)
#endif
      ) {
    r = onig_region_resize_clear(region, reg->num_mem + 1);
  }
  else
    r = 0;

  if (r == 0) {
    /* optional validation of the whole subject string */
    if (OPTON_CHECK_VALIDITY_OF_STRING(option)) {
      if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end)) {
        r = ONIGERR_INVALID_WIDE_CHAR_VALUE;
        goto end;
      }
    }

    /* `end` is passed both as the string end and as the upper range */
    r = match_at(reg, str, end, end, at, &msa);
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
    /* with FIND_LONGEST, a mismatch is replaced by the recorded best
       length when that is non-negative */
    if (OPTON_FIND_LONGEST(option) && r == ONIG_MISMATCH) {
      if (msa.best_len >= 0) {
        r = msa.best_len;
      }
    }
#endif
  }

 end:  /* label name; distinct from the parameter `end` */
  MATCH_ARG_FREE(msa);
  return r;
}
5075 
/*
 * Use the regex's optimization data to find the next candidate position
 * at or after `start` and strictly before `range`.
 *
 * On success, returns 1 and stores in *low / *high the span of match start
 * positions that the candidate implies (derived from reg->dist_min and
 * reg->dist_max).  When reg->dist_max is INFINITE_LEN, *low is NOT written;
 * only *high is set.
 * Returns 0 when no candidate exists.
 */
static int
forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start,
               UChar* range, UChar** low, UChar** high)
{
  /* pprev: the last rejected candidate (set at retry_gate); it is used as
     the left bound when looking up the character before a new candidate. */
  UChar *p, *pprev = (UChar* )NULL;

#ifdef ONIG_DEBUG_SEARCH
  fprintf(DBGFP, "forward_search: str: %p, end: %p, start: %p, range: %p\n",
          str, end, start, range);
#endif

  p = start;
  if (reg->dist_min != 0) {
    /* The candidate cannot appear closer than dist_min bytes to start. */
    if (end - p <= reg->dist_min)
      return 0; /* fail */

    if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
      p += reg->dist_min;
    }
    else {
      /* Multibyte: advance character by character so that p stays on a
         character head at or beyond start + dist_min. */
      UChar *q = p + reg->dist_min;
      while (p < q) p += enclen(reg->enc, p);
    }
  }

 retry:
  /* Run the search routine selected by reg->optimize. */
  switch (reg->optimize) {
  case OPTIMIZE_STR:
    p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
    break;

  case OPTIMIZE_STR_FAST:
    p = sunday_quick_search(reg, reg->exact, reg->exact_end, p, end, range);
    break;

  case OPTIMIZE_STR_FAST_STEP_FORWARD:
    p = sunday_quick_search_step_forward(reg, reg->exact, reg->exact_end,
                                         p, end, range);
    break;

  case OPTIMIZE_MAP:
    p = map_search(reg->enc, reg->map, p, range);
    break;
  }

  if (p && p < range) {
    if (p - start < reg->dist_min) {
      /* Reject this candidate: remember it, step one character forward and
         search again.  The sub-anchor checks below also jump here. */
    retry_gate:
      pprev = p;
      p += enclen(reg->enc, p);
      goto retry;
    }

    if (reg->sub_anchor) {
      UChar* prev;

      switch (reg->sub_anchor) {
      case ANCR_BEGIN_LINE:
        /* Unless ON_STR_BEGIN(p) holds, the character before the candidate
           must be a newline. */
        if (!ON_STR_BEGIN(p)) {
          prev = onigenc_get_prev_char_head(reg->enc, (pprev ? pprev : str), p);
          if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
            goto retry_gate;
        }
        break;

      case ANCR_END_LINE:
        if (ON_STR_END(p)) {
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
          /* Without the "empty line after final newline" option, a
             candidate at the string end that directly follows a newline
             is rejected. */
          prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
                                                     (pprev ? pprev : str), p);
          if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
            goto retry_gate;
#endif
        }
        else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
#ifdef USE_CRNL_AS_LINE_TERMINATOR
                 && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
#endif
                 )
          goto retry_gate;

        break;
      }
    }

    if (reg->dist_max == 0) {
      /* The candidate position is itself the only possible match start. */
      *low  = p;
      *high = p;
    }
    else {
      if (reg->dist_max != INFINITE_LEN) {
        if (p - str < reg->dist_max) {
          *low = (UChar* )str;
        }
        else {
          *low = p - reg->dist_max;
          if (*low > start) {
            /* Move *low forward to a character head. */
            *low = onigenc_get_right_adjust_char_head(reg->enc, start, *low);
          }
        }
      }
      /* no needs to adjust *high, *high is used as range check only */
      if (p - str < reg->dist_min)
        *high = (UChar* )str;
      else
        *high = p - reg->dist_min;
    }

#ifdef ONIG_DEBUG_SEARCH
    fprintf(DBGFP,
            "forward_search success: low: %d, high: %d, dmin: %u, dmax: %u\n",
            (int )(*low - str), (int )(*high - str),
            reg->dist_min, reg->dist_max);
#endif
    return 1; /* success */
  }

  return 0; /* fail */
}
5195 
5196 
/*
 * Backward counterpart of forward_search(): starting from `s`, use the
 * regex's optimization data to find a candidate position, searching
 * toward `range` (with `adjrange` passed to the search helpers alongside it).
 *
 * Returns 1 on success and 0 on failure.  *low and *high are written only
 * when reg->dist_max is not INFINITE_LEN.
 * NOTE(review): with ONIG_DEBUG_SEARCH defined, the debug fprintf below
 * reads *low / *high even when they were not written here.
 */
static int
backward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
                const UChar* range, UChar* adjrange, UChar** low, UChar** high)
{
  UChar *p;

  p = s;

 retry:
  switch (reg->optimize) {
  case OPTIMIZE_STR:
  exact_method:
    p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
                             range, adjrange, end, p);
    break;

  /* Both fast string strategies share the plain backward string search. */
  case OPTIMIZE_STR_FAST:
  case OPTIMIZE_STR_FAST_STEP_FORWARD:
    goto exact_method;
    break;

  case OPTIMIZE_MAP:
    p = map_search_backward(reg->enc, reg->map, range, adjrange, p);
    break;
  }

  if (p) {
    if (reg->sub_anchor) {
      UChar* prev;

      switch (reg->sub_anchor) {
      case ANCR_BEGIN_LINE:
        /* If a previous character exists and is not a newline, retry the
           search from that previous character. */
        if (!ON_STR_BEGIN(p)) {
          prev = onigenc_get_prev_char_head(reg->enc, str, p);
          if (IS_NOT_NULL(prev) && !ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
            p = prev;
            goto retry;
          }
        }
        break;

      case ANCR_END_LINE:
        if (ON_STR_END(p)) {
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
          /* A candidate at the string end that directly follows a newline
             is rejected; retry from the newline itself. */
          prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
          if (IS_NULL(prev)) goto fail;
          if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
            p = prev;
            goto retry;
          }
#endif
        }
        else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
#ifdef USE_CRNL_AS_LINE_TERMINATOR
                 && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
#endif
                 ) {
          /* Not at a line end: step one character back and retry. */
          p = onigenc_get_prev_char_head(reg->enc, adjrange, p);
          if (IS_NULL(p)) goto fail;
          goto retry;
        }
        break;
      }
    }

    if (reg->dist_max != INFINITE_LEN) {
      /* Translate the candidate position into a [*low, *high] span of
         match start positions, clamped at the beginning of the string. */
      if (p - str < reg->dist_max)
        *low = (UChar* )str;
      else
        *low = p - reg->dist_max;

      if (reg->dist_min != 0) {
        if (p - str < reg->dist_min)
          *high = (UChar* )str;
        else
          *high = p - reg->dist_min;
      }
      else {
        *high = p;
      }

      *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high);
    }

#ifdef ONIG_DEBUG_SEARCH
    fprintf(DBGFP, "backward_search: low: %d, high: %d\n",
            (int )(*low - str), (int )(*high - str));
#endif
    return 1; /* success */
  }

 fail:
#ifdef ONIG_DEBUG_SEARCH
  fprintf(DBGFP, "backward_search: fail.\n");
#endif
  return 0; /* fail */
}
5294 
5295 
5296 extern int
onig_search(regex_t * reg,const UChar * str,const UChar * end,const UChar * start,const UChar * range,OnigRegion * region,OnigOptionType option)5297 onig_search(regex_t* reg, const UChar* str, const UChar* end,
5298             const UChar* start, const UChar* range, OnigRegion* region,
5299             OnigOptionType option)
5300 {
5301   int r;
5302   OnigMatchParam mp;
5303   const UChar* data_range;
5304 
5305   onig_initialize_match_param(&mp);
5306 
5307   /* The following is an expanded code of onig_search_with_param()  */
5308   if (range > start)
5309     data_range = range;
5310   else
5311     data_range = end;
5312 
5313   r = search_in_range(reg, str, end, start, range, data_range, region,
5314                       option, &mp);
5315 
5316   onig_free_match_param_content(&mp);
5317   return r;
5318 
5319 }
5320 
/*
 * Core search routine shared by onig_search() and onig_search_with_param().
 *
 * Tries match_at() at successive start positions: forward from `start`
 * while below `range` when range > start, otherwise backward from `start`
 * down to `range`.  `data_range` is handed to match_at() as the upper range
 * in the forward case; in the backward case the upper range is `orig_start`
 * advanced by one character (when it is before `end`).
 *
 * Returns the offset (s - str) of the match start on success,
 * ONIG_MISMATCH when nothing matches, or another negative value on error.
 *
 * Exit labels: `finish` / `match` / `mismatch` free the MatchArg;
 * `finish_no_msa` / `mismatch_no_msa` are used before it is initialized.
 */
static int
search_in_range(regex_t* reg, const UChar* str, const UChar* end,
                const UChar* start, const UChar* range, /* match start range */
                const UChar* data_range, /* subject string range */
                OnigRegion* region,
                OnigOptionType option, OnigMatchParam* mp)
{
  int r;
  UChar *s;
  MatchArg msa;
  const UChar *orig_start = start;

#ifdef ONIG_DEBUG_SEARCH
  fprintf(DBGFP,
     "onig_search (entry point): str: %p, end: %d, start: %d, range: %d\n",
     str, (int )(end - str), (int )(start - str), (int )(range - str));
#endif

  ADJUST_MATCH_PARAM(reg, mp);

#ifndef USE_POSIX_API
  /* The POSIX region option is rejected when the POSIX API is not built. */
  if (OPTON_POSIX_REGION(option)) {
    r = ONIGERR_INVALID_ARGUMENT;
    goto finish_no_msa;
  }
#endif

  if (region
#ifdef USE_POSIX_API
      && ! OPTON_POSIX_REGION(option)
#endif
      ) {
    /* One slot per capture group plus slot 0. */
    r = onig_region_resize_clear(region, reg->num_mem + 1);
    if (r != 0) goto finish_no_msa;
  }

  /* A start position outside [str, end] can never match. */
  if (start > end || start < str) goto mismatch_no_msa;

  if (OPTON_CHECK_VALIDITY_OF_STRING(option)) {
    if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end)) {
      r = ONIGERR_INVALID_WIDE_CHAR_VALUE;
      goto finish_no_msa;
    }
  }


  /* Try a match at the current position `s`: jump to `match` on success,
     to `finish` on error, and fall through on ONIG_MISMATCH. */
#define MATCH_AND_RETURN_CHECK(upper_range) \
  r = match_at(reg, str, end, (upper_range), s, &msa);\
  if (r != ONIG_MISMATCH) {\
    if (r >= 0) {\
      goto match;\
    }\
    else goto finish; /* error */ \
  }


  /* anchor optimize: resume search range */
  /* (The anchor flags are used below to narrow start/range or to fail early.) */
  if (reg->anchor != 0 && str < end) {
    UChar *min_semi_end, *max_semi_end;

    if (reg->anchor & ANCR_BEGIN_POSITION) {
      /* search start-position only */
    begin_position:
      if (range > start)
        range = start + 1;
      else
        range = start;
    }
    else if (reg->anchor & ANCR_BEGIN_BUF) {
      /* search str-position only */
      if (range > start) {
        if (start != str) goto mismatch_no_msa;
        range = str + 1;
      }
      else {
        if (range <= str) {
          start = str;
          range = str;
        }
        else
          goto mismatch_no_msa;
      }
    }
    else if (reg->anchor & ANCR_END_BUF) {
      min_semi_end = max_semi_end = (UChar* )end;

      /* Shared with ANCR_SEMI_END_BUF: restrict start/range using the
         anc_dist_min / anc_dist_max distances from the (semi-)end. */
    end_buf:
      if ((OnigLen )(max_semi_end - str) < reg->anc_dist_min)
        goto mismatch_no_msa;

      if (range > start) {
        if (reg->anc_dist_max != INFINITE_LEN &&
            min_semi_end - start > reg->anc_dist_max) {
          start = min_semi_end - reg->anc_dist_max;
          if (start < end)
            start = onigenc_get_right_adjust_char_head(reg->enc, str, start);
        }
        if (max_semi_end - (range - 1) < reg->anc_dist_min) {
          if (max_semi_end - str + 1 < reg->anc_dist_min)
            goto mismatch_no_msa;
          else
            range = max_semi_end - reg->anc_dist_min + 1;
        }

        if (start > range) goto mismatch_no_msa;
        /* If start == range, match with empty at end.
           Backward search is used. */
      }
      else {
        if (reg->anc_dist_max != INFINITE_LEN &&
            min_semi_end - range > reg->anc_dist_max) {
          range = min_semi_end - reg->anc_dist_max;
        }
        if (max_semi_end - start < reg->anc_dist_min) {
          if (max_semi_end - str < reg->anc_dist_min)
            goto mismatch_no_msa;
          else {
            start = max_semi_end - reg->anc_dist_min;
            start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start);
          }
        }
        if (range > start) goto mismatch_no_msa;
      }
    }
    else if (reg->anchor & ANCR_SEMI_END_BUF) {
      UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, 1);

      max_semi_end = (UChar* )end;
      if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
        /* The string ends with a newline: the semi-end may also be the
           position just before that newline. */
        min_semi_end = pre_end;

#ifdef USE_CRNL_AS_LINE_TERMINATOR
        pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, 1);
        if (IS_NOT_NULL(pre_end) &&
            ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
          min_semi_end = pre_end;
        }
#endif
        if (min_semi_end > str && start <= min_semi_end) {
          goto end_buf;
        }
      }
      else {
        min_semi_end = (UChar* )end;
        goto end_buf;
      }
    }
    else if ((reg->anchor & ANCR_ANYCHAR_INF_ML) && range > start) {
      goto begin_position;
    }
  }
  else if (str == end) { /* empty string */
    static const UChar* address_for_empty_string = (UChar* )"";

#ifdef ONIG_DEBUG_SEARCH
    fprintf(DBGFP, "onig_search: empty string.\n");
#endif

    if (reg->threshold_len == 0) {
      /* Match against a static empty string instead of the caller's
         pointers. */
      start = end = str = address_for_empty_string;
      s = (UChar* )start;

      MATCH_ARG_INIT(msa, reg, option, region, start, mp);
      MATCH_AND_RETURN_CHECK(end);
      goto mismatch;
    }
    goto mismatch_no_msa;
  }

#ifdef ONIG_DEBUG_SEARCH
  fprintf(DBGFP, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
          (int )(end - str), (int )(start - str), (int )(range - str));
#endif

  MATCH_ARG_INIT(msa, reg, option, region, orig_start, mp);

  s = (UChar* )start;
  if (range > start) {   /* forward search */
    if (reg->optimize != OPTIMIZE_NONE) {
      UChar *sch_range, *low, *high;

      /* sch_range: how far forward_search() may look for a candidate. */
      if (reg->dist_max != 0) {
        if (reg->dist_max == INFINITE_LEN)
          sch_range = (UChar* )end;
        else {
          if ((end - range) < reg->dist_max)
            sch_range = (UChar* )end;
          else {
            sch_range = (UChar* )range + reg->dist_max;
          }
        }
      }
      else
        sch_range = (UChar* )range;

      if ((end - start) < reg->threshold_len)
        goto mismatch;

      if (reg->dist_max != INFINITE_LEN) {
        /* Bounded distance: only positions in [low, high] reported by
           forward_search() are tried. */
        do {
          if (! forward_search(reg, str, end, s, sch_range, &low, &high))
            goto mismatch;
          if (s < low) {
            s    = low;
          }
          while (s <= high) {
            MATCH_AND_RETURN_CHECK(data_range);
            s += enclen(reg->enc, s);
          }
        } while (s < range);
        goto mismatch;
      }
      else { /* check only. */
        if (! forward_search(reg, str, end, s, sch_range, &low, &high))
          goto mismatch;

        if ((reg->anchor & ANCR_ANYCHAR_INF) != 0 &&
            (reg->anchor & (ANCR_LOOK_BEHIND | ANCR_PREC_READ_NOT)) == 0) {
          /* After a failed attempt, skip ahead to the position following
             the next newline before trying again. */
          do {
            UChar* prev;

            MATCH_AND_RETURN_CHECK(data_range);
            prev = s;
            s += enclen(reg->enc, s);

            while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) {
              prev = s;
              s += enclen(reg->enc, s);
            }
          } while (s < range);
          goto mismatch;
        }
      }
    }

    /* Plain scan: try every character position below range. */
    do {
      MATCH_AND_RETURN_CHECK(data_range);
      s += enclen(reg->enc, s);
    } while (s < range);

    if (s == range) { /* because empty match with /$/. */
      MATCH_AND_RETURN_CHECK(data_range);
    }
  }
  else {  /* backward search */
    if (range < str) goto mismatch;

    if (orig_start < end)
      orig_start += enclen(reg->enc, orig_start); /* is upper range */

    if (reg->optimize != OPTIMIZE_NONE) {
      UChar *low, *high, *adjrange, *sch_start;
      const UChar *min_range;

      if ((end - range) < reg->threshold_len) goto mismatch;

      if (range < end)
        adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range);
      else
        adjrange = (UChar* )end;

      if (end - range > reg->dist_min)
        min_range = range + reg->dist_min;
      else
        min_range = end;

      if (reg->dist_max != INFINITE_LEN) {
        /* Bounded distance: only positions in [low, high] reported by
           backward_search() are tried. */
        do {
          if (end - s > reg->dist_max)
            sch_start = s + reg->dist_max;
          else {
            sch_start = onigenc_get_prev_char_head(reg->enc, str, end);
          }

          if (backward_search(reg, str, end, sch_start, min_range, adjrange,
                              &low, &high) <= 0)
            goto mismatch;

          if (s > high)
            s = high;

          while (PTR_GE(s, low)) {
            MATCH_AND_RETURN_CHECK(orig_start);
            s = onigenc_get_prev_char_head(reg->enc, str, s);
          }
        } while (PTR_GE(s, range));
        goto mismatch;
      }
      else { /* check only. */
        sch_start = onigenc_get_prev_char_head(reg->enc, str, end);

        if (backward_search(reg, str, end, sch_start, min_range, adjrange,
                            &low, &high) <= 0) goto mismatch;
      }
    }

    /* Plain scan: try every character position from s down to range. */
    do {
      MATCH_AND_RETURN_CHECK(orig_start);
      s = onigenc_get_prev_char_head(reg->enc, str, s);
    } while (PTR_GE(s, range));
  }

 mismatch:
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
  /* With FIND_LONGEST, report the best match recorded in msa, if any. */
  if (OPTON_FIND_LONGEST(reg->options)) {
    if (msa.best_len >= 0) {
      s = msa.best_s;
      goto match;
    }
  }
#endif
  r = ONIG_MISMATCH;

 finish:
  MATCH_ARG_FREE(msa);

  /* If result is mismatch and no FIND_NOT_EMPTY option,
     then the region is not set in match_at(). */
  if (OPTON_FIND_NOT_EMPTY(reg->options) && region
#ifdef USE_POSIX_API
      && !OPTON_POSIX_REGION(option)
#endif
      ) {
    onig_region_clear(region);
  }

#ifdef ONIG_DEBUG
  if (r != ONIG_MISMATCH)
    fprintf(DBGFP, "onig_search: error %d\n", r);
#endif
  return r;

  /* Exits taken before msa was initialized: nothing to free. */
 mismatch_no_msa:
  r = ONIG_MISMATCH;
 finish_no_msa:
#ifdef ONIG_DEBUG
  if (r != ONIG_MISMATCH)
    fprintf(DBGFP, "onig_search: error %d\n", r);
#endif
  return r;

 match:
  MATCH_ARG_FREE(msa);
  return (int )(s - str);
}
5666 
5667 extern int
onig_search_with_param(regex_t * reg,const UChar * str,const UChar * end,const UChar * start,const UChar * range,OnigRegion * region,OnigOptionType option,OnigMatchParam * mp)5668 onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,
5669                        const UChar* start, const UChar* range, OnigRegion* region,
5670                        OnigOptionType option, OnigMatchParam* mp)
5671 {
5672   const UChar* data_range;
5673 
5674   if (range > start)
5675     data_range = range;
5676   else
5677     data_range = end;
5678 
5679   return search_in_range(reg, str, end, start, range, data_range, region,
5680                          option, mp);
5681 }
5682 
5683 extern int
onig_scan(regex_t * reg,const UChar * str,const UChar * end,OnigRegion * region,OnigOptionType option,int (* scan_callback)(int,int,OnigRegion *,void *),void * callback_arg)5684 onig_scan(regex_t* reg, const UChar* str, const UChar* end,
5685           OnigRegion* region, OnigOptionType option,
5686           int (*scan_callback)(int, int, OnigRegion*, void*),
5687           void* callback_arg)
5688 {
5689   int r;
5690   int n;
5691   int rs;
5692   const UChar* start;
5693 
5694   if (OPTON_CHECK_VALIDITY_OF_STRING(option)) {
5695     if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end))
5696       return ONIGERR_INVALID_WIDE_CHAR_VALUE;
5697 
5698     ONIG_OPTION_OFF(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING);
5699   }
5700 
5701   n = 0;
5702   start = str;
5703   while (1) {
5704     r = onig_search(reg, str, end, start, end, region, option);
5705     if (r >= 0) {
5706       rs = scan_callback(n, r, region, callback_arg);
5707       n++;
5708       if (rs != 0)
5709         return rs;
5710 
5711       if (region->end[0] == start - str) {
5712         if (start >= end) break;
5713         start += enclen(reg->enc, start);
5714       }
5715       else
5716         start = str + region->end[0];
5717 
5718       if (start > end)
5719         break;
5720     }
5721     else if (r == ONIG_MISMATCH) {
5722       break;
5723     }
5724     else { /* error */
5725       return r;
5726     }
5727   }
5728 
5729   return n;
5730 }
5731 
5732 extern int
onig_get_subexp_call_max_nest_level(void)5733 onig_get_subexp_call_max_nest_level(void)
5734 {
5735   return SubexpCallMaxNestLevel;
5736 }
5737 
/*
 * Set SubexpCallMaxNestLevel to `level`.
 * The value is stored without validation.  Always returns 0.
 */
extern int
onig_set_subexp_call_max_nest_level(int level)
{
  SubexpCallMaxNestLevel = level;
  return 0;
}
5744 
5745 extern OnigEncoding
onig_get_encoding(regex_t * reg)5746 onig_get_encoding(regex_t* reg)
5747 {
5748   return reg->enc;
5749 }
5750 
5751 extern OnigOptionType
onig_get_options(regex_t * reg)5752 onig_get_options(regex_t* reg)
5753 {
5754   return reg->options;
5755 }
5756 
5757 extern  OnigCaseFoldType
onig_get_case_fold_flag(regex_t * reg)5758 onig_get_case_fold_flag(regex_t* reg)
5759 {
5760   return reg->case_fold_flag;
5761 }
5762 
5763 extern OnigSyntaxType*
onig_get_syntax(regex_t * reg)5764 onig_get_syntax(regex_t* reg)
5765 {
5766   return reg->syntax;
5767 }
5768 
5769 extern int
onig_number_of_captures(regex_t * reg)5770 onig_number_of_captures(regex_t* reg)
5771 {
5772   return reg->num_mem;
5773 }
5774 
5775 extern int
onig_number_of_capture_histories(regex_t * reg)5776 onig_number_of_capture_histories(regex_t* reg)
5777 {
5778 #ifdef USE_CAPTURE_HISTORY
5779   int i, n;
5780 
5781   n = 0;
5782   for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
5783     if (MEM_STATUS_AT(reg->capture_history, i) != 0)
5784       n++;
5785   }
5786   return n;
5787 #else
5788   return 0;
5789 #endif
5790 }
5791 
/*
 * Copy the encoding object that `from` points to into the object that
 * `to` points to (plain assignment through both handles).
 */
extern void
onig_copy_encoding(OnigEncoding to, OnigEncoding from)
{
  *to = *from;
}
5797 
5798 #ifdef USE_REGSET
5799 
5800 extern int
onig_regset_new(OnigRegSet ** rset,int n,regex_t * regs[])5801 onig_regset_new(OnigRegSet** rset, int n, regex_t* regs[])
5802 {
5803 #define REGSET_INITIAL_ALLOC_SIZE   10
5804 
5805   int i;
5806   int r;
5807   int alloc;
5808   OnigRegSet* set;
5809   RR* rs;
5810 
5811   *rset = 0;
5812 
5813   set = (OnigRegSet* )xmalloc(sizeof(*set));
5814   CHECK_NULL_RETURN_MEMERR(set);
5815 
5816   alloc = n > REGSET_INITIAL_ALLOC_SIZE ? n : REGSET_INITIAL_ALLOC_SIZE;
5817   rs = (RR* )xmalloc(sizeof(set->rs[0]) * alloc);
5818   if (IS_NULL(rs)) {
5819     xfree(set);
5820     return ONIGERR_MEMORY;
5821   }
5822 
5823   set->rs    = rs;
5824   set->n     = 0;
5825   set->alloc = alloc;
5826 
5827   for (i = 0; i < n; i++) {
5828     regex_t* reg = regs[i];
5829 
5830     r = onig_regset_add(set, reg);
5831     if (r != 0) {
5832       for (i = 0; i < set->n; i++) {
5833         OnigRegion* region = set->rs[i].region;
5834         if (IS_NOT_NULL(region))
5835           onig_region_free(region, 1);
5836       }
5837       xfree(set->rs);
5838       xfree(set);
5839       return r;
5840     }
5841   }
5842 
5843   *rset = set;
5844   return 0;
5845 }
5846 
5847 static void
update_regset_by_reg(OnigRegSet * set,regex_t * reg)5848 update_regset_by_reg(OnigRegSet* set, regex_t* reg)
5849 {
5850   if (set->n == 1) {
5851     set->enc          = reg->enc;
5852     set->anchor       = reg->anchor;
5853     set->anc_dmin     = reg->anc_dist_min;
5854     set->anc_dmax     = reg->anc_dist_max;
5855     set->all_low_high =
5856       (reg->optimize == OPTIMIZE_NONE || reg->dist_max == INFINITE_LEN) ? 0 : 1;
5857     set->anychar_inf  = (reg->anchor & ANCR_ANYCHAR_INF) != 0 ? 1 : 0;
5858   }
5859   else {
5860     int anchor;
5861 
5862     anchor = set->anchor & reg->anchor;
5863     if (anchor != 0) {
5864       OnigLen anc_dmin;
5865       OnigLen anc_dmax;
5866 
5867       anc_dmin = set->anc_dmin;
5868       anc_dmax = set->anc_dmax;
5869       if (anc_dmin > reg->anc_dist_min) anc_dmin = reg->anc_dist_min;
5870       if (anc_dmax < reg->anc_dist_max) anc_dmax = reg->anc_dist_max;
5871       set->anc_dmin = anc_dmin;
5872       set->anc_dmax = anc_dmax;
5873     }
5874 
5875     set->anchor = anchor;
5876 
5877     if (reg->optimize == OPTIMIZE_NONE || reg->dist_max == INFINITE_LEN)
5878       set->all_low_high = 0;
5879 
5880     if ((reg->anchor & ANCR_ANYCHAR_INF) != 0)
5881       set->anychar_inf = 1;
5882   }
5883 }
5884 
5885 extern int
onig_regset_add(OnigRegSet * set,regex_t * reg)5886 onig_regset_add(OnigRegSet* set, regex_t* reg)
5887 {
5888   OnigRegion* region;
5889 
5890   if (OPTON_FIND_LONGEST(reg->options))
5891     return ONIGERR_INVALID_ARGUMENT;
5892 
5893   if (set->n != 0 && reg->enc != set->enc)
5894     return ONIGERR_INVALID_ARGUMENT;
5895 
5896   if (set->n >= set->alloc) {
5897     RR* nrs;
5898     int new_alloc;
5899 
5900     new_alloc = set->alloc * 2;
5901     nrs = (RR* )xrealloc(set->rs, sizeof(set->rs[0]) * new_alloc);
5902     CHECK_NULL_RETURN_MEMERR(nrs);
5903 
5904     set->rs    = nrs;
5905     set->alloc = new_alloc;
5906   }
5907 
5908   region = onig_region_new();
5909   CHECK_NULL_RETURN_MEMERR(region);
5910 
5911   set->rs[set->n].reg    = reg;
5912   set->rs[set->n].region = region;
5913   set->n++;
5914 
5915   update_regset_by_reg(set, reg);
5916   return 0;
5917 }
5918 
5919 extern int
onig_regset_replace(OnigRegSet * set,int at,regex_t * reg)5920 onig_regset_replace(OnigRegSet* set, int at, regex_t* reg)
5921 {
5922   int i;
5923 
5924   if (at < 0 || at >= set->n)
5925     return ONIGERR_INVALID_ARGUMENT;
5926 
5927   if (IS_NULL(reg)) {
5928     onig_region_free(set->rs[at].region, 1);
5929     for (i = at; i < set->n - 1; i++) {
5930       set->rs[i].reg    = set->rs[i+1].reg;
5931       set->rs[i].region = set->rs[i+1].region;
5932     }
5933     set->n--;
5934   }
5935   else {
5936     if (OPTON_FIND_LONGEST(reg->options))
5937       return ONIGERR_INVALID_ARGUMENT;
5938 
5939     if (set->n > 1 && reg->enc != set->enc)
5940       return ONIGERR_INVALID_ARGUMENT;
5941 
5942     set->rs[at].reg = reg;
5943   }
5944 
5945   for (i = 0; i < set->n; i++)
5946     update_regset_by_reg(set, set->rs[i].reg);
5947 
5948   return 0;
5949 }
5950 
5951 extern void
onig_regset_free(OnigRegSet * set)5952 onig_regset_free(OnigRegSet* set)
5953 {
5954   int i;
5955 
5956   for (i = 0; i < set->n; i++) {
5957     regex_t* reg;
5958     OnigRegion* region;
5959 
5960     reg    = set->rs[i].reg;
5961     region = set->rs[i].region;
5962     onig_free(reg);
5963     if (IS_NOT_NULL(region))
5964       onig_region_free(region, 1);
5965   }
5966 
5967   xfree(set->rs);
5968   xfree(set);
5969 }
5970 
5971 extern int
onig_regset_number_of_regex(OnigRegSet * set)5972 onig_regset_number_of_regex(OnigRegSet* set)
5973 {
5974   return set->n;
5975 }
5976 
5977 extern regex_t*
onig_regset_get_regex(OnigRegSet * set,int at)5978 onig_regset_get_regex(OnigRegSet* set, int at)
5979 {
5980   if (at < 0 || at >= set->n)
5981     return (regex_t* )0;
5982 
5983   return set->rs[at].reg;
5984 }
5985 
5986 extern OnigRegion*
onig_regset_get_region(OnigRegSet * set,int at)5987 onig_regset_get_region(OnigRegSet* set, int at)
5988 {
5989   if (at < 0 || at >= set->n)
5990     return (OnigRegion* )0;
5991 
5992   return set->rs[at].region;
5993 }
5994 
5995 #endif /* USE_REGSET */
5996 
5997 
5998 #ifdef USE_DIRECT_THREADED_CODE
5999 extern int
onig_init_for_match_at(regex_t * reg)6000 onig_init_for_match_at(regex_t* reg)
6001 {
6002   return match_at(reg, (const UChar* )NULL, (const UChar* )NULL,
6003                   (const UChar* )NULL, (const UChar* )NULL,
6004                   (MatchArg* )NULL);
6005 }
6006 #endif
6007 
6008 
6009 /* for callout functions */
6010 
6011 #ifdef USE_CALLOUT
6012 
6013 extern OnigCalloutFunc
onig_get_progress_callout(void)6014 onig_get_progress_callout(void)
6015 {
6016   return DefaultProgressCallout;
6017 }
6018 
/* Store `f` in DefaultProgressCallout.  Always returns ONIG_NORMAL. */
extern int
onig_set_progress_callout(OnigCalloutFunc f)
{
  DefaultProgressCallout = f;
  return ONIG_NORMAL;
}
6025 
6026 extern OnigCalloutFunc
onig_get_retraction_callout(void)6027 onig_get_retraction_callout(void)
6028 {
6029   return DefaultRetractionCallout;
6030 }
6031 
/* Store `f` in DefaultRetractionCallout.  Always returns ONIG_NORMAL. */
extern int
onig_set_retraction_callout(OnigCalloutFunc f)
{
  DefaultRetractionCallout = f;
  return ONIG_NORMAL;
}
6038 
6039 extern int
onig_get_callout_num_by_callout_args(OnigCalloutArgs * args)6040 onig_get_callout_num_by_callout_args(OnigCalloutArgs* args)
6041 {
6042   return args->num;
6043 }
6044 
6045 extern OnigCalloutIn
onig_get_callout_in_by_callout_args(OnigCalloutArgs * args)6046 onig_get_callout_in_by_callout_args(OnigCalloutArgs* args)
6047 {
6048   return args->in;
6049 }
6050 
6051 extern int
onig_get_name_id_by_callout_args(OnigCalloutArgs * args)6052 onig_get_name_id_by_callout_args(OnigCalloutArgs* args)
6053 {
6054   return args->name_id;
6055 }
6056 
6057 extern const UChar*
onig_get_contents_by_callout_args(OnigCalloutArgs * args)6058 onig_get_contents_by_callout_args(OnigCalloutArgs* args)
6059 {
6060   int num;
6061   CalloutListEntry* e;
6062 
6063   num = args->num;
6064   e = onig_reg_callout_list_at(args->regex, num);
6065   if (IS_NULL(e)) return 0;
6066   if (e->of == ONIG_CALLOUT_OF_CONTENTS) {
6067     return e->u.content.start;
6068   }
6069 
6070   return 0;
6071 }
6072 
6073 extern const UChar*
onig_get_contents_end_by_callout_args(OnigCalloutArgs * args)6074 onig_get_contents_end_by_callout_args(OnigCalloutArgs* args)
6075 {
6076   int num;
6077   CalloutListEntry* e;
6078 
6079   num = args->num;
6080   e = onig_reg_callout_list_at(args->regex, num);
6081   if (IS_NULL(e)) return 0;
6082   if (e->of == ONIG_CALLOUT_OF_CONTENTS) {
6083     return e->u.content.end;
6084   }
6085 
6086   return 0;
6087 }
6088 
6089 extern int
onig_get_args_num_by_callout_args(OnigCalloutArgs * args)6090 onig_get_args_num_by_callout_args(OnigCalloutArgs* args)
6091 {
6092   int num;
6093   CalloutListEntry* e;
6094 
6095   num = args->num;
6096   e = onig_reg_callout_list_at(args->regex, num);
6097   if (IS_NULL(e)) return ONIGERR_INVALID_ARGUMENT;
6098   if (e->of == ONIG_CALLOUT_OF_NAME) {
6099     return e->u.arg.num;
6100   }
6101 
6102   return ONIGERR_INVALID_ARGUMENT;
6103 }
6104 
6105 extern int
onig_get_passed_args_num_by_callout_args(OnigCalloutArgs * args)6106 onig_get_passed_args_num_by_callout_args(OnigCalloutArgs* args)
6107 {
6108   int num;
6109   CalloutListEntry* e;
6110 
6111   num = args->num;
6112   e = onig_reg_callout_list_at(args->regex, num);
6113   if (IS_NULL(e)) return ONIGERR_INVALID_ARGUMENT;
6114   if (e->of == ONIG_CALLOUT_OF_NAME) {
6115     return e->u.arg.passed_num;
6116   }
6117 
6118   return ONIGERR_INVALID_ARGUMENT;
6119 }
6120 
6121 extern int
onig_get_arg_by_callout_args(OnigCalloutArgs * args,int index,OnigType * type,OnigValue * val)6122 onig_get_arg_by_callout_args(OnigCalloutArgs* args, int index,
6123                              OnigType* type, OnigValue* val)
6124 {
6125   int num;
6126   CalloutListEntry* e;
6127 
6128   num = args->num;
6129   e = onig_reg_callout_list_at(args->regex, num);
6130   if (IS_NULL(e)) return ONIGERR_INVALID_ARGUMENT;
6131   if (e->of == ONIG_CALLOUT_OF_NAME) {
6132     if (IS_NOT_NULL(type)) *type = e->u.arg.types[index];
6133     if (IS_NOT_NULL(val))  *val  = e->u.arg.vals[index];
6134     return ONIG_NORMAL;
6135   }
6136 
6137   return ONIGERR_INVALID_ARGUMENT;
6138 }
6139 
6140 extern const UChar*
onig_get_string_by_callout_args(OnigCalloutArgs * args)6141 onig_get_string_by_callout_args(OnigCalloutArgs* args)
6142 {
6143   return args->string;
6144 }
6145 
6146 extern const UChar*
onig_get_string_end_by_callout_args(OnigCalloutArgs * args)6147 onig_get_string_end_by_callout_args(OnigCalloutArgs* args)
6148 {
6149   return args->string_end;
6150 }
6151 
6152 extern const UChar*
onig_get_start_by_callout_args(OnigCalloutArgs * args)6153 onig_get_start_by_callout_args(OnigCalloutArgs* args)
6154 {
6155   return args->start;
6156 }
6157 
6158 extern const UChar*
onig_get_right_range_by_callout_args(OnigCalloutArgs * args)6159 onig_get_right_range_by_callout_args(OnigCalloutArgs* args)
6160 {
6161   return args->right_range;
6162 }
6163 
6164 extern const UChar*
onig_get_current_by_callout_args(OnigCalloutArgs * args)6165 onig_get_current_by_callout_args(OnigCalloutArgs* args)
6166 {
6167   return args->current;
6168 }
6169 
6170 extern OnigRegex
onig_get_regex_by_callout_args(OnigCalloutArgs * args)6171 onig_get_regex_by_callout_args(OnigCalloutArgs* args)
6172 {
6173   return args->regex;
6174 }
6175 
6176 extern unsigned long
onig_get_retry_counter_by_callout_args(OnigCalloutArgs * args)6177 onig_get_retry_counter_by_callout_args(OnigCalloutArgs* args)
6178 {
6179   return args->retry_in_match_counter;
6180 }
6181 
6182 
6183 extern int
onig_get_capture_range_in_callout(OnigCalloutArgs * a,int mem_num,int * begin,int * end)6184 onig_get_capture_range_in_callout(OnigCalloutArgs* a, int mem_num, int* begin, int* end)
6185 {
6186   OnigRegex    reg;
6187   const UChar* str;
6188   StackType*   stk_base;
6189   int i;
6190   StkPtrType* mem_start_stk;
6191   StkPtrType* mem_end_stk;
6192 
6193   i = mem_num;
6194   reg = a->regex;
6195   str = a->string;
6196   stk_base = a->stk_base;
6197   mem_start_stk = a->mem_start_stk;
6198   mem_end_stk   = a->mem_end_stk;
6199 
6200   if (i > 0) {
6201     if (a->mem_end_stk[i].i != INVALID_STACK_INDEX) {
6202       *begin = (int )(STACK_MEM_START(reg, i) - str);
6203       *end   = (int )(STACK_MEM_END(reg, i)   - str);
6204     }
6205     else {
6206       *begin = *end = ONIG_REGION_NOTPOS;
6207     }
6208   }
6209   else
6210     return ONIGERR_INVALID_ARGUMENT;
6211 
6212   return ONIG_NORMAL;
6213 }
6214 
6215 extern int
onig_get_used_stack_size_in_callout(OnigCalloutArgs * a,int * used_num,int * used_bytes)6216 onig_get_used_stack_size_in_callout(OnigCalloutArgs* a, int* used_num, int* used_bytes)
6217 {
6218   int n;
6219 
6220   n = (int )(a->stk - a->stk_base);
6221 
6222   if (used_num != 0)
6223     *used_num = n;
6224 
6225   if (used_bytes != 0)
6226     *used_bytes = n * sizeof(StackType);
6227 
6228   return ONIG_NORMAL;
6229 }
6230 
6231 
6232 /* builtin callout functions */
6233 
6234 extern int
onig_builtin_fail(OnigCalloutArgs * args ARG_UNUSED,void * user_data ARG_UNUSED)6235 onig_builtin_fail(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUSED)
6236 {
6237   return ONIG_CALLOUT_FAIL;
6238 }
6239 
6240 extern int
onig_builtin_mismatch(OnigCalloutArgs * args ARG_UNUSED,void * user_data ARG_UNUSED)6241 onig_builtin_mismatch(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUSED)
6242 {
6243   return ONIG_MISMATCH;
6244 }
6245 
6246 extern int
onig_builtin_error(OnigCalloutArgs * args,void * user_data ARG_UNUSED)6247 onig_builtin_error(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
6248 {
6249   int r;
6250   int n;
6251   OnigValue val;
6252 
6253   r = onig_get_arg_by_callout_args(args, 0, 0, &val);
6254   if (r != ONIG_NORMAL) return r;
6255 
6256   n = (int )val.l;
6257   if (n >= 0) {
6258     n = ONIGERR_INVALID_CALLOUT_BODY;
6259   }
6260   else if (onig_is_error_code_needs_param(n)) {
6261     n = ONIGERR_INVALID_CALLOUT_BODY;
6262   }
6263 
6264   return n;
6265 }
6266 
6267 extern int
onig_builtin_count(OnigCalloutArgs * args,void * user_data)6268 onig_builtin_count(OnigCalloutArgs* args, void* user_data)
6269 {
6270   (void )onig_check_callout_data_and_clear_old_values(args);
6271 
6272   return onig_builtin_total_count(args, user_data);
6273 }
6274 
6275 extern int
onig_builtin_total_count(OnigCalloutArgs * args,void * user_data ARG_UNUSED)6276 onig_builtin_total_count(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
6277 {
6278   int r;
6279   int slot;
6280   OnigType  type;
6281   OnigValue val;
6282   OnigValue aval;
6283   OnigCodePoint count_type;
6284 
6285   r = onig_get_arg_by_callout_args(args, 0, &type, &aval);
6286   if (r != ONIG_NORMAL) return r;
6287 
6288   count_type = aval.c;
6289   if (count_type != '>' && count_type != 'X' && count_type != '<')
6290     return ONIGERR_INVALID_CALLOUT_ARG;
6291 
6292   r = onig_get_callout_data_by_callout_args_self_dont_clear_old(args, 0,
6293                                                                 &type, &val);
6294   if (r < ONIG_NORMAL)
6295     return r;
6296   else if (r > ONIG_NORMAL) {
6297     /* type == void: initial state */
6298     val.l = 0;
6299   }
6300 
6301   if (args->in == ONIG_CALLOUT_IN_RETRACTION) {
6302     slot = 2;
6303     if (count_type == '<')
6304       val.l++;
6305     else if (count_type == 'X')
6306       val.l--;
6307   }
6308   else {
6309     slot = 1;
6310     if (count_type != '<')
6311       val.l++;
6312   }
6313 
6314   r = onig_set_callout_data_by_callout_args_self(args, 0, ONIG_TYPE_LONG, &val);
6315   if (r != ONIG_NORMAL) return r;
6316 
6317   /* slot 1: in progress counter, slot 2: in retraction counter */
6318   r = onig_get_callout_data_by_callout_args_self_dont_clear_old(args, slot,
6319                                                                 &type, &val);
6320   if (r < ONIG_NORMAL)
6321     return r;
6322   else if (r > ONIG_NORMAL) {
6323     val.l = 0;
6324   }
6325 
6326   val.l++;
6327   r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val);
6328   if (r != ONIG_NORMAL) return r;
6329 
6330   return ONIG_CALLOUT_SUCCESS;
6331 }
6332 
6333 extern int
onig_builtin_max(OnigCalloutArgs * args,void * user_data ARG_UNUSED)6334 onig_builtin_max(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
6335 {
6336   int r;
6337   int slot;
6338   long max_val;
6339   OnigCodePoint count_type;
6340   OnigType  type;
6341   OnigValue val;
6342   OnigValue aval;
6343 
6344   (void )onig_check_callout_data_and_clear_old_values(args);
6345 
6346   slot = 0;
6347   r = onig_get_callout_data_by_callout_args_self(args, slot, &type, &val);
6348   if (r < ONIG_NORMAL)
6349     return r;
6350   else if (r > ONIG_NORMAL) {
6351     /* type == void: initial state */
6352     type  = ONIG_TYPE_LONG;
6353     val.l = 0;
6354   }
6355 
6356   r = onig_get_arg_by_callout_args(args, 0, &type, &aval);
6357   if (r != ONIG_NORMAL) return r;
6358   if (type == ONIG_TYPE_TAG) {
6359     r = onig_get_callout_data_by_callout_args(args, aval.tag, 0, &type, &aval);
6360     if (r < ONIG_NORMAL) return r;
6361     else if (r > ONIG_NORMAL)
6362       max_val = 0L;
6363     else
6364       max_val = aval.l;
6365   }
6366   else { /* LONG */
6367     max_val = aval.l;
6368   }
6369 
6370   r = onig_get_arg_by_callout_args(args, 1, &type, &aval);
6371   if (r != ONIG_NORMAL) return r;
6372 
6373   count_type = aval.c;
6374   if (count_type != '>' && count_type != 'X' && count_type != '<')
6375     return ONIGERR_INVALID_CALLOUT_ARG;
6376 
6377   if (args->in == ONIG_CALLOUT_IN_RETRACTION) {
6378     if (count_type == '<') {
6379       if (val.l >= max_val) return ONIG_CALLOUT_FAIL;
6380       val.l++;
6381     }
6382     else if (count_type == 'X')
6383       val.l--;
6384   }
6385   else {
6386     if (count_type != '<') {
6387       if (val.l >= max_val) return ONIG_CALLOUT_FAIL;
6388       val.l++;
6389     }
6390   }
6391 
6392   r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val);
6393   if (r != ONIG_NORMAL) return r;
6394 
6395   return ONIG_CALLOUT_SUCCESS;
6396 }
6397 
/* Comparison operators understood by onig_builtin_cmp().  The numeric value
   is stored as a long in the callout's data slot, so the values are spelled
   out explicitly. */
enum OP_CMP {
  OP_EQ = 0,  /* "==" */
  OP_NE = 1,  /* "!=" */
  OP_LT = 2,  /* "<"  */
  OP_GT = 3,  /* ">"  */
  OP_LE = 4,  /* "<=" */
  OP_GE = 5   /* ">=" */
};
6406 
6407 extern int
onig_builtin_cmp(OnigCalloutArgs * args,void * user_data ARG_UNUSED)6408 onig_builtin_cmp(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
6409 {
6410   int r;
6411   int slot;
6412   long lv;
6413   long rv;
6414   OnigType  type;
6415   OnigValue val;
6416   regex_t* reg;
6417   enum OP_CMP op;
6418 
6419   reg = args->regex;
6420 
6421   r = onig_get_arg_by_callout_args(args, 0, &type, &val);
6422   if (r != ONIG_NORMAL) return r;
6423 
6424   if (type == ONIG_TYPE_TAG) {
6425     r = onig_get_callout_data_by_callout_args(args, val.tag, 0, &type, &val);
6426     if (r < ONIG_NORMAL) return r;
6427     else if (r > ONIG_NORMAL)
6428       lv = 0L;
6429     else
6430       lv = val.l;
6431   }
6432   else { /* ONIG_TYPE_LONG */
6433     lv = val.l;
6434   }
6435 
6436   r = onig_get_arg_by_callout_args(args, 2, &type, &val);
6437   if (r != ONIG_NORMAL) return r;
6438 
6439   if (type == ONIG_TYPE_TAG) {
6440     r = onig_get_callout_data_by_callout_args(args, val.tag, 0, &type, &val);
6441     if (r < ONIG_NORMAL) return r;
6442     else if (r > ONIG_NORMAL)
6443       rv = 0L;
6444     else
6445       rv = val.l;
6446   }
6447   else { /* ONIG_TYPE_LONG */
6448     rv = val.l;
6449   }
6450 
6451   slot = 0;
6452   r = onig_get_callout_data_by_callout_args_self(args, slot, &type, &val);
6453   if (r < ONIG_NORMAL)
6454     return r;
6455   else if (r > ONIG_NORMAL) {
6456     /* type == void: initial state */
6457     OnigCodePoint c1, c2;
6458     UChar* p;
6459 
6460     r = onig_get_arg_by_callout_args(args, 1, &type, &val);
6461     if (r != ONIG_NORMAL) return r;
6462 
6463     p = val.s.start;
6464     c1 = ONIGENC_MBC_TO_CODE(reg->enc, p, val.s.end);
6465     p += ONIGENC_MBC_ENC_LEN(reg->enc, p);
6466     if (p < val.s.end) {
6467       c2 = ONIGENC_MBC_TO_CODE(reg->enc, p, val.s.end);
6468       p += ONIGENC_MBC_ENC_LEN(reg->enc, p);
6469       if (p != val.s.end)  return ONIGERR_INVALID_CALLOUT_ARG;
6470     }
6471     else
6472       c2 = 0;
6473 
6474     switch (c1) {
6475     case '=':
6476       if (c2 != '=') return ONIGERR_INVALID_CALLOUT_ARG;
6477       op = OP_EQ;
6478       break;
6479     case '!':
6480       if (c2 != '=') return ONIGERR_INVALID_CALLOUT_ARG;
6481       op = OP_NE;
6482       break;
6483     case '<':
6484       if (c2 == '=') op = OP_LE;
6485       else if (c2 == 0) op = OP_LT;
6486       else  return ONIGERR_INVALID_CALLOUT_ARG;
6487       break;
6488     case '>':
6489       if (c2 == '=') op = OP_GE;
6490       else if (c2 == 0) op = OP_GT;
6491       else  return ONIGERR_INVALID_CALLOUT_ARG;
6492       break;
6493     default:
6494       return ONIGERR_INVALID_CALLOUT_ARG;
6495       break;
6496     }
6497     val.l = (long )op;
6498     r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val);
6499     if (r != ONIG_NORMAL) return r;
6500   }
6501   else {
6502     op = (enum OP_CMP )val.l;
6503   }
6504 
6505   switch (op) {
6506   case OP_EQ: r = (lv == rv); break;
6507   case OP_NE: r = (lv != rv); break;
6508   case OP_LT: r = (lv <  rv); break;
6509   case OP_GT: r = (lv >  rv); break;
6510   case OP_LE: r = (lv <= rv); break;
6511   case OP_GE: r = (lv >= rv); break;
6512   }
6513 
6514   return r == 0 ? ONIG_CALLOUT_FAIL : ONIG_CALLOUT_SUCCESS;
6515 }
6516 
6517 
6518 #ifndef ONIG_NO_PRINT
6519 
/* Stream that onig_builtin_monitor() writes to; assigned by
   onig_setup_builtin_monitors_by_ascii_encoded_name(). */
static FILE* OutFp;
6521 
/* The name starts with "onig_" for the sake of the macros. */
6523 static int
onig_builtin_monitor(OnigCalloutArgs * args,void * user_data)6524 onig_builtin_monitor(OnigCalloutArgs* args, void* user_data)
6525 {
6526   int r;
6527   int num;
6528   size_t tag_len;
6529   const UChar* start;
6530   const UChar* right;
6531   const UChar* current;
6532   const UChar* string;
6533   const UChar* strend;
6534   const UChar* tag_start;
6535   const UChar* tag_end;
6536   regex_t* reg;
6537   OnigCalloutIn in;
6538   OnigType type;
6539   OnigValue val;
6540   char buf[20];
6541   FILE* fp;
6542 
6543   fp = OutFp;
6544 
6545   r = onig_get_arg_by_callout_args(args, 0, &type, &val);
6546   if (r != ONIG_NORMAL) return r;
6547 
6548   in = onig_get_callout_in_by_callout_args(args);
6549   if (in == ONIG_CALLOUT_IN_PROGRESS) {
6550     if (val.c == '<')
6551       return ONIG_CALLOUT_SUCCESS;
6552   }
6553   else {
6554     if (val.c != 'X' && val.c != '<')
6555       return ONIG_CALLOUT_SUCCESS;
6556   }
6557 
6558   num       = onig_get_callout_num_by_callout_args(args);
6559   start     = onig_get_start_by_callout_args(args);
6560   right     = onig_get_right_range_by_callout_args(args);
6561   current   = onig_get_current_by_callout_args(args);
6562   string    = onig_get_string_by_callout_args(args);
6563   strend    = onig_get_string_end_by_callout_args(args);
6564   reg       = onig_get_regex_by_callout_args(args);
6565   tag_start = onig_get_callout_tag_start(reg, num);
6566   tag_end   = onig_get_callout_tag_end(reg, num);
6567 
6568   if (tag_start == 0)
6569     xsnprintf(buf, sizeof(buf), "#%d", num);
6570   else {
6571     /* CAUTION: tag string is not terminated with NULL. */
6572     int i;
6573 
6574     tag_len = tag_end - tag_start;
6575     if (tag_len >= sizeof(buf)) tag_len = sizeof(buf) - 1;
6576     for (i = 0; i < tag_len; i++) buf[i] = tag_start[i];
6577     buf[tag_len] = '\0';
6578   }
6579 
6580   fprintf(fp, "ONIG-MONITOR: %-4s %s at: %d [%d - %d] len: %d\n",
6581           buf,
6582           in == ONIG_CALLOUT_IN_PROGRESS ? "=>" : "<=",
6583           (int )(current - string),
6584           (int )(start   - string),
6585           (int )(right   - string),
6586           (int )(strend  - string));
6587   fflush(fp);
6588 
6589   return ONIG_CALLOUT_SUCCESS;
6590 }
6591 
6592 extern int
onig_setup_builtin_monitors_by_ascii_encoded_name(void * fp)6593 onig_setup_builtin_monitors_by_ascii_encoded_name(void* fp /* FILE* */)
6594 {
6595   int id;
6596   char* name;
6597   OnigEncoding enc;
6598   unsigned int ts[4];
6599   OnigValue opts[4];
6600 
6601   if (IS_NOT_NULL(fp))
6602     OutFp = (FILE* )fp;
6603   else
6604     OutFp = stdout;
6605 
6606   enc = ONIG_ENCODING_ASCII;
6607 
6608   name = "MON";
6609   ts[0] = ONIG_TYPE_CHAR;
6610   opts[0].c = '>';
6611   BC_B_O(name, monitor, 1, ts, 1, opts);
6612 
6613   return ONIG_NORMAL;
6614 }
6615 
6616 #endif /* ONIG_NO_PRINT */
6617 
6618 #endif /* USE_CALLOUT */
6619