1 /**********************************************************************
2   regexec.c -  Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5  * Copyright (c) 2002-2020  K.Kosako
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #ifndef ONIG_NO_PRINT
31 #ifndef NEED_TO_INCLUDE_STDIO
32 #define NEED_TO_INCLUDE_STDIO
33 #endif
34 #endif
35 
36 #include "regint.h"
37 
/*
 * Word-character test for the character at s.
 * mode == 0 selects ONIGENC_IS_MBC_WORD; any other value selects
 * ONIGENC_IS_MBC_WORD_ASCII.
 */
#define IS_MBC_WORD_ASCII_MODE(enc,s,end,mode) \
  ((mode) == 0 \
     ? ONIGENC_IS_MBC_WORD(enc,s,end) \
     : ONIGENC_IS_MBC_WORD_ASCII(enc,s,end))

#ifdef USE_CRNL_AS_LINE_TERMINATOR
/*
 * Non-zero when the character at p has code 13 (carriage return) and the
 * character that follows it is a newline according to enc.
 */
#define ONIGENC_IS_MBC_CRNL(enc,p,end) \
  (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 \
   && ONIGENC_IS_MBC_NEWLINE(enc,(p+enclen(enc,p)),end))
#endif

/* Compile-time switch; the code it enables is not in this part of the file. */
#define CHECK_INTERRUPT_IN_MATCH
48 
/*
 * Start position recorded for capture group i.
 * When MEM_STATUS_AT reports group i in reg->push_mem_start, mem_start_stk[i]
 * is passed to STACK_AT and the position is read from that stack entry;
 * otherwise mem_start_stk[i] itself is reinterpreted as the position.
 * The expansion refers to mem_start_stk and STACK_AT, so both must be
 * available where the macro is used.
 */
#define STACK_MEM_START(reg, i) \
  (MEM_STATUS_AT((reg)->push_mem_start, (i)) != 0 \
     ? STACK_AT(mem_start_stk[i])->u.mem.pstr \
     : (UChar* )((void* )(mem_start_stk[i])))

/*
 * End position recorded for capture group i; same scheme as above, driven by
 * reg->push_mem_end and mem_end_stk.
 */
#define STACK_MEM_END(reg, i) \
  (MEM_STATUS_AT((reg)->push_mem_end, (i)) != 0 \
     ? STACK_AT(mem_end_stk[i])->u.mem.pstr \
     : (UChar* )((void* )(mem_end_stk[i])))
56 
57 static int forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start, UChar* range, UChar** low, UChar** high, UChar** low_prev);
58 
59 static int
60 search_in_range(regex_t* reg, const UChar* str, const UChar* end, const UChar* start, const UChar* range, /* match range */ const UChar* data_range, /* subject string range */ OnigRegion* region, OnigOptionType option, OnigMatchParam* mp);
61 
62 
#ifdef USE_CALLOUT
/*
 * Per-callout storage: a fixed array of ONIG_CALLOUT_DATA_SLOT_NUM typed
 * value slots plus a counter field.
 * NOTE(review): judging by its name the counter records the match_at call
 * during which the slots were last touched -- confirm against the users.
 */
typedef struct {
  int last_match_at_call_counter;

  struct {
    OnigType  type;  /* type tag of val */
    OnigValue val;   /* stored value */
  } slot[ONIG_CALLOUT_DATA_SLOT_NUM];
} CalloutData;
#endif
72 
/*
 * Match parameters supplied by the caller.
 * Fields are written by the onig_set_*_of_match_param() functions in this
 * file; several exist only when the matching feature macro is defined.
 */
struct OnigMatchParamStruct {
  unsigned int    match_stack_limit;

#ifdef USE_RETRY_LIMIT
  unsigned long   retry_limit_in_match;
  unsigned long   retry_limit_in_search;
#endif

#ifdef USE_CALLOUT
  OnigCalloutFunc progress_callout_of_contents;    /* progress callout function */
  OnigCalloutFunc retraction_callout_of_contents;  /* retraction callout function */
  int             match_at_call_counter;
  void*           callout_user_data;               /* opaque pointer stored for callouts */
  CalloutData*    callout_data;
  int             callout_data_alloc_num;
#endif
};
88 
89 extern int
onig_set_match_stack_limit_size_of_match_param(OnigMatchParam * param,unsigned int limit)90 onig_set_match_stack_limit_size_of_match_param(OnigMatchParam* param,
91                                                unsigned int limit)
92 {
93   param->match_stack_limit = limit;
94   return ONIG_NORMAL;
95 }
96 
97 extern int
onig_set_retry_limit_in_match_of_match_param(OnigMatchParam * param,unsigned long limit)98 onig_set_retry_limit_in_match_of_match_param(OnigMatchParam* param,
99                                              unsigned long limit)
100 {
101 #ifdef USE_RETRY_LIMIT
102   param->retry_limit_in_match = limit;
103   return ONIG_NORMAL;
104 #else
105   return ONIG_NO_SUPPORT_CONFIG;
106 #endif
107 }
108 
109 extern int
onig_set_retry_limit_in_search_of_match_param(OnigMatchParam * param,unsigned long limit)110 onig_set_retry_limit_in_search_of_match_param(OnigMatchParam* param,
111                                               unsigned long limit)
112 {
113 #ifdef USE_RETRY_LIMIT
114   param->retry_limit_in_search = limit;
115   return ONIG_NORMAL;
116 #else
117   return ONIG_NO_SUPPORT_CONFIG;
118 #endif
119 }
120 
121 extern int
onig_set_progress_callout_of_match_param(OnigMatchParam * param,OnigCalloutFunc f)122 onig_set_progress_callout_of_match_param(OnigMatchParam* param, OnigCalloutFunc f)
123 {
124 #ifdef USE_CALLOUT
125   param->progress_callout_of_contents = f;
126   return ONIG_NORMAL;
127 #else
128   return ONIG_NO_SUPPORT_CONFIG;
129 #endif
130 }
131 
132 extern int
onig_set_retraction_callout_of_match_param(OnigMatchParam * param,OnigCalloutFunc f)133 onig_set_retraction_callout_of_match_param(OnigMatchParam* param, OnigCalloutFunc f)
134 {
135 #ifdef USE_CALLOUT
136   param->retraction_callout_of_contents = f;
137   return ONIG_NORMAL;
138 #else
139   return ONIG_NO_SUPPORT_CONFIG;
140 #endif
141 }
142 
143 extern int
onig_set_callout_user_data_of_match_param(OnigMatchParam * param,void * user_data)144 onig_set_callout_user_data_of_match_param(OnigMatchParam* param, void* user_data)
145 {
146 #ifdef USE_CALLOUT
147   param->callout_user_data = user_data;
148   return ONIG_NORMAL;
149 #else
150   return ONIG_NO_SUPPORT_CONFIG;
151 #endif
152 }
153 
154 
155 typedef struct {
156   void* stack_p;
157   int   stack_n;
158   OnigOptionType options;
159   OnigRegion*    region;
160   int            ptr_num;
161   const UChar*   start;   /* search start position (for \G: BEGIN_POSITION) */
162   unsigned int   match_stack_limit;
163 #ifdef USE_RETRY_LIMIT
164   unsigned long  retry_limit_in_match;
165   unsigned long  retry_limit_in_search;
166   unsigned long  retry_limit_in_search_counter;
167 #endif
168   OnigMatchParam* mp;
169 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
170   int    best_len;      /* for ONIG_OPTION_FIND_LONGEST */
171   UChar* best_s;
172 #endif
173 } MatchArg;
174 
175 
176 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
177 
/* Kinds of operand an opcode can carry (debug builds only). */
typedef enum {
  ARG_SPECIAL = -1,
  ARG_NON     = 0,
  ARG_RELADDR = 1,
  ARG_ABSADDR = 2,
  ARG_LENGTH  = 3,
  ARG_MEMNUM  = 4,
  ARG_OPTION  = 5,
  ARG_MODE    = 6
} OpArgType;
189 
/* One row of the opcode-name table: an opcode value and its printable name. */
typedef struct {
  short int opcode;  /* opcode value; a negative value ends the table */
  char*     name;    /* label printed for the opcode */
} OpInfoType;
194 
195 static OpInfoType OpInfo[] = {
196   { OP_FINISH,         "finish"},
197   { OP_END,            "end"},
198   { OP_STR_1,          "str_1"},
199   { OP_STR_2,          "str_2"},
200   { OP_STR_3,          "str_3"},
201   { OP_STR_4,          "str_4"},
202   { OP_STR_5,          "str_5"},
203   { OP_STR_N,          "str_n"},
204   { OP_STR_MB2N1,      "str_mb2-n1"},
205   { OP_STR_MB2N2,      "str_mb2-n2"},
206   { OP_STR_MB2N3,      "str_mb2-n3"},
207   { OP_STR_MB2N,       "str_mb2-n"},
208   { OP_STR_MB3N,       "str_mb3n"},
209   { OP_STR_MBN,        "str_mbn"},
210   { OP_CCLASS,         "cclass"},
211   { OP_CCLASS_MB,      "cclass-mb"},
212   { OP_CCLASS_MIX,     "cclass-mix"},
213   { OP_CCLASS_NOT,     "cclass-not"},
214   { OP_CCLASS_MB_NOT,  "cclass-mb-not"},
215   { OP_CCLASS_MIX_NOT, "cclass-mix-not"},
216   { OP_ANYCHAR,               "anychar"},
217   { OP_ANYCHAR_ML,            "anychar-ml"},
218   { OP_ANYCHAR_STAR,          "anychar*"},
219   { OP_ANYCHAR_ML_STAR,       "anychar-ml*"},
220   { OP_ANYCHAR_STAR_PEEK_NEXT,    "anychar*-peek-next"},
221   { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next"},
222   { OP_WORD,                  "word"},
223   { OP_WORD_ASCII,            "word-ascii"},
224   { OP_NO_WORD,               "not-word"},
225   { OP_NO_WORD_ASCII,         "not-word-ascii"},
226   { OP_WORD_BOUNDARY,         "word-boundary"},
227   { OP_NO_WORD_BOUNDARY,      "not-word-boundary"},
228   { OP_WORD_BEGIN,            "word-begin"},
229   { OP_WORD_END,              "word-end"},
230   { OP_TEXT_SEGMENT_BOUNDARY, "text-segment-boundary"},
231   { OP_BEGIN_BUF,             "begin-buf"},
232   { OP_END_BUF,               "end-buf"},
233   { OP_BEGIN_LINE,            "begin-line"},
234   { OP_END_LINE,              "end-line"},
235   { OP_SEMI_END_BUF,          "semi-end-buf"},
236   { OP_CHECK_POSITION,        "check-position"},
237   { OP_BACKREF1,              "backref1"},
238   { OP_BACKREF2,              "backref2"},
239   { OP_BACKREF_N,             "backref-n"},
240   { OP_BACKREF_N_IC,          "backref-n-ic"},
241   { OP_BACKREF_MULTI,         "backref_multi"},
242   { OP_BACKREF_MULTI_IC,      "backref_multi-ic"},
243   { OP_BACKREF_WITH_LEVEL,    "backref_with_level"},
244   { OP_BACKREF_WITH_LEVEL_IC, "backref_with_level-c"},
245   { OP_BACKREF_CHECK,         "backref_check"},
246   { OP_BACKREF_CHECK_WITH_LEVEL, "backref_check_with_level"},
247   { OP_MEM_START_PUSH,        "mem-start-push"},
248   { OP_MEM_START,             "mem-start"},
249   { OP_MEM_END_PUSH,          "mem-end-push"},
250 #ifdef USE_CALL
251   { OP_MEM_END_PUSH_REC,      "mem-end-push-rec"},
252 #endif
253   { OP_MEM_END,               "mem-end"},
254 #ifdef USE_CALL
255   { OP_MEM_END_REC,           "mem-end-rec"},
256 #endif
257   { OP_FAIL,                  "fail"},
258   { OP_JUMP,                  "jump"},
259   { OP_PUSH,                  "push"},
260   { OP_PUSH_SUPER,            "push-super"},
261   { OP_POP,                   "pop"},
262   { OP_POP_TO_MARK,           "pop-to-mark"},
263 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
264   { OP_PUSH_OR_JUMP_EXACT1,   "push-or-jump-e1"},
265 #endif
266   { OP_PUSH_IF_PEEK_NEXT,     "push-if-peek-next"},
267   { OP_REPEAT,                "repeat"},
268   { OP_REPEAT_NG,             "repeat-ng"},
269   { OP_REPEAT_INC,            "repeat-inc"},
270   { OP_REPEAT_INC_NG,         "repeat-inc-ng"},
271   { OP_EMPTY_CHECK_START,     "empty-check-start"},
272   { OP_EMPTY_CHECK_END,       "empty-check-end"},
273   { OP_EMPTY_CHECK_END_MEMST, "empty-check-end-memst"},
274 #ifdef USE_CALL
275   { OP_EMPTY_CHECK_END_MEMST_PUSH,"empty-check-end-memst-push"},
276 #endif
277   { OP_MOVE,                  "move"},
278   { OP_STEP_BACK_START,       "step-back-start"},
279   { OP_STEP_BACK_NEXT,        "step-back-next"},
280   { OP_CUT_TO_MARK,           "cut-to-mark"},
281   { OP_MARK,                  "mark"},
282   { OP_SAVE_VAL,              "save-val"},
283   { OP_UPDATE_VAR,            "update-var"},
284 #ifdef USE_CALL
285   { OP_CALL,                  "call"},
286   { OP_RETURN,                "return"},
287 #endif
288 #ifdef USE_CALLOUT
289   { OP_CALLOUT_CONTENTS,      "callout-contents"},
290   { OP_CALLOUT_NAME,          "callout-name"},
291 #endif
292   { -1, ""}
293 };
294 
295 static char*
op2name(int opcode)296 op2name(int opcode)
297 {
298   int i;
299 
300   for (i = 0; OpInfo[i].opcode >= 0; i++) {
301     if (opcode == OpInfo[i].opcode) return OpInfo[i].name;
302   }
303 
304   return "";
305 }
306 
307 static void
p_string(FILE * f,int len,UChar * s)308 p_string(FILE* f, int len, UChar* s)
309 {
310   fputs(":", f);
311   while (len-- > 0) { fputc(*s++, f); }
312 }
313 
314 static void
p_len_string(FILE * f,LengthType len,int mb_len,UChar * s)315 p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
316 {
317   int x = len * mb_len;
318 
319   fprintf(f, ":%d:", len);
320   while (x-- > 0) { fputc(*s++, f); }
321 }
322 
323 static void
p_rel_addr(FILE * f,RelAddrType rel_addr,Operation * p,Operation * start)324 p_rel_addr(FILE* f, RelAddrType rel_addr, Operation* p, Operation* start)
325 {
326   RelAddrType curr = (RelAddrType )(p - start);
327 
328   fprintf(f, "{%d/%d}", rel_addr, curr + rel_addr);
329 }
330 
331 static int
bitset_on_num(BitSetRef bs)332 bitset_on_num(BitSetRef bs)
333 {
334   int i, n;
335 
336   n = 0;
337   for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
338     if (BITSET_AT(bs, i)) n++;
339   }
340 
341   return n;
342 }
343 
344 
/*
 * Opcode of the operation at index.  With USE_DIRECT_THREADED_CODE it is read
 * from reg->ocs; otherwise from the opcode field of reg->ops[index].
 */
#ifndef USE_DIRECT_THREADED_CODE
#define GET_OPCODE(reg,index)  (reg)->ops[index].opcode
#else
#define GET_OPCODE(reg,index)  (reg)->ocs[index]
#endif
350 
351 static void
print_compiled_byte_code(FILE * f,regex_t * reg,int index,Operation * start,OnigEncoding enc)352 print_compiled_byte_code(FILE* f, regex_t* reg, int index,
353                          Operation* start, OnigEncoding enc)
354 {
355   int i, n;
356   RelAddrType addr;
357   LengthType  len;
358   MemNumType  mem;
359   OnigCodePoint code;
360   ModeType mode;
361   UChar *q;
362   Operation* p;
363   enum OpCode opcode;
364 
365   p = reg->ops + index;
366 
367   opcode = GET_OPCODE(reg, index);
368 
369   fprintf(f, "%s", op2name(opcode));
370   switch (opcode) {
371   case OP_STR_1:
372     p_string(f, 1, p->exact.s); break;
373   case OP_STR_2:
374     p_string(f, 2, p->exact.s); break;
375   case OP_STR_3:
376     p_string(f, 3, p->exact.s); break;
377   case OP_STR_4:
378     p_string(f, 4, p->exact.s); break;
379   case OP_STR_5:
380     p_string(f, 5, p->exact.s); break;
381   case OP_STR_N:
382     len = p->exact_n.n;
383     p_string(f, len, p->exact_n.s); break;
384   case OP_STR_MB2N1:
385     p_string(f, 2, p->exact.s); break;
386   case OP_STR_MB2N2:
387     p_string(f, 4, p->exact.s); break;
388   case OP_STR_MB2N3:
389     p_string(f, 3, p->exact.s); break;
390   case OP_STR_MB2N:
391     len = p->exact_n.n;
392     p_len_string(f, len, 2, p->exact_n.s); break;
393   case OP_STR_MB3N:
394     len = p->exact_n.n;
395     p_len_string(f, len, 3, p->exact_n.s); break;
396   case OP_STR_MBN:
397     {
398       int mb_len;
399 
400       mb_len = p->exact_len_n.len;
401       len    = p->exact_len_n.n;
402       q      = p->exact_len_n.s;
403       fprintf(f, ":%d:%d:", mb_len, len);
404       n = len * mb_len;
405       while (n-- > 0) { fputc(*q++, f); }
406     }
407     break;
408 
409   case OP_CCLASS:
410   case OP_CCLASS_NOT:
411     n = bitset_on_num(p->cclass.bsp);
412     fprintf(f, ":%d", n);
413     break;
414   case OP_CCLASS_MB:
415   case OP_CCLASS_MB_NOT:
416     {
417       OnigCodePoint ncode;
418       OnigCodePoint* codes;
419 
420       codes = (OnigCodePoint* )p->cclass_mb.mb;
421       GET_CODE_POINT(ncode, codes);
422       codes++;
423       GET_CODE_POINT(code, codes);
424       fprintf(f, ":%d:0x%x", ncode, code);
425     }
426     break;
427   case OP_CCLASS_MIX:
428   case OP_CCLASS_MIX_NOT:
429     {
430       OnigCodePoint ncode;
431       OnigCodePoint* codes;
432 
433       codes = (OnigCodePoint* )p->cclass_mix.mb;
434       n = bitset_on_num(p->cclass_mix.bsp);
435 
436       GET_CODE_POINT(ncode, codes);
437       codes++;
438       GET_CODE_POINT(code, codes);
439       fprintf(f, ":%d:%u:%u", n, code, ncode);
440     }
441     break;
442 
443   case OP_ANYCHAR_STAR_PEEK_NEXT:
444   case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
445     p_string(f, 1, &(p->anychar_star_peek_next.c));
446     break;
447 
448   case OP_WORD_BOUNDARY:
449   case OP_NO_WORD_BOUNDARY:
450   case OP_WORD_BEGIN:
451   case OP_WORD_END:
452     mode = p->word_boundary.mode;
453     fprintf(f, ":%d", mode);
454     break;
455 
456   case OP_BACKREF_N:
457   case OP_BACKREF_N_IC:
458     mem = p->backref_n.n1;
459     fprintf(f, ":%d", mem);
460     break;
461   case OP_BACKREF_MULTI_IC:
462   case OP_BACKREF_MULTI:
463   case OP_BACKREF_CHECK:
464     fputs(" ", f);
465     n = p->backref_general.num;
466     for (i = 0; i < n; i++) {
467       mem = (n == 1) ? p->backref_general.n1 : p->backref_general.ns[i];
468       if (i > 0) fputs(", ", f);
469       fprintf(f, "%d", mem);
470     }
471     break;
472   case OP_BACKREF_WITH_LEVEL:
473   case OP_BACKREF_WITH_LEVEL_IC:
474   case OP_BACKREF_CHECK_WITH_LEVEL:
475     {
476       LengthType level;
477 
478       level = p->backref_general.nest_level;
479       fprintf(f, ":%d", level);
480       fputs(" ", f);
481       n = p->backref_general.num;
482       for (i = 0; i < n; i++) {
483         mem = (n == 1) ? p->backref_general.n1 : p->backref_general.ns[i];
484         if (i > 0) fputs(", ", f);
485         fprintf(f, "%d", mem);
486       }
487     }
488     break;
489 
490   case OP_MEM_START:
491   case OP_MEM_START_PUSH:
492     mem = p->memory_start.num;
493     fprintf(f, ":%d", mem);
494     break;
495 
496   case OP_MEM_END:
497   case OP_MEM_END_PUSH:
498 #ifdef USE_CALL
499   case OP_MEM_END_REC:
500   case OP_MEM_END_PUSH_REC:
501 #endif
502     mem = p->memory_end.num;
503     fprintf(f, ":%d", mem);
504     break;
505 
506   case OP_JUMP:
507     addr = p->jump.addr;
508     fputc(':', f);
509     p_rel_addr(f, addr, p, start);
510     break;
511 
512   case OP_PUSH:
513   case OP_PUSH_SUPER:
514     addr = p->push.addr;
515     fputc(':', f);
516     p_rel_addr(f, addr, p, start);
517     break;
518 
519 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
520   case OP_PUSH_OR_JUMP_EXACT1:
521     addr = p->push_or_jump_exact1.addr;
522     fputc(':', f);
523     p_rel_addr(f, addr, p, start);
524     p_string(f, 1, &(p->push_or_jump_exact1.c));
525     break;
526 #endif
527 
528   case OP_PUSH_IF_PEEK_NEXT:
529     addr = p->push_if_peek_next.addr;
530     fputc(':', f);
531     p_rel_addr(f, addr, p, start);
532     p_string(f, 1, &(p->push_if_peek_next.c));
533     break;
534 
535   case OP_REPEAT:
536   case OP_REPEAT_NG:
537     mem = p->repeat.id;
538     addr = p->repeat.addr;
539     fprintf(f, ":%d:", mem);
540     p_rel_addr(f, addr, p, start);
541     break;
542 
543   case OP_REPEAT_INC:
544   case OP_REPEAT_INC_NG:
545     mem = p->repeat.id;
546     fprintf(f, ":%d", mem);
547     break;
548 
549   case OP_EMPTY_CHECK_START:
550     mem = p->empty_check_start.mem;
551     fprintf(f, ":%d", mem);
552     break;
553   case OP_EMPTY_CHECK_END:
554   case OP_EMPTY_CHECK_END_MEMST:
555 #ifdef USE_CALL
556   case OP_EMPTY_CHECK_END_MEMST_PUSH:
557 #endif
558     mem = p->empty_check_end.mem;
559     fprintf(f, ":%d", mem);
560     break;
561 
562 #ifdef USE_CALL
563   case OP_CALL:
564     addr = p->call.addr;
565     fprintf(f, ":{/%d}", addr);
566     break;
567 #endif
568 
569   case OP_MOVE:
570     fprintf(f, ":%d", p->move.n);
571     break;
572 
573   case OP_STEP_BACK_START:
574     addr = p->step_back_start.addr;
575     fprintf(f, ":%d:%d:",
576             p->step_back_start.initial,
577             p->step_back_start.remaining);
578     p_rel_addr(f, addr, p, start);
579     break;
580 
581   case OP_POP_TO_MARK:
582     mem = p->pop_to_mark.id;
583     fprintf(f, ":%d", mem);
584     break;
585 
586   case OP_CUT_TO_MARK:
587     {
588       int restore;
589 
590       mem     = p->cut_to_mark.id;
591       restore = p->cut_to_mark.restore_pos;
592       fprintf(f, ":%d:%d", mem, restore);
593     }
594     break;
595 
596   case OP_MARK:
597     {
598       int save;
599 
600       mem  = p->mark.id;
601       save = p->mark.save_pos;
602       fprintf(f, ":%d:%d", mem, save);
603     }
604     break;
605 
606   case OP_SAVE_VAL:
607     {
608       SaveType type;
609 
610       type = p->save_val.type;
611       mem  = p->save_val.id;
612       fprintf(f, ":%d:%d", type, mem);
613     }
614     break;
615 
616   case OP_UPDATE_VAR:
617     {
618       UpdateVarType type;
619       int clear;
620 
621       type = p->update_var.type;
622       mem  = p->update_var.id;
623       clear = p->update_var.clear;
624       fprintf(f, ":%d:%d", type, mem);
625       if (type == UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK ||
626           type ==  UPDATE_VAR_RIGHT_RANGE_FROM_STACK)
627         fprintf(f, ":%d", clear);
628     }
629     break;
630 
631 #ifdef USE_CALLOUT
632   case OP_CALLOUT_CONTENTS:
633     mem = p->callout_contents.num;
634     fprintf(f, ":%d", mem);
635     break;
636 
637   case OP_CALLOUT_NAME:
638     {
639       int id;
640 
641       id  = p->callout_name.id;
642       mem = p->callout_name.num;
643       fprintf(f, ":%d:%d", id, mem);
644     }
645     break;
646 #endif
647 
648   case OP_TEXT_SEGMENT_BOUNDARY:
649     if (p->text_segment_boundary.not != 0)
650       fprintf(f, ":not");
651     break;
652 
653   case OP_CHECK_POSITION:
654     switch (p->check_position.type) {
655     case CHECK_POSITION_SEARCH_START:
656       fprintf(f, ":search-start"); break;
657     case CHECK_POSITION_CURRENT_RIGHT_RANGE:
658       fprintf(f, ":current-right-range"); break;
659     default:
660       break;
661     };
662     break;
663 
664   case OP_FINISH:
665   case OP_END:
666   case OP_ANYCHAR:
667   case OP_ANYCHAR_ML:
668   case OP_ANYCHAR_STAR:
669   case OP_ANYCHAR_ML_STAR:
670   case OP_WORD:
671   case OP_WORD_ASCII:
672   case OP_NO_WORD:
673   case OP_NO_WORD_ASCII:
674   case OP_BEGIN_BUF:
675   case OP_END_BUF:
676   case OP_BEGIN_LINE:
677   case OP_END_LINE:
678   case OP_SEMI_END_BUF:
679   case OP_BACKREF1:
680   case OP_BACKREF2:
681   case OP_FAIL:
682   case OP_POP:
683   case OP_STEP_BACK_NEXT:
684 #ifdef USE_CALL
685   case OP_RETURN:
686 #endif
687     break;
688 
689   default:
690     fprintf(DBGFP, "print_compiled_byte_code: undefined code %d\n", opcode);
691     break;
692   }
693 }
694 #endif /* defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH) */
695 
#ifdef ONIG_DEBUG_COMPILE
/*
 * Dump the whole compiled program of reg to f: a header with the
 * push_mem_start / push_mem_end values and the operation count, then one
 * line per operation prefixed by its position, then an empty line.
 */
extern void
onig_print_compiled_byte_code_list(FILE* f, regex_t* reg)
{
  Operation* first = reg->ops;
  Operation* limit = reg->ops + reg->ops_used;
  Operation* cur;

  fprintf(f, "push_mem_start: 0x%x, push_mem_end: 0x%x\n",
          reg->push_mem_start, reg->push_mem_end);
  fprintf(f, "code-length: %d\n", reg->ops_used);

  for (cur = first; cur < limit; cur++) {
    int pos = cur - first;

    fprintf(f, "%4d: ", pos);
    print_compiled_byte_code(f, reg, pos, first, reg->enc);
    fprintf(f, "\n");
  }
  fprintf(f, "\n");
}
#endif
720 
721 
722 #ifdef USE_CAPTURE_HISTORY
723 static void history_tree_free(OnigCaptureTreeNode* node);
724 
725 static void
history_tree_clear(OnigCaptureTreeNode * node)726 history_tree_clear(OnigCaptureTreeNode* node)
727 {
728   int i;
729 
730   if (IS_NULL(node)) return ;
731 
732   for (i = 0; i < node->num_childs; i++) {
733     if (IS_NOT_NULL(node->childs[i])) {
734       history_tree_free(node->childs[i]);
735     }
736   }
737   for (i = 0; i < node->allocated; i++) {
738     node->childs[i] = (OnigCaptureTreeNode* )0;
739   }
740   node->num_childs = 0;
741   node->beg = ONIG_REGION_NOTPOS;
742   node->end = ONIG_REGION_NOTPOS;
743   node->group = -1;
744 }
745 
746 static void
history_tree_free(OnigCaptureTreeNode * node)747 history_tree_free(OnigCaptureTreeNode* node)
748 {
749   history_tree_clear(node);
750   if (IS_NOT_NULL(node->childs)) xfree(node->childs);
751 
752   xfree(node);
753 }
754 
755 static void
history_root_free(OnigRegion * r)756 history_root_free(OnigRegion* r)
757 {
758   if (IS_NULL(r->history_root)) return ;
759 
760   history_tree_free(r->history_root);
761   r->history_root = (OnigCaptureTreeNode* )0;
762 }
763 
764 static OnigCaptureTreeNode*
history_node_new(void)765 history_node_new(void)
766 {
767   OnigCaptureTreeNode* node;
768 
769   node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode));
770   CHECK_NULL_RETURN(node);
771 
772   node->childs     = (OnigCaptureTreeNode** )0;
773   node->allocated  =  0;
774   node->num_childs =  0;
775   node->group      = -1;
776   node->beg        = ONIG_REGION_NOTPOS;
777   node->end        = ONIG_REGION_NOTPOS;
778 
779   return node;
780 }
781 
782 static int
history_tree_add_child(OnigCaptureTreeNode * parent,OnigCaptureTreeNode * child)783 history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
784 {
785 #define HISTORY_TREE_INIT_ALLOC_SIZE  8
786 
787   if (parent->num_childs >= parent->allocated) {
788     int n, i;
789 
790     if (IS_NULL(parent->childs)) {
791       n = HISTORY_TREE_INIT_ALLOC_SIZE;
792       parent->childs =
793         (OnigCaptureTreeNode** )xmalloc(sizeof(parent->childs[0]) * n);
794     }
795     else {
796       n = parent->allocated * 2;
797       parent->childs =
798         (OnigCaptureTreeNode** )xrealloc(parent->childs,
799                                          sizeof(parent->childs[0]) * n);
800     }
801     CHECK_NULL_RETURN_MEMERR(parent->childs);
802     for (i = parent->allocated; i < n; i++) {
803       parent->childs[i] = (OnigCaptureTreeNode* )0;
804     }
805     parent->allocated = n;
806   }
807 
808   parent->childs[parent->num_childs] = child;
809   parent->num_childs++;
810   return 0;
811 }
812 
813 static OnigCaptureTreeNode*
history_tree_clone(OnigCaptureTreeNode * node)814 history_tree_clone(OnigCaptureTreeNode* node)
815 {
816   int i;
817   OnigCaptureTreeNode *clone, *child;
818 
819   clone = history_node_new();
820   CHECK_NULL_RETURN(clone);
821 
822   clone->beg = node->beg;
823   clone->end = node->end;
824   for (i = 0; i < node->num_childs; i++) {
825     child = history_tree_clone(node->childs[i]);
826     if (IS_NULL(child)) {
827       history_tree_free(clone);
828       return (OnigCaptureTreeNode* )0;
829     }
830     history_tree_add_child(clone, child);
831   }
832 
833   return clone;
834 }
835 
836 extern  OnigCaptureTreeNode*
onig_get_capture_tree(OnigRegion * region)837 onig_get_capture_tree(OnigRegion* region)
838 {
839   return region->history_root;
840 }
841 #endif /* USE_CAPTURE_HISTORY */
842 
843 extern void
onig_region_clear(OnigRegion * region)844 onig_region_clear(OnigRegion* region)
845 {
846   int i;
847 
848   for (i = 0; i < region->num_regs; i++) {
849     region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
850   }
851 #ifdef USE_CAPTURE_HISTORY
852   history_root_free(region);
853 #endif
854 }
855 
856 extern int
onig_region_resize(OnigRegion * region,int n)857 onig_region_resize(OnigRegion* region, int n)
858 {
859   region->num_regs = n;
860 
861   if (n < ONIG_NREGION)
862     n = ONIG_NREGION;
863 
864   if (region->allocated == 0) {
865     region->beg = (int* )xmalloc(n * sizeof(int));
866     region->end = (int* )xmalloc(n * sizeof(int));
867 
868     if (region->beg == 0 || region->end == 0)
869       return ONIGERR_MEMORY;
870 
871     region->allocated = n;
872   }
873   else if (region->allocated < n) {
874     region->beg = (int* )xrealloc(region->beg, n * sizeof(int));
875     region->end = (int* )xrealloc(region->end, n * sizeof(int));
876 
877     if (region->beg == 0 || region->end == 0)
878       return ONIGERR_MEMORY;
879 
880     region->allocated = n;
881   }
882 
883   return 0;
884 }
885 
886 static int
onig_region_resize_clear(OnigRegion * region,int n)887 onig_region_resize_clear(OnigRegion* region, int n)
888 {
889   int r;
890 
891   r = onig_region_resize(region, n);
892   if (r != 0) return r;
893   onig_region_clear(region);
894   return 0;
895 }
896 
897 extern int
onig_region_set(OnigRegion * region,int at,int beg,int end)898 onig_region_set(OnigRegion* region, int at, int beg, int end)
899 {
900   if (at < 0) return ONIGERR_INVALID_ARGUMENT;
901 
902   if (at >= region->allocated) {
903     int r = onig_region_resize(region, at + 1);
904     if (r < 0) return r;
905   }
906 
907   region->beg[at] = beg;
908   region->end[at] = end;
909   return 0;
910 }
911 
912 extern void
onig_region_init(OnigRegion * region)913 onig_region_init(OnigRegion* region)
914 {
915   region->num_regs     = 0;
916   region->allocated    = 0;
917   region->beg          = (int* )0;
918   region->end          = (int* )0;
919   region->history_root = (OnigCaptureTreeNode* )0;
920 }
921 
922 extern OnigRegion*
onig_region_new(void)923 onig_region_new(void)
924 {
925   OnigRegion* r;
926 
927   r = (OnigRegion* )xmalloc(sizeof(OnigRegion));
928   CHECK_NULL_RETURN(r);
929   onig_region_init(r);
930   return r;
931 }
932 
933 extern void
onig_region_free(OnigRegion * r,int free_self)934 onig_region_free(OnigRegion* r, int free_self)
935 {
936   if (r != 0) {
937     if (r->allocated > 0) {
938       if (r->beg) xfree(r->beg);
939       if (r->end) xfree(r->end);
940       r->allocated = 0;
941     }
942 #ifdef USE_CAPTURE_HISTORY
943     history_root_free(r);
944 #endif
945     if (free_self) xfree(r);
946   }
947 }
948 
949 extern void
onig_region_copy(OnigRegion * to,OnigRegion * from)950 onig_region_copy(OnigRegion* to, OnigRegion* from)
951 {
952 #define RREGC_SIZE   (sizeof(int) * from->num_regs)
953   int i;
954 
955   if (to == from) return;
956 
957   if (to->allocated == 0) {
958     if (from->num_regs > 0) {
959       to->beg = (int* )xmalloc(RREGC_SIZE);
960       if (IS_NULL(to->beg)) return;
961       to->end = (int* )xmalloc(RREGC_SIZE);
962       if (IS_NULL(to->end)) return;
963       to->allocated = from->num_regs;
964     }
965   }
966   else if (to->allocated < from->num_regs) {
967     to->beg = (int* )xrealloc(to->beg, RREGC_SIZE);
968     if (IS_NULL(to->beg)) return;
969     to->end = (int* )xrealloc(to->end, RREGC_SIZE);
970     if (IS_NULL(to->end)) return;
971     to->allocated = from->num_regs;
972   }
973 
974   for (i = 0; i < from->num_regs; i++) {
975     to->beg[i] = from->beg[i];
976     to->end[i] = from->end[i];
977   }
978   to->num_regs = from->num_regs;
979 
980 #ifdef USE_CAPTURE_HISTORY
981   history_root_free(to);
982 
983   if (IS_NOT_NULL(from->history_root)) {
984     to->history_root = history_tree_clone(from->history_root);
985   }
986 #endif
987 }
988 
#ifdef USE_CALLOUT
/*
 * Fill the OnigCalloutArgs object args and call func(&args, user), storing
 * the return value in result.
 * Apart from its parameters the macro reads these names from the place where
 * it is expanded: reg, str, end, sstart, right_range, s,
 * retry_in_match_counter, msa, stk_base, stk, mem_start_stk, mem_end_stk.
 */
#define CALLOUT_BODY(func, ain, aname_id, anum, user, args, result) do { \
  args.in                     = (ain); \
  args.name_id                = (aname_id); \
  args.num                    = anum; \
  args.regex                  = reg; \
  args.string                 = str; \
  args.string_end             = end; \
  args.start                  = sstart; \
  args.right_range            = right_range; \
  args.current                = s; \
  args.retry_in_match_counter = retry_in_match_counter; \
  args.msa                    = msa; \
  args.stk_base               = stk_base; \
  args.stk                    = stk; \
  args.mem_start_stk          = mem_start_stk; \
  args.mem_end_stk            = mem_end_stk; \
  result = (func)(&args, user); \
} while (0)

/*
 * Run a callout in the retraction direction.
 * ONIG_CALLOUT_FAIL and ONIG_CALLOUT_SUCCESS let execution continue.  Any
 * other result ends the match: it is stored in best_len (positive values are
 * first replaced by ONIGERR_INVALID_ARGUMENT) and control jumps to the
 * match_at_end label, which must exist at the expansion site.
 */
#define RETRACTION_CALLOUT(func, aname_id, anum, user) do { \
  int result; \
  OnigCalloutArgs args; \
  CALLOUT_BODY(func, ONIG_CALLOUT_IN_RETRACTION, aname_id, anum, user, args, result); \
  if (result != ONIG_CALLOUT_FAIL && result != ONIG_CALLOUT_SUCCESS) { \
    if (result > 0) { \
      result = ONIGERR_INVALID_ARGUMENT; \
    } \
    best_len = result; \
    goto match_at_end; \
  } \
} while(0)
#endif
1027 
1028 
/*
 * Match stack entry types (stored in StackType.type) and the bit masks the
 * pop macros test them with.  The numeric values are chosen so that a
 * single AND classifies an entry:
 *   - bit 0x0001 (STK_MASK_POP_USED) is set only in STK_ALT / STK_SUPER_ALT;
 *     STACK_POP stops at the first entry that has it.
 *   - bit 0x0010 (STK_MASK_POP_HANDLED) marks entries whose side effects the
 *     default branch of STACK_POP has to undo.
 *   - bit 0x0004 is additionally set in STK_MARK so that
 *     STK_MASK_POP_HANDLED_TIL also selects marks.
 *   - bit 0x8000 (STK_MASK_MEM_END_OR_MARK) is set in STK_MEM_END and
 *     STK_MEM_END_MARK.
 * Several definitions below reference STK_MASK_POP_HANDLED before its
 * #define; this works because macros are expanded at the point of use.
 */
1029 /** stack **/
/* Stored into mem_end_stk[] by STACK_PUSH_MEM_START to say "no end entry". */
1030 #define INVALID_STACK_INDEX   -1
1031 
1032 #define STK_ALT_FLAG               0x0001
1033 
1034 /* stack type */
1035 /* used by normal-POP */
1036 #define STK_SUPER_ALT             STK_ALT_FLAG
1037 #define STK_ALT                   (0x0002 | STK_ALT_FLAG)
1038 
1039 /* handled by normal-POP */
1040 #define STK_MEM_START              0x0010
1041 #define STK_MEM_END                0x8030
1042 #ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
1043 #define STK_REPEAT_INC             (0x0040 | STK_MASK_POP_HANDLED)
1044 #else
1045 #define STK_REPEAT_INC             0x0040
1046 #endif
1047 #ifdef USE_CALLOUT
1048 #define STK_CALLOUT                0x0070
1049 #endif
1050 
1051 /* avoided by normal-POP */
1052 #define STK_VOID                   0x0000  /* for fill a blank */
1053 #ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
1054 #define STK_EMPTY_CHECK_START      (0x3000 | STK_MASK_POP_HANDLED)
1055 #else
1056 #define STK_EMPTY_CHECK_START      0x3000
1057 #endif
1058 #define STK_EMPTY_CHECK_END        0x5000  /* for recursive call */
1059 #define STK_MEM_END_MARK           0x8100
1060 #define STK_CALL_FRAME             (0x0400 | STK_MASK_POP_HANDLED)
1061 #define STK_RETURN                 (0x0500 | STK_MASK_POP_HANDLED)
1062 #define STK_SAVE_VAL               0x0600
1063 #define STK_MARK                   0x0704
1064 
1065 /* stack type check mask */
1066 #define STK_MASK_POP_USED          STK_ALT_FLAG
1067 #define STK_MASK_POP_HANDLED       0x0010
1068 #define STK_MASK_POP_HANDLED_TIL   (STK_MASK_POP_HANDLED | 0x0004)
1069 #define STK_MASK_TO_VOID_TARGET    0x100e
1070 #define STK_MASK_MEM_END_OR_MARK   0x8000  /* MEM_END or MEM_END_MARK */
1071 
/* Index of an entry relative to stk_base (see STACK_AT / GET_STACK_INDEX). */
1072 typedef intptr_t StackIndex;
1073 
/*
 * One entry of the match stack.  `type` holds one of the STK_* values and
 * selects which member of the union `u` is meaningful; `zid` carries the
 * id passed to the push macro (memory number, repeat id, empty-check id,
 * mark / save id or callout name id, depending on the entry type).
 */
1074 typedef struct _StackType {
1075   unsigned int type;
1076   int zid;
1077   union {
    /* written by STACK_PUSH and its variants (STK_ALT, STK_SUPER_ALT, ...) */
1078     struct {
1079       Operation* pcode;     /* byte code position */
1080       UChar*     pstr;      /* string position */
1081       UChar*     pstr_prev; /* previous char position of pstr */
1082     } state;
    /* written by STACK_PUSH_REPEAT_INC */
1083     struct {
1084       int        count;
1085 #ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
1086       StackIndex prev_index;  /* index of stack */
1087 #endif
1088     } repeat_inc;
    /* written by STACK_PUSH_MEM_START and STACK_PUSH_MEM_END */
1089     struct {
1090       UChar *pstr;       /* start/end position */
1091       /* Following information is set, if this stack type is MEM-START */
1092       StackIndex prev_start;  /* prev. info (for backtrack  "(...)*" ) */
1093       StackIndex prev_end;    /* prev. info (for backtrack  "(...)*" ) */
1094     } mem;
    /* written by STACK_PUSH_EMPTY_CHECK_START */
1095     struct {
1096       UChar *pstr;            /* start position */
1097 #ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
1098       StackIndex prev_index;  /* index of stack */
1099 #endif
1100     } empty_check;
1101 #ifdef USE_CALL
    /* written by STACK_PUSH_CALL_FRAME (only ret_addr is set there) */
1102     struct {
1103       Operation *ret_addr; /* byte code position */
1104       UChar *pstr;         /* string position */
1105     } call_frame;
1106 #endif
    /* written by STACK_PUSH_SAVE_VAL* and STACK_PUSH_MARK_WITH_POS */
1107     struct {
1108       enum SaveType type;
1109       UChar* v;
1110       UChar* v2;
1111     } val;
1112 #ifdef USE_CALLOUT
    /* written by STACK_PUSH_CALLOUT_CONTENTS / STACK_PUSH_CALLOUT_NAME */
1113     struct {
1114       int num;
1115       OnigCalloutFunc func;
1116     } callout;
1117 #endif
1118   } u;
1119 } StackType;
1120 
1121 #ifdef USE_CALLOUT
1122 
/*
 * Argument block handed to a callout function.  Every field is assigned
 * by the CALLOUT_BODY macro immediately before the callout is invoked.
 */
1123 struct OnigCalloutArgsStruct {
1124   OnigCalloutIn    in;
1125   int              name_id;   /* name id or ONIG_NON_NAME_ID */
1126   int              num;
1127   OnigRegex        regex;
1128   const OnigUChar* string;
1129   const OnigUChar* string_end;
1130   const OnigUChar* start;
1131   const OnigUChar* right_range;
1132   const OnigUChar* current;  /* current matching position */
1133   unsigned long    retry_in_match_counter;
1134 
1135   /* invisible to users */
1136   MatchArg*   msa;
1137   StackType*  stk_base;
1138   StackType*  stk;
1139   StackIndex* mem_start_stk;
1140   StackIndex* mem_end_stk;
1141 };
1142 
1143 #endif
1144 
/*
 * Helpers for the StackIndex arrays that live in front of the match stack
 * inside the same allocation (STACK_INIT places stk_base right after
 * msa->ptr_num StackIndex slots).
 *
 *   PTR_NUM_SIZE(reg)          number of StackIndex slots needed for `reg`.
 *   UPDATE_FOR_STACK_REALLOC   recompute the array pointers from alloc_base;
 *                              STACK_ENSURE runs it after stack_double()
 *                              succeeded.  Reads `alloc_base`, `reg` and
 *                              `num_mem` from the expansion site.
 *   SAVE_*_STK_VAR(sid)        remember the current array value in the
 *                              entry at `stk`.
 *   LOAD_TO_*_STK_VAR(sid)     point the array slot at the entry at `stk`.
 *   POP_REPEAT_INC / POP_EMPTY_CHECK_START
 *                              expand to an `else if` clause that restores
 *                              the array slot from the popped entry; they
 *                              are spliced into the if/else chains of the
 *                              pop macros.
 *
 * With USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR the layout is
 * repeat_stk[num_repeat], empty_check_stk[num_empty_check],
 * mem_start_stk[num_mem + 1], mem_end_stk[num_mem + 1]; without it only the
 * two mem arrays exist and the SAVE/LOAD/POP helpers expand to nothing.
 */
1145 #ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
1146 
1147 #define PTR_NUM_SIZE(reg)  ((reg)->num_repeat + (reg)->num_empty_check + ((reg)->num_mem + 1) * 2)
1148 #define UPDATE_FOR_STACK_REALLOC do{\
1149   repeat_stk      = (StackIndex* )alloc_base;\
1150   empty_check_stk = (StackIndex* )(repeat_stk + reg->num_repeat);\
1151   mem_start_stk   = (StackIndex* )(empty_check_stk + reg->num_empty_check);\
1152   mem_end_stk     = mem_start_stk + num_mem + 1;\
1153 } while(0)
1154 
1155 #define SAVE_REPEAT_STK_VAR(sid) stk->u.repeat_inc.prev_index = repeat_stk[sid]
1156 #define LOAD_TO_REPEAT_STK_VAR(sid)  repeat_stk[sid] = GET_STACK_INDEX(stk)
1157 #define POP_REPEAT_INC  else if (stk->type == STK_REPEAT_INC) {repeat_stk[stk->zid] = stk->u.repeat_inc.prev_index;}
1158 
1159 #define SAVE_EMPTY_CHECK_STK_VAR(sid) stk->u.empty_check.prev_index = empty_check_stk[sid]
1160 #define LOAD_TO_EMPTY_CHECK_STK_VAR(sid)  empty_check_stk[sid] = GET_STACK_INDEX(stk)
1161 #define POP_EMPTY_CHECK_START  else if (stk->type == STK_EMPTY_CHECK_START) {empty_check_stk[stk->zid] = stk->u.empty_check.prev_index;}
1162 
1163 #else
1164 
1165 #define PTR_NUM_SIZE(reg)  (((reg)->num_mem + 1) * 2)
1166 #define UPDATE_FOR_STACK_REALLOC do{\
1167   mem_start_stk = (StackIndex* )alloc_base;\
1168   mem_end_stk   = mem_start_stk + num_mem + 1;\
1169 } while(0)
1170 
1171 #define SAVE_REPEAT_STK_VAR(sid)
1172 #define LOAD_TO_REPEAT_STK_VAR(sid)
1173 #define POP_REPEAT_INC
1174 
1175 #define SAVE_EMPTY_CHECK_STK_VAR(sid)
1176 #define LOAD_TO_EMPTY_CHECK_STK_VAR(sid)
1177 #define POP_EMPTY_CHECK_START
1178 
1179 #endif /* USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR */
1180 
/*
 * RETRY_IN_MATCH_ARG_INIT(msa, mpv): copy both retry limits from the match
 * parameter `mpv` into the MatchArg `msa` and reset the per-search retry
 * counter.  The expansion is three complete statements (it already ends
 * with `;` and is not wrapped in do/while), which is why MATCH_ARG_INIT
 * invokes it without a trailing semicolon.  Expands to nothing without
 * USE_RETRY_LIMIT.
 */
1181 #ifdef USE_RETRY_LIMIT
1182 #define RETRY_IN_MATCH_ARG_INIT(msa,mpv) \
1183   (msa).retry_limit_in_match  = (mpv)->retry_limit_in_match;\
1184   (msa).retry_limit_in_search = (mpv)->retry_limit_in_search;\
1185   (msa).retry_limit_in_search_counter = 0;
1186 #else
1187 #define RETRY_IN_MATCH_ARG_INIT(msa,mpv)
1188 #endif
1189 
/*
 * POP_CALL: `else if` clauses spliced into the pop macros.  Popping a
 * STK_RETURN entry increments subexp_call_nest_counter, popping a
 * STK_CALL_FRAME entry decrements it.  Empty unless both USE_CALL and
 * SUBEXP_CALL_MAX_NEST_LEVEL are defined.
 */
1190 #if defined(USE_CALL) && defined(SUBEXP_CALL_MAX_NEST_LEVEL)
1191 #define POP_CALL  else if (stk->type == STK_RETURN) {subexp_call_nest_counter++;} else if (stk->type == STK_CALL_FRAME) {subexp_call_nest_counter--;}
1192 #else
1193 #define POP_CALL
1194 #endif
1195 
/*
 * MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mpv):
 * initialise the MatchArg object `msa` (an lvalue, not a pointer) for one
 * match/search: no saved stack yet (stack_p = 0), the caller's options,
 * region and start position, the limits taken from the match parameter
 * `mpv`, and ptr_num = PTR_NUM_SIZE(reg).
 * The USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE variant additionally resets
 * best_len to ONIG_MISMATCH; otherwise the two variants are identical.
 */
1196 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1197 #define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mpv) do { \
1198   (msa).stack_p  = (void* )0;\
1199   (msa).options  = (arg_option);\
1200   (msa).region   = (arg_region);\
1201   (msa).start    = (arg_start);\
1202   (msa).match_stack_limit  = (mpv)->match_stack_limit;\
1203   RETRY_IN_MATCH_ARG_INIT(msa,mpv)\
1204   (msa).mp = mpv;\
1205   (msa).best_len = ONIG_MISMATCH;\
1206   (msa).ptr_num  = PTR_NUM_SIZE(reg);\
1207 } while(0)
1208 #else
1209 #define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mpv) do { \
1210   (msa).stack_p  = (void* )0;\
1211   (msa).options  = (arg_option);\
1212   (msa).region   = (arg_region);\
1213   (msa).start    = (arg_start);\
1214   (msa).match_stack_limit  = (mpv)->match_stack_limit;\
1215   RETRY_IN_MATCH_ARG_INIT(msa,mpv)\
1216   (msa).mp = mpv;\
1217   (msa).ptr_num  = PTR_NUM_SIZE(reg);\
1218 } while(0)
1219 #endif
1220 
/*
 * MATCH_ARG_FREE(msa): release the stack saved in msa.stack_p, if any.
 * NOTE(review): this expands to a bare `if` statement (no braces, no
 * do/while), so an `else` written directly after the invocation would
 * attach to it.
 */
1221 #define MATCH_ARG_FREE(msa)  if ((msa).stack_p) xfree((msa).stack_p)
1222 
1223 
/* STACK_INIT uses xalloca only while msa->ptr_num does not exceed this. */
1224 #define ALLOCA_PTR_NUM_LIMIT   50
1225 
/*
 * STACK_INIT(stack_num): set up the match stack for the enclosing function.
 * The allocation starts with msa->ptr_num StackIndex slots and is followed
 * by the StackType entries; the macro writes is_alloca, alloc_base,
 * stk_base, stk and stk_end at the expansion site.
 *   1. msa->stack_p is set: reuse that buffer with msa->stack_n entries
 *      (`stack_num` is ignored in this case).
 *   2. msa->ptr_num > ALLOCA_PTR_NUM_LIMIT: xmalloc a buffer for
 *      `stack_num` entries.
 *   3. otherwise: xalloca a buffer for `stack_num` entries and set
 *      is_alloca = 1.
 * CHECK_NULL_RETURN_MEMERR is defined elsewhere; presumably it returns a
 * memory error from the enclosing function when the pointer is NULL -
 * TODO confirm.
 * NOTE(review): the definition ends in `while(0);`, i.e. it already carries
 * its own semicolon, unlike the other do/while(0) macros in this file.
 */
1226 #define STACK_INIT(stack_num)  do {\
1227   if (msa->stack_p) {\
1228     is_alloca  = 0;\
1229     alloc_base = msa->stack_p;\
1230     stk_base   = (StackType* )(alloc_base\
1231                  + (sizeof(StackIndex) * msa->ptr_num));\
1232     stk        = stk_base;\
1233     stk_end    = stk_base + msa->stack_n;\
1234   }\
1235   else if (msa->ptr_num > ALLOCA_PTR_NUM_LIMIT) {\
1236     is_alloca  = 0;\
1237     alloc_base = (char* )xmalloc(sizeof(StackIndex) * msa->ptr_num\
1238                   + sizeof(StackType) * (stack_num));\
1239     CHECK_NULL_RETURN_MEMERR(alloc_base);\
1240     stk_base   = (StackType* )(alloc_base\
1241                  + (sizeof(StackIndex) * msa->ptr_num));\
1242     stk        = stk_base;\
1243     stk_end    = stk_base + (stack_num);\
1244   }\
1245   else {\
1246     is_alloca  = 1;\
1247     alloc_base = (char* )xalloca(sizeof(StackIndex) * msa->ptr_num\
1248                  + sizeof(StackType) * (stack_num));\
1249     CHECK_NULL_RETURN_MEMERR(alloc_base);\
1250     stk_base   = (StackType* )(alloc_base\
1251                  + (sizeof(StackIndex) * msa->ptr_num));\
1252     stk        = stk_base;\
1253     stk_end    = stk_base + (stack_num);\
1254   }\
1255 } while(0);
1256 
1257 
/*
 * STACK_SAVE(msa, is_alloca, alloc_base): hand the current stack buffer to
 * `msa` so that it can be reused (STACK_INIT case 1) or released
 * (MATCH_ARG_FREE).  Records the capacity in msa->stack_n; a buffer that
 * came from xalloca is first copied into a fresh xmalloc block, any other
 * buffer is stored as is.
 * Reads `stk_end` and `stk_base` from the expansion site by name, and can
 * leave the enclosing function through CHECK_NULL_RETURN_MEMERR.
 */
1258 #define STACK_SAVE(msa,is_alloca,alloc_base) do{\
1259   (msa)->stack_n = (int )(stk_end - stk_base);\
1260   if ((is_alloca) != 0) {\
1261     size_t size = sizeof(StackIndex) * (msa)->ptr_num\
1262                 + sizeof(StackType) * (msa)->stack_n;\
1263     (msa)->stack_p = xmalloc(size);\
1264     CHECK_NULL_RETURN_MEMERR((msa)->stack_p);\
1265     xmemcpy((msa)->stack_p, (alloc_base), size);\
1266   }\
1267   else {\
1268     (msa)->stack_p = (alloc_base);\
1269   };\
1270 } while(0)
1271 
1272 static unsigned int MatchStackLimit = DEFAULT_MATCH_STACK_LIMIT_SIZE;
1273 
1274 extern unsigned int
onig_get_match_stack_limit_size(void)1275 onig_get_match_stack_limit_size(void)
1276 {
1277   return MatchStackLimit;
1278 }
1279 
1280 extern int
onig_set_match_stack_limit_size(unsigned int size)1281 onig_set_match_stack_limit_size(unsigned int size)
1282 {
1283   MatchStackLimit = size;
1284   return 0;
1285 }
1286 
1287 #ifdef USE_RETRY_LIMIT
1288 
/* Process-wide defaults; onig_initialize_match_param() copies them into
   each match parameter. */
1289 static unsigned long RetryLimitInMatch  = DEFAULT_RETRY_LIMIT_IN_MATCH;
1290 static unsigned long RetryLimitInSearch = DEFAULT_RETRY_LIMIT_IN_SEARCH;
1291 
/*
 * CHECK_RETRY_LIMIT_IN_MATCH: count one more retry and abort the match when
 * the counter exceeds `retry_limit_in_match` (a variable at the expansion
 * site).  The error passed to MATCH_AT_ERROR_RETURN is
 * ONIGERR_RETRY_LIMIT_IN_MATCH_OVER when the counter also exceeds
 * msa->retry_limit_in_match, ONIGERR_RETRY_LIMIT_IN_SEARCH_OVER otherwise.
 * NOTE(review): the second case implies the local limit can be smaller than
 * msa->retry_limit_in_match - confirm where the local is set up.
 * Expands to nothing without USE_RETRY_LIMIT.
 */
1292 #define CHECK_RETRY_LIMIT_IN_MATCH  do {\
1293   if (++retry_in_match_counter > retry_limit_in_match) {\
1294     MATCH_AT_ERROR_RETURN(retry_in_match_counter > msa->retry_limit_in_match ? ONIGERR_RETRY_LIMIT_IN_MATCH_OVER : ONIGERR_RETRY_LIMIT_IN_SEARCH_OVER); \
1295   }\
1296 } while (0)
1297 
1298 #else
1299 
1300 #define CHECK_RETRY_LIMIT_IN_MATCH
1301 
1302 #endif /* USE_RETRY_LIMIT */
1303 
/* Return the process-wide default retry limit for a single match, or 0
   when the library was built without USE_RETRY_LIMIT. */
extern unsigned long
onig_get_retry_limit_in_match(void)
{
  unsigned long limit = 0;

#ifdef USE_RETRY_LIMIT
  limit = RetryLimitInMatch;
#endif

  return limit;
}
1313 
1314 extern int
onig_set_retry_limit_in_match(unsigned long n)1315 onig_set_retry_limit_in_match(unsigned long n)
1316 {
1317 #ifdef USE_RETRY_LIMIT
1318   RetryLimitInMatch = n;
1319   return 0;
1320 #else
1321   return ONIG_NO_SUPPORT_CONFIG;
1322 #endif
1323 }
1324 
/* Return the process-wide default retry limit for a whole search, or 0
   when the library was built without USE_RETRY_LIMIT. */
extern unsigned long
onig_get_retry_limit_in_search(void)
{
  unsigned long limit = 0;

#ifdef USE_RETRY_LIMIT
  limit = RetryLimitInSearch;
#endif

  return limit;
}
1334 
1335 extern int
onig_set_retry_limit_in_search(unsigned long n)1336 onig_set_retry_limit_in_search(unsigned long n)
1337 {
1338 #ifdef USE_RETRY_LIMIT
1339   RetryLimitInSearch = n;
1340   return 0;
1341 #else
1342   return ONIG_NO_SUPPORT_CONFIG;
1343 #endif
1344 }
1345 
1346 #ifdef USE_CALLOUT
/* Process-wide default callout functions; onig_initialize_match_param()
   copies them into progress_callout_of_contents and
   retraction_callout_of_contents of each match parameter.  Being statics
   without an initializer they start out as null pointers. */
1347 static OnigCalloutFunc DefaultProgressCallout;
1348 static OnigCalloutFunc DefaultRetractionCallout;
1349 #endif
1350 
1351 extern OnigMatchParam*
onig_new_match_param(void)1352 onig_new_match_param(void)
1353 {
1354   OnigMatchParam* p;
1355 
1356   p = (OnigMatchParam* )xmalloc(sizeof(*p));
1357   if (IS_NOT_NULL(p)) {
1358     onig_initialize_match_param(p);
1359   }
1360 
1361   return p;
1362 }
1363 
1364 extern void
onig_free_match_param_content(OnigMatchParam * p)1365 onig_free_match_param_content(OnigMatchParam* p)
1366 {
1367 #ifdef USE_CALLOUT
1368   if (IS_NOT_NULL(p->callout_data)) {
1369     xfree(p->callout_data);
1370     p->callout_data = 0;
1371   }
1372 #endif
1373 }
1374 
1375 extern void
onig_free_match_param(OnigMatchParam * p)1376 onig_free_match_param(OnigMatchParam* p)
1377 {
1378   if (IS_NOT_NULL(p)) {
1379     onig_free_match_param_content(p);
1380     xfree(p);
1381   }
1382 }
1383 
1384 extern int
onig_initialize_match_param(OnigMatchParam * mp)1385 onig_initialize_match_param(OnigMatchParam* mp)
1386 {
1387   mp->match_stack_limit  = MatchStackLimit;
1388 #ifdef USE_RETRY_LIMIT
1389   mp->retry_limit_in_match  = RetryLimitInMatch;
1390   mp->retry_limit_in_search = RetryLimitInSearch;
1391 #endif
1392 
1393 #ifdef USE_CALLOUT
1394   mp->progress_callout_of_contents   = DefaultProgressCallout;
1395   mp->retraction_callout_of_contents = DefaultRetractionCallout;
1396   mp->match_at_call_counter  = 0;
1397   mp->callout_user_data      = 0;
1398   mp->callout_data           = 0;
1399   mp->callout_data_alloc_num = 0;
1400 #endif
1401 
1402   return ONIG_NORMAL;
1403 }
1404 
1405 #ifdef USE_CALLOUT
1406 
1407 static int
adjust_match_param(regex_t * reg,OnigMatchParam * mp)1408 adjust_match_param(regex_t* reg, OnigMatchParam* mp)
1409 {
1410   RegexExt* ext = reg->extp;
1411 
1412   mp->match_at_call_counter = 0;
1413 
1414   if (IS_NULL(ext) || ext->callout_num == 0) return ONIG_NORMAL;
1415 
1416   if (ext->callout_num > mp->callout_data_alloc_num) {
1417     CalloutData* d;
1418     size_t n = ext->callout_num * sizeof(*d);
1419     if (IS_NOT_NULL(mp->callout_data))
1420       d = (CalloutData* )xrealloc(mp->callout_data, n);
1421     else
1422       d = (CalloutData* )xmalloc(n);
1423     CHECK_NULL_RETURN_MEMERR(d);
1424 
1425     mp->callout_data = d;
1426     mp->callout_data_alloc_num = ext->callout_num;
1427   }
1428 
1429   xmemset(mp->callout_data, 0, mp->callout_data_alloc_num * sizeof(CalloutData));
1430   return ONIG_NORMAL;
1431 }
1432 
/*
 * ADJUST_MATCH_PARAM(reg, mp): call adjust_match_param() and return its
 * result from the enclosing function unless it is ONIG_NORMAL.  Assigns to
 * a variable `r` that must exist at the expansion site.  The expansion is
 * two statements and already ends with `;` (no do/while wrapper).
 */
1433 #define ADJUST_MATCH_PARAM(reg, mp) \
1434   r = adjust_match_param(reg, mp);\
1435   if (r != ONIG_NORMAL) return r;
1436 
/* Address of the CalloutData entry for callout number `num`; callout
   numbers are 1-based, the array is 0-based.  No range check is done. */
1437 #define CALLOUT_DATA_AT_NUM(mp, num)  ((mp)->callout_data + ((num) - 1))
1438 
1439 extern int
onig_check_callout_data_and_clear_old_values(OnigCalloutArgs * args)1440 onig_check_callout_data_and_clear_old_values(OnigCalloutArgs* args)
1441 {
1442   OnigMatchParam* mp;
1443   int num;
1444   CalloutData* d;
1445 
1446   mp  = args->msa->mp;
1447   num = args->num;
1448 
1449   d = CALLOUT_DATA_AT_NUM(mp, num);
1450   if (d->last_match_at_call_counter != mp->match_at_call_counter) {
1451     xmemset(d, 0, sizeof(*d));
1452     d->last_match_at_call_counter = mp->match_at_call_counter;
1453     return d->last_match_at_call_counter;
1454   }
1455 
1456   return 0;
1457 }
1458 
1459 extern int
onig_get_callout_data_dont_clear_old(regex_t * reg,OnigMatchParam * mp,int callout_num,int slot,OnigType * type,OnigValue * val)1460 onig_get_callout_data_dont_clear_old(regex_t* reg, OnigMatchParam* mp,
1461                                      int callout_num, int slot,
1462                                      OnigType* type, OnigValue* val)
1463 {
1464   OnigType t;
1465   CalloutData* d;
1466 
1467   if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT;
1468 
1469   d = CALLOUT_DATA_AT_NUM(mp, callout_num);
1470   t = d->slot[slot].type;
1471   if (IS_NOT_NULL(type)) *type = t;
1472   if (IS_NOT_NULL(val))  *val  = d->slot[slot].val;
1473   return (t == ONIG_TYPE_VOID ? 1 : ONIG_NORMAL);
1474 }
1475 
1476 extern int
onig_get_callout_data_by_callout_args_self_dont_clear_old(OnigCalloutArgs * args,int slot,OnigType * type,OnigValue * val)1477 onig_get_callout_data_by_callout_args_self_dont_clear_old(OnigCalloutArgs* args,
1478                                                           int slot, OnigType* type,
1479                                                           OnigValue* val)
1480 {
1481   return onig_get_callout_data_dont_clear_old(args->regex, args->msa->mp,
1482                                               args->num, slot, type, val);
1483 }
1484 
1485 extern int
onig_get_callout_data(regex_t * reg,OnigMatchParam * mp,int callout_num,int slot,OnigType * type,OnigValue * val)1486 onig_get_callout_data(regex_t* reg, OnigMatchParam* mp,
1487                       int callout_num, int slot,
1488                       OnigType* type, OnigValue* val)
1489 {
1490   OnigType t;
1491   CalloutData* d;
1492 
1493   if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT;
1494 
1495   d = CALLOUT_DATA_AT_NUM(mp, callout_num);
1496   if (d->last_match_at_call_counter != mp->match_at_call_counter) {
1497     xmemset(d, 0, sizeof(*d));
1498     d->last_match_at_call_counter = mp->match_at_call_counter;
1499   }
1500 
1501   t = d->slot[slot].type;
1502   if (IS_NOT_NULL(type)) *type = t;
1503   if (IS_NOT_NULL(val))  *val  = d->slot[slot].val;
1504   return (t == ONIG_TYPE_VOID ? 1 : ONIG_NORMAL);
1505 }
1506 
1507 extern int
onig_get_callout_data_by_tag(regex_t * reg,OnigMatchParam * mp,const UChar * tag,const UChar * tag_end,int slot,OnigType * type,OnigValue * val)1508 onig_get_callout_data_by_tag(regex_t* reg, OnigMatchParam* mp,
1509                              const UChar* tag, const UChar* tag_end, int slot,
1510                              OnigType* type, OnigValue* val)
1511 {
1512   int num;
1513 
1514   num = onig_get_callout_num_by_tag(reg, tag, tag_end);
1515   if (num < 0)  return num;
1516   if (num == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME;
1517 
1518   return onig_get_callout_data(reg, mp, num, slot, type, val);
1519 }
1520 
1521 extern int
onig_get_callout_data_by_callout_args(OnigCalloutArgs * args,int callout_num,int slot,OnigType * type,OnigValue * val)1522 onig_get_callout_data_by_callout_args(OnigCalloutArgs* args,
1523                                       int callout_num, int slot,
1524                                       OnigType* type, OnigValue* val)
1525 {
1526   return onig_get_callout_data(args->regex, args->msa->mp, callout_num, slot,
1527                                type, val);
1528 }
1529 
1530 extern int
onig_get_callout_data_by_callout_args_self(OnigCalloutArgs * args,int slot,OnigType * type,OnigValue * val)1531 onig_get_callout_data_by_callout_args_self(OnigCalloutArgs* args,
1532                                            int slot, OnigType* type, OnigValue* val)
1533 {
1534   return onig_get_callout_data(args->regex, args->msa->mp, args->num, slot,
1535                                type, val);
1536 }
1537 
1538 extern int
onig_set_callout_data(regex_t * reg,OnigMatchParam * mp,int callout_num,int slot,OnigType type,OnigValue * val)1539 onig_set_callout_data(regex_t* reg, OnigMatchParam* mp,
1540                       int callout_num, int slot,
1541                       OnigType type, OnigValue* val)
1542 {
1543   CalloutData* d;
1544 
1545   if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT;
1546 
1547   d = CALLOUT_DATA_AT_NUM(mp, callout_num);
1548   d->slot[slot].type = type;
1549   d->slot[slot].val  = *val;
1550   d->last_match_at_call_counter = mp->match_at_call_counter;
1551 
1552   return ONIG_NORMAL;
1553 }
1554 
1555 extern int
onig_set_callout_data_by_tag(regex_t * reg,OnigMatchParam * mp,const UChar * tag,const UChar * tag_end,int slot,OnigType type,OnigValue * val)1556 onig_set_callout_data_by_tag(regex_t* reg, OnigMatchParam* mp,
1557                              const UChar* tag, const UChar* tag_end, int slot,
1558                              OnigType type, OnigValue* val)
1559 {
1560   int num;
1561 
1562   num = onig_get_callout_num_by_tag(reg, tag, tag_end);
1563   if (num < 0)  return num;
1564   if (num == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME;
1565 
1566   return onig_set_callout_data(reg, mp, num, slot, type, val);
1567 }
1568 
1569 extern int
onig_set_callout_data_by_callout_args(OnigCalloutArgs * args,int callout_num,int slot,OnigType type,OnigValue * val)1570 onig_set_callout_data_by_callout_args(OnigCalloutArgs* args,
1571                                       int callout_num, int slot,
1572                                       OnigType type, OnigValue* val)
1573 {
1574   return onig_set_callout_data(args->regex, args->msa->mp, callout_num, slot,
1575                                type, val);
1576 }
1577 
1578 extern int
onig_set_callout_data_by_callout_args_self(OnigCalloutArgs * args,int slot,OnigType type,OnigValue * val)1579 onig_set_callout_data_by_callout_args_self(OnigCalloutArgs* args,
1580                                            int slot, OnigType type, OnigValue* val)
1581 {
1582   return onig_set_callout_data(args->regex, args->msa->mp, args->num, slot,
1583                                type, val);
1584 }
1585 
1586 #else
/* Without USE_CALLOUT there is no callout data to prepare, so the macro
   expands to nothing. */
1587 #define ADJUST_MATCH_PARAM(reg, mp)
1588 #endif /* USE_CALLOUT */
1589 
1590 
1591 static int
stack_double(int * is_alloca,char ** arg_alloc_base,StackType ** arg_stk_base,StackType ** arg_stk_end,StackType ** arg_stk,MatchArg * msa)1592 stack_double(int* is_alloca, char** arg_alloc_base,
1593              StackType** arg_stk_base, StackType** arg_stk_end,
1594              StackType** arg_stk, MatchArg* msa)
1595 {
1596   unsigned int n;
1597   int used;
1598   size_t size;
1599   size_t new_size;
1600   char* alloc_base;
1601   char* new_alloc_base;
1602   StackType *stk_base, *stk_end, *stk;
1603 
1604   alloc_base = *arg_alloc_base;
1605   stk_base = *arg_stk_base;
1606   stk_end  = *arg_stk_end;
1607   stk      = *arg_stk;
1608 
1609   n = (unsigned int )(stk_end - stk_base);
1610   size = sizeof(StackIndex) * msa->ptr_num + sizeof(StackType) * n;
1611   n *= 2;
1612   new_size = sizeof(StackIndex) * msa->ptr_num + sizeof(StackType) * n;
1613   if (*is_alloca != 0) {
1614     new_alloc_base = (char* )xmalloc(new_size);
1615     if (IS_NULL(new_alloc_base)) {
1616       STACK_SAVE(msa, *is_alloca, alloc_base);
1617       return ONIGERR_MEMORY;
1618     }
1619     xmemcpy(new_alloc_base, alloc_base, size);
1620     *is_alloca = 0;
1621   }
1622   else {
1623     if (msa->match_stack_limit != 0 && n > msa->match_stack_limit) {
1624       if ((unsigned int )(stk_end - stk_base) == msa->match_stack_limit) {
1625         STACK_SAVE(msa, *is_alloca, alloc_base);
1626         return ONIGERR_MATCH_STACK_LIMIT_OVER;
1627       }
1628       else
1629         n = msa->match_stack_limit;
1630     }
1631     new_alloc_base = (char* )xrealloc(alloc_base, new_size);
1632     if (IS_NULL(new_alloc_base)) {
1633       STACK_SAVE(msa, *is_alloca, alloc_base);
1634       return ONIGERR_MEMORY;
1635     }
1636   }
1637 
1638   alloc_base = new_alloc_base;
1639   used = (int )(stk - stk_base);
1640   *arg_alloc_base = alloc_base;
1641   *arg_stk_base   = (StackType* )(alloc_base
1642                                   + (sizeof(StackIndex) * msa->ptr_num));
1643   *arg_stk      = *arg_stk_base + used;
1644   *arg_stk_end  = *arg_stk_base + n;
1645   return 0;
1646 }
1647 
/*
 * STACK_ENSURE(n): if fewer than `n` entries are free between stk and
 * stk_end, call stack_double() once and refresh the StackIndex array
 * pointers with UPDATE_FOR_STACK_REALLOC.  A non-zero result of
 * stack_double() is returned from the enclosing function.
 * Uses is_alloca, alloc_base, stk_base, stk_end, stk and msa from the
 * expansion site.  Note that the stack is grown only once per invocation;
 * every use visible in this part of the file passes n == 1.
 */
1648 #define STACK_ENSURE(n) do {\
1649     if ((int )(stk_end - stk) < (n)) {\
1650     int r = stack_double(&is_alloca, &alloc_base, &stk_base, &stk_end, &stk, msa);\
1651     if (r != 0) return r;\
1652     UPDATE_FOR_STACK_REALLOC;\
1653   }\
1654 } while(0)
1655 
/* Conversions between a stack entry pointer and its StackIndex relative to
   stk_base; indices stay valid when the stack is reallocated. */
1656 #define STACK_AT(index)        (stk_base + (index))
1657 #define GET_STACK_INDEX(stk)   ((stk) - stk_base)
1658 
/*
 * Basic push macros.  Each one writes the entry at `stk` and then invokes
 * STACK_INC (defined elsewhere; presumably it advances stk - confirm).
 * Unless stated otherwise a macro first reserves room with STACK_ENSURE(1),
 * which may return from the enclosing function on failure.
 */

/* Push an entry that consists of its type only. */
1659 #define STACK_PUSH_TYPE(stack_type) do {\
1660   STACK_ENSURE(1);\
1661   stk->type = (stack_type);\
1662   STACK_INC;\
1663 } while(0)
1664 
/* True when the entry's type has any bit of STK_MASK_TO_VOID_TARGET set. */
1665 #define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
1666 
/* Push a state entry: byte code position, string position and previous
   character position. */
1667 #define STACK_PUSH(stack_type,pat,s,sprev) do {\
1668   STACK_ENSURE(1);\
1669   stk->type = (stack_type);\
1670   stk->u.state.pcode     = (pat);\
1671   stk->u.state.pstr      = (s);\
1672   stk->u.state.pstr_prev = (sprev);\
1673   STACK_INC;\
1674 } while(0)
1675 
/* Same as STACK_PUSH, additionally storing `id` in the entry's zid. */
1676 #define STACK_PUSH_WITH_ZID(stack_type,pat,s,sprev,id) do {\
1677   STACK_ENSURE(1);\
1678   stk->type = (stack_type);\
1679   stk->zid  = (int )(id);\
1680   stk->u.state.pcode     = (pat);\
1681   stk->u.state.pstr      = (s);\
1682   stk->u.state.pstr_prev = (sprev);\
1683   STACK_INC;\
1684 } while(0)
1685 
/* Push type and byte code position only, WITHOUT calling STACK_ENSURE; the
   caller must already have made sure a free entry exists. */
1686 #define STACK_PUSH_ENSURED(stack_type,pat) do {\
1687   stk->type = (stack_type);\
1688   stk->u.state.pcode = (pat);\
1689   STACK_INC;\
1690 } while(0)
1691 
/* STACK_PUSH_BOTTOM: like STACK_PUSH_ENSURED (no STACK_ENSURE).  The
   ONIG_DEBUG_MATCH build also records `s` and `sprev` taken from the
   expansion site. */
1692 #ifdef ONIG_DEBUG_MATCH
1693 #define STACK_PUSH_BOTTOM(stack_type,pat) do {\
1694   stk->type = (stack_type);\
1695   stk->u.state.pcode = (pat);\
1696   stk->u.state.pstr      = s;\
1697   stk->u.state.pstr_prev = sprev;\
1698   STACK_INC;\
1699 } while (0)
1700 #else
1701 #define STACK_PUSH_BOTTOM(stack_type,pat) do {\
1702   stk->type = (stack_type);\
1703   stk->u.state.pcode = (pat);\
1704   STACK_INC;\
1705 } while (0)
1706 #endif
1707 
/* Shorthands for pushing STK_ALT / STK_SUPER_ALT state entries. */
1708 #define STACK_PUSH_ALT(pat,s,sprev)       STACK_PUSH(STK_ALT,pat,s,sprev)
1709 #define STACK_PUSH_SUPER_ALT(pat,s,sprev) STACK_PUSH(STK_SUPER_ALT,pat,s,sprev)
1710 #define STACK_PUSH_ALT_WITH_ZID(pat,s,sprev,id) \
1711   STACK_PUSH_WITH_ZID(STK_ALT,pat,s,sprev,id)
1712 
/* Disabled code: this macro refers to STK_REPEAT and to a union member
   u.repeat, neither of which is defined in the visible part of this file.
   It is never compiled because of the `#if 0`. */
1713 #if 0
1714 #define STACK_PUSH_REPEAT(sid, pat) do {\
1715   STACK_ENSURE(1);\
1716   stk->type = STK_REPEAT;\
1717   stk->zid  = (sid);\
1718   stk->u.repeat.pcode = (pat);\
1719   STACK_INC;\
1720 } while(0)
1721 #endif
1722 
/* Push a STK_REPEAT_INC entry for repeat `sid` holding the count `ct`.
   With USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR the previous repeat_stk[sid] is
   saved in the entry and repeat_stk[sid] is pointed at the new entry. */
1723 #define STACK_PUSH_REPEAT_INC(sid, ct) do {\
1724   STACK_ENSURE(1);\
1725   stk->type = STK_REPEAT_INC;\
1726   stk->zid  = (sid);\
1727   stk->u.repeat_inc.count = (ct);\
1728   SAVE_REPEAT_STK_VAR(sid);\
1729   LOAD_TO_REPEAT_STK_VAR(sid);\
1730   STACK_INC;\
1731 } while(0)
1732 
/* Push a STK_MEM_START entry for memory `mnum` at string position `s`.
   The previous mem_start_stk / mem_end_stk values are saved in the entry
   (the pop macros restore them); afterwards mem_start_stk[mnum] refers to
   the new entry and mem_end_stk[mnum] is INVALID_STACK_INDEX. */
1733 #define STACK_PUSH_MEM_START(mnum, s) do {\
1734   STACK_ENSURE(1);\
1735   stk->type = STK_MEM_START;\
1736   stk->zid  = (mnum);\
1737   stk->u.mem.pstr       = (s);\
1738   stk->u.mem.prev_start = mem_start_stk[mnum];\
1739   stk->u.mem.prev_end   = mem_end_stk[mnum];\
1740   mem_start_stk[mnum]   = GET_STACK_INDEX(stk);\
1741   mem_end_stk[mnum]     = INVALID_STACK_INDEX;\
1742   STACK_INC;\
1743 } while(0)
1744 
/* Push a STK_MEM_END entry for memory `mnum` at string position `s`.
   Saves the previous mem_start_stk / mem_end_stk values in the entry and
   points mem_end_stk[mnum] at the new entry; mem_start_stk is unchanged. */
1745 #define STACK_PUSH_MEM_END(mnum, s) do {\
1746   STACK_ENSURE(1);\
1747   stk->type = STK_MEM_END;\
1748   stk->zid  = (mnum);\
1749   stk->u.mem.pstr       = (s);\
1750   stk->u.mem.prev_start = mem_start_stk[mnum];\
1751   stk->u.mem.prev_end   = mem_end_stk[mnum];\
1752   mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
1753   STACK_INC;\
1754 } while(0)
1755 
/* Push a STK_MEM_END_MARK entry for memory `mnum`; it carries no position
   and does not touch mem_start_stk / mem_end_stk. */
1756 #define STACK_PUSH_MEM_END_MARK(mnum) do {\
1757   STACK_ENSURE(1);\
1758   stk->type = STK_MEM_END_MARK;\
1759   stk->zid  = (mnum);\
1760   STACK_INC;\
1761 } while(0)
1762 
/* STACK_GET_MEM_START(mnum, k): walk from the stack top towards stk_base
   and leave `k` at the STK_MEM_START entry of memory `mnum` that is not
   paired with a later end entry.  Every entry of `mnum` whose type has the
   STK_MASK_MEM_END_OR_MARK bit raises the nesting level; every matching
   STK_MEM_START lowers it again, and the search stops at the first start
   seen at level 0.  If no such entry exists the loop ends when `k` reaches
   stk_base. */
1763 #define STACK_GET_MEM_START(mnum, k) do {\
1764   int level = 0;\
1765   k = stk;\
1766   while (k > stk_base) {\
1767     k--;\
1768     if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
1769       && k->zid == (mnum)) {\
1770       level++;\
1771     }\
1772     else if (k->type == STK_MEM_START && k->zid == (mnum)) {\
1773       if (level == 0) break;\
1774       level--;\
1775     }\
1776   }\
1777 } while(0)
1778 
/*
 * STACK_GET_MEM_RANGE(k, mnum, start, end): scan forward from the entry `k`
 * up to (not including) the stack top `stk` for the outermost
 * STK_MEM_START / STK_MEM_END pair of memory `mnum`.
 *   k     - in/out: entry pointer to start from; left on the matching
 *           STK_MEM_END entry, or equal to stk when none was found
 *   mnum  - memory (capture group) number, compared with the entry's zid
 *   start - lvalue set to the position of the first matching STK_MEM_START
 *   end   - lvalue set to the position of the STK_MEM_END that closes it;
 *           left untouched when the scan reaches stk first
 * Nested start/end pairs of the same memory are skipped by counting levels.
 * The memory number of an entry is kept in `zid` (StackType has no
 * u.mem.num member), the same field STACK_GET_MEM_START compares.
 */
#define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
  int level = 0;\
  while (k < stk) {\
    if (k->type == STK_MEM_START && k->zid == (mnum)) {\
      if (level == 0) (start) = k->u.mem.pstr;\
      level++;\
    }\
    else if (k->type == STK_MEM_END && k->zid == (mnum)) {\
      level--;\
      if (level == 0) {\
        (end) = k->u.mem.pstr;\
        break;\
      }\
    }\
    k++;\
  }\
} while(0)
1796 
/* Push a STK_EMPTY_CHECK_START entry for empty-check `cnum` recording the
   string position `s`.  With USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR the
   previous empty_check_stk[cnum] is saved in the entry and the slot is
   pointed at the new entry. */
1797 #define STACK_PUSH_EMPTY_CHECK_START(cnum, s) do {\
1798   STACK_ENSURE(1);\
1799   stk->type = STK_EMPTY_CHECK_START;\
1800   stk->zid  = (cnum);\
1801   stk->u.empty_check.pstr = (s);\
1802   SAVE_EMPTY_CHECK_STK_VAR(cnum);\
1803   LOAD_TO_EMPTY_CHECK_STK_VAR(cnum);\
1804   STACK_INC;\
1805 } while(0)
1806 
/* Push a STK_EMPTY_CHECK_END entry for empty-check `cnum` (id only). */
1807 #define STACK_PUSH_EMPTY_CHECK_END(cnum) do {\
1808   STACK_ENSURE(1);\
1809   stk->type = STK_EMPTY_CHECK_END;\
1810   stk->zid  = (cnum);\
1811   STACK_INC;\
1812 } while(0)
1813 
/* Push a STK_CALL_FRAME entry holding the return byte code address `pat`.
   Uses u.call_frame, which StackType declares only under USE_CALL. */
1814 #define STACK_PUSH_CALL_FRAME(pat) do {\
1815   STACK_ENSURE(1);\
1816   stk->type = STK_CALL_FRAME;\
1817   stk->u.call_frame.ret_addr = (pat);\
1818   STACK_INC;\
1819 } while(0)
1820 
/* Push a STK_RETURN entry (type only). */
1821 #define STACK_PUSH_RETURN do {\
1822   STACK_ENSURE(1);\
1823   stk->type = STK_RETURN;\
1824   STACK_INC;\
1825 } while(0)
1826 
/* Push a STK_MARK entry with id `sid`; STACK_POP_TO_MARK(sid) pops back to
   it. */
1827 #define STACK_PUSH_MARK(sid) do {\
1828   STACK_ENSURE(1);\
1829   stk->type = STK_MARK;\
1830   stk->zid  = (sid);\
1831   STACK_INC;\
1832 } while(0)
1833 
/* Like STACK_PUSH_MARK, additionally storing `s` in u.val.v and `sprev` in
   u.val.v2 of the mark entry. */
1834 #define STACK_PUSH_MARK_WITH_POS(sid, s, sprev) do {\
1835   STACK_ENSURE(1);\
1836   stk->type = STK_MARK;\
1837   stk->zid  = (sid);\
1838   stk->u.val.v  = (UChar* )(s);\
1839   stk->u.val.v2 = (sprev);\
1840   STACK_INC;\
1841 } while(0)
1842 
/* Push a STK_SAVE_VAL entry with id `sid`, save type `stype` and value
   `sval` (stored in u.val.v; u.val.v2 is left unset). */
1843 #define STACK_PUSH_SAVE_VAL(sid, stype, sval) do {\
1844   STACK_ENSURE(1);\
1845   stk->type = STK_SAVE_VAL;\
1846   stk->zid  = (sid);\
1847   stk->u.val.type = (stype);\
1848   stk->u.val.v    = (UChar* )(sval);\
1849   STACK_INC;\
1850 } while(0)
1851 
/* Like STACK_PUSH_SAVE_VAL, additionally storing the variable `sprev` from
   the expansion site in u.val.v2. */
1852 #define STACK_PUSH_SAVE_VAL_WITH_SPREV(sid, stype, sval) do {\
1853   STACK_ENSURE(1);\
1854   stk->type = STK_SAVE_VAL;\
1855   stk->zid  = (sid);\
1856   stk->u.val.type = (stype);\
1857   stk->u.val.v    = (UChar* )(sval);\
1858   stk->u.val.v2   = sprev;\
1859   STACK_INC;\
1860 } while(0)
1861 
/* STACK_GET_SAVE_VAL_TYPE_LAST(stype, sval): search from the stack top
   towards stk_base for the most recent STK_SAVE_VAL entry of save type
   `stype` (any id) and copy its value into `sval`.  `sval` is left
   untouched when no such entry exists. */
1862 #define STACK_GET_SAVE_VAL_TYPE_LAST(stype, sval) do {\
1863   StackType *k = stk;\
1864   while (k > stk_base) {\
1865     k--;\
1866     STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST"); \
1867     if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)) {\
1868       (sval) = k->u.val.v;\
1869       break;\
1870     }\
1871   }\
1872 } while (0)
1873 
/* STACK_GET_SAVE_VAL_TYPE_LAST_ID(stype, sid, sval, clear): like the macro
   above, but the entry must also carry id `sid` and must be found at call
   nesting level 0: walking down the stack, a STK_RETURN entry raises the
   level and a STK_CALL_FRAME entry lowers it, and matching entries seen at
   any other level are skipped.  When `clear` is non-zero the entry found is
   turned into STK_VOID after its value has been read. */
1874 #define STACK_GET_SAVE_VAL_TYPE_LAST_ID(stype, sid, sval, clear) do {\
1875   int level = 0;\
1876   StackType *k = stk;\
1877   while (k > stk_base) {\
1878     k--;\
1879     STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID"); \
1880     if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\
1881         && k->zid == (sid)) {\
1882       if (level == 0) {\
1883         (sval) = k->u.val.v;\
1884         if (clear != 0) k->type = STK_VOID;\
1885         break;\
1886       }\
1887     }\
1888     else if (k->type == STK_CALL_FRAME)\
1889       level--;\
1890     else if (k->type == STK_RETURN)\
1891       level++;\
1892   }\
1893 } while (0)
1894 
/* STACK_GET_SAVE_VAL_TYPE_LAST_ID_WITH_SPREV(stype, sid, sval): same search
   as STACK_GET_SAVE_VAL_TYPE_LAST_ID without the clear option; besides
   `sval` it also restores the variable `sprev` at the expansion site from
   u.val.v2 of the entry found.
   NOTE(review): the label passed to STACK_BASE_CHECK below is the name of
   the previous macro, so an ONIG_DEBUG diagnostic from here is reported as
   "STACK_GET_SAVE_VAL_TYPE_LAST_ID". */
1895 #define STACK_GET_SAVE_VAL_TYPE_LAST_ID_WITH_SPREV(stype, sid, sval) do { \
1896   int level = 0;\
1897   StackType *k = stk;\
1898   while (k > stk_base) {\
1899     k--;\
1900     STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID"); \
1901     if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\
1902         && k->zid == (sid)) {\
1903       if (level == 0) {\
1904         (sval) = k->u.val.v;\
1905         sprev  = k->u.val.v2;\
1906         break;\
1907       }\
1908     }\
1909     else if (k->type == STK_CALL_FRAME)\
1910       level--;\
1911     else if (k->type == STK_RETURN)\
1912       level++;\
1913   }\
1914 } while (0)
1915 
/* Push a STK_CALLOUT entry for a callout without a name: zid is set to
   ONIG_NON_NAME_ID, and the callout number `anum` and function `func` are
   stored for POP_CALLOUT_CASE, which runs `func` as a retraction callout
   when the entry is popped.  STK_CALLOUT and u.callout exist only under
   USE_CALLOUT, so the macro can only be expanded in such builds. */
1916 #define STACK_PUSH_CALLOUT_CONTENTS(anum, func) do {\
1917   STACK_ENSURE(1);\
1918   stk->type = STK_CALLOUT;\
1919   stk->zid  = ONIG_NON_NAME_ID;\
1920   stk->u.callout.num = (anum);\
1921   stk->u.callout.func = (func);\
1922   STACK_INC;\
1923 } while(0)
1924 
/* Same as STACK_PUSH_CALLOUT_CONTENTS for a named callout: zid holds the
   name id `aid`. */
1925 #define STACK_PUSH_CALLOUT_NAME(aid, anum, func) do {\
1926   STACK_ENSURE(1);\
1927   stk->type = STK_CALLOUT;\
1928   stk->zid  = (aid);\
1929   stk->u.callout.num = (anum);\
1930   stk->u.callout.func = (func);\
1931   STACK_INC;\
1932 } while(0)
1933 
/* STACK_BASE_CHECK(p, at): ONIG_DEBUG-only sanity check.  When the entry
   pointer `p` lies below stk_base, the location string `at` is printed to
   DBGFP and the match is aborted with ONIGERR_STACK_BUG through
   MATCH_AT_ERROR_RETURN.  Expands to nothing in non-debug builds.  The
   debug expansion is a bare `if` block without a do/while wrapper. */
1934 #ifdef ONIG_DEBUG
1935 #define STACK_BASE_CHECK(p, at) \
1936   if ((p) < stk_base) {\
1937     fprintf(DBGFP, "at %s\n", at);\
1938     MATCH_AT_ERROR_RETURN(ONIGERR_STACK_BUG);\
1939   }
1940 #else
1941 #define STACK_BASE_CHECK(p, at)
1942 #endif
1943 
/* Drop exactly one entry from the stack top without undoing any of its
   side effects. */
1944 #define STACK_POP_ONE do {\
1945   stk--;\
1946   STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
1947 } while(0)
1948 
1949 
/* POP_CALLOUT_CASE: `else if` clause spliced into the default branch of
   STACK_POP.  When a STK_CALLOUT entry is popped, the callout function
   stored in it is run through RETRACTION_CALLOUT with the entry's name id,
   callout number and msa->mp->callout_user_data; RETRACTION_CALLOUT may
   jump to match_at_end.  Empty without USE_CALLOUT. */
1950 #ifdef USE_CALLOUT
1951 #define POP_CALLOUT_CASE \
1952   else if (stk->type == STK_CALLOUT) {\
1953     RETRACTION_CALLOUT(stk->u.callout.func, stk->zid, stk->u.callout.num, msa->mp->callout_user_data);\
1954   }
1955 #else
1956 #define POP_CALLOUT_CASE
1957 #endif
1958 
/*
 * STACK_POP: pop entries until one whose type has STK_MASK_POP_USED set
 * (STK_ALT / STK_SUPER_ALT) is reached; `stk` is left pointing at that
 * entry.  How much bookkeeping is undone on the way is selected by the
 * variable `pop_level` at the expansion site:
 *   STACK_POP_LEVEL_FREE       nothing is undone;
 *   STACK_POP_LEVEL_MEM_START  STK_MEM_START entries restore
 *                              mem_start_stk / mem_end_stk;
 *   any other value            every entry with STK_MASK_POP_HANDLED set is
 *                              examined: STK_MEM_START and STK_MEM_END
 *                              restore mem_start_stk / mem_end_stk, and the
 *                              POP_REPEAT_INC, POP_EMPTY_CHECK_START,
 *                              POP_CALL and POP_CALLOUT_CASE clauses handle
 *                              their entry types when they are configured
 *                              in.
 * The loops have no lower bound of their own; outside ONIG_DEBUG (where
 * STACK_BASE_CHECK is active) they rely on a STK_MASK_POP_USED entry being
 * present below the top.
 */
1959 #define STACK_POP  do {\
1960   switch (pop_level) {\
1961   case STACK_POP_LEVEL_FREE:\
1962     while (1) {\
1963       stk--;\
1964       STACK_BASE_CHECK(stk, "STACK_POP"); \
1965       if ((stk->type & STK_MASK_POP_USED) != 0)  break;\
1966     }\
1967     break;\
1968   case STACK_POP_LEVEL_MEM_START:\
1969     while (1) {\
1970       stk--;\
1971       STACK_BASE_CHECK(stk, "STACK_POP 2"); \
1972       if ((stk->type & STK_MASK_POP_USED) != 0)  break;\
1973       else if (stk->type == STK_MEM_START) {\
1974         mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
1975         mem_end_stk[stk->zid]   = stk->u.mem.prev_end;\
1976       }\
1977     }\
1978     break;\
1979   default:\
1980     while (1) {\
1981       stk--;\
1982       STACK_BASE_CHECK(stk, "STACK_POP 3"); \
1983       if ((stk->type & STK_MASK_POP_USED) != 0)  break;\
1984       else if ((stk->type & STK_MASK_POP_HANDLED) != 0) {\
1985         if (stk->type == STK_MEM_START) {\
1986           mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
1987           mem_end_stk[stk->zid]   = stk->u.mem.prev_end;\
1988         }\
1989         else if (stk->type == STK_MEM_END) {\
1990           mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
1991           mem_end_stk[stk->zid]   = stk->u.mem.prev_end;\
1992         }\
1993         POP_REPEAT_INC \
1994         POP_EMPTY_CHECK_START \
1995         POP_CALL \
1996         POP_CALLOUT_CASE\
1997       }\
1998     }\
1999     break;\
2000   }\
2001 } while(0)
2002 
/* Pop entries until the STK_MARK whose zid equals (sid) is reached;
   `stk` is left pointing at that mark.  STK_MARK entries with another
   id are skipped.  Other entries matching STK_MASK_POP_HANDLED_TIL
   restore capture state (STK_MEM_START / STK_MEM_END) and run the
   POP_REPEAT_INC, POP_EMPTY_CHECK_START and POP_CALL arms.
   Retraction callouts are deliberately not invoked here. */
#define STACK_POP_TO_MARK(sid) do {\
  while (1) {\
    stk--;\
    STACK_BASE_CHECK(stk, "STACK_POP_TO_MARK");\
    if ((stk->type & STK_MASK_POP_HANDLED_TIL) != 0) {\
      if (stk->type == STK_MARK) {\
        if (stk->zid == (sid)) break;\
      }\
      else {\
        if (stk->type == STK_MEM_START) {\
          mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
          mem_end_stk[stk->zid]   = stk->u.mem.prev_end;\
        }\
        else if (stk->type == STK_MEM_END) {\
          mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
          mem_end_stk[stk->zid]   = stk->u.mem.prev_end;\
        }\
        POP_REPEAT_INC \
        POP_EMPTY_CHECK_START \
        POP_CALL \
        /* Don't call callout here because negation of total success by (?!..) (?<!..) */\
      }\
    }\
  }\
} while(0)
2028 
2029 
/* Generic "pop until" body: pops entries until the first one whose type
   equals (til_type), leaving `stk` on it.  (aname) is the tag handed to
   STACK_BASE_CHECK.  Skipped entries matching STK_MASK_POP_HANDLED_TIL
   restore capture state and run the POP_REPEAT_INC,
   POP_EMPTY_CHECK_START and POP_CALL arms; retraction callouts are
   deliberately not invoked. */
#define POP_TIL_BODY(aname, til_type) do {\
  while (1) {\
    stk--;\
    STACK_BASE_CHECK(stk, (aname));\
    if ((stk->type & STK_MASK_POP_HANDLED_TIL) != 0) {\
      if (stk->type == (til_type)) break;\
      else {\
        if (stk->type == STK_MEM_START) {\
          mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
          mem_end_stk[stk->zid]   = stk->u.mem.prev_end;\
        }\
        else if (stk->type == STK_MEM_END) {\
          mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
          mem_end_stk[stk->zid]   = stk->u.mem.prev_end;\
        }\
        POP_REPEAT_INC \
        POP_EMPTY_CHECK_START \
        POP_CALL \
        /* Don't call callout here because negation of total success by (?!..) (?<!..) */\
      }\
    }\
  }\
} while(0)
2053 
2054 
/* Walk downward from `stk` with the caller-supplied cursor (k) and
   retype every entry satisfying IS_TO_VOID_TARGET as STK_VOID.
   The walk ends after voiding the STK_MARK whose zid equals (sid);
   marks carrying a different id are left untouched.
   `stk` itself is not moved; (k) ends on the voided mark. */
#define STACK_TO_VOID_TO_MARK(k,sid) do {\
  k = stk;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_TO_VOID_TO_MARK");\
    if (IS_TO_VOID_TARGET(k)) {\
      if (k->type == STK_MARK) {\
        if (k->zid == (sid)) {\
          k->type = STK_VOID;\
          break;\
        } /* don't void different id mark */ \
      }\
      else\
        k->type = STK_VOID;\
    }\
  }\
} while(0)
2072 
/* Scan downward from `stk` for the nearest STK_EMPTY_CHECK_START entry
   whose zid equals (sid) and leave the caller-supplied cursor (k)
   pointing at it.  There is no not-found exit: the loop only stops on a
   match (or, in debug builds, through STACK_BASE_CHECK). */
#define EMPTY_CHECK_START_SEARCH(sid, k) do {\
  k = stk;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "EMPTY_CHECK_START_SEARCH"); \
    if (k->type == STK_EMPTY_CHECK_START) {\
      if (k->zid == (sid)) break;\
    }\
  }\
} while(0)
2083 
/* Set (k) to the STK_EMPTY_CHECK_START entry for id (sid).
   With USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR and reg->num_call == 0 the
   entry is fetched directly via STACK_AT(empty_check_stk[sid]);
   in every other case it is located with EMPTY_CHECK_START_SEARCH. */
#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR

#define GET_EMPTY_CHECK_START(sid, k) do {\
  if (reg->num_call == 0) {\
    k = STACK_AT(empty_check_stk[sid]);\
  }\
  else {\
    EMPTY_CHECK_START_SEARCH(sid, k);\
  }\
} while(0)
#else

#define GET_EMPTY_CHECK_START(sid, k)  EMPTY_CHECK_START_SEARCH(sid, k)

#endif
2099 
2100 
/* Set (isnull) to 1 when the position recorded in the
   STK_EMPTY_CHECK_START entry for (sid) equals (s), otherwise to 0. */
#define STACK_EMPTY_CHECK(isnull, sid, s) do {\
  StackType* k;\
  GET_EMPTY_CHECK_START(sid, k);\
  (isnull) = (k->u.empty_check.pstr == (s));\
} while(0)
2106 
/* For a STK_MEM_START entry (k), store in (addr) the end address that
   was saved in k->u.mem.prev_end:
     - 0 when prev_end is INVALID_STACK_INDEX;
     - the pstr of the stack entry at index prev_end when the group's bit
       is set in (reg)->push_mem_end;
     - otherwise prev_end itself, reinterpreted as a UChar pointer. */
#define STACK_MEM_START_GET_PREV_END_ADDR(k /* STK_MEM_START*/, reg, addr) do {\
  if (k->u.mem.prev_end == INVALID_STACK_INDEX) {\
    (addr) = 0;\
  }\
  else {\
    if (MEM_STATUS_AT((reg)->push_mem_end, k->zid))\
      (addr) = STACK_AT(k->u.mem.prev_end)->u.mem.pstr;\
    else\
      (addr) = (UChar* )k->u.mem.prev_end;\
  }\
} while (0)
2118 
2119 #ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
/* Empty-loop check that also looks at captures made inside the loop.
   (isnull) results:
      0  the position moved since the STK_EMPTY_CHECK_START entry for
         (sid), or a tracked group has no previous end, or its previous
         start position differs from its previous end;
      1  nothing changed;
     -1  a tracked group's previous (empty) capture was at a position
         different from (s).
   "Tracked" means MEM_STATUS_LIMIT_AT((reg)->empty_status_mem, zid) is
   set.  The STK_MEM_START entries between the start entry and `stk` are
   examined in push order. */
#define STACK_EMPTY_CHECK_MEM(isnull, sid, s, reg) do {\
  StackType* k;\
  GET_EMPTY_CHECK_START(sid, k);\
  if (k->u.empty_check.pstr != (s)) {\
    (isnull) = 0;\
  }\
  else {\
    UChar* endp;\
    (isnull) = 1;\
    while (k < stk) {\
      if (k->type == STK_MEM_START &&\
        MEM_STATUS_LIMIT_AT((reg)->empty_status_mem, k->zid)) {\
        STACK_MEM_START_GET_PREV_END_ADDR(k, reg, endp);\
        if (endp == 0) {\
          (isnull) = 0; break;\
        }\
        else if (STACK_AT(k->u.mem.prev_start)->u.mem.pstr != endp) {\
          (isnull) = 0; break;\
        }\
        else if (endp != s) {\
          (isnull) = -1; /* empty, but position changed */ \
        }\
      }\
      k++;\
    }\
  }\
} while(0)
2147 
/* Variant of STACK_EMPTY_CHECK_MEM that always searches the stack and
   tracks nesting of the same (sid).
   Backward phase: STK_EMPTY_CHECK_END entries for (sid) raise `level`,
   STK_EMPTY_CHECK_START entries for (sid) lower it; the first start
   entry seen at level 0 is the one that gets compared with (s).
   Forward phase (only when the position is unchanged): walk up to
   `stk`, re-counting start/end entries for (sid), and apply the same
   capture test as STACK_EMPTY_CHECK_MEM to STK_MEM_START entries seen
   while level == 0.
   (isnull) results: 0 = not empty, 1 = empty, -1 = empty but a tracked
   capture was recorded at a different position. */
#define STACK_EMPTY_CHECK_MEM_REC(isnull,sid,s,reg) do {\
  int level = 0;\
  StackType* k = stk;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEM_REC");\
    if (k->type == STK_EMPTY_CHECK_START) {\
      if (k->zid == (sid)) {\
        if (level == 0) {\
          if (k->u.empty_check.pstr != (s)) {\
            (isnull) = 0;\
            break;\
          }\
          else {\
            UChar* endp;\
            (isnull) = 1;\
            while (k < stk) {\
              if (k->type == STK_MEM_START) {\
                if (level == 0 && \
                  MEM_STATUS_LIMIT_AT((reg)->empty_status_mem, k->zid) !=0) {\
                  STACK_MEM_START_GET_PREV_END_ADDR(k, reg, endp);\
                  if (endp == 0) {\
                    (isnull) = 0; break;\
                  }\
                  else if (STACK_AT(k->u.mem.prev_start)->u.mem.pstr != endp) { \
                    (isnull) = 0; break;\
                  }\
                  else if (endp != s) {\
                    (isnull) = -1; /* empty, but position changed */\
                  }\
                }\
              }\
              else if (k->type == STK_EMPTY_CHECK_START) {\
                if (k->zid == (sid)) level++;\
              }\
              else if (k->type == STK_EMPTY_CHECK_END) {\
                if (k->zid == (sid)) level--;\
              }\
              k++;\
            }\
            break;\
          }\
        }\
        else {\
          level--;\
        }\
      }\
    }\
    else if (k->type == STK_EMPTY_CHECK_END) {\
      if (k->zid == (sid)) level++;\
    }\
  }\
} while(0)
2201 #else
/* Empty-loop check used when USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
   is not defined.  Scans downward from `stk`, counting nesting with
   `level` (any STK_EMPTY_CHECK_END raises it, any
   STK_EMPTY_CHECK_START lowers it), and sets (isnull) to 1 when the
   start entry for (id) found at level 0 recorded the position (s),
   otherwise to 0.
   The entry id is read from `zid`, the same field the other empty-check
   macros in this file compare against. */
#define STACK_EMPTY_CHECK_REC(isnull,id,s) do {\
  int level = 0;\
  StackType* k = stk;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_REC"); \
    if (k->type == STK_EMPTY_CHECK_START) {\
      if (k->zid == (id)) {\
        if (level == 0) {\
          (isnull) = (k->u.empty_check.pstr == (s));\
          break;\
        }\
      }\
      level--;\
    }\
    else if (k->type == STK_EMPTY_CHECK_END) {\
      level++;\
    }\
  }\
} while(0)
2222 #endif /* USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT */
2223 
/* Find the repeat counter for id (sid) by scanning downward from `stk`
   and store it in (c).
   A STK_REPEAT_INC entry with matching zid supplies the count.
   When a STK_RETURN entry is met, the scan skips down to the
   STK_CALL_FRAME that balances it (nested RETURN/CALL_FRAME pairs are
   counted with `level`) before continuing.
   Both scans are guarded by STACK_BASE_CHECK so that debug builds catch
   a walk below stk_base. */
#define STACK_GET_REPEAT_COUNT_SEARCH(sid, c) do {\
  StackType* k = stk;\
  while (1) {\
    (k)--;\
    STACK_BASE_CHECK(k, "STACK_GET_REPEAT_COUNT_SEARCH");\
    if ((k)->type == STK_REPEAT_INC) {\
      if ((k)->zid == (sid)) {\
        (c) = (k)->u.repeat_inc.count;\
        break;\
      }\
    }\
    else if ((k)->type == STK_RETURN) {\
      int level = -1;\
      while (1) {\
        (k)--;\
        STACK_BASE_CHECK(k, "STACK_GET_REPEAT_COUNT_SEARCH 2");\
        if ((k)->type == STK_CALL_FRAME) {\
          level++;\
          if (level == 0) break;\
        }\
        else if ((k)->type == STK_RETURN) level--;\
      }\
    }\
  }\
} while(0)
2248 
/* Store in (c) the current repeat count for id (sid).
   With USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR and reg->num_call == 0 the
   count is read directly from the entry at repeat_stk[sid];
   in every other case STACK_GET_REPEAT_COUNT_SEARCH is used. */
#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR

#define STACK_GET_REPEAT_COUNT(sid, c) do {\
  if (reg->num_call == 0) {\
    (c) = (STACK_AT(repeat_stk[sid]))->u.repeat_inc.count;\
  }\
  else {\
    STACK_GET_REPEAT_COUNT_SEARCH(sid, c);\
  }\
} while(0)
#else
#define STACK_GET_REPEAT_COUNT(sid, c) STACK_GET_REPEAT_COUNT_SEARCH(sid, c)
#endif
2262 
2263 #ifdef USE_CALL
/* Store in (addr) the ret_addr of the innermost open call frame.
   Scans downward from `stk`; each STK_RETURN met raises `level` and
   each STK_CALL_FRAME met at level > 0 lowers it, so frames that were
   already returned from are skipped.  The first STK_CALL_FRAME seen at
   level 0 supplies the address. */
#define STACK_RETURN(addr)  do {\
  int level = 0;\
  StackType* k = stk;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_RETURN"); \
    if (k->type == STK_CALL_FRAME) {\
      if (level == 0) {\
        (addr) = k->u.call_frame.ret_addr;\
        break;\
      }\
      else level--;\
    }\
    else if (k->type == STK_RETURN)\
      level++;\
  }\
} while(0)
2281 
/* Same search as STACK_RETURN, but the cursor is the caller-supplied
   (k), which is left pointing at the STK_CALL_FRAME entry whose
   ret_addr was stored in (addr). */
#define GET_STACK_RETURN_CALL(k,addr) do {\
  int level = 0;\
  k = stk;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "GET_STACK_RETURN_CALL");\
    if (k->type == STK_CALL_FRAME) {\
      if (level == 0) {\
        (addr) = k->u.call_frame.ret_addr;\
        break;\
      }\
      else level--;\
    }\
    else if (k->type == STK_RETURN)\
      level++;\
  }\
} while(0)
2299 #endif
2300 
2301 
/* Compare (len) bytes at s1 and s2; jump to the caller's `fail` label on
   the first difference.
   All three arguments must be modifiable lvalues: s1 and s2 are
   advanced past every byte compared and len is counted down (it is
   left at -1 when all bytes matched). */
#define STRING_CMP(s1,s2,len) do {\
  while (len-- > 0) {\
    if (*s1++ != *s2++) goto fail;\
  }\
} while(0)
2307 
/* Case-insensitive compare through string_cmp_ic() using the caller's
   `encode` variable; jumps to the caller's `fail` label when
   string_cmp_ic() returns 0.  (ps2) is the address of the second string
   pointer and is handed to string_cmp_ic() unchanged. */
#define STRING_CMP_IC(case_fold_flag,s1,ps2,len) do {\
  if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \
    goto fail; \
} while(0)
2312 
/*
 * Case-insensitive comparison of two byte ranges of length mblen.
 *
 * Both inputs are consumed one case-folded unit at a time with
 * ONIGENC_MBC_CASE_FOLD; the folded byte sequences must have the same
 * length and content.  The loop is driven by the first range only.
 *
 * Returns 1 when every folded unit is equal, and then stores the advanced
 * second pointer back through ps2.  Returns 0 on the first difference and
 * leaves *ps2 untouched.
 */
static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
                         UChar* s1, UChar** ps2, int mblen)
{
  UChar fold1[ONIGENC_MBC_CASE_FOLD_MAXLEN];
  UChar fold2[ONIGENC_MBC_CASE_FOLD_MAXLEN];
  UChar* s2   = *ps2;
  UChar* end1 = s1 + mblen;
  UChar* end2 = s2 + mblen;

  while (s1 < end1) {
    int i;
    int n1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, end1, fold1);
    int n2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, end2, fold2);

    if (n1 != n2) return 0;

    for (i = 0; i < n1; i++) {
      if (fold1[i] != fold2[i]) return 0;
    }
  }

  *ps2 = s2;
  return 1;
}
2340 
/* Like STRING_CMP, but reports the outcome in (is_fail) instead of
   jumping: 1 on the first differing byte, 0 when all (len) bytes match.
   s1, s2 and len must be modifiable lvalues; they are advanced /
   counted down as a side effect. */
#define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
  is_fail = 0;\
  while (len-- > 0) {\
    if (*s1++ != *s2++) {\
      is_fail = 1; break;\
    }\
  }\
} while(0)
2349 
/* Like STRING_CMP_IC, but reports the outcome in (is_fail) instead of
   jumping: 1 when string_cmp_ic() returns 0, otherwise 0.
   Relies on the caller's `encode` variable. */
#define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,is_fail) do {\
  if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \
    is_fail = 1; \
  else \
    is_fail = 0; \
} while(0)
2356 
2357 
/* Position helpers for the matcher.  They read the locals `str`, `end`,
   `s` and `right_range` of the function they expand in. */
#define IS_EMPTY_STR           (str == end)
#define ON_STR_BEGIN(s)        ((s) == str)
#define ON_STR_END(s)          ((s) == end)
/* True when at least 1 / at least (n) bytes remain before right_range. */
#define DATA_ENSURE_CHECK1     (s < right_range)
#define DATA_ENSURE_CHECK(n)   (s + (n) <= right_range)
/* Jump to the caller's `fail` label unless (n) bytes remain.
   Wrapped in do/while(0) so the macro is a single statement and cannot
   capture a following `else`. */
#define DATA_ENSURE(n)         do { if (s + (n) > right_range) goto fail; } while(0)

/* Reset right_range from the in_right_range argument of match_at(). */
#define INIT_RIGHT_RANGE    right_range = (UChar* )in_right_range
2366 
#ifdef USE_CAPTURE_HISTORY
/*
 * Build the capture-history children of `node` from the stack entries in
 * [*kp, stk_top).
 *
 * Every STK_MEM_START whose group is enabled in reg->capture_history
 * (and is <= ONIG_MAX_CAPTURE_HISTORY_GROUP) becomes a child node; the
 * function recurses to collect that child's own descendants and its end.
 * A STK_MEM_END whose zid equals node->group closes `node`.
 *
 * Offsets stored in beg/end are relative to `str`.
 *
 * Returns 0 when the closing STK_MEM_END of `node` was found (*kp then
 * points at it), 1 when stk_top was reached first (root node ending), or
 * the non-zero result of a failed helper (allocation failure is reported
 * through CHECK_NULL_RETURN_MEMERR).
 */
static int
make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp,
                          StackType* stk_top, UChar* str, regex_t* reg)
{
  int n, r;
  OnigCaptureTreeNode* child;
  StackType* k = *kp;

  while (k < stk_top) {
    if (k->type == STK_MEM_START) {
      n = k->zid;
      if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
          MEM_STATUS_AT(reg->capture_history, n) != 0) {
        child = history_node_new();
        CHECK_NULL_RETURN_MEMERR(child);
        child->group = n;
        child->beg = (int )(k->u.mem.pstr - str);
        r = history_tree_add_child(node, child);
        if (r != 0) {
          /* The child is not reachable from the tree here, so release it
             instead of leaking it.
             NOTE(review): assumes history_tree_add_child() does not link
             the child on failure - confirm. */
          history_tree_free(child);
          return r;
        }
        *kp = (k + 1);
        r = make_capture_history_tree(child, kp, stk_top, str, reg);
        if (r != 0) return r;

        /* the recursive call left *kp on the child's STK_MEM_END entry */
        k = *kp;
        child->end = (int )(k->u.mem.pstr - str);
      }
    }
    else if (k->type == STK_MEM_END) {
      if (k->zid == node->group) {
        node->end = (int )(k->u.mem.pstr - str);
        *kp = k;
        return 0;
      }
    }
    k++;
  }

  return 1; /* 1: root node ending. */
}
#endif
2408 
2409 #ifdef USE_BACKREF_WITH_LEVEL
/* Return 1 when group number `mem` occurs among the first `num` entries
   of `memp`, otherwise 0. */
static int mem_is_in_memp(int mem, int num, MemNumType* memp)
{
  int idx = 0;

  while (idx < num) {
    if ((int )memp[idx] == mem)
      return 1;
    idx++;
  }

  return 0;
}
2419 
2420 static int
backref_match_at_nested_level(regex_t * reg,StackType * top,StackType * stk_base,int ignore_case,int case_fold_flag,int nest,int mem_num,MemNumType * memp,UChar ** s,const UChar * send)2421 backref_match_at_nested_level(regex_t* reg,
2422                               StackType* top, StackType* stk_base,
2423                               int ignore_case, int case_fold_flag,
2424                               int nest, int mem_num, MemNumType* memp,
2425                               UChar** s, const UChar* send)
2426 {
2427   UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
2428   int level;
2429   StackType* k;
2430 
2431   level = 0;
2432   k = top;
2433   k--;
2434   while (k >= stk_base) {
2435     if (k->type == STK_CALL_FRAME) {
2436       level--;
2437     }
2438     else if (k->type == STK_RETURN) {
2439       level++;
2440     }
2441     else if (level == nest) {
2442       if (k->type == STK_MEM_START) {
2443         if (mem_is_in_memp(k->zid, mem_num, memp)) {
2444           pstart = k->u.mem.pstr;
2445           if (IS_NOT_NULL(pend)) {
2446             if (pend - pstart > send - *s) return 0; /* or goto next_mem; */
2447             p  = pstart;
2448             ss = *s;
2449 
2450             if (ignore_case != 0) {
2451               if (string_cmp_ic(reg->enc, case_fold_flag,
2452                                 pstart, &ss, (int )(pend - pstart)) == 0)
2453                 return 0; /* or goto next_mem; */
2454             }
2455             else {
2456               while (p < pend) {
2457                 if (*p++ != *ss++) return 0; /* or goto next_mem; */
2458               }
2459             }
2460 
2461             *s = ss;
2462             return 1;
2463           }
2464         }
2465       }
2466       else if (k->type == STK_MEM_END) {
2467         if (mem_is_in_memp(k->zid, mem_num, memp)) {
2468           pend = k->u.mem.pstr;
2469         }
2470       }
2471     }
2472     k--;
2473   }
2474 
2475   return 0;
2476 }
2477 
2478 static int
backref_check_at_nested_level(regex_t * reg,StackType * top,StackType * stk_base,int nest,int mem_num,MemNumType * memp)2479 backref_check_at_nested_level(regex_t* reg,
2480                               StackType* top, StackType* stk_base,
2481                               int nest, int mem_num, MemNumType* memp)
2482 {
2483   int level;
2484   StackType* k;
2485 
2486   level = 0;
2487   k = top;
2488   k--;
2489   while (k >= stk_base) {
2490     if (k->type == STK_CALL_FRAME) {
2491       level--;
2492     }
2493     else if (k->type == STK_RETURN) {
2494       level++;
2495     }
2496     else if (level == nest) {
2497       if (k->type == STK_MEM_END) {
2498         if (mem_is_in_memp(k->zid, mem_num, memp)) {
2499           return 1;
2500         }
2501       }
2502     }
2503     k--;
2504   }
2505 
2506   return 0;
2507 }
2508 #endif /* USE_BACKREF_WITH_LEVEL */
2509 
2510 
2511 #ifdef ONIG_DEBUG_STATISTICS
2512 
/* Timing back end for the per-opcode statistics.
   ts / te hold the timestamps taken by SOP_IN / SOP_OUT.
   With USE_TIMEOFDAY: gettimeofday() is used and TIMEDIFF yields the
   difference in microseconds.
   Otherwise: times() is used and TIMEDIFF yields the difference of the
   tms_utime fields. */
#ifdef USE_TIMEOFDAY

static struct timeval ts, te;
#define GETTIME(t)        gettimeofday(&(t), (struct timezone* )0)
#define TIMEDIFF(te,ts)   (((te).tv_usec - (ts).tv_usec) + \
                           (((te).tv_sec - (ts).tv_sec)*1000000))
#else

static struct tms ts, te;
#define GETTIME(t)         times(&(t))
#define TIMEDIFF(te,ts)   ((te).tms_utime - (ts).tms_utime)

#endif /* USE_TIMEOFDAY */
2526 
/* Per-opcode statistics tables (indexed by opcode in SOP_IN / SOP_OUT). */
static int OpCounter[256];          /* number of times each opcode was entered */
static int OpPrevCounter[256];      /* times each opcode ran right before OpPrevTarget */
static unsigned long OpTime[256];   /* accumulated TIMEDIFF per opcode */
static int OpCurr = OP_FINISH;      /* opcode most recently passed to SOP_IN */
static int OpPrevTarget = OP_FAIL;  /* opcode whose predecessors are tallied */
static int MaxStackDepth = 0;       /* largest stk - stk_base seen by STACK_INC */
2533 
/* SOP_IN: called on entry to an opcode handler.  If (opcode) is
   OpPrevTarget, the opcode that ran before it (OpCurr) gets its
   OpPrevCounter bumped; then (opcode) becomes OpCurr, its OpCounter is
   incremented and the start timestamp is taken.
   (opcode) is expanded several times. */
#define SOP_IN(opcode) do {\
  if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
  OpCurr = opcode;\
  OpCounter[opcode]++;\
  GETTIME(ts);\
} while(0)

/* SOP_OUT: called on leaving a handler; adds the elapsed time since the
   matching SOP_IN to OpTime[OpCurr]. */
#define SOP_OUT do {\
  GETTIME(te);\
  OpTime[OpCurr] += TIMEDIFF(te, ts);\
} while(0)
2545 
2546 extern void
onig_statistics_init(void)2547 onig_statistics_init(void)
2548 {
2549   int i;
2550   for (i = 0; i < 256; i++) {
2551     OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0;
2552   }
2553   MaxStackDepth = 0;
2554 }
2555 
2556 extern int
onig_print_statistics(FILE * f)2557 onig_print_statistics(FILE* f)
2558 {
2559   int r;
2560   int i;
2561 
2562   r = fprintf(f, "   count      prev        time\n");
2563   if (r < 0) return -1;
2564 
2565   for (i = 0; OpInfo[i].opcode >= 0; i++) {
2566     r = fprintf(f, "%8d: %8d: %10ld: %s\n",
2567                 OpCounter[i], OpPrevCounter[i], OpTime[i], OpInfo[i].name);
2568     if (r < 0) return -1;
2569   }
2570   r = fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
2571   if (r < 0) return -1;
2572 
2573   return 0;
2574 }
2575 
/* Statistics build: advance `stk` by one entry and update MaxStackDepth
   when the new depth (stk - stk_base) exceeds it. */
#define STACK_INC do {\
  stk++;\
  if (stk - stk_base > MaxStackDepth) \
    MaxStackDepth = stk - stk_base;\
} while(0)
2581 
2582 #else
/* Non-statistics build: STACK_INC only advances `stk`, and the SOP_IN /
   SOP_OUT instrumentation hooks expand to nothing. */
#define STACK_INC     stk++

#define SOP_IN(opcode)
#define SOP_OUT
2587 #endif
2588 
2589 
/* matching region of POSIX API */
/* match_at() fills an array of these, in place of an OnigRegion, when
   OPTON_POSIX_REGION(msa->options) is set. */
typedef int regoff_t;

typedef struct {
  regoff_t  rm_so;  /* start offset from str; ONIG_REGION_NOTPOS if the group is unset */
  regoff_t  rm_eo;  /* end offset from str;   ONIG_REGION_NOTPOS if the group is unset */
} posix_regmatch_t;
2597 
2598 
2599 
/* Dispatch scaffolding for the bytecode interpreter in match_at().

   USE_THREADED_CODE:
     Handlers are labels (L_<op>) reached with `goto *`.  The target is
     p->opaddr under USE_DIRECT_THREADED_CODE, otherwise
     opcode_to_label[p->opcode].  CASE_OP(x) defines the label, runs
     SOP_IN, remembers the entry position in sbegin and emits the debug
     trace.  NEXT_OP stores sbegin into sprev before dispatching; JUMP_OP
     dispatches without touching sprev.

   Otherwise:
     A `while (1) { switch (p->opcode) { ... } sprev = sbegin; }` loop.
     NEXT_OP is `break`, so control reaches the `sprev = sbegin`
     statement at the bottom of the loop; JUMP_OP / GOTO_OP is
     `continue`, which skips it.

   Several of these expand to more than one statement and must not be
   used as the body of an unbraced if/else. */
#ifdef USE_THREADED_CODE

#define BYTECODE_INTERPRETER_START      GOTO_OP;
#define BYTECODE_INTERPRETER_END
#define CASE_OP(x)   L_##x: SOP_IN(OP_##x); sbegin = s; MATCH_DEBUG_OUT(0)
#define DEFAULT_OP   /* L_DEFAULT: */
#define NEXT_OP      sprev = sbegin; JUMP_OP
#define JUMP_OP      GOTO_OP
#ifdef USE_DIRECT_THREADED_CODE
#define GOTO_OP      goto *(p->opaddr)
#else
#define GOTO_OP      goto *opcode_to_label[p->opcode]
#endif
#define BREAK_OP     /* Nothing */

#else

#define BYTECODE_INTERPRETER_START \
  while (1) {\
  MATCH_DEBUG_OUT(0)\
  sbegin = s;\
  switch (p->opcode) {
#define BYTECODE_INTERPRETER_END  } sprev = sbegin; }
#define CASE_OP(x)   case OP_##x: SOP_IN(OP_##x);
#define DEFAULT_OP   default:
#define NEXT_OP      break
#define JUMP_OP      GOTO_OP
#define GOTO_OP      continue; break
#define BREAK_OP     break

#endif /* USE_THREADED_CODE */
2631 
/* Handler epilogues.  INC_OP advances the operation pointer `p`.
   The others first close the statistics timer with SOP_OUT and then:
     JUMP_OUT_WITH_SPREV_SET  - leave through NEXT_OP
     JUMP_OUT                 - leave through JUMP_OP
     BREAK_OUT                - leave through BREAK_OP
     CHECK_INTERRUPT_JUMP_OUT - run CHECK_INTERRUPT_IN_MATCH, then JUMP_OP
   Each expands to several statements; use only where a statement
   sequence is valid. */
#define INC_OP       p++
#define JUMP_OUT_WITH_SPREV_SET   SOP_OUT; NEXT_OP
#define JUMP_OUT                  SOP_OUT; JUMP_OP
#define BREAK_OUT                 SOP_OUT; BREAK_OP
#define CHECK_INTERRUPT_JUMP_OUT  SOP_OUT; CHECK_INTERRUPT_IN_MATCH; JUMP_OP
2637 
2638 
/* Per-step trace of the interpreter (ONIG_DEBUG_MATCH builds only).
   Writes one line to DBGFP containing: the running step counter, the
   current stack index, the offset of `s` from `str` (-1 when s is
   NULL), up to 7 characters of the subject starting at `s` (followed by
   "..." when more text remains), and the operation at p - (offset)
   rendered by print_compiled_byte_code().  For OP_RETURN the call
   frame index and return target are appended.
   Uses the match_at() locals i, s, str, end, p, stk, stkp, reg, encode
   and counter.
   NOTE: the definition ends in `while(0);` - the trailing semicolon is
   part of the macro because the dispatch macros invoke it without one. */
#ifdef ONIG_DEBUG_MATCH
#define MATCH_DEBUG_OUT(offset) do {\
      Operation *xp;\
      UChar *q, *bp, buf[50];\
      int len, spos;\
      spos = IS_NOT_NULL(s) ? (int )(s - str) : -1;\
      xp = p - (offset);\
      fprintf(DBGFP, "%7u: %7ld: %4d> \"",\
              counter, GET_STACK_INDEX(stk), spos);\
      counter++;\
      bp = buf;\
      if (IS_NOT_NULL(s)) {\
        for (i = 0, q = s; i < 7 && q < end; i++) {\
          len = enclen(encode, q);\
          while (len-- > 0) *bp++ = *q++;\
        }\
        if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; }\
        else         { xmemcpy(bp, "\"",    1); bp += 1; }\
      }\
      else {\
        xmemcpy(bp, "\"", 1); bp += 1;\
      }\
      *bp = 0;\
      fputs((char* )buf, DBGFP);\
      for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', DBGFP);\
      if (xp == FinishCode)\
        fprintf(DBGFP, "----: finish");\
      else {\
        int index;\
        enum OpCode zopcode;\
        Operation* addr;\
        index = (int )(xp - reg->ops);\
        fprintf(DBGFP, "%4d: ", index);\
        print_compiled_byte_code(DBGFP, reg, index, reg->ops, encode); \
        zopcode = GET_OPCODE(reg, index);\
        if (zopcode == OP_RETURN) {\
          GET_STACK_RETURN_CALL(stkp, addr);\
          fprintf(DBGFP, " f:%ld -> %d", \
            GET_STACK_INDEX(stkp), (int )(addr - reg->ops));\
        }\
      }\
      fprintf(DBGFP, "\n");\
  } while(0);
#else
#define MATCH_DEBUG_OUT(offset)
#endif
2685 
/* Abort the current match_at() run: store (err_code) in best_len and
   jump to the match_at_end label. */
#define MATCH_AT_ERROR_RETURN(err_code) do {\
  best_len = err_code; goto match_at_end;\
} while(0)
2689 
2690 
2691 /* match data(str - end) from position (sstart). */
2692 /* if sstart == str then set sprev to NULL. */
2693 static int
match_at(regex_t * reg,const UChar * str,const UChar * end,const UChar * in_right_range,const UChar * sstart,UChar * sprev,MatchArg * msa)2694 match_at(regex_t* reg, const UChar* str, const UChar* end,
2695          const UChar* in_right_range, const UChar* sstart, UChar* sprev,
2696          MatchArg* msa)
2697 {
2698 
2699 #if defined(USE_DIRECT_THREADED_CODE)
2700   static Operation FinishCode[] = { { .opaddr=&&L_FINISH } };
2701 #else
2702   static Operation FinishCode[] = { { OP_FINISH } };
2703 #endif
2704 
2705 #ifdef USE_THREADED_CODE
2706   static const void *opcode_to_label[] = {
2707   &&L_FINISH,
2708   &&L_END,
2709   &&L_STR_1,
2710   &&L_STR_2,
2711   &&L_STR_3,
2712   &&L_STR_4,
2713   &&L_STR_5,
2714   &&L_STR_N,
2715   &&L_STR_MB2N1,
2716   &&L_STR_MB2N2,
2717   &&L_STR_MB2N3,
2718   &&L_STR_MB2N,
2719   &&L_STR_MB3N,
2720   &&L_STR_MBN,
2721   &&L_CCLASS,
2722   &&L_CCLASS_MB,
2723   &&L_CCLASS_MIX,
2724   &&L_CCLASS_NOT,
2725   &&L_CCLASS_MB_NOT,
2726   &&L_CCLASS_MIX_NOT,
2727   &&L_ANYCHAR,
2728   &&L_ANYCHAR_ML,
2729   &&L_ANYCHAR_STAR,
2730   &&L_ANYCHAR_ML_STAR,
2731   &&L_ANYCHAR_STAR_PEEK_NEXT,
2732   &&L_ANYCHAR_ML_STAR_PEEK_NEXT,
2733   &&L_WORD,
2734   &&L_WORD_ASCII,
2735   &&L_NO_WORD,
2736   &&L_NO_WORD_ASCII,
2737   &&L_WORD_BOUNDARY,
2738   &&L_NO_WORD_BOUNDARY,
2739   &&L_WORD_BEGIN,
2740   &&L_WORD_END,
2741   &&L_TEXT_SEGMENT_BOUNDARY,
2742   &&L_BEGIN_BUF,
2743   &&L_END_BUF,
2744   &&L_BEGIN_LINE,
2745   &&L_END_LINE,
2746   &&L_SEMI_END_BUF,
2747   &&L_CHECK_POSITION,
2748   &&L_BACKREF1,
2749   &&L_BACKREF2,
2750   &&L_BACKREF_N,
2751   &&L_BACKREF_N_IC,
2752   &&L_BACKREF_MULTI,
2753   &&L_BACKREF_MULTI_IC,
2754   &&L_BACKREF_WITH_LEVEL,
2755   &&L_BACKREF_WITH_LEVEL_IC,
2756   &&L_BACKREF_CHECK,
2757   &&L_BACKREF_CHECK_WITH_LEVEL,
2758   &&L_MEM_START,
2759   &&L_MEM_START_PUSH,
2760   &&L_MEM_END_PUSH,
2761 #ifdef USE_CALL
2762   &&L_MEM_END_PUSH_REC,
2763 #endif
2764   &&L_MEM_END,
2765 #ifdef USE_CALL
2766   &&L_MEM_END_REC,
2767 #endif
2768   &&L_FAIL,
2769   &&L_JUMP,
2770   &&L_PUSH,
2771   &&L_PUSH_SUPER,
2772   &&L_POP,
2773   &&L_POP_TO_MARK,
2774 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
2775   &&L_PUSH_OR_JUMP_EXACT1,
2776 #endif
2777   &&L_PUSH_IF_PEEK_NEXT,
2778   &&L_REPEAT,
2779   &&L_REPEAT_NG,
2780   &&L_REPEAT_INC,
2781   &&L_REPEAT_INC_NG,
2782   &&L_EMPTY_CHECK_START,
2783   &&L_EMPTY_CHECK_END,
2784   &&L_EMPTY_CHECK_END_MEMST,
2785 #ifdef USE_CALL
2786   &&L_EMPTY_CHECK_END_MEMST_PUSH,
2787 #endif
2788   &&L_MOVE,
2789   &&L_STEP_BACK_START,
2790   &&L_STEP_BACK_NEXT,
2791   &&L_CUT_TO_MARK,
2792   &&L_MARK,
2793   &&L_SAVE_VAL,
2794   &&L_UPDATE_VAR,
2795 #ifdef USE_CALL
2796   &&L_CALL,
2797   &&L_RETURN,
2798 #endif
2799 #ifdef USE_CALLOUT
2800   &&L_CALLOUT_CONTENTS,
2801   &&L_CALLOUT_NAME,
2802 #endif
2803   };
2804 #endif
2805 
2806   int i, n, num_mem, best_len, pop_level;
2807   LengthType tlen, tlen2;
2808   MemNumType mem;
2809   RelAddrType addr;
2810   UChar *s, *ps, *sbegin;
2811   UChar *right_range;
2812   int is_alloca;
2813   char *alloc_base;
2814   StackType *stk_base, *stk, *stk_end;
2815   StackType *stkp; /* used as any purpose. */
2816   StackIndex *mem_start_stk, *mem_end_stk;
2817   UChar* keep;
2818 
2819 #ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
2820   StackIndex *repeat_stk;
2821   StackIndex *empty_check_stk;
2822 #endif
2823 #ifdef USE_RETRY_LIMIT
2824   unsigned long retry_limit_in_match;
2825   unsigned long retry_in_match_counter;
2826 #endif
2827 #ifdef USE_CALLOUT
2828   int of;
2829 #endif
2830 
2831   Operation* p = reg->ops;
2832   OnigOptionType option = reg->options;
2833   OnigEncoding encode = reg->enc;
2834   OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
2835 
2836 #if defined(USE_CALL) && defined(SUBEXP_CALL_MAX_NEST_LEVEL)
2837   unsigned long subexp_call_nest_counter = 0;
2838 #endif
2839 
2840 #ifdef ONIG_DEBUG_MATCH
2841   static unsigned int counter = 1;
2842 #endif
2843 
2844 #ifdef USE_DIRECT_THREADED_CODE
2845   if (IS_NULL(msa)) {
2846     for (i = 0; i < reg->ops_used; i++) {
2847        const void* addr;
2848        addr = opcode_to_label[reg->ocs[i]];
2849        p->opaddr = addr;
2850        p++;
2851     }
2852     return ONIG_NORMAL;
2853   }
2854 #endif
2855 
2856 #ifdef USE_CALLOUT
2857   msa->mp->match_at_call_counter++;
2858 #endif
2859 
2860 #ifdef USE_RETRY_LIMIT
2861   retry_limit_in_match = msa->retry_limit_in_match;
2862   if (msa->retry_limit_in_search != 0) {
2863     unsigned long rem = msa->retry_limit_in_search
2864                       - msa->retry_limit_in_search_counter;
2865     if (rem < retry_limit_in_match)
2866       retry_limit_in_match = rem;
2867   }
2868 #endif
2869 
2870   pop_level = reg->stack_pop_level;
2871   num_mem = reg->num_mem;
2872   STACK_INIT(INIT_MATCH_STACK_SIZE);
2873   UPDATE_FOR_STACK_REALLOC;
2874   for (i = 1; i <= num_mem; i++) {
2875     mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX;
2876   }
2877 
2878 #ifdef ONIG_DEBUG_MATCH
2879   fprintf(DBGFP, "match_at: str: %p, end: %p, start: %p, sprev: %p\n",
2880           str, end, sstart, sprev);
2881   fprintf(DBGFP, "size: %d, start offset: %d\n",
2882           (int )(end - str), (int )(sstart - str));
2883 #endif
2884 
2885   best_len = ONIG_MISMATCH;
2886   keep = s = (UChar* )sstart;
2887   STACK_PUSH_BOTTOM(STK_ALT, FinishCode);  /* bottom stack */
2888   INIT_RIGHT_RANGE;
2889 
2890 #ifdef USE_RETRY_LIMIT
2891   retry_in_match_counter = 0;
2892 #endif
2893 
2894   BYTECODE_INTERPRETER_START {
2895     CASE_OP(END)
2896       n = (int )(s - sstart);
2897       if (n > best_len) {
2898         OnigRegion* region;
2899 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
2900         if (OPTON_FIND_LONGEST(option)) {
2901           if (n > msa->best_len) {
2902             msa->best_len = n;
2903             msa->best_s   = (UChar* )sstart;
2904           }
2905           else
2906             goto end_best_len;
2907         }
2908 #endif
2909         best_len = n;
2910         region = msa->region;
2911         if (region) {
2912           if (keep > s) keep = s;
2913 
2914 #ifdef USE_POSIX_API_REGION_OPTION
2915           if (OPTON_POSIX_REGION(msa->options)) {
2916             posix_regmatch_t* rmt = (posix_regmatch_t* )region;
2917 
2918             rmt[0].rm_so = (regoff_t )(keep - str);
2919             rmt[0].rm_eo = (regoff_t )(s    - str);
2920             for (i = 1; i <= num_mem; i++) {
2921               if (mem_end_stk[i] != INVALID_STACK_INDEX) {
2922                 rmt[i].rm_so = (regoff_t )(STACK_MEM_START(reg, i) - str);
2923                 rmt[i].rm_eo = (regoff_t )(STACK_MEM_END(reg, i)   - str);
2924               }
2925               else {
2926                 rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS;
2927               }
2928             }
2929           }
2930           else {
2931 #endif /* USE_POSIX_API_REGION_OPTION */
2932             region->beg[0] = (int )(keep - str);
2933             region->end[0] = (int )(s    - str);
2934             for (i = 1; i <= num_mem; i++) {
2935               if (mem_end_stk[i] != INVALID_STACK_INDEX) {
2936                 region->beg[i] = (int )(STACK_MEM_START(reg, i) - str);
2937                 region->end[i] = (int )(STACK_MEM_END(reg, i)   - str);
2938               }
2939               else {
2940                 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
2941               }
2942             }
2943 
2944 #ifdef USE_CAPTURE_HISTORY
2945             if (reg->capture_history != 0) {
2946               int r;
2947               OnigCaptureTreeNode* node;
2948 
2949               if (IS_NULL(region->history_root)) {
2950                 region->history_root = node = history_node_new();
2951                 CHECK_NULL_RETURN_MEMERR(node);
2952               }
2953               else {
2954                 node = region->history_root;
2955                 history_tree_clear(node);
2956               }
2957 
2958               node->group = 0;
2959               node->beg   = (int )(keep - str);
2960               node->end   = (int )(s    - str);
2961 
2962               stkp = stk_base;
2963               r = make_capture_history_tree(region->history_root, &stkp,
2964                                             stk, (UChar* )str, reg);
2965               if (r < 0) MATCH_AT_ERROR_RETURN(r);
2966             }
2967 #endif /* USE_CAPTURE_HISTORY */
2968 #ifdef USE_POSIX_API_REGION_OPTION
2969           } /* else OPTON_POSIX_REGION() */
2970 #endif
2971         } /* if (region) */
2972       } /* n > best_len */
2973 
2974 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
2975     end_best_len:
2976 #endif
2977       SOP_OUT;
2978 
2979       if (OPTON_FIND_CONDITION(option)) {
2980         if (OPTON_FIND_NOT_EMPTY(option) && s == sstart) {
2981           best_len = ONIG_MISMATCH;
2982           goto fail; /* for retry */
2983         }
2984         if (OPTON_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
2985           goto fail; /* for retry */
2986         }
2987       }
2988 
2989       /* default behavior: return first-matching result. */
2990       goto match_at_end;
2991 
2992     CASE_OP(STR_1)
2993       DATA_ENSURE(1);
2994       ps = p->exact.s;
2995       if (*ps != *s) goto fail;
2996       s++;
2997       INC_OP;
2998       JUMP_OUT_WITH_SPREV_SET;
2999 
3000     CASE_OP(STR_2)
3001       DATA_ENSURE(2);
3002       ps = p->exact.s;
3003       if (*ps != *s) goto fail;
3004       ps++; s++;
3005       if (*ps != *s) goto fail;
3006       sprev = s;
3007       s++;
3008       INC_OP;
3009       JUMP_OUT;
3010 
3011     CASE_OP(STR_3)
3012       DATA_ENSURE(3);
3013       ps = p->exact.s;
3014       if (*ps != *s) goto fail;
3015       ps++; s++;
3016       if (*ps != *s) goto fail;
3017       ps++; s++;
3018       if (*ps != *s) goto fail;
3019       sprev = s;
3020       s++;
3021       INC_OP;
3022       JUMP_OUT;
3023 
3024     CASE_OP(STR_4)
3025       DATA_ENSURE(4);
3026       ps = p->exact.s;
3027       if (*ps != *s) goto fail;
3028       ps++; s++;
3029       if (*ps != *s) goto fail;
3030       ps++; s++;
3031       if (*ps != *s) goto fail;
3032       ps++; s++;
3033       if (*ps != *s) goto fail;
3034       sprev = s;
3035       s++;
3036       INC_OP;
3037       JUMP_OUT;
3038 
3039     CASE_OP(STR_5)
3040       DATA_ENSURE(5);
3041       ps = p->exact.s;
3042       if (*ps != *s) goto fail;
3043       ps++; s++;
3044       if (*ps != *s) goto fail;
3045       ps++; s++;
3046       if (*ps != *s) goto fail;
3047       ps++; s++;
3048       if (*ps != *s) goto fail;
3049       ps++; s++;
3050       if (*ps != *s) goto fail;
3051       sprev = s;
3052       s++;
3053       INC_OP;
3054       JUMP_OUT;
3055 
3056     CASE_OP(STR_N)
3057       tlen = p->exact_n.n;
3058       DATA_ENSURE(tlen);
3059       ps = p->exact_n.s;
3060       while (tlen-- > 0) {
3061         if (*ps++ != *s++) goto fail;
3062       }
3063       sprev = s - 1;
3064       INC_OP;
3065       JUMP_OUT;
3066 
3067     CASE_OP(STR_MB2N1)
3068       DATA_ENSURE(2);
3069       ps = p->exact.s;
3070       if (*ps != *s) goto fail;
3071       ps++; s++;
3072       if (*ps != *s) goto fail;
3073       s++;
3074       INC_OP;
3075       JUMP_OUT_WITH_SPREV_SET;
3076 
3077     CASE_OP(STR_MB2N2)
3078       DATA_ENSURE(4);
3079       ps = p->exact.s;
3080       if (*ps != *s) goto fail;
3081       ps++; s++;
3082       if (*ps != *s) goto fail;
3083       ps++; s++;
3084       sprev = s;
3085       if (*ps != *s) goto fail;
3086       ps++; s++;
3087       if (*ps != *s) goto fail;
3088       s++;
3089       INC_OP;
3090       JUMP_OUT;
3091 
3092     CASE_OP(STR_MB2N3)
3093       DATA_ENSURE(6);
3094       ps = p->exact.s;
3095       if (*ps != *s) goto fail;
3096       ps++; s++;
3097       if (*ps != *s) goto fail;
3098       ps++; s++;
3099       if (*ps != *s) goto fail;
3100       ps++; s++;
3101       if (*ps != *s) goto fail;
3102       ps++; s++;
3103       sprev = s;
3104       if (*ps != *s) goto fail;
3105       ps++; s++;
3106       if (*ps != *s) goto fail;
3107       ps++; s++;
3108       INC_OP;
3109       JUMP_OUT;
3110 
3111     CASE_OP(STR_MB2N)
3112       tlen = p->exact_n.n;
3113       DATA_ENSURE(tlen * 2);
3114       ps = p->exact_n.s;
3115       while (tlen-- > 0) {
3116         if (*ps != *s) goto fail;
3117         ps++; s++;
3118         if (*ps != *s) goto fail;
3119         ps++; s++;
3120       }
3121       sprev = s - 2;
3122       INC_OP;
3123       JUMP_OUT;
3124 
3125     CASE_OP(STR_MB3N)
3126       tlen = p->exact_n.n;
3127       DATA_ENSURE(tlen * 3);
3128       ps = p->exact_n.s;
3129       while (tlen-- > 0) {
3130         if (*ps != *s) goto fail;
3131         ps++; s++;
3132         if (*ps != *s) goto fail;
3133         ps++; s++;
3134         if (*ps != *s) goto fail;
3135         ps++; s++;
3136       }
3137       sprev = s - 3;
3138       INC_OP;
3139       JUMP_OUT;
3140 
3141     CASE_OP(STR_MBN)
3142       tlen  = p->exact_len_n.len; /* mb byte len */
3143       tlen2 = p->exact_len_n.n;   /* number of chars */
3144       tlen2 *= tlen;
3145       DATA_ENSURE(tlen2);
3146       ps = p->exact_len_n.s;
3147       while (tlen2-- > 0) {
3148         if (*ps != *s) goto fail;
3149         ps++; s++;
3150       }
3151       sprev = s - tlen;
3152       INC_OP;
3153       JUMP_OUT;
3154 
3155     CASE_OP(CCLASS)
3156       DATA_ENSURE(1);
3157       if (BITSET_AT(p->cclass.bsp, *s) == 0) goto fail;
3158       s++;
3159       INC_OP;
3160       JUMP_OUT_WITH_SPREV_SET;
3161 
3162     CASE_OP(CCLASS_MB)
3163       DATA_ENSURE(1);
3164       if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail;
3165 
3166     cclass_mb:
3167       {
3168         OnigCodePoint code;
3169         UChar *ss;
3170         int mb_len;
3171 
3172         mb_len = enclen(encode, s);
3173         DATA_ENSURE(mb_len);
3174         ss = s;
3175         s += mb_len;
3176         code = ONIGENC_MBC_TO_CODE(encode, ss, s);
3177         if (! onig_is_in_code_range(p->cclass_mb.mb, code)) goto fail;
3178       }
3179       INC_OP;
3180       JUMP_OUT_WITH_SPREV_SET;
3181 
3182     CASE_OP(CCLASS_MIX)
3183       DATA_ENSURE(1);
3184       if (ONIGENC_IS_MBC_HEAD(encode, s)) {
3185         goto cclass_mb;
3186       }
3187       else {
3188         if (BITSET_AT(p->cclass_mix.bsp, *s) == 0)
3189           goto fail;
3190 
3191         s++;
3192       }
3193       INC_OP;
3194       JUMP_OUT_WITH_SPREV_SET;
3195 
3196     CASE_OP(CCLASS_NOT)
3197       DATA_ENSURE(1);
3198       if (BITSET_AT(p->cclass.bsp, *s) != 0) goto fail;
3199       s += enclen(encode, s);
3200       INC_OP;
3201       JUMP_OUT_WITH_SPREV_SET;
3202 
3203     CASE_OP(CCLASS_MB_NOT)
3204       DATA_ENSURE(1);
3205       if (! ONIGENC_IS_MBC_HEAD(encode, s)) {
3206         s++;
3207         goto cc_mb_not_success;
3208       }
3209 
3210     cclass_mb_not:
3211       {
3212         OnigCodePoint code;
3213         UChar *ss;
3214         int mb_len = enclen(encode, s);
3215 
3216         if (! DATA_ENSURE_CHECK(mb_len)) {
3217           DATA_ENSURE(1);
3218           s = (UChar* )end;
3219           goto cc_mb_not_success;
3220         }
3221 
3222         ss = s;
3223         s += mb_len;
3224         code = ONIGENC_MBC_TO_CODE(encode, ss, s);
3225         if (onig_is_in_code_range(p->cclass_mb.mb, code)) goto fail;
3226       }
3227 
3228     cc_mb_not_success:
3229       INC_OP;
3230       JUMP_OUT_WITH_SPREV_SET;
3231 
3232     CASE_OP(CCLASS_MIX_NOT)
3233       DATA_ENSURE(1);
3234       if (ONIGENC_IS_MBC_HEAD(encode, s)) {
3235         goto cclass_mb_not;
3236       }
3237       else {
3238         if (BITSET_AT(p->cclass_mix.bsp, *s) != 0)
3239           goto fail;
3240 
3241         s++;
3242       }
3243       INC_OP;
3244       JUMP_OUT_WITH_SPREV_SET;
3245 
3246     CASE_OP(ANYCHAR)
3247       DATA_ENSURE(1);
3248       n = enclen(encode, s);
3249       DATA_ENSURE(n);
3250       if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
3251       s += n;
3252       INC_OP;
3253       JUMP_OUT_WITH_SPREV_SET;
3254 
3255     CASE_OP(ANYCHAR_ML)
3256       DATA_ENSURE(1);
3257       n = enclen(encode, s);
3258       DATA_ENSURE(n);
3259       s += n;
3260       INC_OP;
3261       JUMP_OUT_WITH_SPREV_SET;
3262 
3263     CASE_OP(ANYCHAR_STAR)
3264       INC_OP;
3265       while (DATA_ENSURE_CHECK1) {
3266         STACK_PUSH_ALT(p, s, sprev);
3267         n = enclen(encode, s);
3268         DATA_ENSURE(n);
3269         if (ONIGENC_IS_MBC_NEWLINE(encode, s, end))  goto fail;
3270         sprev = s;
3271         s += n;
3272       }
3273       JUMP_OUT;
3274 
3275     CASE_OP(ANYCHAR_ML_STAR)
3276       INC_OP;
3277       while (DATA_ENSURE_CHECK1) {
3278         STACK_PUSH_ALT(p, s, sprev);
3279         n = enclen(encode, s);
3280         if (n > 1) {
3281           DATA_ENSURE(n);
3282           sprev = s;
3283           s += n;
3284         }
3285         else {
3286           sprev = s;
3287           s++;
3288         }
3289       }
3290       JUMP_OUT;
3291 
3292     CASE_OP(ANYCHAR_STAR_PEEK_NEXT)
3293       {
3294         UChar c;
3295 
3296         c = p->anychar_star_peek_next.c;
3297         INC_OP;
3298         while (DATA_ENSURE_CHECK1) {
3299           if (c == *s) {
3300             STACK_PUSH_ALT(p, s, sprev);
3301           }
3302           n = enclen(encode, s);
3303           DATA_ENSURE(n);
3304           if (ONIGENC_IS_MBC_NEWLINE(encode, s, end))  goto fail;
3305           sprev = s;
3306           s += n;
3307         }
3308       }
3309       JUMP_OUT;
3310 
3311     CASE_OP(ANYCHAR_ML_STAR_PEEK_NEXT)
3312       {
3313         UChar c;
3314 
3315         c = p->anychar_star_peek_next.c;
3316         INC_OP;
3317         while (DATA_ENSURE_CHECK1) {
3318           if (c == *s) {
3319             STACK_PUSH_ALT(p, s, sprev);
3320           }
3321           n = enclen(encode, s);
3322           if (n > 1) {
3323             DATA_ENSURE(n);
3324             sprev = s;
3325             s += n;
3326           }
3327           else {
3328             sprev = s;
3329             s++;
3330           }
3331         }
3332       }
3333       JUMP_OUT;
3334 
3335     CASE_OP(WORD)
3336       DATA_ENSURE(1);
3337       if (! ONIGENC_IS_MBC_WORD(encode, s, end))
3338         goto fail;
3339 
3340       s += enclen(encode, s);
3341       INC_OP;
3342       JUMP_OUT_WITH_SPREV_SET;
3343 
3344     CASE_OP(WORD_ASCII)
3345       DATA_ENSURE(1);
3346       if (! ONIGENC_IS_MBC_WORD_ASCII(encode, s, end))
3347         goto fail;
3348 
3349       s += enclen(encode, s);
3350       INC_OP;
3351       JUMP_OUT_WITH_SPREV_SET;
3352 
3353     CASE_OP(NO_WORD)
3354       DATA_ENSURE(1);
3355       if (ONIGENC_IS_MBC_WORD(encode, s, end))
3356         goto fail;
3357 
3358       s += enclen(encode, s);
3359       INC_OP;
3360       JUMP_OUT_WITH_SPREV_SET;
3361 
3362     CASE_OP(NO_WORD_ASCII)
3363       DATA_ENSURE(1);
3364       if (ONIGENC_IS_MBC_WORD_ASCII(encode, s, end))
3365         goto fail;
3366 
3367       s += enclen(encode, s);
3368       INC_OP;
3369       JUMP_OUT_WITH_SPREV_SET;
3370 
3371     CASE_OP(WORD_BOUNDARY)
3372       {
3373         ModeType mode;
3374 
3375         mode = p->word_boundary.mode;
3376         if (ON_STR_BEGIN(s)) {
3377           DATA_ENSURE(1);
3378           if (! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode))
3379             goto fail;
3380         }
3381         else if (ON_STR_END(s)) {
3382           if (! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
3383             goto fail;
3384         }
3385         else {
3386           if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)
3387               == IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
3388             goto fail;
3389         }
3390       }
3391       INC_OP;
3392       JUMP_OUT;
3393 
3394     CASE_OP(NO_WORD_BOUNDARY)
3395       {
3396         ModeType mode;
3397 
3398         mode = p->word_boundary.mode;
3399         if (ON_STR_BEGIN(s)) {
3400           if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode))
3401             goto fail;
3402         }
3403         else if (ON_STR_END(s)) {
3404           if (IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
3405             goto fail;
3406         }
3407         else {
3408           if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)
3409               != IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
3410             goto fail;
3411         }
3412       }
3413       INC_OP;
3414       JUMP_OUT;
3415 
3416 #ifdef USE_WORD_BEGIN_END
3417     CASE_OP(WORD_BEGIN)
3418       {
3419         ModeType mode;
3420 
3421         mode = p->word_boundary.mode;
3422         if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) {
3423           if (ON_STR_BEGIN(s) || !IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {
3424             INC_OP;
3425             JUMP_OUT;
3426           }
3427         }
3428       }
3429       goto fail;
3430 
3431     CASE_OP(WORD_END)
3432       {
3433         ModeType mode;
3434 
3435         mode = p->word_boundary.mode;
3436         if (!ON_STR_BEGIN(s) && IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {
3437           if (ON_STR_END(s) || ! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) {
3438             INC_OP;
3439             JUMP_OUT;
3440           }
3441         }
3442       }
3443       goto fail;
3444 #endif
3445 
3446     CASE_OP(TEXT_SEGMENT_BOUNDARY)
3447       {
3448         int is_break;
3449 
3450         switch (p->text_segment_boundary.type) {
3451         case EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
3452           is_break = onigenc_egcb_is_break_position(encode, s, sprev, str, end);
3453           break;
3454 #ifdef USE_UNICODE_WORD_BREAK
3455         case WORD_BOUNDARY:
3456           is_break = onigenc_wb_is_break_position(encode, s, sprev, str, end);
3457           break;
3458 #endif
3459         default:
3460           MATCH_AT_ERROR_RETURN(ONIGERR_UNDEFINED_BYTECODE);
3461           break;
3462         }
3463 
3464         if (p->text_segment_boundary.not != 0)
3465           is_break = ! is_break;
3466 
3467         if (is_break != 0) {
3468           INC_OP;
3469           JUMP_OUT;
3470         }
3471         else {
3472           goto fail;
3473         }
3474       }
3475 
3476     CASE_OP(BEGIN_BUF)
3477       if (! ON_STR_BEGIN(s)) goto fail;
3478 
3479       INC_OP;
3480       JUMP_OUT;
3481 
3482     CASE_OP(END_BUF)
3483       if (! ON_STR_END(s)) goto fail;
3484 
3485       INC_OP;
3486       JUMP_OUT;
3487 
3488     CASE_OP(BEGIN_LINE)
3489       if (ON_STR_BEGIN(s)) {
3490         if (OPTON_NOTBOL(msa->options)) goto fail;
3491         INC_OP;
3492         JUMP_OUT;
3493       }
3494       else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) {
3495         INC_OP;
3496         JUMP_OUT;
3497       }
3498       goto fail;
3499 
3500     CASE_OP(END_LINE)
3501       if (ON_STR_END(s)) {
3502 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3503         if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
3504 #endif
3505           if (OPTON_NOTEOL(msa->options)) goto fail;
3506           INC_OP;
3507           JUMP_OUT;
3508 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3509         }
3510 #endif
3511       }
3512       else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) {
3513         INC_OP;
3514         JUMP_OUT;
3515       }
3516 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3517       else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
3518         INC_OP;
3519         JUMP_OUT;
3520       }
3521 #endif
3522       goto fail;
3523 
3524     CASE_OP(SEMI_END_BUF)
3525       if (ON_STR_END(s)) {
3526 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3527         if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
3528 #endif
3529           if (OPTON_NOTEOL(msa->options)) goto fail;
3530           INC_OP;
3531           JUMP_OUT;
3532 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3533         }
3534 #endif
3535       }
3536       else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) &&
3537                ON_STR_END(s + enclen(encode, s))) {
3538         INC_OP;
3539         JUMP_OUT;
3540       }
3541 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3542       else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
3543         UChar* ss = s + enclen(encode, s);
3544         ss += enclen(encode, ss);
3545         if (ON_STR_END(ss)) {
3546           INC_OP;
3547           JUMP_OUT;
3548         }
3549       }
3550 #endif
3551       goto fail;
3552 
3553     CASE_OP(CHECK_POSITION)
3554       switch (p->check_position.type) {
3555       case CHECK_POSITION_SEARCH_START:
3556         if (s != msa->start) goto fail;
3557         break;
3558       case CHECK_POSITION_CURRENT_RIGHT_RANGE:
3559         if (s != right_range) goto fail;
3560         break;
3561       default:
3562         break;
3563       }
3564       INC_OP;
3565       JUMP_OUT;
3566 
3567     CASE_OP(MEM_START_PUSH)
3568       mem = p->memory_start.num;
3569       STACK_PUSH_MEM_START(mem, s);
3570       INC_OP;
3571       JUMP_OUT;
3572 
3573     CASE_OP(MEM_START)
3574       mem = p->memory_start.num;
3575       mem_start_stk[mem] = (StackIndex )((void* )s);
3576       INC_OP;
3577       JUMP_OUT;
3578 
3579     CASE_OP(MEM_END_PUSH)
3580       mem = p->memory_end.num;
3581       STACK_PUSH_MEM_END(mem, s);
3582       INC_OP;
3583       JUMP_OUT;
3584 
3585     CASE_OP(MEM_END)
3586       mem = p->memory_end.num;
3587       mem_end_stk[mem] = (StackIndex )((void* )s);
3588       INC_OP;
3589       JUMP_OUT;
3590 
3591 #ifdef USE_CALL
3592     CASE_OP(MEM_END_PUSH_REC)
3593       {
3594         StackIndex si;
3595 
3596         mem = p->memory_end.num;
3597         STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
3598         si = GET_STACK_INDEX(stkp);
3599         STACK_PUSH_MEM_END(mem, s);
3600         mem_start_stk[mem] = si;
3601         INC_OP;
3602         JUMP_OUT;
3603       }
3604 
3605     CASE_OP(MEM_END_REC)
3606       mem = p->memory_end.num;
3607       mem_end_stk[mem] = (StackIndex )((void* )s);
3608       STACK_GET_MEM_START(mem, stkp);
3609 
3610       if (MEM_STATUS_AT(reg->push_mem_start, mem))
3611         mem_start_stk[mem] = GET_STACK_INDEX(stkp);
3612       else
3613         mem_start_stk[mem] = (StackIndex )((void* )stkp->u.mem.pstr);
3614 
3615       STACK_PUSH_MEM_END_MARK(mem);
3616       INC_OP;
3617       JUMP_OUT;
3618 #endif
3619 
3620     CASE_OP(BACKREF1)
3621       mem = 1;
3622       goto backref;
3623 
3624     CASE_OP(BACKREF2)
3625       mem = 2;
3626       goto backref;
3627 
3628     CASE_OP(BACKREF_N)
3629       mem = p->backref_n.n1;
3630     backref:
3631       {
3632         int len;
3633         UChar *pstart, *pend;
3634 
3635         if (mem_end_stk[mem]   == INVALID_STACK_INDEX) goto fail;
3636         if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
3637 
3638         pstart = STACK_MEM_START(reg, mem);
3639         pend   = STACK_MEM_END(reg, mem);
3640         n = (int )(pend - pstart);
3641         if (n != 0) {
3642           DATA_ENSURE(n);
3643           sprev = s;
3644           STRING_CMP(s, pstart, n);
3645           while (sprev + (len = enclen(encode, sprev)) < s)
3646             sprev += len;
3647         }
3648       }
3649       INC_OP;
3650       JUMP_OUT;
3651 
3652     CASE_OP(BACKREF_N_IC)
3653       mem = p->backref_n.n1;
3654       {
3655         int len;
3656         UChar *pstart, *pend;
3657 
3658         if (mem_end_stk[mem]   == INVALID_STACK_INDEX) goto fail;
3659         if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
3660 
3661         pstart = STACK_MEM_START(reg, mem);
3662         pend   = STACK_MEM_END(reg, mem);
3663         n = (int )(pend - pstart);
3664         if (n != 0) {
3665           DATA_ENSURE(n);
3666           sprev = s;
3667           STRING_CMP_IC(case_fold_flag, pstart, &s, n);
3668           while (sprev + (len = enclen(encode, sprev)) < s)
3669             sprev += len;
3670         }
3671       }
3672       INC_OP;
3673       JUMP_OUT;
3674 
3675     CASE_OP(BACKREF_MULTI)
3676       {
3677         int len, is_fail;
3678         UChar *pstart, *pend, *swork;
3679 
3680         tlen = p->backref_general.num;
3681         for (i = 0; i < tlen; i++) {
3682           mem = tlen == 1 ? p->backref_general.n1 : p->backref_general.ns[i];
3683 
3684           if (mem_end_stk[mem]   == INVALID_STACK_INDEX) continue;
3685           if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
3686 
3687           pstart = STACK_MEM_START(reg, mem);
3688           pend   = STACK_MEM_END(reg, mem);
3689           n = (int )(pend - pstart);
3690           if (n != 0) {
3691             DATA_ENSURE(n);
3692             sprev = s;
3693             swork = s;
3694             STRING_CMP_VALUE(swork, pstart, n, is_fail);
3695             if (is_fail) continue;
3696             s = swork;
3697             while (sprev + (len = enclen(encode, sprev)) < s)
3698               sprev += len;
3699           }
3700           break; /* success */
3701         }
3702         if (i == tlen) goto fail;
3703       }
3704       INC_OP;
3705       JUMP_OUT;
3706 
3707     CASE_OP(BACKREF_MULTI_IC)
3708       {
3709         int len, is_fail;
3710         UChar *pstart, *pend, *swork;
3711 
3712         tlen = p->backref_general.num;
3713         for (i = 0; i < tlen; i++) {
3714           mem = tlen == 1 ? p->backref_general.n1 : p->backref_general.ns[i];
3715 
3716           if (mem_end_stk[mem]   == INVALID_STACK_INDEX) continue;
3717           if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
3718 
3719           pstart = STACK_MEM_START(reg, mem);
3720           pend   = STACK_MEM_END(reg, mem);
3721           n = (int )(pend - pstart);
3722           if (n != 0) {
3723             DATA_ENSURE(n);
3724             sprev = s;
3725             swork = s;
3726             STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail);
3727             if (is_fail) continue;
3728             s = swork;
3729             while (sprev + (len = enclen(encode, sprev)) < s)
3730               sprev += len;
3731           }
3732           break; /* success */
3733         }
3734         if (i == tlen) goto fail;
3735       }
3736       INC_OP;
3737       JUMP_OUT;
3738 
3739 #ifdef USE_BACKREF_WITH_LEVEL
3740     CASE_OP(BACKREF_WITH_LEVEL_IC)
3741       n = 1; /* ignore case */
3742       goto backref_with_level;
3743     CASE_OP(BACKREF_WITH_LEVEL)
3744       {
3745         int len;
3746         int level;
3747         MemNumType* mems;
3748         UChar* ssave;
3749 
3750         n = 0;
3751       backref_with_level:
3752         level = p->backref_general.nest_level;
3753         tlen  = p->backref_general.num;
3754         mems = tlen == 1 ? &(p->backref_general.n1) : p->backref_general.ns;
3755 
3756         ssave = s;
3757         if (backref_match_at_nested_level(reg, stk, stk_base, n,
3758                     case_fold_flag, level, (int )tlen, mems, &s, end)) {
3759           if (ssave != s) {
3760             sprev = ssave;
3761             while (sprev + (len = enclen(encode, sprev)) < s)
3762               sprev += len;
3763           }
3764         }
3765         else
3766           goto fail;
3767       }
3768       INC_OP;
3769       JUMP_OUT;
3770 #endif
3771 
3772     CASE_OP(BACKREF_CHECK)
3773       {
3774         MemNumType* mems;
3775 
3776         tlen  = p->backref_general.num;
3777         mems = tlen == 1 ? &(p->backref_general.n1) : p->backref_general.ns;
3778 
3779         for (i = 0; i < tlen; i++) {
3780           mem = mems[i];
3781           if (mem_end_stk[mem]   == INVALID_STACK_INDEX) continue;
3782           if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
3783           break; /* success */
3784         }
3785         if (i == tlen) goto fail;
3786       }
3787       INC_OP;
3788       JUMP_OUT;
3789 
3790 #ifdef USE_BACKREF_WITH_LEVEL
3791     CASE_OP(BACKREF_CHECK_WITH_LEVEL)
3792       {
3793         LengthType level;
3794         MemNumType* mems;
3795 
3796         level = p->backref_general.nest_level;
3797         tlen  = p->backref_general.num;
3798         mems = tlen == 1 ? &(p->backref_general.n1) : p->backref_general.ns;
3799 
3800         if (backref_check_at_nested_level(reg, stk, stk_base,
3801                                           (int )level, (int )tlen, mems) == 0)
3802           goto fail;
3803       }
3804       INC_OP;
3805       JUMP_OUT;
3806 #endif
3807 
3808     CASE_OP(EMPTY_CHECK_START)
3809       mem = p->empty_check_start.mem;   /* mem: null check id */
3810       STACK_PUSH_EMPTY_CHECK_START(mem, s);
3811       INC_OP;
3812       JUMP_OUT;
3813 
3814     CASE_OP(EMPTY_CHECK_END)
3815       {
3816         int is_empty;
3817 
3818         mem = p->empty_check_end.mem;  /* mem: null check id */
3819         STACK_EMPTY_CHECK(is_empty, mem, s);
3820         INC_OP;
3821         if (is_empty) {
3822 #ifdef ONIG_DEBUG_MATCH
3823           fprintf(DBGFP, "EMPTY_CHECK_END: skip  id:%d, s:%p\n", (int )mem, s);
3824 #endif
3825         empty_check_found:
          /* empty loop found, skip next instruction */
3827 #if defined(ONIG_DEBUG) && !defined(USE_DIRECT_THREADED_CODE)
3828           switch (p->opcode) {
3829           case OP_JUMP:
3830           case OP_PUSH:
3831           case OP_REPEAT_INC:
3832           case OP_REPEAT_INC_NG:
3833             INC_OP;
3834             break;
3835           default:
3836             MATCH_AT_ERROR_RETURN(ONIGERR_UNEXPECTED_BYTECODE);
3837             break;
3838           }
3839 #else
3840           INC_OP;
3841 #endif
3842         }
3843       }
3844       JUMP_OUT;
3845 
3846 #ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
3847     CASE_OP(EMPTY_CHECK_END_MEMST)
3848       {
3849         int is_empty;
3850 
3851         mem = p->empty_check_end.mem;  /* mem: null check id */
3852         STACK_EMPTY_CHECK_MEM(is_empty, mem, s, reg);
3853         INC_OP;
3854         if (is_empty) {
3855 #ifdef ONIG_DEBUG_MATCH
3856           fprintf(DBGFP, "EMPTY_CHECK_END_MEM: skip  id:%d, s:%p\n", (int)mem, s);
3857 #endif
3858           if (is_empty == -1) goto fail;
3859           goto empty_check_found;
3860         }
3861       }
3862       JUMP_OUT;
3863 #endif
3864 
3865 #ifdef USE_CALL
3866     CASE_OP(EMPTY_CHECK_END_MEMST_PUSH)
3867       {
3868         int is_empty;
3869 
3870         mem = p->empty_check_end.mem;  /* mem: null check id */
3871 #ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
3872         STACK_EMPTY_CHECK_MEM_REC(is_empty, mem, s, reg);
3873 #else
3874         STACK_EMPTY_CHECK_REC(is_empty, mem, s);
3875 #endif
3876         INC_OP;
3877         if (is_empty) {
3878 #ifdef ONIG_DEBUG_MATCH
3879           fprintf(DBGFP, "EMPTY_CHECK_END_MEM_PUSH: skip  id:%d, s:%p\n",
3880                   (int )mem, s);
3881 #endif
3882           if (is_empty == -1) goto fail;
3883           goto empty_check_found;
3884         }
3885         else {
3886           STACK_PUSH_EMPTY_CHECK_END(mem);
3887         }
3888       }
3889       JUMP_OUT;
3890 #endif
3891 
3892     CASE_OP(JUMP)
3893       addr = p->jump.addr;
3894       p += addr;
3895       CHECK_INTERRUPT_JUMP_OUT;
3896 
3897     CASE_OP(PUSH)
3898       addr = p->push.addr;
3899       STACK_PUSH_ALT(p + addr, s, sprev);
3900       INC_OP;
3901       JUMP_OUT;
3902 
3903     CASE_OP(PUSH_SUPER)
3904       addr = p->push.addr;
3905       STACK_PUSH_SUPER_ALT(p + addr, s, sprev);
3906       INC_OP;
3907       JUMP_OUT;
3908 
3909     CASE_OP(POP)
3910       STACK_POP_ONE;
3911       INC_OP;
3912       JUMP_OUT;
3913 
3914     CASE_OP(POP_TO_MARK)
3915       STACK_POP_TO_MARK(p->pop_to_mark.id);
3916       INC_OP;
3917       JUMP_OUT;
3918 
3919  #ifdef USE_OP_PUSH_OR_JUMP_EXACT
3920     CASE_OP(PUSH_OR_JUMP_EXACT1)
3921       {
3922         UChar c;
3923 
3924         addr = p->push_or_jump_exact1.addr;
3925         c    = p->push_or_jump_exact1.c;
3926         if (DATA_ENSURE_CHECK1 && c == *s) {
3927           STACK_PUSH_ALT(p + addr, s, sprev);
3928           INC_OP;
3929           JUMP_OUT;
3930         }
3931       }
3932       p += addr;
3933       JUMP_OUT;
3934 #endif
3935 
3936     CASE_OP(PUSH_IF_PEEK_NEXT)
3937       {
3938         UChar c;
3939 
3940         addr = p->push_if_peek_next.addr;
3941         c    = p->push_if_peek_next.c;
3942         if (DATA_ENSURE_CHECK1 && c == *s) {
3943           STACK_PUSH_ALT(p + addr, s, sprev);
3944           INC_OP;
3945           JUMP_OUT;
3946         }
3947       }
3948       INC_OP;
3949       JUMP_OUT;
3950 
3951     CASE_OP(REPEAT)
3952       mem  = p->repeat.id;  /* mem: OP_REPEAT ID */
3953       addr = p->repeat.addr;
3954 
3955       STACK_PUSH_REPEAT_INC(mem, 0);
3956       if (reg->repeat_range[mem].lower == 0) {
3957         STACK_PUSH_ALT(p + addr, s, sprev);
3958       }
3959       INC_OP;
3960       JUMP_OUT;
3961 
3962     CASE_OP(REPEAT_NG)
3963       mem  = p->repeat.id;  /* mem: OP_REPEAT ID */
3964       addr = p->repeat.addr;
3965 
3966       STACK_PUSH_REPEAT_INC(mem, 0);
3967       if (reg->repeat_range[mem].lower == 0) {
3968         STACK_PUSH_ALT(p + 1, s, sprev);
3969         p += addr;
3970       }
3971       else
3972         INC_OP;
3973       JUMP_OUT;
3974 
3975     CASE_OP(REPEAT_INC)
3976       mem  = p->repeat_inc.id;  /* mem: OP_REPEAT ID */
3977       STACK_GET_REPEAT_COUNT(mem, n);
3978       n++;
3979       if (n >= reg->repeat_range[mem].upper) {
3980         /* end of repeat. Nothing to do. */
3981         INC_OP;
3982       }
3983       else if (n >= reg->repeat_range[mem].lower) {
3984         INC_OP;
3985         STACK_PUSH_ALT(p, s, sprev);
3986         p = reg->repeat_range[mem].u.pcode;
3987       }
3988       else {
3989         p = reg->repeat_range[mem].u.pcode;
3990       }
3991       STACK_PUSH_REPEAT_INC(mem, n);
3992       CHECK_INTERRUPT_JUMP_OUT;
3993 
3994     CASE_OP(REPEAT_INC_NG)
3995       mem = p->repeat_inc.id;  /* mem: OP_REPEAT ID */
3996       STACK_GET_REPEAT_COUNT(mem, n);
3997       n++;
3998       STACK_PUSH_REPEAT_INC(mem, n);
3999       if (n == reg->repeat_range[mem].upper) {
4000         INC_OP;
4001       }
4002       else {
4003         if (n >= reg->repeat_range[mem].lower) {
4004           STACK_PUSH_ALT(reg->repeat_range[mem].u.pcode, s, sprev);
4005           INC_OP;
4006         }
4007         else {
4008           p = reg->repeat_range[mem].u.pcode;
4009         }
4010       }
4011       CHECK_INTERRUPT_JUMP_OUT;
4012 
4013 #ifdef USE_CALL
4014     CASE_OP(CALL)
4015 #ifdef SUBEXP_CALL_MAX_NEST_LEVEL
4016       if (subexp_call_nest_counter == SUBEXP_CALL_MAX_NEST_LEVEL)
4017         goto fail;
4018       subexp_call_nest_counter++;
4019 #endif
4020       addr = p->call.addr;
4021       INC_OP; STACK_PUSH_CALL_FRAME(p);
4022       p = reg->ops + addr;
4023 
4024       JUMP_OUT;
4025 
4026     CASE_OP(RETURN)
4027       STACK_RETURN(p);
4028       STACK_PUSH_RETURN;
4029 #ifdef SUBEXP_CALL_MAX_NEST_LEVEL
4030       subexp_call_nest_counter--;
4031 #endif
4032       JUMP_OUT;
4033 #endif
4034 
4035     CASE_OP(MOVE)
4036       if (p->move.n < 0) {
4037         s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, -p->move.n);
4038         if (IS_NULL(s)) goto fail;
4039       }
4040       else {
4041         int len;
4042 
4043         for (tlen = p->move.n; tlen > 0; tlen--) {
4044           len = enclen(encode, s);
4045           sprev = s;
4046           s += len;
4047           if (s > end) goto fail;
4048           if (s == end) {
4049             if (tlen != 1) goto fail;
4050             else           break;
4051           }
4052         }
4053       }
4054       sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
4055       INC_OP;
4056       JUMP_OUT;
4057 
4058     CASE_OP(STEP_BACK_START)
4059       tlen = p->step_back_start.initial;
4060       if (tlen != 0) {
4061         s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
4062         if (IS_NULL(s)) goto fail;
4063         sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
4064       }
4065       if (p->step_back_start.remaining != 0) {
4066         STACK_PUSH_ALT_WITH_ZID(p + 1, s, sprev, p->step_back_start.remaining);
4067         p += p->step_back_start.addr;
4068       }
4069       else
4070         INC_OP;
4071       JUMP_OUT;
4072 
4073     CASE_OP(STEP_BACK_NEXT)
4074       tlen = (LengthType )stk->zid; /* remaining count */
4075       if (tlen != INFINITE_LEN) tlen--;
4076       s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, 1);
4077       if (IS_NULL(s)) goto fail;
4078       sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
4079       if (tlen != 0) {
4080         STACK_PUSH_ALT_WITH_ZID(p, s, sprev, (int )tlen);
4081       }
4082       INC_OP;
4083       JUMP_OUT;
4084 
4085     CASE_OP(CUT_TO_MARK)
4086       mem  = p->cut_to_mark.id; /* mem: mark id */
4087       STACK_TO_VOID_TO_MARK(stkp, mem);
4088       if (p->cut_to_mark.restore_pos != 0) {
4089         s     = stkp->u.val.v;
4090         sprev = stkp->u.val.v2;
4091       }
4092       INC_OP;
4093       JUMP_OUT;
4094 
4095     CASE_OP(MARK)
4096       mem  = p->mark.id; /* mem: mark id */
4097       if (p->mark.save_pos != 0)
4098         STACK_PUSH_MARK_WITH_POS(mem, s, sprev);
4099       else
4100         STACK_PUSH_MARK(mem);
4101 
4102       INC_OP;
4103       JUMP_OUT;
4104 
4105     CASE_OP(SAVE_VAL)
4106       {
4107         SaveType type;
4108 
4109         type = p->save_val.type;
4110         mem  = p->save_val.id; /* mem: save id */
4111         switch ((enum SaveType )type) {
4112         case SAVE_KEEP:
4113           STACK_PUSH_SAVE_VAL(mem, type, s);
4114           break;
4115 
4116         case SAVE_S:
4117           STACK_PUSH_SAVE_VAL_WITH_SPREV(mem, type, s);
4118           break;
4119 
4120         case SAVE_RIGHT_RANGE:
4121           STACK_PUSH_SAVE_VAL(mem, SAVE_RIGHT_RANGE, right_range);
4122           break;
4123         }
4124       }
4125       INC_OP;
4126       JUMP_OUT;
4127 
4128     CASE_OP(UPDATE_VAR)
4129       {
4130         UpdateVarType type;
4131         enum SaveType save_type;
4132 
4133         type = p->update_var.type;
4134 
4135         switch ((enum UpdateVarType )type) {
4136         case UPDATE_VAR_KEEP_FROM_STACK_LAST:
4137           STACK_GET_SAVE_VAL_TYPE_LAST(SAVE_KEEP, keep);
4138           break;
4139         case UPDATE_VAR_S_FROM_STACK:
4140           mem = p->update_var.id; /* mem: save id */
4141           STACK_GET_SAVE_VAL_TYPE_LAST_ID_WITH_SPREV(SAVE_S, mem, s);
4142           break;
4143         case UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK:
4144           save_type = SAVE_S;
4145           goto get_save_val_type_last_id;
4146           break;
4147         case UPDATE_VAR_RIGHT_RANGE_FROM_STACK:
4148           save_type = SAVE_RIGHT_RANGE;
4149         get_save_val_type_last_id:
4150           mem = p->update_var.id; /* mem: save id */
4151           STACK_GET_SAVE_VAL_TYPE_LAST_ID(save_type, mem, right_range, p->update_var.clear);
4152           break;
4153         case UPDATE_VAR_RIGHT_RANGE_TO_S:
4154           right_range = s;
4155           break;
4156         case UPDATE_VAR_RIGHT_RANGE_INIT:
4157           INIT_RIGHT_RANGE;
4158           break;
4159         }
4160       }
4161       INC_OP;
4162       JUMP_OUT;
4163 
4164 #ifdef USE_CALLOUT
4165     CASE_OP(CALLOUT_CONTENTS)
4166       of = ONIG_CALLOUT_OF_CONTENTS;
4167       mem = p->callout_contents.num;
4168       goto callout_common_entry;
4169       BREAK_OUT;
4170 
4171     CASE_OP(CALLOUT_NAME)
4172       {
4173         int call_result;
4174         int name_id;
4175         int in;
4176         CalloutListEntry* e;
4177         OnigCalloutFunc func;
4178         OnigCalloutArgs args;
4179 
4180         of  = ONIG_CALLOUT_OF_NAME;
4181         mem = p->callout_name.num;
4182 
4183       callout_common_entry:
4184         e = onig_reg_callout_list_at(reg, mem);
4185         in = e->in;
4186         if (of == ONIG_CALLOUT_OF_NAME) {
4187           name_id = p->callout_name.id;
4188           func = onig_get_callout_start_func(reg, mem);
4189         }
4190         else {
4191           name_id = ONIG_NON_NAME_ID;
4192           func = msa->mp->progress_callout_of_contents;
4193         }
4194 
4195         if (IS_NOT_NULL(func) && (in & ONIG_CALLOUT_IN_PROGRESS) != 0) {
4196           CALLOUT_BODY(func, ONIG_CALLOUT_IN_PROGRESS, name_id,
4197                        (int )mem, msa->mp->callout_user_data, args, call_result);
4198           switch (call_result) {
4199           case ONIG_CALLOUT_FAIL:
4200             goto fail;
4201             break;
4202           case ONIG_CALLOUT_SUCCESS:
4203             goto retraction_callout2;
4204             break;
4205           default: /* error code */
4206             if (call_result > 0) {
4207               call_result = ONIGERR_INVALID_ARGUMENT;
4208             }
4209             best_len = call_result;
4210             goto match_at_end;
4211             break;
4212           }
4213         }
4214         else {
4215         retraction_callout2:
4216           if ((in & ONIG_CALLOUT_IN_RETRACTION) != 0) {
4217             if (of == ONIG_CALLOUT_OF_NAME) {
4218               if (IS_NOT_NULL(func)) {
4219                 STACK_PUSH_CALLOUT_NAME(name_id, mem, func);
4220               }
4221             }
4222             else {
4223               func = msa->mp->retraction_callout_of_contents;
4224               if (IS_NOT_NULL(func)) {
4225                 STACK_PUSH_CALLOUT_CONTENTS(mem, func);
4226               }
4227             }
4228           }
4229         }
4230       }
4231       INC_OP;
4232       JUMP_OUT;
4233 #endif
4234 
4235     CASE_OP(FINISH)
4236       goto match_at_end;
4237 
4238 #ifdef ONIG_DEBUG_STATISTICS
4239     fail:
4240       SOP_OUT;
4241       goto fail2;
4242 #endif
4243     CASE_OP(FAIL)
4244 #ifdef ONIG_DEBUG_STATISTICS
4245     fail2:
4246 #else
4247     fail:
4248 #endif
4249       STACK_POP;
4250       p     = stk->u.state.pcode;
4251       s     = stk->u.state.pstr;
4252       sprev = stk->u.state.pstr_prev;
4253       CHECK_RETRY_LIMIT_IN_MATCH;
4254       JUMP_OUT;
4255 
4256     DEFAULT_OP
4257       MATCH_AT_ERROR_RETURN(ONIGERR_UNDEFINED_BYTECODE);
4258 
4259   } BYTECODE_INTERPRETER_END;
4260 
4261  match_at_end:
4262   if (msa->retry_limit_in_search != 0) {
4263     msa->retry_limit_in_search_counter += retry_in_match_counter;
4264   }
4265   STACK_SAVE(msa, is_alloca, alloc_base);
4266   return best_len;
4267 }
4268 
4269 
4270 #ifdef USE_REGSET
4271 
4272 typedef struct {
4273   regex_t*    reg;
4274   OnigRegion* region;
4275 } RR;
4276 
4277 struct OnigRegSetStruct {
4278   RR*          rs;
4279   int          n;
4280   int          alloc;
4281   OnigEncoding enc;
4282   int          anchor;      /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
4283   OnigLen      anc_dmin;    /* (SEMI_)END_BUF anchor distance */
4284   OnigLen      anc_dmax;    /* (SEMI_)END_BUF anchor distance */
4285   int          all_low_high;
4286   int          anychar_inf;
4287 };
4288 
/* State of one regex's search window during a position-lead regset scan. */
enum SearchRangeStatus {
  SRS_DEAD = 0,   /* (0) regex is skipped for the rest of the scan */
  SRS_LOW_HIGH,   /* (1) regex is tried only inside its [low, high) window */
  SRS_ALL_RANGE   /* (2) regex is tried at every position */
};
4294 
4295 typedef struct {
4296   int    state;  /* value of enum SearchRangeStatus */
4297   UChar* low;
4298   UChar* high;
4299   UChar* low_prev;
4300   UChar* sch_range;
4301 } SearchRange;
4302 
/*
  Try regex `reg` at position `s` and leave the enclosing function on anything
  other than a mismatch.

  Expects these names in the expanding scope: r, reg, str, end, s, prev, msas,
  i, and the labels `match` and `finish`.
    r >= 0                   -> goto match   (r holds match_at()'s result)
    r <  0 and not MISMATCH  -> goto finish  (error code in r)
    r == ONIG_MISMATCH       -> falls through

  Wrapped in do { } while (0) so the expansion is a single statement and stays
  correct under an unbraced if/else. Invoke it with a trailing semicolon.
*/
#define REGSET_MATCH_AND_RETURN_CHECK(upper_range) do {\
  r = match_at(reg, str, end, (upper_range), s, prev, msas + i);\
  if (r != ONIG_MISMATCH) {\
    if (r >= 0) {\
      goto match;\
    }\
    else goto finish; /* error */ \
  }\
} while (0)
4311 
4312 static inline int
regset_search_body_position_lead(OnigRegSet * set,const UChar * str,const UChar * end,const UChar * start,const UChar * range,const UChar * orig_range,OnigOptionType option,MatchArg * msas,int * rmatch_pos)4313 regset_search_body_position_lead(OnigRegSet* set,
4314            const UChar* str, const UChar* end,
4315            const UChar* start, const UChar* range, /* match start range */
4316            const UChar* orig_range, /* data range */
4317            OnigOptionType option, MatchArg* msas, int* rmatch_pos)
4318 {
4319   int r, n, i;
4320   UChar *s, *prev;
4321   UChar *low, *high, *low_prev;
4322   UChar* sch_range;
4323   regex_t* reg;
4324   OnigEncoding enc;
4325   SearchRange* sr;
4326 
4327   n   = set->n;
4328   enc = set->enc;
4329 
4330   s = (UChar* )start;
4331   if (s > str)
4332     prev = onigenc_get_prev_char_head(enc, str, s);
4333   else
4334     prev = (UChar* )NULL;
4335 
4336   sr = (SearchRange* )xmalloc(sizeof(*sr) * n);
4337   CHECK_NULL_RETURN_MEMERR(sr);
4338 
4339   for (i = 0; i < n; i++) {
4340     reg = set->rs[i].reg;
4341 
4342     sr[i].state = SRS_DEAD;
4343     if (reg->optimize != OPTIMIZE_NONE) {
4344       if (reg->dist_max != INFINITE_LEN) {
4345         if (end - range > reg->dist_max)
4346           sch_range = (UChar* )range + reg->dist_max;
4347         else
4348           sch_range = (UChar* )end;
4349 
4350         if (forward_search(reg, str, end, s, sch_range, &low, &high, &low_prev)) {
4351           sr[i].state = SRS_LOW_HIGH;
4352           sr[i].low  = low;
4353           sr[i].high = high;
4354           sr[i].low_prev = low_prev;
4355           sr[i].sch_range = sch_range;
4356         }
4357       }
4358       else {
4359         sch_range = (UChar* )end;
4360         if (forward_search(reg, str, end, s, sch_range,
4361                            &low, &high, (UChar** )NULL)) {
4362           goto total_active;
4363         }
4364       }
4365     }
4366     else {
4367     total_active:
4368       sr[i].state    = SRS_ALL_RANGE;
4369       sr[i].low      = s;
4370       sr[i].high     = (UChar* )range;
4371       sr[i].low_prev = prev;
4372     }
4373   }
4374 
4375 #define ACTIVATE_ALL_LOW_HIGH_SEARCH_THRESHOLD_LEN   500
4376 
4377   if (set->all_low_high != 0
4378       && range - start > ACTIVATE_ALL_LOW_HIGH_SEARCH_THRESHOLD_LEN) {
4379     do {
4380       int try_count = 0;
4381       for (i = 0; i < n; i++) {
4382         if (sr[i].state == SRS_DEAD) continue;
4383 
4384         if (s <  sr[i].low) continue;
4385         if (s >= sr[i].high) {
4386           if (forward_search(set->rs[i].reg, str, end, s, sr[i].sch_range,
4387                              &low, &high, &low_prev) != 0) {
4388             sr[i].low      = low;
4389             sr[i].high     = high;
4390             sr[i].low_prev = low_prev;
4391             if (s < low) continue;
4392           }
4393           else {
4394             sr[i].state = SRS_DEAD;
4395             continue;
4396           }
4397         }
4398 
4399         reg = set->rs[i].reg;
4400         REGSET_MATCH_AND_RETURN_CHECK(orig_range);
4401         try_count++;
4402       } /* for (i) */
4403 
4404       if (s >= range) break;
4405 
4406       if (try_count == 0) {
4407         low = (UChar* )range;
4408         for (i = 0; i < n; i++) {
4409           if (sr[i].state == SRS_LOW_HIGH && low > sr[i].low) {
4410             low = sr[i].low;
4411             low_prev = sr[i].low_prev;
4412           }
4413         }
4414         if (low == range) break;
4415 
4416         s = low;
4417         prev = low_prev;
4418       }
4419       else {
4420         prev = s;
4421         s += enclen(enc, s);
4422       }
4423     } while (1);
4424   }
4425   else {
4426     int prev_is_newline = 1;
4427     do {
4428       for (i = 0; i < n; i++) {
4429         if (sr[i].state == SRS_DEAD) continue;
4430         if (sr[i].state == SRS_LOW_HIGH) {
4431           if (s <  sr[i].low) continue;
4432           if (s >= sr[i].high) {
4433             if (forward_search(set->rs[i].reg, str, end, s, sr[i].sch_range,
4434                                &low, &high, &low_prev) != 0) {
4435               sr[i].low      = low;
4436               sr[i].high     = high;
4437               /* sr[i].low_prev = low_prev; */
4438               if (s < low) continue;
4439             }
4440             else {
4441               sr[i].state = SRS_DEAD;
4442               continue;
4443             }
4444           }
4445         }
4446 
4447         reg = set->rs[i].reg;
4448         if ((reg->anchor & ANCR_ANYCHAR_INF) == 0 || prev_is_newline != 0) {
4449           REGSET_MATCH_AND_RETURN_CHECK(orig_range);
4450         }
4451       }
4452 
4453       if (s >= range) break;
4454 
4455       if (set->anychar_inf != 0)
4456         prev_is_newline = ONIGENC_IS_MBC_NEWLINE(set->enc, s, end);
4457 
4458       prev = s;
4459       s += enclen(enc, s);
4460     } while (1);
4461   }
4462 
4463   xfree(sr);
4464   return ONIG_MISMATCH;
4465 
4466  finish:
4467   xfree(sr);
4468   return r;
4469 
4470  match:
4471   xfree(sr);
4472   *rmatch_pos = (int )(s - str);
4473   return i;
4474 }
4475 
4476 static inline int
regset_search_body_regex_lead(OnigRegSet * set,const UChar * str,const UChar * end,const UChar * start,const UChar * orig_range,OnigRegSetLead lead,OnigOptionType option,OnigMatchParam * mps[],int * rmatch_pos)4477 regset_search_body_regex_lead(OnigRegSet* set,
4478               const UChar* str, const UChar* end,
4479               const UChar* start, const UChar* orig_range, OnigRegSetLead lead,
4480               OnigOptionType option, OnigMatchParam* mps[], int* rmatch_pos)
4481 {
4482   int r;
4483   int i;
4484   int n;
4485   int match_index;
4486   const UChar* ep;
4487   regex_t* reg;
4488   OnigRegion* region;
4489 
4490   n = set->n;
4491 
4492   match_index = ONIG_MISMATCH;
4493   ep = orig_range;
4494   for (i = 0; i < n; i++) {
4495     reg    = set->rs[i].reg;
4496     region = set->rs[i].region;
4497     r = search_in_range(reg, str, end, start, ep, orig_range, region, option, mps[i]);
4498     if (r > 0) {
4499       if (str + r < ep) {
4500         match_index = i;
4501         *rmatch_pos = r;
4502         if (lead == ONIG_REGSET_PRIORITY_TO_REGEX_ORDER)
4503           break;
4504 
4505         ep = str + r;
4506       }
4507     }
4508     else if (r == 0) {
4509       match_index = i;
4510       *rmatch_pos = r;
4511       break;
4512     }
4513   }
4514 
4515   return match_index;
4516 }
4517 
4518 extern int
onig_regset_search_with_param(OnigRegSet * set,const UChar * str,const UChar * end,const UChar * start,const UChar * range,OnigRegSetLead lead,OnigOptionType option,OnigMatchParam * mps[],int * rmatch_pos)4519 onig_regset_search_with_param(OnigRegSet* set,
4520            const UChar* str, const UChar* end,
4521            const UChar* start, const UChar* range,
4522            OnigRegSetLead lead, OnigOptionType option, OnigMatchParam* mps[],
4523            int* rmatch_pos)
4524 {
4525   int r;
4526   int i;
4527   UChar *s, *prev;
4528   regex_t* reg;
4529   OnigEncoding enc;
4530   OnigRegion* region;
4531   MatchArg* msas;
4532   const UChar *orig_start = start;
4533   const UChar *orig_range = range;
4534 
4535   if (set->n == 0)
4536     return ONIG_MISMATCH;
4537 
4538   if (OPTON_POSIX_REGION(option))
4539     return ONIGERR_INVALID_ARGUMENT;
4540 
4541   r = 0;
4542   enc = set->enc;
4543   msas = (MatchArg* )NULL;
4544 
4545   for (i = 0; i < set->n; i++) {
4546     reg    = set->rs[i].reg;
4547     region = set->rs[i].region;
4548     ADJUST_MATCH_PARAM(reg, mps[i]);
4549     if (IS_NOT_NULL(region)) {
4550       r = onig_region_resize_clear(region, reg->num_mem + 1);
4551       if (r != 0) goto finish_no_msa;
4552     }
4553   }
4554 
4555   if (start > end || start < str) goto mismatch_no_msa;
4556   if (str < end) {
4557     /* forward search only */
4558     if (range <= start)
4559       return ONIGERR_INVALID_ARGUMENT;
4560   }
4561 
4562   if (OPTON_CHECK_VALIDITY_OF_STRING(option)) {
4563     if (! ONIGENC_IS_VALID_MBC_STRING(enc, str, end)) {
4564       r = ONIGERR_INVALID_WIDE_CHAR_VALUE;
4565       goto finish_no_msa;
4566     }
4567   }
4568 
4569   if (set->anchor != OPTIMIZE_NONE && str < end) {
4570     UChar *min_semi_end, *max_semi_end;
4571 
4572     if ((set->anchor & ANCR_BEGIN_POSITION) != 0) {
4573       /* search start-position only */
4574     begin_position:
4575       range = start + 1;
4576     }
4577     else if ((set->anchor & ANCR_BEGIN_BUF) != 0) {
4578       /* search str-position only */
4579       if (start != str) goto mismatch_no_msa;
4580       range = str + 1;
4581     }
4582     else if ((set->anchor & ANCR_END_BUF) != 0) {
4583       min_semi_end = max_semi_end = (UChar* )end;
4584 
4585     end_buf:
4586       if ((OnigLen )(max_semi_end - str) < set->anc_dmin)
4587         goto mismatch_no_msa;
4588 
4589       if ((OnigLen )(min_semi_end - start) > set->anc_dmax) {
4590         start = min_semi_end - set->anc_dmax;
4591         if (start < end)
4592           start = onigenc_get_right_adjust_char_head(enc, str, start);
4593       }
4594       if ((OnigLen )(max_semi_end - (range - 1)) < set->anc_dmin) {
4595         range = max_semi_end - set->anc_dmin + 1;
4596       }
4597       if (start > range) goto mismatch_no_msa;
4598     }
4599     else if ((set->anchor & ANCR_SEMI_END_BUF) != 0) {
4600       UChar* pre_end = ONIGENC_STEP_BACK(enc, str, end, 1);
4601 
4602       max_semi_end = (UChar* )end;
4603       if (ONIGENC_IS_MBC_NEWLINE(enc, pre_end, end)) {
4604         min_semi_end = pre_end;
4605 
4606 #ifdef USE_CRNL_AS_LINE_TERMINATOR
4607         pre_end = ONIGENC_STEP_BACK(enc, str, pre_end, 1);
4608         if (IS_NOT_NULL(pre_end) &&
4609             ONIGENC_IS_MBC_CRNL(enc, pre_end, end)) {
4610           min_semi_end = pre_end;
4611         }
4612 #endif
4613         if (min_semi_end > str && start <= min_semi_end) {
4614           goto end_buf;
4615         }
4616       }
4617       else {
4618         min_semi_end = (UChar* )end;
4619         goto end_buf;
4620       }
4621     }
4622     else if ((set->anchor & ANCR_ANYCHAR_INF_ML) != 0) {
4623       goto begin_position;
4624     }
4625   }
4626   else if (str == end) { /* empty string */
4627     start = end = str;
4628     s = (UChar* )start;
4629     prev = (UChar* )NULL;
4630 
4631     msas = (MatchArg* )xmalloc(sizeof(*msas) * set->n);
4632     CHECK_NULL_RETURN_MEMERR(msas);
4633     for (i = 0; i < set->n; i++) {
4634       reg = set->rs[i].reg;
4635       MATCH_ARG_INIT(msas[i], reg, option, set->rs[i].region, start, mps[i]);
4636     }
4637     for (i = 0; i < set->n; i++) {
4638       reg = set->rs[i].reg;
4639       if (reg->threshold_len == 0) {
4640         REGSET_MATCH_AND_RETURN_CHECK(end);
4641       }
4642     }
4643 
4644     goto mismatch;
4645   }
4646 
4647   if (lead == ONIG_REGSET_POSITION_LEAD) {
4648     msas = (MatchArg* )xmalloc(sizeof(*msas) * set->n);
4649     CHECK_NULL_RETURN_MEMERR(msas);
4650 
4651     for (i = 0; i < set->n; i++) {
4652       MATCH_ARG_INIT(msas[i], set->rs[i].reg, option, set->rs[i].region,
4653                      orig_start, mps[i]);
4654     }
4655 
4656     r = regset_search_body_position_lead(set, str, end, start, range,
4657                                          orig_range, option, msas, rmatch_pos);
4658   }
4659   else {
4660     r = regset_search_body_regex_lead(set, str, end, start, orig_range,
4661                                       lead, option, mps, rmatch_pos);
4662   }
4663   if (r < 0) goto finish;
4664   else       goto match2;
4665 
4666  mismatch:
4667   r = ONIG_MISMATCH;
4668  finish:
4669   for (i = 0; i < set->n; i++) {
4670     if (IS_NOT_NULL(msas))
4671       MATCH_ARG_FREE(msas[i]);
4672     if (OPTON_FIND_NOT_EMPTY(set->rs[i].reg->options) &&
4673         IS_NOT_NULL(set->rs[i].region)) {
4674       onig_region_clear(set->rs[i].region);
4675     }
4676   }
4677   if (IS_NOT_NULL(msas)) xfree(msas);
4678   return r;
4679 
4680  mismatch_no_msa:
4681   r = ONIG_MISMATCH;
4682  finish_no_msa:
4683   return r;
4684 
4685  match:
4686   *rmatch_pos = (int )(s - str);
4687  match2:
4688   for (i = 0; i < set->n; i++) {
4689     if (IS_NOT_NULL(msas))
4690       MATCH_ARG_FREE(msas[i]);
4691     if (OPTON_FIND_NOT_EMPTY(set->rs[i].reg->options) &&
4692         IS_NOT_NULL(set->rs[i].region)) {
4693       onig_region_clear(set->rs[i].region);
4694     }
4695   }
4696   if (IS_NOT_NULL(msas)) xfree(msas);
4697   return r; /* regex index */
4698 }
4699 
4700 extern int
onig_regset_search(OnigRegSet * set,const UChar * str,const UChar * end,const UChar * start,const UChar * range,OnigRegSetLead lead,OnigOptionType option,int * rmatch_pos)4701 onig_regset_search(OnigRegSet* set, const UChar* str, const UChar* end,
4702                    const UChar* start, const UChar* range,
4703                    OnigRegSetLead lead, OnigOptionType option, int* rmatch_pos)
4704 {
4705   int r;
4706   int i;
4707   OnigMatchParam* mp;
4708   OnigMatchParam** mps;
4709 
4710   mps = (OnigMatchParam** )xmalloc((sizeof(OnigMatchParam*) + sizeof(OnigMatchParam)) * set->n);
4711   CHECK_NULL_RETURN_MEMERR(mps);
4712 
4713   mp = (OnigMatchParam* )(mps + set->n);
4714 
4715   for (i = 0; i < set->n; i++) {
4716     onig_initialize_match_param(mp + i);
4717     mps[i] = mp + i;
4718   }
4719 
4720   r = onig_regset_search_with_param(set, str, end, start, range, lead, option, mps,
4721                                     rmatch_pos);
4722   for (i = 0; i < set->n; i++)
4723     onig_free_match_param_content(mp + i);
4724 
4725   xfree(mps);
4726 
4727   return r;
4728 }
4729 
4730 #endif /* USE_REGSET */
4731 
4732 
4733 static UChar*
slow_search(OnigEncoding enc,UChar * target,UChar * target_end,const UChar * text,const UChar * text_end,UChar * text_range)4734 slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
4735             const UChar* text, const UChar* text_end, UChar* text_range)
4736 {
4737   UChar *t, *p, *s, *end;
4738 
4739   end = (UChar* )text_end;
4740   end -= target_end - target - 1;
4741   if (end > text_range)
4742     end = text_range;
4743 
4744   s = (UChar* )text;
4745 
4746   while (s < end) {
4747     if (*s == *target) {
4748       p = s + 1;
4749       t = target + 1;
4750       while (t < target_end) {
4751         if (*t != *p++)
4752           break;
4753         t++;
4754       }
4755       if (t == target_end)
4756         return s;
4757     }
4758     s += enclen(enc, s);
4759   }
4760 
4761   return (UChar* )NULL;
4762 }
4763 
4764 static UChar*
slow_search_backward(OnigEncoding enc,UChar * target,UChar * target_end,const UChar * text,const UChar * adjust_text,const UChar * text_end,const UChar * text_start)4765 slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
4766                      const UChar* text, const UChar* adjust_text,
4767                      const UChar* text_end, const UChar* text_start)
4768 {
4769   UChar *t, *p, *s;
4770 
4771   s = (UChar* )text_end;
4772   s -= (target_end - target);
4773   if (s > text_start)
4774     s = (UChar* )text_start;
4775   else
4776     s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
4777 
4778   while (s >= text) {
4779     if (*s == *target) {
4780       p = s + 1;
4781       t = target + 1;
4782       while (t < target_end) {
4783         if (*t != *p++)
4784           break;
4785         t++;
4786       }
4787       if (t == target_end)
4788         return s;
4789     }
4790     s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);
4791   }
4792 
4793   return (UChar* )NULL;
4794 }
4795 
4796 static UChar*
sunday_quick_search_step_forward(regex_t * reg,const UChar * target,const UChar * target_end,const UChar * text,const UChar * text_end,const UChar * text_range)4797 sunday_quick_search_step_forward(regex_t* reg,
4798                                  const UChar* target, const UChar* target_end,
4799                                  const UChar* text, const UChar* text_end,
4800                                  const UChar* text_range)
4801 {
4802   const UChar *s, *se, *t, *p, *end;
4803   const UChar *tail;
4804   int skip, tlen1;
4805   int map_offset;
4806   OnigEncoding enc;
4807 
4808 #ifdef ONIG_DEBUG_SEARCH
4809   fprintf(DBGFP,
4810   "sunday_quick_search_step_forward: text: %p, text_end: %p, text_range: %p\n",
4811           text, text_end, text_range);
4812 #endif
4813 
4814   enc = reg->enc;
4815 
4816   tail = target_end - 1;
4817   tlen1 = (int )(tail - target);
4818   end = text_range;
4819   if (end + tlen1 > text_end)
4820     end = text_end - tlen1;
4821 
4822   map_offset = reg->map_offset;
4823   s = text;
4824 
4825   while (s < end) {
4826     p = se = s + tlen1;
4827     t = tail;
4828     while (*p == *t) {
4829       if (t == target) return (UChar* )s;
4830       p--; t--;
4831     }
4832     if (se + map_offset >= text_end) break;
4833     skip = reg->map[*(se + map_offset)];
4834 #if 0
4835     t = s;
4836     do {
4837       s += enclen(enc, s);
4838     } while ((s - t) < skip && s < end);
4839 #else
4840     s += skip;
4841     if (s < end)
4842       s = onigenc_get_right_adjust_char_head(enc, text, s);
4843 #endif
4844   }
4845 
4846   return (UChar* )NULL;
4847 }
4848 
4849 static UChar*
sunday_quick_search(regex_t * reg,const UChar * target,const UChar * target_end,const UChar * text,const UChar * text_end,const UChar * text_range)4850 sunday_quick_search(regex_t* reg, const UChar* target, const UChar* target_end,
4851                     const UChar* text, const UChar* text_end,
4852                     const UChar* text_range)
4853 {
4854   const UChar *s, *t, *p, *end;
4855   const UChar *tail;
4856   int map_offset;
4857 
4858   end = text_range + (target_end - target);
4859   if (end > text_end)
4860     end = text_end;
4861 
4862   map_offset = reg->map_offset;
4863   tail = target_end - 1;
4864   s = text + (tail - target);
4865 
4866   while (s < end) {
4867     p = s;
4868     t = tail;
4869     while (*p == *t) {
4870       if (t == target) return (UChar* )p;
4871       p--; t--;
4872     }
4873     if (s + map_offset >= text_end) break;
4874     s += reg->map[*(s + map_offset)];
4875   }
4876 
4877   return (UChar* )NULL;
4878 }
4879 
4880 static UChar*
map_search(OnigEncoding enc,UChar map[],const UChar * text,const UChar * text_range)4881 map_search(OnigEncoding enc, UChar map[],
4882            const UChar* text, const UChar* text_range)
4883 {
4884   const UChar *s = text;
4885 
4886   while (s < text_range) {
4887     if (map[*s]) return (UChar* )s;
4888 
4889     s += enclen(enc, s);
4890   }
4891   return (UChar* )NULL;
4892 }
4893 
4894 static UChar*
map_search_backward(OnigEncoding enc,UChar map[],const UChar * text,const UChar * adjust_text,const UChar * text_start)4895 map_search_backward(OnigEncoding enc, UChar map[],
4896                     const UChar* text, const UChar* adjust_text,
4897                     const UChar* text_start)
4898 {
4899   const UChar *s = text_start;
4900 
4901   while (s >= text) {
4902     if (map[*s]) return (UChar* )s;
4903 
4904     s = onigenc_get_prev_char_head(enc, adjust_text, s);
4905   }
4906   return (UChar* )NULL;
4907 }
4908 extern int
onig_match(regex_t * reg,const UChar * str,const UChar * end,const UChar * at,OnigRegion * region,OnigOptionType option)4909 onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at,
4910            OnigRegion* region, OnigOptionType option)
4911 {
4912   int r;
4913   OnigMatchParam mp;
4914 
4915   onig_initialize_match_param(&mp);
4916   r = onig_match_with_param(reg, str, end, at, region, option, &mp);
4917   onig_free_match_param_content(&mp);
4918   return r;
4919 }
4920 
4921 extern int
onig_match_with_param(regex_t * reg,const UChar * str,const UChar * end,const UChar * at,OnigRegion * region,OnigOptionType option,OnigMatchParam * mp)4922 onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end,
4923                       const UChar* at, OnigRegion* region, OnigOptionType option,
4924                       OnigMatchParam* mp)
4925 {
4926   int r;
4927   UChar *prev;
4928   MatchArg msa;
4929 
4930   ADJUST_MATCH_PARAM(reg, mp);
4931   MATCH_ARG_INIT(msa, reg, option, region, at, mp);
4932   if (region
4933 #ifdef USE_POSIX_API_REGION_OPTION
4934       && !OPTON_POSIX_REGION(option)
4935 #endif
4936       ) {
4937     r = onig_region_resize_clear(region, reg->num_mem + 1);
4938   }
4939   else
4940     r = 0;
4941 
4942   if (r == 0) {
4943     if (OPTON_CHECK_VALIDITY_OF_STRING(option)) {
4944       if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end)) {
4945         r = ONIGERR_INVALID_WIDE_CHAR_VALUE;
4946         goto end;
4947       }
4948     }
4949 
4950     prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at);
4951     r = match_at(reg, str, end, end, at, prev, &msa);
4952   }
4953 
4954  end:
4955   MATCH_ARG_FREE(msa);
4956   return r;
4957 }
4958 
4959 static int
forward_search(regex_t * reg,const UChar * str,const UChar * end,UChar * start,UChar * range,UChar ** low,UChar ** high,UChar ** low_prev)4960 forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start,
4961                UChar* range, UChar** low, UChar** high, UChar** low_prev)
4962 {
4963   UChar *p, *pprev = (UChar* )NULL;
4964 
4965 #ifdef ONIG_DEBUG_SEARCH
4966   fprintf(DBGFP, "forward_search: str: %p, end: %p, start: %p, range: %p\n",
4967           str, end, start, range);
4968 #endif
4969 
4970   p = start;
4971   if (reg->dist_min != 0) {
4972     if (end - p <= reg->dist_min)
4973       return 0; /* fail */
4974 
4975     if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
4976       p += reg->dist_min;
4977     }
4978     else {
4979       UChar *q = p + reg->dist_min;
4980       while (p < q) p += enclen(reg->enc, p);
4981     }
4982   }
4983 
4984  retry:
4985   switch (reg->optimize) {
4986   case OPTIMIZE_STR:
4987     p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
4988     break;
4989 
4990   case OPTIMIZE_STR_FAST:
4991     p = sunday_quick_search(reg, reg->exact, reg->exact_end, p, end, range);
4992     break;
4993 
4994   case OPTIMIZE_STR_FAST_STEP_FORWARD:
4995     p = sunday_quick_search_step_forward(reg, reg->exact, reg->exact_end,
4996                                          p, end, range);
4997     break;
4998 
4999   case OPTIMIZE_MAP:
5000     p = map_search(reg->enc, reg->map, p, range);
5001     break;
5002   }
5003 
5004   if (p && p < range) {
5005     if (p - start < reg->dist_min) {
5006     retry_gate:
5007       pprev = p;
5008       p += enclen(reg->enc, p);
5009       goto retry;
5010     }
5011 
5012     if (reg->sub_anchor) {
5013       UChar* prev;
5014 
5015       switch (reg->sub_anchor) {
5016       case ANCR_BEGIN_LINE:
5017         if (!ON_STR_BEGIN(p)) {
5018           prev = onigenc_get_prev_char_head(reg->enc, (pprev ? pprev : str), p);
5019           if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
5020             goto retry_gate;
5021         }
5022         break;
5023 
5024       case ANCR_END_LINE:
5025         if (ON_STR_END(p)) {
5026 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
5027           prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
5028                                                      (pprev ? pprev : str), p);
5029           if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
5030             goto retry_gate;
5031 #endif
5032         }
5033         else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
5034 #ifdef USE_CRNL_AS_LINE_TERMINATOR
5035                  && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
5036 #endif
5037                  )
5038           goto retry_gate;
5039 
5040         break;
5041       }
5042     }
5043 
5044     if (reg->dist_max == 0) {
5045       *low = p;
5046       if (low_prev) {
5047         if (*low > start)
5048           *low_prev = onigenc_get_prev_char_head(reg->enc, start, p);
5049         else
5050           *low_prev = onigenc_get_prev_char_head(reg->enc,
5051                                                  (pprev ? pprev : str), p);
5052       }
5053       *high = p;
5054     }
5055     else {
5056       if (reg->dist_max != INFINITE_LEN) {
5057         if (p - str < reg->dist_max) {
5058           *low = (UChar* )str;
5059           if (low_prev)
5060             *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low);
5061         }
5062         else {
5063           *low = p - reg->dist_max;
5064           if (*low > start) {
5065             *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, start,
5066                                                  *low, (const UChar** )low_prev);
5067           }
5068           else {
5069             if (low_prev)
5070               *low_prev = onigenc_get_prev_char_head(reg->enc,
5071                                                      (pprev ? pprev : str), *low);
5072           }
5073         }
5074       }
5075       /* no needs to adjust *high, *high is used as range check only */
5076       if (p - str < reg->dist_min)
5077         *high = (UChar* )str;
5078       else
5079         *high = p - reg->dist_min;
5080     }
5081 
5082 #ifdef ONIG_DEBUG_SEARCH
5083     fprintf(DBGFP,
5084             "forward_search success: low: %d, high: %d, dmin: %u, dmax: %u\n",
5085             (int )(*low - str), (int )(*high - str),
5086             reg->dist_min, reg->dist_max);
5087 #endif
5088     return 1; /* success */
5089   }
5090 
5091   return 0; /* fail */
5092 }
5093 
5094 
5095 static int
backward_search(regex_t * reg,const UChar * str,const UChar * end,UChar * s,const UChar * range,UChar * adjrange,UChar ** low,UChar ** high)5096 backward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
5097                 const UChar* range, UChar* adjrange, UChar** low, UChar** high)
5098 {
5099   UChar *p;
5100 
5101   p = s;
5102 
5103  retry:
5104   switch (reg->optimize) {
5105   case OPTIMIZE_STR:
5106   exact_method:
5107     p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
5108                              range, adjrange, end, p);
5109     break;
5110 
5111   case OPTIMIZE_STR_FAST:
5112   case OPTIMIZE_STR_FAST_STEP_FORWARD:
5113     goto exact_method;
5114     break;
5115 
5116   case OPTIMIZE_MAP:
5117     p = map_search_backward(reg->enc, reg->map, range, adjrange, p);
5118     break;
5119   }
5120 
5121   if (p) {
5122     if (reg->sub_anchor) {
5123       UChar* prev;
5124 
5125       switch (reg->sub_anchor) {
5126       case ANCR_BEGIN_LINE:
5127         if (!ON_STR_BEGIN(p)) {
5128           prev = onigenc_get_prev_char_head(reg->enc, str, p);
5129           if (IS_NOT_NULL(prev) && !ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
5130             p = prev;
5131             goto retry;
5132           }
5133         }
5134         break;
5135 
5136       case ANCR_END_LINE:
5137         if (ON_STR_END(p)) {
5138 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
5139           prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
5140           if (IS_NULL(prev)) goto fail;
5141           if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
5142             p = prev;
5143             goto retry;
5144           }
5145 #endif
5146         }
5147         else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
5148 #ifdef USE_CRNL_AS_LINE_TERMINATOR
5149                  && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
5150 #endif
5151                  ) {
5152           p = onigenc_get_prev_char_head(reg->enc, adjrange, p);
5153           if (IS_NULL(p)) goto fail;
5154           goto retry;
5155         }
5156         break;
5157       }
5158     }
5159 
5160     if (reg->dist_max != INFINITE_LEN) {
5161       if (p - str < reg->dist_max)
5162         *low = (UChar* )str;
5163       else
5164         *low = p - reg->dist_max;
5165 
5166       if (reg->dist_min != 0) {
5167         if (p - str < reg->dist_min)
5168           *high = (UChar* )str;
5169         else
5170           *high = p - reg->dist_min;
5171       }
5172       else {
5173         *high = p;
5174       }
5175 
5176       *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high);
5177     }
5178 
5179 #ifdef ONIG_DEBUG_SEARCH
5180     fprintf(DBGFP, "backward_search: low: %d, high: %d\n",
5181             (int )(*low - str), (int )(*high - str));
5182 #endif
5183     return 1; /* success */
5184   }
5185 
5186  fail:
5187 #ifdef ONIG_DEBUG_SEARCH
5188   fprintf(DBGFP, "backward_search: fail.\n");
5189 #endif
5190   return 0; /* fail */
5191 }
5192 
5193 
5194 extern int
onig_search(regex_t * reg,const UChar * str,const UChar * end,const UChar * start,const UChar * range,OnigRegion * region,OnigOptionType option)5195 onig_search(regex_t* reg, const UChar* str, const UChar* end,
5196             const UChar* start, const UChar* range, OnigRegion* region,
5197             OnigOptionType option)
5198 {
5199   int r;
5200   OnigMatchParam mp;
5201   const UChar* data_range;
5202 
5203   onig_initialize_match_param(&mp);
5204 
5205   /* The following is an expanded code of onig_search_with_param()  */
5206   if (range > start)
5207     data_range = range;
5208   else
5209     data_range = end;
5210 
5211   r = search_in_range(reg, str, end, start, range, data_range, region,
5212                       option, &mp);
5213 
5214   onig_free_match_param_content(&mp);
5215   return r;
5216 
5217 }
5218 
/*
 * Search driver shared by onig_search() and onig_search_with_param().
 *
 * Calls match_at() at successive candidate positions `s`:
 *   - range > start : forward, stepping one character at a time toward
 *                     range; data_range is handed to match_at() as the
 *                     upper range.
 *   - otherwise     : backward, stepping from start down to range;
 *                     orig_start advanced by one character is handed to
 *                     match_at() as the upper range.
 *
 * Before the loop, the regex's anchor information may narrow start/range
 * or decide a mismatch outright, and forward_search()/backward_search()
 * are used to skip positions when reg->optimize is not OPTIMIZE_NONE.
 *
 * Returns the offset (s - str) of the position where match_at() succeeded,
 * ONIG_MISMATCH when nothing matched, or an error code.
 *
 * The *_no_msa labels are exits taken before MATCH_ARG_INIT has run; the
 * other exits release msa with MATCH_ARG_FREE.
 */
static int
search_in_range(regex_t* reg, const UChar* str, const UChar* end,
                const UChar* start, const UChar* range, /* match start range */
                const UChar* data_range, /* subject string range */
                OnigRegion* region,
                OnigOptionType option, OnigMatchParam* mp)
{
  int r;
  UChar *s, *prev;
  MatchArg msa;
  const UChar *orig_start = start;

#ifdef ONIG_DEBUG_SEARCH
  fprintf(DBGFP,
     "onig_search (entry point): str: %p, end: %d, start: %d, range: %d\n",
     str, (int )(end - str), (int )(start - str), (int )(range - str));
#endif

  ADJUST_MATCH_PARAM(reg, mp);

  /* size the region for group 0 plus every capture group */
  if (region
#ifdef USE_POSIX_API_REGION_OPTION
      && ! OPTON_POSIX_REGION(option)
#endif
      ) {
    r = onig_region_resize_clear(region, reg->num_mem + 1);
    if (r != 0) goto finish_no_msa;
  }

  /* a start position outside [str, end] can never match */
  if (start > end || start < str) goto mismatch_no_msa;

  if (OPTON_CHECK_VALIDITY_OF_STRING(option)) {
    if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end)) {
      r = ONIGERR_INVALID_WIDE_CHAR_VALUE;
      goto finish_no_msa;
    }
  }


/* MATCH_AND_RETURN_CHECK: run match_at() at the current s/prev and leave
   the function on a match or on an error; fall through on mismatch.
   With USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE and the FIND_LONGEST option,
   a match does not stop the scan (see the `mismatch` label). */
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
#define MATCH_AND_RETURN_CHECK(upper_range) \
  r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
  if (r != ONIG_MISMATCH) {\
    if (r >= 0) {\
      if (! OPTON_FIND_LONGEST(reg->options)) {\
        goto match;\
      }\
    }\
    else goto finish; /* error */ \
  }
#else
#define MATCH_AND_RETURN_CHECK(upper_range) \
  r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
  if (r != ONIG_MISMATCH) {\
    if (r >= 0) {\
      goto match;\
    }\
    else goto finish; /* error */ \
  }
#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */


  /* anchor optimize: resume search range */
  if (reg->anchor != 0 && str < end) {
    UChar *min_semi_end, *max_semi_end;

    if (reg->anchor & ANCR_BEGIN_POSITION) {
      /* search start-position only */
    begin_position:
      if (range > start)
        range = start + 1;
      else
        range = start;
    }
    else if (reg->anchor & ANCR_BEGIN_BUF) {
      /* search str-position only */
      if (range > start) {
        if (start != str) goto mismatch_no_msa;
        range = str + 1;
      }
      else {
        if (range <= str) {
          start = str;
          range = str;
        }
        else
          goto mismatch_no_msa;
      }
    }
    else if (reg->anchor & ANCR_END_BUF) {
      min_semi_end = max_semi_end = (UChar* )end;

    end_buf:
      /* subject shorter than the minimum distance to the end anchor */
      if ((OnigLen )(max_semi_end - str) < reg->anc_dist_min)
        goto mismatch_no_msa;

      if (range > start) {
        /* forward: move start up so it is at most anc_dist_max from the
           end, and pull range down so it is at least anc_dist_min from it */
        if (reg->anc_dist_max != INFINITE_LEN &&
            min_semi_end - start > reg->anc_dist_max) {
          start = min_semi_end - reg->anc_dist_max;
          if (start < end)
            start = onigenc_get_right_adjust_char_head(reg->enc, str, start);
        }
        if (max_semi_end - (range - 1) < reg->anc_dist_min) {
          if (max_semi_end - str + 1 < reg->anc_dist_min)
            goto mismatch_no_msa;
          else
            range = max_semi_end - reg->anc_dist_min + 1;
        }

        if (start > range) goto mismatch_no_msa;
        /* If start == range, match with empty at end.
           Backward search is used. */
      }
      else {
        /* backward: the same narrowing with the roles of start and
           range exchanged */
        if (reg->anc_dist_max != INFINITE_LEN &&
            min_semi_end - range > reg->anc_dist_max) {
          range = min_semi_end - reg->anc_dist_max;
        }
        if (max_semi_end - start < reg->anc_dist_min) {
          if (max_semi_end - str < reg->anc_dist_min)
            goto mismatch_no_msa;
          else {
            start = max_semi_end - reg->anc_dist_min;
            start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start);
          }
        }
        if (range > start) goto mismatch_no_msa;
      }
    }
    else if (reg->anchor & ANCR_SEMI_END_BUF) {
      UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, 1);

      max_semi_end = (UChar* )end;
      /* when the subject ends with a newline, the anchor may also sit
         just before that newline */
      if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
        min_semi_end = pre_end;

#ifdef USE_CRNL_AS_LINE_TERMINATOR
        pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, 1);
        if (IS_NOT_NULL(pre_end) &&
            ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
          min_semi_end = pre_end;
        }
#endif
        if (min_semi_end > str && start <= min_semi_end) {
          goto end_buf;
        }
      }
      else {
        min_semi_end = (UChar* )end;
        goto end_buf;
      }
    }
    else if ((reg->anchor & ANCR_ANYCHAR_INF_ML)) {
      goto begin_position;
    }
  }
  else if (str == end) { /* empty string */
    static const UChar* address_for_empty_string = (UChar* )"";

#ifdef ONIG_DEBUG_SEARCH
    fprintf(DBGFP, "onig_search: empty string.\n");
#endif

    /* only a regex whose threshold_len is 0 is tried on an empty subject */
    if (reg->threshold_len == 0) {
      start = end = str = address_for_empty_string;
      s = (UChar* )start;
      prev = (UChar* )NULL;

      MATCH_ARG_INIT(msa, reg, option, region, start, mp);
      MATCH_AND_RETURN_CHECK(end);
      goto mismatch;
    }
    goto mismatch_no_msa;
  }

#ifdef ONIG_DEBUG_SEARCH
  fprintf(DBGFP, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
          (int )(end - str), (int )(start - str), (int )(range - str));
#endif

  /* msa is initialized with the caller's original start, not the
     anchor-adjusted one */
  MATCH_ARG_INIT(msa, reg, option, region, orig_start, mp);

  s = (UChar* )start;
  if (range > start) {   /* forward search */
    if (s > str)
      prev = onigenc_get_prev_char_head(reg->enc, str, s);
    else
      prev = (UChar* )NULL;

    if (reg->optimize != OPTIMIZE_NONE) {
      UChar *sch_range, *low, *high, *low_prev;

      /* sch_range: limit handed to forward_search(); range extended by
         dist_max and capped at end */
      if (reg->dist_max != 0) {
        if (reg->dist_max == INFINITE_LEN)
          sch_range = (UChar* )end;
        else {
          if ((end - range) < reg->dist_max)
            sch_range = (UChar* )end;
          else {
            sch_range = (UChar* )range + reg->dist_max;
          }
        }
      }
      else
        sch_range = (UChar* )range;

      if ((end - start) < reg->threshold_len)
        goto mismatch;

      if (reg->dist_max != INFINITE_LEN) {
        /* finite distance: only positions in [low, high] reported by
           forward_search() are tried */
        do {
          if (! forward_search(reg, str, end, s, sch_range, &low, &high,
                               &low_prev)) goto mismatch;
          if (s < low) {
            s    = low;
            prev = low_prev;
          }
          while (s <= high) {
            MATCH_AND_RETURN_CHECK(data_range);
            prev = s;
            s += enclen(reg->enc, s);
          }
        } while (s < range);
        goto mismatch;
      }
      else { /* check only. */
        if (! forward_search(reg, str, end, s, sch_range, &low, &high,
                             (UChar** )NULL)) goto mismatch;

        if ((reg->anchor & ANCR_ANYCHAR_INF) != 0 &&
            (reg->anchor & (ANCR_LOOK_BEHIND | ANCR_PREC_READ_NOT)) == 0) {
          do {
            MATCH_AND_RETURN_CHECK(data_range);
            prev = s;
            s += enclen(reg->enc, s);

            /* after a failed try, skip ahead to just past the next
               newline before trying again */
            while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) {
              prev = s;
              s += enclen(reg->enc, s);
            }
          } while (s < range);
          goto mismatch;
        }
      }
    }

    /* plain forward scan: try every character position below range */
    do {
      MATCH_AND_RETURN_CHECK(data_range);
      prev = s;
      s += enclen(reg->enc, s);
    } while (s < range);

    if (s == range) { /* because empty match with /$/. */
      MATCH_AND_RETURN_CHECK(data_range);
    }
  }
  else {  /* backward search */
    if (range < str) goto mismatch;

    if (orig_start < end)
      orig_start += enclen(reg->enc, orig_start); /* is upper range */

    if (reg->optimize != OPTIMIZE_NONE) {
      UChar *low, *high, *adjrange, *sch_start;
      const UChar *min_range;

      if ((end - range) < reg->threshold_len) goto mismatch;

      if (range < end)
        adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range);
      else
        adjrange = (UChar* )end;

      /* min_range: range moved forward by dist_min, capped at end */
      if (end - range > reg->dist_min)
        min_range = range + reg->dist_min;
      else
        min_range = end;

      if (reg->dist_max != INFINITE_LEN) {
        /* finite distance: only positions in [low, high] reported by
           backward_search() are tried */
        do {
          if (end - s > reg->dist_max)
            sch_start = s + reg->dist_max;
          else {
            sch_start = onigenc_get_prev_char_head(reg->enc, str, end);
          }

          if (backward_search(reg, str, end, sch_start, min_range, adjrange,
                              &low, &high) <= 0)
            goto mismatch;

          if (s > high)
            s = high;

          while (s >= low) {
            prev = onigenc_get_prev_char_head(reg->enc, str, s);
            MATCH_AND_RETURN_CHECK(orig_start);
            s = prev;
          }
        } while (s >= range);
        goto mismatch;
      }
      else { /* check only. */
        sch_start = onigenc_get_prev_char_head(reg->enc, str, end);

        if (backward_search(reg, str, end, sch_start, min_range, adjrange,
                            &low, &high) <= 0) goto mismatch;
      }
    }

    /* plain backward scan: try every character position down to range */
    do {
      prev = onigenc_get_prev_char_head(reg->enc, str, s);
      MATCH_AND_RETURN_CHECK(orig_start);
      s = prev;
    } while (s >= range);
  }

 mismatch:
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
  /* in find-longest mode the scan ran to the end of the range; report
     the best match recorded in msa, if there is one */
  if (OPTON_FIND_LONGEST(reg->options)) {
    if (msa.best_len >= 0) {
      s = msa.best_s;
      goto match;
    }
  }
#endif
  r = ONIG_MISMATCH;

 finish:
  MATCH_ARG_FREE(msa);

  /* If result is mismatch and no FIND_NOT_EMPTY option,
     then the region is not set in match_at(). */
  if (OPTON_FIND_NOT_EMPTY(reg->options) && region
#ifdef USE_POSIX_API_REGION_OPTION
      && !OPTON_POSIX_REGION(option)
#endif
      ) {
    onig_region_clear(region);
  }

#ifdef ONIG_DEBUG
  if (r != ONIG_MISMATCH)
    fprintf(DBGFP, "onig_search: error %d\n", r);
#endif
  return r;

  /* exits used before msa has been initialized: nothing to free */
 mismatch_no_msa:
  r = ONIG_MISMATCH;
 finish_no_msa:
#ifdef ONIG_DEBUG
  if (r != ONIG_MISMATCH)
    fprintf(DBGFP, "onig_search: error %d\n", r);
#endif
  return r;

 match:
  MATCH_ARG_FREE(msa);
  return (int )(s - str);
}
5579 
5580 extern int
onig_search_with_param(regex_t * reg,const UChar * str,const UChar * end,const UChar * start,const UChar * range,OnigRegion * region,OnigOptionType option,OnigMatchParam * mp)5581 onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,
5582                        const UChar* start, const UChar* range, OnigRegion* region,
5583                        OnigOptionType option, OnigMatchParam* mp)
5584 {
5585   const UChar* data_range;
5586 
5587   if (range > start)
5588     data_range = range;
5589   else
5590     data_range = end;
5591 
5592   return search_in_range(reg, str, end, start, range, data_range, region,
5593                          option, mp);
5594 }
5595 
5596 extern int
onig_scan(regex_t * reg,const UChar * str,const UChar * end,OnigRegion * region,OnigOptionType option,int (* scan_callback)(int,int,OnigRegion *,void *),void * callback_arg)5597 onig_scan(regex_t* reg, const UChar* str, const UChar* end,
5598           OnigRegion* region, OnigOptionType option,
5599           int (*scan_callback)(int, int, OnigRegion*, void*),
5600           void* callback_arg)
5601 {
5602   int r;
5603   int n;
5604   int rs;
5605   const UChar* start;
5606 
5607   if (OPTON_CHECK_VALIDITY_OF_STRING(option)) {
5608     if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end))
5609       return ONIGERR_INVALID_WIDE_CHAR_VALUE;
5610 
5611     ONIG_OPTION_OFF(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING);
5612   }
5613 
5614   n = 0;
5615   start = str;
5616   while (1) {
5617     r = onig_search(reg, str, end, start, end, region, option);
5618     if (r >= 0) {
5619       rs = scan_callback(n, r, region, callback_arg);
5620       n++;
5621       if (rs != 0)
5622         return rs;
5623 
5624       if (region->end[0] == start - str) {
5625         if (start >= end) break;
5626         start += enclen(reg->enc, start);
5627       }
5628       else
5629         start = str + region->end[0];
5630 
5631       if (start > end)
5632         break;
5633     }
5634     else if (r == ONIG_MISMATCH) {
5635       break;
5636     }
5637     else { /* error */
5638       return r;
5639     }
5640   }
5641 
5642   return n;
5643 }
5644 
5645 extern OnigEncoding
onig_get_encoding(regex_t * reg)5646 onig_get_encoding(regex_t* reg)
5647 {
5648   return reg->enc;
5649 }
5650 
5651 extern OnigOptionType
onig_get_options(regex_t * reg)5652 onig_get_options(regex_t* reg)
5653 {
5654   return reg->options;
5655 }
5656 
5657 extern  OnigCaseFoldType
onig_get_case_fold_flag(regex_t * reg)5658 onig_get_case_fold_flag(regex_t* reg)
5659 {
5660   return reg->case_fold_flag;
5661 }
5662 
5663 extern OnigSyntaxType*
onig_get_syntax(regex_t * reg)5664 onig_get_syntax(regex_t* reg)
5665 {
5666   return reg->syntax;
5667 }
5668 
5669 extern int
onig_number_of_captures(regex_t * reg)5670 onig_number_of_captures(regex_t* reg)
5671 {
5672   return reg->num_mem;
5673 }
5674 
5675 extern int
onig_number_of_capture_histories(regex_t * reg)5676 onig_number_of_capture_histories(regex_t* reg)
5677 {
5678 #ifdef USE_CAPTURE_HISTORY
5679   int i, n;
5680 
5681   n = 0;
5682   for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
5683     if (MEM_STATUS_AT(reg->capture_history, i) != 0)
5684       n++;
5685   }
5686   return n;
5687 #else
5688   return 0;
5689 #endif
5690 }
5691 
5692 extern void
onig_copy_encoding(OnigEncoding to,OnigEncoding from)5693 onig_copy_encoding(OnigEncoding to, OnigEncoding from)
5694 {
5695   *to = *from;
5696 }
5697 
5698 #ifdef USE_REGSET
5699 
5700 extern int
onig_regset_new(OnigRegSet ** rset,int n,regex_t * regs[])5701 onig_regset_new(OnigRegSet** rset, int n, regex_t* regs[])
5702 {
5703 #define REGSET_INITIAL_ALLOC_SIZE   10
5704 
5705   int i;
5706   int r;
5707   int alloc;
5708   OnigRegSet* set;
5709   RR* rs;
5710 
5711   *rset = 0;
5712 
5713   set = (OnigRegSet* )xmalloc(sizeof(*set));
5714   CHECK_NULL_RETURN_MEMERR(set);
5715 
5716   alloc = n > REGSET_INITIAL_ALLOC_SIZE ? n : REGSET_INITIAL_ALLOC_SIZE;
5717   rs = (RR* )xmalloc(sizeof(set->rs[0]) * alloc);
5718   if (IS_NULL(rs)) {
5719     xfree(set);
5720     return ONIGERR_MEMORY;
5721   }
5722 
5723   set->rs    = rs;
5724   set->n     = 0;
5725   set->alloc = alloc;
5726 
5727   for (i = 0; i < n; i++) {
5728     regex_t* reg = regs[i];
5729 
5730     r = onig_regset_add(set, reg);
5731     if (r != 0) {
5732       for (i = 0; i < set->n; i++) {
5733         OnigRegion* region = set->rs[i].region;
5734         if (IS_NOT_NULL(region))
5735           onig_region_free(region, 1);
5736       }
5737       xfree(set->rs);
5738       xfree(set);
5739       return r;
5740     }
5741   }
5742 
5743   *rset = set;
5744   return 0;
5745 }
5746 
5747 static void
update_regset_by_reg(OnigRegSet * set,regex_t * reg)5748 update_regset_by_reg(OnigRegSet* set, regex_t* reg)
5749 {
5750   if (set->n == 1) {
5751     set->enc          = reg->enc;
5752     set->anchor       = reg->anchor;
5753     set->anc_dmin     = reg->anc_dist_min;
5754     set->anc_dmax     = reg->anc_dist_max;
5755     set->all_low_high =
5756       (reg->optimize == OPTIMIZE_NONE || reg->dist_max == INFINITE_LEN) ? 0 : 1;
5757     set->anychar_inf  = (reg->anchor & ANCR_ANYCHAR_INF) != 0 ? 1 : 0;
5758   }
5759   else {
5760     int anchor;
5761 
5762     anchor = set->anchor & reg->anchor;
5763     if (anchor != 0) {
5764       OnigLen anc_dmin;
5765       OnigLen anc_dmax;
5766 
5767       anc_dmin = set->anc_dmin;
5768       anc_dmax = set->anc_dmax;
5769       if (anc_dmin > reg->anc_dist_min) anc_dmin = reg->anc_dist_min;
5770       if (anc_dmax < reg->anc_dist_max) anc_dmax = reg->anc_dist_max;
5771       set->anc_dmin = anc_dmin;
5772       set->anc_dmax = anc_dmax;
5773     }
5774 
5775     set->anchor = anchor;
5776 
5777     if (reg->optimize == OPTIMIZE_NONE || reg->dist_max == INFINITE_LEN)
5778       set->all_low_high = 0;
5779 
5780     if ((reg->anchor & ANCR_ANYCHAR_INF) != 0)
5781       set->anychar_inf = 1;
5782   }
5783 }
5784 
5785 extern int
onig_regset_add(OnigRegSet * set,regex_t * reg)5786 onig_regset_add(OnigRegSet* set, regex_t* reg)
5787 {
5788   OnigRegion* region;
5789 
5790   if (OPTON_FIND_LONGEST(reg->options))
5791     return ONIGERR_INVALID_ARGUMENT;
5792 
5793   if (set->n != 0 && reg->enc != set->enc)
5794     return ONIGERR_INVALID_ARGUMENT;
5795 
5796   if (set->n >= set->alloc) {
5797     RR* nrs;
5798     int new_alloc;
5799 
5800     new_alloc = set->alloc * 2;
5801     nrs = (RR* )xrealloc(set->rs, sizeof(set->rs[0]) * new_alloc);
5802     CHECK_NULL_RETURN_MEMERR(nrs);
5803 
5804     set->rs    = nrs;
5805     set->alloc = new_alloc;
5806   }
5807 
5808   region = onig_region_new();
5809   CHECK_NULL_RETURN_MEMERR(region);
5810 
5811   set->rs[set->n].reg    = reg;
5812   set->rs[set->n].region = region;
5813   set->n++;
5814 
5815   update_regset_by_reg(set, reg);
5816   return 0;
5817 }
5818 
5819 extern int
onig_regset_replace(OnigRegSet * set,int at,regex_t * reg)5820 onig_regset_replace(OnigRegSet* set, int at, regex_t* reg)
5821 {
5822   int i;
5823 
5824   if (at < 0 || at >= set->n)
5825     return ONIGERR_INVALID_ARGUMENT;
5826 
5827   if (IS_NULL(reg)) {
5828     onig_region_free(set->rs[at].region, 1);
5829     for (i = at; i < set->n - 1; i++) {
5830       set->rs[i].reg    = set->rs[i+1].reg;
5831       set->rs[i].region = set->rs[i+1].region;
5832     }
5833     set->n--;
5834   }
5835   else {
5836     if (OPTON_FIND_LONGEST(reg->options))
5837       return ONIGERR_INVALID_ARGUMENT;
5838 
5839     if (set->n > 1 && reg->enc != set->enc)
5840       return ONIGERR_INVALID_ARGUMENT;
5841 
5842     set->rs[at].reg = reg;
5843   }
5844 
5845   for (i = 0; i < set->n; i++)
5846     update_regset_by_reg(set, set->rs[i].reg);
5847 
5848   return 0;
5849 }
5850 
5851 extern void
onig_regset_free(OnigRegSet * set)5852 onig_regset_free(OnigRegSet* set)
5853 {
5854   int i;
5855 
5856   for (i = 0; i < set->n; i++) {
5857     regex_t* reg;
5858     OnigRegion* region;
5859 
5860     reg    = set->rs[i].reg;
5861     region = set->rs[i].region;
5862     onig_free(reg);
5863     if (IS_NOT_NULL(region))
5864       onig_region_free(region, 1);
5865   }
5866 
5867   xfree(set->rs);
5868   xfree(set);
5869 }
5870 
5871 extern int
onig_regset_number_of_regex(OnigRegSet * set)5872 onig_regset_number_of_regex(OnigRegSet* set)
5873 {
5874   return set->n;
5875 }
5876 
5877 extern regex_t*
onig_regset_get_regex(OnigRegSet * set,int at)5878 onig_regset_get_regex(OnigRegSet* set, int at)
5879 {
5880   if (at < 0 || at >= set->n)
5881     return (regex_t* )0;
5882 
5883   return set->rs[at].reg;
5884 }
5885 
5886 extern OnigRegion*
onig_regset_get_region(OnigRegSet * set,int at)5887 onig_regset_get_region(OnigRegSet* set, int at)
5888 {
5889   if (at < 0 || at >= set->n)
5890     return (OnigRegion* )0;
5891 
5892   return set->rs[at].region;
5893 }
5894 
5895 #endif /* USE_REGSET */
5896 
5897 
5898 #ifdef USE_DIRECT_THREADED_CODE
5899 extern int
onig_init_for_match_at(regex_t * reg)5900 onig_init_for_match_at(regex_t* reg)
5901 {
5902   return match_at(reg, (const UChar* )NULL, (const UChar* )NULL,
5903                   (const UChar* )NULL, (const UChar* )NULL, (UChar* )NULL,
5904                   (MatchArg* )NULL);
5905 }
5906 #endif
5907 
5908 
5909 /* for callout functions */
5910 
5911 #ifdef USE_CALLOUT
5912 
5913 extern OnigCalloutFunc
onig_get_progress_callout(void)5914 onig_get_progress_callout(void)
5915 {
5916   return DefaultProgressCallout;
5917 }
5918 
5919 extern int
onig_set_progress_callout(OnigCalloutFunc f)5920 onig_set_progress_callout(OnigCalloutFunc f)
5921 {
5922   DefaultProgressCallout = f;
5923   return ONIG_NORMAL;
5924 }
5925 
5926 extern OnigCalloutFunc
onig_get_retraction_callout(void)5927 onig_get_retraction_callout(void)
5928 {
5929   return DefaultRetractionCallout;
5930 }
5931 
5932 extern int
onig_set_retraction_callout(OnigCalloutFunc f)5933 onig_set_retraction_callout(OnigCalloutFunc f)
5934 {
5935   DefaultRetractionCallout = f;
5936   return ONIG_NORMAL;
5937 }
5938 
5939 extern int
onig_get_callout_num_by_callout_args(OnigCalloutArgs * args)5940 onig_get_callout_num_by_callout_args(OnigCalloutArgs* args)
5941 {
5942   return args->num;
5943 }
5944 
5945 extern OnigCalloutIn
onig_get_callout_in_by_callout_args(OnigCalloutArgs * args)5946 onig_get_callout_in_by_callout_args(OnigCalloutArgs* args)
5947 {
5948   return args->in;
5949 }
5950 
5951 extern int
onig_get_name_id_by_callout_args(OnigCalloutArgs * args)5952 onig_get_name_id_by_callout_args(OnigCalloutArgs* args)
5953 {
5954   return args->name_id;
5955 }
5956 
5957 extern const UChar*
onig_get_contents_by_callout_args(OnigCalloutArgs * args)5958 onig_get_contents_by_callout_args(OnigCalloutArgs* args)
5959 {
5960   int num;
5961   CalloutListEntry* e;
5962 
5963   num = args->num;
5964   e = onig_reg_callout_list_at(args->regex, num);
5965   if (IS_NULL(e)) return 0;
5966   if (e->of == ONIG_CALLOUT_OF_CONTENTS) {
5967     return e->u.content.start;
5968   }
5969 
5970   return 0;
5971 }
5972 
5973 extern const UChar*
onig_get_contents_end_by_callout_args(OnigCalloutArgs * args)5974 onig_get_contents_end_by_callout_args(OnigCalloutArgs* args)
5975 {
5976   int num;
5977   CalloutListEntry* e;
5978 
5979   num = args->num;
5980   e = onig_reg_callout_list_at(args->regex, num);
5981   if (IS_NULL(e)) return 0;
5982   if (e->of == ONIG_CALLOUT_OF_CONTENTS) {
5983     return e->u.content.end;
5984   }
5985 
5986   return 0;
5987 }
5988 
5989 extern int
onig_get_args_num_by_callout_args(OnigCalloutArgs * args)5990 onig_get_args_num_by_callout_args(OnigCalloutArgs* args)
5991 {
5992   int num;
5993   CalloutListEntry* e;
5994 
5995   num = args->num;
5996   e = onig_reg_callout_list_at(args->regex, num);
5997   if (IS_NULL(e)) return ONIGERR_INVALID_ARGUMENT;
5998   if (e->of == ONIG_CALLOUT_OF_NAME) {
5999     return e->u.arg.num;
6000   }
6001 
6002   return ONIGERR_INVALID_ARGUMENT;
6003 }
6004 
6005 extern int
onig_get_passed_args_num_by_callout_args(OnigCalloutArgs * args)6006 onig_get_passed_args_num_by_callout_args(OnigCalloutArgs* args)
6007 {
6008   int num;
6009   CalloutListEntry* e;
6010 
6011   num = args->num;
6012   e = onig_reg_callout_list_at(args->regex, num);
6013   if (IS_NULL(e)) return ONIGERR_INVALID_ARGUMENT;
6014   if (e->of == ONIG_CALLOUT_OF_NAME) {
6015     return e->u.arg.passed_num;
6016   }
6017 
6018   return ONIGERR_INVALID_ARGUMENT;
6019 }
6020 
6021 extern int
onig_get_arg_by_callout_args(OnigCalloutArgs * args,int index,OnigType * type,OnigValue * val)6022 onig_get_arg_by_callout_args(OnigCalloutArgs* args, int index,
6023                              OnigType* type, OnigValue* val)
6024 {
6025   int num;
6026   CalloutListEntry* e;
6027 
6028   num = args->num;
6029   e = onig_reg_callout_list_at(args->regex, num);
6030   if (IS_NULL(e)) return ONIGERR_INVALID_ARGUMENT;
6031   if (e->of == ONIG_CALLOUT_OF_NAME) {
6032     if (IS_NOT_NULL(type)) *type = e->u.arg.types[index];
6033     if (IS_NOT_NULL(val))  *val  = e->u.arg.vals[index];
6034     return ONIG_NORMAL;
6035   }
6036 
6037   return ONIGERR_INVALID_ARGUMENT;
6038 }
6039 
6040 extern const UChar*
onig_get_string_by_callout_args(OnigCalloutArgs * args)6041 onig_get_string_by_callout_args(OnigCalloutArgs* args)
6042 {
6043   return args->string;
6044 }
6045 
6046 extern const UChar*
onig_get_string_end_by_callout_args(OnigCalloutArgs * args)6047 onig_get_string_end_by_callout_args(OnigCalloutArgs* args)
6048 {
6049   return args->string_end;
6050 }
6051 
6052 extern const UChar*
onig_get_start_by_callout_args(OnigCalloutArgs * args)6053 onig_get_start_by_callout_args(OnigCalloutArgs* args)
6054 {
6055   return args->start;
6056 }
6057 
6058 extern const UChar*
onig_get_right_range_by_callout_args(OnigCalloutArgs * args)6059 onig_get_right_range_by_callout_args(OnigCalloutArgs* args)
6060 {
6061   return args->right_range;
6062 }
6063 
6064 extern const UChar*
onig_get_current_by_callout_args(OnigCalloutArgs * args)6065 onig_get_current_by_callout_args(OnigCalloutArgs* args)
6066 {
6067   return args->current;
6068 }
6069 
6070 extern OnigRegex
onig_get_regex_by_callout_args(OnigCalloutArgs * args)6071 onig_get_regex_by_callout_args(OnigCalloutArgs* args)
6072 {
6073   return args->regex;
6074 }
6075 
6076 extern unsigned long
onig_get_retry_counter_by_callout_args(OnigCalloutArgs * args)6077 onig_get_retry_counter_by_callout_args(OnigCalloutArgs* args)
6078 {
6079   return args->retry_in_match_counter;
6080 }
6081 
6082 
6083 extern int
onig_get_capture_range_in_callout(OnigCalloutArgs * a,int mem_num,int * begin,int * end)6084 onig_get_capture_range_in_callout(OnigCalloutArgs* a, int mem_num, int* begin, int* end)
6085 {
6086   OnigRegex    reg;
6087   const UChar* str;
6088   StackType*   stk_base;
6089   int i;
6090   StackIndex* mem_start_stk;
6091   StackIndex* mem_end_stk;
6092 
6093   i = mem_num;
6094   reg = a->regex;
6095   str = a->string;
6096   stk_base = a->stk_base;
6097   mem_start_stk = a->mem_start_stk;
6098   mem_end_stk   = a->mem_end_stk;
6099 
6100   if (i > 0) {
6101     if (a->mem_end_stk[i] != INVALID_STACK_INDEX) {
6102       *begin = (int )(STACK_MEM_START(reg, i) - str);
6103       *end   = (int )(STACK_MEM_END(reg, i)   - str);
6104     }
6105     else {
6106       *begin = *end = ONIG_REGION_NOTPOS;
6107     }
6108   }
6109   else
6110     return ONIGERR_INVALID_ARGUMENT;
6111 
6112   return ONIG_NORMAL;
6113 }
6114 
6115 extern int
onig_get_used_stack_size_in_callout(OnigCalloutArgs * a,int * used_num,int * used_bytes)6116 onig_get_used_stack_size_in_callout(OnigCalloutArgs* a, int* used_num, int* used_bytes)
6117 {
6118   int n;
6119 
6120   n = (int )(a->stk - a->stk_base);
6121 
6122   if (used_num != 0)
6123     *used_num = n;
6124 
6125   if (used_bytes != 0)
6126     *used_bytes = n * sizeof(StackType);
6127 
6128   return ONIG_NORMAL;
6129 }
6130 
6131 
6132 /* builtin callout functions */
6133 
6134 extern int
onig_builtin_fail(OnigCalloutArgs * args ARG_UNUSED,void * user_data ARG_UNUSED)6135 onig_builtin_fail(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUSED)
6136 {
6137   return ONIG_CALLOUT_FAIL;
6138 }
6139 
6140 extern int
onig_builtin_mismatch(OnigCalloutArgs * args ARG_UNUSED,void * user_data ARG_UNUSED)6141 onig_builtin_mismatch(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUSED)
6142 {
6143   return ONIG_MISMATCH;
6144 }
6145 
6146 extern int
onig_builtin_error(OnigCalloutArgs * args,void * user_data ARG_UNUSED)6147 onig_builtin_error(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
6148 {
6149   int r;
6150   int n;
6151   OnigValue val;
6152 
6153   r = onig_get_arg_by_callout_args(args, 0, 0, &val);
6154   if (r != ONIG_NORMAL) return r;
6155 
6156   n = (int )val.l;
6157   if (n >= 0) {
6158     n = ONIGERR_INVALID_CALLOUT_BODY;
6159   }
6160   else if (onig_is_error_code_needs_param(n)) {
6161     n = ONIGERR_INVALID_CALLOUT_BODY;
6162   }
6163 
6164   return n;
6165 }
6166 
6167 extern int
onig_builtin_count(OnigCalloutArgs * args,void * user_data)6168 onig_builtin_count(OnigCalloutArgs* args, void* user_data)
6169 {
6170   (void )onig_check_callout_data_and_clear_old_values(args);
6171 
6172   return onig_builtin_total_count(args, user_data);
6173 }
6174 
6175 extern int
onig_builtin_total_count(OnigCalloutArgs * args,void * user_data ARG_UNUSED)6176 onig_builtin_total_count(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
6177 {
6178   int r;
6179   int slot;
6180   OnigType  type;
6181   OnigValue val;
6182   OnigValue aval;
6183   OnigCodePoint count_type;
6184 
6185   r = onig_get_arg_by_callout_args(args, 0, &type, &aval);
6186   if (r != ONIG_NORMAL) return r;
6187 
6188   count_type = aval.c;
6189   if (count_type != '>' && count_type != 'X' && count_type != '<')
6190     return ONIGERR_INVALID_CALLOUT_ARG;
6191 
6192   r = onig_get_callout_data_by_callout_args_self_dont_clear_old(args, 0,
6193                                                                 &type, &val);
6194   if (r < ONIG_NORMAL)
6195     return r;
6196   else if (r > ONIG_NORMAL) {
6197     /* type == void: initial state */
6198     val.l = 0;
6199   }
6200 
6201   if (args->in == ONIG_CALLOUT_IN_RETRACTION) {
6202     slot = 2;
6203     if (count_type == '<')
6204       val.l++;
6205     else if (count_type == 'X')
6206       val.l--;
6207   }
6208   else {
6209     slot = 1;
6210     if (count_type != '<')
6211       val.l++;
6212   }
6213 
6214   r = onig_set_callout_data_by_callout_args_self(args, 0, ONIG_TYPE_LONG, &val);
6215   if (r != ONIG_NORMAL) return r;
6216 
6217   /* slot 1: in progress counter, slot 2: in retraction counter */
6218   r = onig_get_callout_data_by_callout_args_self_dont_clear_old(args, slot,
6219                                                                 &type, &val);
6220   if (r < ONIG_NORMAL)
6221     return r;
6222   else if (r > ONIG_NORMAL) {
6223     val.l = 0;
6224   }
6225 
6226   val.l++;
6227   r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val);
6228   if (r != ONIG_NORMAL) return r;
6229 
6230   return ONIG_CALLOUT_SUCCESS;
6231 }
6232 
6233 extern int
onig_builtin_max(OnigCalloutArgs * args,void * user_data ARG_UNUSED)6234 onig_builtin_max(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
6235 {
6236   int r;
6237   int slot;
6238   long max_val;
6239   OnigCodePoint count_type;
6240   OnigType  type;
6241   OnigValue val;
6242   OnigValue aval;
6243 
6244   (void )onig_check_callout_data_and_clear_old_values(args);
6245 
6246   slot = 0;
6247   r = onig_get_callout_data_by_callout_args_self(args, slot, &type, &val);
6248   if (r < ONIG_NORMAL)
6249     return r;
6250   else if (r > ONIG_NORMAL) {
6251     /* type == void: initial state */
6252     type  = ONIG_TYPE_LONG;
6253     val.l = 0;
6254   }
6255 
6256   r = onig_get_arg_by_callout_args(args, 0, &type, &aval);
6257   if (r != ONIG_NORMAL) return r;
6258   if (type == ONIG_TYPE_TAG) {
6259     r = onig_get_callout_data_by_callout_args(args, aval.tag, 0, &type, &aval);
6260     if (r < ONIG_NORMAL) return r;
6261     else if (r > ONIG_NORMAL)
6262       max_val = 0L;
6263     else
6264       max_val = aval.l;
6265   }
6266   else { /* LONG */
6267     max_val = aval.l;
6268   }
6269 
6270   r = onig_get_arg_by_callout_args(args, 1, &type, &aval);
6271   if (r != ONIG_NORMAL) return r;
6272 
6273   count_type = aval.c;
6274   if (count_type != '>' && count_type != 'X' && count_type != '<')
6275     return ONIGERR_INVALID_CALLOUT_ARG;
6276 
6277   if (args->in == ONIG_CALLOUT_IN_RETRACTION) {
6278     if (count_type == '<') {
6279       if (val.l >= max_val) return ONIG_CALLOUT_FAIL;
6280       val.l++;
6281     }
6282     else if (count_type == 'X')
6283       val.l--;
6284   }
6285   else {
6286     if (count_type != '<') {
6287       if (val.l >= max_val) return ONIG_CALLOUT_FAIL;
6288       val.l++;
6289     }
6290   }
6291 
6292   r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val);
6293   if (r != ONIG_NORMAL) return r;
6294 
6295   return ONIG_CALLOUT_SUCCESS;
6296 }
6297 
/*
 * Comparison operators understood by onig_builtin_cmp().  The numeric
 * value is what that function stores as a long in its callout data.
 */
enum OP_CMP {
  OP_EQ = 0,  /* "==" */
  OP_NE = 1,  /* "!=" */
  OP_LT = 2,  /* "<"  */
  OP_GT = 3,  /* ">"  */
  OP_LE = 4,  /* "<=" */
  OP_GE = 5   /* ">=" */
};
6306 
6307 extern int
onig_builtin_cmp(OnigCalloutArgs * args,void * user_data ARG_UNUSED)6308 onig_builtin_cmp(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
6309 {
6310   int r;
6311   int slot;
6312   long lv;
6313   long rv;
6314   OnigType  type;
6315   OnigValue val;
6316   regex_t* reg;
6317   enum OP_CMP op;
6318 
6319   reg = args->regex;
6320 
6321   r = onig_get_arg_by_callout_args(args, 0, &type, &val);
6322   if (r != ONIG_NORMAL) return r;
6323 
6324   if (type == ONIG_TYPE_TAG) {
6325     r = onig_get_callout_data_by_callout_args(args, val.tag, 0, &type, &val);
6326     if (r < ONIG_NORMAL) return r;
6327     else if (r > ONIG_NORMAL)
6328       lv = 0L;
6329     else
6330       lv = val.l;
6331   }
6332   else { /* ONIG_TYPE_LONG */
6333     lv = val.l;
6334   }
6335 
6336   r = onig_get_arg_by_callout_args(args, 2, &type, &val);
6337   if (r != ONIG_NORMAL) return r;
6338 
6339   if (type == ONIG_TYPE_TAG) {
6340     r = onig_get_callout_data_by_callout_args(args, val.tag, 0, &type, &val);
6341     if (r < ONIG_NORMAL) return r;
6342     else if (r > ONIG_NORMAL)
6343       rv = 0L;
6344     else
6345       rv = val.l;
6346   }
6347   else { /* ONIG_TYPE_LONG */
6348     rv = val.l;
6349   }
6350 
6351   slot = 0;
6352   r = onig_get_callout_data_by_callout_args_self(args, slot, &type, &val);
6353   if (r < ONIG_NORMAL)
6354     return r;
6355   else if (r > ONIG_NORMAL) {
6356     /* type == void: initial state */
6357     OnigCodePoint c1, c2;
6358     UChar* p;
6359 
6360     r = onig_get_arg_by_callout_args(args, 1, &type, &val);
6361     if (r != ONIG_NORMAL) return r;
6362 
6363     p = val.s.start;
6364     c1 = ONIGENC_MBC_TO_CODE(reg->enc, p, val.s.end);
6365     p += ONIGENC_MBC_ENC_LEN(reg->enc, p);
6366     if (p < val.s.end) {
6367       c2 = ONIGENC_MBC_TO_CODE(reg->enc, p, val.s.end);
6368       p += ONIGENC_MBC_ENC_LEN(reg->enc, p);
6369       if (p != val.s.end)  return ONIGERR_INVALID_CALLOUT_ARG;
6370     }
6371     else
6372       c2 = 0;
6373 
6374     switch (c1) {
6375     case '=':
6376       if (c2 != '=') return ONIGERR_INVALID_CALLOUT_ARG;
6377       op = OP_EQ;
6378       break;
6379     case '!':
6380       if (c2 != '=') return ONIGERR_INVALID_CALLOUT_ARG;
6381       op = OP_NE;
6382       break;
6383     case '<':
6384       if (c2 == '=') op = OP_LE;
6385       else if (c2 == 0) op = OP_LT;
6386       else  return ONIGERR_INVALID_CALLOUT_ARG;
6387       break;
6388     case '>':
6389       if (c2 == '=') op = OP_GE;
6390       else if (c2 == 0) op = OP_GT;
6391       else  return ONIGERR_INVALID_CALLOUT_ARG;
6392       break;
6393     default:
6394       return ONIGERR_INVALID_CALLOUT_ARG;
6395       break;
6396     }
6397     val.l = (long )op;
6398     r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val);
6399     if (r != ONIG_NORMAL) return r;
6400   }
6401   else {
6402     op = (enum OP_CMP )val.l;
6403   }
6404 
6405   switch (op) {
6406   case OP_EQ: r = (lv == rv); break;
6407   case OP_NE: r = (lv != rv); break;
6408   case OP_LT: r = (lv <  rv); break;
6409   case OP_GT: r = (lv >  rv); break;
6410   case OP_LE: r = (lv <= rv); break;
6411   case OP_GE: r = (lv >= rv); break;
6412   }
6413 
6414   return r == 0 ? ONIG_CALLOUT_FAIL : ONIG_CALLOUT_SUCCESS;
6415 }
6416 
6417 
6418 #ifndef ONIG_NO_PRINT
6419 
/* Stream that onig_builtin_monitor() writes its report lines to.  It is
   assigned by onig_setup_builtin_monitors_by_ascii_encoded_name(). */
static FILE* OutFp;
6421 
/* The name starts with "onig_" for the macros. */
6423 static int
onig_builtin_monitor(OnigCalloutArgs * args,void * user_data)6424 onig_builtin_monitor(OnigCalloutArgs* args, void* user_data)
6425 {
6426   int r;
6427   int num;
6428   size_t tag_len;
6429   const UChar* start;
6430   const UChar* right;
6431   const UChar* current;
6432   const UChar* string;
6433   const UChar* strend;
6434   const UChar* tag_start;
6435   const UChar* tag_end;
6436   regex_t* reg;
6437   OnigCalloutIn in;
6438   OnigType type;
6439   OnigValue val;
6440   char buf[20];
6441   FILE* fp;
6442 
6443   fp = OutFp;
6444 
6445   r = onig_get_arg_by_callout_args(args, 0, &type, &val);
6446   if (r != ONIG_NORMAL) return r;
6447 
6448   in = onig_get_callout_in_by_callout_args(args);
6449   if (in == ONIG_CALLOUT_IN_PROGRESS) {
6450     if (val.c == '<')
6451       return ONIG_CALLOUT_SUCCESS;
6452   }
6453   else {
6454     if (val.c != 'X' && val.c != '<')
6455       return ONIG_CALLOUT_SUCCESS;
6456   }
6457 
6458   num       = onig_get_callout_num_by_callout_args(args);
6459   start     = onig_get_start_by_callout_args(args);
6460   right     = onig_get_right_range_by_callout_args(args);
6461   current   = onig_get_current_by_callout_args(args);
6462   string    = onig_get_string_by_callout_args(args);
6463   strend    = onig_get_string_end_by_callout_args(args);
6464   reg       = onig_get_regex_by_callout_args(args);
6465   tag_start = onig_get_callout_tag_start(reg, num);
6466   tag_end   = onig_get_callout_tag_end(reg, num);
6467 
6468   if (tag_start == 0)
6469     xsnprintf(buf, sizeof(buf), "#%d", num);
6470   else {
6471     /* CAUTION: tag string is not terminated with NULL. */
6472     int i;
6473 
6474     tag_len = tag_end - tag_start;
6475     if (tag_len >= sizeof(buf)) tag_len = sizeof(buf) - 1;
6476     for (i = 0; i < tag_len; i++) buf[i] = tag_start[i];
6477     buf[tag_len] = '\0';
6478   }
6479 
6480   fprintf(fp, "ONIG-MONITOR: %-4s %s at: %d [%d - %d] len: %d\n",
6481           buf,
6482           in == ONIG_CALLOUT_IN_PROGRESS ? "=>" : "<=",
6483           (int )(current - string),
6484           (int )(start   - string),
6485           (int )(right   - string),
6486           (int )(strend  - string));
6487   fflush(fp);
6488 
6489   return ONIG_CALLOUT_SUCCESS;
6490 }
6491 
6492 extern int
onig_setup_builtin_monitors_by_ascii_encoded_name(void * fp)6493 onig_setup_builtin_monitors_by_ascii_encoded_name(void* fp /* FILE* */)
6494 {
6495   int id;
6496   char* name;
6497   OnigEncoding enc;
6498   unsigned int ts[4];
6499   OnigValue opts[4];
6500 
6501   if (IS_NOT_NULL(fp))
6502     OutFp = (FILE* )fp;
6503   else
6504     OutFp = stdout;
6505 
6506   enc = ONIG_ENCODING_ASCII;
6507 
6508   name = "MON";
6509   ts[0] = ONIG_TYPE_CHAR;
6510   opts[0].c = '>';
6511   BC_B_O(name, monitor, 1, ts, 1, opts);
6512 
6513   return ONIG_NORMAL;
6514 }
6515 
6516 #endif /* ONIG_NO_PRINT */
6517 
6518 #endif /* USE_CALLOUT */
6519