1 /**********************************************************************
2   regcomp.c -  Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5  * Copyright (c) 2002-2008  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include "regparse.h"
31 
32 OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
33 
34 extern OnigCaseFoldType
onig_get_default_case_fold_flag(void)35 onig_get_default_case_fold_flag(void)
36 {
37   return OnigDefaultCaseFoldFlag;
38 }
39 
40 extern int
onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)41 onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
42 {
43   OnigDefaultCaseFoldFlag = case_fold_flag;
44   return 0;
45 }
46 
47 
48 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
49 static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
50 #endif
51 
52 static UChar*
str_dup(UChar * s,UChar * end)53 str_dup(UChar* s, UChar* end)
54 {
55   int len = end - s;
56 
57   if (len > 0) {
58     UChar* r = (UChar* )xmalloc(len + 1);
59     CHECK_NULL_RETURN(r);
60     xmemcpy(r, s, len);
61     r[len] = (UChar )0;
62     return r;
63   }
64   else return NULL;
65 }
66 
67 static void
swap_node(Node * a,Node * b)68 swap_node(Node* a, Node* b)
69 {
70   Node c;
71   c = *a; *a = *b; *b = c;
72 
73   if (NTYPE(a) == NT_STR) {
74     StrNode* sn = NSTR(a);
75     if (sn->capa == 0) {
76       int len = sn->end - sn->s;
77       sn->s   = sn->buf;
78       sn->end = sn->s + len;
79     }
80   }
81 
82   if (NTYPE(b) == NT_STR) {
83     StrNode* sn = NSTR(b);
84     if (sn->capa == 0) {
85       int len = sn->end - sn->s;
86       sn->s   = sn->buf;
87       sn->end = sn->s + len;
88     }
89   }
90 }
91 
92 static OnigDistance
distance_add(OnigDistance d1,OnigDistance d2)93 distance_add(OnigDistance d1, OnigDistance d2)
94 {
95   if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
96     return ONIG_INFINITE_DISTANCE;
97   else {
98     if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
99     else return ONIG_INFINITE_DISTANCE;
100   }
101 }
102 
103 static OnigDistance
distance_multiply(OnigDistance d,int m)104 distance_multiply(OnigDistance d, int m)
105 {
106   if (m == 0) return 0;
107 
108   if (d < ONIG_INFINITE_DISTANCE / m)
109     return d * m;
110   else
111     return ONIG_INFINITE_DISTANCE;
112 }
113 
114 static int
bitset_is_empty(BitSetRef bs)115 bitset_is_empty(BitSetRef bs)
116 {
117   int i;
118   for (i = 0; i < (int )BITSET_SIZE; i++) {
119     if (bs[i] != 0) return 0;
120   }
121   return 1;
122 }
123 
124 #ifdef ONIG_DEBUG
125 static int
bitset_on_num(BitSetRef bs)126 bitset_on_num(BitSetRef bs)
127 {
128   int i, n;
129 
130   n = 0;
131   for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
132     if (BITSET_AT(bs, i)) n++;
133   }
134   return n;
135 }
136 #endif
137 
138 extern int
onig_bbuf_init(BBuf * buf,int size)139 onig_bbuf_init(BBuf* buf, int size)
140 {
141   if (size <= 0) {
142     size   = 0;
143     buf->p = NULL;
144   }
145   else {
146     buf->p = (UChar* )xmalloc(size);
147     if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
148   }
149 
150   buf->alloc = size;
151   buf->used  = 0;
152   return 0;
153 }
154 
155 
156 #ifdef USE_SUBEXP_CALL
157 
158 static int
unset_addr_list_init(UnsetAddrList * uslist,int size)159 unset_addr_list_init(UnsetAddrList* uslist, int size)
160 {
161   UnsetAddr* p;
162 
163   p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
164   CHECK_NULL_RETURN_MEMERR(p);
165   uslist->num   = 0;
166   uslist->alloc = size;
167   uslist->us    = p;
168   return 0;
169 }
170 
171 static void
unset_addr_list_end(UnsetAddrList * uslist)172 unset_addr_list_end(UnsetAddrList* uslist)
173 {
174   if (IS_NOT_NULL(uslist->us))
175     xfree(uslist->us);
176 }
177 
178 static int
unset_addr_list_add(UnsetAddrList * uslist,int offset,struct _Node * node)179 unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
180 {
181   UnsetAddr* p;
182   int size;
183 
184   if (uslist->num >= uslist->alloc) {
185     size = uslist->alloc * 2;
186     p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
187     CHECK_NULL_RETURN_MEMERR(p);
188     uslist->alloc = size;
189     uslist->us    = p;
190   }
191 
192   uslist->us[uslist->num].offset = offset;
193   uslist->us[uslist->num].target = node;
194   uslist->num++;
195   return 0;
196 }
197 #endif /* USE_SUBEXP_CALL */
198 
199 
200 static int
add_opcode(regex_t * reg,int opcode)201 add_opcode(regex_t* reg, int opcode)
202 {
203   BBUF_ADD1(reg, opcode);
204   return 0;
205 }
206 
207 #ifdef USE_COMBINATION_EXPLOSION_CHECK
208 static int
add_state_check_num(regex_t * reg,int num)209 add_state_check_num(regex_t* reg, int num)
210 {
211   StateCheckNumType n = (StateCheckNumType )num;
212 
213   BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
214   return 0;
215 }
216 #endif
217 
218 static int
add_rel_addr(regex_t * reg,int addr)219 add_rel_addr(regex_t* reg, int addr)
220 {
221   RelAddrType ra = (RelAddrType )addr;
222 
223   BBUF_ADD(reg, &ra, SIZE_RELADDR);
224   return 0;
225 }
226 
227 static int
add_abs_addr(regex_t * reg,int addr)228 add_abs_addr(regex_t* reg, int addr)
229 {
230   AbsAddrType ra = (AbsAddrType )addr;
231 
232   BBUF_ADD(reg, &ra, SIZE_ABSADDR);
233   return 0;
234 }
235 
236 static int
add_length(regex_t * reg,int len)237 add_length(regex_t* reg, int len)
238 {
239   LengthType l = (LengthType )len;
240 
241   BBUF_ADD(reg, &l, SIZE_LENGTH);
242   return 0;
243 }
244 
245 static int
add_mem_num(regex_t * reg,int num)246 add_mem_num(regex_t* reg, int num)
247 {
248   MemNumType n = (MemNumType )num;
249 
250   BBUF_ADD(reg, &n, SIZE_MEMNUM);
251   return 0;
252 }
253 
254 static int
add_pointer(regex_t * reg,void * addr)255 add_pointer(regex_t* reg, void* addr)
256 {
257   PointerType ptr = (PointerType )addr;
258 
259   BBUF_ADD(reg, &ptr, SIZE_POINTER);
260   return 0;
261 }
262 
263 static int
add_option(regex_t * reg,OnigOptionType option)264 add_option(regex_t* reg, OnigOptionType option)
265 {
266   BBUF_ADD(reg, &option, SIZE_OPTION);
267   return 0;
268 }
269 
270 static int
add_opcode_rel_addr(regex_t * reg,int opcode,int addr)271 add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
272 {
273   int r;
274 
275   r = add_opcode(reg, opcode);
276   if (r) return r;
277   r = add_rel_addr(reg, addr);
278   return r;
279 }
280 
281 static int
add_bytes(regex_t * reg,UChar * bytes,int len)282 add_bytes(regex_t* reg, UChar* bytes, int len)
283 {
284   BBUF_ADD(reg, bytes, len);
285   return 0;
286 }
287 
288 static int
add_bitset(regex_t * reg,BitSetRef bs)289 add_bitset(regex_t* reg, BitSetRef bs)
290 {
291   BBUF_ADD(reg, bs, SIZE_BITSET);
292   return 0;
293 }
294 
295 static int
add_opcode_option(regex_t * reg,int opcode,OnigOptionType option)296 add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
297 {
298   int r;
299 
300   r = add_opcode(reg, opcode);
301   if (r) return r;
302   r = add_option(reg, option);
303   return r;
304 }
305 
306 static int compile_length_tree(Node* node, regex_t* reg);
307 static int compile_tree(Node* node, regex_t* reg);
308 
309 
310 #define IS_NEED_STR_LEN_OP_EXACT(op) \
311    ((op) == OP_EXACTN    || (op) == OP_EXACTMB2N ||\
312     (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN  || (op) == OP_EXACTN_IC)
313 
314 static int
select_str_opcode(int mb_len,int str_len,int ignore_case)315 select_str_opcode(int mb_len, int str_len, int ignore_case)
316 {
317   int op;
318 
319   if (ignore_case) {
320     switch (str_len) {
321     case 1:  op = OP_EXACT1_IC; break;
322     default: op = OP_EXACTN_IC; break;
323     }
324   }
325   else {
326     switch (mb_len) {
327     case 1:
328       switch (str_len) {
329       case 1:  op = OP_EXACT1; break;
330       case 2:  op = OP_EXACT2; break;
331       case 3:  op = OP_EXACT3; break;
332       case 4:  op = OP_EXACT4; break;
333       case 5:  op = OP_EXACT5; break;
334       default: op = OP_EXACTN; break;
335       }
336       break;
337 
338     case 2:
339       switch (str_len) {
340       case 1:  op = OP_EXACTMB2N1; break;
341       case 2:  op = OP_EXACTMB2N2; break;
342       case 3:  op = OP_EXACTMB2N3; break;
343       default: op = OP_EXACTMB2N;  break;
344       }
345       break;
346 
347     case 3:
348       op = OP_EXACTMB3N;
349       break;
350 
351     default:
352       op = OP_EXACTMBN;
353       break;
354     }
355   }
356   return op;
357 }
358 
359 static int
compile_tree_empty_check(Node * node,regex_t * reg,int empty_info)360 compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
361 {
362   int r;
363   int saved_num_null_check = reg->num_null_check;
364 
365   if (empty_info != 0) {
366     r = add_opcode(reg, OP_NULL_CHECK_START);
367     if (r) return r;
368     r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
369     if (r) return r;
370     reg->num_null_check++;
371   }
372 
373   r = compile_tree(node, reg);
374   if (r) return r;
375 
376   if (empty_info != 0) {
377     if (empty_info == NQ_TARGET_IS_EMPTY)
378       r = add_opcode(reg, OP_NULL_CHECK_END);
379     else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
380       r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
381     else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
382       r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
383 
384     if (r) return r;
385     r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
386   }
387   return r;
388 }
389 
390 #ifdef USE_SUBEXP_CALL
391 static int
compile_call(CallNode * node,regex_t * reg)392 compile_call(CallNode* node, regex_t* reg)
393 {
394   int r;
395 
396   r = add_opcode(reg, OP_CALL);
397   if (r) return r;
398   r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
399                           node->target);
400   if (r) return r;
401   r = add_abs_addr(reg, 0 /*dummy addr.*/);
402   return r;
403 }
404 #endif
405 
406 static int
compile_tree_n_times(Node * node,int n,regex_t * reg)407 compile_tree_n_times(Node* node, int n, regex_t* reg)
408 {
409   int i, r;
410 
411   for (i = 0; i < n; i++) {
412     r = compile_tree(node, reg);
413     if (r) return r;
414   }
415   return 0;
416 }
417 
418 static int
add_compile_string_length(UChar * s ARG_UNUSED,int mb_len,int str_len,regex_t * reg ARG_UNUSED,int ignore_case)419 add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, int str_len,
420                           regex_t* reg ARG_UNUSED, int ignore_case)
421 {
422   int len;
423   int op = select_str_opcode(mb_len, str_len, ignore_case);
424 
425   len = SIZE_OPCODE;
426 
427   if (op == OP_EXACTMBN)  len += SIZE_LENGTH;
428   if (IS_NEED_STR_LEN_OP_EXACT(op))
429     len += SIZE_LENGTH;
430 
431   len += mb_len * str_len;
432   return len;
433 }
434 
435 static int
add_compile_string(UChar * s,int mb_len,int str_len,regex_t * reg,int ignore_case)436 add_compile_string(UChar* s, int mb_len, int str_len,
437                    regex_t* reg, int ignore_case)
438 {
439   int op = select_str_opcode(mb_len, str_len, ignore_case);
440   add_opcode(reg, op);
441 
442   if (op == OP_EXACTMBN)
443     add_length(reg, mb_len);
444 
445   if (IS_NEED_STR_LEN_OP_EXACT(op)) {
446     if (op == OP_EXACTN_IC)
447       add_length(reg, mb_len * str_len);
448     else
449       add_length(reg, str_len);
450   }
451 
452   add_bytes(reg, s, mb_len * str_len);
453   return 0;
454 }
455 
456 
457 static int
compile_length_string_node(Node * node,regex_t * reg)458 compile_length_string_node(Node* node, regex_t* reg)
459 {
460   int rlen, r, len, prev_len, slen, ambig;
461   OnigEncoding enc = reg->enc;
462   UChar *p, *prev;
463   StrNode* sn;
464 
465   sn = NSTR(node);
466   if (sn->end <= sn->s)
467     return 0;
468 
469   ambig = NSTRING_IS_AMBIG(node);
470 
471   p = prev = sn->s;
472   prev_len = enclen(enc, p);
473   p += prev_len;
474   slen = 1;
475   rlen = 0;
476 
477   for (; p < sn->end; ) {
478     len = enclen(enc, p);
479     if (len == prev_len) {
480       slen++;
481     }
482     else {
483       r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
484       rlen += r;
485       prev = p;
486       slen = 1;
487       prev_len = len;
488     }
489     p += len;
490   }
491   r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
492   rlen += r;
493   return rlen;
494 }
495 
496 static int
compile_length_string_raw_node(StrNode * sn,regex_t * reg)497 compile_length_string_raw_node(StrNode* sn, regex_t* reg)
498 {
499   if (sn->end <= sn->s)
500     return 0;
501 
502   return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
503 }
504 
505 static int
compile_string_node(Node * node,regex_t * reg)506 compile_string_node(Node* node, regex_t* reg)
507 {
508   int r, len, prev_len, slen, ambig;
509   OnigEncoding enc = reg->enc;
510   UChar *p, *prev, *end;
511   StrNode* sn;
512 
513   sn = NSTR(node);
514   if (sn->end <= sn->s)
515     return 0;
516 
517   end = sn->end;
518   ambig = NSTRING_IS_AMBIG(node);
519 
520   p = prev = sn->s;
521   prev_len = enclen(enc, p);
522   p += prev_len;
523   slen = 1;
524 
525   for (; p < end; ) {
526     len = enclen(enc, p);
527     if (len == prev_len) {
528       slen++;
529     }
530     else {
531       r = add_compile_string(prev, prev_len, slen, reg, ambig);
532       if (r) return r;
533 
534       prev  = p;
535       slen  = 1;
536       prev_len = len;
537     }
538 
539     p += len;
540   }
541   return add_compile_string(prev, prev_len, slen, reg, ambig);
542 }
543 
544 static int
compile_string_raw_node(StrNode * sn,regex_t * reg)545 compile_string_raw_node(StrNode* sn, regex_t* reg)
546 {
547   if (sn->end <= sn->s)
548     return 0;
549 
550   return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
551 }
552 
553 static int
add_multi_byte_cclass(BBuf * mbuf,regex_t * reg)554 add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
555 {
556 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
557   add_length(reg, mbuf->used);
558   return add_bytes(reg, mbuf->p, mbuf->used);
559 #else
560   int r, pad_size;
561   UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
562 
563   GET_ALIGNMENT_PAD_SIZE(p, pad_size);
564   add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
565   if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
566 
567   r = add_bytes(reg, mbuf->p, mbuf->used);
568 
569   /* padding for return value from compile_length_cclass_node() to be fix. */
570   pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
571   if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
572   return r;
573 #endif
574 }
575 
576 static int
compile_length_cclass_node(CClassNode * cc,regex_t * reg)577 compile_length_cclass_node(CClassNode* cc, regex_t* reg)
578 {
579   int len;
580 
581   if (IS_NCCLASS_SHARE(cc)) {
582     len = SIZE_OPCODE + SIZE_POINTER;
583     return len;
584   }
585 
586   if (IS_NULL(cc->mbuf)) {
587     len = SIZE_OPCODE + SIZE_BITSET;
588   }
589   else {
590     if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
591       len = SIZE_OPCODE;
592     }
593     else {
594       len = SIZE_OPCODE + SIZE_BITSET;
595     }
596 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
597     len += SIZE_LENGTH + cc->mbuf->used;
598 #else
599     len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
600 #endif
601   }
602 
603   return len;
604 }
605 
606 static int
compile_cclass_node(CClassNode * cc,regex_t * reg)607 compile_cclass_node(CClassNode* cc, regex_t* reg)
608 {
609   int r;
610 
611   if (IS_NCCLASS_SHARE(cc)) {
612     add_opcode(reg, OP_CCLASS_NODE);
613     r = add_pointer(reg, cc);
614     return r;
615   }
616 
617   if (IS_NULL(cc->mbuf)) {
618     if (IS_NCCLASS_NOT(cc))
619       add_opcode(reg, OP_CCLASS_NOT);
620     else
621       add_opcode(reg, OP_CCLASS);
622 
623     r = add_bitset(reg, cc->bs);
624   }
625   else {
626     if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
627       if (IS_NCCLASS_NOT(cc))
628         add_opcode(reg, OP_CCLASS_MB_NOT);
629       else
630         add_opcode(reg, OP_CCLASS_MB);
631 
632       r = add_multi_byte_cclass(cc->mbuf, reg);
633     }
634     else {
635       if (IS_NCCLASS_NOT(cc))
636         add_opcode(reg, OP_CCLASS_MIX_NOT);
637       else
638         add_opcode(reg, OP_CCLASS_MIX);
639 
640       r = add_bitset(reg, cc->bs);
641       if (r) return r;
642       r = add_multi_byte_cclass(cc->mbuf, reg);
643     }
644   }
645 
646   return r;
647 }
648 
649 static int
entry_repeat_range(regex_t * reg,int id,int lower,int upper)650 entry_repeat_range(regex_t* reg, int id, int lower, int upper)
651 {
652 #define REPEAT_RANGE_ALLOC  4
653 
654   OnigRepeatRange* p;
655 
656   if (reg->repeat_range_alloc == 0) {
657     p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
658     CHECK_NULL_RETURN_MEMERR(p);
659     reg->repeat_range = p;
660     reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
661   }
662   else if (reg->repeat_range_alloc <= id) {
663     int n;
664     n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
665     p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
666                                     sizeof(OnigRepeatRange) * n);
667     CHECK_NULL_RETURN_MEMERR(p);
668     reg->repeat_range = p;
669     reg->repeat_range_alloc = n;
670   }
671   else {
672     p = reg->repeat_range;
673   }
674 
675   p[id].lower = lower;
676   p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
677   return 0;
678 }
679 
680 static int
compile_range_repeat_node(QtfrNode * qn,int target_len,int empty_info,regex_t * reg)681 compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
682                           regex_t* reg)
683 {
684   int r;
685   int num_repeat = reg->num_repeat;
686 
687   r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
688   if (r) return r;
689   r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
690   reg->num_repeat++;
691   if (r) return r;
692   r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
693   if (r) return r;
694 
695   r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
696   if (r) return r;
697 
698   r = compile_tree_empty_check(qn->target, reg, empty_info);
699   if (r) return r;
700 
701   if (
702 #ifdef USE_SUBEXP_CALL
703       reg->num_call > 0 ||
704 #endif
705       IS_QUANTIFIER_IN_REPEAT(qn)) {
706     r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
707   }
708   else {
709     r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
710   }
711   if (r) return r;
712   r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
713   return r;
714 }
715 
716 static int
is_anychar_star_quantifier(QtfrNode * qn)717 is_anychar_star_quantifier(QtfrNode* qn)
718 {
719   if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
720       NTYPE(qn->target) == NT_CANY)
721     return 1;
722   else
723     return 0;
724 }
725 
726 #define QUANTIFIER_EXPAND_LIMIT_SIZE   50
727 #define CKN_ON   (ckn > 0)
728 
729 #ifdef USE_COMBINATION_EXPLOSION_CHECK
730 
731 static int
compile_length_quantifier_node(QtfrNode * qn,regex_t * reg)732 compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
733 {
734   int len, mod_tlen, cklen;
735   int ckn;
736   int infinite = IS_REPEAT_INFINITE(qn->upper);
737   int empty_info = qn->target_empty_info;
738   int tlen = compile_length_tree(qn->target, reg);
739 
740   if (tlen < 0) return tlen;
741 
742   ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
743 
744   cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
745 
746   /* anychar repeat */
747   if (NTYPE(qn->target) == NT_CANY) {
748     if (qn->greedy && infinite) {
749       if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
750         return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
751       else
752         return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
753     }
754   }
755 
756   if (empty_info != 0)
757     mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
758   else
759     mod_tlen = tlen;
760 
761   if (infinite && qn->lower <= 1) {
762     if (qn->greedy) {
763       if (qn->lower == 1)
764 	len = SIZE_OP_JUMP;
765       else
766 	len = 0;
767 
768       len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
769     }
770     else {
771       if (qn->lower == 0)
772 	len = SIZE_OP_JUMP;
773       else
774 	len = 0;
775 
776       len += mod_tlen + SIZE_OP_PUSH + cklen;
777     }
778   }
779   else if (qn->upper == 0) {
780     if (qn->is_refered != 0) /* /(?<n>..){0}/ */
781       len = SIZE_OP_JUMP + tlen;
782     else
783       len = 0;
784   }
785   else if (qn->upper == 1 && qn->greedy) {
786     if (qn->lower == 0) {
787       if (CKN_ON) {
788 	len = SIZE_OP_STATE_CHECK_PUSH + tlen;
789       }
790       else {
791 	len = SIZE_OP_PUSH + tlen;
792       }
793     }
794     else {
795       len = tlen;
796     }
797   }
798   else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
799     len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
800   }
801   else {
802     len = SIZE_OP_REPEAT_INC
803         + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
804     if (CKN_ON)
805       len += SIZE_OP_STATE_CHECK;
806   }
807 
808   return len;
809 }
810 
811 static int
compile_quantifier_node(QtfrNode * qn,regex_t * reg)812 compile_quantifier_node(QtfrNode* qn, regex_t* reg)
813 {
814   int r, mod_tlen;
815   int ckn;
816   int infinite = IS_REPEAT_INFINITE(qn->upper);
817   int empty_info = qn->target_empty_info;
818   int tlen = compile_length_tree(qn->target, reg);
819 
820   if (tlen < 0) return tlen;
821 
822   ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
823 
824   if (is_anychar_star_quantifier(qn)) {
825     r = compile_tree_n_times(qn->target, qn->lower, reg);
826     if (r) return r;
827     if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
828       if (IS_MULTILINE(reg->options))
829 	r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
830       else
831 	r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
832       if (r) return r;
833       if (CKN_ON) {
834 	r = add_state_check_num(reg, ckn);
835 	if (r) return r;
836       }
837 
838       return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
839     }
840     else {
841       if (IS_MULTILINE(reg->options)) {
842 	r = add_opcode(reg, (CKN_ON ?
843 			       OP_STATE_CHECK_ANYCHAR_ML_STAR
844 			     : OP_ANYCHAR_ML_STAR));
845       }
846       else {
847 	r = add_opcode(reg, (CKN_ON ?
848 			       OP_STATE_CHECK_ANYCHAR_STAR
849 			     : OP_ANYCHAR_STAR));
850       }
851       if (r) return r;
852       if (CKN_ON)
853 	r = add_state_check_num(reg, ckn);
854 
855       return r;
856     }
857   }
858 
859   if (empty_info != 0)
860     mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
861   else
862     mod_tlen = tlen;
863 
864   if (infinite && qn->lower <= 1) {
865     if (qn->greedy) {
866       if (qn->lower == 1) {
867 	r = add_opcode_rel_addr(reg, OP_JUMP,
868 			(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
869 	if (r) return r;
870       }
871 
872       if (CKN_ON) {
873 	r = add_opcode(reg, OP_STATE_CHECK_PUSH);
874 	if (r) return r;
875 	r = add_state_check_num(reg, ckn);
876 	if (r) return r;
877 	r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
878       }
879       else {
880 	r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
881       }
882       if (r) return r;
883       r = compile_tree_empty_check(qn->target, reg, empty_info);
884       if (r) return r;
885       r = add_opcode_rel_addr(reg, OP_JUMP,
886 	      -(mod_tlen + (int )SIZE_OP_JUMP
887 		+ (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
888     }
889     else {
890       if (qn->lower == 0) {
891 	r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
892 	if (r) return r;
893       }
894       r = compile_tree_empty_check(qn->target, reg, empty_info);
895       if (r) return r;
896       if (CKN_ON) {
897 	r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
898 	if (r) return r;
899 	r = add_state_check_num(reg, ckn);
900 	if (r) return r;
901 	r = add_rel_addr(reg,
902 		 -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
903       }
904       else
905 	r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
906     }
907   }
908   else if (qn->upper == 0) {
909     if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
910       r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
911       if (r) return r;
912       r = compile_tree(qn->target, reg);
913     }
914     else
915       r = 0;
916   }
917   else if (qn->upper == 1 && qn->greedy) {
918     if (qn->lower == 0) {
919       if (CKN_ON) {
920 	r = add_opcode(reg, OP_STATE_CHECK_PUSH);
921 	if (r) return r;
922 	r = add_state_check_num(reg, ckn);
923 	if (r) return r;
924 	r = add_rel_addr(reg, tlen);
925       }
926       else {
927 	r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
928       }
929       if (r) return r;
930     }
931 
932     r = compile_tree(qn->target, reg);
933   }
934   else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
935     if (CKN_ON) {
936       r = add_opcode(reg, OP_STATE_CHECK_PUSH);
937       if (r) return r;
938       r = add_state_check_num(reg, ckn);
939       if (r) return r;
940       r = add_rel_addr(reg, SIZE_OP_JUMP);
941     }
942     else {
943       r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
944     }
945 
946     if (r) return r;
947     r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
948     if (r) return r;
949     r = compile_tree(qn->target, reg);
950   }
951   else {
952     r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
953     if (CKN_ON) {
954       if (r) return r;
955       r = add_opcode(reg, OP_STATE_CHECK);
956       if (r) return r;
957       r = add_state_check_num(reg, ckn);
958     }
959   }
960   return r;
961 }
962 
963 #else /* USE_COMBINATION_EXPLOSION_CHECK */
964 
965 static int
compile_length_quantifier_node(QtfrNode * qn,regex_t * reg)966 compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
967 {
968   int len, mod_tlen;
969   int infinite = IS_REPEAT_INFINITE(qn->upper);
970   int empty_info = qn->target_empty_info;
971   int tlen = compile_length_tree(qn->target, reg);
972 
973   if (tlen < 0) return tlen;
974 
975   /* anychar repeat */
976   if (NTYPE(qn->target) == NT_CANY) {
977     if (qn->greedy && infinite) {
978       if (IS_NOT_NULL(qn->next_head_exact))
979         return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
980       else
981         return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
982     }
983   }
984 
985   if (empty_info != 0)
986     mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
987   else
988     mod_tlen = tlen;
989 
990   if (infinite &&
991       (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
992     if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
993       len = SIZE_OP_JUMP;
994     }
995     else {
996       len = tlen * qn->lower;
997     }
998 
999     if (qn->greedy) {
1000       if (IS_NOT_NULL(qn->head_exact))
1001 	len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
1002       else if (IS_NOT_NULL(qn->next_head_exact))
1003 	len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
1004       else
1005 	len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
1006     }
1007     else
1008       len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
1009   }
1010   else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
1011     len = SIZE_OP_JUMP + tlen;
1012   }
1013   else if (!infinite && qn->greedy &&
1014            (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1015                                       <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1016     len = tlen * qn->lower;
1017     len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
1018   }
1019   else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1020     len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
1021   }
1022   else {
1023     len = SIZE_OP_REPEAT_INC
1024         + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
1025   }
1026 
1027   return len;
1028 }
1029 
1030 static int
compile_quantifier_node(QtfrNode * qn,regex_t * reg)1031 compile_quantifier_node(QtfrNode* qn, regex_t* reg)
1032 {
1033   int i, r, mod_tlen;
1034   int infinite = IS_REPEAT_INFINITE(qn->upper);
1035   int empty_info = qn->target_empty_info;
1036   int tlen = compile_length_tree(qn->target, reg);
1037 
1038   if (tlen < 0) return tlen;
1039 
1040   if (is_anychar_star_quantifier(qn)) {
1041     r = compile_tree_n_times(qn->target, qn->lower, reg);
1042     if (r) return r;
1043     if (IS_NOT_NULL(qn->next_head_exact)) {
1044       if (IS_MULTILINE(reg->options))
1045 	r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
1046       else
1047 	r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
1048       if (r) return r;
1049       return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1050     }
1051     else {
1052       if (IS_MULTILINE(reg->options))
1053 	return add_opcode(reg, OP_ANYCHAR_ML_STAR);
1054       else
1055 	return add_opcode(reg, OP_ANYCHAR_STAR);
1056     }
1057   }
1058 
1059   if (empty_info != 0)
1060     mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1061   else
1062     mod_tlen = tlen;
1063 
1064   if (infinite &&
1065       (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1066     if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1067       if (qn->greedy) {
1068 	if (IS_NOT_NULL(qn->head_exact))
1069 	  r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
1070 	else if (IS_NOT_NULL(qn->next_head_exact))
1071 	  r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
1072 	else
1073 	  r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
1074       }
1075       else {
1076 	r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
1077       }
1078       if (r) return r;
1079     }
1080     else {
1081       r = compile_tree_n_times(qn->target, qn->lower, reg);
1082       if (r) return r;
1083     }
1084 
1085     if (qn->greedy) {
1086       if (IS_NOT_NULL(qn->head_exact)) {
1087 	r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
1088 			     mod_tlen + SIZE_OP_JUMP);
1089 	if (r) return r;
1090 	add_bytes(reg, NSTR(qn->head_exact)->s, 1);
1091 	r = compile_tree_empty_check(qn->target, reg, empty_info);
1092 	if (r) return r;
1093 	r = add_opcode_rel_addr(reg, OP_JUMP,
1094 	-(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
1095       }
1096       else if (IS_NOT_NULL(qn->next_head_exact)) {
1097 	r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
1098 				mod_tlen + SIZE_OP_JUMP);
1099 	if (r) return r;
1100 	add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1101 	r = compile_tree_empty_check(qn->target, reg, empty_info);
1102 	if (r) return r;
1103 	r = add_opcode_rel_addr(reg, OP_JUMP,
1104           -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
1105       }
1106       else {
1107 	r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
1108 	if (r) return r;
1109 	r = compile_tree_empty_check(qn->target, reg, empty_info);
1110 	if (r) return r;
1111 	r = add_opcode_rel_addr(reg, OP_JUMP,
1112 		     -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
1113       }
1114     }
1115     else {
1116       r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
1117       if (r) return r;
1118       r = compile_tree_empty_check(qn->target, reg, empty_info);
1119       if (r) return r;
1120       r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
1121     }
1122   }
1123   else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
1124     r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1125     if (r) return r;
1126     r = compile_tree(qn->target, reg);
1127   }
1128   else if (!infinite && qn->greedy &&
1129            (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1130                                   <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1131     int n = qn->upper - qn->lower;
1132 
1133     r = compile_tree_n_times(qn->target, qn->lower, reg);
1134     if (r) return r;
1135 
1136     for (i = 0; i < n; i++) {
1137       r = add_opcode_rel_addr(reg, OP_PUSH,
1138 			   (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
1139       if (r) return r;
1140       r = compile_tree(qn->target, reg);
1141       if (r) return r;
1142     }
1143   }
1144   else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1145     r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
1146     if (r) return r;
1147     r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1148     if (r) return r;
1149     r = compile_tree(qn->target, reg);
1150   }
1151   else {
1152     r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
1153   }
1154   return r;
1155 }
1156 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
1157 
1158 static int
compile_length_option_node(EncloseNode * node,regex_t * reg)1159 compile_length_option_node(EncloseNode* node, regex_t* reg)
1160 {
1161   int tlen;
1162   OnigOptionType prev = reg->options;
1163 
1164   reg->options = node->option;
1165   tlen = compile_length_tree(node->target, reg);
1166   reg->options = prev;
1167 
1168   if (tlen < 0) return tlen;
1169 
1170   if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1171     return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
1172            + tlen + SIZE_OP_SET_OPTION;
1173   }
1174   else
1175     return tlen;
1176 }
1177 
1178 static int
compile_option_node(EncloseNode * node,regex_t * reg)1179 compile_option_node(EncloseNode* node, regex_t* reg)
1180 {
1181   int r;
1182   OnigOptionType prev = reg->options;
1183 
1184   if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1185     r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
1186     if (r) return r;
1187     r = add_opcode_option(reg, OP_SET_OPTION, prev);
1188     if (r) return r;
1189     r = add_opcode(reg, OP_FAIL);
1190     if (r) return r;
1191   }
1192 
1193   reg->options = node->option;
1194   r = compile_tree(node->target, reg);
1195   reg->options = prev;
1196 
1197   if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1198     if (r) return r;
1199     r = add_opcode_option(reg, OP_SET_OPTION, prev);
1200   }
1201   return r;
1202 }
1203 
1204 static int
compile_length_enclose_node(EncloseNode * node,regex_t * reg)1205 compile_length_enclose_node(EncloseNode* node, regex_t* reg)
1206 {
1207   int len;
1208   int tlen;
1209 
1210   if (node->type == ENCLOSE_OPTION)
1211     return compile_length_option_node(node, reg);
1212 
1213   if (node->target) {
1214     tlen = compile_length_tree(node->target, reg);
1215     if (tlen < 0) return tlen;
1216   }
1217   else
1218     tlen = 0;
1219 
1220   switch (node->type) {
1221   case ENCLOSE_MEMORY:
1222 #ifdef USE_SUBEXP_CALL
1223     if (IS_ENCLOSE_CALLED(node)) {
1224       len = SIZE_OP_MEMORY_START_PUSH + tlen
1225 	  + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
1226       if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1227 	len += (IS_ENCLOSE_RECURSION(node)
1228 		? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1229       else
1230 	len += (IS_ENCLOSE_RECURSION(node)
1231 		? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1232     }
1233     else
1234 #endif
1235     {
1236       if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1237 	len = SIZE_OP_MEMORY_START_PUSH;
1238       else
1239 	len = SIZE_OP_MEMORY_START;
1240 
1241       len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1242 		     ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
1243     }
1244     break;
1245 
1246   case ENCLOSE_STOP_BACKTRACK:
1247     if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1248       QtfrNode* qn = NQTFR(node->target);
1249       tlen = compile_length_tree(qn->target, reg);
1250       if (tlen < 0) return tlen;
1251 
1252       len = tlen * qn->lower
1253 	  + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
1254     }
1255     else {
1256       len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
1257     }
1258     break;
1259 
1260   default:
1261     return ONIGERR_TYPE_BUG;
1262     break;
1263   }
1264 
1265   return len;
1266 }
1267 
1268 static int get_char_length_tree(Node* node, regex_t* reg, int* len);
1269 
1270 static int
compile_enclose_node(EncloseNode * node,regex_t * reg)1271 compile_enclose_node(EncloseNode* node, regex_t* reg)
1272 {
1273   int r, len;
1274 
1275   if (node->type == ENCLOSE_OPTION)
1276     return compile_option_node(node, reg);
1277 
1278   switch (node->type) {
1279   case ENCLOSE_MEMORY:
1280 #ifdef USE_SUBEXP_CALL
1281     if (IS_ENCLOSE_CALLED(node)) {
1282       r = add_opcode(reg, OP_CALL);
1283       if (r) return r;
1284       node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
1285       node->state |= NST_ADDR_FIXED;
1286       r = add_abs_addr(reg, (int )node->call_addr);
1287       if (r) return r;
1288       len = compile_length_tree(node->target, reg);
1289       len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
1290       if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1291 	len += (IS_ENCLOSE_RECURSION(node)
1292 		? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1293       else
1294 	len += (IS_ENCLOSE_RECURSION(node)
1295 		? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1296 
1297       r = add_opcode_rel_addr(reg, OP_JUMP, len);
1298       if (r) return r;
1299     }
1300 #endif
1301     if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1302       r = add_opcode(reg, OP_MEMORY_START_PUSH);
1303     else
1304       r = add_opcode(reg, OP_MEMORY_START);
1305     if (r) return r;
1306     r = add_mem_num(reg, node->regnum);
1307     if (r) return r;
1308     r = compile_tree(node->target, reg);
1309     if (r) return r;
1310 #ifdef USE_SUBEXP_CALL
1311     if (IS_ENCLOSE_CALLED(node)) {
1312       if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1313 	r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1314 			     ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
1315       else
1316 	r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1317 			     ? OP_MEMORY_END_REC : OP_MEMORY_END));
1318 
1319       if (r) return r;
1320       r = add_mem_num(reg, node->regnum);
1321       if (r) return r;
1322       r = add_opcode(reg, OP_RETURN);
1323     }
1324     else
1325 #endif
1326     {
1327       if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1328 	r = add_opcode(reg, OP_MEMORY_END_PUSH);
1329       else
1330 	r = add_opcode(reg, OP_MEMORY_END);
1331       if (r) return r;
1332       r = add_mem_num(reg, node->regnum);
1333     }
1334     break;
1335 
1336   case ENCLOSE_STOP_BACKTRACK:
1337     if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1338       QtfrNode* qn = NQTFR(node->target);
1339       r = compile_tree_n_times(qn->target, qn->lower, reg);
1340       if (r) return r;
1341 
1342       len = compile_length_tree(qn->target, reg);
1343       if (len < 0) return len;
1344 
1345       r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
1346       if (r) return r;
1347       r = compile_tree(qn->target, reg);
1348       if (r) return r;
1349       r = add_opcode(reg, OP_POP);
1350       if (r) return r;
1351       r = add_opcode_rel_addr(reg, OP_JUMP,
1352 	 -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
1353     }
1354     else {
1355       r = add_opcode(reg, OP_PUSH_STOP_BT);
1356       if (r) return r;
1357       r = compile_tree(node->target, reg);
1358       if (r) return r;
1359       r = add_opcode(reg, OP_POP_STOP_BT);
1360     }
1361     break;
1362 
1363   default:
1364     return ONIGERR_TYPE_BUG;
1365     break;
1366   }
1367 
1368   return r;
1369 }
1370 
1371 static int
compile_length_anchor_node(AnchorNode * node,regex_t * reg)1372 compile_length_anchor_node(AnchorNode* node, regex_t* reg)
1373 {
1374   int len;
1375   int tlen = 0;
1376 
1377   if (node->target) {
1378     tlen = compile_length_tree(node->target, reg);
1379     if (tlen < 0) return tlen;
1380   }
1381 
1382   switch (node->type) {
1383   case ANCHOR_PREC_READ:
1384     len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
1385     break;
1386   case ANCHOR_PREC_READ_NOT:
1387     len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
1388     break;
1389   case ANCHOR_LOOK_BEHIND:
1390     len = SIZE_OP_LOOK_BEHIND + tlen;
1391     break;
1392   case ANCHOR_LOOK_BEHIND_NOT:
1393     len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
1394     break;
1395 
1396   default:
1397     len = SIZE_OPCODE;
1398     break;
1399   }
1400 
1401   return len;
1402 }
1403 
1404 static int
compile_anchor_node(AnchorNode * node,regex_t * reg)1405 compile_anchor_node(AnchorNode* node, regex_t* reg)
1406 {
1407   int r, len;
1408 
1409   switch (node->type) {
1410   case ANCHOR_BEGIN_BUF:      r = add_opcode(reg, OP_BEGIN_BUF);      break;
1411   case ANCHOR_END_BUF:        r = add_opcode(reg, OP_END_BUF);        break;
1412   case ANCHOR_BEGIN_LINE:     r = add_opcode(reg, OP_BEGIN_LINE);     break;
1413   case ANCHOR_END_LINE:       r = add_opcode(reg, OP_END_LINE);       break;
1414   case ANCHOR_SEMI_END_BUF:   r = add_opcode(reg, OP_SEMI_END_BUF);   break;
1415   case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
1416 
1417   case ANCHOR_WORD_BOUND:     r = add_opcode(reg, OP_WORD_BOUND);     break;
1418   case ANCHOR_NOT_WORD_BOUND: r = add_opcode(reg, OP_NOT_WORD_BOUND); break;
1419 #ifdef USE_WORD_BEGIN_END
1420   case ANCHOR_WORD_BEGIN:     r = add_opcode(reg, OP_WORD_BEGIN);     break;
1421   case ANCHOR_WORD_END:       r = add_opcode(reg, OP_WORD_END);       break;
1422 #endif
1423 
1424   case ANCHOR_PREC_READ:
1425     r = add_opcode(reg, OP_PUSH_POS);
1426     if (r) return r;
1427     r = compile_tree(node->target, reg);
1428     if (r) return r;
1429     r = add_opcode(reg, OP_POP_POS);
1430     break;
1431 
1432   case ANCHOR_PREC_READ_NOT:
1433     len = compile_length_tree(node->target, reg);
1434     if (len < 0) return len;
1435     r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
1436     if (r) return r;
1437     r = compile_tree(node->target, reg);
1438     if (r) return r;
1439     r = add_opcode(reg, OP_FAIL_POS);
1440     break;
1441 
1442   case ANCHOR_LOOK_BEHIND:
1443     {
1444       int n;
1445       r = add_opcode(reg, OP_LOOK_BEHIND);
1446       if (r) return r;
1447       if (node->char_len < 0) {
1448 	r = get_char_length_tree(node->target, reg, &n);
1449 	if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1450       }
1451       else
1452 	n = node->char_len;
1453       r = add_length(reg, n);
1454       if (r) return r;
1455       r = compile_tree(node->target, reg);
1456     }
1457     break;
1458 
1459   case ANCHOR_LOOK_BEHIND_NOT:
1460     {
1461       int n;
1462       len = compile_length_tree(node->target, reg);
1463       r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
1464 			   len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
1465       if (r) return r;
1466       if (node->char_len < 0) {
1467 	r = get_char_length_tree(node->target, reg, &n);
1468 	if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1469       }
1470       else
1471 	n = node->char_len;
1472       r = add_length(reg, n);
1473       if (r) return r;
1474       r = compile_tree(node->target, reg);
1475       if (r) return r;
1476       r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
1477     }
1478     break;
1479 
1480   default:
1481     return ONIGERR_TYPE_BUG;
1482     break;
1483   }
1484 
1485   return r;
1486 }
1487 
1488 static int
compile_length_tree(Node * node,regex_t * reg)1489 compile_length_tree(Node* node, regex_t* reg)
1490 {
1491   int len, type, r;
1492 
1493   type = NTYPE(node);
1494   switch (type) {
1495   case NT_LIST:
1496     len = 0;
1497     do {
1498       r = compile_length_tree(NCAR(node), reg);
1499       if (r < 0) return r;
1500       len += r;
1501     } while (IS_NOT_NULL(node = NCDR(node)));
1502     r = len;
1503     break;
1504 
1505   case NT_ALT:
1506     {
1507       int n;
1508 
1509       n = r = 0;
1510       do {
1511 	r += compile_length_tree(NCAR(node), reg);
1512 	n++;
1513       } while (IS_NOT_NULL(node = NCDR(node)));
1514       r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
1515     }
1516     break;
1517 
1518   case NT_STR:
1519     if (NSTRING_IS_RAW(node))
1520       r = compile_length_string_raw_node(NSTR(node), reg);
1521     else
1522       r = compile_length_string_node(node, reg);
1523     break;
1524 
1525   case NT_CCLASS:
1526     r = compile_length_cclass_node(NCCLASS(node), reg);
1527     break;
1528 
1529   case NT_CTYPE:
1530   case NT_CANY:
1531     r = SIZE_OPCODE;
1532     break;
1533 
1534   case NT_BREF:
1535     {
1536       BRefNode* br = NBREF(node);
1537 
1538 #ifdef USE_BACKREF_WITH_LEVEL
1539       if (IS_BACKREF_NEST_LEVEL(br)) {
1540         r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
1541             SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1542       }
1543       else
1544 #endif
1545       if (br->back_num == 1) {
1546 	r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
1547 	     ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
1548       }
1549       else {
1550 	r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1551       }
1552     }
1553     break;
1554 
1555 #ifdef USE_SUBEXP_CALL
1556   case NT_CALL:
1557     r = SIZE_OP_CALL;
1558     break;
1559 #endif
1560 
1561   case NT_QTFR:
1562     r = compile_length_quantifier_node(NQTFR(node), reg);
1563     break;
1564 
1565   case NT_ENCLOSE:
1566     r = compile_length_enclose_node(NENCLOSE(node), reg);
1567     break;
1568 
1569   case NT_ANCHOR:
1570     r = compile_length_anchor_node(NANCHOR(node), reg);
1571     break;
1572 
1573   default:
1574     return ONIGERR_TYPE_BUG;
1575     break;
1576   }
1577 
1578   return r;
1579 }
1580 
1581 static int
compile_tree(Node * node,regex_t * reg)1582 compile_tree(Node* node, regex_t* reg)
1583 {
1584   int n, type, len, pos, r = 0;
1585 
1586   type = NTYPE(node);
1587   switch (type) {
1588   case NT_LIST:
1589     do {
1590       r = compile_tree(NCAR(node), reg);
1591     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1592     break;
1593 
1594   case NT_ALT:
1595     {
1596       Node* x = node;
1597       len = 0;
1598       do {
1599 	len += compile_length_tree(NCAR(x), reg);
1600 	if (NCDR(x) != NULL) {
1601 	  len += SIZE_OP_PUSH + SIZE_OP_JUMP;
1602 	}
1603       } while (IS_NOT_NULL(x = NCDR(x)));
1604       pos = reg->used + len;  /* goal position */
1605 
1606       do {
1607 	len = compile_length_tree(NCAR(node), reg);
1608 	if (IS_NOT_NULL(NCDR(node))) {
1609 	  r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
1610 	  if (r) break;
1611 	}
1612 	r = compile_tree(NCAR(node), reg);
1613 	if (r) break;
1614 	if (IS_NOT_NULL(NCDR(node))) {
1615 	  len = pos - (reg->used + SIZE_OP_JUMP);
1616 	  r = add_opcode_rel_addr(reg, OP_JUMP, len);
1617 	  if (r) break;
1618 	}
1619       } while (IS_NOT_NULL(node = NCDR(node)));
1620     }
1621     break;
1622 
1623   case NT_STR:
1624     if (NSTRING_IS_RAW(node))
1625       r = compile_string_raw_node(NSTR(node), reg);
1626     else
1627       r = compile_string_node(node, reg);
1628     break;
1629 
1630   case NT_CCLASS:
1631     r = compile_cclass_node(NCCLASS(node), reg);
1632     break;
1633 
1634   case NT_CTYPE:
1635     {
1636       int op;
1637 
1638       switch (NCTYPE(node)->ctype) {
1639       case ONIGENC_CTYPE_WORD:
1640 	if (NCTYPE(node)->not != 0)  op = OP_NOT_WORD;
1641 	else                         op = OP_WORD;
1642 	break;
1643       default:
1644 	return ONIGERR_TYPE_BUG;
1645 	break;
1646       }
1647       r = add_opcode(reg, op);
1648     }
1649     break;
1650 
1651   case NT_CANY:
1652     if (IS_MULTILINE(reg->options))
1653       r = add_opcode(reg, OP_ANYCHAR_ML);
1654     else
1655       r = add_opcode(reg, OP_ANYCHAR);
1656     break;
1657 
1658   case NT_BREF:
1659     {
1660       BRefNode* br = NBREF(node);
1661 
1662 #ifdef USE_BACKREF_WITH_LEVEL
1663       if (IS_BACKREF_NEST_LEVEL(br)) {
1664 	r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
1665 	if (r) return r;
1666 	r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
1667 	if (r) return r;
1668 	r = add_length(reg, br->nest_level);
1669 	if (r) return r;
1670 
1671 	goto add_bacref_mems;
1672       }
1673       else
1674 #endif
1675       if (br->back_num == 1) {
1676 	n = br->back_static[0];
1677 	if (IS_IGNORECASE(reg->options)) {
1678 	  r = add_opcode(reg, OP_BACKREFN_IC);
1679 	  if (r) return r;
1680 	  r = add_mem_num(reg, n);
1681 	}
1682 	else {
1683 	  switch (n) {
1684 	  case 1:  r = add_opcode(reg, OP_BACKREF1); break;
1685 	  case 2:  r = add_opcode(reg, OP_BACKREF2); break;
1686 	  default:
1687 	    r = add_opcode(reg, OP_BACKREFN);
1688 	    if (r) return r;
1689 	    r = add_mem_num(reg, n);
1690 	    break;
1691 	  }
1692 	}
1693       }
1694       else {
1695 	int i;
1696 	int* p;
1697 
1698         if (IS_IGNORECASE(reg->options)) {
1699           r = add_opcode(reg, OP_BACKREF_MULTI_IC);
1700         }
1701         else {
1702           r = add_opcode(reg, OP_BACKREF_MULTI);
1703         }
1704 	if (r) return r;
1705 
1706 #ifdef USE_BACKREF_WITH_LEVEL
1707       add_bacref_mems:
1708 #endif
1709 	r = add_length(reg, br->back_num);
1710 	if (r) return r;
1711 	p = BACKREFS_P(br);
1712 	for (i = br->back_num - 1; i >= 0; i--) {
1713 	  r = add_mem_num(reg, p[i]);
1714 	  if (r) return r;
1715 	}
1716       }
1717     }
1718     break;
1719 
1720 #ifdef USE_SUBEXP_CALL
1721   case NT_CALL:
1722     r = compile_call(NCALL(node), reg);
1723     break;
1724 #endif
1725 
1726   case NT_QTFR:
1727     r = compile_quantifier_node(NQTFR(node), reg);
1728     break;
1729 
1730   case NT_ENCLOSE:
1731     r = compile_enclose_node(NENCLOSE(node), reg);
1732     break;
1733 
1734   case NT_ANCHOR:
1735     r = compile_anchor_node(NANCHOR(node), reg);
1736     break;
1737 
1738   default:
1739 #ifdef ONIG_DEBUG
1740     fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
1741 #endif
1742     break;
1743   }
1744 
1745   return r;
1746 }
1747 
1748 #ifdef USE_NAMED_GROUP
1749 
1750 static int
noname_disable_map(Node ** plink,GroupNumRemap * map,int * counter)1751 noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
1752 {
1753   int r = 0;
1754   Node* node = *plink;
1755 
1756   switch (NTYPE(node)) {
1757   case NT_LIST:
1758   case NT_ALT:
1759     do {
1760       r = noname_disable_map(&(NCAR(node)), map, counter);
1761     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1762     break;
1763 
1764   case NT_QTFR:
1765     {
1766       Node** ptarget = &(NQTFR(node)->target);
1767       Node*  old = *ptarget;
1768       r = noname_disable_map(ptarget, map, counter);
1769       if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
1770 	onig_reduce_nested_quantifier(node, *ptarget);
1771       }
1772     }
1773     break;
1774 
1775   case NT_ENCLOSE:
1776     {
1777       EncloseNode* en = NENCLOSE(node);
1778       if (en->type == ENCLOSE_MEMORY) {
1779 	if (IS_ENCLOSE_NAMED_GROUP(en)) {
1780 	  (*counter)++;
1781 	  map[en->regnum].new_val = *counter;
1782 	  en->regnum = *counter;
1783 	  r = noname_disable_map(&(en->target), map, counter);
1784 	}
1785 	else {
1786 	  *plink = en->target;
1787 	  en->target = NULL_NODE;
1788 	  onig_node_free(node);
1789 	  r = noname_disable_map(plink, map, counter);
1790 	}
1791       }
1792       else
1793 	r = noname_disable_map(&(en->target), map, counter);
1794     }
1795     break;
1796 
1797   case NT_ANCHOR:
1798     {
1799       AnchorNode* an = NANCHOR(node);
1800       switch (an->type) {
1801       case ANCHOR_PREC_READ:
1802       case ANCHOR_PREC_READ_NOT:
1803       case ANCHOR_LOOK_BEHIND:
1804       case ANCHOR_LOOK_BEHIND_NOT:
1805        r = noname_disable_map(&(an->target), map, counter);
1806        break;
1807       }
1808     }
1809     break;
1810 
1811   default:
1812     break;
1813   }
1814 
1815   return r;
1816 }
1817 
1818 static int
renumber_node_backref(Node * node,GroupNumRemap * map)1819 renumber_node_backref(Node* node, GroupNumRemap* map)
1820 {
1821   int i, pos, n, old_num;
1822   int *backs;
1823   BRefNode* bn = NBREF(node);
1824 
1825   if (! IS_BACKREF_NAME_REF(bn))
1826     return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
1827 
1828   old_num = bn->back_num;
1829   if (IS_NULL(bn->back_dynamic))
1830     backs = bn->back_static;
1831   else
1832     backs = bn->back_dynamic;
1833 
1834   for (i = 0, pos = 0; i < old_num; i++) {
1835     n = map[backs[i]].new_val;
1836     if (n > 0) {
1837       backs[pos] = n;
1838       pos++;
1839     }
1840   }
1841 
1842   bn->back_num = pos;
1843   return 0;
1844 }
1845 
1846 static int
renumber_by_map(Node * node,GroupNumRemap * map)1847 renumber_by_map(Node* node, GroupNumRemap* map)
1848 {
1849   int r = 0;
1850 
1851   switch (NTYPE(node)) {
1852   case NT_LIST:
1853   case NT_ALT:
1854     do {
1855       r = renumber_by_map(NCAR(node), map);
1856     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1857     break;
1858   case NT_QTFR:
1859     r = renumber_by_map(NQTFR(node)->target, map);
1860     break;
1861   case NT_ENCLOSE:
1862     r = renumber_by_map(NENCLOSE(node)->target, map);
1863     break;
1864 
1865   case NT_BREF:
1866     r = renumber_node_backref(node, map);
1867     break;
1868 
1869   case NT_ANCHOR:
1870     {
1871       AnchorNode* an = NANCHOR(node);
1872       switch (an->type) {
1873       case ANCHOR_PREC_READ:
1874       case ANCHOR_PREC_READ_NOT:
1875       case ANCHOR_LOOK_BEHIND:
1876       case ANCHOR_LOOK_BEHIND_NOT:
1877        r = renumber_by_map(an->target, map);
1878        break;
1879       }
1880     }
1881     break;
1882 
1883   default:
1884     break;
1885   }
1886 
1887   return r;
1888 }
1889 
1890 static int
numbered_ref_check(Node * node)1891 numbered_ref_check(Node* node)
1892 {
1893   int r = 0;
1894 
1895   switch (NTYPE(node)) {
1896   case NT_LIST:
1897   case NT_ALT:
1898     do {
1899       r = numbered_ref_check(NCAR(node));
1900     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1901     break;
1902   case NT_QTFR:
1903     r = numbered_ref_check(NQTFR(node)->target);
1904     break;
1905   case NT_ENCLOSE:
1906     r = numbered_ref_check(NENCLOSE(node)->target);
1907     break;
1908 
1909   case NT_BREF:
1910     if (! IS_BACKREF_NAME_REF(NBREF(node)))
1911       return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
1912     break;
1913 
1914   default:
1915     break;
1916   }
1917 
1918   return r;
1919 }
1920 
1921 static int
disable_noname_group_capture(Node ** root,regex_t * reg,ScanEnv * env)1922 disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
1923 {
1924   int r, i, pos, counter;
1925   BitStatusType loc;
1926   GroupNumRemap* map;
1927 
1928   map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
1929   CHECK_NULL_RETURN_MEMERR(map);
1930   for (i = 1; i <= env->num_mem; i++) {
1931     map[i].new_val = 0;
1932   }
1933   counter = 0;
1934   r = noname_disable_map(root, map, &counter);
1935   if (r != 0) return r;
1936 
1937   r = renumber_by_map(*root, map);
1938   if (r != 0) return r;
1939 
1940   for (i = 1, pos = 1; i <= env->num_mem; i++) {
1941     if (map[i].new_val > 0) {
1942       SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
1943       pos++;
1944     }
1945   }
1946 
1947   loc = env->capture_history;
1948   BIT_STATUS_CLEAR(env->capture_history);
1949   for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
1950     if (BIT_STATUS_AT(loc, i)) {
1951       BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
1952     }
1953   }
1954 
1955   env->num_mem = env->num_named;
1956   reg->num_mem = env->num_named;
1957 
1958   return onig_renumber_name_table(reg, map);
1959 }
1960 #endif /* USE_NAMED_GROUP */
1961 
1962 #ifdef USE_SUBEXP_CALL
1963 static int
unset_addr_list_fix(UnsetAddrList * uslist,regex_t * reg)1964 unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
1965 {
1966   int i, offset;
1967   EncloseNode* en;
1968   AbsAddrType addr;
1969 
1970   for (i = 0; i < uslist->num; i++) {
1971     en = NENCLOSE(uslist->us[i].target);
1972     if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
1973     addr = en->call_addr;
1974     offset = uslist->us[i].offset;
1975 
1976     BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
1977   }
1978   return 0;
1979 }
1980 #endif
1981 
1982 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
1983 static int
quantifiers_memory_node_info(Node * node)1984 quantifiers_memory_node_info(Node* node)
1985 {
1986   int r = 0;
1987 
1988   switch (NTYPE(node)) {
1989   case NT_LIST:
1990   case NT_ALT:
1991     {
1992       int v;
1993       do {
1994 	v = quantifiers_memory_node_info(NCAR(node));
1995 	if (v > r) r = v;
1996       } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
1997     }
1998     break;
1999 
2000 #ifdef USE_SUBEXP_CALL
2001   case NT_CALL:
2002     if (IS_CALL_RECURSION(NCALL(node))) {
2003       return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
2004     }
2005     else
2006       r = quantifiers_memory_node_info(NCALL(node)->target);
2007     break;
2008 #endif
2009 
2010   case NT_QTFR:
2011     {
2012       QtfrNode* qn = NQTFR(node);
2013       if (qn->upper != 0) {
2014 	r = quantifiers_memory_node_info(qn->target);
2015       }
2016     }
2017     break;
2018 
2019   case NT_ENCLOSE:
2020     {
2021       EncloseNode* en = NENCLOSE(node);
2022       switch (en->type) {
2023       case ENCLOSE_MEMORY:
2024 	return NQ_TARGET_IS_EMPTY_MEM;
2025 	break;
2026 
2027       case ENCLOSE_OPTION:
2028       case ENCLOSE_STOP_BACKTRACK:
2029 	r = quantifiers_memory_node_info(en->target);
2030 	break;
2031       default:
2032 	break;
2033       }
2034     }
2035     break;
2036 
2037   case NT_BREF:
2038   case NT_STR:
2039   case NT_CTYPE:
2040   case NT_CCLASS:
2041   case NT_CANY:
2042   case NT_ANCHOR:
2043   default:
2044     break;
2045   }
2046 
2047   return r;
2048 }
2049 #endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
2050 
2051 static int
get_min_match_length(Node * node,OnigDistance * min,ScanEnv * env)2052 get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
2053 {
2054   OnigDistance tmin;
2055   int r = 0;
2056 
2057   *min = 0;
2058   switch (NTYPE(node)) {
2059   case NT_BREF:
2060     {
2061       int i;
2062       int* backs;
2063       Node** nodes = SCANENV_MEM_NODES(env);
2064       BRefNode* br = NBREF(node);
2065       if (br->state & NST_RECURSION) break;
2066 
2067       backs = BACKREFS_P(br);
2068       if (backs[0] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
2069       r = get_min_match_length(nodes[backs[0]], min, env);
2070       if (r != 0) break;
2071       for (i = 1; i < br->back_num; i++) {
2072 	if (backs[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
2073 	r = get_min_match_length(nodes[backs[i]], &tmin, env);
2074 	if (r != 0) break;
2075 	if (*min > tmin) *min = tmin;
2076       }
2077     }
2078     break;
2079 
2080 #ifdef USE_SUBEXP_CALL
2081   case NT_CALL:
2082     if (IS_CALL_RECURSION(NCALL(node))) {
2083       EncloseNode* en = NENCLOSE(NCALL(node)->target);
2084       if (IS_ENCLOSE_MIN_FIXED(en))
2085 	*min = en->min_len;
2086     }
2087     else
2088       r = get_min_match_length(NCALL(node)->target, min, env);
2089     break;
2090 #endif
2091 
2092   case NT_LIST:
2093     do {
2094       r = get_min_match_length(NCAR(node), &tmin, env);
2095       if (r == 0) *min += tmin;
2096     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2097     break;
2098 
2099   case NT_ALT:
2100     {
2101       Node *x, *y;
2102       y = node;
2103       do {
2104 	x = NCAR(y);
2105 	r = get_min_match_length(x, &tmin, env);
2106 	if (r != 0) break;
2107 	if (y == node) *min = tmin;
2108 	else if (*min > tmin) *min = tmin;
2109       } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
2110     }
2111     break;
2112 
2113   case NT_STR:
2114     {
2115       StrNode* sn = NSTR(node);
2116       *min = sn->end - sn->s;
2117     }
2118     break;
2119 
2120   case NT_CTYPE:
2121     *min = 1;
2122     break;
2123 
2124   case NT_CCLASS:
2125   case NT_CANY:
2126     *min = 1;
2127     break;
2128 
2129   case NT_QTFR:
2130     {
2131       QtfrNode* qn = NQTFR(node);
2132 
2133       if (qn->lower > 0) {
2134 	r = get_min_match_length(qn->target, min, env);
2135 	if (r == 0)
2136 	  *min = distance_multiply(*min, qn->lower);
2137       }
2138     }
2139     break;
2140 
2141   case NT_ENCLOSE:
2142     {
2143       EncloseNode* en = NENCLOSE(node);
2144       switch (en->type) {
2145       case ENCLOSE_MEMORY:
2146 #ifdef USE_SUBEXP_CALL
2147 	if (IS_ENCLOSE_MIN_FIXED(en))
2148 	  *min = en->min_len;
2149 	else {
2150 	  r = get_min_match_length(en->target, min, env);
2151 	  if (r == 0) {
2152 	    en->min_len = *min;
2153 	    SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
2154 	  }
2155 	}
2156 	break;
2157 #endif
2158       case ENCLOSE_OPTION:
2159       case ENCLOSE_STOP_BACKTRACK:
2160 	r = get_min_match_length(en->target, min, env);
2161 	break;
2162       }
2163     }
2164     break;
2165 
2166   case NT_ANCHOR:
2167   default:
2168     break;
2169   }
2170 
2171   return r;
2172 }
2173 
2174 static int
get_max_match_length(Node * node,OnigDistance * max,ScanEnv * env)2175 get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
2176 {
2177   OnigDistance tmax;
2178   int r = 0;
2179 
2180   *max = 0;
2181   switch (NTYPE(node)) {
2182   case NT_LIST:
2183     do {
2184       r = get_max_match_length(NCAR(node), &tmax, env);
2185       if (r == 0)
2186 	*max = distance_add(*max, tmax);
2187     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2188     break;
2189 
2190   case NT_ALT:
2191     do {
2192       r = get_max_match_length(NCAR(node), &tmax, env);
2193       if (r == 0 && *max < tmax) *max = tmax;
2194     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2195     break;
2196 
2197   case NT_STR:
2198     {
2199       StrNode* sn = NSTR(node);
2200       *max = sn->end - sn->s;
2201     }
2202     break;
2203 
2204   case NT_CTYPE:
2205     *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2206     break;
2207 
2208   case NT_CCLASS:
2209   case NT_CANY:
2210     *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2211     break;
2212 
2213   case NT_BREF:
2214     {
2215       int i;
2216       int* backs;
2217       Node** nodes = SCANENV_MEM_NODES(env);
2218       BRefNode* br = NBREF(node);
2219       if (br->state & NST_RECURSION) {
2220 	*max = ONIG_INFINITE_DISTANCE;
2221 	break;
2222       }
2223       backs = BACKREFS_P(br);
2224       for (i = 0; i < br->back_num; i++) {
2225 	if (backs[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
2226 	r = get_max_match_length(nodes[backs[i]], &tmax, env);
2227 	if (r != 0) break;
2228 	if (*max < tmax) *max = tmax;
2229       }
2230     }
2231     break;
2232 
2233 #ifdef USE_SUBEXP_CALL
2234   case NT_CALL:
2235     if (! IS_CALL_RECURSION(NCALL(node)))
2236       r = get_max_match_length(NCALL(node)->target, max, env);
2237     else
2238       *max = ONIG_INFINITE_DISTANCE;
2239     break;
2240 #endif
2241 
2242   case NT_QTFR:
2243     {
2244       QtfrNode* qn = NQTFR(node);
2245 
2246       if (qn->upper != 0) {
2247 	r = get_max_match_length(qn->target, max, env);
2248 	if (r == 0 && *max != 0) {
2249 	  if (! IS_REPEAT_INFINITE(qn->upper))
2250 	    *max = distance_multiply(*max, qn->upper);
2251 	  else
2252 	    *max = ONIG_INFINITE_DISTANCE;
2253 	}
2254       }
2255     }
2256     break;
2257 
2258   case NT_ENCLOSE:
2259     {
2260       EncloseNode* en = NENCLOSE(node);
2261       switch (en->type) {
2262       case ENCLOSE_MEMORY:
2263 #ifdef USE_SUBEXP_CALL
2264 	if (IS_ENCLOSE_MAX_FIXED(en))
2265 	  *max = en->max_len;
2266 	else {
2267 	  r = get_max_match_length(en->target, max, env);
2268 	  if (r == 0) {
2269 	    en->max_len = *max;
2270 	    SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
2271 	  }
2272 	}
2273 	break;
2274 #endif
2275       case ENCLOSE_OPTION:
2276       case ENCLOSE_STOP_BACKTRACK:
2277 	r = get_max_match_length(en->target, max, env);
2278 	break;
2279       }
2280     }
2281     break;
2282 
2283   case NT_ANCHOR:
2284   default:
2285     break;
2286   }
2287 
2288   return r;
2289 }
2290 
2291 #define GET_CHAR_LEN_VARLEN           -1
2292 #define GET_CHAR_LEN_TOP_ALT_VARLEN   -2
2293 
2294 /* fixed size pattern node only */
2295 static int
get_char_length_tree1(Node * node,regex_t * reg,int * len,int level)2296 get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
2297 {
2298   int tlen;
2299   int r = 0;
2300 
2301   level++;
2302   *len = 0;
2303   switch (NTYPE(node)) {
2304   case NT_LIST:
2305     do {
2306       r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2307       if (r == 0)
2308 	*len = distance_add(*len, tlen);
2309     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2310     break;
2311 
2312   case NT_ALT:
2313     {
2314       int tlen2;
2315       int varlen = 0;
2316 
2317       r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2318       while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
2319 	r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
2320 	if (r == 0) {
2321 	  if (tlen != tlen2)
2322 	    varlen = 1;
2323 	}
2324       }
2325       if (r == 0) {
2326 	if (varlen != 0) {
2327 	  if (level == 1)
2328 	    r = GET_CHAR_LEN_TOP_ALT_VARLEN;
2329 	  else
2330 	    r = GET_CHAR_LEN_VARLEN;
2331 	}
2332 	else
2333 	  *len = tlen;
2334       }
2335     }
2336     break;
2337 
2338   case NT_STR:
2339     {
2340       StrNode* sn = NSTR(node);
2341       UChar *s = sn->s;
2342       while (s < sn->end) {
2343 	s += enclen(reg->enc, s);
2344 	(*len)++;
2345       }
2346     }
2347     break;
2348 
2349   case NT_QTFR:
2350     {
2351       QtfrNode* qn = NQTFR(node);
2352       if (qn->lower == qn->upper) {
2353 	r = get_char_length_tree1(qn->target, reg, &tlen, level);
2354 	if (r == 0)
2355 	  *len = distance_multiply(tlen, qn->lower);
2356       }
2357       else
2358 	r = GET_CHAR_LEN_VARLEN;
2359     }
2360     break;
2361 
2362 #ifdef USE_SUBEXP_CALL
2363   case NT_CALL:
2364     if (! IS_CALL_RECURSION(NCALL(node)))
2365       r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
2366     else
2367       r = GET_CHAR_LEN_VARLEN;
2368     break;
2369 #endif
2370 
2371   case NT_CTYPE:
2372     *len = 1;
2373     break;
2374 
2375   case NT_CCLASS:
2376   case NT_CANY:
2377     *len = 1;
2378     break;
2379 
2380   case NT_ENCLOSE:
2381     {
2382       EncloseNode* en = NENCLOSE(node);
2383       switch (en->type) {
2384       case ENCLOSE_MEMORY:
2385 #ifdef USE_SUBEXP_CALL
2386 	if (IS_ENCLOSE_CLEN_FIXED(en))
2387 	  *len = en->char_len;
2388 	else {
2389 	  r = get_char_length_tree1(en->target, reg, len, level);
2390 	  if (r == 0) {
2391 	    en->char_len = *len;
2392 	    SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
2393 	  }
2394 	}
2395 	break;
2396 #endif
2397       case ENCLOSE_OPTION:
2398       case ENCLOSE_STOP_BACKTRACK:
2399 	r = get_char_length_tree1(en->target, reg, len, level);
2400 	break;
2401       default:
2402 	break;
2403       }
2404     }
2405     break;
2406 
2407   case NT_ANCHOR:
2408     break;
2409 
2410   default:
2411     r = GET_CHAR_LEN_VARLEN;
2412     break;
2413   }
2414 
2415   return r;
2416 }
2417 
2418 static int
get_char_length_tree(Node * node,regex_t * reg,int * len)2419 get_char_length_tree(Node* node, regex_t* reg, int* len)
2420 {
2421   return get_char_length_tree1(node, reg, len, 0);
2422 }
2423 
2424 /* x is not included y ==>  1 : 0 */
2425 static int
is_not_included(Node * x,Node * y,regex_t * reg)2426 is_not_included(Node* x, Node* y, regex_t* reg)
2427 {
2428   int i, len;
2429   OnigCodePoint code;
2430   UChar *p, c;
2431   int ytype;
2432 
2433  retry:
2434   ytype = NTYPE(y);
2435   switch (NTYPE(x)) {
2436   case NT_CTYPE:
2437     {
2438       switch (ytype) {
2439       case NT_CTYPE:
2440 	if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
2441 	    NCTYPE(y)->not   != NCTYPE(x)->not)
2442 	  return 1;
2443 	else
2444 	  return 0;
2445 	break;
2446 
2447       case NT_CCLASS:
2448       swap:
2449 	{
2450 	  Node* tmp;
2451 	  tmp = x; x = y; y = tmp;
2452 	  goto retry;
2453 	}
2454 	break;
2455 
2456       case NT_STR:
2457 	goto swap;
2458 	break;
2459 
2460       default:
2461 	break;
2462       }
2463     }
2464     break;
2465 
2466   case NT_CCLASS:
2467     {
2468       CClassNode* xc = NCCLASS(x);
2469       switch (ytype) {
2470       case NT_CTYPE:
2471 	switch (NCTYPE(y)->ctype) {
2472 	case ONIGENC_CTYPE_WORD:
2473 	  if (NCTYPE(y)->not == 0) {
2474 	    if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
2475 	      for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2476 		if (BITSET_AT(xc->bs, i)) {
2477 		  if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
2478 		}
2479 	      }
2480 	      return 1;
2481 	    }
2482 	    return 0;
2483 	  }
2484 	  else {
2485 	    for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2486 	      if (! IS_CODE_SB_WORD(reg->enc, i)) {
2487 		if (!IS_NCCLASS_NOT(xc)) {
2488 		  if (BITSET_AT(xc->bs, i))
2489 		    return 0;
2490 		}
2491 		else {
2492 		  if (! BITSET_AT(xc->bs, i))
2493 		    return 0;
2494 		}
2495 	      }
2496 	    }
2497 	    return 1;
2498 	  }
2499 	  break;
2500 
2501 	default:
2502 	  break;
2503 	}
2504 	break;
2505 
2506       case NT_CCLASS:
2507 	{
2508 	  int v;
2509 	  CClassNode* yc = NCCLASS(y);
2510 
2511 	  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2512 	    v = BITSET_AT(xc->bs, i);
2513 	    if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
2514                 (v == 0 && IS_NCCLASS_NOT(xc))) {
2515 	      v = BITSET_AT(yc->bs, i);
2516 	      if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
2517                   (v == 0 && IS_NCCLASS_NOT(yc)))
2518 		return 0;
2519 	    }
2520 	  }
2521 	  if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
2522 	      (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
2523 	    return 1;
2524 	  return 0;
2525 	}
2526 	break;
2527 
2528       case NT_STR:
2529 	goto swap;
2530 	break;
2531 
2532       default:
2533 	break;
2534       }
2535     }
2536     break;
2537 
2538   case NT_STR:
2539     {
2540       StrNode* xs = NSTR(x);
2541       if (NSTRING_LEN(x) == 0)
2542 	break;
2543 
2544       c = *(xs->s);
2545       switch (ytype) {
2546       case NT_CTYPE:
2547 	switch (NCTYPE(y)->ctype) {
2548 	case ONIGENC_CTYPE_WORD:
2549 	  if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
2550 	    return NCTYPE(y)->not;
2551 	  else
2552 	    return !(NCTYPE(y)->not);
2553 	  break;
2554 	default:
2555 	  break;
2556 	}
2557 	break;
2558 
2559       case NT_CCLASS:
2560 	{
2561 	  CClassNode* cc = NCCLASS(y);
2562 
2563 	  code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
2564 				     xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
2565 	  return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
2566 	}
2567 	break;
2568 
2569       case NT_STR:
2570 	{
2571 	  UChar *q;
2572 	  StrNode* ys = NSTR(y);
2573 	  len = NSTRING_LEN(x);
2574 	  if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
2575 	  if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
2576             /* tiny version */
2577             return 0;
2578 	  }
2579 	  else {
2580 	    for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, q++) {
2581 	      if (*p != *q) return 1;
2582 	    }
2583 	  }
2584 	}
2585 	break;
2586 
2587       default:
2588 	break;
2589       }
2590     }
2591     break;
2592 
2593   default:
2594     break;
2595   }
2596 
2597   return 0;
2598 }
2599 
2600 static Node*
get_head_value_node(Node * node,int exact,regex_t * reg)2601 get_head_value_node(Node* node, int exact, regex_t* reg)
2602 {
2603   Node* n = NULL_NODE;
2604 
2605   switch (NTYPE(node)) {
2606   case NT_BREF:
2607   case NT_ALT:
2608   case NT_CANY:
2609 #ifdef USE_SUBEXP_CALL
2610   case NT_CALL:
2611 #endif
2612     break;
2613 
2614   case NT_CTYPE:
2615   case NT_CCLASS:
2616     if (exact == 0) {
2617       n = node;
2618     }
2619     break;
2620 
2621   case NT_LIST:
2622     n = get_head_value_node(NCAR(node), exact, reg);
2623     break;
2624 
2625   case NT_STR:
2626     {
2627       StrNode* sn = NSTR(node);
2628 
2629       if (sn->end <= sn->s)
2630 	break;
2631 
2632       if (exact != 0 &&
2633 	  !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
2634       }
2635       else {
2636 	n = node;
2637       }
2638     }
2639     break;
2640 
2641   case NT_QTFR:
2642     {
2643       QtfrNode* qn = NQTFR(node);
2644       if (qn->lower > 0) {
2645 	if (IS_NOT_NULL(qn->head_exact))
2646 	  n = qn->head_exact;
2647 	else
2648 	  n = get_head_value_node(qn->target, exact, reg);
2649       }
2650     }
2651     break;
2652 
2653   case NT_ENCLOSE:
2654     {
2655       EncloseNode* en = NENCLOSE(node);
2656       switch (en->type) {
2657       case ENCLOSE_OPTION:
2658 	{
2659 	  OnigOptionType options = reg->options;
2660 
2661 	  reg->options = NENCLOSE(node)->option;
2662 	  n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
2663 	  reg->options = options;
2664 	}
2665 	break;
2666 
2667       case ENCLOSE_MEMORY:
2668       case ENCLOSE_STOP_BACKTRACK:
2669 	n = get_head_value_node(en->target, exact, reg);
2670 	break;
2671       }
2672     }
2673     break;
2674 
2675   case NT_ANCHOR:
2676     if (NANCHOR(node)->type == ANCHOR_PREC_READ)
2677       n = get_head_value_node(NANCHOR(node)->target, exact, reg);
2678     break;
2679 
2680   default:
2681     break;
2682   }
2683 
2684   return n;
2685 }
2686 
2687 static int
check_type_tree(Node * node,int type_mask,int enclose_mask,int anchor_mask)2688 check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
2689 {
2690   int type, r = 0;
2691 
2692   type = NTYPE(node);
2693   if ((NTYPE2BIT(type) & type_mask) == 0)
2694     return 1;
2695 
2696   switch (type) {
2697   case NT_LIST:
2698   case NT_ALT:
2699     do {
2700       r = check_type_tree(NCAR(node), type_mask, enclose_mask,
2701 			  anchor_mask);
2702     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2703     break;
2704 
2705   case NT_QTFR:
2706     r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
2707 			anchor_mask);
2708     break;
2709 
2710   case NT_ENCLOSE:
2711     {
2712       EncloseNode* en = NENCLOSE(node);
2713       if ((en->type & enclose_mask) == 0)
2714 	return 1;
2715 
2716       r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
2717     }
2718     break;
2719 
2720   case NT_ANCHOR:
2721     type = NANCHOR(node)->type;
2722     if ((type & anchor_mask) == 0)
2723       return 1;
2724 
2725     if (NANCHOR(node)->target)
2726       r = check_type_tree(NANCHOR(node)->target,
2727 			  type_mask, enclose_mask, anchor_mask);
2728     break;
2729 
2730   default:
2731     break;
2732   }
2733   return r;
2734 }
2735 
2736 #ifdef USE_SUBEXP_CALL
2737 
2738 #define RECURSION_EXIST       1
2739 #define RECURSION_INFINITE    2
2740 
2741 static int
subexp_inf_recursive_check(Node * node,ScanEnv * env,int head)2742 subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
2743 {
2744   int type;
2745   int r = 0;
2746 
2747   type = NTYPE(node);
2748   switch (type) {
2749   case NT_LIST:
2750     {
2751       Node *x;
2752       OnigDistance min;
2753       int ret;
2754 
2755       x = node;
2756       do {
2757 	ret = subexp_inf_recursive_check(NCAR(x), env, head);
2758 	if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2759 	r |= ret;
2760 	if (head) {
2761 	  ret = get_min_match_length(NCAR(x), &min, env);
2762 	  if (ret != 0) return ret;
2763 	  if (min != 0) head = 0;
2764 	}
2765       } while (IS_NOT_NULL(x = NCDR(x)));
2766     }
2767     break;
2768 
2769   case NT_ALT:
2770     {
2771       int ret;
2772       r = RECURSION_EXIST;
2773       do {
2774 	ret = subexp_inf_recursive_check(NCAR(node), env, head);
2775 	if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2776 	r &= ret;
2777       } while (IS_NOT_NULL(node = NCDR(node)));
2778     }
2779     break;
2780 
2781   case NT_QTFR:
2782     r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
2783     if (r == RECURSION_EXIST) {
2784       if (NQTFR(node)->lower == 0) r = 0;
2785     }
2786     break;
2787 
2788   case NT_ANCHOR:
2789     {
2790       AnchorNode* an = NANCHOR(node);
2791       switch (an->type) {
2792       case ANCHOR_PREC_READ:
2793       case ANCHOR_PREC_READ_NOT:
2794       case ANCHOR_LOOK_BEHIND:
2795       case ANCHOR_LOOK_BEHIND_NOT:
2796 	r = subexp_inf_recursive_check(an->target, env, head);
2797 	break;
2798       }
2799     }
2800     break;
2801 
2802   case NT_CALL:
2803     r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
2804     break;
2805 
2806   case NT_ENCLOSE:
2807     if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
2808       return 0;
2809     else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2810       return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
2811     else {
2812       SET_ENCLOSE_STATUS(node, NST_MARK2);
2813       r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
2814       CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
2815     }
2816     break;
2817 
2818   default:
2819     break;
2820   }
2821 
2822   return r;
2823 }
2824 
2825 static int
subexp_inf_recursive_check_trav(Node * node,ScanEnv * env)2826 subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
2827 {
2828   int type;
2829   int r = 0;
2830 
2831   type = NTYPE(node);
2832   switch (type) {
2833   case NT_LIST:
2834   case NT_ALT:
2835     do {
2836       r = subexp_inf_recursive_check_trav(NCAR(node), env);
2837     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2838     break;
2839 
2840   case NT_QTFR:
2841     r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
2842     break;
2843 
2844   case NT_ANCHOR:
2845     {
2846       AnchorNode* an = NANCHOR(node);
2847       switch (an->type) {
2848       case ANCHOR_PREC_READ:
2849       case ANCHOR_PREC_READ_NOT:
2850       case ANCHOR_LOOK_BEHIND:
2851       case ANCHOR_LOOK_BEHIND_NOT:
2852 	r = subexp_inf_recursive_check_trav(an->target, env);
2853 	break;
2854       }
2855     }
2856     break;
2857 
2858   case NT_ENCLOSE:
2859     {
2860       EncloseNode* en = NENCLOSE(node);
2861 
2862       if (IS_ENCLOSE_RECURSION(en)) {
2863 	SET_ENCLOSE_STATUS(node, NST_MARK1);
2864 	r = subexp_inf_recursive_check(en->target, env, 1);
2865 	if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
2866 	CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2867       }
2868       r = subexp_inf_recursive_check_trav(en->target, env);
2869     }
2870 
2871     break;
2872 
2873   default:
2874     break;
2875   }
2876 
2877   return r;
2878 }
2879 
2880 static int
subexp_recursive_check(Node * node)2881 subexp_recursive_check(Node* node)
2882 {
2883   int r = 0;
2884 
2885   switch (NTYPE(node)) {
2886   case NT_LIST:
2887   case NT_ALT:
2888     do {
2889       r |= subexp_recursive_check(NCAR(node));
2890     } while (IS_NOT_NULL(node = NCDR(node)));
2891     break;
2892 
2893   case NT_QTFR:
2894     r = subexp_recursive_check(NQTFR(node)->target);
2895     break;
2896 
2897   case NT_ANCHOR:
2898     {
2899       AnchorNode* an = NANCHOR(node);
2900       switch (an->type) {
2901       case ANCHOR_PREC_READ:
2902       case ANCHOR_PREC_READ_NOT:
2903       case ANCHOR_LOOK_BEHIND:
2904       case ANCHOR_LOOK_BEHIND_NOT:
2905 	r = subexp_recursive_check(an->target);
2906 	break;
2907       }
2908     }
2909     break;
2910 
2911   case NT_CALL:
2912     r = subexp_recursive_check(NCALL(node)->target);
2913     if (r != 0) SET_CALL_RECURSION(node);
2914     break;
2915 
2916   case NT_ENCLOSE:
2917     if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
2918       return 0;
2919     else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2920       return 1; /* recursion */
2921     else {
2922       SET_ENCLOSE_STATUS(node, NST_MARK2);
2923       r = subexp_recursive_check(NENCLOSE(node)->target);
2924       CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
2925     }
2926     break;
2927 
2928   default:
2929     break;
2930   }
2931 
2932   return r;
2933 }
2934 
2935 
2936 static int
subexp_recursive_check_trav(Node * node,ScanEnv * env)2937 subexp_recursive_check_trav(Node* node, ScanEnv* env)
2938 {
2939 #define FOUND_CALLED_NODE    1
2940 
2941   int type;
2942   int r = 0;
2943 
2944   type = NTYPE(node);
2945   switch (type) {
2946   case NT_LIST:
2947   case NT_ALT:
2948     {
2949       int ret;
2950       do {
2951 	ret = subexp_recursive_check_trav(NCAR(node), env);
2952 	if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
2953 	else if (ret < 0) return ret;
2954       } while (IS_NOT_NULL(node = NCDR(node)));
2955     }
2956     break;
2957 
2958   case NT_QTFR:
2959     r = subexp_recursive_check_trav(NQTFR(node)->target, env);
2960     if (NQTFR(node)->upper == 0) {
2961       if (r == FOUND_CALLED_NODE)
2962 	NQTFR(node)->is_refered = 1;
2963     }
2964     break;
2965 
2966   case NT_ANCHOR:
2967     {
2968       AnchorNode* an = NANCHOR(node);
2969       switch (an->type) {
2970       case ANCHOR_PREC_READ:
2971       case ANCHOR_PREC_READ_NOT:
2972       case ANCHOR_LOOK_BEHIND:
2973       case ANCHOR_LOOK_BEHIND_NOT:
2974 	r = subexp_recursive_check_trav(an->target, env);
2975 	break;
2976       }
2977     }
2978     break;
2979 
2980   case NT_ENCLOSE:
2981     {
2982       EncloseNode* en = NENCLOSE(node);
2983 
2984       if (! IS_ENCLOSE_RECURSION(en)) {
2985 	if (IS_ENCLOSE_CALLED(en)) {
2986 	  SET_ENCLOSE_STATUS(node, NST_MARK1);
2987 	  r = subexp_recursive_check(en->target);
2988 	  if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
2989 	  CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2990 	}
2991       }
2992       r = subexp_recursive_check_trav(en->target, env);
2993       if (IS_ENCLOSE_CALLED(en))
2994 	r |= FOUND_CALLED_NODE;
2995     }
2996     break;
2997 
2998   default:
2999     break;
3000   }
3001 
3002   return r;
3003 }
3004 
3005 static int
setup_subexp_call(Node * node,ScanEnv * env)3006 setup_subexp_call(Node* node, ScanEnv* env)
3007 {
3008   int type;
3009   int r = 0;
3010 
3011   type = NTYPE(node);
3012   switch (type) {
3013   case NT_LIST:
3014     do {
3015       r = setup_subexp_call(NCAR(node), env);
3016     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3017     break;
3018 
3019   case NT_ALT:
3020     do {
3021       r = setup_subexp_call(NCAR(node), env);
3022     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3023     break;
3024 
3025   case NT_QTFR:
3026     r = setup_subexp_call(NQTFR(node)->target, env);
3027     break;
3028   case NT_ENCLOSE:
3029     r = setup_subexp_call(NENCLOSE(node)->target, env);
3030     break;
3031 
3032   case NT_CALL:
3033     {
3034       CallNode* cn = NCALL(node);
3035       Node** nodes = SCANENV_MEM_NODES(env);
3036 
3037       if (cn->group_num != 0) {
3038 	int gnum = cn->group_num;
3039 
3040 #ifdef USE_NAMED_GROUP
3041 	if (env->num_named > 0 &&
3042 	    IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
3043 	    !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
3044 	  return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
3045 	}
3046 #endif
3047 	if (gnum > env->num_mem) {
3048 	  onig_scan_env_set_error_string(env,
3049 		 ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
3050 	  return ONIGERR_UNDEFINED_GROUP_REFERENCE;
3051 	}
3052 
3053 #ifdef USE_NAMED_GROUP
3054       set_call_attr:
3055 #endif
3056 	cn->target = nodes[cn->group_num];
3057 	if (IS_NULL(cn->target)) {
3058 	  onig_scan_env_set_error_string(env,
3059 		 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3060 	  return ONIGERR_UNDEFINED_NAME_REFERENCE;
3061 	}
3062 	SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
3063 	BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
3064 	cn->unset_addr_list = env->unset_addr_list;
3065       }
3066 #ifdef USE_NAMED_GROUP
3067       else {
3068 	int *refs;
3069 
3070 	int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
3071 					   &refs);
3072 	if (n <= 0) {
3073 	  onig_scan_env_set_error_string(env,
3074 		 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3075 	  return ONIGERR_UNDEFINED_NAME_REFERENCE;
3076 	}
3077 	else if (n > 1) {
3078 	  onig_scan_env_set_error_string(env,
3079 	    ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
3080 	  return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
3081 	}
3082 	else {
3083 	  cn->group_num = refs[0];
3084 	  goto set_call_attr;
3085 	}
3086       }
3087 #endif
3088     }
3089     break;
3090 
3091   case NT_ANCHOR:
3092     {
3093       AnchorNode* an = NANCHOR(node);
3094 
3095       switch (an->type) {
3096       case ANCHOR_PREC_READ:
3097       case ANCHOR_PREC_READ_NOT:
3098       case ANCHOR_LOOK_BEHIND:
3099       case ANCHOR_LOOK_BEHIND_NOT:
3100 	r = setup_subexp_call(an->target, env);
3101 	break;
3102       }
3103     }
3104     break;
3105 
3106   default:
3107     break;
3108   }
3109 
3110   return r;
3111 }
3112 #endif
3113 
3114 /* divide different length alternatives in look-behind.
3115   (?<=A|B) ==> (?<=A)|(?<=B)
3116   (?<!A|B) ==> (?<!A)(?<!B)
3117 */
3118 static int
divide_look_behind_alternatives(Node * node)3119 divide_look_behind_alternatives(Node* node)
3120 {
3121   Node *head, *np, *insert_node;
3122   AnchorNode* an = NANCHOR(node);
3123   int anc_type = an->type;
3124 
3125   head = an->target;
3126   np = NCAR(head);
3127   swap_node(node, head);
3128   NCAR(node) = head;
3129   NANCHOR(head)->target = np;
3130 
3131   np = node;
3132   while ((np = NCDR(np)) != NULL_NODE) {
3133     insert_node = onig_node_new_anchor(anc_type);
3134     CHECK_NULL_RETURN_MEMERR(insert_node);
3135     NANCHOR(insert_node)->target = NCAR(np);
3136     NCAR(np) = insert_node;
3137   }
3138 
3139   if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
3140     np = node;
3141     do {
3142       SET_NTYPE(np, NT_LIST);  /* alt -> list */
3143     } while ((np = NCDR(np)) != NULL_NODE);
3144   }
3145   return 0;
3146 }
3147 
3148 static int
setup_look_behind(Node * node,regex_t * reg,ScanEnv * env)3149 setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
3150 {
3151   int r, len;
3152   AnchorNode* an = NANCHOR(node);
3153 
3154   r = get_char_length_tree(an->target, reg, &len);
3155   if (r == 0)
3156     an->char_len = len;
3157   else if (r == GET_CHAR_LEN_VARLEN)
3158     r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3159   else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
3160     if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
3161       r = divide_look_behind_alternatives(node);
3162     else
3163       r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3164   }
3165 
3166   return r;
3167 }
3168 
3169 static int
next_setup(Node * node,Node * next_node,regex_t * reg)3170 next_setup(Node* node, Node* next_node, regex_t* reg)
3171 {
3172   int type;
3173 
3174  retry:
3175   type = NTYPE(node);
3176   if (type == NT_QTFR) {
3177     QtfrNode* qn = NQTFR(node);
3178     if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
3179 #ifdef USE_QTFR_PEEK_NEXT
3180       Node* n = get_head_value_node(next_node, 1, reg);
3181       /* '\0': for UTF-16BE etc... */
3182       if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
3183 	qn->next_head_exact = n;
3184       }
3185 #endif
3186       /* automatic posseivation a*b ==> (?>a*)b */
3187       if (qn->lower <= 1) {
3188 	int ttype = NTYPE(qn->target);
3189 	if (IS_NODE_TYPE_SIMPLE(ttype)) {
3190 	  Node *x, *y;
3191 	  x = get_head_value_node(qn->target, 0, reg);
3192 	  if (IS_NOT_NULL(x)) {
3193 	    y = get_head_value_node(next_node,  0, reg);
3194 	    if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
3195 	      Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);
3196 	      CHECK_NULL_RETURN_MEMERR(en);
3197 	      SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
3198 	      swap_node(node, en);
3199 	      NENCLOSE(node)->target = en;
3200 	    }
3201 	  }
3202 	}
3203       }
3204     }
3205   }
3206   else if (type == NT_ENCLOSE) {
3207     EncloseNode* en = NENCLOSE(node);
3208     if (en->type == ENCLOSE_MEMORY) {
3209       node = en->target;
3210       goto retry;
3211     }
3212   }
3213   return 0;
3214 }
3215 
3216 
3217 static int
update_string_node_case_fold(regex_t * reg,Node * node)3218 update_string_node_case_fold(regex_t* reg, Node *node)
3219 {
3220   UChar *p, *q, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
3221   UChar *sbuf, *ebuf, *sp;
3222   int r, i, len, sbuf_size;
3223   StrNode* sn = NSTR(node);
3224 
3225   end = sn->end;
3226   sbuf_size = (end - sn->s) * 2;
3227   sbuf = (UChar* )xmalloc(sbuf_size);
3228   CHECK_NULL_RETURN_MEMERR(sbuf);
3229   ebuf = sbuf + sbuf_size;
3230 
3231   sp = sbuf;
3232   p = sn->s;
3233   while (p < end) {
3234     len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
3235     q = buf;
3236     for (i = 0; i < len; i++) {
3237       if (sp >= ebuf) {
3238 	sbuf = (UChar* )xrealloc(sbuf, sbuf_size * 2);
3239 	CHECK_NULL_RETURN_MEMERR(sbuf);
3240 	sp = sbuf + sbuf_size;
3241 	sbuf_size *= 2;
3242 	ebuf = sbuf + sbuf_size;
3243       }
3244 
3245       *sp++ = buf[i];
3246     }
3247   }
3248 
3249   r = onig_node_str_set(node, sbuf, sp);
3250   if (r != 0) {
3251     xfree(sbuf);
3252     return r;
3253   }
3254 
3255   xfree(sbuf);
3256   return 0;
3257 }
3258 
3259 static int
expand_case_fold_make_rem_string(Node ** rnode,UChar * s,UChar * end,regex_t * reg)3260 expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
3261 				 regex_t* reg)
3262 {
3263   int r;
3264   Node *node;
3265 
3266   node = onig_node_new_str(s, end);
3267   if (IS_NULL(node)) return ONIGERR_MEMORY;
3268 
3269   r = update_string_node_case_fold(reg, node);
3270   if (r != 0) {
3271     onig_node_free(node);
3272     return r;
3273   }
3274 
3275   NSTRING_SET_AMBIG(node);
3276   NSTRING_SET_DONT_GET_OPT_INFO(node);
3277   *rnode = node;
3278   return 0;
3279 }
3280 
3281 static int
expand_case_fold_string_alt(int item_num,OnigCaseFoldCodeItem items[],UChar * p,int slen,UChar * end,regex_t * reg,Node ** rnode)3282 expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
3283 			    UChar *p, int slen, UChar *end,
3284 			    regex_t* reg, Node **rnode)
3285 {
3286   int r, i, j, len, varlen;
3287   Node *anode, *var_anode, *snode, *xnode, *an;
3288   UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
3289 
3290   *rnode = var_anode = NULL_NODE;
3291 
3292   varlen = 0;
3293   for (i = 0; i < item_num; i++) {
3294     if (items[i].byte_len != slen) {
3295       varlen = 1;
3296       break;
3297     }
3298   }
3299 
3300   if (varlen != 0) {
3301     *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3302     if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
3303 
3304     xnode = onig_node_new_list(NULL, NULL);
3305     if (IS_NULL(xnode)) goto mem_err;
3306     NCAR(var_anode) = xnode;
3307 
3308     anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3309     if (IS_NULL(anode)) goto mem_err;
3310     NCAR(xnode) = anode;
3311   }
3312   else {
3313     *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3314     if (IS_NULL(anode)) return ONIGERR_MEMORY;
3315   }
3316 
3317   snode = onig_node_new_str(p, p + slen);
3318   if (IS_NULL(snode)) goto mem_err;
3319 
3320   NCAR(anode) = snode;
3321 
3322   for (i = 0; i < item_num; i++) {
3323     snode = onig_node_new_str(NULL, NULL);
3324     if (IS_NULL(snode)) goto mem_err;
3325 
3326     for (j = 0; j < items[i].code_len; j++) {
3327       len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
3328       if (len < 0) {
3329 	r = len;
3330 	goto mem_err2;
3331       }
3332 
3333       r = onig_node_str_cat(snode, buf, buf + len);
3334       if (r != 0) goto mem_err2;
3335     }
3336 
3337     an = onig_node_new_alt(NULL_NODE, NULL_NODE);
3338     if (IS_NULL(an)) {
3339       goto mem_err2;
3340     }
3341 
3342     if (items[i].byte_len != slen) {
3343       Node *rem;
3344       UChar *q = p + items[i].byte_len;
3345 
3346       if (q < end) {
3347 	r = expand_case_fold_make_rem_string(&rem, q, end, reg);
3348 	if (r != 0) {
3349 	  onig_node_free(an);
3350 	  goto mem_err2;
3351 	}
3352 
3353 	xnode = onig_node_list_add(NULL_NODE, snode);
3354 	if (IS_NULL(xnode)) {
3355 	  onig_node_free(an);
3356 	  onig_node_free(rem);
3357 	  goto mem_err2;
3358 	}
3359 	if (IS_NULL(onig_node_list_add(xnode, rem))) {
3360 	  onig_node_free(an);
3361 	  onig_node_free(xnode);
3362 	  onig_node_free(rem);
3363 	  goto mem_err;
3364 	}
3365 
3366 	NCAR(an) = xnode;
3367       }
3368       else {
3369 	NCAR(an) = snode;
3370       }
3371 
3372       NCDR(var_anode) = an;
3373       var_anode = an;
3374     }
3375     else {
3376       NCAR(an)     = snode;
3377       NCDR(anode) = an;
3378       anode = an;
3379     }
3380   }
3381 
3382   return varlen;
3383 
3384  mem_err2:
3385   onig_node_free(snode);
3386 
3387  mem_err:
3388   onig_node_free(*rnode);
3389 
3390   return ONIGERR_MEMORY;
3391 }
3392 
3393 static int
expand_case_fold_string(Node * node,regex_t * reg)3394 expand_case_fold_string(Node* node, regex_t* reg)
3395 {
3396 #define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION  8
3397 
3398   int r, n, len, alt_num;
3399   UChar *start, *end, *p;
3400   Node *top_root, *root, *snode, *prev_node;
3401   OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
3402   StrNode* sn = NSTR(node);
3403 
3404   if (NSTRING_IS_AMBIG(node)) return 0;
3405 
3406   start = sn->s;
3407   end   = sn->end;
3408   if (start >= end) return 0;
3409 
3410   r = 0;
3411   top_root = root = prev_node = snode = NULL_NODE;
3412   alt_num = 1;
3413   p = start;
3414   while (p < end) {
3415     n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
3416 					   p, end, items);
3417     if (n < 0) {
3418       r = n;
3419       goto err;
3420     }
3421 
3422     len = enclen(reg->enc, p);
3423 
3424     if (n == 0) {
3425       if (IS_NULL(snode)) {
3426 	if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3427 	  top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3428 	  if (IS_NULL(root)) {
3429 	    onig_node_free(prev_node);
3430 	    goto mem_err;
3431 	  }
3432 	}
3433 
3434 	prev_node = snode = onig_node_new_str(NULL, NULL);
3435 	if (IS_NULL(snode)) goto mem_err;
3436 	if (IS_NOT_NULL(root)) {
3437 	  if (IS_NULL(onig_node_list_add(root, snode))) {
3438 	    onig_node_free(snode);
3439 	    goto mem_err;
3440 	  }
3441 	}
3442       }
3443 
3444       r = onig_node_str_cat(snode, p, p + len);
3445       if (r != 0) goto err;
3446     }
3447     else {
3448       alt_num *= (n + 1);
3449       if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
3450 
3451       if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3452 	top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3453 	if (IS_NULL(root)) {
3454 	  onig_node_free(prev_node);
3455 	  goto mem_err;
3456 	}
3457       }
3458 
3459       r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
3460       if (r < 0) goto mem_err;
3461       if (r == 1) {
3462 	if (IS_NULL(root)) {
3463 	  top_root = prev_node;
3464 	}
3465 	else {
3466 	  if (IS_NULL(onig_node_list_add(root, prev_node))) {
3467 	    onig_node_free(prev_node);
3468 	    goto mem_err;
3469 	  }
3470 	}
3471 
3472 	root = NCAR(prev_node);
3473       }
3474       else { /* r == 0 */
3475 	if (IS_NOT_NULL(root)) {
3476 	  if (IS_NULL(onig_node_list_add(root, prev_node))) {
3477 	    onig_node_free(prev_node);
3478 	    goto mem_err;
3479 	  }
3480 	}
3481       }
3482 
3483       snode = NULL_NODE;
3484     }
3485 
3486     p += len;
3487   }
3488 
3489   if (p < end) {
3490     Node *srem;
3491 
3492     r = expand_case_fold_make_rem_string(&srem, p, end, reg);
3493     if (r != 0) goto mem_err;
3494 
3495     if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
3496       top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3497       if (IS_NULL(root)) {
3498 	onig_node_free(srem);
3499 	onig_node_free(prev_node);
3500 	goto mem_err;
3501       }
3502     }
3503 
3504     if (IS_NULL(root)) {
3505       prev_node = srem;
3506     }
3507     else {
3508       if (IS_NULL(onig_node_list_add(root, srem))) {
3509 	onig_node_free(srem);
3510 	goto mem_err;
3511       }
3512     }
3513   }
3514 
3515   /* ending */
3516   top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
3517   swap_node(node, top_root);
3518   onig_node_free(top_root);
3519   return 0;
3520 
3521  mem_err:
3522   r = ONIGERR_MEMORY;
3523 
3524  err:
3525   onig_node_free(top_root);
3526   return r;
3527 }
3528 
3529 
3530 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3531 
3532 #define CEC_THRES_NUM_BIG_REPEAT         512
3533 #define CEC_INFINITE_NUM          0x7fffffff
3534 
3535 #define CEC_IN_INFINITE_REPEAT    (1<<0)
3536 #define CEC_IN_FINITE_REPEAT      (1<<1)
3537 #define CEC_CONT_BIG_REPEAT       (1<<2)
3538 
3539 static int
setup_comb_exp_check(Node * node,int state,ScanEnv * env)3540 setup_comb_exp_check(Node* node, int state, ScanEnv* env)
3541 {
3542   int type;
3543   int r = state;
3544 
3545   type = NTYPE(node);
3546   switch (type) {
3547   case NT_LIST:
3548     {
3549       Node* prev = NULL_NODE;
3550       do {
3551 	r = setup_comb_exp_check(NCAR(node), r, env);
3552 	prev = NCAR(node);
3553       } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
3554     }
3555     break;
3556 
3557   case NT_ALT:
3558     {
3559       int ret;
3560       do {
3561 	ret = setup_comb_exp_check(NCAR(node), state, env);
3562 	r |= ret;
3563       } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
3564     }
3565     break;
3566 
3567   case NT_QTFR:
3568     {
3569       int child_state = state;
3570       int add_state = 0;
3571       QtfrNode* qn = NQTFR(node);
3572       Node* target = qn->target;
3573       int var_num;
3574 
3575       if (! IS_REPEAT_INFINITE(qn->upper)) {
3576 	if (qn->upper > 1) {
3577 	  /* {0,1}, {1,1} are allowed */
3578 	  child_state |= CEC_IN_FINITE_REPEAT;
3579 
3580 	  /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
3581 	  if (env->backrefed_mem == 0) {
3582 	    if (NTYPE(qn->target) == NT_ENCLOSE) {
3583 	      EncloseNode* en = NENCLOSE(qn->target);
3584 	      if (en->type == ENCLOSE_MEMORY) {
3585 		if (NTYPE(en->target) == NT_QTFR) {
3586 		  QtfrNode* q = NQTFR(en->target);
3587 		  if (IS_REPEAT_INFINITE(q->upper)
3588 		      && q->greedy == qn->greedy) {
3589 		    qn->upper = (qn->lower == 0 ? 1 : qn->lower);
3590 		    if (qn->upper == 1)
3591 		      child_state = state;
3592 		  }
3593 		}
3594 	      }
3595 	    }
3596 	  }
3597 	}
3598       }
3599 
3600       if (state & CEC_IN_FINITE_REPEAT) {
3601 	qn->comb_exp_check_num = -1;
3602       }
3603       else {
3604 	if (IS_REPEAT_INFINITE(qn->upper)) {
3605 	  var_num = CEC_INFINITE_NUM;
3606 	  child_state |= CEC_IN_INFINITE_REPEAT;
3607 	}
3608 	else {
3609 	  var_num = qn->upper - qn->lower;
3610 	}
3611 
3612 	if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
3613 	  add_state |= CEC_CONT_BIG_REPEAT;
3614 
3615 	if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
3616 	    ((state & CEC_CONT_BIG_REPEAT) != 0 &&
3617 	     var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
3618 	  if (qn->comb_exp_check_num == 0) {
3619 	    env->num_comb_exp_check++;
3620 	    qn->comb_exp_check_num = env->num_comb_exp_check;
3621 	    if (env->curr_max_regnum > env->comb_exp_max_regnum)
3622 	      env->comb_exp_max_regnum = env->curr_max_regnum;
3623 	  }
3624 	}
3625       }
3626 
3627       r = setup_comb_exp_check(target, child_state, env);
3628       r |= add_state;
3629     }
3630     break;
3631 
3632   case NT_ENCLOSE:
3633     {
3634       EncloseNode* en = NENCLOSE(node);
3635 
3636       switch (en->type) {
3637       case ENCLOSE_MEMORY:
3638 	{
3639 	  if (env->curr_max_regnum < en->regnum)
3640 	    env->curr_max_regnum = en->regnum;
3641 
3642 	  r = setup_comb_exp_check(en->target, state, env);
3643 	}
3644 	break;
3645 
3646       default:
3647 	r = setup_comb_exp_check(en->target, state, env);
3648 	break;
3649       }
3650     }
3651     break;
3652 
3653 #ifdef USE_SUBEXP_CALL
3654   case NT_CALL:
3655     if (IS_CALL_RECURSION(NCALL(node)))
3656       env->has_recursion = 1;
3657     else
3658       r = setup_comb_exp_check(NCALL(node)->target, state, env);
3659     break;
3660 #endif
3661 
3662   default:
3663     break;
3664   }
3665 
3666   return r;
3667 }
3668 #endif
3669 
3670 #define IN_ALT        (1<<0)
3671 #define IN_NOT        (1<<1)
3672 #define IN_REPEAT     (1<<2)
3673 #define IN_VAR_REPEAT (1<<3)
3674 
3675 /* setup_tree does the following work.
3676  1. check empty loop. (set qn->target_empty_info)
3677  2. expand ignore-case in char class.
3678  3. set memory status bit flags. (reg->mem_stats)
3679  4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
3680  5. find invalid patterns in look-behind.
3681  6. expand repeated string.
3682  */
3683 static int
setup_tree(Node * node,regex_t * reg,int state,ScanEnv * env)3684 setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
3685 {
3686   int type;
3687   int r = 0;
3688 
3689   type = NTYPE(node);
3690   switch (type) {
3691   case NT_LIST:
3692     {
3693       Node* prev = NULL_NODE;
3694       do {
3695 	r = setup_tree(NCAR(node), reg, state, env);
3696 	if (IS_NOT_NULL(prev) && r == 0) {
3697 	  r = next_setup(prev, NCAR(node), reg);
3698 	}
3699 	prev = NCAR(node);
3700       } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3701     }
3702     break;
3703 
3704   case NT_ALT:
3705     do {
3706       r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
3707     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3708     break;
3709 
3710   case NT_CCLASS:
3711     break;
3712 
3713   case NT_STR:
3714     if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
3715       r = expand_case_fold_string(node, reg);
3716     }
3717     break;
3718 
3719   case NT_CTYPE:
3720   case NT_CANY:
3721     break;
3722 
3723 #ifdef USE_SUBEXP_CALL
3724   case NT_CALL:
3725     break;
3726 #endif
3727 
3728   case NT_BREF:
3729     {
3730       int i;
3731       int* p;
3732       Node** nodes = SCANENV_MEM_NODES(env);
3733       BRefNode* br = NBREF(node);
3734       p = BACKREFS_P(br);
3735       for (i = 0; i < br->back_num; i++) {
3736 	if (p[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
3737 	BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
3738 	BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
3739 #ifdef USE_BACKREF_WITH_LEVEL
3740 	if (IS_BACKREF_NEST_LEVEL(br)) {
3741 	  BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
3742 	}
3743 #endif
3744 	SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
3745       }
3746     }
3747     break;
3748 
3749   case NT_QTFR:
3750     {
3751       OnigDistance d;
3752       QtfrNode* qn = NQTFR(node);
3753       Node* target = qn->target;
3754 
3755       if ((state & IN_REPEAT) != 0) {
3756         qn->state |= NST_IN_REPEAT;
3757       }
3758 
3759       if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
3760 	r = get_min_match_length(target, &d, env);
3761 	if (r) break;
3762 	if (d == 0) {
3763 	  qn->target_empty_info = NQ_TARGET_IS_EMPTY;
3764 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3765 	  r = quantifiers_memory_node_info(target);
3766 	  if (r < 0) break;
3767 	  if (r > 0) {
3768 	    qn->target_empty_info = r;
3769 	  }
3770 #endif
3771 #if 0
3772 	  r = get_max_match_length(target, &d, env);
3773 	  if (r == 0 && d == 0) {
3774 	    /*  ()* ==> ()?, ()+ ==> ()  */
3775 	    qn->upper = 1;
3776 	    if (qn->lower > 1) qn->lower = 1;
3777 	    if (NTYPE(target) == NT_STR) {
3778 	      qn->upper = qn->lower = 0;  /* /(?:)+/ ==> // */
3779 	    }
3780 	  }
3781 #endif
3782 	}
3783       }
3784 
3785       state |= IN_REPEAT;
3786       if (qn->lower != qn->upper)
3787 	state |= IN_VAR_REPEAT;
3788       r = setup_tree(target, reg, state, env);
3789       if (r) break;
3790 
3791       /* expand string */
3792 #define EXPAND_STRING_MAX_LENGTH  100
3793       if (NTYPE(target) == NT_STR) {
3794 	if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper &&
3795 	    qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) {
3796 	  int len = NSTRING_LEN(target);
3797 	  StrNode* sn = NSTR(target);
3798 
3799 	  if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) {
3800 	    int i, n = qn->lower;
3801 	    onig_node_conv_to_str_node(node, NSTR(target)->flag);
3802 	    for (i = 0; i < n; i++) {
3803 	      r = onig_node_str_cat(node, sn->s, sn->end);
3804 	      if (r) break;
3805 	    }
3806 	    onig_node_free(target);
3807 	    break; /* break case NT_QTFR: */
3808 	  }
3809 	}
3810       }
3811 
3812 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
3813       if (qn->greedy && (qn->target_empty_info != 0)) {
3814 	if (NTYPE(target) == NT_QTFR) {
3815 	  QtfrNode* tqn = NQTFR(target);
3816 	  if (IS_NOT_NULL(tqn->head_exact)) {
3817 	    qn->head_exact  = tqn->head_exact;
3818 	    tqn->head_exact = NULL;
3819 	  }
3820 	}
3821 	else {
3822 	  qn->head_exact = get_head_value_node(qn->target, 1, reg);
3823 	}
3824       }
3825 #endif
3826     }
3827     break;
3828 
3829   case NT_ENCLOSE:
3830     {
3831       EncloseNode* en = NENCLOSE(node);
3832 
3833       switch (en->type) {
3834       case ENCLOSE_OPTION:
3835 	{
3836 	  OnigOptionType options = reg->options;
3837 	  reg->options = NENCLOSE(node)->option;
3838 	  r = setup_tree(NENCLOSE(node)->target, reg, state, env);
3839 	  reg->options = options;
3840 	}
3841 	break;
3842 
3843       case ENCLOSE_MEMORY:
3844 	if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) {
3845 	  BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
3846 	  /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
3847 	}
3848         r = setup_tree(en->target, reg, state, env);
3849         break;
3850 
3851       case ENCLOSE_STOP_BACKTRACK:
3852 	{
3853 	  Node* target = en->target;
3854 	  r = setup_tree(target, reg, state, env);
3855 	  if (NTYPE(target) == NT_QTFR) {
3856 	    QtfrNode* tqn = NQTFR(target);
3857 	    if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
3858 		tqn->greedy != 0) {  /* (?>a*), a*+ etc... */
3859 	      int qtype = NTYPE(tqn->target);
3860 	      if (IS_NODE_TYPE_SIMPLE(qtype))
3861 		SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
3862 	    }
3863 	  }
3864 	}
3865 	break;
3866       }
3867     }
3868     break;
3869 
3870   case NT_ANCHOR:
3871     {
3872       AnchorNode* an = NANCHOR(node);
3873 
3874       switch (an->type) {
3875       case ANCHOR_PREC_READ:
3876 	r = setup_tree(an->target, reg, state, env);
3877 	break;
3878       case ANCHOR_PREC_READ_NOT:
3879 	r = setup_tree(an->target, reg, (state | IN_NOT), env);
3880 	break;
3881 
3882 /* allowed node types in look-behind */
3883 #define ALLOWED_TYPE_IN_LB  \
3884   ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
3885     BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
3886 
3887 #define ALLOWED_ENCLOSE_IN_LB       ( ENCLOSE_MEMORY )
3888 #define ALLOWED_ENCLOSE_IN_LB_NOT   0
3889 
3890 #define ALLOWED_ANCHOR_IN_LB \
3891 ( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
3892 #define ALLOWED_ANCHOR_IN_LB_NOT \
3893 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
3894 
3895       case ANCHOR_LOOK_BEHIND:
3896 	{
3897 	  r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
3898 			      ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);
3899 	  if (r < 0) return r;
3900 	  if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3901 	  r = setup_look_behind(node, reg, env);
3902 	  if (r != 0) return r;
3903 	  r = setup_tree(an->target, reg, state, env);
3904 	}
3905 	break;
3906 
3907       case ANCHOR_LOOK_BEHIND_NOT:
3908 	{
3909 	  r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
3910 		      ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
3911 	  if (r < 0) return r;
3912 	  if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3913 	  r = setup_look_behind(node, reg, env);
3914 	  if (r != 0) return r;
3915 	  r = setup_tree(an->target, reg, (state | IN_NOT), env);
3916 	}
3917 	break;
3918       }
3919     }
3920     break;
3921 
3922   default:
3923     break;
3924   }
3925 
3926   return r;
3927 }
3928 
3929 /* set skip map for Boyer-Moor search */
3930 static int
set_bm_skip(UChar * s,UChar * end,OnigEncoding enc ARG_UNUSED,UChar skip[],int ** int_skip)3931 set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
3932 	    UChar skip[], int** int_skip)
3933 {
3934   int i, len;
3935 
3936   len = end - s;
3937   if (len < ONIG_CHAR_TABLE_SIZE) {
3938     for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = len;
3939 
3940     for (i = 0; i < len - 1; i++)
3941       skip[s[i]] = len - 1 - i;
3942   }
3943   else {
3944     if (IS_NULL(*int_skip)) {
3945       *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
3946       if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
3947     }
3948     for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = len;
3949 
3950     for (i = 0; i < len - 1; i++)
3951       (*int_skip)[s[i]] = len - 1 - i;
3952   }
3953   return 0;
3954 }
3955 
3956 #define OPT_EXACT_MAXLEN   24
3957 
3958 typedef struct {
3959   OnigDistance min;  /* min byte length */
3960   OnigDistance max;  /* max byte length */
3961 } MinMaxLen;
3962 
3963 typedef struct {
3964   MinMaxLen        mmd;
3965   OnigEncoding     enc;
3966   OnigOptionType   options;
3967   OnigCaseFoldType case_fold_flag;
3968   ScanEnv*         scan_env;
3969 } OptEnv;
3970 
3971 typedef struct {
3972   int left_anchor;
3973   int right_anchor;
3974 } OptAncInfo;
3975 
3976 typedef struct {
3977   MinMaxLen  mmd; /* info position */
3978   OptAncInfo anc;
3979 
3980   int   reach_end;
3981   int   ignore_case;
3982   int   len;
3983   UChar s[OPT_EXACT_MAXLEN];
3984 } OptExactInfo;
3985 
3986 typedef struct {
3987   MinMaxLen mmd; /* info position */
3988   OptAncInfo anc;
3989 
3990   int   value;      /* weighted value */
3991   UChar map[ONIG_CHAR_TABLE_SIZE];
3992 } OptMapInfo;
3993 
3994 typedef struct {
3995   MinMaxLen    len;
3996 
3997   OptAncInfo   anc;
3998   OptExactInfo exb;    /* boundary */
3999   OptExactInfo exm;    /* middle */
4000   OptExactInfo expr;   /* prec read (?=...) */
4001 
4002   OptMapInfo   map;   /* boundary */
4003 } NodeOptInfo;
4004 
4005 
4006 static int
map_position_value(OnigEncoding enc,int i)4007 map_position_value(OnigEncoding enc, int i)
4008 {
4009   static const short int ByteValTable[] = {
4010      5,  1,  1,  1,  1,  1,  1,  1,  1, 10, 10,  1,  1, 10,  1,  1,
4011      1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
4012     12,  4,  7,  4,  4,  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,
4013      6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  5,
4014      5,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
4015      6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  6,  5,  5,  5,
4016      5,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
4017      6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  1
4018   };
4019 
4020   if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) {
4021     if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
4022       return 20;
4023     else
4024       return (int )ByteValTable[i];
4025   }
4026   else
4027     return 4;   /* Take it easy. */
4028 }
4029 
4030 static int
distance_value(MinMaxLen * mm)4031 distance_value(MinMaxLen* mm)
4032 {
4033   /* 1000 / (min-max-dist + 1) */
4034   static const short int dist_vals[] = {
4035     1000,  500,  333,  250,  200,  167,  143,  125,  111,  100,
4036       91,   83,   77,   71,   67,   63,   59,   56,   53,   50,
4037       48,   45,   43,   42,   40,   38,   37,   36,   34,   33,
4038       32,   31,   30,   29,   29,   28,   27,   26,   26,   25,
4039       24,   24,   23,   23,   22,   22,   21,   21,   20,   20,
4040       20,   19,   19,   19,   18,   18,   18,   17,   17,   17,
4041       16,   16,   16,   16,   15,   15,   15,   15,   14,   14,
4042       14,   14,   14,   14,   13,   13,   13,   13,   13,   13,
4043       12,   12,   12,   12,   12,   12,   11,   11,   11,   11,
4044       11,   11,   11,   11,   11,   10,   10,   10,   10,   10
4045   };
4046 
4047   int d;
4048 
4049   if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
4050 
4051   d = mm->max - mm->min;
4052   if (d < (int )(sizeof(dist_vals)/sizeof(dist_vals[0])))
4053     /* return dist_vals[d] * 16 / (mm->min + 12); */
4054     return (int )dist_vals[d];
4055   else
4056     return 1;
4057 }
4058 
4059 static int
comp_distance_value(MinMaxLen * d1,MinMaxLen * d2,int v1,int v2)4060 comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
4061 {
4062   if (v2 <= 0) return -1;
4063   if (v1 <= 0) return  1;
4064 
4065   v1 *= distance_value(d1);
4066   v2 *= distance_value(d2);
4067 
4068   if (v2 > v1) return  1;
4069   if (v2 < v1) return -1;
4070 
4071   if (d2->min < d1->min) return  1;
4072   if (d2->min > d1->min) return -1;
4073   return 0;
4074 }
4075 
4076 static int
is_equal_mml(MinMaxLen * a,MinMaxLen * b)4077 is_equal_mml(MinMaxLen* a, MinMaxLen* b)
4078 {
4079   return (a->min == b->min && a->max == b->max) ? 1 : 0;
4080 }
4081 
4082 
4083 static void
set_mml(MinMaxLen * mml,OnigDistance min,OnigDistance max)4084 set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max)
4085 {
4086   mml->min = min;
4087   mml->max = max;
4088 }
4089 
4090 static void
clear_mml(MinMaxLen * mml)4091 clear_mml(MinMaxLen* mml)
4092 {
4093   mml->min = mml->max = 0;
4094 }
4095 
4096 static void
copy_mml(MinMaxLen * to,MinMaxLen * from)4097 copy_mml(MinMaxLen* to, MinMaxLen* from)
4098 {
4099   to->min = from->min;
4100   to->max = from->max;
4101 }
4102 
4103 static void
add_mml(MinMaxLen * to,MinMaxLen * from)4104 add_mml(MinMaxLen* to, MinMaxLen* from)
4105 {
4106   to->min = distance_add(to->min, from->min);
4107   to->max = distance_add(to->max, from->max);
4108 }
4109 
4110 #if 0
4111 static void
4112 add_len_mml(MinMaxLen* to, OnigDistance len)
4113 {
4114   to->min = distance_add(to->min, len);
4115   to->max = distance_add(to->max, len);
4116 }
4117 #endif
4118 
4119 static void
alt_merge_mml(MinMaxLen * to,MinMaxLen * from)4120 alt_merge_mml(MinMaxLen* to, MinMaxLen* from)
4121 {
4122   if (to->min > from->min) to->min = from->min;
4123   if (to->max < from->max) to->max = from->max;
4124 }
4125 
4126 static void
copy_opt_env(OptEnv * to,OptEnv * from)4127 copy_opt_env(OptEnv* to, OptEnv* from)
4128 {
4129   *to = *from;
4130 }
4131 
4132 static void
clear_opt_anc_info(OptAncInfo * anc)4133 clear_opt_anc_info(OptAncInfo* anc)
4134 {
4135   anc->left_anchor  = 0;
4136   anc->right_anchor = 0;
4137 }
4138 
4139 static void
copy_opt_anc_info(OptAncInfo * to,OptAncInfo * from)4140 copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)
4141 {
4142   *to = *from;
4143 }
4144 
4145 static void
concat_opt_anc_info(OptAncInfo * to,OptAncInfo * left,OptAncInfo * right,OnigDistance left_len,OnigDistance right_len)4146 concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
4147 		    OnigDistance left_len, OnigDistance right_len)
4148 {
4149   clear_opt_anc_info(to);
4150 
4151   to->left_anchor = left->left_anchor;
4152   if (left_len == 0) {
4153     to->left_anchor |= right->left_anchor;
4154   }
4155 
4156   to->right_anchor = right->right_anchor;
4157   if (right_len == 0) {
4158     to->right_anchor |= left->right_anchor;
4159   }
4160 }
4161 
4162 static int
is_left_anchor(int anc)4163 is_left_anchor(int anc)
4164 {
4165   if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
4166       anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
4167       anc == ANCHOR_PREC_READ_NOT)
4168     return 0;
4169 
4170   return 1;
4171 }
4172 
4173 static int
is_set_opt_anc_info(OptAncInfo * to,int anc)4174 is_set_opt_anc_info(OptAncInfo* to, int anc)
4175 {
4176   if ((to->left_anchor & anc) != 0) return 1;
4177 
4178   return ((to->right_anchor & anc) != 0 ? 1 : 0);
4179 }
4180 
4181 static void
add_opt_anc_info(OptAncInfo * to,int anc)4182 add_opt_anc_info(OptAncInfo* to, int anc)
4183 {
4184   if (is_left_anchor(anc))
4185     to->left_anchor |= anc;
4186   else
4187     to->right_anchor |= anc;
4188 }
4189 
4190 static void
remove_opt_anc_info(OptAncInfo * to,int anc)4191 remove_opt_anc_info(OptAncInfo* to, int anc)
4192 {
4193   if (is_left_anchor(anc))
4194     to->left_anchor &= ~anc;
4195   else
4196     to->right_anchor &= ~anc;
4197 }
4198 
4199 static void
alt_merge_opt_anc_info(OptAncInfo * to,OptAncInfo * add)4200 alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add)
4201 {
4202   to->left_anchor  &= add->left_anchor;
4203   to->right_anchor &= add->right_anchor;
4204 }
4205 
4206 static int
is_full_opt_exact_info(OptExactInfo * ex)4207 is_full_opt_exact_info(OptExactInfo* ex)
4208 {
4209   return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
4210 }
4211 
4212 static void
clear_opt_exact_info(OptExactInfo * ex)4213 clear_opt_exact_info(OptExactInfo* ex)
4214 {
4215   clear_mml(&ex->mmd);
4216   clear_opt_anc_info(&ex->anc);
4217   ex->reach_end   = 0;
4218   ex->ignore_case = 0;
4219   ex->len         = 0;
4220   ex->s[0]        = '\0';
4221 }
4222 
4223 static void
copy_opt_exact_info(OptExactInfo * to,OptExactInfo * from)4224 copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
4225 {
4226   *to = *from;
4227 }
4228 
4229 static void
concat_opt_exact_info(OptExactInfo * to,OptExactInfo * add,OnigEncoding enc)4230 concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
4231 {
4232   int i, j, len;
4233   UChar *p, *end;
4234   OptAncInfo tanc;
4235 
4236   if (! to->ignore_case && add->ignore_case) {
4237     if (to->len >= add->len) return ;  /* avoid */
4238 
4239     to->ignore_case = 1;
4240   }
4241 
4242   p = add->s;
4243   end = p + add->len;
4244   for (i = to->len; p < end; ) {
4245     len = enclen(enc, p);
4246     if (i + len > OPT_EXACT_MAXLEN) break;
4247     for (j = 0; j < len && p < end; j++)
4248       to->s[i++] = *p++;
4249   }
4250 
4251   to->len = i;
4252   to->reach_end = (p == end ? add->reach_end : 0);
4253 
4254   concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
4255   if (! to->reach_end) tanc.right_anchor = 0;
4256   copy_opt_anc_info(&to->anc, &tanc);
4257 }
4258 
4259 static void
concat_opt_exact_info_str(OptExactInfo * to,UChar * s,UChar * end,int raw ARG_UNUSED,OnigEncoding enc)4260 concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end,
4261 			  int raw ARG_UNUSED, OnigEncoding enc)
4262 {
4263   int i, j, len;
4264   UChar *p;
4265 
4266   for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
4267     len = enclen(enc, p);
4268     if (i + len > OPT_EXACT_MAXLEN) break;
4269     for (j = 0; j < len && p < end; j++)
4270       to->s[i++] = *p++;
4271   }
4272 
4273   to->len = i;
4274 }
4275 
4276 static void
alt_merge_opt_exact_info(OptExactInfo * to,OptExactInfo * add,OptEnv * env)4277 alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
4278 {
4279   int i, j, len;
4280 
4281   if (add->len == 0 || to->len == 0) {
4282     clear_opt_exact_info(to);
4283     return ;
4284   }
4285 
4286   if (! is_equal_mml(&to->mmd, &add->mmd)) {
4287     clear_opt_exact_info(to);
4288     return ;
4289   }
4290 
4291   for (i = 0; i < to->len && i < add->len; ) {
4292     if (to->s[i] != add->s[i]) break;
4293     len = enclen(env->enc, to->s + i);
4294 
4295     for (j = 1; j < len; j++) {
4296       if (to->s[i+j] != add->s[i+j]) break;
4297     }
4298     if (j < len) break;
4299     i += len;
4300   }
4301 
4302   if (! add->reach_end || i < add->len || i < to->len) {
4303     to->reach_end = 0;
4304   }
4305   to->len = i;
4306   to->ignore_case |= add->ignore_case;
4307 
4308   alt_merge_opt_anc_info(&to->anc, &add->anc);
4309   if (! to->reach_end) to->anc.right_anchor = 0;
4310 }
4311 
4312 static void
select_opt_exact_info(OnigEncoding enc,OptExactInfo * now,OptExactInfo * alt)4313 select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
4314 {
4315   int v1, v2;
4316 
4317   v1 = now->len;
4318   v2 = alt->len;
4319 
4320   if (v2 == 0) {
4321     return ;
4322   }
4323   else if (v1 == 0) {
4324     copy_opt_exact_info(now, alt);
4325     return ;
4326   }
4327   else if (v1 <= 2 && v2 <= 2) {
4328     /* ByteValTable[x] is big value --> low price */
4329     v2 = map_position_value(enc, now->s[0]);
4330     v1 = map_position_value(enc, alt->s[0]);
4331 
4332     if (now->len > 1) v1 += 5;
4333     if (alt->len > 1) v2 += 5;
4334   }
4335 
4336   if (now->ignore_case == 0) v1 *= 2;
4337   if (alt->ignore_case == 0) v2 *= 2;
4338 
4339   if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4340     copy_opt_exact_info(now, alt);
4341 }
4342 
4343 static void
clear_opt_map_info(OptMapInfo * map)4344 clear_opt_map_info(OptMapInfo* map)
4345 {
4346   static const OptMapInfo clean_info = {
4347     {0, 0}, {0, 0}, 0,
4348     {
4349       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4350       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4351       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4352       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4353       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4354       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4355       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4356       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4357       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4358       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4359       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4360       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4361       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4362       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4363       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4364       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4365     }
4366   };
4367 
4368   xmemcpy(map, &clean_info, sizeof(OptMapInfo));
4369 }
4370 
4371 static void
copy_opt_map_info(OptMapInfo * to,OptMapInfo * from)4372 copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
4373 {
4374   *to = *from;
4375 }
4376 
4377 static void
add_char_opt_map_info(OptMapInfo * map,UChar c,OnigEncoding enc)4378 add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
4379 {
4380   if (map->map[c] == 0) {
4381     map->map[c] = 1;
4382     map->value += map_position_value(enc, c);
4383   }
4384 }
4385 
4386 static int
add_char_amb_opt_map_info(OptMapInfo * map,UChar * p,UChar * end,OnigEncoding enc,OnigCaseFoldType case_fold_flag)4387 add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
4388                           OnigEncoding enc, OnigCaseFoldType case_fold_flag)
4389 {
4390   OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
4391   UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
4392   int i, n;
4393 
4394   add_char_opt_map_info(map, p[0], enc);
4395 
4396   case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
4397   n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
4398   if (n < 0) return n;
4399 
4400   for (i = 0; i < n; i++) {
4401     ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
4402     add_char_opt_map_info(map, buf[0], enc);
4403   }
4404 
4405   return 0;
4406 }
4407 
4408 static void
select_opt_map_info(OptMapInfo * now,OptMapInfo * alt)4409 select_opt_map_info(OptMapInfo* now, OptMapInfo* alt)
4410 {
4411   static int z = 1<<15; /* 32768: something big value */
4412 
4413   int v1, v2;
4414 
4415   if (alt->value == 0) return ;
4416   if (now->value == 0) {
4417     copy_opt_map_info(now, alt);
4418     return ;
4419   }
4420 
4421   v1 = z / now->value;
4422   v2 = z / alt->value;
4423   if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4424     copy_opt_map_info(now, alt);
4425 }
4426 
4427 static int
comp_opt_exact_or_map_info(OptExactInfo * e,OptMapInfo * m)4428 comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
4429 {
4430 #define COMP_EM_BASE  20
4431   int ve, vm;
4432 
4433   if (m->value <= 0) return -1;
4434 
4435   ve = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2);
4436   vm = COMP_EM_BASE * 5 * 2 / m->value;
4437   return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
4438 }
4439 
4440 static void
alt_merge_opt_map_info(OnigEncoding enc,OptMapInfo * to,OptMapInfo * add)4441 alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
4442 {
4443   int i, val;
4444 
4445   /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
4446   if (to->value == 0) return ;
4447   if (add->value == 0 || to->mmd.max < add->mmd.min) {
4448     clear_opt_map_info(to);
4449     return ;
4450   }
4451 
4452   alt_merge_mml(&to->mmd, &add->mmd);
4453 
4454   val = 0;
4455   for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
4456     if (add->map[i])
4457       to->map[i] = 1;
4458 
4459     if (to->map[i])
4460       val += map_position_value(enc, i);
4461   }
4462   to->value = val;
4463 
4464   alt_merge_opt_anc_info(&to->anc, &add->anc);
4465 }
4466 
4467 static void
set_bound_node_opt_info(NodeOptInfo * opt,MinMaxLen * mmd)4468 set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd)
4469 {
4470   copy_mml(&(opt->exb.mmd),  mmd);
4471   copy_mml(&(opt->expr.mmd), mmd);
4472   copy_mml(&(opt->map.mmd),  mmd);
4473 }
4474 
4475 static void
clear_node_opt_info(NodeOptInfo * opt)4476 clear_node_opt_info(NodeOptInfo* opt)
4477 {
4478   clear_mml(&opt->len);
4479   clear_opt_anc_info(&opt->anc);
4480   clear_opt_exact_info(&opt->exb);
4481   clear_opt_exact_info(&opt->exm);
4482   clear_opt_exact_info(&opt->expr);
4483   clear_opt_map_info(&opt->map);
4484 }
4485 
4486 static void
copy_node_opt_info(NodeOptInfo * to,NodeOptInfo * from)4487 copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
4488 {
4489   *to = *from;
4490 }
4491 
4492 static void
concat_left_node_opt_info(OnigEncoding enc,NodeOptInfo * to,NodeOptInfo * add)4493 concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
4494 {
4495   int exb_reach, exm_reach;
4496   OptAncInfo tanc;
4497 
4498   concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
4499   copy_opt_anc_info(&to->anc, &tanc);
4500 
4501   if (add->exb.len > 0 && to->len.max == 0) {
4502     concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
4503 			to->len.max, add->len.max);
4504     copy_opt_anc_info(&add->exb.anc, &tanc);
4505   }
4506 
4507   if (add->map.value > 0 && to->len.max == 0) {
4508     if (add->map.mmd.max == 0)
4509       add->map.anc.left_anchor |= to->anc.left_anchor;
4510   }
4511 
4512   exb_reach = to->exb.reach_end;
4513   exm_reach = to->exm.reach_end;
4514 
4515   if (add->len.max != 0)
4516     to->exb.reach_end = to->exm.reach_end = 0;
4517 
4518   if (add->exb.len > 0) {
4519     if (exb_reach) {
4520       concat_opt_exact_info(&to->exb, &add->exb, enc);
4521       clear_opt_exact_info(&add->exb);
4522     }
4523     else if (exm_reach) {
4524       concat_opt_exact_info(&to->exm, &add->exb, enc);
4525       clear_opt_exact_info(&add->exb);
4526     }
4527   }
4528   select_opt_exact_info(enc, &to->exm, &add->exb);
4529   select_opt_exact_info(enc, &to->exm, &add->exm);
4530 
4531   if (to->expr.len > 0) {
4532     if (add->len.max > 0) {
4533       if (to->expr.len > (int )add->len.max)
4534 	to->expr.len = add->len.max;
4535 
4536       if (to->expr.mmd.max == 0)
4537 	select_opt_exact_info(enc, &to->exb, &to->expr);
4538       else
4539 	select_opt_exact_info(enc, &to->exm, &to->expr);
4540     }
4541   }
4542   else if (add->expr.len > 0) {
4543     copy_opt_exact_info(&to->expr, &add->expr);
4544   }
4545 
4546   select_opt_map_info(&to->map, &add->map);
4547 
4548   add_mml(&to->len, &add->len);
4549 }
4550 
4551 static void
alt_merge_node_opt_info(NodeOptInfo * to,NodeOptInfo * add,OptEnv * env)4552 alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
4553 {
4554   alt_merge_opt_anc_info  (&to->anc,  &add->anc);
4555   alt_merge_opt_exact_info(&to->exb,  &add->exb, env);
4556   alt_merge_opt_exact_info(&to->exm,  &add->exm, env);
4557   alt_merge_opt_exact_info(&to->expr, &add->expr, env);
4558   alt_merge_opt_map_info(env->enc, &to->map,  &add->map);
4559 
4560   alt_merge_mml(&to->len, &add->len);
4561 }
4562 
4563 
4564 #define MAX_NODE_OPT_INFO_REF_COUNT    5
4565 
4566 static int
optimize_node_left(Node * node,NodeOptInfo * opt,OptEnv * env)4567 optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
4568 {
4569   int type;
4570   int r = 0;
4571 
4572   clear_node_opt_info(opt);
4573   set_bound_node_opt_info(opt, &env->mmd);
4574 
4575   type = NTYPE(node);
4576   switch (type) {
4577   case NT_LIST:
4578     {
4579       OptEnv nenv;
4580       NodeOptInfo nopt;
4581       Node* nd = node;
4582 
4583       copy_opt_env(&nenv, env);
4584       do {
4585 	r = optimize_node_left(NCAR(nd), &nopt, &nenv);
4586 	if (r == 0) {
4587 	  add_mml(&nenv.mmd, &nopt.len);
4588 	  concat_left_node_opt_info(env->enc, opt, &nopt);
4589 	}
4590       } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
4591     }
4592     break;
4593 
4594   case NT_ALT:
4595     {
4596       NodeOptInfo nopt;
4597       Node* nd = node;
4598 
4599       do {
4600 	r = optimize_node_left(NCAR(nd), &nopt, env);
4601 	if (r == 0) {
4602 	  if (nd == node) copy_node_opt_info(opt, &nopt);
4603 	  else            alt_merge_node_opt_info(opt, &nopt, env);
4604 	}
4605       } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
4606     }
4607     break;
4608 
4609   case NT_STR:
4610     {
4611       StrNode* sn = NSTR(node);
4612       int slen = sn->end - sn->s;
4613       int is_raw = NSTRING_IS_RAW(node);
4614 
4615       if (! NSTRING_IS_AMBIG(node)) {
4616 	concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
4617 				  NSTRING_IS_RAW(node), env->enc);
4618 	if (slen > 0) {
4619 	  add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
4620 	}
4621         set_mml(&opt->len, slen, slen);
4622       }
4623       else {
4624         int max;
4625 
4626 	if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
4627           int n = onigenc_strlen(env->enc, sn->s, sn->end);
4628           max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
4629 	}
4630 	else {
4631 	  concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
4632 				    is_raw, env->enc);
4633 	  opt->exb.ignore_case = 1;
4634 
4635 	  if (slen > 0) {
4636 	    r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
4637 					  env->enc, env->case_fold_flag);
4638 	    if (r != 0) break;
4639 	  }
4640 
4641 	  max = slen;
4642 	}
4643 
4644         set_mml(&opt->len, slen, max);
4645       }
4646 
4647       if (opt->exb.len == slen)
4648 	opt->exb.reach_end = 1;
4649     }
4650     break;
4651 
4652   case NT_CCLASS:
4653     {
4654       int i, z;
4655       CClassNode* cc = NCCLASS(node);
4656 
4657       /* no need to check ignore case. (setted in setup_tree()) */
4658 
4659       if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
4660         OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
4661 	OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
4662 
4663 	set_mml(&opt->len, min, max);
4664       }
4665       else {
4666         for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
4667           z = BITSET_AT(cc->bs, i);
4668           if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
4669             add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
4670           }
4671         }
4672 	set_mml(&opt->len, 1, 1);
4673       }
4674     }
4675     break;
4676 
4677   case NT_CTYPE:
4678     {
4679       int i, min, max;
4680 
4681       max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
4682 
4683       if (max == 1) {
4684         min = 1;
4685 
4686 	switch (NCTYPE(node)->ctype) {
4687 	case ONIGENC_CTYPE_WORD:
4688 	  if (NCTYPE(node)->not != 0) {
4689 	    for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
4690 	      if (! ONIGENC_IS_CODE_WORD(env->enc, i)) {
4691 		add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
4692 	      }
4693 	    }
4694 	  }
4695 	  else {
4696 	    for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
4697 	      if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
4698 		add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
4699 	      }
4700 	    }
4701 	  }
4702 	  break;
4703 	}
4704       }
4705       else {
4706         min = ONIGENC_MBC_MINLEN(env->enc);
4707       }
4708       set_mml(&opt->len, min, max);
4709     }
4710     break;
4711 
4712   case NT_CANY:
4713     {
4714       OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
4715       OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
4716       set_mml(&opt->len, min, max);
4717     }
4718     break;
4719 
4720   case NT_ANCHOR:
4721     switch (NANCHOR(node)->type) {
4722     case ANCHOR_BEGIN_BUF:
4723     case ANCHOR_BEGIN_POSITION:
4724     case ANCHOR_BEGIN_LINE:
4725     case ANCHOR_END_BUF:
4726     case ANCHOR_SEMI_END_BUF:
4727     case ANCHOR_END_LINE:
4728       add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
4729       break;
4730 
4731     case ANCHOR_PREC_READ:
4732       {
4733 	NodeOptInfo nopt;
4734 
4735 	r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
4736 	if (r == 0) {
4737 	  if (nopt.exb.len > 0)
4738 	    copy_opt_exact_info(&opt->expr, &nopt.exb);
4739 	  else if (nopt.exm.len > 0)
4740 	    copy_opt_exact_info(&opt->expr, &nopt.exm);
4741 
4742 	  opt->expr.reach_end = 0;
4743 
4744 	  if (nopt.map.value > 0)
4745 	    copy_opt_map_info(&opt->map, &nopt.map);
4746 	}
4747       }
4748       break;
4749 
4750     case ANCHOR_PREC_READ_NOT:
4751     case ANCHOR_LOOK_BEHIND: /* Sorry, I can't make use of it. */
4752     case ANCHOR_LOOK_BEHIND_NOT:
4753       break;
4754     }
4755     break;
4756 
4757   case NT_BREF:
4758     {
4759       int i;
4760       int* backs;
4761       OnigDistance min, max, tmin, tmax;
4762       Node** nodes = SCANENV_MEM_NODES(env->scan_env);
4763       BRefNode* br = NBREF(node);
4764 
4765       if (br->state & NST_RECURSION) {
4766 	set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
4767 	break;
4768       }
4769       backs = BACKREFS_P(br);
4770       r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
4771       if (r != 0) break;
4772       r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
4773       if (r != 0) break;
4774       for (i = 1; i < br->back_num; i++) {
4775 	r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
4776 	if (r != 0) break;
4777 	r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
4778 	if (r != 0) break;
4779 	if (min > tmin) min = tmin;
4780 	if (max < tmax) max = tmax;
4781       }
4782       if (r == 0) set_mml(&opt->len, min, max);
4783     }
4784     break;
4785 
4786 #ifdef USE_SUBEXP_CALL
4787   case NT_CALL:
4788     if (IS_CALL_RECURSION(NCALL(node)))
4789       set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
4790     else {
4791       OnigOptionType save = env->options;
4792       env->options = NENCLOSE(NCALL(node)->target)->option;
4793       r = optimize_node_left(NCALL(node)->target, opt, env);
4794       env->options = save;
4795     }
4796     break;
4797 #endif
4798 
4799   case NT_QTFR:
4800     {
4801       int i;
4802       OnigDistance min, max;
4803       NodeOptInfo nopt;
4804       QtfrNode* qn = NQTFR(node);
4805 
4806       r = optimize_node_left(qn->target, &nopt, env);
4807       if (r) break;
4808 
4809       if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) {
4810 	if (env->mmd.max == 0 &&
4811 	    NTYPE(qn->target) == NT_CANY && qn->greedy) {
4812 	  if (IS_MULTILINE(env->options))
4813 	    add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
4814 	  else
4815 	    add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
4816 	}
4817       }
4818       else {
4819 	if (qn->lower > 0) {
4820 	  copy_node_opt_info(opt, &nopt);
4821 	  if (nopt.exb.len > 0) {
4822 	    if (nopt.exb.reach_end) {
4823 	      for (i = 2; i <= qn->lower &&
4824 		          ! is_full_opt_exact_info(&opt->exb); i++) {
4825 		concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
4826 	      }
4827 	      if (i < qn->lower) {
4828 		opt->exb.reach_end = 0;
4829 	      }
4830 	    }
4831 	  }
4832 
4833 	  if (qn->lower != qn->upper) {
4834 	    opt->exb.reach_end = 0;
4835 	    opt->exm.reach_end = 0;
4836 	  }
4837 	  if (qn->lower > 1)
4838 	    opt->exm.reach_end = 0;
4839 	}
4840       }
4841 
4842       min = distance_multiply(nopt.len.min, qn->lower);
4843       if (IS_REPEAT_INFINITE(qn->upper))
4844 	max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
4845       else
4846 	max = distance_multiply(nopt.len.max, qn->upper);
4847 
4848       set_mml(&opt->len, min, max);
4849     }
4850     break;
4851 
4852   case NT_ENCLOSE:
4853     {
4854       EncloseNode* en = NENCLOSE(node);
4855 
4856       switch (en->type) {
4857       case ENCLOSE_OPTION:
4858 	{
4859 	  OnigOptionType save = env->options;
4860 
4861 	  env->options = en->option;
4862 	  r = optimize_node_left(en->target, opt, env);
4863 	  env->options = save;
4864 	}
4865 	break;
4866 
4867       case ENCLOSE_MEMORY:
4868 #ifdef USE_SUBEXP_CALL
4869 	en->opt_count++;
4870 	if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
4871 	  OnigDistance min, max;
4872 
4873 	  min = 0;
4874 	  max = ONIG_INFINITE_DISTANCE;
4875 	  if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
4876 	  if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
4877 	  set_mml(&opt->len, min, max);
4878 	}
4879 	else
4880 #endif
4881 	{
4882 	  r = optimize_node_left(en->target, opt, env);
4883 
4884 	  if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
4885 	    if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
4886 	      remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
4887 	  }
4888 	}
4889 	break;
4890 
4891       case ENCLOSE_STOP_BACKTRACK:
4892 	r = optimize_node_left(en->target, opt, env);
4893 	break;
4894       }
4895     }
4896     break;
4897 
4898   default:
4899 #ifdef ONIG_DEBUG
4900     fprintf(stderr, "optimize_node_left: undefined node type %d\n",
4901 	    NTYPE(node));
4902 #endif
4903     r = ONIGERR_TYPE_BUG;
4904     break;
4905   }
4906 
4907   return r;
4908 }
4909 
4910 static int
set_optimize_exact_info(regex_t * reg,OptExactInfo * e)4911 set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
4912 {
4913   int r;
4914 
4915   if (e->len == 0) return 0;
4916 
4917   if (e->ignore_case) {
4918     reg->exact = (UChar* )xmalloc(e->len);
4919     CHECK_NULL_RETURN_MEMERR(reg->exact);
4920     xmemcpy(reg->exact, e->s, e->len);
4921     reg->exact_end = reg->exact + e->len;
4922     reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
4923   }
4924   else {
4925     int allow_reverse;
4926 
4927     reg->exact = str_dup(e->s, e->s + e->len);
4928     CHECK_NULL_RETURN_MEMERR(reg->exact);
4929     reg->exact_end = reg->exact + e->len;
4930 
4931     allow_reverse =
4932 	ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
4933 
4934     if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
4935       r = set_bm_skip(reg->exact, reg->exact_end, reg->enc,
4936 	              reg->map, &(reg->int_map));
4937       if (r) return r;
4938 
4939       reg->optimize = (allow_reverse != 0
4940 		       ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
4941     }
4942     else {
4943       reg->optimize = ONIG_OPTIMIZE_EXACT;
4944     }
4945   }
4946 
4947   reg->dmin = e->mmd.min;
4948   reg->dmax = e->mmd.max;
4949 
4950   if (reg->dmin != ONIG_INFINITE_DISTANCE) {
4951     reg->threshold_len = reg->dmin + (reg->exact_end - reg->exact);
4952   }
4953 
4954   return 0;
4955 }
4956 
4957 static void
set_optimize_map_info(regex_t * reg,OptMapInfo * m)4958 set_optimize_map_info(regex_t* reg, OptMapInfo* m)
4959 {
4960   int i;
4961 
4962   for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
4963     reg->map[i] = m->map[i];
4964 
4965   reg->optimize   = ONIG_OPTIMIZE_MAP;
4966   reg->dmin       = m->mmd.min;
4967   reg->dmax       = m->mmd.max;
4968 
4969   if (reg->dmin != ONIG_INFINITE_DISTANCE) {
4970     reg->threshold_len = reg->dmin + 1;
4971   }
4972 }
4973 
4974 static void
set_sub_anchor(regex_t * reg,OptAncInfo * anc)4975 set_sub_anchor(regex_t* reg, OptAncInfo* anc)
4976 {
4977   reg->sub_anchor |= anc->left_anchor  & ANCHOR_BEGIN_LINE;
4978   reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
4979 }
4980 
4981 #ifdef ONIG_DEBUG
4982 static void print_optimize_info(FILE* f, regex_t* reg);
4983 #endif
4984 
4985 static int
set_optimize_info_from_tree(Node * node,regex_t * reg,ScanEnv * scan_env)4986 set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
4987 {
4988 
4989   int r;
4990   NodeOptInfo opt;
4991   OptEnv env;
4992 
4993   env.enc            = reg->enc;
4994   env.options        = reg->options;
4995   env.case_fold_flag = reg->case_fold_flag;
4996   env.scan_env   = scan_env;
4997   clear_mml(&env.mmd);
4998 
4999   r = optimize_node_left(node, &opt, &env);
5000   if (r) return r;
5001 
5002   reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
5003         ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML);
5004 
5005   reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF);
5006 
5007   if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
5008     reg->anchor_dmin = opt.len.min;
5009     reg->anchor_dmax = opt.len.max;
5010   }
5011 
5012   if (opt.exb.len > 0 || opt.exm.len > 0) {
5013     select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
5014     if (opt.map.value > 0 &&
5015 	comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
5016       goto set_map;
5017     }
5018     else {
5019       r = set_optimize_exact_info(reg, &opt.exb);
5020       set_sub_anchor(reg, &opt.exb.anc);
5021     }
5022   }
5023   else if (opt.map.value > 0) {
5024   set_map:
5025     set_optimize_map_info(reg, &opt.map);
5026     set_sub_anchor(reg, &opt.map.anc);
5027   }
5028   else {
5029     reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;
5030     if (opt.len.max == 0)
5031       reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;
5032   }
5033 
5034 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5035   print_optimize_info(stderr, reg);
5036 #endif
5037   return r;
5038 }
5039 
5040 static void
clear_optimize_info(regex_t * reg)5041 clear_optimize_info(regex_t* reg)
5042 {
5043   reg->optimize      = ONIG_OPTIMIZE_NONE;
5044   reg->anchor        = 0;
5045   reg->anchor_dmin   = 0;
5046   reg->anchor_dmax   = 0;
5047   reg->sub_anchor    = 0;
5048   reg->exact_end     = (UChar* )NULL;
5049   reg->threshold_len = 0;
5050   if (IS_NOT_NULL(reg->exact)) {
5051     xfree(reg->exact);
5052     reg->exact = (UChar* )NULL;
5053   }
5054 }
5055 
5056 #ifdef ONIG_DEBUG
5057 
print_enc_string(FILE * fp,OnigEncoding enc,const UChar * s,const UChar * end)5058 static void print_enc_string(FILE* fp, OnigEncoding enc,
5059 			     const UChar *s, const UChar *end)
5060 {
5061   fprintf(fp, "\nPATTERN: /");
5062 
5063   if (ONIGENC_MBC_MINLEN(enc) > 1) {
5064     const UChar *p;
5065     OnigCodePoint code;
5066 
5067     p = s;
5068     while (p < end) {
5069       code = ONIGENC_MBC_TO_CODE(enc, p, end);
5070       if (code >= 0x80) {
5071 	fprintf(fp, " 0x%04x ", (int )code);
5072       }
5073       else {
5074 	fputc((int )code, fp);
5075       }
5076 
5077       p += enclen(enc, p);
5078     }
5079   }
5080   else {
5081     while (s < end) {
5082       fputc((int )*s, fp);
5083       s++;
5084     }
5085   }
5086 
5087   fprintf(fp, "/\n");
5088 }
5089 
5090 static void
print_distance_range(FILE * f,OnigDistance a,OnigDistance b)5091 print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
5092 {
5093   if (a == ONIG_INFINITE_DISTANCE)
5094     fputs("inf", f);
5095   else
5096     fprintf(f, "(%u)", a);
5097 
5098   fputs("-", f);
5099 
5100   if (b == ONIG_INFINITE_DISTANCE)
5101     fputs("inf", f);
5102   else
5103     fprintf(f, "(%u)", b);
5104 }
5105 
5106 static void
print_anchor(FILE * f,int anchor)5107 print_anchor(FILE* f, int anchor)
5108 {
5109   int q = 0;
5110 
5111   fprintf(f, "[");
5112 
5113   if (anchor & ANCHOR_BEGIN_BUF) {
5114     fprintf(f, "begin-buf");
5115     q = 1;
5116   }
5117   if (anchor & ANCHOR_BEGIN_LINE) {
5118     if (q) fprintf(f, ", ");
5119     q = 1;
5120     fprintf(f, "begin-line");
5121   }
5122   if (anchor & ANCHOR_BEGIN_POSITION) {
5123     if (q) fprintf(f, ", ");
5124     q = 1;
5125     fprintf(f, "begin-pos");
5126   }
5127   if (anchor & ANCHOR_END_BUF) {
5128     if (q) fprintf(f, ", ");
5129     q = 1;
5130     fprintf(f, "end-buf");
5131   }
5132   if (anchor & ANCHOR_SEMI_END_BUF) {
5133     if (q) fprintf(f, ", ");
5134     q = 1;
5135     fprintf(f, "semi-end-buf");
5136   }
5137   if (anchor & ANCHOR_END_LINE) {
5138     if (q) fprintf(f, ", ");
5139     q = 1;
5140     fprintf(f, "end-line");
5141   }
5142   if (anchor & ANCHOR_ANYCHAR_STAR) {
5143     if (q) fprintf(f, ", ");
5144     q = 1;
5145     fprintf(f, "anychar-star");
5146   }
5147   if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
5148     if (q) fprintf(f, ", ");
5149     fprintf(f, "anychar-star-pl");
5150   }
5151 
5152   fprintf(f, "]");
5153 }
5154 
5155 static void
print_optimize_info(FILE * f,regex_t * reg)5156 print_optimize_info(FILE* f, regex_t* reg)
5157 {
5158   static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
5159                               "EXACT_IC", "MAP" };
5160 
5161   fprintf(f, "optimize: %s\n", on[reg->optimize]);
5162   fprintf(f, "  anchor: "); print_anchor(f, reg->anchor);
5163   if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
5164     print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
5165   fprintf(f, "\n");
5166 
5167   if (reg->optimize) {
5168     fprintf(f, "  sub anchor: "); print_anchor(f, reg->sub_anchor);
5169     fprintf(f, "\n");
5170   }
5171   fprintf(f, "\n");
5172 
5173   if (reg->exact) {
5174     UChar *p;
5175     fprintf(f, "exact: [");
5176     for (p = reg->exact; p < reg->exact_end; p++) {
5177       fputc(*p, f);
5178     }
5179     fprintf(f, "]: length: %d\n", (reg->exact_end - reg->exact));
5180   }
5181   else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
5182     int c, i, n = 0;
5183 
5184     for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5185       if (reg->map[i]) n++;
5186 
5187     fprintf(f, "map: n=%d\n", n);
5188     if (n > 0) {
5189       c = 0;
5190       fputc('[', f);
5191       for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
5192 	if (reg->map[i] != 0) {
5193           if (c > 0)  fputs(", ", f);
5194           c++;
5195           if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
5196               ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
5197             fputc(i, f);
5198           else
5199             fprintf(f, "%d", i);
5200         }
5201       }
5202       fprintf(f, "]\n");
5203     }
5204   }
5205 }
5206 #endif /* ONIG_DEBUG */
5207 
5208 
5209 extern void
onig_free_body(regex_t * reg)5210 onig_free_body(regex_t* reg)
5211 {
5212   if (IS_NOT_NULL(reg)) {
5213     if (IS_NOT_NULL(reg->p))                xfree(reg->p);
5214     if (IS_NOT_NULL(reg->exact))            xfree(reg->exact);
5215     if (IS_NOT_NULL(reg->int_map))          xfree(reg->int_map);
5216     if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);
5217     if (IS_NOT_NULL(reg->repeat_range))     xfree(reg->repeat_range);
5218     if (IS_NOT_NULL(reg->chain))            onig_free(reg->chain);
5219 
5220 #ifdef USE_NAMED_GROUP
5221     onig_names_free(reg);
5222 #endif
5223   }
5224 }
5225 
5226 extern void
onig_free(regex_t * reg)5227 onig_free(regex_t* reg)
5228 {
5229   if (IS_NOT_NULL(reg)) {
5230     onig_free_body(reg);
5231     xfree(reg);
5232   }
5233 }
5234 
5235 #define REGEX_TRANSFER(to,from) do {\
5236   (to)->state = ONIG_STATE_MODIFY;\
5237   onig_free_body(to);\
5238   xmemcpy(to, from, sizeof(regex_t));\
5239   xfree(from);\
5240 } while (0)
5241 
5242 extern void
onig_transfer(regex_t * to,regex_t * from)5243 onig_transfer(regex_t* to, regex_t* from)
5244 {
5245   THREAD_ATOMIC_START;
5246   REGEX_TRANSFER(to, from);
5247   THREAD_ATOMIC_END;
5248 }
5249 
5250 #define REGEX_CHAIN_HEAD(reg) do {\
5251   while (IS_NOT_NULL((reg)->chain)) {\
5252     (reg) = (reg)->chain;\
5253   }\
5254 } while (0)
5255 
5256 extern void
onig_chain_link_add(regex_t * to,regex_t * add)5257 onig_chain_link_add(regex_t* to, regex_t* add)
5258 {
5259   THREAD_ATOMIC_START;
5260   REGEX_CHAIN_HEAD(to);
5261   to->chain = add;
5262   THREAD_ATOMIC_END;
5263 }
5264 
5265 extern void
onig_chain_reduce(regex_t * reg)5266 onig_chain_reduce(regex_t* reg)
5267 {
5268   regex_t *head, *prev;
5269 
5270   prev = reg;
5271   head = prev->chain;
5272   if (IS_NOT_NULL(head)) {
5273     reg->state = ONIG_STATE_MODIFY;
5274     while (IS_NOT_NULL(head->chain)) {
5275       prev = head;
5276       head = head->chain;
5277     }
5278     prev->chain = (regex_t* )NULL;
5279     REGEX_TRANSFER(reg, head);
5280   }
5281 }
5282 
5283 #ifdef ONIG_DEBUG
5284 static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg));
5285 #endif
5286 #ifdef ONIG_DEBUG_PARSE_TREE
5287 static void print_tree P_((FILE* f, Node* node));
5288 #endif
5289 
5290 extern int
onig_compile(regex_t * reg,const UChar * pattern,const UChar * pattern_end,OnigErrorInfo * einfo)5291 onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5292 	      OnigErrorInfo* einfo)
5293 {
5294 #define COMPILE_INIT_SIZE  20
5295 
5296   int r, init_size;
5297   Node*  root;
5298   ScanEnv  scan_env;
5299 #ifdef USE_SUBEXP_CALL
5300   UnsetAddrList  uslist;
5301 #endif
5302 
5303   if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
5304 
5305   reg->state = ONIG_STATE_COMPILING;
5306 
5307 #ifdef ONIG_DEBUG
5308   print_enc_string(stderr, reg->enc, pattern, pattern_end);
5309 #endif
5310 
5311   if (reg->alloc == 0) {
5312     init_size = (pattern_end - pattern) * 2;
5313     if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
5314     r = BBUF_INIT(reg, init_size);
5315     if (r != 0) goto end;
5316   }
5317   else
5318     reg->used = 0;
5319 
5320   reg->num_mem            = 0;
5321   reg->num_repeat         = 0;
5322   reg->num_null_check     = 0;
5323   reg->repeat_range_alloc = 0;
5324   reg->repeat_range       = (OnigRepeatRange* )NULL;
5325 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5326   reg->num_comb_exp_check = 0;
5327 #endif
5328 
5329   r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
5330   if (r != 0) goto err;
5331 
5332 #ifdef USE_NAMED_GROUP
5333   /* mixed use named group and no-named group */
5334   if (scan_env.num_named > 0 &&
5335       IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
5336       !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
5337     if (scan_env.num_named != scan_env.num_mem)
5338       r = disable_noname_group_capture(&root, reg, &scan_env);
5339     else
5340       r = numbered_ref_check(root);
5341 
5342     if (r != 0) goto err;
5343   }
5344 #endif
5345 
5346 #ifdef USE_SUBEXP_CALL
5347   if (scan_env.num_call > 0) {
5348     r = unset_addr_list_init(&uslist, scan_env.num_call);
5349     if (r != 0) goto err;
5350     scan_env.unset_addr_list = &uslist;
5351     r = setup_subexp_call(root, &scan_env);
5352     if (r != 0) goto err_unset;
5353     r = subexp_recursive_check_trav(root, &scan_env);
5354     if (r  < 0) goto err_unset;
5355     r = subexp_inf_recursive_check_trav(root, &scan_env);
5356     if (r != 0) goto err_unset;
5357 
5358     reg->num_call = scan_env.num_call;
5359   }
5360   else
5361     reg->num_call = 0;
5362 #endif
5363 
5364   r = setup_tree(root, reg, 0, &scan_env);
5365   if (r != 0) goto err_unset;
5366 
5367 #ifdef ONIG_DEBUG_PARSE_TREE
5368   print_tree(stderr, root);
5369 #endif
5370 
5371   reg->capture_history  = scan_env.capture_history;
5372   reg->bt_mem_start     = scan_env.bt_mem_start;
5373   reg->bt_mem_start    |= reg->capture_history;
5374   if (IS_FIND_CONDITION(reg->options))
5375     BIT_STATUS_ON_ALL(reg->bt_mem_end);
5376   else {
5377     reg->bt_mem_end  = scan_env.bt_mem_end;
5378     reg->bt_mem_end |= reg->capture_history;
5379   }
5380 
5381 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5382   if (scan_env.backrefed_mem == 0
5383 #ifdef USE_SUBEXP_CALL
5384       || scan_env.num_call == 0
5385 #endif
5386       ) {
5387     setup_comb_exp_check(root, 0, &scan_env);
5388 #ifdef USE_SUBEXP_CALL
5389     if (scan_env.has_recursion != 0) {
5390       scan_env.num_comb_exp_check = 0;
5391     }
5392     else
5393 #endif
5394     if (scan_env.comb_exp_max_regnum > 0) {
5395       int i;
5396       for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
5397 	if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
5398 	  scan_env.num_comb_exp_check = 0;
5399 	  break;
5400 	}
5401       }
5402     }
5403   }
5404 
5405   reg->num_comb_exp_check = scan_env.num_comb_exp_check;
5406 #endif
5407 
5408   clear_optimize_info(reg);
5409 #ifndef ONIG_DONT_OPTIMIZE
5410   r = set_optimize_info_from_tree(root, reg, &scan_env);
5411   if (r != 0) goto err_unset;
5412 #endif
5413 
5414   if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
5415     xfree(scan_env.mem_nodes_dynamic);
5416     scan_env.mem_nodes_dynamic = (Node** )NULL;
5417   }
5418 
5419   r = compile_tree(root, reg);
5420   if (r == 0) {
5421     r = add_opcode(reg, OP_END);
5422 #ifdef USE_SUBEXP_CALL
5423     if (scan_env.num_call > 0) {
5424       r = unset_addr_list_fix(&uslist, reg);
5425       unset_addr_list_end(&uslist);
5426       if (r) goto err;
5427     }
5428 #endif
5429 
5430     if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
5431       reg->stack_pop_level = STACK_POP_LEVEL_ALL;
5432     else {
5433       if (reg->bt_mem_start != 0)
5434 	reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
5435       else
5436 	reg->stack_pop_level = STACK_POP_LEVEL_FREE;
5437     }
5438   }
5439 #ifdef USE_SUBEXP_CALL
5440   else if (scan_env.num_call > 0) {
5441     unset_addr_list_end(&uslist);
5442   }
5443 #endif
5444   onig_node_free(root);
5445 
5446 #ifdef ONIG_DEBUG_COMPILE
5447 #ifdef USE_NAMED_GROUP
5448   onig_print_names(stderr, reg);
5449 #endif
5450   print_compiled_byte_code_list(stderr, reg);
5451 #endif
5452 
5453  end:
5454   reg->state = ONIG_STATE_NORMAL;
5455   return r;
5456 
5457  err_unset:
5458 #ifdef USE_SUBEXP_CALL
5459   if (scan_env.num_call > 0) {
5460     unset_addr_list_end(&uslist);
5461   }
5462 #endif
5463  err:
5464   if (IS_NOT_NULL(scan_env.error)) {
5465     if (IS_NOT_NULL(einfo)) {
5466       einfo->enc     = scan_env.enc;
5467       einfo->par     = scan_env.error;
5468       einfo->par_end = scan_env.error_end;
5469     }
5470   }
5471 
5472   onig_node_free(root);
5473   if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
5474       xfree(scan_env.mem_nodes_dynamic);
5475   return r;
5476 }
5477 
5478 #ifdef USE_RECOMPILE_API
5479 extern int
onig_recompile(regex_t * reg,const UChar * pattern,const UChar * pattern_end,OnigOptionType option,OnigEncoding enc,OnigSyntaxType * syntax,OnigErrorInfo * einfo)5480 onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5481 	    OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
5482 	    OnigErrorInfo* einfo)
5483 {
5484   int r;
5485   regex_t *new_reg;
5486 
5487   r = onig_new(&new_reg, pattern, pattern_end, option, enc, syntax, einfo);
5488   if (r) return r;
5489   if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
5490     onig_transfer(reg, new_reg);
5491   }
5492   else {
5493     onig_chain_link_add(reg, new_reg);
5494   }
5495   return 0;
5496 }
5497 #endif
5498 
5499 static int onig_inited = 0;
5500 
5501 extern int
onig_reg_init(regex_t * reg,OnigOptionType option,OnigCaseFoldType case_fold_flag,OnigEncoding enc,OnigSyntaxType * syntax)5502 onig_reg_init(regex_t* reg, OnigOptionType option,
5503 	      OnigCaseFoldType case_fold_flag,
5504 	      OnigEncoding enc, OnigSyntaxType* syntax)
5505 {
5506   if (! onig_inited)
5507     onig_init();
5508 
5509   if (IS_NULL(reg))
5510     return ONIGERR_INVALID_ARGUMENT;
5511 
5512   if (ONIGENC_IS_UNDEF(enc))
5513     return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED;
5514 
5515   if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
5516       == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
5517     return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
5518   }
5519 
5520   (reg)->state = ONIG_STATE_MODIFY;
5521 
5522   if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
5523     option |= syntax->options;
5524     option &= ~ONIG_OPTION_SINGLELINE;
5525   }
5526   else
5527     option |= syntax->options;
5528 
5529   (reg)->enc              = enc;
5530   (reg)->options          = option;
5531   (reg)->syntax           = syntax;
5532   (reg)->optimize         = 0;
5533   (reg)->exact            = (UChar* )NULL;
5534   (reg)->int_map          = (int* )NULL;
5535   (reg)->int_map_backward = (int* )NULL;
5536   (reg)->chain            = (regex_t* )NULL;
5537 
5538   (reg)->p                = (UChar* )NULL;
5539   (reg)->alloc            = 0;
5540   (reg)->used             = 0;
5541   (reg)->name_table       = (void* )NULL;
5542 
5543   (reg)->case_fold_flag   = case_fold_flag;
5544   return 0;
5545 }
5546 
5547 extern int
onig_new_without_alloc(regex_t * reg,const UChar * pattern,const UChar * pattern_end,OnigOptionType option,OnigEncoding enc,OnigSyntaxType * syntax,OnigErrorInfo * einfo)5548 onig_new_without_alloc(regex_t* reg, const UChar* pattern,
5549           const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
5550           OnigSyntaxType* syntax, OnigErrorInfo* einfo)
5551 {
5552   int r;
5553 
5554   r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
5555   if (r) return r;
5556 
5557   r = onig_compile(reg, pattern, pattern_end, einfo);
5558   return r;
5559 }
5560 
5561 extern int
onig_new(regex_t ** reg,const UChar * pattern,const UChar * pattern_end,OnigOptionType option,OnigEncoding enc,OnigSyntaxType * syntax,OnigErrorInfo * einfo)5562 onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
5563 	  OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
5564 	  OnigErrorInfo* einfo)
5565 {
5566   int r;
5567 
5568   *reg = (regex_t* )xmalloc(sizeof(regex_t));
5569   if (IS_NULL(*reg)) return ONIGERR_MEMORY;
5570 
5571   r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
5572   if (r) goto err;
5573 
5574   r = onig_compile(*reg, pattern, pattern_end, einfo);
5575   if (r) {
5576   err:
5577     onig_free(*reg);
5578     *reg = NULL;
5579   }
5580   return r;
5581 }
5582 
5583 
5584 extern int
onig_init(void)5585 onig_init(void)
5586 {
5587   if (onig_inited != 0)
5588     return 0;
5589 
5590   THREAD_SYSTEM_INIT;
5591   THREAD_ATOMIC_START;
5592 
5593   onig_inited = 1;
5594 
5595   onigenc_init();
5596   /* onigenc_set_default_caseconv_table((UChar* )0); */
5597 
5598 #ifdef ONIG_DEBUG_STATISTICS
5599   onig_statistics_init();
5600 #endif
5601 
5602   THREAD_ATOMIC_END;
5603   return 0;
5604 }
5605 
5606 
5607 extern int
onig_end(void)5608 onig_end(void)
5609 {
5610   THREAD_ATOMIC_START;
5611 
5612 #ifdef ONIG_DEBUG_STATISTICS
5613   onig_print_statistics(stderr);
5614 #endif
5615 
5616 #ifdef USE_SHARED_CCLASS_TABLE
5617   onig_free_shared_cclass_table();
5618 #endif
5619 
5620 #ifdef USE_PARSE_TREE_NODE_RECYCLE
5621   onig_free_node_list();
5622 #endif
5623 
5624   onig_inited = 0;
5625 
5626   THREAD_ATOMIC_END;
5627   THREAD_SYSTEM_END;
5628   return 0;
5629 }
5630 
5631 extern int
onig_is_in_code_range(const UChar * p,OnigCodePoint code)5632 onig_is_in_code_range(const UChar* p, OnigCodePoint code)
5633 {
5634   OnigCodePoint n, *data;
5635   OnigCodePoint low, high, x;
5636 
5637   GET_CODE_POINT(n, p);
5638   data = (OnigCodePoint* )p;
5639   data++;
5640 
5641   for (low = 0, high = n; low < high; ) {
5642     x = (low + high) >> 1;
5643     if (code > data[x * 2 + 1])
5644       low = x + 1;
5645     else
5646       high = x;
5647   }
5648 
5649   return ((low < n && code >= data[low * 2]) ? 1 : 0);
5650 }
5651 
5652 extern int
onig_is_code_in_cc_len(int elen,OnigCodePoint code,CClassNode * cc)5653 onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc)
5654 {
5655   int found;
5656 
5657   if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
5658     if (IS_NULL(cc->mbuf)) {
5659       found = 0;
5660     }
5661     else {
5662       found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
5663     }
5664   }
5665   else {
5666     found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
5667   }
5668 
5669   if (IS_NCCLASS_NOT(cc))
5670     return !found;
5671   else
5672     return found;
5673 }
5674 
5675 extern int
onig_is_code_in_cc(OnigEncoding enc,OnigCodePoint code,CClassNode * cc)5676 onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
5677 {
5678   int len;
5679 
5680   if (ONIGENC_MBC_MINLEN(enc) > 1) {
5681     len = 2;
5682   }
5683   else {
5684     len = ONIGENC_CODE_TO_MBCLEN(enc, code);
5685   }
5686   return onig_is_code_in_cc_len(len, code, cc);
5687 }
5688 
5689 
5690 #ifdef ONIG_DEBUG
5691 
5692 /* arguments type */
5693 #define ARG_SPECIAL     -1
5694 #define ARG_NON          0
5695 #define ARG_RELADDR      1
5696 #define ARG_ABSADDR      2
5697 #define ARG_LENGTH       3
5698 #define ARG_MEMNUM       4
5699 #define ARG_OPTION       5
5700 #define ARG_STATE_CHECK  6
5701 
5702 OnigOpInfoType OnigOpInfo[] = {
5703   { OP_FINISH,            "finish",          ARG_NON },
5704   { OP_END,               "end",             ARG_NON },
5705   { OP_EXACT1,            "exact1",          ARG_SPECIAL },
5706   { OP_EXACT2,            "exact2",          ARG_SPECIAL },
5707   { OP_EXACT3,            "exact3",          ARG_SPECIAL },
5708   { OP_EXACT4,            "exact4",          ARG_SPECIAL },
5709   { OP_EXACT5,            "exact5",          ARG_SPECIAL },
5710   { OP_EXACTN,            "exactn",          ARG_SPECIAL },
5711   { OP_EXACTMB2N1,        "exactmb2-n1",     ARG_SPECIAL },
5712   { OP_EXACTMB2N2,        "exactmb2-n2",     ARG_SPECIAL },
5713   { OP_EXACTMB2N3,        "exactmb2-n3",     ARG_SPECIAL },
5714   { OP_EXACTMB2N,         "exactmb2-n",      ARG_SPECIAL },
5715   { OP_EXACTMB3N,         "exactmb3n"  ,     ARG_SPECIAL },
5716   { OP_EXACTMBN,          "exactmbn",        ARG_SPECIAL },
5717   { OP_EXACT1_IC,         "exact1-ic",       ARG_SPECIAL },
5718   { OP_EXACTN_IC,         "exactn-ic",       ARG_SPECIAL },
5719   { OP_CCLASS,            "cclass",          ARG_SPECIAL },
5720   { OP_CCLASS_MB,         "cclass-mb",       ARG_SPECIAL },
5721   { OP_CCLASS_MIX,        "cclass-mix",      ARG_SPECIAL },
5722   { OP_CCLASS_NOT,        "cclass-not",      ARG_SPECIAL },
5723   { OP_CCLASS_MB_NOT,     "cclass-mb-not",   ARG_SPECIAL },
5724   { OP_CCLASS_MIX_NOT,    "cclass-mix-not",  ARG_SPECIAL },
5725   { OP_CCLASS_NODE,       "cclass-node",     ARG_SPECIAL },
5726   { OP_ANYCHAR,           "anychar",         ARG_NON },
5727   { OP_ANYCHAR_ML,        "anychar-ml",      ARG_NON },
5728   { OP_ANYCHAR_STAR,      "anychar*",        ARG_NON },
5729   { OP_ANYCHAR_ML_STAR,   "anychar-ml*",     ARG_NON },
5730   { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
5731   { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
5732   { OP_WORD,                "word",            ARG_NON },
5733   { OP_NOT_WORD,            "not-word",        ARG_NON },
5734   { OP_WORD_BOUND,          "word-bound",      ARG_NON },
5735   { OP_NOT_WORD_BOUND,      "not-word-bound",  ARG_NON },
5736   { OP_WORD_BEGIN,          "word-begin",      ARG_NON },
5737   { OP_WORD_END,            "word-end",        ARG_NON },
5738   { OP_BEGIN_BUF,           "begin-buf",       ARG_NON },
5739   { OP_END_BUF,             "end-buf",         ARG_NON },
5740   { OP_BEGIN_LINE,          "begin-line",      ARG_NON },
5741   { OP_END_LINE,            "end-line",        ARG_NON },
5742   { OP_SEMI_END_BUF,        "semi-end-buf",    ARG_NON },
5743   { OP_BEGIN_POSITION,      "begin-position",  ARG_NON },
5744   { OP_BACKREF1,            "backref1",             ARG_NON },
5745   { OP_BACKREF2,            "backref2",             ARG_NON },
5746   { OP_BACKREFN,            "backrefn",             ARG_MEMNUM  },
5747   { OP_BACKREFN_IC,         "backrefn-ic",          ARG_SPECIAL },
5748   { OP_BACKREF_MULTI,       "backref_multi",        ARG_SPECIAL },
5749   { OP_BACKREF_MULTI_IC,    "backref_multi-ic",     ARG_SPECIAL },
5750   { OP_BACKREF_WITH_LEVEL,    "backref_at_level",     ARG_SPECIAL },
5751   { OP_MEMORY_START_PUSH,   "mem-start-push",       ARG_MEMNUM  },
5752   { OP_MEMORY_START,        "mem-start",            ARG_MEMNUM  },
5753   { OP_MEMORY_END_PUSH,     "mem-end-push",         ARG_MEMNUM  },
5754   { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec",     ARG_MEMNUM  },
5755   { OP_MEMORY_END,          "mem-end",              ARG_MEMNUM  },
5756   { OP_MEMORY_END_REC,      "mem-end-rec",          ARG_MEMNUM  },
5757   { OP_SET_OPTION_PUSH,     "set-option-push",      ARG_OPTION  },
5758   { OP_SET_OPTION,          "set-option",           ARG_OPTION  },
5759   { OP_FAIL,                "fail",                 ARG_NON },
5760   { OP_JUMP,                "jump",                 ARG_RELADDR },
5761   { OP_PUSH,                "push",                 ARG_RELADDR },
5762   { OP_POP,                 "pop",                  ARG_NON },
5763   { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1",      ARG_SPECIAL },
5764   { OP_PUSH_IF_PEEK_NEXT,   "push-if-peek-next",    ARG_SPECIAL },
5765   { OP_REPEAT,              "repeat",               ARG_SPECIAL },
5766   { OP_REPEAT_NG,           "repeat-ng",            ARG_SPECIAL },
5767   { OP_REPEAT_INC,          "repeat-inc",           ARG_MEMNUM  },
5768   { OP_REPEAT_INC_NG,       "repeat-inc-ng",        ARG_MEMNUM  },
5769   { OP_REPEAT_INC_SG,       "repeat-inc-sg",        ARG_MEMNUM  },
5770   { OP_REPEAT_INC_NG_SG,    "repeat-inc-ng-sg",     ARG_MEMNUM  },
5771   { OP_NULL_CHECK_START,    "null-check-start",     ARG_MEMNUM  },
5772   { OP_NULL_CHECK_END,      "null-check-end",       ARG_MEMNUM  },
5773   { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM  },
5774   { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM  },
5775   { OP_PUSH_POS,             "push-pos",             ARG_NON },
5776   { OP_POP_POS,              "pop-pos",              ARG_NON },
5777   { OP_PUSH_POS_NOT,         "push-pos-not",         ARG_RELADDR },
5778   { OP_FAIL_POS,             "fail-pos",             ARG_NON },
5779   { OP_PUSH_STOP_BT,         "push-stop-bt",         ARG_NON },
5780   { OP_POP_STOP_BT,          "pop-stop-bt",          ARG_NON },
5781   { OP_LOOK_BEHIND,          "look-behind",          ARG_SPECIAL },
5782   { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
5783   { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
5784   { OP_CALL,                 "call",                 ARG_ABSADDR },
5785   { OP_RETURN,               "return",               ARG_NON },
5786   { OP_STATE_CHECK_PUSH,         "state-check-push",         ARG_SPECIAL },
5787   { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
5788   { OP_STATE_CHECK,              "state-check",              ARG_STATE_CHECK },
5789   { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*",     ARG_STATE_CHECK },
5790   { OP_STATE_CHECK_ANYCHAR_ML_STAR,
5791     "state-check-anychar-ml*", ARG_STATE_CHECK },
5792   { -1, "", ARG_NON }
5793 };
5794 
5795 static char*
op2name(int opcode)5796 op2name(int opcode)
5797 {
5798   int i;
5799 
5800   for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
5801     if (opcode == OnigOpInfo[i].opcode)
5802       return OnigOpInfo[i].name;
5803   }
5804   return "";
5805 }
5806 
5807 static int
op2arg_type(int opcode)5808 op2arg_type(int opcode)
5809 {
5810   int i;
5811 
5812   for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
5813     if (opcode == OnigOpInfo[i].opcode)
5814       return OnigOpInfo[i].arg_type;
5815   }
5816   return ARG_SPECIAL;
5817 }
5818 
5819 static void
Indent(FILE * f,int indent)5820 Indent(FILE* f, int indent)
5821 {
5822   int i;
5823   for (i = 0; i < indent; i++) putc(' ', f);
5824 }
5825 
5826 static void
p_string(FILE * f,int len,UChar * s)5827 p_string(FILE* f, int len, UChar* s)
5828 {
5829   fputs(":", f);
5830   while (len-- > 0) { fputc(*s++, f); }
5831 }
5832 
5833 static void
p_len_string(FILE * f,LengthType len,int mb_len,UChar * s)5834 p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
5835 {
5836   int x = len * mb_len;
5837 
5838   fprintf(f, ":%d:", len);
5839   while (x-- > 0) { fputc(*s++, f); }
5840 }
5841 
5842 extern void
onig_print_compiled_byte_code(FILE * f,UChar * bp,UChar ** nextp,OnigEncoding enc)5843 onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
5844                               OnigEncoding enc)
5845 {
5846   int i, n, arg_type;
5847   RelAddrType addr;
5848   LengthType len;
5849   MemNumType mem;
5850   StateCheckNumType scn;
5851   OnigCodePoint code;
5852   UChar *q;
5853 
5854   fprintf(f, "[%s", op2name(*bp));
5855   arg_type = op2arg_type(*bp);
5856   if (arg_type != ARG_SPECIAL) {
5857     bp++;
5858     switch (arg_type) {
5859     case ARG_NON:
5860       break;
5861     case ARG_RELADDR:
5862       GET_RELADDR_INC(addr, bp);
5863       fprintf(f, ":(%d)", addr);
5864       break;
5865     case ARG_ABSADDR:
5866       GET_ABSADDR_INC(addr, bp);
5867       fprintf(f, ":(%d)", addr);
5868       break;
5869     case ARG_LENGTH:
5870       GET_LENGTH_INC(len, bp);
5871       fprintf(f, ":%d", len);
5872       break;
5873     case ARG_MEMNUM:
5874       mem = *((MemNumType* )bp);
5875       bp += SIZE_MEMNUM;
5876       fprintf(f, ":%d", mem);
5877       break;
5878     case ARG_OPTION:
5879       {
5880 	OnigOptionType option = *((OnigOptionType* )bp);
5881 	bp += SIZE_OPTION;
5882 	fprintf(f, ":%d", option);
5883       }
5884       break;
5885 
5886     case ARG_STATE_CHECK:
5887       scn = *((StateCheckNumType* )bp);
5888       bp += SIZE_STATE_CHECK_NUM;
5889       fprintf(f, ":%d", scn);
5890       break;
5891     }
5892   }
5893   else {
5894     switch (*bp++) {
5895     case OP_EXACT1:
5896     case OP_ANYCHAR_STAR_PEEK_NEXT:
5897     case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
5898       p_string(f, 1, bp++); break;
5899     case OP_EXACT2:
5900       p_string(f, 2, bp); bp += 2; break;
5901     case OP_EXACT3:
5902       p_string(f, 3, bp); bp += 3; break;
5903     case OP_EXACT4:
5904       p_string(f, 4, bp); bp += 4; break;
5905     case OP_EXACT5:
5906       p_string(f, 5, bp); bp += 5; break;
5907     case OP_EXACTN:
5908       GET_LENGTH_INC(len, bp);
5909       p_len_string(f, len, 1, bp);
5910       bp += len;
5911       break;
5912 
5913     case OP_EXACTMB2N1:
5914       p_string(f, 2, bp); bp += 2; break;
5915     case OP_EXACTMB2N2:
5916       p_string(f, 4, bp); bp += 4; break;
5917     case OP_EXACTMB2N3:
5918       p_string(f, 6, bp); bp += 6; break;
5919     case OP_EXACTMB2N:
5920       GET_LENGTH_INC(len, bp);
5921       p_len_string(f, len, 2, bp);
5922       bp += len * 2;
5923       break;
5924     case OP_EXACTMB3N:
5925       GET_LENGTH_INC(len, bp);
5926       p_len_string(f, len, 3, bp);
5927       bp += len * 3;
5928       break;
5929     case OP_EXACTMBN:
5930       {
5931 	int mb_len;
5932 
5933 	GET_LENGTH_INC(mb_len, bp);
5934 	GET_LENGTH_INC(len, bp);
5935 	fprintf(f, ":%d:%d:", mb_len, len);
5936 	n = len * mb_len;
5937 	while (n-- > 0) { fputc(*bp++, f); }
5938       }
5939       break;
5940 
5941     case OP_EXACT1_IC:
5942       len = enclen(enc, bp);
5943       p_string(f, len, bp);
5944       bp += len;
5945       break;
5946     case OP_EXACTN_IC:
5947       GET_LENGTH_INC(len, bp);
5948       p_len_string(f, len, 1, bp);
5949       bp += len;
5950       break;
5951 
5952     case OP_CCLASS:
5953       n = bitset_on_num((BitSetRef )bp);
5954       bp += SIZE_BITSET;
5955       fprintf(f, ":%d", n);
5956       break;
5957 
5958     case OP_CCLASS_NOT:
5959       n = bitset_on_num((BitSetRef )bp);
5960       bp += SIZE_BITSET;
5961       fprintf(f, ":%d", n);
5962       break;
5963 
5964     case OP_CCLASS_MB:
5965     case OP_CCLASS_MB_NOT:
5966       GET_LENGTH_INC(len, bp);
5967       q = bp;
5968 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
5969       ALIGNMENT_RIGHT(q);
5970 #endif
5971       GET_CODE_POINT(code, q);
5972       bp += len;
5973       fprintf(f, ":%d:%d", (int )code, len);
5974       break;
5975 
5976     case OP_CCLASS_MIX:
5977     case OP_CCLASS_MIX_NOT:
5978       n = bitset_on_num((BitSetRef )bp);
5979       bp += SIZE_BITSET;
5980       GET_LENGTH_INC(len, bp);
5981       q = bp;
5982 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
5983       ALIGNMENT_RIGHT(q);
5984 #endif
5985       GET_CODE_POINT(code, q);
5986       bp += len;
5987       fprintf(f, ":%d:%d:%d", n, (int )code, len);
5988       break;
5989 
5990     case OP_CCLASS_NODE:
5991       {
5992         CClassNode *cc;
5993 
5994         GET_POINTER_INC(cc, bp);
5995         n = bitset_on_num(cc->bs);
5996         fprintf(f, ":%u:%d", (unsigned int )cc, n);
5997       }
5998       break;
5999 
6000     case OP_BACKREFN_IC:
6001       mem = *((MemNumType* )bp);
6002       bp += SIZE_MEMNUM;
6003       fprintf(f, ":%d", mem);
6004       break;
6005 
6006     case OP_BACKREF_MULTI_IC:
6007     case OP_BACKREF_MULTI:
6008       fputs(" ", f);
6009       GET_LENGTH_INC(len, bp);
6010       for (i = 0; i < len; i++) {
6011 	GET_MEMNUM_INC(mem, bp);
6012 	if (i > 0) fputs(", ", f);
6013 	fprintf(f, "%d", mem);
6014       }
6015       break;
6016 
6017     case OP_BACKREF_WITH_LEVEL:
6018       {
6019 	OnigOptionType option;
6020 	LengthType level;
6021 
6022 	GET_OPTION_INC(option, bp);
6023 	fprintf(f, ":%d", option);
6024 	GET_LENGTH_INC(level, bp);
6025 	fprintf(f, ":%d", level);
6026 
6027 	fputs(" ", f);
6028 	GET_LENGTH_INC(len, bp);
6029 	for (i = 0; i < len; i++) {
6030 	  GET_MEMNUM_INC(mem, bp);
6031 	  if (i > 0) fputs(", ", f);
6032 	  fprintf(f, "%d", mem);
6033 	}
6034       }
6035       break;
6036 
6037     case OP_REPEAT:
6038     case OP_REPEAT_NG:
6039       {
6040 	mem = *((MemNumType* )bp);
6041 	bp += SIZE_MEMNUM;
6042 	addr = *((RelAddrType* )bp);
6043 	bp += SIZE_RELADDR;
6044 	fprintf(f, ":%d:%d", mem, addr);
6045       }
6046       break;
6047 
6048     case OP_PUSH_OR_JUMP_EXACT1:
6049     case OP_PUSH_IF_PEEK_NEXT:
6050       addr = *((RelAddrType* )bp);
6051       bp += SIZE_RELADDR;
6052       fprintf(f, ":(%d)", addr);
6053       p_string(f, 1, bp);
6054       bp += 1;
6055       break;
6056 
6057     case OP_LOOK_BEHIND:
6058       GET_LENGTH_INC(len, bp);
6059       fprintf(f, ":%d", len);
6060       break;
6061 
6062     case OP_PUSH_LOOK_BEHIND_NOT:
6063       GET_RELADDR_INC(addr, bp);
6064       GET_LENGTH_INC(len, bp);
6065       fprintf(f, ":%d:(%d)", len, addr);
6066       break;
6067 
6068     case OP_STATE_CHECK_PUSH:
6069     case OP_STATE_CHECK_PUSH_OR_JUMP:
6070       scn = *((StateCheckNumType* )bp);
6071       bp += SIZE_STATE_CHECK_NUM;
6072       addr = *((RelAddrType* )bp);
6073       bp += SIZE_RELADDR;
6074       fprintf(f, ":%d:(%d)", scn, addr);
6075       break;
6076 
6077     default:
6078       fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
6079 	      *--bp);
6080     }
6081   }
6082   fputs("]", f);
6083   if (nextp) *nextp = bp;
6084 }
6085 
6086 static void
print_compiled_byte_code_list(FILE * f,regex_t * reg)6087 print_compiled_byte_code_list(FILE* f, regex_t* reg)
6088 {
6089   int ncode;
6090   UChar* bp = reg->p;
6091   UChar* end = reg->p + reg->used;
6092 
6093   fprintf(f, "code length: %d\n", reg->used);
6094 
6095   ncode = 0;
6096   while (bp < end) {
6097     ncode++;
6098     if (bp > reg->p) {
6099       if (ncode % 5 == 0)
6100 	fprintf(f, "\n");
6101       else
6102 	fputs(" ", f);
6103     }
6104     onig_print_compiled_byte_code(f, bp, &bp, reg->enc);
6105   }
6106 
6107   fprintf(f, "\n");
6108 }
6109 
6110 static void
print_indent_tree(FILE * f,Node * node,int indent)6111 print_indent_tree(FILE* f, Node* node, int indent)
6112 {
6113   int i, type;
6114   int add = 3;
6115   UChar* p;
6116 
6117   Indent(f, indent);
6118   if (IS_NULL(node)) {
6119     fprintf(f, "ERROR: null node!!!\n");
6120     exit (0);
6121   }
6122 
6123   type = NTYPE(node);
6124   switch (type) {
6125   case NT_LIST:
6126   case NT_ALT:
6127     if (NTYPE(node) == NT_LIST)
6128       fprintf(f, "<list:%x>\n", (int )node);
6129     else
6130       fprintf(f, "<alt:%x>\n", (int )node);
6131 
6132     print_indent_tree(f, NCAR(node), indent + add);
6133     while (IS_NOT_NULL(node = NCDR(node))) {
6134       if (NTYPE(node) != type) {
6135 	fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
6136 	exit(0);
6137       }
6138       print_indent_tree(f, NCAR(node), indent + add);
6139     }
6140     break;
6141 
6142   case NT_STR:
6143     fprintf(f, "<string%s:%x>",
6144 	    (NSTRING_IS_RAW(node) ? "-raw" : ""), (int )node);
6145     for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
6146       if (*p >= 0x20 && *p < 0x7f)
6147 	fputc(*p, f);
6148       else {
6149 	fprintf(f, " 0x%02x", *p);
6150       }
6151     }
6152     break;
6153 
6154   case NT_CCLASS:
6155     fprintf(f, "<cclass:%x>", (int )node);
6156     if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f);
6157     if (NCCLASS(node)->mbuf) {
6158       BBuf* bbuf = NCCLASS(node)->mbuf;
6159       for (i = 0; i < bbuf->used; i++) {
6160 	if (i > 0) fprintf(f, ",");
6161 	fprintf(f, "%0x", bbuf->p[i]);
6162       }
6163     }
6164     break;
6165 
6166   case NT_CTYPE:
6167     fprintf(f, "<ctype:%x> ", (int )node);
6168     switch (NCTYPE(node)->ctype) {
6169     case ONIGENC_CTYPE_WORD:
6170       if (NCTYPE(node)->not != 0)
6171 	fputs("not word",       f);
6172       else
6173 	fputs("word",           f);
6174       break;
6175 
6176     default:
6177       fprintf(f, "ERROR: undefined ctype.\n");
6178       exit(0);
6179     }
6180     break;
6181 
6182   case NT_CANY:
6183     fprintf(f, "<anychar:%x>", (int )node);
6184     break;
6185 
6186   case NT_ANCHOR:
6187     fprintf(f, "<anchor:%x> ", (int )node);
6188     switch (NANCHOR(node)->type) {
6189     case ANCHOR_BEGIN_BUF:      fputs("begin buf",      f); break;
6190     case ANCHOR_END_BUF:        fputs("end buf",        f); break;
6191     case ANCHOR_BEGIN_LINE:     fputs("begin line",     f); break;
6192     case ANCHOR_END_LINE:       fputs("end line",       f); break;
6193     case ANCHOR_SEMI_END_BUF:   fputs("semi end buf",   f); break;
6194     case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
6195 
6196     case ANCHOR_WORD_BOUND:      fputs("word bound",     f); break;
6197     case ANCHOR_NOT_WORD_BOUND:  fputs("not word bound", f); break;
6198 #ifdef USE_WORD_BEGIN_END
6199     case ANCHOR_WORD_BEGIN:      fputs("word begin", f);     break;
6200     case ANCHOR_WORD_END:        fputs("word end", f);       break;
6201 #endif
6202     case ANCHOR_PREC_READ:       fputs("prec read",      f); break;
6203     case ANCHOR_PREC_READ_NOT:   fputs("prec read not",  f); break;
6204     case ANCHOR_LOOK_BEHIND:     fputs("look_behind",    f); break;
6205     case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); break;
6206 
6207     default:
6208       fprintf(f, "ERROR: undefined anchor type.\n");
6209       break;
6210     }
6211     break;
6212 
6213   case NT_BREF:
6214     {
6215       int* p;
6216       BRefNode* br = NBREF(node);
6217       p = BACKREFS_P(br);
6218       fprintf(f, "<backref:%x>", (int )node);
6219       for (i = 0; i < br->back_num; i++) {
6220 	if (i > 0) fputs(", ", f);
6221 	fprintf(f, "%d", p[i]);
6222       }
6223     }
6224     break;
6225 
6226 #ifdef USE_SUBEXP_CALL
6227   case NT_CALL:
6228     {
6229       CallNode* cn = NCALL(node);
6230       fprintf(f, "<call:%x>", (int )node);
6231       p_string(f, cn->name_end - cn->name, cn->name);
6232     }
6233     break;
6234 #endif
6235 
6236   case NT_QTFR:
6237     fprintf(f, "<quantifier:%x>{%d,%d}%s\n", (int )node,
6238 	    NQTFR(node)->lower, NQTFR(node)->upper,
6239 	    (NQTFR(node)->greedy ? "" : "?"));
6240     print_indent_tree(f, NQTFR(node)->target, indent + add);
6241     break;
6242 
6243   case NT_ENCLOSE:
6244     fprintf(f, "<enclose:%x> ", (int )node);
6245     switch (NENCLOSE(node)->type) {
6246     case ENCLOSE_OPTION:
6247       fprintf(f, "option:%d\n", NENCLOSE(node)->option);
6248       print_indent_tree(f, NENCLOSE(node)->target, indent + add);
6249       break;
6250     case ENCLOSE_MEMORY:
6251       fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
6252       break;
6253     case ENCLOSE_STOP_BACKTRACK:
6254       fprintf(f, "stop-bt");
6255       break;
6256 
6257     default:
6258       break;
6259     }
6260     fprintf(f, "\n");
6261     print_indent_tree(f, NENCLOSE(node)->target, indent + add);
6262     break;
6263 
6264   default:
6265     fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
6266     break;
6267   }
6268 
6269   if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
6270       type != NT_ENCLOSE)
6271     fprintf(f, "\n");
6272   fflush(f);
6273 }
6274 #endif /* ONIG_DEBUG */
6275 
6276 #ifdef ONIG_DEBUG_PARSE_TREE
6277 static void
print_tree(FILE * f,Node * node)6278 print_tree(FILE* f, Node* node)
6279 {
6280   print_indent_tree(f, node, 0);
6281 }
6282 #endif
6283