1 /**********************************************************************
2   regcomp.c -  Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5  * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include "regparse.h"
31 
32 OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
33 
34 #if 0
35 typedef struct {
36   int  n;
37   int  alloc;
38   int* v;
39 } int_stack;
40 
41 static int
42 make_int_stack(int_stack** rs, int init_size)
43 {
44   int_stack* s;
45   int* v;
46 
47   *rs = 0;
48 
49   s = xmalloc(sizeof(*s));
50   if (IS_NULL(s)) return ONIGERR_MEMORY;
51 
52   v = (int* )xmalloc(sizeof(int) * init_size);
53   if (IS_NULL(v)) {
54     xfree(s);
55     return ONIGERR_MEMORY;
56   }
57 
58   s->n = 0;
59   s->alloc = init_size;
60   s->v = v;
61 
62   *rs = s;
63   return ONIG_NORMAL;
64 }
65 
66 static void
67 free_int_stack(int_stack* s)
68 {
69   if (IS_NOT_NULL(s)) {
70     if (IS_NOT_NULL(s->v))
71       xfree(s->v);
72     xfree(s);
73   }
74 }
75 
76 static int
77 int_stack_push(int_stack* s, int v)
78 {
79   if (s->n >= s->alloc) {
80     int new_size = s->alloc * 2;
81     int* nv = (int* )xrealloc(s->v, sizeof(int) * new_size);
82     if (IS_NULL(nv)) return ONIGERR_MEMORY;
83 
84     s->alloc = new_size;
85     s->v = nv;
86   }
87 
88   s->v[s->n] = v;
89   s->n++;
90   return ONIG_NORMAL;
91 }
92 
93 static int
94 int_stack_pop(int_stack* s)
95 {
96   int v;
97 
98 #ifdef ONIG_DEBUG
99   if (s->n <= 0) {
100     fprintf(stderr, "int_stack_pop: fail empty. %p\n", s);
101     return 0;
102   }
103 #endif
104 
105   v = s->v[s->n];
106   s->n--;
107   return v;
108 }
109 #endif
110 
111 extern OnigCaseFoldType
onig_get_default_case_fold_flag(void)112 onig_get_default_case_fold_flag(void)
113 {
114   return OnigDefaultCaseFoldFlag;
115 }
116 
117 extern int
onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)118 onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
119 {
120   OnigDefaultCaseFoldFlag = case_fold_flag;
121   return 0;
122 }
123 
124 static int
int_multiply_cmp(int x,int y,int v)125 int_multiply_cmp(int x, int y, int v)
126 {
127   if (x == 0 || y == 0) return -1;
128 
129   if (x < INT_MAX / y) {
130     int xy = x * y;
131     if (xy > v) return 1;
132     else {
133       if (xy == v) return 0;
134       else return -1;
135     }
136   }
137   else
138     return 1;
139 }
140 
141 
142 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
143 static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
144 #endif
145 
146 static void
swap_node(Node * a,Node * b)147 swap_node(Node* a, Node* b)
148 {
149   Node c;
150 
151   c = *a; *a = *b; *b = c;
152 
153   if (NODE_TYPE(a) == NODE_STRING) {
154     StrNode* sn = STR_(a);
155     if (sn->capa == 0) {
156       int len = (int )(sn->end - sn->s);
157       sn->s   = sn->buf;
158       sn->end = sn->s + len;
159     }
160   }
161 
162   if (NODE_TYPE(b) == NODE_STRING) {
163     StrNode* sn = STR_(b);
164     if (sn->capa == 0) {
165       int len = (int )(sn->end - sn->s);
166       sn->s   = sn->buf;
167       sn->end = sn->s + len;
168     }
169   }
170 }
171 
172 static OnigLen
distance_add(OnigLen d1,OnigLen d2)173 distance_add(OnigLen d1, OnigLen d2)
174 {
175   if (d1 == INFINITE_LEN || d2 == INFINITE_LEN)
176     return INFINITE_LEN;
177   else {
178     if (d1 <= INFINITE_LEN - d2) return d1 + d2;
179     else return INFINITE_LEN;
180   }
181 }
182 
183 static OnigLen
distance_multiply(OnigLen d,int m)184 distance_multiply(OnigLen d, int m)
185 {
186   if (m == 0) return 0;
187 
188   if (d < INFINITE_LEN / m)
189     return d * m;
190   else
191     return INFINITE_LEN;
192 }
193 
194 static int
bitset_is_empty(BitSetRef bs)195 bitset_is_empty(BitSetRef bs)
196 {
197   int i;
198 
199   for (i = 0; i < (int )BITSET_SIZE; i++) {
200     if (bs[i] != 0) return 0;
201   }
202   return 1;
203 }
204 
205 extern int
onig_bbuf_init(BBuf * buf,int size)206 onig_bbuf_init(BBuf* buf, int size)
207 {
208   if (size <= 0) {
209     size   = 0;
210     buf->p = NULL;
211   }
212   else {
213     buf->p = (UChar* )xmalloc(size);
214     if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
215   }
216 
217   buf->alloc = size;
218   buf->used  = 0;
219   return 0;
220 }
221 
222 
223 #ifdef USE_CALL
224 
225 static int
unset_addr_list_init(UnsetAddrList * list,int size)226 unset_addr_list_init(UnsetAddrList* list, int size)
227 {
228   UnsetAddr* p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
229   CHECK_NULL_RETURN_MEMERR(p);
230 
231   list->num   = 0;
232   list->alloc = size;
233   list->us    = p;
234   return 0;
235 }
236 
237 static void
unset_addr_list_end(UnsetAddrList * list)238 unset_addr_list_end(UnsetAddrList* list)
239 {
240   if (IS_NOT_NULL(list->us))
241     xfree(list->us);
242 }
243 
244 static int
unset_addr_list_add(UnsetAddrList * list,int offset,struct _Node * node)245 unset_addr_list_add(UnsetAddrList* list, int offset, struct _Node* node)
246 {
247   UnsetAddr* p;
248   int size;
249 
250   if (list->num >= list->alloc) {
251     size = list->alloc * 2;
252     p = (UnsetAddr* )xrealloc(list->us, sizeof(UnsetAddr) * size, sizeof(UnsetAddr)* list->alloc);
253     CHECK_NULL_RETURN_MEMERR(p);
254     list->alloc = size;
255     list->us    = p;
256   }
257 
258   list->us[list->num].offset = offset;
259   list->us[list->num].target = node;
260   list->num++;
261   return 0;
262 }
263 #endif /* USE_CALL */
264 
265 
266 static int
add_opcode(regex_t * reg,int opcode)267 add_opcode(regex_t* reg, int opcode)
268 {
269   BB_ADD1(reg, opcode);
270   return 0;
271 }
272 
273 static int
add_rel_addr(regex_t * reg,int addr)274 add_rel_addr(regex_t* reg, int addr)
275 {
276   RelAddrType ra = (RelAddrType )addr;
277 
278   BB_ADD(reg, &ra, SIZE_RELADDR);
279   return 0;
280 }
281 
282 static int
add_abs_addr(regex_t * reg,int addr)283 add_abs_addr(regex_t* reg, int addr)
284 {
285   AbsAddrType ra = (AbsAddrType )addr;
286 
287   BB_ADD(reg, &ra, SIZE_ABSADDR);
288   return 0;
289 }
290 
291 static int
add_length(regex_t * reg,int len)292 add_length(regex_t* reg, int len)
293 {
294   LengthType l = (LengthType )len;
295 
296   BB_ADD(reg, &l, SIZE_LENGTH);
297   return 0;
298 }
299 
300 static int
add_mem_num(regex_t * reg,int num)301 add_mem_num(regex_t* reg, int num)
302 {
303   MemNumType n = (MemNumType )num;
304 
305   BB_ADD(reg, &n, SIZE_MEMNUM);
306   return 0;
307 }
308 
309 #if 0
310 static int
311 add_pointer(regex_t* reg, void* addr)
312 {
313   PointerType ptr = (PointerType )addr;
314 
315   BB_ADD(reg, &ptr, SIZE_POINTER);
316   return 0;
317 }
318 #endif
319 
320 static int
add_option(regex_t * reg,OnigOptionType option)321 add_option(regex_t* reg, OnigOptionType option)
322 {
323   BB_ADD(reg, &option, SIZE_OPTION);
324   return 0;
325 }
326 
327 static int
add_save_type(regex_t * reg,enum SaveType type)328 add_save_type(regex_t* reg, enum SaveType type)
329 {
330   SaveType t = (SaveType )type;
331 
332   BB_ADD(reg, &t, SIZE_SAVE_TYPE);
333   return 0;
334 }
335 
336 static int
add_update_var_type(regex_t * reg,enum UpdateVarType type)337 add_update_var_type(regex_t* reg, enum UpdateVarType type)
338 {
339   UpdateVarType t = (UpdateVarType )type;
340 
341   BB_ADD(reg, &t, SIZE_UPDATE_VAR_TYPE);
342   return 0;
343 }
344 
345 static int
add_mode(regex_t * reg,ModeType mode)346 add_mode(regex_t* reg, ModeType mode)
347 {
348   BB_ADD(reg, &mode, SIZE_MODE);
349   return 0;
350 }
351 
352 static int
add_opcode_rel_addr(regex_t * reg,int opcode,int addr)353 add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
354 {
355   int r;
356 
357   r = add_opcode(reg, opcode);
358   if (r != 0) return r;
359   r = add_rel_addr(reg, addr);
360   return r;
361 }
362 
363 static int
add_bytes(regex_t * reg,UChar * bytes,int len)364 add_bytes(regex_t* reg, UChar* bytes, int len)
365 {
366   BB_ADD(reg, bytes, len);
367   return 0;
368 }
369 
370 static int
add_bitset(regex_t * reg,BitSetRef bs)371 add_bitset(regex_t* reg, BitSetRef bs)
372 {
373   BB_ADD(reg, bs, SIZE_BITSET);
374   return 0;
375 }
376 
377 static int compile_length_tree(Node* node, regex_t* reg);
378 static int compile_tree(Node* node, regex_t* reg, ScanEnv* env);
379 
380 
381 #define IS_NEED_STR_LEN_OP_EXACT(op) \
382    ((op) == OP_EXACTN    || (op) == OP_EXACTMB2N ||\
383     (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN  || (op) == OP_EXACTN_IC)
384 
385 static int
select_str_opcode(int mb_len,int str_len,int ignore_case)386 select_str_opcode(int mb_len, int str_len, int ignore_case)
387 {
388   int op;
389 
390   if (ignore_case) {
391     switch (str_len) {
392     case 1:  op = OP_EXACT1_IC; break;
393     default: op = OP_EXACTN_IC; break;
394     }
395   }
396   else {
397     switch (mb_len) {
398     case 1:
399       switch (str_len) {
400       case 1:  op = OP_EXACT1; break;
401       case 2:  op = OP_EXACT2; break;
402       case 3:  op = OP_EXACT3; break;
403       case 4:  op = OP_EXACT4; break;
404       case 5:  op = OP_EXACT5; break;
405       default: op = OP_EXACTN; break;
406       }
407       break;
408 
409     case 2:
410       switch (str_len) {
411       case 1:  op = OP_EXACTMB2N1; break;
412       case 2:  op = OP_EXACTMB2N2; break;
413       case 3:  op = OP_EXACTMB2N3; break;
414       default: op = OP_EXACTMB2N;  break;
415       }
416       break;
417 
418     case 3:
419       op = OP_EXACTMB3N;
420       break;
421 
422     default:
423       op = OP_EXACTMBN;
424       break;
425     }
426   }
427   return op;
428 }
429 
430 static int
compile_tree_empty_check(Node * node,regex_t * reg,int empty_info,ScanEnv * env)431 compile_tree_empty_check(Node* node, regex_t* reg, int empty_info, ScanEnv* env)
432 {
433   int r;
434   int saved_num_null_check = reg->num_null_check;
435 
436   if (empty_info != QUANT_BODY_IS_NOT_EMPTY) {
437     r = add_opcode(reg, OP_EMPTY_CHECK_START);
438     if (r != 0) return r;
439     r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
440     if (r != 0) return r;
441     reg->num_null_check++;
442   }
443 
444   r = compile_tree(node, reg, env);
445   if (r != 0) return r;
446 
447   if (empty_info != QUANT_BODY_IS_NOT_EMPTY) {
448     if (empty_info == QUANT_BODY_IS_EMPTY)
449       r = add_opcode(reg, OP_EMPTY_CHECK_END);
450     else if (empty_info == QUANT_BODY_IS_EMPTY_MEM)
451       r = add_opcode(reg, OP_EMPTY_CHECK_END_MEMST);
452     else if (empty_info == QUANT_BODY_IS_EMPTY_REC)
453       r = add_opcode(reg, OP_EMPTY_CHECK_END_MEMST_PUSH);
454 
455     if (r != 0) return r;
456     r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
457   }
458   return r;
459 }
460 
461 #ifdef USE_CALL
462 static int
compile_call(CallNode * node,regex_t * reg,ScanEnv * env)463 compile_call(CallNode* node, regex_t* reg, ScanEnv* env)
464 {
465   int r;
466 
467   r = add_opcode(reg, OP_CALL);
468   if (r != 0) return r;
469   r = unset_addr_list_add(env->unset_addr_list, BB_GET_OFFSET_POS(reg),
470                           NODE_CALL_BODY(node));
471   if (r != 0) return r;
472   r = add_abs_addr(reg, 0 /*dummy addr.*/);
473   return r;
474 }
475 #endif
476 
477 static int
compile_tree_n_times(Node * node,int n,regex_t * reg,ScanEnv * env)478 compile_tree_n_times(Node* node, int n, regex_t* reg, ScanEnv* env)
479 {
480   int i, r;
481 
482   for (i = 0; i < n; i++) {
483     r = compile_tree(node, reg, env);
484     if (r != 0) return r;
485   }
486   return 0;
487 }
488 
489 static int
add_compile_string_length(UChar * s ARG_UNUSED,int mb_len,int str_len,regex_t * reg ARG_UNUSED,int ignore_case)490 add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, int str_len,
491                           regex_t* reg ARG_UNUSED, int ignore_case)
492 {
493   int len;
494   int op = select_str_opcode(mb_len, str_len, ignore_case);
495 
496   len = SIZE_OPCODE;
497 
498   if (op == OP_EXACTMBN)  len += SIZE_LENGTH;
499   if (IS_NEED_STR_LEN_OP_EXACT(op))
500     len += SIZE_LENGTH;
501 
502   len += mb_len * str_len;
503   return len;
504 }
505 
506 static int
add_compile_string(UChar * s,int mb_len,int str_len,regex_t * reg,int ignore_case)507 add_compile_string(UChar* s, int mb_len, int str_len,
508                    regex_t* reg, int ignore_case)
509 {
510   int op = select_str_opcode(mb_len, str_len, ignore_case);
511   add_opcode(reg, op);
512 
513   if (op == OP_EXACTMBN)
514     add_length(reg, mb_len);
515 
516   if (IS_NEED_STR_LEN_OP_EXACT(op)) {
517     if (op == OP_EXACTN_IC)
518       add_length(reg, mb_len * str_len);
519     else
520       add_length(reg, str_len);
521   }
522 
523   add_bytes(reg, s, mb_len * str_len);
524   return 0;
525 }
526 
527 
528 static int
compile_length_string_node(Node * node,regex_t * reg)529 compile_length_string_node(Node* node, regex_t* reg)
530 {
531   int rlen, r, len, prev_len, slen, ambig;
532   UChar *p, *prev;
533   StrNode* sn;
534   OnigEncoding enc = reg->enc;
535 
536   sn = STR_(node);
537   if (sn->end <= sn->s)
538     return 0;
539 
540   ambig = NODE_STRING_IS_AMBIG(node);
541 
542   p = prev = sn->s;
543   prev_len = enclen(enc, p);
544   p += prev_len;
545   slen = 1;
546   rlen = 0;
547 
548   for (; p < sn->end; ) {
549     len = enclen(enc, p);
550     if (len == prev_len) {
551       slen++;
552     }
553     else {
554       r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
555       rlen += r;
556       prev = p;
557       slen = 1;
558       prev_len = len;
559     }
560     p += len;
561   }
562 
563   r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
564   rlen += r;
565   return rlen;
566 }
567 
568 static int
compile_length_string_raw_node(StrNode * sn,regex_t * reg)569 compile_length_string_raw_node(StrNode* sn, regex_t* reg)
570 {
571   if (sn->end <= sn->s)
572     return 0;
573 
574   return add_compile_string_length(sn->s, 1 /* sb */, (int )(sn->end - sn->s),
575                                    reg, 0);
576 }
577 
578 static int
compile_string_node(Node * node,regex_t * reg)579 compile_string_node(Node* node, regex_t* reg)
580 {
581   int r, len, prev_len, slen, ambig;
582   UChar *p, *prev, *end;
583   StrNode* sn;
584   OnigEncoding enc = reg->enc;
585 
586   sn = STR_(node);
587   if (sn->end <= sn->s)
588     return 0;
589 
590   end = sn->end;
591   ambig = NODE_STRING_IS_AMBIG(node);
592 
593   p = prev = sn->s;
594   prev_len = enclen(enc, p);
595   p += prev_len;
596   slen = 1;
597 
598   for (; p < end; ) {
599     len = enclen(enc, p);
600     if (len == prev_len) {
601       slen++;
602     }
603     else {
604       r = add_compile_string(prev, prev_len, slen, reg, ambig);
605       if (r != 0) return r;
606 
607       prev  = p;
608       slen  = 1;
609       prev_len = len;
610     }
611 
612     p += len;
613   }
614 
615   return add_compile_string(prev, prev_len, slen, reg, ambig);
616 }
617 
618 static int
compile_string_raw_node(StrNode * sn,regex_t * reg)619 compile_string_raw_node(StrNode* sn, regex_t* reg)
620 {
621   if (sn->end <= sn->s)
622     return 0;
623 
624   return add_compile_string(sn->s, 1 /* sb */, (int )(sn->end - sn->s), reg, 0);
625 }
626 
627 static int
add_multi_byte_cclass(BBuf * mbuf,regex_t * reg)628 add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
629 {
630 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
631   add_length(reg, mbuf->used);
632   return add_bytes(reg, mbuf->p, mbuf->used);
633 #else
634   int r, pad_size;
635   UChar* p = BB_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
636 
637   GET_ALIGNMENT_PAD_SIZE(p, pad_size);
638   add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
639   if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
640 
641   r = add_bytes(reg, mbuf->p, mbuf->used);
642 
643   /* padding for return value from compile_length_cclass_node() to be fix. */
644   pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
645   if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
646   return r;
647 #endif
648 }
649 
650 static int
compile_length_cclass_node(CClassNode * cc,regex_t * reg)651 compile_length_cclass_node(CClassNode* cc, regex_t* reg)
652 {
653   int len;
654 
655   if (IS_NULL(cc->mbuf)) {
656     len = SIZE_OPCODE + SIZE_BITSET;
657   }
658   else {
659     if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
660       len = SIZE_OPCODE;
661     }
662     else {
663       len = SIZE_OPCODE + SIZE_BITSET;
664     }
665 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
666     len += SIZE_LENGTH + cc->mbuf->used;
667 #else
668     len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
669 #endif
670   }
671 
672   return len;
673 }
674 
675 static int
compile_cclass_node(CClassNode * cc,regex_t * reg)676 compile_cclass_node(CClassNode* cc, regex_t* reg)
677 {
678   int r;
679 
680   if (IS_NULL(cc->mbuf)) {
681     if (IS_NCCLASS_NOT(cc))
682       add_opcode(reg, OP_CCLASS_NOT);
683     else
684       add_opcode(reg, OP_CCLASS);
685 
686     r = add_bitset(reg, cc->bs);
687   }
688   else {
689     if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
690       if (IS_NCCLASS_NOT(cc))
691         add_opcode(reg, OP_CCLASS_MB_NOT);
692       else
693         add_opcode(reg, OP_CCLASS_MB);
694 
695       r = add_multi_byte_cclass(cc->mbuf, reg);
696     }
697     else {
698       if (IS_NCCLASS_NOT(cc))
699         add_opcode(reg, OP_CCLASS_MIX_NOT);
700       else
701         add_opcode(reg, OP_CCLASS_MIX);
702 
703       r = add_bitset(reg, cc->bs);
704       if (r != 0) return r;
705       r = add_multi_byte_cclass(cc->mbuf, reg);
706     }
707   }
708 
709   return r;
710 }
711 
712 static int
entry_repeat_range(regex_t * reg,int id,int lower,int upper)713 entry_repeat_range(regex_t* reg, int id, int lower, int upper)
714 {
715 #define REPEAT_RANGE_ALLOC  4
716 
717   OnigRepeatRange* p;
718 
719   if (reg->repeat_range_alloc == 0) {
720     p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
721     CHECK_NULL_RETURN_MEMERR(p);
722     reg->repeat_range = p;
723     reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
724   }
725   else if (reg->repeat_range_alloc <= id) {
726     int n;
727     n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
728     p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
729                                     sizeof(OnigRepeatRange) * n,
730                                     sizeof(OnigRepeatRange) * reg->repeat_range_alloc);
731     CHECK_NULL_RETURN_MEMERR(p);
732     reg->repeat_range = p;
733     reg->repeat_range_alloc = n;
734   }
735   else {
736     p = reg->repeat_range;
737   }
738 
739   p[id].lower = lower;
740   p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
741   return 0;
742 }
743 
744 static int
compile_range_repeat_node(QuantNode * qn,int target_len,int empty_info,regex_t * reg,ScanEnv * env)745 compile_range_repeat_node(QuantNode* qn, int target_len, int empty_info,
746                           regex_t* reg, ScanEnv* env)
747 {
748   int r;
749   int num_repeat = reg->num_repeat;
750 
751   r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
752   if (r != 0) return r;
753   r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
754   reg->num_repeat++;
755   if (r != 0) return r;
756   r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
757   if (r != 0) return r;
758 
759   r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
760   if (r != 0) return r;
761 
762   r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
763   if (r != 0) return r;
764 
765   if (
766 #ifdef USE_CALL
767       NODE_IS_IN_MULTI_ENTRY(qn) ||
768 #endif
769       NODE_IS_IN_REAL_REPEAT(qn)) {
770     r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
771   }
772   else {
773     r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
774   }
775   if (r != 0) return r;
776   r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
777   return r;
778 }
779 
780 static int
is_anychar_infinite_greedy(QuantNode * qn)781 is_anychar_infinite_greedy(QuantNode* qn)
782 {
783   if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
784       NODE_IS_ANYCHAR(NODE_QUANT_BODY(qn)))
785     return 1;
786   else
787     return 0;
788 }
789 
790 #define QUANTIFIER_EXPAND_LIMIT_SIZE   50
791 #define CKN_ON   (ckn > 0)
792 
793 static int
compile_length_quantifier_node(QuantNode * qn,regex_t * reg)794 compile_length_quantifier_node(QuantNode* qn, regex_t* reg)
795 {
796   int len, mod_tlen;
797   int infinite = IS_REPEAT_INFINITE(qn->upper);
798   enum QuantBodyEmpty empty_info = qn->body_empty_info;
799   int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);
800 
801   if (tlen < 0) return tlen;
802   if (tlen == 0) return 0;
803 
804   /* anychar repeat */
805   if (is_anychar_infinite_greedy(qn)) {
806     if (qn->lower <= 1 ||
807         int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0) {
808       if (IS_NOT_NULL(qn->next_head_exact))
809         return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
810       else
811         return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
812     }
813   }
814 
815   if (empty_info == QUANT_BODY_IS_NOT_EMPTY)
816     mod_tlen = tlen;
817   else
818     mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END);
819 
820   if (infinite &&
821       (qn->lower <= 1 ||
822        int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
823     if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
824       len = SIZE_OP_JUMP;
825     }
826     else {
827       len = tlen * qn->lower;
828     }
829 
830     if (qn->greedy) {
831       if (IS_NOT_NULL(qn->head_exact))
832         len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
833       else if (IS_NOT_NULL(qn->next_head_exact))
834         len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
835       else
836         len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
837     }
838     else
839       len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
840   }
841   else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
842     len = SIZE_OP_JUMP + tlen;
843   }
844   else if (!infinite && qn->greedy &&
845            (qn->upper == 1 ||
846             int_multiply_cmp(tlen + SIZE_OP_PUSH, qn->upper,
847                              QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
848     len = tlen * qn->lower;
849     len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
850   }
851   else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
852     len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
853   }
854   else {
855     len = SIZE_OP_REPEAT_INC
856         + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
857   }
858 
859   return len;
860 }
861 
862 static int
compile_quantifier_node(QuantNode * qn,regex_t * reg,ScanEnv * env)863 compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
864 {
865   int i, r, mod_tlen;
866   int infinite = IS_REPEAT_INFINITE(qn->upper);
867   enum QuantBodyEmpty empty_info = qn->body_empty_info;
868   int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);
869 
870   if (tlen < 0) return tlen;
871   if (tlen == 0) return 0;
872 
873   if (is_anychar_infinite_greedy(qn) &&
874       (qn->lower <= 1 ||
875        int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
876     r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);
877     if (r != 0) return r;
878     if (IS_NOT_NULL(qn->next_head_exact)) {
879       if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg)))
880         r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
881       else
882         r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
883       if (r != 0) return r;
884       return add_bytes(reg, STR_(qn->next_head_exact)->s, 1);
885     }
886     else {
887       if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg)))
888         return add_opcode(reg, OP_ANYCHAR_ML_STAR);
889       else
890         return add_opcode(reg, OP_ANYCHAR_STAR);
891     }
892   }
893 
894   if (empty_info == QUANT_BODY_IS_NOT_EMPTY)
895     mod_tlen = tlen;
896   else
897     mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END);
898 
899   if (infinite &&
900       (qn->lower <= 1 ||
901        int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
902     if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
903       if (qn->greedy) {
904         if (IS_NOT_NULL(qn->head_exact))
905           r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
906         else if (IS_NOT_NULL(qn->next_head_exact))
907           r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
908         else
909           r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
910       }
911       else {
912         r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
913       }
914       if (r != 0) return r;
915     }
916     else {
917       r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);
918       if (r != 0) return r;
919     }
920 
921     if (qn->greedy) {
922       if (IS_NOT_NULL(qn->head_exact)) {
923         r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
924                                 mod_tlen + SIZE_OP_JUMP);
925         if (r != 0) return r;
926         add_bytes(reg, STR_(qn->head_exact)->s, 1);
927         r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
928         if (r != 0) return r;
929         r = add_opcode_rel_addr(reg, OP_JUMP,
930            -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
931       }
932       else if (IS_NOT_NULL(qn->next_head_exact)) {
933         r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
934                                 mod_tlen + SIZE_OP_JUMP);
935         if (r != 0) return r;
936         add_bytes(reg, STR_(qn->next_head_exact)->s, 1);
937         r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
938         if (r != 0) return r;
939         r = add_opcode_rel_addr(reg, OP_JUMP,
940            -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
941       }
942       else {
943         r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
944         if (r != 0) return r;
945         r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
946         if (r != 0) return r;
947         r = add_opcode_rel_addr(reg, OP_JUMP,
948                    -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
949       }
950     }
951     else {
952       r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
953       if (r != 0) return r;
954       r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
955       if (r != 0) return r;
956       r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
957     }
958   }
959   else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
960     r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
961     if (r != 0) return r;
962     r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
963   }
964   else if (! infinite && qn->greedy &&
965            (qn->upper == 1 ||
966             int_multiply_cmp(tlen + SIZE_OP_PUSH, qn->upper,
967                              QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
968     int n = qn->upper - qn->lower;
969 
970     r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);
971     if (r != 0) return r;
972 
973     for (i = 0; i < n; i++) {
974       r = add_opcode_rel_addr(reg, OP_PUSH,
975                               (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
976       if (r != 0) return r;
977       r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
978       if (r != 0) return r;
979     }
980   }
981   else if (! qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
982     r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
983     if (r != 0) return r;
984     r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
985     if (r != 0) return r;
986     r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
987   }
988   else {
989     r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg, env);
990   }
991   return r;
992 }
993 
994 static int
compile_length_option_node(EnclosureNode * node,regex_t * reg)995 compile_length_option_node(EnclosureNode* node, regex_t* reg)
996 {
997   int tlen;
998   OnigOptionType prev = reg->options;
999 
1000   reg->options = node->o.options;
1001   tlen = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg);
1002   reg->options = prev;
1003 
1004   return tlen;
1005 }
1006 
1007 static int
compile_option_node(EnclosureNode * node,regex_t * reg,ScanEnv * env)1008 compile_option_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)
1009 {
1010   int r;
1011   OnigOptionType prev = reg->options;
1012 
1013   reg->options = node->o.options;
1014   r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env);
1015   reg->options = prev;
1016 
1017   return r;
1018 }
1019 
1020 static int
compile_length_enclosure_node(EnclosureNode * node,regex_t * reg)1021 compile_length_enclosure_node(EnclosureNode* node, regex_t* reg)
1022 {
1023   int len;
1024   int tlen;
1025 
1026   if (node->type == ENCLOSURE_OPTION)
1027     return compile_length_option_node(node, reg);
1028 
1029   if (NODE_ENCLOSURE_BODY(node)) {
1030     tlen = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg);
1031     if (tlen < 0) return tlen;
1032   }
1033   else
1034     tlen = 0;
1035 
1036   switch (node->type) {
1037   case ENCLOSURE_MEMORY:
1038 #ifdef USE_CALL
1039 
1040     if (node->m.regnum == 0 && NODE_IS_CALLED(node)) {
1041       len = tlen + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
1042       return len;
1043     }
1044 
1045     if (NODE_IS_CALLED(node)) {
1046       len = SIZE_OP_MEMORY_START_PUSH + tlen
1047         + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
1048       if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))
1049         len += (NODE_IS_RECURSION(node)
1050                 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1051       else
1052         len += (NODE_IS_RECURSION(node)
1053                 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1054     }
1055     else if (NODE_IS_RECURSION(node)) {
1056       len = SIZE_OP_MEMORY_START_PUSH;
1057       len += tlen + (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum)
1058                      ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_REC);
1059     }
1060     else
1061 #endif
1062     {
1063       if (MEM_STATUS_AT0(reg->bt_mem_start, node->m.regnum))
1064         len = SIZE_OP_MEMORY_START_PUSH;
1065       else
1066         len = SIZE_OP_MEMORY_START;
1067 
1068       len += tlen + (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum)
1069                      ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
1070     }
1071     break;
1072 
1073   case ENCLOSURE_STOP_BACKTRACK:
1074     if (NODE_IS_STOP_BT_SIMPLE_REPEAT(node)) {
1075       QuantNode* qn = QUANT_(NODE_ENCLOSURE_BODY(node));
1076       tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);
1077       if (tlen < 0) return tlen;
1078 
1079       len = tlen * qn->lower
1080         + SIZE_OP_PUSH + tlen + SIZE_OP_POP_OUT + SIZE_OP_JUMP;
1081     }
1082     else {
1083       len = SIZE_OP_ATOMIC_START + tlen + SIZE_OP_ATOMIC_END;
1084     }
1085     break;
1086 
1087   case ENCLOSURE_IF_ELSE:
1088     {
1089       Node* cond = NODE_ENCLOSURE_BODY(node);
1090       Node* Then = node->te.Then;
1091       Node* Else = node->te.Else;
1092 
1093       len = compile_length_tree(cond, reg);
1094       if (len < 0) return len;
1095       len += SIZE_OP_PUSH;
1096       len += SIZE_OP_ATOMIC_START + SIZE_OP_ATOMIC_END;
1097 
1098       if (IS_NOT_NULL(Then)) {
1099         tlen = compile_length_tree(Then, reg);
1100         if (tlen < 0) return tlen;
1101         len += tlen;
1102       }
1103 
1104       if (IS_NOT_NULL(Else)) {
1105         len += SIZE_OP_JUMP;
1106         tlen = compile_length_tree(Else, reg);
1107         if (tlen < 0) return tlen;
1108         len += tlen;
1109       }
1110     }
1111     break;
1112 
1113   default:
1114     return ONIGERR_TYPE_BUG;
1115     break;
1116   }
1117 
1118   return len;
1119 }
1120 
1121 static int get_char_length_tree(Node* node, regex_t* reg, int* len);
1122 
1123 static int
compile_enclosure_memory_node(EnclosureNode * node,regex_t * reg,ScanEnv * env)1124 compile_enclosure_memory_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)
1125 {
1126   int r;
1127   int len;
1128 
1129 #ifdef USE_CALL
1130   if (node->m.regnum == 0 && NODE_IS_CALLED(node)) {
1131     r = add_opcode(reg, OP_CALL);
1132     if (r != 0) return r;
1133     node->m.called_addr = BB_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
1134     NODE_STATUS_ADD(node, ADDR_FIXED);
1135     r = add_abs_addr(reg, (int )node->m.called_addr);
1136     if (r != 0) return r;
1137     len = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg);
1138     len += SIZE_OP_RETURN;
1139     r = add_opcode_rel_addr(reg, OP_JUMP, len);
1140     if (r != 0) return r;
1141 
1142     r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env);
1143     if (r != 0) return r;
1144     r = add_opcode(reg, OP_RETURN);
1145     return r;
1146   }
1147 
1148   if (NODE_IS_CALLED(node)) {
1149     r = add_opcode(reg, OP_CALL);
1150     if (r != 0) return r;
1151     node->m.called_addr = BB_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
1152     NODE_STATUS_ADD(node, ADDR_FIXED);
1153     r = add_abs_addr(reg, (int )node->m.called_addr);
1154     if (r != 0) return r;
1155     len = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg);
1156     len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
1157     if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))
1158       len += (NODE_IS_RECURSION(node)
1159               ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1160     else
1161       len += (NODE_IS_RECURSION(node)
1162               ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1163 
1164     r = add_opcode_rel_addr(reg, OP_JUMP, len);
1165     if (r != 0) return r;
1166   }
1167 #endif
1168 
1169   if (MEM_STATUS_AT0(reg->bt_mem_start, node->m.regnum))
1170     r = add_opcode(reg, OP_MEMORY_START_PUSH);
1171   else
1172     r = add_opcode(reg, OP_MEMORY_START);
1173   if (r != 0) return r;
1174   r = add_mem_num(reg, node->m.regnum);
1175   if (r != 0) return r;
1176   r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env);
1177   if (r != 0) return r;
1178 
1179 #ifdef USE_CALL
1180   if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))
1181     r = add_opcode(reg, (NODE_IS_RECURSION(node)
1182                          ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
1183   else
1184     r = add_opcode(reg, (NODE_IS_RECURSION(node)
1185                          ? OP_MEMORY_END_REC : OP_MEMORY_END));
1186   if (r != 0) return r;
1187   r = add_mem_num(reg, node->m.regnum);
1188   if (NODE_IS_CALLED(node)) {
1189     if (r != 0) return r;
1190     r = add_opcode(reg, OP_RETURN);
1191   }
1192 #else
1193   if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))
1194     r = add_opcode(reg, OP_MEMORY_END_PUSH);
1195   else
1196     r = add_opcode(reg, OP_MEMORY_END);
1197   if (r != 0) return r;
1198   r = add_mem_num(reg, node->m.regnum);
1199 #endif
1200 
1201   return r;
1202 }
1203 
1204 static int
compile_enclosure_node(EnclosureNode * node,regex_t * reg,ScanEnv * env)1205 compile_enclosure_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)
1206 {
1207   int r, len;
1208 
1209   switch (node->type) {
1210   case ENCLOSURE_MEMORY:
1211     r = compile_enclosure_memory_node(node, reg, env);
1212     break;
1213 
1214   case ENCLOSURE_OPTION:
1215     r = compile_option_node(node, reg, env);
1216     break;
1217 
1218   case ENCLOSURE_STOP_BACKTRACK:
1219     if (NODE_IS_STOP_BT_SIMPLE_REPEAT(node)) {
1220       QuantNode* qn = QUANT_(NODE_ENCLOSURE_BODY(node));
1221       r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);
1222       if (r != 0) return r;
1223 
1224       len = compile_length_tree(NODE_QUANT_BODY(qn), reg);
1225       if (len < 0) return len;
1226 
1227       r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP_OUT + SIZE_OP_JUMP);
1228       if (r != 0) return r;
1229       r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
1230       if (r != 0) return r;
1231       r = add_opcode(reg, OP_POP_OUT);
1232       if (r != 0) return r;
1233       r = add_opcode_rel_addr(reg, OP_JUMP,
1234            -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP_OUT + (int )SIZE_OP_JUMP));
1235     }
1236     else {
1237       r = add_opcode(reg, OP_ATOMIC_START);
1238       if (r != 0) return r;
1239       r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env);
1240       if (r != 0) return r;
1241       r = add_opcode(reg, OP_ATOMIC_END);
1242     }
1243     break;
1244 
1245   case ENCLOSURE_IF_ELSE:
1246     {
1247       int cond_len, then_len, jump_len;
1248       Node* cond = NODE_ENCLOSURE_BODY(node);
1249       Node* Then = node->te.Then;
1250       Node* Else = node->te.Else;
1251 
1252       r = add_opcode(reg, OP_ATOMIC_START);
1253       if (r != 0) return r;
1254 
1255       cond_len = compile_length_tree(cond, reg);
1256       if (cond_len < 0) return cond_len;
1257       if (IS_NOT_NULL(Then)) {
1258         then_len = compile_length_tree(Then, reg);
1259         if (then_len < 0) return then_len;
1260       }
1261       else
1262         then_len = 0;
1263 
1264       jump_len = cond_len + then_len + SIZE_OP_ATOMIC_END;
1265       if (IS_NOT_NULL(Else)) jump_len += SIZE_OP_JUMP;
1266 
1267       r = add_opcode_rel_addr(reg, OP_PUSH, jump_len);
1268       if (r != 0) return r;
1269       r = compile_tree(cond, reg, env);
1270       if (r != 0) return r;
1271       r = add_opcode(reg, OP_ATOMIC_END);
1272       if (r != 0) return r;
1273 
1274       if (IS_NOT_NULL(Then)) {
1275         r = compile_tree(Then, reg, env);
1276         if (r != 0) return r;
1277       }
1278 
1279       if (IS_NOT_NULL(Else)) {
1280         int else_len = compile_length_tree(Else, reg);
1281         r = add_opcode_rel_addr(reg, OP_JUMP, else_len);
1282         if (r != 0) return r;
1283         r = compile_tree(Else, reg, env);
1284       }
1285     }
1286     break;
1287 
1288   default:
1289     return ONIGERR_TYPE_BUG;
1290     break;
1291   }
1292 
1293   return r;
1294 }
1295 
1296 static int
compile_length_anchor_node(AnchorNode * node,regex_t * reg)1297 compile_length_anchor_node(AnchorNode* node, regex_t* reg)
1298 {
1299   int len;
1300   int tlen = 0;
1301 
1302   if (IS_NOT_NULL(NODE_ANCHOR_BODY(node))) {
1303     tlen = compile_length_tree(NODE_ANCHOR_BODY(node), reg);
1304     if (tlen < 0) return tlen;
1305   }
1306 
1307   switch (node->type) {
1308   case ANCHOR_PREC_READ:
1309     len = SIZE_OP_PREC_READ_START + tlen + SIZE_OP_PREC_READ_END;
1310     break;
1311   case ANCHOR_PREC_READ_NOT:
1312     len = SIZE_OP_PREC_READ_NOT_START + tlen + SIZE_OP_PREC_READ_NOT_END;
1313     break;
1314   case ANCHOR_LOOK_BEHIND:
1315     len = SIZE_OP_LOOK_BEHIND + tlen;
1316     break;
1317   case ANCHOR_LOOK_BEHIND_NOT:
1318     len = SIZE_OP_LOOK_BEHIND_NOT_START + tlen + SIZE_OP_LOOK_BEHIND_NOT_END;
1319     break;
1320 
1321   case ANCHOR_WORD_BOUNDARY:
1322   case ANCHOR_NO_WORD_BOUNDARY:
1323 #ifdef USE_WORD_BEGIN_END
1324   case ANCHOR_WORD_BEGIN:
1325   case ANCHOR_WORD_END:
1326 #endif
1327     len = SIZE_OP_WORD_BOUNDARY;
1328     break;
1329 
1330   case ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
1331   case ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
1332     len = SIZE_OPCODE;
1333     break;
1334 
1335   default:
1336     len = SIZE_OPCODE;
1337     break;
1338   }
1339 
1340   return len;
1341 }
1342 
1343 static int
compile_anchor_node(AnchorNode * node,regex_t * reg,ScanEnv * env)1344 compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env)
1345 {
1346   int r, len;
1347   enum OpCode op;
1348 
1349   switch (node->type) {
1350   case ANCHOR_BEGIN_BUF:      r = add_opcode(reg, OP_BEGIN_BUF);      break;
1351   case ANCHOR_END_BUF:        r = add_opcode(reg, OP_END_BUF);        break;
1352   case ANCHOR_BEGIN_LINE:     r = add_opcode(reg, OP_BEGIN_LINE);     break;
1353   case ANCHOR_END_LINE:       r = add_opcode(reg, OP_END_LINE);       break;
1354   case ANCHOR_SEMI_END_BUF:   r = add_opcode(reg, OP_SEMI_END_BUF);   break;
1355   case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
1356 
1357   case ANCHOR_WORD_BOUNDARY:
1358     op = OP_WORD_BOUNDARY;
1359   word:
1360     r = add_opcode(reg, op);
1361     if (r != 0) return r;
1362     r = add_mode(reg, (ModeType )node->ascii_mode);
1363     break;
1364 
1365   case ANCHOR_NO_WORD_BOUNDARY:
1366     op = OP_NO_WORD_BOUNDARY; goto word;
1367     break;
1368 #ifdef USE_WORD_BEGIN_END
1369   case ANCHOR_WORD_BEGIN:
1370     op = OP_WORD_BEGIN; goto word;
1371     break;
1372   case ANCHOR_WORD_END:
1373     op = OP_WORD_END; goto word;
1374     break;
1375 #endif
1376 
1377   case ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
1378     r = add_opcode(reg, OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY);
1379     break;
1380 
1381   case ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
1382     r = add_opcode(reg, OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY);
1383     break;
1384 
1385   case ANCHOR_PREC_READ:
1386     r = add_opcode(reg, OP_PREC_READ_START);
1387     if (r != 0) return r;
1388     r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);
1389     if (r != 0) return r;
1390     r = add_opcode(reg, OP_PREC_READ_END);
1391     break;
1392 
1393   case ANCHOR_PREC_READ_NOT:
1394     len = compile_length_tree(NODE_ANCHOR_BODY(node), reg);
1395     if (len < 0) return len;
1396     r = add_opcode_rel_addr(reg, OP_PREC_READ_NOT_START, len + SIZE_OP_PREC_READ_NOT_END);
1397     if (r != 0) return r;
1398     r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);
1399     if (r != 0) return r;
1400     r = add_opcode(reg, OP_PREC_READ_NOT_END);
1401     break;
1402 
1403   case ANCHOR_LOOK_BEHIND:
1404     {
1405       int n;
1406       r = add_opcode(reg, OP_LOOK_BEHIND);
1407       if (r != 0) return r;
1408       if (node->char_len < 0) {
1409         r = get_char_length_tree(NODE_ANCHOR_BODY(node), reg, &n);
1410         if (r != 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1411       }
1412       else
1413         n = node->char_len;
1414 
1415       r = add_length(reg, n);
1416       if (r != 0) return r;
1417       r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);
1418     }
1419     break;
1420 
1421   case ANCHOR_LOOK_BEHIND_NOT:
1422     {
1423       int n;
1424 
1425       len = compile_length_tree(NODE_ANCHOR_BODY(node), reg);
1426       r = add_opcode_rel_addr(reg, OP_LOOK_BEHIND_NOT_START,
1427                               len + SIZE_OP_LOOK_BEHIND_NOT_END);
1428       if (r != 0) return r;
1429       if (node->char_len < 0) {
1430         r = get_char_length_tree(NODE_ANCHOR_BODY(node), reg, &n);
1431         if (r != 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1432       }
1433       else
1434         n = node->char_len;
1435       r = add_length(reg, n);
1436       if (r != 0) return r;
1437       r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);
1438       if (r != 0) return r;
1439       r = add_opcode(reg, OP_LOOK_BEHIND_NOT_END);
1440     }
1441     break;
1442 
1443   default:
1444     return ONIGERR_TYPE_BUG;
1445     break;
1446   }
1447 
1448   return r;
1449 }
1450 
1451 static int
compile_gimmick_node(GimmickNode * node,regex_t * reg)1452 compile_gimmick_node(GimmickNode* node, regex_t* reg)
1453 {
1454   int r;
1455 
1456   switch (node->type) {
1457   case GIMMICK_FAIL:
1458     r = add_opcode(reg, OP_FAIL);
1459     break;
1460 
1461   case GIMMICK_KEEP:
1462     r = add_opcode(reg, OP_PUSH_SAVE_VAL);
1463     if (r != 0) return r;
1464     r = add_save_type(reg, SAVE_KEEP);
1465     if (r != 0) return r;
1466     r = add_mem_num(reg, node->id);
1467     break;
1468 
1469   case GIMMICK_SAVE:
1470     r = add_opcode(reg, OP_PUSH_SAVE_VAL);
1471     if (r != 0) return r;
1472     r = add_save_type(reg, node->detail_type);
1473     if (r != 0) return r;
1474     r = add_mem_num(reg, node->id);
1475     break;
1476 
1477   case GIMMICK_UPDATE_VAR:
1478     r = add_opcode(reg, OP_UPDATE_VAR);
1479     if (r != 0) return r;
1480     r = add_update_var_type(reg, node->detail_type);
1481     if (r != 0) return r;
1482     r = add_mem_num(reg, node->id);
1483     break;
1484 
1485 #ifdef USE_CALLOUT
1486   case GIMMICK_CALLOUT:
1487     switch (node->detail_type) {
1488     case ONIG_CALLOUT_OF_CONTENTS:
1489     case ONIG_CALLOUT_OF_NAME:
1490       {
1491         r = add_opcode(reg, (node->detail_type == ONIG_CALLOUT_OF_CONTENTS) ?
1492                                   OP_CALLOUT_CONTENTS : OP_CALLOUT_NAME);
1493         if (r != 0) return r;
1494         if (node->detail_type == ONIG_CALLOUT_OF_NAME) {
1495           r = add_mem_num(reg, node->id);
1496           if (r != 0) return r;
1497         }
1498         r = add_mem_num(reg, node->num);
1499         if (r != 0) return r;
1500       }
1501       break;
1502 
1503     default:
1504       r = ONIGERR_TYPE_BUG;
1505       break;
1506     }
1507 #endif
1508   }
1509 
1510   return r;
1511 }
1512 
1513 static int
compile_length_gimmick_node(GimmickNode * node,regex_t * reg)1514 compile_length_gimmick_node(GimmickNode* node, regex_t* reg)
1515 {
1516   int len;
1517 
1518   switch (node->type) {
1519   case GIMMICK_FAIL:
1520     len = SIZE_OP_FAIL;
1521     break;
1522 
1523   case GIMMICK_KEEP:
1524   case GIMMICK_SAVE:
1525     len = SIZE_OP_PUSH_SAVE_VAL;
1526     break;
1527 
1528   case GIMMICK_UPDATE_VAR:
1529     len = SIZE_OP_UPDATE_VAR;
1530     break;
1531 
1532 #ifdef USE_CALLOUT
1533   case GIMMICK_CALLOUT:
1534     switch (node->detail_type) {
1535     case ONIG_CALLOUT_OF_CONTENTS:
1536       len = SIZE_OP_CALLOUT_CONTENTS;
1537       break;
1538     case ONIG_CALLOUT_OF_NAME:
1539       len = SIZE_OP_CALLOUT_NAME;
1540       break;
1541 
1542     default:
1543       len = ONIGERR_TYPE_BUG;
1544       break;
1545     }
1546     break;
1547 #endif
1548   }
1549 
1550   return len;
1551 }
1552 
1553 static int
compile_length_tree(Node * node,regex_t * reg)1554 compile_length_tree(Node* node, regex_t* reg)
1555 {
1556   int len, r;
1557 
1558   switch (NODE_TYPE(node)) {
1559   case NODE_LIST:
1560     len = 0;
1561     do {
1562       r = compile_length_tree(NODE_CAR(node), reg);
1563       if (r < 0) return r;
1564       len += r;
1565     } while (IS_NOT_NULL(node = NODE_CDR(node)));
1566     r = len;
1567     break;
1568 
1569   case NODE_ALT:
1570     {
1571       int n;
1572 
1573       n = r = 0;
1574       do {
1575         r += compile_length_tree(NODE_CAR(node), reg);
1576         n++;
1577       } while (IS_NOT_NULL(node = NODE_CDR(node)));
1578       r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
1579     }
1580     break;
1581 
1582   case NODE_STRING:
1583     if (NODE_STRING_IS_RAW(node))
1584       r = compile_length_string_raw_node(STR_(node), reg);
1585     else
1586       r = compile_length_string_node(node, reg);
1587     break;
1588 
1589   case NODE_CCLASS:
1590     r = compile_length_cclass_node(CCLASS_(node), reg);
1591     break;
1592 
1593   case NODE_CTYPE:
1594     r = SIZE_OPCODE;
1595     break;
1596 
1597   case NODE_BACKREF:
1598     {
1599       BackRefNode* br = BACKREF_(node);
1600 
1601       if (NODE_IS_CHECKER(node)) {
1602 #ifdef USE_BACKREF_WITH_LEVEL
1603         if (NODE_IS_NEST_LEVEL(node)) {
1604           r = SIZE_OPCODE + SIZE_LENGTH + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1605         }
1606         else
1607 #endif
1608           r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1609       }
1610       else {
1611 #ifdef USE_BACKREF_WITH_LEVEL
1612         if (NODE_IS_NEST_LEVEL(node)) {
1613           r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
1614             SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1615         }
1616         else
1617 #endif
1618         if (br->back_num == 1) {
1619           r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
1620                ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
1621         }
1622         else {
1623           r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1624         }
1625       }
1626     }
1627     break;
1628 
1629 #ifdef USE_CALL
1630   case NODE_CALL:
1631     r = SIZE_OP_CALL;
1632     break;
1633 #endif
1634 
1635   case NODE_QUANT:
1636     r = compile_length_quantifier_node(QUANT_(node), reg);
1637     break;
1638 
1639   case NODE_ENCLOSURE:
1640     r = compile_length_enclosure_node(ENCLOSURE_(node), reg);
1641     break;
1642 
1643   case NODE_ANCHOR:
1644     r = compile_length_anchor_node(ANCHOR_(node), reg);
1645     break;
1646 
1647   case NODE_GIMMICK:
1648     r = compile_length_gimmick_node(GIMMICK_(node), reg);
1649     break;
1650 
1651   default:
1652     return ONIGERR_TYPE_BUG;
1653     break;
1654   }
1655 
1656   return r;
1657 }
1658 
1659 static int
compile_tree(Node * node,regex_t * reg,ScanEnv * env)1660 compile_tree(Node* node, regex_t* reg, ScanEnv* env)
1661 {
1662   int n, len, pos, r = 0;
1663 
1664   switch (NODE_TYPE(node)) {
1665   case NODE_LIST:
1666     do {
1667       r = compile_tree(NODE_CAR(node), reg, env);
1668     } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
1669     break;
1670 
1671   case NODE_ALT:
1672     {
1673       Node* x = node;
1674       len = 0;
1675       do {
1676         len += compile_length_tree(NODE_CAR(x), reg);
1677         if (IS_NOT_NULL(NODE_CDR(x))) {
1678           len += SIZE_OP_PUSH + SIZE_OP_JUMP;
1679         }
1680       } while (IS_NOT_NULL(x = NODE_CDR(x)));
1681       pos = reg->used + len;  /* goal position */
1682 
1683       do {
1684         len = compile_length_tree(NODE_CAR(node), reg);
1685         if (IS_NOT_NULL(NODE_CDR(node))) {
1686           enum OpCode push = NODE_IS_SUPER(node) ? OP_PUSH_SUPER : OP_PUSH;
1687           r = add_opcode_rel_addr(reg, push, len + SIZE_OP_JUMP);
1688           if (r != 0) break;
1689         }
1690         r = compile_tree(NODE_CAR(node), reg, env);
1691         if (r != 0) break;
1692         if (IS_NOT_NULL(NODE_CDR(node))) {
1693           len = pos - (reg->used + SIZE_OP_JUMP);
1694           r = add_opcode_rel_addr(reg, OP_JUMP, len);
1695           if (r != 0) break;
1696         }
1697       } while (IS_NOT_NULL(node = NODE_CDR(node)));
1698     }
1699     break;
1700 
1701   case NODE_STRING:
1702     if (NODE_STRING_IS_RAW(node))
1703       r = compile_string_raw_node(STR_(node), reg);
1704     else
1705       r = compile_string_node(node, reg);
1706     break;
1707 
1708   case NODE_CCLASS:
1709     r = compile_cclass_node(CCLASS_(node), reg);
1710     break;
1711 
1712   case NODE_CTYPE:
1713     {
1714       int op;
1715 
1716       switch (CTYPE_(node)->ctype) {
1717       case CTYPE_ANYCHAR:
1718         if (IS_MULTILINE(CTYPE_OPTION(node, reg)))
1719           r = add_opcode(reg, OP_ANYCHAR_ML);
1720         else
1721           r = add_opcode(reg, OP_ANYCHAR);
1722         break;
1723 
1724       case ONIGENC_CTYPE_WORD:
1725         if (CTYPE_(node)->ascii_mode == 0) {
1726           op = CTYPE_(node)->not != 0 ? OP_NO_WORD : OP_WORD;
1727         }
1728         else {
1729           op = CTYPE_(node)->not != 0 ? OP_NO_WORD_ASCII : OP_WORD_ASCII;
1730         }
1731         r = add_opcode(reg, op);
1732         break;
1733 
1734       default:
1735         return ONIGERR_TYPE_BUG;
1736         break;
1737       }
1738     }
1739     break;
1740 
1741   case NODE_BACKREF:
1742     {
1743       BackRefNode* br = BACKREF_(node);
1744 
1745       if (NODE_IS_CHECKER(node)) {
1746 #ifdef USE_BACKREF_WITH_LEVEL
1747         if (NODE_IS_NEST_LEVEL(node)) {
1748           r = add_opcode(reg, OP_BACKREF_CHECK_WITH_LEVEL);
1749           if (r != 0) return r;
1750           r = add_length(reg, br->nest_level);
1751           if (r != 0) return r;
1752         }
1753         else
1754 #endif
1755           {
1756             r = add_opcode(reg, OP_BACKREF_CHECK);
1757             if (r != 0) return r;
1758           }
1759 
1760         goto add_bacref_mems;
1761       }
1762       else {
1763 #ifdef USE_BACKREF_WITH_LEVEL
1764         if (NODE_IS_NEST_LEVEL(node)) {
1765           r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
1766           if (r != 0) return r;
1767           r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
1768           if (r != 0) return r;
1769           r = add_length(reg, br->nest_level);
1770           if (r != 0) return r;
1771 
1772           goto add_bacref_mems;
1773         }
1774         else
1775 #endif
1776         if (br->back_num == 1) {
1777           n = br->back_static[0];
1778           if (IS_IGNORECASE(reg->options)) {
1779             r = add_opcode(reg, OP_BACKREF_N_IC);
1780             if (r != 0) return r;
1781             r = add_mem_num(reg, n);
1782           }
1783           else {
1784             switch (n) {
1785             case 1:  r = add_opcode(reg, OP_BACKREF1); break;
1786             case 2:  r = add_opcode(reg, OP_BACKREF2); break;
1787             default:
1788               r = add_opcode(reg, OP_BACKREF_N);
1789               if (r != 0) return r;
1790               r = add_mem_num(reg, n);
1791               break;
1792             }
1793           }
1794         }
1795         else {
1796           int i;
1797           int* p;
1798 
1799           if (IS_IGNORECASE(reg->options)) {
1800             r = add_opcode(reg, OP_BACKREF_MULTI_IC);
1801           }
1802           else {
1803             r = add_opcode(reg, OP_BACKREF_MULTI);
1804           }
1805           if (r != 0) return r;
1806 
1807         add_bacref_mems:
1808           r = add_length(reg, br->back_num);
1809           if (r != 0) return r;
1810           p = BACKREFS_P(br);
1811           for (i = br->back_num - 1; i >= 0; i--) {
1812             r = add_mem_num(reg, p[i]);
1813             if (r != 0) return r;
1814           }
1815         }
1816       }
1817     }
1818     break;
1819 
1820 #ifdef USE_CALL
1821   case NODE_CALL:
1822     r = compile_call(CALL_(node), reg, env);
1823     break;
1824 #endif
1825 
1826   case NODE_QUANT:
1827     r = compile_quantifier_node(QUANT_(node), reg, env);
1828     break;
1829 
1830   case NODE_ENCLOSURE:
1831     r = compile_enclosure_node(ENCLOSURE_(node), reg, env);
1832     break;
1833 
1834   case NODE_ANCHOR:
1835     r = compile_anchor_node(ANCHOR_(node), reg, env);
1836     break;
1837 
1838   case NODE_GIMMICK:
1839     r = compile_gimmick_node(GIMMICK_(node), reg);
1840     break;
1841 
1842   default:
1843 #ifdef ONIG_DEBUG
1844     fprintf(stderr, "compile_tree: undefined node type %d\n", NODE_TYPE(node));
1845 #endif
1846     break;
1847   }
1848 
1849   return r;
1850 }
1851 
1852 static int
noname_disable_map(Node ** plink,GroupNumRemap * map,int * counter)1853 noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
1854 {
1855   int r = 0;
1856   Node* node = *plink;
1857 
1858   switch (NODE_TYPE(node)) {
1859   case NODE_LIST:
1860   case NODE_ALT:
1861     do {
1862       r = noname_disable_map(&(NODE_CAR(node)), map, counter);
1863     } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
1864     break;
1865 
1866   case NODE_QUANT:
1867     {
1868       Node** ptarget = &(NODE_BODY(node));
1869       Node*  old = *ptarget;
1870       r = noname_disable_map(ptarget, map, counter);
1871       if (*ptarget != old && NODE_TYPE(*ptarget) == NODE_QUANT) {
1872         onig_reduce_nested_quantifier(node, *ptarget);
1873       }
1874     }
1875     break;
1876 
1877   case NODE_ENCLOSURE:
1878     {
1879       EnclosureNode* en = ENCLOSURE_(node);
1880       if (en->type == ENCLOSURE_MEMORY) {
1881         if (NODE_IS_NAMED_GROUP(node)) {
1882           (*counter)++;
1883           map[en->m.regnum].new_val = *counter;
1884           en->m.regnum = *counter;
1885           r = noname_disable_map(&(NODE_BODY(node)), map, counter);
1886         }
1887         else {
1888           *plink = NODE_BODY(node);
1889           NODE_BODY(node) = NULL_NODE;
1890           onig_node_free(node);
1891           r = noname_disable_map(plink, map, counter);
1892         }
1893       }
1894       else if (en->type == ENCLOSURE_IF_ELSE) {
1895         r = noname_disable_map(&(NODE_ENCLOSURE_BODY(en)), map, counter);
1896         if (r != 0) return r;
1897         if (IS_NOT_NULL(en->te.Then)) {
1898           r = noname_disable_map(&(en->te.Then), map, counter);
1899           if (r != 0) return r;
1900         }
1901         if (IS_NOT_NULL(en->te.Else)) {
1902           r = noname_disable_map(&(en->te.Else), map, counter);
1903           if (r != 0) return r;
1904         }
1905       }
1906       else
1907         r = noname_disable_map(&(NODE_BODY(node)), map, counter);
1908     }
1909     break;
1910 
1911   case NODE_ANCHOR:
1912     if (IS_NOT_NULL(NODE_BODY(node)))
1913       r = noname_disable_map(&(NODE_BODY(node)), map, counter);
1914     break;
1915 
1916   default:
1917     break;
1918   }
1919 
1920   return r;
1921 }
1922 
1923 static int
renumber_node_backref(Node * node,GroupNumRemap * map)1924 renumber_node_backref(Node* node, GroupNumRemap* map)
1925 {
1926   int i, pos, n, old_num;
1927   int *backs;
1928   BackRefNode* bn = BACKREF_(node);
1929 
1930   if (! NODE_IS_BY_NAME(node))
1931     return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
1932 
1933   old_num = bn->back_num;
1934   if (IS_NULL(bn->back_dynamic))
1935     backs = bn->back_static;
1936   else
1937     backs = bn->back_dynamic;
1938 
1939   for (i = 0, pos = 0; i < old_num; i++) {
1940     n = map[backs[i]].new_val;
1941     if (n > 0) {
1942       backs[pos] = n;
1943       pos++;
1944     }
1945   }
1946 
1947   bn->back_num = pos;
1948   return 0;
1949 }
1950 
1951 static int
renumber_by_map(Node * node,GroupNumRemap * map)1952 renumber_by_map(Node* node, GroupNumRemap* map)
1953 {
1954   int r = 0;
1955 
1956   switch (NODE_TYPE(node)) {
1957   case NODE_LIST:
1958   case NODE_ALT:
1959     do {
1960       r = renumber_by_map(NODE_CAR(node), map);
1961     } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
1962     break;
1963 
1964   case NODE_QUANT:
1965     r = renumber_by_map(NODE_BODY(node), map);
1966     break;
1967 
1968   case NODE_ENCLOSURE:
1969     {
1970       EnclosureNode* en = ENCLOSURE_(node);
1971 
1972       r = renumber_by_map(NODE_BODY(node), map);
1973       if (r != 0) return r;
1974 
1975       if (en->type == ENCLOSURE_IF_ELSE) {
1976         if (IS_NOT_NULL(en->te.Then)) {
1977           r = renumber_by_map(en->te.Then, map);
1978           if (r != 0) return r;
1979         }
1980         if (IS_NOT_NULL(en->te.Else)) {
1981           r = renumber_by_map(en->te.Else, map);
1982           if (r != 0) return r;
1983         }
1984       }
1985     }
1986     break;
1987 
1988   case NODE_BACKREF:
1989     r = renumber_node_backref(node, map);
1990     break;
1991 
1992   case NODE_ANCHOR:
1993     if (IS_NOT_NULL(NODE_BODY(node)))
1994       r = renumber_by_map(NODE_BODY(node), map);
1995     break;
1996 
1997   default:
1998     break;
1999   }
2000 
2001   return r;
2002 }
2003 
2004 static int
numbered_ref_check(Node * node)2005 numbered_ref_check(Node* node)
2006 {
2007   int r = 0;
2008 
2009   switch (NODE_TYPE(node)) {
2010   case NODE_LIST:
2011   case NODE_ALT:
2012     do {
2013       r = numbered_ref_check(NODE_CAR(node));
2014     } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
2015     break;
2016 
2017   case NODE_ANCHOR:
2018     if (IS_NULL(NODE_BODY(node)))
2019       break;
2020     /* fall */
2021   case NODE_QUANT:
2022     r = numbered_ref_check(NODE_BODY(node));
2023     break;
2024 
2025   case NODE_ENCLOSURE:
2026     {
2027       EnclosureNode* en = ENCLOSURE_(node);
2028 
2029       r = numbered_ref_check(NODE_BODY(node));
2030       if (r != 0) return r;
2031 
2032       if (en->type == ENCLOSURE_IF_ELSE) {
2033         if (IS_NOT_NULL(en->te.Then)) {
2034           r = numbered_ref_check(en->te.Then);
2035           if (r != 0) return r;
2036         }
2037         if (IS_NOT_NULL(en->te.Else)) {
2038           r = numbered_ref_check(en->te.Else);
2039           if (r != 0) return r;
2040         }
2041       }
2042     }
2043 
2044     break;
2045 
2046   case NODE_BACKREF:
2047     if (! NODE_IS_BY_NAME(node))
2048       return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
2049     break;
2050 
2051   default:
2052     break;
2053   }
2054 
2055   return r;
2056 }
2057 
2058 static int
disable_noname_group_capture(Node ** root,regex_t * reg,ScanEnv * env)2059 disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
2060 {
2061   int r, i, pos, counter;
2062   int result;
2063   MemStatusType loc;
2064   GroupNumRemap* map;
2065 
2066   map = (GroupNumRemap* )xmalloc(sizeof(GroupNumRemap) * (env->num_mem + 1));
2067   CHECK_NULL_RETURN_MEMERR(map);
2068   for (i = 1; i <= env->num_mem; i++) {
2069     map[i].new_val = 0;
2070   }
2071   counter = 0;
2072   r = noname_disable_map(root, map, &counter);
2073   if (r != 0) return r;
2074 
2075   r = renumber_by_map(*root, map);
2076   if (r != 0) return r;
2077 
2078   for (i = 1, pos = 1; i <= env->num_mem; i++) {
2079     if (map[i].new_val > 0) {
2080       SCANENV_MEMENV(env)[pos] = SCANENV_MEMENV(env)[i];
2081       pos++;
2082     }
2083   }
2084 
2085   loc = env->capture_history;
2086   MEM_STATUS_CLEAR(env->capture_history);
2087   for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
2088     if (MEM_STATUS_AT(loc, i)) {
2089       MEM_STATUS_ON_SIMPLE(env->capture_history, map[i].new_val);
2090     }
2091   }
2092 
2093   env->num_mem = env->num_named;
2094   reg->num_mem = env->num_named;
2095   result = onig_renumber_name_table(reg, map);
2096   xfree(map);
2097   return result;
2098 }
2099 
2100 #ifdef USE_CALL
2101 static int
fix_unset_addr_list(UnsetAddrList * uslist,regex_t * reg)2102 fix_unset_addr_list(UnsetAddrList* uslist, regex_t* reg)
2103 {
2104   int i, offset;
2105   EnclosureNode* en;
2106   AbsAddrType addr;
2107 
2108   for (i = 0; i < uslist->num; i++) {
2109     if (! NODE_IS_ADDR_FIXED(uslist->us[i].target))
2110       return ONIGERR_PARSER_BUG;
2111 
2112     en = ENCLOSURE_(uslist->us[i].target);
2113     addr   = en->m.called_addr;
2114     offset = uslist->us[i].offset;
2115 
2116     BB_WRITE(reg, offset, &addr, SIZE_ABSADDR);
2117   }
2118   return 0;
2119 }
2120 #endif
2121 
2122 
2123 #define GET_CHAR_LEN_VARLEN           -1
2124 #define GET_CHAR_LEN_TOP_ALT_VARLEN   -2
2125 
2126 /* fixed size pattern node only */
2127 static int
get_char_length_tree1(Node * node,regex_t * reg,int * len,int level)2128 get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
2129 {
2130   int tlen;
2131   int r = 0;
2132 
2133   level++;
2134   *len = 0;
2135   switch (NODE_TYPE(node)) {
2136   case NODE_LIST:
2137     do {
2138       r = get_char_length_tree1(NODE_CAR(node), reg, &tlen, level);
2139       if (r == 0)
2140         *len = distance_add(*len, tlen);
2141     } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
2142     break;
2143 
2144   case NODE_ALT:
2145     {
2146       int tlen2;
2147       int varlen = 0;
2148 
2149       r = get_char_length_tree1(NODE_CAR(node), reg, &tlen, level);
2150       while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))) {
2151         r = get_char_length_tree1(NODE_CAR(node), reg, &tlen2, level);
2152         if (r == 0) {
2153           if (tlen != tlen2)
2154             varlen = 1;
2155         }
2156       }
2157       if (r == 0) {
2158         if (varlen != 0) {
2159           if (level == 1)
2160             r = GET_CHAR_LEN_TOP_ALT_VARLEN;
2161           else
2162             r = GET_CHAR_LEN_VARLEN;
2163         }
2164         else
2165           *len = tlen;
2166       }
2167     }
2168     break;
2169 
2170   case NODE_STRING:
2171     {
2172       StrNode* sn = STR_(node);
2173       UChar *s = sn->s;
2174 
2175       while (s < sn->end) {
2176         s += enclen(reg->enc, s);
2177         (*len)++;
2178       }
2179     }
2180     break;
2181 
2182   case NODE_QUANT:
2183     {
2184       QuantNode* qn = QUANT_(node);
2185 
2186       if (qn->lower == qn->upper) {
2187         if (qn->upper == 0) {
2188           *len = 0;
2189         }
2190         else {
2191           r = get_char_length_tree1(NODE_BODY(node), reg, &tlen, level);
2192           if (r == 0)
2193             *len = distance_multiply(tlen, qn->lower);
2194         }
2195       }
2196       else
2197         r = GET_CHAR_LEN_VARLEN;
2198     }
2199     break;
2200 
2201 #ifdef USE_CALL
2202   case NODE_CALL:
2203     if (! NODE_IS_RECURSION(node))
2204       r = get_char_length_tree1(NODE_BODY(node), reg, len, level);
2205     else
2206       r = GET_CHAR_LEN_VARLEN;
2207     break;
2208 #endif
2209 
2210   case NODE_CTYPE:
2211   case NODE_CCLASS:
2212     *len = 1;
2213     break;
2214 
2215   case NODE_ENCLOSURE:
2216     {
2217       EnclosureNode* en = ENCLOSURE_(node);
2218 
2219       switch (en->type) {
2220       case ENCLOSURE_MEMORY:
2221 #ifdef USE_CALL
2222         if (NODE_IS_CLEN_FIXED(node))
2223           *len = en->char_len;
2224         else {
2225           r = get_char_length_tree1(NODE_BODY(node), reg, len, level);
2226           if (r == 0) {
2227             en->char_len = *len;
2228             NODE_STATUS_ADD(node, CLEN_FIXED);
2229           }
2230         }
2231         break;
2232 #endif
2233       case ENCLOSURE_OPTION:
2234       case ENCLOSURE_STOP_BACKTRACK:
2235         r = get_char_length_tree1(NODE_BODY(node), reg, len, level);
2236         break;
2237       case ENCLOSURE_IF_ELSE:
2238         {
2239           int clen, elen;
2240 
2241           r = get_char_length_tree1(NODE_BODY(node), reg, &clen, level);
2242           if (r == 0) {
2243             if (IS_NOT_NULL(en->te.Then)) {
2244               r = get_char_length_tree1(en->te.Then, reg, &tlen, level);
2245               if (r != 0) break;
2246             }
2247             else tlen = 0;
2248             if (IS_NOT_NULL(en->te.Else)) {
2249               r = get_char_length_tree1(en->te.Else, reg, &elen, level);
2250               if (r != 0) break;
2251             }
2252             else elen = 0;
2253 
2254             if (clen + tlen != elen) {
2255               r = GET_CHAR_LEN_VARLEN;
2256             }
2257             else {
2258               *len = elen;
2259             }
2260           }
2261         }
2262         break;
2263 
2264       default:
2265         break;
2266       }
2267     }
2268     break;
2269 
2270   case NODE_ANCHOR:
2271   case NODE_GIMMICK:
2272     break;
2273 
2274   case NODE_BACKREF:
2275     if (NODE_IS_CHECKER(node))
2276       break;
2277     /* fall */
2278   default:
2279     r = GET_CHAR_LEN_VARLEN;
2280     break;
2281   }
2282 
2283   return r;
2284 }
2285 
2286 static int
get_char_length_tree(Node * node,regex_t * reg,int * len)2287 get_char_length_tree(Node* node, regex_t* reg, int* len)
2288 {
2289   return get_char_length_tree1(node, reg, len, 0);
2290 }
2291 
2292 /* x is not included y ==>  1 : 0 */
2293 static int
is_exclusive(Node * x,Node * y,regex_t * reg)2294 is_exclusive(Node* x, Node* y, regex_t* reg)
2295 {
2296   int i, len;
2297   OnigCodePoint code;
2298   UChar *p;
2299   NodeType ytype;
2300 
2301  retry:
2302   ytype = NODE_TYPE(y);
2303   switch (NODE_TYPE(x)) {
2304   case NODE_CTYPE:
2305     {
2306       if (CTYPE_(x)->ctype == CTYPE_ANYCHAR ||
2307           CTYPE_(y)->ctype == CTYPE_ANYCHAR)
2308         break;
2309 
2310       switch (ytype) {
2311       case NODE_CTYPE:
2312         if (CTYPE_(y)->ctype == CTYPE_(x)->ctype &&
2313             CTYPE_(y)->not   != CTYPE_(x)->not &&
2314             CTYPE_(y)->ascii_mode == CTYPE_(x)->ascii_mode)
2315           return 1;
2316         else
2317           return 0;
2318         break;
2319 
2320       case NODE_CCLASS:
2321       swap:
2322         {
2323           Node* tmp;
2324           tmp = x; x = y; y = tmp;
2325           goto retry;
2326         }
2327         break;
2328 
2329       case NODE_STRING:
2330         goto swap;
2331         break;
2332 
2333       default:
2334         break;
2335       }
2336     }
2337     break;
2338 
2339   case NODE_CCLASS:
2340     {
2341       int range;
2342       CClassNode* xc = CCLASS_(x);
2343 
2344       switch (ytype) {
2345       case NODE_CTYPE:
2346         switch (CTYPE_(y)->ctype) {
2347         case CTYPE_ANYCHAR:
2348           return 0;
2349           break;
2350 
2351         case ONIGENC_CTYPE_WORD:
2352           if (CTYPE_(y)->not == 0) {
2353             if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
2354               range = CTYPE_(y)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE;
2355               for (i = 0; i < range; i++) {
2356                 if (BITSET_AT(xc->bs, i)) {
2357                   if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;
2358                 }
2359               }
2360               return 1;
2361             }
2362             return 0;
2363           }
2364           else {
2365             if (IS_NOT_NULL(xc->mbuf)) return 0;
2366             if (IS_NCCLASS_NOT(xc)) return 0;
2367 
2368             range = CTYPE_(y)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE;
2369             for (i = 0; i < range; i++) {
2370               if (! ONIGENC_IS_CODE_WORD(reg->enc, i)) {
2371                 if (BITSET_AT(xc->bs, i))
2372                   return 0;
2373               }
2374             }
2375             for (i = range; i < SINGLE_BYTE_SIZE; i++) {
2376               if (BITSET_AT(xc->bs, i)) return 0;
2377             }
2378             return 1;
2379           }
2380           break;
2381 
2382         default:
2383           break;
2384         }
2385         break;
2386 
2387       case NODE_CCLASS:
2388         {
2389           int v;
2390           CClassNode* yc = CCLASS_(y);
2391 
2392           for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2393             v = BITSET_AT(xc->bs, i);
2394             if ((v != 0 && !IS_NCCLASS_NOT(xc)) || (v == 0 && IS_NCCLASS_NOT(xc))) {
2395               v = BITSET_AT(yc->bs, i);
2396               if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
2397                   (v == 0 && IS_NCCLASS_NOT(yc)))
2398                 return 0;
2399             }
2400           }
2401           if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
2402               (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
2403             return 1;
2404           return 0;
2405         }
2406         break;
2407 
2408       case NODE_STRING:
2409         goto swap;
2410         break;
2411 
2412       default:
2413         break;
2414       }
2415     }
2416     break;
2417 
2418   case NODE_STRING:
2419     {
2420       StrNode* xs = STR_(x);
2421 
2422       if (NODE_STRING_LEN(x) == 0)
2423         break;
2424 
2425       switch (ytype) {
2426       case NODE_CTYPE:
2427         switch (CTYPE_(y)->ctype) {
2428         case CTYPE_ANYCHAR:
2429           break;
2430 
2431         case ONIGENC_CTYPE_WORD:
2432           if (CTYPE_(y)->ascii_mode == 0) {
2433             if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
2434               return CTYPE_(y)->not;
2435             else
2436               return !(CTYPE_(y)->not);
2437           }
2438           else {
2439             if (ONIGENC_IS_MBC_WORD_ASCII(reg->enc, xs->s, xs->end))
2440               return CTYPE_(y)->not;
2441             else
2442               return !(CTYPE_(y)->not);
2443           }
2444           break;
2445         default:
2446           break;
2447         }
2448         break;
2449 
2450       case NODE_CCLASS:
2451         {
2452           CClassNode* cc = CCLASS_(y);
2453 
2454           code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
2455                                      xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
2456           return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
2457         }
2458         break;
2459 
2460       case NODE_STRING:
2461         {
2462           UChar *q;
2463           StrNode* ys = STR_(y);
2464 
2465           len = NODE_STRING_LEN(x);
2466           if (len > NODE_STRING_LEN(y)) len = NODE_STRING_LEN(y);
2467           if (NODE_STRING_IS_AMBIG(x) || NODE_STRING_IS_AMBIG(y)) {
2468             /* tiny version */
2469             return 0;
2470           }
2471           else {
2472             for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, q++) {
2473               if (*p != *q) return 1;
2474             }
2475           }
2476         }
2477         break;
2478 
2479       default:
2480         break;
2481       }
2482     }
2483     break;
2484 
2485   default:
2486     break;
2487   }
2488 
2489   return 0;
2490 }
2491 
2492 static Node*
get_head_value_node(Node * node,int exact,regex_t * reg)2493 get_head_value_node(Node* node, int exact, regex_t* reg)
2494 {
2495   Node* n = NULL_NODE;
2496 
2497   switch (NODE_TYPE(node)) {
2498   case NODE_BACKREF:
2499   case NODE_ALT:
2500 #ifdef USE_CALL
2501   case NODE_CALL:
2502 #endif
2503     break;
2504 
2505   case NODE_CTYPE:
2506     if (CTYPE_(node)->ctype == CTYPE_ANYCHAR)
2507       break;
2508     /* fall */
2509   case NODE_CCLASS:
2510     if (exact == 0) {
2511       n = node;
2512     }
2513     break;
2514 
2515   case NODE_LIST:
2516     n = get_head_value_node(NODE_CAR(node), exact, reg);
2517     break;
2518 
2519   case NODE_STRING:
2520     {
2521       StrNode* sn = STR_(node);
2522 
2523       if (sn->end <= sn->s)
2524         break;
2525 
2526       if (exact != 0 &&
2527           !NODE_STRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
2528       }
2529       else {
2530         n = node;
2531       }
2532     }
2533     break;
2534 
2535   case NODE_QUANT:
2536     {
2537       QuantNode* qn = QUANT_(node);
2538       if (qn->lower > 0) {
2539         if (IS_NOT_NULL(qn->head_exact))
2540           n = qn->head_exact;
2541         else
2542           n = get_head_value_node(NODE_BODY(node), exact, reg);
2543       }
2544     }
2545     break;
2546 
2547   case NODE_ENCLOSURE:
2548     {
2549       EnclosureNode* en = ENCLOSURE_(node);
2550       switch (en->type) {
2551       case ENCLOSURE_OPTION:
2552         {
2553           OnigOptionType options = reg->options;
2554 
2555           reg->options = ENCLOSURE_(node)->o.options;
2556           n = get_head_value_node(NODE_BODY(node), exact, reg);
2557           reg->options = options;
2558         }
2559         break;
2560 
2561       case ENCLOSURE_MEMORY:
2562       case ENCLOSURE_STOP_BACKTRACK:
2563       case ENCLOSURE_IF_ELSE:
2564         n = get_head_value_node(NODE_BODY(node), exact, reg);
2565         break;
2566       }
2567     }
2568     break;
2569 
2570   case NODE_ANCHOR:
2571     if (ANCHOR_(node)->type == ANCHOR_PREC_READ)
2572       n = get_head_value_node(NODE_BODY(node), exact, reg);
2573     break;
2574 
2575   case NODE_GIMMICK:
2576   default:
2577     break;
2578   }
2579 
2580   return n;
2581 }
2582 
2583 static int
check_type_tree(Node * node,int type_mask,int enclosure_mask,int anchor_mask)2584 check_type_tree(Node* node, int type_mask, int enclosure_mask, int anchor_mask)
2585 {
2586   NodeType type;
2587   int r = 0;
2588 
2589   type = NODE_TYPE(node);
2590   if ((NODE_TYPE2BIT(type) & type_mask) == 0)
2591     return 1;
2592 
2593   switch (type) {
2594   case NODE_LIST:
2595   case NODE_ALT:
2596     do {
2597       r = check_type_tree(NODE_CAR(node), type_mask, enclosure_mask,
2598                           anchor_mask);
2599     } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
2600     break;
2601 
2602   case NODE_QUANT:
2603     r = check_type_tree(NODE_BODY(node), type_mask, enclosure_mask, anchor_mask);
2604     break;
2605 
2606   case NODE_ENCLOSURE:
2607     {
2608       EnclosureNode* en = ENCLOSURE_(node);
2609       if (((1<<en->type) & enclosure_mask) == 0)
2610         return 1;
2611 
2612       r = check_type_tree(NODE_BODY(node), type_mask, enclosure_mask, anchor_mask);
2613       if (r == 0 && en->type == ENCLOSURE_IF_ELSE) {
2614         if (IS_NOT_NULL(en->te.Then)) {
2615           r = check_type_tree(en->te.Then, type_mask, enclosure_mask, anchor_mask);
2616           if (r != 0) break;
2617         }
2618         if (IS_NOT_NULL(en->te.Else)) {
2619           r = check_type_tree(en->te.Else, type_mask, enclosure_mask, anchor_mask);
2620         }
2621       }
2622     }
2623     break;
2624 
2625   case NODE_ANCHOR:
2626     type = ANCHOR_(node)->type;
2627     if ((type & anchor_mask) == 0)
2628       return 1;
2629 
2630     if (IS_NOT_NULL(NODE_BODY(node)))
2631       r = check_type_tree(NODE_BODY(node), type_mask, enclosure_mask, anchor_mask);
2632     break;
2633 
2634   case NODE_GIMMICK:
2635   default:
2636     break;
2637   }
2638   return r;
2639 }
2640 
2641 static OnigLen
tree_min_len(Node * node,ScanEnv * env)2642 tree_min_len(Node* node, ScanEnv* env)
2643 {
2644   OnigLen len;
2645   OnigLen tmin;
2646 
2647   len = 0;
2648   switch (NODE_TYPE(node)) {
2649   case NODE_BACKREF:
2650     if (! NODE_IS_CHECKER(node)) {
2651       int i;
2652       int* backs;
2653       MemEnv* mem_env = SCANENV_MEMENV(env);
2654       BackRefNode* br = BACKREF_(node);
2655       if (NODE_IS_RECURSION(node)) break;
2656 
2657       backs = BACKREFS_P(br);
2658       len = tree_min_len(mem_env[backs[0]].node, env);
2659       for (i = 1; i < br->back_num; i++) {
2660         tmin = tree_min_len(mem_env[backs[i]].node, env);
2661         if (len > tmin) len = tmin;
2662       }
2663     }
2664     break;
2665 
2666 #ifdef USE_CALL
2667   case NODE_CALL:
2668     {
2669       Node* t = NODE_BODY(node);
2670       if (NODE_IS_RECURSION(node)) {
2671         if (NODE_IS_MIN_FIXED(t))
2672           len = ENCLOSURE_(t)->min_len;
2673       }
2674       else
2675         len = tree_min_len(t, env);
2676     }
2677     break;
2678 #endif
2679 
2680   case NODE_LIST:
2681     do {
2682       tmin = tree_min_len(NODE_CAR(node), env);
2683       len = distance_add(len, tmin);
2684     } while (IS_NOT_NULL(node = NODE_CDR(node)));
2685     break;
2686 
2687   case NODE_ALT:
2688     {
2689       Node *x, *y;
2690       y = node;
2691       do {
2692         x = NODE_CAR(y);
2693         tmin = tree_min_len(x, env);
2694         if (y == node) len = tmin;
2695         else if (len > tmin) len = tmin;
2696       } while (IS_NOT_NULL(y = NODE_CDR(y)));
2697     }
2698     break;
2699 
2700   case NODE_STRING:
2701     {
2702       StrNode* sn = STR_(node);
2703       len = (int )(sn->end - sn->s);
2704     }
2705     break;
2706 
2707   case NODE_CTYPE:
2708   case NODE_CCLASS:
2709     len = ONIGENC_MBC_MINLEN(env->enc);
2710     break;
2711 
2712   case NODE_QUANT:
2713     {
2714       QuantNode* qn = QUANT_(node);
2715 
2716       if (qn->lower > 0) {
2717         len = tree_min_len(NODE_BODY(node), env);
2718         len = distance_multiply(len, qn->lower);
2719       }
2720     }
2721     break;
2722 
2723   case NODE_ENCLOSURE:
2724     {
2725       EnclosureNode* en = ENCLOSURE_(node);
2726       switch (en->type) {
2727       case ENCLOSURE_MEMORY:
2728         if (NODE_IS_MIN_FIXED(node))
2729           len = en->min_len;
2730         else {
2731           if (NODE_IS_MARK1(node))
2732             len = 0;  /* recursive */
2733           else {
2734             NODE_STATUS_ADD(node, MARK1);
2735             len = tree_min_len(NODE_BODY(node), env);
2736             NODE_STATUS_REMOVE(node, MARK1);
2737 
2738             en->min_len = len;
2739             NODE_STATUS_ADD(node, MIN_FIXED);
2740           }
2741         }
2742         break;
2743 
2744       case ENCLOSURE_OPTION:
2745       case ENCLOSURE_STOP_BACKTRACK:
2746         len = tree_min_len(NODE_BODY(node), env);
2747         break;
2748       case ENCLOSURE_IF_ELSE:
2749         {
2750           OnigLen elen;
2751 
2752           len = tree_min_len(NODE_BODY(node), env);
2753           if (IS_NOT_NULL(en->te.Then))
2754             len += tree_min_len(en->te.Then, env);
2755           if (IS_NOT_NULL(en->te.Else))
2756             elen = tree_min_len(en->te.Else, env);
2757           else elen = 0;
2758 
2759           if (elen < len) len = elen;
2760         }
2761         break;
2762       }
2763     }
2764     break;
2765 
2766   case NODE_GIMMICK:
2767     {
2768       GimmickNode* g = GIMMICK_(node);
2769       if (g->type == GIMMICK_FAIL) {
2770         len = INFINITE_LEN;
2771         break;
2772       }
2773     }
2774     /* fall */
2775   case NODE_ANCHOR:
2776   default:
2777     break;
2778   }
2779 
2780   return len;
2781 }
2782 
2783 static OnigLen
tree_max_len(Node * node,ScanEnv * env)2784 tree_max_len(Node* node, ScanEnv* env)
2785 {
2786   OnigLen len;
2787   OnigLen tmax;
2788 
2789   len = 0;
2790   switch (NODE_TYPE(node)) {
2791   case NODE_LIST:
2792     do {
2793       tmax = tree_max_len(NODE_CAR(node), env);
2794       len = distance_add(len, tmax);
2795     } while (IS_NOT_NULL(node = NODE_CDR(node)));
2796     break;
2797 
2798   case NODE_ALT:
2799     do {
2800       tmax = tree_max_len(NODE_CAR(node), env);
2801       if (len < tmax) len = tmax;
2802     } while (IS_NOT_NULL(node = NODE_CDR(node)));
2803     break;
2804 
2805   case NODE_STRING:
2806     {
2807       StrNode* sn = STR_(node);
2808       len = (OnigLen )(sn->end - sn->s);
2809     }
2810     break;
2811 
2812   case NODE_CTYPE:
2813   case NODE_CCLASS:
2814     len = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2815     break;
2816 
2817   case NODE_BACKREF:
2818     if (! NODE_IS_CHECKER(node)) {
2819       int i;
2820       int* backs;
2821       MemEnv* mem_env = SCANENV_MEMENV(env);
2822       BackRefNode* br = BACKREF_(node);
2823       if (NODE_IS_RECURSION(node)) {
2824         len = INFINITE_LEN;
2825         break;
2826       }
2827       backs = BACKREFS_P(br);
2828       for (i = 0; i < br->back_num; i++) {
2829         tmax = tree_max_len(mem_env[backs[i]].node, env);
2830         if (len < tmax) len = tmax;
2831       }
2832     }
2833     break;
2834 
2835 #ifdef USE_CALL
2836   case NODE_CALL:
2837     if (! NODE_IS_RECURSION(node))
2838       len = tree_max_len(NODE_BODY(node), env);
2839     else
2840       len = INFINITE_LEN;
2841     break;
2842 #endif
2843 
2844   case NODE_QUANT:
2845     {
2846       QuantNode* qn = QUANT_(node);
2847 
2848       if (qn->upper != 0) {
2849         len = tree_max_len(NODE_BODY(node), env);
2850         if (len != 0) {
2851           if (! IS_REPEAT_INFINITE(qn->upper))
2852             len = distance_multiply(len, qn->upper);
2853           else
2854             len = INFINITE_LEN;
2855         }
2856       }
2857     }
2858     break;
2859 
2860   case NODE_ENCLOSURE:
2861     {
2862       EnclosureNode* en = ENCLOSURE_(node);
2863       switch (en->type) {
2864       case ENCLOSURE_MEMORY:
2865         if (NODE_IS_MAX_FIXED(node))
2866           len = en->max_len;
2867         else {
2868           if (NODE_IS_MARK1(node))
2869             len = INFINITE_LEN;
2870           else {
2871             NODE_STATUS_ADD(node, MARK1);
2872             len = tree_max_len(NODE_BODY(node), env);
2873             NODE_STATUS_REMOVE(node, MARK1);
2874 
2875             en->max_len = len;
2876             NODE_STATUS_ADD(node, MAX_FIXED);
2877           }
2878         }
2879         break;
2880 
2881       case ENCLOSURE_OPTION:
2882       case ENCLOSURE_STOP_BACKTRACK:
2883         len = tree_max_len(NODE_BODY(node), env);
2884         break;
2885       case ENCLOSURE_IF_ELSE:
2886         {
2887           OnigLen tlen, elen;
2888 
2889           len = tree_max_len(NODE_BODY(node), env);
2890           if (IS_NOT_NULL(en->te.Then)) {
2891             tlen = tree_max_len(en->te.Then, env);
2892             len = distance_add(len, tlen);
2893           }
2894           if (IS_NOT_NULL(en->te.Else))
2895             elen = tree_max_len(en->te.Else, env);
2896           else elen = 0;
2897 
2898           if (elen > len) len = elen;
2899         }
2900         break;
2901       }
2902     }
2903     break;
2904 
2905   case NODE_ANCHOR:
2906   case NODE_GIMMICK:
2907   default:
2908     break;
2909   }
2910 
2911   return len;
2912 }
2913 
2914 static int
check_backrefs(Node * node,ScanEnv * env)2915 check_backrefs(Node* node, ScanEnv* env)
2916 {
2917   int r;
2918 
2919   switch (NODE_TYPE(node)) {
2920   case NODE_LIST:
2921   case NODE_ALT:
2922     do {
2923       r = check_backrefs(NODE_CAR(node), env);
2924     } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
2925     break;
2926 
2927   case NODE_ANCHOR:
2928     if (! ANCHOR_HAS_BODY(ANCHOR_(node))) {
2929       r = 0;
2930       break;
2931     }
2932     /* fall */
2933   case NODE_QUANT:
2934     r = check_backrefs(NODE_BODY(node), env);
2935     break;
2936 
2937   case NODE_ENCLOSURE:
2938     r = check_backrefs(NODE_BODY(node), env);
2939     {
2940       EnclosureNode* en = ENCLOSURE_(node);
2941 
2942       if (en->type == ENCLOSURE_IF_ELSE) {
2943         if (r != 0) return r;
2944         if (IS_NOT_NULL(en->te.Then)) {
2945           r = check_backrefs(en->te.Then, env);
2946           if (r != 0) return r;
2947         }
2948         if (IS_NOT_NULL(en->te.Else)) {
2949           r = check_backrefs(en->te.Else, env);
2950         }
2951       }
2952     }
2953     break;
2954 
2955   case NODE_BACKREF:
2956     {
2957       int i;
2958       BackRefNode* br = BACKREF_(node);
2959       int* backs = BACKREFS_P(br);
2960       MemEnv* mem_env = SCANENV_MEMENV(env);
2961 
2962       for (i = 0; i < br->back_num; i++) {
2963         if (backs[i] > env->num_mem)
2964           return ONIGERR_INVALID_BACKREF;
2965 
2966         NODE_STATUS_ADD(mem_env[backs[i]].node, BACKREF);
2967       }
2968       r = 0;
2969     }
2970     break;
2971 
2972   default:
2973     r = 0;
2974     break;
2975   }
2976 
2977   return r;
2978 }
2979 
2980 
2981 #ifdef USE_CALL
2982 
2983 #define RECURSION_EXIST        (1<<0)
2984 #define RECURSION_MUST         (1<<1)
2985 #define RECURSION_INFINITE     (1<<2)
2986 
2987 static int
infinite_recursive_call_check(Node * node,ScanEnv * env,int head)2988 infinite_recursive_call_check(Node* node, ScanEnv* env, int head)
2989 {
2990   int ret;
2991   int r = 0;
2992 
2993   switch (NODE_TYPE(node)) {
2994   case NODE_LIST:
2995     {
2996       Node *x;
2997       OnigLen min;
2998 
2999       x = node;
3000       do {
3001         ret = infinite_recursive_call_check(NODE_CAR(x), env, head);
3002         if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;
3003         r |= ret;
3004         if (head != 0) {
3005           min = tree_min_len(NODE_CAR(x), env);
3006           if (min != 0) head = 0;
3007         }
3008       } while (IS_NOT_NULL(x = NODE_CDR(x)));
3009     }
3010     break;
3011 
3012   case NODE_ALT:
3013     {
3014       int must;
3015 
3016       must = RECURSION_MUST;
3017       do {
3018         ret = infinite_recursive_call_check(NODE_CAR(node), env, head);
3019         if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;
3020 
3021         r    |= (ret & RECURSION_EXIST);
3022         must &= ret;
3023       } while (IS_NOT_NULL(node = NODE_CDR(node)));
3024       r |= must;
3025     }
3026     break;
3027 
3028   case NODE_QUANT:
3029     r = infinite_recursive_call_check(NODE_BODY(node), env, head);
3030     if (r < 0) return r;
3031     if ((r & RECURSION_MUST) != 0) {
3032       if (QUANT_(node)->lower == 0)
3033         r &= ~RECURSION_MUST;
3034     }
3035     break;
3036 
3037   case NODE_ANCHOR:
3038     if (! ANCHOR_HAS_BODY(ANCHOR_(node)))
3039       break;
3040     /* fall */
3041   case NODE_CALL:
3042     r = infinite_recursive_call_check(NODE_BODY(node), env, head);
3043     break;
3044 
3045   case NODE_ENCLOSURE:
3046     {
3047       EnclosureNode* en = ENCLOSURE_(node);
3048 
3049       if (en->type == ENCLOSURE_MEMORY) {
3050         if (NODE_IS_MARK2(node))
3051           return 0;
3052         else if (NODE_IS_MARK1(node))
3053           return (head == 0 ? RECURSION_EXIST | RECURSION_MUST
3054                   : RECURSION_EXIST | RECURSION_MUST | RECURSION_INFINITE);
3055         else {
3056           NODE_STATUS_ADD(node, MARK2);
3057           r = infinite_recursive_call_check(NODE_BODY(node), env, head);
3058           NODE_STATUS_REMOVE(node, MARK2);
3059         }
3060       }
3061       else if (en->type == ENCLOSURE_IF_ELSE) {
3062         int eret;
3063 
3064         ret = infinite_recursive_call_check(NODE_BODY(node), env, head);
3065         if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;
3066         r |= ret;
3067         if (IS_NOT_NULL(en->te.Then)) {
3068           OnigLen min;
3069           if (head != 0) {
3070             min = tree_min_len(NODE_BODY(node), env);
3071           }
3072           else min = 0;
3073 
3074           ret = infinite_recursive_call_check(en->te.Then, env, min != 0 ? 0:head);
3075           if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;
3076           r |= ret;
3077         }
3078         if (IS_NOT_NULL(en->te.Else)) {
3079           eret = infinite_recursive_call_check(en->te.Else, env, head);
3080           if (eret < 0 || (eret & RECURSION_INFINITE) != 0) return eret;
3081           r |= (eret & RECURSION_EXIST);
3082           if ((eret & RECURSION_MUST) == 0)
3083             r &= ~RECURSION_MUST;
3084         }
3085       }
3086       else {
3087         r = infinite_recursive_call_check(NODE_BODY(node), env, head);
3088       }
3089     }
3090     break;
3091 
3092   default:
3093     break;
3094   }
3095 
3096   return r;
3097 }
3098 
3099 static int
infinite_recursive_call_check_trav(Node * node,ScanEnv * env)3100 infinite_recursive_call_check_trav(Node* node, ScanEnv* env)
3101 {
3102   int r;
3103 
3104   switch (NODE_TYPE(node)) {
3105   case NODE_LIST:
3106   case NODE_ALT:
3107     do {
3108       r = infinite_recursive_call_check_trav(NODE_CAR(node), env);
3109     } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
3110     break;
3111 
3112   case NODE_ANCHOR:
3113     if (! ANCHOR_HAS_BODY(ANCHOR_(node))) {
3114       r = 0;
3115       break;
3116     }
3117     /* fall */
3118   case NODE_QUANT:
3119     r = infinite_recursive_call_check_trav(NODE_BODY(node), env);
3120     break;
3121 
3122   case NODE_ENCLOSURE:
3123     {
3124       EnclosureNode* en = ENCLOSURE_(node);
3125 
3126       if (en->type == ENCLOSURE_MEMORY) {
3127         if (NODE_IS_RECURSION(node) && NODE_IS_CALLED(node)) {
3128           int ret;
3129 
3130           NODE_STATUS_ADD(node, MARK1);
3131 
3132           ret = infinite_recursive_call_check(NODE_BODY(node), env, 1);
3133           if (ret < 0) return ret;
3134           else if ((ret & (RECURSION_MUST | RECURSION_INFINITE)) != 0)
3135             return ONIGERR_NEVER_ENDING_RECURSION;
3136 
3137           NODE_STATUS_REMOVE(node, MARK1);
3138         }
3139       }
3140       else if (en->type == ENCLOSURE_IF_ELSE) {
3141         if (IS_NOT_NULL(en->te.Then)) {
3142           r = infinite_recursive_call_check_trav(en->te.Then, env);
3143           if (r != 0) return r;
3144         }
3145         if (IS_NOT_NULL(en->te.Else)) {
3146           r = infinite_recursive_call_check_trav(en->te.Else, env);
3147           if (r != 0) return r;
3148         }
3149       }
3150     }
3151 
3152     r = infinite_recursive_call_check_trav(NODE_BODY(node), env);
3153     break;
3154 
3155   default:
3156     r = 0;
3157     break;
3158   }
3159 
3160   return r;
3161 }
3162 
3163 static int
recursive_call_check(Node * node)3164 recursive_call_check(Node* node)
3165 {
3166   int r;
3167 
3168   switch (NODE_TYPE(node)) {
3169   case NODE_LIST:
3170   case NODE_ALT:
3171     r = 0;
3172     do {
3173       r |= recursive_call_check(NODE_CAR(node));
3174     } while (IS_NOT_NULL(node = NODE_CDR(node)));
3175     break;
3176 
3177   case NODE_ANCHOR:
3178     if (! ANCHOR_HAS_BODY(ANCHOR_(node))) {
3179       r = 0;
3180       break;
3181     }
3182     /* fall */
3183   case NODE_QUANT:
3184     r = recursive_call_check(NODE_BODY(node));
3185     break;
3186 
3187   case NODE_CALL:
3188     r = recursive_call_check(NODE_BODY(node));
3189     if (r != 0) {
3190       if (NODE_IS_MARK1(NODE_BODY(node)))
3191         NODE_STATUS_ADD(node, RECURSION);
3192     }
3193     break;
3194 
3195   case NODE_ENCLOSURE:
3196     {
3197       EnclosureNode* en = ENCLOSURE_(node);
3198 
3199       if (en->type == ENCLOSURE_MEMORY) {
3200         if (NODE_IS_MARK2(node))
3201           return 0;
3202         else if (NODE_IS_MARK1(node))
3203           return 1; /* recursion */
3204         else {
3205           NODE_STATUS_ADD(node, MARK2);
3206           r = recursive_call_check(NODE_BODY(node));
3207           NODE_STATUS_REMOVE(node, MARK2);
3208         }
3209       }
3210       else if (en->type == ENCLOSURE_IF_ELSE) {
3211         r = 0;
3212         if (IS_NOT_NULL(en->te.Then)) {
3213           r |= recursive_call_check(en->te.Then);
3214         }
3215         if (IS_NOT_NULL(en->te.Else)) {
3216           r |= recursive_call_check(en->te.Else);
3217         }
3218         r |= recursive_call_check(NODE_BODY(node));
3219       }
3220       else {
3221         r = recursive_call_check(NODE_BODY(node));
3222       }
3223     }
3224     break;
3225 
3226   default:
3227     r = 0;
3228     break;
3229   }
3230 
3231   return r;
3232 }
3233 
3234 #define IN_RECURSION         (1<<0)
3235 #define FOUND_CALLED_NODE    1
3236 
3237 static int
recursive_call_check_trav(Node * node,ScanEnv * env,int state)3238 recursive_call_check_trav(Node* node, ScanEnv* env, int state)
3239 {
3240   int r = 0;
3241 
3242   switch (NODE_TYPE(node)) {
3243   case NODE_LIST:
3244   case NODE_ALT:
3245     {
3246       int ret;
3247       do {
3248         ret = recursive_call_check_trav(NODE_CAR(node), env, state);
3249         if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
3250         else if (ret < 0) return ret;
3251       } while (IS_NOT_NULL(node = NODE_CDR(node)));
3252     }
3253     break;
3254 
3255   case NODE_QUANT:
3256     r = recursive_call_check_trav(NODE_BODY(node), env, state);
3257     if (QUANT_(node)->upper == 0) {
3258       if (r == FOUND_CALLED_NODE)
3259         QUANT_(node)->is_refered = 1;
3260     }
3261     break;
3262 
3263   case NODE_ANCHOR:
3264     {
3265       AnchorNode* an = ANCHOR_(node);
3266       if (ANCHOR_HAS_BODY(an))
3267         r = recursive_call_check_trav(NODE_ANCHOR_BODY(an), env, state);
3268     }
3269     break;
3270 
3271   case NODE_ENCLOSURE:
3272     {
3273       int ret;
3274       int state1;
3275       EnclosureNode* en = ENCLOSURE_(node);
3276 
3277       if (en->type == ENCLOSURE_MEMORY) {
3278         if (NODE_IS_CALLED(node) || (state & IN_RECURSION) != 0) {
3279           if (! NODE_IS_RECURSION(node)) {
3280             NODE_STATUS_ADD(node, MARK1);
3281             r = recursive_call_check(NODE_BODY(node));
3282             if (r != 0)
3283               NODE_STATUS_ADD(node, RECURSION);
3284             NODE_STATUS_REMOVE(node, MARK1);
3285           }
3286 
3287           if (NODE_IS_CALLED(node))
3288             r = FOUND_CALLED_NODE;
3289         }
3290       }
3291 
3292       state1 = state;
3293       if (NODE_IS_RECURSION(node))
3294         state1 |= IN_RECURSION;
3295 
3296       ret = recursive_call_check_trav(NODE_BODY(node), env, state1);
3297       if (ret == FOUND_CALLED_NODE)
3298         r = FOUND_CALLED_NODE;
3299 
3300       if (en->type == ENCLOSURE_IF_ELSE) {
3301         if (IS_NOT_NULL(en->te.Then)) {
3302           ret = recursive_call_check_trav(en->te.Then, env, state1);
3303           if (ret == FOUND_CALLED_NODE)
3304             r = FOUND_CALLED_NODE;
3305         }
3306         if (IS_NOT_NULL(en->te.Else)) {
3307           ret = recursive_call_check_trav(en->te.Else, env, state1);
3308           if (ret == FOUND_CALLED_NODE)
3309             r = FOUND_CALLED_NODE;
3310         }
3311       }
3312     }
3313     break;
3314 
3315   default:
3316     break;
3317   }
3318 
3319   return r;
3320 }
3321 
3322 #endif
3323 
3324 /* divide different length alternatives in look-behind.
3325   (?<=A|B) ==> (?<=A)|(?<=B)
3326   (?<!A|B) ==> (?<!A)(?<!B)
3327 */
3328 static int
divide_look_behind_alternatives(Node * node)3329 divide_look_behind_alternatives(Node* node)
3330 {
3331   Node *head, *np, *insert_node;
3332   AnchorNode* an = ANCHOR_(node);
3333   int anc_type = an->type;
3334 
3335   head = NODE_ANCHOR_BODY(an);
3336   np = NODE_CAR(head);
3337   swap_node(node, head);
3338   NODE_CAR(node) = head;
3339   NODE_BODY(head) = np;
3340 
3341   np = node;
3342   while (IS_NOT_NULL(np = NODE_CDR(np))) {
3343     insert_node = onig_node_new_anchor(anc_type, an->ascii_mode);
3344     CHECK_NULL_RETURN_MEMERR(insert_node);
3345     NODE_BODY(insert_node) = NODE_CAR(np);
3346     NODE_CAR(np) = insert_node;
3347   }
3348 
3349   if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
3350     np = node;
3351     do {
3352       NODE_SET_TYPE(np, NODE_LIST);  /* alt -> list */
3353     } while (IS_NOT_NULL(np = NODE_CDR(np)));
3354   }
3355   return 0;
3356 }
3357 
3358 static int
setup_look_behind(Node * node,regex_t * reg,ScanEnv * env)3359 setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
3360 {
3361   int r, len;
3362   AnchorNode* an = ANCHOR_(node);
3363 
3364   r = get_char_length_tree(NODE_ANCHOR_BODY(an), reg, &len);
3365   if (r == 0)
3366     an->char_len = len;
3367   else if (r == GET_CHAR_LEN_VARLEN)
3368     r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3369   else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
3370     if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
3371       r = divide_look_behind_alternatives(node);
3372     else
3373       r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3374   }
3375 
3376   return r;
3377 }
3378 
3379 static int
next_setup(Node * node,Node * next_node,regex_t * reg)3380 next_setup(Node* node, Node* next_node, regex_t* reg)
3381 {
3382   NodeType type;
3383 
3384  retry:
3385   type = NODE_TYPE(node);
3386   if (type == NODE_QUANT) {
3387     QuantNode* qn = QUANT_(node);
3388     if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
3389 #ifdef USE_QUANT_PEEK_NEXT
3390       Node* n = get_head_value_node(next_node, 1, reg);
3391       /* '\0': for UTF-16BE etc... */
3392       if (IS_NOT_NULL(n) && STR_(n)->s[0] != '\0') {
3393         qn->next_head_exact = n;
3394       }
3395 #endif
3396       /* automatic posseivation a*b ==> (?>a*)b */
3397       if (qn->lower <= 1) {
3398         if (NODE_IS_SIMPLE_TYPE(NODE_BODY(node))) {
3399           Node *x, *y;
3400           x = get_head_value_node(NODE_BODY(node), 0, reg);
3401           if (IS_NOT_NULL(x)) {
3402             y = get_head_value_node(next_node,  0, reg);
3403             if (IS_NOT_NULL(y) && is_exclusive(x, y, reg)) {
3404               Node* en = onig_node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);
3405               CHECK_NULL_RETURN_MEMERR(en);
3406               NODE_STATUS_ADD(en, STOP_BT_SIMPLE_REPEAT);
3407               swap_node(node, en);
3408               NODE_BODY(node) = en;
3409             }
3410           }
3411         }
3412       }
3413     }
3414   }
3415   else if (type == NODE_ENCLOSURE) {
3416     EnclosureNode* en = ENCLOSURE_(node);
3417     if (en->type == ENCLOSURE_MEMORY) {
3418       node = NODE_BODY(node);
3419       goto retry;
3420     }
3421   }
3422   return 0;
3423 }
3424 
3425 
3426 static int
update_string_node_case_fold(regex_t * reg,Node * node)3427 update_string_node_case_fold(regex_t* reg, Node *node)
3428 {
3429   UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
3430   UChar *sbuf, *ebuf, *sp;
3431   int r, i, len, sbuf_size;
3432   StrNode* sn = STR_(node);
3433 
3434   end = sn->end;
3435   sbuf_size = (int )(end - sn->s) * 2;
3436   sbuf = (UChar* )xmalloc(sbuf_size);
3437   CHECK_NULL_RETURN_MEMERR(sbuf);
3438   ebuf = sbuf + sbuf_size;
3439 
3440   sp = sbuf;
3441   p = sn->s;
3442   while (p < end) {
3443     len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
3444     for (i = 0; i < len; i++) {
3445       if (sp >= ebuf) {
3446         sbuf = (UChar* )xrealloc(sbuf, sbuf_size * 2, sbuf_size);
3447         CHECK_NULL_RETURN_MEMERR(sbuf);
3448         sp = sbuf + sbuf_size;
3449         sbuf_size *= 2;
3450         ebuf = sbuf + sbuf_size;
3451       }
3452 
3453       *sp++ = buf[i];
3454     }
3455   }
3456 
3457   r = onig_node_str_set(node, sbuf, sp);
3458   if (r != 0) {
3459     xfree(sbuf);
3460     return r;
3461   }
3462 
3463   xfree(sbuf);
3464   return 0;
3465 }
3466 
3467 static int
expand_case_fold_make_rem_string(Node ** rnode,UChar * s,UChar * end,regex_t * reg)3468 expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end, regex_t* reg)
3469 {
3470   int r;
3471   Node *node;
3472 
3473   node = onig_node_new_str(s, end);
3474   if (IS_NULL(node)) return ONIGERR_MEMORY;
3475 
3476   r = update_string_node_case_fold(reg, node);
3477   if (r != 0) {
3478     onig_node_free(node);
3479     return r;
3480   }
3481 
3482   NODE_STRING_SET_AMBIG(node);
3483   NODE_STRING_SET_DONT_GET_OPT_INFO(node);
3484   *rnode = node;
3485   return 0;
3486 }
3487 
3488 static int
expand_case_fold_string_alt(int item_num,OnigCaseFoldCodeItem items[],UChar * p,int slen,UChar * end,regex_t * reg,Node ** rnode)3489 expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], UChar *p,
3490                             int slen, UChar *end, regex_t* reg, Node **rnode)
3491 {
3492   int r, i, j;
3493   int len;
3494   int varlen;
3495   Node *anode, *var_anode, *snode, *xnode, *an;
3496   UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
3497 
3498   *rnode = var_anode = NULL_NODE;
3499 
3500   varlen = 0;
3501   for (i = 0; i < item_num; i++) {
3502     if (items[i].byte_len != slen) {
3503       varlen = 1;
3504       break;
3505     }
3506   }
3507 
3508   if (varlen != 0) {
3509     *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3510     if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
3511 
3512     xnode = onig_node_new_list(NULL, NULL);
3513     if (IS_NULL(xnode)) goto mem_err;
3514     NODE_CAR(var_anode) = xnode;
3515 
3516     anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3517     if (IS_NULL(anode)) goto mem_err;
3518     NODE_CAR(xnode) = anode;
3519   }
3520   else {
3521     *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3522     if (IS_NULL(anode)) return ONIGERR_MEMORY;
3523   }
3524 
3525   snode = onig_node_new_str(p, p + slen);
3526   if (IS_NULL(snode)) goto mem_err;
3527 
3528   NODE_CAR(anode) = snode;
3529 
3530   for (i = 0; i < item_num; i++) {
3531     snode = onig_node_new_str(NULL, NULL);
3532     if (IS_NULL(snode)) goto mem_err;
3533 
3534     for (j = 0; j < items[i].code_len; j++) {
3535       len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
3536       if (len < 0) {
3537         r = len;
3538         goto mem_err2;
3539       }
3540 
3541       r = onig_node_str_cat(snode, buf, buf + len);
3542       if (r != 0) goto mem_err2;
3543     }
3544 
3545     an = onig_node_new_alt(NULL_NODE, NULL_NODE);
3546     if (IS_NULL(an)) {
3547       goto mem_err2;
3548     }
3549     //The NULL pointer check is not necessary. It is added just for pass static
3550     //analysis. When condition "items[i].byte_len != slen" is true, "varlen = 1"
3551     //in line 3503 will be reached ,so that "if (IS_NULL(var_anode)) return ONIGERR_MEMORY"
3552     //in line 3510 will be executed, so the null pointer has been checked before
3553     //deferenced in line 3584.
3554     if (items[i].byte_len != slen && IS_NOT_NULL(var_anode)) {
3555       Node *rem;
3556       UChar *q = p + items[i].byte_len;
3557 
3558       if (q < end) {
3559         r = expand_case_fold_make_rem_string(&rem, q, end, reg);
3560         if (r != 0) {
3561           onig_node_free(an);
3562           goto mem_err2;
3563         }
3564 
3565         xnode = onig_node_list_add(NULL_NODE, snode);
3566         if (IS_NULL(xnode)) {
3567           onig_node_free(an);
3568           onig_node_free(rem);
3569           goto mem_err2;
3570         }
3571         if (IS_NULL(onig_node_list_add(xnode, rem))) {
3572           onig_node_free(an);
3573           onig_node_free(xnode);
3574           onig_node_free(rem);
3575           goto mem_err;
3576         }
3577 
3578         NODE_CAR(an) = xnode;
3579       }
3580       else {
3581         NODE_CAR(an) = snode;
3582       }
3583 
3584       NODE_CDR(var_anode) = an;
3585       var_anode = an;
3586     }
3587     else {
3588       NODE_CAR(an)     = snode;
3589       NODE_CDR(anode) = an;
3590       anode = an;
3591     }
3592   }
3593 
3594   return varlen;
3595 
3596  mem_err2:
3597   onig_node_free(snode);
3598 
3599  mem_err:
3600   onig_node_free(*rnode);
3601 
3602   return ONIGERR_MEMORY;
3603 }
3604 
3605 static int
expand_case_fold_string(Node * node,regex_t * reg)3606 expand_case_fold_string(Node* node, regex_t* reg)
3607 {
3608 #define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION  8
3609 
3610   int r, n, len, alt_num;
3611   UChar *start, *end, *p;
3612   Node *top_root, *root, *snode, *prev_node;
3613   OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
3614   StrNode* sn = STR_(node);
3615 
3616   if (NODE_STRING_IS_AMBIG(node)) return 0;
3617 
3618   start = sn->s;
3619   end   = sn->end;
3620   if (start >= end) return 0;
3621 
3622   r = 0;
3623   top_root = root = prev_node = snode = NULL_NODE;
3624   alt_num = 1;
3625   p = start;
3626   while (p < end) {
3627     n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag, p, end,
3628                                            items);
3629     if (n < 0) {
3630       r = n;
3631       goto err;
3632     }
3633 
3634     len = enclen(reg->enc, p);
3635 
3636     if (n == 0) {
3637       if (IS_NULL(snode)) {
3638         if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3639           top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3640           if (IS_NULL(root)) {
3641             onig_node_free(prev_node);
3642             goto mem_err;
3643           }
3644         }
3645 
3646         prev_node = snode = onig_node_new_str(NULL, NULL);
3647         if (IS_NULL(snode)) goto mem_err;
3648         if (IS_NOT_NULL(root)) {
3649           if (IS_NULL(onig_node_list_add(root, snode))) {
3650             onig_node_free(snode);
3651             goto mem_err;
3652           }
3653         }
3654       }
3655 
3656       r = onig_node_str_cat(snode, p, p + len);
3657       if (r != 0) goto err;
3658     }
3659     else {
3660       alt_num *= (n + 1);
3661       if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
3662 
3663       if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3664         top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3665         if (IS_NULL(root)) {
3666           onig_node_free(prev_node);
3667           goto mem_err;
3668         }
3669       }
3670 
3671       r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
3672       if (r < 0) goto mem_err;
3673       if (r == 1) {
3674         if (IS_NULL(root)) {
3675           top_root = prev_node;
3676         }
3677         else {
3678           if (IS_NULL(onig_node_list_add(root, prev_node))) {
3679             onig_node_free(prev_node);
3680             goto mem_err;
3681           }
3682         }
3683 
3684         root = NODE_CAR(prev_node);
3685       }
3686       else { /* r == 0 */
3687         if (IS_NOT_NULL(root)) {
3688           if (IS_NULL(onig_node_list_add(root, prev_node))) {
3689             onig_node_free(prev_node);
3690             goto mem_err;
3691           }
3692         }
3693       }
3694 
3695       snode = NULL_NODE;
3696     }
3697 
3698     p += len;
3699   }
3700 
3701   if (p < end) {
3702     Node *srem;
3703 
3704     r = expand_case_fold_make_rem_string(&srem, p, end, reg);
3705     if (r != 0) goto mem_err;
3706 
3707     if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
3708       top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3709       if (IS_NULL(root)) {
3710         onig_node_free(srem);
3711         onig_node_free(prev_node);
3712         goto mem_err;
3713       }
3714     }
3715 
3716     if (IS_NULL(root)) {
3717       prev_node = srem;
3718     }
3719     else {
3720       if (IS_NULL(onig_node_list_add(root, srem))) {
3721         onig_node_free(srem);
3722         goto mem_err;
3723       }
3724     }
3725   }
3726 
3727   /* ending */
3728   top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
3729   swap_node(node, top_root);
3730   onig_node_free(top_root);
3731   return 0;
3732 
3733  mem_err:
3734   r = ONIGERR_MEMORY;
3735 
3736  err:
3737   onig_node_free(top_root);
3738   return r;
3739 }
3740 
3741 #ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT
3742 static enum QuantBodyEmpty
quantifiers_memory_node_info(Node * node)3743 quantifiers_memory_node_info(Node* node)
3744 {
3745   int r = QUANT_BODY_IS_EMPTY;
3746 
3747   switch (NODE_TYPE(node)) {
3748   case NODE_LIST:
3749   case NODE_ALT:
3750     {
3751       int v;
3752       do {
3753         v = quantifiers_memory_node_info(NODE_CAR(node));
3754         if (v > r) r = v;
3755       } while (IS_NOT_NULL(node = NODE_CDR(node)));
3756     }
3757     break;
3758 
3759 #ifdef USE_CALL
3760   case NODE_CALL:
3761     if (NODE_IS_RECURSION(node)) {
3762       return QUANT_BODY_IS_EMPTY_REC; /* tiny version */
3763     }
3764     else
3765       r = quantifiers_memory_node_info(NODE_BODY(node));
3766     break;
3767 #endif
3768 
3769   case NODE_QUANT:
3770     {
3771       QuantNode* qn = QUANT_(node);
3772       if (qn->upper != 0) {
3773         r = quantifiers_memory_node_info(NODE_BODY(node));
3774       }
3775     }
3776     break;
3777 
3778   case NODE_ENCLOSURE:
3779     {
3780       EnclosureNode* en = ENCLOSURE_(node);
3781       switch (en->type) {
3782       case ENCLOSURE_MEMORY:
3783         if (NODE_IS_RECURSION(node)) {
3784           return QUANT_BODY_IS_EMPTY_REC;
3785         }
3786         return QUANT_BODY_IS_EMPTY_MEM;
3787         break;
3788 
3789       case ENCLOSURE_OPTION:
3790       case ENCLOSURE_STOP_BACKTRACK:
3791         r = quantifiers_memory_node_info(NODE_BODY(node));
3792         break;
3793       case ENCLOSURE_IF_ELSE:
3794         {
3795           int v;
3796           r = quantifiers_memory_node_info(NODE_BODY(node));
3797           if (IS_NOT_NULL(en->te.Then)) {
3798             v = quantifiers_memory_node_info(en->te.Then);
3799             if (v > r) r = v;
3800           }
3801           if (IS_NOT_NULL(en->te.Else)) {
3802             v = quantifiers_memory_node_info(en->te.Else);
3803             if (v > r) r = v;
3804           }
3805         }
3806         break;
3807       default:
3808         break;
3809       }
3810     }
3811     break;
3812 
3813   case NODE_BACKREF:
3814   case NODE_STRING:
3815   case NODE_CTYPE:
3816   case NODE_CCLASS:
3817   case NODE_ANCHOR:
3818   case NODE_GIMMICK:
3819   default:
3820     break;
3821   }
3822 
3823   return r;
3824 }
3825 #endif /* USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT */
3826 
3827 
3828 #define IN_ALT          (1<<0)
3829 #define IN_NOT          (1<<1)
3830 #define IN_REAL_REPEAT  (1<<2)
3831 #define IN_VAR_REPEAT   (1<<3)
3832 #define IN_ZERO_REPEAT  (1<<4)
3833 #define IN_MULTI_ENTRY  (1<<5)
3834 
3835 #ifdef USE_CALL
3836 
3837 #ifdef __GNUC__
3838 __inline
3839 #endif
3840 static int
setup_call_node_call(CallNode * cn,ScanEnv * env,int state)3841 setup_call_node_call(CallNode* cn, ScanEnv* env, int state)
3842 {
3843   MemEnv* mem_env = SCANENV_MEMENV(env);
3844 
3845   if (cn->by_number != 0) {
3846     int gnum = cn->group_num;
3847 
3848     if (env->num_named > 0 &&
3849         IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
3850         ! ONIG_IS_OPTION_ON(env->options, ONIG_OPTION_CAPTURE_GROUP)) {
3851       return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
3852     }
3853 
3854     if (gnum > env->num_mem) {
3855       onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_GROUP_REFERENCE,
3856                                      cn->name, cn->name_end);
3857       return ONIGERR_UNDEFINED_GROUP_REFERENCE;
3858     }
3859 
3860   set_call_attr:
3861     NODE_CALL_BODY(cn) = mem_env[cn->group_num].node;
3862     if (IS_NULL(NODE_CALL_BODY(cn))) {
3863       onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,
3864                                      cn->name, cn->name_end);
3865       return ONIGERR_UNDEFINED_NAME_REFERENCE;
3866     }
3867   }
3868   else {
3869     int *refs;
3870 
3871     int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end, &refs);
3872     if (n <= 0) {
3873       onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,
3874                                      cn->name, cn->name_end);
3875       return ONIGERR_UNDEFINED_NAME_REFERENCE;
3876     }
3877     else if (n > 1) {
3878       onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL,
3879                                      cn->name, cn->name_end);
3880       return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
3881     }
3882     else {
3883       cn->group_num = refs[0];
3884       goto set_call_attr;
3885     }
3886   }
3887 
3888   return 0;
3889 }
3890 
3891 static void
setup_call2_call(Node * node)3892 setup_call2_call(Node* node)
3893 {
3894   switch (NODE_TYPE(node)) {
3895   case NODE_LIST:
3896   case NODE_ALT:
3897     do {
3898       setup_call2_call(NODE_CAR(node));
3899     } while (IS_NOT_NULL(node = NODE_CDR(node)));
3900     break;
3901 
3902   case NODE_QUANT:
3903     setup_call2_call(NODE_BODY(node));
3904     break;
3905 
3906   case NODE_ANCHOR:
3907     if (ANCHOR_HAS_BODY(ANCHOR_(node)))
3908       setup_call2_call(NODE_BODY(node));
3909     break;
3910 
3911   case NODE_ENCLOSURE:
3912     {
3913       EnclosureNode* en = ENCLOSURE_(node);
3914 
3915       if (en->type == ENCLOSURE_MEMORY) {
3916         if (! NODE_IS_MARK1(node)) {
3917           NODE_STATUS_ADD(node, MARK1);
3918           setup_call2_call(NODE_BODY(node));
3919           NODE_STATUS_REMOVE(node, MARK1);
3920         }
3921       }
3922       else if (en->type == ENCLOSURE_IF_ELSE) {
3923         setup_call2_call(NODE_BODY(node));
3924         if (IS_NOT_NULL(en->te.Then))
3925           setup_call2_call(en->te.Then);
3926         if (IS_NOT_NULL(en->te.Else))
3927           setup_call2_call(en->te.Else);
3928       }
3929       else {
3930         setup_call2_call(NODE_BODY(node));
3931       }
3932     }
3933     break;
3934 
3935   case NODE_CALL:
3936     if (! NODE_IS_MARK1(node)) {
3937       NODE_STATUS_ADD(node, MARK1);
3938       {
3939         CallNode* cn = CALL_(node);
3940         Node* called = NODE_CALL_BODY(cn);
3941 
3942         cn->entry_count++;
3943 
3944         NODE_STATUS_ADD(called, CALLED);
3945         ENCLOSURE_(called)->m.entry_count++;
3946         setup_call2_call(called);
3947       }
3948       NODE_STATUS_REMOVE(node, MARK1);
3949     }
3950     break;
3951 
3952   default:
3953     break;
3954   }
3955 }
3956 
3957 static int
setup_call(Node * node,ScanEnv * env,int state)3958 setup_call(Node* node, ScanEnv* env, int state)
3959 {
3960   int r;
3961 
3962   switch (NODE_TYPE(node)) {
3963   case NODE_LIST:
3964   case NODE_ALT:
3965     do {
3966       r = setup_call(NODE_CAR(node), env, state);
3967     } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
3968     break;
3969 
3970   case NODE_QUANT:
3971     if (QUANT_(node)->upper == 0)
3972       state |= IN_ZERO_REPEAT;
3973 
3974     r = setup_call(NODE_BODY(node), env, state);
3975     break;
3976 
3977   case NODE_ANCHOR:
3978     if (ANCHOR_HAS_BODY(ANCHOR_(node)))
3979       r = setup_call(NODE_BODY(node), env, state);
3980     else
3981       r = 0;
3982     break;
3983 
3984   case NODE_ENCLOSURE:
3985     {
3986       EnclosureNode* en = ENCLOSURE_(node);
3987 
3988       if (en->type == ENCLOSURE_MEMORY) {
3989         if ((state & IN_ZERO_REPEAT) != 0) {
3990           NODE_STATUS_ADD(node, IN_ZERO_REPEAT);
3991           ENCLOSURE_(node)->m.entry_count--;
3992         }
3993         r = setup_call(NODE_BODY(node), env, state);
3994       }
3995       else if (en->type == ENCLOSURE_IF_ELSE) {
3996         r = setup_call(NODE_BODY(node), env, state);
3997         if (r != 0) return r;
3998         if (IS_NOT_NULL(en->te.Then)) {
3999           r = setup_call(en->te.Then, env, state);
4000           if (r != 0) return r;
4001         }
4002         if (IS_NOT_NULL(en->te.Else))
4003           r = setup_call(en->te.Else, env, state);
4004       }
4005       else
4006         r = setup_call(NODE_BODY(node), env, state);
4007     }
4008     break;
4009 
4010   case NODE_CALL:
4011     if ((state & IN_ZERO_REPEAT) != 0) {
4012       NODE_STATUS_ADD(node, IN_ZERO_REPEAT);
4013       CALL_(node)->entry_count--;
4014     }
4015 
4016     r = setup_call_node_call(CALL_(node), env, state);
4017     break;
4018 
4019   default:
4020     r = 0;
4021     break;
4022   }
4023 
4024   return r;
4025 }
4026 
4027 static int
setup_call2(Node * node)4028 setup_call2(Node* node)
4029 {
4030   int r = 0;
4031 
4032   switch (NODE_TYPE(node)) {
4033   case NODE_LIST:
4034   case NODE_ALT:
4035     do {
4036       r = setup_call2(NODE_CAR(node));
4037     } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
4038     break;
4039 
4040   case NODE_QUANT:
4041     if (QUANT_(node)->upper != 0)
4042       r = setup_call2(NODE_BODY(node));
4043     break;
4044 
4045   case NODE_ANCHOR:
4046     if (ANCHOR_HAS_BODY(ANCHOR_(node)))
4047       r = setup_call2(NODE_BODY(node));
4048     break;
4049 
4050   case NODE_ENCLOSURE:
4051     if (! NODE_IS_IN_ZERO_REPEAT(node))
4052       r = setup_call2(NODE_BODY(node));
4053 
4054     {
4055       EnclosureNode* en = ENCLOSURE_(node);
4056 
4057       if (r != 0) return r;
4058       if (en->type == ENCLOSURE_IF_ELSE) {
4059         if (IS_NOT_NULL(en->te.Then)) {
4060           r = setup_call2(en->te.Then);
4061           if (r != 0) return r;
4062         }
4063         if (IS_NOT_NULL(en->te.Else))
4064           r = setup_call2(en->te.Else);
4065       }
4066     }
4067     break;
4068 
4069   case NODE_CALL:
4070     if (! NODE_IS_IN_ZERO_REPEAT(node)) {
4071       setup_call2_call(node);
4072     }
4073     break;
4074 
4075   default:
4076     break;
4077   }
4078 
4079   return r;
4080 }
4081 
4082 
4083 static void
setup_called_state_call(Node * node,int state)4084 setup_called_state_call(Node* node, int state)
4085 {
4086   switch (NODE_TYPE(node)) {
4087   case NODE_ALT:
4088     state |= IN_ALT;
4089     /* fall */
4090   case NODE_LIST:
4091     do {
4092       setup_called_state_call(NODE_CAR(node), state);
4093     } while (IS_NOT_NULL(node = NODE_CDR(node)));
4094     break;
4095 
4096   case NODE_QUANT:
4097     {
4098       QuantNode* qn = QUANT_(node);
4099 
4100       if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 2)
4101         state |= IN_REAL_REPEAT;
4102       if (qn->lower != qn->upper)
4103         state |= IN_VAR_REPEAT;
4104 
4105       setup_called_state_call(NODE_QUANT_BODY(qn), state);
4106     }
4107     break;
4108 
4109   case NODE_ANCHOR:
4110     {
4111       AnchorNode* an = ANCHOR_(node);
4112 
4113       switch (an->type) {
4114       case ANCHOR_PREC_READ_NOT:
4115       case ANCHOR_LOOK_BEHIND_NOT:
4116         state |= IN_NOT;
4117         /* fall */
4118       case ANCHOR_PREC_READ:
4119       case ANCHOR_LOOK_BEHIND:
4120         setup_called_state_call(NODE_ANCHOR_BODY(an), state);
4121         break;
4122       default:
4123         break;
4124       }
4125     }
4126     break;
4127 
4128   case NODE_ENCLOSURE:
4129     {
4130       EnclosureNode* en = ENCLOSURE_(node);
4131 
4132       if (en->type == ENCLOSURE_MEMORY) {
4133         if (NODE_IS_MARK1(node)) {
4134           if ((~en->m.called_state & state) != 0) {
4135             en->m.called_state |= state;
4136             setup_called_state_call(NODE_BODY(node), state);
4137           }
4138         }
4139         else {
4140           NODE_STATUS_ADD(node, MARK1);
4141           en->m.called_state |= state;
4142           setup_called_state_call(NODE_BODY(node), state);
4143           NODE_STATUS_REMOVE(node, MARK1);
4144         }
4145       }
4146       else if (en->type == ENCLOSURE_IF_ELSE) {
4147         if (IS_NOT_NULL(en->te.Then)) {
4148           setup_called_state_call(en->te.Then, state);
4149         }
4150         if (IS_NOT_NULL(en->te.Else))
4151           setup_called_state_call(en->te.Else, state);
4152       }
4153       else {
4154         setup_called_state_call(NODE_BODY(node), state);
4155       }
4156     }
4157     break;
4158 
4159   case NODE_CALL:
4160     setup_called_state_call(NODE_BODY(node), state);
4161     break;
4162 
4163   default:
4164     break;
4165   }
4166 }
4167 
4168 static void
setup_called_state(Node * node,int state)4169 setup_called_state(Node* node, int state)
4170 {
4171   switch (NODE_TYPE(node)) {
4172   case NODE_ALT:
4173     state |= IN_ALT;
4174     /* fall */
4175   case NODE_LIST:
4176     do {
4177       setup_called_state(NODE_CAR(node), state);
4178     } while (IS_NOT_NULL(node = NODE_CDR(node)));
4179     break;
4180 
4181 #ifdef USE_CALL
4182   case NODE_CALL:
4183     setup_called_state_call(node, state);
4184     break;
4185 #endif
4186 
4187   case NODE_ENCLOSURE:
4188     {
4189       EnclosureNode* en = ENCLOSURE_(node);
4190 
4191       switch (en->type) {
4192       case ENCLOSURE_MEMORY:
4193         if (en->m.entry_count > 1)
4194           state |= IN_MULTI_ENTRY;
4195 
4196         en->m.called_state |= state;
4197         /* fall */
4198       case ENCLOSURE_OPTION:
4199       case ENCLOSURE_STOP_BACKTRACK:
4200         setup_called_state(NODE_BODY(node), state);
4201         break;
4202       case ENCLOSURE_IF_ELSE:
4203         setup_called_state(NODE_BODY(node), state);
4204         if (IS_NOT_NULL(en->te.Then))
4205           setup_called_state(en->te.Then, state);
4206         if (IS_NOT_NULL(en->te.Else))
4207           setup_called_state(en->te.Else, state);
4208         break;
4209       }
4210     }
4211     break;
4212 
4213   case NODE_QUANT:
4214     {
4215       QuantNode* qn = QUANT_(node);
4216 
4217       if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 2)
4218         state |= IN_REAL_REPEAT;
4219       if (qn->lower != qn->upper)
4220         state |= IN_VAR_REPEAT;
4221 
4222       setup_called_state(NODE_QUANT_BODY(qn), state);
4223     }
4224     break;
4225 
4226   case NODE_ANCHOR:
4227     {
4228       AnchorNode* an = ANCHOR_(node);
4229 
4230       switch (an->type) {
4231       case ANCHOR_PREC_READ_NOT:
4232       case ANCHOR_LOOK_BEHIND_NOT:
4233         state |= IN_NOT;
4234         /* fall */
4235       case ANCHOR_PREC_READ:
4236       case ANCHOR_LOOK_BEHIND:
4237         setup_called_state(NODE_ANCHOR_BODY(an), state);
4238         break;
4239       default:
4240         break;
4241       }
4242     }
4243     break;
4244 
4245   case NODE_BACKREF:
4246   case NODE_STRING:
4247   case NODE_CTYPE:
4248   case NODE_CCLASS:
4249   case NODE_GIMMICK:
4250   default:
4251     break;
4252   }
4253 }
4254 
4255 #endif  /* USE_CALL */
4256 
4257 
4258 static int setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env);
4259 
4260 #ifdef __GNUC__
4261 __inline
4262 #endif
4263 static int
setup_anchor(Node * node,regex_t * reg,int state,ScanEnv * env)4264 setup_anchor(Node* node, regex_t* reg, int state, ScanEnv* env)
4265 {
4266 /* allowed node types in look-behind */
4267 #define ALLOWED_TYPE_IN_LB \
4268   ( NODE_BIT_LIST | NODE_BIT_ALT | NODE_BIT_STRING | NODE_BIT_CCLASS \
4269   | NODE_BIT_CTYPE | NODE_BIT_ANCHOR | NODE_BIT_ENCLOSURE | NODE_BIT_QUANT \
4270   | NODE_BIT_CALL | NODE_BIT_GIMMICK)
4271 
4272 #define ALLOWED_ENCLOSURE_IN_LB       ( 1<<ENCLOSURE_MEMORY | 1<<ENCLOSURE_OPTION )
4273 #define ALLOWED_ENCLOSURE_IN_LB_NOT   (1<<ENCLOSURE_OPTION)
4274 
4275 #define ALLOWED_ANCHOR_IN_LB \
4276   ( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF \
4277   | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUNDARY | ANCHOR_NO_WORD_BOUNDARY \
4278   | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END \
4279   | ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY \
4280   | ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY )
4281 
4282 #define ALLOWED_ANCHOR_IN_LB_NOT \
4283   ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE \
4284   | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUNDARY \
4285   | ANCHOR_NO_WORD_BOUNDARY | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END \
4286   | ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY \
4287   | ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY )
4288 
4289   int r;
4290   AnchorNode* an = ANCHOR_(node);
4291 
4292   switch (an->type) {
4293   case ANCHOR_PREC_READ:
4294     r = setup_tree(NODE_ANCHOR_BODY(an), reg, state, env);
4295     break;
4296   case ANCHOR_PREC_READ_NOT:
4297     r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state | IN_NOT), env);
4298     break;
4299 
4300   case ANCHOR_LOOK_BEHIND:
4301     {
4302       r = check_type_tree(NODE_ANCHOR_BODY(an), ALLOWED_TYPE_IN_LB,
4303                           ALLOWED_ENCLOSURE_IN_LB, ALLOWED_ANCHOR_IN_LB);
4304       if (r < 0) return r;
4305       if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4306       r = setup_tree(NODE_ANCHOR_BODY(an), reg, state, env);
4307       if (r != 0) return r;
4308       r = setup_look_behind(node, reg, env);
4309     }
4310     break;
4311 
4312   case ANCHOR_LOOK_BEHIND_NOT:
4313     {
4314       r = check_type_tree(NODE_ANCHOR_BODY(an), ALLOWED_TYPE_IN_LB,
4315                           ALLOWED_ENCLOSURE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
4316       if (r < 0) return r;
4317       if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4318       r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state | IN_NOT), env);
4319       if (r != 0) return r;
4320       r = setup_look_behind(node, reg, env);
4321     }
4322     break;
4323 
4324   default:
4325     r = 0;
4326     break;
4327   }
4328 
4329   return r;
4330 }
4331 
4332 #ifdef __GNUC__
4333 __inline
4334 #endif
4335 static int
setup_quant(Node * node,regex_t * reg,int state,ScanEnv * env)4336 setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env)
4337 {
4338   int r;
4339   OnigLen d;
4340   QuantNode* qn = QUANT_(node);
4341   Node* body = NODE_BODY(node);
4342 
4343   if ((state & IN_REAL_REPEAT) != 0) {
4344     NODE_STATUS_ADD(node, IN_REAL_REPEAT);
4345   }
4346   if ((state & IN_MULTI_ENTRY) != 0) {
4347     NODE_STATUS_ADD(node, IN_MULTI_ENTRY);
4348   }
4349 
4350   if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
4351     d = tree_min_len(body, env);
4352     if (d == 0) {
4353 #ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT
4354       qn->body_empty_info = quantifiers_memory_node_info(body);
4355       if (qn->body_empty_info == QUANT_BODY_IS_EMPTY_REC) {
4356         if (NODE_TYPE(body) == NODE_ENCLOSURE &&
4357             ENCLOSURE_(body)->type == ENCLOSURE_MEMORY) {
4358           MEM_STATUS_ON(env->bt_mem_end, ENCLOSURE_(body)->m.regnum);
4359         }
4360       }
4361 #else
4362       qn->body_empty_info = QUANT_BODY_IS_EMPTY;
4363 #endif
4364     }
4365   }
4366 
4367   if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 2)
4368     state |= IN_REAL_REPEAT;
4369   if (qn->lower != qn->upper)
4370     state |= IN_VAR_REPEAT;
4371 
4372   r = setup_tree(body, reg, state, env);
4373   if (r != 0) return r;
4374 
4375   /* expand string */
4376 #define EXPAND_STRING_MAX_LENGTH  100
4377   if (NODE_TYPE(body) == NODE_STRING) {
4378     if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper &&
4379         qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) {
4380       int len = NODE_STRING_LEN(body);
4381       StrNode* sn = STR_(body);
4382 
4383       if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) {
4384         int i, n = qn->lower;
4385         onig_node_conv_to_str_node(node, STR_(body)->flag);
4386         for (i = 0; i < n; i++) {
4387           r = onig_node_str_cat(node, sn->s, sn->end);
4388           if (r != 0) return r;
4389         }
4390         onig_node_free(body);
4391         return r;
4392       }
4393     }
4394   }
4395 
4396 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
4397   if (qn->greedy && (qn->body_empty_info != QUANT_BODY_IS_NOT_EMPTY)) {
4398     if (NODE_TYPE(body) == NODE_QUANT) {
4399       QuantNode* tqn = QUANT_(body);
4400       if (IS_NOT_NULL(tqn->head_exact)) {
4401         qn->head_exact  = tqn->head_exact;
4402         tqn->head_exact = NULL;
4403       }
4404     }
4405     else {
4406       qn->head_exact = get_head_value_node(NODE_BODY(node), 1, reg);
4407     }
4408   }
4409 #endif
4410 
4411   return r;
4412 }
4413 
4414 /* setup_tree does the following work.
4415  1. check empty loop. (set qn->body_empty_info)
4416  2. expand ignore-case in char class.
4417  3. set memory status bit flags. (reg->mem_stats)
4418  4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
4419  5. find invalid patterns in look-behind.
4420  6. expand repeated string.
4421  */
4422 static int
setup_tree(Node * node,regex_t * reg,int state,ScanEnv * env)4423 setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
4424 {
4425   int r = 0;
4426 
4427   switch (NODE_TYPE(node)) {
4428   case NODE_LIST:
4429     {
4430       Node* prev = NULL_NODE;
4431       do {
4432         r = setup_tree(NODE_CAR(node), reg, state, env);
4433         if (IS_NOT_NULL(prev) && r == 0) {
4434           r = next_setup(prev, NODE_CAR(node), reg);
4435         }
4436         prev = NODE_CAR(node);
4437       } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
4438     }
4439     break;
4440 
4441   case NODE_ALT:
4442     do {
4443       r = setup_tree(NODE_CAR(node), reg, (state | IN_ALT), env);
4444     } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
4445     break;
4446 
4447   case NODE_STRING:
4448     if (IS_IGNORECASE(reg->options) && !NODE_STRING_IS_RAW(node)) {
4449       r = expand_case_fold_string(node, reg);
4450     }
4451     break;
4452 
4453   case NODE_BACKREF:
4454     {
4455       int i;
4456       int* p;
4457       BackRefNode* br = BACKREF_(node);
4458       p = BACKREFS_P(br);
4459       for (i = 0; i < br->back_num; i++) {
4460         if (p[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
4461         MEM_STATUS_ON(env->backrefed_mem, p[i]);
4462         MEM_STATUS_ON(env->bt_mem_start, p[i]);
4463 #ifdef USE_BACKREF_WITH_LEVEL
4464         if (NODE_IS_NEST_LEVEL(node)) {
4465           MEM_STATUS_ON(env->bt_mem_end, p[i]);
4466         }
4467 #endif
4468       }
4469     }
4470     break;
4471 
4472   case NODE_ENCLOSURE:
4473     {
4474       EnclosureNode* en = ENCLOSURE_(node);
4475 
4476       switch (en->type) {
4477       case ENCLOSURE_OPTION:
4478         {
4479           OnigOptionType options = reg->options;
4480           reg->options = ENCLOSURE_(node)->o.options;
4481           r = setup_tree(NODE_BODY(node), reg, state, env);
4482           reg->options = options;
4483         }
4484         break;
4485 
4486       case ENCLOSURE_MEMORY:
4487 #ifdef USE_CALL
4488         state |= en->m.called_state;
4489 #endif
4490 
4491         if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_MULTI_ENTRY)) != 0
4492             || NODE_IS_RECURSION(node)) {
4493           MEM_STATUS_ON(env->bt_mem_start, en->m.regnum);
4494         }
4495         r = setup_tree(NODE_BODY(node), reg, state, env);
4496         break;
4497 
4498       case ENCLOSURE_STOP_BACKTRACK:
4499         {
4500           Node* target = NODE_BODY(node);
4501           r = setup_tree(target, reg, state, env);
4502           if (NODE_TYPE(target) == NODE_QUANT) {
4503             QuantNode* tqn = QUANT_(target);
4504             if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
4505                 tqn->greedy != 0) {  /* (?>a*), a*+ etc... */
4506               if (NODE_IS_SIMPLE_TYPE(NODE_BODY(target)))
4507                 NODE_STATUS_ADD(node, STOP_BT_SIMPLE_REPEAT);
4508             }
4509           }
4510         }
4511         break;
4512 
4513       case ENCLOSURE_IF_ELSE:
4514         r = setup_tree(NODE_BODY(node), reg, (state | IN_ALT), env);
4515         if (r != 0) return r;
4516         if (IS_NOT_NULL(en->te.Then)) {
4517           r = setup_tree(en->te.Then, reg, (state | IN_ALT), env);
4518           if (r != 0) return r;
4519         }
4520         if (IS_NOT_NULL(en->te.Else))
4521           r = setup_tree(en->te.Else, reg, (state | IN_ALT), env);
4522         break;
4523       }
4524     }
4525     break;
4526 
4527   case NODE_QUANT:
4528     r = setup_quant(node, reg, state, env);
4529     break;
4530 
4531   case NODE_ANCHOR:
4532     r = setup_anchor(node, reg, state, env);
4533     break;
4534 
4535 #ifdef USE_CALL
4536   case NODE_CALL:
4537 #endif
4538   case NODE_CTYPE:
4539   case NODE_CCLASS:
4540   case NODE_GIMMICK:
4541   default:
4542     break;
4543   }
4544 
4545   return r;
4546 }
4547 
4548 /* set skip map for Boyer-Moore search */
4549 static int
set_bm_skip(UChar * s,UChar * end,OnigEncoding enc ARG_UNUSED,UChar skip[],int ** int_skip)4550 set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
4551             UChar skip[], int** int_skip)
4552 {
4553   int i, len;
4554 
4555   len = (int )(end - s);
4556   if (len < ONIG_CHAR_TABLE_SIZE) {
4557     for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = len;
4558 
4559     for (i = 0; i < len - 1; i++)
4560       skip[s[i]] = len - 1 - i;
4561   }
4562   else {
4563     if (IS_NULL(*int_skip)) {
4564       *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
4565       if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
4566     }
4567     for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = len;
4568 
4569     for (i = 0; i < len - 1; i++)
4570       (*int_skip)[s[i]] = len - 1 - i;
4571   }
4572   return 0;
4573 }
4574 
4575 #define OPT_EXACT_MAXLEN   24
4576 
4577 typedef struct {
4578   OnigLen min;  /* min byte length */
4579   OnigLen max;  /* max byte length */
4580 } MinMax;
4581 
4582 typedef struct {
4583   MinMax           mmd;
4584   OnigEncoding     enc;
4585   OnigOptionType   options;
4586   OnigCaseFoldType case_fold_flag;
4587   ScanEnv*         scan_env;
4588 } OptEnv;
4589 
4590 typedef struct {
4591   int left;
4592   int right;
4593 } OptAnc;
4594 
4595 typedef struct {
4596   MinMax     mmd;   /* position */
4597   OptAnc     anc;
4598   int        reach_end;
4599   int        ignore_case;
4600   int        len;
4601   UChar      s[OPT_EXACT_MAXLEN];
4602 } OptExact;
4603 
4604 typedef struct {
4605   MinMax    mmd;    /* position */
4606   OptAnc    anc;
4607   int       value;  /* weighted value */
4608   UChar     map[ONIG_CHAR_TABLE_SIZE];
4609 } OptMap;
4610 
4611 typedef struct {
4612   MinMax    len;
4613   OptAnc    anc;
4614   OptExact  exb;     /* boundary */
4615   OptExact  exm;     /* middle */
4616   OptExact  expr;    /* prec read (?=...) */
4617   OptMap    map;     /* boundary */
4618 } NodeOpt;
4619 
4620 
4621 static int
map_position_value(OnigEncoding enc,int i)4622 map_position_value(OnigEncoding enc, int i)
4623 {
4624   static const short int Vals[] = {
4625      5,  1,  1,  1,  1,  1,  1,  1,  1, 10, 10,  1,  1, 10,  1,  1,
4626      1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
4627     12,  4,  7,  4,  4,  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,
4628      6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  5,
4629      5,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
4630      6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  6,  5,  5,  5,
4631      5,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
4632      6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  1
4633   };
4634 
4635   if (i < (int )(sizeof(Vals)/sizeof(Vals[0]))) {
4636     if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
4637       return 20;
4638     else
4639       return (int )Vals[i];
4640   }
4641   else
4642     return 4;   /* Take it easy. */
4643 }
4644 
4645 static int
distance_value(MinMax * mm)4646 distance_value(MinMax* mm)
4647 {
4648   /* 1000 / (min-max-dist + 1) */
4649   static const short int dist_vals[] = {
4650     1000,  500,  333,  250,  200,  167,  143,  125,  111,  100,
4651       91,   83,   77,   71,   67,   63,   59,   56,   53,   50,
4652       48,   45,   43,   42,   40,   38,   37,   36,   34,   33,
4653       32,   31,   30,   29,   29,   28,   27,   26,   26,   25,
4654       24,   24,   23,   23,   22,   22,   21,   21,   20,   20,
4655       20,   19,   19,   19,   18,   18,   18,   17,   17,   17,
4656       16,   16,   16,   16,   15,   15,   15,   15,   14,   14,
4657       14,   14,   14,   14,   13,   13,   13,   13,   13,   13,
4658       12,   12,   12,   12,   12,   12,   11,   11,   11,   11,
4659       11,   11,   11,   11,   11,   10,   10,   10,   10,   10
4660   };
4661 
4662   OnigLen d;
4663 
4664   if (mm->max == INFINITE_LEN) return 0;
4665 
4666   d = mm->max - mm->min;
4667   if (d < (OnigLen )(sizeof(dist_vals)/sizeof(dist_vals[0])))
4668     /* return dist_vals[d] * 16 / (mm->min + 12); */
4669     return (int )dist_vals[d];
4670   else
4671     return 1;
4672 }
4673 
4674 static int
comp_distance_value(MinMax * d1,MinMax * d2,int v1,int v2)4675 comp_distance_value(MinMax* d1, MinMax* d2, int v1, int v2)
4676 {
4677   if (v2 <= 0) return -1;
4678   if (v1 <= 0) return  1;
4679 
4680   v1 *= distance_value(d1);
4681   v2 *= distance_value(d2);
4682 
4683   if (v2 > v1) return  1;
4684   if (v2 < v1) return -1;
4685 
4686   if (d2->min < d1->min) return  1;
4687   if (d2->min > d1->min) return -1;
4688   return 0;
4689 }
4690 
4691 static int
is_equal_mml(MinMax * a,MinMax * b)4692 is_equal_mml(MinMax* a, MinMax* b)
4693 {
4694   return (a->min == b->min && a->max == b->max) ? 1 : 0;
4695 }
4696 
4697 static void
set_mml(MinMax * l,OnigLen min,OnigLen max)4698 set_mml(MinMax* l, OnigLen min, OnigLen max)
4699 {
4700   l->min = min;
4701   l->max = max;
4702 }
4703 
4704 static void
clear_mml(MinMax * l)4705 clear_mml(MinMax* l)
4706 {
4707   l->min = l->max = 0;
4708 }
4709 
4710 static void
copy_mml(MinMax * to,MinMax * from)4711 copy_mml(MinMax* to, MinMax* from)
4712 {
4713   to->min = from->min;
4714   to->max = from->max;
4715 }
4716 
4717 static void
add_mml(MinMax * to,MinMax * from)4718 add_mml(MinMax* to, MinMax* from)
4719 {
4720   to->min = distance_add(to->min, from->min);
4721   to->max = distance_add(to->max, from->max);
4722 }
4723 
4724 static void
alt_merge_mml(MinMax * to,MinMax * from)4725 alt_merge_mml(MinMax* to, MinMax* from)
4726 {
4727   if (to->min > from->min) to->min = from->min;
4728   if (to->max < from->max) to->max = from->max;
4729 }
4730 
4731 static void
copy_opt_env(OptEnv * to,OptEnv * from)4732 copy_opt_env(OptEnv* to, OptEnv* from)
4733 {
4734   *to = *from;
4735 }
4736 
4737 static void
clear_opt_anc_info(OptAnc * a)4738 clear_opt_anc_info(OptAnc* a)
4739 {
4740   a->left  = 0;
4741   a->right = 0;
4742 }
4743 
4744 static void
copy_opt_anc_info(OptAnc * to,OptAnc * from)4745 copy_opt_anc_info(OptAnc* to, OptAnc* from)
4746 {
4747   *to = *from;
4748 }
4749 
4750 static void
concat_opt_anc_info(OptAnc * to,OptAnc * left,OptAnc * right,OnigLen left_len,OnigLen right_len)4751 concat_opt_anc_info(OptAnc* to, OptAnc* left, OptAnc* right,
4752                     OnigLen left_len, OnigLen right_len)
4753 {
4754   clear_opt_anc_info(to);
4755 
4756   to->left = left->left;
4757   if (left_len == 0) {
4758     to->left |= right->left;
4759   }
4760 
4761   to->right = right->right;
4762   if (right_len == 0) {
4763     to->right |= left->right;
4764   }
4765   else {
4766     to->right |= (left->right & ANCHOR_PREC_READ_NOT);
4767   }
4768 }
4769 
4770 static int
is_left(int a)4771 is_left(int a)
4772 {
4773   if (a == ANCHOR_END_BUF  || a == ANCHOR_SEMI_END_BUF ||
4774       a == ANCHOR_END_LINE || a == ANCHOR_PREC_READ || a == ANCHOR_PREC_READ_NOT)
4775     return 0;
4776 
4777   return 1;
4778 }
4779 
4780 static int
is_set_opt_anc_info(OptAnc * to,int anc)4781 is_set_opt_anc_info(OptAnc* to, int anc)
4782 {
4783   if ((to->left & anc) != 0) return 1;
4784 
4785   return ((to->right & anc) != 0 ? 1 : 0);
4786 }
4787 
4788 static void
add_opt_anc_info(OptAnc * to,int anc)4789 add_opt_anc_info(OptAnc* to, int anc)
4790 {
4791   if (is_left(anc))
4792     to->left |= anc;
4793   else
4794     to->right |= anc;
4795 }
4796 
4797 static void
remove_opt_anc_info(OptAnc * to,int anc)4798 remove_opt_anc_info(OptAnc* to, int anc)
4799 {
4800   if (is_left(anc))
4801     to->left &= ~anc;
4802   else
4803     to->right &= ~anc;
4804 }
4805 
4806 static void
alt_merge_opt_anc_info(OptAnc * to,OptAnc * add)4807 alt_merge_opt_anc_info(OptAnc* to, OptAnc* add)
4808 {
4809   to->left  &= add->left;
4810   to->right &= add->right;
4811 }
4812 
4813 static int
is_full_opt_exact(OptExact * e)4814 is_full_opt_exact(OptExact* e)
4815 {
4816   return (e->len >= OPT_EXACT_MAXLEN ? 1 : 0);
4817 }
4818 
4819 static void
clear_opt_exact(OptExact * e)4820 clear_opt_exact(OptExact* e)
4821 {
4822   clear_mml(&e->mmd);
4823   clear_opt_anc_info(&e->anc);
4824   e->reach_end   = 0;
4825   e->ignore_case = 0;
4826   e->len         = 0;
4827   e->s[0]        = '\0';
4828 }
4829 
4830 static void
copy_opt_exact(OptExact * to,OptExact * from)4831 copy_opt_exact(OptExact* to, OptExact* from)
4832 {
4833   *to = *from;
4834 }
4835 
4836 static int
concat_opt_exact(OptExact * to,OptExact * add,OnigEncoding enc)4837 concat_opt_exact(OptExact* to, OptExact* add, OnigEncoding enc)
4838 {
4839   int i, j, len, r;
4840   UChar *p, *end;
4841   OptAnc tanc;
4842 
4843   if (! to->ignore_case && add->ignore_case) {
4844     if (to->len >= add->len) return 0;  /* avoid */
4845 
4846     to->ignore_case = 1;
4847   }
4848 
4849   r = 0;
4850   p = add->s;
4851   end = p + add->len;
4852   for (i = to->len; p < end; ) {
4853     len = enclen(enc, p);
4854     if (i + len > OPT_EXACT_MAXLEN) {
4855       r = 1; /* 1:full */
4856       break;
4857     }
4858     for (j = 0; j < len && p < end; j++)
4859       to->s[i++] = *p++;
4860   }
4861 
4862   to->len = i;
4863   to->reach_end = (p == end ? add->reach_end : 0);
4864 
4865   concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
4866   if (! to->reach_end) tanc.right = 0;
4867   copy_opt_anc_info(&to->anc, &tanc);
4868 
4869   return r;
4870 }
4871 
4872 static void
concat_opt_exact_str(OptExact * to,UChar * s,UChar * end,OnigEncoding enc)4873 concat_opt_exact_str(OptExact* to, UChar* s, UChar* end, OnigEncoding enc)
4874 {
4875   int i, j, len;
4876   UChar *p;
4877 
4878   for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
4879     len = enclen(enc, p);
4880     if (i + len > OPT_EXACT_MAXLEN) break;
4881     for (j = 0; j < len && p < end; j++)
4882       to->s[i++] = *p++;
4883   }
4884 
4885   to->len = i;
4886 }
4887 
4888 static void
alt_merge_opt_exact(OptExact * to,OptExact * add,OptEnv * env)4889 alt_merge_opt_exact(OptExact* to, OptExact* add, OptEnv* env)
4890 {
4891   int i, j, len;
4892 
4893   if (add->len == 0 || to->len == 0) {
4894     clear_opt_exact(to);
4895     return ;
4896   }
4897 
4898   if (! is_equal_mml(&to->mmd, &add->mmd)) {
4899     clear_opt_exact(to);
4900     return ;
4901   }
4902 
4903   for (i = 0; i < to->len && i < add->len; ) {
4904     if (to->s[i] != add->s[i]) break;
4905     len = enclen(env->enc, to->s + i);
4906 
4907     for (j = 1; j < len; j++) {
4908       if (to->s[i+j] != add->s[i+j]) break;
4909     }
4910     if (j < len) break;
4911     i += len;
4912   }
4913 
4914   if (! add->reach_end || i < add->len || i < to->len) {
4915     to->reach_end = 0;
4916   }
4917   to->len = i;
4918   to->ignore_case |= add->ignore_case;
4919 
4920   alt_merge_opt_anc_info(&to->anc, &add->anc);
4921   if (! to->reach_end) to->anc.right = 0;
4922 }
4923 
4924 static void
select_opt_exact(OnigEncoding enc,OptExact * now,OptExact * alt)4925 select_opt_exact(OnigEncoding enc, OptExact* now, OptExact* alt)
4926 {
4927   int vn, va;
4928 
4929   vn = now->len;
4930   va = alt->len;
4931 
4932   if (va == 0) {
4933     return ;
4934   }
4935   else if (vn == 0) {
4936     copy_opt_exact(now, alt);
4937     return ;
4938   }
4939   else if (vn <= 2 && va <= 2) {
4940     /* ByteValTable[x] is big value --> low price */
4941     va = map_position_value(enc, now->s[0]);
4942     vn = map_position_value(enc, alt->s[0]);
4943 
4944     if (now->len > 1) vn += 5;
4945     if (alt->len > 1) va += 5;
4946   }
4947 
4948   if (now->ignore_case == 0) vn *= 2;
4949   if (alt->ignore_case == 0) va *= 2;
4950 
4951   if (comp_distance_value(&now->mmd, &alt->mmd, vn, va) > 0)
4952     copy_opt_exact(now, alt);
4953 }
4954 
4955 static void
clear_opt_map(OptMap * map)4956 clear_opt_map(OptMap* map)
4957 {
4958   static const OptMap clean_info = {
4959     {0, 0}, {0, 0}, 0,
4960     {
4961       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4962       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4963       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4964       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4965       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4966       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4967       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4968       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4969       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4970       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4971       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4972       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4973       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4974       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4975       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4976       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4977     }
4978   };
4979 
4980   xmemcpy(map, &clean_info, sizeof(OptMap));
4981 }
4982 
4983 static void
copy_opt_map(OptMap * to,OptMap * from)4984 copy_opt_map(OptMap* to, OptMap* from)
4985 {
4986   xmemcpy(to,from,sizeof(OptMap));
4987 }
4988 
4989 static void
add_char_opt_map(OptMap * m,UChar c,OnigEncoding enc)4990 add_char_opt_map(OptMap* m, UChar c, OnigEncoding enc)
4991 {
4992   if (m->map[c] == 0) {
4993     m->map[c] = 1;
4994     m->value += map_position_value(enc, c);
4995   }
4996 }
4997 
4998 static int
add_char_amb_opt_map(OptMap * map,UChar * p,UChar * end,OnigEncoding enc,OnigCaseFoldType fold_flag)4999 add_char_amb_opt_map(OptMap* map, UChar* p, UChar* end,
5000                      OnigEncoding enc, OnigCaseFoldType fold_flag)
5001 {
5002   OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
5003   UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
5004   int i, n;
5005 
5006   add_char_opt_map(map, p[0], enc);
5007 
5008   fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(fold_flag);
5009   n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, fold_flag, p, end, items);
5010   if (n < 0) return n;
5011 
5012   for (i = 0; i < n; i++) {
5013     ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
5014     add_char_opt_map(map, buf[0], enc);
5015   }
5016 
5017   return 0;
5018 }
5019 
5020 static void
select_opt_map(OptMap * now,OptMap * alt)5021 select_opt_map(OptMap* now, OptMap* alt)
5022 {
5023   static int z = 1<<15; /* 32768: something big value */
5024 
5025   int vn, va;
5026 
5027   if (alt->value == 0) return ;
5028   if (now->value == 0) {
5029     copy_opt_map(now, alt);
5030     return ;
5031   }
5032 
5033   vn = z / now->value;
5034   va = z / alt->value;
5035   if (comp_distance_value(&now->mmd, &alt->mmd, vn, va) > 0)
5036     copy_opt_map(now, alt);
5037 }
5038 
5039 static int
comp_opt_exact_or_map(OptExact * e,OptMap * m)5040 comp_opt_exact_or_map(OptExact* e, OptMap* m)
5041 {
5042 #define COMP_EM_BASE  20
5043   int ae, am;
5044 
5045   if (m->value <= 0) return -1;
5046 
5047   ae = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2);
5048   am = COMP_EM_BASE * 5 * 2 / m->value;
5049   return comp_distance_value(&e->mmd, &m->mmd, ae, am);
5050 }
5051 
5052 static void
alt_merge_opt_map(OnigEncoding enc,OptMap * to,OptMap * add)5053 alt_merge_opt_map(OnigEncoding enc, OptMap* to, OptMap* add)
5054 {
5055   int i, val;
5056 
5057   /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
5058   if (to->value == 0) return ;
5059   if (add->value == 0 || to->mmd.max < add->mmd.min) {
5060     clear_opt_map(to);
5061     return ;
5062   }
5063 
5064   alt_merge_mml(&to->mmd, &add->mmd);
5065 
5066   val = 0;
5067   for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
5068     if (add->map[i])
5069       to->map[i] = 1;
5070 
5071     if (to->map[i])
5072       val += map_position_value(enc, i);
5073   }
5074   to->value = val;
5075 
5076   alt_merge_opt_anc_info(&to->anc, &add->anc);
5077 }
5078 
5079 static void
set_bound_node_opt_info(NodeOpt * opt,MinMax * plen)5080 set_bound_node_opt_info(NodeOpt* opt, MinMax* plen)
5081 {
5082   copy_mml(&(opt->exb.mmd),  plen);
5083   copy_mml(&(opt->expr.mmd), plen);
5084   copy_mml(&(opt->map.mmd),  plen);
5085 }
5086 
5087 static void
clear_node_opt_info(NodeOpt * opt)5088 clear_node_opt_info(NodeOpt* opt)
5089 {
5090   clear_mml(&opt->len);
5091   clear_opt_anc_info(&opt->anc);
5092   clear_opt_exact(&opt->exb);
5093   clear_opt_exact(&opt->exm);
5094   clear_opt_exact(&opt->expr);
5095   clear_opt_map(&opt->map);
5096 }
5097 
5098 static void
copy_node_opt_info(NodeOpt * to,NodeOpt * from)5099 copy_node_opt_info(NodeOpt* to, NodeOpt* from)
5100 {
5101   xmemcpy(to,from,sizeof(NodeOpt));
5102 }
5103 
5104 static void
concat_left_node_opt_info(OnigEncoding enc,NodeOpt * to,NodeOpt * add)5105 concat_left_node_opt_info(OnigEncoding enc, NodeOpt* to, NodeOpt* add)
5106 {
5107   int exb_reach, exm_reach;
5108   OptAnc tanc;
5109 
5110   concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
5111   copy_opt_anc_info(&to->anc, &tanc);
5112 
5113   if (add->exb.len > 0 && to->len.max == 0) {
5114     concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc, to->len.max, add->len.max);
5115     copy_opt_anc_info(&add->exb.anc, &tanc);
5116   }
5117 
5118   if (add->map.value > 0 && to->len.max == 0) {
5119     if (add->map.mmd.max == 0)
5120       add->map.anc.left |= to->anc.left;
5121   }
5122 
5123   exb_reach = to->exb.reach_end;
5124   exm_reach = to->exm.reach_end;
5125 
5126   if (add->len.max != 0)
5127     to->exb.reach_end = to->exm.reach_end = 0;
5128 
5129   if (add->exb.len > 0) {
5130     if (exb_reach) {
5131       concat_opt_exact(&to->exb, &add->exb, enc);
5132       clear_opt_exact(&add->exb);
5133     }
5134     else if (exm_reach) {
5135       concat_opt_exact(&to->exm, &add->exb, enc);
5136       clear_opt_exact(&add->exb);
5137     }
5138   }
5139   select_opt_exact(enc, &to->exm, &add->exb);
5140   select_opt_exact(enc, &to->exm, &add->exm);
5141 
5142   if (to->expr.len > 0) {
5143     if (add->len.max > 0) {
5144       if (to->expr.len > (int )add->len.max)
5145         to->expr.len = add->len.max;
5146 
5147       if (to->expr.mmd.max == 0)
5148         select_opt_exact(enc, &to->exb, &to->expr);
5149       else
5150         select_opt_exact(enc, &to->exm, &to->expr);
5151     }
5152   }
5153   else if (add->expr.len > 0) {
5154     copy_opt_exact(&to->expr, &add->expr);
5155   }
5156 
5157   select_opt_map(&to->map, &add->map);
5158   add_mml(&to->len, &add->len);
5159 }
5160 
5161 static void
alt_merge_node_opt_info(NodeOpt * to,NodeOpt * add,OptEnv * env)5162 alt_merge_node_opt_info(NodeOpt* to, NodeOpt* add, OptEnv* env)
5163 {
5164   alt_merge_opt_anc_info(&to->anc, &add->anc);
5165   alt_merge_opt_exact(&to->exb,  &add->exb, env);
5166   alt_merge_opt_exact(&to->exm,  &add->exm, env);
5167   alt_merge_opt_exact(&to->expr, &add->expr, env);
5168   alt_merge_opt_map(env->enc, &to->map, &add->map);
5169 
5170   alt_merge_mml(&to->len, &add->len);
5171 }
5172 
5173 
5174 #define MAX_NODE_OPT_INFO_REF_COUNT    5
5175 
5176 static int
optimize_nodes(Node * node,NodeOpt * opt,OptEnv * env)5177 optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)
5178 {
5179   int i;
5180   int r;
5181   NodeOpt xo;
5182   OnigEncoding enc;
5183 
5184   r = 0;
5185   enc = env->enc;
5186   clear_node_opt_info(opt);
5187   set_bound_node_opt_info(opt, &env->mmd);
5188 
5189   switch (NODE_TYPE(node)) {
5190   case NODE_LIST:
5191     {
5192       OptEnv nenv;
5193       Node* nd = node;
5194 
5195       copy_opt_env(&nenv, env);
5196       do {
5197         r = optimize_nodes(NODE_CAR(nd), &xo, &nenv);
5198         if (r == 0) {
5199           add_mml(&nenv.mmd, &xo.len);
5200           concat_left_node_opt_info(enc, opt, &xo);
5201         }
5202       } while (r == 0 && IS_NOT_NULL(nd = NODE_CDR(nd)));
5203     }
5204     break;
5205 
5206   case NODE_ALT:
5207     {
5208       Node* nd = node;
5209 
5210       do {
5211         r = optimize_nodes(NODE_CAR(nd), &xo, env);
5212         if (r == 0) {
5213           if (nd == node) copy_node_opt_info(opt, &xo);
5214           else            alt_merge_node_opt_info(opt, &xo, env);
5215         }
5216       } while ((r == 0) && IS_NOT_NULL(nd = NODE_CDR(nd)));
5217     }
5218     break;
5219 
5220   case NODE_STRING:
5221     {
5222       StrNode* sn = STR_(node);
5223       int slen = (int )(sn->end - sn->s);
5224       /* int is_raw = NODE_STRING_IS_RAW(node); */
5225 
5226       if (! NODE_STRING_IS_AMBIG(node)) {
5227         concat_opt_exact_str(&opt->exb, sn->s, sn->end, enc);
5228         if (slen > 0) {
5229           add_char_opt_map(&opt->map, *(sn->s), enc);
5230         }
5231         set_mml(&opt->len, slen, slen);
5232       }
5233       else {
5234         int max;
5235 
5236         if (NODE_STRING_IS_DONT_GET_OPT_INFO(node)) {
5237           int n = onigenc_strlen(enc, sn->s, sn->end);
5238           max = ONIGENC_MBC_MAXLEN_DIST(enc) * n;
5239         }
5240         else {
5241           concat_opt_exact_str(&opt->exb, sn->s, sn->end, enc);
5242           opt->exb.ignore_case = 1;
5243 
5244           if (slen > 0) {
5245             r = add_char_amb_opt_map(&opt->map, sn->s, sn->end,
5246                                      enc, env->case_fold_flag);
5247             if (r != 0) break;
5248           }
5249 
5250           max = slen;
5251         }
5252 
5253         set_mml(&opt->len, slen, max);
5254       }
5255 
5256       if (opt->exb.len == slen)
5257         opt->exb.reach_end = 1;
5258     }
5259     break;
5260 
5261   case NODE_CCLASS:
5262     {
5263       int z;
5264       CClassNode* cc = CCLASS_(node);
5265 
5266       /* no need to check ignore case. (set in setup_tree()) */
5267 
5268       if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
5269         OnigLen min = ONIGENC_MBC_MINLEN(enc);
5270         OnigLen max = ONIGENC_MBC_MAXLEN_DIST(enc);
5271 
5272         set_mml(&opt->len, min, max);
5273       }
5274       else {
5275         for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5276           z = BITSET_AT(cc->bs, i);
5277           if ((z && ! IS_NCCLASS_NOT(cc)) || (! z && IS_NCCLASS_NOT(cc))) {
5278             add_char_opt_map(&opt->map, (UChar )i, enc);
5279           }
5280         }
5281         set_mml(&opt->len, 1, 1);
5282       }
5283     }
5284     break;
5285 
5286   case NODE_CTYPE:
5287     {
5288       int min, max;
5289       int range;
5290 
5291       max = ONIGENC_MBC_MAXLEN_DIST(enc);
5292 
5293       if (max == 1) {
5294         min = 1;
5295 
5296         switch (CTYPE_(node)->ctype) {
5297         case CTYPE_ANYCHAR:
5298           break;
5299 
5300         case ONIGENC_CTYPE_WORD:
5301           range = CTYPE_(node)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE;
5302           if (CTYPE_(node)->not != 0) {
5303             for (i = 0; i < range; i++) {
5304               if (! ONIGENC_IS_CODE_WORD(enc, i)) {
5305                 add_char_opt_map(&opt->map, (UChar )i, enc);
5306               }
5307             }
5308             for (i = range; i < SINGLE_BYTE_SIZE; i++) {
5309               add_char_opt_map(&opt->map, (UChar )i, enc);
5310             }
5311           }
5312           else {
5313             for (i = 0; i < range; i++) {
5314               if (ONIGENC_IS_CODE_WORD(enc, i)) {
5315                 add_char_opt_map(&opt->map, (UChar )i, enc);
5316               }
5317             }
5318           }
5319           break;
5320         }
5321       }
5322       else {
5323         min = ONIGENC_MBC_MINLEN(enc);
5324       }
5325       set_mml(&opt->len, min, max);
5326     }
5327     break;
5328 
5329   case NODE_ANCHOR:
5330     switch (ANCHOR_(node)->type) {
5331     case ANCHOR_BEGIN_BUF:
5332     case ANCHOR_BEGIN_POSITION:
5333     case ANCHOR_BEGIN_LINE:
5334     case ANCHOR_END_BUF:
5335     case ANCHOR_SEMI_END_BUF:
5336     case ANCHOR_END_LINE:
5337     case ANCHOR_PREC_READ_NOT:
5338     case ANCHOR_LOOK_BEHIND:
5339       add_opt_anc_info(&opt->anc, ANCHOR_(node)->type);
5340       break;
5341 
5342     case ANCHOR_PREC_READ:
5343       {
5344         r = optimize_nodes(NODE_BODY(node), &xo, env);
5345         if (r == 0) {
5346           if (xo.exb.len > 0)
5347             copy_opt_exact(&opt->expr, &xo.exb);
5348           else if (xo.exm.len > 0)
5349             copy_opt_exact(&opt->expr, &xo.exm);
5350 
5351           opt->expr.reach_end = 0;
5352 
5353           if (xo.map.value > 0)
5354             copy_opt_map(&opt->map, &xo.map);
5355         }
5356       }
5357       break;
5358 
5359     case ANCHOR_LOOK_BEHIND_NOT:
5360       break;
5361     }
5362     break;
5363 
5364   case NODE_BACKREF:
5365     if (! NODE_IS_CHECKER(node)) {
5366       int* backs;
5367       OnigLen min, max, tmin, tmax;
5368       MemEnv* mem_env = SCANENV_MEMENV(env->scan_env);
5369       BackRefNode* br = BACKREF_(node);
5370 
5371       if (NODE_IS_RECURSION(node)) {
5372         set_mml(&opt->len, 0, INFINITE_LEN);
5373         break;
5374       }
5375       backs = BACKREFS_P(br);
5376       min = tree_min_len(mem_env[backs[0]].node, env->scan_env);
5377       max = tree_max_len(mem_env[backs[0]].node, env->scan_env);
5378       for (i = 1; i < br->back_num; i++) {
5379         tmin = tree_min_len(mem_env[backs[i]].node, env->scan_env);
5380         tmax = tree_max_len(mem_env[backs[i]].node, env->scan_env);
5381         if (min > tmin) min = tmin;
5382         if (max < tmax) max = tmax;
5383       }
5384       set_mml(&opt->len, min, max);
5385     }
5386     break;
5387 
5388 #ifdef USE_CALL
5389   case NODE_CALL:
5390     if (NODE_IS_RECURSION(node))
5391       set_mml(&opt->len, 0, INFINITE_LEN);
5392     else {
5393       OnigOptionType save = env->options;
5394       env->options = ENCLOSURE_(NODE_BODY(node))->o.options;
5395       r = optimize_nodes(NODE_BODY(node), opt, env);
5396       env->options = save;
5397     }
5398     break;
5399 #endif
5400 
5401   case NODE_QUANT:
5402     {
5403       OnigLen min, max;
5404       QuantNode* qn = QUANT_(node);
5405 
5406       r = optimize_nodes(NODE_BODY(node), &xo, env);
5407       if (r != 0) break;
5408 
5409       if (qn->lower > 0) {
5410         copy_node_opt_info(opt, &xo);
5411         if (xo.exb.len > 0) {
5412           if (xo.exb.reach_end) {
5413             for (i = 2; i <= qn->lower && ! is_full_opt_exact(&opt->exb); i++) {
5414               int rc = concat_opt_exact(&opt->exb, &xo.exb, enc);
5415               if (rc > 0) break;
5416             }
5417             if (i < qn->lower) opt->exb.reach_end = 0;
5418           }
5419         }
5420 
5421         if (qn->lower != qn->upper) {
5422           opt->exb.reach_end = 0;
5423           opt->exm.reach_end = 0;
5424         }
5425         if (qn->lower > 1)
5426           opt->exm.reach_end = 0;
5427       }
5428 
5429       if (IS_REPEAT_INFINITE(qn->upper)) {
5430         if (env->mmd.max == 0 &&
5431             NODE_IS_ANYCHAR(NODE_BODY(node)) && qn->greedy != 0) {
5432           if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), env)))
5433             add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF_ML);
5434           else
5435             add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF);
5436         }
5437 
5438         max = (xo.len.max > 0 ? INFINITE_LEN : 0);
5439       }
5440       else {
5441         max = distance_multiply(xo.len.max, qn->upper);
5442       }
5443 
5444       min = distance_multiply(xo.len.min, qn->lower);
5445       set_mml(&opt->len, min, max);
5446     }
5447     break;
5448 
5449   case NODE_ENCLOSURE:
5450     {
5451       EnclosureNode* en = ENCLOSURE_(node);
5452 
5453       switch (en->type) {
5454       case ENCLOSURE_OPTION:
5455         {
5456           OnigOptionType save = env->options;
5457 
5458           env->options = en->o.options;
5459           r = optimize_nodes(NODE_BODY(node), opt, env);
5460           env->options = save;
5461         }
5462         break;
5463 
5464       case ENCLOSURE_MEMORY:
5465 #ifdef USE_CALL
5466         en->opt_count++;
5467         if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
5468           OnigLen min, max;
5469 
5470           min = 0;
5471           max = INFINITE_LEN;
5472           if (NODE_IS_MIN_FIXED(node)) min = en->min_len;
5473           if (NODE_IS_MAX_FIXED(node)) max = en->max_len;
5474           set_mml(&opt->len, min, max);
5475         }
5476         else
5477 #endif
5478           {
5479             r = optimize_nodes(NODE_BODY(node), opt, env);
5480             if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF_MASK)) {
5481               if (MEM_STATUS_AT0(env->scan_env->backrefed_mem, en->m.regnum))
5482                 remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF_MASK);
5483             }
5484           }
5485         break;
5486 
5487       case ENCLOSURE_STOP_BACKTRACK:
5488         r = optimize_nodes(NODE_BODY(node), opt, env);
5489         break;
5490 
5491       case ENCLOSURE_IF_ELSE:
5492         {
5493           OptEnv nenv;
5494 
5495           copy_opt_env(&nenv, env);
5496           r = optimize_nodes(NODE_ENCLOSURE_BODY(en), &xo, &nenv);
5497           if (r == 0) {
5498             add_mml(&nenv.mmd, &xo.len);
5499             concat_left_node_opt_info(enc, opt, &xo);
5500             if (IS_NOT_NULL(en->te.Then)) {
5501               r = optimize_nodes(en->te.Then, &xo, &nenv);
5502               if (r == 0) {
5503                 concat_left_node_opt_info(enc, opt, &xo);
5504               }
5505             }
5506 
5507             if (IS_NOT_NULL(en->te.Else)) {
5508               r = optimize_nodes(en->te.Else, &xo, env);
5509               if (r == 0)
5510                 alt_merge_node_opt_info(opt, &xo, env);
5511             }
5512           }
5513         }
5514         break;
5515       }
5516     }
5517     break;
5518 
5519   case NODE_GIMMICK:
5520     break;
5521 
5522   default:
5523 #ifdef ONIG_DEBUG
5524     fprintf(stderr, "optimize_nodes: undefined node type %d\n", NODE_TYPE(node));
5525 #endif
5526     r = ONIGERR_TYPE_BUG;
5527     break;
5528   }
5529 
5530   return r;
5531 }
5532 
5533 static int
set_optimize_exact(regex_t * reg,OptExact * e)5534 set_optimize_exact(regex_t* reg, OptExact* e)
5535 {
5536   int r;
5537 
5538   if (e->len == 0) return 0;
5539 
5540   if (e->ignore_case) {
5541     reg->exact = (UChar* )xmalloc(e->len);
5542     CHECK_NULL_RETURN_MEMERR(reg->exact);
5543     xmemcpy(reg->exact, e->s, e->len);
5544     reg->exact_end = reg->exact + e->len;
5545     reg->optimize = OPTIMIZE_EXACT_IC;
5546   }
5547   else {
5548     int allow_reverse;
5549 
5550     reg->exact = onigenc_strdup(reg->enc, e->s, e->s + e->len);
5551     CHECK_NULL_RETURN_MEMERR(reg->exact);
5552     reg->exact_end = reg->exact + e->len;
5553 
5554     allow_reverse =
5555       ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
5556 
5557     if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5558       r = set_bm_skip(reg->exact, reg->exact_end, reg->enc,
5559                       reg->map, &(reg->int_map));
5560       if (r != 0) return r;
5561 
5562       reg->optimize = (allow_reverse != 0
5563                        ? OPTIMIZE_EXACT_BM : OPTIMIZE_EXACT_BM_NO_REV);
5564     }
5565     else {
5566       reg->optimize = OPTIMIZE_EXACT;
5567     }
5568   }
5569 
5570   reg->dmin = e->mmd.min;
5571   reg->dmax = e->mmd.max;
5572 
5573   if (reg->dmin != INFINITE_LEN) {
5574     reg->threshold_len = reg->dmin + (int )(reg->exact_end - reg->exact);
5575   }
5576 
5577   return 0;
5578 }
5579 
5580 static void
set_optimize_map(regex_t * reg,OptMap * m)5581 set_optimize_map(regex_t* reg, OptMap* m)
5582 {
5583   int i;
5584 
5585   for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5586     reg->map[i] = m->map[i];
5587 
5588   reg->optimize   = OPTIMIZE_MAP;
5589   reg->dmin       = m->mmd.min;
5590   reg->dmax       = m->mmd.max;
5591 
5592   if (reg->dmin != INFINITE_LEN) {
5593     reg->threshold_len = reg->dmin + 1;
5594   }
5595 }
5596 
5597 static void
set_sub_anchor(regex_t * reg,OptAnc * anc)5598 set_sub_anchor(regex_t* reg, OptAnc* anc)
5599 {
5600   reg->sub_anchor |= anc->left  & ANCHOR_BEGIN_LINE;
5601   reg->sub_anchor |= anc->right & ANCHOR_END_LINE;
5602 }
5603 
5604 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5605 static void print_optimize_info(FILE* f, regex_t* reg);
5606 #endif
5607 
5608 static int
set_optimize_info_from_tree(Node * node,regex_t * reg,ScanEnv * scan_env)5609 set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
5610 {
5611   int r;
5612   NodeOpt opt;
5613   OptEnv env;
5614 
5615   env.enc            = reg->enc;
5616   env.options        = reg->options;
5617   env.case_fold_flag = reg->case_fold_flag;
5618   env.scan_env       = scan_env;
5619   clear_mml(&env.mmd);
5620 
5621   r = optimize_nodes(node, &opt, &env);
5622   if (r != 0) return r;
5623 
5624   reg->anchor = opt.anc.left & (ANCHOR_BEGIN_BUF |
5625         ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_INF | ANCHOR_ANYCHAR_INF_ML |
5626         ANCHOR_LOOK_BEHIND);
5627 
5628   if ((opt.anc.left & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0)
5629     reg->anchor &= ~ANCHOR_ANYCHAR_INF_ML;
5630 
5631   reg->anchor |= opt.anc.right & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF |
5632        ANCHOR_PREC_READ_NOT);
5633 
5634   if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
5635     reg->anchor_dmin = opt.len.min;
5636     reg->anchor_dmax = opt.len.max;
5637   }
5638 
5639   if (opt.exb.len > 0 || opt.exm.len > 0) {
5640     select_opt_exact(reg->enc, &opt.exb, &opt.exm);
5641     if (opt.map.value > 0 && comp_opt_exact_or_map(&opt.exb, &opt.map) > 0) {
5642       goto set_map;
5643     }
5644     else {
5645       r = set_optimize_exact(reg, &opt.exb);
5646       set_sub_anchor(reg, &opt.exb.anc);
5647     }
5648   }
5649   else if (opt.map.value > 0) {
5650   set_map:
5651     set_optimize_map(reg, &opt.map);
5652     set_sub_anchor(reg, &opt.map.anc);
5653   }
5654   else {
5655     reg->sub_anchor |= opt.anc.left & ANCHOR_BEGIN_LINE;
5656     if (opt.len.max == 0)
5657       reg->sub_anchor |= opt.anc.right & ANCHOR_END_LINE;
5658   }
5659 
5660 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5661   print_optimize_info(stderr, reg);
5662 #endif
5663   return r;
5664 }
5665 
5666 static void
clear_optimize_info(regex_t * reg)5667 clear_optimize_info(regex_t* reg)
5668 {
5669   reg->optimize      = OPTIMIZE_NONE;
5670   reg->anchor        = 0;
5671   reg->anchor_dmin   = 0;
5672   reg->anchor_dmax   = 0;
5673   reg->sub_anchor    = 0;
5674   reg->exact_end     = (UChar* )NULL;
5675   reg->threshold_len = 0;
5676   if (IS_NOT_NULL(reg->exact)) {
5677     xfree(reg->exact);
5678     reg->exact = (UChar* )NULL;
5679   }
5680 }
5681 
5682 #ifdef ONIG_DEBUG
5683 
print_enc_string(FILE * fp,OnigEncoding enc,const UChar * s,const UChar * end)5684 static void print_enc_string(FILE* fp, OnigEncoding enc,
5685                              const UChar *s, const UChar *end)
5686 {
5687   fprintf(fp, "\nPATTERN: /");
5688 
5689   if (ONIGENC_MBC_MINLEN(enc) > 1) {
5690     const UChar *p;
5691     OnigCodePoint code;
5692 
5693     p = s;
5694     while (p < end) {
5695       code = ONIGENC_MBC_TO_CODE(enc, p, end);
5696       if (code >= 0x80) {
5697         fprintf(fp, " 0x%04x ", (int )code);
5698       }
5699       else {
5700         fputc((int )code, fp);
5701       }
5702 
5703       p += enclen(enc, p);
5704     }
5705   }
5706   else {
5707     while (s < end) {
5708       fputc((int )*s, fp);
5709       s++;
5710     }
5711   }
5712 
5713   fprintf(fp, "/\n");
5714 }
5715 
5716 #endif /* ONIG_DEBUG */
5717 
5718 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5719 
5720 static void
print_distance_range(FILE * f,OnigLen a,OnigLen b)5721 print_distance_range(FILE* f, OnigLen a, OnigLen b)
5722 {
5723   if (a == INFINITE_LEN)
5724     fputs("inf", f);
5725   else
5726     fprintf(f, "(%u)", a);
5727 
5728   fputs("-", f);
5729 
5730   if (b == INFINITE_LEN)
5731     fputs("inf", f);
5732   else
5733     fprintf(f, "(%u)", b);
5734 }
5735 
5736 static void
print_anchor(FILE * f,int anchor)5737 print_anchor(FILE* f, int anchor)
5738 {
5739   int q = 0;
5740 
5741   fprintf(f, "[");
5742 
5743   if (anchor & ANCHOR_BEGIN_BUF) {
5744     fprintf(f, "begin-buf");
5745     q = 1;
5746   }
5747   if (anchor & ANCHOR_BEGIN_LINE) {
5748     if (q) fprintf(f, ", ");
5749     q = 1;
5750     fprintf(f, "begin-line");
5751   }
5752   if (anchor & ANCHOR_BEGIN_POSITION) {
5753     if (q) fprintf(f, ", ");
5754     q = 1;
5755     fprintf(f, "begin-pos");
5756   }
5757   if (anchor & ANCHOR_END_BUF) {
5758     if (q) fprintf(f, ", ");
5759     q = 1;
5760     fprintf(f, "end-buf");
5761   }
5762   if (anchor & ANCHOR_SEMI_END_BUF) {
5763     if (q) fprintf(f, ", ");
5764     q = 1;
5765     fprintf(f, "semi-end-buf");
5766   }
5767   if (anchor & ANCHOR_END_LINE) {
5768     if (q) fprintf(f, ", ");
5769     q = 1;
5770     fprintf(f, "end-line");
5771   }
5772   if (anchor & ANCHOR_ANYCHAR_INF) {
5773     if (q) fprintf(f, ", ");
5774     q = 1;
5775     fprintf(f, "anychar-inf");
5776   }
5777   if (anchor & ANCHOR_ANYCHAR_INF_ML) {
5778     if (q) fprintf(f, ", ");
5779     fprintf(f, "anychar-inf-ml");
5780   }
5781 
5782   fprintf(f, "]");
5783 }
5784 
5785 static void
print_optimize_info(FILE * f,regex_t * reg)5786 print_optimize_info(FILE* f, regex_t* reg)
5787 {
5788   static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
5789                               "EXACT_IC", "MAP" };
5790 
5791   fprintf(f, "optimize: %s\n", on[reg->optimize]);
5792   fprintf(f, "  anchor: "); print_anchor(f, reg->anchor);
5793   if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
5794     print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
5795   fprintf(f, "\n");
5796 
5797   if (reg->optimize) {
5798     fprintf(f, "  sub anchor: "); print_anchor(f, reg->sub_anchor);
5799     fprintf(f, "\n");
5800   }
5801   fprintf(f, "\n");
5802 
5803   if (reg->exact) {
5804     UChar *p;
5805     fprintf(f, "exact: [");
5806     for (p = reg->exact; p < reg->exact_end; p++) {
5807       fputc(*p, f);
5808     }
5809     fprintf(f, "]: length: %ld\n", (reg->exact_end - reg->exact));
5810   }
5811   else if (reg->optimize & OPTIMIZE_MAP) {
5812     int c, i, n = 0;
5813 
5814     for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5815       if (reg->map[i]) n++;
5816 
5817     fprintf(f, "map: n=%d\n", n);
5818     if (n > 0) {
5819       c = 0;
5820       fputc('[', f);
5821       for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
5822         if (reg->map[i] != 0) {
5823           if (c > 0)  fputs(", ", f);
5824           c++;
5825           if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
5826               ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
5827             fputc(i, f);
5828           else
5829             fprintf(f, "%d", i);
5830         }
5831       }
5832       fprintf(f, "]\n");
5833     }
5834   }
5835 }
5836 #endif
5837 
5838 
5839 extern RegexExt*
onig_get_regex_ext(regex_t * reg)5840 onig_get_regex_ext(regex_t* reg)
5841 {
5842   if (IS_NULL(REG_EXTP(reg))) {
5843     RegexExt* ext = (RegexExt* )xmalloc(sizeof(*ext));
5844     if (IS_NULL(ext)) return 0;
5845 
5846     ext->pattern      = 0;
5847     ext->pattern_end  = 0;
5848 #ifdef USE_CALLOUT
5849     ext->tag_table    = 0;
5850     ext->callout_num  = 0;
5851     ext->callout_list_alloc = 0;
5852     ext->callout_list = 0;
5853 #endif
5854 
5855     REG_EXTPL(reg) = (void* )ext;
5856   }
5857 
5858   return REG_EXTP(reg);
5859 }
5860 
5861 static void
free_regex_ext(RegexExt * ext)5862 free_regex_ext(RegexExt* ext)
5863 {
5864   if (IS_NOT_NULL(ext)) {
5865     if (IS_NOT_NULL(ext->pattern))
5866       xfree((void* )ext->pattern);
5867 
5868 #ifdef USE_CALLOUT
5869     if (IS_NOT_NULL(ext->tag_table))
5870       onig_callout_tag_table_free(ext->tag_table);
5871 
5872     if (IS_NOT_NULL(ext->callout_list))
5873       onig_free_reg_callout_list(ext->callout_num, ext->callout_list);
5874 #endif
5875 
5876     xfree(ext);
5877   }
5878 }
5879 
5880 extern int
onig_ext_set_pattern(regex_t * reg,const UChar * pattern,const UChar * pattern_end)5881 onig_ext_set_pattern(regex_t* reg, const UChar* pattern, const UChar* pattern_end)
5882 {
5883   RegexExt* ext;
5884   UChar* s;
5885 
5886   ext = onig_get_regex_ext(reg);
5887   CHECK_NULL_RETURN_MEMERR(ext);
5888 
5889   s = onigenc_strdup(reg->enc, pattern, pattern_end);
5890   CHECK_NULL_RETURN_MEMERR(s);
5891 
5892   ext->pattern     = s;
5893   ext->pattern_end = s + (pattern_end - pattern);
5894 
5895   return ONIG_NORMAL;
5896 }
5897 
5898 
5899 extern void
onig_free_body(regex_t * reg)5900 onig_free_body(regex_t* reg)
5901 {
5902   if (IS_NOT_NULL(reg)) {
5903     if (IS_NOT_NULL(reg->p))                xfree(reg->p);
5904     if (IS_NOT_NULL(reg->exact))            xfree(reg->exact);
5905     if (IS_NOT_NULL(reg->int_map))          xfree(reg->int_map);
5906     if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);
5907     if (IS_NOT_NULL(reg->repeat_range))     xfree(reg->repeat_range);
5908     if (IS_NOT_NULL(REG_EXTP(reg))) {
5909       free_regex_ext(REG_EXTP(reg));
5910       REG_EXTPL(reg) = 0;
5911     }
5912 
5913     onig_names_free(reg);
5914   }
5915 }
5916 
5917 extern void
onig_free(regex_t * reg)5918 onig_free(regex_t* reg)
5919 {
5920   if (IS_NOT_NULL(reg)) {
5921     onig_free_body(reg);
5922     xfree(reg);
5923   }
5924 }
5925 
5926 #define REGEX_TRANSFER(to,from) do {\
5927   onig_free_body(to);\
5928   xmemcpy(to, from, sizeof(regex_t));\
5929   xfree(from);\
5930 } while (0)
5931 
5932 extern void
onig_transfer(regex_t * to,regex_t * from)5933 onig_transfer(regex_t* to, regex_t* from)
5934 {
5935   REGEX_TRANSFER(to, from);
5936 }
5937 
5938 
5939 #ifdef ONIG_DEBUG_PARSE
5940 static void print_tree P_((FILE* f, Node* node));
5941 #endif
5942 
5943 extern int
onig_compile(regex_t * reg,const UChar * pattern,const UChar * pattern_end,OnigErrorInfo * einfo)5944 onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5945              OnigErrorInfo* einfo)
5946 {
5947 #define COMPILE_INIT_SIZE  20
5948 
5949   int r, init_size;
5950   Node*  root;
5951   ScanEnv  scan_env;
5952 #ifdef USE_CALL
5953   UnsetAddrList  uslist;
5954 #endif
5955 
5956   root = 0;
5957   if (IS_NOT_NULL(einfo)) {
5958     einfo->enc = reg->enc;
5959     einfo->par = (UChar* )NULL;
5960   }
5961 
5962 #ifdef ONIG_DEBUG
5963   print_enc_string(stderr, reg->enc, pattern, pattern_end);
5964 #endif
5965 
5966   if (reg->alloc == 0) {
5967     init_size = (int )(pattern_end - pattern) * 2;
5968     if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
5969     r = BB_INIT(reg, init_size);
5970     if (r != 0) goto end;
5971   }
5972   else
5973     reg->used = 0;
5974 
5975   reg->num_mem            = 0;
5976   reg->num_repeat         = 0;
5977   reg->num_null_check     = 0;
5978   reg->repeat_range_alloc = 0;
5979   reg->repeat_range       = (OnigRepeatRange* )NULL;
5980 
5981   r = onig_parse_tree(&root, pattern, pattern_end, reg, &scan_env);
5982   if (r != 0) goto err;
5983 
5984   /* mixed use named group and no-named group */
5985   if (scan_env.num_named > 0 &&
5986       IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
5987       ! ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
5988     if (scan_env.num_named != scan_env.num_mem)
5989       r = disable_noname_group_capture(&root, reg, &scan_env);
5990     else
5991       r = numbered_ref_check(root);
5992 
5993     if (r != 0) goto err;
5994   }
5995 
5996   r = check_backrefs(root, &scan_env);
5997   if (r != 0) goto err;
5998 
5999 #ifdef USE_CALL
6000   if (scan_env.num_call > 0) {
6001     r = unset_addr_list_init(&uslist, scan_env.num_call);
6002     if (r != 0) goto err;
6003     scan_env.unset_addr_list = &uslist;
6004     r = setup_call(root, &scan_env, 0);
6005     if (r != 0) goto err_unset;
6006     r = setup_call2(root);
6007     if (r != 0) goto err_unset;
6008     r = recursive_call_check_trav(root, &scan_env, 0);
6009     if (r  < 0) goto err_unset;
6010     r = infinite_recursive_call_check_trav(root, &scan_env);
6011     if (r != 0) goto err_unset;
6012 
6013     setup_called_state(root, 0);
6014   }
6015 
6016   reg->num_call = scan_env.num_call;
6017 #endif
6018 
6019   r = setup_tree(root, reg, 0, &scan_env);
6020   if (r != 0) goto err_unset;
6021 
6022 #ifdef ONIG_DEBUG_PARSE
6023   print_tree(stderr, root);
6024 #endif
6025 
6026   reg->capture_history  = scan_env.capture_history;
6027   reg->bt_mem_start     = scan_env.bt_mem_start;
6028   reg->bt_mem_start    |= reg->capture_history;
6029   if (IS_FIND_CONDITION(reg->options))
6030     MEM_STATUS_ON_ALL(reg->bt_mem_end);
6031   else {
6032     reg->bt_mem_end  = scan_env.bt_mem_end;
6033     reg->bt_mem_end |= reg->capture_history;
6034   }
6035   reg->bt_mem_start |= reg->bt_mem_end;
6036 
6037   clear_optimize_info(reg);
6038 #ifndef ONIG_DONT_OPTIMIZE
6039   r = set_optimize_info_from_tree(root, reg, &scan_env);
6040   if (r != 0) goto err_unset;
6041 #endif
6042 
6043   if (IS_NOT_NULL(scan_env.mem_env_dynamic)) {
6044     xfree(scan_env.mem_env_dynamic);
6045     scan_env.mem_env_dynamic = (MemEnv* )NULL;
6046   }
6047 
6048   r = compile_tree(root, reg, &scan_env);
6049   if (r == 0) {
6050     if (scan_env.keep_num > 0) {
6051       r = add_opcode(reg, OP_UPDATE_VAR);
6052       if (r != 0) goto err;
6053       r = add_update_var_type(reg, UPDATE_VAR_KEEP_FROM_STACK_LAST);
6054       if (r != 0) goto err;
6055       r = add_mem_num(reg, 0 /* not used */);
6056       if (r != 0) goto err;
6057     }
6058 
6059     r = add_opcode(reg, OP_END);
6060 #ifdef USE_CALL
6061     if (scan_env.num_call > 0) {
6062       r = fix_unset_addr_list(&uslist, reg);
6063       unset_addr_list_end(&uslist);
6064       if (r != 0) goto err;
6065     }
6066 #endif
6067 
6068     if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0)
6069 #ifdef USE_CALLOUT
6070         || (IS_NOT_NULL(REG_EXTP(reg)) && REG_EXTP(reg)->callout_num != 0)
6071 #endif
6072         )
6073       reg->stack_pop_level = STACK_POP_LEVEL_ALL;
6074     else {
6075       if (reg->bt_mem_start != 0)
6076         reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
6077       else
6078         reg->stack_pop_level = STACK_POP_LEVEL_FREE;
6079     }
6080   }
6081 #ifdef USE_CALL
6082   else if (scan_env.num_call > 0) {
6083     unset_addr_list_end(&uslist);
6084   }
6085 #endif
6086   onig_node_free(root);
6087 
6088 #ifdef ONIG_DEBUG_COMPILE
6089   onig_print_names(stderr, reg);
6090   onig_print_compiled_byte_code_list(stderr, reg);
6091 #endif
6092 
6093  end:
6094   return r;
6095 
6096  err_unset:
6097 #ifdef USE_CALL
6098   if (scan_env.num_call > 0) {
6099     unset_addr_list_end(&uslist);
6100   }
6101 #endif
6102  err:
6103   if (IS_NOT_NULL(scan_env.error)) {
6104     if (IS_NOT_NULL(einfo)) {
6105       einfo->par     = scan_env.error;
6106       einfo->par_end = scan_env.error_end;
6107     }
6108   }
6109 
6110   onig_node_free(root);
6111   if (IS_NOT_NULL(scan_env.mem_env_dynamic))
6112       xfree(scan_env.mem_env_dynamic);
6113   return r;
6114 }
6115 
6116 
6117 static int onig_inited = 0;
6118 
6119 extern int
onig_reg_init(regex_t * reg,OnigOptionType option,OnigCaseFoldType case_fold_flag,OnigEncoding enc,OnigSyntaxType * syntax)6120 onig_reg_init(regex_t* reg, OnigOptionType option, OnigCaseFoldType case_fold_flag,
6121               OnigEncoding enc, OnigSyntaxType* syntax)
6122 {
6123   int r;
6124 
6125   xmemset(reg, 0, sizeof(*reg));
6126 
6127   if (onig_inited == 0) {
6128 #if 0
6129     return ONIGERR_LIBRARY_IS_NOT_INITIALIZED;
6130 #else
6131     r = onig_initialize(&enc, 1);
6132     if (r != 0)
6133       return ONIGERR_FAIL_TO_INITIALIZE;
6134 
6135     onig_warning("You didn't call onig_initialize() explicitly");
6136 #endif
6137   }
6138 
6139   if (IS_NULL(reg))
6140     return ONIGERR_INVALID_ARGUMENT;
6141 
6142   if (ONIGENC_IS_UNDEF(enc))
6143     return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED;
6144 
6145   if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
6146       == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
6147     return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
6148   }
6149 
6150   if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
6151     option |= syntax->options;
6152     option &= ~ONIG_OPTION_SINGLELINE;
6153   }
6154   else
6155     option |= syntax->options;
6156 
6157   (reg)->enc              = enc;
6158   (reg)->options          = option;
6159   (reg)->syntax           = syntax;
6160   (reg)->optimize         = 0;
6161   (reg)->exact            = (UChar* )NULL;
6162   (reg)->int_map          = (int* )NULL;
6163   (reg)->int_map_backward = (int* )NULL;
6164   REG_EXTPL(reg) = NULL;
6165 
6166   (reg)->p                = (UChar* )NULL;
6167   (reg)->alloc            = 0;
6168   (reg)->used             = 0;
6169   (reg)->name_table       = (void* )NULL;
6170 
6171   (reg)->case_fold_flag   = case_fold_flag;
6172   return 0;
6173 }
6174 
6175 extern int
onig_new_without_alloc(regex_t * reg,const UChar * pattern,const UChar * pattern_end,OnigOptionType option,OnigEncoding enc,OnigSyntaxType * syntax,OnigErrorInfo * einfo)6176 onig_new_without_alloc(regex_t* reg,
6177                        const UChar* pattern, const UChar* pattern_end,
6178                        OnigOptionType option, OnigEncoding enc,
6179                        OnigSyntaxType* syntax, OnigErrorInfo* einfo)
6180 {
6181   int r;
6182 
6183   r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
6184   if (r != 0) return r;
6185 
6186   r = onig_compile(reg, pattern, pattern_end, einfo);
6187   return r;
6188 }
6189 
6190 extern int
onig_new(regex_t ** reg,const UChar * pattern,const UChar * pattern_end,OnigOptionType option,OnigEncoding enc,OnigSyntaxType * syntax,OnigErrorInfo * einfo)6191 onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
6192          OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
6193          OnigErrorInfo* einfo)
6194 {
6195   int r;
6196 
6197   *reg = (regex_t* )xmalloc(sizeof(regex_t));
6198   if (IS_NULL(*reg)) return ONIGERR_MEMORY;
6199 
6200   r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
6201   if (r != 0) goto err;
6202 
6203   r = onig_compile(*reg, pattern, pattern_end, einfo);
6204   if (r != 0) {
6205   err:
6206     onig_free(*reg);
6207     *reg = NULL;
6208   }
6209   return r;
6210 }
6211 
6212 extern int
onig_initialize(OnigEncoding encodings[],int n)6213 onig_initialize(OnigEncoding encodings[], int n)
6214 {
6215   int i;
6216   int r;
6217 
6218   if (onig_inited != 0)
6219     return 0;
6220 
6221   onigenc_init();
6222 
6223   onig_inited = 1;
6224 
6225   for (i = 0; i < n; i++) {
6226     OnigEncoding enc = encodings[i];
6227     r = onig_initialize_encoding(enc);
6228     if (r != 0)
6229       return r;
6230   }
6231 
6232   return ONIG_NORMAL;
6233 }
6234 
6235 typedef struct EndCallListItem {
6236   struct EndCallListItem* next;
6237   void (*func)(void);
6238 } EndCallListItemType;
6239 
6240 static EndCallListItemType* EndCallTop;
6241 
onig_add_end_call(void (* func)(void))6242 extern void onig_add_end_call(void (*func)(void))
6243 {
6244   EndCallListItemType* item;
6245 
6246   item = (EndCallListItemType* )xmalloc(sizeof(*item));
6247   if (item == 0) return ;
6248 
6249   item->next = EndCallTop;
6250   item->func = func;
6251 
6252   EndCallTop = item;
6253 }
6254 
6255 static void
exec_end_call_list(void)6256 exec_end_call_list(void)
6257 {
6258   EndCallListItemType* prev;
6259   void (*func)(void);
6260 
6261   while (EndCallTop != 0) {
6262     func = EndCallTop->func;
6263     (*func)();
6264 
6265     prev = EndCallTop;
6266     EndCallTop = EndCallTop->next;
6267     xfree(prev);
6268   }
6269 }
6270 
6271 extern int
onig_end(void)6272 onig_end(void)
6273 {
6274   exec_end_call_list();
6275 
6276 #ifdef USE_CALLOUT
6277   onig_global_callout_names_free();
6278 #endif
6279 
6280   onigenc_end();
6281 
6282   onig_inited = 0;
6283 
6284   return 0;
6285 }
6286 
6287 extern int
onig_is_in_code_range(const UChar * p,OnigCodePoint code)6288 onig_is_in_code_range(const UChar* p, OnigCodePoint code)
6289 {
6290   OnigCodePoint n, *data;
6291   OnigCodePoint low, high, x;
6292 
6293   GET_CODE_POINT(n, p);
6294   data = (OnigCodePoint* )p;
6295   data++;
6296 
6297   for (low = 0, high = n; low < high; ) {
6298     x = (low + high) >> 1;
6299     if (code > data[x * 2 + 1])
6300       low = x + 1;
6301     else
6302       high = x;
6303   }
6304 
6305   return ((low < n && code >= data[low * 2]) ? 1 : 0);
6306 }
6307 
6308 extern int
onig_is_code_in_cc_len(int elen,OnigCodePoint code,void * cc_arg)6309 onig_is_code_in_cc_len(int elen, OnigCodePoint code, /* CClassNode* */ void* cc_arg)
6310 {
6311   int found;
6312   CClassNode* cc = (CClassNode* )cc_arg;
6313 
6314   if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
6315     if (IS_NULL(cc->mbuf)) {
6316       found = 0;
6317     }
6318     else {
6319       found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
6320     }
6321   }
6322   else {
6323     found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
6324   }
6325 
6326   if (IS_NCCLASS_NOT(cc))
6327     return !found;
6328   else
6329     return found;
6330 }
6331 
6332 extern int
onig_is_code_in_cc(OnigEncoding enc,OnigCodePoint code,CClassNode * cc)6333 onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
6334 {
6335   int len;
6336 
6337   if (ONIGENC_MBC_MINLEN(enc) > 1) {
6338     len = 2;
6339   }
6340   else {
6341     len = ONIGENC_CODE_TO_MBCLEN(enc, code);
6342   }
6343   return onig_is_code_in_cc_len(len, code, cc);
6344 }
6345 
6346 
6347 #ifdef ONIG_DEBUG_PARSE
6348 
6349 static void
p_string(FILE * f,int len,UChar * s)6350 p_string(FILE* f, int len, UChar* s)
6351 {
6352   fputs(":", f);
6353   while (len-- > 0) { fputc(*s++, f); }
6354 }
6355 
6356 static void
Indent(FILE * f,int indent)6357 Indent(FILE* f, int indent)
6358 {
6359   int i;
6360   for (i = 0; i < indent; i++) putc(' ', f);
6361 }
6362 
6363 static void
print_indent_tree(FILE * f,Node * node,int indent)6364 print_indent_tree(FILE* f, Node* node, int indent)
6365 {
6366   int i;
6367   NodeType type;
6368   UChar* p;
6369   int add = 3;
6370 
6371   Indent(f, indent);
6372   if (IS_NULL(node)) {
6373     fprintf(f, "ERROR: null node!!!\n");
6374     exit (0);
6375   }
6376 
6377   type = NODE_TYPE(node);
6378   switch (type) {
6379   case NODE_LIST:
6380   case NODE_ALT:
6381     if (type == NODE_LIST)
6382       fprintf(f, "<list:%p>\n", node);
6383     else
6384       fprintf(f, "<alt:%p>\n", node);
6385 
6386     print_indent_tree(f, NODE_CAR(node), indent + add);
6387     while (IS_NOT_NULL(node = NODE_CDR(node))) {
6388       if (NODE_TYPE(node) != type) {
6389         fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NODE_TYPE(node));
6390         exit(0);
6391       }
6392       print_indent_tree(f, NODE_CAR(node), indent + add);
6393     }
6394     break;
6395 
6396   case NODE_STRING:
6397     fprintf(f, "<string%s:%p>", (NODE_STRING_IS_RAW(node) ? "-raw" : ""), node);
6398     for (p = STR_(node)->s; p < STR_(node)->end; p++) {
6399       if (*p >= 0x20 && *p < 0x7f)
6400         fputc(*p, f);
6401       else {
6402         fprintf(f, " 0x%02x", *p);
6403       }
6404     }
6405     break;
6406 
6407   case NODE_CCLASS:
6408     fprintf(f, "<cclass:%p>", node);
6409     if (IS_NCCLASS_NOT(CCLASS_(node))) fputs(" not", f);
6410     if (CCLASS_(node)->mbuf) {
6411       BBuf* bbuf = CCLASS_(node)->mbuf;
6412       for (i = 0; i < bbuf->used; i++) {
6413         if (i > 0) fprintf(f, ",");
6414         fprintf(f, "%0x", bbuf->p[i]);
6415       }
6416     }
6417     break;
6418 
6419   case NODE_CTYPE:
6420     fprintf(f, "<ctype:%p> ", node);
6421     switch (CTYPE_(node)->ctype) {
6422     case CTYPE_ANYCHAR:
6423       fprintf(f, "<anychar:%p>", node);
6424       break;
6425 
6426     case ONIGENC_CTYPE_WORD:
6427       if (CTYPE_(node)->not != 0)
6428         fputs("not word", f);
6429       else
6430         fputs("word",     f);
6431 
6432       if (CTYPE_(node)->ascii_mode != 0)
6433         fputs(" (ascii)", f);
6434 
6435       break;
6436 
6437     default:
6438       fprintf(f, "ERROR: undefined ctype.\n");
6439       exit(0);
6440     }
6441     break;
6442 
6443   case NODE_ANCHOR:
6444     fprintf(f, "<anchor:%p> ", node);
6445     switch (ANCHOR_(node)->type) {
6446     case ANCHOR_BEGIN_BUF:        fputs("begin buf",      f); break;
6447     case ANCHOR_END_BUF:          fputs("end buf",        f); break;
6448     case ANCHOR_BEGIN_LINE:       fputs("begin line",     f); break;
6449     case ANCHOR_END_LINE:         fputs("end line",       f); break;
6450     case ANCHOR_SEMI_END_BUF:     fputs("semi end buf",   f); break;
6451     case ANCHOR_BEGIN_POSITION:   fputs("begin position", f); break;
6452 
6453     case ANCHOR_WORD_BOUNDARY:    fputs("word boundary",     f); break;
6454     case ANCHOR_NO_WORD_BOUNDARY: fputs("not word boundary", f); break;
6455 #ifdef USE_WORD_BEGIN_END
6456     case ANCHOR_WORD_BEGIN:       fputs("word begin", f);     break;
6457     case ANCHOR_WORD_END:         fputs("word end", f);       break;
6458 #endif
6459     case ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
6460       fputs("extended-grapheme-cluster boundary", f); break;
6461     case ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
6462       fputs("no-extended-grapheme-cluster boundary", f); break;
6463     case ANCHOR_PREC_READ:
6464       fprintf(f, "prec read\n");
6465       print_indent_tree(f, NODE_BODY(node), indent + add);
6466       break;
6467     case ANCHOR_PREC_READ_NOT:
6468       fprintf(f, "prec read not\n");
6469       print_indent_tree(f, NODE_BODY(node), indent + add);
6470       break;
6471     case ANCHOR_LOOK_BEHIND:
6472       fprintf(f, "look behind\n");
6473       print_indent_tree(f, NODE_BODY(node), indent + add);
6474       break;
6475     case ANCHOR_LOOK_BEHIND_NOT:
6476       fprintf(f, "look behind not\n");
6477       print_indent_tree(f, NODE_BODY(node), indent + add);
6478       break;
6479 
6480     default:
6481       fprintf(f, "ERROR: undefined anchor type.\n");
6482       break;
6483     }
6484     break;
6485 
6486   case NODE_BACKREF:
6487     {
6488       int* p;
6489       BackRefNode* br = BACKREF_(node);
6490       p = BACKREFS_P(br);
6491       fprintf(f, "<backref%s:%p>", NODE_IS_CHECKER(node) ? "-checker" : "", node);
6492       for (i = 0; i < br->back_num; i++) {
6493         if (i > 0) fputs(", ", f);
6494         fprintf(f, "%d", p[i]);
6495       }
6496     }
6497     break;
6498 
6499 #ifdef USE_CALL
6500   case NODE_CALL:
6501     {
6502       CallNode* cn = CALL_(node);
6503       fprintf(f, "<call:%p>", node);
6504       p_string(f, cn->name_end - cn->name, cn->name);
6505     }
6506     break;
6507 #endif
6508 
6509   case NODE_QUANT:
6510     fprintf(f, "<quantifier:%p>{%d,%d}%s\n", node,
6511             QUANT_(node)->lower, QUANT_(node)->upper,
6512             (QUANT_(node)->greedy ? "" : "?"));
6513     print_indent_tree(f, NODE_BODY(node), indent + add);
6514     break;
6515 
6516   case NODE_ENCLOSURE:
6517     fprintf(f, "<enclosure:%p> ", node);
6518     switch (ENCLOSURE_(node)->type) {
6519     case ENCLOSURE_OPTION:
6520       fprintf(f, "option:%d", ENCLOSURE_(node)->o.options);
6521       break;
6522     case ENCLOSURE_MEMORY:
6523       fprintf(f, "memory:%d", ENCLOSURE_(node)->m.regnum);
6524       break;
6525     case ENCLOSURE_STOP_BACKTRACK:
6526       fprintf(f, "stop-bt");
6527       break;
6528 
6529     default:
6530       break;
6531     }
6532     fprintf(f, "\n");
6533     print_indent_tree(f, NODE_BODY(node), indent + add);
6534     break;
6535 
6536   case NODE_GIMMICK:
6537     fprintf(f, "<gimmick:%p> ", node);
6538     switch (GIMMICK_(node)->type) {
6539     case GIMMICK_FAIL:
6540       fprintf(f, "fail");
6541       break;
6542     case GIMMICK_KEEP:
6543       fprintf(f, "keep:%d", GIMMICK_(node)->id);
6544       break;
6545     case GIMMICK_SAVE:
6546       fprintf(f, "save:%d:%d", GIMMICK_(node)->detail_type, GIMMICK_(node)->id);
6547       break;
6548     case GIMMICK_UPDATE_VAR:
6549       fprintf(f, "update_var:%d:%d", GIMMICK_(node)->detail_type, GIMMICK_(node)->id);
6550       break;
6551 #ifdef USE_CALLOUT
6552     case GIMMICK_CALLOUT:
6553       switch (GIMMICK_(node)->detail_type) {
6554       case ONIG_CALLOUT_OF_CONTENTS:
6555         fprintf(f, "callout:contents:%d", GIMMICK_(node)->num);
6556         break;
6557       case ONIG_CALLOUT_OF_NAME:
6558         fprintf(f, "callout:name:%d:%d", GIMMICK_(node)->id, GIMMICK_(node)->num);
6559         break;
6560       }
6561 #endif
6562     }
6563     break;
6564 
6565   default:
6566     fprintf(f, "print_indent_tree: undefined node type %d\n", NODE_TYPE(node));
6567     break;
6568   }
6569 
6570   if (type != NODE_LIST && type != NODE_ALT && type != NODE_QUANT &&
6571       type != NODE_ENCLOSURE)
6572     fprintf(f, "\n");
6573   fflush(f);
6574 }
6575 
6576 static void
print_tree(FILE * f,Node * node)6577 print_tree(FILE* f, Node* node)
6578 {
6579   print_indent_tree(f, node, 0);
6580 }
6581 #endif
6582