1 /**********************************************************************
2   regcomp.c -  Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5  * Copyright (c) 2002-2019  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include "regparse.h"
31 
32 #define OPS_INIT_SIZE  8
33 
34 OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
35 
36 #if 0
37 typedef struct {
38   int  n;
39   int  alloc;
40   int* v;
41 } int_stack;
42 
43 static int
44 make_int_stack(int_stack** rs, int init_size)
45 {
46   int_stack* s;
47   int* v;
48 
49   *rs = 0;
50 
51   s = xmalloc(sizeof(*s));
52   if (IS_NULL(s)) return ONIGERR_MEMORY;
53 
54   v = (int* )xmalloc(sizeof(int) * init_size);
55   if (IS_NULL(v)) {
56     xfree(s);
57     return ONIGERR_MEMORY;
58   }
59 
60   s->n = 0;
61   s->alloc = init_size;
62   s->v = v;
63 
64   *rs = s;
65   return ONIG_NORMAL;
66 }
67 
68 static void
69 free_int_stack(int_stack* s)
70 {
71   if (IS_NOT_NULL(s)) {
72     if (IS_NOT_NULL(s->v))
73       xfree(s->v);
74     xfree(s);
75   }
76 }
77 
78 static int
79 int_stack_push(int_stack* s, int v)
80 {
81   if (s->n >= s->alloc) {
82     int new_size = s->alloc * 2;
83     int* nv = (int* )xrealloc(s->v, sizeof(int) * new_size, sizeof(int) * s->alloc);
84     if (IS_NULL(nv)) return ONIGERR_MEMORY;
85 
86     s->alloc = new_size;
87     s->v = nv;
88   }
89 
90   s->v[s->n] = v;
91   s->n++;
92   return ONIG_NORMAL;
93 }
94 
95 static int
96 int_stack_pop(int_stack* s)
97 {
98   int v;
99 
100 #ifdef ONIG_DEBUG
101   if (s->n <= 0) {
102     fprintf(stderr, "int_stack_pop: fail empty. %p\n", s);
103     return 0;
104   }
105 #endif
106 
107   v = s->v[s->n];
108   s->n--;
109   return v;
110 }
111 #endif
112 
113 static int
ops_init(regex_t * reg,int init_alloc_size)114 ops_init(regex_t* reg, int init_alloc_size)
115 {
116   Operation* p;
117   size_t size;
118 
119   if (init_alloc_size > 0) {
120     size = sizeof(Operation) * init_alloc_size;
121     p = (Operation* )xmalloc(size);
122     CHECK_NULL_RETURN_MEMERR(p);
123 #ifdef USE_DIRECT_THREADED_CODE
124     {
125       enum OpCode* cp;
126       size = sizeof(enum OpCode) * init_alloc_size;
127       cp = (enum OpCode* )xmalloc(size);
128       CHECK_NULL_RETURN_MEMERR(cp);
129       reg->ocs = cp;
130     }
131 #endif
132   }
133   else {
134     p  = (Operation* )0;
135 #ifdef USE_DIRECT_THREADED_CODE
136     reg->ocs = (enum OpCode* )0;
137 #endif
138   }
139 
140   reg->ops = p;
141   reg->ops_curr  = 0; /* !!! not yet done ops_new() */
142   reg->ops_alloc = init_alloc_size;
143   reg->ops_used  = 0;
144 
145   return ONIG_NORMAL;
146 }
147 
148 static int
ops_expand(regex_t * reg,int n)149 ops_expand(regex_t* reg, int n)
150 {
151 #define MIN_OPS_EXPAND_SIZE   4
152 
153 #ifdef USE_DIRECT_THREADED_CODE
154   enum OpCode* cp;
155 #endif
156   Operation* p;
157   size_t size;
158 
159   if (n <= 0) n = MIN_OPS_EXPAND_SIZE;
160 
161   n += reg->ops_alloc;
162 
163   size = sizeof(Operation) * n;
164   p = (Operation* )xrealloc(reg->ops, size, sizeof(Operation) * reg->ops_alloc);
165   CHECK_NULL_RETURN_MEMERR(p);
166 
167 #ifdef USE_DIRECT_THREADED_CODE
168   size = sizeof(enum OpCode) * n;
169   cp = (enum OpCode* )xrealloc(reg->ocs, size, sizeof(enum OpCode) * reg->ops_alloc);
170   CHECK_NULL_RETURN_MEMERR(cp);
171   reg->ocs = cp;
172 #endif
173 
174   reg->ops = p;
175   reg->ops_alloc = n;
176   if (reg->ops_used == 0)
177     reg->ops_curr = 0;
178   else
179     reg->ops_curr = reg->ops + (reg->ops_used - 1);
180 
181   return ONIG_NORMAL;
182 }
183 
184 static int
ops_new(regex_t * reg)185 ops_new(regex_t* reg)
186 {
187   int r;
188 
189   if (reg->ops_used >= reg->ops_alloc) {
190     r = ops_expand(reg, reg->ops_alloc);
191     if (r != ONIG_NORMAL) return r;
192   }
193 
194   reg->ops_curr = reg->ops + reg->ops_used;
195   reg->ops_used++;
196 
197   xmemset(reg->ops_curr, 0, sizeof(Operation));
198   return ONIG_NORMAL;
199 }
200 
201 static int
is_in_string_pool(regex_t * reg,UChar * s)202 is_in_string_pool(regex_t* reg, UChar* s)
203 {
204   return (s >= reg->string_pool && s < reg->string_pool_end);
205 }
206 
207 static void
ops_free(regex_t * reg)208 ops_free(regex_t* reg)
209 {
210   int i;
211 
212   if (IS_NULL(reg->ops)) return ;
213 
214   for (i = 0; i < (int )reg->ops_used; i++) {
215     enum OpCode opcode;
216     Operation* op;
217 
218     op = reg->ops + i;
219 
220 #ifdef USE_DIRECT_THREADED_CODE
221     opcode = *(reg->ocs + i);
222 #else
223     opcode = op->opcode;
224 #endif
225 
226     switch (opcode) {
227     case OP_EXACTMBN:
228       if (! is_in_string_pool(reg, op->exact_len_n.s))
229         xfree(op->exact_len_n.s);
230       break;
231     case OP_EXACTN: case OP_EXACTMB2N: case OP_EXACTMB3N: case OP_EXACTN_IC:
232       if (! is_in_string_pool(reg, op->exact_n.s))
233         xfree(op->exact_n.s);
234       break;
235     case OP_EXACT1: case OP_EXACT2: case OP_EXACT3: case OP_EXACT4:
236     case OP_EXACT5: case OP_EXACTMB2N1: case OP_EXACTMB2N2:
237     case OP_EXACTMB2N3: case OP_EXACT1_IC:
238       break;
239 
240     case OP_CCLASS_NOT: case OP_CCLASS:
241       xfree(op->cclass.bsp);
242       break;
243 
244     case OP_CCLASS_MB_NOT: case OP_CCLASS_MB:
245       xfree(op->cclass_mb.mb);
246       break;
247     case OP_CCLASS_MIX_NOT: case OP_CCLASS_MIX:
248       xfree(op->cclass_mix.mb);
249       xfree(op->cclass_mix.bsp);
250       break;
251 
252     case OP_BACKREF1: case OP_BACKREF2: case OP_BACKREF_N: case OP_BACKREF_N_IC:
253       break;
254     case OP_BACKREF_MULTI:      case OP_BACKREF_MULTI_IC:
255     case OP_BACKREF_WITH_LEVEL:
256     case OP_BACKREF_WITH_LEVEL_IC:
257     case OP_BACKREF_CHECK:
258     case OP_BACKREF_CHECK_WITH_LEVEL:
259       if (op->backref_general.num != 1)
260         xfree(op->backref_general.ns);
261       break;
262 
263     default:
264       break;
265     }
266   }
267 
268   xfree(reg->ops);
269 #ifdef USE_DIRECT_THREADED_CODE
270   xfree(reg->ocs);
271   reg->ocs = 0;
272 #endif
273 
274   reg->ops = 0;
275   reg->ops_curr  = 0;
276   reg->ops_alloc = 0;
277   reg->ops_used  = 0;
278 }
279 
280 static int
ops_calc_size_of_string_pool(regex_t * reg)281 ops_calc_size_of_string_pool(regex_t* reg)
282 {
283   int i;
284   int total;
285 
286   if (IS_NULL(reg->ops)) return 0;
287 
288   total = 0;
289   for (i = 0; i < (int )reg->ops_used; i++) {
290     enum OpCode opcode;
291     Operation* op;
292 
293     op = reg->ops + i;
294 #ifdef USE_DIRECT_THREADED_CODE
295     opcode = *(reg->ocs + i);
296 #else
297     opcode = op->opcode;
298 #endif
299 
300     switch (opcode) {
301     case OP_EXACTMBN:
302       total += op->exact_len_n.len * op->exact_len_n.n;
303       break;
304     case OP_EXACTN:
305     case OP_EXACTN_IC:
306       total += op->exact_n.n;
307       break;
308     case OP_EXACTMB2N:
309       total += op->exact_n.n * 2;
310       break;
311     case OP_EXACTMB3N:
312       total += op->exact_n.n * 3;
313       break;
314 
315     default:
316       break;
317     }
318   }
319 
320   return total;
321 }
322 
323 static int
ops_make_string_pool(regex_t * reg)324 ops_make_string_pool(regex_t* reg)
325 {
326   int i;
327   int len;
328   int size;
329   UChar* pool;
330   UChar* curr;
331 
332   size = ops_calc_size_of_string_pool(reg);
333   if (size <= 0) {
334     return 0;
335   }
336 
337   curr = pool = (UChar* )xmalloc((size_t )size);
338   CHECK_NULL_RETURN_MEMERR(pool);
339 
340   for (i = 0; i < (int )reg->ops_used; i++) {
341     enum OpCode opcode;
342     Operation* op;
343 
344     op = reg->ops + i;
345 #ifdef USE_DIRECT_THREADED_CODE
346     opcode = *(reg->ocs + i);
347 #else
348     opcode = op->opcode;
349 #endif
350 
351     switch (opcode) {
352     case OP_EXACTMBN:
353       len = op->exact_len_n.len * op->exact_len_n.n;
354       xmemcpy(curr, op->exact_len_n.s, len);
355       xfree(op->exact_len_n.s);
356       op->exact_len_n.s = curr;
357       curr += len;
358       break;
359     case OP_EXACTN:
360     case OP_EXACTN_IC:
361       len = op->exact_n.n;
362     copy:
363       xmemcpy(curr, op->exact_n.s, len);
364       xfree(op->exact_n.s);
365       op->exact_n.s = curr;
366       curr += len;
367       break;
368     case OP_EXACTMB2N:
369       len = op->exact_n.n * 2;
370       goto copy;
371       break;
372     case OP_EXACTMB3N:
373       len = op->exact_n.n * 3;
374       goto copy;
375       break;
376 
377     default:
378       break;
379     }
380   }
381 
382   reg->string_pool     = pool;
383   reg->string_pool_end = pool + size;
384   return 0;
385 }
386 
387 extern OnigCaseFoldType
onig_get_default_case_fold_flag(void)388 onig_get_default_case_fold_flag(void)
389 {
390   return OnigDefaultCaseFoldFlag;
391 }
392 
393 extern int
onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)394 onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
395 {
396   OnigDefaultCaseFoldFlag = case_fold_flag;
397   return 0;
398 }
399 
400 static int
int_multiply_cmp(int x,int y,int v)401 int_multiply_cmp(int x, int y, int v)
402 {
403   if (x == 0 || y == 0) return -1;
404 
405   if (x < INT_MAX / y) {
406     int xy = x * y;
407     if (xy > v) return 1;
408     else {
409       if (xy == v) return 0;
410       else return -1;
411     }
412   }
413   else
414     return 1;
415 }
416 
417 extern int
onig_positive_int_multiply(int x,int y)418 onig_positive_int_multiply(int x, int y)
419 {
420   if (x == 0 || y == 0) return 0;
421 
422   if (x < INT_MAX / y)
423     return x * y;
424   else
425     return -1;
426 }
427 
428 
429 static void
swap_node(Node * a,Node * b)430 swap_node(Node* a, Node* b)
431 {
432   Node c;
433 
434   c = *a; *a = *b; *b = c;
435 
436   if (NODE_TYPE(a) == NODE_STRING) {
437     StrNode* sn = STR_(a);
438     if (sn->capacity == 0) {
439       int len = (int )(sn->end - sn->s);
440       sn->s   = sn->buf;
441       sn->end = sn->s + len;
442     }
443   }
444 
445   if (NODE_TYPE(b) == NODE_STRING) {
446     StrNode* sn = STR_(b);
447     if (sn->capacity == 0) {
448       int len = (int )(sn->end - sn->s);
449       sn->s   = sn->buf;
450       sn->end = sn->s + len;
451     }
452   }
453 }
454 
455 static OnigLen
distance_add(OnigLen d1,OnigLen d2)456 distance_add(OnigLen d1, OnigLen d2)
457 {
458   if (d1 == INFINITE_LEN || d2 == INFINITE_LEN)
459     return INFINITE_LEN;
460   else {
461     if (d1 <= INFINITE_LEN - d2) return d1 + d2;
462     else return INFINITE_LEN;
463   }
464 }
465 
466 static OnigLen
distance_multiply(OnigLen d,int m)467 distance_multiply(OnigLen d, int m)
468 {
469   if (m == 0) return 0;
470 
471   if (d < INFINITE_LEN / m)
472     return d * m;
473   else
474     return INFINITE_LEN;
475 }
476 
477 static int
bitset_is_empty(BitSetRef bs)478 bitset_is_empty(BitSetRef bs)
479 {
480   int i;
481 
482   for (i = 0; i < (int )BITSET_SIZE; i++) {
483     if (bs[i] != 0) return 0;
484   }
485   return 1;
486 }
487 
488 #ifdef USE_CALL
489 
490 static int
unset_addr_list_init(UnsetAddrList * list,int size)491 unset_addr_list_init(UnsetAddrList* list, int size)
492 {
493   UnsetAddr* p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
494   CHECK_NULL_RETURN_MEMERR(p);
495 
496   list->num   = 0;
497   list->alloc = size;
498   list->us    = p;
499   return 0;
500 }
501 
502 static void
unset_addr_list_end(UnsetAddrList * list)503 unset_addr_list_end(UnsetAddrList* list)
504 {
505   if (IS_NOT_NULL(list->us))
506     xfree(list->us);
507 }
508 
509 static int
unset_addr_list_add(UnsetAddrList * list,int offset,struct _Node * node)510 unset_addr_list_add(UnsetAddrList* list, int offset, struct _Node* node)
511 {
512   UnsetAddr* p;
513   int size;
514 
515   if (list->num >= list->alloc) {
516     size = list->alloc * 2;
517     p = (UnsetAddr* )xrealloc(list->us, sizeof(UnsetAddr) * size, sizeof(UnsetAddr)* list->alloc);
518     CHECK_NULL_RETURN_MEMERR(p);
519     list->alloc = size;
520     list->us    = p;
521   }
522 
523   list->us[list->num].offset = offset;
524   list->us[list->num].target = node;
525   list->num++;
526   return 0;
527 }
528 #endif /* USE_CALL */
529 
530 
531 static int
add_op(regex_t * reg,int opcode)532 add_op(regex_t* reg, int opcode)
533 {
534   int r;
535 
536   r = ops_new(reg);
537   if (r != ONIG_NORMAL) return r;
538 
539 #ifdef USE_DIRECT_THREADED_CODE
540   *(reg->ocs + (reg->ops_curr - reg->ops)) = opcode;
541 #else
542   reg->ops_curr->opcode = opcode;
543 #endif
544 
545   return 0;
546 }
547 
548 static int compile_length_tree(Node* node, regex_t* reg);
549 static int compile_tree(Node* node, regex_t* reg, ScanEnv* env);
550 
551 
552 #define IS_NEED_STR_LEN_OP_EXACT(op) \
553    ((op) == OP_EXACTN    || (op) == OP_EXACTMB2N ||\
554     (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN  || (op) == OP_EXACTN_IC)
555 
556 static int
select_str_opcode(int mb_len,int str_len,int ignore_case)557 select_str_opcode(int mb_len, int str_len, int ignore_case)
558 {
559   int op;
560 
561   if (ignore_case) {
562     switch (str_len) {
563     case 1:  op = OP_EXACT1_IC; break;
564     default: op = OP_EXACTN_IC; break;
565     }
566   }
567   else {
568     switch (mb_len) {
569     case 1:
570       switch (str_len) {
571       case 1:  op = OP_EXACT1; break;
572       case 2:  op = OP_EXACT2; break;
573       case 3:  op = OP_EXACT3; break;
574       case 4:  op = OP_EXACT4; break;
575       case 5:  op = OP_EXACT5; break;
576       default: op = OP_EXACTN; break;
577       }
578       break;
579 
580     case 2:
581       switch (str_len) {
582       case 1:  op = OP_EXACTMB2N1; break;
583       case 2:  op = OP_EXACTMB2N2; break;
584       case 3:  op = OP_EXACTMB2N3; break;
585       default: op = OP_EXACTMB2N;  break;
586       }
587       break;
588 
589     case 3:
590       op = OP_EXACTMB3N;
591       break;
592 
593     default:
594       op = OP_EXACTMBN;
595       break;
596     }
597   }
598   return op;
599 }
600 
601 static int
is_strict_real_node(Node * node)602 is_strict_real_node(Node* node)
603 {
604   switch (NODE_TYPE(node)) {
605   case NODE_STRING:
606     {
607       StrNode* sn = STR_(node);
608       return (sn->end != sn->s);
609     }
610     break;
611 
612   case NODE_CCLASS:
613   case NODE_CTYPE:
614     return 1;
615     break;
616 
617   default:
618     return 0;
619     break;
620   }
621 }
622 
623 static int
compile_tree_empty_check(Node * node,regex_t * reg,int emptiness,ScanEnv * env)624 compile_tree_empty_check(Node* node, regex_t* reg, int emptiness, ScanEnv* env)
625 {
626   int r;
627   int saved_num_null_check = reg->num_null_check;
628 
629   if (emptiness != BODY_IS_NOT_EMPTY) {
630     r = add_op(reg, OP_EMPTY_CHECK_START);
631     if (r != 0) return r;
632     COP(reg)->empty_check_start.mem = reg->num_null_check; /* NULL CHECK ID */
633     reg->num_null_check++;
634   }
635 
636   r = compile_tree(node, reg, env);
637   if (r != 0) return r;
638 
639   if (emptiness != BODY_IS_NOT_EMPTY) {
640     if (emptiness == BODY_IS_EMPTY_POSSIBILITY)
641       r = add_op(reg, OP_EMPTY_CHECK_END);
642     else if (emptiness == BODY_IS_EMPTY_POSSIBILITY_MEM)
643       r = add_op(reg, OP_EMPTY_CHECK_END_MEMST);
644     else if (emptiness == BODY_IS_EMPTY_POSSIBILITY_REC)
645       r = add_op(reg, OP_EMPTY_CHECK_END_MEMST_PUSH);
646 
647     if (r != 0) return r;
648     COP(reg)->empty_check_end.mem = saved_num_null_check; /* NULL CHECK ID */
649   }
650   return r;
651 }
652 
653 #ifdef USE_CALL
654 static int
compile_call(CallNode * node,regex_t * reg,ScanEnv * env)655 compile_call(CallNode* node, regex_t* reg, ScanEnv* env)
656 {
657   int r;
658   int offset;
659 
660   r = add_op(reg, OP_CALL);
661   if (r != 0) return r;
662 
663   COP(reg)->call.addr = 0; /* dummy addr. */
664 
665   offset = COP_CURR_OFFSET_BYTES(reg, call.addr);
666   r = unset_addr_list_add(env->unset_addr_list, offset, NODE_CALL_BODY(node));
667   return r;
668 }
669 #endif
670 
671 static int
compile_tree_n_times(Node * node,int n,regex_t * reg,ScanEnv * env)672 compile_tree_n_times(Node* node, int n, regex_t* reg, ScanEnv* env)
673 {
674   int i, r;
675 
676   for (i = 0; i < n; i++) {
677     r = compile_tree(node, reg, env);
678     if (r != 0) return r;
679   }
680   return 0;
681 }
682 
683 static int
add_compile_string_length(UChar * s ARG_UNUSED,int mb_len,int str_len,regex_t * reg ARG_UNUSED,int ignore_case)684 add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, int str_len,
685                           regex_t* reg ARG_UNUSED, int ignore_case)
686 {
687   return 1;
688 }
689 
690 static int
add_compile_string(UChar * s,int mb_len,int str_len,regex_t * reg,int ignore_case)691 add_compile_string(UChar* s, int mb_len, int str_len,
692                    regex_t* reg, int ignore_case)
693 {
694   int op;
695   int r;
696   int byte_len;
697   UChar* p;
698   UChar* end;
699 
700   op = select_str_opcode(mb_len, str_len, ignore_case);
701   r = add_op(reg, op);
702   if (r != 0) return r;
703 
704   byte_len = mb_len * str_len;
705   end = s + byte_len;
706 
707   if (op == OP_EXACTMBN) {
708     p = onigenc_strdup(reg->enc, s, end);
709     CHECK_NULL_RETURN_MEMERR(p);
710 
711     COP(reg)->exact_len_n.len = mb_len;
712     COP(reg)->exact_len_n.n   = str_len;
713     COP(reg)->exact_len_n.s   = p;
714   }
715   else if (IS_NEED_STR_LEN_OP_EXACT(op)) {
716     p = onigenc_strdup(reg->enc, s, end);
717     CHECK_NULL_RETURN_MEMERR(p);
718 
719     if (op == OP_EXACTN_IC)
720       COP(reg)->exact_n.n = byte_len;
721     else
722       COP(reg)->exact_n.n = str_len;
723 
724     COP(reg)->exact_n.s = p;
725   }
726   else {
727     xmemcpy(COP(reg)->exact.s, s, (size_t )byte_len);
728     COP(reg)->exact.s[byte_len] = '\0';
729   }
730 
731   return 0;
732 }
733 
734 static int
compile_length_string_node(Node * node,regex_t * reg)735 compile_length_string_node(Node* node, regex_t* reg)
736 {
737   int rlen, r, len, prev_len, slen, ambig;
738   UChar *p, *prev;
739   StrNode* sn;
740   OnigEncoding enc = reg->enc;
741 
742   sn = STR_(node);
743   if (sn->end <= sn->s)
744     return 0;
745 
746   ambig = NODE_STRING_IS_AMBIG(node);
747 
748   p = prev = sn->s;
749   prev_len = enclen(enc, p);
750   p += prev_len;
751   slen = 1;
752   rlen = 0;
753 
754   for (; p < sn->end; ) {
755     len = enclen(enc, p);
756     if (len == prev_len) {
757       slen++;
758     }
759     else {
760       r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
761       rlen += r;
762       prev = p;
763       slen = 1;
764       prev_len = len;
765     }
766     p += len;
767   }
768 
769   r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
770   rlen += r;
771   return rlen;
772 }
773 
774 static int
compile_length_string_raw_node(StrNode * sn,regex_t * reg)775 compile_length_string_raw_node(StrNode* sn, regex_t* reg)
776 {
777   if (sn->end <= sn->s)
778     return 0;
779 
780   return add_compile_string_length(sn->s, 1 /* sb */, (int )(sn->end - sn->s),
781                                    reg, 0);
782 }
783 
784 static int
compile_string_node(Node * node,regex_t * reg)785 compile_string_node(Node* node, regex_t* reg)
786 {
787   int r, len, prev_len, slen, ambig;
788   UChar *p, *prev, *end;
789   StrNode* sn;
790   OnigEncoding enc = reg->enc;
791 
792   sn = STR_(node);
793   if (sn->end <= sn->s)
794     return 0;
795 
796   end = sn->end;
797   ambig = NODE_STRING_IS_AMBIG(node);
798 
799   p = prev = sn->s;
800   prev_len = enclen(enc, p);
801   p += prev_len;
802   slen = 1;
803 
804   for (; p < end; ) {
805     len = enclen(enc, p);
806     if (len == prev_len) {
807       slen++;
808     }
809     else {
810       r = add_compile_string(prev, prev_len, slen, reg, ambig);
811       if (r != 0) return r;
812 
813       prev  = p;
814       slen  = 1;
815       prev_len = len;
816     }
817 
818     p += len;
819   }
820 
821   return add_compile_string(prev, prev_len, slen, reg, ambig);
822 }
823 
824 static int
compile_string_raw_node(StrNode * sn,regex_t * reg)825 compile_string_raw_node(StrNode* sn, regex_t* reg)
826 {
827   if (sn->end <= sn->s)
828     return 0;
829 
830   return add_compile_string(sn->s, 1 /* sb */, (int )(sn->end - sn->s), reg, 0);
831 }
832 
833 static void*
set_multi_byte_cclass(BBuf * mbuf,regex_t * reg)834 set_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
835 {
836   size_t len;
837   void* p;
838 
839   len = (size_t )mbuf->used;
840   p = xmalloc(len);
841   if (IS_NULL(p)) return NULL;
842 
843   xmemcpy(p, mbuf->p, len);
844   return p;
845 }
846 
847 static int
compile_length_cclass_node(CClassNode * cc,regex_t * reg)848 compile_length_cclass_node(CClassNode* cc, regex_t* reg)
849 {
850   return 1;
851 }
852 
853 static int
compile_cclass_node(CClassNode * cc,regex_t * reg)854 compile_cclass_node(CClassNode* cc, regex_t* reg)
855 {
856   int r;
857 
858   if (IS_NULL(cc->mbuf)) {
859     r = add_op(reg, IS_NCCLASS_NOT(cc) ? OP_CCLASS_NOT : OP_CCLASS);
860     if (r != 0) return r;
861 
862     COP(reg)->cclass.bsp = xmalloc(SIZE_BITSET);
863     CHECK_NULL_RETURN_MEMERR(COP(reg)->cclass.bsp);
864     xmemcpy(COP(reg)->cclass.bsp, cc->bs, SIZE_BITSET);
865   }
866   else {
867     void* p;
868 
869     if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
870       r = add_op(reg, IS_NCCLASS_NOT(cc) ? OP_CCLASS_MB_NOT : OP_CCLASS_MB);
871       if (r != 0) return r;
872 
873       p = set_multi_byte_cclass(cc->mbuf, reg);
874       CHECK_NULL_RETURN_MEMERR(p);
875       COP(reg)->cclass_mb.mb = p;
876     }
877     else {
878       r = add_op(reg, IS_NCCLASS_NOT(cc) ? OP_CCLASS_MIX_NOT : OP_CCLASS_MIX);
879       if (r != 0) return r;
880 
881       COP(reg)->cclass_mix.bsp = xmalloc(SIZE_BITSET);
882       CHECK_NULL_RETURN_MEMERR(COP(reg)->cclass_mix.bsp);
883       xmemcpy(COP(reg)->cclass_mix.bsp, cc->bs, SIZE_BITSET);
884 
885       p = set_multi_byte_cclass(cc->mbuf, reg);
886       CHECK_NULL_RETURN_MEMERR(p);
887       COP(reg)->cclass_mix.mb = p;
888     }
889   }
890 
891   return 0;
892 }
893 
894 static int
entry_repeat_range(regex_t * reg,int id,int lower,int upper)895 entry_repeat_range(regex_t* reg, int id, int lower, int upper)
896 {
897 #define REPEAT_RANGE_ALLOC  4
898 
899   OnigRepeatRange* p;
900 
901   if (reg->repeat_range_alloc == 0) {
902     p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
903     CHECK_NULL_RETURN_MEMERR(p);
904     reg->repeat_range = p;
905     reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
906   }
907   else if (reg->repeat_range_alloc <= id) {
908     int n;
909     n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
910     p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
911                                     sizeof(OnigRepeatRange) * n,
912                                     sizeof(OnigRepeatRange) * reg->repeat_range_alloc);
913     CHECK_NULL_RETURN_MEMERR(p);
914     reg->repeat_range = p;
915     reg->repeat_range_alloc = n;
916   }
917   else {
918     p = reg->repeat_range;
919   }
920 
921   p[id].lower = lower;
922   p[id].upper = (IS_INFINITE_REPEAT(upper) ? 0x7fffffff : upper);
923   return 0;
924 }
925 
926 static int
compile_range_repeat_node(QuantNode * qn,int target_len,int emptiness,regex_t * reg,ScanEnv * env)927 compile_range_repeat_node(QuantNode* qn, int target_len, int emptiness,
928                           regex_t* reg, ScanEnv* env)
929 {
930   int r;
931   int num_repeat = reg->num_repeat++;
932 
933   r = add_op(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
934   if (r != 0) return r;
935 
936   COP(reg)->repeat.id   = num_repeat;
937   COP(reg)->repeat.addr = SIZE_INC_OP + target_len + SIZE_OP_REPEAT_INC;
938 
939   r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
940   if (r != 0) return r;
941 
942   r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env);
943   if (r != 0) return r;
944 
945   if (
946 #ifdef USE_CALL
947       NODE_IS_IN_MULTI_ENTRY(qn) ||
948 #endif
949       NODE_IS_IN_REAL_REPEAT(qn)) {
950     r = add_op(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
951   }
952   else {
953     r = add_op(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
954   }
955   if (r != 0) return r;
956 
957   COP(reg)->repeat_inc.id = num_repeat;
958   return r;
959 }
960 
961 static int
is_anychar_infinite_greedy(QuantNode * qn)962 is_anychar_infinite_greedy(QuantNode* qn)
963 {
964   if (qn->greedy && IS_INFINITE_REPEAT(qn->upper) &&
965       NODE_IS_ANYCHAR(NODE_QUANT_BODY(qn)))
966     return 1;
967   else
968     return 0;
969 }
970 
971 #define QUANTIFIER_EXPAND_LIMIT_SIZE   10
972 #define CKN_ON   (ckn > 0)
973 
974 static int
compile_length_quantifier_node(QuantNode * qn,regex_t * reg)975 compile_length_quantifier_node(QuantNode* qn, regex_t* reg)
976 {
977   int len, mod_tlen;
978   int infinite = IS_INFINITE_REPEAT(qn->upper);
979   enum BodyEmptyType emptiness = qn->emptiness;
980   int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);
981 
982   if (tlen < 0) return tlen;
983   if (tlen == 0) return 0;
984 
985   /* anychar repeat */
986   if (is_anychar_infinite_greedy(qn)) {
987     if (qn->lower <= 1 ||
988         int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0) {
989       if (IS_NOT_NULL(qn->next_head_exact))
990         return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
991       else
992         return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
993     }
994   }
995 
996   mod_tlen = tlen;
997   if (emptiness != BODY_IS_NOT_EMPTY)
998     mod_tlen += SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END;
999 
1000   if (infinite &&
1001       (qn->lower <= 1 ||
1002        int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
1003     if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1004       len = SIZE_OP_JUMP;
1005     }
1006     else {
1007       len = tlen * qn->lower;
1008     }
1009 
1010     if (qn->greedy) {
1011 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
1012       if (IS_NOT_NULL(qn->head_exact))
1013         len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
1014       else
1015 #endif
1016       if (IS_NOT_NULL(qn->next_head_exact))
1017         len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
1018       else
1019         len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
1020     }
1021     else
1022       len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
1023   }
1024   else if (qn->upper == 0) {
1025     if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
1026       len = SIZE_OP_JUMP + tlen;
1027     }
1028     else
1029       len = 0;
1030   }
1031   else if (!infinite && qn->greedy &&
1032            (qn->upper == 1 ||
1033             int_multiply_cmp(tlen + SIZE_OP_PUSH, qn->upper,
1034                              QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
1035     len = tlen * qn->lower;
1036     len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
1037   }
1038   else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1039     len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
1040   }
1041   else {
1042     len = SIZE_OP_REPEAT_INC + mod_tlen + SIZE_OP_REPEAT;
1043   }
1044 
1045   return len;
1046 }
1047 
1048 static int
compile_quantifier_node(QuantNode * qn,regex_t * reg,ScanEnv * env)1049 compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
1050 {
1051   int i, r, mod_tlen;
1052   int infinite = IS_INFINITE_REPEAT(qn->upper);
1053   enum BodyEmptyType emptiness = qn->emptiness;
1054   int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);
1055 
1056   if (tlen < 0) return tlen;
1057   if (tlen == 0) return 0;
1058 
1059   if (is_anychar_infinite_greedy(qn) &&
1060       (qn->lower <= 1 ||
1061        int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
1062     r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);
1063     if (r != 0) return r;
1064     if (IS_NOT_NULL(qn->next_head_exact)) {
1065       r = add_op(reg,
1066                  IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg)) ?
1067                  OP_ANYCHAR_ML_STAR_PEEK_NEXT : OP_ANYCHAR_STAR_PEEK_NEXT);
1068       if (r != 0) return r;
1069 
1070       COP(reg)->anychar_star_peek_next.c = STR_(qn->next_head_exact)->s[0];
1071       return 0;
1072     }
1073     else {
1074       r = add_op(reg,
1075                  IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg)) ?
1076                  OP_ANYCHAR_ML_STAR : OP_ANYCHAR_STAR);
1077       return r;
1078     }
1079   }
1080 
1081   mod_tlen = tlen;
1082   if (emptiness != BODY_IS_NOT_EMPTY)
1083     mod_tlen += SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END;
1084 
1085   if (infinite &&
1086       (qn->lower <= 1 ||
1087        int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
1088     int addr;
1089 
1090     if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1091       r = add_op(reg, OP_JUMP);
1092       if (r != 0) return r;
1093       if (qn->greedy) {
1094 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
1095         if (IS_NOT_NULL(qn->head_exact))
1096           COP(reg)->jump.addr = SIZE_OP_PUSH_OR_JUMP_EXACT1 + SIZE_INC_OP;
1097         else
1098 #endif
1099         if (IS_NOT_NULL(qn->next_head_exact))
1100           COP(reg)->jump.addr = SIZE_OP_PUSH_IF_PEEK_NEXT + SIZE_INC_OP;
1101         else
1102           COP(reg)->jump.addr = SIZE_OP_PUSH + SIZE_INC_OP;
1103       }
1104       else {
1105         COP(reg)->jump.addr = SIZE_OP_JUMP + SIZE_INC_OP;
1106       }
1107     }
1108     else {
1109       r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);
1110       if (r != 0) return r;
1111     }
1112 
1113     if (qn->greedy) {
1114 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
1115       if (IS_NOT_NULL(qn->head_exact)) {
1116         r = add_op(reg, OP_PUSH_OR_JUMP_EXACT1);
1117         if (r != 0) return r;
1118         COP(reg)->push_or_jump_exact1.addr = SIZE_INC_OP + mod_tlen + SIZE_OP_JUMP;
1119         COP(reg)->push_or_jump_exact1.c    = STR_(qn->head_exact)->s[0];
1120 
1121         r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env);
1122         if (r != 0) return r;
1123 
1124         addr = -(mod_tlen + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1);
1125       }
1126       else
1127 #endif
1128       if (IS_NOT_NULL(qn->next_head_exact)) {
1129         r = add_op(reg, OP_PUSH_IF_PEEK_NEXT);
1130         if (r != 0) return r;
1131         COP(reg)->push_if_peek_next.addr = SIZE_INC_OP + mod_tlen + SIZE_OP_JUMP;
1132         COP(reg)->push_if_peek_next.c    = STR_(qn->next_head_exact)->s[0];
1133 
1134         r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env);
1135         if (r != 0) return r;
1136 
1137         addr = -(mod_tlen + (int )SIZE_OP_PUSH_IF_PEEK_NEXT);
1138       }
1139       else {
1140         r = add_op(reg, OP_PUSH);
1141         if (r != 0) return r;
1142         COP(reg)->push.addr = SIZE_INC_OP + mod_tlen + SIZE_OP_JUMP;
1143 
1144         r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env);
1145         if (r != 0) return r;
1146 
1147         addr = -(mod_tlen + (int )SIZE_OP_PUSH);
1148       }
1149 
1150       r = add_op(reg, OP_JUMP);
1151       if (r != 0) return r;
1152       COP(reg)->jump.addr = addr;
1153     }
1154     else {
1155       r = add_op(reg, OP_JUMP);
1156       if (r != 0) return r;
1157       COP(reg)->jump.addr = mod_tlen + SIZE_INC_OP;
1158 
1159       r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env);
1160       if (r != 0) return r;
1161 
1162       r = add_op(reg, OP_PUSH);
1163       if (r != 0) return r;
1164       COP(reg)->push.addr = -mod_tlen;
1165     }
1166   }
1167   else if (qn->upper == 0) {
1168     if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
1169       r = add_op(reg, OP_JUMP);
1170       if (r != 0) return r;
1171       COP(reg)->jump.addr = tlen + SIZE_INC_OP;
1172 
1173       r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
1174     }
1175     else {
1176       /* Nothing output */
1177       r = 0;
1178     }
1179   }
1180   else if (! infinite && qn->greedy &&
1181            (qn->upper == 1 ||
1182             int_multiply_cmp(tlen + SIZE_OP_PUSH, qn->upper,
1183                              QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
1184     int n = qn->upper - qn->lower;
1185 
1186     r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);
1187     if (r != 0) return r;
1188 
1189     for (i = 0; i < n; i++) {
1190       int v = onig_positive_int_multiply(n - i, tlen + SIZE_OP_PUSH);
1191       if (v < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
1192 
1193       r = add_op(reg, OP_PUSH);
1194       if (r != 0) return r;
1195       COP(reg)->push.addr = v;
1196 
1197       r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
1198       if (r != 0) return r;
1199     }
1200   }
1201   else if (! qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1202     r = add_op(reg, OP_PUSH);
1203     if (r != 0) return r;
1204     COP(reg)->push.addr = SIZE_INC_OP + SIZE_OP_JUMP;
1205 
1206     r = add_op(reg, OP_JUMP);
1207     if (r != 0) return r;
1208     COP(reg)->jump.addr = tlen + SIZE_INC_OP;
1209 
1210     r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
1211   }
1212   else {
1213     r = compile_range_repeat_node(qn, mod_tlen, emptiness, reg, env);
1214   }
1215   return r;
1216 }
1217 
1218 static int
compile_length_option_node(BagNode * node,regex_t * reg)1219 compile_length_option_node(BagNode* node, regex_t* reg)
1220 {
1221   int tlen;
1222   OnigOptionType prev = reg->options;
1223 
1224   reg->options = node->o.options;
1225   tlen = compile_length_tree(NODE_BAG_BODY(node), reg);
1226   reg->options = prev;
1227 
1228   return tlen;
1229 }
1230 
1231 static int
compile_option_node(BagNode * node,regex_t * reg,ScanEnv * env)1232 compile_option_node(BagNode* node, regex_t* reg, ScanEnv* env)
1233 {
1234   int r;
1235   OnigOptionType prev = reg->options;
1236 
1237   reg->options = node->o.options;
1238   r = compile_tree(NODE_BAG_BODY(node), reg, env);
1239   reg->options = prev;
1240 
1241   return r;
1242 }
1243 
1244 static int
compile_length_bag_node(BagNode * node,regex_t * reg)1245 compile_length_bag_node(BagNode* node, regex_t* reg)
1246 {
1247   int len;
1248   int tlen;
1249 
1250   if (node->type == BAG_OPTION)
1251     return compile_length_option_node(node, reg);
1252 
1253   if (NODE_BAG_BODY(node)) {
1254     tlen = compile_length_tree(NODE_BAG_BODY(node), reg);
1255     if (tlen < 0) return tlen;
1256   }
1257   else
1258     tlen = 0;
1259 
1260   switch (node->type) {
1261   case BAG_MEMORY:
1262 #ifdef USE_CALL
1263 
1264     if (node->m.regnum == 0 && NODE_IS_CALLED(node)) {
1265       len = tlen + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
1266       return len;
1267     }
1268 
1269     if (NODE_IS_CALLED(node)) {
1270       len = SIZE_OP_MEMORY_START_PUSH + tlen
1271         + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
1272       if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))
1273         len += (NODE_IS_RECURSION(node)
1274                 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1275       else
1276         len += (NODE_IS_RECURSION(node)
1277                 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1278     }
1279     else if (NODE_IS_RECURSION(node)) {
1280       len = SIZE_OP_MEMORY_START_PUSH;
1281       len += tlen + (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum)
1282                      ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_REC);
1283     }
1284     else
1285 #endif
1286     {
1287       if (MEM_STATUS_AT0(reg->bt_mem_start, node->m.regnum))
1288         len = SIZE_OP_MEMORY_START_PUSH;
1289       else
1290         len = SIZE_OP_MEMORY_START;
1291 
1292       len += tlen + (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum)
1293                      ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
1294     }
1295     break;
1296 
1297   case BAG_STOP_BACKTRACK:
1298     if (NODE_IS_STRICT_REAL_REPEAT(node)) {
1299       int v;
1300       QuantNode* qn;
1301 
1302       qn = QUANT_(NODE_BAG_BODY(node));
1303       tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);
1304       if (tlen < 0) return tlen;
1305 
1306       v = onig_positive_int_multiply(qn->lower, tlen);
1307       if (v < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
1308       len = v + SIZE_OP_PUSH + tlen + SIZE_OP_POP_OUT + SIZE_OP_JUMP;
1309     }
1310     else {
1311       len = SIZE_OP_ATOMIC_START + tlen + SIZE_OP_ATOMIC_END;
1312     }
1313     break;
1314 
1315   case BAG_IF_ELSE:
1316     {
1317       Node* cond = NODE_BAG_BODY(node);
1318       Node* Then = node->te.Then;
1319       Node* Else = node->te.Else;
1320 
1321       len = compile_length_tree(cond, reg);
1322       if (len < 0) return len;
1323       len += SIZE_OP_PUSH;
1324       len += SIZE_OP_ATOMIC_START + SIZE_OP_ATOMIC_END;
1325 
1326       if (IS_NOT_NULL(Then)) {
1327         tlen = compile_length_tree(Then, reg);
1328         if (tlen < 0) return tlen;
1329         len += tlen;
1330       }
1331 
1332       len += SIZE_OP_JUMP + SIZE_OP_ATOMIC_END;
1333 
1334       if (IS_NOT_NULL(Else)) {
1335         tlen = compile_length_tree(Else, reg);
1336         if (tlen < 0) return tlen;
1337         len += tlen;
1338       }
1339     }
1340     break;
1341 
1342   case BAG_OPTION:
1343     /* never come here, but set for escape warning */
1344     len = 0;
1345     break;
1346   }
1347 
1348   return len;
1349 }
1350 
1351 static int get_char_len_node(Node* node, regex_t* reg, int* len);
1352 
1353 static int
compile_bag_memory_node(BagNode * node,regex_t * reg,ScanEnv * env)1354 compile_bag_memory_node(BagNode* node, regex_t* reg, ScanEnv* env)
1355 {
1356   int r;
1357   int len;
1358 
1359 #ifdef USE_CALL
1360   if (NODE_IS_CALLED(node)) {
1361     r = add_op(reg, OP_CALL);
1362     if (r != 0) return r;
1363 
1364     node->m.called_addr = COP_CURR_OFFSET(reg) + 1 + SIZE_OP_JUMP;
1365     NODE_STATUS_ADD(node, ADDR_FIXED);
1366     COP(reg)->call.addr = (int )node->m.called_addr;
1367 
1368     if (node->m.regnum == 0) {
1369       len = compile_length_tree(NODE_BAG_BODY(node), reg);
1370       len += SIZE_OP_RETURN;
1371 
1372       r = add_op(reg, OP_JUMP);
1373       if (r != 0) return r;
1374       COP(reg)->jump.addr = len + SIZE_INC_OP;
1375 
1376       r = compile_tree(NODE_BAG_BODY(node), reg, env);
1377       if (r != 0) return r;
1378 
1379       r = add_op(reg, OP_RETURN);
1380       return r;
1381     }
1382     else {
1383       len = compile_length_tree(NODE_BAG_BODY(node), reg);
1384       len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
1385       if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))
1386         len += (NODE_IS_RECURSION(node)
1387                 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1388       else
1389         len += (NODE_IS_RECURSION(node)
1390                 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1391 
1392       r = add_op(reg, OP_JUMP);
1393       if (r != 0) return r;
1394       COP(reg)->jump.addr = len + SIZE_INC_OP;
1395     }
1396   }
1397 #endif
1398 
1399   if (MEM_STATUS_AT0(reg->bt_mem_start, node->m.regnum))
1400     r = add_op(reg, OP_MEMORY_START_PUSH);
1401   else
1402     r = add_op(reg, OP_MEMORY_START);
1403   if (r != 0) return r;
1404   COP(reg)->memory_start.num = node->m.regnum;
1405 
1406   r = compile_tree(NODE_BAG_BODY(node), reg, env);
1407   if (r != 0) return r;
1408 
1409 #ifdef USE_CALL
1410   if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))
1411     r = add_op(reg, (NODE_IS_RECURSION(node)
1412                      ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
1413   else
1414     r = add_op(reg, (NODE_IS_RECURSION(node) ? OP_MEMORY_END_REC : OP_MEMORY_END));
1415   if (r != 0) return r;
1416   COP(reg)->memory_end.num = node->m.regnum;
1417 
1418   if (NODE_IS_CALLED(node)) {
1419     if (r != 0) return r;
1420     r = add_op(reg, OP_RETURN);
1421   }
1422 #else
1423   if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))
1424     r = add_op(reg, OP_MEMORY_END_PUSH);
1425   else
1426     r = add_op(reg, OP_MEMORY_END);
1427   if (r != 0) return r;
1428   COP(reg)->memory_end.num = node->m.regnum;
1429 #endif
1430 
1431   return r;
1432 }
1433 
1434 static int
compile_bag_node(BagNode * node,regex_t * reg,ScanEnv * env)1435 compile_bag_node(BagNode* node, regex_t* reg, ScanEnv* env)
1436 {
1437   int r, len;
1438 
1439   switch (node->type) {
1440   case BAG_MEMORY:
1441     r = compile_bag_memory_node(node, reg, env);
1442     break;
1443 
1444   case BAG_OPTION:
1445     r = compile_option_node(node, reg, env);
1446     break;
1447 
1448   case BAG_STOP_BACKTRACK:
1449     if (NODE_IS_STRICT_REAL_REPEAT(node)) {
1450       QuantNode* qn = QUANT_(NODE_BAG_BODY(node));
1451       r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);
1452       if (r != 0) return r;
1453 
1454       len = compile_length_tree(NODE_QUANT_BODY(qn), reg);
1455       if (len < 0) return len;
1456 
1457       r = add_op(reg, OP_PUSH);
1458       if (r != 0) return r;
1459       COP(reg)->push.addr = SIZE_INC_OP + len + SIZE_OP_POP_OUT + SIZE_OP_JUMP;
1460 
1461       r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
1462       if (r != 0) return r;
1463       r = add_op(reg, OP_POP_OUT);
1464       if (r != 0) return r;
1465 
1466       r = add_op(reg, OP_JUMP);
1467       if (r != 0) return r;
1468       COP(reg)->jump.addr = -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP_OUT);
1469     }
1470     else {
1471       r = add_op(reg, OP_ATOMIC_START);
1472       if (r != 0) return r;
1473       r = compile_tree(NODE_BAG_BODY(node), reg, env);
1474       if (r != 0) return r;
1475       r = add_op(reg, OP_ATOMIC_END);
1476     }
1477     break;
1478 
1479   case BAG_IF_ELSE:
1480     {
1481       int cond_len, then_len, else_len, jump_len;
1482       Node* cond = NODE_BAG_BODY(node);
1483       Node* Then = node->te.Then;
1484       Node* Else = node->te.Else;
1485 
1486       r = add_op(reg, OP_ATOMIC_START);
1487       if (r != 0) return r;
1488 
1489       cond_len = compile_length_tree(cond, reg);
1490       if (cond_len < 0) return cond_len;
1491       if (IS_NOT_NULL(Then)) {
1492         then_len = compile_length_tree(Then, reg);
1493         if (then_len < 0) return then_len;
1494       }
1495       else
1496         then_len = 0;
1497 
1498       jump_len = cond_len + then_len + SIZE_OP_ATOMIC_END + SIZE_OP_JUMP;
1499 
1500       r = add_op(reg, OP_PUSH);
1501       if (r != 0) return r;
1502       COP(reg)->push.addr = SIZE_INC_OP + jump_len;
1503 
1504       r = compile_tree(cond, reg, env);
1505       if (r != 0) return r;
1506       r = add_op(reg, OP_ATOMIC_END);
1507       if (r != 0) return r;
1508 
1509       if (IS_NOT_NULL(Then)) {
1510         r = compile_tree(Then, reg, env);
1511         if (r != 0) return r;
1512       }
1513 
1514       if (IS_NOT_NULL(Else)) {
1515         else_len = compile_length_tree(Else, reg);
1516         if (else_len < 0) return else_len;
1517       }
1518       else
1519         else_len = 0;
1520 
1521       r = add_op(reg, OP_JUMP);
1522       if (r != 0) return r;
1523       COP(reg)->jump.addr = SIZE_OP_ATOMIC_END + else_len + SIZE_INC_OP;
1524 
1525       r = add_op(reg, OP_ATOMIC_END);
1526       if (r != 0) return r;
1527 
1528       if (IS_NOT_NULL(Else)) {
1529         r = compile_tree(Else, reg, env);
1530       }
1531     }
1532     break;
1533   }
1534 
1535   return r;
1536 }
1537 
1538 static int
compile_length_anchor_node(AnchorNode * node,regex_t * reg)1539 compile_length_anchor_node(AnchorNode* node, regex_t* reg)
1540 {
1541   int len;
1542   int tlen = 0;
1543 
1544   if (IS_NOT_NULL(NODE_ANCHOR_BODY(node))) {
1545     tlen = compile_length_tree(NODE_ANCHOR_BODY(node), reg);
1546     if (tlen < 0) return tlen;
1547   }
1548 
1549   switch (node->type) {
1550   case ANCR_PREC_READ:
1551     len = SIZE_OP_PREC_READ_START + tlen + SIZE_OP_PREC_READ_END;
1552     break;
1553   case ANCR_PREC_READ_NOT:
1554     len = SIZE_OP_PREC_READ_NOT_START + tlen + SIZE_OP_PREC_READ_NOT_END;
1555     break;
1556   case ANCR_LOOK_BEHIND:
1557     len = SIZE_OP_LOOK_BEHIND + tlen;
1558     break;
1559   case ANCR_LOOK_BEHIND_NOT:
1560     len = SIZE_OP_LOOK_BEHIND_NOT_START + tlen + SIZE_OP_LOOK_BEHIND_NOT_END;
1561     break;
1562 
1563   case ANCR_WORD_BOUNDARY:
1564   case ANCR_NO_WORD_BOUNDARY:
1565 #ifdef USE_WORD_BEGIN_END
1566   case ANCR_WORD_BEGIN:
1567   case ANCR_WORD_END:
1568 #endif
1569     len = SIZE_OP_WORD_BOUNDARY;
1570     break;
1571 
1572   case ANCR_TEXT_SEGMENT_BOUNDARY:
1573   case ANCR_NO_TEXT_SEGMENT_BOUNDARY:
1574     len = SIZE_OPCODE;
1575     break;
1576 
1577   default:
1578     len = SIZE_OPCODE;
1579     break;
1580   }
1581 
1582   return len;
1583 }
1584 
1585 static int
compile_anchor_node(AnchorNode * node,regex_t * reg,ScanEnv * env)1586 compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env)
1587 {
1588   int r, len;
1589   enum OpCode op;
1590 
1591   switch (node->type) {
1592   case ANCR_BEGIN_BUF:      r = add_op(reg, OP_BEGIN_BUF);      break;
1593   case ANCR_END_BUF:        r = add_op(reg, OP_END_BUF);        break;
1594   case ANCR_BEGIN_LINE:     r = add_op(reg, OP_BEGIN_LINE);     break;
1595   case ANCR_END_LINE:       r = add_op(reg, OP_END_LINE);       break;
1596   case ANCR_SEMI_END_BUF:   r = add_op(reg, OP_SEMI_END_BUF);   break;
1597   case ANCR_BEGIN_POSITION: r = add_op(reg, OP_BEGIN_POSITION); break;
1598 
1599   case ANCR_WORD_BOUNDARY:
1600     op = OP_WORD_BOUNDARY;
1601   word:
1602     r = add_op(reg, op);
1603     if (r != 0) return r;
1604     COP(reg)->word_boundary.mode = (ModeType )node->ascii_mode;
1605     break;
1606 
1607   case ANCR_NO_WORD_BOUNDARY:
1608     op = OP_NO_WORD_BOUNDARY; goto word;
1609     break;
1610 #ifdef USE_WORD_BEGIN_END
1611   case ANCR_WORD_BEGIN:
1612     op = OP_WORD_BEGIN; goto word;
1613     break;
1614   case ANCR_WORD_END:
1615     op = OP_WORD_END; goto word;
1616     break;
1617 #endif
1618 
1619   case ANCR_TEXT_SEGMENT_BOUNDARY:
1620   case ANCR_NO_TEXT_SEGMENT_BOUNDARY:
1621     {
1622       enum TextSegmentBoundaryType type;
1623 
1624       r = add_op(reg, OP_TEXT_SEGMENT_BOUNDARY);
1625       if (r != 0) return r;
1626 
1627       type = EXTENDED_GRAPHEME_CLUSTER_BOUNDARY;
1628 #ifdef USE_UNICODE_WORD_BREAK
1629       if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_TEXT_SEGMENT_WORD))
1630         type = WORD_BOUNDARY;
1631 #endif
1632 
1633       COP(reg)->text_segment_boundary.type = type;
1634       COP(reg)->text_segment_boundary.not =
1635         (node->type == ANCR_NO_TEXT_SEGMENT_BOUNDARY ? 1 : 0);
1636     }
1637     break;
1638 
1639   case ANCR_PREC_READ:
1640     r = add_op(reg, OP_PREC_READ_START);
1641     if (r != 0) return r;
1642     r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);
1643     if (r != 0) return r;
1644     r = add_op(reg, OP_PREC_READ_END);
1645     break;
1646 
1647   case ANCR_PREC_READ_NOT:
1648     len = compile_length_tree(NODE_ANCHOR_BODY(node), reg);
1649     if (len < 0) return len;
1650 
1651     r = add_op(reg, OP_PREC_READ_NOT_START);
1652     if (r != 0) return r;
1653     COP(reg)->prec_read_not_start.addr = SIZE_INC_OP + len + SIZE_OP_PREC_READ_NOT_END;
1654     r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);
1655     if (r != 0) return r;
1656     r = add_op(reg, OP_PREC_READ_NOT_END);
1657     break;
1658 
1659   case ANCR_LOOK_BEHIND:
1660     {
1661       int n;
1662       r = add_op(reg, OP_LOOK_BEHIND);
1663       if (r != 0) return r;
1664       if (node->char_len < 0) {
1665         r = get_char_len_node(NODE_ANCHOR_BODY(node), reg, &n);
1666         if (r != 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1667       }
1668       else
1669         n = node->char_len;
1670 
1671       COP(reg)->look_behind.len = n;
1672       r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);
1673     }
1674     break;
1675 
1676   case ANCR_LOOK_BEHIND_NOT:
1677     {
1678       int n;
1679 
1680       len = compile_length_tree(NODE_ANCHOR_BODY(node), reg);
1681       r = add_op(reg, OP_LOOK_BEHIND_NOT_START);
1682       if (r != 0) return r;
1683       COP(reg)->look_behind_not_start.addr = SIZE_INC_OP + len + SIZE_OP_LOOK_BEHIND_NOT_END;
1684 
1685       if (node->char_len < 0) {
1686         r = get_char_len_node(NODE_ANCHOR_BODY(node), reg, &n);
1687         if (r != 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1688       }
1689       else
1690         n = node->char_len;
1691 
1692       COP(reg)->look_behind_not_start.len = n;
1693 
1694       r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);
1695       if (r != 0) return r;
1696       r = add_op(reg, OP_LOOK_BEHIND_NOT_END);
1697     }
1698     break;
1699 
1700   default:
1701     return ONIGERR_TYPE_BUG;
1702     break;
1703   }
1704 
1705   return r;
1706 }
1707 
1708 static int
compile_gimmick_node(GimmickNode * node,regex_t * reg)1709 compile_gimmick_node(GimmickNode* node, regex_t* reg)
1710 {
1711   int r;
1712 
1713   switch (node->type) {
1714   case GIMMICK_FAIL:
1715     r = add_op(reg, OP_FAIL);
1716     break;
1717 
1718   case GIMMICK_SAVE:
1719     r = add_op(reg, OP_PUSH_SAVE_VAL);
1720     if (r != 0) return r;
1721     COP(reg)->push_save_val.type = node->detail_type;
1722     COP(reg)->push_save_val.id   = node->id;
1723     break;
1724 
1725   case GIMMICK_UPDATE_VAR:
1726     r = add_op(reg, OP_UPDATE_VAR);
1727     if (r != 0) return r;
1728     COP(reg)->update_var.type = node->detail_type;
1729     COP(reg)->update_var.id   = node->id;
1730     break;
1731 
1732 #ifdef USE_CALLOUT
1733   case GIMMICK_CALLOUT:
1734     switch (node->detail_type) {
1735     case ONIG_CALLOUT_OF_CONTENTS:
1736     case ONIG_CALLOUT_OF_NAME:
1737       {
1738         if (node->detail_type == ONIG_CALLOUT_OF_NAME) {
1739           r = add_op(reg, OP_CALLOUT_NAME);
1740           if (r != 0) return r;
1741           COP(reg)->callout_name.id  = node->id;
1742           COP(reg)->callout_name.num = node->num;
1743         }
1744         else {
1745           r = add_op(reg, OP_CALLOUT_CONTENTS);
1746           if (r != 0) return r;
1747           COP(reg)->callout_contents.num = node->num;
1748         }
1749       }
1750       break;
1751 
1752     default:
1753       r = ONIGERR_TYPE_BUG;
1754       break;
1755     }
1756 #endif
1757   }
1758 
1759   return r;
1760 }
1761 
1762 static int
compile_length_gimmick_node(GimmickNode * node,regex_t * reg)1763 compile_length_gimmick_node(GimmickNode* node, regex_t* reg)
1764 {
1765   int len;
1766 
1767   switch (node->type) {
1768   case GIMMICK_FAIL:
1769     len = SIZE_OP_FAIL;
1770     break;
1771 
1772   case GIMMICK_SAVE:
1773     len = SIZE_OP_PUSH_SAVE_VAL;
1774     break;
1775 
1776   case GIMMICK_UPDATE_VAR:
1777     len = SIZE_OP_UPDATE_VAR;
1778     break;
1779 
1780 #ifdef USE_CALLOUT
1781   case GIMMICK_CALLOUT:
1782     switch (node->detail_type) {
1783     case ONIG_CALLOUT_OF_CONTENTS:
1784       len = SIZE_OP_CALLOUT_CONTENTS;
1785       break;
1786     case ONIG_CALLOUT_OF_NAME:
1787       len = SIZE_OP_CALLOUT_NAME;
1788       break;
1789 
1790     default:
1791       len = ONIGERR_TYPE_BUG;
1792       break;
1793     }
1794     break;
1795 #endif
1796   }
1797 
1798   return len;
1799 }
1800 
1801 static int
compile_length_tree(Node * node,regex_t * reg)1802 compile_length_tree(Node* node, regex_t* reg)
1803 {
1804   int len, r;
1805 
1806   switch (NODE_TYPE(node)) {
1807   case NODE_LIST:
1808     len = 0;
1809     do {
1810       r = compile_length_tree(NODE_CAR(node), reg);
1811       if (r < 0) return r;
1812       len += r;
1813     } while (IS_NOT_NULL(node = NODE_CDR(node)));
1814     r = len;
1815     break;
1816 
1817   case NODE_ALT:
1818     {
1819       int n;
1820 
1821       n = r = 0;
1822       do {
1823         r += compile_length_tree(NODE_CAR(node), reg);
1824         n++;
1825       } while (IS_NOT_NULL(node = NODE_CDR(node)));
1826       r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
1827     }
1828     break;
1829 
1830   case NODE_STRING:
1831     if (NODE_STRING_IS_RAW(node))
1832       r = compile_length_string_raw_node(STR_(node), reg);
1833     else
1834       r = compile_length_string_node(node, reg);
1835     break;
1836 
1837   case NODE_CCLASS:
1838     r = compile_length_cclass_node(CCLASS_(node), reg);
1839     break;
1840 
1841   case NODE_CTYPE:
1842     r = SIZE_OPCODE;
1843     break;
1844 
1845   case NODE_BACKREF:
1846     r = SIZE_OP_BACKREF;
1847     break;
1848 
1849 #ifdef USE_CALL
1850   case NODE_CALL:
1851     r = SIZE_OP_CALL;
1852     break;
1853 #endif
1854 
1855   case NODE_QUANT:
1856     r = compile_length_quantifier_node(QUANT_(node), reg);
1857     break;
1858 
1859   case NODE_BAG:
1860     r = compile_length_bag_node(BAG_(node), reg);
1861     break;
1862 
1863   case NODE_ANCHOR:
1864     r = compile_length_anchor_node(ANCHOR_(node), reg);
1865     break;
1866 
1867   case NODE_GIMMICK:
1868     r = compile_length_gimmick_node(GIMMICK_(node), reg);
1869     break;
1870 
1871   default:
1872     return ONIGERR_TYPE_BUG;
1873     break;
1874   }
1875 
1876   return r;
1877 }
1878 
1879 static int
compile_tree(Node * node,regex_t * reg,ScanEnv * env)1880 compile_tree(Node* node, regex_t* reg, ScanEnv* env)
1881 {
1882   int n, len, pos, r = 0;
1883 
1884   switch (NODE_TYPE(node)) {
1885   case NODE_LIST:
1886     do {
1887       r = compile_tree(NODE_CAR(node), reg, env);
1888     } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
1889     break;
1890 
1891   case NODE_ALT:
1892     {
1893       Node* x = node;
1894       len = 0;
1895       do {
1896         len += compile_length_tree(NODE_CAR(x), reg);
1897         if (IS_NOT_NULL(NODE_CDR(x))) {
1898           len += SIZE_OP_PUSH + SIZE_OP_JUMP;
1899         }
1900       } while (IS_NOT_NULL(x = NODE_CDR(x)));
1901       pos = COP_CURR_OFFSET(reg) + 1 + len;  /* goal position */
1902 
1903       do {
1904         len = compile_length_tree(NODE_CAR(node), reg);
1905         if (IS_NOT_NULL(NODE_CDR(node))) {
1906           enum OpCode push = NODE_IS_SUPER(node) ? OP_PUSH_SUPER : OP_PUSH;
1907           r = add_op(reg, push);
1908           if (r != 0) break;
1909           COP(reg)->push.addr = SIZE_INC_OP + len + SIZE_OP_JUMP;
1910         }
1911         r = compile_tree(NODE_CAR(node), reg, env);
1912         if (r != 0) break;
1913         if (IS_NOT_NULL(NODE_CDR(node))) {
1914           len = pos - (COP_CURR_OFFSET(reg) + 1);
1915           r = add_op(reg, OP_JUMP);
1916           if (r != 0) break;
1917           COP(reg)->jump.addr = len;
1918         }
1919       } while (IS_NOT_NULL(node = NODE_CDR(node)));
1920     }
1921     break;
1922 
1923   case NODE_STRING:
1924     if (NODE_STRING_IS_RAW(node))
1925       r = compile_string_raw_node(STR_(node), reg);
1926     else
1927       r = compile_string_node(node, reg);
1928     break;
1929 
1930   case NODE_CCLASS:
1931     r = compile_cclass_node(CCLASS_(node), reg);
1932     break;
1933 
1934   case NODE_CTYPE:
1935     {
1936       int op;
1937 
1938       switch (CTYPE_(node)->ctype) {
1939       case CTYPE_ANYCHAR:
1940         r = add_op(reg, IS_MULTILINE(CTYPE_OPTION(node, reg)) ?
1941                    OP_ANYCHAR_ML : OP_ANYCHAR);
1942         break;
1943 
1944       case ONIGENC_CTYPE_WORD:
1945         if (CTYPE_(node)->ascii_mode == 0) {
1946           op = CTYPE_(node)->not != 0 ? OP_NO_WORD : OP_WORD;
1947         }
1948         else {
1949           op = CTYPE_(node)->not != 0 ? OP_NO_WORD_ASCII : OP_WORD_ASCII;
1950         }
1951         r = add_op(reg, op);
1952         break;
1953 
1954       default:
1955         return ONIGERR_TYPE_BUG;
1956         break;
1957       }
1958     }
1959     break;
1960 
1961   case NODE_BACKREF:
1962     {
1963       BackRefNode* br = BACKREF_(node);
1964 
1965       if (NODE_IS_CHECKER(node)) {
1966 #ifdef USE_BACKREF_WITH_LEVEL
1967         if (NODE_IS_NEST_LEVEL(node)) {
1968           r = add_op(reg, OP_BACKREF_CHECK_WITH_LEVEL);
1969           if (r != 0) return r;
1970           COP(reg)->backref_general.nest_level = br->nest_level;
1971         }
1972         else
1973 #endif
1974           {
1975             r = add_op(reg, OP_BACKREF_CHECK);
1976             if (r != 0) return r;
1977           }
1978         goto add_bacref_mems;
1979       }
1980       else {
1981 #ifdef USE_BACKREF_WITH_LEVEL
1982         if (NODE_IS_NEST_LEVEL(node)) {
1983           if ((reg->options & ONIG_OPTION_IGNORECASE) != 0)
1984             r = add_op(reg, OP_BACKREF_WITH_LEVEL_IC);
1985           else
1986             r = add_op(reg, OP_BACKREF_WITH_LEVEL);
1987 
1988           if (r != 0) return r;
1989           COP(reg)->backref_general.nest_level = br->nest_level;
1990           goto add_bacref_mems;
1991         }
1992         else
1993 #endif
1994         if (br->back_num == 1) {
1995           n = br->back_static[0];
1996           if (IS_IGNORECASE(reg->options)) {
1997             r = add_op(reg, OP_BACKREF_N_IC);
1998             if (r != 0) return r;
1999             COP(reg)->backref_n.n1 = n;
2000           }
2001           else {
2002             switch (n) {
2003             case 1:  r = add_op(reg, OP_BACKREF1); break;
2004             case 2:  r = add_op(reg, OP_BACKREF2); break;
2005             default:
2006               r = add_op(reg, OP_BACKREF_N);
2007               if (r != 0) return r;
2008               COP(reg)->backref_n.n1 = n;
2009               break;
2010             }
2011           }
2012         }
2013         else {
2014           int num;
2015           int* p;
2016 
2017           r = add_op(reg, IS_IGNORECASE(reg->options) ?
2018                      OP_BACKREF_MULTI_IC : OP_BACKREF_MULTI);
2019           if (r != 0) return r;
2020 
2021         add_bacref_mems:
2022           num = br->back_num;
2023           COP(reg)->backref_general.num = num;
2024           if (num == 1) {
2025             COP(reg)->backref_general.n1 = br->back_static[0];
2026           }
2027           else {
2028             int i, j;
2029             MemNumType* ns;
2030 
2031             ns = xmalloc(sizeof(MemNumType) * num);
2032             CHECK_NULL_RETURN_MEMERR(ns);
2033             COP(reg)->backref_general.ns = ns;
2034             p = BACKREFS_P(br);
2035             for (i = num - 1, j = 0; i >= 0; i--, j++) {
2036               ns[j] = p[i];
2037             }
2038           }
2039         }
2040       }
2041     }
2042     break;
2043 
2044 #ifdef USE_CALL
2045   case NODE_CALL:
2046     r = compile_call(CALL_(node), reg, env);
2047     break;
2048 #endif
2049 
2050   case NODE_QUANT:
2051     r = compile_quantifier_node(QUANT_(node), reg, env);
2052     break;
2053 
2054   case NODE_BAG:
2055     r = compile_bag_node(BAG_(node), reg, env);
2056     break;
2057 
2058   case NODE_ANCHOR:
2059     r = compile_anchor_node(ANCHOR_(node), reg, env);
2060     break;
2061 
2062   case NODE_GIMMICK:
2063     r = compile_gimmick_node(GIMMICK_(node), reg);
2064     break;
2065 
2066   default:
2067 #ifdef ONIG_DEBUG
2068     fprintf(stderr, "compile_tree: undefined node type %d\n", NODE_TYPE(node));
2069 #endif
2070     break;
2071   }
2072 
2073   return r;
2074 }
2075 
2076 static int
noname_disable_map(Node ** plink,GroupNumRemap * map,int * counter)2077 noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
2078 {
2079   int r = 0;
2080   Node* node = *plink;
2081 
2082   switch (NODE_TYPE(node)) {
2083   case NODE_LIST:
2084   case NODE_ALT:
2085     do {
2086       r = noname_disable_map(&(NODE_CAR(node)), map, counter);
2087     } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
2088     break;
2089 
2090   case NODE_QUANT:
2091     {
2092       Node** ptarget = &(NODE_BODY(node));
2093       Node*  old = *ptarget;
2094       r = noname_disable_map(ptarget, map, counter);
2095       if (*ptarget != old && NODE_TYPE(*ptarget) == NODE_QUANT) {
2096         onig_reduce_nested_quantifier(node, *ptarget);
2097       }
2098     }
2099     break;
2100 
2101   case NODE_BAG:
2102     {
2103       BagNode* en = BAG_(node);
2104       if (en->type == BAG_MEMORY) {
2105         if (NODE_IS_NAMED_GROUP(node)) {
2106           (*counter)++;
2107           map[en->m.regnum].new_val = *counter;
2108           en->m.regnum = *counter;
2109           r = noname_disable_map(&(NODE_BODY(node)), map, counter);
2110         }
2111         else {
2112           *plink = NODE_BODY(node);
2113           NODE_BODY(node) = NULL_NODE;
2114           onig_node_free(node);
2115           r = noname_disable_map(plink, map, counter);
2116         }
2117       }
2118       else if (en->type == BAG_IF_ELSE) {
2119         r = noname_disable_map(&(NODE_BAG_BODY(en)), map, counter);
2120         if (r != 0) return r;
2121         if (IS_NOT_NULL(en->te.Then)) {
2122           r = noname_disable_map(&(en->te.Then), map, counter);
2123           if (r != 0) return r;
2124         }
2125         if (IS_NOT_NULL(en->te.Else)) {
2126           r = noname_disable_map(&(en->te.Else), map, counter);
2127           if (r != 0) return r;
2128         }
2129       }
2130       else
2131         r = noname_disable_map(&(NODE_BODY(node)), map, counter);
2132     }
2133     break;
2134 
2135   case NODE_ANCHOR:
2136     if (IS_NOT_NULL(NODE_BODY(node)))
2137       r = noname_disable_map(&(NODE_BODY(node)), map, counter);
2138     break;
2139 
2140   default:
2141     break;
2142   }
2143 
2144   return r;
2145 }
2146 
2147 static int
renumber_node_backref(Node * node,GroupNumRemap * map)2148 renumber_node_backref(Node* node, GroupNumRemap* map)
2149 {
2150   int i, pos, n, old_num;
2151   int *backs;
2152   BackRefNode* bn = BACKREF_(node);
2153 
2154   if (! NODE_IS_BY_NAME(node))
2155     return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
2156 
2157   old_num = bn->back_num;
2158   if (IS_NULL(bn->back_dynamic))
2159     backs = bn->back_static;
2160   else
2161     backs = bn->back_dynamic;
2162 
2163   for (i = 0, pos = 0; i < old_num; i++) {
2164     n = map[backs[i]].new_val;
2165     if (n > 0) {
2166       backs[pos] = n;
2167       pos++;
2168     }
2169   }
2170 
2171   bn->back_num = pos;
2172   return 0;
2173 }
2174 
2175 static int
renumber_by_map(Node * node,GroupNumRemap * map)2176 renumber_by_map(Node* node, GroupNumRemap* map)
2177 {
2178   int r = 0;
2179 
2180   switch (NODE_TYPE(node)) {
2181   case NODE_LIST:
2182   case NODE_ALT:
2183     do {
2184       r = renumber_by_map(NODE_CAR(node), map);
2185     } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
2186     break;
2187 
2188   case NODE_QUANT:
2189     r = renumber_by_map(NODE_BODY(node), map);
2190     break;
2191 
2192   case NODE_BAG:
2193     {
2194       BagNode* en = BAG_(node);
2195 
2196       r = renumber_by_map(NODE_BODY(node), map);
2197       if (r != 0) return r;
2198 
2199       if (en->type == BAG_IF_ELSE) {
2200         if (IS_NOT_NULL(en->te.Then)) {
2201           r = renumber_by_map(en->te.Then, map);
2202           if (r != 0) return r;
2203         }
2204         if (IS_NOT_NULL(en->te.Else)) {
2205           r = renumber_by_map(en->te.Else, map);
2206           if (r != 0) return r;
2207         }
2208       }
2209     }
2210     break;
2211 
2212   case NODE_BACKREF:
2213     r = renumber_node_backref(node, map);
2214     break;
2215 
2216   case NODE_ANCHOR:
2217     if (IS_NOT_NULL(NODE_BODY(node)))
2218       r = renumber_by_map(NODE_BODY(node), map);
2219     break;
2220 
2221   default:
2222     break;
2223   }
2224 
2225   return r;
2226 }
2227 
2228 static int
numbered_ref_check(Node * node)2229 numbered_ref_check(Node* node)
2230 {
2231   int r = 0;
2232 
2233   switch (NODE_TYPE(node)) {
2234   case NODE_LIST:
2235   case NODE_ALT:
2236     do {
2237       r = numbered_ref_check(NODE_CAR(node));
2238     } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
2239     break;
2240 
2241   case NODE_ANCHOR:
2242     if (IS_NULL(NODE_BODY(node)))
2243       break;
2244     /* fall */
2245   case NODE_QUANT:
2246     r = numbered_ref_check(NODE_BODY(node));
2247     break;
2248 
2249   case NODE_BAG:
2250     {
2251       BagNode* en = BAG_(node);
2252 
2253       r = numbered_ref_check(NODE_BODY(node));
2254       if (r != 0) return r;
2255 
2256       if (en->type == BAG_IF_ELSE) {
2257         if (IS_NOT_NULL(en->te.Then)) {
2258           r = numbered_ref_check(en->te.Then);
2259           if (r != 0) return r;
2260         }
2261         if (IS_NOT_NULL(en->te.Else)) {
2262           r = numbered_ref_check(en->te.Else);
2263           if (r != 0) return r;
2264         }
2265       }
2266     }
2267 
2268     break;
2269 
2270   case NODE_BACKREF:
2271     if (! NODE_IS_BY_NAME(node))
2272       return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
2273     break;
2274 
2275   default:
2276     break;
2277   }
2278 
2279   return r;
2280 }
2281 
2282 static int
disable_noname_group_capture(Node ** root,regex_t * reg,ScanEnv * env)2283 disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
2284 {
2285   int r, i, pos, counter;
2286   int result;
2287   MemStatusType loc;
2288   GroupNumRemap* map;
2289 
2290   map = (GroupNumRemap* )xmalloc(sizeof(GroupNumRemap) * (env->num_mem + 1));
2291   CHECK_NULL_RETURN_MEMERR(map);
2292   for (i = 1; i <= env->num_mem; i++) {
2293     map[i].new_val = 0;
2294   }
2295   counter = 0;
2296   r = noname_disable_map(root, map, &counter);
2297   if (r != 0) return r;
2298 
2299   r = renumber_by_map(*root, map);
2300   if (r != 0) return r;
2301 
2302   for (i = 1, pos = 1; i <= env->num_mem; i++) {
2303     if (map[i].new_val > 0) {
2304       SCANENV_MEMENV(env)[pos] = SCANENV_MEMENV(env)[i];
2305       pos++;
2306     }
2307   }
2308 
2309   loc = env->capture_history;
2310   MEM_STATUS_CLEAR(env->capture_history);
2311   for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
2312     if (MEM_STATUS_AT(loc, i)) {
2313       MEM_STATUS_ON_SIMPLE(env->capture_history, map[i].new_val);
2314     }
2315   }
2316 
2317   env->num_mem = env->num_named;
2318   reg->num_mem = env->num_named;
2319   result = onig_renumber_name_table(reg, map);
2320   xfree(map);
2321   return result;
2322 }
2323 
2324 #ifdef USE_CALL
2325 static int
fix_unset_addr_list(UnsetAddrList * uslist,regex_t * reg)2326 fix_unset_addr_list(UnsetAddrList* uslist, regex_t* reg)
2327 {
2328   int i, offset;
2329   BagNode* en;
2330   AbsAddrType addr;
2331   AbsAddrType* paddr;
2332 
2333   for (i = 0; i < uslist->num; i++) {
2334     if (! NODE_IS_ADDR_FIXED(uslist->us[i].target))
2335       return ONIGERR_PARSER_BUG;
2336 
2337     en = BAG_(uslist->us[i].target);
2338     addr   = en->m.called_addr;
2339     offset = uslist->us[i].offset;
2340 
2341     paddr = (AbsAddrType* )((char* )reg->ops + offset);
2342     *paddr = addr;
2343   }
2344   return 0;
2345 }
2346 #endif
2347 
2348 
2349 #define GET_CHAR_LEN_VARLEN           -1
2350 #define GET_CHAR_LEN_TOP_ALT_VARLEN   -2
2351 
2352 /* fixed size pattern node only */
2353 static int
get_char_len_node1(Node * node,regex_t * reg,int * len,int level)2354 get_char_len_node1(Node* node, regex_t* reg, int* len, int level)
2355 {
2356   int tlen;
2357   int r = 0;
2358 
2359   level++;
2360   *len = 0;
2361   switch (NODE_TYPE(node)) {
2362   case NODE_LIST:
2363     do {
2364       r = get_char_len_node1(NODE_CAR(node), reg, &tlen, level);
2365       if (r == 0)
2366         *len = distance_add(*len, tlen);
2367     } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
2368     break;
2369 
2370   case NODE_ALT:
2371     {
2372       int tlen2;
2373       int varlen = 0;
2374 
2375       r = get_char_len_node1(NODE_CAR(node), reg, &tlen, level);
2376       while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))) {
2377         r = get_char_len_node1(NODE_CAR(node), reg, &tlen2, level);
2378         if (r == 0) {
2379           if (tlen != tlen2)
2380             varlen = 1;
2381         }
2382       }
2383       if (r == 0) {
2384         if (varlen != 0) {
2385           if (level == 1)
2386             r = GET_CHAR_LEN_TOP_ALT_VARLEN;
2387           else
2388             r = GET_CHAR_LEN_VARLEN;
2389         }
2390         else
2391           *len = tlen;
2392       }
2393     }
2394     break;
2395 
2396   case NODE_STRING:
2397     {
2398       StrNode* sn = STR_(node);
2399       UChar *s = sn->s;
2400 
2401       while (s < sn->end) {
2402         s += enclen(reg->enc, s);
2403         (*len)++;
2404       }
2405     }
2406     break;
2407 
2408   case NODE_QUANT:
2409     {
2410       QuantNode* qn = QUANT_(node);
2411 
2412       if (qn->lower == qn->upper) {
2413         if (qn->upper == 0) {
2414           *len = 0;
2415         }
2416         else {
2417           r = get_char_len_node1(NODE_BODY(node), reg, &tlen, level);
2418           if (r == 0)
2419             *len = distance_multiply(tlen, qn->lower);
2420         }
2421       }
2422       else
2423         r = GET_CHAR_LEN_VARLEN;
2424     }
2425     break;
2426 
2427 #ifdef USE_CALL
2428   case NODE_CALL:
2429     if (! NODE_IS_RECURSION(node))
2430       r = get_char_len_node1(NODE_BODY(node), reg, len, level);
2431     else
2432       r = GET_CHAR_LEN_VARLEN;
2433     break;
2434 #endif
2435 
2436   case NODE_CTYPE:
2437   case NODE_CCLASS:
2438     *len = 1;
2439     break;
2440 
2441   case NODE_BAG:
2442     {
2443       BagNode* en = BAG_(node);
2444 
2445       switch (en->type) {
2446       case BAG_MEMORY:
2447 #ifdef USE_CALL
2448         if (NODE_IS_CLEN_FIXED(node))
2449           *len = en->char_len;
2450         else {
2451           r = get_char_len_node1(NODE_BODY(node), reg, len, level);
2452           if (r == 0) {
2453             en->char_len = *len;
2454             NODE_STATUS_ADD(node, CLEN_FIXED);
2455           }
2456         }
2457         break;
2458 #endif
2459       case BAG_OPTION:
2460       case BAG_STOP_BACKTRACK:
2461         r = get_char_len_node1(NODE_BODY(node), reg, len, level);
2462         break;
2463       case BAG_IF_ELSE:
2464         {
2465           int clen, elen;
2466 
2467           r = get_char_len_node1(NODE_BODY(node), reg, &clen, level);
2468           if (r == 0) {
2469             if (IS_NOT_NULL(en->te.Then)) {
2470               r = get_char_len_node1(en->te.Then, reg, &tlen, level);
2471               if (r != 0) break;
2472             }
2473             else tlen = 0;
2474             if (IS_NOT_NULL(en->te.Else)) {
2475               r = get_char_len_node1(en->te.Else, reg, &elen, level);
2476               if (r != 0) break;
2477             }
2478             else elen = 0;
2479 
2480             if (clen + tlen != elen) {
2481               r = GET_CHAR_LEN_VARLEN;
2482             }
2483             else {
2484               *len = elen;
2485             }
2486           }
2487         }
2488         break;
2489       }
2490     }
2491     break;
2492 
2493   case NODE_ANCHOR:
2494   case NODE_GIMMICK:
2495     break;
2496 
2497   case NODE_BACKREF:
2498     if (NODE_IS_CHECKER(node))
2499       break;
2500     /* fall */
2501   default:
2502     r = GET_CHAR_LEN_VARLEN;
2503     break;
2504   }
2505 
2506   return r;
2507 }
2508 
2509 static int
get_char_len_node(Node * node,regex_t * reg,int * len)2510 get_char_len_node(Node* node, regex_t* reg, int* len)
2511 {
2512   return get_char_len_node1(node, reg, len, 0);
2513 }
2514 
2515 /* x is not included y ==>  1 : 0 */
2516 static int
is_exclusive(Node * x,Node * y,regex_t * reg)2517 is_exclusive(Node* x, Node* y, regex_t* reg)
2518 {
2519   int i, len;
2520   OnigCodePoint code;
2521   UChar *p;
2522   NodeType ytype;
2523 
2524  retry:
2525   ytype = NODE_TYPE(y);
2526   switch (NODE_TYPE(x)) {
2527   case NODE_CTYPE:
2528     {
2529       if (CTYPE_(x)->ctype == CTYPE_ANYCHAR ||
2530           CTYPE_(y)->ctype == CTYPE_ANYCHAR)
2531         break;
2532 
2533       switch (ytype) {
2534       case NODE_CTYPE:
2535         if (CTYPE_(y)->ctype == CTYPE_(x)->ctype &&
2536             CTYPE_(y)->not   != CTYPE_(x)->not &&
2537             CTYPE_(y)->ascii_mode == CTYPE_(x)->ascii_mode)
2538           return 1;
2539         else
2540           return 0;
2541         break;
2542 
2543       case NODE_CCLASS:
2544       swap:
2545         {
2546           Node* tmp;
2547           tmp = x; x = y; y = tmp;
2548           goto retry;
2549         }
2550         break;
2551 
2552       case NODE_STRING:
2553         goto swap;
2554         break;
2555 
2556       default:
2557         break;
2558       }
2559     }
2560     break;
2561 
2562   case NODE_CCLASS:
2563     {
2564       int range;
2565       CClassNode* xc = CCLASS_(x);
2566 
2567       switch (ytype) {
2568       case NODE_CTYPE:
2569         switch (CTYPE_(y)->ctype) {
2570         case CTYPE_ANYCHAR:
2571           return 0;
2572           break;
2573 
2574         case ONIGENC_CTYPE_WORD:
2575           if (CTYPE_(y)->not == 0) {
2576             if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
2577               range = CTYPE_(y)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE;
2578               for (i = 0; i < range; i++) {
2579                 if (BITSET_AT(xc->bs, i)) {
2580                   if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;
2581                 }
2582               }
2583               return 1;
2584             }
2585             return 0;
2586           }
2587           else {
2588             if (IS_NOT_NULL(xc->mbuf)) return 0;
2589             if (IS_NCCLASS_NOT(xc)) return 0;
2590 
2591             range = CTYPE_(y)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE;
2592             for (i = 0; i < range; i++) {
2593               if (! ONIGENC_IS_CODE_WORD(reg->enc, i)) {
2594                 if (BITSET_AT(xc->bs, i))
2595                   return 0;
2596               }
2597             }
2598             for (i = range; i < SINGLE_BYTE_SIZE; i++) {
2599               if (BITSET_AT(xc->bs, i)) return 0;
2600             }
2601             return 1;
2602           }
2603           break;
2604 
2605         default:
2606           break;
2607         }
2608         break;
2609 
2610       case NODE_CCLASS:
2611         {
2612           int v;
2613           CClassNode* yc = CCLASS_(y);
2614 
2615           for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2616             v = BITSET_AT(xc->bs, i);
2617             if ((v != 0 && !IS_NCCLASS_NOT(xc)) || (v == 0 && IS_NCCLASS_NOT(xc))) {
2618               v = BITSET_AT(yc->bs, i);
2619               if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
2620                   (v == 0 && IS_NCCLASS_NOT(yc)))
2621                 return 0;
2622             }
2623           }
2624           if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
2625               (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
2626             return 1;
2627           return 0;
2628         }
2629         break;
2630 
2631       case NODE_STRING:
2632         goto swap;
2633         break;
2634 
2635       default:
2636         break;
2637       }
2638     }
2639     break;
2640 
2641   case NODE_STRING:
2642     {
2643       StrNode* xs = STR_(x);
2644 
2645       if (NODE_STRING_LEN(x) == 0)
2646         break;
2647 
2648       switch (ytype) {
2649       case NODE_CTYPE:
2650         switch (CTYPE_(y)->ctype) {
2651         case CTYPE_ANYCHAR:
2652           break;
2653 
2654         case ONIGENC_CTYPE_WORD:
2655           if (CTYPE_(y)->ascii_mode == 0) {
2656             if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
2657               return CTYPE_(y)->not;
2658             else
2659               return !(CTYPE_(y)->not);
2660           }
2661           else {
2662             if (ONIGENC_IS_MBC_WORD_ASCII(reg->enc, xs->s, xs->end))
2663               return CTYPE_(y)->not;
2664             else
2665               return !(CTYPE_(y)->not);
2666           }
2667           break;
2668         default:
2669           break;
2670         }
2671         break;
2672 
2673       case NODE_CCLASS:
2674         {
2675           CClassNode* cc = CCLASS_(y);
2676 
2677           code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
2678                                      xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
2679           return onig_is_code_in_cc(reg->enc, code, cc) == 0;
2680         }
2681         break;
2682 
2683       case NODE_STRING:
2684         {
2685           UChar *q;
2686           StrNode* ys = STR_(y);
2687 
2688           len = NODE_STRING_LEN(x);
2689           if (len > NODE_STRING_LEN(y)) len = NODE_STRING_LEN(y);
2690           if (NODE_STRING_IS_AMBIG(x) || NODE_STRING_IS_AMBIG(y)) {
2691             /* tiny version */
2692             return 0;
2693           }
2694           else {
2695             for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, q++) {
2696               if (*p != *q) return 1;
2697             }
2698           }
2699         }
2700         break;
2701 
2702       default:
2703         break;
2704       }
2705     }
2706     break;
2707 
2708   default:
2709     break;
2710   }
2711 
2712   return 0;
2713 }
2714 
2715 static Node*
get_head_value_node(Node * node,int exact,regex_t * reg)2716 get_head_value_node(Node* node, int exact, regex_t* reg)
2717 {
2718   Node* n = NULL_NODE;
2719 
2720   switch (NODE_TYPE(node)) {
2721   case NODE_BACKREF:
2722   case NODE_ALT:
2723 #ifdef USE_CALL
2724   case NODE_CALL:
2725 #endif
2726     break;
2727 
2728   case NODE_CTYPE:
2729     if (CTYPE_(node)->ctype == CTYPE_ANYCHAR)
2730       break;
2731     /* fall */
2732   case NODE_CCLASS:
2733     if (exact == 0) {
2734       n = node;
2735     }
2736     break;
2737 
2738   case NODE_LIST:
2739     n = get_head_value_node(NODE_CAR(node), exact, reg);
2740     break;
2741 
2742   case NODE_STRING:
2743     {
2744       StrNode* sn = STR_(node);
2745 
2746       if (sn->end <= sn->s)
2747         break;
2748 
2749       if (exact == 0 ||
2750           ! IS_IGNORECASE(reg->options) || NODE_STRING_IS_RAW(node)) {
2751         n = node;
2752       }
2753     }
2754     break;
2755 
2756   case NODE_QUANT:
2757     {
2758       QuantNode* qn = QUANT_(node);
2759       if (qn->lower > 0) {
2760         if (IS_NOT_NULL(qn->head_exact))
2761           n = qn->head_exact;
2762         else
2763           n = get_head_value_node(NODE_BODY(node), exact, reg);
2764       }
2765     }
2766     break;
2767 
2768   case NODE_BAG:
2769     {
2770       BagNode* en = BAG_(node);
2771       switch (en->type) {
2772       case BAG_OPTION:
2773         {
2774           OnigOptionType options = reg->options;
2775 
2776           reg->options = BAG_(node)->o.options;
2777           n = get_head_value_node(NODE_BODY(node), exact, reg);
2778           reg->options = options;
2779         }
2780         break;
2781 
2782       case BAG_MEMORY:
2783       case BAG_STOP_BACKTRACK:
2784       case BAG_IF_ELSE:
2785         n = get_head_value_node(NODE_BODY(node), exact, reg);
2786         break;
2787       }
2788     }
2789     break;
2790 
2791   case NODE_ANCHOR:
2792     if (ANCHOR_(node)->type == ANCR_PREC_READ)
2793       n = get_head_value_node(NODE_BODY(node), exact, reg);
2794     break;
2795 
2796   case NODE_GIMMICK:
2797   default:
2798     break;
2799   }
2800 
2801   return n;
2802 }
2803 
2804 static int
check_type_tree(Node * node,int type_mask,int bag_mask,int anchor_mask)2805 check_type_tree(Node* node, int type_mask, int bag_mask, int anchor_mask)
2806 {
2807   NodeType type;
2808   int r = 0;
2809 
2810   type = NODE_TYPE(node);
2811   if ((NODE_TYPE2BIT(type) & type_mask) == 0)
2812     return 1;
2813 
2814   switch (type) {
2815   case NODE_LIST:
2816   case NODE_ALT:
2817     do {
2818       r = check_type_tree(NODE_CAR(node), type_mask, bag_mask, anchor_mask);
2819     } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
2820     break;
2821 
2822   case NODE_QUANT:
2823     r = check_type_tree(NODE_BODY(node), type_mask, bag_mask, anchor_mask);
2824     break;
2825 
2826   case NODE_BAG:
2827     {
2828       BagNode* en = BAG_(node);
2829       if (((1<<en->type) & bag_mask) == 0)
2830         return 1;
2831 
2832       r = check_type_tree(NODE_BODY(node), type_mask, bag_mask, anchor_mask);
2833       if (r == 0 && en->type == BAG_IF_ELSE) {
2834         if (IS_NOT_NULL(en->te.Then)) {
2835           r = check_type_tree(en->te.Then, type_mask, bag_mask, anchor_mask);
2836           if (r != 0) break;
2837         }
2838         if (IS_NOT_NULL(en->te.Else)) {
2839           r = check_type_tree(en->te.Else, type_mask, bag_mask, anchor_mask);
2840         }
2841       }
2842     }
2843     break;
2844 
2845   case NODE_ANCHOR:
2846     type = ANCHOR_(node)->type;
2847     if ((type & anchor_mask) == 0)
2848       return 1;
2849 
2850     if (IS_NOT_NULL(NODE_BODY(node)))
2851       r = check_type_tree(NODE_BODY(node), type_mask, bag_mask, anchor_mask);
2852     break;
2853 
2854   case NODE_GIMMICK:
2855   default:
2856     break;
2857   }
2858   return r;
2859 }
2860 
2861 static OnigLen
tree_min_len(Node * node,ScanEnv * env)2862 tree_min_len(Node* node, ScanEnv* env)
2863 {
2864   OnigLen len;
2865   OnigLen tmin;
2866 
2867   len = 0;
2868   switch (NODE_TYPE(node)) {
2869   case NODE_BACKREF:
2870     if (! NODE_IS_CHECKER(node)) {
2871       int i;
2872       int* backs;
2873       MemEnv* mem_env = SCANENV_MEMENV(env);
2874       BackRefNode* br = BACKREF_(node);
2875       if (NODE_IS_RECURSION(node)) break;
2876 
2877       backs = BACKREFS_P(br);
2878       len = tree_min_len(mem_env[backs[0]].node, env);
2879       for (i = 1; i < br->back_num; i++) {
2880         tmin = tree_min_len(mem_env[backs[i]].node, env);
2881         if (len > tmin) len = tmin;
2882       }
2883     }
2884     break;
2885 
2886 #ifdef USE_CALL
2887   case NODE_CALL:
2888     {
2889       Node* t = NODE_BODY(node);
2890       if (NODE_IS_RECURSION(node)) {
2891         if (NODE_IS_MIN_FIXED(t))
2892           len = BAG_(t)->min_len;
2893       }
2894       else
2895         len = tree_min_len(t, env);
2896     }
2897     break;
2898 #endif
2899 
2900   case NODE_LIST:
2901     do {
2902       tmin = tree_min_len(NODE_CAR(node), env);
2903       len = distance_add(len, tmin);
2904     } while (IS_NOT_NULL(node = NODE_CDR(node)));
2905     break;
2906 
2907   case NODE_ALT:
2908     {
2909       Node *x, *y;
2910       y = node;
2911       do {
2912         x = NODE_CAR(y);
2913         tmin = tree_min_len(x, env);
2914         if (y == node) len = tmin;
2915         else if (len > tmin) len = tmin;
2916       } while (IS_NOT_NULL(y = NODE_CDR(y)));
2917     }
2918     break;
2919 
2920   case NODE_STRING:
2921     {
2922       StrNode* sn = STR_(node);
2923       len = (int )(sn->end - sn->s);
2924     }
2925     break;
2926 
2927   case NODE_CTYPE:
2928   case NODE_CCLASS:
2929     len = ONIGENC_MBC_MINLEN(env->enc);
2930     break;
2931 
2932   case NODE_QUANT:
2933     {
2934       QuantNode* qn = QUANT_(node);
2935 
2936       if (qn->lower > 0) {
2937         len = tree_min_len(NODE_BODY(node), env);
2938         len = distance_multiply(len, qn->lower);
2939       }
2940     }
2941     break;
2942 
2943   case NODE_BAG:
2944     {
2945       BagNode* en = BAG_(node);
2946       switch (en->type) {
2947       case BAG_MEMORY:
2948         if (NODE_IS_MIN_FIXED(node))
2949           len = en->min_len;
2950         else {
2951           if (NODE_IS_MARK1(node))
2952             len = 0;  /* recursive */
2953           else {
2954             NODE_STATUS_ADD(node, MARK1);
2955             len = tree_min_len(NODE_BODY(node), env);
2956             NODE_STATUS_REMOVE(node, MARK1);
2957 
2958             en->min_len = len;
2959             NODE_STATUS_ADD(node, MIN_FIXED);
2960           }
2961         }
2962         break;
2963 
2964       case BAG_OPTION:
2965       case BAG_STOP_BACKTRACK:
2966         len = tree_min_len(NODE_BODY(node), env);
2967         break;
2968       case BAG_IF_ELSE:
2969         {
2970           OnigLen elen;
2971 
2972           len = tree_min_len(NODE_BODY(node), env);
2973           if (IS_NOT_NULL(en->te.Then))
2974             len += tree_min_len(en->te.Then, env);
2975           if (IS_NOT_NULL(en->te.Else))
2976             elen = tree_min_len(en->te.Else, env);
2977           else elen = 0;
2978 
2979           if (elen < len) len = elen;
2980         }
2981         break;
2982       }
2983     }
2984     break;
2985 
2986   case NODE_GIMMICK:
2987     {
2988       GimmickNode* g = GIMMICK_(node);
2989       if (g->type == GIMMICK_FAIL) {
2990         len = INFINITE_LEN;
2991         break;
2992       }
2993     }
2994     /* fall */
2995   case NODE_ANCHOR:
2996   default:
2997     break;
2998   }
2999 
3000   return len;
3001 }
3002 
3003 static OnigLen
tree_max_len(Node * node,ScanEnv * env)3004 tree_max_len(Node* node, ScanEnv* env)
3005 {
3006   OnigLen len;
3007   OnigLen tmax;
3008 
3009   len = 0;
3010   switch (NODE_TYPE(node)) {
3011   case NODE_LIST:
3012     do {
3013       tmax = tree_max_len(NODE_CAR(node), env);
3014       len = distance_add(len, tmax);
3015     } while (IS_NOT_NULL(node = NODE_CDR(node)));
3016     break;
3017 
3018   case NODE_ALT:
3019     do {
3020       tmax = tree_max_len(NODE_CAR(node), env);
3021       if (len < tmax) len = tmax;
3022     } while (IS_NOT_NULL(node = NODE_CDR(node)));
3023     break;
3024 
3025   case NODE_STRING:
3026     {
3027       StrNode* sn = STR_(node);
3028       len = (OnigLen )(sn->end - sn->s);
3029     }
3030     break;
3031 
3032   case NODE_CTYPE:
3033   case NODE_CCLASS:
3034     len = ONIGENC_MBC_MAXLEN_DIST(env->enc);
3035     break;
3036 
3037   case NODE_BACKREF:
3038     if (! NODE_IS_CHECKER(node)) {
3039       int i;
3040       int* backs;
3041       MemEnv* mem_env = SCANENV_MEMENV(env);
3042       BackRefNode* br = BACKREF_(node);
3043       if (NODE_IS_RECURSION(node)) {
3044         len = INFINITE_LEN;
3045         break;
3046       }
3047       backs = BACKREFS_P(br);
3048       for (i = 0; i < br->back_num; i++) {
3049         tmax = tree_max_len(mem_env[backs[i]].node, env);
3050         if (len < tmax) len = tmax;
3051       }
3052     }
3053     break;
3054 
3055 #ifdef USE_CALL
3056   case NODE_CALL:
3057     if (! NODE_IS_RECURSION(node))
3058       len = tree_max_len(NODE_BODY(node), env);
3059     else
3060       len = INFINITE_LEN;
3061     break;
3062 #endif
3063 
3064   case NODE_QUANT:
3065     {
3066       QuantNode* qn = QUANT_(node);
3067 
3068       if (qn->upper != 0) {
3069         len = tree_max_len(NODE_BODY(node), env);
3070         if (len != 0) {
3071           if (! IS_INFINITE_REPEAT(qn->upper))
3072             len = distance_multiply(len, qn->upper);
3073           else
3074             len = INFINITE_LEN;
3075         }
3076       }
3077     }
3078     break;
3079 
3080   case NODE_BAG:
3081     {
3082       BagNode* en = BAG_(node);
3083       switch (en->type) {
3084       case BAG_MEMORY:
3085         if (NODE_IS_MAX_FIXED(node))
3086           len = en->max_len;
3087         else {
3088           if (NODE_IS_MARK1(node))
3089             len = INFINITE_LEN;
3090           else {
3091             NODE_STATUS_ADD(node, MARK1);
3092             len = tree_max_len(NODE_BODY(node), env);
3093             NODE_STATUS_REMOVE(node, MARK1);
3094 
3095             en->max_len = len;
3096             NODE_STATUS_ADD(node, MAX_FIXED);
3097           }
3098         }
3099         break;
3100 
3101       case BAG_OPTION:
3102       case BAG_STOP_BACKTRACK:
3103         len = tree_max_len(NODE_BODY(node), env);
3104         break;
3105       case BAG_IF_ELSE:
3106         {
3107           OnigLen tlen, elen;
3108 
3109           len = tree_max_len(NODE_BODY(node), env);
3110           if (IS_NOT_NULL(en->te.Then)) {
3111             tlen = tree_max_len(en->te.Then, env);
3112             len = distance_add(len, tlen);
3113           }
3114           if (IS_NOT_NULL(en->te.Else))
3115             elen = tree_max_len(en->te.Else, env);
3116           else elen = 0;
3117 
3118           if (elen > len) len = elen;
3119         }
3120         break;
3121       }
3122     }
3123     break;
3124 
3125   case NODE_ANCHOR:
3126   case NODE_GIMMICK:
3127   default:
3128     break;
3129   }
3130 
3131   return len;
3132 }
3133 
3134 static int
check_backrefs(Node * node,ScanEnv * env)3135 check_backrefs(Node* node, ScanEnv* env)
3136 {
3137   int r;
3138 
3139   switch (NODE_TYPE(node)) {
3140   case NODE_LIST:
3141   case NODE_ALT:
3142     do {
3143       r = check_backrefs(NODE_CAR(node), env);
3144     } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
3145     break;
3146 
3147   case NODE_ANCHOR:
3148     if (! ANCHOR_HAS_BODY(ANCHOR_(node))) {
3149       r = 0;
3150       break;
3151     }
3152     /* fall */
3153   case NODE_QUANT:
3154     r = check_backrefs(NODE_BODY(node), env);
3155     break;
3156 
3157   case NODE_BAG:
3158     r = check_backrefs(NODE_BODY(node), env);
3159     {
3160       BagNode* en = BAG_(node);
3161 
3162       if (en->type == BAG_IF_ELSE) {
3163         if (r != 0) return r;
3164         if (IS_NOT_NULL(en->te.Then)) {
3165           r = check_backrefs(en->te.Then, env);
3166           if (r != 0) return r;
3167         }
3168         if (IS_NOT_NULL(en->te.Else)) {
3169           r = check_backrefs(en->te.Else, env);
3170         }
3171       }
3172     }
3173     break;
3174 
3175   case NODE_BACKREF:
3176     {
3177       int i;
3178       BackRefNode* br = BACKREF_(node);
3179       int* backs = BACKREFS_P(br);
3180       MemEnv* mem_env = SCANENV_MEMENV(env);
3181 
3182       for (i = 0; i < br->back_num; i++) {
3183         if (backs[i] > env->num_mem)
3184           return ONIGERR_INVALID_BACKREF;
3185 
3186         NODE_STATUS_ADD(mem_env[backs[i]].node, BACKREF);
3187       }
3188       r = 0;
3189     }
3190     break;
3191 
3192   default:
3193     r = 0;
3194     break;
3195   }
3196 
3197   return r;
3198 }
3199 
3200 
3201 #ifdef USE_CALL
3202 
3203 #define RECURSION_EXIST        (1<<0)
3204 #define RECURSION_MUST         (1<<1)
3205 #define RECURSION_INFINITE     (1<<2)
3206 
3207 static int
infinite_recursive_call_check(Node * node,ScanEnv * env,int head)3208 infinite_recursive_call_check(Node* node, ScanEnv* env, int head)
3209 {
3210   int ret;
3211   int r = 0;
3212 
3213   switch (NODE_TYPE(node)) {
3214   case NODE_LIST:
3215     {
3216       Node *x;
3217       OnigLen min;
3218 
3219       x = node;
3220       do {
3221         ret = infinite_recursive_call_check(NODE_CAR(x), env, head);
3222         if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;
3223         r |= ret;
3224         if (head != 0) {
3225           min = tree_min_len(NODE_CAR(x), env);
3226           if (min != 0) head = 0;
3227         }
3228       } while (IS_NOT_NULL(x = NODE_CDR(x)));
3229     }
3230     break;
3231 
3232   case NODE_ALT:
3233     {
3234       int must;
3235 
3236       must = RECURSION_MUST;
3237       do {
3238         ret = infinite_recursive_call_check(NODE_CAR(node), env, head);
3239         if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;
3240 
3241         r    |= (ret & RECURSION_EXIST);
3242         must &= ret;
3243       } while (IS_NOT_NULL(node = NODE_CDR(node)));
3244       r |= must;
3245     }
3246     break;
3247 
3248   case NODE_QUANT:
3249     r = infinite_recursive_call_check(NODE_BODY(node), env, head);
3250     if (r < 0) return r;
3251     if ((r & RECURSION_MUST) != 0) {
3252       if (QUANT_(node)->lower == 0)
3253         r &= ~RECURSION_MUST;
3254     }
3255     break;
3256 
3257   case NODE_ANCHOR:
3258     if (! ANCHOR_HAS_BODY(ANCHOR_(node)))
3259       break;
3260     /* fall */
3261   case NODE_CALL:
3262     r = infinite_recursive_call_check(NODE_BODY(node), env, head);
3263     break;
3264 
3265   case NODE_BAG:
3266     {
3267       BagNode* en = BAG_(node);
3268 
3269       if (en->type == BAG_MEMORY) {
3270         if (NODE_IS_MARK2(node))
3271           return 0;
3272         else if (NODE_IS_MARK1(node))
3273           return (head == 0 ? RECURSION_EXIST | RECURSION_MUST
3274                   : RECURSION_EXIST | RECURSION_MUST | RECURSION_INFINITE);
3275         else {
3276           NODE_STATUS_ADD(node, MARK2);
3277           r = infinite_recursive_call_check(NODE_BODY(node), env, head);
3278           NODE_STATUS_REMOVE(node, MARK2);
3279         }
3280       }
3281       else if (en->type == BAG_IF_ELSE) {
3282         int eret;
3283 
3284         ret = infinite_recursive_call_check(NODE_BODY(node), env, head);
3285         if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;
3286         r |= ret;
3287         if (IS_NOT_NULL(en->te.Then)) {
3288           OnigLen min;
3289           if (head != 0) {
3290             min = tree_min_len(NODE_BODY(node), env);
3291           }
3292           else min = 0;
3293 
3294           ret = infinite_recursive_call_check(en->te.Then, env, min != 0 ? 0:head);
3295           if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;
3296           r |= ret;
3297         }
3298         if (IS_NOT_NULL(en->te.Else)) {
3299           eret = infinite_recursive_call_check(en->te.Else, env, head);
3300           if (eret < 0 || (eret & RECURSION_INFINITE) != 0) return eret;
3301           r |= (eret & RECURSION_EXIST);
3302           if ((eret & RECURSION_MUST) == 0)
3303             r &= ~RECURSION_MUST;
3304         }
3305       }
3306       else {
3307         r = infinite_recursive_call_check(NODE_BODY(node), env, head);
3308       }
3309     }
3310     break;
3311 
3312   default:
3313     break;
3314   }
3315 
3316   return r;
3317 }
3318 
3319 static int
infinite_recursive_call_check_trav(Node * node,ScanEnv * env)3320 infinite_recursive_call_check_trav(Node* node, ScanEnv* env)
3321 {
3322   int r;
3323 
3324   switch (NODE_TYPE(node)) {
3325   case NODE_LIST:
3326   case NODE_ALT:
3327     do {
3328       r = infinite_recursive_call_check_trav(NODE_CAR(node), env);
3329     } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
3330     break;
3331 
3332   case NODE_ANCHOR:
3333     if (! ANCHOR_HAS_BODY(ANCHOR_(node))) {
3334       r = 0;
3335       break;
3336     }
3337     /* fall */
3338   case NODE_QUANT:
3339     r = infinite_recursive_call_check_trav(NODE_BODY(node), env);
3340     break;
3341 
3342   case NODE_BAG:
3343     {
3344       BagNode* en = BAG_(node);
3345 
3346       if (en->type == BAG_MEMORY) {
3347         if (NODE_IS_RECURSION(node) && NODE_IS_CALLED(node)) {
3348           int ret;
3349 
3350           NODE_STATUS_ADD(node, MARK1);
3351 
3352           ret = infinite_recursive_call_check(NODE_BODY(node), env, 1);
3353           if (ret < 0) return ret;
3354           else if ((ret & (RECURSION_MUST | RECURSION_INFINITE)) != 0)
3355             return ONIGERR_NEVER_ENDING_RECURSION;
3356 
3357           NODE_STATUS_REMOVE(node, MARK1);
3358         }
3359       }
3360       else if (en->type == BAG_IF_ELSE) {
3361         if (IS_NOT_NULL(en->te.Then)) {
3362           r = infinite_recursive_call_check_trav(en->te.Then, env);
3363           if (r != 0) return r;
3364         }
3365         if (IS_NOT_NULL(en->te.Else)) {
3366           r = infinite_recursive_call_check_trav(en->te.Else, env);
3367           if (r != 0) return r;
3368         }
3369       }
3370     }
3371 
3372     r = infinite_recursive_call_check_trav(NODE_BODY(node), env);
3373     break;
3374 
3375   default:
3376     r = 0;
3377     break;
3378   }
3379 
3380   return r;
3381 }
3382 
3383 static int
recursive_call_check(Node * node)3384 recursive_call_check(Node* node)
3385 {
3386   int r;
3387 
3388   switch (NODE_TYPE(node)) {
3389   case NODE_LIST:
3390   case NODE_ALT:
3391     r = 0;
3392     do {
3393       r |= recursive_call_check(NODE_CAR(node));
3394     } while (IS_NOT_NULL(node = NODE_CDR(node)));
3395     break;
3396 
3397   case NODE_ANCHOR:
3398     if (! ANCHOR_HAS_BODY(ANCHOR_(node))) {
3399       r = 0;
3400       break;
3401     }
3402     /* fall */
3403   case NODE_QUANT:
3404     r = recursive_call_check(NODE_BODY(node));
3405     break;
3406 
3407   case NODE_CALL:
3408     r = recursive_call_check(NODE_BODY(node));
3409     if (r != 0) {
3410       if (NODE_IS_MARK1(NODE_BODY(node)))
3411         NODE_STATUS_ADD(node, RECURSION);
3412     }
3413     break;
3414 
3415   case NODE_BAG:
3416     {
3417       BagNode* en = BAG_(node);
3418 
3419       if (en->type == BAG_MEMORY) {
3420         if (NODE_IS_MARK2(node))
3421           return 0;
3422         else if (NODE_IS_MARK1(node))
3423           return 1; /* recursion */
3424         else {
3425           NODE_STATUS_ADD(node, MARK2);
3426           r = recursive_call_check(NODE_BODY(node));
3427           NODE_STATUS_REMOVE(node, MARK2);
3428         }
3429       }
3430       else if (en->type == BAG_IF_ELSE) {
3431         r = 0;
3432         if (IS_NOT_NULL(en->te.Then)) {
3433           r |= recursive_call_check(en->te.Then);
3434         }
3435         if (IS_NOT_NULL(en->te.Else)) {
3436           r |= recursive_call_check(en->te.Else);
3437         }
3438         r |= recursive_call_check(NODE_BODY(node));
3439       }
3440       else {
3441         r = recursive_call_check(NODE_BODY(node));
3442       }
3443     }
3444     break;
3445 
3446   default:
3447     r = 0;
3448     break;
3449   }
3450 
3451   return r;
3452 }
3453 
3454 #define IN_RECURSION         (1<<0)
3455 #define FOUND_CALLED_NODE    1
3456 
3457 static int
recursive_call_check_trav(Node * node,ScanEnv * env,int state)3458 recursive_call_check_trav(Node* node, ScanEnv* env, int state)
3459 {
3460   int r = 0;
3461 
3462   switch (NODE_TYPE(node)) {
3463   case NODE_LIST:
3464   case NODE_ALT:
3465     {
3466       int ret;
3467       do {
3468         ret = recursive_call_check_trav(NODE_CAR(node), env, state);
3469         if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
3470         else if (ret < 0) return ret;
3471       } while (IS_NOT_NULL(node = NODE_CDR(node)));
3472     }
3473     break;
3474 
3475   case NODE_QUANT:
3476     r = recursive_call_check_trav(NODE_BODY(node), env, state);
3477     if (QUANT_(node)->upper == 0) {
3478       if (r == FOUND_CALLED_NODE)
3479         QUANT_(node)->is_refered = 1;
3480     }
3481     break;
3482 
3483   case NODE_ANCHOR:
3484     {
3485       AnchorNode* an = ANCHOR_(node);
3486       if (ANCHOR_HAS_BODY(an))
3487         r = recursive_call_check_trav(NODE_ANCHOR_BODY(an), env, state);
3488     }
3489     break;
3490 
3491   case NODE_BAG:
3492     {
3493       int ret;
3494       int state1;
3495       BagNode* en = BAG_(node);
3496 
3497       if (en->type == BAG_MEMORY) {
3498         if (NODE_IS_CALLED(node) || (state & IN_RECURSION) != 0) {
3499           if (! NODE_IS_RECURSION(node)) {
3500             NODE_STATUS_ADD(node, MARK1);
3501             r = recursive_call_check(NODE_BODY(node));
3502             if (r != 0)
3503               NODE_STATUS_ADD(node, RECURSION);
3504             NODE_STATUS_REMOVE(node, MARK1);
3505           }
3506 
3507           if (NODE_IS_CALLED(node))
3508             r = FOUND_CALLED_NODE;
3509         }
3510       }
3511 
3512       state1 = state;
3513       if (NODE_IS_RECURSION(node))
3514         state1 |= IN_RECURSION;
3515 
3516       ret = recursive_call_check_trav(NODE_BODY(node), env, state1);
3517       if (ret == FOUND_CALLED_NODE)
3518         r = FOUND_CALLED_NODE;
3519 
3520       if (en->type == BAG_IF_ELSE) {
3521         if (IS_NOT_NULL(en->te.Then)) {
3522           ret = recursive_call_check_trav(en->te.Then, env, state1);
3523           if (ret == FOUND_CALLED_NODE)
3524             r = FOUND_CALLED_NODE;
3525         }
3526         if (IS_NOT_NULL(en->te.Else)) {
3527           ret = recursive_call_check_trav(en->te.Else, env, state1);
3528           if (ret == FOUND_CALLED_NODE)
3529             r = FOUND_CALLED_NODE;
3530         }
3531       }
3532     }
3533     break;
3534 
3535   default:
3536     break;
3537   }
3538 
3539   return r;
3540 }
3541 
3542 #endif
3543 
3544 #define IN_ALT          (1<<0)
3545 #define IN_NOT          (1<<1)
3546 #define IN_REAL_REPEAT  (1<<2)
3547 #define IN_VAR_REPEAT   (1<<3)
3548 #define IN_ZERO_REPEAT  (1<<4)
3549 #define IN_MULTI_ENTRY  (1<<5)
3550 #define IN_LOOK_BEHIND  (1<<6)
3551 
3552 
3553 /* divide different length alternatives in look-behind.
3554   (?<=A|B) ==> (?<=A)|(?<=B)
3555   (?<!A|B) ==> (?<!A)(?<!B)
3556 */
3557 static int
divide_look_behind_alternatives(Node * node)3558 divide_look_behind_alternatives(Node* node)
3559 {
3560   Node *head, *np, *insert_node;
3561   AnchorNode* an = ANCHOR_(node);
3562   int anc_type = an->type;
3563 
3564   head = NODE_ANCHOR_BODY(an);
3565   np = NODE_CAR(head);
3566   swap_node(node, head);
3567   NODE_CAR(node) = head;
3568   NODE_BODY(head) = np;
3569 
3570   np = node;
3571   while (IS_NOT_NULL(np = NODE_CDR(np))) {
3572     insert_node = onig_node_new_anchor(anc_type, an->ascii_mode);
3573     CHECK_NULL_RETURN_MEMERR(insert_node);
3574     NODE_BODY(insert_node) = NODE_CAR(np);
3575     NODE_CAR(np) = insert_node;
3576   }
3577 
3578   if (anc_type == ANCR_LOOK_BEHIND_NOT) {
3579     np = node;
3580     do {
3581       NODE_SET_TYPE(np, NODE_LIST);  /* alt -> list */
3582     } while (IS_NOT_NULL(np = NODE_CDR(np)));
3583   }
3584   return 0;
3585 }
3586 
3587 static int
setup_look_behind(Node * node,regex_t * reg,ScanEnv * env)3588 setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
3589 {
3590   int r, len;
3591   AnchorNode* an = ANCHOR_(node);
3592 
3593   r = get_char_len_node(NODE_ANCHOR_BODY(an), reg, &len);
3594   if (r == 0)
3595     an->char_len = len;
3596   else if (r == GET_CHAR_LEN_VARLEN)
3597     r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3598   else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
3599     if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
3600       r = divide_look_behind_alternatives(node);
3601     else
3602       r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3603   }
3604 
3605   return r;
3606 }
3607 
3608 static int
next_setup(Node * node,Node * next_node,regex_t * reg)3609 next_setup(Node* node, Node* next_node, regex_t* reg)
3610 {
3611   NodeType type;
3612 
3613  retry:
3614   type = NODE_TYPE(node);
3615   if (type == NODE_QUANT) {
3616     QuantNode* qn = QUANT_(node);
3617     if (qn->greedy && IS_INFINITE_REPEAT(qn->upper)) {
3618 #ifdef USE_QUANT_PEEK_NEXT
3619       Node* n = get_head_value_node(next_node, 1, reg);
3620       /* '\0': for UTF-16BE etc... */
3621       if (IS_NOT_NULL(n) && STR_(n)->s[0] != '\0') {
3622         qn->next_head_exact = n;
3623       }
3624 #endif
3625       /* automatic posseivation a*b ==> (?>a*)b */
3626       if (qn->lower <= 1) {
3627         if (is_strict_real_node(NODE_BODY(node))) {
3628           Node *x, *y;
3629           x = get_head_value_node(NODE_BODY(node), 0, reg);
3630           if (IS_NOT_NULL(x)) {
3631             y = get_head_value_node(next_node,  0, reg);
3632             if (IS_NOT_NULL(y) && is_exclusive(x, y, reg)) {
3633               Node* en = onig_node_new_bag(BAG_STOP_BACKTRACK);
3634               CHECK_NULL_RETURN_MEMERR(en);
3635               NODE_STATUS_ADD(en, STRICT_REAL_REPEAT);
3636               swap_node(node, en);
3637               NODE_BODY(node) = en;
3638             }
3639           }
3640         }
3641       }
3642     }
3643   }
3644   else if (type == NODE_BAG) {
3645     BagNode* en = BAG_(node);
3646     if (en->type == BAG_MEMORY) {
3647       node = NODE_BODY(node);
3648       goto retry;
3649     }
3650   }
3651   return 0;
3652 }
3653 
3654 
3655 static int
update_string_node_case_fold(regex_t * reg,Node * node)3656 update_string_node_case_fold(regex_t* reg, Node *node)
3657 {
3658   UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
3659   UChar *sbuf, *ebuf, *sp;
3660   int r, i, len, sbuf_size;
3661   StrNode* sn = STR_(node);
3662 
3663   end = sn->end;
3664   sbuf_size = (int )(end - sn->s) * 2;
3665   sbuf = (UChar* )xmalloc(sbuf_size);
3666   CHECK_NULL_RETURN_MEMERR(sbuf);
3667   ebuf = sbuf + sbuf_size;
3668 
3669   sp = sbuf;
3670   p = sn->s;
3671   while (p < end) {
3672     len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
3673     for (i = 0; i < len; i++) {
3674       if (sp >= ebuf) {
3675         sbuf = (UChar* )xrealloc(sbuf, sbuf_size * 2, sbuf_size);
3676         CHECK_NULL_RETURN_MEMERR(sbuf);
3677         sp = sbuf + sbuf_size;
3678         sbuf_size *= 2;
3679         ebuf = sbuf + sbuf_size;
3680       }
3681 
3682       *sp++ = buf[i];
3683     }
3684   }
3685 
3686   r = onig_node_str_set(node, sbuf, sp);
3687   if (r != 0) {
3688     xfree(sbuf);
3689     return r;
3690   }
3691 
3692   xfree(sbuf);
3693   return 0;
3694 }
3695 
3696 static int
expand_case_fold_make_rem_string(Node ** rnode,UChar * s,UChar * end,regex_t * reg)3697 expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end, regex_t* reg)
3698 {
3699   int r;
3700   Node *node;
3701 
3702   node = onig_node_new_str(s, end);
3703   if (IS_NULL(node)) return ONIGERR_MEMORY;
3704 
3705   r = update_string_node_case_fold(reg, node);
3706   if (r != 0) {
3707     onig_node_free(node);
3708     return r;
3709   }
3710 
3711   NODE_STRING_SET_AMBIG(node);
3712   NODE_STRING_SET_DONT_GET_OPT_INFO(node);
3713   *rnode = node;
3714   return 0;
3715 }
3716 
3717 static int
expand_case_fold_string_alt(int item_num,OnigCaseFoldCodeItem items[],UChar * p,int slen,UChar * end,regex_t * reg,Node ** rnode)3718 expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], UChar *p,
3719                             int slen, UChar *end, regex_t* reg, Node **rnode)
3720 {
3721   int r, i, j;
3722   int len;
3723   int varlen;
3724   Node *anode, *var_anode, *snode, *xnode, *an;
3725   UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
3726 
3727   *rnode = var_anode = NULL_NODE;
3728 
3729   varlen = 0;
3730   for (i = 0; i < item_num; i++) {
3731     if (items[i].byte_len != slen) {
3732       varlen = 1;
3733       break;
3734     }
3735   }
3736 
3737   if (varlen != 0) {
3738     *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3739     if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
3740 
3741     xnode = onig_node_new_list(NULL, NULL);
3742     if (IS_NULL(xnode)) goto mem_err;
3743     NODE_CAR(var_anode) = xnode;
3744 
3745     anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3746     if (IS_NULL(anode)) goto mem_err;
3747     NODE_CAR(xnode) = anode;
3748   }
3749   else {
3750     *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3751     if (IS_NULL(anode)) return ONIGERR_MEMORY;
3752   }
3753 
3754   snode = onig_node_new_str(p, p + slen);
3755   if (IS_NULL(snode)) goto mem_err;
3756 
3757   NODE_CAR(anode) = snode;
3758 
3759   for (i = 0; i < item_num; i++) {
3760     snode = onig_node_new_str(NULL, NULL);
3761     if (IS_NULL(snode)) goto mem_err;
3762 
3763     for (j = 0; j < items[i].code_len; j++) {
3764       len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
3765       if (len < 0) {
3766         r = len;
3767         goto mem_err2;
3768       }
3769 
3770       r = onig_node_str_cat(snode, buf, buf + len);
3771       if (r != 0) goto mem_err2;
3772     }
3773 
3774     an = onig_node_new_alt(NULL_NODE, NULL_NODE);
3775     if (IS_NULL(an)) {
3776       goto mem_err2;
3777     }
3778     //The NULL pointer check is not necessary. It is added just for pass static
3779     //analysis. When condition "items[i].byte_len != slen" is true, "varlen = 1"
3780     //in line 3503 will be reached ,so that "if (IS_NULL(var_anode)) return ONIGERR_MEMORY"
3781     //in line 3510 will be executed, so the null pointer has been checked before
3782     //deferenced in line 3584.
3783     if (items[i].byte_len != slen && IS_NOT_NULL(var_anode)) {
3784       Node *rem;
3785       UChar *q = p + items[i].byte_len;
3786 
3787       if (q < end) {
3788         r = expand_case_fold_make_rem_string(&rem, q, end, reg);
3789         if (r != 0) {
3790           onig_node_free(an);
3791           goto mem_err2;
3792         }
3793 
3794         xnode = onig_node_list_add(NULL_NODE, snode);
3795         if (IS_NULL(xnode)) {
3796           onig_node_free(an);
3797           onig_node_free(rem);
3798           goto mem_err2;
3799         }
3800         if (IS_NULL(onig_node_list_add(xnode, rem))) {
3801           onig_node_free(an);
3802           onig_node_free(xnode);
3803           onig_node_free(rem);
3804           goto mem_err;
3805         }
3806 
3807         NODE_CAR(an) = xnode;
3808       }
3809       else {
3810         NODE_CAR(an) = snode;
3811       }
3812 
3813       NODE_CDR(var_anode) = an;
3814       var_anode = an;
3815     }
3816     else {
3817       NODE_CAR(an)     = snode;
3818       NODE_CDR(anode) = an;
3819       anode = an;
3820     }
3821   }
3822 
3823   return varlen;
3824 
3825  mem_err2:
3826   onig_node_free(snode);
3827 
3828  mem_err:
3829   onig_node_free(*rnode);
3830 
3831   return ONIGERR_MEMORY;
3832 }
3833 
3834 static int
is_good_case_fold_items_for_search(OnigEncoding enc,int slen,int n,OnigCaseFoldCodeItem items[])3835 is_good_case_fold_items_for_search(OnigEncoding enc, int slen,
3836                                    int n, OnigCaseFoldCodeItem items[])
3837 {
3838   int i, len;
3839   UChar buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
3840 
3841   for (i = 0; i < n; i++) {
3842     OnigCaseFoldCodeItem* item = items + i;
3843 
3844     if (item->code_len != 1)    return 0;
3845     if (item->byte_len != slen) return 0;
3846     len = ONIGENC_CODE_TO_MBC(enc, item->code[0], buf);
3847     if (len != slen) return 0;
3848   }
3849 
3850   return 1;
3851 }
3852 
3853 #define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION  8
3854 
3855 static int
expand_case_fold_string(Node * node,regex_t * reg,int state)3856 expand_case_fold_string(Node* node, regex_t* reg, int state)
3857 {
3858   int r, n, len, alt_num;
3859   int fold_len;
3860   int prev_is_ambig, prev_is_good, is_good, is_in_look_behind;
3861   UChar *start, *end, *p;
3862   UChar* foldp;
3863   Node *top_root, *root, *snode, *prev_node;
3864   OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
3865   UChar buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
3866   StrNode* sn;
3867 
3868   if (NODE_STRING_IS_AMBIG(node)) return 0;
3869 
3870   sn = STR_(node);
3871 
3872   start = sn->s;
3873   end   = sn->end;
3874   if (start >= end) return 0;
3875 
3876   is_in_look_behind = (state & IN_LOOK_BEHIND) != 0;
3877 
3878   r = 0;
3879   top_root = root = prev_node = snode = NULL_NODE;
3880   alt_num = 1;
3881   p = start;
3882   while (p < end) {
3883     n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
3884                                            p, end, items);
3885     if (n < 0) {
3886       r = n;
3887       goto err;
3888     }
3889 
3890     len = enclen(reg->enc, p);
3891     is_good = is_good_case_fold_items_for_search(reg->enc, len, n, items);
3892 
3893     if (is_in_look_behind ||
3894         (IS_NOT_NULL(snode) ||
3895          (is_good
3896           /* expand single char case: ex. /(?i:a)/ */
3897           && !(p == start && p + len >= end)))) {
3898       if (IS_NULL(snode)) {
3899         if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3900           top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3901           if (IS_NULL(root)) {
3902             onig_node_free(prev_node);
3903             goto mem_err;
3904           }
3905         }
3906 
3907         prev_node = snode = onig_node_new_str(NULL, NULL);
3908         if (IS_NULL(snode)) goto mem_err;
3909         if (IS_NOT_NULL(root)) {
3910           if (IS_NULL(onig_node_list_add(root, snode))) {
3911             onig_node_free(snode);
3912             goto mem_err;
3913           }
3914         }
3915 
3916         prev_is_ambig = -1; /* -1: new */
3917         prev_is_good  =  0; /* escape compiler warning */
3918       }
3919       else {
3920         prev_is_ambig = NODE_STRING_IS_AMBIG(snode);
3921         prev_is_good  = NODE_STRING_IS_GOOD_AMBIG(snode);
3922       }
3923 
3924       if (n != 0) {
3925         foldp = p;
3926         fold_len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag,
3927                                          &foldp, end, buf);
3928         foldp = buf;
3929       }
3930       else {
3931         foldp = p; fold_len = len;
3932       }
3933 
3934       if ((prev_is_ambig == 0 && n != 0) ||
3935           (prev_is_ambig > 0 && (n == 0 || prev_is_good != is_good))) {
3936         if (IS_NULL(root) /* && IS_NOT_NULL(prev_node) */) {
3937           top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3938           if (IS_NULL(root)) {
3939             onig_node_free(prev_node);
3940             goto mem_err;
3941           }
3942         }
3943 
3944         prev_node = snode = onig_node_new_str(foldp, foldp + fold_len);
3945         if (IS_NULL(snode)) goto mem_err;
3946         if (IS_NULL(onig_node_list_add(root, snode))) {
3947           onig_node_free(snode);
3948           goto mem_err;
3949         }
3950       }
3951       else {
3952         r = onig_node_str_cat(snode, foldp, foldp + fold_len);
3953         if (r != 0) goto err;
3954       }
3955 
3956       if (n != 0) NODE_STRING_SET_AMBIG(snode);
3957       if (is_good != 0) NODE_STRING_SET_GOOD_AMBIG(snode);
3958     }
3959     else {
3960       alt_num *= (n + 1);
3961       if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
3962 
3963       if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3964         top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3965         if (IS_NULL(root)) {
3966           onig_node_free(prev_node);
3967           goto mem_err;
3968         }
3969       }
3970 
3971       r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
3972       if (r < 0) goto mem_err;
3973       if (r == 1) {
3974         if (IS_NULL(root)) {
3975           top_root = prev_node;
3976         }
3977         else {
3978           if (IS_NULL(onig_node_list_add(root, prev_node))) {
3979             onig_node_free(prev_node);
3980             goto mem_err;
3981           }
3982         }
3983 
3984         root = NODE_CAR(prev_node);
3985       }
3986       else { /* r == 0 */
3987         if (IS_NOT_NULL(root)) {
3988           if (IS_NULL(onig_node_list_add(root, prev_node))) {
3989             onig_node_free(prev_node);
3990             goto mem_err;
3991           }
3992         }
3993       }
3994 
3995       snode = NULL_NODE;
3996     }
3997 
3998     p += len;
3999   }
4000 
4001   if (p < end) {
4002     Node *srem;
4003 
4004     r = expand_case_fold_make_rem_string(&srem, p, end, reg);
4005     if (r != 0) goto mem_err;
4006 
4007     if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
4008       top_root = root = onig_node_list_add(NULL_NODE, prev_node);
4009       if (IS_NULL(root)) {
4010         onig_node_free(srem);
4011         onig_node_free(prev_node);
4012         goto mem_err;
4013       }
4014     }
4015 
4016     if (IS_NULL(root)) {
4017       prev_node = srem;
4018     }
4019     else {
4020       if (IS_NULL(onig_node_list_add(root, srem))) {
4021         onig_node_free(srem);
4022         goto mem_err;
4023       }
4024     }
4025   }
4026 
4027   /* ending */
4028   top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
4029   swap_node(node, top_root);
4030   onig_node_free(top_root);
4031   return 0;
4032 
4033  mem_err:
4034   r = ONIGERR_MEMORY;
4035 
4036  err:
4037   onig_node_free(top_root);
4038   return r;
4039 }
4040 
4041 #ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
4042 static enum BodyEmptyType
quantifiers_memory_node_info(Node * node)4043 quantifiers_memory_node_info(Node* node)
4044 {
4045   int r = BODY_IS_EMPTY_POSSIBILITY;
4046 
4047   switch (NODE_TYPE(node)) {
4048   case NODE_LIST:
4049   case NODE_ALT:
4050     {
4051       int v;
4052       do {
4053         v = quantifiers_memory_node_info(NODE_CAR(node));
4054         if (v > r) r = v;
4055       } while (IS_NOT_NULL(node = NODE_CDR(node)));
4056     }
4057     break;
4058 
4059 #ifdef USE_CALL
4060   case NODE_CALL:
4061     if (NODE_IS_RECURSION(node)) {
4062       return BODY_IS_EMPTY_POSSIBILITY_REC; /* tiny version */
4063     }
4064     else
4065       r = quantifiers_memory_node_info(NODE_BODY(node));
4066     break;
4067 #endif
4068 
4069   case NODE_QUANT:
4070     {
4071       QuantNode* qn = QUANT_(node);
4072       if (qn->upper != 0) {
4073         r = quantifiers_memory_node_info(NODE_BODY(node));
4074       }
4075     }
4076     break;
4077 
4078   case NODE_BAG:
4079     {
4080       BagNode* en = BAG_(node);
4081       switch (en->type) {
4082       case BAG_MEMORY:
4083         if (NODE_IS_RECURSION(node)) {
4084           return BODY_IS_EMPTY_POSSIBILITY_REC;
4085         }
4086         return BODY_IS_EMPTY_POSSIBILITY_MEM;
4087         break;
4088 
4089       case BAG_OPTION:
4090       case BAG_STOP_BACKTRACK:
4091         r = quantifiers_memory_node_info(NODE_BODY(node));
4092         break;
4093       case BAG_IF_ELSE:
4094         {
4095           int v;
4096           r = quantifiers_memory_node_info(NODE_BODY(node));
4097           if (IS_NOT_NULL(en->te.Then)) {
4098             v = quantifiers_memory_node_info(en->te.Then);
4099             if (v > r) r = v;
4100           }
4101           if (IS_NOT_NULL(en->te.Else)) {
4102             v = quantifiers_memory_node_info(en->te.Else);
4103             if (v > r) r = v;
4104           }
4105         }
4106         break;
4107       }
4108     }
4109     break;
4110 
4111   case NODE_BACKREF:
4112   case NODE_STRING:
4113   case NODE_CTYPE:
4114   case NODE_CCLASS:
4115   case NODE_ANCHOR:
4116   case NODE_GIMMICK:
4117   default:
4118     break;
4119   }
4120 
4121   return r;
4122 }
4123 #endif /* USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT */
4124 
4125 
4126 #ifdef USE_CALL
4127 
4128 #ifdef __GNUC__
4129 __inline
4130 #endif
4131 static int
setup_call_node_call(CallNode * cn,ScanEnv * env,int state)4132 setup_call_node_call(CallNode* cn, ScanEnv* env, int state)
4133 {
4134   MemEnv* mem_env = SCANENV_MEMENV(env);
4135 
4136   if (cn->by_number != 0) {
4137     int gnum = cn->group_num;
4138 
4139     if (env->num_named > 0 &&
4140         IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
4141         ! ONIG_IS_OPTION_ON(env->options, ONIG_OPTION_CAPTURE_GROUP)) {
4142       return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
4143     }
4144 
4145     if (gnum > env->num_mem) {
4146       onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_GROUP_REFERENCE,
4147                                      cn->name, cn->name_end);
4148       return ONIGERR_UNDEFINED_GROUP_REFERENCE;
4149     }
4150 
4151   set_call_attr:
4152     NODE_CALL_BODY(cn) = mem_env[cn->group_num].node;
4153     if (IS_NULL(NODE_CALL_BODY(cn))) {
4154       onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,
4155                                      cn->name, cn->name_end);
4156       return ONIGERR_UNDEFINED_NAME_REFERENCE;
4157     }
4158   }
4159   else {
4160     int *refs;
4161 
4162     int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end, &refs);
4163     if (n <= 0) {
4164       onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,
4165                                      cn->name, cn->name_end);
4166       return ONIGERR_UNDEFINED_NAME_REFERENCE;
4167     }
4168     else if (n > 1) {
4169       onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL,
4170                                      cn->name, cn->name_end);
4171       return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
4172     }
4173     else {
4174       cn->group_num = refs[0];
4175       goto set_call_attr;
4176     }
4177   }
4178 
4179   return 0;
4180 }
4181 
4182 static void
setup_call2_call(Node * node)4183 setup_call2_call(Node* node)
4184 {
4185   switch (NODE_TYPE(node)) {
4186   case NODE_LIST:
4187   case NODE_ALT:
4188     do {
4189       setup_call2_call(NODE_CAR(node));
4190     } while (IS_NOT_NULL(node = NODE_CDR(node)));
4191     break;
4192 
4193   case NODE_QUANT:
4194     setup_call2_call(NODE_BODY(node));
4195     break;
4196 
4197   case NODE_ANCHOR:
4198     if (ANCHOR_HAS_BODY(ANCHOR_(node)))
4199       setup_call2_call(NODE_BODY(node));
4200     break;
4201 
4202   case NODE_BAG:
4203     {
4204       BagNode* en = BAG_(node);
4205 
4206       if (en->type == BAG_MEMORY) {
4207         if (! NODE_IS_MARK1(node)) {
4208           NODE_STATUS_ADD(node, MARK1);
4209           setup_call2_call(NODE_BODY(node));
4210           NODE_STATUS_REMOVE(node, MARK1);
4211         }
4212       }
4213       else if (en->type == BAG_IF_ELSE) {
4214         setup_call2_call(NODE_BODY(node));
4215         if (IS_NOT_NULL(en->te.Then))
4216           setup_call2_call(en->te.Then);
4217         if (IS_NOT_NULL(en->te.Else))
4218           setup_call2_call(en->te.Else);
4219       }
4220       else {
4221         setup_call2_call(NODE_BODY(node));
4222       }
4223     }
4224     break;
4225 
4226   case NODE_CALL:
4227     if (! NODE_IS_MARK1(node)) {
4228       NODE_STATUS_ADD(node, MARK1);
4229       {
4230         CallNode* cn = CALL_(node);
4231         Node* called = NODE_CALL_BODY(cn);
4232 
4233         cn->entry_count++;
4234 
4235         NODE_STATUS_ADD(called, CALLED);
4236         BAG_(called)->m.entry_count++;
4237         setup_call2_call(called);
4238       }
4239       NODE_STATUS_REMOVE(node, MARK1);
4240     }
4241     break;
4242 
4243   default:
4244     break;
4245   }
4246 }
4247 
4248 static int
setup_call(Node * node,ScanEnv * env,int state)4249 setup_call(Node* node, ScanEnv* env, int state)
4250 {
4251   int r;
4252 
4253   switch (NODE_TYPE(node)) {
4254   case NODE_LIST:
4255   case NODE_ALT:
4256     do {
4257       r = setup_call(NODE_CAR(node), env, state);
4258     } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
4259     break;
4260 
4261   case NODE_QUANT:
4262     if (QUANT_(node)->upper == 0)
4263       state |= IN_ZERO_REPEAT;
4264 
4265     r = setup_call(NODE_BODY(node), env, state);
4266     break;
4267 
4268   case NODE_ANCHOR:
4269     if (ANCHOR_HAS_BODY(ANCHOR_(node)))
4270       r = setup_call(NODE_BODY(node), env, state);
4271     else
4272       r = 0;
4273     break;
4274 
4275   case NODE_BAG:
4276     {
4277       BagNode* en = BAG_(node);
4278 
4279       if (en->type == BAG_MEMORY) {
4280         if ((state & IN_ZERO_REPEAT) != 0) {
4281           NODE_STATUS_ADD(node, IN_ZERO_REPEAT);
4282           BAG_(node)->m.entry_count--;
4283         }
4284         r = setup_call(NODE_BODY(node), env, state);
4285       }
4286       else if (en->type == BAG_IF_ELSE) {
4287         r = setup_call(NODE_BODY(node), env, state);
4288         if (r != 0) return r;
4289         if (IS_NOT_NULL(en->te.Then)) {
4290           r = setup_call(en->te.Then, env, state);
4291           if (r != 0) return r;
4292         }
4293         if (IS_NOT_NULL(en->te.Else))
4294           r = setup_call(en->te.Else, env, state);
4295       }
4296       else
4297         r = setup_call(NODE_BODY(node), env, state);
4298     }
4299     break;
4300 
4301   case NODE_CALL:
4302     if ((state & IN_ZERO_REPEAT) != 0) {
4303       NODE_STATUS_ADD(node, IN_ZERO_REPEAT);
4304       CALL_(node)->entry_count--;
4305     }
4306 
4307     r = setup_call_node_call(CALL_(node), env, state);
4308     break;
4309 
4310   default:
4311     r = 0;
4312     break;
4313   }
4314 
4315   return r;
4316 }
4317 
4318 static int
setup_call2(Node * node)4319 setup_call2(Node* node)
4320 {
4321   int r = 0;
4322 
4323   switch (NODE_TYPE(node)) {
4324   case NODE_LIST:
4325   case NODE_ALT:
4326     do {
4327       r = setup_call2(NODE_CAR(node));
4328     } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
4329     break;
4330 
4331   case NODE_QUANT:
4332     if (QUANT_(node)->upper != 0)
4333       r = setup_call2(NODE_BODY(node));
4334     break;
4335 
4336   case NODE_ANCHOR:
4337     if (ANCHOR_HAS_BODY(ANCHOR_(node)))
4338       r = setup_call2(NODE_BODY(node));
4339     break;
4340 
4341   case NODE_BAG:
4342     if (! NODE_IS_IN_ZERO_REPEAT(node))
4343       r = setup_call2(NODE_BODY(node));
4344 
4345     {
4346       BagNode* en = BAG_(node);
4347 
4348       if (r != 0) return r;
4349       if (en->type == BAG_IF_ELSE) {
4350         if (IS_NOT_NULL(en->te.Then)) {
4351           r = setup_call2(en->te.Then);
4352           if (r != 0) return r;
4353         }
4354         if (IS_NOT_NULL(en->te.Else))
4355           r = setup_call2(en->te.Else);
4356       }
4357     }
4358     break;
4359 
4360   case NODE_CALL:
4361     if (! NODE_IS_IN_ZERO_REPEAT(node)) {
4362       setup_call2_call(node);
4363     }
4364     break;
4365 
4366   default:
4367     break;
4368   }
4369 
4370   return r;
4371 }
4372 
4373 
4374 static void
setup_called_state_call(Node * node,int state)4375 setup_called_state_call(Node* node, int state)
4376 {
4377   switch (NODE_TYPE(node)) {
4378   case NODE_ALT:
4379     state |= IN_ALT;
4380     /* fall */
4381   case NODE_LIST:
4382     do {
4383       setup_called_state_call(NODE_CAR(node), state);
4384     } while (IS_NOT_NULL(node = NODE_CDR(node)));
4385     break;
4386 
4387   case NODE_QUANT:
4388     {
4389       QuantNode* qn = QUANT_(node);
4390 
4391       if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 2)
4392         state |= IN_REAL_REPEAT;
4393       if (qn->lower != qn->upper)
4394         state |= IN_VAR_REPEAT;
4395 
4396       setup_called_state_call(NODE_QUANT_BODY(qn), state);
4397     }
4398     break;
4399 
4400   case NODE_ANCHOR:
4401     {
4402       AnchorNode* an = ANCHOR_(node);
4403 
4404       switch (an->type) {
4405       case ANCR_PREC_READ_NOT:
4406       case ANCR_LOOK_BEHIND_NOT:
4407         state |= IN_NOT;
4408         /* fall */
4409       case ANCR_PREC_READ:
4410       case ANCR_LOOK_BEHIND:
4411         setup_called_state_call(NODE_ANCHOR_BODY(an), state);
4412         break;
4413       default:
4414         break;
4415       }
4416     }
4417     break;
4418 
4419   case NODE_BAG:
4420     {
4421       BagNode* en = BAG_(node);
4422 
4423       if (en->type == BAG_MEMORY) {
4424         if (NODE_IS_MARK1(node)) {
4425           if ((~en->m.called_state & state) != 0) {
4426             en->m.called_state |= state;
4427             setup_called_state_call(NODE_BODY(node), state);
4428           }
4429         }
4430         else {
4431           NODE_STATUS_ADD(node, MARK1);
4432           en->m.called_state |= state;
4433           setup_called_state_call(NODE_BODY(node), state);
4434           NODE_STATUS_REMOVE(node, MARK1);
4435         }
4436       }
4437       else if (en->type == BAG_IF_ELSE) {
4438         if (IS_NOT_NULL(en->te.Then)) {
4439           setup_called_state_call(en->te.Then, state);
4440         }
4441         if (IS_NOT_NULL(en->te.Else))
4442           setup_called_state_call(en->te.Else, state);
4443       }
4444       else {
4445         setup_called_state_call(NODE_BODY(node), state);
4446       }
4447     }
4448     break;
4449 
4450   case NODE_CALL:
4451     setup_called_state_call(NODE_BODY(node), state);
4452     break;
4453 
4454   default:
4455     break;
4456   }
4457 }
4458 
4459 static void
setup_called_state(Node * node,int state)4460 setup_called_state(Node* node, int state)
4461 {
4462   switch (NODE_TYPE(node)) {
4463   case NODE_ALT:
4464     state |= IN_ALT;
4465     /* fall */
4466   case NODE_LIST:
4467     do {
4468       setup_called_state(NODE_CAR(node), state);
4469     } while (IS_NOT_NULL(node = NODE_CDR(node)));
4470     break;
4471 
4472 #ifdef USE_CALL
4473   case NODE_CALL:
4474     setup_called_state_call(node, state);
4475     break;
4476 #endif
4477 
4478   case NODE_BAG:
4479     {
4480       BagNode* en = BAG_(node);
4481 
4482       switch (en->type) {
4483       case BAG_MEMORY:
4484         if (en->m.entry_count > 1)
4485           state |= IN_MULTI_ENTRY;
4486 
4487         en->m.called_state |= state;
4488         /* fall */
4489       case BAG_OPTION:
4490       case BAG_STOP_BACKTRACK:
4491         setup_called_state(NODE_BODY(node), state);
4492         break;
4493       case BAG_IF_ELSE:
4494         setup_called_state(NODE_BODY(node), state);
4495         if (IS_NOT_NULL(en->te.Then))
4496           setup_called_state(en->te.Then, state);
4497         if (IS_NOT_NULL(en->te.Else))
4498           setup_called_state(en->te.Else, state);
4499         break;
4500       }
4501     }
4502     break;
4503 
4504   case NODE_QUANT:
4505     {
4506       QuantNode* qn = QUANT_(node);
4507 
4508       if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 2)
4509         state |= IN_REAL_REPEAT;
4510       if (qn->lower != qn->upper)
4511         state |= IN_VAR_REPEAT;
4512 
4513       setup_called_state(NODE_QUANT_BODY(qn), state);
4514     }
4515     break;
4516 
4517   case NODE_ANCHOR:
4518     {
4519       AnchorNode* an = ANCHOR_(node);
4520 
4521       switch (an->type) {
4522       case ANCR_PREC_READ_NOT:
4523       case ANCR_LOOK_BEHIND_NOT:
4524         state |= IN_NOT;
4525         /* fall */
4526       case ANCR_PREC_READ:
4527       case ANCR_LOOK_BEHIND:
4528         setup_called_state(NODE_ANCHOR_BODY(an), state);
4529         break;
4530       default:
4531         break;
4532       }
4533     }
4534     break;
4535 
4536   case NODE_BACKREF:
4537   case NODE_STRING:
4538   case NODE_CTYPE:
4539   case NODE_CCLASS:
4540   case NODE_GIMMICK:
4541   default:
4542     break;
4543   }
4544 }
4545 
4546 #endif  /* USE_CALL */
4547 
4548 
4549 static int setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env);
4550 
4551 #ifdef __GNUC__
4552 __inline
4553 #endif
4554 static int
setup_anchor(Node * node,regex_t * reg,int state,ScanEnv * env)4555 setup_anchor(Node* node, regex_t* reg, int state, ScanEnv* env)
4556 {
4557 /* allowed node types in look-behind */
4558 #define ALLOWED_TYPE_IN_LB \
4559   ( NODE_BIT_LIST | NODE_BIT_ALT | NODE_BIT_STRING | NODE_BIT_CCLASS \
4560   | NODE_BIT_CTYPE | NODE_BIT_ANCHOR | NODE_BIT_BAG | NODE_BIT_QUANT \
4561   | NODE_BIT_CALL | NODE_BIT_GIMMICK)
4562 
4563 #define ALLOWED_BAG_IN_LB       ( 1<<BAG_MEMORY | 1<<BAG_OPTION | 1<<BAG_IF_ELSE )
4564 #define ALLOWED_BAG_IN_LB_NOT   ( 1<<BAG_OPTION | 1<<BAG_IF_ELSE )
4565 
4566 #define ALLOWED_ANCHOR_IN_LB \
4567   ( ANCR_LOOK_BEHIND | ANCR_BEGIN_LINE | ANCR_END_LINE | ANCR_BEGIN_BUF \
4568   | ANCR_BEGIN_POSITION | ANCR_WORD_BOUNDARY | ANCR_NO_WORD_BOUNDARY \
4569   | ANCR_WORD_BEGIN | ANCR_WORD_END \
4570   | ANCR_TEXT_SEGMENT_BOUNDARY | ANCR_NO_TEXT_SEGMENT_BOUNDARY )
4571 
4572 #define ALLOWED_ANCHOR_IN_LB_NOT \
4573   ( ANCR_LOOK_BEHIND | ANCR_LOOK_BEHIND_NOT | ANCR_BEGIN_LINE \
4574   | ANCR_END_LINE | ANCR_BEGIN_BUF | ANCR_BEGIN_POSITION | ANCR_WORD_BOUNDARY \
4575   | ANCR_NO_WORD_BOUNDARY | ANCR_WORD_BEGIN | ANCR_WORD_END \
4576   | ANCR_TEXT_SEGMENT_BOUNDARY | ANCR_NO_TEXT_SEGMENT_BOUNDARY )
4577 
4578   int r;
4579   AnchorNode* an = ANCHOR_(node);
4580 
4581   switch (an->type) {
4582   case ANCR_PREC_READ:
4583     r = setup_tree(NODE_ANCHOR_BODY(an), reg, state, env);
4584     break;
4585   case ANCR_PREC_READ_NOT:
4586     r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state | IN_NOT), env);
4587     break;
4588 
4589   case ANCR_LOOK_BEHIND:
4590     {
4591       r = check_type_tree(NODE_ANCHOR_BODY(an), ALLOWED_TYPE_IN_LB,
4592                           ALLOWED_BAG_IN_LB, ALLOWED_ANCHOR_IN_LB);
4593       if (r < 0) return r;
4594       if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4595       r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state|IN_LOOK_BEHIND), env);
4596       if (r != 0) return r;
4597       r = setup_look_behind(node, reg, env);
4598     }
4599     break;
4600 
4601   case ANCR_LOOK_BEHIND_NOT:
4602     {
4603       r = check_type_tree(NODE_ANCHOR_BODY(an), ALLOWED_TYPE_IN_LB,
4604                           ALLOWED_BAG_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
4605       if (r < 0) return r;
4606       if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4607       r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state|IN_NOT|IN_LOOK_BEHIND),
4608                      env);
4609       if (r != 0) return r;
4610       r = setup_look_behind(node, reg, env);
4611     }
4612     break;
4613 
4614   default:
4615     r = 0;
4616     break;
4617   }
4618 
4619   return r;
4620 }
4621 
4622 #ifdef __GNUC__
4623 __inline
4624 #endif
4625 static int
setup_quant(Node * node,regex_t * reg,int state,ScanEnv * env)4626 setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env)
4627 {
4628   int r;
4629   OnigLen d;
4630   QuantNode* qn = QUANT_(node);
4631   Node* body = NODE_BODY(node);
4632 
4633   if ((state & IN_REAL_REPEAT) != 0) {
4634     NODE_STATUS_ADD(node, IN_REAL_REPEAT);
4635   }
4636   if ((state & IN_MULTI_ENTRY) != 0) {
4637     NODE_STATUS_ADD(node, IN_MULTI_ENTRY);
4638   }
4639 
4640   if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 1) {
4641     d = tree_min_len(body, env);
4642     if (d == 0) {
4643 #ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
4644       qn->emptiness = quantifiers_memory_node_info(body);
4645       if (qn->emptiness == BODY_IS_EMPTY_POSSIBILITY_REC) {
4646         if (NODE_TYPE(body) == NODE_BAG &&
4647             BAG_(body)->type == BAG_MEMORY) {
4648           MEM_STATUS_ON(env->bt_mem_end, BAG_(body)->m.regnum);
4649         }
4650       }
4651 #else
4652       qn->emptiness = BODY_IS_EMPTY_POSSIBILITY;
4653 #endif
4654     }
4655   }
4656 
4657   if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 2)
4658     state |= IN_REAL_REPEAT;
4659   if (qn->lower != qn->upper)
4660     state |= IN_VAR_REPEAT;
4661 
4662   r = setup_tree(body, reg, state, env);
4663   if (r != 0) return r;
4664 
4665   /* expand string */
4666 #define EXPAND_STRING_MAX_LENGTH  100
4667   if (NODE_TYPE(body) == NODE_STRING) {
4668     if (!IS_INFINITE_REPEAT(qn->lower) && qn->lower == qn->upper &&
4669         qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) {
4670       int len = NODE_STRING_LEN(body);
4671       StrNode* sn = STR_(body);
4672 
4673       if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) {
4674         int i, n = qn->lower;
4675         onig_node_conv_to_str_node(node, STR_(body)->flag);
4676         for (i = 0; i < n; i++) {
4677           r = onig_node_str_cat(node, sn->s, sn->end);
4678           if (r != 0) return r;
4679         }
4680         onig_node_free(body);
4681         return r;
4682       }
4683     }
4684   }
4685 
4686   if (qn->greedy && (qn->emptiness == BODY_IS_NOT_EMPTY)) {
4687     if (NODE_TYPE(body) == NODE_QUANT) {
4688       QuantNode* tqn = QUANT_(body);
4689       if (IS_NOT_NULL(tqn->head_exact)) {
4690         qn->head_exact  = tqn->head_exact;
4691         tqn->head_exact = NULL;
4692       }
4693     }
4694     else {
4695       qn->head_exact = get_head_value_node(NODE_BODY(node), 1, reg);
4696     }
4697   }
4698 
4699   return r;
4700 }
4701 
4702 /* setup_tree does the following work.
4703  1. check empty loop. (set qn->emptiness)
4704  2. expand ignore-case in char class.
4705  3. set memory status bit flags. (reg->mem_stats)
4706  4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
4707  5. find invalid patterns in look-behind.
4708  6. expand repeated string.
4709  */
4710 static int
setup_tree(Node * node,regex_t * reg,int state,ScanEnv * env)4711 setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
4712 {
4713   int r = 0;
4714 
4715   switch (NODE_TYPE(node)) {
4716   case NODE_LIST:
4717     {
4718       Node* prev = NULL_NODE;
4719       do {
4720         r = setup_tree(NODE_CAR(node), reg, state, env);
4721         if (IS_NOT_NULL(prev) && r == 0) {
4722           r = next_setup(prev, NODE_CAR(node), reg);
4723         }
4724         prev = NODE_CAR(node);
4725       } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
4726     }
4727     break;
4728 
4729   case NODE_ALT:
4730     do {
4731       r = setup_tree(NODE_CAR(node), reg, (state | IN_ALT), env);
4732     } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
4733     break;
4734 
4735   case NODE_STRING:
4736     if (IS_IGNORECASE(reg->options) && !NODE_STRING_IS_RAW(node)) {
4737       r = expand_case_fold_string(node, reg, state);
4738     }
4739     break;
4740 
4741   case NODE_BACKREF:
4742     {
4743       int i;
4744       int* p;
4745       BackRefNode* br = BACKREF_(node);
4746       p = BACKREFS_P(br);
4747       for (i = 0; i < br->back_num; i++) {
4748         if (p[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
4749         MEM_STATUS_ON(env->backrefed_mem, p[i]);
4750         MEM_STATUS_ON(env->bt_mem_start, p[i]);
4751 #ifdef USE_BACKREF_WITH_LEVEL
4752         if (NODE_IS_NEST_LEVEL(node)) {
4753           MEM_STATUS_ON(env->bt_mem_end, p[i]);
4754         }
4755 #endif
4756       }
4757     }
4758     break;
4759 
4760   case NODE_BAG:
4761     {
4762       BagNode* en = BAG_(node);
4763 
4764       switch (en->type) {
4765       case BAG_OPTION:
4766         {
4767           OnigOptionType options = reg->options;
4768           reg->options = BAG_(node)->o.options;
4769           r = setup_tree(NODE_BODY(node), reg, state, env);
4770           reg->options = options;
4771         }
4772         break;
4773 
4774       case BAG_MEMORY:
4775 #ifdef USE_CALL
4776         state |= en->m.called_state;
4777 #endif
4778 
4779         if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_MULTI_ENTRY)) != 0
4780             || NODE_IS_RECURSION(node)) {
4781           MEM_STATUS_ON(env->bt_mem_start, en->m.regnum);
4782         }
4783         r = setup_tree(NODE_BODY(node), reg, state, env);
4784         break;
4785 
4786       case BAG_STOP_BACKTRACK:
4787         {
4788           Node* target = NODE_BODY(node);
4789           r = setup_tree(target, reg, state, env);
4790           if (NODE_TYPE(target) == NODE_QUANT) {
4791             QuantNode* tqn = QUANT_(target);
4792             if (IS_INFINITE_REPEAT(tqn->upper) && tqn->lower <= 1 &&
4793                 tqn->greedy != 0) {  /* (?>a*), a*+ etc... */
4794               if (is_strict_real_node(NODE_BODY(target)))
4795                 NODE_STATUS_ADD(node, STRICT_REAL_REPEAT);
4796             }
4797           }
4798         }
4799         break;
4800 
4801       case BAG_IF_ELSE:
4802         r = setup_tree(NODE_BODY(node), reg, (state | IN_ALT), env);
4803         if (r != 0) return r;
4804         if (IS_NOT_NULL(en->te.Then)) {
4805           r = setup_tree(en->te.Then, reg, (state | IN_ALT), env);
4806           if (r != 0) return r;
4807         }
4808         if (IS_NOT_NULL(en->te.Else))
4809           r = setup_tree(en->te.Else, reg, (state | IN_ALT), env);
4810         break;
4811       }
4812     }
4813     break;
4814 
4815   case NODE_QUANT:
4816     r = setup_quant(node, reg, state, env);
4817     break;
4818 
4819   case NODE_ANCHOR:
4820     r = setup_anchor(node, reg, state, env);
4821     break;
4822 
4823 #ifdef USE_CALL
4824   case NODE_CALL:
4825 #endif
4826   case NODE_CTYPE:
4827   case NODE_CCLASS:
4828   case NODE_GIMMICK:
4829   default:
4830     break;
4831   }
4832 
4833   return r;
4834 }
4835 
4836 static int
set_sunday_quick_search_or_bmh_skip_table(regex_t * reg,int case_expand,UChar * s,UChar * end,UChar skip[],int * roffset)4837 set_sunday_quick_search_or_bmh_skip_table(regex_t* reg, int case_expand,
4838                                           UChar* s, UChar* end,
4839                                           UChar skip[], int* roffset)
4840 {
4841   int i, j, k, len, offset;
4842   int n, clen;
4843   UChar* p;
4844   OnigEncoding enc;
4845   OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
4846   UChar buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
4847 
4848   enc = reg->enc;
4849   offset = ENC_GET_SKIP_OFFSET(enc);
4850   if (offset == ENC_SKIP_OFFSET_1_OR_0) {
4851     UChar* p = s;
4852     while (1) {
4853       len = enclen(enc, p);
4854       if (p + len >= end) {
4855         if (len == 1) offset = 1;
4856         else          offset = 0;
4857         break;
4858       }
4859       p += len;
4860     }
4861   }
4862 
4863   len = (int )(end - s);
4864   if (len + offset >= 255)
4865     return ONIGERR_PARSER_BUG;
4866 
4867   *roffset = offset;
4868 
4869   for (i = 0; i < CHAR_MAP_SIZE; i++) {
4870     skip[i] = (UChar )(len + offset);
4871   }
4872 
4873   for (p = s; p < end; ) {
4874     int z;
4875 
4876     clen = enclen(enc, p);
4877     if (p + clen > end) clen = (int )(end - p);
4878 
4879     len = (int )(end - p);
4880     for (j = 0; j < clen; j++) {
4881       z = len - j + (offset - 1);
4882       if (z <= 0) break;
4883       skip[p[j]] = z;
4884     }
4885 
4886     if (case_expand != 0) {
4887       n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4888                                              p, end, items);
4889       for (k = 0; k < n; k++) {
4890         ONIGENC_CODE_TO_MBC(enc, items[k].code[0], buf);
4891         for (j = 0; j < clen; j++) {
4892           z = len - j + (offset - 1);
4893           if (z <= 0) break;
4894           if (skip[buf[j]] > z)
4895             skip[buf[j]] = z;
4896         }
4897       }
4898     }
4899 
4900     p += clen;
4901   }
4902 
4903   return 0;
4904 }
4905 
4906 
4907 #define OPT_EXACT_MAXLEN   24
4908 
4909 #if OPT_EXACT_MAXLEN >= 255
4910 #error Too big OPT_EXACT_MAXLEN
4911 #endif
4912 
4913 typedef struct {
4914   OnigLen min;  /* min byte length */
4915   OnigLen max;  /* max byte length */
4916 } MinMax;
4917 
4918 typedef struct {
4919   MinMax           mmd;
4920   OnigEncoding     enc;
4921   OnigOptionType   options;
4922   OnigCaseFoldType case_fold_flag;
4923   ScanEnv*         scan_env;
4924 } OptEnv;
4925 
4926 typedef struct {
4927   int left;
4928   int right;
4929 } OptAnc;
4930 
4931 typedef struct {
4932   MinMax     mmd;   /* position */
4933   OptAnc     anc;
4934   int        reach_end;
4935   int        case_fold;
4936   int        good_case_fold;
4937   int        len;
4938   UChar      s[OPT_EXACT_MAXLEN];
4939 } OptStr;
4940 
4941 typedef struct {
4942   MinMax    mmd;    /* position */
4943   OptAnc    anc;
4944   int       value;  /* weighted value */
4945   UChar     map[CHAR_MAP_SIZE];
4946 } OptMap;
4947 
4948 typedef struct {
4949   MinMax  len;
4950   OptAnc  anc;
4951   OptStr  sb;     /* boundary */
4952   OptStr  sm;     /* middle */
4953   OptStr  spr;    /* prec read (?=...) */
4954   OptMap  map;    /* boundary */
4955 } OptNode;
4956 
4957 
4958 static int
map_position_value(OnigEncoding enc,int i)4959 map_position_value(OnigEncoding enc, int i)
4960 {
4961   static const short int Vals[] = {
4962      5,  1,  1,  1,  1,  1,  1,  1,  1, 10, 10,  1,  1, 10,  1,  1,
4963      1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
4964     12,  4,  7,  4,  4,  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,
4965      6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  5,
4966      5,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
4967      6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  6,  5,  5,  5,
4968      5,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
4969      6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  1
4970   };
4971 
4972   if (i < (int )(sizeof(Vals)/sizeof(Vals[0]))) {
4973     if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
4974       return 20;
4975     else
4976       return (int )Vals[i];
4977   }
4978   else
4979     return 4;   /* Take it easy. */
4980 }
4981 
4982 static int
distance_value(MinMax * mm)4983 distance_value(MinMax* mm)
4984 {
4985   /* 1000 / (min-max-dist + 1) */
4986   static const short int dist_vals[] = {
4987     1000,  500,  333,  250,  200,  167,  143,  125,  111,  100,
4988       91,   83,   77,   71,   67,   63,   59,   56,   53,   50,
4989       48,   45,   43,   42,   40,   38,   37,   36,   34,   33,
4990       32,   31,   30,   29,   29,   28,   27,   26,   26,   25,
4991       24,   24,   23,   23,   22,   22,   21,   21,   20,   20,
4992       20,   19,   19,   19,   18,   18,   18,   17,   17,   17,
4993       16,   16,   16,   16,   15,   15,   15,   15,   14,   14,
4994       14,   14,   14,   14,   13,   13,   13,   13,   13,   13,
4995       12,   12,   12,   12,   12,   12,   11,   11,   11,   11,
4996       11,   11,   11,   11,   11,   10,   10,   10,   10,   10
4997   };
4998 
4999   OnigLen d;
5000 
5001   if (mm->max == INFINITE_LEN) return 0;
5002 
5003   d = mm->max - mm->min;
5004   if (d < (OnigLen )(sizeof(dist_vals)/sizeof(dist_vals[0])))
5005     /* return dist_vals[d] * 16 / (mm->min + 12); */
5006     return (int )dist_vals[d];
5007   else
5008     return 1;
5009 }
5010 
5011 static int
comp_distance_value(MinMax * d1,MinMax * d2,int v1,int v2)5012 comp_distance_value(MinMax* d1, MinMax* d2, int v1, int v2)
5013 {
5014   if (v2 <= 0) return -1;
5015   if (v1 <= 0) return  1;
5016 
5017   v1 *= distance_value(d1);
5018   v2 *= distance_value(d2);
5019 
5020   if (v2 > v1) return  1;
5021   if (v2 < v1) return -1;
5022 
5023   if (d2->min < d1->min) return  1;
5024   if (d2->min > d1->min) return -1;
5025   return 0;
5026 }
5027 
5028 static int
is_equal_mml(MinMax * a,MinMax * b)5029 is_equal_mml(MinMax* a, MinMax* b)
5030 {
5031   return a->min == b->min && a->max == b->max;
5032 }
5033 
5034 static void
set_mml(MinMax * l,OnigLen min,OnigLen max)5035 set_mml(MinMax* l, OnigLen min, OnigLen max)
5036 {
5037   l->min = min;
5038   l->max = max;
5039 }
5040 
5041 static void
clear_mml(MinMax * l)5042 clear_mml(MinMax* l)
5043 {
5044   l->min = l->max = 0;
5045 }
5046 
5047 static void
copy_mml(MinMax * to,MinMax * from)5048 copy_mml(MinMax* to, MinMax* from)
5049 {
5050   to->min = from->min;
5051   to->max = from->max;
5052 }
5053 
5054 static void
add_mml(MinMax * to,MinMax * from)5055 add_mml(MinMax* to, MinMax* from)
5056 {
5057   to->min = distance_add(to->min, from->min);
5058   to->max = distance_add(to->max, from->max);
5059 }
5060 
5061 static void
alt_merge_mml(MinMax * to,MinMax * from)5062 alt_merge_mml(MinMax* to, MinMax* from)
5063 {
5064   if (to->min > from->min) to->min = from->min;
5065   if (to->max < from->max) to->max = from->max;
5066 }
5067 
5068 static void
copy_opt_env(OptEnv * to,OptEnv * from)5069 copy_opt_env(OptEnv* to, OptEnv* from)
5070 {
5071   *to = *from;
5072 }
5073 
5074 static void
clear_opt_anc_info(OptAnc * a)5075 clear_opt_anc_info(OptAnc* a)
5076 {
5077   a->left  = 0;
5078   a->right = 0;
5079 }
5080 
5081 static void
copy_opt_anc_info(OptAnc * to,OptAnc * from)5082 copy_opt_anc_info(OptAnc* to, OptAnc* from)
5083 {
5084   *to = *from;
5085 }
5086 
5087 static void
concat_opt_anc_info(OptAnc * to,OptAnc * left,OptAnc * right,OnigLen left_len,OnigLen right_len)5088 concat_opt_anc_info(OptAnc* to, OptAnc* left, OptAnc* right,
5089                     OnigLen left_len, OnigLen right_len)
5090 {
5091   clear_opt_anc_info(to);
5092 
5093   to->left = left->left;
5094   if (left_len == 0) {
5095     to->left |= right->left;
5096   }
5097 
5098   to->right = right->right;
5099   if (right_len == 0) {
5100     to->right |= left->right;
5101   }
5102   else {
5103     to->right |= (left->right & ANCR_PREC_READ_NOT);
5104   }
5105 }
5106 
5107 static int
is_left(int a)5108 is_left(int a)
5109 {
5110   if (a == ANCR_END_BUF  || a == ANCR_SEMI_END_BUF ||
5111       a == ANCR_END_LINE || a == ANCR_PREC_READ || a == ANCR_PREC_READ_NOT)
5112     return 0;
5113 
5114   return 1;
5115 }
5116 
5117 static int
is_set_opt_anc_info(OptAnc * to,int anc)5118 is_set_opt_anc_info(OptAnc* to, int anc)
5119 {
5120   if ((to->left & anc) != 0) return 1;
5121 
5122   return ((to->right & anc) != 0 ? 1 : 0);
5123 }
5124 
5125 static void
add_opt_anc_info(OptAnc * to,int anc)5126 add_opt_anc_info(OptAnc* to, int anc)
5127 {
5128   if (is_left(anc))
5129     to->left |= anc;
5130   else
5131     to->right |= anc;
5132 }
5133 
5134 static void
remove_opt_anc_info(OptAnc * to,int anc)5135 remove_opt_anc_info(OptAnc* to, int anc)
5136 {
5137   if (is_left(anc))
5138     to->left &= ~anc;
5139   else
5140     to->right &= ~anc;
5141 }
5142 
5143 static void
alt_merge_opt_anc_info(OptAnc * to,OptAnc * add)5144 alt_merge_opt_anc_info(OptAnc* to, OptAnc* add)
5145 {
5146   to->left  &= add->left;
5147   to->right &= add->right;
5148 }
5149 
5150 static int
is_full_opt_exact(OptStr * e)5151 is_full_opt_exact(OptStr* e)
5152 {
5153   return e->len >= OPT_EXACT_MAXLEN;
5154 }
5155 
5156 static void
clear_opt_exact(OptStr * e)5157 clear_opt_exact(OptStr* e)
5158 {
5159   clear_mml(&e->mmd);
5160   clear_opt_anc_info(&e->anc);
5161   e->reach_end      = 0;
5162   e->case_fold      = 0;
5163   e->good_case_fold = 0;
5164   e->len            = 0;
5165   e->s[0]           = '\0';
5166 }
5167 
5168 static void
copy_opt_exact(OptStr * to,OptStr * from)5169 copy_opt_exact(OptStr* to, OptStr* from)
5170 {
5171   *to = *from;
5172 }
5173 
5174 static int
concat_opt_exact(OptStr * to,OptStr * add,OnigEncoding enc)5175 concat_opt_exact(OptStr* to, OptStr* add, OnigEncoding enc)
5176 {
5177   int i, j, len, r;
5178   UChar *p, *end;
5179   OptAnc tanc;
5180 
5181   if (add->case_fold != 0) {
5182     if (! to->case_fold) {
5183       if (to->len > 1 || to->len >= add->len) return 0;  /* avoid */
5184 
5185       to->case_fold = 1;
5186     }
5187     else {
5188       if (to->good_case_fold != 0) {
5189         if (add->good_case_fold == 0) return 0;
5190       }
5191     }
5192   }
5193 
5194   r = 0;
5195   p = add->s;
5196   end = p + add->len;
5197   for (i = to->len; p < end; ) {
5198     len = enclen(enc, p);
5199     if (i + len > OPT_EXACT_MAXLEN) {
5200       r = 1; /* 1:full */
5201       break;
5202     }
5203     for (j = 0; j < len && p < end; j++)
5204       to->s[i++] = *p++;
5205   }
5206 
5207   to->len = i;
5208   to->reach_end = (p == end ? add->reach_end : 0);
5209 
5210   concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
5211   if (! to->reach_end) tanc.right = 0;
5212   copy_opt_anc_info(&to->anc, &tanc);
5213 
5214   return r;
5215 }
5216 
5217 static void
concat_opt_exact_str(OptStr * to,UChar * s,UChar * end,OnigEncoding enc)5218 concat_opt_exact_str(OptStr* to, UChar* s, UChar* end, OnigEncoding enc)
5219 {
5220   int i, j, len;
5221   UChar *p;
5222 
5223   for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
5224     len = enclen(enc, p);
5225     if (i + len > OPT_EXACT_MAXLEN) break;
5226     for (j = 0; j < len && p < end; j++)
5227       to->s[i++] = *p++;
5228   }
5229 
5230   to->len = i;
5231 
5232   if (p >= end && to->len == (int )(end - s))
5233     to->reach_end = 1;
5234 }
5235 
5236 static void
alt_merge_opt_exact(OptStr * to,OptStr * add,OptEnv * env)5237 alt_merge_opt_exact(OptStr* to, OptStr* add, OptEnv* env)
5238 {
5239   int i, j, len;
5240 
5241   if (add->len == 0 || to->len == 0) {
5242     clear_opt_exact(to);
5243     return ;
5244   }
5245 
5246   if (! is_equal_mml(&to->mmd, &add->mmd)) {
5247     clear_opt_exact(to);
5248     return ;
5249   }
5250 
5251   for (i = 0; i < to->len && i < add->len; ) {
5252     if (to->s[i] != add->s[i]) break;
5253     len = enclen(env->enc, to->s + i);
5254 
5255     for (j = 1; j < len; j++) {
5256       if (to->s[i+j] != add->s[i+j]) break;
5257     }
5258     if (j < len) break;
5259     i += len;
5260   }
5261 
5262   if (! add->reach_end || i < add->len || i < to->len) {
5263     to->reach_end = 0;
5264   }
5265   to->len = i;
5266   if (add->case_fold != 0)
5267     to->case_fold = 1;
5268   if (add->good_case_fold == 0)
5269     to->good_case_fold = 0;
5270 
5271   alt_merge_opt_anc_info(&to->anc, &add->anc);
5272   if (! to->reach_end) to->anc.right = 0;
5273 }
5274 
5275 static void
select_opt_exact(OnigEncoding enc,OptStr * now,OptStr * alt)5276 select_opt_exact(OnigEncoding enc, OptStr* now, OptStr* alt)
5277 {
5278   int vn, va;
5279 
5280   vn = now->len;
5281   va = alt->len;
5282 
5283   if (va == 0) {
5284     return ;
5285   }
5286   else if (vn == 0) {
5287     copy_opt_exact(now, alt);
5288     return ;
5289   }
5290   else if (vn <= 2 && va <= 2) {
5291     /* ByteValTable[x] is big value --> low price */
5292     va = map_position_value(enc, now->s[0]);
5293     vn = map_position_value(enc, alt->s[0]);
5294 
5295     if (now->len > 1) vn += 5;
5296     if (alt->len > 1) va += 5;
5297   }
5298 
5299   if (now->case_fold == 0) vn *= 2;
5300   if (alt->case_fold == 0) va *= 2;
5301 
5302   if (now->good_case_fold != 0) vn *= 4;
5303   if (alt->good_case_fold != 0) va *= 4;
5304 
5305   if (comp_distance_value(&now->mmd, &alt->mmd, vn, va) > 0)
5306     copy_opt_exact(now, alt);
5307 }
5308 
5309 static void
clear_opt_map(OptMap * map)5310 clear_opt_map(OptMap* map)
5311 {
5312   static const OptMap clean_info = {
5313     {0, 0}, {0, 0}, 0,
5314     {
5315       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5316       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5317       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5318       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5319       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5320       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5321       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5322       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5323       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5324       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5325       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5326       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5327       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5328       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5329       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5330       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
5331     }
5332   };
5333 
5334   xmemcpy(map, &clean_info, sizeof(OptMap));
5335 }
5336 
5337 static void
copy_opt_map(OptMap * to,OptMap * from)5338 copy_opt_map(OptMap* to, OptMap* from)
5339 {
5340   xmemcpy(to,from,sizeof(OptMap));
5341 }
5342 
5343 static void
add_char_opt_map(OptMap * m,UChar c,OnigEncoding enc)5344 add_char_opt_map(OptMap* m, UChar c, OnigEncoding enc)
5345 {
5346   if (m->map[c] == 0) {
5347     m->map[c] = 1;
5348     m->value += map_position_value(enc, c);
5349   }
5350 }
5351 
5352 static int
add_char_amb_opt_map(OptMap * map,UChar * p,UChar * end,OnigEncoding enc,OnigCaseFoldType fold_flag)5353 add_char_amb_opt_map(OptMap* map, UChar* p, UChar* end,
5354                      OnigEncoding enc, OnigCaseFoldType fold_flag)
5355 {
5356   OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
5357   UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
5358   int i, n;
5359 
5360   add_char_opt_map(map, p[0], enc);
5361 
5362   fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(fold_flag);
5363   n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, fold_flag, p, end, items);
5364   if (n < 0) return n;
5365 
5366   for (i = 0; i < n; i++) {
5367     ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
5368     add_char_opt_map(map, buf[0], enc);
5369   }
5370 
5371   return 0;
5372 }
5373 
5374 static void
select_opt_map(OptMap * now,OptMap * alt)5375 select_opt_map(OptMap* now, OptMap* alt)
5376 {
5377   static int z = 1<<15; /* 32768: something big value */
5378 
5379   int vn, va;
5380 
5381   if (alt->value == 0) return ;
5382   if (now->value == 0) {
5383     copy_opt_map(now, alt);
5384     return ;
5385   }
5386 
5387   vn = z / now->value;
5388   va = z / alt->value;
5389   if (comp_distance_value(&now->mmd, &alt->mmd, vn, va) > 0)
5390     copy_opt_map(now, alt);
5391 }
5392 
5393 static int
comp_opt_exact_or_map(OptStr * e,OptMap * m)5394 comp_opt_exact_or_map(OptStr* e, OptMap* m)
5395 {
5396 #define COMP_EM_BASE  20
5397   int ae, am;
5398   int case_value;
5399 
5400   if (m->value <= 0) return -1;
5401 
5402   if (e->case_fold != 0) {
5403     if (e->good_case_fold != 0)
5404       case_value = 2;
5405     else
5406       case_value = 1;
5407   }
5408   else
5409     case_value = 3;
5410 
5411   ae = COMP_EM_BASE * e->len * case_value;
5412   am = COMP_EM_BASE * 5 * 2 / m->value;
5413   return comp_distance_value(&e->mmd, &m->mmd, ae, am);
5414 }
5415 
5416 static void
alt_merge_opt_map(OnigEncoding enc,OptMap * to,OptMap * add)5417 alt_merge_opt_map(OnigEncoding enc, OptMap* to, OptMap* add)
5418 {
5419   int i, val;
5420 
5421   /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
5422   if (to->value == 0) return ;
5423   if (add->value == 0 || to->mmd.max < add->mmd.min) {
5424     clear_opt_map(to);
5425     return ;
5426   }
5427 
5428   alt_merge_mml(&to->mmd, &add->mmd);
5429 
5430   val = 0;
5431   for (i = 0; i < CHAR_MAP_SIZE; i++) {
5432     if (add->map[i])
5433       to->map[i] = 1;
5434 
5435     if (to->map[i])
5436       val += map_position_value(enc, i);
5437   }
5438   to->value = val;
5439 
5440   alt_merge_opt_anc_info(&to->anc, &add->anc);
5441 }
5442 
5443 static void
set_bound_node_opt_info(OptNode * opt,MinMax * plen)5444 set_bound_node_opt_info(OptNode* opt, MinMax* plen)
5445 {
5446   copy_mml(&(opt->sb.mmd),  plen);
5447   copy_mml(&(opt->spr.mmd), plen);
5448   copy_mml(&(opt->map.mmd), plen);
5449 }
5450 
5451 static void
clear_node_opt_info(OptNode * opt)5452 clear_node_opt_info(OptNode* opt)
5453 {
5454   clear_mml(&opt->len);
5455   clear_opt_anc_info(&opt->anc);
5456   clear_opt_exact(&opt->sb);
5457   clear_opt_exact(&opt->sm);
5458   clear_opt_exact(&opt->spr);
5459   clear_opt_map(&opt->map);
5460 }
5461 
5462 static void
copy_node_opt_info(OptNode * to,OptNode * from)5463 copy_node_opt_info(OptNode* to, OptNode* from)
5464 {
5465   xmemcpy(to,from,sizeof(OptNode));
5466 }
5467 
5468 static void
concat_left_node_opt_info(OnigEncoding enc,OptNode * to,OptNode * add)5469 concat_left_node_opt_info(OnigEncoding enc, OptNode* to, OptNode* add)
5470 {
5471   int sb_reach, sm_reach;
5472   OptAnc tanc;
5473 
5474   concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
5475   copy_opt_anc_info(&to->anc, &tanc);
5476 
5477   if (add->sb.len > 0 && to->len.max == 0) {
5478     concat_opt_anc_info(&tanc, &to->anc, &add->sb.anc, to->len.max, add->len.max);
5479     copy_opt_anc_info(&add->sb.anc, &tanc);
5480   }
5481 
5482   if (add->map.value > 0 && to->len.max == 0) {
5483     if (add->map.mmd.max == 0)
5484       add->map.anc.left |= to->anc.left;
5485   }
5486 
5487   sb_reach = to->sb.reach_end;
5488   sm_reach = to->sm.reach_end;
5489 
5490   if (add->len.max != 0)
5491     to->sb.reach_end = to->sm.reach_end = 0;
5492 
5493   if (add->sb.len > 0) {
5494     if (sb_reach) {
5495       concat_opt_exact(&to->sb, &add->sb, enc);
5496       clear_opt_exact(&add->sb);
5497     }
5498     else if (sm_reach) {
5499       concat_opt_exact(&to->sm, &add->sb, enc);
5500       clear_opt_exact(&add->sb);
5501     }
5502   }
5503   select_opt_exact(enc, &to->sm, &add->sb);
5504   select_opt_exact(enc, &to->sm, &add->sm);
5505 
5506   if (to->spr.len > 0) {
5507     if (add->len.max > 0) {
5508       if (to->spr.len > (int )add->len.max)
5509         to->spr.len = add->len.max;
5510 
5511       if (to->spr.mmd.max == 0)
5512         select_opt_exact(enc, &to->sb, &to->spr);
5513       else
5514         select_opt_exact(enc, &to->sm, &to->spr);
5515     }
5516   }
5517   else if (add->spr.len > 0) {
5518     copy_opt_exact(&to->spr, &add->spr);
5519   }
5520 
5521   select_opt_map(&to->map, &add->map);
5522   add_mml(&to->len, &add->len);
5523 }
5524 
5525 static void
alt_merge_node_opt_info(OptNode * to,OptNode * add,OptEnv * env)5526 alt_merge_node_opt_info(OptNode* to, OptNode* add, OptEnv* env)
5527 {
5528   alt_merge_opt_anc_info(&to->anc, &add->anc);
5529   alt_merge_opt_exact(&to->sb,  &add->sb, env);
5530   alt_merge_opt_exact(&to->sm,  &add->sm, env);
5531   alt_merge_opt_exact(&to->spr, &add->spr, env);
5532   alt_merge_opt_map(env->enc, &to->map, &add->map);
5533 
5534   alt_merge_mml(&to->len, &add->len);
5535 }
5536 
5537 
5538 #define MAX_NODE_OPT_INFO_REF_COUNT    5
5539 
5540 static int
optimize_nodes(Node * node,OptNode * opt,OptEnv * env)5541 optimize_nodes(Node* node, OptNode* opt, OptEnv* env)
5542 {
5543   int i;
5544   int r;
5545   OptNode xo;
5546   OnigEncoding enc;
5547 
5548   r = 0;
5549   enc = env->enc;
5550   clear_node_opt_info(opt);
5551   set_bound_node_opt_info(opt, &env->mmd);
5552 
5553   switch (NODE_TYPE(node)) {
5554   case NODE_LIST:
5555     {
5556       OptEnv nenv;
5557       Node* nd = node;
5558 
5559       copy_opt_env(&nenv, env);
5560       do {
5561         r = optimize_nodes(NODE_CAR(nd), &xo, &nenv);
5562         if (r == 0) {
5563           add_mml(&nenv.mmd, &xo.len);
5564           concat_left_node_opt_info(enc, opt, &xo);
5565         }
5566       } while (r == 0 && IS_NOT_NULL(nd = NODE_CDR(nd)));
5567     }
5568     break;
5569 
5570   case NODE_ALT:
5571     {
5572       Node* nd = node;
5573 
5574       do {
5575         r = optimize_nodes(NODE_CAR(nd), &xo, env);
5576         if (r == 0) {
5577           if (nd == node) copy_node_opt_info(opt, &xo);
5578           else            alt_merge_node_opt_info(opt, &xo, env);
5579         }
5580       } while ((r == 0) && IS_NOT_NULL(nd = NODE_CDR(nd)));
5581     }
5582     break;
5583 
5584   case NODE_STRING:
5585     {
5586       StrNode* sn = STR_(node);
5587       int slen = (int )(sn->end - sn->s);
5588       /* int is_raw = NODE_STRING_IS_RAW(node); */
5589 
5590       if (! NODE_STRING_IS_AMBIG(node)) {
5591         concat_opt_exact_str(&opt->sb, sn->s, sn->end, enc);
5592         if (slen > 0) {
5593           add_char_opt_map(&opt->map, *(sn->s), enc);
5594         }
5595         set_mml(&opt->len, slen, slen);
5596       }
5597       else {
5598         int max;
5599 
5600         if (NODE_STRING_IS_DONT_GET_OPT_INFO(node)) {
5601           int n = onigenc_strlen(enc, sn->s, sn->end);
5602           max = ONIGENC_MBC_MAXLEN_DIST(enc) * n;
5603         }
5604         else {
5605           concat_opt_exact_str(&opt->sb, sn->s, sn->end, enc);
5606           opt->sb.case_fold = 1;
5607           if (NODE_STRING_IS_GOOD_AMBIG(node))
5608             opt->sb.good_case_fold = 1;
5609 
5610           if (slen > 0) {
5611             r = add_char_amb_opt_map(&opt->map, sn->s, sn->end,
5612                                      enc, env->case_fold_flag);
5613             if (r != 0) break;
5614           }
5615 
5616           max = slen;
5617         }
5618 
5619         set_mml(&opt->len, slen, max);
5620       }
5621     }
5622     break;
5623 
5624   case NODE_CCLASS:
5625     {
5626       int z;
5627       CClassNode* cc = CCLASS_(node);
5628 
5629       /* no need to check ignore case. (set in setup_tree()) */
5630 
5631       if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
5632         OnigLen min = ONIGENC_MBC_MINLEN(enc);
5633         OnigLen max = ONIGENC_MBC_MAXLEN_DIST(enc);
5634 
5635         set_mml(&opt->len, min, max);
5636       }
5637       else {
5638         for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5639           z = BITSET_AT(cc->bs, i);
5640           if ((z && ! IS_NCCLASS_NOT(cc)) || (! z && IS_NCCLASS_NOT(cc))) {
5641             add_char_opt_map(&opt->map, (UChar )i, enc);
5642           }
5643         }
5644         set_mml(&opt->len, 1, 1);
5645       }
5646     }
5647     break;
5648 
5649   case NODE_CTYPE:
5650     {
5651       int min, max;
5652       int range;
5653 
5654       max = ONIGENC_MBC_MAXLEN_DIST(enc);
5655 
5656       if (max == 1) {
5657         min = 1;
5658 
5659         switch (CTYPE_(node)->ctype) {
5660         case CTYPE_ANYCHAR:
5661           break;
5662 
5663         case ONIGENC_CTYPE_WORD:
5664           range = CTYPE_(node)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE;
5665           if (CTYPE_(node)->not != 0) {
5666             for (i = 0; i < range; i++) {
5667               if (! ONIGENC_IS_CODE_WORD(enc, i)) {
5668                 add_char_opt_map(&opt->map, (UChar )i, enc);
5669               }
5670             }
5671             for (i = range; i < SINGLE_BYTE_SIZE; i++) {
5672               add_char_opt_map(&opt->map, (UChar )i, enc);
5673             }
5674           }
5675           else {
5676             for (i = 0; i < range; i++) {
5677               if (ONIGENC_IS_CODE_WORD(enc, i)) {
5678                 add_char_opt_map(&opt->map, (UChar )i, enc);
5679               }
5680             }
5681           }
5682           break;
5683         }
5684       }
5685       else {
5686         min = ONIGENC_MBC_MINLEN(enc);
5687       }
5688       set_mml(&opt->len, min, max);
5689     }
5690     break;
5691 
5692   case NODE_ANCHOR:
5693     switch (ANCHOR_(node)->type) {
5694     case ANCR_BEGIN_BUF:
5695     case ANCR_BEGIN_POSITION:
5696     case ANCR_BEGIN_LINE:
5697     case ANCR_END_BUF:
5698     case ANCR_SEMI_END_BUF:
5699     case ANCR_END_LINE:
5700     case ANCR_PREC_READ_NOT:
5701     case ANCR_LOOK_BEHIND:
5702       add_opt_anc_info(&opt->anc, ANCHOR_(node)->type);
5703       break;
5704 
5705     case ANCR_PREC_READ:
5706       {
5707         r = optimize_nodes(NODE_BODY(node), &xo, env);
5708         if (r == 0) {
5709           if (xo.sb.len > 0)
5710             copy_opt_exact(&opt->spr, &xo.sb);
5711           else if (xo.sm.len > 0)
5712             copy_opt_exact(&opt->spr, &xo.sm);
5713 
5714           opt->spr.reach_end = 0;
5715 
5716           if (xo.map.value > 0)
5717             copy_opt_map(&opt->map, &xo.map);
5718         }
5719       }
5720       break;
5721 
5722     case ANCR_LOOK_BEHIND_NOT:
5723       break;
5724     }
5725     break;
5726 
5727   case NODE_BACKREF:
5728     if (! NODE_IS_CHECKER(node)) {
5729       int* backs;
5730       OnigLen min, max, tmin, tmax;
5731       MemEnv* mem_env = SCANENV_MEMENV(env->scan_env);
5732       BackRefNode* br = BACKREF_(node);
5733 
5734       if (NODE_IS_RECURSION(node)) {
5735         set_mml(&opt->len, 0, INFINITE_LEN);
5736         break;
5737       }
5738       backs = BACKREFS_P(br);
5739       min = tree_min_len(mem_env[backs[0]].node, env->scan_env);
5740       max = tree_max_len(mem_env[backs[0]].node, env->scan_env);
5741       for (i = 1; i < br->back_num; i++) {
5742         tmin = tree_min_len(mem_env[backs[i]].node, env->scan_env);
5743         tmax = tree_max_len(mem_env[backs[i]].node, env->scan_env);
5744         if (min > tmin) min = tmin;
5745         if (max < tmax) max = tmax;
5746       }
5747       set_mml(&opt->len, min, max);
5748     }
5749     break;
5750 
5751 #ifdef USE_CALL
5752   case NODE_CALL:
5753     if (NODE_IS_RECURSION(node))
5754       set_mml(&opt->len, 0, INFINITE_LEN);
5755     else {
5756       OnigOptionType save = env->options;
5757       env->options = BAG_(NODE_BODY(node))->o.options;
5758       r = optimize_nodes(NODE_BODY(node), opt, env);
5759       env->options = save;
5760     }
5761     break;
5762 #endif
5763 
5764   case NODE_QUANT:
5765     {
5766       OnigLen min, max;
5767       QuantNode* qn = QUANT_(node);
5768 
5769       r = optimize_nodes(NODE_BODY(node), &xo, env);
5770       if (r != 0) break;
5771 
5772       if (qn->lower > 0) {
5773         copy_node_opt_info(opt, &xo);
5774         if (xo.sb.len > 0) {
5775           if (xo.sb.reach_end) {
5776             for (i = 2; i <= qn->lower && ! is_full_opt_exact(&opt->sb); i++) {
5777               int rc = concat_opt_exact(&opt->sb, &xo.sb, enc);
5778               if (rc > 0) break;
5779             }
5780             if (i < qn->lower) opt->sb.reach_end = 0;
5781           }
5782         }
5783 
5784         if (qn->lower != qn->upper) {
5785           opt->sb.reach_end = 0;
5786           opt->sm.reach_end = 0;
5787         }
5788         if (qn->lower > 1)
5789           opt->sm.reach_end = 0;
5790       }
5791 
5792       if (IS_INFINITE_REPEAT(qn->upper)) {
5793         if (env->mmd.max == 0 &&
5794             NODE_IS_ANYCHAR(NODE_BODY(node)) && qn->greedy != 0) {
5795           if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), env)))
5796             add_opt_anc_info(&opt->anc, ANCR_ANYCHAR_INF_ML);
5797           else
5798             add_opt_anc_info(&opt->anc, ANCR_ANYCHAR_INF);
5799         }
5800 
5801         max = (xo.len.max > 0 ? INFINITE_LEN : 0);
5802       }
5803       else {
5804         max = distance_multiply(xo.len.max, qn->upper);
5805       }
5806 
5807       min = distance_multiply(xo.len.min, qn->lower);
5808       set_mml(&opt->len, min, max);
5809     }
5810     break;
5811 
5812   case NODE_BAG:
5813     {
5814       BagNode* en = BAG_(node);
5815 
5816       switch (en->type) {
5817       case BAG_OPTION:
5818         {
5819           OnigOptionType save = env->options;
5820 
5821           env->options = en->o.options;
5822           r = optimize_nodes(NODE_BODY(node), opt, env);
5823           env->options = save;
5824         }
5825         break;
5826 
5827       case BAG_MEMORY:
5828 #ifdef USE_CALL
5829         en->opt_count++;
5830         if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
5831           OnigLen min, max;
5832 
5833           min = 0;
5834           max = INFINITE_LEN;
5835           if (NODE_IS_MIN_FIXED(node)) min = en->min_len;
5836           if (NODE_IS_MAX_FIXED(node)) max = en->max_len;
5837           set_mml(&opt->len, min, max);
5838         }
5839         else
5840 #endif
5841           {
5842             r = optimize_nodes(NODE_BODY(node), opt, env);
5843             if (is_set_opt_anc_info(&opt->anc, ANCR_ANYCHAR_INF_MASK)) {
5844               if (MEM_STATUS_AT0(env->scan_env->backrefed_mem, en->m.regnum))
5845                 remove_opt_anc_info(&opt->anc, ANCR_ANYCHAR_INF_MASK);
5846             }
5847           }
5848         break;
5849 
5850       case BAG_STOP_BACKTRACK:
5851         r = optimize_nodes(NODE_BODY(node), opt, env);
5852         break;
5853 
5854       case BAG_IF_ELSE:
5855         {
5856           OptEnv nenv;
5857 
5858           copy_opt_env(&nenv, env);
5859           r = optimize_nodes(NODE_BAG_BODY(en), &xo, &nenv);
5860           if (r == 0) {
5861             add_mml(&nenv.mmd, &xo.len);
5862             concat_left_node_opt_info(enc, opt, &xo);
5863             if (IS_NOT_NULL(en->te.Then)) {
5864               r = optimize_nodes(en->te.Then, &xo, &nenv);
5865               if (r == 0) {
5866                 concat_left_node_opt_info(enc, opt, &xo);
5867               }
5868             }
5869 
5870             if (IS_NOT_NULL(en->te.Else)) {
5871               r = optimize_nodes(en->te.Else, &xo, env);
5872               if (r == 0)
5873                 alt_merge_node_opt_info(opt, &xo, env);
5874             }
5875           }
5876         }
5877         break;
5878       }
5879     }
5880     break;
5881 
5882   case NODE_GIMMICK:
5883     break;
5884 
5885   default:
5886 #ifdef ONIG_DEBUG
5887     fprintf(stderr, "optimize_nodes: undefined node type %d\n", NODE_TYPE(node));
5888 #endif
5889     r = ONIGERR_TYPE_BUG;
5890     break;
5891   }
5892 
5893   return r;
5894 }
5895 
5896 static int
set_optimize_exact(regex_t * reg,OptStr * e)5897 set_optimize_exact(regex_t* reg, OptStr* e)
5898 {
5899   int r;
5900 
5901   if (e->len == 0) return 0;
5902 
5903   reg->exact = (UChar* )xmalloc(e->len);
5904   CHECK_NULL_RETURN_MEMERR(reg->exact);
5905   xmemcpy(reg->exact, e->s, e->len);
5906   reg->exact_end = reg->exact + e->len;
5907 
5908   if (e->case_fold) {
5909     reg->optimize = OPTIMIZE_STR_CASE_FOLD;
5910     if (e->good_case_fold != 0) {
5911       if (e->len >= 2) {
5912         r = set_sunday_quick_search_or_bmh_skip_table(reg, 1,
5913                              reg->exact, reg->exact_end,
5914                              reg->map, &(reg->map_offset));
5915         if (r != 0) return r;
5916         reg->optimize = OPTIMIZE_STR_CASE_FOLD_FAST;
5917       }
5918     }
5919   }
5920   else {
5921     int allow_reverse;
5922 
5923     allow_reverse =
5924       ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
5925 
5926     if (e->len >= 2 || (e->len >= 1 && allow_reverse)) {
5927       r = set_sunday_quick_search_or_bmh_skip_table(reg, 0,
5928                                          reg->exact, reg->exact_end,
5929                                          reg->map, &(reg->map_offset));
5930       if (r != 0) return r;
5931 
5932       reg->optimize = (allow_reverse != 0
5933                        ? OPTIMIZE_STR_FAST
5934                        : OPTIMIZE_STR_FAST_STEP_FORWARD);
5935     }
5936     else {
5937       reg->optimize = OPTIMIZE_STR;
5938     }
5939   }
5940 
5941   reg->dmin = e->mmd.min;
5942   reg->dmax = e->mmd.max;
5943 
5944   if (reg->dmin != INFINITE_LEN) {
5945     reg->threshold_len = reg->dmin + (int )(reg->exact_end - reg->exact);
5946   }
5947 
5948   return 0;
5949 }
5950 
5951 static void
set_optimize_map(regex_t * reg,OptMap * m)5952 set_optimize_map(regex_t* reg, OptMap* m)
5953 {
5954   int i;
5955 
5956   for (i = 0; i < CHAR_MAP_SIZE; i++)
5957     reg->map[i] = m->map[i];
5958 
5959   reg->optimize   = OPTIMIZE_MAP;
5960   reg->dmin       = m->mmd.min;
5961   reg->dmax       = m->mmd.max;
5962 
5963   if (reg->dmin != INFINITE_LEN) {
5964     reg->threshold_len = reg->dmin + 1;
5965   }
5966 }
5967 
5968 static void
set_sub_anchor(regex_t * reg,OptAnc * anc)5969 set_sub_anchor(regex_t* reg, OptAnc* anc)
5970 {
5971   reg->sub_anchor |= anc->left  & ANCR_BEGIN_LINE;
5972   reg->sub_anchor |= anc->right & ANCR_END_LINE;
5973 }
5974 
5975 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5976 static void print_optimize_info(FILE* f, regex_t* reg);
5977 #endif
5978 
5979 static int
set_optimize_info_from_tree(Node * node,regex_t * reg,ScanEnv * scan_env)5980 set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
5981 {
5982   int r;
5983   OptNode opt;
5984   OptEnv env;
5985 
5986   env.enc            = reg->enc;
5987   env.options        = reg->options;
5988   env.case_fold_flag = reg->case_fold_flag;
5989   env.scan_env       = scan_env;
5990   clear_mml(&env.mmd);
5991 
5992   r = optimize_nodes(node, &opt, &env);
5993   if (r != 0) return r;
5994 
5995   reg->anchor = opt.anc.left & (ANCR_BEGIN_BUF |
5996         ANCR_BEGIN_POSITION | ANCR_ANYCHAR_INF | ANCR_ANYCHAR_INF_ML |
5997         ANCR_LOOK_BEHIND);
5998 
5999   if ((opt.anc.left & (ANCR_LOOK_BEHIND | ANCR_PREC_READ_NOT)) != 0)
6000     reg->anchor &= ~ANCR_ANYCHAR_INF_ML;
6001 
6002   reg->anchor |= opt.anc.right & (ANCR_END_BUF | ANCR_SEMI_END_BUF |
6003                                   ANCR_PREC_READ_NOT);
6004 
6005   if (reg->anchor & (ANCR_END_BUF | ANCR_SEMI_END_BUF)) {
6006     reg->anchor_dmin = opt.len.min;
6007     reg->anchor_dmax = opt.len.max;
6008   }
6009 
6010   if (opt.sb.len > 0 || opt.sm.len > 0) {
6011     select_opt_exact(reg->enc, &opt.sb, &opt.sm);
6012     if (opt.map.value > 0 && comp_opt_exact_or_map(&opt.sb, &opt.map) > 0) {
6013       goto set_map;
6014     }
6015     else {
6016       r = set_optimize_exact(reg, &opt.sb);
6017       set_sub_anchor(reg, &opt.sb.anc);
6018     }
6019   }
6020   else if (opt.map.value > 0) {
6021   set_map:
6022     set_optimize_map(reg, &opt.map);
6023     set_sub_anchor(reg, &opt.map.anc);
6024   }
6025   else {
6026     reg->sub_anchor |= opt.anc.left & ANCR_BEGIN_LINE;
6027     if (opt.len.max == 0)
6028       reg->sub_anchor |= opt.anc.right & ANCR_END_LINE;
6029   }
6030 
6031 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
6032   print_optimize_info(stderr, reg);
6033 #endif
6034   return r;
6035 }
6036 
6037 static void
clear_optimize_info(regex_t * reg)6038 clear_optimize_info(regex_t* reg)
6039 {
6040   reg->optimize      = OPTIMIZE_NONE;
6041   reg->anchor        = 0;
6042   reg->anchor_dmin   = 0;
6043   reg->anchor_dmax   = 0;
6044   reg->sub_anchor    = 0;
6045   reg->exact_end     = (UChar* )NULL;
6046   reg->map_offset    = 0;
6047   reg->threshold_len = 0;
6048   if (IS_NOT_NULL(reg->exact)) {
6049     xfree(reg->exact);
6050     reg->exact = (UChar* )NULL;
6051   }
6052 }
6053 
6054 #ifdef ONIG_DEBUG
6055 
print_enc_string(FILE * fp,OnigEncoding enc,const UChar * s,const UChar * end)6056 static void print_enc_string(FILE* fp, OnigEncoding enc,
6057                              const UChar *s, const UChar *end)
6058 {
6059   fprintf(fp, "\nPATTERN: /");
6060 
6061   if (ONIGENC_MBC_MINLEN(enc) > 1) {
6062     const UChar *p;
6063     OnigCodePoint code;
6064 
6065     p = s;
6066     while (p < end) {
6067       code = ONIGENC_MBC_TO_CODE(enc, p, end);
6068       if (code >= 0x80) {
6069         fprintf(fp, " 0x%04x ", (int )code);
6070       }
6071       else {
6072         fputc((int )code, fp);
6073       }
6074 
6075       p += enclen(enc, p);
6076     }
6077   }
6078   else {
6079     while (s < end) {
6080       fputc((int )*s, fp);
6081       s++;
6082     }
6083   }
6084 
6085   fprintf(fp, "/\n");
6086 }
6087 
6088 #endif /* ONIG_DEBUG */
6089 
6090 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
6091 
6092 static void
print_distance_range(FILE * f,OnigLen a,OnigLen b)6093 print_distance_range(FILE* f, OnigLen a, OnigLen b)
6094 {
6095   if (a == INFINITE_LEN)
6096     fputs("inf", f);
6097   else
6098     fprintf(f, "(%u)", a);
6099 
6100   fputs("-", f);
6101 
6102   if (b == INFINITE_LEN)
6103     fputs("inf", f);
6104   else
6105     fprintf(f, "(%u)", b);
6106 }
6107 
6108 static void
print_anchor(FILE * f,int anchor)6109 print_anchor(FILE* f, int anchor)
6110 {
6111   int q = 0;
6112 
6113   fprintf(f, "[");
6114 
6115   if (anchor & ANCR_BEGIN_BUF) {
6116     fprintf(f, "begin-buf");
6117     q = 1;
6118   }
6119   if (anchor & ANCR_BEGIN_LINE) {
6120     if (q) fprintf(f, ", ");
6121     q = 1;
6122     fprintf(f, "begin-line");
6123   }
6124   if (anchor & ANCR_BEGIN_POSITION) {
6125     if (q) fprintf(f, ", ");
6126     q = 1;
6127     fprintf(f, "begin-pos");
6128   }
6129   if (anchor & ANCR_END_BUF) {
6130     if (q) fprintf(f, ", ");
6131     q = 1;
6132     fprintf(f, "end-buf");
6133   }
6134   if (anchor & ANCR_SEMI_END_BUF) {
6135     if (q) fprintf(f, ", ");
6136     q = 1;
6137     fprintf(f, "semi-end-buf");
6138   }
6139   if (anchor & ANCR_END_LINE) {
6140     if (q) fprintf(f, ", ");
6141     q = 1;
6142     fprintf(f, "end-line");
6143   }
6144   if (anchor & ANCR_ANYCHAR_INF) {
6145     if (q) fprintf(f, ", ");
6146     q = 1;
6147     fprintf(f, "anychar-inf");
6148   }
6149   if (anchor & ANCR_ANYCHAR_INF_ML) {
6150     if (q) fprintf(f, ", ");
6151     fprintf(f, "anychar-inf-ml");
6152   }
6153 
6154   fprintf(f, "]");
6155 }
6156 
6157 static void
print_optimize_info(FILE * f,regex_t * reg)6158 print_optimize_info(FILE* f, regex_t* reg)
6159 {
6160   static const char* on[] = { "NONE", "STR",
6161                               "STR_FAST", "STR_FAST_STEP_FORWARD",
6162                               "STR_CASE_FOLD_FAST", "STR_CASE_FOLD", "MAP" };
6163 
6164   fprintf(f, "optimize: %s\n", on[reg->optimize]);
6165   fprintf(f, "  anchor: "); print_anchor(f, reg->anchor);
6166   if ((reg->anchor & ANCR_END_BUF_MASK) != 0)
6167     print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
6168   fprintf(f, "\n");
6169 
6170   if (reg->optimize) {
6171     fprintf(f, "  sub anchor: "); print_anchor(f, reg->sub_anchor);
6172     fprintf(f, "\n");
6173   }
6174   fprintf(f, "\n");
6175 
6176   if (reg->exact) {
6177     UChar *p;
6178     fprintf(f, "exact: [");
6179     for (p = reg->exact; p < reg->exact_end; p++) {
6180       fputc(*p, f);
6181     }
6182     fprintf(f, "]: length: %ld\n", (reg->exact_end - reg->exact));
6183   }
6184   else if (reg->optimize & OPTIMIZE_MAP) {
6185     int c, i, n = 0;
6186 
6187     for (i = 0; i < CHAR_MAP_SIZE; i++)
6188       if (reg->map[i]) n++;
6189 
6190     fprintf(f, "map: n=%d\n", n);
6191     if (n > 0) {
6192       c = 0;
6193       fputc('[', f);
6194       for (i = 0; i < CHAR_MAP_SIZE; i++) {
6195         if (reg->map[i] != 0) {
6196           if (c > 0)  fputs(", ", f);
6197           c++;
6198           if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
6199               ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
6200             fputc(i, f);
6201           else
6202             fprintf(f, "%d", i);
6203         }
6204       }
6205       fprintf(f, "]\n");
6206     }
6207   }
6208 }
6209 #endif
6210 
6211 
6212 extern RegexExt*
onig_get_regex_ext(regex_t * reg)6213 onig_get_regex_ext(regex_t* reg)
6214 {
6215   if (IS_NULL(reg->extp)) {
6216     RegexExt* ext = (RegexExt* )xmalloc(sizeof(*ext));
6217     if (IS_NULL(ext)) return 0;
6218 
6219     ext->pattern      = 0;
6220     ext->pattern_end  = 0;
6221 #ifdef USE_CALLOUT
6222     ext->tag_table    = 0;
6223     ext->callout_num  = 0;
6224     ext->callout_list_alloc = 0;
6225     ext->callout_list = 0;
6226 #endif
6227 
6228     reg->extp = ext;
6229   }
6230 
6231   return reg->extp;
6232 }
6233 
6234 static void
free_regex_ext(RegexExt * ext)6235 free_regex_ext(RegexExt* ext)
6236 {
6237   if (IS_NOT_NULL(ext)) {
6238     if (IS_NOT_NULL(ext->pattern))
6239       xfree((void* )ext->pattern);
6240 
6241 #ifdef USE_CALLOUT
6242     if (IS_NOT_NULL(ext->tag_table))
6243       onig_callout_tag_table_free(ext->tag_table);
6244 
6245     if (IS_NOT_NULL(ext->callout_list))
6246       onig_free_reg_callout_list(ext->callout_num, ext->callout_list);
6247 #endif
6248 
6249     xfree(ext);
6250   }
6251 }
6252 
6253 extern int
onig_ext_set_pattern(regex_t * reg,const UChar * pattern,const UChar * pattern_end)6254 onig_ext_set_pattern(regex_t* reg, const UChar* pattern, const UChar* pattern_end)
6255 {
6256   RegexExt* ext;
6257   UChar* s;
6258 
6259   ext = onig_get_regex_ext(reg);
6260   CHECK_NULL_RETURN_MEMERR(ext);
6261 
6262   s = onigenc_strdup(reg->enc, pattern, pattern_end);
6263   CHECK_NULL_RETURN_MEMERR(s);
6264 
6265   ext->pattern     = s;
6266   ext->pattern_end = s + (pattern_end - pattern);
6267 
6268   return ONIG_NORMAL;
6269 }
6270 
6271 extern void
onig_free_body(regex_t * reg)6272 onig_free_body(regex_t* reg)
6273 {
6274   if (IS_NOT_NULL(reg)) {
6275     ops_free(reg);
6276     if (IS_NOT_NULL(reg->string_pool)) {
6277       xfree(reg->string_pool);
6278       reg->string_pool_end = reg->string_pool = 0;
6279     }
6280     if (IS_NOT_NULL(reg->exact))            xfree(reg->exact);
6281     if (IS_NOT_NULL(reg->repeat_range))     xfree(reg->repeat_range);
6282     if (IS_NOT_NULL(reg->extp)) {
6283       free_regex_ext(reg->extp);
6284       reg->extp = 0;
6285     }
6286 
6287     onig_names_free(reg);
6288   }
6289 }
6290 
6291 extern void
onig_free(regex_t * reg)6292 onig_free(regex_t* reg)
6293 {
6294   if (IS_NOT_NULL(reg)) {
6295     onig_free_body(reg);
6296     xfree(reg);
6297   }
6298 }
6299 
6300 
6301 #ifdef ONIG_DEBUG_PARSE
6302 static void print_tree P_((FILE* f, Node* node));
6303 #endif
6304 
6305 extern int onig_init_for_match_at(regex_t* reg);
6306 
6307 extern int
onig_compile(regex_t * reg,const UChar * pattern,const UChar * pattern_end,OnigErrorInfo * einfo)6308 onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
6309              OnigErrorInfo* einfo)
6310 {
6311   int r;
6312   Node*  root;
6313   ScanEnv  scan_env;
6314 #ifdef USE_CALL
6315   UnsetAddrList  uslist;
6316 #endif
6317 
6318   root = 0;
6319   if (IS_NOT_NULL(einfo)) {
6320     einfo->enc = reg->enc;
6321     einfo->par = (UChar* )NULL;
6322   }
6323 
6324 #ifdef ONIG_DEBUG
6325   print_enc_string(stderr, reg->enc, pattern, pattern_end);
6326 #endif
6327 
6328   if (reg->ops_alloc == 0) {
6329     r = ops_init(reg, OPS_INIT_SIZE);
6330     if (r != 0) goto end;
6331   }
6332   else
6333     reg->ops_used = 0;
6334 
6335   reg->string_pool        = 0;
6336   reg->string_pool_end    = 0;
6337   reg->num_mem            = 0;
6338   reg->num_repeat         = 0;
6339   reg->num_null_check     = 0;
6340   reg->repeat_range_alloc = 0;
6341   reg->repeat_range       = (OnigRepeatRange* )NULL;
6342 
6343   r = onig_parse_tree(&root, pattern, pattern_end, reg, &scan_env);
6344   if (r != 0) goto err;
6345 
6346   /* mixed use named group and no-named group */
6347   if (scan_env.num_named > 0 &&
6348       IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
6349       ! ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
6350     if (scan_env.num_named != scan_env.num_mem)
6351       r = disable_noname_group_capture(&root, reg, &scan_env);
6352     else
6353       r = numbered_ref_check(root);
6354 
6355     if (r != 0) goto err;
6356   }
6357 
6358   r = check_backrefs(root, &scan_env);
6359   if (r != 0) goto err;
6360 
6361 #ifdef USE_CALL
6362   if (scan_env.num_call > 0) {
6363     r = unset_addr_list_init(&uslist, scan_env.num_call);
6364     if (r != 0) goto err;
6365     scan_env.unset_addr_list = &uslist;
6366     r = setup_call(root, &scan_env, 0);
6367     if (r != 0) goto err_unset;
6368     r = setup_call2(root);
6369     if (r != 0) goto err_unset;
6370     r = recursive_call_check_trav(root, &scan_env, 0);
6371     if (r  < 0) goto err_unset;
6372     r = infinite_recursive_call_check_trav(root, &scan_env);
6373     if (r != 0) goto err_unset;
6374 
6375     setup_called_state(root, 0);
6376   }
6377 
6378   reg->num_call = scan_env.num_call;
6379 #endif
6380 
6381   r = setup_tree(root, reg, 0, &scan_env);
6382   if (r != 0) goto err_unset;
6383 
6384 #ifdef ONIG_DEBUG_PARSE
6385   print_tree(stderr, root);
6386 #endif
6387 
6388   reg->capture_history  = scan_env.capture_history;
6389   reg->bt_mem_start     = scan_env.bt_mem_start;
6390   reg->bt_mem_start    |= reg->capture_history;
6391   if (IS_FIND_CONDITION(reg->options))
6392     MEM_STATUS_ON_ALL(reg->bt_mem_end);
6393   else {
6394     reg->bt_mem_end  = scan_env.bt_mem_end;
6395     reg->bt_mem_end |= reg->capture_history;
6396   }
6397   reg->bt_mem_start |= reg->bt_mem_end;
6398 
6399   clear_optimize_info(reg);
6400 #ifndef ONIG_DONT_OPTIMIZE
6401   r = set_optimize_info_from_tree(root, reg, &scan_env);
6402   if (r != 0) goto err_unset;
6403 #endif
6404 
6405   if (IS_NOT_NULL(scan_env.mem_env_dynamic)) {
6406     xfree(scan_env.mem_env_dynamic);
6407     scan_env.mem_env_dynamic = (MemEnv* )NULL;
6408   }
6409 
6410   r = compile_tree(root, reg, &scan_env);
6411   if (r == 0) {
6412     if (scan_env.keep_num > 0) {
6413       r = add_op(reg, OP_UPDATE_VAR);
6414       if (r != 0) goto err;
6415 
6416       COP(reg)->update_var.type = UPDATE_VAR_KEEP_FROM_STACK_LAST;
6417       COP(reg)->update_var.id   = 0; /* not used */
6418     }
6419 
6420     r = add_op(reg, OP_END);
6421     if (r != 0) goto err;
6422 
6423 #ifdef USE_CALL
6424     if (scan_env.num_call > 0) {
6425       r = fix_unset_addr_list(&uslist, reg);
6426       unset_addr_list_end(&uslist);
6427       if (r != 0) goto err;
6428     }
6429 #endif
6430 
6431     if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0)
6432 #ifdef USE_CALLOUT
6433         || (IS_NOT_NULL(reg->extp) && reg->extp->callout_num != 0)
6434 #endif
6435         )
6436       reg->stack_pop_level = STACK_POP_LEVEL_ALL;
6437     else {
6438       if (reg->bt_mem_start != 0)
6439         reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
6440       else
6441         reg->stack_pop_level = STACK_POP_LEVEL_FREE;
6442     }
6443 
6444     r = ops_make_string_pool(reg);
6445     if (r != 0) goto err;
6446   }
6447 #ifdef USE_CALL
6448   else if (scan_env.num_call > 0) {
6449     unset_addr_list_end(&uslist);
6450   }
6451 #endif
6452   onig_node_free(root);
6453 
6454 #ifdef ONIG_DEBUG_COMPILE
6455   onig_print_names(stderr, reg);
6456   onig_print_compiled_byte_code_list(stderr, reg);
6457 #endif
6458 
6459 #ifdef USE_DIRECT_THREADED_CODE
6460   /* opcode -> opaddr */
6461   onig_init_for_match_at(reg);
6462 #endif
6463 
6464  end:
6465   return r;
6466 
6467  err_unset:
6468 #ifdef USE_CALL
6469   if (scan_env.num_call > 0) {
6470     unset_addr_list_end(&uslist);
6471   }
6472 #endif
6473  err:
6474   if (IS_NOT_NULL(scan_env.error)) {
6475     if (IS_NOT_NULL(einfo)) {
6476       einfo->par     = scan_env.error;
6477       einfo->par_end = scan_env.error_end;
6478     }
6479   }
6480 
6481   onig_node_free(root);
6482   if (IS_NOT_NULL(scan_env.mem_env_dynamic))
6483       xfree(scan_env.mem_env_dynamic);
6484   return r;
6485 }
6486 
6487 
6488 static int onig_inited = 0;
6489 
6490 extern int
onig_reg_init(regex_t * reg,OnigOptionType option,OnigCaseFoldType case_fold_flag,OnigEncoding enc,OnigSyntaxType * syntax)6491 onig_reg_init(regex_t* reg, OnigOptionType option, OnigCaseFoldType case_fold_flag,
6492               OnigEncoding enc, OnigSyntaxType* syntax)
6493 {
6494   int r;
6495 
6496   xmemset(reg, 0, sizeof(*reg));
6497 
6498   if (onig_inited == 0) {
6499 #if 0
6500     return ONIGERR_LIBRARY_IS_NOT_INITIALIZED;
6501 #else
6502     r = onig_initialize(&enc, 1);
6503     if (r != 0)
6504       return ONIGERR_FAIL_TO_INITIALIZE;
6505 
6506     onig_warning("You didn't call onig_initialize() explicitly");
6507 #endif
6508   }
6509 
6510   if (IS_NULL(reg))
6511     return ONIGERR_INVALID_ARGUMENT;
6512 
6513   if (ONIGENC_IS_UNDEF(enc))
6514     return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED;
6515 
6516   if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
6517       == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
6518     return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
6519   }
6520 
6521   if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
6522     option |= syntax->options;
6523     option &= ~ONIG_OPTION_SINGLELINE;
6524   }
6525   else
6526     option |= syntax->options;
6527 
6528   (reg)->enc              = enc;
6529   (reg)->options          = option;
6530   (reg)->syntax           = syntax;
6531   (reg)->optimize         = 0;
6532   (reg)->exact            = (UChar* )NULL;
6533   (reg)->extp             = (RegexExt* )NULL;
6534 
6535   (reg)->ops              = (Operation* )NULL;
6536   (reg)->ops_curr         = (Operation* )NULL;
6537   (reg)->ops_used         = 0;
6538   (reg)->ops_alloc        = 0;
6539   (reg)->name_table       = (void* )NULL;
6540 
6541   (reg)->case_fold_flag   = case_fold_flag;
6542   return 0;
6543 }
6544 
6545 extern int
onig_new_without_alloc(regex_t * reg,const UChar * pattern,const UChar * pattern_end,OnigOptionType option,OnigEncoding enc,OnigSyntaxType * syntax,OnigErrorInfo * einfo)6546 onig_new_without_alloc(regex_t* reg,
6547                        const UChar* pattern, const UChar* pattern_end,
6548                        OnigOptionType option, OnigEncoding enc,
6549                        OnigSyntaxType* syntax, OnigErrorInfo* einfo)
6550 {
6551   int r;
6552 
6553   r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
6554   if (r != 0) return r;
6555 
6556   r = onig_compile(reg, pattern, pattern_end, einfo);
6557   return r;
6558 }
6559 
6560 extern int
onig_new(regex_t ** reg,const UChar * pattern,const UChar * pattern_end,OnigOptionType option,OnigEncoding enc,OnigSyntaxType * syntax,OnigErrorInfo * einfo)6561 onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
6562          OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
6563          OnigErrorInfo* einfo)
6564 {
6565   int r;
6566 
6567   *reg = (regex_t* )xmalloc(sizeof(regex_t));
6568   if (IS_NULL(*reg)) return ONIGERR_MEMORY;
6569 
6570   r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
6571   if (r != 0) goto err;
6572 
6573   r = onig_compile(*reg, pattern, pattern_end, einfo);
6574   if (r != 0) {
6575   err:
6576     onig_free(*reg);
6577     *reg = NULL;
6578   }
6579   return r;
6580 }
6581 
6582 extern int
onig_initialize(OnigEncoding encodings[],int n)6583 onig_initialize(OnigEncoding encodings[], int n)
6584 {
6585   int i;
6586   int r;
6587 
6588   if (onig_inited != 0)
6589     return 0;
6590 
6591   onigenc_init();
6592 
6593   onig_inited = 1;
6594 
6595   for (i = 0; i < n; i++) {
6596     OnigEncoding enc = encodings[i];
6597     r = onig_initialize_encoding(enc);
6598     if (r != 0)
6599       return r;
6600   }
6601 
6602   return ONIG_NORMAL;
6603 }
6604 
6605 typedef struct EndCallListItem {
6606   struct EndCallListItem* next;
6607   void (*func)(void);
6608 } EndCallListItemType;
6609 
6610 static EndCallListItemType* EndCallTop;
6611 
onig_add_end_call(void (* func)(void))6612 extern void onig_add_end_call(void (*func)(void))
6613 {
6614   EndCallListItemType* item;
6615 
6616   item = (EndCallListItemType* )xmalloc(sizeof(*item));
6617   if (item == 0) return ;
6618 
6619   item->next = EndCallTop;
6620   item->func = func;
6621 
6622   EndCallTop = item;
6623 }
6624 
6625 static void
exec_end_call_list(void)6626 exec_end_call_list(void)
6627 {
6628   EndCallListItemType* prev;
6629   void (*func)(void);
6630 
6631   while (EndCallTop != 0) {
6632     func = EndCallTop->func;
6633     (*func)();
6634 
6635     prev = EndCallTop;
6636     EndCallTop = EndCallTop->next;
6637     xfree(prev);
6638   }
6639 }
6640 
6641 extern int
onig_end(void)6642 onig_end(void)
6643 {
6644   exec_end_call_list();
6645 
6646 #ifdef USE_CALLOUT
6647   onig_global_callout_names_free();
6648 #endif
6649 
6650   onigenc_end();
6651 
6652   onig_inited = 0;
6653 
6654   return 0;
6655 }
6656 
6657 extern int
onig_is_in_code_range(const UChar * p,OnigCodePoint code)6658 onig_is_in_code_range(const UChar* p, OnigCodePoint code)
6659 {
6660   OnigCodePoint n, *data;
6661   OnigCodePoint low, high, x;
6662 
6663   GET_CODE_POINT(n, p);
6664   data = (OnigCodePoint* )p;
6665   data++;
6666 
6667   for (low = 0, high = n; low < high; ) {
6668     x = (low + high) >> 1;
6669     if (code > data[x * 2 + 1])
6670       low = x + 1;
6671     else
6672       high = x;
6673   }
6674 
6675   return ((low < n && code >= data[low * 2]) ? 1 : 0);
6676 }
6677 
6678 extern int
onig_is_code_in_cc_len(int elen,OnigCodePoint code,void * cc_arg)6679 onig_is_code_in_cc_len(int elen, OnigCodePoint code, /* CClassNode* */ void* cc_arg)
6680 {
6681   int found;
6682   CClassNode* cc = (CClassNode* )cc_arg;
6683 
6684   if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
6685     if (IS_NULL(cc->mbuf)) {
6686       found = 0;
6687     }
6688     else {
6689       found = onig_is_in_code_range(cc->mbuf->p, code) != 0;
6690     }
6691   }
6692   else {
6693     found = BITSET_AT(cc->bs, code) != 0;
6694   }
6695 
6696   if (IS_NCCLASS_NOT(cc))
6697     return !found;
6698   else
6699     return found;
6700 }
6701 
6702 extern int
onig_is_code_in_cc(OnigEncoding enc,OnigCodePoint code,CClassNode * cc)6703 onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
6704 {
6705   int len;
6706 
6707   if (ONIGENC_MBC_MINLEN(enc) > 1) {
6708     len = 2;
6709   }
6710   else {
6711     len = ONIGENC_CODE_TO_MBCLEN(enc, code);
6712     if (len < 0) return 0;
6713   }
6714   return onig_is_code_in_cc_len(len, code, cc);
6715 }
6716 
6717 
6718 #ifdef ONIG_DEBUG_PARSE
6719 
6720 static void
p_string(FILE * f,int len,UChar * s)6721 p_string(FILE* f, int len, UChar* s)
6722 {
6723   fputs(":", f);
6724   while (len-- > 0) { fputc(*s++, f); }
6725 }
6726 
6727 static void
Indent(FILE * f,int indent)6728 Indent(FILE* f, int indent)
6729 {
6730   int i;
6731   for (i = 0; i < indent; i++) putc(' ', f);
6732 }
6733 
6734 static void
print_indent_tree(FILE * f,Node * node,int indent)6735 print_indent_tree(FILE* f, Node* node, int indent)
6736 {
6737   int i;
6738   NodeType type;
6739   UChar* p;
6740   int add = 3;
6741 
6742   Indent(f, indent);
6743   if (IS_NULL(node)) {
6744     fprintf(f, "ERROR: null node!!!\n");
6745     exit (0);
6746   }
6747 
6748   type = NODE_TYPE(node);
6749   switch (type) {
6750   case NODE_LIST:
6751   case NODE_ALT:
6752     if (type == NODE_LIST)
6753       fprintf(f, "<list:%p>\n", node);
6754     else
6755       fprintf(f, "<alt:%p>\n", node);
6756 
6757     print_indent_tree(f, NODE_CAR(node), indent + add);
6758     while (IS_NOT_NULL(node = NODE_CDR(node))) {
6759       if (NODE_TYPE(node) != type) {
6760         fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NODE_TYPE(node));
6761         exit(0);
6762       }
6763       print_indent_tree(f, NODE_CAR(node), indent + add);
6764     }
6765     break;
6766 
6767   case NODE_STRING:
6768     {
6769       char* mode;
6770       char* dont;
6771       char* good;
6772 
6773       if (NODE_STRING_IS_RAW(node))
6774         mode = "-raw";
6775       else if (NODE_STRING_IS_AMBIG(node))
6776         mode = "-ambig";
6777       else
6778         mode = "";
6779 
6780       if (NODE_STRING_IS_GOOD_AMBIG(node))
6781         good = "-good";
6782       else
6783         good = "";
6784 
6785       if (NODE_STRING_IS_DONT_GET_OPT_INFO(node))
6786         dont = " (dont-opt)";
6787       else
6788         dont = "";
6789 
6790       fprintf(f, "<string%s%s%s:%p>", mode, good, dont, node);
6791       for (p = STR_(node)->s; p < STR_(node)->end; p++) {
6792         if (*p >= 0x20 && *p < 0x7f)
6793           fputc(*p, f);
6794         else {
6795           fprintf(f, " 0x%02x", *p);
6796         }
6797       }
6798     }
6799     break;
6800 
6801   case NODE_CCLASS:
6802     fprintf(f, "<cclass:%p>", node);
6803     if (IS_NCCLASS_NOT(CCLASS_(node))) fputs(" not", f);
6804     if (CCLASS_(node)->mbuf) {
6805       BBuf* bbuf = CCLASS_(node)->mbuf;
6806       for (i = 0; i < bbuf->used; i++) {
6807         if (i > 0) fprintf(f, ",");
6808         fprintf(f, "%0x", bbuf->p[i]);
6809       }
6810     }
6811     break;
6812 
6813   case NODE_CTYPE:
6814     fprintf(f, "<ctype:%p> ", node);
6815     switch (CTYPE_(node)->ctype) {
6816     case CTYPE_ANYCHAR:
6817       fprintf(f, "<anychar:%p>", node);
6818       break;
6819 
6820     case ONIGENC_CTYPE_WORD:
6821       if (CTYPE_(node)->not != 0)
6822         fputs("not word", f);
6823       else
6824         fputs("word",     f);
6825 
6826       if (CTYPE_(node)->ascii_mode != 0)
6827         fputs(" (ascii)", f);
6828 
6829       break;
6830 
6831     default:
6832       fprintf(f, "ERROR: undefined ctype.\n");
6833       exit(0);
6834     }
6835     break;
6836 
6837   case NODE_ANCHOR:
6838     fprintf(f, "<anchor:%p> ", node);
6839     switch (ANCHOR_(node)->type) {
6840     case ANCR_BEGIN_BUF:        fputs("begin buf",      f); break;
6841     case ANCR_END_BUF:          fputs("end buf",        f); break;
6842     case ANCR_BEGIN_LINE:       fputs("begin line",     f); break;
6843     case ANCR_END_LINE:         fputs("end line",       f); break;
6844     case ANCR_SEMI_END_BUF:     fputs("semi end buf",   f); break;
6845     case ANCR_BEGIN_POSITION:   fputs("begin position", f); break;
6846 
6847     case ANCR_WORD_BOUNDARY:    fputs("word boundary",     f); break;
6848     case ANCR_NO_WORD_BOUNDARY: fputs("not word boundary", f); break;
6849 #ifdef USE_WORD_BEGIN_END
6850     case ANCR_WORD_BEGIN:       fputs("word begin", f);     break;
6851     case ANCR_WORD_END:         fputs("word end", f);       break;
6852 #endif
6853     case ANCR_TEXT_SEGMENT_BOUNDARY:
6854       fputs("text-segment boundary", f); break;
6855     case ANCR_NO_TEXT_SEGMENT_BOUNDARY:
6856       fputs("no text-segment boundary", f); break;
6857     case ANCR_PREC_READ:
6858       fprintf(f, "prec read\n");
6859       print_indent_tree(f, NODE_BODY(node), indent + add);
6860       break;
6861     case ANCR_PREC_READ_NOT:
6862       fprintf(f, "prec read not\n");
6863       print_indent_tree(f, NODE_BODY(node), indent + add);
6864       break;
6865     case ANCR_LOOK_BEHIND:
6866       fprintf(f, "look behind\n");
6867       print_indent_tree(f, NODE_BODY(node), indent + add);
6868       break;
6869     case ANCR_LOOK_BEHIND_NOT:
6870       fprintf(f, "look behind not\n");
6871       print_indent_tree(f, NODE_BODY(node), indent + add);
6872       break;
6873 
6874     default:
6875       fprintf(f, "ERROR: undefined anchor type.\n");
6876       break;
6877     }
6878     break;
6879 
6880   case NODE_BACKREF:
6881     {
6882       int* p;
6883       BackRefNode* br = BACKREF_(node);
6884       p = BACKREFS_P(br);
6885       fprintf(f, "<backref%s:%p>", NODE_IS_CHECKER(node) ? "-checker" : "", node);
6886       for (i = 0; i < br->back_num; i++) {
6887         if (i > 0) fputs(", ", f);
6888         fprintf(f, "%d", p[i]);
6889       }
6890     }
6891     break;
6892 
6893 #ifdef USE_CALL
6894   case NODE_CALL:
6895     {
6896       CallNode* cn = CALL_(node);
6897       fprintf(f, "<call:%p>", node);
6898       p_string(f, cn->name_end - cn->name, cn->name);
6899     }
6900     break;
6901 #endif
6902 
6903   case NODE_QUANT:
6904     fprintf(f, "<quantifier:%p>{%d,%d}%s\n", node,
6905             QUANT_(node)->lower, QUANT_(node)->upper,
6906             (QUANT_(node)->greedy ? "" : "?"));
6907     print_indent_tree(f, NODE_BODY(node), indent + add);
6908     break;
6909 
6910   case NODE_BAG:
6911     fprintf(f, "<bag:%p> ", node);
6912     switch (BAG_(node)->type) {
6913     case BAG_OPTION:
6914       fprintf(f, "option:%d", BAG_(node)->o.options);
6915       break;
6916     case BAG_MEMORY:
6917       fprintf(f, "memory:%d", BAG_(node)->m.regnum);
6918       break;
6919     case BAG_STOP_BACKTRACK:
6920       fprintf(f, "stop-bt");
6921       break;
6922     case BAG_IF_ELSE:
6923       fprintf(f, "if-else");
6924       break;
6925     }
6926     fprintf(f, "\n");
6927     print_indent_tree(f, NODE_BODY(node), indent + add);
6928     break;
6929 
6930   case NODE_GIMMICK:
6931     fprintf(f, "<gimmick:%p> ", node);
6932     switch (GIMMICK_(node)->type) {
6933     case GIMMICK_FAIL:
6934       fprintf(f, "fail");
6935       break;
6936     case GIMMICK_SAVE:
6937       fprintf(f, "save:%d:%d", GIMMICK_(node)->detail_type, GIMMICK_(node)->id);
6938       break;
6939     case GIMMICK_UPDATE_VAR:
6940       fprintf(f, "update_var:%d:%d", GIMMICK_(node)->detail_type, GIMMICK_(node)->id);
6941       break;
6942 #ifdef USE_CALLOUT
6943     case GIMMICK_CALLOUT:
6944       switch (GIMMICK_(node)->detail_type) {
6945       case ONIG_CALLOUT_OF_CONTENTS:
6946         fprintf(f, "callout:contents:%d", GIMMICK_(node)->num);
6947         break;
6948       case ONIG_CALLOUT_OF_NAME:
6949         fprintf(f, "callout:name:%d:%d", GIMMICK_(node)->id, GIMMICK_(node)->num);
6950         break;
6951       }
6952 #endif
6953     }
6954     break;
6955 
6956   default:
6957     fprintf(f, "print_indent_tree: undefined node type %d\n", NODE_TYPE(node));
6958     break;
6959   }
6960 
6961   if (type != NODE_LIST && type != NODE_ALT && type != NODE_QUANT &&
6962       type != NODE_BAG)
6963     fprintf(f, "\n");
6964   fflush(f);
6965 }
6966 
6967 static void
print_tree(FILE * f,Node * node)6968 print_tree(FILE* f, Node* node)
6969 {
6970   print_indent_tree(f, node, 0);
6971 }
6972 #endif
6973