1 /**********************************************************************
2   regparse.c -  Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5  * Copyright (c) 2002-2021  K.Kosako
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #ifdef DEBUG_NODE_FREE
31 #ifndef NEED_TO_INCLUDE_STDIO
32 #define NEED_TO_INCLUDE_STDIO
33 #endif
34 #endif
35 
36 #include "regparse.h"
37 #include "st.h"
38 
/* Initial allocation count for tag names (the user of this constant is
   outside this part of the file). */
#define INIT_TAG_NAMES_ALLOC_NUM   5

/* Buffer size constant for warnings (the user of this constant is outside
   this part of the file). */
#define WARN_BUFSIZE    256

#define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS

/*
 * Character predicates for callout names and callout tag names.
 * Each one is non-zero when `c` is an ASCII letter, an ASCII digit or '_'.
 *
 * The argument is parenthesized at every use so that an expression
 * argument (for example a conditional expression) is grouped as the caller
 * wrote it.  `c` is still evaluated several times: do not pass an
 * expression with side effects.
 */
#define IS_ALLOWED_CODE_IN_CALLOUT_NAME(c) \
  (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') || \
   ((c) >= '0' && (c) <= '9') || (c) == '_' /* || (c) == '!' */)
#define IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME(c) \
  (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') || \
   ((c) >= '0' && (c) <= '9') || (c) == '_')
49 
/*
 * Option tests.  Each OPTON_xxx(opt) masks `opt` with the corresponding
 * ONIG_OPTION_* bit(s); the result is non-zero when (one of) the bit(s)
 * is set.  The WORD/DIGIT/SPACE variants are also satisfied by
 * ONIG_OPTION_POSIX_IS_ASCII.
 */
#define OPTON_SINGLELINE(opt)        ((opt) & ONIG_OPTION_SINGLELINE)
#define OPTON_MULTILINE(opt)         ((opt) & ONIG_OPTION_MULTILINE)
#define OPTON_IGNORECASE(opt)        ((opt) & ONIG_OPTION_IGNORECASE)
#define OPTON_EXTEND(opt)            ((opt) & ONIG_OPTION_EXTEND)
#define OPTON_WORD_ASCII(opt) \
  ((opt) & (ONIG_OPTION_WORD_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII))
#define OPTON_DIGIT_ASCII(opt) \
  ((opt) & (ONIG_OPTION_DIGIT_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII))
#define OPTON_SPACE_ASCII(opt) \
  ((opt) & (ONIG_OPTION_SPACE_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII))
#define OPTON_POSIX_ASCII(opt)       ((opt) & ONIG_OPTION_POSIX_IS_ASCII)
#define OPTON_TEXT_SEGMENT_WORD(opt) ((opt) & ONIG_OPTION_TEXT_SEGMENT_WORD)

/*
 * Non-zero when `ctype` has to be handled in ASCII mode under `options`:
 *   - ctype is non-negative, and
 *   - it is below ONIGENC_CTYPE_ASCII while POSIX_IS_ASCII is on, or
 *     it is WORD / DIGIT / SPACE and the matching *_ASCII test is on.
 */
#define OPTON_IS_ASCII_MODE_CTYPE(ctype, options) \
  ((ctype) >= 0 && \
   ((ONIGENC_CTYPE_ASCII > (ctype)  && OPTON_POSIX_ASCII(options)) || \
    (ONIGENC_CTYPE_WORD  == (ctype) && OPTON_WORD_ASCII(options))  || \
    (ONIGENC_CTYPE_DIGIT == (ctype) && OPTON_DIGIT_ASCII(options)) || \
    (ONIGENC_CTYPE_SPACE == (ctype) && OPTON_SPACE_ASCII(options))))
69 
70 
71 OnigSyntaxType OnigSyntaxOniguruma = {
72   (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
73      ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
74      ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL |
75      ONIG_SYN_OP_ESC_CONTROL_CHARS |
76      ONIG_SYN_OP_ESC_C_CONTROL )
77    & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
78   , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
79       ONIG_SYN_OP2_OPTION_ONIGURUMA |
80       ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
81       ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |
82       ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |
83       ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS |
84       ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME    |
85       ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT |
86       ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |
87       ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT |
88       ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |
89       ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |
90       ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY  |
91       ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
92       ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
93       ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |
94       ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |
95       ONIG_SYN_OP2_ESC_H_XDIGIT | ONIG_SYN_OP2_ESC_U_HEX4 )
96   , ( SYN_GNU_REGEX_BV |
97       ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |
98       ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |
99       ONIG_SYN_VARIABLE_LEN_LOOK_BEHIND |
100       ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
101       ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
102       ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |
103       ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC |
104       ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
105       ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )
106   , ONIG_OPTION_NONE
107   ,
108   {
109       (OnigCodePoint )'\\'                       /* esc */
110     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
111     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
112     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
113     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
114     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
115   }
116 };
117 
118 OnigSyntaxType OnigSyntaxRuby = {
119   (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
120      ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
121      ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL |
122      ONIG_SYN_OP_ESC_CONTROL_CHARS |
123      ONIG_SYN_OP_ESC_C_CONTROL )
124    & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
125   , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
126       ONIG_SYN_OP2_OPTION_RUBY |
127       ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
128       ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |
129       ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |
130       ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT |
131       ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |
132       ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |
133       ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |
134       ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY  |
135       ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
136       ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
137       ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |
138       ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |
139       ONIG_SYN_OP2_ESC_H_XDIGIT | ONIG_SYN_OP2_ESC_U_HEX4 )
140   , ( SYN_GNU_REGEX_BV |
141       ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |
142       ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |
143       ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
144       ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
145       ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |
146       ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
147       ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )
148   , ONIG_OPTION_NONE
149   ,
150   {
151       (OnigCodePoint )'\\'                       /* esc */
152     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
153     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
154     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
155     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
156     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
157   }
158 };
159 
160 OnigSyntaxType*  OnigDefaultSyntax = ONIG_SYNTAX_ONIGURUMA;
161 
162 
/* Initialize (buf) as a BBuf with `size` bytes; evaluates to bbuf_init()'s
   return value. */
#define BB_INIT(buf,size)    bbuf_init((BBuf* )(buf), (size))

/*
 * Grow (buf) by repeated doubling until its capacity is at least `low`
 * bytes.  The capacity is always raised at least once.
 *
 * On failure the ENCLOSING FUNCTION returns ONIGERR_MEMORY.
 *   - A zero capacity is bumped to 1 first, so doubling always makes
 *     progress (bbuf_init() allows an empty buffer).
 *   - Doubling that would wrap unsigned int is reported as a memory error
 *     instead of looping forever.
 *   - The xrealloc() result is checked before (buf)->p / (buf)->alloc are
 *     touched, so the old block stays owned by (buf) when it fails.
 */
#define BB_EXPAND(buf,low) do{\
  unsigned int bb_want_ = (unsigned int )(low);\
  unsigned int bb_cap_  = (buf)->alloc;\
  UChar* bb_new_;\
  do {\
    if (bb_cap_ > (~(unsigned int )0) / 2) return(ONIGERR_MEMORY);\
    bb_cap_ = (bb_cap_ == 0 ? 1 : bb_cap_ * 2);\
  } while (bb_cap_ < bb_want_);\
  bb_new_ = (UChar* )xrealloc((buf)->p, bb_cap_);\
  if (IS_NULL(bb_new_)) return(ONIGERR_MEMORY);\
  (buf)->p     = bb_new_;\
  (buf)->alloc = bb_cap_;\
} while (0)

/*
 * Make sure (buf) can hold `size` bytes, doubling the capacity as often as
 * needed; nothing happens when it is already large enough.
 * Same failure handling as BB_EXPAND: the enclosing function returns
 * ONIGERR_MEMORY and (buf) keeps its previous block and capacity.
 */
#define BB_ENSURE_SIZE(buf,size) do{\
  unsigned int bb_want_  = (unsigned int )(size);\
  unsigned int new_alloc = (buf)->alloc;\
  while (new_alloc < bb_want_) {\
    if (new_alloc > (~(unsigned int )0) / 2) return(ONIGERR_MEMORY);\
    new_alloc = (new_alloc == 0 ? 1 : new_alloc * 2);\
  }\
  if ((buf)->alloc != new_alloc) {\
    UChar* bb_new_ = (UChar* )xrealloc((buf)->p, new_alloc);\
    if (IS_NULL(bb_new_)) return(ONIGERR_MEMORY);\
    (buf)->p     = bb_new_;\
    (buf)->alloc = new_alloc;\
  }\
} while (0)
180 
/*
 * Copy `n` bytes from `bytes` into (buf) at offset `pos`, growing the
 * buffer through BB_EXPAND when needed and raising (buf)->used to cover
 * the written range.
 * The macro-local end offset has a reserved-looking name so that a caller
 * argument called `used` is not captured by it.
 */
#define BB_WRITE(buf,pos,bytes,n) do{\
  int bb_end_ = (pos) + (n);\
  if ((buf)->alloc < (unsigned int )bb_end_) BB_EXPAND((buf),bb_end_);\
  xmemcpy((buf)->p + (pos), (bytes), (n));\
  if ((buf)->used < (unsigned int )bb_end_) (buf)->used = bb_end_;\
} while (0)

/* Single-byte form of BB_WRITE. */
#define BB_WRITE1(buf,pos,byte) do{\
  int bb_end_ = (pos) + 1;\
  if ((buf)->alloc < (unsigned int )bb_end_) BB_EXPAND((buf),bb_end_);\
  (buf)->p[(pos)] = (byte);\
  if ((buf)->used < (unsigned int )bb_end_) (buf)->used = bb_end_;\
} while (0)

/* Append at the current end ((buf)->used). */
#define BB_ADD(buf,bytes,n)       BB_WRITE((buf),(buf)->used,(bytes),(n))
#define BB_ADD1(buf,byte)         BB_WRITE1((buf),(buf)->used,(byte))
/* Address just past the used part / number of used bytes. */
#define BB_GET_ADD_ADDRESS(buf)   ((buf)->p + (buf)->used)
#define BB_GET_OFFSET_POS(buf)    ((buf)->used)
199 
/*
 * Move `n` bytes from offset `from` to the higher offset `to`
 * (from < to), growing the buffer and raising (buf)->used as needed.
 */
#define BB_MOVE_RIGHT(buf,from,to,n) do {\
  if ((unsigned int )((to)+(n)) > (buf)->alloc) BB_EXPAND((buf),(to) + (n));\
  xmemmove((buf)->p + (to), (buf)->p + (from), (n));\
  if ((unsigned int )((to)+(n)) > (buf)->used) (buf)->used = (to) + (n);\
} while (0)

/* Move `n` bytes from offset `from` to the lower offset `to` (from > to). */
#define BB_MOVE_LEFT(buf,from,to,n) do {\
  xmemmove((buf)->p + (to), (buf)->p + (from), (n));\
} while (0)

/*
 * Move everything from offset `from` up to (buf)->used down to offset `to`
 * (from > to) and shrink (buf)->used by the distance moved.
 * `from` and `to` are parenthesized individually so expression arguments
 * produce the intended difference.
 */
#define BB_MOVE_LEFT_REDUCE(buf,from,to) do {\
  xmemmove((buf)->p + (to), (buf)->p + (from), (buf)->used - (from));\
  (buf)->used -= ((from) - (to));\
} while (0)

/*
 * Insert `n` bytes at offset `pos`: a plain BB_WRITE when `pos` is at or
 * past the used part, otherwise the tail is moved right first.
 */
#define BB_INSERT(buf,pos,bytes,n) do {\
  if ((pos) >= (buf)->used) {\
    BB_WRITE((buf),(pos),(bytes),(n));\
  }\
  else {\
    BB_MOVE_RIGHT((buf),(pos),(pos) + (n),((buf)->used - (pos)));\
    xmemcpy((buf)->p + (pos), (bytes), (n));\
  }\
} while (0)

/* Byte stored at offset `pos` (usable as an lvalue). */
#define BB_GET_BYTE(buf, pos) ((buf)->p[(pos)])
229 
230 
/* State tag enumeration (its users are outside this part of the file).
   The numeric values are the implicit ones of the original declaration. */
typedef enum {
  CS_VALUE    = 0,
  CS_RANGE    = 1,
  CS_COMPLETE = 2,
  CS_START    = 3
} CSTATE;
237 
/* Value-kind tag enumeration (its users are outside this part of the
   file).  The numeric values are the implicit ones of the original
   declaration. */
typedef enum {
  CV_UNDEF = 0,
  CV_SB    = 1,
  CV_MB    = 2,
  CV_CPROP = 3
} CVAL;
244 
/* Warning sink that ignores its message; it is the default value of the
   warning function pointers and the marker for "warnings disabled". */
extern void
onig_null_warn(const char* s ARG_UNUSED)
{
  /* intentionally empty */
}
246 
247 #ifdef DEFAULT_WARN_FUNCTION
248 static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;
249 #else
250 static OnigWarnFunc onig_warn = onig_null_warn;
251 #endif
252 
253 #ifdef DEFAULT_VERB_WARN_FUNCTION
254 static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION;
255 #else
256 static OnigWarnFunc onig_verb_warn = onig_null_warn;
257 #endif
258 
/*
 * Install `f` as the warning callback.
 * A null `f` installs onig_null_warn instead: onig_warning() only skips
 * the call when the callback equals onig_null_warn, so storing a null
 * pointer would make it call through null.
 */
extern void onig_set_warn_func(OnigWarnFunc f)
{
  onig_warn = (f != (OnigWarnFunc )0) ? f : onig_null_warn;
}
263 
/*
 * Install `f` as the verbose-warning callback.
 * A null `f` installs the no-op onig_null_warn instead, so the stored
 * pointer is always callable.
 */
extern void onig_set_verb_warn_func(OnigWarnFunc f)
{
  onig_verb_warn = (f != (OnigWarnFunc )0) ? f : onig_null_warn;
}
268 
269 extern void
onig_warning(const char * s)270 onig_warning(const char* s)
271 {
272   if (onig_warn == onig_null_warn) return ;
273 
274   (*onig_warn)(s);
275 }
276 
#define DEFAULT_MAX_CAPTURE_NUM   32767

/* Capture number limit; starts at DEFAULT_MAX_CAPTURE_NUM. */
static int MaxCaptureNum = DEFAULT_MAX_CAPTURE_NUM;

/*
 * Set the capture number limit to `num`.
 * Returns 0 on success, or -1 (leaving the limit unchanged) when `num`
 * is negative.
 */
extern int
onig_set_capture_num_limit(int num)
{
  if (num >= 0) {
    MaxCaptureNum = num;
    return 0;
  }

  return -1;
}
289 
290 static unsigned int ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;
291 
292 extern unsigned int
onig_get_parse_depth_limit(void)293 onig_get_parse_depth_limit(void)
294 {
295   return ParseDepthLimit;
296 }
297 
298 extern int
onig_set_parse_depth_limit(unsigned int depth)299 onig_set_parse_depth_limit(unsigned int depth)
300 {
301   if (depth == 0)
302     ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;
303   else
304     ParseDepthLimit = depth;
305   return 0;
306 }
307 
/*
 * INC_PARSE_DEPTH(d): increment the depth counter `d`; when it then
 * exceeds ParseDepthLimit the ENCLOSING FUNCTION returns
 * ONIGERR_PARSE_DEPTH_LIMIT_OVER.  With ONIG_DEBUG_PARSE the highest depth
 * seen is also recorded in env->max_parse_depth (an `env` must be in
 * scope at the point of use).
 */
#ifndef ONIG_DEBUG_PARSE
#define INC_PARSE_DEPTH(d) do {\
  if (++(d) > ParseDepthLimit) \
    return ONIGERR_PARSE_DEPTH_LIMIT_OVER;\
} while (0)
#else
#define INC_PARSE_DEPTH(d) do {\
  ++(d);\
  if ((d) > env->max_parse_depth) env->max_parse_depth = d;\
  if ((d) > ParseDepthLimit) \
    return ONIGERR_PARSE_DEPTH_LIMIT_OVER;\
} while (0)
#endif

/* Undo one INC_PARSE_DEPTH. */
#define DEC_PARSE_DEPTH(d)  (d)--
324 
325 
326 static int
bbuf_init(BBuf * buf,int size)327 bbuf_init(BBuf* buf, int size)
328 {
329   if (size <= 0) {
330     size   = 0;
331     buf->p = NULL;
332   }
333   else {
334     buf->p = (UChar* )xmalloc(size);
335     if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
336   }
337 
338   buf->alloc = size;
339   buf->used  = 0;
340   return 0;
341 }
342 
343 static void
bbuf_free(BBuf * bbuf)344 bbuf_free(BBuf* bbuf)
345 {
346   if (IS_NOT_NULL(bbuf)) {
347     if (IS_NOT_NULL(bbuf->p)) xfree(bbuf->p);
348     xfree(bbuf);
349   }
350 }
351 
352 static int
bbuf_clone(BBuf ** rto,BBuf * from)353 bbuf_clone(BBuf** rto, BBuf* from)
354 {
355   int r;
356   BBuf *to;
357 
358   *rto = to = (BBuf* )xmalloc(sizeof(BBuf));
359   CHECK_NULL_RETURN_MEMERR(to);
360   r = BB_INIT(to, from->alloc);
361   if (r != 0) {
362     bbuf_free(to);
363     *rto = 0;
364     return r;
365   }
366   to->used = from->used;
367   xmemcpy(to->p, from->p, from->used);
368   return 0;
369 }
370 
371 static int
backref_rel_to_abs(int rel_no,ParseEnv * env)372 backref_rel_to_abs(int rel_no, ParseEnv* env)
373 {
374   if (rel_no > 0) {
375     if (rel_no > ONIG_INT_MAX - env->num_mem)
376       return ONIGERR_INVALID_BACKREF;
377     return env->num_mem + rel_no;
378   }
379   else {
380     return env->num_mem + 1 + rel_no;
381   }
382 }
383 
/* Set / clear the bits `f` in the option variable `v`. */
#define OPTION_ON(v,f)     ((v) |= (f))
#define OPTION_OFF(v,f)    ((v) &= ~(f))

/*
 * Clear the bits `f` in `v` when `negative` is true, set them otherwise.
 * The whole conditional is parenthesized so the macro behaves as a single
 * operand when it appears inside a larger expression.
 */
#define OPTION_NEGATE(v,f,negative)    ((negative) ? ((v) &= ~(f)) : ((v) |= (f)))
388 
/* First code point of the multi-byte range: 0x80 when the encoding's
   minimum character length is 1 (or less), otherwise 0. */
#define MBCODE_START_POS(enc) \
  (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) <= 1 ? 0x80 : 0)

/* Add the range [MBCODE_START_POS(enc), maximum code point] to the code
   range buffer `pbuf`; evaluates to add_code_range_to_buf()'s result. */
#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \
  add_code_range_to_buf(pbuf, MBCODE_START_POS(enc), ~((OnigCodePoint )0))

/*
 * For encodings that are not single-byte, add the whole multi-byte range
 * to `mbuf`.  Uses a variable `r` of the enclosing function for the result
 * and makes the ENCLOSING FUNCTION return it when it is non-zero.
 */
#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\
  if (ONIGENC_IS_SINGLEBYTE(enc)) break;\
  r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\
  if (r != 0) return r;\
} while (0)
401 
402 
/* Store 1 in `empty` when every one of the BITSET_REAL_SIZE elements of
   the bit set `bs` is zero, otherwise 0. */
#define BITSET_IS_EMPTY(bs,empty) do {\
  int bs_idx_ = 0;\
  empty = 1;\
  while (bs_idx_ < (int )BITSET_REAL_SIZE) {\
    if ((bs)[bs_idx_] != 0) {\
      empty = 0; break;\
    }\
    bs_idx_++;\
  }\
} while (0)
412 
413 static void
bitset_set_range(BitSetRef bs,int from,int to)414 bitset_set_range(BitSetRef bs, int from, int to)
415 {
416   int i;
417   for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) {
418     BITSET_SET_BIT(bs, i);
419   }
420 }
421 
422 static void
bitset_invert(BitSetRef bs)423 bitset_invert(BitSetRef bs)
424 {
425   int i;
426   for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { bs[i] = ~(bs[i]); }
427 }
428 
429 static void
bitset_invert_to(BitSetRef from,BitSetRef to)430 bitset_invert_to(BitSetRef from, BitSetRef to)
431 {
432   int i;
433   for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { to[i] = ~(from[i]); }
434 }
435 
436 static void
bitset_and(BitSetRef dest,BitSetRef bs)437 bitset_and(BitSetRef dest, BitSetRef bs)
438 {
439   int i;
440   for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { dest[i] &= bs[i]; }
441 }
442 
443 static void
bitset_or(BitSetRef dest,BitSetRef bs)444 bitset_or(BitSetRef dest, BitSetRef bs)
445 {
446   int i;
447   for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { dest[i] |= bs[i]; }
448 }
449 
450 static void
bitset_copy(BitSetRef dest,BitSetRef bs)451 bitset_copy(BitSetRef dest, BitSetRef bs)
452 {
453   int i;
454   for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { dest[i] = bs[i]; }
455 }
456 
457 extern int
onig_strncmp(const UChar * s1,const UChar * s2,int n)458 onig_strncmp(const UChar* s1, const UChar* s2, int n)
459 {
460   int x;
461 
462   while (n-- > 0) {
463     x = *s2++ - *s1++;
464     if (x) return x;
465   }
466   return 0;
467 }
468 
469 extern void
onig_strcpy(UChar * dest,const UChar * src,const UChar * end)470 onig_strcpy(UChar* dest, const UChar* src, const UChar* end)
471 {
472   int len = (int )(end - src);
473   if (len > 0) {
474     xmemcpy(dest, src, len);
475     dest[len] = (UChar )0;
476   }
477 }
478 
/*
 * scan pattern methods
 *
 * These macros work on variables of the enclosing function: `p` (current
 * position), `end` (end of pattern), `enc` (encoding) and, for the
 * non-_S forms, `pfetch_prev` as declared by PFETCH_READY.
 */
#define PEND_VALUE   0

/* declares the variable that remembers the position before the last
   PINC / PFETCH */
#define PFETCH_READY  UChar* pfetch_prev
/* 1 at (or past) the end of the pattern, 0 otherwise */
#define PEND         (p >= end ? 1 : 0)
/* step back to the position saved by the last PINC / PFETCH */
#define PUNFETCH     p = pfetch_prev
/* skip one character, remembering where it started */
#define PINC       do { \
  pfetch_prev = p; \
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)
/* read one character into `c` and advance, remembering where it started */
#define PFETCH(c)  do { \
  pfetch_prev = p; \
  c = ONIGENC_MBC_TO_CODE(enc, p, end); \
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)

/* _S forms: same as above but without saving the previous position */
#define PINC_S     do { \
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)
#define PFETCH_S(c) do { \
  c = ONIGENC_MBC_TO_CODE(enc, p, end); \
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)
502 
/* Code point at the current position without advancing, or PEND_VALUE at
   the end of the pattern (uses `p`, `end`, `enc` of the enclosing
   function). */
#define PPEEK        (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE)
/* True when the peeked code point equals `c`.  `c` is parenthesized so
   the cast applies to the whole argument expression. */
#define PPEEK_IS(c)  (PPEEK == (OnigCodePoint )(c))
505 
506 static UChar*
strcat_capa(UChar * dest,UChar * dest_end,const UChar * src,const UChar * src_end,int capa)507 strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end,
508             int capa)
509 {
510   UChar* r;
511   ptrdiff_t dest_delta = dest_end - dest;
512 
513   if (dest)
514     r = (UChar* )xrealloc(dest, capa + 1);
515   else
516     r = (UChar* )xmalloc(capa + 1);
517 
518   CHECK_NULL_RETURN(r);
519   onig_strcpy(r + dest_delta, src, src_end);
520   return r;
521 }
522 
523 /* dest on static area */
524 static UChar*
strcat_capa_from_static(UChar * dest,UChar * dest_end,const UChar * src,const UChar * src_end,int capa)525 strcat_capa_from_static(UChar* dest, UChar* dest_end,
526                         const UChar* src, const UChar* src_end, int capa)
527 {
528   UChar* r;
529 
530   r = (UChar* )xmalloc(capa + 1);
531   CHECK_NULL_RETURN(r);
532   onig_strcpy(r, dest, dest_end);
533   onig_strcpy(r + (dest_end - dest), src, src_end);
534   return r;
535 }
536 
537 
538 #ifdef USE_ST_LIBRARY
539 
540 typedef struct {
541   UChar* s;
542   UChar* end;
543 } st_str_end_key;
544 
545 static int
str_end_cmp(st_str_end_key * x,st_str_end_key * y)546 str_end_cmp(st_str_end_key* x, st_str_end_key* y)
547 {
548   UChar *p, *q;
549   int c;
550 
551   if ((x->end - x->s) != (y->end - y->s))
552     return 1;
553 
554   p = x->s;
555   q = y->s;
556   while (p < x->end) {
557     c = (int )*p - (int )*q;
558     if (c != 0) return c;
559 
560     p++; q++;
561   }
562 
563   return 0;
564 }
565 
566 static int
str_end_hash(st_str_end_key * x)567 str_end_hash(st_str_end_key* x)
568 {
569   UChar *p;
570   unsigned val = 0;
571 
572   p = x->s;
573   while (p < x->end) {
574     val = val * 997 + (unsigned )*p++;
575   }
576 
577   return (int) (val + (val >> 5));
578 }
579 
580 extern hash_table_type
onig_st_init_strend_table_with_size(int size)581 onig_st_init_strend_table_with_size(int size)
582 {
583   static struct st_hash_type hashType = {
584     str_end_cmp,
585     str_end_hash,
586   };
587 
588   return (hash_table_type )onig_st_init_table_with_size(&hashType, size);
589 }
590 
591 extern int
onig_st_lookup_strend(hash_table_type table,const UChar * str_key,const UChar * end_key,hash_data_type * value)592 onig_st_lookup_strend(hash_table_type table, const UChar* str_key,
593                       const UChar* end_key, hash_data_type *value)
594 {
595   st_str_end_key key;
596 
597   key.s   = (UChar* )str_key;
598   key.end = (UChar* )end_key;
599 
600   return onig_st_lookup(table, (st_data_t )(&key), value);
601 }
602 
603 extern int
onig_st_insert_strend(hash_table_type table,const UChar * str_key,const UChar * end_key,hash_data_type value)604 onig_st_insert_strend(hash_table_type table, const UChar* str_key,
605                       const UChar* end_key, hash_data_type value)
606 {
607   st_str_end_key* key;
608   int result;
609 
610   key = (st_str_end_key* )xmalloc(sizeof(st_str_end_key));
611   CHECK_NULL_RETURN_MEMERR(key);
612 
613   key->s   = (UChar* )str_key;
614   key->end = (UChar* )end_key;
615   result = onig_st_insert(table, (st_data_t )key, value);
616   if (result) {
617     xfree(key);
618   }
619   return result;
620 }
621 
622 
623 #ifdef USE_CALLOUT
624 
625 typedef struct {
626   OnigEncoding enc;
627   int    type; /* callout type: single or not */
628   UChar* s;
629   UChar* end;
630 } st_callout_name_key;
631 
632 static int
callout_name_table_cmp(st_callout_name_key * x,st_callout_name_key * y)633 callout_name_table_cmp(st_callout_name_key* x, st_callout_name_key* y)
634 {
635   UChar *p, *q;
636   int c;
637 
638   if (x->enc  != y->enc)  return 1;
639   if (x->type != y->type) return 1;
640   if ((x->end - x->s) != (y->end - y->s))
641     return 1;
642 
643   p = x->s;
644   q = y->s;
645   while (p < x->end) {
646     c = (int )*p - (int )*q;
647     if (c != 0) return c;
648 
649     p++; q++;
650   }
651 
652   return 0;
653 }
654 
655 static int
callout_name_table_hash(st_callout_name_key * x)656 callout_name_table_hash(st_callout_name_key* x)
657 {
658   UChar *p;
659   unsigned int val = 0;
660 
661   p = x->s;
662   while (p < x->end) {
663     val = val * 997 + (unsigned int )*p++;
664   }
665 
666   /* use intptr_t for escape warning in Windows */
667   return (int )(val + (val >> 5) + ((intptr_t )x->enc & 0xffff) + x->type);
668 }
669 
670 extern hash_table_type
onig_st_init_callout_name_table_with_size(int size)671 onig_st_init_callout_name_table_with_size(int size)
672 {
673   static struct st_hash_type hashType = {
674     callout_name_table_cmp,
675     callout_name_table_hash,
676   };
677 
678   return (hash_table_type )onig_st_init_table_with_size(&hashType, size);
679 }
680 
681 extern int
onig_st_lookup_callout_name_table(hash_table_type table,OnigEncoding enc,int type,const UChar * str_key,const UChar * end_key,hash_data_type * value)682 onig_st_lookup_callout_name_table(hash_table_type table,
683                                   OnigEncoding enc,
684                                   int type,
685                                   const UChar* str_key,
686                                   const UChar* end_key,
687                                   hash_data_type *value)
688 {
689   st_callout_name_key key;
690 
691   key.enc  = enc;
692   key.type = type;
693   key.s    = (UChar* )str_key;
694   key.end  = (UChar* )end_key;
695 
696   return onig_st_lookup(table, (st_data_t )(&key), value);
697 }
698 
699 static int
st_insert_callout_name_table(hash_table_type table,OnigEncoding enc,int type,UChar * str_key,UChar * end_key,hash_data_type value)700 st_insert_callout_name_table(hash_table_type table,
701                              OnigEncoding enc, int type,
702                              UChar* str_key, UChar* end_key,
703                              hash_data_type value)
704 {
705   st_callout_name_key* key;
706   int result;
707 
708   key = (st_callout_name_key* )xmalloc(sizeof(st_callout_name_key));
709   CHECK_NULL_RETURN_MEMERR(key);
710 
711   /* key->s: don't duplicate, because str_key is duped in callout_name_entry() */
712   key->enc  = enc;
713   key->type = type;
714   key->s    = str_key;
715   key->end  = end_key;
716   result = onig_st_insert(table, (st_data_t )key, value);
717   if (result) {
718     xfree(key);
719   }
720   return result;
721 }
722 #endif
723 
724 #endif /* USE_ST_LIBRARY */
725 
726 
727 #define INIT_NAME_BACKREFS_ALLOC_NUM   8
728 
729 typedef struct {
730   UChar* name;
731   int    name_len;   /* byte length */
732   int    back_num;   /* number of backrefs */
733   int    back_alloc;
734   int    back_ref1;
735   int*   back_refs;
736 } NameEntry;
737 
738 #ifdef USE_ST_LIBRARY
739 
740 #define INIT_NAMES_ALLOC_NUM    5
741 
742 typedef st_table  NameTable;
743 typedef st_data_t HashDataType;   /* 1.6 st.h doesn't define st_data_t type */
744 
745 #define NAMEBUF_SIZE    24
746 #define NAMEBUF_SIZE_1  25
747 
748 #ifdef ONIG_DEBUG
749 static int
i_print_name_entry(UChar * key,NameEntry * e,void * arg)750 i_print_name_entry(UChar* key, NameEntry* e, void* arg)
751 {
752   int i;
753   FILE* fp = (FILE* )arg;
754 
755   fprintf(fp, "%s: ", e->name);
756   if (e->back_num == 0)
757     fputs("-", fp);
758   else if (e->back_num == 1)
759     fprintf(fp, "%d", e->back_ref1);
760   else {
761     for (i = 0; i < e->back_num; i++) {
762       if (i > 0) fprintf(fp, ", ");
763       fprintf(fp, "%d", e->back_refs[i]);
764     }
765   }
766   fputs("\n", fp);
767   return ST_CONTINUE;
768 }
769 
770 extern int
onig_print_names(FILE * fp,regex_t * reg)771 onig_print_names(FILE* fp, regex_t* reg)
772 {
773   NameTable* t = (NameTable* )reg->name_table;
774 
775   if (IS_NOT_NULL(t)) {
776     fprintf(fp, "name table\n");
777     onig_st_foreach(t, i_print_name_entry, (HashDataType )fp);
778     fputs("\n", fp);
779   }
780   return 0;
781 }
782 #endif /* ONIG_DEBUG */
783 
784 static int
i_free_name_entry(UChar * key,NameEntry * e,void * arg ARG_UNUSED)785 i_free_name_entry(UChar* key, NameEntry* e, void* arg ARG_UNUSED)
786 {
787   xfree(e->name);
788   if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
789   xfree(key);
790   xfree(e);
791   return ST_DELETE;
792 }
793 
794 static int
names_clear(regex_t * reg)795 names_clear(regex_t* reg)
796 {
797   NameTable* t = (NameTable* )reg->name_table;
798 
799   if (IS_NOT_NULL(t)) {
800     onig_st_foreach(t, i_free_name_entry, 0);
801   }
802   return 0;
803 }
804 
805 extern int
onig_names_free(regex_t * reg)806 onig_names_free(regex_t* reg)
807 {
808   int r;
809   NameTable* t;
810 
811   r = names_clear(reg);
812   if (r != 0) return r;
813 
814   t = (NameTable* )reg->name_table;
815   if (IS_NOT_NULL(t)) onig_st_free_table(t);
816   reg->name_table = (void* )NULL;
817   return 0;
818 }
819 
820 static NameEntry*
name_find(regex_t * reg,const UChar * name,const UChar * name_end)821 name_find(regex_t* reg, const UChar* name, const UChar* name_end)
822 {
823   NameEntry* e;
824   NameTable* t = (NameTable* )reg->name_table;
825 
826   e = (NameEntry* )NULL;
827   if (IS_NOT_NULL(t)) {
828     onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));
829   }
830   return e;
831 }
832 
833 typedef struct {
834   int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*);
835   regex_t* reg;
836   void* arg;
837   int ret;
838   OnigEncoding enc;
839 } INamesArg;
840 
841 static int
i_names(UChar * key ARG_UNUSED,NameEntry * e,INamesArg * arg)842 i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg)
843 {
844   int r = (*(arg->func))(e->name,
845                          e->name + e->name_len,
846                          e->back_num,
847                          (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
848                          arg->reg, arg->arg);
849   if (r != 0) {
850     arg->ret = r;
851     return ST_STOP;
852   }
853   return ST_CONTINUE;
854 }
855 
856 extern int
onig_foreach_name(regex_t * reg,int (* func)(const UChar *,const UChar *,int,int *,regex_t *,void *),void * arg)857 onig_foreach_name(regex_t* reg,
858   int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
859 {
860   INamesArg narg;
861   NameTable* t = (NameTable* )reg->name_table;
862 
863   narg.ret = 0;
864   if (IS_NOT_NULL(t)) {
865     narg.func = func;
866     narg.reg  = reg;
867     narg.arg  = arg;
868     narg.enc  = reg->enc; /* should be pattern encoding. */
869     onig_st_foreach(t, i_names, (HashDataType )&narg);
870   }
871   return narg.ret;
872 }
873 
874 static int
i_renumber_name(UChar * key ARG_UNUSED,NameEntry * e,GroupNumMap * map)875 i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumMap* map)
876 {
877   int i;
878 
879   if (e->back_num > 1) {
880     for (i = 0; i < e->back_num; i++) {
881       e->back_refs[i] = map[e->back_refs[i]].new_val;
882     }
883   }
884   else if (e->back_num == 1) {
885     e->back_ref1 = map[e->back_ref1].new_val;
886   }
887 
888   return ST_CONTINUE;
889 }
890 
891 extern int
onig_renumber_name_table(regex_t * reg,GroupNumMap * map)892 onig_renumber_name_table(regex_t* reg, GroupNumMap* map)
893 {
894   NameTable* t = (NameTable* )reg->name_table;
895 
896   if (IS_NOT_NULL(t)) {
897     onig_st_foreach(t, i_renumber_name, (HashDataType )map);
898   }
899   return 0;
900 }
901 
902 
903 extern int
onig_number_of_names(regex_t * reg)904 onig_number_of_names(regex_t* reg)
905 {
906   NameTable* t = (NameTable* )reg->name_table;
907 
908   if (IS_NOT_NULL(t))
909     return t->num_entries;
910   else
911     return 0;
912 }
913 
914 #else  /* USE_ST_LIBRARY */
915 
916 #define INIT_NAMES_ALLOC_NUM    8
917 
918 typedef struct {
919   NameEntry* e;
920   int        num;
921   int        alloc;
922 } NameTable;
923 
924 #ifdef ONIG_DEBUG
925 extern int
onig_print_names(FILE * fp,regex_t * reg)926 onig_print_names(FILE* fp, regex_t* reg)
927 {
928   int i, j;
929   NameEntry* e;
930   NameTable* t = (NameTable* )reg->name_table;
931 
932   if (IS_NOT_NULL(t) && t->num > 0) {
933     fprintf(fp, "name table\n");
934     for (i = 0; i < t->num; i++) {
935       e = &(t->e[i]);
936       fprintf(fp, "%s: ", e->name);
937       if (e->back_num == 0) {
938         fputs("-", fp);
939       }
940       else if (e->back_num == 1) {
941         fprintf(fp, "%d", e->back_ref1);
942       }
943       else {
944         for (j = 0; j < e->back_num; j++) {
945           if (j > 0) fprintf(fp, ", ");
946           fprintf(fp, "%d", e->back_refs[j]);
947         }
948       }
949       fputs("\n", fp);
950     }
951     fputs("\n", fp);
952   }
953   return 0;
954 }
955 #endif
956 
957 static int
names_clear(regex_t * reg)958 names_clear(regex_t* reg)
959 {
960   int i;
961   NameEntry* e;
962   NameTable* t = (NameTable* )reg->name_table;
963 
964   if (IS_NOT_NULL(t)) {
965     for (i = 0; i < t->num; i++) {
966       e = &(t->e[i]);
967       if (IS_NOT_NULL(e->name)) {
968         xfree(e->name);
969         e->name       = NULL;
970         e->name_len   = 0;
971         e->back_num   = 0;
972         e->back_alloc = 0;
973         if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
974         e->back_refs = (int* )NULL;
975       }
976     }
977     if (IS_NOT_NULL(t->e)) {
978       xfree(t->e);
979       t->e = NULL;
980     }
981     t->num = 0;
982   }
983   return 0;
984 }
985 
986 extern int
onig_names_free(regex_t * reg)987 onig_names_free(regex_t* reg)
988 {
989   int r;
990   NameTable* t;
991 
992   r = names_clear(reg);
993   if (r != 0) return r;
994 
995   t = (NameTable* )reg->name_table;
996   if (IS_NOT_NULL(t)) xfree(t);
997   reg->name_table = NULL;
998   return 0;
999 }
1000 
1001 static NameEntry*
name_find(regex_t * reg,UChar * name,UChar * name_end)1002 name_find(regex_t* reg, UChar* name, UChar* name_end)
1003 {
1004   int i, len;
1005   NameEntry* e;
1006   NameTable* t = (NameTable* )reg->name_table;
1007 
1008   if (IS_NOT_NULL(t)) {
1009     len = name_end - name;
1010     for (i = 0; i < t->num; i++) {
1011       e = &(t->e[i]);
1012       if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)
1013         return e;
1014     }
1015   }
1016   return (NameEntry* )NULL;
1017 }
1018 
1019 extern int
onig_foreach_name(regex_t * reg,int (* func)(const UChar *,const UChar *,int,int *,regex_t *,void *),void * arg)1020 onig_foreach_name(regex_t* reg,
1021   int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
1022 {
1023   int i, r;
1024   NameEntry* e;
1025   NameTable* t = (NameTable* )reg->name_table;
1026 
1027   if (IS_NOT_NULL(t)) {
1028     for (i = 0; i < t->num; i++) {
1029       e = &(t->e[i]);
1030       r = (*func)(e->name, e->name + e->name_len, e->back_num,
1031                   (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
1032                   reg, arg);
1033       if (r != 0) return r;
1034     }
1035   }
1036   return 0;
1037 }
1038 
1039 extern int
onig_number_of_names(regex_t * reg)1040 onig_number_of_names(regex_t* reg)
1041 {
1042   NameTable* t = (NameTable* )reg->name_table;
1043 
1044   if (IS_NOT_NULL(t))
1045     return t->num;
1046   else
1047     return 0;
1048 }
1049 
1050 #endif /* else USE_ST_LIBRARY */
1051 
1052 static int
name_add(regex_t * reg,UChar * name,UChar * name_end,int backref,ParseEnv * env)1053 name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ParseEnv* env)
1054 {
1055   int r;
1056   int alloc;
1057   NameEntry* e;
1058   NameTable* t = (NameTable* )reg->name_table;
1059 
1060   if (name_end - name <= 0)
1061     return ONIGERR_EMPTY_GROUP_NAME;
1062 
1063   e = name_find(reg, name, name_end);
1064   if (IS_NULL(e)) {
1065 #ifdef USE_ST_LIBRARY
1066     if (IS_NULL(t)) {
1067       t = onig_st_init_strend_table_with_size(INIT_NAMES_ALLOC_NUM);
1068       CHECK_NULL_RETURN_MEMERR(t);
1069       reg->name_table = (void* )t;
1070     }
1071     e = (NameEntry* )xmalloc(sizeof(NameEntry));
1072     CHECK_NULL_RETURN_MEMERR(e);
1073 
1074     e->name = onigenc_strdup(reg->enc, name, name_end);
1075     if (IS_NULL(e->name)) {
1076       xfree(e);  return ONIGERR_MEMORY;
1077     }
1078     r = onig_st_insert_strend(t, e->name, (e->name + (name_end - name)),
1079                               (HashDataType )e);
1080     if (r < 0) return r;
1081 
1082     e->name_len   = (int )(name_end - name);
1083     e->back_num   = 0;
1084     e->back_alloc = 0;
1085     e->back_refs  = (int* )NULL;
1086 
1087 #else
1088 
1089     if (IS_NULL(t)) {
1090       alloc = INIT_NAMES_ALLOC_NUM;
1091       t = (NameTable* )xmalloc(sizeof(NameTable));
1092       CHECK_NULL_RETURN_MEMERR(t);
1093       t->e     = NULL;
1094       t->alloc = 0;
1095       t->num   = 0;
1096 
1097       t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc);
1098       if (IS_NULL(t->e)) {
1099         xfree(t);
1100         return ONIGERR_MEMORY;
1101       }
1102       t->alloc = alloc;
1103       reg->name_table = t;
1104       goto clear;
1105     }
1106     else if (t->num == t->alloc) {
1107       int i;
1108 
1109       alloc = t->alloc * 2;
1110       t->e = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc);
1111       CHECK_NULL_RETURN_MEMERR(t->e);
1112       t->alloc = alloc;
1113 
1114     clear:
1115       for (i = t->num; i < t->alloc; i++) {
1116         t->e[i].name       = NULL;
1117         t->e[i].name_len   = 0;
1118         t->e[i].back_num   = 0;
1119         t->e[i].back_alloc = 0;
1120         t->e[i].back_refs  = (int* )NULL;
1121       }
1122     }
1123     e = &(t->e[t->num]);
1124     t->num++;
1125     e->name = onigenc_strdup(reg->enc, name, name_end);
1126     if (IS_NULL(e->name)) return ONIGERR_MEMORY;
1127     e->name_len = name_end - name;
1128 #endif
1129   }
1130 
1131   if (e->back_num >= 1 &&
1132       ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME)) {
1133     onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME,
1134                                    name, name_end);
1135     return ONIGERR_MULTIPLEX_DEFINED_NAME;
1136   }
1137 
1138   e->back_num++;
1139   if (e->back_num == 1) {
1140     e->back_ref1 = backref;
1141   }
1142   else {
1143     if (e->back_num == 2) {
1144       alloc = INIT_NAME_BACKREFS_ALLOC_NUM;
1145       e->back_refs = (int* )xmalloc(sizeof(int) * alloc);
1146       CHECK_NULL_RETURN_MEMERR(e->back_refs);
1147       e->back_alloc = alloc;
1148       e->back_refs[0] = e->back_ref1;
1149       e->back_refs[1] = backref;
1150     }
1151     else {
1152       if (e->back_num > e->back_alloc) {
1153         alloc = e->back_alloc * 2;
1154         e->back_refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc);
1155         CHECK_NULL_RETURN_MEMERR(e->back_refs);
1156         e->back_alloc = alloc;
1157       }
1158       e->back_refs[e->back_num - 1] = backref;
1159     }
1160   }
1161 
1162   return 0;
1163 }
1164 
1165 extern int
onig_name_to_group_numbers(regex_t * reg,const UChar * name,const UChar * name_end,int ** nums)1166 onig_name_to_group_numbers(regex_t* reg, const UChar* name,
1167                            const UChar* name_end, int** nums)
1168 {
1169   NameEntry* e = name_find(reg, name, name_end);
1170 
1171   if (IS_NULL(e)) return ONIGERR_UNDEFINED_NAME_REFERENCE;
1172 
1173   switch (e->back_num) {
1174   case 0:
1175     break;
1176   case 1:
1177     *nums = &(e->back_ref1);
1178     break;
1179   default:
1180     *nums = e->back_refs;
1181     break;
1182   }
1183   return e->back_num;
1184 }
1185 
1186 static int
name_to_group_numbers(ParseEnv * env,const UChar * name,const UChar * name_end,int ** nums)1187 name_to_group_numbers(ParseEnv* env, const UChar* name, const UChar* name_end,
1188                       int** nums)
1189 {
1190   regex_t* reg;
1191   NameEntry* e;
1192 
1193   reg = env->reg;
1194   e = name_find(reg, name, name_end);
1195 
1196   if (IS_NULL(e)) {
1197     onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,
1198                                    (UChar* )name, (UChar* )name_end);
1199     return ONIGERR_UNDEFINED_NAME_REFERENCE;
1200   }
1201 
1202   switch (e->back_num) {
1203   case 0:
1204     break;
1205   case 1:
1206     *nums = &(e->back_ref1);
1207     break;
1208   default:
1209     *nums = e->back_refs;
1210     break;
1211   }
1212   return e->back_num;
1213 }
1214 
1215 extern int
onig_name_to_backref_number(regex_t * reg,const UChar * name,const UChar * name_end,OnigRegion * region)1216 onig_name_to_backref_number(regex_t* reg, const UChar* name,
1217                             const UChar* name_end, OnigRegion *region)
1218 {
1219   int i, n, *nums;
1220 
1221   n = onig_name_to_group_numbers(reg, name, name_end, &nums);
1222   if (n < 0)
1223     return n;
1224   else if (n == 0)
1225     return ONIGERR_PARSER_BUG;
1226   else if (n == 1)
1227     return nums[0];
1228   else {
1229     if (IS_NOT_NULL(region)) {
1230       for (i = n - 1; i >= 0; i--) {
1231         if (region->beg[nums[i]] != ONIG_REGION_NOTPOS)
1232           return nums[i];
1233       }
1234     }
1235     return nums[n - 1];
1236   }
1237 }
1238 
1239 extern int
onig_noname_group_capture_is_active(regex_t * reg)1240 onig_noname_group_capture_is_active(regex_t* reg)
1241 {
1242   if (OPTON_DONT_CAPTURE_GROUP(reg->options))
1243     return 0;
1244 
1245   if (onig_number_of_names(reg) > 0 &&
1246       IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
1247       ! OPTON_CAPTURE_GROUP(reg->options)) {
1248     return 0;
1249   }
1250 
1251   return 1;
1252 }
1253 
1254 #ifdef USE_CALLOUT
1255 
/* One registered callout-of-name, as filled in by onig_set_callout_of_name(). */
typedef struct {
  OnigCalloutType type;
  int             in;          /* ONIG_CALLOUT_IN_PROGRESS / ONIG_CALLOUT_IN_RETRACTION bits */
  OnigCalloutFunc start_func;
  OnigCalloutFunc end_func;
  int             arg_num;     /* total number of arguments */
  int             opt_arg_num; /* how many of the trailing arguments are optional */
  unsigned int    arg_types[ONIG_CALLOUT_MAX_ARGS_NUM];
  /* defaults for the optional arguments, indexed by argument position;
     ONIG_TYPE_STRING defaults are heap copies released by
     free_callout_func_list() */
  OnigValue       opt_defaults[ONIG_CALLOUT_MAX_ARGS_NUM];
  UChar*          name; /* reference to GlobalCalloutNameTable entry: e->name */
} CalloutNameListEntry;

/* Growable array of CalloutNameListEntry. */
typedef struct {
  int  n;      /* entries in use */
  int  alloc;  /* entries allocated in v */
  CalloutNameListEntry* v;
} CalloutNameListType;

/* Process-wide callout list, indexed by callout name id; created on the
   first onig_set_callout_of_name() call and reset by
   onig_global_callout_names_free(). */
static CalloutNameListType* GlobalCalloutNameList;
1275 
1276 static int
make_callout_func_list(CalloutNameListType ** rs,int init_size)1277 make_callout_func_list(CalloutNameListType** rs, int init_size)
1278 {
1279   CalloutNameListType* s;
1280   CalloutNameListEntry* v;
1281 
1282   *rs = 0;
1283 
1284   s = xmalloc(sizeof(*s));
1285   if (IS_NULL(s)) return ONIGERR_MEMORY;
1286 
1287   v = (CalloutNameListEntry* )xmalloc(sizeof(CalloutNameListEntry) * init_size);
1288   if (IS_NULL(v)) {
1289     xfree(s);
1290     return ONIGERR_MEMORY;
1291   }
1292 
1293   s->n = 0;
1294   s->alloc = init_size;
1295   s->v = v;
1296 
1297   *rs = s;
1298   return ONIG_NORMAL;
1299 }
1300 
1301 static void
free_callout_func_list(CalloutNameListType * s)1302 free_callout_func_list(CalloutNameListType* s)
1303 {
1304   if (IS_NOT_NULL(s)) {
1305     if (IS_NOT_NULL(s->v)) {
1306       int i, j;
1307 
1308       for (i = 0; i < s->n; i++) {
1309         CalloutNameListEntry* e = s->v + i;
1310         for (j = e->arg_num - e->opt_arg_num; j < e->arg_num; j++) {
1311           if (e->arg_types[j] == ONIG_TYPE_STRING) {
1312             UChar* p = e->opt_defaults[j].s.start;
1313             if (IS_NOT_NULL(p)) xfree(p);
1314           }
1315         }
1316       }
1317       xfree(s->v);
1318     }
1319     xfree(s);
1320   }
1321 }
1322 
1323 static int
callout_func_list_add(CalloutNameListType * s,int * rid)1324 callout_func_list_add(CalloutNameListType* s, int* rid)
1325 {
1326   if (s->n >= s->alloc) {
1327     int new_size = s->alloc * 2;
1328     CalloutNameListEntry* nv = (CalloutNameListEntry* )
1329       xrealloc(s->v, sizeof(CalloutNameListEntry) * new_size);
1330     if (IS_NULL(nv)) return ONIGERR_MEMORY;
1331 
1332     s->alloc = new_size;
1333     s->v = nv;
1334   }
1335 
1336   *rid = s->n;
1337 
1338   xmemset(&(s->v[s->n]), 0, sizeof(*(s->v)));
1339   s->n++;
1340   return ONIG_NORMAL;
1341 }
1342 
1343 
/* Maps one callout name to its numeric id. */
typedef struct {
  UChar* name;
  int    name_len;   /* byte length */
  int    id;         /* assigned from CalloutNameIDCounter; used as index into GlobalCalloutNameList */
} CalloutNameEntry;

/* Name table: an st hash table when USE_ST_LIBRARY is defined, otherwise
   a plain growable array of entries. */
#ifdef USE_ST_LIBRARY
typedef st_table  CalloutNameTable;
#else
typedef struct {
  CalloutNameEntry* e;
  int               num;    /* entries in use */
  int               alloc;  /* entries allocated in e */
} CalloutNameTable;
#endif

/* Process-wide table of callout names, created on first registration. */
static CalloutNameTable* GlobalCalloutNameTable;
/* Last id handed out by callout_name_entry(); reset to 0 when the table is freed. */
static int CalloutNameIDCounter;
1362 
1363 #ifdef USE_ST_LIBRARY
1364 
1365 static int
i_free_callout_name_entry(st_callout_name_key * key,CalloutNameEntry * e,void * arg ARG_UNUSED)1366 i_free_callout_name_entry(st_callout_name_key* key, CalloutNameEntry* e,
1367                           void* arg ARG_UNUSED)
1368 {
1369   if (IS_NOT_NULL(e)) {
1370     xfree(e->name);
1371   }
1372   /*xfree(key->s); */ /* is same as e->name */
1373   xfree(key);
1374   xfree(e);
1375   return ST_DELETE;
1376 }
1377 
1378 static int
callout_name_table_clear(CalloutNameTable * t)1379 callout_name_table_clear(CalloutNameTable* t)
1380 {
1381   if (IS_NOT_NULL(t)) {
1382     onig_st_foreach(t, i_free_callout_name_entry, 0);
1383   }
1384   return 0;
1385 }
1386 
1387 static int
global_callout_name_table_free(void)1388 global_callout_name_table_free(void)
1389 {
1390   if (IS_NOT_NULL(GlobalCalloutNameTable)) {
1391     int r = callout_name_table_clear(GlobalCalloutNameTable);
1392     if (r != 0) return r;
1393 
1394     onig_st_free_table(GlobalCalloutNameTable);
1395     GlobalCalloutNameTable = 0;
1396     CalloutNameIDCounter = 0;
1397   }
1398 
1399   return 0;
1400 }
1401 
1402 static CalloutNameEntry*
callout_name_find(OnigEncoding enc,int is_not_single,const UChar * name,const UChar * name_end)1403 callout_name_find(OnigEncoding enc, int is_not_single,
1404                   const UChar* name, const UChar* name_end)
1405 {
1406   int r;
1407   CalloutNameEntry* e;
1408   CalloutNameTable* t = GlobalCalloutNameTable;
1409 
1410   e = (CalloutNameEntry* )NULL;
1411   if (IS_NOT_NULL(t)) {
1412     r = onig_st_lookup_callout_name_table(t, enc, is_not_single, name, name_end,
1413                                           (HashDataType* )((void* )(&e)));
1414     if (r == 0) { /* not found */
1415       if (enc != ONIG_ENCODING_ASCII &&
1416           ONIGENC_IS_ASCII_COMPATIBLE_ENCODING(enc)) {
1417         enc = ONIG_ENCODING_ASCII;
1418         onig_st_lookup_callout_name_table(t, enc, is_not_single, name, name_end,
1419                                           (HashDataType* )((void* )(&e)));
1420       }
1421     }
1422   }
1423   return e;
1424 }
1425 
1426 #else
1427 
1428 static int
callout_name_table_clear(CalloutNameTable * t)1429 callout_name_table_clear(CalloutNameTable* t)
1430 {
1431   int i;
1432   CalloutNameEntry* e;
1433 
1434   if (IS_NOT_NULL(t)) {
1435     for (i = 0; i < t->num; i++) {
1436       e = &(t->e[i]);
1437       if (IS_NOT_NULL(e->name)) {
1438         xfree(e->name);
1439         e->name     = NULL;
1440         e->name_len = 0;
1441         e->id       = 0;
1442         e->func     = 0;
1443       }
1444     }
1445     if (IS_NOT_NULL(t->e)) {
1446       xfree(t->e);
1447       t->e = NULL;
1448     }
1449     t->num = 0;
1450   }
1451   return 0;
1452 }
1453 
1454 static int
global_callout_name_table_free(void)1455 global_callout_name_table_free(void)
1456 {
1457   if (IS_NOT_NULL(GlobalCalloutNameTable)) {
1458     int r = callout_name_table_clear(GlobalCalloutNameTable);
1459     if (r != 0) return r;
1460 
1461     xfree(GlobalCalloutNameTable);
1462     GlobalCalloutNameTable = 0;
1463     CalloutNameIDCounter = 0;
1464   }
1465   return 0;
1466 }
1467 
1468 static CalloutNameEntry*
callout_name_find(UChar * name,UChar * name_end)1469 callout_name_find(UChar* name, UChar* name_end)
1470 {
1471   int i, len;
1472   CalloutNameEntry* e;
1473   CalloutNameTable* t = Calloutnames;
1474 
1475   if (IS_NOT_NULL(t)) {
1476     len = name_end - name;
1477     for (i = 0; i < t->num; i++) {
1478       e = &(t->e[i]);
1479       if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)
1480         return e;
1481     }
1482   }
1483   return (CalloutNameEntry* )NULL;
1484 }
1485 
1486 #endif
1487 
1488 /* name string must be single byte char string. */
1489 static int
callout_name_entry(CalloutNameEntry ** rentry,OnigEncoding enc,int is_not_single,UChar * name,UChar * name_end)1490 callout_name_entry(CalloutNameEntry** rentry, OnigEncoding enc,
1491                    int is_not_single, UChar* name, UChar* name_end)
1492 {
1493   int r;
1494   CalloutNameEntry* e;
1495   CalloutNameTable* t = GlobalCalloutNameTable;
1496 
1497   *rentry = 0;
1498   if (name_end - name <= 0)
1499     return ONIGERR_INVALID_CALLOUT_NAME;
1500 
1501   e = callout_name_find(enc, is_not_single, name, name_end);
1502   if (IS_NULL(e)) {
1503 #ifdef USE_ST_LIBRARY
1504     if (IS_NULL(t)) {
1505       t = onig_st_init_callout_name_table_with_size(INIT_NAMES_ALLOC_NUM);
1506       CHECK_NULL_RETURN_MEMERR(t);
1507       GlobalCalloutNameTable = t;
1508     }
1509     e = (CalloutNameEntry* )xmalloc(sizeof(CalloutNameEntry));
1510     CHECK_NULL_RETURN_MEMERR(e);
1511 
1512     e->name = onigenc_strdup(enc, name, name_end);
1513     if (IS_NULL(e->name)) {
1514       xfree(e);  return ONIGERR_MEMORY;
1515     }
1516 
1517     r = st_insert_callout_name_table(t, enc, is_not_single,
1518                                      e->name, (e->name + (name_end - name)),
1519                                      (HashDataType )e);
1520     if (r < 0) return r;
1521 
1522 #else
1523 
1524     int alloc;
1525 
1526     if (IS_NULL(t)) {
1527       alloc = INIT_NAMES_ALLOC_NUM;
1528       t = (CalloutNameTable* )xmalloc(sizeof(CalloutNameTable));
1529       CHECK_NULL_RETURN_MEMERR(t);
1530       t->e     = NULL;
1531       t->alloc = 0;
1532       t->num   = 0;
1533 
1534       t->e = (CalloutNameEntry* )xmalloc(sizeof(CalloutNameEntry) * alloc);
1535       if (IS_NULL(t->e)) {
1536         xfree(t);
1537         return ONIGERR_MEMORY;
1538       }
1539       t->alloc = alloc;
1540       GlobalCalloutNameTable = t;
1541       goto clear;
1542     }
1543     else if (t->num == t->alloc) {
1544       int i;
1545 
1546       alloc = t->alloc * 2;
1547       t->e = (CalloutNameEntry* )xrealloc(t->e, sizeof(CalloutNameEntry) * alloc);
1548       CHECK_NULL_RETURN_MEMERR(t->e);
1549       t->alloc = alloc;
1550 
1551     clear:
1552       for (i = t->num; i < t->alloc; i++) {
1553         t->e[i].name       = NULL;
1554         t->e[i].name_len   = 0;
1555         t->e[i].id         = 0;
1556       }
1557     }
1558     e = &(t->e[t->num]);
1559     t->num++;
1560     e->name = onigenc_strdup(enc, name, name_end);
1561     if (IS_NULL(e->name)) return ONIGERR_MEMORY;
1562 #endif
1563 
1564     CalloutNameIDCounter++;
1565     e->id = CalloutNameIDCounter;
1566     e->name_len = (int )(name_end - name);
1567   }
1568 
1569   *rentry = e;
1570   return e->id;
1571 }
1572 
1573 static int
is_allowed_callout_name(OnigEncoding enc,UChar * name,UChar * name_end)1574 is_allowed_callout_name(OnigEncoding enc, UChar* name, UChar* name_end)
1575 {
1576   UChar* p;
1577   OnigCodePoint c;
1578 
1579   if (name >= name_end) return 0;
1580 
1581   p = name;
1582   while (p < name_end) {
1583     c = ONIGENC_MBC_TO_CODE(enc, p, name_end);
1584     if (! IS_ALLOWED_CODE_IN_CALLOUT_NAME(c))
1585       return 0;
1586 
1587     if (p == name) {
1588       if (c >= '0' && c <= '9') return 0;
1589     }
1590 
1591     p += ONIGENC_MBC_ENC_LEN(enc, p);
1592   }
1593 
1594   return 1;
1595 }
1596 
1597 static int
is_allowed_callout_tag_name(OnigEncoding enc,UChar * name,UChar * name_end)1598 is_allowed_callout_tag_name(OnigEncoding enc, UChar* name, UChar* name_end)
1599 {
1600   UChar* p;
1601   OnigCodePoint c;
1602 
1603   if (name >= name_end) return 0;
1604 
1605   p = name;
1606   while (p < name_end) {
1607     c = ONIGENC_MBC_TO_CODE(enc, p, name_end);
1608     if (! IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME(c))
1609       return 0;
1610 
1611     if (p == name) {
1612       if (c >= '0' && c <= '9') return 0;
1613     }
1614 
1615     p += ONIGENC_MBC_ENC_LEN(enc, p);
1616   }
1617 
1618   return 1;
1619 }
1620 
1621 extern int
onig_set_callout_of_name(OnigEncoding enc,OnigCalloutType callout_type,UChar * name,UChar * name_end,int in,OnigCalloutFunc start_func,OnigCalloutFunc end_func,int arg_num,unsigned int arg_types[],int opt_arg_num,OnigValue opt_defaults[])1622 onig_set_callout_of_name(OnigEncoding enc, OnigCalloutType callout_type,
1623                          UChar* name, UChar* name_end, int in,
1624                          OnigCalloutFunc start_func,
1625                          OnigCalloutFunc end_func,
1626                          int arg_num, unsigned int arg_types[],
1627                          int opt_arg_num, OnigValue opt_defaults[])
1628 {
1629   int r;
1630   int i;
1631   int j;
1632   int id;
1633   int is_not_single;
1634   CalloutNameEntry* e;
1635   CalloutNameListEntry* fe;
1636 
1637   if (callout_type != ONIG_CALLOUT_TYPE_SINGLE)
1638     return ONIGERR_INVALID_ARGUMENT;
1639 
1640   if (arg_num < 0 || arg_num > ONIG_CALLOUT_MAX_ARGS_NUM)
1641     return ONIGERR_INVALID_CALLOUT_ARG;
1642 
1643   if (opt_arg_num < 0 || opt_arg_num > arg_num)
1644     return ONIGERR_INVALID_CALLOUT_ARG;
1645 
1646   if (start_func == 0 && end_func == 0)
1647     return ONIGERR_INVALID_CALLOUT_ARG;
1648 
1649   if ((in & ONIG_CALLOUT_IN_PROGRESS) == 0 && (in & ONIG_CALLOUT_IN_RETRACTION) == 0)
1650     return ONIGERR_INVALID_CALLOUT_ARG;
1651 
1652   for (i = 0; i < arg_num; i++) {
1653     unsigned int t = arg_types[i];
1654     if (t == ONIG_TYPE_VOID)
1655       return ONIGERR_INVALID_CALLOUT_ARG;
1656     else {
1657       if (i >= arg_num - opt_arg_num) {
1658         if (t != ONIG_TYPE_LONG && t != ONIG_TYPE_CHAR && t != ONIG_TYPE_STRING &&
1659             t != ONIG_TYPE_TAG)
1660           return ONIGERR_INVALID_CALLOUT_ARG;
1661       }
1662       else {
1663         if (t != ONIG_TYPE_LONG) {
1664           t = t & ~ONIG_TYPE_LONG;
1665           if (t != ONIG_TYPE_CHAR && t != ONIG_TYPE_STRING && t != ONIG_TYPE_TAG)
1666             return ONIGERR_INVALID_CALLOUT_ARG;
1667         }
1668       }
1669     }
1670   }
1671 
1672   if (! is_allowed_callout_name(enc, name, name_end)) {
1673     return ONIGERR_INVALID_CALLOUT_NAME;
1674   }
1675 
1676   is_not_single = (callout_type != ONIG_CALLOUT_TYPE_SINGLE);
1677   id = callout_name_entry(&e, enc, is_not_single, name, name_end);
1678   if (id < 0) return id;
1679 
1680   r = ONIG_NORMAL;
1681   if (IS_NULL(GlobalCalloutNameList)) {
1682     r = make_callout_func_list(&GlobalCalloutNameList, 10);
1683     if (r != ONIG_NORMAL) return r;
1684   }
1685 
1686   while (id >= GlobalCalloutNameList->n) {
1687     int rid;
1688     r = callout_func_list_add(GlobalCalloutNameList, &rid);
1689     if (r != ONIG_NORMAL) return r;
1690   }
1691 
1692   fe = GlobalCalloutNameList->v + id;
1693   fe->type         = callout_type;
1694   fe->in           = in;
1695   fe->start_func   = start_func;
1696   fe->end_func     = end_func;
1697   fe->arg_num      = arg_num;
1698   fe->opt_arg_num  = opt_arg_num;
1699   fe->name         = e->name;
1700 
1701   for (i = 0; i < arg_num; i++) {
1702     fe->arg_types[i] = arg_types[i];
1703   }
1704   for (i = arg_num - opt_arg_num, j = 0; i < arg_num; i++, j++) {
1705     if (IS_NULL(opt_defaults)) return ONIGERR_INVALID_ARGUMENT;
1706     if (fe->arg_types[i] == ONIG_TYPE_STRING) {
1707       OnigValue* val;
1708       UChar* ds;
1709 
1710       val = opt_defaults + j;
1711       ds = onigenc_strdup(enc, val->s.start, val->s.end);
1712       CHECK_NULL_RETURN_MEMERR(ds);
1713 
1714       fe->opt_defaults[i].s.start = ds;
1715       fe->opt_defaults[i].s.end   = ds + (val->s.end - val->s.start);
1716     }
1717     else {
1718       fe->opt_defaults[i] = opt_defaults[j];
1719     }
1720   }
1721 
1722   r = id;
1723   return r;
1724 }
1725 
1726 static int
get_callout_name_id_by_name(OnigEncoding enc,int is_not_single,UChar * name,UChar * name_end,int * rid)1727 get_callout_name_id_by_name(OnigEncoding enc, int is_not_single,
1728                             UChar* name, UChar* name_end, int* rid)
1729 {
1730   int r;
1731   CalloutNameEntry* e;
1732 
1733   if (! is_allowed_callout_name(enc, name, name_end)) {
1734     return ONIGERR_INVALID_CALLOUT_NAME;
1735   }
1736 
1737   e = callout_name_find(enc, is_not_single, name, name_end);
1738   if (IS_NULL(e)) {
1739     return ONIGERR_UNDEFINED_CALLOUT_NAME;
1740   }
1741 
1742   r = ONIG_NORMAL;
1743   *rid = e->id;
1744 
1745   return r;
1746 }
1747 
1748 extern OnigCalloutFunc
onig_get_callout_start_func(regex_t * reg,int callout_num)1749 onig_get_callout_start_func(regex_t* reg, int callout_num)
1750 {
1751   /* If used for callouts of contents, return 0. */
1752   CalloutListEntry* e;
1753 
1754   e = onig_reg_callout_list_at(reg, callout_num);
1755   CHECK_NULL_RETURN(e);
1756   return e->start_func;
1757 }
1758 
1759 extern const UChar*
onig_get_callout_tag_start(regex_t * reg,int callout_num)1760 onig_get_callout_tag_start(regex_t* reg, int callout_num)
1761 {
1762   CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num);
1763   CHECK_NULL_RETURN(e);
1764   return e->tag_start;
1765 }
1766 
1767 extern const UChar*
onig_get_callout_tag_end(regex_t * reg,int callout_num)1768 onig_get_callout_tag_end(regex_t* reg, int callout_num)
1769 {
1770   CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num);
1771   CHECK_NULL_RETURN(e);
1772   return e->tag_end;
1773 }
1774 
1775 
1776 extern OnigCalloutType
onig_get_callout_type_by_name_id(int name_id)1777 onig_get_callout_type_by_name_id(int name_id)
1778 {
1779   if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
1780     return 0;
1781 
1782   return GlobalCalloutNameList->v[name_id].type;
1783 }
1784 
1785 extern OnigCalloutFunc
onig_get_callout_start_func_by_name_id(int name_id)1786 onig_get_callout_start_func_by_name_id(int name_id)
1787 {
1788   if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
1789     return 0;
1790 
1791   return GlobalCalloutNameList->v[name_id].start_func;
1792 }
1793 
1794 extern OnigCalloutFunc
onig_get_callout_end_func_by_name_id(int name_id)1795 onig_get_callout_end_func_by_name_id(int name_id)
1796 {
1797   if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
1798     return 0;
1799 
1800   return GlobalCalloutNameList->v[name_id].end_func;
1801 }
1802 
1803 extern int
onig_get_callout_in_by_name_id(int name_id)1804 onig_get_callout_in_by_name_id(int name_id)
1805 {
1806   if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
1807     return 0;
1808 
1809   return GlobalCalloutNameList->v[name_id].in;
1810 }
1811 
1812 static int
get_callout_arg_num_by_name_id(int name_id)1813 get_callout_arg_num_by_name_id(int name_id)
1814 {
1815   return GlobalCalloutNameList->v[name_id].arg_num;
1816 }
1817 
1818 static int
get_callout_opt_arg_num_by_name_id(int name_id)1819 get_callout_opt_arg_num_by_name_id(int name_id)
1820 {
1821   return GlobalCalloutNameList->v[name_id].opt_arg_num;
1822 }
1823 
1824 static unsigned int
get_callout_arg_type_by_name_id(int name_id,int index)1825 get_callout_arg_type_by_name_id(int name_id, int index)
1826 {
1827   return GlobalCalloutNameList->v[name_id].arg_types[index];
1828 }
1829 
1830 static OnigValue
get_callout_opt_default_by_name_id(int name_id,int index)1831 get_callout_opt_default_by_name_id(int name_id, int index)
1832 {
1833   return GlobalCalloutNameList->v[name_id].opt_defaults[index];
1834 }
1835 
1836 extern UChar*
onig_get_callout_name_by_name_id(int name_id)1837 onig_get_callout_name_by_name_id(int name_id)
1838 {
1839   if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
1840     return 0;
1841 
1842   return GlobalCalloutNameList->v[name_id].name;
1843 }
1844 
1845 extern int
onig_global_callout_names_free(void)1846 onig_global_callout_names_free(void)
1847 {
1848   free_callout_func_list(GlobalCalloutNameList);
1849   GlobalCalloutNameList = 0;
1850 
1851   global_callout_name_table_free();
1852   return ONIG_NORMAL;
1853 }
1854 
1855 
/* Per-regex table mapping a callout tag name to a CalloutTagVal. */
typedef st_table   CalloutTagTable;
/* Table value: the callout number of the tagged callout (see
   onig_get_callout_num_by_tag, which returns it as int). */
typedef intptr_t   CalloutTagVal;

/* Bit set in a callout list entry's flag when a tag refers to that entry. */
#define CALLOUT_TAG_LIST_FLAG_TAG_EXIST     (1<<0)
1860 
1861 static int
i_callout_callout_list_set(UChar * key,CalloutTagVal e,void * arg)1862 i_callout_callout_list_set(UChar* key, CalloutTagVal e, void* arg)
1863 {
1864   int num;
1865   RegexExt* ext = (RegexExt* )arg;
1866 
1867   num = (int )e - 1;
1868   ext->callout_list[num].flag |= CALLOUT_TAG_LIST_FLAG_TAG_EXIST;
1869   return ST_CONTINUE;
1870 }
1871 
1872 static int
setup_ext_callout_list_values(regex_t * reg)1873 setup_ext_callout_list_values(regex_t* reg)
1874 {
1875   int i, j;
1876   RegexExt* ext;
1877 
1878   ext = reg->extp;
1879   if (IS_NOT_NULL(ext->tag_table)) {
1880     onig_st_foreach((CalloutTagTable *)ext->tag_table, i_callout_callout_list_set,
1881                     (st_data_t )ext);
1882   }
1883 
1884   for (i = 0; i < ext->callout_num; i++) {
1885     CalloutListEntry* e = ext->callout_list + i;
1886     if (e->of == ONIG_CALLOUT_OF_NAME) {
1887       for (j = 0; j < e->u.arg.num; j++) {
1888         if (e->u.arg.types[j] == ONIG_TYPE_TAG) {
1889           UChar* start;
1890           UChar* end;
1891           int num;
1892           start = e->u.arg.vals[j].s.start;
1893           end   = e->u.arg.vals[j].s.end;
1894           num = onig_get_callout_num_by_tag(reg, start, end);
1895           if (num < 0) return num;
1896           e->u.arg.vals[j].tag = num;
1897         }
1898       }
1899     }
1900   }
1901 
1902   return ONIG_NORMAL;
1903 }
1904 
1905 extern int
onig_callout_tag_is_exist_at_callout_num(regex_t * reg,int callout_num)1906 onig_callout_tag_is_exist_at_callout_num(regex_t* reg, int callout_num)
1907 {
1908   RegexExt* ext = reg->extp;
1909 
1910   if (IS_NULL(ext) || IS_NULL(ext->callout_list)) return 0;
1911   if (callout_num > ext->callout_num) return 0;
1912 
1913   return (ext->callout_list[callout_num].flag &
1914           CALLOUT_TAG_LIST_FLAG_TAG_EXIST) != 0;
1915 }
1916 
1917 static int
i_free_callout_tag_entry(UChar * key,CalloutTagVal e,void * arg ARG_UNUSED)1918 i_free_callout_tag_entry(UChar* key, CalloutTagVal e, void* arg ARG_UNUSED)
1919 {
1920   xfree(key);
1921   return ST_DELETE;
1922 }
1923 
1924 static int
callout_tag_table_clear(CalloutTagTable * t)1925 callout_tag_table_clear(CalloutTagTable* t)
1926 {
1927   if (IS_NOT_NULL(t)) {
1928     onig_st_foreach(t, i_free_callout_tag_entry, 0);
1929   }
1930   return 0;
1931 }
1932 
1933 extern int
onig_callout_tag_table_free(void * table)1934 onig_callout_tag_table_free(void* table)
1935 {
1936   CalloutTagTable* t = (CalloutTagTable* )table;
1937 
1938   if (IS_NOT_NULL(t)) {
1939     int r = callout_tag_table_clear(t);
1940     if (r != 0) return r;
1941 
1942     onig_st_free_table(t);
1943   }
1944 
1945   return 0;
1946 }
1947 
1948 extern int
onig_get_callout_num_by_tag(regex_t * reg,const UChar * tag,const UChar * tag_end)1949 onig_get_callout_num_by_tag(regex_t* reg,
1950                             const UChar* tag, const UChar* tag_end)
1951 {
1952   int r;
1953   RegexExt* ext;
1954   CalloutTagVal e;
1955 
1956   ext = reg->extp;
1957   if (IS_NULL(ext) || IS_NULL(ext->tag_table))
1958     return ONIGERR_INVALID_CALLOUT_TAG_NAME;
1959 
1960   r = onig_st_lookup_strend(ext->tag_table, tag, tag_end,
1961                             (HashDataType* )((void* )(&e)));
1962   if (r == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME;
1963   return (int )e;
1964 }
1965 
1966 static CalloutTagVal
callout_tag_find(CalloutTagTable * t,const UChar * name,const UChar * name_end)1967 callout_tag_find(CalloutTagTable* t, const UChar* name, const UChar* name_end)
1968 {
1969   CalloutTagVal e;
1970 
1971   e = -1;
1972   if (IS_NOT_NULL(t)) {
1973     onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));
1974   }
1975   return e;
1976 }
1977 
1978 static int
callout_tag_table_new(CalloutTagTable ** rt)1979 callout_tag_table_new(CalloutTagTable** rt)
1980 {
1981   CalloutTagTable* t;
1982 
1983   *rt = 0;
1984   t = onig_st_init_strend_table_with_size(INIT_TAG_NAMES_ALLOC_NUM);
1985   CHECK_NULL_RETURN_MEMERR(t);
1986 
1987   *rt = t;
1988   return ONIG_NORMAL;
1989 }
1990 
1991 static int
callout_tag_entry_raw(ParseEnv * env,CalloutTagTable * t,UChar * name,UChar * name_end,CalloutTagVal entry_val)1992 callout_tag_entry_raw(ParseEnv* env, CalloutTagTable* t, UChar* name,
1993                       UChar* name_end, CalloutTagVal entry_val)
1994 {
1995   int r;
1996   CalloutTagVal val;
1997 
1998   if (name_end - name <= 0)
1999     return ONIGERR_INVALID_CALLOUT_TAG_NAME;
2000 
2001   val = callout_tag_find(t, name, name_end);
2002   if (val >= 0) {
2003     onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME,
2004                                    name, name_end);
2005     return ONIGERR_MULTIPLEX_DEFINED_NAME;
2006   }
2007 
2008   r = onig_st_insert_strend(t, name, name_end, (HashDataType )entry_val);
2009   if (r < 0) return r;
2010 
2011   return ONIG_NORMAL;
2012 }
2013 
2014 static int
ext_ensure_tag_table(regex_t * reg)2015 ext_ensure_tag_table(regex_t* reg)
2016 {
2017   int r;
2018   RegexExt* ext;
2019   CalloutTagTable* t;
2020 
2021   ext = onig_get_regex_ext(reg);
2022   CHECK_NULL_RETURN_MEMERR(ext);
2023 
2024   if (IS_NULL(ext->tag_table)) {
2025     r = callout_tag_table_new(&t);
2026     if (r != ONIG_NORMAL) return r;
2027 
2028     ext->tag_table = t;
2029   }
2030 
2031   return ONIG_NORMAL;
2032 }
2033 
2034 static int
callout_tag_entry(ParseEnv * env,regex_t * reg,UChar * name,UChar * name_end,CalloutTagVal entry_val)2035 callout_tag_entry(ParseEnv* env, regex_t* reg, UChar* name, UChar* name_end,
2036                   CalloutTagVal entry_val)
2037 {
2038   int r;
2039   RegexExt* ext;
2040   CalloutListEntry* e;
2041 
2042   r = ext_ensure_tag_table(reg);
2043   if (r != ONIG_NORMAL) return r;
2044 
2045   ext = onig_get_regex_ext(reg);
2046   CHECK_NULL_RETURN_MEMERR(ext);
2047   r = callout_tag_entry_raw(env, ext->tag_table, name, name_end, entry_val);
2048 
2049   e = onig_reg_callout_list_at(reg, (int )entry_val);
2050   CHECK_NULL_RETURN_MEMERR(e);
2051   e->tag_start = name;
2052   e->tag_end   = name_end;
2053 
2054   return r;
2055 }
2056 
2057 #endif /* USE_CALLOUT */
2058 
2059 
2060 #define INIT_PARSEENV_MEMENV_ALLOC_SIZE   16
2061 
2062 static void
scan_env_clear(ParseEnv * env)2063 scan_env_clear(ParseEnv* env)
2064 {
2065   MEM_STATUS_CLEAR(env->cap_history);
2066   MEM_STATUS_CLEAR(env->backtrack_mem);
2067   MEM_STATUS_CLEAR(env->backrefed_mem);
2068   env->error      = (UChar* )NULL;
2069   env->error_end  = (UChar* )NULL;
2070   env->num_call   = 0;
2071 
2072 #ifdef USE_CALL
2073   env->unset_addr_list = NULL;
2074   env->has_call_zero   = 0;
2075 #endif
2076 
2077   env->num_mem    = 0;
2078   env->num_named  = 0;
2079   env->mem_alloc  = 0;
2080   env->mem_env_dynamic = (MemEnv* )NULL;
2081 
2082   xmemset(env->mem_env_static, 0, sizeof(env->mem_env_static));
2083 
2084   env->parse_depth      = 0;
2085 #ifdef ONIG_DEBUG_PARSE
2086   env->max_parse_depth  = 0;
2087 #endif
2088   env->backref_num      = 0;
2089   env->keep_num         = 0;
2090   env->id_num           = 0;
2091   env->save_alloc_num   = 0;
2092   env->saves            = 0;
2093 }
2094 
2095 static int
scan_env_add_mem_entry(ParseEnv * env)2096 scan_env_add_mem_entry(ParseEnv* env)
2097 {
2098   int i, need, alloc;
2099   MemEnv* p;
2100 
2101   need = env->num_mem + 1;
2102   if (need > MaxCaptureNum && MaxCaptureNum != 0)
2103     return ONIGERR_TOO_MANY_CAPTURES;
2104 
2105   if (need >= PARSEENV_MEMENV_SIZE) {
2106     if (env->mem_alloc <= need) {
2107       if (IS_NULL(env->mem_env_dynamic)) {
2108         alloc = INIT_PARSEENV_MEMENV_ALLOC_SIZE;
2109         p = (MemEnv* )xmalloc(sizeof(MemEnv) * alloc);
2110         CHECK_NULL_RETURN_MEMERR(p);
2111         xmemcpy(p, env->mem_env_static, sizeof(env->mem_env_static));
2112       }
2113       else {
2114         alloc = env->mem_alloc * 2;
2115         p = (MemEnv* )xrealloc(env->mem_env_dynamic, sizeof(MemEnv) * alloc);
2116         CHECK_NULL_RETURN_MEMERR(p);
2117       }
2118 
2119       for (i = env->num_mem + 1; i < alloc; i++) {
2120         p[i].mem_node = NULL_NODE;
2121         p[i].empty_repeat_node = NULL_NODE;
2122       }
2123 
2124       env->mem_env_dynamic = p;
2125       env->mem_alloc = alloc;
2126     }
2127   }
2128 
2129   env->num_mem++;
2130   return env->num_mem;
2131 }
2132 
2133 static int
scan_env_set_mem_node(ParseEnv * env,int num,Node * node)2134 scan_env_set_mem_node(ParseEnv* env, int num, Node* node)
2135 {
2136   if (env->num_mem >= num)
2137     PARSEENV_MEMENV(env)[num].mem_node = node;
2138   else
2139     return ONIGERR_PARSER_BUG;
2140   return 0;
2141 }
2142 
/*
 * Release everything owned by node, but not the node structure itself.
 * A NULL node is ignored.  What is released depends on the node type;
 * see the per-case comments below.
 */
static void
node_free_body(Node* node)
{
  if (IS_NULL(node)) return ;

  switch (NODE_TYPE(node)) {
  case NODE_STRING:
    /* Free the string storage only when it is a separate buffer
       (non-zero capacity, non-NULL, and not the in-node buf). */
    if (STR_(node)->capacity != 0 &&
        IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) {
      xfree(STR_(node)->s);
    }
    break;

  case NODE_LIST:
  case NODE_ALT:
    /* Free the first element, then walk the CDR chain freeing each element
       and each following cell.  The head cell (the original node) is left
       for the caller to free. */
    onig_node_free(NODE_CAR(node));
    node = NODE_CDR(node);
    while (IS_NOT_NULL(node)) {
      /* Fetch the successor before the cell is released. */
      Node* next = NODE_CDR(node);
      onig_node_free(NODE_CAR(node));
      xfree(node);
      node = next;
    }
    break;

  case NODE_CCLASS:
    {
      CClassNode* cc = CCLASS_(node);

      /* Only the mbuf buffer is released for a character class. */
      if (cc->mbuf)
        bbuf_free(cc->mbuf);
    }
    break;

  case NODE_BACKREF:
    /* Only the dynamically allocated reference array needs freeing. */
    if (IS_NOT_NULL(BACKREF_(node)->back_dynamic))
      xfree(BACKREF_(node)->back_dynamic);
    break;

  case NODE_BAG:
    if (NODE_BODY(node))
      onig_node_free(NODE_BODY(node));

    {
      BagNode* en = BAG_(node);
      /* An if-else bag additionally owns its Then and Else subtrees. */
      if (en->type == BAG_IF_ELSE) {
        onig_node_free(en->te.Then);
        onig_node_free(en->te.Else);
      }
    }
    break;

  case NODE_QUANT:
    if (NODE_BODY(node))
      onig_node_free(NODE_BODY(node));
    break;

  case NODE_ANCHOR:
    /* An anchor owns its body and its lead_node. */
    if (NODE_BODY(node))
      onig_node_free(NODE_BODY(node));
    if (IS_NOT_NULL(ANCHOR_(node)->lead_node))
      onig_node_free(ANCHOR_(node)->lead_node);
    break;

  case NODE_CTYPE:
  case NODE_CALL:
  case NODE_GIMMICK:
    /* Nothing is released for these node types. */
    break;
  }
}
2213 
2214 extern void
onig_node_free(Node * node)2215 onig_node_free(Node* node)
2216 {
2217   if (IS_NULL(node)) return ;
2218 
2219 #ifdef DEBUG_NODE_FREE
2220   fprintf(stderr, "onig_node_free: %p\n", node);
2221 #endif
2222 
2223   node_free_body(node);
2224   xfree(node);
2225 }
2226 
2227 static void
cons_node_free_alone(Node * node)2228 cons_node_free_alone(Node* node)
2229 {
2230   NODE_CAR(node) = 0;
2231   NODE_CDR(node) = 0;
2232   onig_node_free(node);
2233 }
2234 
2235 static Node*
node_new(void)2236 node_new(void)
2237 {
2238   Node* node;
2239 
2240   node = (Node* )xmalloc(sizeof(Node));
2241   CHECK_NULL_RETURN(node);
2242   xmemset(node, 0, sizeof(*node));
2243 
2244 #ifdef DEBUG_NODE_FREE
2245   fprintf(stderr, "node_new: %p\n", node);
2246 #endif
2247   return node;
2248 }
2249 
2250 extern int
onig_node_copy(Node ** rcopy,Node * from)2251 onig_node_copy(Node** rcopy, Node* from)
2252 {
2253   int r;
2254   Node* copy;
2255 
2256   *rcopy = NULL_NODE;
2257 
2258   switch (NODE_TYPE(from)) {
2259   case NODE_LIST:
2260   case NODE_ALT:
2261   case NODE_ANCHOR:
2262     /* These node's link to other nodes are processed by caller. */
2263     break;
2264   case NODE_STRING:
2265   case NODE_CCLASS:
2266   case NODE_CTYPE:
2267     /* Fixed contents after copy. */
2268     break;
2269   default:
2270     /* Not supported yet. */
2271     return ONIGERR_TYPE_BUG;
2272     break;
2273   }
2274 
2275   copy = node_new();
2276   CHECK_NULL_RETURN_MEMERR(copy);
2277   xmemcpy(copy, from, sizeof(*copy));
2278 
2279   switch (NODE_TYPE(copy)) {
2280   case NODE_STRING:
2281     r = onig_node_str_set(copy, STR_(from)->s, STR_(from)->end, FALSE);
2282     if (r != 0) {
2283     err:
2284       onig_node_free(copy);
2285       return r;
2286     }
2287     break;
2288 
2289   case NODE_CCLASS:
2290     {
2291       CClassNode *fcc, *tcc;
2292 
2293       fcc = CCLASS_(from);
2294       tcc = CCLASS_(copy);
2295       if (IS_NOT_NULL(fcc->mbuf)) {
2296         r = bbuf_clone(&(tcc->mbuf), fcc->mbuf);
2297         if (r != 0) goto err;
2298       }
2299     }
2300     break;
2301 
2302   default:
2303     break;
2304   }
2305 
2306   *rcopy = copy;
2307   return ONIG_NORMAL;
2308 }
2309 
2310 
2311 static void
initialize_cclass(CClassNode * cc)2312 initialize_cclass(CClassNode* cc)
2313 {
2314   BITSET_CLEAR(cc->bs);
2315   cc->flags = 0;
2316   cc->mbuf  = NULL;
2317 }
2318 
2319 static Node*
node_new_cclass(void)2320 node_new_cclass(void)
2321 {
2322   Node* node = node_new();
2323   CHECK_NULL_RETURN(node);
2324 
2325   NODE_SET_TYPE(node, NODE_CCLASS);
2326   initialize_cclass(CCLASS_(node));
2327   return node;
2328 }
2329 
2330 static Node*
node_new_ctype(int type,int not,OnigOptionType options)2331 node_new_ctype(int type, int not, OnigOptionType options)
2332 {
2333   Node* node = node_new();
2334   CHECK_NULL_RETURN(node);
2335 
2336   NODE_SET_TYPE(node, NODE_CTYPE);
2337   CTYPE_(node)->ctype   = type;
2338   CTYPE_(node)->not     = not;
2339   CTYPE_(node)->ascii_mode = OPTON_IS_ASCII_MODE_CTYPE(type, options);
2340   return node;
2341 }
2342 
2343 static Node*
node_new_anychar(OnigOptionType options)2344 node_new_anychar(OnigOptionType options)
2345 {
2346   Node* node;
2347 
2348   node = node_new_ctype(CTYPE_ANYCHAR, FALSE, options);
2349   CHECK_NULL_RETURN(node);
2350 
2351   if (OPTON_MULTILINE(options))
2352     NODE_STATUS_ADD(node, MULTILINE);
2353   return node;
2354 }
2355 
2356 static int
node_new_no_newline(Node ** node,ParseEnv * env)2357 node_new_no_newline(Node** node, ParseEnv* env)
2358 {
2359   Node* n;
2360 
2361   n = node_new_anychar(ONIG_OPTION_NONE);
2362   CHECK_NULL_RETURN_MEMERR(n);
2363   *node = n;
2364   return 0;
2365 }
2366 
2367 static int
node_new_true_anychar(Node ** node)2368 node_new_true_anychar(Node** node)
2369 {
2370   Node* n;
2371 
2372   n = node_new_anychar(ONIG_OPTION_MULTILINE);
2373   CHECK_NULL_RETURN_MEMERR(n);
2374   *node = n;
2375   return 0;
2376 }
2377 
2378 static Node*
node_new_list(Node * left,Node * right)2379 node_new_list(Node* left, Node* right)
2380 {
2381   Node* node = node_new();
2382   CHECK_NULL_RETURN(node);
2383 
2384   NODE_SET_TYPE(node, NODE_LIST);
2385   NODE_CAR(node)  = left;
2386   NODE_CDR(node) = right;
2387   return node;
2388 }
2389 
2390 extern Node*
onig_node_new_list(Node * left,Node * right)2391 onig_node_new_list(Node* left, Node* right)
2392 {
2393   return node_new_list(left, right);
2394 }
2395 
2396 extern Node*
onig_node_new_alt(Node * left,Node * right)2397 onig_node_new_alt(Node* left, Node* right)
2398 {
2399   Node* node = node_new();
2400   CHECK_NULL_RETURN(node);
2401 
2402   NODE_SET_TYPE(node, NODE_ALT);
2403   NODE_CAR(node)  = left;
2404   NODE_CDR(node) = right;
2405   return node;
2406 }
2407 
2408 static Node*
make_list_or_alt(NodeType type,int n,Node * ns[])2409 make_list_or_alt(NodeType type, int n, Node* ns[])
2410 {
2411   Node* r;
2412 
2413   if (n <= 0) return NULL_NODE;
2414 
2415   if (n == 1) {
2416     r = node_new();
2417     CHECK_NULL_RETURN(r);
2418     NODE_SET_TYPE(r, type);
2419     NODE_CAR(r) = ns[0];
2420     NODE_CDR(r) = NULL_NODE;
2421   }
2422   else {
2423     Node* right;
2424 
2425     r = node_new();
2426     CHECK_NULL_RETURN(r);
2427 
2428     right = make_list_or_alt(type, n - 1, ns + 1);
2429     if (IS_NULL(right)) {
2430       onig_node_free(r);
2431       return NULL_NODE;
2432     }
2433 
2434     NODE_SET_TYPE(r, type);
2435     NODE_CAR(r) = ns[0];
2436     NODE_CDR(r) = right;
2437   }
2438 
2439   return r;
2440 }
2441 
2442 static Node*
make_list(int n,Node * ns[])2443 make_list(int n, Node* ns[])
2444 {
2445   return make_list_or_alt(NODE_LIST, n, ns);
2446 }
2447 
2448 static Node*
make_alt(int n,Node * ns[])2449 make_alt(int n, Node* ns[])
2450 {
2451   return make_list_or_alt(NODE_ALT, n, ns);
2452 }
2453 
2454 static Node*
node_new_anchor(int type)2455 node_new_anchor(int type)
2456 {
2457   Node* node;
2458 
2459   node = node_new();
2460   CHECK_NULL_RETURN(node);
2461 
2462   NODE_SET_TYPE(node, NODE_ANCHOR);
2463   ANCHOR_(node)->type       = type;
2464   ANCHOR_(node)->char_min_len = 0;
2465   ANCHOR_(node)->char_max_len = INFINITE_LEN;
2466   ANCHOR_(node)->ascii_mode = 0;
2467   ANCHOR_(node)->lead_node  = NULL_NODE;
2468   return node;
2469 }
2470 
2471 static Node*
node_new_anchor_with_options(int type,OnigOptionType options)2472 node_new_anchor_with_options(int type, OnigOptionType options)
2473 {
2474   int ascii_mode;
2475   Node* node;
2476 
2477   node = node_new_anchor(type);
2478   CHECK_NULL_RETURN(node);
2479 
2480   ascii_mode = OPTON_WORD_ASCII(options) && IS_WORD_ANCHOR_TYPE(type) ? 1 : 0;
2481   ANCHOR_(node)->ascii_mode = ascii_mode;
2482 
2483   if (type == ANCR_TEXT_SEGMENT_BOUNDARY ||
2484       type == ANCR_NO_TEXT_SEGMENT_BOUNDARY) {
2485     if (OPTON_TEXT_SEGMENT_WORD(options))
2486       NODE_STATUS_ADD(node, TEXT_SEGMENT_WORD);
2487   }
2488 
2489   return node;
2490 }
2491 
2492 static Node*
node_new_backref(int back_num,int * backrefs,int by_name,int exist_level,int nest_level,ParseEnv * env)2493 node_new_backref(int back_num, int* backrefs, int by_name,
2494 #ifdef USE_BACKREF_WITH_LEVEL
2495                  int exist_level, int nest_level,
2496 #endif
2497                  ParseEnv* env)
2498 {
2499   int i;
2500   Node* node;
2501 
2502   node = node_new();
2503   CHECK_NULL_RETURN(node);
2504 
2505   NODE_SET_TYPE(node, NODE_BACKREF);
2506   BACKREF_(node)->back_num = back_num;
2507   BACKREF_(node)->back_dynamic = (int* )NULL;
2508   if (by_name != 0)
2509     NODE_STATUS_ADD(node, BY_NAME);
2510 
2511   if (OPTON_IGNORECASE(env->options))
2512     NODE_STATUS_ADD(node, IGNORECASE);
2513 
2514 #ifdef USE_BACKREF_WITH_LEVEL
2515   if (exist_level != 0) {
2516     NODE_STATUS_ADD(node, NEST_LEVEL);
2517     BACKREF_(node)->nest_level  = nest_level;
2518   }
2519 #endif
2520 
2521   for (i = 0; i < back_num; i++) {
2522     if (backrefs[i] <= env->num_mem &&
2523         IS_NULL(PARSEENV_MEMENV(env)[backrefs[i]].mem_node)) {
2524       NODE_STATUS_ADD(node, RECURSION);   /* /...(\1).../ */
2525       break;
2526     }
2527   }
2528 
2529   if (back_num <= NODE_BACKREFS_SIZE) {
2530     for (i = 0; i < back_num; i++)
2531       BACKREF_(node)->back_static[i] = backrefs[i];
2532   }
2533   else {
2534     int* p = (int* )xmalloc(sizeof(int) * back_num);
2535     if (IS_NULL(p)) {
2536       onig_node_free(node);
2537       return NULL;
2538     }
2539     BACKREF_(node)->back_dynamic = p;
2540     for (i = 0; i < back_num; i++)
2541       p[i] = backrefs[i];
2542   }
2543 
2544   env->backref_num++;
2545   return node;
2546 }
2547 
2548 static Node*
node_new_backref_checker(int back_num,int * backrefs,int by_name,int exist_level,int nest_level,ParseEnv * env)2549 node_new_backref_checker(int back_num, int* backrefs, int by_name,
2550 #ifdef USE_BACKREF_WITH_LEVEL
2551                          int exist_level, int nest_level,
2552 #endif
2553                          ParseEnv* env)
2554 {
2555   Node* node;
2556 
2557   node = node_new_backref(back_num, backrefs, by_name,
2558 #ifdef USE_BACKREF_WITH_LEVEL
2559                           exist_level, nest_level,
2560 #endif
2561                           env);
2562   CHECK_NULL_RETURN(node);
2563 
2564   NODE_STATUS_ADD(node, CHECKER);
2565   return node;
2566 }
2567 
2568 #ifdef USE_CALL
2569 static Node*
node_new_call(UChar * name,UChar * name_end,int gnum,int by_number)2570 node_new_call(UChar* name, UChar* name_end, int gnum, int by_number)
2571 {
2572   Node* node = node_new();
2573   CHECK_NULL_RETURN(node);
2574 
2575   NODE_SET_TYPE(node, NODE_CALL);
2576   CALL_(node)->by_number   = by_number;
2577   CALL_(node)->name        = name;
2578   CALL_(node)->name_end    = name_end;
2579   CALL_(node)->called_gnum = gnum;
2580   CALL_(node)->entry_count = 1;
2581   return node;
2582 }
2583 #endif
2584 
2585 static Node*
node_new_quantifier(int lower,int upper,int by_number)2586 node_new_quantifier(int lower, int upper, int by_number)
2587 {
2588   Node* node = node_new();
2589   CHECK_NULL_RETURN(node);
2590 
2591   NODE_SET_TYPE(node, NODE_QUANT);
2592   QUANT_(node)->lower            = lower;
2593   QUANT_(node)->upper            = upper;
2594   QUANT_(node)->greedy           = 1;
2595   QUANT_(node)->emptiness        = BODY_IS_NOT_EMPTY;
2596   QUANT_(node)->head_exact       = NULL_NODE;
2597   QUANT_(node)->next_head_exact  = NULL_NODE;
2598   QUANT_(node)->include_referred = 0;
2599   QUANT_(node)->empty_status_mem = 0;
2600   if (by_number != 0)
2601     NODE_STATUS_ADD(node, BY_NUMBER);
2602 
2603   return node;
2604 }
2605 
2606 static Node*
node_new_bag(enum BagType type)2607 node_new_bag(enum BagType type)
2608 {
2609   Node* node = node_new();
2610   CHECK_NULL_RETURN(node);
2611 
2612   NODE_SET_TYPE(node, NODE_BAG);
2613   BAG_(node)->type = type;
2614 
2615   switch (type) {
2616   case BAG_MEMORY:
2617     BAG_(node)->m.regnum       =  0;
2618     BAG_(node)->m.called_addr  = -1;
2619     BAG_(node)->m.entry_count  =  1;
2620     BAG_(node)->m.called_state =  0;
2621     break;
2622 
2623   case BAG_OPTION:
2624     BAG_(node)->o.options =  0;
2625     break;
2626 
2627   case BAG_STOP_BACKTRACK:
2628     break;
2629 
2630   case BAG_IF_ELSE:
2631     BAG_(node)->te.Then = 0;
2632     BAG_(node)->te.Else = 0;
2633     break;
2634   }
2635 
2636   BAG_(node)->opt_count = 0;
2637   return node;
2638 }
2639 
2640 extern Node*
onig_node_new_bag(enum BagType type)2641 onig_node_new_bag(enum BagType type)
2642 {
2643   return node_new_bag(type);
2644 }
2645 
2646 static Node*
node_new_bag_if_else(Node * cond,Node * Then,Node * Else)2647 node_new_bag_if_else(Node* cond, Node* Then, Node* Else)
2648 {
2649   Node* n;
2650   n = node_new_bag(BAG_IF_ELSE);
2651   CHECK_NULL_RETURN(n);
2652 
2653   NODE_BODY(n) = cond;
2654   BAG_(n)->te.Then = Then;
2655   BAG_(n)->te.Else = Else;
2656   return n;
2657 }
2658 
2659 static Node*
node_new_memory(int is_named)2660 node_new_memory(int is_named)
2661 {
2662   Node* node = node_new_bag(BAG_MEMORY);
2663   CHECK_NULL_RETURN(node);
2664   if (is_named != 0)
2665     NODE_STATUS_ADD(node, NAMED_GROUP);
2666 
2667   return node;
2668 }
2669 
2670 static Node*
node_new_option(OnigOptionType option)2671 node_new_option(OnigOptionType option)
2672 {
2673   Node* node = node_new_bag(BAG_OPTION);
2674   CHECK_NULL_RETURN(node);
2675   BAG_(node)->o.options = option;
2676   return node;
2677 }
2678 
2679 static Node*
node_new_group(Node * content)2680 node_new_group(Node* content)
2681 {
2682   Node* node;
2683 
2684   node = node_new();
2685   CHECK_NULL_RETURN(node);
2686   NODE_SET_TYPE(node, NODE_LIST);
2687   NODE_CAR(node) = content;
2688   NODE_CDR(node) = NULL_NODE;
2689 
2690   return node;
2691 }
2692 
2693 static Node*
node_drop_group(Node * group)2694 node_drop_group(Node* group)
2695 {
2696   Node* content;
2697 
2698   content = NODE_CAR(group);
2699   NODE_CAR(group) = NULL_NODE;
2700   onig_node_free(group);
2701   return content;
2702 }
2703 
2704 static int
node_set_fail(Node * node)2705 node_set_fail(Node* node)
2706 {
2707   NODE_SET_TYPE(node, NODE_GIMMICK);
2708   GIMMICK_(node)->type = GIMMICK_FAIL;
2709   return ONIG_NORMAL;
2710 }
2711 
2712 static int
node_new_fail(Node ** node,ParseEnv * env)2713 node_new_fail(Node** node, ParseEnv* env)
2714 {
2715   *node = node_new();
2716   CHECK_NULL_RETURN_MEMERR(*node);
2717 
2718   return node_set_fail(*node);
2719 }
2720 
2721 extern int
onig_node_reset_fail(Node * node)2722 onig_node_reset_fail(Node* node)
2723 {
2724   node_free_body(node);
2725   return node_set_fail(node);
2726 }
2727 
2728 static int
node_new_save_gimmick(Node ** node,enum SaveType save_type,ParseEnv * env)2729 node_new_save_gimmick(Node** node, enum SaveType save_type, ParseEnv* env)
2730 {
2731   int id;
2732 
2733   ID_ENTRY(env, id);
2734 
2735   *node = node_new();
2736   CHECK_NULL_RETURN_MEMERR(*node);
2737 
2738   NODE_SET_TYPE(*node, NODE_GIMMICK);
2739   GIMMICK_(*node)->id   = id;
2740   GIMMICK_(*node)->type = GIMMICK_SAVE;
2741   GIMMICK_(*node)->detail_type = (int )save_type;
2742 
2743   return ONIG_NORMAL;
2744 }
2745 
2746 static int
node_new_update_var_gimmick(Node ** node,enum UpdateVarType update_var_type,int id,ParseEnv * env)2747 node_new_update_var_gimmick(Node** node, enum UpdateVarType update_var_type,
2748                             int id, ParseEnv* env)
2749 {
2750   *node = node_new();
2751   CHECK_NULL_RETURN_MEMERR(*node);
2752 
2753   NODE_SET_TYPE(*node, NODE_GIMMICK);
2754   GIMMICK_(*node)->id   = id;
2755   GIMMICK_(*node)->type = GIMMICK_UPDATE_VAR;
2756   GIMMICK_(*node)->detail_type = (int )update_var_type;
2757 
2758   return ONIG_NORMAL;
2759 }
2760 
2761 static int
node_new_keep(Node ** node,ParseEnv * env)2762 node_new_keep(Node** node, ParseEnv* env)
2763 {
2764   int r;
2765 
2766   r = node_new_save_gimmick(node, SAVE_KEEP, env);
2767   if (r != 0) return r;
2768 
2769   env->keep_num++;
2770   return ONIG_NORMAL;
2771 }
2772 
2773 #ifdef USE_CALLOUT
2774 
2775 extern void
onig_free_reg_callout_list(int n,CalloutListEntry * list)2776 onig_free_reg_callout_list(int n, CalloutListEntry* list)
2777 {
2778   int i;
2779   int j;
2780 
2781   if (IS_NULL(list)) return ;
2782 
2783   for (i = 0; i < n; i++) {
2784     if (list[i].of == ONIG_CALLOUT_OF_NAME) {
2785       for (j = 0; j < list[i].u.arg.passed_num; j++) {
2786         if (list[i].u.arg.types[j] == ONIG_TYPE_STRING) {
2787           if (IS_NOT_NULL(list[i].u.arg.vals[j].s.start))
2788             xfree(list[i].u.arg.vals[j].s.start);
2789         }
2790       }
2791     }
2792     else { /* ONIG_CALLOUT_OF_CONTENTS */
2793       if (IS_NOT_NULL(list[i].u.content.start)) {
2794         xfree((void* )list[i].u.content.start);
2795       }
2796     }
2797   }
2798 
2799   xfree(list);
2800 }
2801 
2802 extern CalloutListEntry*
onig_reg_callout_list_at(regex_t * reg,int num)2803 onig_reg_callout_list_at(regex_t* reg, int num)
2804 {
2805   RegexExt* ext = reg->extp;
2806   CHECK_NULL_RETURN(ext);
2807 
2808   if (num <= 0 || num > ext->callout_num)
2809     return 0;
2810 
2811   num--;
2812   return ext->callout_list + num;
2813 }
2814 
2815 static int
reg_callout_list_entry(ParseEnv * env,int * rnum)2816 reg_callout_list_entry(ParseEnv* env, int* rnum)
2817 {
2818 #define INIT_CALLOUT_LIST_NUM  3
2819 
2820   int num;
2821   CalloutListEntry* list;
2822   CalloutListEntry* e;
2823   RegexExt* ext;
2824 
2825   ext = onig_get_regex_ext(env->reg);
2826   CHECK_NULL_RETURN_MEMERR(ext);
2827 
2828   if (IS_NULL(ext->callout_list)) {
2829     list = (CalloutListEntry* )xmalloc(sizeof(*list) * INIT_CALLOUT_LIST_NUM);
2830     CHECK_NULL_RETURN_MEMERR(list);
2831 
2832     ext->callout_list = list;
2833     ext->callout_list_alloc = INIT_CALLOUT_LIST_NUM;
2834     ext->callout_num = 0;
2835   }
2836 
2837   num = ext->callout_num + 1;
2838   if (num > ext->callout_list_alloc) {
2839     int alloc = ext->callout_list_alloc * 2;
2840     list = (CalloutListEntry* )xrealloc(ext->callout_list,
2841                                         sizeof(CalloutListEntry) * alloc);
2842     CHECK_NULL_RETURN_MEMERR(list);
2843 
2844     ext->callout_list       = list;
2845     ext->callout_list_alloc = alloc;
2846   }
2847 
2848   e = ext->callout_list + (num - 1);
2849 
2850   e->flag             = 0;
2851   e->of               = 0;
2852   e->in               = ONIG_CALLOUT_OF_CONTENTS;
2853   e->type             = 0;
2854   e->tag_start        = 0;
2855   e->tag_end          = 0;
2856   e->start_func       = 0;
2857   e->end_func         = 0;
2858   e->u.arg.num        = 0;
2859   e->u.arg.passed_num = 0;
2860 
2861   ext->callout_num = num;
2862   *rnum = num;
2863   return ONIG_NORMAL;
2864 }
2865 
2866 static int
node_new_callout(Node ** node,OnigCalloutOf callout_of,int num,int id,ParseEnv * env)2867 node_new_callout(Node** node, OnigCalloutOf callout_of, int num, int id,
2868                  ParseEnv* env)
2869 {
2870   *node = node_new();
2871   CHECK_NULL_RETURN_MEMERR(*node);
2872 
2873   NODE_SET_TYPE(*node, NODE_GIMMICK);
2874   GIMMICK_(*node)->id          = id;
2875   GIMMICK_(*node)->num         = num;
2876   GIMMICK_(*node)->type        = GIMMICK_CALLOUT;
2877   GIMMICK_(*node)->detail_type = (int )callout_of;
2878 
2879   return ONIG_NORMAL;
2880 }
2881 #endif
2882 
/*
 * Build the node tree for a text segment and store it in *node.
 *
 * ns[] holds the nodes that are currently owned by this function; each time
 * nodes are linked into a larger node the slots are updated so that the
 * error path frees every node exactly once.
 *
 * Returns ONIG_NORMAL on success.  On failure everything built so far is
 * freed, *node is left untouched, and ONIGERR_MEMORY (label err) or the
 * error of node_new_true_anychar() (label err1) is returned.
 */
static int
make_text_segment(Node** node, ParseEnv* env)
{
  int r;
  int i;
  Node* x;
  Node* ns[2];

  /* \X == (?>\O(?:\Y\O)*) */

  ns[1] = NULL_NODE;

  r = ONIGERR_MEMORY;
  /* Inner pair: no-text-segment-boundary anchor followed by any char. */
  ns[0] = node_new_anchor_with_options(ANCR_NO_TEXT_SEGMENT_BOUNDARY, env->options);
  if (IS_NULL(ns[0])) goto err;

  r = node_new_true_anychar(&ns[1]);
  if (r != 0) goto err1;

  x = make_list(2, ns);
  if (IS_NULL(x)) goto err;
  /* The list now owns both elements. */
  ns[0] = x;
  ns[1] = NULL_NODE;

  /* Repeat the inner pair zero or more times. */
  x = node_new_quantifier(0, INFINITE_REPEAT, TRUE);
  if (IS_NULL(x)) goto err;

  NODE_BODY(x) = ns[0];
  ns[0] = NULL_NODE;
  ns[1] = x;

  /* Leading any char, followed by the repetition. */
  r = node_new_true_anychar(&ns[0]);
  if (r != 0) goto err1;

  x = make_list(2, ns);
  if (IS_NULL(x)) goto err;

  ns[0] = x;
  ns[1] = NULL_NODE;

  /* Wrap the whole sequence in a stop-backtrack bag. */
  x = node_new_bag(BAG_STOP_BACKTRACK);
  if (IS_NULL(x)) goto err;

  NODE_BODY(x) = ns[0];

  *node = x;
  return ONIG_NORMAL;

 err:
  /* Failures that carry no error code of their own are memory errors. */
  r = ONIGERR_MEMORY;
 err1:
  for (i = 0; i < 2; i++) onig_node_free(ns[i]);
  return r;
}
2937 
2938 static int
make_absent_engine(Node ** node,int pre_save_right_id,Node * absent,Node * step_one,int lower,int upper,int possessive,int is_range_cutter,ParseEnv * env)2939 make_absent_engine(Node** node, int pre_save_right_id, Node* absent,
2940                    Node* step_one, int lower, int upper, int possessive,
2941                    int is_range_cutter, ParseEnv* env)
2942 {
2943   int r;
2944   int i;
2945   int id;
2946   Node* x;
2947   Node* ns[4];
2948 
2949   for (i = 0; i < 4; i++) ns[i] = NULL_NODE;
2950 
2951   ns[1] = absent;
2952   ns[3] = step_one; /* for err */
2953   r = node_new_save_gimmick(&ns[0], SAVE_S, env);
2954   if (r != 0) goto err;
2955 
2956   id = GIMMICK_(ns[0])->id;
2957   r = node_new_update_var_gimmick(&ns[2], UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK,
2958                                   id, env);
2959   if (r != 0) goto err;
2960 
2961   if (is_range_cutter != 0)
2962     NODE_STATUS_ADD(ns[2], ABSENT_WITH_SIDE_EFFECTS);
2963 
2964   r = node_new_fail(&ns[3], env);
2965   if (r != 0) goto err;
2966 
2967   x = make_list(4, ns);
2968   if (IS_NULL(x)) goto err0;
2969 
2970   ns[0] = x;
2971   ns[1] = step_one;
2972   ns[2] = ns[3] = NULL_NODE;
2973 
2974   x = make_alt(2, ns);
2975   if (IS_NULL(x)) goto err0;
2976 
2977   ns[0] = x;
2978 
2979   x = node_new_quantifier(lower, upper, FALSE);
2980   if (IS_NULL(x)) goto err0;
2981 
2982   NODE_BODY(x) = ns[0];
2983   ns[0] = x;
2984 
2985   if (possessive != 0) {
2986     x = node_new_bag(BAG_STOP_BACKTRACK);
2987     if (IS_NULL(x)) goto err0;
2988 
2989     NODE_BODY(x) = ns[0];
2990     ns[0] = x;
2991   }
2992 
2993   r = node_new_update_var_gimmick(&ns[1], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
2994                                   pre_save_right_id, env);
2995   if (r != 0) goto err;
2996 
2997   r = node_new_fail(&ns[2], env);
2998   if (r != 0) goto err;
2999 
3000   x = make_list(2, ns + 1);
3001   if (IS_NULL(x)) goto err0;
3002 
3003   ns[1] = x; ns[2] = NULL_NODE;
3004 
3005   x = make_alt(2, ns);
3006   if (IS_NULL(x)) goto err0;
3007 
3008   if (is_range_cutter != FALSE)
3009     NODE_STATUS_ADD(x, SUPER);
3010 
3011   *node = x;
3012   return ONIG_NORMAL;
3013 
3014  err0:
3015   r = ONIGERR_MEMORY;
3016  err:
3017   for (i = 0; i < 4; i++) onig_node_free(ns[i]);
3018   return r;
3019 }
3020 
/*
 * Build the two nodes that close an absent construct.
 *
 *   *node1 = SAVE(RIGHT_RANGE)
 *   *node2 = ALT( UPDATE_VAR(RIGHT_RANGE_FROM_STACK, pre_save_right_id),
 *                 LIST( UPDATE_VAR(RIGHT_RANGE_FROM_STACK, id of *node1),
 *                       FAIL ) )
 *
 * Both outputs are NULL_NODE unless ONIG_NORMAL is returned.  On failure
 * every node created here is freed and ONIGERR_MEMORY (label err0) or the
 * error of the failing helper (label err) is returned.
 */
static int
make_absent_tail(Node** node1, Node** node2, int pre_save_right_id,
                 ParseEnv* env)
{
  int r;
  int id;
  Node* save;
  Node* x;
  Node* ns[2];

  *node1 = *node2 = NULL_NODE;
  save = ns[0] = ns[1] = NULL_NODE;

  r = node_new_save_gimmick(&save, SAVE_RIGHT_RANGE, env);
  if (r != 0) goto err;

  /* Second alternative: restore from the new save id, then fail. */
  id = GIMMICK_(save)->id;
  r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
                                  id, env);
  if (r != 0) goto err;

  r = node_new_fail(&ns[1], env);
  if (r != 0) goto err;

  x = make_list(2, ns);
  if (IS_NULL(x)) goto err0;

  /* The list owns both elements; it becomes the second alternative. */
  ns[0] = NULL_NODE; ns[1] = x;

  /* First alternative: restore from the caller's save id. */
  r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
                                  pre_save_right_id, env);
  if (r != 0) goto err;

  x = make_alt(2, ns);
  if (IS_NULL(x)) goto err0;

  *node1 = save;
  *node2 = x;
  return ONIG_NORMAL;

 err0:
  r = ONIGERR_MEMORY;
 err:
  onig_node_free(save);
  onig_node_free(ns[0]);
  onig_node_free(ns[1]);
  return r;
}
3069 
/*
 * Build the node tree for a range clear and store it in *node:
 *
 *   LIST( SAVE(RIGHT_RANGE),
 *         ALT( UPDATE_VAR(RIGHT_RANGE_INIT),
 *              LIST( UPDATE_VAR(RIGHT_RANGE_FROM_STACK, id of SAVE),
 *                    FAIL ) ) )
 *
 * The RIGHT_RANGE_INIT node gets the ABSENT_WITH_SIDE_EFFECTS status and the
 * ALT node gets the SUPER status.
 *
 * *node is NULL_NODE unless ONIG_NORMAL is returned.  On failure every node
 * created here is freed and ONIGERR_MEMORY (label err0) or the error of the
 * failing helper (label err) is returned.
 */
static int
make_range_clear(Node** node, ParseEnv* env)
{
  int r;
  int id;
  Node* save;
  Node* x;
  Node* ns[2];

  *node = NULL_NODE;
  save = ns[0] = ns[1] = NULL_NODE;

  r = node_new_save_gimmick(&save, SAVE_RIGHT_RANGE, env);
  if (r != 0) goto err;

  /* Second alternative: restore from the new save id, then fail. */
  id = GIMMICK_(save)->id;
  r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
                                  id, env);
  if (r != 0) goto err;

  r = node_new_fail(&ns[1], env);
  if (r != 0) goto err;

  x = make_list(2, ns);
  if (IS_NULL(x)) goto err0;

  /* The list owns both elements; it becomes the second alternative. */
  ns[0] = NULL_NODE; ns[1] = x;

  /* The id argument is passed as 0 for the RIGHT_RANGE_INIT node. */
#define ID_NOT_USED_DONT_CARE_ME   0

  r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_INIT,
                                  ID_NOT_USED_DONT_CARE_ME, env);
  if (r != 0) goto err;
  NODE_STATUS_ADD(ns[0], ABSENT_WITH_SIDE_EFFECTS);

  x = make_alt(2, ns);
  if (IS_NULL(x)) goto err0;

  NODE_STATUS_ADD(x, SUPER);

  /* Move save into ns[] and clear the separate variable so that the error
     path frees it only once. */
  ns[0] = save;
  ns[1] = x;
  save = NULL_NODE;
  x = make_list(2, ns);
  if (IS_NULL(x)) goto err0;

  *node = x;
  return ONIG_NORMAL;

 err0:
  r = ONIGERR_MEMORY;
 err:
  onig_node_free(save);
  onig_node_free(ns[0]);
  onig_node_free(ns[1]);
  return r;
}
3127 
/*
 * Test whether node is a greedy quantifier, optionally wrapped in a
 * stop-backtrack bag, whose body is either a character class or a string of
 * exactly one character, and split it apart if so.
 *
 * Returns 1 when it is.  In that case the body has been detached from the
 * quantifier, *rquant receives the (now body-less) quantifier and *rbody
 * the body; a wrapping bag, if any, has been freed.
 *
 * Returns 0 otherwise, with node unchanged and *rquant / *rbody set to 0.
 *
 * *is_possessive is set to 1 as soon as a stop-backtrack bag is seen, so it
 * may be 1 even when 0 is returned.
 */
static int
is_simple_one_char_repeat(Node* node, Node** rquant, Node** rbody,
                          int* is_possessive, ParseEnv* env)
{
  Node* quant;
  Node* body;

  *rquant = *rbody = 0;
  *is_possessive = 0;

  /* Locate the quantifier: node itself, or the body of a stop-backtrack
     bag. */
  if (NODE_TYPE(node) == NODE_QUANT) {
    quant = node;
  }
  else {
    if (NODE_TYPE(node) == NODE_BAG) {
      BagNode* en = BAG_(node);
      if (en->type == BAG_STOP_BACKTRACK) {
        *is_possessive = 1;
        quant = NODE_BAG_BODY(en);
        if (NODE_TYPE(quant) != NODE_QUANT)
          return 0;
      }
      else
        return 0;
    }
    else
      return 0;
  }

  if (QUANT_(quant)->greedy == 0)
    return 0;

  body = NODE_BODY(quant);
  switch (NODE_TYPE(body)) {
  case NODE_STRING:
    {
      int len;
      StrNode* sn = STR_(body);
      UChar *s = sn->s;

      /* Count characters by stepping with enclen() over the string. */
      len = 0;
      while (s < sn->end) {
        s += enclen(env->enc, s);
        len++;
      }
      if (len != 1)
        return 0;
    }
    /* fall through: a one-character string is accepted like a class */

  case NODE_CCLASS:
    break;

  default:
    return 0;
    break;
  }

  /* Drop the wrapping bag without freeing the quantifier it contains. */
  if (node != quant) {
    NODE_BODY(node) = 0;
    onig_node_free(node);
  }
  NODE_BODY(quant) = NULL_NODE;
  *rquant = quant;
  *rbody  = body;
  return 1;
}
3194 
/*
 * Build the absent tree for the case where the repeated expression is a
 * simple one-character repeat, and store it in *node:
 *
 *   LIST( SAVE(RIGHT_RANGE),
 *         <engine built by make_absent_engine()>,
 *         UPDATE_VAR(RIGHT_RANGE_FROM_STACK, id of SAVE) )
 *
 * Only the lower and upper bounds are read from quant; quant itself is
 * neither linked into the tree nor freed here.
 *
 * *node is NULL_NODE unless ONIG_NORMAL is returned.  On failure the slots
 * of ns[] are freed and ONIGERR_MEMORY (label err0) or the error of the
 * failing helper (label err) is returned.
 *
 * NOTE(review): body and absent sit in ns[2] / ns[3] until
 * make_absent_engine() has succeeded, so the error path here frees them.
 * make_absent_engine()'s own error path also frees the absent and step_one
 * nodes it was given on several of its failure paths, which looks like a
 * possible double free on out-of-memory -- confirm the intended ownership
 * of body and absent across these functions and their caller.
 */
static int
make_absent_tree_for_simple_one_char_repeat(Node** node,
  Node* absent, Node* quant, Node* body, int possessive, ParseEnv* env)
{
  int r;
  int i;
  int id1;
  int lower, upper;
  Node* x;
  Node* ns[4];

  *node = NULL_NODE;
  r = ONIGERR_MEMORY;
  ns[0] = ns[1] = NULL_NODE;
  ns[2] = body, ns[3] = absent;

  lower = QUANT_(quant)->lower;
  upper = QUANT_(quant)->upper;

  r = node_new_save_gimmick(&ns[0], SAVE_RIGHT_RANGE, env);
  if (r != 0) goto err;

  id1 = GIMMICK_(ns[0])->id;

  r = make_absent_engine(&ns[1], id1, absent, body, lower, upper, possessive,
                         FALSE, env);
  if (r != 0) goto err;

  /* body and absent are part of the engine tree in ns[1] now. */
  ns[2] = ns[3] = NULL_NODE;

  r = node_new_update_var_gimmick(&ns[2], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
                                  id1, env);
  if (r != 0) goto err;

  x = make_list(3, ns);
  if (IS_NULL(x)) goto err0;

  *node = x;
  return ONIG_NORMAL;

 err0:
  r = ONIGERR_MEMORY;
 err:
  for (i = 0; i < 4; i++) onig_node_free(ns[i]);
  return r;
}
3241 
/*
 * Build the node tree for an absent construct.
 *
 *   node            : receives the resulting list on success.
 *   absent          : the absent pattern node.
 *   expr            : the expression node; may be NULL_NODE, in which case a
 *                     default "\O*" (0..infinite repeat of true-anychar) is
 *                     used when is_range_cutter == 0.
 *   is_range_cutter : non-zero selects the 4-entry list form without expr
 *                     and without the tail nodes.
 *
 * Returns ONIG_NORMAL on success, otherwise an error code; on the error
 * path every entry still recorded in ns[] is passed to onig_node_free().
 */
static int
make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter,
                 ParseEnv* env)
{
  int r;
  int i;
  int id1, id2;
  int possessive;
  Node* x;
  Node* ns[7];

  r = ONIGERR_MEMORY;
  for (i = 0; i < 7; i++) ns[i] = NULL_NODE;
  /* recorded up front so that the err path releases them */
  ns[4] = expr; ns[5] = absent;

  if (is_range_cutter == 0) {
    Node* quant;
    Node* body;

    if (expr == NULL_NODE) {
      /* default expr \O* */
      quant = node_new_quantifier(0, INFINITE_REPEAT, FALSE);
      if (IS_NULL(quant)) goto err0;

      r = node_new_true_anychar(&body);
      if (r != 0) {
        onig_node_free(quant);
        goto err;
      }
      possessive = 0;
      /* jumps into the if-body below, sharing the simple-repeat path */
      goto simple;
    }
    else {
      if (is_simple_one_char_repeat(expr, &quant, &body, &possessive, env)) {
      simple:
        r = make_absent_tree_for_simple_one_char_repeat(node, absent, quant,
                                                        body, possessive, env);
        /* only the repeat bounds of quant were needed */
        onig_node_free(quant);
        if (r != 0) {
          /* NOTE(review): expr is dropped from the table here, presumably
             because is_simple_one_char_repeat() already consumed it;
             ownership of body/absent after a failed callee is not visible
             from this function -- confirm */
          ns[4] = NULL_NODE;
          onig_node_free(body);
          goto err;
        }

        return ONIG_NORMAL;
      }
    }
  }

  /* general form: [save right range][save S][engine][restore S] ... */
  r = node_new_save_gimmick(&ns[0], SAVE_RIGHT_RANGE, env);
  if (r != 0) goto err;

  id1 = GIMMICK_(ns[0])->id;

  r = node_new_save_gimmick(&ns[1], SAVE_S, env);
  if (r != 0) goto err;

  id2 = GIMMICK_(ns[1])->id;

  /* ns[3] temporarily holds the true-anychar step node for the engine */
  r = node_new_true_anychar(&ns[3]);
  if (r != 0) goto err;

  possessive = 1;
  r = make_absent_engine(&ns[2], id1, absent, ns[3], 0, INFINITE_REPEAT,
                         possessive, is_range_cutter, env);
  if (r != 0) goto err;

  /* NOTE(review): step node and absent are cleared from the table,
     presumably because the engine node now owns them -- confirm */
  ns[3] = NULL_NODE;
  ns[5] = NULL_NODE;

  r = node_new_update_var_gimmick(&ns[3], UPDATE_VAR_S_FROM_STACK, id2, env);
  if (r != 0) goto err;

  if (is_range_cutter != 0) {
    /* only ns[0..3] are linked; expr (ns[4]) is not part of this form */
    x = make_list(4, ns);
    if (IS_NULL(x)) goto err0;
  }
  else {
    /* ns[4] is expr; make_absent_tail() supplies ns[5] and ns[6] */
    r = make_absent_tail(&ns[5], &ns[6], id1, env);
    if (r != 0) goto err;

    x = make_list(7, ns);
    if (IS_NULL(x)) goto err0;
  }

  *node = x;
  return ONIG_NORMAL;

 err0:
  r = ONIGERR_MEMORY;
 err:
  for (i = 0; i < 7; i++) onig_node_free(ns[i]);
  return r;
}
3336 
3337 extern int
onig_node_str_cat(Node * node,const UChar * s,const UChar * end)3338 onig_node_str_cat(Node* node, const UChar* s, const UChar* end)
3339 {
3340   int addlen = (int )(end - s);
3341 
3342   if (addlen > 0) {
3343     int len  = (int )(STR_(node)->end - STR_(node)->s);
3344 
3345     if (STR_(node)->capacity > 0 || (len + addlen > NODE_STRING_BUF_SIZE - 1)) {
3346       UChar* p;
3347       int capa = len + addlen + NODE_STRING_MARGIN;
3348 
3349       if (capa <= STR_(node)->capacity) {
3350         onig_strcpy(STR_(node)->s + len, s, end);
3351       }
3352       else {
3353         if (STR_(node)->s == STR_(node)->buf)
3354           p = strcat_capa_from_static(STR_(node)->s, STR_(node)->end,
3355                                       s, end, capa);
3356         else
3357           p = strcat_capa(STR_(node)->s, STR_(node)->end, s, end, capa);
3358 
3359         CHECK_NULL_RETURN_MEMERR(p);
3360         STR_(node)->s        = p;
3361         STR_(node)->capacity = capa;
3362       }
3363     }
3364     else {
3365       onig_strcpy(STR_(node)->s + len, s, end);
3366     }
3367     STR_(node)->end = STR_(node)->s + len + addlen;
3368   }
3369 
3370   return 0;
3371 }
3372 
3373 extern int
onig_node_str_set(Node * node,const UChar * s,const UChar * end,int need_free)3374 onig_node_str_set(Node* node, const UChar* s, const UChar* end, int need_free)
3375 {
3376   onig_node_str_clear(node, need_free);
3377   return onig_node_str_cat(node, s, end);
3378 }
3379 
3380 static int
node_str_cat_char(Node * node,UChar c)3381 node_str_cat_char(Node* node, UChar c)
3382 {
3383   UChar s[1];
3384 
3385   s[0] = c;
3386   return onig_node_str_cat(node, s, s + 1);
3387 }
3388 
3389 extern void
onig_node_str_clear(Node * node,int need_free)3390 onig_node_str_clear(Node* node, int need_free)
3391 {
3392   if (need_free != 0 &&
3393       STR_(node)->capacity != 0 &&
3394       IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) {
3395     xfree(STR_(node)->s);
3396   }
3397 
3398   STR_(node)->flag     = 0;
3399   STR_(node)->s        = STR_(node)->buf;
3400   STR_(node)->end      = STR_(node)->buf;
3401   STR_(node)->capacity = 0;
3402 }
3403 
3404 static int
node_set_str(Node * node,const UChar * s,const UChar * end)3405 node_set_str(Node* node, const UChar* s, const UChar* end)
3406 {
3407   int r;
3408 
3409   NODE_SET_TYPE(node, NODE_STRING);
3410   STR_(node)->flag     = 0;
3411   STR_(node)->s        = STR_(node)->buf;
3412   STR_(node)->end      = STR_(node)->buf;
3413   STR_(node)->capacity = 0;
3414 
3415   r = onig_node_str_cat(node, s, end);
3416   return r;
3417 }
3418 
3419 static Node*
node_new_str(const UChar * s,const UChar * end)3420 node_new_str(const UChar* s, const UChar* end)
3421 {
3422   int r;
3423   Node* node = node_new();
3424   CHECK_NULL_RETURN(node);
3425 
3426   r = node_set_str(node, s, end);
3427   if (r != 0) {
3428     onig_node_free(node);
3429     return NULL;
3430   }
3431 
3432   return node;
3433 }
3434 
3435 static int
node_reset_str(Node * node,const UChar * s,const UChar * end)3436 node_reset_str(Node* node, const UChar* s, const UChar* end)
3437 {
3438   node_free_body(node);
3439   return node_set_str(node, s, end);
3440 }
3441 
3442 extern int
onig_node_reset_empty(Node * node)3443 onig_node_reset_empty(Node* node)
3444 {
3445   return node_reset_str(node, NULL, NULL);
3446 }
3447 
3448 extern Node*
onig_node_new_str(const UChar * s,const UChar * end)3449 onig_node_new_str(const UChar* s, const UChar* end)
3450 {
3451   return node_new_str(s, end);
3452 }
3453 
3454 static Node*
node_new_str_with_options(const UChar * s,const UChar * end,OnigOptionType options)3455 node_new_str_with_options(const UChar* s, const UChar* end,
3456                           OnigOptionType options)
3457 {
3458   Node* node;
3459   node = node_new_str(s, end);
3460 
3461   if (OPTON_IGNORECASE(options))
3462     NODE_STATUS_ADD(node, IGNORECASE);
3463 
3464   return node;
3465 }
3466 
3467 static Node*
node_new_str_crude(UChar * s,UChar * end,OnigOptionType options)3468 node_new_str_crude(UChar* s, UChar* end, OnigOptionType options)
3469 {
3470   Node* node = node_new_str_with_options(s, end, options);
3471   CHECK_NULL_RETURN(node);
3472   NODE_STRING_SET_CRUDE(node);
3473   return node;
3474 }
3475 
3476 static Node*
node_new_empty(void)3477 node_new_empty(void)
3478 {
3479   return node_new_str(NULL, NULL);
3480 }
3481 
3482 static Node*
node_new_str_crude_char(UChar c,OnigOptionType options)3483 node_new_str_crude_char(UChar c, OnigOptionType options)
3484 {
3485   int i;
3486   UChar p[1];
3487   Node* node;
3488 
3489   p[0] = c;
3490   node = node_new_str_crude(p, p + 1, options);
3491 
3492   /* clear buf tail */
3493   for (i = 1; i < NODE_STRING_BUF_SIZE; i++)
3494     STR_(node)->buf[i] = '\0';
3495 
3496   return node;
3497 }
3498 
3499 static Node*
str_node_split_last_char(Node * node,OnigEncoding enc)3500 str_node_split_last_char(Node* node, OnigEncoding enc)
3501 {
3502   const UChar *p;
3503   Node* rn;
3504   StrNode* sn;
3505 
3506   sn = STR_(node);
3507   rn = NULL_NODE;
3508   if (sn->end > sn->s) {
3509     p = onigenc_get_prev_char_head(enc, sn->s, sn->end);
3510     if (p && p > sn->s) { /* can be split. */
3511       rn = node_new_str(p, sn->end);
3512       CHECK_NULL_RETURN(rn);
3513 
3514       sn->end = (UChar* )p;
3515       STR_(rn)->flag = sn->flag;
3516       NODE_STATUS(rn) = NODE_STATUS(node);
3517     }
3518   }
3519 
3520   return rn;
3521 }
3522 
3523 static int
str_node_can_be_split(Node * node,OnigEncoding enc)3524 str_node_can_be_split(Node* node, OnigEncoding enc)
3525 {
3526   StrNode* sn = STR_(node);
3527   if (sn->end > sn->s) {
3528     return ((enclen(enc, sn->s) < sn->end - sn->s)  ?  1 : 0);
3529   }
3530   return 0;
3531 }
3532 
3533 static int
scan_number(UChar ** src,const UChar * end,OnigEncoding enc)3534 scan_number(UChar** src, const UChar* end, OnigEncoding enc)
3535 {
3536   int num, val;
3537   OnigCodePoint c;
3538   UChar* p = *src;
3539   PFETCH_READY;
3540 
3541   num = 0;
3542   while (! PEND) {
3543     PFETCH(c);
3544     if (IS_CODE_DIGIT_ASCII(enc, c)) {
3545       val = (int )DIGITVAL(c);
3546       if ((ONIG_INT_MAX - val) / 10 < num)
3547         return -1;  /* overflow */
3548 
3549       num = num * 10 + val;
3550     }
3551     else {
3552       PUNFETCH;
3553       break;
3554     }
3555   }
3556   *src = p;
3557   return num;
3558 }
3559 
3560 static int
scan_hexadecimal_number(UChar ** src,UChar * end,int minlen,int maxlen,OnigEncoding enc,OnigCodePoint * rcode)3561 scan_hexadecimal_number(UChar** src, UChar* end, int minlen, int maxlen,
3562                         OnigEncoding enc, OnigCodePoint* rcode)
3563 {
3564   OnigCodePoint code;
3565   OnigCodePoint c;
3566   unsigned int val;
3567   int n;
3568   UChar* p = *src;
3569   PFETCH_READY;
3570 
3571   code = 0;
3572   n = 0;
3573   while (! PEND && n < maxlen) {
3574     PFETCH(c);
3575     if (IS_CODE_XDIGIT_ASCII(enc, c)) {
3576       n++;
3577       val = (unsigned int )XDIGITVAL(enc, c);
3578       if ((UINT_MAX - val) / 16UL < code)
3579         return ONIGERR_TOO_BIG_NUMBER; /* overflow */
3580 
3581       code = (code << 4) + val;
3582     }
3583     else {
3584       PUNFETCH;
3585       break;
3586     }
3587   }
3588 
3589   if (n < minlen)
3590     return ONIGERR_INVALID_CODE_POINT_VALUE;
3591 
3592   *rcode = code;
3593   *src = p;
3594   return ONIG_NORMAL;
3595 }
3596 
3597 static int
scan_octal_number(UChar ** src,UChar * end,int minlen,int maxlen,OnigEncoding enc,OnigCodePoint * rcode)3598 scan_octal_number(UChar** src, UChar* end, int minlen, int maxlen,
3599                   OnigEncoding enc, OnigCodePoint* rcode)
3600 {
3601   OnigCodePoint code;
3602   OnigCodePoint c;
3603   unsigned int val;
3604   int n;
3605   UChar* p = *src;
3606   PFETCH_READY;
3607 
3608   code = 0;
3609   n = 0;
3610   while (! PEND && n < maxlen) {
3611     PFETCH(c);
3612     if (IS_CODE_DIGIT_ASCII(enc, c) && c < '8') {
3613       n++;
3614       val = (unsigned int )ODIGITVAL(c);
3615       if ((UINT_MAX - val) / 8UL < code)
3616         return ONIGERR_TOO_BIG_NUMBER; /* overflow */
3617 
3618       code = (code << 3) + val;
3619     }
3620     else {
3621       PUNFETCH;
3622       break;
3623     }
3624   }
3625 
3626   if (n < minlen)
3627     return ONIGERR_INVALID_CODE_POINT_VALUE;
3628 
3629   *rcode = code;
3630   *src = p;
3631   return ONIG_NORMAL;
3632 }
3633 
3634 static int
scan_number_of_base(UChar ** src,UChar * end,int minlen,OnigEncoding enc,OnigCodePoint * rcode,int base)3635 scan_number_of_base(UChar** src, UChar* end, int minlen,
3636                     OnigEncoding enc, OnigCodePoint* rcode, int base)
3637 {
3638   int r;
3639 
3640   if (base == 16)
3641     r = scan_hexadecimal_number(src, end, minlen, 8, enc, rcode);
3642   else if (base == 8)
3643     r = scan_octal_number(src, end, minlen, 11, enc, rcode);
3644   else
3645     r = ONIGERR_INVALID_CODE_POINT_VALUE;
3646 
3647   return r;
3648 }
3649 
/* True when c separates code points inside a sequence: a space or a
   newline. */
#define IS_CODE_POINT_DIVIDE(c)  ((c) == ' ' || (c) == '\n')

/* Parsing state tracked by check_code_point_sequence_cc(). */
enum CPS_STATE {
  CPS_EMPTY = 0,  /* set after the number that closes a range */
  CPS_START = 1,  /* set after a number outside a range; '-' is accepted */
  CPS_RANGE = 2   /* set after '-'; a number must follow */
};
3657 
/*
 * Validate a code point sequence that may contain ranges ("a-b"), reading
 * from p up to the closing '}'.
 *
 *   base  : 16 or 8, passed on to scan_number_of_base().
 *   state : initial CPS_STATE value.
 *
 * Returns the number of code point values scanned when '}' is reached in
 * a state other than CPS_RANGE.  Otherwise returns an ONIGERR_* code:
 * ONIGERR_TOO_LONG_WIDE_CHAR_VALUE when a further digit of the base
 * directly follows a scanned number, ONIGERR_INVALID_CODE_POINT_VALUE for
 * other malformed input (including input that ends before '}'), or the
 * error reported by scan_number_of_base().
 */
static int
check_code_point_sequence_cc(UChar* p, UChar* end, int base,
                             OnigEncoding enc, int state)
{
  int r;
  int n;
  int end_digit;
  OnigCodePoint code;
  OnigCodePoint c;
  PFETCH_READY;

  end_digit = FALSE;  /* TRUE right after a number has been scanned */
  n = 0;
  while (! PEND) {
  start:
    PFETCH(c);
    if (c == '}') {
    end_char:
      /* a range start without its end value is invalid */
      if (state == CPS_RANGE) return ONIGERR_INVALID_CODE_POINT_VALUE;
      return n;
    }

    if (IS_CODE_POINT_DIVIDE(c)) {
      /* skip the run of separators; c ends up as the next other char */
      while (! PEND) {
        PFETCH(c);
        if (! IS_CODE_POINT_DIVIDE(c)) break;
      }
      /* input ended while still inside the separators */
      if (IS_CODE_POINT_DIVIDE(c))
        return ONIGERR_INVALID_CODE_POINT_VALUE;
    }
    else if (c == '-') {
    range:
      /* '-' is only valid directly after a start value */
      if (state != CPS_START) return ONIGERR_INVALID_CODE_POINT_VALUE;
      if (PEND) return ONIGERR_INVALID_CODE_POINT_VALUE;
      end_digit = FALSE;
      state = CPS_RANGE;
      /* PEND was checked just above, so re-entering after the loop
         condition is safe */
      goto start;
    }
    else if (end_digit == TRUE) {
      /* a number was just scanned and is followed directly by something
         that is neither '}', a separator nor '-' */
      if (base == 16) {
        if (IS_CODE_XDIGIT_ASCII(enc, c))
          return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
      }
      else if (base == 8) {
        if (IS_CODE_DIGIT_ASCII(enc, c) && c < '8')
          return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
      }

      return ONIGERR_INVALID_CODE_POINT_VALUE;
    }

    /* reached after separators: c may be '}' or '-' here */
    if (c == '}') goto end_char;
    if (c == '-') goto range;

    /* c starts a number: push it back and scan the whole value */
    PUNFETCH;
    r = scan_number_of_base(&p, end, 1, enc, &code, base);
    if (r != 0) return r;
    n++;
    end_digit = TRUE;
    state = (state == CPS_RANGE) ? CPS_EMPTY : CPS_START;
  }

  return ONIGERR_INVALID_CODE_POINT_VALUE;
}
3722 
3723 static int
check_code_point_sequence(UChar * p,UChar * end,int base,OnigEncoding enc)3724 check_code_point_sequence(UChar* p, UChar* end, int base, OnigEncoding enc)
3725 {
3726   int r;
3727   int n;
3728   int end_digit;
3729   OnigCodePoint code;
3730   OnigCodePoint c;
3731   PFETCH_READY;
3732 
3733   end_digit = FALSE;
3734   n = 0;
3735   while (! PEND) {
3736     PFETCH(c);
3737     if (c == '}') {
3738     end_char:
3739       return n;
3740     }
3741 
3742     if (IS_CODE_POINT_DIVIDE(c)) {
3743       while (! PEND) {
3744         PFETCH(c);
3745         if (! IS_CODE_POINT_DIVIDE(c)) break;
3746       }
3747       if (IS_CODE_POINT_DIVIDE(c))
3748         return ONIGERR_INVALID_CODE_POINT_VALUE;
3749     }
3750     else if (end_digit == TRUE) {
3751       if (base == 16) {
3752         if (IS_CODE_XDIGIT_ASCII(enc, c))
3753           return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
3754       }
3755       else if (base == 8) {
3756         if (IS_CODE_DIGIT_ASCII(enc, c) && c < '8')
3757           return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
3758       }
3759 
3760       return ONIGERR_INVALID_CODE_POINT_VALUE;
3761     }
3762 
3763     if (c == '}') goto end_char;
3764 
3765     PUNFETCH;
3766     r = scan_number_of_base(&p, end, 1, enc, &code, base);
3767     if (r != 0) return r;
3768     n++;
3769     end_digit = TRUE;
3770   }
3771 
3772   return ONIGERR_INVALID_CODE_POINT_VALUE;
3773 }
3774 
3775 static int
get_next_code_point(UChar ** src,UChar * end,int base,OnigEncoding enc,int in_cc,OnigCodePoint * rcode)3776 get_next_code_point(UChar** src, UChar* end, int base, OnigEncoding enc, int in_cc, OnigCodePoint* rcode)
3777 {
3778   int r;
3779   OnigCodePoint c;
3780   UChar* p = *src;
3781   PFETCH_READY;
3782 
3783   while (! PEND) {
3784     PFETCH(c);
3785     if (! IS_CODE_POINT_DIVIDE(c)) {
3786       if (c == '}') {
3787         *src = p;
3788         return 1; /* end of sequence */
3789       }
3790       else if (c == '-' && in_cc == TRUE) {
3791         *src = p;
3792         return 2; /* range */
3793       }
3794       PUNFETCH;
3795       break;
3796     }
3797     else {
3798       if (PEND)
3799         return ONIGERR_INVALID_CODE_POINT_VALUE;
3800     }
3801   }
3802 
3803   r = scan_number_of_base(&p, end, 1, enc, rcode, base);
3804   if (r != 0) return r;
3805 
3806   *src = p;
3807   return ONIG_NORMAL;
3808 }
3809 
3810 
/* Write one code point value into bbuf at byte offset pos via BB_WRITE.
   The address of `code` is taken, so it must be an lvalue. */
#define BB_WRITE_CODE_POINT(bbuf,pos,code) \
    BB_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT)
3813 
3814 /* data format:
3815      [n][from-1][to-1][from-2][to-2] ... [from-n][to-n]
3816      (all data size is OnigCodePoint)
3817  */
3818 static int
new_code_range(BBuf ** pbuf)3819 new_code_range(BBuf** pbuf)
3820 {
3821 #define INIT_MULTI_BYTE_RANGE_SIZE  (SIZE_CODE_POINT * 5)
3822   int r;
3823   OnigCodePoint n;
3824   BBuf* bbuf;
3825 
3826   bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf));
3827   CHECK_NULL_RETURN_MEMERR(bbuf);
3828   r = BB_INIT(bbuf, INIT_MULTI_BYTE_RANGE_SIZE);
3829   if (r != 0) {
3830     xfree(bbuf);
3831     *pbuf = 0;
3832     return r;
3833   }
3834 
3835   n = 0;
3836   BB_WRITE_CODE_POINT(bbuf, 0, n);
3837   return 0;
3838 }
3839 
/*
 * Insert the code point range [from, to] into the range buffer *pbuf,
 * merging it with existing ranges it overlaps (or that start at to + 1).
 *
 * The bounds are swapped when from > to.  When *pbuf is NULL a new buffer
 * is created with new_code_range().  Returns 0 on success,
 * ONIGERR_TOO_MANY_MULTI_BYTE_RANGES when the range count would exceed
 * ONIG_MAX_MULTI_BYTE_RANGES_NUM, or the error from new_code_range().
 * NOTE(review): the BB_* macros may also return on allocation failure --
 * their definitions are not visible here.
 */
static int
add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to)
{
  int r, inc_n, pos;
  int low, high, bound, x;
  OnigCodePoint n, *data;
  BBuf* bbuf;

  if (from > to) {
    n = from; from = to; to = n;
  }

  if (IS_NULL(*pbuf)) {
    r = new_code_range(pbuf);
    if (r != 0) return r;
    bbuf = *pbuf;
    n = 0;
  }
  else {
    bbuf = *pbuf;
    GET_CODE_POINT(n, bbuf->p);
  }
  /* skip the leading counter; data[2*i] / data[2*i + 1] are from / to */
  data = (OnigCodePoint* )(bbuf->p);
  data++;

  /* binary search: low = first range whose end is not below `from` */
  for (low = 0, bound = n; low < bound; ) {
    x = (low + bound) >> 1;
    if (from > data[x*2 + 1])
      low = x + 1;
    else
      bound = x;
  }

  /* binary search: high = first range starting above to + 1.
     When `to` is the maximum value every remaining range is covered;
     presumably this also avoids to + 1 wrapping around. */
  high = (to == ~((OnigCodePoint )0)) ? n : low;
  for (bound = n; high < bound; ) {
    x = (high + bound) >> 1;
    if (to + 1 >= data[x*2])
      high = x + 1;
    else
      bound = x;
  }

  /* ranges low .. high-1 are replaced by one range: net count change */
  inc_n = low + 1 - high;
  if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM)
    return ONIGERR_TOO_MANY_MULTI_BYTE_RANGES;

  if (inc_n != 1) {
    /* at least one existing range is absorbed: widen to cover them */
    if (from > data[low*2])
      from = data[low*2];
    if (to < data[(high - 1)*2 + 1])
      to = data[(high - 1)*2 + 1];
  }

  if (inc_n != 0 && (OnigCodePoint )high < n) {
    /* shift the ranges that follow so they sit right after slot `low` */
    int from_pos = SIZE_CODE_POINT * (1 + high * 2);
    int to_pos   = SIZE_CODE_POINT * (1 + (low + 1) * 2);
    int size = (n - high) * 2 * SIZE_CODE_POINT;

    if (inc_n > 0) {
      BB_MOVE_RIGHT(bbuf, from_pos, to_pos, size);
    }
    else {
      BB_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos);
    }
  }

  /* store the (possibly widened) range in slot `low`, then the new count */
  pos = SIZE_CODE_POINT * (1 + low * 2);
  BB_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2);
  BB_WRITE_CODE_POINT(bbuf, pos, from);
  BB_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to);
  n += inc_n;
  BB_WRITE_CODE_POINT(bbuf, 0, n);

  return 0;
}
3915 
3916 static int
add_code_range(BBuf ** pbuf,ParseEnv * env,OnigCodePoint from,OnigCodePoint to)3917 add_code_range(BBuf** pbuf, ParseEnv* env, OnigCodePoint from, OnigCodePoint to)
3918 {
3919   if (from > to) {
3920     if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
3921       return 0;
3922     else
3923       return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
3924   }
3925 
3926   return add_code_range_to_buf(pbuf, from, to);
3927 }
3928 
/*
 * Build in *pbuf the complement of the ranges in bbuf, within
 * [MBCODE_START_POS(enc), maximum OnigCodePoint value].
 *
 * A NULL or empty bbuf yields the whole range via
 * SET_ALL_MULTI_BYTE_RANGE.  *pbuf is reset to NULL first.  Returns 0 on
 * success; on failure the error from add_code_range_to_buf() is returned
 * after *pbuf has been passed to bbuf_free().
 */
static int
not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf)
{
  int r, i, n;
  OnigCodePoint pre, from, *data, to = 0;

  *pbuf = (BBuf* )NULL;
  if (IS_NULL(bbuf)) {
  set_all:
    return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
  }

  data = (OnigCodePoint* )(bbuf->p);
  GET_CODE_POINT(n, data);
  data++;
  if (n <= 0) goto set_all;

  r = 0;
  /* pre = first code point not yet covered by the output */
  pre = MBCODE_START_POS(enc);
  for (i = 0; i < n; i++) {
    from = data[i*2];
    to   = data[i*2+1];
    /* emit the gap in front of this range, if there is one */
    if (pre <= from - 1) {
      r = add_code_range_to_buf(pbuf, pre, from - 1);
      if (r != 0) {
        bbuf_free(*pbuf);
        return r;
      }
    }
    /* stop before to + 1 would wrap around */
    if (to == ~((OnigCodePoint )0)) break;
    pre = to + 1;
  }
  /* emit what remains above the last range */
  if (to < ~((OnigCodePoint )0)) {
    r = add_code_range_to_buf(pbuf, to + 1, ~((OnigCodePoint )0));
    if (r != 0) bbuf_free(*pbuf);
  }
  return r;
}
3967 
/* Exchange two (buffer, negation flag) pairs in place.  All four
   arguments are assigned to, so they must be lvalues. */
#define SWAP_BB_NOT(bbuf1, not1, bbuf2, not2) do {\
  BBuf *tbuf; \
  int  tnot; \
  tnot = not1;  not1  = not2;  not2  = tnot; \
  tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; \
} while (0)
3974 
3975 static int
or_code_range_buf(OnigEncoding enc,BBuf * bbuf1,int not1,BBuf * bbuf2,int not2,BBuf ** pbuf)3976 or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,
3977                   BBuf* bbuf2, int not2, BBuf** pbuf)
3978 {
3979   int r;
3980   OnigCodePoint i, n1, *data1;
3981   OnigCodePoint from, to;
3982 
3983   *pbuf = (BBuf* )NULL;
3984   if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) {
3985     if (not1 != 0 || not2 != 0)
3986       return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
3987     return 0;
3988   }
3989 
3990   r = 0;
3991   if (IS_NULL(bbuf2))
3992     SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);
3993 
3994   if (IS_NULL(bbuf1)) {
3995     if (not1 != 0) {
3996       return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
3997     }
3998     else {
3999       if (not2 == 0) {
4000         return bbuf_clone(pbuf, bbuf2);
4001       }
4002       else {
4003         return not_code_range_buf(enc, bbuf2, pbuf);
4004       }
4005     }
4006   }
4007 
4008   if (not1 != 0)
4009     SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);
4010 
4011   data1 = (OnigCodePoint* )(bbuf1->p);
4012   GET_CODE_POINT(n1, data1);
4013   data1++;
4014 
4015   if (not2 == 0 && not1 == 0) { /* 1 OR 2 */
4016     r = bbuf_clone(pbuf, bbuf2);
4017   }
4018   else if (not1 == 0) { /* 1 OR (not 2) */
4019     r = not_code_range_buf(enc, bbuf2, pbuf);
4020   }
4021   if (r != 0) return r;
4022 
4023   for (i = 0; i < n1; i++) {
4024     from = data1[i*2];
4025     to   = data1[i*2+1];
4026     r = add_code_range_to_buf(pbuf, from, to);
4027     if (r != 0) return r;
4028   }
4029   return 0;
4030 }
4031 
/*
 * Add to *pbuf the parts of [from1, to1] that are not covered by any of
 * the n ranges in data (data[2*i] / data[2*i + 1] are from / to).
 * Used by and_code_range_buf() for the "1 AND (not 2)" case.
 *
 * Returns 0 on success or the error from add_code_range_to_buf().
 */
static int
and_code_range1(BBuf** pbuf, OnigCodePoint from1, OnigCodePoint to1,
                OnigCodePoint* data, int n)
{
  int i, r;
  OnigCodePoint from2, to2;

  for (i = 0; i < n; i++) {
    from2 = data[i*2];
    to2   = data[i*2+1];
    if (from2 < from1) {
      /* range 2 starts before the remaining interval */
      if (to2 < from1) continue;
      else {
        /* it cuts off the head of the remaining interval */
        from1 = to2 + 1;
      }
    }
    else if (from2 <= to1) {
      /* range 2 starts inside the remaining interval */
      if (to2 < to1) {
        /* emit the piece in front of range 2, continue behind it */
        if (from1 <= from2 - 1) {
          r = add_code_range_to_buf(pbuf, from1, from2-1);
          if (r != 0) return r;
        }
        from1 = to2 + 1;
      }
      else {
        /* range 2 covers the tail of the remaining interval */
        to1 = from2 - 1;
      }
    }
    else {
      /* range 2 starts beyond to1; this makes from1 > to1 so the loop
         ends below and nothing more is added */
      from1 = from2;
    }
    if (from1 > to1) break;
  }
  /* whatever is left of the interval survives */
  if (from1 <= to1) {
    r = add_code_range_to_buf(pbuf, from1, to1);
    if (r != 0) return r;
  }
  return 0;
}
4071 
4072 static int
and_code_range_buf(BBuf * bbuf1,int not1,BBuf * bbuf2,int not2,BBuf ** pbuf)4073 and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)
4074 {
4075   int r;
4076   OnigCodePoint i, j, n1, n2, *data1, *data2;
4077   OnigCodePoint from, to, from1, to1, from2, to2;
4078 
4079   *pbuf = (BBuf* )NULL;
4080   if (IS_NULL(bbuf1)) {
4081     if (not1 != 0 && IS_NOT_NULL(bbuf2)) /* not1 != 0 -> not2 == 0 */
4082       return bbuf_clone(pbuf, bbuf2);
4083     return 0;
4084   }
4085   else if (IS_NULL(bbuf2)) {
4086     if (not2 != 0)
4087       return bbuf_clone(pbuf, bbuf1);
4088     return 0;
4089   }
4090 
4091   if (not1 != 0)
4092     SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);
4093 
4094   data1 = (OnigCodePoint* )(bbuf1->p);
4095   data2 = (OnigCodePoint* )(bbuf2->p);
4096   GET_CODE_POINT(n1, data1);
4097   GET_CODE_POINT(n2, data2);
4098   data1++;
4099   data2++;
4100 
4101   if (not2 == 0 && not1 == 0) { /* 1 AND 2 */
4102     for (i = 0; i < n1; i++) {
4103       from1 = data1[i*2];
4104       to1   = data1[i*2+1];
4105       for (j = 0; j < n2; j++) {
4106         from2 = data2[j*2];
4107         to2   = data2[j*2+1];
4108         if (from2 > to1) break;
4109         if (to2 < from1) continue;
4110         from = MAX(from1, from2);
4111         to   = MIN(to1, to2);
4112         r = add_code_range_to_buf(pbuf, from, to);
4113         if (r != 0) return r;
4114       }
4115     }
4116   }
4117   else if (not1 == 0) { /* 1 AND (not 2) */
4118     for (i = 0; i < n1; i++) {
4119       from1 = data1[i*2];
4120       to1   = data1[i*2+1];
4121       r = and_code_range1(pbuf, from1, to1, data2, n2);
4122       if (r != 0) return r;
4123     }
4124   }
4125 
4126   return 0;
4127 }
4128 
4129 static int
and_cclass(CClassNode * dest,CClassNode * cc,OnigEncoding enc)4130 and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
4131 {
4132   int r, not1, not2;
4133   BBuf *buf1, *buf2, *pbuf;
4134   BitSetRef bsr1, bsr2;
4135   BitSet bs1, bs2;
4136 
4137   not1 = IS_NCCLASS_NOT(dest);
4138   bsr1 = dest->bs;
4139   buf1 = dest->mbuf;
4140   not2 = IS_NCCLASS_NOT(cc);
4141   bsr2 = cc->bs;
4142   buf2 = cc->mbuf;
4143 
4144   if (not1 != 0) {
4145     bitset_invert_to(bsr1, bs1);
4146     bsr1 = bs1;
4147   }
4148   if (not2 != 0) {
4149     bitset_invert_to(bsr2, bs2);
4150     bsr2 = bs2;
4151   }
4152   bitset_and(bsr1, bsr2);
4153   if (bsr1 != dest->bs) {
4154     bitset_copy(dest->bs, bsr1);
4155   }
4156   if (not1 != 0) {
4157     bitset_invert(dest->bs);
4158   }
4159 
4160   if (! ONIGENC_IS_SINGLEBYTE(enc)) {
4161     if (not1 != 0 && not2 != 0) {
4162       r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf);
4163     }
4164     else {
4165       r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf);
4166       if (r == 0 && not1 != 0) {
4167         BBuf *tbuf;
4168         r = not_code_range_buf(enc, pbuf, &tbuf);
4169         if (r != 0) {
4170           bbuf_free(pbuf);
4171           return r;
4172         }
4173         bbuf_free(pbuf);
4174         pbuf = tbuf;
4175       }
4176     }
4177     if (r != 0) return r;
4178 
4179     dest->mbuf = pbuf;
4180     bbuf_free(buf1);
4181     return r;
4182   }
4183   return 0;
4184 }
4185 
4186 static int
or_cclass(CClassNode * dest,CClassNode * cc,OnigEncoding enc)4187 or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
4188 {
4189   int r, not1, not2;
4190   BBuf *buf1, *buf2, *pbuf;
4191   BitSetRef bsr1, bsr2;
4192   BitSet bs1, bs2;
4193 
4194   not1 = IS_NCCLASS_NOT(dest);
4195   bsr1 = dest->bs;
4196   buf1 = dest->mbuf;
4197   not2 = IS_NCCLASS_NOT(cc);
4198   bsr2 = cc->bs;
4199   buf2 = cc->mbuf;
4200 
4201   if (not1 != 0) {
4202     bitset_invert_to(bsr1, bs1);
4203     bsr1 = bs1;
4204   }
4205   if (not2 != 0) {
4206     bitset_invert_to(bsr2, bs2);
4207     bsr2 = bs2;
4208   }
4209   bitset_or(bsr1, bsr2);
4210   if (bsr1 != dest->bs) {
4211     bitset_copy(dest->bs, bsr1);
4212   }
4213   if (not1 != 0) {
4214     bitset_invert(dest->bs);
4215   }
4216 
4217   if (! ONIGENC_IS_SINGLEBYTE(enc)) {
4218     if (not1 != 0 && not2 != 0) {
4219       r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf);
4220     }
4221     else {
4222       r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf);
4223       if (r == 0 && not1 != 0) {
4224         BBuf *tbuf;
4225         r = not_code_range_buf(enc, pbuf, &tbuf);
4226         if (r != 0) {
4227           bbuf_free(pbuf);
4228           return r;
4229         }
4230         bbuf_free(pbuf);
4231         pbuf = tbuf;
4232       }
4233     }
4234     if (r != 0) return r;
4235 
4236     dest->mbuf = pbuf;
4237     bbuf_free(buf1);
4238     return r;
4239   }
4240   else
4241     return 0;
4242 }
4243 
4244 static OnigCodePoint
conv_backslash_value(OnigCodePoint c,ParseEnv * env)4245 conv_backslash_value(OnigCodePoint c, ParseEnv* env)
4246 {
4247   if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) {
4248     switch (c) {
4249     case 'n': return '\n';
4250     case 't': return '\t';
4251     case 'r': return '\r';
4252     case 'f': return '\f';
4253     case 'a': return '\007';
4254     case 'b': return '\010';
4255     case 'e': return '\033';
4256     case 'v':
4257       if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB))
4258         return '\v';
4259       break;
4260 
4261     default:
4262       break;
4263     }
4264   }
4265   return c;
4266 }
4267 
4268 static int
is_invalid_quantifier_target(Node * node)4269 is_invalid_quantifier_target(Node* node)
4270 {
4271   switch (NODE_TYPE(node)) {
4272   case NODE_ANCHOR:
4273   case NODE_GIMMICK:
4274     return 1;
4275     break;
4276 
4277   case NODE_BAG:
4278     /* allow enclosed elements */
4279     /* return is_invalid_quantifier_target(NODE_BODY(node)); */
4280     break;
4281 
4282   case NODE_LIST:
4283     do {
4284       if (! is_invalid_quantifier_target(NODE_CAR(node))) return 0;
4285     } while (IS_NOT_NULL(node = NODE_CDR(node)));
4286     return 0;
4287     break;
4288 
4289   case NODE_ALT:
4290     do {
4291       if (is_invalid_quantifier_target(NODE_CAR(node))) return 1;
4292     } while (IS_NOT_NULL(node = NODE_CDR(node)));
4293     break;
4294 
4295   default:
4296     break;
4297   }
4298   return 0;
4299 }
4300 
4301 /* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */
4302 static int
quantifier_type_num(QuantNode * q)4303 quantifier_type_num(QuantNode* q)
4304 {
4305   if (q->greedy) {
4306     if (q->lower == 0) {
4307       if (q->upper == 1) return 0;
4308       else if (IS_INFINITE_REPEAT(q->upper)) return 1;
4309     }
4310     else if (q->lower == 1) {
4311       if (IS_INFINITE_REPEAT(q->upper)) return 2;
4312     }
4313   }
4314   else {
4315     if (q->lower == 0) {
4316       if (q->upper == 1) return 3;
4317       else if (IS_INFINITE_REPEAT(q->upper)) return 4;
4318     }
4319     else if (q->lower == 1) {
4320       if (IS_INFINITE_REPEAT(q->upper)) return 5;
4321     }
4322   }
4323   return -1;
4324 }
4325 
4326 
/* Action taken by onig_reduce_nested_quantifier() for a quantifier that
   directly wraps another quantifier. */
enum ReduceType {
  RQ_ASIS = 0, /* as is */
  RQ_DEL  = 1, /* delete parent */
  RQ_A,        /* to '*'    */
  RQ_P,        /* to '+'    */
  RQ_AQ,       /* to '*?'   */
  RQ_QQ,       /* to '??'   */
  RQ_P_QQ,     /* to '+)??' */
};

/* Indexed as [child type][parent type], both numbered by
   quantifier_type_num(): ?, *, +, ??, *?, +?.
   The comment at the end of each row names the child type. */
static enum ReduceType ReduceTypeTable[6][6] = {
  {RQ_DEL,  RQ_A,    RQ_A,   RQ_QQ,   RQ_AQ,   RQ_ASIS}, /* '?'  */
  {RQ_DEL,  RQ_DEL,  RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL},  /* '*'  */
  {RQ_A,    RQ_A,    RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL},  /* '+'  */
  {RQ_DEL,  RQ_AQ,   RQ_AQ,  RQ_DEL,  RQ_AQ,   RQ_AQ},   /* '??' */
  {RQ_DEL,  RQ_DEL,  RQ_DEL, RQ_DEL,  RQ_DEL,  RQ_DEL},  /* '*?' */
  {RQ_ASIS, RQ_A,    RQ_P,   RQ_AQ,   RQ_AQ,   RQ_DEL}   /* '+?' */
};
4345 
4346 extern int
onig_reduce_nested_quantifier(Node * pnode)4347 onig_reduce_nested_quantifier(Node* pnode)
4348 {
4349   int pnum, cnum;
4350   QuantNode *p, *c;
4351   Node* cnode;
4352 
4353   cnode = NODE_BODY(pnode);
4354 
4355   p = QUANT_(pnode);
4356   c = QUANT_(cnode);
4357   pnum = quantifier_type_num(p);
4358   cnum = quantifier_type_num(c);
4359   if (pnum < 0 || cnum < 0) {
4360     if (p->lower == p->upper && c->lower == c->upper) {
4361       int n = onig_positive_int_multiply(p->lower, c->lower);
4362       if (n < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
4363 
4364       p->lower = p->upper = n;
4365       NODE_BODY(pnode) = NODE_BODY(cnode);
4366       goto remove_cnode;
4367     }
4368 
4369     return 0;
4370   }
4371 
4372   switch(ReduceTypeTable[cnum][pnum]) {
4373   case RQ_DEL:
4374     *pnode = *cnode;
4375     goto remove_cnode;
4376     break;
4377   case RQ_A:
4378     NODE_BODY(pnode) = NODE_BODY(cnode);
4379     p->lower  = 0;  p->upper = INFINITE_REPEAT;  p->greedy = 1;
4380     goto remove_cnode;
4381     break;
4382   case RQ_P:
4383     NODE_BODY(pnode) = NODE_BODY(cnode);
4384     p->lower  = 1;  p->upper = INFINITE_REPEAT;  p->greedy = 1;
4385     goto remove_cnode;
4386     break;
4387   case RQ_AQ:
4388     NODE_BODY(pnode) = NODE_BODY(cnode);
4389     p->lower  = 0;  p->upper = INFINITE_REPEAT;  p->greedy = 0;
4390     goto remove_cnode;
4391     break;
4392   case RQ_QQ:
4393     NODE_BODY(pnode) = NODE_BODY(cnode);
4394     p->lower  = 0;  p->upper = 1;  p->greedy = 0;
4395     goto remove_cnode;
4396     break;
4397   case RQ_P_QQ:
4398     p->lower  = 0;  p->upper = 1;  p->greedy = 0;
4399     c->lower  = 1;  c->upper = INFINITE_REPEAT;  c->greedy = 1;
4400     break;
4401   case RQ_ASIS:
4402     break;
4403   }
4404 
4405   return 0;
4406 
4407  remove_cnode:
4408   NODE_BODY(cnode) = NULL_NODE;
4409   onig_node_free(cnode);
4410   return 0;
4411 }
4412 
4413 static int
node_new_general_newline(Node ** node,ParseEnv * env)4414 node_new_general_newline(Node** node, ParseEnv* env)
4415 {
4416   int r;
4417   int dlen, alen;
4418   UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN * 2];
4419   Node* crnl;
4420   Node* ncc;
4421   Node* x;
4422   CClassNode* cc;
4423 
4424   dlen = ONIGENC_CODE_TO_MBC(env->enc, 0x0d, buf);
4425   if (dlen < 0) return dlen;
4426   alen = ONIGENC_CODE_TO_MBC(env->enc, NEWLINE_CODE, buf + dlen);
4427   if (alen < 0) return alen;
4428 
4429   crnl = node_new_str_crude(buf, buf + dlen + alen, ONIG_OPTION_NONE);
4430   CHECK_NULL_RETURN_MEMERR(crnl);
4431 
4432   ncc = node_new_cclass();
4433   if (IS_NULL(ncc)) goto err2;
4434 
4435   cc = CCLASS_(ncc);
4436   if (dlen == 1) {
4437     bitset_set_range(cc->bs, NEWLINE_CODE, 0x0d);
4438   }
4439   else {
4440     r = add_code_range(&(cc->mbuf), env, NEWLINE_CODE, 0x0d);
4441     if (r != 0) {
4442     err1:
4443       onig_node_free(ncc);
4444     err2:
4445       onig_node_free(crnl);
4446       return ONIGERR_MEMORY;
4447     }
4448   }
4449 
4450   if (ONIGENC_IS_UNICODE_ENCODING(env->enc)) {
4451     r = add_code_range(&(cc->mbuf), env, 0x85, 0x85);
4452     if (r != 0) goto err1;
4453     r = add_code_range(&(cc->mbuf), env, 0x2028, 0x2029);
4454     if (r != 0) goto err1;
4455   }
4456 
4457   x = node_new_bag_if_else(crnl, NULL_NODE, ncc);
4458   if (IS_NULL(x)) goto err1;
4459 
4460   *node = x;
4461   return 0;
4462 }
4463 
/* Token kinds reported by fetch_token() and fetch_token_cc(). */
enum TokenSyms {
  TK_EOT = 0,          /* end of token */
  TK_CRUDE_BYTE,       /* one raw byte (two-digit hex or octal escape) */
  TK_CHAR,
  TK_STRING,
  TK_CODE_POINT,       /* code point given by a numeric escape */
  TK_ANYCHAR,
  TK_CHAR_TYPE,        /* character type escape such as \w, \d, \s */
  TK_BACKREF,
  TK_CALL,
  TK_ANCHOR,
  TK_REPEAT,
  TK_INTERVAL,         /* interval quantifier, see fetch_interval() */
  TK_ANYCHAR_ANYTIME,  /* SQL '%' == .* */
  TK_ALT,
  TK_SUBEXP_OPEN,
  TK_SUBEXP_CLOSE,
  TK_OPEN_CC,
  TK_QUOTE_OPEN,
  TK_CHAR_PROPERTY,    /* \p{...}, \P{...} */
  TK_KEEP,             /* \K */
  TK_GENERAL_NEWLINE,  /* \R */
  TK_NO_NEWLINE,       /* \N */
  TK_TRUE_ANYCHAR,     /* \O */
  TK_TEXT_SEGMENT,     /* \X */

  /* ---- only inside a character class ---- */
  TK_CC_CLOSE,               /* ] */
  TK_CC_RANGE,               /* - */
  TK_CC_POSIX_BRACKET_OPEN,  /* [: */
  TK_CC_AND,                 /* && */
  TK_CC_OPEN_CC              /* [ */
};
4497 
/*
  One token of the pattern, filled in by fetch_token() / fetch_token_cc().
  `type` tells which kind of token was read; the tokenizers store the
  accompanying data in the matching member of the union `u`.
*/
typedef struct {
  enum TokenSyms type;
  int code_point_continue; /* non-zero: the next fetch continues a code point
                              sequence via get_next_code_point() */
  int escaped;             /* 1 if the token was introduced by the escape char */
  int base_num;   /* is number: 8, 16 (used in [....]) */
  UChar* backp;            /* pattern position recorded by the tokenizer while
                              reading this token */
  union {
    UChar* s;
    UChar byte;            /* TK_CRUDE_BYTE */
    OnigCodePoint code;    /* TK_CHAR, TK_CODE_POINT */
    int   anchor;
    int   subtype;
    struct {               /* TK_REPEAT, TK_INTERVAL */
      int lower;
      int upper;           /* may be INFINITE_REPEAT */
      int greedy;
      int possessive;
    } repeat;
    struct {
      int  num;
      int  ref1;
      int* refs;
      int  by_name;
#ifdef USE_BACKREF_WITH_LEVEL
      int  exist_level;
      int  level;   /* \k<name+n> */
#endif
    } backref;
    struct {
      UChar* name;
      UChar* name_end;
      int    gnum;
      int    by_number;
    } call;
    struct {               /* TK_CHAR_TYPE, TK_CHAR_PROPERTY */
      int ctype;
      int not;             /* non-zero for the negated form (e.g. \W, \P{...}) */
    } prop;
  } u;
} PToken;
4538 
/*
  Prepare a token for its first use by the tokenizers.
  Only code_point_continue is reset here; the other fields are left
  untouched.
*/
static void
ptoken_init(PToken* tok)
{
  tok->code_point_continue = 0;
}
4544 
/*
  Parse the inside of an interval quantifier: {n}, {n,}, {n,m} and, when
  ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV is set, {,m}.
  On entry *src points just after the opening brace.

  On success, sets tok->type to TK_INTERVAL, fills tok->u.repeat.lower,
  .upper and .possessive, and advances *src past the closing brace.
  (tok->u.repeat.greedy is not set here.)

  return value:
     0: normal {n,m}
     2: fixed {n}
     1: not an interval, but tolerated because
        ONIG_SYN_ALLOW_INVALID_INTERVAL is set (*src is left unchanged)
    <0: error code
*/
static int
fetch_interval(UChar** src, UChar* end, PToken* tok, ParseEnv* env)
{
  int low, up, syn_allow, non_low = 0;
  int r = 0;
  OnigCodePoint c;
  OnigEncoding enc = env->enc;
  UChar* p = *src;
  PFETCH_READY;

  syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL);

  if (PEND) {
    if (syn_allow)
      return 1;  /* "....{" : OK! */
    else
      return ONIGERR_END_PATTERN_AT_LEFT_BRACE;  /* "....{" syntax error */
  }

  /* In strict mode a brace directly followed by ')', '(' or '|' is
     reported the same way as a brace at the end of the pattern. */
  if (! syn_allow) {
    c = PPEEK;
    if (c == ')' || c == '(' || c == '|') {
      return ONIGERR_END_PATTERN_AT_LEFT_BRACE;
    }
  }

  low = scan_number(&p, end, env->enc);
  if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
  if (low > ONIG_MAX_REPEAT_NUM)
    return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;

  if (p == *src) { /* can't read low */
    if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV)) {
      /* allow {,n} as {0,n} */
      low = 0;
      non_low = 1;
    }
    else
      goto invalid;
  }

  if (PEND) goto invalid;
  PFETCH(c);
  if (c == ',') {
    UChar* prev = p;
    up = scan_number(&p, end, env->enc);
    if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
    if (up > ONIG_MAX_REPEAT_NUM)
      return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;

    if (p == prev) {
      /* no upper bound was read; "{,}" (neither bound) is invalid */
      if (non_low != 0)
        goto invalid;
      up = INFINITE_REPEAT;  /* {n,} : {n,infinite} */
    }
  }
  else {
    /* no comma: a lower bound must have been read */
    if (non_low != 0)
      goto invalid;

    PUNFETCH;
    up = low;  /* {n} : exact n times */
    r = 2;     /* fixed */
  }

  if (PEND) goto invalid;
  PFETCH(c);
  /* When the interval is written with escaped braces, the closing brace
     must be preceded by the escape character as well. */
  if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) {
    if (c != MC_ESC(env->syntax) || PEND) goto invalid;
    PFETCH(c);
  }
  if (c != '}') goto invalid;

  /* Reversed range (lower > upper): an error when the syntax already
     provides the '+' possessive interval; otherwise it is taken as a
     possessive interval and the bounds are swapped. */
  if (!IS_INFINITE_REPEAT(up) && low > up) {
    /* {n,m}+ supported case */
    if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL))
      return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE;

    tok->u.repeat.possessive = 1;
    {
      int tmp;
      tmp = low; low = up; up = tmp;
    }
  }
  else
    tok->u.repeat.possessive = 0;

  tok->type = TK_INTERVAL;
  tok->u.repeat.lower = low;
  tok->u.repeat.upper = up;
  *src = p;
  return r; /* 0: normal {n,m}, 2: fixed {n} */

 invalid:
  if (syn_allow) {
    /* *src = p; */ /* !!! Don't do this line !!! */
    return 1;  /* OK */
  }
  else
    return ONIGERR_INVALID_REPEAT_RANGE_PATTERN;
}
4646 
/*
  Decode the value of an escape: \M-, \C-, \c, or \...
  On entry *src points at the character that follows the escape character.

    \M-x       : (x & 0xff) | 0x80   (ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META)
    \C-x, \cx  : x & 0x9f, and '?' gives 0177
                 (ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL /
                  ONIG_SYN_OP_ESC_C_CONTROL)
    otherwise  : conv_backslash_value(c, env)

  The x above may itself be an escape, which is decoded recursively.
  When the corresponding syntax option is off, 'M', 'C' and 'c' are
  handled like any other character.

  On success stores the value in *val, advances *src and returns 0;
  otherwise returns a negative error code and leaves *src unchanged.
*/
static int
fetch_escaped_value_raw(UChar** src, UChar* end, ParseEnv* env,
                        OnigCodePoint* val)
{
  int v;
  OnigCodePoint c;
  OnigEncoding enc = env->enc;
  UChar* p = *src;

  if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;

  PFETCH_S(c);
  switch (c) {
  case 'M':
    if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META)) {
      if (PEND) return ONIGERR_END_PATTERN_AT_META;
      PFETCH_S(c);
      if (c != '-') return ONIGERR_META_CODE_SYNTAX;
      if (PEND) return ONIGERR_END_PATTERN_AT_META;
      PFETCH_S(c);
      if (c == MC_ESC(env->syntax)) {
        /* nested escape: decode it first, then apply the meta bit */
        v = fetch_escaped_value_raw(&p, end, env, &c);
        if (v < 0) return v;
      }
      c = ((c & 0xff) | 0x80);
    }
    else
      goto backslash;
    break;

  case 'C':
    if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) {
      if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
      PFETCH_S(c);
      if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX;
      goto control;
    }
    else
      goto backslash;

  case 'c':
    if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_C_CONTROL)) {
    control:
      if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
      PFETCH_S(c);
      if (c == '?') {
        c = 0177;
      }
      else {
        if (c == MC_ESC(env->syntax)) {
          /* nested escape: decode it first, then mask to a control code */
          v = fetch_escaped_value_raw(&p, end, env, &c);
          if (v < 0) return v;
        }
        c &= 0x9f;
      }
      break;
    }
    /* fall through */

  default:
    {
    backslash:
      c = conv_backslash_value(c, env);
    }
    break;
  }

  *src = p;
  *val = c;
  return 0;
}
4719 
4720 static int
fetch_escaped_value(UChar ** src,UChar * end,ParseEnv * env,OnigCodePoint * val)4721 fetch_escaped_value(UChar** src, UChar* end, ParseEnv* env, OnigCodePoint* val)
4722 {
4723   int r;
4724   int len;
4725 
4726   r = fetch_escaped_value_raw(src, end, env, val);
4727   if (r != 0) return r;
4728 
4729   len = ONIGENC_CODE_TO_MBCLEN(env->enc, *val);
4730   if (len < 0) return len;
4731 
4732   return 0;
4733 }
4734 
/* Forward declaration; fetch_token() is defined further down in this file. */
static int fetch_token(PToken* tok, UChar** src, UChar* end, ParseEnv* env);
4736 
4737 static OnigCodePoint
get_name_end_code_point(OnigCodePoint start)4738 get_name_end_code_point(OnigCodePoint start)
4739 {
4740   switch (start) {
4741   case '<':  return (OnigCodePoint )'>';  break;
4742   case '\'': return (OnigCodePoint )'\''; break;
4743   case '(':  return (OnigCodePoint )')';  break;
4744   default:
4745     break;
4746   }
4747 
4748   return (OnigCodePoint )0;
4749 }
4750 
/* How fetch_name() / fetch_name_with_level() classified what they read. */
enum REF_NUM {
  IS_NOT_NUM,  /* 0: not a number */
  IS_ABS_NUM,  /* 1: begins with a digit */
  IS_REL_NUM   /* 2: begins with '-' or '+' */
};
4756 
#ifdef USE_BACKREF_WITH_LEVEL
/*
   \k<name+n>, \k<name-n>
   \k<num+n>,  \k<num-n>
   \k<-num+n>, \k<-num-n>
   \k<+num+n>, \k<+num-n>

   Read a group name or number, optionally followed by a signed level,
   up to the delimiter that matches start_code.

   Outputs on success:
     *rname_end : position where the name/number part ends
     *rback_num : signed group number when *num_type is not IS_NOT_NUM,
                  otherwise 0
     *rlevel    : the signed level; written only when a level is present
     *num_type  : IS_NOT_NUM, IS_ABS_NUM or IS_REL_NUM
     *src       : advanced past the closing delimiter

   return value:
      1: a level was given
      0: no level
     <0: error code (for most errors the offending text is recorded with
         onig_scan_env_set_error_string)
*/
static int
fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
                      UChar** rname_end, ParseEnv* env,
                      int* rback_num, int* rlevel, enum REF_NUM* num_type)
{
  int r, sign, exist_level;
  int digit_count;
  OnigCodePoint end_code;
  OnigCodePoint c = 0;
  OnigEncoding enc = env->enc;
  UChar *name_end;
  UChar *pnum_head;
  UChar *p = *src;
  PFETCH_READY;

  *rback_num = 0;
  exist_level = 0;
  *num_type = IS_NOT_NUM;
  sign = 1;
  pnum_head = *src;

  end_code = get_name_end_code_point(start_code);

  digit_count = 0;
  name_end = end;
  r = 0;
  if (PEND) {
    return ONIGERR_EMPTY_GROUP_NAME;
  }
  else {
    /* The first character decides between name, absolute number and
       relative number. */
    PFETCH(c);
    if (c == end_code)
      return ONIGERR_EMPTY_GROUP_NAME;

    if (IS_CODE_DIGIT_ASCII(enc, c)) {
      *num_type = IS_ABS_NUM;
      digit_count++;
    }
    else if (c == '-') {
      *num_type = IS_REL_NUM;
      sign = -1;
      pnum_head = p;  /* digits start after the sign */
    }
    else if (c == '+') {
      *num_type = IS_REL_NUM;
      sign = 1;
      pnum_head = p;  /* digits start after the sign */
    }
    else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
      r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
    }
  }

  /* Scan the rest of the name/number; stops at the delimiter, at ')',
     or at the '+' / '-' that introduces a level. */
  while (!PEND) {
    name_end = p;
    PFETCH(c);
    if (c == end_code || c == ')' || c == '+' || c == '-') {
      if (*num_type != IS_NOT_NUM && digit_count == 0)
        r = ONIGERR_INVALID_GROUP_NAME;
      break;
    }

    if (*num_type != IS_NOT_NUM) {
      if (IS_CODE_DIGIT_ASCII(enc, c)) {
        digit_count++;
      }
      else {
        r = ONIGERR_INVALID_GROUP_NAME;
        *num_type = IS_NOT_NUM;
      }
    }
    else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
      r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
    }
  }

  if (r == 0 && c != end_code) {
    if (c == '+' || c == '-') {
      /* signed level: at least one digit, then the closing delimiter */
      int level;
      int flag = (c == '-' ? -1 : 1);

      if (PEND) {
        r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
        goto end;
      }
      PFETCH(c);
      if (! IS_CODE_DIGIT_ASCII(enc, c)) goto err;
      PUNFETCH;
      level = scan_number(&p, end, enc);
      if (level < 0) return ONIGERR_TOO_BIG_NUMBER;
      *rlevel = (level * flag);
      exist_level = 1;

      if (!PEND) {
        PFETCH(c);
        if (c == end_code)
          goto end;
      }
    }

    /* Reached when the closing delimiter was not found. */
  err:
    name_end = end;
  err2:
    r = ONIGERR_INVALID_GROUP_NAME;
  }

 end:
  if (r == 0) {
    if (*num_type != IS_NOT_NUM) {
      *rback_num = scan_number(&pnum_head, name_end, enc);
      if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
      else if (*rback_num == 0) {
        /* a relative reference of 0 is rejected */
        if (*num_type == IS_REL_NUM)
          goto err2;
      }

      *rback_num *= sign;
    }

    *rname_end = name_end;
    *src = p;
    return (exist_level ? 1 : 0);
  }
  else {
    onig_scan_env_set_error_string(env, r, *src, name_end);
    return r;
  }
}
#endif /* USE_BACKREF_WITH_LEVEL */
4893 
/*
  Read a group name (or, for references, a group number) up to the
  delimiter that matches start_code.

  is_ref: 0 -> define name    (don't allow number name)
          1 -> reference name (allow number name)

  Outputs on success:
    *rname_end : position where the name/number ends
    *rback_num : signed group number when *num_type is not IS_NOT_NUM,
                 otherwise 0
    *num_type  : IS_NOT_NUM, IS_ABS_NUM or IS_REL_NUM
    *src       : advanced past the closing delimiter

  Returns 0 on success or a negative error code. For most errors the
  offending text is recorded with onig_scan_env_set_error_string.
*/
static int
fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
           UChar** rname_end, ParseEnv* env, int* rback_num,
           enum REF_NUM* num_type, int is_ref)
{
  int r, sign;
  int digit_count;
  OnigCodePoint end_code;
  OnigCodePoint c = 0;
  OnigEncoding enc = env->enc;
  UChar *name_end;
  UChar *pnum_head;
  UChar *p = *src;

  *rback_num = 0;

  end_code = get_name_end_code_point(start_code);

  digit_count = 0;
  name_end = end;
  pnum_head = *src;
  r = 0;
  *num_type = IS_NOT_NUM;
  sign = 1;
  if (PEND) {
    return ONIGERR_EMPTY_GROUP_NAME;
  }
  else {
    /* The first character decides between name, absolute number and
       relative number; numbers are only accepted for references. */
    PFETCH_S(c);
    if (c == end_code)
      return ONIGERR_EMPTY_GROUP_NAME;

    if (IS_CODE_DIGIT_ASCII(enc, c)) {
      if (is_ref == TRUE)
        *num_type = IS_ABS_NUM;
      else {
        r = ONIGERR_INVALID_GROUP_NAME;
      }
      digit_count++;
    }
    else if (c == '-') {
      if (is_ref == TRUE) {
        *num_type = IS_REL_NUM;
        sign = -1;
        pnum_head = p;  /* digits start after the sign */
      }
      else {
        r = ONIGERR_INVALID_GROUP_NAME;
      }
    }
    else if (c == '+') {
      if (is_ref == TRUE) {
        *num_type = IS_REL_NUM;
        sign = 1;
        pnum_head = p;  /* digits start after the sign */
      }
      else {
        r = ONIGERR_INVALID_GROUP_NAME;
      }
    }
    else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
      r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
    }
  }

  if (r == 0) {
    /* Scan the rest of the name/number up to the delimiter or ')'. */
    while (!PEND) {
      name_end = p;
      PFETCH_S(c);
      if (c == end_code || c == ')') {
        if (*num_type != IS_NOT_NUM && digit_count == 0)
          r = ONIGERR_INVALID_GROUP_NAME;
        break;
      }

      if (*num_type != IS_NOT_NUM) {
        if (IS_CODE_DIGIT_ASCII(enc, c)) {
          digit_count++;
        }
        else {
          if (!ONIGENC_IS_CODE_WORD(enc, c))
            r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
          else
            r = ONIGERR_INVALID_GROUP_NAME;

          *num_type = IS_NOT_NUM;
        }
      }
      else {
        if (!ONIGENC_IS_CODE_WORD(enc, c)) {
          r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
        }
      }
    }

    if (c != end_code) {
      r = ONIGERR_INVALID_GROUP_NAME;
      goto err;
    }

    if (*num_type != IS_NOT_NUM) {
      *rback_num = scan_number(&pnum_head, name_end, enc);
      if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
      else if (*rback_num == 0) {
        /* a relative reference of 0 is rejected */
        if (*num_type == IS_REL_NUM) {
          r = ONIGERR_INVALID_GROUP_NAME;
          goto err;
        }
      }

      *rback_num *= sign;
    }

    /* NOTE(review): r may have been set to an error inside the loop above
       (a non-word character, or a non-digit inside a number) and the
       closing delimiter still reached; r is not checked again before
       returning 0 here, whereas fetch_name_with_level() does check it.
       Confirm whether this leniency is intended. */
    *rname_end = name_end;
    *src = p;
    return 0;
  }
  else {
    /* The first character was already invalid: skip to the delimiter (or
       ')') only to determine the text range reported with the error. */
    while (!PEND) {
      name_end = p;
      PFETCH_S(c);
      if (c == end_code || c == ')')
        break;
    }
    if (PEND)
      name_end = end;

  err:
    onig_scan_env_set_error_string(env, r, *src, name_end);
    return r;
  }
}
5030 
5031 static void
CC_ESC_WARN(ParseEnv * env,UChar * c)5032 CC_ESC_WARN(ParseEnv* env, UChar *c)
5033 {
5034   if (onig_warn == onig_null_warn) return ;
5035 
5036   if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) &&
5037       IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) {
5038     UChar buf[WARN_BUFSIZE];
5039     onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
5040                                env->pattern, env->pattern_end,
5041                                (UChar* )"character class has '%s' without escape",
5042                                c);
5043     (*onig_warn)((char* )buf);
5044   }
5045 }
5046 
5047 static void
CLOSE_BRACKET_WITHOUT_ESC_WARN(ParseEnv * env,UChar * c)5048 CLOSE_BRACKET_WITHOUT_ESC_WARN(ParseEnv* env, UChar* c)
5049 {
5050   if (onig_warn == onig_null_warn) return ;
5051 
5052   if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) {
5053     UChar buf[WARN_BUFSIZE];
5054     onig_snprintf_with_pattern(buf, WARN_BUFSIZE, (env)->enc,
5055                          (env)->pattern, (env)->pattern_end,
5056                          (UChar* )"regular expression has '%s' without escape", c);
5057     (*onig_warn)((char* )buf);
5058   }
5059 }
5060 
5061 static UChar*
find_str_position(OnigCodePoint s[],int n,UChar * from,UChar * to,UChar ** next,OnigEncoding enc)5062 find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,
5063                   UChar **next, OnigEncoding enc)
5064 {
5065   int i;
5066   OnigCodePoint x;
5067   UChar *q;
5068   UChar *p = from;
5069 
5070   while (p < to) {
5071     x = ONIGENC_MBC_TO_CODE(enc, p, to);
5072     q = p + enclen(enc, p);
5073     if (x == s[0]) {
5074       for (i = 1; i < n && q < to; i++) {
5075         x = ONIGENC_MBC_TO_CODE(enc, q, to);
5076         if (x != s[i]) break;
5077         q += enclen(enc, q);
5078       }
5079       if (i >= n) {
5080         if (IS_NOT_NULL(next))
5081           *next = q;
5082         return p;
5083       }
5084     }
5085     p = q;
5086   }
5087   return NULL_UCHARP;
5088 }
5089 
5090 static int
str_exist_check_with_esc(OnigCodePoint s[],int n,UChar * from,UChar * to,OnigCodePoint bad,OnigEncoding enc,OnigSyntaxType * syn)5091 str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,
5092                          OnigCodePoint bad, OnigEncoding enc, OnigSyntaxType* syn)
5093 {
5094   int i, in_esc;
5095   OnigCodePoint x;
5096   UChar *q;
5097   UChar *p = from;
5098 
5099   in_esc = 0;
5100   while (p < to) {
5101     if (in_esc) {
5102       in_esc = 0;
5103       p += enclen(enc, p);
5104     }
5105     else {
5106       x = ONIGENC_MBC_TO_CODE(enc, p, to);
5107       q = p + enclen(enc, p);
5108       if (x == s[0]) {
5109         for (i = 1; i < n && q < to; i++) {
5110           x = ONIGENC_MBC_TO_CODE(enc, q, to);
5111           if (x != s[i]) break;
5112           q += enclen(enc, q);
5113         }
5114         if (i >= n) return 1;
5115         p += enclen(enc, p);
5116       }
5117       else {
5118         x = ONIGENC_MBC_TO_CODE(enc, p, to);
5119         if (x == bad) return 0;
5120         else if (x == MC_ESC(syn)) in_esc = 1;
5121         p = q;
5122       }
5123     }
5124   }
5125   return 0;
5126 }
5127 
/*
  Fetch the next token while scanning the inside of a character class.

  tok   : receives the token; tok->code_point_continue carries state from
          the previous call when a code point sequence is being read
  src   : in/out scan position; updated when a token is produced (it is
          left unchanged on TK_EOT and on errors)
  end   : end of the pattern
  state : compared with CS_RANGE to choose the initial state passed to
          check_code_point_sequence_cc()

  Returns the token type (also stored in tok->type), or a negative error
  code.
*/
static int
fetch_token_cc(PToken* tok, UChar** src, UChar* end, ParseEnv* env, int state)
{
  int r;
  OnigCodePoint code;
  OnigCodePoint c, c2;
  int mindigits, maxdigits;
  OnigSyntaxType* syn = env->syntax;
  OnigEncoding enc = env->enc;
  UChar* prev;
  UChar* p = *src;
  PFETCH_READY;

  /* Continue a code point sequence started by an earlier call. */
  if (tok->code_point_continue != 0) {
    r = get_next_code_point(&p, end, tok->base_num, enc, TRUE, &code);
    if (r == 1) {
      /* sequence finished: go on and read an ordinary token */
      tok->code_point_continue = 0;
    }
    else if (r == 2) {
      tok->type = TK_CC_RANGE;
      goto end;
    }
    else if (r == 0) {
      tok->type   = TK_CODE_POINT;
      tok->u.code = code;
      goto end;
    }
    else
      return r; /* error */
  }

  if (PEND) {
    tok->type = TK_EOT;
    return tok->type;
  }

  /* Default: a plain character; refined below. */
  PFETCH(c);
  tok->type = TK_CHAR;
  tok->base_num = 0;
  tok->u.code   = c;
  tok->escaped  = 0;

  if (c == ']') {
    tok->type = TK_CC_CLOSE;
  }
  else if (c == '-') {
    tok->type = TK_CC_RANGE;
  }
  else if (c == MC_ESC(syn)) {
    /* Without backslash escapes in classes the escape char is literal. */
    if (! IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC))
      goto end;

    if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;

    PFETCH(c);
    tok->escaped = 1;
    tok->u.code = c;
    switch (c) {
    case 'w':
      tok->type = TK_CHAR_TYPE;
      tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
      tok->u.prop.not   = 0;
      break;
    case 'W':
      tok->type = TK_CHAR_TYPE;
      tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
      tok->u.prop.not   = 1;
      break;
    case 'd':
      tok->type = TK_CHAR_TYPE;
      tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
      tok->u.prop.not   = 0;
      break;
    case 'D':
      tok->type = TK_CHAR_TYPE;
      tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
      tok->u.prop.not   = 1;
      break;
    case 's':
      tok->type = TK_CHAR_TYPE;
      tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
      tok->u.prop.not   = 0;
      break;
    case 'S':
      tok->type = TK_CHAR_TYPE;
      tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
      tok->u.prop.not   = 1;
      break;
    case 'h':
      if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
      tok->type = TK_CHAR_TYPE;
      tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
      tok->u.prop.not   = 0;
      break;
    case 'H':
      if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
      tok->type = TK_CHAR_TYPE;
      tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
      tok->u.prop.not   = 1;
      break;

    case 'p':
    case 'P':
      if (PEND) break;

      c2 = PPEEK;
      if (c2 == '{' &&
          IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
        PINC;
        tok->type = TK_CHAR_PROPERTY;
        tok->u.prop.not = c == 'P';

        /* a leading '^' inside the braces inverts the negation */
        if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
          PFETCH(c2);
          if (c2 == '^') {
            tok->u.prop.not = tok->u.prop.not == 0;
          }
          else
            PUNFETCH;
        }
      }
      break;

    case 'o':
      if (PEND) break;

      prev = p;
      if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) {
        PINC;
        r = scan_octal_number(&p, end, 0, 11, enc, &code);
        if (r < 0) return r;
        if (!PEND) {
          c2 = PPEEK;
          if (IS_CODE_DIGIT_ASCII(enc, c2))
            return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
        }

        tok->base_num = 8;
        /* shares the brace handling of the 'x' case below */
        goto brace_code_point_entry;
      }
      break;

    case 'x':
      if (PEND) break;

      prev = p;
      if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
        PINC;
        r = scan_hexadecimal_number(&p, end, 0, 8, enc, &code);
        if (r < 0) return r;
        if (!PEND) {
          c2 = PPEEK;
          if (IS_CODE_XDIGIT_ASCII(enc, c2))
            return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
        }

        tok->base_num = 16;
      brace_code_point_entry:
        /* p moved beyond the brace: at least one digit was read */
        if ((p > prev + enclen(enc, prev))) {
          if (PEND) return ONIGERR_INVALID_CODE_POINT_VALUE;
          if (PPEEK_IS('}')) {
            PINC;
          }
          else {
            /* not closed yet: validate the rest as a code point sequence
               and let the following calls deliver the remaining values */
            int curr_state;

            curr_state = (state == CS_RANGE) ? CPS_EMPTY : CPS_START;
            r = check_code_point_sequence_cc(p, end, tok->base_num, enc,
                                             curr_state);
            if (r < 0) return r;
            if (r == 0) return ONIGERR_INVALID_CODE_POINT_VALUE;
            tok->code_point_continue = TRUE;
          }
          tok->type   = TK_CODE_POINT;
          tok->u.code = code;
        }
        else {
          /* can't read nothing or invalid format */
          p = prev;
        }
      }
      else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
        r = scan_hexadecimal_number(&p, end, 0, 2, enc, &code);
        if (r < 0) return r;
        if (p == prev) {  /* can't read nothing. */
          code = 0; /* but, it's not error */
        }
        tok->type = TK_CRUDE_BYTE;
        tok->base_num = 16;
        tok->u.byte   = (UChar )code;
      }
      break;

    case 'u':
      if (PEND) break;
      prev = p;
      if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
        mindigits = maxdigits = 4;
      u_hex_digits:
        r = scan_hexadecimal_number(&p, end, mindigits, maxdigits, enc, &code);
        if (r < 0) return r;
        if (p == prev) {  /* can't read nothing. */
          code = 0; /* but, it's not error */
        }
        tok->type = TK_CODE_POINT;
        tok->base_num = 16;
        tok->u.code   = code;
      }
      break;

    case 'U':
      if (PEND) break;
      prev = p;
      if (IS_SYNTAX_BV(syn, ONIG_SYN_PYTHON)) {
        /* same as 'u' but with exactly eight hex digits */
        mindigits = maxdigits = 8;
        goto u_hex_digits;
      }
      break;

    case '0':
    case '1': case '2': case '3': case '4': case '5': case '6': case '7':
      if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
        /* put the digit back so that it is part of the scanned number */
        PUNFETCH;
        prev = p;
        r = scan_octal_number(&p, end, 0, 3, enc, &code);
        if (r < 0) return r;
        if (code >= 256) return ONIGERR_TOO_BIG_NUMBER;
        if (p == prev) {  /* can't read nothing. */
          code = 0; /* but, it's not error */
        }
        tok->type = TK_CRUDE_BYTE;
        tok->base_num = 8;
        tok->u.byte   = (UChar )code;
      }
      break;

    default:
      /* any other escape: decode it; the token becomes a code point only
         if the decoded value differs from the escaped character */
      PUNFETCH;
      r = fetch_escaped_value(&p, end, env, &c2);
      if (r < 0) return r;
      if (tok->u.code != c2) {
        tok->u.code = c2;
        tok->type   = TK_CODE_POINT;
      }
      break;
    }
  }
  else if (c == '[') {
    if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && (PPEEK_IS(':'))) {
      OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' };
      tok->backp = p; /* point at '[' is read */
      PINC;
      /* treated as a POSIX bracket only if a closing ":]" follows before
         an unescaped ']' */
      if (str_exist_check_with_esc(send, 2, p, end,
                                   (OnigCodePoint )']', enc, syn)) {
        tok->type = TK_CC_POSIX_BRACKET_OPEN;
      }
      else {
        PUNFETCH;
        goto cc_in_cc;
      }
    }
    else {
    cc_in_cc:
      if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP)) {
        tok->type = TK_CC_OPEN_CC;
      }
      else {
        /* stays TK_CHAR; only a warning is issued */
        CC_ESC_WARN(env, (UChar* )"[");
      }
    }
  }
  else if (c == '&') {
    if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP) &&
        !PEND && (PPEEK_IS('&'))) {
      PINC;
      tok->type = TK_CC_AND;
    }
  }

 end:
  *src = p;
  return tok->type;
}
5411 
5412 static int
fetch_token(PToken * tok,UChar ** src,UChar * end,ParseEnv * env)5413 fetch_token(PToken* tok, UChar** src, UChar* end, ParseEnv* env)
5414 {
5415   int r;
5416   OnigCodePoint code;
5417   OnigCodePoint c;
5418   int mindigits, maxdigits;
5419   UChar* prev;
5420   int allow_num;
5421   OnigEncoding enc;
5422   OnigSyntaxType* syn;
5423   UChar* p;
5424 
5425   enc = env->enc;
5426   syn = env->syntax;
5427   p = *src;
5428 
5429   PFETCH_READY;
5430 
5431   if (tok->code_point_continue != 0) {
5432     r = get_next_code_point(&p, end, tok->base_num, enc, FALSE, &code);
5433     if (r == 1) {
5434       tok->code_point_continue = 0;
5435     }
5436     else if (r == 0) {
5437       tok->type   = TK_CODE_POINT;
5438       tok->u.code = code;
5439       goto out;
5440     }
5441     else
5442       return r; /* error */
5443   }
5444 
5445  start:
5446   if (PEND) {
5447     tok->type = TK_EOT;
5448     return tok->type;
5449   }
5450 
5451   tok->type = TK_STRING;
5452   tok->base_num = 0;
5453   tok->backp    = p;
5454 
5455   PFETCH(c);
5456   if (IS_MC_ESC_CODE(c, syn)) {
5457     if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
5458 
5459     tok->backp = p;
5460     PFETCH(c);
5461 
5462     tok->u.code = c;
5463     tok->escaped = 1;
5464     switch (c) {
5465     case '*':
5466       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF)) break;
5467       tok->type = TK_REPEAT;
5468       tok->u.repeat.lower = 0;
5469       tok->u.repeat.upper = INFINITE_REPEAT;
5470       goto greedy_check;
5471       break;
5472 
5473     case '+':
5474       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break;
5475       tok->type = TK_REPEAT;
5476       tok->u.repeat.lower = 1;
5477       tok->u.repeat.upper = INFINITE_REPEAT;
5478       goto greedy_check;
5479       break;
5480 
5481     case '?':
5482       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_QMARK_ZERO_ONE)) break;
5483       tok->type = TK_REPEAT;
5484       tok->u.repeat.lower = 0;
5485       tok->u.repeat.upper = 1;
5486     greedy_check:
5487       tok->u.repeat.possessive = 0;
5488     greedy_check2:
5489       if (!PEND && PPEEK_IS('?') &&
5490           IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY) &&
5491           tok->u.repeat.possessive == 0) {
5492         PFETCH(c);
5493         tok->u.repeat.greedy = 0;
5494         tok->u.repeat.possessive = 0;
5495       }
5496       else {
5497       possessive_check:
5498         tok->u.repeat.greedy = 1;
5499         if (!PEND && PPEEK_IS('+') &&
5500             ((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) &&
5501               tok->type != TK_INTERVAL)  ||
5502              (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) &&
5503               tok->type == TK_INTERVAL)) &&
5504           tok->u.repeat.possessive == 0) {
5505           PFETCH(c);
5506           tok->u.repeat.possessive = 1;
5507         }
5508       }
5509       break;
5510 
5511     case '{':
5512       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break;
5513       r = fetch_interval(&p, end, tok, env);
5514       if (r < 0) return r;  /* error */
5515       if (r == 0) goto greedy_check2;
5516       else if (r == 2) { /* {n} */
5517         if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))
5518           goto possessive_check;
5519 
5520         goto greedy_check2;
5521       }
5522       /* r == 1 : normal char */
5523       break;
5524 
5525     case '|':
5526       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_VBAR_ALT)) break;
5527       tok->type = TK_ALT;
5528       break;
5529 
5530     case '(':
5531       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
5532       tok->type = TK_SUBEXP_OPEN;
5533       break;
5534 
5535     case ')':
5536       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
5537       tok->type = TK_SUBEXP_CLOSE;
5538       break;
5539 
5540     case 'w':
5541       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
5542       tok->type = TK_CHAR_TYPE;
5543       tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
5544       tok->u.prop.not   = 0;
5545       break;
5546 
5547     case 'W':
5548       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
5549       tok->type = TK_CHAR_TYPE;
5550       tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
5551       tok->u.prop.not   = 1;
5552       break;
5553 
5554     case 'b':
5555       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
5556       tok->type = TK_ANCHOR;
5557       tok->u.anchor = ANCR_WORD_BOUNDARY;
5558       break;
5559 
5560     case 'B':
5561       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
5562       tok->type = TK_ANCHOR;
5563       tok->u.anchor = ANCR_NO_WORD_BOUNDARY;
5564       break;
5565 
5566     case 'y':
5567       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT)) break;
5568       tok->type = TK_ANCHOR;
5569       tok->u.anchor = ANCR_TEXT_SEGMENT_BOUNDARY;
5570       break;
5571 
5572     case 'Y':
5573       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT)) break;
5574       tok->type = TK_ANCHOR;
5575       tok->u.anchor = ANCR_NO_TEXT_SEGMENT_BOUNDARY;
5576       break;
5577 
5578 #ifdef USE_WORD_BEGIN_END
5579     case '<':
5580       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;
5581       tok->type = TK_ANCHOR;
5582       tok->u.anchor = ANCR_WORD_BEGIN;
5583       break;
5584 
5585     case '>':
5586       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;
5587       tok->type = TK_ANCHOR;
5588       tok->u.anchor = ANCR_WORD_END;
5589       break;
5590 #endif
5591 
5592     case 's':
5593       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
5594       tok->type = TK_CHAR_TYPE;
5595       tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
5596       tok->u.prop.not   = 0;
5597       break;
5598 
5599     case 'S':
5600       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
5601       tok->type = TK_CHAR_TYPE;
5602       tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
5603       tok->u.prop.not   = 1;
5604       break;
5605 
5606     case 'd':
5607       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
5608       tok->type = TK_CHAR_TYPE;
5609       tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
5610       tok->u.prop.not   = 0;
5611       break;
5612 
5613     case 'D':
5614       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
5615       tok->type = TK_CHAR_TYPE;
5616       tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
5617       tok->u.prop.not   = 1;
5618       break;
5619 
5620     case 'h':
5621       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
5622       tok->type = TK_CHAR_TYPE;
5623       tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
5624       tok->u.prop.not   = 0;
5625       break;
5626 
5627     case 'H':
5628       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
5629       tok->type = TK_CHAR_TYPE;
5630       tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
5631       tok->u.prop.not   = 1;
5632       break;
5633 
5634     case 'K':
5635       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP)) break;
5636       tok->type = TK_KEEP;
5637       break;
5638 
5639     case 'R':
5640       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE)) break;
5641       tok->type = TK_GENERAL_NEWLINE;
5642       break;
5643 
5644     case 'N':
5645       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT)) break;
5646       tok->type = TK_NO_NEWLINE;
5647       break;
5648 
5649     case 'O':
5650       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT)) break;
5651       tok->type = TK_TRUE_ANYCHAR;
5652       break;
5653 
5654     case 'X':
5655       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT)) break;
5656       tok->type = TK_TEXT_SEGMENT;
5657       break;
5658 
5659     case 'A':
5660       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
5661     begin_buf:
5662       tok->type = TK_ANCHOR;
5663       tok->u.subtype = ANCR_BEGIN_BUF;
5664       break;
5665 
5666     case 'Z':
5667       if (IS_SYNTAX_BV(syn, ONIG_SYN_PYTHON)) {
5668         goto end_buf;
5669       }
5670       else {
5671         if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
5672         tok->type = TK_ANCHOR;
5673         tok->u.subtype = ANCR_SEMI_END_BUF;
5674       }
5675       break;
5676 
5677     case 'z':
5678       if (IS_SYNTAX_BV(syn, ONIG_SYN_PYTHON))
5679         return ONIGERR_UNDEFINED_OPERATOR;
5680 
5681       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
5682     end_buf:
5683       tok->type = TK_ANCHOR;
5684       tok->u.subtype = ANCR_END_BUF;
5685       break;
5686 
5687     case 'G':
5688       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR)) break;
5689       tok->type = TK_ANCHOR;
5690       tok->u.subtype = ANCR_BEGIN_POSITION;
5691       break;
5692 
5693     case '`':
5694       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;
5695       goto begin_buf;
5696       break;
5697 
5698     case '\'':
5699       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;
5700       goto end_buf;
5701       break;
5702 
5703     case 'o':
5704       if (PEND) break;
5705 
5706       prev = p;
5707       if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) {
5708         PINC;
5709         r = scan_octal_number(&p, end, 0, 11, enc, &code);
5710         if (r < 0) return r;
5711         if (!PEND) {
5712           if (IS_CODE_DIGIT_ASCII(enc, PPEEK))
5713             return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
5714         }
5715 
5716         tok->base_num = 8;
5717         goto brace_code_point_entry;
5718       }
5719       break;
5720 
5721     case 'x':
5722       if (PEND) break;
5723 
5724       prev = p;
5725       if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
5726         PINC;
5727         r = scan_hexadecimal_number(&p, end, 0, 8, enc, &code);
5728         if (r < 0) return r;
5729         if (!PEND) {
5730           if (IS_CODE_XDIGIT_ASCII(enc, PPEEK))
5731             return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
5732         }
5733 
5734         tok->base_num = 16;
5735       brace_code_point_entry:
5736         if ((p > prev + enclen(enc, prev))) {
5737           if (PEND) return ONIGERR_INVALID_CODE_POINT_VALUE;
5738           if (PPEEK_IS('}')) {
5739             PINC;
5740           }
5741           else {
5742             r = check_code_point_sequence(p, end, tok->base_num, enc);
5743             if (r < 0) return r;
5744             if (r == 0) return ONIGERR_INVALID_CODE_POINT_VALUE;
5745             tok->code_point_continue = TRUE;
5746           }
5747           tok->type   = TK_CODE_POINT;
5748           tok->u.code = code;
5749         }
5750         else {
5751           /* can't read nothing or invalid format */
5752           p = prev;
5753         }
5754       }
5755       else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
5756         r = scan_hexadecimal_number(&p, end, 0, 2, enc, &code);
5757         if (r < 0) return r;
5758         if (p == prev) {  /* can't read nothing. */
5759           code = 0; /* but, it's not error */
5760         }
5761         tok->type = TK_CRUDE_BYTE;
5762         tok->base_num = 16;
5763         tok->u.byte   = (UChar )code;
5764       }
5765       break;
5766 
5767     case 'u':
5768       if (PEND) break;
5769       prev = p;
5770       mindigits = maxdigits = 4;
5771       if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
5772     u_hex_digits:
5773         r = scan_hexadecimal_number(&p, end, mindigits, maxdigits, enc, &code);
5774         if (r < 0) return r;
5775         if (p == prev) {  /* can't read nothing. */
5776           code = 0; /* but, it's not error */
5777         }
5778         tok->type = TK_CODE_POINT;
5779         tok->base_num = 16;
5780         tok->u.code   = code;
5781       }
5782       break;
5783 
5784     case 'U':
5785       if (PEND) break;
5786       prev = p;
5787       if (IS_SYNTAX_BV(syn, ONIG_SYN_PYTHON)) {
5788         mindigits = maxdigits = 8;
5789         goto u_hex_digits;
5790       }
5791       break;
5792 
5793     case '1': case '2': case '3': case '4':
5794     case '5': case '6': case '7': case '8': case '9':
5795       PUNFETCH;
5796       prev = p;
5797       r = scan_number(&p, end, enc);
5798       if (r < 0 || r > ONIG_MAX_BACKREF_NUM) {
5799         goto skip_backref;
5800       }
5801 
5802       if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) &&
5803           (r <= env->num_mem || r <= 9)) { /* This spec. from GNU regex */
5804         if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
5805           if (r > env->num_mem || IS_NULL(PARSEENV_MEMENV(env)[r].mem_node))
5806             return ONIGERR_INVALID_BACKREF;
5807         }
5808 
5809         tok->type = TK_BACKREF;
5810         tok->u.backref.num     = 1;
5811         tok->u.backref.ref1    = r;
5812         tok->u.backref.by_name = 0;
5813 #ifdef USE_BACKREF_WITH_LEVEL
5814         tok->u.backref.exist_level = 0;
5815 #endif
5816         break;
5817       }
5818 
5819     skip_backref:
5820       if (c == '8' || c == '9') {
5821         /* normal char */
5822         p = prev; PINC;
5823         break;
5824       }
5825 
5826       p = prev;
5827       /* fall through */
5828     case '0':
5829       if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
5830         prev = p;
5831         r = scan_octal_number(&p, end, 0, (c == '0' ? 2:3), enc, &code);
5832         if (r < 0 || r >= 256) return ONIGERR_TOO_BIG_NUMBER;
5833         if (p == prev) {  /* can't read nothing. */
5834           code = 0; /* but, it's not error */
5835         }
5836         tok->type = TK_CRUDE_BYTE;
5837         tok->base_num = 8;
5838         tok->u.byte   = (UChar )code;
5839       }
5840       else if (c != '0') {
5841         PINC;
5842       }
5843       break;
5844 
5845     case 'k':
5846       if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) {
5847         PFETCH(c);
5848         if (c == '<' || c == '\'') {
5849           UChar* name_end;
5850           int* backs;
5851           int back_num;
5852           enum REF_NUM num_type;
5853 
5854           allow_num = 1;
5855 
5856         backref_start:
5857           prev = p;
5858 
5859 #ifdef USE_BACKREF_WITH_LEVEL
5860           name_end = NULL_UCHARP; /* no need. escape gcc warning. */
5861           r = fetch_name_with_level((OnigCodePoint )c, &p, end, &name_end,
5862                                  env, &back_num, &tok->u.backref.level, &num_type);
5863           if (r == 1) tok->u.backref.exist_level = 1;
5864           else        tok->u.backref.exist_level = 0;
5865 #else
5866           r = fetch_name(c, &p, end, &name_end, env, &back_num, &num_type, TRUE);
5867 #endif
5868           if (r < 0) return r;
5869 
5870           if (num_type != IS_NOT_NUM) {
5871             if (allow_num == 0) return ONIGERR_INVALID_BACKREF;
5872 
5873             if (num_type == IS_REL_NUM) {
5874               back_num = backref_rel_to_abs(back_num, env);
5875             }
5876             if (back_num <= 0)
5877               return ONIGERR_INVALID_BACKREF;
5878 
5879             if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
5880               if (back_num > env->num_mem ||
5881                   IS_NULL(PARSEENV_MEMENV(env)[back_num].mem_node))
5882                 return ONIGERR_INVALID_BACKREF;
5883             }
5884             tok->type = TK_BACKREF;
5885             tok->u.backref.by_name = 0;
5886             tok->u.backref.num  = 1;
5887             tok->u.backref.ref1 = back_num;
5888           }
5889           else {
5890             int num = name_to_group_numbers(env, prev, name_end, &backs);
5891             if (num <= 0) {
5892               return ONIGERR_UNDEFINED_NAME_REFERENCE;
5893             }
5894             if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
5895               int i;
5896               for (i = 0; i < num; i++) {
5897                 if (backs[i] > env->num_mem ||
5898                     IS_NULL(PARSEENV_MEMENV(env)[backs[i]].mem_node))
5899                   return ONIGERR_INVALID_BACKREF;
5900               }
5901             }
5902 
5903             tok->type = TK_BACKREF;
5904             tok->u.backref.by_name = 1;
5905             if (num == 1) {
5906               tok->u.backref.num  = 1;
5907               tok->u.backref.ref1 = backs[0];
5908             }
5909             else {
5910               tok->u.backref.num  = num;
5911               tok->u.backref.refs = backs;
5912             }
5913           }
5914         }
5915         else
5916           PUNFETCH;
5917       }
5918       break;
5919 
5920 #ifdef USE_CALL
5921     case 'g':
5922       if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) {
5923         PFETCH(c);
5924         if (c == '<' || c == '\'') {
5925           int gnum;
5926           UChar* name_end;
5927           enum REF_NUM num_type;
5928 
5929           allow_num = 1;
5930 
5931         call_start:
5932           prev = p;
5933           r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env,
5934                          &gnum, &num_type, TRUE);
5935           if (r < 0) return r;
5936 
5937           if (num_type != IS_NOT_NUM) {
5938             if (allow_num == 0) return ONIGERR_UNDEFINED_GROUP_REFERENCE;
5939 
5940             if (num_type == IS_REL_NUM) {
5941               gnum = backref_rel_to_abs(gnum, env);
5942               if (gnum < 0) {
5943                 onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,
5944                                                prev, name_end);
5945                 return ONIGERR_UNDEFINED_GROUP_REFERENCE;
5946               }
5947             }
5948             tok->u.call.by_number = 1;
5949             tok->u.call.gnum      = gnum;
5950           }
5951           else {
5952             tok->u.call.by_number = 0;
5953             tok->u.call.gnum      = 0;
5954           }
5955 
5956           tok->type = TK_CALL;
5957           tok->u.call.name     = prev;
5958           tok->u.call.name_end = name_end;
5959         }
5960         else
5961           PUNFETCH;
5962       }
5963       break;
5964 #endif
5965 
5966     case 'Q':
5967       if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE)) {
5968         tok->type = TK_QUOTE_OPEN;
5969       }
5970       break;
5971 
5972     case 'p':
5973     case 'P':
5974       if (!PEND && PPEEK_IS('{') &&
5975           IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
5976         PINC;
5977         tok->type = TK_CHAR_PROPERTY;
5978         tok->u.prop.not = c == 'P';
5979 
5980         if (!PEND &&
5981             IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
5982           PFETCH(c);
5983           if (c == '^') {
5984             tok->u.prop.not = tok->u.prop.not == 0;
5985           }
5986           else
5987             PUNFETCH;
5988         }
5989       }
5990       break;
5991 
5992     default:
5993       {
5994         OnigCodePoint c2;
5995 
5996         PUNFETCH;
5997         r = fetch_escaped_value(&p, end, env, &c2);
5998         if (r < 0) return r;
5999         if (tok->u.code != c2) {
6000           tok->type = TK_CODE_POINT;
6001           tok->u.code = c2;
6002         }
6003         else { /* string */
6004           p = tok->backp + enclen(enc, tok->backp);
6005         }
6006       }
6007       break;
6008     }
6009   }
6010   else {
6011     tok->u.code = c;
6012     tok->escaped = 0;
6013 
6014 #ifdef USE_VARIABLE_META_CHARS
6015     if ((c != ONIG_INEFFECTIVE_META_CHAR) &&
6016         IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) {
6017       if (c == MC_ANYCHAR(syn))
6018         goto any_char;
6019       else if (c == MC_ANYTIME(syn))
6020         goto any_time;
6021       else if (c == MC_ZERO_OR_ONE_TIME(syn))
6022         goto zero_or_one_time;
6023       else if (c == MC_ONE_OR_MORE_TIME(syn))
6024         goto one_or_more_time;
6025       else if (c == MC_ANYCHAR_ANYTIME(syn)) {
6026         tok->type = TK_ANYCHAR_ANYTIME;
6027         goto out;
6028       }
6029     }
6030 #endif
6031 
6032     switch (c) {
6033     case '.':
6034       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break;
6035 #ifdef USE_VARIABLE_META_CHARS
6036     any_char:
6037 #endif
6038       tok->type = TK_ANYCHAR;
6039       break;
6040 
6041     case '*':
6042       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break;
6043 #ifdef USE_VARIABLE_META_CHARS
6044     any_time:
6045 #endif
6046       tok->type = TK_REPEAT;
6047       tok->u.repeat.lower = 0;
6048       tok->u.repeat.upper = INFINITE_REPEAT;
6049       goto greedy_check;
6050       break;
6051 
6052     case '+':
6053       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break;
6054 #ifdef USE_VARIABLE_META_CHARS
6055     one_or_more_time:
6056 #endif
6057       tok->type = TK_REPEAT;
6058       tok->u.repeat.lower = 1;
6059       tok->u.repeat.upper = INFINITE_REPEAT;
6060       goto greedy_check;
6061       break;
6062 
6063     case '?':
6064       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break;
6065 #ifdef USE_VARIABLE_META_CHARS
6066     zero_or_one_time:
6067 #endif
6068       tok->type = TK_REPEAT;
6069       tok->u.repeat.lower = 0;
6070       tok->u.repeat.upper = 1;
6071       goto greedy_check;
6072       break;
6073 
6074     case '{':
6075       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break;
6076       r = fetch_interval(&p, end, tok, env);
6077       if (r < 0) return r;  /* error */
6078       if (r == 0) goto greedy_check2;
6079       else if (r == 2) { /* {n} */
6080         if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))
6081           goto possessive_check;
6082 
6083         goto greedy_check2;
6084       }
6085       /* r == 1 : normal char */
6086       break;
6087 
6088     case '|':
6089       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_VBAR_ALT)) break;
6090       tok->type = TK_ALT;
6091       break;
6092 
6093     case '(':
6094       if (!PEND && PPEEK_IS('?') &&
6095           IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
6096         prev = p;
6097         PINC;
6098         if (! PEND) {
6099           c = PPEEK;
6100           if (c == '#') {
6101             PFETCH(c);
6102             while (1) {
6103               if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6104               PFETCH(c);
6105               if (c == MC_ESC(syn)) {
6106                 if (! PEND) PFETCH(c);
6107               }
6108               else {
6109                 if (c == ')') break;
6110               }
6111             }
6112             goto start;
6113           }
6114           else if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_PERL_SUBEXP_CALL)) {
6115             int gnum;
6116             UChar* name;
6117             UChar* name_end;
6118             enum REF_NUM num_type;
6119 
6120             switch (c) {
6121             case '&':
6122               {
6123                 PINC;
6124                 name = p;
6125                 r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env,
6126                                &gnum, &num_type, FALSE);
6127                 if (r < 0) return r;
6128 
6129                 tok->type = TK_CALL;
6130                 tok->u.call.by_number = 0;
6131                 tok->u.call.gnum      = 0;
6132                 tok->u.call.name      = name;
6133                 tok->u.call.name_end  = name_end;
6134               }
6135               break;
6136 
6137             case 'R':
6138               tok->type = TK_CALL;
6139               tok->u.call.by_number = 1;
6140               tok->u.call.gnum      = 0;
6141               tok->u.call.name      = p;
6142               PINC;
6143               if (! PPEEK_IS(')')) return ONIGERR_UNDEFINED_GROUP_OPTION;
6144               tok->u.call.name_end  = p;
6145               break;
6146 
6147             case '-':
6148             case '+':
6149               goto lparen_qmark_num;
6150               break;
6151             default:
6152               if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto lparen_qmark_end;
6153 
6154             lparen_qmark_num:
6155               {
6156                 name = p;
6157                 r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env,
6158                                &gnum, &num_type, TRUE);
6159                 if (r < 0) return r;
6160 
6161                 if (num_type == IS_NOT_NUM) {
6162                   return ONIGERR_INVALID_GROUP_NAME;
6163                 }
6164                 else {
6165                   if (num_type == IS_REL_NUM) {
6166                     gnum = backref_rel_to_abs(gnum, env);
6167                     if (gnum < 0) {
6168                       onig_scan_env_set_error_string(env,
6169                              ONIGERR_UNDEFINED_NAME_REFERENCE, name, name_end);
6170                       return ONIGERR_UNDEFINED_GROUP_REFERENCE;
6171                     }
6172                   }
6173                   tok->u.call.by_number = 1;
6174                   tok->u.call.gnum      = gnum;
6175                 }
6176 
6177                 tok->type = TK_CALL;
6178                 tok->u.call.name     = name;
6179                 tok->u.call.name_end = name_end;
6180               }
6181               break;
6182             }
6183           }
6184           else if (c == 'P' &&
6185                    IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_CAPITAL_P_NAME)) {
6186             PINC; /* skip 'P' */
6187             if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6188             PFETCH(c);
6189             allow_num = 0;
6190             if (c == '=') {
6191               c = '(';
6192               goto backref_start;
6193             }
6194             else if (c == '>') {
6195 #ifdef USE_CALL
6196               c = '(';
6197               goto call_start;
6198 #else
6199               return ONIGERR_UNDEFINED_OPERATOR;
6200 #endif
6201             }
6202             else {
6203               p = prev;
6204               goto lparen_qmark_end2;
6205             }
6206           }
6207         }
6208       lparen_qmark_end:
6209         PUNFETCH;
6210       }
6211 
6212     lparen_qmark_end2:
6213       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
6214       tok->type = TK_SUBEXP_OPEN;
6215       break;
6216 
6217     case ')':
6218       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
6219       tok->type = TK_SUBEXP_CLOSE;
6220       break;
6221 
6222     case '^':
6223       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
6224       tok->type = TK_ANCHOR;
6225       tok->u.subtype = (OPTON_SINGLELINE(env->options)
6226                         ? ANCR_BEGIN_BUF : ANCR_BEGIN_LINE);
6227       break;
6228 
6229     case '$':
6230       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
6231       tok->type = TK_ANCHOR;
6232       tok->u.subtype = (OPTON_SINGLELINE(env->options)
6233                         ? ANCR_SEMI_END_BUF : ANCR_END_LINE);
6234       break;
6235 
6236     case '[':
6237       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACKET_CC)) break;
6238       tok->type = TK_OPEN_CC;
6239       break;
6240 
6241     case ']':
6242       if (*src > env->pattern)   /* /].../ is allowed. */
6243         CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]");
6244       break;
6245 
6246     case '#':
6247       if (OPTON_EXTEND(env->options)) {
6248         while (!PEND) {
6249           PFETCH(c);
6250           if (ONIGENC_IS_CODE_NEWLINE(enc, c))
6251             break;
6252         }
6253         goto start;
6254         break;
6255       }
6256       break;
6257 
6258     case ' ': case '\t': case '\n': case '\r': case '\f':
6259       if (OPTON_EXTEND(env->options))
6260         goto start;
6261       break;
6262 
6263     default:
6264       /* string */
6265       break;
6266     }
6267   }
6268 
6269  out:
6270   *src = p;
6271   return tok->type;
6272 }
6273 
6274 static int
add_ctype_to_cc_by_range(CClassNode * cc,int ctype ARG_UNUSED,int not,OnigEncoding enc ARG_UNUSED,OnigCodePoint sb_out,const OnigCodePoint mbr[])6275 add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,
6276                          OnigEncoding enc ARG_UNUSED, OnigCodePoint sb_out,
6277                          const OnigCodePoint mbr[])
6278 {
6279   int i, r;
6280   OnigCodePoint j;
6281 
6282   int n = ONIGENC_CODE_RANGE_NUM(mbr);
6283 
6284   if (not == 0) {
6285     for (i = 0; i < n; i++) {
6286       for (j  = ONIGENC_CODE_RANGE_FROM(mbr, i);
6287            j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {
6288         if (j >= sb_out) {
6289           if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {
6290             r = add_code_range_to_buf(&(cc->mbuf), j,
6291                                       ONIGENC_CODE_RANGE_TO(mbr, i));
6292             if (r != 0) return r;
6293             i++;
6294           }
6295 
6296           goto sb_end;
6297         }
6298         BITSET_SET_BIT(cc->bs, j);
6299       }
6300     }
6301 
6302   sb_end:
6303     for ( ; i < n; i++) {
6304       r = add_code_range_to_buf(&(cc->mbuf),
6305                                 ONIGENC_CODE_RANGE_FROM(mbr, i),
6306                                 ONIGENC_CODE_RANGE_TO(mbr, i));
6307       if (r != 0) return r;
6308     }
6309   }
6310   else {
6311     OnigCodePoint prev = 0;
6312 
6313     for (i = 0; i < n; i++) {
6314       for (j = prev; j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) {
6315         if (j >= sb_out) {
6316           goto sb_end2;
6317         }
6318         BITSET_SET_BIT(cc->bs, j);
6319       }
6320       prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
6321     }
6322     for (j = prev; j < sb_out; j++) {
6323       BITSET_SET_BIT(cc->bs, j);
6324     }
6325 
6326   sb_end2:
6327     prev = sb_out;
6328 
6329     for (i = 0; i < n; i++) {
6330       if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) {
6331         r = add_code_range_to_buf(&(cc->mbuf), prev,
6332                                   ONIGENC_CODE_RANGE_FROM(mbr, i) - 1);
6333         if (r != 0) return r;
6334       }
6335       prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
6336       if (prev == 0) goto end;
6337     }
6338 
6339     r = add_code_range_to_buf(&(cc->mbuf), prev, MAX_CODE_POINT);
6340     if (r != 0) return r;
6341   }
6342 
6343  end:
6344   return 0;
6345 }
6346 
6347 static int
add_ctype_to_cc_by_range_limit(CClassNode * cc,int ctype ARG_UNUSED,int not,OnigEncoding enc ARG_UNUSED,OnigCodePoint sb_out,const OnigCodePoint mbr[],OnigCodePoint limit)6348 add_ctype_to_cc_by_range_limit(CClassNode* cc, int ctype ARG_UNUSED, int not,
6349                                OnigEncoding enc ARG_UNUSED,
6350                                OnigCodePoint sb_out,
6351                                const OnigCodePoint mbr[], OnigCodePoint limit)
6352 {
6353   int i, r;
6354   OnigCodePoint j;
6355   OnigCodePoint from;
6356   OnigCodePoint to;
6357 
6358   int n = ONIGENC_CODE_RANGE_NUM(mbr);
6359 
6360   if (not == 0) {
6361     for (i = 0; i < n; i++) {
6362       for (j  = ONIGENC_CODE_RANGE_FROM(mbr, i);
6363            j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {
6364         if (j > limit) goto end;
6365         if (j >= sb_out) {
6366           if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {
6367             to = ONIGENC_CODE_RANGE_TO(mbr, i);
6368             if (to > limit) to = limit;
6369             r = add_code_range_to_buf(&(cc->mbuf), j, to);
6370             if (r != 0) return r;
6371             i++;
6372           }
6373 
6374           goto sb_end;
6375         }
6376         BITSET_SET_BIT(cc->bs, j);
6377       }
6378     }
6379 
6380   sb_end:
6381     for ( ; i < n; i++) {
6382       from = ONIGENC_CODE_RANGE_FROM(mbr, i);
6383       to   = ONIGENC_CODE_RANGE_TO(mbr, i);
6384       if (from > limit) break;
6385       if (to   > limit) to = limit;
6386       r = add_code_range_to_buf(&(cc->mbuf), from, to);
6387       if (r != 0) return r;
6388     }
6389   }
6390   else {
6391     OnigCodePoint prev = 0;
6392 
6393     for (i = 0; i < n; i++) {
6394       from = ONIGENC_CODE_RANGE_FROM(mbr, i);
6395       if (from > limit) {
6396         for (j = prev; j < sb_out; j++) {
6397           BITSET_SET_BIT(cc->bs, j);
6398         }
6399         goto sb_end2;
6400       }
6401       for (j = prev; j < from; j++) {
6402         if (j >= sb_out) goto sb_end2;
6403         BITSET_SET_BIT(cc->bs, j);
6404       }
6405       prev = ONIGENC_CODE_RANGE_TO(mbr, i);
6406       if (prev > limit) prev = limit;
6407       prev++;
6408       if (prev == 0) goto end;
6409     }
6410     for (j = prev; j < sb_out; j++) {
6411       BITSET_SET_BIT(cc->bs, j);
6412     }
6413 
6414   sb_end2:
6415     prev = sb_out;
6416 
6417     for (i = 0; i < n; i++) {
6418       from = ONIGENC_CODE_RANGE_FROM(mbr, i);
6419       if (from > limit) goto last;
6420 
6421       if (prev < from) {
6422         r = add_code_range_to_buf(&(cc->mbuf), prev, from - 1);
6423         if (r != 0) return r;
6424       }
6425       prev = ONIGENC_CODE_RANGE_TO(mbr, i);
6426       if (prev > limit) prev = limit;
6427       prev++;
6428       if (prev == 0) goto end;
6429     }
6430 
6431   last:
6432     r = add_code_range_to_buf(&(cc->mbuf), prev, MAX_CODE_POINT);
6433     if (r != 0) return r;
6434   }
6435 
6436  end:
6437   return 0;
6438 }
6439 
6440 static int
add_ctype_to_cc(CClassNode * cc,int ctype,int not,ParseEnv * env)6441 add_ctype_to_cc(CClassNode* cc, int ctype, int not, ParseEnv* env)
6442 {
6443   int c, r;
6444   int ascii_mode;
6445   int is_single;
6446   const OnigCodePoint *ranges;
6447   OnigCodePoint limit;
6448   OnigCodePoint sb_out;
6449   OnigEncoding enc = env->enc;
6450 
6451   ascii_mode = OPTON_IS_ASCII_MODE_CTYPE(ctype, env->options);
6452 
6453   r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);
6454   if (r == 0) {
6455     if (ascii_mode == 0)
6456       r = add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sb_out, ranges);
6457     else
6458       r = add_ctype_to_cc_by_range_limit(cc, ctype, not, env->enc, sb_out,
6459                                          ranges, ASCII_LIMIT);
6460     return r;
6461   }
6462   else if (r != ONIG_NO_SUPPORT_CONFIG) {
6463     return r;
6464   }
6465 
6466   r = 0;
6467   is_single = ONIGENC_IS_SINGLEBYTE(enc);
6468   limit = ascii_mode ? ASCII_LIMIT : SINGLE_BYTE_SIZE;
6469 
6470   switch (ctype) {
6471   case ONIGENC_CTYPE_ALPHA:
6472   case ONIGENC_CTYPE_BLANK:
6473   case ONIGENC_CTYPE_CNTRL:
6474   case ONIGENC_CTYPE_DIGIT:
6475   case ONIGENC_CTYPE_LOWER:
6476   case ONIGENC_CTYPE_PUNCT:
6477   case ONIGENC_CTYPE_SPACE:
6478   case ONIGENC_CTYPE_UPPER:
6479   case ONIGENC_CTYPE_XDIGIT:
6480   case ONIGENC_CTYPE_ASCII:
6481   case ONIGENC_CTYPE_ALNUM:
6482     if (not != 0) {
6483       for (c = 0; c < (int )limit; c++) {
6484         if (is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1) {
6485           if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
6486             BITSET_SET_BIT(cc->bs, c);
6487         }
6488       }
6489       for (c = limit; c < SINGLE_BYTE_SIZE; c++) {
6490         if (is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1)
6491           BITSET_SET_BIT(cc->bs, c);
6492       }
6493 
6494       if (is_single == 0)
6495         ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
6496     }
6497     else {
6498       for (c = 0; c < (int )limit; c++) {
6499         if (is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1) {
6500           if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
6501             BITSET_SET_BIT(cc->bs, c);
6502         }
6503       }
6504     }
6505     break;
6506 
6507   case ONIGENC_CTYPE_GRAPH:
6508   case ONIGENC_CTYPE_PRINT:
6509   case ONIGENC_CTYPE_WORD:
6510     if (not != 0) {
6511       for (c = 0; c < (int )limit; c++) {
6512         /* check invalid code point */
6513         if ((is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1)
6514             && ! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
6515           BITSET_SET_BIT(cc->bs, c);
6516       }
6517       for (c = limit; c < SINGLE_BYTE_SIZE; c++) {
6518         if (is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1)
6519           BITSET_SET_BIT(cc->bs, c);
6520       }
6521       if (ascii_mode != 0 && is_single == 0)
6522         ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
6523     }
6524     else {
6525       for (c = 0; c < (int )limit; c++) {
6526         if ((is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1)
6527             && ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
6528           BITSET_SET_BIT(cc->bs, c);
6529       }
6530       if (ascii_mode == 0 && is_single == 0)
6531         ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
6532     }
6533     break;
6534 
6535   default:
6536     return ONIGERR_PARSER_BUG;
6537     break;
6538   }
6539 
6540   return r;
6541 }
6542 
6543 static int
prs_posix_bracket(CClassNode * cc,UChar ** src,UChar * end,ParseEnv * env)6544 prs_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ParseEnv* env)
6545 {
6546 #define POSIX_BRACKET_CHECK_LIMIT_LENGTH  20
6547 #define POSIX_BRACKET_NAME_MIN_LEN         4
6548 
6549   static PosixBracketEntryType PBS[] = {
6550     { (UChar* )"alnum",  ONIGENC_CTYPE_ALNUM,  5 },
6551     { (UChar* )"alpha",  ONIGENC_CTYPE_ALPHA,  5 },
6552     { (UChar* )"blank",  ONIGENC_CTYPE_BLANK,  5 },
6553     { (UChar* )"cntrl",  ONIGENC_CTYPE_CNTRL,  5 },
6554     { (UChar* )"digit",  ONIGENC_CTYPE_DIGIT,  5 },
6555     { (UChar* )"graph",  ONIGENC_CTYPE_GRAPH,  5 },
6556     { (UChar* )"lower",  ONIGENC_CTYPE_LOWER,  5 },
6557     { (UChar* )"print",  ONIGENC_CTYPE_PRINT,  5 },
6558     { (UChar* )"punct",  ONIGENC_CTYPE_PUNCT,  5 },
6559     { (UChar* )"space",  ONIGENC_CTYPE_SPACE,  5 },
6560     { (UChar* )"upper",  ONIGENC_CTYPE_UPPER,  5 },
6561     { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 },
6562     { (UChar* )"ascii",  ONIGENC_CTYPE_ASCII,  5 },
6563     { (UChar* )"word",   ONIGENC_CTYPE_WORD,   4 },
6564     { (UChar* )NULL,     -1, 0 }
6565   };
6566 
6567   PosixBracketEntryType *pb;
6568   int not, i, r;
6569   OnigCodePoint c;
6570   OnigEncoding enc = env->enc;
6571   UChar *p = *src;
6572 
6573   if (PPEEK_IS('^')) {
6574     PINC_S;
6575     not = 1;
6576   }
6577   else
6578     not = 0;
6579 
6580   if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MIN_LEN + 3)
6581     goto not_posix_bracket;
6582 
6583   for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
6584     if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) {
6585       p = (UChar* )onigenc_step(enc, p, end, pb->len);
6586       if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0)
6587         return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
6588 
6589       r = add_ctype_to_cc(cc, pb->ctype, not, env);
6590       if (r != 0) return r;
6591 
6592       PINC_S; PINC_S;
6593       *src = p;
6594       return 0;
6595     }
6596   }
6597 
6598  not_posix_bracket:
6599   c = 0;
6600   i = 0;
6601   while (!PEND && ((c = PPEEK) != ':') && c != ']') {
6602     PINC_S;
6603     if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break;
6604   }
6605   if (c == ':' && ! PEND) {
6606     PINC_S;
6607     if (! PEND) {
6608       PFETCH_S(c);
6609       if (c == ']')
6610         return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
6611     }
6612   }
6613 
6614   return 1;  /* 1: is not POSIX bracket, but no error. */
6615 }
6616 
6617 static int
fetch_char_property_to_ctype(UChar ** src,UChar * end,ParseEnv * env)6618 fetch_char_property_to_ctype(UChar** src, UChar* end, ParseEnv* env)
6619 {
6620   int r;
6621   OnigCodePoint c;
6622   OnigEncoding enc;
6623   UChar *prev, *start, *p;
6624 
6625   p = *src;
6626   enc = env->enc;
6627   r = ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
6628   start = prev = p;
6629 
6630   while (!PEND) {
6631     prev = p;
6632     PFETCH_S(c);
6633     if (c == '}') {
6634       r = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, start, prev);
6635       if (r >= 0) {
6636         *src = p;
6637       }
6638       else {
6639         onig_scan_env_set_error_string(env, r, *src, prev);
6640       }
6641 
6642       return r;
6643     }
6644     else if (c == '(' || c == ')' || c == '{' || c == '|') {
6645       break;
6646     }
6647   }
6648 
6649   return r;
6650 }
6651 
6652 static int
prs_char_property(Node ** np,PToken * tok,UChar ** src,UChar * end,ParseEnv * env)6653 prs_char_property(Node** np, PToken* tok, UChar** src, UChar* end,
6654                   ParseEnv* env)
6655 {
6656   int r, ctype;
6657   CClassNode* cc;
6658 
6659   ctype = fetch_char_property_to_ctype(src, end, env);
6660   if (ctype < 0) return ctype;
6661 
6662   *np = node_new_cclass();
6663   CHECK_NULL_RETURN_MEMERR(*np);
6664   cc = CCLASS_(*np);
6665   r = add_ctype_to_cc(cc, ctype, FALSE, env);
6666   if (r != 0) return r;
6667   if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
6668 
6669   return 0;
6670 }
6671 
6672 
6673 static int
cc_cprop_next(CClassNode * cc,OnigCodePoint * pcode,CVAL * val,CSTATE * state,ParseEnv * env)6674 cc_cprop_next(CClassNode* cc, OnigCodePoint* pcode, CVAL* val, CSTATE* state,
6675               ParseEnv* env)
6676 {
6677   int r;
6678 
6679   if (*state == CS_RANGE)
6680     return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE;
6681 
6682   if (*state == CS_VALUE) {
6683     if (*val == CV_SB)
6684       BITSET_SET_BIT(cc->bs, (int )(*pcode));
6685     else if (*val == CV_MB) {
6686       r = add_code_range(&(cc->mbuf), env, *pcode, *pcode);
6687       if (r < 0) return r;
6688     }
6689   }
6690 
6691   *state = CS_VALUE;
6692   *val   = CV_CPROP;
6693   return 0;
6694 }
6695 
6696 static int
cc_char_next(CClassNode * cc,OnigCodePoint * from,OnigCodePoint to,int * from_raw,int to_raw,CVAL intype,CVAL * type,CSTATE * state,ParseEnv * env)6697 cc_char_next(CClassNode* cc, OnigCodePoint *from, OnigCodePoint to,
6698              int* from_raw, int to_raw, CVAL intype, CVAL* type,
6699              CSTATE* state, ParseEnv* env)
6700 {
6701   int r;
6702 
6703   switch (*state) {
6704   case CS_VALUE:
6705     if (*type == CV_SB) {
6706       if (*from > 0xff)
6707           return ONIGERR_INVALID_CODE_POINT_VALUE;
6708 
6709       BITSET_SET_BIT(cc->bs, (int )(*from));
6710     }
6711     else if (*type == CV_MB) {
6712       r = add_code_range(&(cc->mbuf), env, *from, *from);
6713       if (r < 0) return r;
6714     }
6715     break;
6716 
6717   case CS_RANGE:
6718     if (intype == *type) {
6719       if (intype == CV_SB) {
6720         if (*from > 0xff || to > 0xff)
6721           return ONIGERR_INVALID_CODE_POINT_VALUE;
6722 
6723         if (*from > to) {
6724           if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
6725             goto ccs_range_end;
6726           else
6727             return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
6728         }
6729         bitset_set_range(cc->bs, (int )*from, (int )to);
6730       }
6731       else {
6732         r = add_code_range(&(cc->mbuf), env, *from, to);
6733         if (r < 0) return r;
6734       }
6735     }
6736     else {
6737       if (*from > to) {
6738         if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
6739           goto ccs_range_end;
6740         else
6741           return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
6742       }
6743       bitset_set_range(cc->bs, (int )*from, (int )(to < 0xff ? to : 0xff));
6744       r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*from, to);
6745       if (r < 0) return r;
6746     }
6747   ccs_range_end:
6748     *state = CS_COMPLETE;
6749     break;
6750 
6751   case CS_COMPLETE:
6752   case CS_START:
6753     *state = CS_VALUE;
6754     break;
6755 
6756   default:
6757     break;
6758   }
6759 
6760   *from_raw = to_raw;
6761   *from     = to;
6762   *type     = intype;
6763   return 0;
6764 }
6765 
6766 static int
code_exist_check(OnigCodePoint c,UChar * from,UChar * end,int ignore_escaped,ParseEnv * env)6767 code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,
6768                  ParseEnv* env)
6769 {
6770   int in_esc;
6771   OnigCodePoint code;
6772   OnigEncoding enc = env->enc;
6773   UChar* p = from;
6774 
6775   in_esc = 0;
6776   while (! PEND) {
6777     if (ignore_escaped && in_esc) {
6778       in_esc = 0;
6779     }
6780     else {
6781       PFETCH_S(code);
6782       if (code == c) return 1;
6783       if (code == MC_ESC(env->syntax)) in_esc = 1;
6784     }
6785   }
6786   return 0;
6787 }
6788 
6789 static int
prs_cc(Node ** np,PToken * tok,UChar ** src,UChar * end,ParseEnv * env)6790 prs_cc(Node** np, PToken* tok, UChar** src, UChar* end, ParseEnv* env)
6791 {
6792   int r, neg, len, fetched, and_start;
6793   OnigCodePoint in_code, curr_code;
6794   UChar *p;
6795   Node* node;
6796   CClassNode *cc, *prev_cc;
6797   CClassNode work_cc;
6798   int curr_raw, in_raw;
6799   CSTATE state;
6800   CVAL in_type;
6801   CVAL curr_type;
6802 
6803   *np = NULL_NODE;
6804   INC_PARSE_DEPTH(env->parse_depth);
6805 
6806   state = CS_START;
6807   prev_cc = (CClassNode* )NULL;
6808   r = fetch_token_cc(tok, src, end, env, state);
6809   if (r == TK_CHAR && tok->u.code == (OnigCodePoint )'^' && tok->escaped == 0) {
6810     neg = 1;
6811     r = fetch_token_cc(tok, src, end, env, state);
6812   }
6813   else {
6814     neg = 0;
6815   }
6816 
6817   if (r < 0) return r;
6818   if (r == TK_CC_CLOSE) {
6819     if (! code_exist_check((OnigCodePoint )']',
6820                            *src, env->pattern_end, 1, env))
6821       return ONIGERR_EMPTY_CHAR_CLASS;
6822 
6823     CC_ESC_WARN(env, (UChar* )"]");
6824     r = tok->type = TK_CHAR;  /* allow []...] */
6825   }
6826 
6827   *np = node = node_new_cclass();
6828   CHECK_NULL_RETURN_MEMERR(node);
6829   cc = CCLASS_(node);
6830 
6831   and_start = 0;
6832   curr_type = CV_UNDEF;
6833 
6834   p = *src;
6835   while (r != TK_CC_CLOSE) {
6836     fetched = 0;
6837     switch (r) {
6838     case TK_CHAR:
6839     any_char_in:
6840       len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.code);
6841       if (len < 0) {
6842         r = len;
6843         goto err;
6844       }
6845       in_type = (len == 1) ? CV_SB : CV_MB;
6846       in_code = tok->u.code;
6847       in_raw = 0;
6848       goto val_entry2;
6849       break;
6850 
6851     case TK_CRUDE_BYTE:
6852       /* tok->base_num != 0 : octal or hexadec. */
6853       if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base_num != 0) {
6854         int i, j;
6855         UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
6856         UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN;
6857         UChar* psave = p;
6858         int base_num = tok->base_num;
6859 
6860         buf[0] = tok->u.byte;
6861         for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) {
6862           r = fetch_token_cc(tok, &p, end, env, CS_COMPLETE);
6863           if (r < 0) goto err;
6864           if (r != TK_CRUDE_BYTE || tok->base_num != base_num) {
6865             fetched = 1;
6866             break;
6867           }
6868           buf[i] = tok->u.byte;
6869         }
6870 
6871         if (i < ONIGENC_MBC_MINLEN(env->enc)) {
6872           r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
6873           goto err;
6874         }
6875 
6876         /* clear buf tail */
6877         for (j = i; j < ONIGENC_CODE_TO_MBC_MAXLEN; j++) buf[j] = '\0';
6878 
6879         len = enclen(env->enc, buf);
6880         if (i < len) {
6881           r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
6882           goto err;
6883         }
6884         else if (i > len) { /* fetch back */
6885           p = psave;
6886           for (i = 1; i < len; i++) {
6887             r = fetch_token_cc(tok, &p, end, env, CS_COMPLETE);
6888             if (r < 0) goto err;
6889           }
6890           fetched = 0;
6891         }
6892 
6893         if (i == 1) {
6894           in_code = (OnigCodePoint )buf[0];
6895           goto crude_single;
6896         }
6897         else {
6898           in_code = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe);
6899           in_type = CV_MB;
6900         }
6901       }
6902       else {
6903         in_code = (OnigCodePoint )tok->u.byte;
6904       crude_single:
6905         in_type = CV_SB;
6906       }
6907       in_raw = 1;
6908       goto val_entry2;
6909       break;
6910 
6911     case TK_CODE_POINT:
6912       in_code = tok->u.code;
6913       in_raw  = 1;
6914     val_entry:
6915       len = ONIGENC_CODE_TO_MBCLEN(env->enc, in_code);
6916       if (len < 0) {
6917         if (state != CS_RANGE ||
6918             ! IS_SYNTAX_BV(env->syntax,
6919                            ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC) ||
6920             in_code < 0x100 || ONIGENC_MBC_MAXLEN(env->enc) == 1) {
6921           r = len;
6922           goto err;
6923         }
6924       }
6925       in_type = (len == 1 ? CV_SB : CV_MB);
6926     val_entry2:
6927       r = cc_char_next(cc, &curr_code, in_code, &curr_raw, in_raw, in_type,
6928                        &curr_type, &state, env);
6929       if (r != 0) goto err;
6930       break;
6931 
6932     case TK_CC_POSIX_BRACKET_OPEN:
6933       r = prs_posix_bracket(cc, &p, end, env);
6934       if (r < 0) goto err;
6935       if (r == 1) {  /* is not POSIX bracket */
6936         CC_ESC_WARN(env, (UChar* )"[");
6937         p = tok->backp;
6938         in_code = tok->u.code;
6939         in_raw = 0;
6940         goto val_entry;
6941       }
6942       goto next_cprop;
6943       break;
6944 
6945     case TK_CHAR_TYPE:
6946       r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, env);
6947       if (r != 0) goto err;
6948 
6949     next_cprop:
6950       r = cc_cprop_next(cc, &curr_code, &curr_type, &state, env);
6951       if (r != 0) goto err;
6952       break;
6953 
6954     case TK_CHAR_PROPERTY:
6955       {
6956         int ctype = fetch_char_property_to_ctype(&p, end, env);
6957         if (ctype < 0) {
6958           r = ctype;
6959           goto err;
6960         }
6961         r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, env);
6962         if (r != 0) goto err;
6963         goto next_cprop;
6964       }
6965       break;
6966 
6967     case TK_CC_RANGE:
6968       if (state == CS_VALUE) {
6969         r = fetch_token_cc(tok, &p, end, env, CS_RANGE);
6970         if (r < 0) goto err;
6971 
6972         fetched = 1;
6973         if (r == TK_CC_CLOSE) { /* allow [x-] */
6974         range_end_val:
6975           in_code = (OnigCodePoint )'-';
6976           in_raw = 0;
6977           goto val_entry;
6978         }
6979         else if (r == TK_CC_AND) {
6980           CC_ESC_WARN(env, (UChar* )"-");
6981           goto range_end_val;
6982         }
6983 
6984         if (curr_type == CV_CPROP) {
6985           r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;
6986           goto err;
6987         }
6988 
6989         state = CS_RANGE;
6990       }
6991       else if (state == CS_START) {
6992         /* [-xa] is allowed */
6993         in_code = tok->u.code;
6994         in_raw = 0;
6995 
6996         r = fetch_token_cc(tok, &p, end, env, CS_VALUE);
6997         if (r < 0) goto err;
6998 
6999         fetched = 1;
7000         /* [--x] or [a&&-x] is warned. */
7001         if (r == TK_CC_RANGE || and_start != 0)
7002           CC_ESC_WARN(env, (UChar* )"-");
7003 
7004         goto val_entry;
7005       }
7006       else if (state == CS_RANGE) {
7007         CC_ESC_WARN(env, (UChar* )"-");
7008         goto any_char_in;  /* [!--] is allowed */
7009       }
7010       else { /* CS_COMPLETE */
7011         r = fetch_token_cc(tok, &p, end, env, CS_VALUE);
7012         if (r < 0) goto err;
7013 
7014         fetched = 1;
7015         if (r == TK_CC_CLOSE)
7016           goto range_end_val; /* allow [a-b-] */
7017         else if (r == TK_CC_AND) {
7018           CC_ESC_WARN(env, (UChar* )"-");
7019           goto range_end_val;
7020         }
7021 
7022         if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) {
7023           CC_ESC_WARN(env, (UChar* )"-");
7024           goto range_end_val;   /* [0-9-a] is allowed as [0-9\-a] */
7025         }
7026         r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;
7027         goto err;
7028       }
7029       break;
7030 
7031     case TK_CC_OPEN_CC: /* [ */
7032       {
7033         Node *anode;
7034         CClassNode* acc;
7035 
7036         if (state == CS_VALUE) {
7037           r = cc_char_next(cc, &curr_code, 0, &curr_raw, 0, curr_type, &curr_type,
7038                            &state, env);
7039           if (r != 0) goto err;
7040         }
7041         state = CS_COMPLETE;
7042 
7043         r = prs_cc(&anode, tok, &p, end, env);
7044         if (r != 0) {
7045           onig_node_free(anode);
7046           goto cc_open_err;
7047         }
7048         acc = CCLASS_(anode);
7049         r = or_cclass(cc, acc, env->enc);
7050         onig_node_free(anode);
7051 
7052       cc_open_err:
7053         if (r != 0) goto err;
7054       }
7055       break;
7056 
7057     case TK_CC_AND: /* && */
7058       {
7059         if (state == CS_VALUE) {
7060           r = cc_char_next(cc, &curr_code, 0, &curr_raw, 0, curr_type, &curr_type,
7061                            &state, env);
7062           if (r != 0) goto err;
7063         }
7064         /* initialize local variables */
7065         and_start = 1;
7066         state = CS_START;
7067 
7068         if (IS_NOT_NULL(prev_cc)) {
7069           r = and_cclass(prev_cc, cc, env->enc);
7070           if (r != 0) goto err;
7071           bbuf_free(cc->mbuf);
7072         }
7073         else {
7074           prev_cc = cc;
7075           cc = &work_cc;
7076         }
7077         initialize_cclass(cc);
7078       }
7079       break;
7080 
7081     case TK_EOT:
7082       r = ONIGERR_PREMATURE_END_OF_CHAR_CLASS;
7083       goto err;
7084       break;
7085     default:
7086       r = ONIGERR_PARSER_BUG;
7087       goto err;
7088       break;
7089     }
7090 
7091     if (fetched)
7092       r = tok->type;
7093     else {
7094       r = fetch_token_cc(tok, &p, end, env, state);
7095       if (r < 0) goto err;
7096     }
7097   }
7098 
7099   if (state == CS_VALUE) {
7100     r = cc_char_next(cc, &curr_code, 0, &curr_raw, 0, curr_type, &curr_type,
7101                      &state, env);
7102     if (r != 0) goto err;
7103   }
7104 
7105   if (IS_NOT_NULL(prev_cc)) {
7106     r = and_cclass(prev_cc, cc, env->enc);
7107     if (r != 0) goto err;
7108     bbuf_free(cc->mbuf);
7109     cc = prev_cc;
7110   }
7111 
7112   if (neg != 0)
7113     NCCLASS_SET_NOT(cc);
7114   else
7115     NCCLASS_CLEAR_NOT(cc);
7116   if (IS_NCCLASS_NOT(cc) &&
7117       IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) {
7118     int is_empty = (IS_NULL(cc->mbuf) ? 1 : 0);
7119     if (is_empty != 0)
7120       BITSET_IS_EMPTY(cc->bs, is_empty);
7121 
7122     if (is_empty == 0) {
7123       if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) {
7124         if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1)
7125           BITSET_SET_BIT(cc->bs, NEWLINE_CODE);
7126         else
7127           add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE);
7128       }
7129     }
7130   }
7131   *src = p;
7132   DEC_PARSE_DEPTH(env->parse_depth);
7133   return 0;
7134 
7135  err:
7136   if (cc != CCLASS_(*np))
7137     bbuf_free(cc->mbuf);
7138   return r;
7139 }
7140 
7141 static int prs_alts(Node** top, PToken* tok, int term,
7142                     UChar** src, UChar* end, ParseEnv* env, int group_head);
7143 
7144 #ifdef USE_CALLOUT
7145 
7146 /* (?{...}[tag][+-]) (?{{...}}[tag][+-]) */
7147 static int
prs_callout_of_contents(Node ** np,int cterm,UChar ** src,UChar * end,ParseEnv * env)7148 prs_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end,
7149                         ParseEnv* env)
7150 {
7151   int r;
7152   int i;
7153   int in;
7154   int num;
7155   OnigCodePoint c;
7156   UChar* code_start;
7157   UChar* code_end;
7158   UChar* contents;
7159   UChar* tag_start;
7160   UChar* tag_end;
7161   int brace_nest;
7162   CalloutListEntry* e;
7163   RegexExt* ext;
7164   OnigEncoding enc = env->enc;
7165   UChar* p = *src;
7166 
7167   if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
7168 
7169   brace_nest = 0;
7170   while (PPEEK_IS('{')) {
7171     brace_nest++;
7172     PINC_S;
7173     if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
7174   }
7175 
7176   in = ONIG_CALLOUT_IN_PROGRESS;
7177   code_start = p;
7178   while (1) {
7179     if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
7180 
7181     code_end = p;
7182     PFETCH_S(c);
7183     if (c == '}') {
7184       i = brace_nest;
7185       while (i > 0) {
7186         if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
7187         PFETCH_S(c);
7188         if (c == '}') i--;
7189         else break;
7190       }
7191       if (i == 0) break;
7192     }
7193   }
7194 
7195   if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7196 
7197   PFETCH_S(c);
7198   if (c == '[') {
7199     if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7200     tag_end = tag_start = p;
7201     while (! PEND) {
7202       if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7203       tag_end = p;
7204       PFETCH_S(c);
7205       if (c == ']') break;
7206     }
7207     if (! is_allowed_callout_tag_name(enc, tag_start, tag_end))
7208       return ONIGERR_INVALID_CALLOUT_TAG_NAME;
7209 
7210     if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7211     PFETCH_S(c);
7212   }
7213   else {
7214     tag_start = tag_end = 0;
7215   }
7216 
7217   if (c == 'X') {
7218     in |= ONIG_CALLOUT_IN_RETRACTION;
7219     if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7220     PFETCH_S(c);
7221   }
7222   else if (c == '<') {
7223     in = ONIG_CALLOUT_IN_RETRACTION;
7224     if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7225     PFETCH_S(c);
7226   }
7227   else if (c == '>') { /* no needs (default) */
7228     if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7229     PFETCH_S(c);
7230   }
7231 
7232   if (c != cterm)
7233     return ONIGERR_INVALID_CALLOUT_PATTERN;
7234 
7235   r = reg_callout_list_entry(env, &num);
7236   if (r != 0) return r;
7237 
7238   ext = onig_get_regex_ext(env->reg);
7239   CHECK_NULL_RETURN_MEMERR(ext);
7240   if (IS_NULL(ext->pattern)) {
7241     r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end);
7242     if (r != ONIG_NORMAL) return r;
7243   }
7244 
7245   if (tag_start != tag_end) {
7246     r = callout_tag_entry(env, env->reg, tag_start, tag_end, num);
7247     if (r != ONIG_NORMAL) return r;
7248   }
7249 
7250   contents = onigenc_strdup(enc, code_start, code_end);
7251   CHECK_NULL_RETURN_MEMERR(contents);
7252 
7253   e = onig_reg_callout_list_at(env->reg, num);
7254   if (IS_NULL(e)) {
7255     xfree(contents);
7256     return ONIGERR_MEMORY;
7257   }
7258 
7259   r = node_new_callout(np, ONIG_CALLOUT_OF_CONTENTS, num, ONIG_NON_NAME_ID, env);
7260   if (r != 0) {
7261     xfree(contents);
7262     return r;
7263   }
7264 
7265   e->of      = ONIG_CALLOUT_OF_CONTENTS;
7266   e->in      = in;
7267   e->name_id = ONIG_NON_NAME_ID;
7268   e->u.content.start = contents;
7269   e->u.content.end   = contents + (code_end - code_start);
7270 
7271   *src = p;
7272   return 0;
7273 }
7274 
7275 static long
prs_long(OnigEncoding enc,UChar * s,UChar * end,int sign_on,long max,long * rl)7276 prs_long(OnigEncoding enc, UChar* s, UChar* end, int sign_on, long max, long* rl)
7277 {
7278   long v;
7279   long d;
7280   int flag;
7281   UChar* p;
7282   OnigCodePoint c;
7283 
7284   if (s >= end) return ONIGERR_INVALID_CALLOUT_ARG;
7285 
7286   flag = 1;
7287   v = 0;
7288   p = s;
7289   while (p < end) {
7290     c = ONIGENC_MBC_TO_CODE(enc, p, end);
7291     p += ONIGENC_MBC_ENC_LEN(enc, p);
7292     if (c >= '0' && c <= '9') {
7293       d = (long )(c - '0');
7294       if (v > (max - d) / 10)
7295         return ONIGERR_INVALID_CALLOUT_ARG;
7296 
7297       v = v * 10 + d;
7298     }
7299     else if (sign_on != 0 && (c == '-' || c == '+')) {
7300       if (c == '-') flag = -1;
7301     }
7302     else
7303       return ONIGERR_INVALID_CALLOUT_ARG;
7304 
7305     sign_on = 0;
7306   }
7307 
7308   *rl = flag * v;
7309   return ONIG_NORMAL;
7310 }
7311 
7312 static void
clear_callout_args(int n,unsigned int types[],OnigValue vals[])7313 clear_callout_args(int n, unsigned int types[], OnigValue vals[])
7314 {
7315   int i;
7316 
7317   for (i = 0; i < n; i++) {
7318     switch (types[i]) {
7319     case ONIG_TYPE_STRING:
7320       if (IS_NOT_NULL(vals[i].s.start))
7321         xfree(vals[i].s.start);
7322       break;
7323     default:
7324       break;
7325     }
7326   }
7327 }
7328 
7329 static int
prs_callout_args(int skip_mode,int cterm,UChar ** src,UChar * end,int max_arg_num,unsigned int types[],OnigValue vals[],ParseEnv * env)7330 prs_callout_args(int skip_mode, int cterm, UChar** src, UChar* end,
7331                  int max_arg_num, unsigned int types[], OnigValue vals[],
7332                  ParseEnv* env)
7333 {
7334 #define MAX_CALLOUT_ARG_BYTE_LENGTH   128
7335 
7336   int r;
7337   int n;
7338   int esc;
7339   int cn;
7340   UChar* s;
7341   UChar* e;
7342   UChar* eesc;
7343   OnigCodePoint c;
7344   UChar* bufend;
7345   UChar buf[MAX_CALLOUT_ARG_BYTE_LENGTH];
7346   OnigEncoding enc = env->enc;
7347   UChar* p = *src;
7348 
7349   if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
7350 
7351   c = 0;
7352   n = 0;
7353   while (n < ONIG_CALLOUT_MAX_ARGS_NUM) {
7354     cn  = 0;
7355     esc = 0;
7356     eesc = 0;
7357     bufend = buf;
7358     s = e = p;
7359     while (1) {
7360       if (PEND) {
7361         r = ONIGERR_INVALID_CALLOUT_PATTERN;
7362         goto err_clear;
7363       }
7364 
7365       e = p;
7366       PFETCH_S(c);
7367       if (esc != 0) {
7368         esc = 0;
7369         if (c == '\\' || c == cterm || c == ',') {
7370           /* */
7371         }
7372         else {
7373           e = eesc;
7374           cn++;
7375         }
7376         goto add_char;
7377       }
7378       else {
7379         if (c == '\\') {
7380           esc = 1;
7381           eesc = e;
7382         }
7383         else if (c == cterm || c == ',')
7384           break;
7385         else {
7386           size_t clen;
7387 
7388         add_char:
7389           if (skip_mode == FALSE) {
7390             clen = p - e;
7391             if (bufend + clen > buf + MAX_CALLOUT_ARG_BYTE_LENGTH) {
7392               r = ONIGERR_INVALID_CALLOUT_ARG; /* too long argument */
7393               goto err_clear;
7394             }
7395 
7396             xmemcpy(bufend, e, clen);
7397             bufend += clen;
7398           }
7399           cn++;
7400         }
7401       }
7402     }
7403 
7404     if (cn != 0) {
7405       if (max_arg_num >= 0 && n >= max_arg_num) {
7406         r = ONIGERR_INVALID_CALLOUT_ARG;
7407         goto err_clear;
7408       }
7409 
7410       if (skip_mode == FALSE) {
7411         if ((types[n] & ONIG_TYPE_LONG) != 0) {
7412           int fixed = 0;
7413           if (cn > 0) {
7414             long rl;
7415             r = prs_long(enc, buf, bufend, 1, LONG_MAX, &rl);
7416             if (r == ONIG_NORMAL) {
7417               vals[n].l = rl;
7418               fixed = 1;
7419               types[n] = ONIG_TYPE_LONG;
7420             }
7421           }
7422 
7423           if (fixed == 0) {
7424             types[n] = (types[n] & ~ONIG_TYPE_LONG);
7425             if (types[n] == ONIG_TYPE_VOID) {
7426               r = ONIGERR_INVALID_CALLOUT_ARG;
7427               goto err_clear;
7428             }
7429           }
7430         }
7431 
7432         switch (types[n]) {
7433         case ONIG_TYPE_LONG:
7434           break;
7435 
7436         case ONIG_TYPE_CHAR:
7437           if (cn != 1) {
7438             r = ONIGERR_INVALID_CALLOUT_ARG;
7439             goto err_clear;
7440           }
7441           vals[n].c = ONIGENC_MBC_TO_CODE(enc, buf, bufend);
7442           break;
7443 
7444         case ONIG_TYPE_STRING:
7445           {
7446             UChar* rs = onigenc_strdup(enc, buf, bufend);
7447             if (IS_NULL(rs)) {
7448               r = ONIGERR_MEMORY; goto err_clear;
7449             }
7450             vals[n].s.start = rs;
7451             vals[n].s.end   = rs + (e - s);
7452           }
7453           break;
7454 
7455         case ONIG_TYPE_TAG:
7456           if (eesc != 0 || ! is_allowed_callout_tag_name(enc, s, e)) {
7457             r = ONIGERR_INVALID_CALLOUT_TAG_NAME;
7458             goto err_clear;
7459           }
7460 
7461           vals[n].s.start = s;
7462           vals[n].s.end   = e;
7463           break;
7464 
7465         case ONIG_TYPE_VOID:
7466         case ONIG_TYPE_POINTER:
7467           r = ONIGERR_PARSER_BUG;
7468           goto err_clear;
7469           break;
7470         }
7471       }
7472 
7473       n++;
7474     }
7475 
7476     if (c == cterm) break;
7477   }
7478 
7479   if (c != cterm) {
7480     r = ONIGERR_INVALID_CALLOUT_PATTERN;
7481     goto err_clear;
7482   }
7483 
7484   *src = p;
7485   return n;
7486 
7487  err_clear:
7488   if (skip_mode == FALSE)
7489     clear_callout_args(n, types, vals);
7490   return r;
7491 }
7492 
7493 /* (*name[TAG]) (*name[TAG]{a,b,..}) */
7494 static int
prs_callout_of_name(Node ** np,int cterm,UChar ** src,UChar * end,ParseEnv * env)7495 prs_callout_of_name(Node** np, int cterm, UChar** src, UChar* end,
7496                     ParseEnv* env)
7497 {
7498   int r;
7499   int i;
7500   int in;
7501   int num;
7502   int name_id;
7503   int arg_num;
7504   int max_arg_num;
7505   int opt_arg_num;
7506   int is_not_single;
7507   OnigCodePoint c;
7508   UChar* name_start;
7509   UChar* name_end;
7510   UChar* tag_start;
7511   UChar* tag_end;
7512   Node*  node;
7513   CalloutListEntry* e;
7514   RegexExt* ext;
7515   unsigned int types[ONIG_CALLOUT_MAX_ARGS_NUM];
7516   OnigValue    vals[ONIG_CALLOUT_MAX_ARGS_NUM];
7517   OnigEncoding enc = env->enc;
7518   UChar* p = *src;
7519 
7520   /* PFETCH_READY; */
7521   if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
7522 
7523   node = 0;
7524   name_start = p;
7525   while (1) {
7526     if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7527     name_end = p;
7528     PFETCH_S(c);
7529     if (c == cterm || c == '[' || c == '{') break;
7530   }
7531 
7532   if (! is_allowed_callout_name(enc, name_start, name_end))
7533     return ONIGERR_INVALID_CALLOUT_NAME;
7534 
7535   if (c == '[') {
7536     if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7537     tag_end = tag_start = p;
7538     while (! PEND) {
7539       if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7540       tag_end = p;
7541       PFETCH_S(c);
7542       if (c == ']') break;
7543     }
7544     if (! is_allowed_callout_tag_name(enc, tag_start, tag_end))
7545       return ONIGERR_INVALID_CALLOUT_TAG_NAME;
7546 
7547     if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7548     PFETCH_S(c);
7549   }
7550   else {
7551     tag_start = tag_end = 0;
7552   }
7553 
7554   if (c == '{') {
7555     UChar* save;
7556 
7557     if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7558 
7559     /* read for single check only */
7560     save = p;
7561     arg_num = prs_callout_args(TRUE, '}', &p, end, -1, NULL, NULL, env);
7562     if (arg_num < 0) return arg_num;
7563 
7564     is_not_single = PPEEK_IS(cterm) ?  0 : 1;
7565     p = save;
7566     r = get_callout_name_id_by_name(enc, is_not_single, name_start, name_end,
7567                                     &name_id);
7568     if (r != ONIG_NORMAL) return r;
7569 
7570     max_arg_num = get_callout_arg_num_by_name_id(name_id);
7571     for (i = 0; i < max_arg_num; i++) {
7572       types[i] = get_callout_arg_type_by_name_id(name_id, i);
7573     }
7574 
7575     arg_num = prs_callout_args(FALSE, '}', &p, end, max_arg_num, types, vals, env);
7576     if (arg_num < 0) return arg_num;
7577 
7578     if (PEND) {
7579       r = ONIGERR_END_PATTERN_IN_GROUP;
7580       goto err_clear;
7581     }
7582     PFETCH_S(c);
7583   }
7584   else {
7585     arg_num = 0;
7586 
7587     is_not_single = 0;
7588     r = get_callout_name_id_by_name(enc, is_not_single, name_start, name_end,
7589                                       &name_id);
7590     if (r != ONIG_NORMAL) return r;
7591 
7592     max_arg_num = get_callout_arg_num_by_name_id(name_id);
7593     for (i = 0; i < max_arg_num; i++) {
7594       types[i] = get_callout_arg_type_by_name_id(name_id, i);
7595     }
7596   }
7597 
7598   in = onig_get_callout_in_by_name_id(name_id);
7599   opt_arg_num = get_callout_opt_arg_num_by_name_id(name_id);
7600   if (arg_num > max_arg_num || arg_num < (max_arg_num - opt_arg_num)) {
7601     r = ONIGERR_INVALID_CALLOUT_ARG;
7602     goto err_clear;
7603   }
7604 
7605   if (c != cterm) {
7606     r = ONIGERR_INVALID_CALLOUT_PATTERN;
7607     goto err_clear;
7608   }
7609 
7610   r = reg_callout_list_entry(env, &num);
7611   if (r != 0) goto err_clear;
7612 
7613   ext = onig_get_regex_ext(env->reg);
7614   if (IS_NULL(ext)) {
7615     r = ONIGERR_MEMORY; goto err_clear;
7616   }
7617   if (IS_NULL(ext->pattern)) {
7618     r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end);
7619     if (r != ONIG_NORMAL) goto err_clear;
7620   }
7621 
7622   if (tag_start != tag_end) {
7623     r = callout_tag_entry(env, env->reg, tag_start, tag_end, num);
7624     if (r != ONIG_NORMAL) goto err_clear;
7625   }
7626 
7627   e = onig_reg_callout_list_at(env->reg, num);
7628   if (IS_NULL(e)) {
7629     r = ONIGERR_MEMORY; goto err_clear;
7630   }
7631 
7632   r = node_new_callout(&node, ONIG_CALLOUT_OF_NAME, num, name_id, env);
7633   if (r != ONIG_NORMAL) goto err_clear;
7634 
7635   e->of         = ONIG_CALLOUT_OF_NAME;
7636   e->in         = in;
7637   e->name_id    = name_id;
7638   e->type       = onig_get_callout_type_by_name_id(name_id);
7639   e->start_func = onig_get_callout_start_func_by_name_id(name_id);
7640   e->end_func   = onig_get_callout_end_func_by_name_id(name_id);
7641   e->u.arg.num        = max_arg_num;
7642   e->u.arg.passed_num = arg_num;
7643   for (i = 0; i < max_arg_num; i++) {
7644     e->u.arg.types[i] = types[i];
7645     if (i < arg_num)
7646       e->u.arg.vals[i] = vals[i];
7647     else
7648       e->u.arg.vals[i] = get_callout_opt_default_by_name_id(name_id, i);
7649   }
7650 
7651   *np = node;
7652   *src = p;
7653   return 0;
7654 
7655  err_clear:
7656   clear_callout_args(arg_num, types, vals);
7657   return r;
7658 }
7659 #endif
7660 
7661 static int
prs_bag(Node ** np,PToken * tok,int term,UChar ** src,UChar * end,ParseEnv * env)7662 prs_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
7663         ParseEnv* env)
7664 {
7665   int r, num;
7666   Node *target;
7667   OnigOptionType option;
7668   OnigCodePoint c;
7669   int list_capture;
7670   OnigEncoding enc = env->enc;
7671 
7672   UChar* p = *src;
7673   PFETCH_READY;
7674 
7675   *np = NULL;
7676   if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
7677 
7678   option = env->options;
7679   c = PPEEK;
7680   if (c == '?' && IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
7681     PINC;
7682     if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7683 
7684     PFETCH(c);
7685     switch (c) {
7686     case ':':   /* (?:...) grouping only */
7687     group:
7688       r = fetch_token(tok, &p, end, env);
7689       if (r < 0) return r;
7690       r = prs_alts(np, tok, term, &p, end, env, FALSE);
7691       if (r < 0) return r;
7692       *src = p;
7693       return 1; /* group */
7694       break;
7695 
7696     case '=':
7697       *np = node_new_anchor(ANCR_PREC_READ);
7698       break;
7699     case '!':  /*         preceding read */
7700       *np = node_new_anchor(ANCR_PREC_READ_NOT);
7701       break;
7702     case '>':            /* (?>...) stop backtrack */
7703       *np = node_new_bag(BAG_STOP_BACKTRACK);
7704       break;
7705 
7706     case '\'':
7707       if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
7708         goto named_group1;
7709       }
7710       else
7711         return ONIGERR_UNDEFINED_GROUP_OPTION;
7712       break;
7713 
7714     case '<':   /* look behind (?<=...), (?<!...) */
7715       if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
7716       PFETCH(c);
7717       if (c == '=')
7718         *np = node_new_anchor(ANCR_LOOK_BEHIND);
7719       else if (c == '!')
7720         *np = node_new_anchor(ANCR_LOOK_BEHIND_NOT);
7721       else {
7722         if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
7723           UChar *name;
7724           UChar *name_end;
7725           enum REF_NUM num_type;
7726 
7727           PUNFETCH;
7728           c = '<';
7729 
7730         named_group1:
7731           list_capture = 0;
7732 
7733 #ifdef USE_CAPTURE_HISTORY
7734         named_group2:
7735 #endif
7736           name = p;
7737           r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num,
7738                          &num_type, FALSE);
7739           if (r < 0) return r;
7740 
7741           num = scan_env_add_mem_entry(env);
7742           if (num < 0) return num;
7743           if (list_capture != 0 && num >= (int )MEM_STATUS_BITS_NUM)
7744             return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
7745 
7746           r = name_add(env->reg, name, name_end, num, env);
7747           if (r != 0) return r;
7748           *np = node_new_memory(1);
7749           CHECK_NULL_RETURN_MEMERR(*np);
7750           BAG_(*np)->m.regnum = num;
7751           if (list_capture != 0)
7752             MEM_STATUS_ON_SIMPLE(env->cap_history, num);
7753           env->num_named++;
7754         }
7755         else {
7756           return ONIGERR_UNDEFINED_GROUP_OPTION;
7757         }
7758       }
7759       break;
7760 
7761     case '~':
7762       if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP)) {
7763         Node* absent;
7764         Node* expr;
7765         int head_bar;
7766         int is_range_cutter;
7767 
7768         if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7769 
7770         if (PPEEK_IS('|')) { /* (?~|generator|absent) */
7771           PINC;
7772           if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7773 
7774           head_bar = 1;
7775           if (PPEEK_IS(')')) { /* (?~|)  : range clear */
7776             PINC;
7777             r = make_range_clear(np, env);
7778             if (r != 0) return r;
7779             goto end;
7780           }
7781         }
7782         else
7783           head_bar = 0;
7784 
7785         r = fetch_token(tok, &p, end, env);
7786         if (r < 0) return r;
7787         r = prs_alts(&absent, tok, term, &p, end, env, TRUE);
7788         if (r < 0) {
7789           onig_node_free(absent);
7790           return r;
7791         }
7792 
7793         expr = NULL_NODE;
7794         is_range_cutter = 0;
7795         if (head_bar != 0) {
7796           Node* top = absent;
7797           if (NODE_TYPE(top) != NODE_ALT || IS_NULL(NODE_CDR(top))) {
7798             expr = NULL_NODE;
7799             is_range_cutter = 1;
7800             /* return ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN; */
7801           }
7802           else {
7803             absent = NODE_CAR(top);
7804             expr   = NODE_CDR(top);
7805             NODE_CAR(top) = NULL_NODE;
7806             NODE_CDR(top) = NULL_NODE;
7807             onig_node_free(top);
7808             if (IS_NULL(NODE_CDR(expr))) {
7809               top = expr;
7810               expr = NODE_CAR(top);
7811               NODE_CAR(top) = NULL_NODE;
7812               onig_node_free(top);
7813             }
7814           }
7815         }
7816 
7817         r = make_absent_tree(np, absent, expr, is_range_cutter, env);
7818         if (r != 0) {
7819           return r;
7820         }
7821         goto end;
7822       }
7823       else {
7824         return ONIGERR_UNDEFINED_GROUP_OPTION;
7825       }
7826       break;
7827 
7828 #ifdef USE_CALLOUT
7829     case '{':
7830       if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS))
7831         return ONIGERR_UNDEFINED_GROUP_OPTION;
7832 
7833       r = prs_callout_of_contents(np, ')', &p, end, env);
7834       if (r != 0) return r;
7835 
7836       goto end;
7837       break;
7838 #endif
7839 
7840     case '(':
7841       /* (?()...) */
7842       if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE)) {
7843         UChar *prev;
7844         Node* condition;
7845         int condition_is_checker;
7846 
7847         if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7848         PFETCH(c);
7849         if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7850 
7851         if (IS_CODE_DIGIT_ASCII(enc, c)
7852             || c == '-' || c == '+' || c == '<' || c == '\'') {
7853 #ifdef USE_BACKREF_WITH_LEVEL
7854           int exist_level;
7855           int level;
7856 #endif
7857           UChar* name_end;
7858           int back_num;
7859           enum REF_NUM num_type;
7860           int is_enclosed;
7861 
7862           is_enclosed = (c == '<' || c == '\'') ? 1 : 0;
7863           if (! is_enclosed)
7864             PUNFETCH;
7865           prev = p;
7866 #ifdef USE_BACKREF_WITH_LEVEL
7867           exist_level = 0;
7868           name_end = NULL_UCHARP; /* no need. escape gcc warning. */
7869           r = fetch_name_with_level(
7870                     (OnigCodePoint )(is_enclosed != 0 ? c : '('),
7871                     &p, end, &name_end,
7872                     env, &back_num, &level, &num_type);
7873           if (r == 1) exist_level = 1;
7874 #else
7875           r = fetch_name((OnigCodePoint )(is_enclosed != 0 ? c : '('),
7876                          &p, end, &name_end, env, &back_num, &num_type, TRUE);
7877 #endif
7878           if (r < 0) {
7879             if (is_enclosed == 0) {
7880               goto any_condition;
7881             }
7882             else
7883               return r;
7884           }
7885 
7886           condition_is_checker = 1;
7887           if (num_type != IS_NOT_NUM) {
7888             if (num_type == IS_REL_NUM) {
7889               back_num = backref_rel_to_abs(back_num, env);
7890             }
7891             if (back_num <= 0)
7892               return ONIGERR_INVALID_BACKREF;
7893 
7894             if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) {
7895               if (back_num > env->num_mem ||
7896                   IS_NULL(PARSEENV_MEMENV(env)[back_num].mem_node))
7897                 return ONIGERR_INVALID_BACKREF;
7898             }
7899 
7900             condition = node_new_backref_checker(1, &back_num, FALSE,
7901 #ifdef USE_BACKREF_WITH_LEVEL
7902                                                  exist_level, level,
7903 #endif
7904                                                  env);
7905           }
7906           else {
7907             int num;
7908             int* backs;
7909 
7910             num = name_to_group_numbers(env, prev, name_end, &backs);
7911             if (num <= 0) {
7912               return ONIGERR_UNDEFINED_NAME_REFERENCE;
7913             }
7914             if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) {
7915               int i;
7916               for (i = 0; i < num; i++) {
7917                 if (backs[i] > env->num_mem ||
7918                     IS_NULL(PARSEENV_MEMENV(env)[backs[i]].mem_node))
7919                   return ONIGERR_INVALID_BACKREF;
7920               }
7921             }
7922 
7923             condition = node_new_backref_checker(num, backs, TRUE,
7924 #ifdef USE_BACKREF_WITH_LEVEL
7925                                                  exist_level, level,
7926 #endif
7927                                                  env);
7928           }
7929 
7930           if (is_enclosed != 0) {
7931             if (PEND) goto err_if_else;
7932             PFETCH(c);
7933             if (c != ')') goto err_if_else;
7934           }
7935         }
7936 #ifdef USE_CALLOUT
7937         else if (c == '?') {
7938           if (IS_SYNTAX_OP2(env->syntax,
7939                             ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS)) {
7940             if (! PEND && PPEEK_IS('{')) {
7941               /* condition part is callouts of contents: (?(?{...})THEN|ELSE) */
7942               condition_is_checker = 0;
7943               PFETCH(c);
7944               r = prs_callout_of_contents(&condition, ')', &p, end, env);
7945               if (r != 0) return r;
7946               goto end_condition;
7947             }
7948           }
7949           goto any_condition;
7950         }
7951         else if (c == '*' &&
7952                  IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME)) {
7953           condition_is_checker = 0;
7954           r = prs_callout_of_name(&condition, ')', &p, end, env);
7955           if (r != 0) return r;
7956           goto end_condition;
7957         }
7958 #endif
7959         else {
7960         any_condition:
7961           PUNFETCH;
7962           condition_is_checker = 0;
7963           r = fetch_token(tok, &p, end, env);
7964           if (r < 0) return r;
7965           r = prs_alts(&condition, tok, term, &p, end, env, FALSE);
7966           if (r < 0) {
7967             onig_node_free(condition);
7968             return r;
7969           }
7970         }
7971 
7972 #ifdef USE_CALLOUT
7973       end_condition:
7974 #endif
7975         CHECK_NULL_RETURN_MEMERR(condition);
7976 
7977         if (PEND) {
7978         err_if_else:
7979           onig_node_free(condition);
7980           return ONIGERR_END_PATTERN_IN_GROUP;
7981         }
7982 
7983         if (PPEEK_IS(')')) { /* case: empty body: make backref checker */
7984           if (condition_is_checker == 0) {
7985             onig_node_free(condition);
7986             return ONIGERR_INVALID_IF_ELSE_SYNTAX;
7987           }
7988           PFETCH(c);
7989           *np = condition;
7990         }
7991         else { /* if-else */
7992           int then_is_empty;
7993           Node *Then, *Else;
7994 
7995           Then = 0;
7996           if (PPEEK_IS('|')) {
7997             PFETCH(c);
7998             then_is_empty = 1;
7999           }
8000           else
8001             then_is_empty = 0;
8002 
8003           r = fetch_token(tok, &p, end, env);
8004           if (r < 0) {
8005             onig_node_free(condition);
8006             return r;
8007           }
8008           r = prs_alts(&target, tok, term, &p, end, env, TRUE);
8009           if (r < 0) {
8010             onig_node_free(condition);
8011             onig_node_free(target);
8012             return r;
8013           }
8014 
8015           if (then_is_empty != 0) {
8016             Else = target;
8017           }
8018           else {
8019             if (NODE_TYPE(target) == NODE_ALT) {
8020               Then = NODE_CAR(target);
8021               if (NODE_CDR(NODE_CDR(target)) == NULL_NODE) {
8022                 Else = NODE_CAR(NODE_CDR(target));
8023                 cons_node_free_alone(NODE_CDR(target));
8024               }
8025               else {
8026                 Else = NODE_CDR(target);
8027               }
8028               cons_node_free_alone(target);
8029             }
8030             else {
8031               Then = target;
8032               Else = 0;
8033             }
8034           }
8035 
8036           *np = node_new_bag_if_else(condition, Then, Else);
8037           if (IS_NULL(*np)) {
8038             onig_node_free(condition);
8039             onig_node_free(Then);
8040             onig_node_free(Else);
8041             return ONIGERR_MEMORY;
8042           }
8043         }
8044         goto end;
8045       }
8046       else {
8047         return ONIGERR_UNDEFINED_GROUP_OPTION;
8048       }
8049       break;
8050 
8051 #ifdef USE_CAPTURE_HISTORY
8052     case '@':
8053       if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) {
8054         if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
8055           PFETCH(c);
8056           if (c == '<' || c == '\'') {
8057             list_capture = 1;
8058             goto named_group2; /* (?@<name>...) */
8059           }
8060           PUNFETCH;
8061         }
8062 
8063         *np = node_new_memory(0);
8064         CHECK_NULL_RETURN_MEMERR(*np);
8065         num = scan_env_add_mem_entry(env);
8066         if (num < 0) {
8067           return num;
8068         }
8069         else if (num >= (int )MEM_STATUS_BITS_NUM) {
8070           return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
8071         }
8072         BAG_(*np)->m.regnum = num;
8073         MEM_STATUS_ON_SIMPLE(env->cap_history, num);
8074       }
8075       else {
8076         return ONIGERR_UNDEFINED_GROUP_OPTION;
8077       }
8078       break;
8079 #endif
8080 
8081     case 'P':
8082       if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_CAPITAL_P_NAME)) {
8083         if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
8084         PFETCH(c);
8085         if (c == '<') goto named_group1;
8086 
8087         return ONIGERR_UNDEFINED_GROUP_OPTION;
8088       }
8089       /* else fall */
8090     case 'W': case 'D': case 'S':
8091     case 'y':
8092       if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_ONIGURUMA))
8093         return ONIGERR_UNDEFINED_GROUP_OPTION;
8094       /* else fall */
8095 
8096 #ifdef USE_POSIXLINE_OPTION
8097     case 'p':
8098 #endif
8099     case 'a':
8100     case '-': case 'i': case 'm': case 's': case 'x':
8101       {
8102         int neg = 0;
8103 
8104         while (1) {
8105           switch (c) {
8106           case ':':
8107           case ')':
8108             break;
8109 
8110           case '-':  neg = 1; break;
8111           case 'x':  OPTION_NEGATE(option, ONIG_OPTION_EXTEND,     neg); break;
8112           case 'i':  OPTION_NEGATE(option, ONIG_OPTION_IGNORECASE, neg); break;
8113           case 's':
8114             if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
8115               OPTION_NEGATE(option, ONIG_OPTION_MULTILINE,  neg);
8116             }
8117             else
8118               return ONIGERR_UNDEFINED_GROUP_OPTION;
8119             break;
8120 
8121           case 'm':
8122             if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
8123               OPTION_NEGATE(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? TRUE : FALSE));
8124             }
8125             else if (IS_SYNTAX_OP2(env->syntax,
8126                         ONIG_SYN_OP2_OPTION_ONIGURUMA|ONIG_SYN_OP2_OPTION_RUBY)) {
8127               OPTION_NEGATE(option, ONIG_OPTION_MULTILINE,  neg);
8128             }
8129             else
8130               return ONIGERR_UNDEFINED_GROUP_OPTION;
8131             break;
8132 #ifdef USE_POSIXLINE_OPTION
8133           case 'p':
8134             OPTION_NEGATE(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg);
8135             break;
8136 #endif
8137           case 'W':
8138             if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_ONIGURUMA))
8139               return ONIGERR_UNDEFINED_GROUP_OPTION;
8140             OPTION_NEGATE(option, ONIG_OPTION_WORD_IS_ASCII, neg);
8141             break;
8142           case 'D':
8143             if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_ONIGURUMA))
8144               return ONIGERR_UNDEFINED_GROUP_OPTION;
8145             OPTION_NEGATE(option, ONIG_OPTION_DIGIT_IS_ASCII, neg);
8146             break;
8147           case 'S':
8148             if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_ONIGURUMA))
8149               return ONIGERR_UNDEFINED_GROUP_OPTION;
8150             OPTION_NEGATE(option, ONIG_OPTION_SPACE_IS_ASCII, neg);
8151             break;
8152           case 'P':
8153             if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_ONIGURUMA))
8154               return ONIGERR_UNDEFINED_GROUP_OPTION;
8155             OPTION_NEGATE(option, ONIG_OPTION_POSIX_IS_ASCII, neg);
8156             break;
8157 
8158           case 'y': /* y{g}, y{w} */
8159             {
8160               if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_ONIGURUMA))
8161                 return ONIGERR_UNDEFINED_GROUP_OPTION;
8162 
8163               if (neg != 0) return ONIGERR_UNDEFINED_GROUP_OPTION;
8164 
8165               if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
8166               if (! PPEEK_IS('{')) return ONIGERR_UNDEFINED_GROUP_OPTION;
8167               PFETCH(c);
8168               if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
8169               PFETCH(c);
8170               switch (c) {
8171               case 'g':
8172                 if (! ONIGENC_IS_UNICODE_ENCODING(enc))
8173                   return ONIGERR_UNDEFINED_GROUP_OPTION;
8174 
8175                 OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER, FALSE);
8176                 OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_WORD, TRUE);
8177                 break;
8178 #ifdef USE_UNICODE_WORD_BREAK
8179               case 'w':
8180                 if (! ONIGENC_IS_UNICODE_ENCODING(enc))
8181                   return ONIGERR_UNDEFINED_GROUP_OPTION;
8182 
8183                 OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_WORD, FALSE);
8184                 OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER, TRUE);
8185                 break;
8186 #endif
8187               default:
8188                 return ONIGERR_UNDEFINED_GROUP_OPTION;
8189                 break;
8190               }
8191               if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
8192               PFETCH(c);
8193               if (c != '}')
8194                 return ONIGERR_UNDEFINED_GROUP_OPTION;
8195             } /* case 'y' */
8196             break;
8197 
8198           case 'a':
8199             if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_PYTHON))
8200               return ONIGERR_UNDEFINED_GROUP_OPTION;
8201 
8202             OPTION_NEGATE(option, ONIG_OPTION_POSIX_IS_ASCII, neg);
8203             break;
8204 
8205           default:
8206             return ONIGERR_UNDEFINED_GROUP_OPTION;
8207           }
8208 
8209           if (c == ')') {
8210             *np = node_new_option(option);
8211             CHECK_NULL_RETURN_MEMERR(*np);
8212             *src = p;
8213             return 2; /* option only */
8214           }
8215           else if (c == ':') {
8216             OnigOptionType prev = env->options;
8217 
8218             env->options = option;
8219             r = fetch_token(tok, &p, end, env);
8220             if (r < 0) return r;
8221             r = prs_alts(&target, tok, term, &p, end, env, FALSE);
8222             env->options = prev;
8223             if (r < 0) {
8224               onig_node_free(target);
8225               return r;
8226             }
8227             *np = node_new_option(option);
8228             CHECK_NULL_RETURN_MEMERR(*np);
8229             NODE_BODY(*np) = target;
8230             *src = p;
8231             return 0;
8232           }
8233 
8234           if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
8235           PFETCH(c);
8236         } /* while (1) */
8237       }
8238       break;
8239 
8240     default:
8241       return ONIGERR_UNDEFINED_GROUP_OPTION;
8242     }
8243   }
8244 #ifdef USE_CALLOUT
8245   else if (c == '*' &&
8246            IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME)) {
8247     PINC;
8248     r = prs_callout_of_name(np, ')', &p, end, env);
8249     if (r != 0) return r;
8250 
8251     goto end;
8252   }
8253 #endif
8254   else {
8255     if (OPTON_DONT_CAPTURE_GROUP(env->options))
8256       goto group;
8257 
8258     *np = node_new_memory(0);
8259     CHECK_NULL_RETURN_MEMERR(*np);
8260     num = scan_env_add_mem_entry(env);
8261     if (num < 0) return num;
8262     BAG_(*np)->m.regnum = num;
8263   }
8264 
8265   CHECK_NULL_RETURN_MEMERR(*np);
8266   r = fetch_token(tok, &p, end, env);
8267   if (r < 0) return r;
8268   r = prs_alts(&target, tok, term, &p, end, env, FALSE);
8269   if (r < 0) {
8270     onig_node_free(target);
8271     return r;
8272   }
8273 
8274   NODE_BODY(*np) = target;
8275 
8276   if (NODE_TYPE(*np) == NODE_BAG) {
8277     if (BAG_(*np)->type == BAG_MEMORY) {
8278       /* Don't move this to previous of prs_alts() */
8279       r = scan_env_set_mem_node(env, BAG_(*np)->m.regnum, *np);
8280       if (r != 0) return r;
8281     }
8282   }
8283 
8284  end:
8285   *src = p;
8286   return 0;
8287 }
8288 
/* Spellings of the common quantifiers used in warning messages.
   assign_quantifier_body() indexes this table with the result of
   quantifier_type_num(). */
static const char* PopularQStr[] = {
  "?",    /* 0 */
  "*",    /* 1 */
  "+",    /* 2 */
  "??",   /* 3 */
  "*?",   /* 4 */
  "+?"    /* 5 */
};

/* Spelling of the replacement named in the "nested repeat operator"
   warning.  assign_quantifier_body() indexes this table with a
   ReduceTypeTable entry; the two leading empty strings are presumably the
   RQ_ASIS and RQ_DEL slots, which never reach that message. */
static const char* ReduceQStr[] = {
  "",
  "",
  "*",
  "*?",
  "??",
  "+ and ??",
  "+? and ?"
};
8296 
8297 static int
assign_quantifier_body(Node * qnode,Node * target,int group,ParseEnv * env)8298 assign_quantifier_body(Node* qnode, Node* target, int group, ParseEnv* env)
8299 {
8300   QuantNode* qn;
8301 
8302   qn = QUANT_(qnode);
8303   if (qn->lower == 1 && qn->upper == 1)
8304     return 1;
8305 
8306   switch (NODE_TYPE(target)) {
8307   case NODE_STRING:
8308     if (group == 0) {
8309       if (str_node_can_be_split(target, env->enc)) {
8310         Node* n = str_node_split_last_char(target, env->enc);
8311         if (IS_NOT_NULL(n)) {
8312           NODE_BODY(qnode) = n;
8313           return 2;
8314         }
8315       }
8316     }
8317     break;
8318 
8319   case NODE_QUANT:
8320     { /* check redundant double repeat. */
8321       /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */
8322       QuantNode* qnt   = QUANT_(target);
8323       int nestq_num   = quantifier_type_num(qn);
8324       int targetq_num = quantifier_type_num(qnt);
8325 
8326 #ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
8327       if (targetq_num >= 0 && nestq_num >= 0 &&
8328           IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {
8329         UChar buf[WARN_BUFSIZE];
8330 
8331         switch(ReduceTypeTable[targetq_num][nestq_num]) {
8332         case RQ_ASIS:
8333           break;
8334 
8335         case RQ_DEL:
8336           if (onig_verb_warn != onig_null_warn) {
8337             onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
8338                                   env->pattern, env->pattern_end,
8339                                   (UChar* )"redundant nested repeat operator");
8340             (*onig_verb_warn)((char* )buf);
8341           }
8342           goto warn_exit;
8343           break;
8344 
8345         default:
8346           if (onig_verb_warn != onig_null_warn) {
8347             onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
8348                                        env->pattern, env->pattern_end,
8349             (UChar* )"nested repeat operator %s and %s was replaced with '%s'",
8350             PopularQStr[targetq_num], PopularQStr[nestq_num],
8351             ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);
8352             (*onig_verb_warn)((char* )buf);
8353           }
8354           goto warn_exit;
8355           break;
8356         }
8357       }
8358 
8359     warn_exit:
8360 #endif
8361       if (targetq_num >= 0 && nestq_num < 0) {
8362         if (targetq_num == 1 || targetq_num == 2) { /* * or + */
8363           /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */
8364           if (! IS_INFINITE_REPEAT(qn->upper) && qn->upper > 1 && qn->greedy) {
8365             qn->upper = (qn->lower == 0 ? 1 : qn->lower);
8366           }
8367         }
8368       }
8369       else {
8370         int r;
8371 
8372         NODE_BODY(qnode) = target;
8373         r = onig_reduce_nested_quantifier(qnode);
8374         return r;
8375       }
8376     }
8377     break;
8378 
8379   default:
8380     break;
8381   }
8382 
8383   NODE_BODY(qnode) = target;
8384   return 0;
8385 }
8386 
8387 
8388 #ifndef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
8389 static int
clear_not_flag_cclass(CClassNode * cc,OnigEncoding enc)8390 clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)
8391 {
8392   BBuf *tbuf;
8393   int r;
8394 
8395   if (IS_NCCLASS_NOT(cc)) {
8396     bitset_invert(cc->bs);
8397 
8398     if (! ONIGENC_IS_SINGLEBYTE(enc)) {
8399       r = not_code_range_buf(enc, cc->mbuf, &tbuf);
8400       if (r != 0) return r;
8401 
8402       bbuf_free(cc->mbuf);
8403       cc->mbuf = tbuf;
8404     }
8405 
8406     NCCLASS_CLEAR_NOT(cc);
8407   }
8408 
8409   return 0;
8410 }
8411 #endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
8412 
/*
 * Add one code point to a character class: into the range buffer when the
 * encoding's minimum character length exceeds one byte or the code point
 * does not encode to a single byte, into the bitset otherwise.
 *
 * 'code' is evaluated exactly once, so an argument with side effects is
 * safe.  The result of add_code_range_to_buf() is discarded.
 */
#define ADD_CODE_INTO_CC(cc, code, enc) do {\
  OnigCodePoint add_code_ = (code);\
  if (ONIGENC_MBC_MINLEN(enc) > 1 ||\
      ONIGENC_CODE_TO_MBCLEN(enc, add_code_) != 1) {\
    add_code_range_to_buf(&((cc)->mbuf), add_code_, add_code_);\
  }\
  else {\
    BITSET_SET_BIT((cc)->bs, add_code_);\
  }\
} while (0)
8421 
8422 extern int
onig_new_cclass_with_code_list(Node ** rnode,OnigEncoding enc,int n,OnigCodePoint codes[])8423 onig_new_cclass_with_code_list(Node** rnode, OnigEncoding enc,
8424                                int n, OnigCodePoint codes[])
8425 {
8426   int i;
8427   Node* node;
8428   CClassNode* cc;
8429 
8430   *rnode = NULL_NODE;
8431 
8432   node = node_new_cclass();
8433   CHECK_NULL_RETURN_MEMERR(node);
8434 
8435   cc = CCLASS_(node);
8436 
8437   for (i = 0; i < n; i++) {
8438     ADD_CODE_INTO_CC(cc, codes[i], enc);
8439   }
8440 
8441   *rnode = node;
8442   return 0;
8443 }
8444 
/* State handed to i_apply_case_fold() through its void* argument. */
typedef struct {
  ParseEnv*   env;       /* parse environment; supplies the encoding */
  CClassNode* cc;        /* character class the case folds are applied to */
  Node*       alt_root;  /* head of the alternatives built for multi-code folds */
  Node**      ptail;     /* slot where the next alternative node is linked */
} IApplyCaseFoldArg;
8451 
8452 static int
i_apply_case_fold(OnigCodePoint from,OnigCodePoint to[],int to_len,void * arg)8453 i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], int to_len,
8454                   void* arg)
8455 {
8456   IApplyCaseFoldArg* iarg;
8457   ParseEnv* env;
8458   OnigEncoding enc;
8459   CClassNode* cc;
8460 
8461   iarg = (IApplyCaseFoldArg* )arg;
8462   env = iarg->env;
8463   cc  = iarg->cc;
8464   enc = env->enc;
8465 
8466   if (to_len == 1) {
8467     int is_in = onig_is_code_in_cc(enc, from, cc);
8468 #ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
8469     if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) ||
8470         (is_in == 0 &&  IS_NCCLASS_NOT(cc))) {
8471       ADD_CODE_INTO_CC(cc, *to, enc);
8472     }
8473 #else
8474     if (is_in != 0) {
8475       if (ONIGENC_MBC_MINLEN(enc) > 1 ||
8476           ONIGENC_CODE_TO_MBCLEN(enc, *to) != 1) {
8477         if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, enc);
8478         add_code_range(&(cc->mbuf), env, *to, *to);
8479       }
8480       else {
8481         if (IS_NCCLASS_NOT(cc)) {
8482           BITSET_CLEAR_BIT(cc->bs, *to);
8483         }
8484         else
8485           BITSET_SET_BIT(cc->bs, *to);
8486       }
8487     }
8488 #endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
8489   }
8490   else {
8491     int r, i, len;
8492     UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
8493 
8494     if (onig_is_code_in_cc(enc, from, cc)
8495 #ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
8496         && !IS_NCCLASS_NOT(cc)
8497 #endif
8498         ) {
8499       int n, j, m, index;
8500       Node* list_node;
8501       Node* ns[3];
8502 
8503       n = 0;
8504       for (i = 0; i < to_len; i++) {
8505         OnigCodePoint code;
8506         Node* csnode;
8507         CClassNode* cs_cc;
8508 
8509         index = 0;
8510         if (ONIGENC_IS_UNICODE_ENCODING(enc) &&
8511             (index = onigenc_unicode_fold1_key(&to[i])) >= 0) {
8512           csnode = node_new_cclass();
8513           cs_cc = CCLASS_(csnode);
8514           if (IS_NULL(csnode)) {
8515           err_free_ns:
8516             for (j = 0; j < n; j++) onig_node_free(ns[j]);
8517             return ONIGERR_MEMORY;
8518           }
8519           m = FOLDS1_UNFOLDS_NUM(index);
8520           for (j = 0; j < m; j++) {
8521             code = FOLDS1_UNFOLDS(index)[j];
8522             ADD_CODE_INTO_CC(cs_cc, code, enc);
8523           }
8524           ADD_CODE_INTO_CC(cs_cc, to[i], enc);
8525           ns[n++] = csnode;
8526         }
8527         else {
8528           len = ONIGENC_CODE_TO_MBC(enc, to[i], buf);
8529           if (n == 0 || NODE_TYPE(ns[n-1]) != NODE_STRING) {
8530             csnode = node_new_str(buf, buf + len);
8531             if (IS_NULL(csnode)) goto err_free_ns;
8532 
8533             if (index == 0)
8534               NODE_STATUS_ADD(csnode, IGNORECASE);
8535             else
8536               NODE_STRING_SET_CASE_EXPANDED(csnode);
8537 
8538             ns[n++] = csnode;
8539           }
8540           else {
8541             r = onig_node_str_cat(ns[n-1], buf, buf + len);
8542             if (r < 0) goto err_free_ns;
8543           }
8544         }
8545       }
8546 
8547       if (n == 1)
8548         list_node = ns[0];
8549       else
8550         list_node = make_list(n, ns);
8551 
8552       *(iarg->ptail) = onig_node_new_alt(list_node, NULL_NODE);
8553       if (IS_NULL(*(iarg->ptail))) {
8554         onig_node_free(list_node);
8555         return ONIGERR_MEMORY;
8556       }
8557       iarg->ptail = &(NODE_CDR((*(iarg->ptail))));
8558     }
8559   }
8560 
8561   return 0;
8562 }
8563 
8564 static int
prs_exp(Node ** np,PToken * tok,int term,UChar ** src,UChar * end,ParseEnv * env,int group_head)8565 prs_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
8566         ParseEnv* env, int group_head)
8567 {
8568   int r, len, group;
8569   Node* qn;
8570   Node** tp;
8571   unsigned int parse_depth;
8572 
8573  retry:
8574   group = 0;
8575   *np = NULL;
8576   if (tok->type == (enum TokenSyms )term)
8577     goto end_of_token;
8578 
8579   parse_depth = env->parse_depth;
8580 
8581   switch (tok->type) {
8582   case TK_ALT:
8583   case TK_EOT:
8584   end_of_token:
8585     *np = node_new_empty();
8586     CHECK_NULL_RETURN_MEMERR(*np);
8587     return tok->type;
8588   break;
8589 
8590   case TK_SUBEXP_OPEN:
8591     r = prs_bag(np, tok, TK_SUBEXP_CLOSE, src, end, env);
8592     if (r < 0) return r;
8593     if (r == 1) { /* group */
8594       if (group_head == 0)
8595         group = 1;
8596       else {
8597         Node* target = *np;
8598         *np = node_new_group(target);
8599         if (IS_NULL(*np)) {
8600           onig_node_free(target);
8601           return ONIGERR_MEMORY;
8602         }
8603         group = 2;
8604       }
8605     }
8606     else if (r == 2) { /* option only */
8607       if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ISOLATED_OPTION_CONTINUE_BRANCH)) {
8608         env->options = BAG_(*np)->o.options;
8609         r = fetch_token(tok, src, end, env);
8610         if (r < 0) return r;
8611         onig_node_free(*np);
8612         goto retry;
8613       }
8614       else {
8615         Node* target;
8616         OnigOptionType prev = env->options;
8617 
8618         env->options = BAG_(*np)->o.options;
8619         r = fetch_token(tok, src, end, env);
8620         if (r < 0) return r;
8621         r = prs_alts(&target, tok, term, src, end, env, FALSE);
8622         env->options = prev;
8623         if (r < 0) {
8624           onig_node_free(target);
8625           return r;
8626         }
8627         NODE_BODY(*np) = target;
8628       }
8629       return tok->type;
8630     }
8631     break;
8632 
8633   case TK_SUBEXP_CLOSE:
8634     if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP))
8635       return ONIGERR_UNMATCHED_CLOSE_PARENTHESIS;
8636 
8637     if (tok->escaped) goto tk_crude_byte;
8638     else goto tk_byte;
8639     break;
8640 
8641   case TK_STRING:
8642   tk_byte:
8643     {
8644       *np = node_new_str_with_options(tok->backp, *src, env->options);
8645       CHECK_NULL_RETURN_MEMERR(*np);
8646 
8647       while (1) {
8648         r = fetch_token(tok, src, end, env);
8649         if (r < 0) return r;
8650         if (r != TK_STRING) break;
8651 
8652         r = onig_node_str_cat(*np, tok->backp, *src);
8653         if (r < 0) return r;
8654       }
8655 
8656     string_end:
8657       tp = np;
8658       goto repeat;
8659     }
8660     break;
8661 
8662   case TK_CRUDE_BYTE:
8663   tk_crude_byte:
8664     {
8665       *np = node_new_str_crude_char(tok->u.byte, env->options);
8666       CHECK_NULL_RETURN_MEMERR(*np);
8667       len = 1;
8668       while (1) {
8669         if (len >= ONIGENC_MBC_MINLEN(env->enc)) {
8670           if (len == enclen(env->enc, STR_(*np)->s)) {
8671             r = fetch_token(tok, src, end, env);
8672             goto tk_crude_byte_end;
8673           }
8674         }
8675 
8676         r = fetch_token(tok, src, end, env);
8677         if (r < 0) return r;
8678         if (r != TK_CRUDE_BYTE)
8679           return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
8680 
8681         r = node_str_cat_char(*np, tok->u.byte);
8682         if (r < 0) return r;
8683 
8684         len++;
8685       }
8686 
8687     tk_crude_byte_end:
8688       if (! ONIGENC_IS_VALID_MBC_STRING(env->enc, STR_(*np)->s, STR_(*np)->end))
8689         return ONIGERR_INVALID_WIDE_CHAR_VALUE;
8690 
8691       NODE_STRING_CLEAR_CRUDE(*np);
8692       goto string_end;
8693     }
8694     break;
8695 
8696   case TK_CODE_POINT:
8697     {
8698       UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
8699       len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.code);
8700       if (len < 0) return len;
8701       len = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf);
8702 #ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
8703       *np = node_new_str_crude(buf, buf + len, env->options);
8704 #else
8705       *np = node_new_str_with_options(buf, buf + len, env->options);
8706 #endif
8707       CHECK_NULL_RETURN_MEMERR(*np);
8708     }
8709     break;
8710 
8711   case TK_QUOTE_OPEN:
8712     {
8713       OnigCodePoint end_op[2];
8714       UChar *qstart, *qend, *nextp;
8715 
8716       end_op[0] = (OnigCodePoint )MC_ESC(env->syntax);
8717       end_op[1] = (OnigCodePoint )'E';
8718       qstart = *src;
8719       qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc);
8720       if (IS_NULL(qend)) {
8721         nextp = qend = end;
8722       }
8723       *np = node_new_str_with_options(qstart, qend, env->options);
8724       CHECK_NULL_RETURN_MEMERR(*np);
8725       *src = nextp;
8726     }
8727     break;
8728 
8729   case TK_CHAR_TYPE:
8730     {
8731       switch (tok->u.prop.ctype) {
8732       case ONIGENC_CTYPE_WORD:
8733         *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not, env->options);
8734         CHECK_NULL_RETURN_MEMERR(*np);
8735         break;
8736 
8737       case ONIGENC_CTYPE_SPACE:
8738       case ONIGENC_CTYPE_DIGIT:
8739       case ONIGENC_CTYPE_XDIGIT:
8740         {
8741           CClassNode* cc;
8742 
8743           *np = node_new_cclass();
8744           CHECK_NULL_RETURN_MEMERR(*np);
8745           cc = CCLASS_(*np);
8746           r = add_ctype_to_cc(cc, tok->u.prop.ctype, FALSE, env);
8747           if (r != 0) {
8748             onig_node_free(*np);
8749             *np = NULL_NODE;
8750             return r;
8751           }
8752           if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
8753         }
8754         break;
8755 
8756       default:
8757         return ONIGERR_PARSER_BUG;
8758         break;
8759       }
8760     }
8761     break;
8762 
8763   case TK_CHAR_PROPERTY:
8764     r = prs_char_property(np, tok, src, end, env);
8765     if (r != 0) return r;
8766     break;
8767 
8768   case TK_OPEN_CC:
8769     {
8770       CClassNode* cc;
8771 
8772       r = prs_cc(np, tok, src, end, env);
8773       if (r != 0) return r;
8774 
8775       cc = CCLASS_(*np);
8776       if (OPTON_IGNORECASE(env->options)) {
8777         IApplyCaseFoldArg iarg;
8778 
8779         iarg.env      = env;
8780         iarg.cc       = cc;
8781         iarg.alt_root = NULL_NODE;
8782         iarg.ptail    = &(iarg.alt_root);
8783 
8784         r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag,
8785                                         i_apply_case_fold, &iarg);
8786         if (r != 0) {
8787           onig_node_free(iarg.alt_root);
8788           return r;
8789         }
8790         if (IS_NOT_NULL(iarg.alt_root)) {
8791           Node* work = onig_node_new_alt(*np, iarg.alt_root);
8792           if (IS_NULL(work)) {
8793             onig_node_free(iarg.alt_root);
8794             return ONIGERR_MEMORY;
8795           }
8796           *np = work;
8797         }
8798       }
8799     }
8800     break;
8801 
8802   case TK_ANYCHAR:
8803     *np = node_new_anychar(env->options);
8804     CHECK_NULL_RETURN_MEMERR(*np);
8805     break;
8806 
8807   case TK_ANYCHAR_ANYTIME:
8808     *np = node_new_anychar(env->options);
8809     CHECK_NULL_RETURN_MEMERR(*np);
8810     qn = node_new_quantifier(0, INFINITE_REPEAT, FALSE);
8811     CHECK_NULL_RETURN_MEMERR(qn);
8812     NODE_BODY(qn) = *np;
8813     *np = qn;
8814     break;
8815 
8816   case TK_BACKREF:
8817     len = tok->u.backref.num;
8818     *np = node_new_backref(len,
8819                   (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)),
8820                   tok->u.backref.by_name,
8821 #ifdef USE_BACKREF_WITH_LEVEL
8822                            tok->u.backref.exist_level,
8823                            tok->u.backref.level,
8824 #endif
8825                            env);
8826     CHECK_NULL_RETURN_MEMERR(*np);
8827     break;
8828 
8829 #ifdef USE_CALL
8830   case TK_CALL:
8831     {
8832       int gnum = tok->u.call.gnum;
8833 
8834       *np = node_new_call(tok->u.call.name, tok->u.call.name_end,
8835                           gnum, tok->u.call.by_number);
8836       CHECK_NULL_RETURN_MEMERR(*np);
8837       env->num_call++;
8838       if (tok->u.call.by_number != 0 && gnum == 0) {
8839         env->has_call_zero = 1;
8840       }
8841     }
8842     break;
8843 #endif
8844 
8845   case TK_ANCHOR:
8846     *np = node_new_anchor_with_options(tok->u.anchor, env->options);
8847     CHECK_NULL_RETURN_MEMERR(*np);
8848     break;
8849 
8850   case TK_REPEAT:
8851   case TK_INTERVAL:
8852     if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) {
8853       if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS))
8854         return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED;
8855       else {
8856         *np = node_new_empty();
8857         CHECK_NULL_RETURN_MEMERR(*np);
8858       }
8859     }
8860     else {
8861       goto tk_byte;
8862     }
8863     break;
8864 
8865   case TK_KEEP:
8866     r = node_new_keep(np, env);
8867     if (r < 0) return r;
8868     break;
8869 
8870   case TK_GENERAL_NEWLINE:
8871     r = node_new_general_newline(np, env);
8872     if (r < 0) return r;
8873     break;
8874 
8875   case TK_NO_NEWLINE:
8876     r = node_new_no_newline(np, env);
8877     if (r < 0) return r;
8878     break;
8879 
8880   case TK_TRUE_ANYCHAR:
8881     r = node_new_true_anychar(np);
8882     if (r < 0) return r;
8883     break;
8884 
8885   case TK_TEXT_SEGMENT:
8886     r = make_text_segment(np, env);
8887     if (r < 0) return r;
8888     break;
8889 
8890   default:
8891     return ONIGERR_PARSER_BUG;
8892     break;
8893   }
8894 
8895   {
8896     tp = np;
8897 
8898   re_entry:
8899     r = fetch_token(tok, src, end, env);
8900     if (r < 0) return r;
8901 
8902   repeat:
8903     if (r == TK_REPEAT || r == TK_INTERVAL) {
8904       Node* target;
8905 
8906       if (is_invalid_quantifier_target(*tp))
8907         return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;
8908 
8909       INC_PARSE_DEPTH(parse_depth);
8910 
8911       qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,
8912                                r == TK_INTERVAL);
8913       CHECK_NULL_RETURN_MEMERR(qn);
8914       QUANT_(qn)->greedy = tok->u.repeat.greedy;
8915       if (group == 2) {
8916         target = node_drop_group(*tp);
8917         *tp = NULL_NODE;
8918       }
8919       else {
8920         target = *tp;
8921       }
8922       r = assign_quantifier_body(qn, target, group, env);
8923       if (r < 0) {
8924         onig_node_free(qn);
8925         *tp = NULL_NODE;
8926         return r;
8927       }
8928 
8929       if (tok->u.repeat.possessive != 0) {
8930         Node* en;
8931         en = node_new_bag(BAG_STOP_BACKTRACK);
8932         if (IS_NULL(en)) {
8933           onig_node_free(qn);
8934           return ONIGERR_MEMORY;
8935         }
8936         NODE_BODY(en) = qn;
8937         qn = en;
8938       }
8939 
8940       if (r == 0) {
8941         *tp = qn;
8942       }
8943       else if (r == 1) { /* x{1,1} ==> x */
8944         onig_node_free(qn);
8945         *tp = target;
8946       }
8947       else if (r == 2) { /* split case: /abc+/ */
8948         Node *tmp;
8949 
8950         *tp = node_new_list(*tp, NULL);
8951         if (IS_NULL(*tp)) {
8952           onig_node_free(qn);
8953           return ONIGERR_MEMORY;
8954         }
8955         tmp = NODE_CDR(*tp) = node_new_list(qn, NULL);
8956         if (IS_NULL(tmp)) {
8957           onig_node_free(qn);
8958           return ONIGERR_MEMORY;
8959         }
8960         tp = &(NODE_CAR(tmp));
8961       }
8962       group = 0;
8963       goto re_entry;
8964     }
8965   }
8966 
8967   return r;
8968 }
8969 
8970 static int
prs_branch(Node ** top,PToken * tok,int term,UChar ** src,UChar * end,ParseEnv * env,int group_head)8971 prs_branch(Node** top, PToken* tok, int term, UChar** src, UChar* end,
8972            ParseEnv* env, int group_head)
8973 {
8974   int r;
8975   Node *node, **headp;
8976 
8977   *top = NULL;
8978   INC_PARSE_DEPTH(env->parse_depth);
8979 
8980   r = prs_exp(&node, tok, term, src, end, env, group_head);
8981   if (r < 0) {
8982     onig_node_free(node);
8983     return r;
8984   }
8985 
8986   if (r == TK_EOT || r == term || r == TK_ALT) {
8987     *top = node;
8988   }
8989   else {
8990     *top = node_new_list(node, NULL);
8991     if (IS_NULL(*top)) {
8992       onig_node_free(node);
8993       return ONIGERR_MEMORY;
8994     }
8995 
8996     headp = &(NODE_CDR(*top));
8997     while (r != TK_EOT && r != term && r != TK_ALT) {
8998       r = prs_exp(&node, tok, term, src, end, env, FALSE);
8999       if (r < 0) {
9000         onig_node_free(node);
9001         return r;
9002       }
9003 
9004       if (NODE_TYPE(node) == NODE_LIST) {
9005         *headp = node;
9006         while (IS_NOT_NULL(NODE_CDR(node))) node = NODE_CDR(node);
9007         headp = &(NODE_CDR(node));
9008       }
9009       else {
9010         *headp = node_new_list(node, NULL);
9011         headp = &(NODE_CDR(*headp));
9012       }
9013     }
9014   }
9015 
9016   DEC_PARSE_DEPTH(env->parse_depth);
9017   return r;
9018 }
9019 
9020 /* term_tok: TK_EOT or TK_SUBEXP_CLOSE */
9021 static int
prs_alts(Node ** top,PToken * tok,int term,UChar ** src,UChar * end,ParseEnv * env,int group_head)9022 prs_alts(Node** top, PToken* tok, int term, UChar** src, UChar* end,
9023          ParseEnv* env, int group_head)
9024 {
9025   int r;
9026   Node *node, **headp;
9027   OnigOptionType save_options;
9028 
9029   *top = NULL;
9030   INC_PARSE_DEPTH(env->parse_depth);
9031   save_options = env->options;
9032 
9033   r = prs_branch(&node, tok, term, src, end, env, group_head);
9034   if (r < 0) {
9035     onig_node_free(node);
9036     return r;
9037   }
9038 
9039   if (r == term) {
9040     *top = node;
9041   }
9042   else if (r == TK_ALT) {
9043     *top  = onig_node_new_alt(node, NULL);
9044     if (IS_NULL(*top)) {
9045       onig_node_free(node);
9046       return ONIGERR_MEMORY;
9047     }
9048 
9049     headp = &(NODE_CDR(*top));
9050     while (r == TK_ALT) {
9051       r = fetch_token(tok, src, end, env);
9052       if (r < 0) return r;
9053       r = prs_branch(&node, tok, term, src, end, env, FALSE);
9054       if (r < 0) {
9055         onig_node_free(node);
9056         return r;
9057       }
9058       *headp = onig_node_new_alt(node, NULL);
9059       if (IS_NULL(*headp)) {
9060         onig_node_free(node);
9061         onig_node_free(*top);
9062         return ONIGERR_MEMORY;
9063       }
9064 
9065       headp = &(NODE_CDR(*headp));
9066     }
9067 
9068     if (tok->type != (enum TokenSyms )term)
9069       goto err;
9070   }
9071   else {
9072     onig_node_free(node);
9073   err:
9074     if (term == TK_SUBEXP_CLOSE)
9075       return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
9076     else
9077       return ONIGERR_PARSER_BUG;
9078   }
9079 
9080   env->options = save_options;
9081   DEC_PARSE_DEPTH(env->parse_depth);
9082   return r;
9083 }
9084 
9085 static int
prs_regexp(Node ** top,UChar ** src,UChar * end,ParseEnv * env)9086 prs_regexp(Node** top, UChar** src, UChar* end, ParseEnv* env)
9087 {
9088   int r;
9089   PToken tok;
9090 
9091   ptoken_init(&tok);
9092   r = fetch_token(&tok, src, end, env);
9093   if (r < 0) return r;
9094   r = prs_alts(top, &tok, TK_EOT, src, end, env, FALSE);
9095   if (r < 0) return r;
9096 
9097   return 0;
9098 }
9099 
#ifdef USE_CALL
/*
 * Wrap `node` in an unnamed memory (capture) node with group number 0 and
 * register that node in `env`.
 *
 * node:  tree to become the body of group 0.
 * rnode: receives the wrapper on success; untouched on failure.
 *
 * Returns 0 on success or a non-zero ONIGERR_* code.  On every failure path
 * `node` is left untouched and still owned by the caller.
 */
static int
make_call_zero_body(Node* node, ParseEnv* env, Node** rnode)
{
  int r;

  Node* x = node_new_memory(0 /* 0: is not named */);
  CHECK_NULL_RETURN_MEMERR(x);

  NODE_BODY(x) = node;
  BAG_(x)->m.regnum = 0;
  r = scan_env_set_mem_node(env, 0, x);
  if (r != 0) {
    /* detach the body first: freeing x must not release the caller's
       tree, which the caller still references */
    NODE_BODY(x) = NULL_NODE;
    onig_node_free(x);
    return r;
  }

  *rnode = x;
  return 0;
}
#endif
9121 
9122 extern int
onig_parse_tree(Node ** root,const UChar * pattern,const UChar * end,regex_t * reg,ParseEnv * env)9123 onig_parse_tree(Node** root, const UChar* pattern, const UChar* end,
9124                 regex_t* reg, ParseEnv* env)
9125 {
9126   int r;
9127   UChar* p;
9128 #ifdef USE_CALLOUT
9129   RegexExt* ext;
9130 #endif
9131 
9132   reg->string_pool        = 0;
9133   reg->string_pool_end    = 0;
9134   reg->num_mem            = 0;
9135   reg->num_repeat         = 0;
9136   reg->num_empty_check    = 0;
9137   reg->repeat_range_alloc = 0;
9138   reg->repeat_range       = (RepeatRange* )NULL;
9139 
9140   names_clear(reg);
9141 
9142   scan_env_clear(env);
9143   env->options        = reg->options;
9144   env->case_fold_flag = reg->case_fold_flag;
9145   env->enc            = reg->enc;
9146   env->syntax         = reg->syntax;
9147   env->pattern        = (UChar* )pattern;
9148   env->pattern_end    = (UChar* )end;
9149   env->reg            = reg;
9150 
9151   *root = NULL;
9152 
9153   if (! ONIGENC_IS_VALID_MBC_STRING(env->enc, pattern, end))
9154     return ONIGERR_INVALID_WIDE_CHAR_VALUE;
9155 
9156   p = (UChar* )pattern;
9157   r = prs_regexp(root, &p, (UChar* )end, env);
9158   if (r != 0) return r;
9159 
9160 #ifdef USE_CALL
9161   if (env->has_call_zero != 0) {
9162     Node* zero_node;
9163     r = make_call_zero_body(*root, env, &zero_node);
9164     if (r != 0) return r;
9165 
9166     *root = zero_node;
9167   }
9168 #endif
9169 
9170   reg->num_mem = env->num_mem;
9171 
9172 #ifdef USE_CALLOUT
9173   ext = reg->extp;
9174   if (IS_NOT_NULL(ext) && ext->callout_num > 0) {
9175     r = setup_ext_callout_list_values(reg);
9176   }
9177 #endif
9178 
9179   return r;
9180 }
9181 
9182 extern void
onig_scan_env_set_error_string(ParseEnv * env,int ecode ARG_UNUSED,UChar * arg,UChar * arg_end)9183 onig_scan_env_set_error_string(ParseEnv* env, int ecode ARG_UNUSED,
9184                                UChar* arg, UChar* arg_end)
9185 {
9186   env->error     = arg;
9187   env->error_end = arg_end;
9188 }
9189