1 /**********************************************************************
2 regparse.c - Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5 * Copyright (c) 2002-2021 K.Kosako
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #ifdef DEBUG_NODE_FREE
31 #ifndef NEED_TO_INCLUDE_STDIO
32 #define NEED_TO_INCLUDE_STDIO
33 #endif
34 #endif
35
36 #include "regparse.h"
37 #include "st.h"
38
/* NOTE(review): presumably the initial capacity for tag-name storage;
   not referenced in this part of the file -- confirm at the use site. */
#define INIT_TAG_NAMES_ALLOC_NUM   5

/* NOTE(review): presumably the size of a warning-message buffer -- confirm. */
#define WARN_BUFSIZE    256

/* Compile-time feature switch; its effect lives wherever it is tested
   with #ifdef (not visible in this part of the file). */
#define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
44
/*
 * Character predicates for callout names and callout tag names.
 * Both accept exactly the ASCII letters, the ASCII digits and '_'.
 *
 * The argument is parenthesized at every use so that an expression such as
 * `ch & 0x7f` is classified as a whole.  It is still evaluated several
 * times, so it must not have side effects.
 */
#define IS_ALLOWED_CODE_IN_CALLOUT_NAME(c) \
  (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') || \
   ((c) >= '0' && (c) <= '9') || (c) == '_' /* || (c) == '!' */)
#define IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME(c) \
  (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') || \
   ((c) >= '0' && (c) <= '9') || (c) == '_')
49
/*
 * Option tests.  Each OPTON_xxx(option) masks `option` with the matching
 * ONIG_OPTION_xxx bit(s); the result is non-zero when one of them is set.
 */
#define OPTON_SINGLELINE(option)     ((option) & ONIG_OPTION_SINGLELINE)
#define OPTON_MULTILINE(option)      ((option) & ONIG_OPTION_MULTILINE)
#define OPTON_IGNORECASE(option)     ((option) & ONIG_OPTION_IGNORECASE)
#define OPTON_EXTEND(option)         ((option) & ONIG_OPTION_EXTEND)
/* The WORD/DIGIT/SPACE tests are also satisfied by ONIG_OPTION_POSIX_IS_ASCII. */
#define OPTON_WORD_ASCII(option) \
  ((option) & (ONIG_OPTION_WORD_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII))
#define OPTON_DIGIT_ASCII(option) \
  ((option) & (ONIG_OPTION_DIGIT_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII))
#define OPTON_SPACE_ASCII(option) \
  ((option) & (ONIG_OPTION_SPACE_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII))
#define OPTON_POSIX_ASCII(option)    ((option) & ONIG_OPTION_POSIX_IS_ASCII)
#define OPTON_TEXT_SEGMENT_WORD(option)  ((option) & ONIG_OPTION_TEXT_SEGMENT_WORD)

/*
 * Non-zero when `ctype` is non-negative and either
 *   - it is below ONIGENC_CTYPE_ASCII and POSIX_IS_ASCII is on, or
 *   - it is WORD / DIGIT / SPACE and the corresponding *_ASCII test is on.
 */
#define OPTON_IS_ASCII_MODE_CTYPE(ctype, options) \
  ((ctype) >= 0 && \
  (((ctype) < ONIGENC_CTYPE_ASCII && OPTON_POSIX_ASCII(options)) ||\
   ((ctype) == ONIGENC_CTYPE_WORD  && OPTON_WORD_ASCII(options))  ||\
   ((ctype) == ONIGENC_CTYPE_DIGIT && OPTON_DIGIT_ASCII(options)) ||\
   ((ctype) == ONIGENC_CTYPE_SPACE && OPTON_SPACE_ASCII(options))))
69
70
/*
 * Syntax definition "Oniguruma".
 *
 * The initializer lists, in order: a set of ONIG_SYN_OP_* flags (the GNU
 * regex operator set with ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END removed),
 * a set of ONIG_SYN_OP2_* flags, a set of behaviour flags, the default
 * options (ONIG_OPTION_NONE) and the meta-character table.
 * NOTE(review): the OnigSyntaxType field names are declared elsewhere.
 *
 * Compared with OnigSyntaxRuby below, this table additionally enables
 * the callout operators (QMARK_BRACE_CALLOUT_CONTENTS, ASTERISK_CALLOUT_NAME),
 * ESC_CAPITAL_N_O_SUPER_DOT, ONIG_SYN_VARIABLE_LEN_LOOK_BEHIND and
 * ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC, and selects
 * ONIG_SYN_OP2_OPTION_ONIGURUMA instead of ONIG_SYN_OP2_OPTION_RUBY.
 */
OnigSyntaxType OnigSyntaxOniguruma = {
  (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
     ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
     ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL |
     ONIG_SYN_OP_ESC_CONTROL_CHARS |
     ONIG_SYN_OP_ESC_C_CONTROL )
   & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
  , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
      ONIG_SYN_OP2_OPTION_ONIGURUMA |
      ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
      ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |
      ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |
      ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS |
      ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME |
      ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT |
      ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |
      ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT |
      ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |
      ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |
      ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
      ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
      ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
      ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |
      ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |
      ONIG_SYN_OP2_ESC_H_XDIGIT | ONIG_SYN_OP2_ESC_U_HEX4 )
  , ( SYN_GNU_REGEX_BV |
      ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |
      ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |
      ONIG_SYN_VARIABLE_LEN_LOOK_BEHIND |
      ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
      ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
      ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |
      ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC |
      ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
      ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )
  , ONIG_OPTION_NONE
  ,
  {
      /* only the escape character is active; every other slot is disabled */
      (OnigCodePoint )'\\'                       /* esc */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
  }
};
117
/*
 * Syntax definition "Ruby".
 *
 * Same layout as the other syntax tables: ONIG_SYN_OP_* flags (GNU regex
 * operator set without ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END), ONIG_SYN_OP2_*
 * flags, behaviour flags, default options (ONIG_OPTION_NONE) and the
 * meta-character table.
 * NOTE(review): the OnigSyntaxType field names are declared elsewhere.
 *
 * This table selects ONIG_SYN_OP2_OPTION_RUBY and does not enable the
 * callout operators, ESC_CAPITAL_N_O_SUPER_DOT,
 * ONIG_SYN_VARIABLE_LEN_LOOK_BEHIND or
 * ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC.
 */
OnigSyntaxType OnigSyntaxRuby = {
  (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
     ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
     ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL |
     ONIG_SYN_OP_ESC_CONTROL_CHARS |
     ONIG_SYN_OP_ESC_C_CONTROL )
   & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
  , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
      ONIG_SYN_OP2_OPTION_RUBY |
      ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
      ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |
      ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |
      ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT |
      ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |
      ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |
      ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |
      ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
      ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
      ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
      ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |
      ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |
      ONIG_SYN_OP2_ESC_H_XDIGIT | ONIG_SYN_OP2_ESC_U_HEX4 )
  , ( SYN_GNU_REGEX_BV |
      ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |
      ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |
      ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
      ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
      ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |
      ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
      ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )
  , ONIG_OPTION_NONE
  ,
  {
      /* only the escape character is active; every other slot is disabled */
      (OnigCodePoint )'\\'                       /* esc */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
  }
};
159
/* Default syntax pointer; initialised to ONIG_SYNTAX_ONIGURUMA. */
OnigSyntaxType*  OnigDefaultSyntax = ONIG_SYNTAX_ONIGURUMA;
161
162
/* Initialise a byte buffer (see bbuf_init()). */
#define BB_INIT(buf,size)    bbuf_init((BBuf* )(buf), (size))

/*
 * Grow (buf) by doubling its capacity at least once and until it is not
 * smaller than `low`.
 *
 * On allocation failure the ENCLOSING FUNCTION returns ONIGERR_MEMORY.
 * The buffer is only updated after xrealloc() has succeeded, so on failure
 * (buf)->p and (buf)->alloc still describe the old, valid allocation and
 * the owner can release it normally.  A zero capacity is treated as one
 * byte and a capacity that cannot be doubled is reported as a memory
 * error, so the doubling loop always terminates.
 */
#define BB_EXPAND(buf,low) do{\
  unsigned int bb_expand_alloc_ = (buf)->alloc;\
  UChar* bb_expand_p_;\
  if (bb_expand_alloc_ == 0) bb_expand_alloc_ = 1;\
  do {\
    if (bb_expand_alloc_ > (~(unsigned int )0) / 2) return(ONIGERR_MEMORY);\
    bb_expand_alloc_ *= 2;\
  } while (bb_expand_alloc_ < (unsigned int )(low));\
  bb_expand_p_ = (UChar* )xrealloc((buf)->p, bb_expand_alloc_);\
  if (IS_NULL(bb_expand_p_)) return(ONIGERR_MEMORY);\
  (buf)->p     = bb_expand_p_;\
  (buf)->alloc = bb_expand_alloc_;\
} while (0)

/*
 * Make sure (buf) can hold at least `size` bytes, doubling the capacity as
 * often as needed (not at all when it is already large enough).
 * Failure handling is the same as for BB_EXPAND.
 */
#define BB_ENSURE_SIZE(buf,size) do{\
  unsigned int new_alloc = (buf)->alloc;\
  if (new_alloc == 0 && (unsigned int )(size) > 0) new_alloc = 1;\
  while (new_alloc < (unsigned int )(size)) {\
    if (new_alloc > (~(unsigned int )0) / 2) return(ONIGERR_MEMORY);\
    new_alloc *= 2;\
  }\
  if ((buf)->alloc != new_alloc) {\
    UChar* bb_ensure_p_ = (UChar* )xrealloc((buf)->p, new_alloc);\
    if (IS_NULL(bb_ensure_p_)) return(ONIGERR_MEMORY);\
    (buf)->p     = bb_ensure_p_;\
    (buf)->alloc = new_alloc;\
  }\
} while (0)
180
/*
 * Byte-buffer write helpers.  `buf` points at a structure with the members
 * p (storage), alloc (capacity) and used (bytes in use).  Macros that may
 * grow the buffer go through BB_EXPAND, which makes the enclosing function
 * return ONIGERR_MEMORY on allocation failure.
 * All macro arguments are parenthesized at their point of use.
 */

/* Copy n bytes to offset pos, growing the buffer and `used` as required. */
#define BB_WRITE(buf,pos,bytes,n) do{\
  int used = (pos) + (n);\
  if ((buf)->alloc < (unsigned int )used) BB_EXPAND((buf),used);\
  xmemcpy((buf)->p + (pos), (bytes), (n));\
  if ((buf)->used < (unsigned int )used) (buf)->used = used;\
} while (0)

/* Store a single byte at offset pos, growing the buffer and `used` as required. */
#define BB_WRITE1(buf,pos,byte) do{\
  int used = (pos) + 1;\
  if ((buf)->alloc < (unsigned int )used) BB_EXPAND((buf),used);\
  (buf)->p[(pos)] = (byte);\
  if ((buf)->used < (unsigned int )used) (buf)->used = used;\
} while (0)

/* Append at the current end of the used area. */
#define BB_ADD(buf,bytes,n)       BB_WRITE((buf),(buf)->used,(bytes),(n))
#define BB_ADD1(buf,byte)         BB_WRITE1((buf),(buf)->used,(byte))
/* Address / offset of the first unused byte. */
#define BB_GET_ADD_ADDRESS(buf)   ((buf)->p + (buf)->used)
#define BB_GET_OFFSET_POS(buf)    ((buf)->used)

/* from < to */
/* Move n bytes towards the end; grows the buffer and `used` if needed. */
#define BB_MOVE_RIGHT(buf,from,to,n) do {\
  if ((unsigned int )((to)+(n)) > (buf)->alloc) BB_EXPAND((buf),(to) + (n));\
  xmemmove((buf)->p + (to), (buf)->p + (from), (n));\
  if ((unsigned int )((to)+(n)) > (buf)->used) (buf)->used = (to) + (n);\
} while (0)

/* from > to */
/* Move n bytes towards the start; `used` is left unchanged. */
#define BB_MOVE_LEFT(buf,from,to,n) do {\
  xmemmove((buf)->p + (to), (buf)->p + (from), (n));\
} while (0)

/* from > to */
/* Move everything from `from` up to `used` down to `to` and shrink `used`. */
#define BB_MOVE_LEFT_REDUCE(buf,from,to) do {\
  xmemmove((buf)->p + (to), (buf)->p + (from), (buf)->used - (from));\
  (buf)->used -= ((from) - (to));\
} while (0)

/* Insert n bytes at pos: a plain write at/after the end of the used area,
   otherwise the tail is shifted right by n first. */
#define BB_INSERT(buf,pos,bytes,n) do {\
  if ((pos) >= (buf)->used) {\
    BB_WRITE((buf),(pos),(bytes),(n));\
  }\
  else {\
    BB_MOVE_RIGHT((buf),(pos),(pos) + (n),((buf)->used - (pos)));\
    xmemcpy((buf)->p + (pos), (bytes), (n));\
  }\
} while (0)

/* Byte stored at offset pos (no bounds check). */
#define BB_GET_BYTE(buf, pos) ((buf)->p[(pos)])
229
230
/* NOTE(review): presumably the state of the character-class parser
   (value seen / inside a range / element complete / at start); the users
   of this type are outside this part of the file -- confirm. */
typedef enum {
  CS_VALUE,
  CS_RANGE,
  CS_COMPLETE,
  CS_START
} CSTATE;

/* NOTE(review): presumably the kind of the current character-class value
   (undefined / single-byte / multi-byte / character property) -- confirm
   at the use site. */
typedef enum {
  CV_UNDEF,
  CV_SB,
  CV_MB,
  CV_CPROP
} CVAL;
244
/* Warning handler that ignores its message.  Its address doubles as the
   "warnings disabled" marker tested by onig_warning(). */
extern void
onig_null_warn(const char* s ARG_UNUSED)
{
}
246
/* Current warning handler: DEFAULT_WARN_FUNCTION when the build defines
   it, otherwise the no-op onig_null_warn.  Replaced by onig_set_warn_func(). */
#ifdef DEFAULT_WARN_FUNCTION
static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;
#else
static OnigWarnFunc onig_warn = onig_null_warn;
#endif

/* Current "verbose" warning handler, selected the same way from
   DEFAULT_VERB_WARN_FUNCTION.  Replaced by onig_set_verb_warn_func(). */
#ifdef DEFAULT_VERB_WARN_FUNCTION
static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION;
#else
static OnigWarnFunc onig_verb_warn = onig_null_warn;
#endif
258
/*
 * Install `f` as the warning handler.
 *
 * A NULL `f` is stored as onig_null_warn, so "no handler" is always
 * represented by that one value and the handler pointer is never NULL
 * when it is called.
 */
extern void onig_set_warn_func(OnigWarnFunc f)
{
  if (f == NULL)
    onig_warn = onig_null_warn;
  else
    onig_warn = f;
}
263
/*
 * Install `f` as the verbose-warning handler.
 *
 * A NULL `f` is stored as onig_null_warn, so "no handler" is always
 * represented by that one value and the handler pointer is never NULL
 * when it is called.
 */
extern void onig_set_verb_warn_func(OnigWarnFunc f)
{
  if (f == NULL)
    onig_verb_warn = onig_null_warn;
  else
    onig_verb_warn = f;
}
268
269 extern void
onig_warning(const char * s)270 onig_warning(const char* s)
271 {
272 if (onig_warn == onig_null_warn) return ;
273
274 (*onig_warn)(s);
275 }
276
#define DEFAULT_MAX_CAPTURE_NUM   32767

/* Upper bound on capture groups; changed by onig_set_capture_num_limit(). */
static int MaxCaptureNum = DEFAULT_MAX_CAPTURE_NUM;

/*
 * Set the capture-number limit.
 * Returns 0 on success, or -1 (leaving the limit untouched) when `num`
 * is negative.
 */
extern int
onig_set_capture_num_limit(int num)
{
  if (num >= 0) {
    MaxCaptureNum = num;
    return 0;
  }

  return -1;
}
289
290 static unsigned int ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;
291
292 extern unsigned int
onig_get_parse_depth_limit(void)293 onig_get_parse_depth_limit(void)
294 {
295 return ParseDepthLimit;
296 }
297
298 extern int
onig_set_parse_depth_limit(unsigned int depth)299 onig_set_parse_depth_limit(unsigned int depth)
300 {
301 if (depth == 0)
302 ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;
303 else
304 ParseDepthLimit = depth;
305 return 0;
306 }
307
/*
 * INC_PARSE_DEPTH(d): increment the depth counter `d` and make the
 * ENCLOSING FUNCTION return ONIGERR_PARSE_DEPTH_LIMIT_OVER once it exceeds
 * ParseDepthLimit.  With ONIG_DEBUG_PARSE the largest depth seen is also
 * recorded in env->max_parse_depth (a variable `env` must be in scope).
 * `d` is evaluated more than once and must be a plain lvalue.
 */
#ifdef ONIG_DEBUG_PARSE
#define INC_PARSE_DEPTH(d) do {\
  (d)++;\
  if (env->max_parse_depth < (d)) env->max_parse_depth = (d);\
  if ((d) > ParseDepthLimit) \
    return ONIGERR_PARSE_DEPTH_LIMIT_OVER;\
} while (0)
#else
#define INC_PARSE_DEPTH(d) do {\
  (d)++;\
  if ((d) > ParseDepthLimit) \
    return ONIGERR_PARSE_DEPTH_LIMIT_OVER;\
} while (0)
#endif

/* DEC_PARSE_DEPTH(d): undo one INC_PARSE_DEPTH. */
#define DEC_PARSE_DEPTH(d)  ((d)--)
324
325
326 static int
bbuf_init(BBuf * buf,int size)327 bbuf_init(BBuf* buf, int size)
328 {
329 if (size <= 0) {
330 size = 0;
331 buf->p = NULL;
332 }
333 else {
334 buf->p = (UChar* )xmalloc(size);
335 if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
336 }
337
338 buf->alloc = size;
339 buf->used = 0;
340 return 0;
341 }
342
343 static void
bbuf_free(BBuf * bbuf)344 bbuf_free(BBuf* bbuf)
345 {
346 if (IS_NOT_NULL(bbuf)) {
347 if (IS_NOT_NULL(bbuf->p)) xfree(bbuf->p);
348 xfree(bbuf);
349 }
350 }
351
352 static int
bbuf_clone(BBuf ** rto,BBuf * from)353 bbuf_clone(BBuf** rto, BBuf* from)
354 {
355 int r;
356 BBuf *to;
357
358 *rto = to = (BBuf* )xmalloc(sizeof(BBuf));
359 CHECK_NULL_RETURN_MEMERR(to);
360 r = BB_INIT(to, from->alloc);
361 if (r != 0) {
362 bbuf_free(to);
363 *rto = 0;
364 return r;
365 }
366 to->used = from->used;
367 xmemcpy(to->p, from->p, from->used);
368 return 0;
369 }
370
371 static int
backref_rel_to_abs(int rel_no,ParseEnv * env)372 backref_rel_to_abs(int rel_no, ParseEnv* env)
373 {
374 if (rel_no > 0) {
375 if (rel_no > ONIG_INT_MAX - env->num_mem)
376 return ONIGERR_INVALID_BACKREF;
377 return env->num_mem + rel_no;
378 }
379 else {
380 return env->num_mem + 1 + rel_no;
381 }
382 }
383
/* Set / clear the flag bits `f` in the lvalue `v`. */
#define OPTION_ON(v,f)     ((v) |= (f))
#define OPTION_OFF(v,f)    ((v) &= ~(f))

/* Clear `f` in `v` when `negative` is non-zero, set it otherwise.
   The whole expansion is parenthesized so that it behaves as a single
   operand inside a larger expression. */
#define OPTION_NEGATE(v,f,negative)    ((negative) ? ((v) &= ~(f)) : ((v) |= (f)))
388
/* First code point of the "multi-byte" range for `enc`: 0 when the
   encoding's minimum character length is above one byte, 0x80 otherwise. */
#define MBCODE_START_POS(enc) \
  (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80)

/* Add the range [MBCODE_START_POS(enc), ~0] to the code-range buffer
   `pbuf`; evaluates to the result of add_code_range_to_buf(). */
#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \
  add_code_range_to_buf(pbuf, MBCODE_START_POS(enc), ~((OnigCodePoint )0))

/* For encodings that are not single-byte, add the whole multi-byte range
   to `mbuf`.  Uses a variable `r` of the enclosing function and makes that
   function return `r` when it is non-zero. */
#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\
  if (! ONIGENC_IS_SINGLEBYTE(enc)) {\
    r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\
    if (r != 0) return r;\
  }\
} while (0)


/* Set `empty` to 1 when all BITSET_REAL_SIZE words of `bs` are zero, else
   to 0.  The macro declares its own loop counter `i` inside its block. */
#define BITSET_IS_EMPTY(bs,empty) do {\
  int i;\
  empty = 1;\
  for (i = 0; i < (int )BITSET_REAL_SIZE; i++) {\
    if ((bs)[i] != 0) {\
      empty = 0; break;\
    }\
  }\
} while (0)
412
413 static void
bitset_set_range(BitSetRef bs,int from,int to)414 bitset_set_range(BitSetRef bs, int from, int to)
415 {
416 int i;
417 for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) {
418 BITSET_SET_BIT(bs, i);
419 }
420 }
421
422 static void
bitset_invert(BitSetRef bs)423 bitset_invert(BitSetRef bs)
424 {
425 int i;
426 for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { bs[i] = ~(bs[i]); }
427 }
428
429 static void
bitset_invert_to(BitSetRef from,BitSetRef to)430 bitset_invert_to(BitSetRef from, BitSetRef to)
431 {
432 int i;
433 for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { to[i] = ~(from[i]); }
434 }
435
436 static void
bitset_and(BitSetRef dest,BitSetRef bs)437 bitset_and(BitSetRef dest, BitSetRef bs)
438 {
439 int i;
440 for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { dest[i] &= bs[i]; }
441 }
442
443 static void
bitset_or(BitSetRef dest,BitSetRef bs)444 bitset_or(BitSetRef dest, BitSetRef bs)
445 {
446 int i;
447 for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { dest[i] |= bs[i]; }
448 }
449
450 static void
bitset_copy(BitSetRef dest,BitSetRef bs)451 bitset_copy(BitSetRef dest, BitSetRef bs)
452 {
453 int i;
454 for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { dest[i] = bs[i]; }
455 }
456
457 extern int
onig_strncmp(const UChar * s1,const UChar * s2,int n)458 onig_strncmp(const UChar* s1, const UChar* s2, int n)
459 {
460 int x;
461
462 while (n-- > 0) {
463 x = *s2++ - *s1++;
464 if (x) return x;
465 }
466 return 0;
467 }
468
469 extern void
onig_strcpy(UChar * dest,const UChar * src,const UChar * end)470 onig_strcpy(UChar* dest, const UChar* src, const UChar* end)
471 {
472 int len = (int )(end - src);
473 if (len > 0) {
474 xmemcpy(dest, src, len);
475 dest[len] = (UChar )0;
476 }
477 }
478
/* scan pattern methods */
/*
 * These macros operate on variables of the enclosing function:
 *   p    - current read position        end - end of the pattern
 *   enc  - encoding of the pattern      pfetch_prev - declared by PFETCH_READY
 * None of them checks `p < end` by itself except PEND and PPEEK.
 */
#define PEND_VALUE   0

/* Declares the "previous position" variable used by PINC/PFETCH/PUNFETCH. */
#define PFETCH_READY  UChar* pfetch_prev
/* 1 when the read position has reached the end, else 0. */
#define PEND         (p < end ?  0 : 1)
/* Step back to the position saved by the last PINC / PFETCH. */
#define PUNFETCH     p = pfetch_prev
/* Skip one character, remembering where it started. */
#define PINC       do { \
  pfetch_prev = p; \
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)
/* Read one character into `c` and advance, remembering where it started. */
#define PFETCH(c)  do { \
  c = ONIGENC_MBC_TO_CODE(enc, p, end); \
  pfetch_prev = p; \
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)

/* "_S" variants: same as above but without saving the previous position. */
#define PINC_S     do { \
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)
#define PFETCH_S(c) do { \
  c = ONIGENC_MBC_TO_CODE(enc, p, end); \
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)

/* Code point at the read position without advancing; PEND_VALUE at the end. */
#define PPEEK        (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE)
/* True when the next code point equals `c` (the argument is parenthesized
   so that the cast applies to the whole expression). */
#define PPEEK_IS(c)  (PPEEK == (OnigCodePoint )(c))
505
506 static UChar*
strcat_capa(UChar * dest,UChar * dest_end,const UChar * src,const UChar * src_end,int capa)507 strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end,
508 int capa)
509 {
510 UChar* r;
511 ptrdiff_t dest_delta = dest_end - dest;
512
513 if (dest)
514 r = (UChar* )xrealloc(dest, capa + 1);
515 else
516 r = (UChar* )xmalloc(capa + 1);
517
518 CHECK_NULL_RETURN(r);
519 onig_strcpy(r + dest_delta, src, src_end);
520 return r;
521 }
522
523 /* dest on static area */
524 static UChar*
strcat_capa_from_static(UChar * dest,UChar * dest_end,const UChar * src,const UChar * src_end,int capa)525 strcat_capa_from_static(UChar* dest, UChar* dest_end,
526 const UChar* src, const UChar* src_end, int capa)
527 {
528 UChar* r;
529
530 r = (UChar* )xmalloc(capa + 1);
531 CHECK_NULL_RETURN(r);
532 onig_strcpy(r, dest, dest_end);
533 onig_strcpy(r + (dest_end - dest), src, src_end);
534 return r;
535 }
536
537
538 #ifdef USE_ST_LIBRARY
539
/* Hash key describing the byte string [s, end); the bytes themselves are
   not owned by the key (see onig_st_insert_strend()). */
typedef struct {
  UChar* s;     /* first byte */
  UChar* end;   /* one past the last byte */
} st_str_end_key;
544
545 static int
str_end_cmp(st_str_end_key * x,st_str_end_key * y)546 str_end_cmp(st_str_end_key* x, st_str_end_key* y)
547 {
548 UChar *p, *q;
549 int c;
550
551 if ((x->end - x->s) != (y->end - y->s))
552 return 1;
553
554 p = x->s;
555 q = y->s;
556 while (p < x->end) {
557 c = (int )*p - (int )*q;
558 if (c != 0) return c;
559
560 p++; q++;
561 }
562
563 return 0;
564 }
565
566 static int
str_end_hash(st_str_end_key * x)567 str_end_hash(st_str_end_key* x)
568 {
569 UChar *p;
570 unsigned val = 0;
571
572 p = x->s;
573 while (p < x->end) {
574 val = val * 997 + (unsigned )*p++;
575 }
576
577 return (int) (val + (val >> 5));
578 }
579
580 extern hash_table_type
onig_st_init_strend_table_with_size(int size)581 onig_st_init_strend_table_with_size(int size)
582 {
583 static struct st_hash_type hashType = {
584 str_end_cmp,
585 str_end_hash,
586 };
587
588 return (hash_table_type )onig_st_init_table_with_size(&hashType, size);
589 }
590
591 extern int
onig_st_lookup_strend(hash_table_type table,const UChar * str_key,const UChar * end_key,hash_data_type * value)592 onig_st_lookup_strend(hash_table_type table, const UChar* str_key,
593 const UChar* end_key, hash_data_type *value)
594 {
595 st_str_end_key key;
596
597 key.s = (UChar* )str_key;
598 key.end = (UChar* )end_key;
599
600 return onig_st_lookup(table, (st_data_t )(&key), value);
601 }
602
603 extern int
onig_st_insert_strend(hash_table_type table,const UChar * str_key,const UChar * end_key,hash_data_type value)604 onig_st_insert_strend(hash_table_type table, const UChar* str_key,
605 const UChar* end_key, hash_data_type value)
606 {
607 st_str_end_key* key;
608 int result;
609
610 key = (st_str_end_key* )xmalloc(sizeof(st_str_end_key));
611 CHECK_NULL_RETURN_MEMERR(key);
612
613 key->s = (UChar* )str_key;
614 key->end = (UChar* )end_key;
615 result = onig_st_insert(table, (st_data_t )key, value);
616 if (result) {
617 xfree(key);
618 }
619 return result;
620 }
621
622
623 #ifdef USE_CALLOUT
624
/* Hash key for the callout name table: the name bytes [s, end) qualified
   by encoding and callout type. */
typedef struct {
  OnigEncoding enc;
  int    type; /* callout type: single or not */
  UChar* s;    /* first byte of the name */
  UChar* end;  /* one past the last byte of the name */
} st_callout_name_key;
631
632 static int
callout_name_table_cmp(st_callout_name_key * x,st_callout_name_key * y)633 callout_name_table_cmp(st_callout_name_key* x, st_callout_name_key* y)
634 {
635 UChar *p, *q;
636 int c;
637
638 if (x->enc != y->enc) return 1;
639 if (x->type != y->type) return 1;
640 if ((x->end - x->s) != (y->end - y->s))
641 return 1;
642
643 p = x->s;
644 q = y->s;
645 while (p < x->end) {
646 c = (int )*p - (int )*q;
647 if (c != 0) return c;
648
649 p++; q++;
650 }
651
652 return 0;
653 }
654
655 static int
callout_name_table_hash(st_callout_name_key * x)656 callout_name_table_hash(st_callout_name_key* x)
657 {
658 UChar *p;
659 unsigned int val = 0;
660
661 p = x->s;
662 while (p < x->end) {
663 val = val * 997 + (unsigned int )*p++;
664 }
665
666 /* use intptr_t for escape warning in Windows */
667 return (int )(val + (val >> 5) + ((intptr_t )x->enc & 0xffff) + x->type);
668 }
669
670 extern hash_table_type
onig_st_init_callout_name_table_with_size(int size)671 onig_st_init_callout_name_table_with_size(int size)
672 {
673 static struct st_hash_type hashType = {
674 callout_name_table_cmp,
675 callout_name_table_hash,
676 };
677
678 return (hash_table_type )onig_st_init_table_with_size(&hashType, size);
679 }
680
681 extern int
onig_st_lookup_callout_name_table(hash_table_type table,OnigEncoding enc,int type,const UChar * str_key,const UChar * end_key,hash_data_type * value)682 onig_st_lookup_callout_name_table(hash_table_type table,
683 OnigEncoding enc,
684 int type,
685 const UChar* str_key,
686 const UChar* end_key,
687 hash_data_type *value)
688 {
689 st_callout_name_key key;
690
691 key.enc = enc;
692 key.type = type;
693 key.s = (UChar* )str_key;
694 key.end = (UChar* )end_key;
695
696 return onig_st_lookup(table, (st_data_t )(&key), value);
697 }
698
699 static int
st_insert_callout_name_table(hash_table_type table,OnigEncoding enc,int type,UChar * str_key,UChar * end_key,hash_data_type value)700 st_insert_callout_name_table(hash_table_type table,
701 OnigEncoding enc, int type,
702 UChar* str_key, UChar* end_key,
703 hash_data_type value)
704 {
705 st_callout_name_key* key;
706 int result;
707
708 key = (st_callout_name_key* )xmalloc(sizeof(st_callout_name_key));
709 CHECK_NULL_RETURN_MEMERR(key);
710
711 /* key->s: don't duplicate, because str_key is duped in callout_name_entry() */
712 key->enc = enc;
713 key->type = type;
714 key->s = str_key;
715 key->end = end_key;
716 result = onig_st_insert(table, (st_data_t )key, value);
717 if (result) {
718 xfree(key);
719 }
720 return result;
721 }
722 #endif
723
724 #endif /* USE_ST_LIBRARY */
725
726
/* Initial capacity of NameEntry.back_refs (allocated when a name gets its
   second group, see name_add()). */
#define INIT_NAME_BACKREFS_ALLOC_NUM   8

/* One named group: the name and the group number(s) registered under it. */
typedef struct {
  UChar* name;        /* heap copy of the name (onigenc_strdup) */
  int    name_len;   /* byte length */
  int    back_num;   /* number of backrefs */
  int    back_alloc; /* capacity of back_refs, in elements */
  int    back_ref1;  /* the group number while back_num == 1 */
  int*   back_refs;  /* all group numbers once back_num > 1, else NULL */
} NameEntry;
737
738 #ifdef USE_ST_LIBRARY
739
/* Initial size passed to the hash table that stores the group names. */
#define INIT_NAMES_ALLOC_NUM    5

/* With USE_ST_LIBRARY the name table is an st hash table. */
typedef st_table  NameTable;
typedef st_data_t HashDataType;   /* 1.6 st.h doesn't define st_data_t type */

/* NOTE(review): not referenced in this part of the file; NAMEBUF_SIZE_1 is
   NAMEBUF_SIZE + 1 -- confirm the intended use. */
#define NAMEBUF_SIZE    24
#define NAMEBUF_SIZE_1  25
747
748 #ifdef ONIG_DEBUG
749 static int
i_print_name_entry(UChar * key,NameEntry * e,void * arg)750 i_print_name_entry(UChar* key, NameEntry* e, void* arg)
751 {
752 int i;
753 FILE* fp = (FILE* )arg;
754
755 fprintf(fp, "%s: ", e->name);
756 if (e->back_num == 0)
757 fputs("-", fp);
758 else if (e->back_num == 1)
759 fprintf(fp, "%d", e->back_ref1);
760 else {
761 for (i = 0; i < e->back_num; i++) {
762 if (i > 0) fprintf(fp, ", ");
763 fprintf(fp, "%d", e->back_refs[i]);
764 }
765 }
766 fputs("\n", fp);
767 return ST_CONTINUE;
768 }
769
770 extern int
onig_print_names(FILE * fp,regex_t * reg)771 onig_print_names(FILE* fp, regex_t* reg)
772 {
773 NameTable* t = (NameTable* )reg->name_table;
774
775 if (IS_NOT_NULL(t)) {
776 fprintf(fp, "name table\n");
777 onig_st_foreach(t, i_print_name_entry, (HashDataType )fp);
778 fputs("\n", fp);
779 }
780 return 0;
781 }
782 #endif /* ONIG_DEBUG */
783
784 static int
i_free_name_entry(UChar * key,NameEntry * e,void * arg ARG_UNUSED)785 i_free_name_entry(UChar* key, NameEntry* e, void* arg ARG_UNUSED)
786 {
787 xfree(e->name);
788 if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
789 xfree(key);
790 xfree(e);
791 return ST_DELETE;
792 }
793
794 static int
names_clear(regex_t * reg)795 names_clear(regex_t* reg)
796 {
797 NameTable* t = (NameTable* )reg->name_table;
798
799 if (IS_NOT_NULL(t)) {
800 onig_st_foreach(t, i_free_name_entry, 0);
801 }
802 return 0;
803 }
804
805 extern int
onig_names_free(regex_t * reg)806 onig_names_free(regex_t* reg)
807 {
808 int r;
809 NameTable* t;
810
811 r = names_clear(reg);
812 if (r != 0) return r;
813
814 t = (NameTable* )reg->name_table;
815 if (IS_NOT_NULL(t)) onig_st_free_table(t);
816 reg->name_table = (void* )NULL;
817 return 0;
818 }
819
820 static NameEntry*
name_find(regex_t * reg,const UChar * name,const UChar * name_end)821 name_find(regex_t* reg, const UChar* name, const UChar* name_end)
822 {
823 NameEntry* e;
824 NameTable* t = (NameTable* )reg->name_table;
825
826 e = (NameEntry* )NULL;
827 if (IS_NOT_NULL(t)) {
828 onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));
829 }
830 return e;
831 }
832
/* Context handed to i_names() by onig_foreach_name(). */
typedef struct {
  int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*); /* user callback */
  regex_t* reg;      /* regex being enumerated; passed on to func */
  void* arg;         /* user argument; passed on to func */
  int ret;           /* first non-zero result of func, 0 otherwise */
  OnigEncoding enc;  /* set from reg->enc; not read in this part of the file */
} INamesArg;
840
841 static int
i_names(UChar * key ARG_UNUSED,NameEntry * e,INamesArg * arg)842 i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg)
843 {
844 int r = (*(arg->func))(e->name,
845 e->name + e->name_len,
846 e->back_num,
847 (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
848 arg->reg, arg->arg);
849 if (r != 0) {
850 arg->ret = r;
851 return ST_STOP;
852 }
853 return ST_CONTINUE;
854 }
855
856 extern int
onig_foreach_name(regex_t * reg,int (* func)(const UChar *,const UChar *,int,int *,regex_t *,void *),void * arg)857 onig_foreach_name(regex_t* reg,
858 int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
859 {
860 INamesArg narg;
861 NameTable* t = (NameTable* )reg->name_table;
862
863 narg.ret = 0;
864 if (IS_NOT_NULL(t)) {
865 narg.func = func;
866 narg.reg = reg;
867 narg.arg = arg;
868 narg.enc = reg->enc; /* should be pattern encoding. */
869 onig_st_foreach(t, i_names, (HashDataType )&narg);
870 }
871 return narg.ret;
872 }
873
874 static int
i_renumber_name(UChar * key ARG_UNUSED,NameEntry * e,GroupNumMap * map)875 i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumMap* map)
876 {
877 int i;
878
879 if (e->back_num > 1) {
880 for (i = 0; i < e->back_num; i++) {
881 e->back_refs[i] = map[e->back_refs[i]].new_val;
882 }
883 }
884 else if (e->back_num == 1) {
885 e->back_ref1 = map[e->back_ref1].new_val;
886 }
887
888 return ST_CONTINUE;
889 }
890
891 extern int
onig_renumber_name_table(regex_t * reg,GroupNumMap * map)892 onig_renumber_name_table(regex_t* reg, GroupNumMap* map)
893 {
894 NameTable* t = (NameTable* )reg->name_table;
895
896 if (IS_NOT_NULL(t)) {
897 onig_st_foreach(t, i_renumber_name, (HashDataType )map);
898 }
899 return 0;
900 }
901
902
903 extern int
onig_number_of_names(regex_t * reg)904 onig_number_of_names(regex_t* reg)
905 {
906 NameTable* t = (NameTable* )reg->name_table;
907
908 if (IS_NOT_NULL(t))
909 return t->num_entries;
910 else
911 return 0;
912 }
913
914 #else /* USE_ST_LIBRARY */
915
/* Initial capacity of the entry array below. */
#define INIT_NAMES_ALLOC_NUM    8

/* Without USE_ST_LIBRARY the name table is a growable array of entries. */
typedef struct {
  NameEntry* e;   /* entry array */
  int    num;     /* entries in use */
  int    alloc;   /* capacity of e, in entries */
} NameTable;
923
924 #ifdef ONIG_DEBUG
925 extern int
onig_print_names(FILE * fp,regex_t * reg)926 onig_print_names(FILE* fp, regex_t* reg)
927 {
928 int i, j;
929 NameEntry* e;
930 NameTable* t = (NameTable* )reg->name_table;
931
932 if (IS_NOT_NULL(t) && t->num > 0) {
933 fprintf(fp, "name table\n");
934 for (i = 0; i < t->num; i++) {
935 e = &(t->e[i]);
936 fprintf(fp, "%s: ", e->name);
937 if (e->back_num == 0) {
938 fputs("-", fp);
939 }
940 else if (e->back_num == 1) {
941 fprintf(fp, "%d", e->back_ref1);
942 }
943 else {
944 for (j = 0; j < e->back_num; j++) {
945 if (j > 0) fprintf(fp, ", ");
946 fprintf(fp, "%d", e->back_refs[j]);
947 }
948 }
949 fputs("\n", fp);
950 }
951 fputs("\n", fp);
952 }
953 return 0;
954 }
955 #endif
956
957 static int
names_clear(regex_t * reg)958 names_clear(regex_t* reg)
959 {
960 int i;
961 NameEntry* e;
962 NameTable* t = (NameTable* )reg->name_table;
963
964 if (IS_NOT_NULL(t)) {
965 for (i = 0; i < t->num; i++) {
966 e = &(t->e[i]);
967 if (IS_NOT_NULL(e->name)) {
968 xfree(e->name);
969 e->name = NULL;
970 e->name_len = 0;
971 e->back_num = 0;
972 e->back_alloc = 0;
973 if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
974 e->back_refs = (int* )NULL;
975 }
976 }
977 if (IS_NOT_NULL(t->e)) {
978 xfree(t->e);
979 t->e = NULL;
980 }
981 t->num = 0;
982 }
983 return 0;
984 }
985
986 extern int
onig_names_free(regex_t * reg)987 onig_names_free(regex_t* reg)
988 {
989 int r;
990 NameTable* t;
991
992 r = names_clear(reg);
993 if (r != 0) return r;
994
995 t = (NameTable* )reg->name_table;
996 if (IS_NOT_NULL(t)) xfree(t);
997 reg->name_table = NULL;
998 return 0;
999 }
1000
1001 static NameEntry*
name_find(regex_t * reg,UChar * name,UChar * name_end)1002 name_find(regex_t* reg, UChar* name, UChar* name_end)
1003 {
1004 int i, len;
1005 NameEntry* e;
1006 NameTable* t = (NameTable* )reg->name_table;
1007
1008 if (IS_NOT_NULL(t)) {
1009 len = name_end - name;
1010 for (i = 0; i < t->num; i++) {
1011 e = &(t->e[i]);
1012 if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)
1013 return e;
1014 }
1015 }
1016 return (NameEntry* )NULL;
1017 }
1018
1019 extern int
onig_foreach_name(regex_t * reg,int (* func)(const UChar *,const UChar *,int,int *,regex_t *,void *),void * arg)1020 onig_foreach_name(regex_t* reg,
1021 int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
1022 {
1023 int i, r;
1024 NameEntry* e;
1025 NameTable* t = (NameTable* )reg->name_table;
1026
1027 if (IS_NOT_NULL(t)) {
1028 for (i = 0; i < t->num; i++) {
1029 e = &(t->e[i]);
1030 r = (*func)(e->name, e->name + e->name_len, e->back_num,
1031 (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
1032 reg, arg);
1033 if (r != 0) return r;
1034 }
1035 }
1036 return 0;
1037 }
1038
1039 extern int
onig_number_of_names(regex_t * reg)1040 onig_number_of_names(regex_t* reg)
1041 {
1042 NameTable* t = (NameTable* )reg->name_table;
1043
1044 if (IS_NOT_NULL(t))
1045 return t->num;
1046 else
1047 return 0;
1048 }
1049
1050 #endif /* else USE_ST_LIBRARY */
1051
1052 static int
name_add(regex_t * reg,UChar * name,UChar * name_end,int backref,ParseEnv * env)1053 name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ParseEnv* env)
1054 {
1055 int r;
1056 int alloc;
1057 NameEntry* e;
1058 NameTable* t = (NameTable* )reg->name_table;
1059
1060 if (name_end - name <= 0)
1061 return ONIGERR_EMPTY_GROUP_NAME;
1062
1063 e = name_find(reg, name, name_end);
1064 if (IS_NULL(e)) {
1065 #ifdef USE_ST_LIBRARY
1066 if (IS_NULL(t)) {
1067 t = onig_st_init_strend_table_with_size(INIT_NAMES_ALLOC_NUM);
1068 CHECK_NULL_RETURN_MEMERR(t);
1069 reg->name_table = (void* )t;
1070 }
1071 e = (NameEntry* )xmalloc(sizeof(NameEntry));
1072 CHECK_NULL_RETURN_MEMERR(e);
1073
1074 e->name = onigenc_strdup(reg->enc, name, name_end);
1075 if (IS_NULL(e->name)) {
1076 xfree(e); return ONIGERR_MEMORY;
1077 }
1078 r = onig_st_insert_strend(t, e->name, (e->name + (name_end - name)),
1079 (HashDataType )e);
1080 if (r < 0) return r;
1081
1082 e->name_len = (int )(name_end - name);
1083 e->back_num = 0;
1084 e->back_alloc = 0;
1085 e->back_refs = (int* )NULL;
1086
1087 #else
1088
1089 if (IS_NULL(t)) {
1090 alloc = INIT_NAMES_ALLOC_NUM;
1091 t = (NameTable* )xmalloc(sizeof(NameTable));
1092 CHECK_NULL_RETURN_MEMERR(t);
1093 t->e = NULL;
1094 t->alloc = 0;
1095 t->num = 0;
1096
1097 t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc);
1098 if (IS_NULL(t->e)) {
1099 xfree(t);
1100 return ONIGERR_MEMORY;
1101 }
1102 t->alloc = alloc;
1103 reg->name_table = t;
1104 goto clear;
1105 }
1106 else if (t->num == t->alloc) {
1107 int i;
1108
1109 alloc = t->alloc * 2;
1110 t->e = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc);
1111 CHECK_NULL_RETURN_MEMERR(t->e);
1112 t->alloc = alloc;
1113
1114 clear:
1115 for (i = t->num; i < t->alloc; i++) {
1116 t->e[i].name = NULL;
1117 t->e[i].name_len = 0;
1118 t->e[i].back_num = 0;
1119 t->e[i].back_alloc = 0;
1120 t->e[i].back_refs = (int* )NULL;
1121 }
1122 }
1123 e = &(t->e[t->num]);
1124 t->num++;
1125 e->name = onigenc_strdup(reg->enc, name, name_end);
1126 if (IS_NULL(e->name)) return ONIGERR_MEMORY;
1127 e->name_len = name_end - name;
1128 #endif
1129 }
1130
1131 if (e->back_num >= 1 &&
1132 ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME)) {
1133 onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME,
1134 name, name_end);
1135 return ONIGERR_MULTIPLEX_DEFINED_NAME;
1136 }
1137
1138 e->back_num++;
1139 if (e->back_num == 1) {
1140 e->back_ref1 = backref;
1141 }
1142 else {
1143 if (e->back_num == 2) {
1144 alloc = INIT_NAME_BACKREFS_ALLOC_NUM;
1145 e->back_refs = (int* )xmalloc(sizeof(int) * alloc);
1146 CHECK_NULL_RETURN_MEMERR(e->back_refs);
1147 e->back_alloc = alloc;
1148 e->back_refs[0] = e->back_ref1;
1149 e->back_refs[1] = backref;
1150 }
1151 else {
1152 if (e->back_num > e->back_alloc) {
1153 alloc = e->back_alloc * 2;
1154 e->back_refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc);
1155 CHECK_NULL_RETURN_MEMERR(e->back_refs);
1156 e->back_alloc = alloc;
1157 }
1158 e->back_refs[e->back_num - 1] = backref;
1159 }
1160 }
1161
1162 return 0;
1163 }
1164
1165 extern int
onig_name_to_group_numbers(regex_t * reg,const UChar * name,const UChar * name_end,int ** nums)1166 onig_name_to_group_numbers(regex_t* reg, const UChar* name,
1167 const UChar* name_end, int** nums)
1168 {
1169 NameEntry* e = name_find(reg, name, name_end);
1170
1171 if (IS_NULL(e)) return ONIGERR_UNDEFINED_NAME_REFERENCE;
1172
1173 switch (e->back_num) {
1174 case 0:
1175 break;
1176 case 1:
1177 *nums = &(e->back_ref1);
1178 break;
1179 default:
1180 *nums = e->back_refs;
1181 break;
1182 }
1183 return e->back_num;
1184 }
1185
1186 static int
name_to_group_numbers(ParseEnv * env,const UChar * name,const UChar * name_end,int ** nums)1187 name_to_group_numbers(ParseEnv* env, const UChar* name, const UChar* name_end,
1188 int** nums)
1189 {
1190 regex_t* reg;
1191 NameEntry* e;
1192
1193 reg = env->reg;
1194 e = name_find(reg, name, name_end);
1195
1196 if (IS_NULL(e)) {
1197 onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,
1198 (UChar* )name, (UChar* )name_end);
1199 return ONIGERR_UNDEFINED_NAME_REFERENCE;
1200 }
1201
1202 switch (e->back_num) {
1203 case 0:
1204 break;
1205 case 1:
1206 *nums = &(e->back_ref1);
1207 break;
1208 default:
1209 *nums = e->back_refs;
1210 break;
1211 }
1212 return e->back_num;
1213 }
1214
1215 extern int
onig_name_to_backref_number(regex_t * reg,const UChar * name,const UChar * name_end,OnigRegion * region)1216 onig_name_to_backref_number(regex_t* reg, const UChar* name,
1217 const UChar* name_end, OnigRegion *region)
1218 {
1219 int i, n, *nums;
1220
1221 n = onig_name_to_group_numbers(reg, name, name_end, &nums);
1222 if (n < 0)
1223 return n;
1224 else if (n == 0)
1225 return ONIGERR_PARSER_BUG;
1226 else if (n == 1)
1227 return nums[0];
1228 else {
1229 if (IS_NOT_NULL(region)) {
1230 for (i = n - 1; i >= 0; i--) {
1231 if (region->beg[nums[i]] != ONIG_REGION_NOTPOS)
1232 return nums[i];
1233 }
1234 }
1235 return nums[n - 1];
1236 }
1237 }
1238
1239 extern int
onig_noname_group_capture_is_active(regex_t * reg)1240 onig_noname_group_capture_is_active(regex_t* reg)
1241 {
1242 if (OPTON_DONT_CAPTURE_GROUP(reg->options))
1243 return 0;
1244
1245 if (onig_number_of_names(reg) > 0 &&
1246 IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
1247 ! OPTON_CAPTURE_GROUP(reg->options)) {
1248 return 0;
1249 }
1250
1251 return 1;
1252 }
1253
1254 #ifdef USE_CALLOUT
1255
1256 typedef struct {
1257 OnigCalloutType type;
1258 int in;
1259 OnigCalloutFunc start_func;
1260 OnigCalloutFunc end_func;
1261 int arg_num;
1262 int opt_arg_num;
1263 unsigned int arg_types[ONIG_CALLOUT_MAX_ARGS_NUM];
1264 OnigValue opt_defaults[ONIG_CALLOUT_MAX_ARGS_NUM];
1265 UChar* name; /* reference to GlobalCalloutNameTable entry: e->name */
1266 } CalloutNameListEntry;
1267
1268 typedef struct {
1269 int n;
1270 int alloc;
1271 CalloutNameListEntry* v;
1272 } CalloutNameListType;
1273
1274 static CalloutNameListType* GlobalCalloutNameList;
1275
1276 static int
make_callout_func_list(CalloutNameListType ** rs,int init_size)1277 make_callout_func_list(CalloutNameListType** rs, int init_size)
1278 {
1279 CalloutNameListType* s;
1280 CalloutNameListEntry* v;
1281
1282 *rs = 0;
1283
1284 s = xmalloc(sizeof(*s));
1285 if (IS_NULL(s)) return ONIGERR_MEMORY;
1286
1287 v = (CalloutNameListEntry* )xmalloc(sizeof(CalloutNameListEntry) * init_size);
1288 if (IS_NULL(v)) {
1289 xfree(s);
1290 return ONIGERR_MEMORY;
1291 }
1292
1293 s->n = 0;
1294 s->alloc = init_size;
1295 s->v = v;
1296
1297 *rs = s;
1298 return ONIG_NORMAL;
1299 }
1300
1301 static void
free_callout_func_list(CalloutNameListType * s)1302 free_callout_func_list(CalloutNameListType* s)
1303 {
1304 if (IS_NOT_NULL(s)) {
1305 if (IS_NOT_NULL(s->v)) {
1306 int i, j;
1307
1308 for (i = 0; i < s->n; i++) {
1309 CalloutNameListEntry* e = s->v + i;
1310 for (j = e->arg_num - e->opt_arg_num; j < e->arg_num; j++) {
1311 if (e->arg_types[j] == ONIG_TYPE_STRING) {
1312 UChar* p = e->opt_defaults[j].s.start;
1313 if (IS_NOT_NULL(p)) xfree(p);
1314 }
1315 }
1316 }
1317 xfree(s->v);
1318 }
1319 xfree(s);
1320 }
1321 }
1322
1323 static int
callout_func_list_add(CalloutNameListType * s,int * rid)1324 callout_func_list_add(CalloutNameListType* s, int* rid)
1325 {
1326 if (s->n >= s->alloc) {
1327 int new_size = s->alloc * 2;
1328 CalloutNameListEntry* nv = (CalloutNameListEntry* )
1329 xrealloc(s->v, sizeof(CalloutNameListEntry) * new_size);
1330 if (IS_NULL(nv)) return ONIGERR_MEMORY;
1331
1332 s->alloc = new_size;
1333 s->v = nv;
1334 }
1335
1336 *rid = s->n;
1337
1338 xmemset(&(s->v[s->n]), 0, sizeof(*(s->v)));
1339 s->n++;
1340 return ONIG_NORMAL;
1341 }
1342
1343
1344 typedef struct {
1345 UChar* name;
1346 int name_len; /* byte length */
1347 int id;
1348 } CalloutNameEntry;
1349
1350 #ifdef USE_ST_LIBRARY
1351 typedef st_table CalloutNameTable;
1352 #else
1353 typedef struct {
1354 CalloutNameEntry* e;
1355 int num;
1356 int alloc;
1357 } CalloutNameTable;
1358 #endif
1359
1360 static CalloutNameTable* GlobalCalloutNameTable;
1361 static int CalloutNameIDCounter;
1362
1363 #ifdef USE_ST_LIBRARY
1364
1365 static int
i_free_callout_name_entry(st_callout_name_key * key,CalloutNameEntry * e,void * arg ARG_UNUSED)1366 i_free_callout_name_entry(st_callout_name_key* key, CalloutNameEntry* e,
1367 void* arg ARG_UNUSED)
1368 {
1369 if (IS_NOT_NULL(e)) {
1370 xfree(e->name);
1371 }
1372 /*xfree(key->s); */ /* is same as e->name */
1373 xfree(key);
1374 xfree(e);
1375 return ST_DELETE;
1376 }
1377
1378 static int
callout_name_table_clear(CalloutNameTable * t)1379 callout_name_table_clear(CalloutNameTable* t)
1380 {
1381 if (IS_NOT_NULL(t)) {
1382 onig_st_foreach(t, i_free_callout_name_entry, 0);
1383 }
1384 return 0;
1385 }
1386
1387 static int
global_callout_name_table_free(void)1388 global_callout_name_table_free(void)
1389 {
1390 if (IS_NOT_NULL(GlobalCalloutNameTable)) {
1391 int r = callout_name_table_clear(GlobalCalloutNameTable);
1392 if (r != 0) return r;
1393
1394 onig_st_free_table(GlobalCalloutNameTable);
1395 GlobalCalloutNameTable = 0;
1396 CalloutNameIDCounter = 0;
1397 }
1398
1399 return 0;
1400 }
1401
1402 static CalloutNameEntry*
callout_name_find(OnigEncoding enc,int is_not_single,const UChar * name,const UChar * name_end)1403 callout_name_find(OnigEncoding enc, int is_not_single,
1404 const UChar* name, const UChar* name_end)
1405 {
1406 int r;
1407 CalloutNameEntry* e;
1408 CalloutNameTable* t = GlobalCalloutNameTable;
1409
1410 e = (CalloutNameEntry* )NULL;
1411 if (IS_NOT_NULL(t)) {
1412 r = onig_st_lookup_callout_name_table(t, enc, is_not_single, name, name_end,
1413 (HashDataType* )((void* )(&e)));
1414 if (r == 0) { /* not found */
1415 if (enc != ONIG_ENCODING_ASCII &&
1416 ONIGENC_IS_ASCII_COMPATIBLE_ENCODING(enc)) {
1417 enc = ONIG_ENCODING_ASCII;
1418 onig_st_lookup_callout_name_table(t, enc, is_not_single, name, name_end,
1419 (HashDataType* )((void* )(&e)));
1420 }
1421 }
1422 }
1423 return e;
1424 }
1425
1426 #else
1427
1428 static int
callout_name_table_clear(CalloutNameTable * t)1429 callout_name_table_clear(CalloutNameTable* t)
1430 {
1431 int i;
1432 CalloutNameEntry* e;
1433
1434 if (IS_NOT_NULL(t)) {
1435 for (i = 0; i < t->num; i++) {
1436 e = &(t->e[i]);
1437 if (IS_NOT_NULL(e->name)) {
1438 xfree(e->name);
1439 e->name = NULL;
1440 e->name_len = 0;
1441 e->id = 0;
1442 e->func = 0;
1443 }
1444 }
1445 if (IS_NOT_NULL(t->e)) {
1446 xfree(t->e);
1447 t->e = NULL;
1448 }
1449 t->num = 0;
1450 }
1451 return 0;
1452 }
1453
1454 static int
global_callout_name_table_free(void)1455 global_callout_name_table_free(void)
1456 {
1457 if (IS_NOT_NULL(GlobalCalloutNameTable)) {
1458 int r = callout_name_table_clear(GlobalCalloutNameTable);
1459 if (r != 0) return r;
1460
1461 xfree(GlobalCalloutNameTable);
1462 GlobalCalloutNameTable = 0;
1463 CalloutNameIDCounter = 0;
1464 }
1465 return 0;
1466 }
1467
1468 static CalloutNameEntry*
callout_name_find(UChar * name,UChar * name_end)1469 callout_name_find(UChar* name, UChar* name_end)
1470 {
1471 int i, len;
1472 CalloutNameEntry* e;
1473 CalloutNameTable* t = Calloutnames;
1474
1475 if (IS_NOT_NULL(t)) {
1476 len = name_end - name;
1477 for (i = 0; i < t->num; i++) {
1478 e = &(t->e[i]);
1479 if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)
1480 return e;
1481 }
1482 }
1483 return (CalloutNameEntry* )NULL;
1484 }
1485
1486 #endif
1487
1488 /* name string must be single byte char string. */
1489 static int
callout_name_entry(CalloutNameEntry ** rentry,OnigEncoding enc,int is_not_single,UChar * name,UChar * name_end)1490 callout_name_entry(CalloutNameEntry** rentry, OnigEncoding enc,
1491 int is_not_single, UChar* name, UChar* name_end)
1492 {
1493 int r;
1494 CalloutNameEntry* e;
1495 CalloutNameTable* t = GlobalCalloutNameTable;
1496
1497 *rentry = 0;
1498 if (name_end - name <= 0)
1499 return ONIGERR_INVALID_CALLOUT_NAME;
1500
1501 e = callout_name_find(enc, is_not_single, name, name_end);
1502 if (IS_NULL(e)) {
1503 #ifdef USE_ST_LIBRARY
1504 if (IS_NULL(t)) {
1505 t = onig_st_init_callout_name_table_with_size(INIT_NAMES_ALLOC_NUM);
1506 CHECK_NULL_RETURN_MEMERR(t);
1507 GlobalCalloutNameTable = t;
1508 }
1509 e = (CalloutNameEntry* )xmalloc(sizeof(CalloutNameEntry));
1510 CHECK_NULL_RETURN_MEMERR(e);
1511
1512 e->name = onigenc_strdup(enc, name, name_end);
1513 if (IS_NULL(e->name)) {
1514 xfree(e); return ONIGERR_MEMORY;
1515 }
1516
1517 r = st_insert_callout_name_table(t, enc, is_not_single,
1518 e->name, (e->name + (name_end - name)),
1519 (HashDataType )e);
1520 if (r < 0) return r;
1521
1522 #else
1523
1524 int alloc;
1525
1526 if (IS_NULL(t)) {
1527 alloc = INIT_NAMES_ALLOC_NUM;
1528 t = (CalloutNameTable* )xmalloc(sizeof(CalloutNameTable));
1529 CHECK_NULL_RETURN_MEMERR(t);
1530 t->e = NULL;
1531 t->alloc = 0;
1532 t->num = 0;
1533
1534 t->e = (CalloutNameEntry* )xmalloc(sizeof(CalloutNameEntry) * alloc);
1535 if (IS_NULL(t->e)) {
1536 xfree(t);
1537 return ONIGERR_MEMORY;
1538 }
1539 t->alloc = alloc;
1540 GlobalCalloutNameTable = t;
1541 goto clear;
1542 }
1543 else if (t->num == t->alloc) {
1544 int i;
1545
1546 alloc = t->alloc * 2;
1547 t->e = (CalloutNameEntry* )xrealloc(t->e, sizeof(CalloutNameEntry) * alloc);
1548 CHECK_NULL_RETURN_MEMERR(t->e);
1549 t->alloc = alloc;
1550
1551 clear:
1552 for (i = t->num; i < t->alloc; i++) {
1553 t->e[i].name = NULL;
1554 t->e[i].name_len = 0;
1555 t->e[i].id = 0;
1556 }
1557 }
1558 e = &(t->e[t->num]);
1559 t->num++;
1560 e->name = onigenc_strdup(enc, name, name_end);
1561 if (IS_NULL(e->name)) return ONIGERR_MEMORY;
1562 #endif
1563
1564 CalloutNameIDCounter++;
1565 e->id = CalloutNameIDCounter;
1566 e->name_len = (int )(name_end - name);
1567 }
1568
1569 *rentry = e;
1570 return e->id;
1571 }
1572
1573 static int
is_allowed_callout_name(OnigEncoding enc,UChar * name,UChar * name_end)1574 is_allowed_callout_name(OnigEncoding enc, UChar* name, UChar* name_end)
1575 {
1576 UChar* p;
1577 OnigCodePoint c;
1578
1579 if (name >= name_end) return 0;
1580
1581 p = name;
1582 while (p < name_end) {
1583 c = ONIGENC_MBC_TO_CODE(enc, p, name_end);
1584 if (! IS_ALLOWED_CODE_IN_CALLOUT_NAME(c))
1585 return 0;
1586
1587 if (p == name) {
1588 if (c >= '0' && c <= '9') return 0;
1589 }
1590
1591 p += ONIGENC_MBC_ENC_LEN(enc, p);
1592 }
1593
1594 return 1;
1595 }
1596
1597 static int
is_allowed_callout_tag_name(OnigEncoding enc,UChar * name,UChar * name_end)1598 is_allowed_callout_tag_name(OnigEncoding enc, UChar* name, UChar* name_end)
1599 {
1600 UChar* p;
1601 OnigCodePoint c;
1602
1603 if (name >= name_end) return 0;
1604
1605 p = name;
1606 while (p < name_end) {
1607 c = ONIGENC_MBC_TO_CODE(enc, p, name_end);
1608 if (! IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME(c))
1609 return 0;
1610
1611 if (p == name) {
1612 if (c >= '0' && c <= '9') return 0;
1613 }
1614
1615 p += ONIGENC_MBC_ENC_LEN(enc, p);
1616 }
1617
1618 return 1;
1619 }
1620
1621 extern int
onig_set_callout_of_name(OnigEncoding enc,OnigCalloutType callout_type,UChar * name,UChar * name_end,int in,OnigCalloutFunc start_func,OnigCalloutFunc end_func,int arg_num,unsigned int arg_types[],int opt_arg_num,OnigValue opt_defaults[])1622 onig_set_callout_of_name(OnigEncoding enc, OnigCalloutType callout_type,
1623 UChar* name, UChar* name_end, int in,
1624 OnigCalloutFunc start_func,
1625 OnigCalloutFunc end_func,
1626 int arg_num, unsigned int arg_types[],
1627 int opt_arg_num, OnigValue opt_defaults[])
1628 {
1629 int r;
1630 int i;
1631 int j;
1632 int id;
1633 int is_not_single;
1634 CalloutNameEntry* e;
1635 CalloutNameListEntry* fe;
1636
1637 if (callout_type != ONIG_CALLOUT_TYPE_SINGLE)
1638 return ONIGERR_INVALID_ARGUMENT;
1639
1640 if (arg_num < 0 || arg_num > ONIG_CALLOUT_MAX_ARGS_NUM)
1641 return ONIGERR_INVALID_CALLOUT_ARG;
1642
1643 if (opt_arg_num < 0 || opt_arg_num > arg_num)
1644 return ONIGERR_INVALID_CALLOUT_ARG;
1645
1646 if (start_func == 0 && end_func == 0)
1647 return ONIGERR_INVALID_CALLOUT_ARG;
1648
1649 if ((in & ONIG_CALLOUT_IN_PROGRESS) == 0 && (in & ONIG_CALLOUT_IN_RETRACTION) == 0)
1650 return ONIGERR_INVALID_CALLOUT_ARG;
1651
1652 for (i = 0; i < arg_num; i++) {
1653 unsigned int t = arg_types[i];
1654 if (t == ONIG_TYPE_VOID)
1655 return ONIGERR_INVALID_CALLOUT_ARG;
1656 else {
1657 if (i >= arg_num - opt_arg_num) {
1658 if (t != ONIG_TYPE_LONG && t != ONIG_TYPE_CHAR && t != ONIG_TYPE_STRING &&
1659 t != ONIG_TYPE_TAG)
1660 return ONIGERR_INVALID_CALLOUT_ARG;
1661 }
1662 else {
1663 if (t != ONIG_TYPE_LONG) {
1664 t = t & ~ONIG_TYPE_LONG;
1665 if (t != ONIG_TYPE_CHAR && t != ONIG_TYPE_STRING && t != ONIG_TYPE_TAG)
1666 return ONIGERR_INVALID_CALLOUT_ARG;
1667 }
1668 }
1669 }
1670 }
1671
1672 if (! is_allowed_callout_name(enc, name, name_end)) {
1673 return ONIGERR_INVALID_CALLOUT_NAME;
1674 }
1675
1676 is_not_single = (callout_type != ONIG_CALLOUT_TYPE_SINGLE);
1677 id = callout_name_entry(&e, enc, is_not_single, name, name_end);
1678 if (id < 0) return id;
1679
1680 r = ONIG_NORMAL;
1681 if (IS_NULL(GlobalCalloutNameList)) {
1682 r = make_callout_func_list(&GlobalCalloutNameList, 10);
1683 if (r != ONIG_NORMAL) return r;
1684 }
1685
1686 while (id >= GlobalCalloutNameList->n) {
1687 int rid;
1688 r = callout_func_list_add(GlobalCalloutNameList, &rid);
1689 if (r != ONIG_NORMAL) return r;
1690 }
1691
1692 fe = GlobalCalloutNameList->v + id;
1693 fe->type = callout_type;
1694 fe->in = in;
1695 fe->start_func = start_func;
1696 fe->end_func = end_func;
1697 fe->arg_num = arg_num;
1698 fe->opt_arg_num = opt_arg_num;
1699 fe->name = e->name;
1700
1701 for (i = 0; i < arg_num; i++) {
1702 fe->arg_types[i] = arg_types[i];
1703 }
1704 for (i = arg_num - opt_arg_num, j = 0; i < arg_num; i++, j++) {
1705 if (IS_NULL(opt_defaults)) return ONIGERR_INVALID_ARGUMENT;
1706 if (fe->arg_types[i] == ONIG_TYPE_STRING) {
1707 OnigValue* val;
1708 UChar* ds;
1709
1710 val = opt_defaults + j;
1711 ds = onigenc_strdup(enc, val->s.start, val->s.end);
1712 CHECK_NULL_RETURN_MEMERR(ds);
1713
1714 fe->opt_defaults[i].s.start = ds;
1715 fe->opt_defaults[i].s.end = ds + (val->s.end - val->s.start);
1716 }
1717 else {
1718 fe->opt_defaults[i] = opt_defaults[j];
1719 }
1720 }
1721
1722 r = id;
1723 return r;
1724 }
1725
1726 static int
get_callout_name_id_by_name(OnigEncoding enc,int is_not_single,UChar * name,UChar * name_end,int * rid)1727 get_callout_name_id_by_name(OnigEncoding enc, int is_not_single,
1728 UChar* name, UChar* name_end, int* rid)
1729 {
1730 int r;
1731 CalloutNameEntry* e;
1732
1733 if (! is_allowed_callout_name(enc, name, name_end)) {
1734 return ONIGERR_INVALID_CALLOUT_NAME;
1735 }
1736
1737 e = callout_name_find(enc, is_not_single, name, name_end);
1738 if (IS_NULL(e)) {
1739 return ONIGERR_UNDEFINED_CALLOUT_NAME;
1740 }
1741
1742 r = ONIG_NORMAL;
1743 *rid = e->id;
1744
1745 return r;
1746 }
1747
1748 extern OnigCalloutFunc
onig_get_callout_start_func(regex_t * reg,int callout_num)1749 onig_get_callout_start_func(regex_t* reg, int callout_num)
1750 {
1751 /* If used for callouts of contents, return 0. */
1752 CalloutListEntry* e;
1753
1754 e = onig_reg_callout_list_at(reg, callout_num);
1755 CHECK_NULL_RETURN(e);
1756 return e->start_func;
1757 }
1758
1759 extern const UChar*
onig_get_callout_tag_start(regex_t * reg,int callout_num)1760 onig_get_callout_tag_start(regex_t* reg, int callout_num)
1761 {
1762 CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num);
1763 CHECK_NULL_RETURN(e);
1764 return e->tag_start;
1765 }
1766
1767 extern const UChar*
onig_get_callout_tag_end(regex_t * reg,int callout_num)1768 onig_get_callout_tag_end(regex_t* reg, int callout_num)
1769 {
1770 CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num);
1771 CHECK_NULL_RETURN(e);
1772 return e->tag_end;
1773 }
1774
1775
1776 extern OnigCalloutType
onig_get_callout_type_by_name_id(int name_id)1777 onig_get_callout_type_by_name_id(int name_id)
1778 {
1779 if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
1780 return 0;
1781
1782 return GlobalCalloutNameList->v[name_id].type;
1783 }
1784
1785 extern OnigCalloutFunc
onig_get_callout_start_func_by_name_id(int name_id)1786 onig_get_callout_start_func_by_name_id(int name_id)
1787 {
1788 if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
1789 return 0;
1790
1791 return GlobalCalloutNameList->v[name_id].start_func;
1792 }
1793
1794 extern OnigCalloutFunc
onig_get_callout_end_func_by_name_id(int name_id)1795 onig_get_callout_end_func_by_name_id(int name_id)
1796 {
1797 if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
1798 return 0;
1799
1800 return GlobalCalloutNameList->v[name_id].end_func;
1801 }
1802
1803 extern int
onig_get_callout_in_by_name_id(int name_id)1804 onig_get_callout_in_by_name_id(int name_id)
1805 {
1806 if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
1807 return 0;
1808
1809 return GlobalCalloutNameList->v[name_id].in;
1810 }
1811
1812 static int
get_callout_arg_num_by_name_id(int name_id)1813 get_callout_arg_num_by_name_id(int name_id)
1814 {
1815 return GlobalCalloutNameList->v[name_id].arg_num;
1816 }
1817
1818 static int
get_callout_opt_arg_num_by_name_id(int name_id)1819 get_callout_opt_arg_num_by_name_id(int name_id)
1820 {
1821 return GlobalCalloutNameList->v[name_id].opt_arg_num;
1822 }
1823
1824 static unsigned int
get_callout_arg_type_by_name_id(int name_id,int index)1825 get_callout_arg_type_by_name_id(int name_id, int index)
1826 {
1827 return GlobalCalloutNameList->v[name_id].arg_types[index];
1828 }
1829
1830 static OnigValue
get_callout_opt_default_by_name_id(int name_id,int index)1831 get_callout_opt_default_by_name_id(int name_id, int index)
1832 {
1833 return GlobalCalloutNameList->v[name_id].opt_defaults[index];
1834 }
1835
1836 extern UChar*
onig_get_callout_name_by_name_id(int name_id)1837 onig_get_callout_name_by_name_id(int name_id)
1838 {
1839 if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
1840 return 0;
1841
1842 return GlobalCalloutNameList->v[name_id].name;
1843 }
1844
1845 extern int
onig_global_callout_names_free(void)1846 onig_global_callout_names_free(void)
1847 {
1848 free_callout_func_list(GlobalCalloutNameList);
1849 GlobalCalloutNameList = 0;
1850
1851 global_callout_name_table_free();
1852 return ONIG_NORMAL;
1853 }
1854
1855
1856 typedef st_table CalloutTagTable;
1857 typedef intptr_t CalloutTagVal;
1858
1859 #define CALLOUT_TAG_LIST_FLAG_TAG_EXIST (1<<0)
1860
1861 static int
i_callout_callout_list_set(UChar * key,CalloutTagVal e,void * arg)1862 i_callout_callout_list_set(UChar* key, CalloutTagVal e, void* arg)
1863 {
1864 int num;
1865 RegexExt* ext = (RegexExt* )arg;
1866
1867 num = (int )e - 1;
1868 ext->callout_list[num].flag |= CALLOUT_TAG_LIST_FLAG_TAG_EXIST;
1869 return ST_CONTINUE;
1870 }
1871
1872 static int
setup_ext_callout_list_values(regex_t * reg)1873 setup_ext_callout_list_values(regex_t* reg)
1874 {
1875 int i, j;
1876 RegexExt* ext;
1877
1878 ext = reg->extp;
1879 if (IS_NOT_NULL(ext->tag_table)) {
1880 onig_st_foreach((CalloutTagTable *)ext->tag_table, i_callout_callout_list_set,
1881 (st_data_t )ext);
1882 }
1883
1884 for (i = 0; i < ext->callout_num; i++) {
1885 CalloutListEntry* e = ext->callout_list + i;
1886 if (e->of == ONIG_CALLOUT_OF_NAME) {
1887 for (j = 0; j < e->u.arg.num; j++) {
1888 if (e->u.arg.types[j] == ONIG_TYPE_TAG) {
1889 UChar* start;
1890 UChar* end;
1891 int num;
1892 start = e->u.arg.vals[j].s.start;
1893 end = e->u.arg.vals[j].s.end;
1894 num = onig_get_callout_num_by_tag(reg, start, end);
1895 if (num < 0) return num;
1896 e->u.arg.vals[j].tag = num;
1897 }
1898 }
1899 }
1900 }
1901
1902 return ONIG_NORMAL;
1903 }
1904
1905 extern int
onig_callout_tag_is_exist_at_callout_num(regex_t * reg,int callout_num)1906 onig_callout_tag_is_exist_at_callout_num(regex_t* reg, int callout_num)
1907 {
1908 RegexExt* ext = reg->extp;
1909
1910 if (IS_NULL(ext) || IS_NULL(ext->callout_list)) return 0;
1911 if (callout_num > ext->callout_num) return 0;
1912
1913 return (ext->callout_list[callout_num].flag &
1914 CALLOUT_TAG_LIST_FLAG_TAG_EXIST) != 0;
1915 }
1916
1917 static int
i_free_callout_tag_entry(UChar * key,CalloutTagVal e,void * arg ARG_UNUSED)1918 i_free_callout_tag_entry(UChar* key, CalloutTagVal e, void* arg ARG_UNUSED)
1919 {
1920 xfree(key);
1921 return ST_DELETE;
1922 }
1923
1924 static int
callout_tag_table_clear(CalloutTagTable * t)1925 callout_tag_table_clear(CalloutTagTable* t)
1926 {
1927 if (IS_NOT_NULL(t)) {
1928 onig_st_foreach(t, i_free_callout_tag_entry, 0);
1929 }
1930 return 0;
1931 }
1932
1933 extern int
onig_callout_tag_table_free(void * table)1934 onig_callout_tag_table_free(void* table)
1935 {
1936 CalloutTagTable* t = (CalloutTagTable* )table;
1937
1938 if (IS_NOT_NULL(t)) {
1939 int r = callout_tag_table_clear(t);
1940 if (r != 0) return r;
1941
1942 onig_st_free_table(t);
1943 }
1944
1945 return 0;
1946 }
1947
1948 extern int
onig_get_callout_num_by_tag(regex_t * reg,const UChar * tag,const UChar * tag_end)1949 onig_get_callout_num_by_tag(regex_t* reg,
1950 const UChar* tag, const UChar* tag_end)
1951 {
1952 int r;
1953 RegexExt* ext;
1954 CalloutTagVal e;
1955
1956 ext = reg->extp;
1957 if (IS_NULL(ext) || IS_NULL(ext->tag_table))
1958 return ONIGERR_INVALID_CALLOUT_TAG_NAME;
1959
1960 r = onig_st_lookup_strend(ext->tag_table, tag, tag_end,
1961 (HashDataType* )((void* )(&e)));
1962 if (r == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME;
1963 return (int )e;
1964 }
1965
1966 static CalloutTagVal
callout_tag_find(CalloutTagTable * t,const UChar * name,const UChar * name_end)1967 callout_tag_find(CalloutTagTable* t, const UChar* name, const UChar* name_end)
1968 {
1969 CalloutTagVal e;
1970
1971 e = -1;
1972 if (IS_NOT_NULL(t)) {
1973 onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));
1974 }
1975 return e;
1976 }
1977
1978 static int
callout_tag_table_new(CalloutTagTable ** rt)1979 callout_tag_table_new(CalloutTagTable** rt)
1980 {
1981 CalloutTagTable* t;
1982
1983 *rt = 0;
1984 t = onig_st_init_strend_table_with_size(INIT_TAG_NAMES_ALLOC_NUM);
1985 CHECK_NULL_RETURN_MEMERR(t);
1986
1987 *rt = t;
1988 return ONIG_NORMAL;
1989 }
1990
1991 static int
callout_tag_entry_raw(ParseEnv * env,CalloutTagTable * t,UChar * name,UChar * name_end,CalloutTagVal entry_val)1992 callout_tag_entry_raw(ParseEnv* env, CalloutTagTable* t, UChar* name,
1993 UChar* name_end, CalloutTagVal entry_val)
1994 {
1995 int r;
1996 CalloutTagVal val;
1997
1998 if (name_end - name <= 0)
1999 return ONIGERR_INVALID_CALLOUT_TAG_NAME;
2000
2001 val = callout_tag_find(t, name, name_end);
2002 if (val >= 0) {
2003 onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME,
2004 name, name_end);
2005 return ONIGERR_MULTIPLEX_DEFINED_NAME;
2006 }
2007
2008 r = onig_st_insert_strend(t, name, name_end, (HashDataType )entry_val);
2009 if (r < 0) return r;
2010
2011 return ONIG_NORMAL;
2012 }
2013
2014 static int
ext_ensure_tag_table(regex_t * reg)2015 ext_ensure_tag_table(regex_t* reg)
2016 {
2017 int r;
2018 RegexExt* ext;
2019 CalloutTagTable* t;
2020
2021 ext = onig_get_regex_ext(reg);
2022 CHECK_NULL_RETURN_MEMERR(ext);
2023
2024 if (IS_NULL(ext->tag_table)) {
2025 r = callout_tag_table_new(&t);
2026 if (r != ONIG_NORMAL) return r;
2027
2028 ext->tag_table = t;
2029 }
2030
2031 return ONIG_NORMAL;
2032 }
2033
2034 static int
callout_tag_entry(ParseEnv * env,regex_t * reg,UChar * name,UChar * name_end,CalloutTagVal entry_val)2035 callout_tag_entry(ParseEnv* env, regex_t* reg, UChar* name, UChar* name_end,
2036 CalloutTagVal entry_val)
2037 {
2038 int r;
2039 RegexExt* ext;
2040 CalloutListEntry* e;
2041
2042 r = ext_ensure_tag_table(reg);
2043 if (r != ONIG_NORMAL) return r;
2044
2045 ext = onig_get_regex_ext(reg);
2046 CHECK_NULL_RETURN_MEMERR(ext);
2047 r = callout_tag_entry_raw(env, ext->tag_table, name, name_end, entry_val);
2048
2049 e = onig_reg_callout_list_at(reg, (int )entry_val);
2050 CHECK_NULL_RETURN_MEMERR(e);
2051 e->tag_start = name;
2052 e->tag_end = name_end;
2053
2054 return r;
2055 }
2056
2057 #endif /* USE_CALLOUT */
2058
2059
/* Entry count of the first heap-allocated MemEnv array (see scan_env_add_mem_entry). */
#define INIT_PARSEENV_MEMENV_ALLOC_SIZE   16
2061
2062 static void
scan_env_clear(ParseEnv * env)2063 scan_env_clear(ParseEnv* env)
2064 {
2065 MEM_STATUS_CLEAR(env->cap_history);
2066 MEM_STATUS_CLEAR(env->backtrack_mem);
2067 MEM_STATUS_CLEAR(env->backrefed_mem);
2068 env->error = (UChar* )NULL;
2069 env->error_end = (UChar* )NULL;
2070 env->num_call = 0;
2071
2072 #ifdef USE_CALL
2073 env->unset_addr_list = NULL;
2074 env->has_call_zero = 0;
2075 #endif
2076
2077 env->num_mem = 0;
2078 env->num_named = 0;
2079 env->mem_alloc = 0;
2080 env->mem_env_dynamic = (MemEnv* )NULL;
2081
2082 xmemset(env->mem_env_static, 0, sizeof(env->mem_env_static));
2083
2084 env->parse_depth = 0;
2085 #ifdef ONIG_DEBUG_PARSE
2086 env->max_parse_depth = 0;
2087 #endif
2088 env->backref_num = 0;
2089 env->keep_num = 0;
2090 env->id_num = 0;
2091 env->save_alloc_num = 0;
2092 env->saves = 0;
2093 }
2094
2095 static int
scan_env_add_mem_entry(ParseEnv * env)2096 scan_env_add_mem_entry(ParseEnv* env)
2097 {
2098 int i, need, alloc;
2099 MemEnv* p;
2100
2101 need = env->num_mem + 1;
2102 if (need > MaxCaptureNum && MaxCaptureNum != 0)
2103 return ONIGERR_TOO_MANY_CAPTURES;
2104
2105 if (need >= PARSEENV_MEMENV_SIZE) {
2106 if (env->mem_alloc <= need) {
2107 if (IS_NULL(env->mem_env_dynamic)) {
2108 alloc = INIT_PARSEENV_MEMENV_ALLOC_SIZE;
2109 p = (MemEnv* )xmalloc(sizeof(MemEnv) * alloc);
2110 CHECK_NULL_RETURN_MEMERR(p);
2111 xmemcpy(p, env->mem_env_static, sizeof(env->mem_env_static));
2112 }
2113 else {
2114 alloc = env->mem_alloc * 2;
2115 p = (MemEnv* )xrealloc(env->mem_env_dynamic, sizeof(MemEnv) * alloc);
2116 CHECK_NULL_RETURN_MEMERR(p);
2117 }
2118
2119 for (i = env->num_mem + 1; i < alloc; i++) {
2120 p[i].mem_node = NULL_NODE;
2121 p[i].empty_repeat_node = NULL_NODE;
2122 }
2123
2124 env->mem_env_dynamic = p;
2125 env->mem_alloc = alloc;
2126 }
2127 }
2128
2129 env->num_mem++;
2130 return env->num_mem;
2131 }
2132
2133 static int
scan_env_set_mem_node(ParseEnv * env,int num,Node * node)2134 scan_env_set_mem_node(ParseEnv* env, int num, Node* node)
2135 {
2136 if (env->num_mem >= num)
2137 PARSEENV_MEMENV(env)[num].mem_node = node;
2138 else
2139 return ONIGERR_PARSER_BUG;
2140 return 0;
2141 }
2142
2143 static void
node_free_body(Node * node)2144 node_free_body(Node* node)
2145 {
2146 if (IS_NULL(node)) return ;
2147
2148 switch (NODE_TYPE(node)) {
2149 case NODE_STRING:
2150 if (STR_(node)->capacity != 0 &&
2151 IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) {
2152 xfree(STR_(node)->s);
2153 }
2154 break;
2155
2156 case NODE_LIST:
2157 case NODE_ALT:
2158 onig_node_free(NODE_CAR(node));
2159 node = NODE_CDR(node);
2160 while (IS_NOT_NULL(node)) {
2161 Node* next = NODE_CDR(node);
2162 onig_node_free(NODE_CAR(node));
2163 xfree(node);
2164 node = next;
2165 }
2166 break;
2167
2168 case NODE_CCLASS:
2169 {
2170 CClassNode* cc = CCLASS_(node);
2171
2172 if (cc->mbuf)
2173 bbuf_free(cc->mbuf);
2174 }
2175 break;
2176
2177 case NODE_BACKREF:
2178 if (IS_NOT_NULL(BACKREF_(node)->back_dynamic))
2179 xfree(BACKREF_(node)->back_dynamic);
2180 break;
2181
2182 case NODE_BAG:
2183 if (NODE_BODY(node))
2184 onig_node_free(NODE_BODY(node));
2185
2186 {
2187 BagNode* en = BAG_(node);
2188 if (en->type == BAG_IF_ELSE) {
2189 onig_node_free(en->te.Then);
2190 onig_node_free(en->te.Else);
2191 }
2192 }
2193 break;
2194
2195 case NODE_QUANT:
2196 if (NODE_BODY(node))
2197 onig_node_free(NODE_BODY(node));
2198 break;
2199
2200 case NODE_ANCHOR:
2201 if (NODE_BODY(node))
2202 onig_node_free(NODE_BODY(node));
2203 if (IS_NOT_NULL(ANCHOR_(node)->lead_node))
2204 onig_node_free(ANCHOR_(node)->lead_node);
2205 break;
2206
2207 case NODE_CTYPE:
2208 case NODE_CALL:
2209 case NODE_GIMMICK:
2210 break;
2211 }
2212 }
2213
2214 extern void
onig_node_free(Node * node)2215 onig_node_free(Node* node)
2216 {
2217 if (IS_NULL(node)) return ;
2218
2219 #ifdef DEBUG_NODE_FREE
2220 fprintf(stderr, "onig_node_free: %p\n", node);
2221 #endif
2222
2223 node_free_body(node);
2224 xfree(node);
2225 }
2226
2227 static void
cons_node_free_alone(Node * node)2228 cons_node_free_alone(Node* node)
2229 {
2230 NODE_CAR(node) = 0;
2231 NODE_CDR(node) = 0;
2232 onig_node_free(node);
2233 }
2234
2235 static Node*
node_new(void)2236 node_new(void)
2237 {
2238 Node* node;
2239
2240 node = (Node* )xmalloc(sizeof(Node));
2241 CHECK_NULL_RETURN(node);
2242 xmemset(node, 0, sizeof(*node));
2243
2244 #ifdef DEBUG_NODE_FREE
2245 fprintf(stderr, "node_new: %p\n", node);
2246 #endif
2247 return node;
2248 }
2249
2250 extern int
onig_node_copy(Node ** rcopy,Node * from)2251 onig_node_copy(Node** rcopy, Node* from)
2252 {
2253 int r;
2254 Node* copy;
2255
2256 *rcopy = NULL_NODE;
2257
2258 switch (NODE_TYPE(from)) {
2259 case NODE_LIST:
2260 case NODE_ALT:
2261 case NODE_ANCHOR:
2262 /* These node's link to other nodes are processed by caller. */
2263 break;
2264 case NODE_STRING:
2265 case NODE_CCLASS:
2266 case NODE_CTYPE:
2267 /* Fixed contents after copy. */
2268 break;
2269 default:
2270 /* Not supported yet. */
2271 return ONIGERR_TYPE_BUG;
2272 break;
2273 }
2274
2275 copy = node_new();
2276 CHECK_NULL_RETURN_MEMERR(copy);
2277 xmemcpy(copy, from, sizeof(*copy));
2278
2279 switch (NODE_TYPE(copy)) {
2280 case NODE_STRING:
2281 r = onig_node_str_set(copy, STR_(from)->s, STR_(from)->end, FALSE);
2282 if (r != 0) {
2283 err:
2284 onig_node_free(copy);
2285 return r;
2286 }
2287 break;
2288
2289 case NODE_CCLASS:
2290 {
2291 CClassNode *fcc, *tcc;
2292
2293 fcc = CCLASS_(from);
2294 tcc = CCLASS_(copy);
2295 if (IS_NOT_NULL(fcc->mbuf)) {
2296 r = bbuf_clone(&(tcc->mbuf), fcc->mbuf);
2297 if (r != 0) goto err;
2298 }
2299 }
2300 break;
2301
2302 default:
2303 break;
2304 }
2305
2306 *rcopy = copy;
2307 return ONIG_NORMAL;
2308 }
2309
2310
2311 static void
initialize_cclass(CClassNode * cc)2312 initialize_cclass(CClassNode* cc)
2313 {
2314 BITSET_CLEAR(cc->bs);
2315 cc->flags = 0;
2316 cc->mbuf = NULL;
2317 }
2318
2319 static Node*
node_new_cclass(void)2320 node_new_cclass(void)
2321 {
2322 Node* node = node_new();
2323 CHECK_NULL_RETURN(node);
2324
2325 NODE_SET_TYPE(node, NODE_CCLASS);
2326 initialize_cclass(CCLASS_(node));
2327 return node;
2328 }
2329
2330 static Node*
node_new_ctype(int type,int not,OnigOptionType options)2331 node_new_ctype(int type, int not, OnigOptionType options)
2332 {
2333 Node* node = node_new();
2334 CHECK_NULL_RETURN(node);
2335
2336 NODE_SET_TYPE(node, NODE_CTYPE);
2337 CTYPE_(node)->ctype = type;
2338 CTYPE_(node)->not = not;
2339 CTYPE_(node)->ascii_mode = OPTON_IS_ASCII_MODE_CTYPE(type, options);
2340 return node;
2341 }
2342
2343 static Node*
node_new_anychar(OnigOptionType options)2344 node_new_anychar(OnigOptionType options)
2345 {
2346 Node* node;
2347
2348 node = node_new_ctype(CTYPE_ANYCHAR, FALSE, options);
2349 CHECK_NULL_RETURN(node);
2350
2351 if (OPTON_MULTILINE(options))
2352 NODE_STATUS_ADD(node, MULTILINE);
2353 return node;
2354 }
2355
2356 static int
node_new_no_newline(Node ** node,ParseEnv * env)2357 node_new_no_newline(Node** node, ParseEnv* env)
2358 {
2359 Node* n;
2360
2361 n = node_new_anychar(ONIG_OPTION_NONE);
2362 CHECK_NULL_RETURN_MEMERR(n);
2363 *node = n;
2364 return 0;
2365 }
2366
2367 static int
node_new_true_anychar(Node ** node)2368 node_new_true_anychar(Node** node)
2369 {
2370 Node* n;
2371
2372 n = node_new_anychar(ONIG_OPTION_MULTILINE);
2373 CHECK_NULL_RETURN_MEMERR(n);
2374 *node = n;
2375 return 0;
2376 }
2377
2378 static Node*
node_new_list(Node * left,Node * right)2379 node_new_list(Node* left, Node* right)
2380 {
2381 Node* node = node_new();
2382 CHECK_NULL_RETURN(node);
2383
2384 NODE_SET_TYPE(node, NODE_LIST);
2385 NODE_CAR(node) = left;
2386 NODE_CDR(node) = right;
2387 return node;
2388 }
2389
2390 extern Node*
onig_node_new_list(Node * left,Node * right)2391 onig_node_new_list(Node* left, Node* right)
2392 {
2393 return node_new_list(left, right);
2394 }
2395
2396 extern Node*
onig_node_new_alt(Node * left,Node * right)2397 onig_node_new_alt(Node* left, Node* right)
2398 {
2399 Node* node = node_new();
2400 CHECK_NULL_RETURN(node);
2401
2402 NODE_SET_TYPE(node, NODE_ALT);
2403 NODE_CAR(node) = left;
2404 NODE_CDR(node) = right;
2405 return node;
2406 }
2407
2408 static Node*
make_list_or_alt(NodeType type,int n,Node * ns[])2409 make_list_or_alt(NodeType type, int n, Node* ns[])
2410 {
2411 Node* r;
2412
2413 if (n <= 0) return NULL_NODE;
2414
2415 if (n == 1) {
2416 r = node_new();
2417 CHECK_NULL_RETURN(r);
2418 NODE_SET_TYPE(r, type);
2419 NODE_CAR(r) = ns[0];
2420 NODE_CDR(r) = NULL_NODE;
2421 }
2422 else {
2423 Node* right;
2424
2425 r = node_new();
2426 CHECK_NULL_RETURN(r);
2427
2428 right = make_list_or_alt(type, n - 1, ns + 1);
2429 if (IS_NULL(right)) {
2430 onig_node_free(r);
2431 return NULL_NODE;
2432 }
2433
2434 NODE_SET_TYPE(r, type);
2435 NODE_CAR(r) = ns[0];
2436 NODE_CDR(r) = right;
2437 }
2438
2439 return r;
2440 }
2441
2442 static Node*
make_list(int n,Node * ns[])2443 make_list(int n, Node* ns[])
2444 {
2445 return make_list_or_alt(NODE_LIST, n, ns);
2446 }
2447
2448 static Node*
make_alt(int n,Node * ns[])2449 make_alt(int n, Node* ns[])
2450 {
2451 return make_list_or_alt(NODE_ALT, n, ns);
2452 }
2453
2454 static Node*
node_new_anchor(int type)2455 node_new_anchor(int type)
2456 {
2457 Node* node;
2458
2459 node = node_new();
2460 CHECK_NULL_RETURN(node);
2461
2462 NODE_SET_TYPE(node, NODE_ANCHOR);
2463 ANCHOR_(node)->type = type;
2464 ANCHOR_(node)->char_min_len = 0;
2465 ANCHOR_(node)->char_max_len = INFINITE_LEN;
2466 ANCHOR_(node)->ascii_mode = 0;
2467 ANCHOR_(node)->lead_node = NULL_NODE;
2468 return node;
2469 }
2470
2471 static Node*
node_new_anchor_with_options(int type,OnigOptionType options)2472 node_new_anchor_with_options(int type, OnigOptionType options)
2473 {
2474 int ascii_mode;
2475 Node* node;
2476
2477 node = node_new_anchor(type);
2478 CHECK_NULL_RETURN(node);
2479
2480 ascii_mode = OPTON_WORD_ASCII(options) && IS_WORD_ANCHOR_TYPE(type) ? 1 : 0;
2481 ANCHOR_(node)->ascii_mode = ascii_mode;
2482
2483 if (type == ANCR_TEXT_SEGMENT_BOUNDARY ||
2484 type == ANCR_NO_TEXT_SEGMENT_BOUNDARY) {
2485 if (OPTON_TEXT_SEGMENT_WORD(options))
2486 NODE_STATUS_ADD(node, TEXT_SEGMENT_WORD);
2487 }
2488
2489 return node;
2490 }
2491
2492 static Node*
node_new_backref(int back_num,int * backrefs,int by_name,int exist_level,int nest_level,ParseEnv * env)2493 node_new_backref(int back_num, int* backrefs, int by_name,
2494 #ifdef USE_BACKREF_WITH_LEVEL
2495 int exist_level, int nest_level,
2496 #endif
2497 ParseEnv* env)
2498 {
2499 int i;
2500 Node* node;
2501
2502 node = node_new();
2503 CHECK_NULL_RETURN(node);
2504
2505 NODE_SET_TYPE(node, NODE_BACKREF);
2506 BACKREF_(node)->back_num = back_num;
2507 BACKREF_(node)->back_dynamic = (int* )NULL;
2508 if (by_name != 0)
2509 NODE_STATUS_ADD(node, BY_NAME);
2510
2511 if (OPTON_IGNORECASE(env->options))
2512 NODE_STATUS_ADD(node, IGNORECASE);
2513
2514 #ifdef USE_BACKREF_WITH_LEVEL
2515 if (exist_level != 0) {
2516 NODE_STATUS_ADD(node, NEST_LEVEL);
2517 BACKREF_(node)->nest_level = nest_level;
2518 }
2519 #endif
2520
2521 for (i = 0; i < back_num; i++) {
2522 if (backrefs[i] <= env->num_mem &&
2523 IS_NULL(PARSEENV_MEMENV(env)[backrefs[i]].mem_node)) {
2524 NODE_STATUS_ADD(node, RECURSION); /* /...(\1).../ */
2525 break;
2526 }
2527 }
2528
2529 if (back_num <= NODE_BACKREFS_SIZE) {
2530 for (i = 0; i < back_num; i++)
2531 BACKREF_(node)->back_static[i] = backrefs[i];
2532 }
2533 else {
2534 int* p = (int* )xmalloc(sizeof(int) * back_num);
2535 if (IS_NULL(p)) {
2536 onig_node_free(node);
2537 return NULL;
2538 }
2539 BACKREF_(node)->back_dynamic = p;
2540 for (i = 0; i < back_num; i++)
2541 p[i] = backrefs[i];
2542 }
2543
2544 env->backref_num++;
2545 return node;
2546 }
2547
2548 static Node*
node_new_backref_checker(int back_num,int * backrefs,int by_name,int exist_level,int nest_level,ParseEnv * env)2549 node_new_backref_checker(int back_num, int* backrefs, int by_name,
2550 #ifdef USE_BACKREF_WITH_LEVEL
2551 int exist_level, int nest_level,
2552 #endif
2553 ParseEnv* env)
2554 {
2555 Node* node;
2556
2557 node = node_new_backref(back_num, backrefs, by_name,
2558 #ifdef USE_BACKREF_WITH_LEVEL
2559 exist_level, nest_level,
2560 #endif
2561 env);
2562 CHECK_NULL_RETURN(node);
2563
2564 NODE_STATUS_ADD(node, CHECKER);
2565 return node;
2566 }
2567
#ifdef USE_CALL
/*
 * Create a NODE_CALL node.  The name range [name, name_end) is stored by
 * pointer (it is not copied here), together with `gnum` as called_gnum and
 * `by_number`.  entry_count starts at 1.
 * Leaves through CHECK_NULL_RETURN when node_new() fails.
 */
static Node*
node_new_call(UChar* name, UChar* name_end, int gnum, int by_number)
{
  Node* call;

  call = node_new();
  CHECK_NULL_RETURN(call);

  NODE_SET_TYPE(call, NODE_CALL);
  CALL_(call)->name        = name;
  CALL_(call)->name_end    = name_end;
  CALL_(call)->by_number   = by_number;
  CALL_(call)->called_gnum = gnum;
  CALL_(call)->entry_count = 1;

  return call;
}
#endif
2584
2585 static Node*
node_new_quantifier(int lower,int upper,int by_number)2586 node_new_quantifier(int lower, int upper, int by_number)
2587 {
2588 Node* node = node_new();
2589 CHECK_NULL_RETURN(node);
2590
2591 NODE_SET_TYPE(node, NODE_QUANT);
2592 QUANT_(node)->lower = lower;
2593 QUANT_(node)->upper = upper;
2594 QUANT_(node)->greedy = 1;
2595 QUANT_(node)->emptiness = BODY_IS_NOT_EMPTY;
2596 QUANT_(node)->head_exact = NULL_NODE;
2597 QUANT_(node)->next_head_exact = NULL_NODE;
2598 QUANT_(node)->include_referred = 0;
2599 QUANT_(node)->empty_status_mem = 0;
2600 if (by_number != 0)
2601 NODE_STATUS_ADD(node, BY_NUMBER);
2602
2603 return node;
2604 }
2605
2606 static Node*
node_new_bag(enum BagType type)2607 node_new_bag(enum BagType type)
2608 {
2609 Node* node = node_new();
2610 CHECK_NULL_RETURN(node);
2611
2612 NODE_SET_TYPE(node, NODE_BAG);
2613 BAG_(node)->type = type;
2614
2615 switch (type) {
2616 case BAG_MEMORY:
2617 BAG_(node)->m.regnum = 0;
2618 BAG_(node)->m.called_addr = -1;
2619 BAG_(node)->m.entry_count = 1;
2620 BAG_(node)->m.called_state = 0;
2621 break;
2622
2623 case BAG_OPTION:
2624 BAG_(node)->o.options = 0;
2625 break;
2626
2627 case BAG_STOP_BACKTRACK:
2628 break;
2629
2630 case BAG_IF_ELSE:
2631 BAG_(node)->te.Then = 0;
2632 BAG_(node)->te.Else = 0;
2633 break;
2634 }
2635
2636 BAG_(node)->opt_count = 0;
2637 return node;
2638 }
2639
2640 extern Node*
onig_node_new_bag(enum BagType type)2641 onig_node_new_bag(enum BagType type)
2642 {
2643 return node_new_bag(type);
2644 }
2645
2646 static Node*
node_new_bag_if_else(Node * cond,Node * Then,Node * Else)2647 node_new_bag_if_else(Node* cond, Node* Then, Node* Else)
2648 {
2649 Node* n;
2650 n = node_new_bag(BAG_IF_ELSE);
2651 CHECK_NULL_RETURN(n);
2652
2653 NODE_BODY(n) = cond;
2654 BAG_(n)->te.Then = Then;
2655 BAG_(n)->te.Else = Else;
2656 return n;
2657 }
2658
2659 static Node*
node_new_memory(int is_named)2660 node_new_memory(int is_named)
2661 {
2662 Node* node = node_new_bag(BAG_MEMORY);
2663 CHECK_NULL_RETURN(node);
2664 if (is_named != 0)
2665 NODE_STATUS_ADD(node, NAMED_GROUP);
2666
2667 return node;
2668 }
2669
2670 static Node*
node_new_option(OnigOptionType option)2671 node_new_option(OnigOptionType option)
2672 {
2673 Node* node = node_new_bag(BAG_OPTION);
2674 CHECK_NULL_RETURN(node);
2675 BAG_(node)->o.options = option;
2676 return node;
2677 }
2678
2679 static Node*
node_new_group(Node * content)2680 node_new_group(Node* content)
2681 {
2682 Node* node;
2683
2684 node = node_new();
2685 CHECK_NULL_RETURN(node);
2686 NODE_SET_TYPE(node, NODE_LIST);
2687 NODE_CAR(node) = content;
2688 NODE_CDR(node) = NULL_NODE;
2689
2690 return node;
2691 }
2692
2693 static Node*
node_drop_group(Node * group)2694 node_drop_group(Node* group)
2695 {
2696 Node* content;
2697
2698 content = NODE_CAR(group);
2699 NODE_CAR(group) = NULL_NODE;
2700 onig_node_free(group);
2701 return content;
2702 }
2703
2704 static int
node_set_fail(Node * node)2705 node_set_fail(Node* node)
2706 {
2707 NODE_SET_TYPE(node, NODE_GIMMICK);
2708 GIMMICK_(node)->type = GIMMICK_FAIL;
2709 return ONIG_NORMAL;
2710 }
2711
2712 static int
node_new_fail(Node ** node,ParseEnv * env)2713 node_new_fail(Node** node, ParseEnv* env)
2714 {
2715 *node = node_new();
2716 CHECK_NULL_RETURN_MEMERR(*node);
2717
2718 return node_set_fail(*node);
2719 }
2720
2721 extern int
onig_node_reset_fail(Node * node)2722 onig_node_reset_fail(Node* node)
2723 {
2724 node_free_body(node);
2725 return node_set_fail(node);
2726 }
2727
2728 static int
node_new_save_gimmick(Node ** node,enum SaveType save_type,ParseEnv * env)2729 node_new_save_gimmick(Node** node, enum SaveType save_type, ParseEnv* env)
2730 {
2731 int id;
2732
2733 ID_ENTRY(env, id);
2734
2735 *node = node_new();
2736 CHECK_NULL_RETURN_MEMERR(*node);
2737
2738 NODE_SET_TYPE(*node, NODE_GIMMICK);
2739 GIMMICK_(*node)->id = id;
2740 GIMMICK_(*node)->type = GIMMICK_SAVE;
2741 GIMMICK_(*node)->detail_type = (int )save_type;
2742
2743 return ONIG_NORMAL;
2744 }
2745
2746 static int
node_new_update_var_gimmick(Node ** node,enum UpdateVarType update_var_type,int id,ParseEnv * env)2747 node_new_update_var_gimmick(Node** node, enum UpdateVarType update_var_type,
2748 int id, ParseEnv* env)
2749 {
2750 *node = node_new();
2751 CHECK_NULL_RETURN_MEMERR(*node);
2752
2753 NODE_SET_TYPE(*node, NODE_GIMMICK);
2754 GIMMICK_(*node)->id = id;
2755 GIMMICK_(*node)->type = GIMMICK_UPDATE_VAR;
2756 GIMMICK_(*node)->detail_type = (int )update_var_type;
2757
2758 return ONIG_NORMAL;
2759 }
2760
2761 static int
node_new_keep(Node ** node,ParseEnv * env)2762 node_new_keep(Node** node, ParseEnv* env)
2763 {
2764 int r;
2765
2766 r = node_new_save_gimmick(node, SAVE_KEEP, env);
2767 if (r != 0) return r;
2768
2769 env->keep_num++;
2770 return ONIG_NORMAL;
2771 }
2772
2773 #ifdef USE_CALLOUT
2774
2775 extern void
onig_free_reg_callout_list(int n,CalloutListEntry * list)2776 onig_free_reg_callout_list(int n, CalloutListEntry* list)
2777 {
2778 int i;
2779 int j;
2780
2781 if (IS_NULL(list)) return ;
2782
2783 for (i = 0; i < n; i++) {
2784 if (list[i].of == ONIG_CALLOUT_OF_NAME) {
2785 for (j = 0; j < list[i].u.arg.passed_num; j++) {
2786 if (list[i].u.arg.types[j] == ONIG_TYPE_STRING) {
2787 if (IS_NOT_NULL(list[i].u.arg.vals[j].s.start))
2788 xfree(list[i].u.arg.vals[j].s.start);
2789 }
2790 }
2791 }
2792 else { /* ONIG_CALLOUT_OF_CONTENTS */
2793 if (IS_NOT_NULL(list[i].u.content.start)) {
2794 xfree((void* )list[i].u.content.start);
2795 }
2796 }
2797 }
2798
2799 xfree(list);
2800 }
2801
2802 extern CalloutListEntry*
onig_reg_callout_list_at(regex_t * reg,int num)2803 onig_reg_callout_list_at(regex_t* reg, int num)
2804 {
2805 RegexExt* ext = reg->extp;
2806 CHECK_NULL_RETURN(ext);
2807
2808 if (num <= 0 || num > ext->callout_num)
2809 return 0;
2810
2811 num--;
2812 return ext->callout_list + num;
2813 }
2814
2815 static int
reg_callout_list_entry(ParseEnv * env,int * rnum)2816 reg_callout_list_entry(ParseEnv* env, int* rnum)
2817 {
2818 #define INIT_CALLOUT_LIST_NUM 3
2819
2820 int num;
2821 CalloutListEntry* list;
2822 CalloutListEntry* e;
2823 RegexExt* ext;
2824
2825 ext = onig_get_regex_ext(env->reg);
2826 CHECK_NULL_RETURN_MEMERR(ext);
2827
2828 if (IS_NULL(ext->callout_list)) {
2829 list = (CalloutListEntry* )xmalloc(sizeof(*list) * INIT_CALLOUT_LIST_NUM);
2830 CHECK_NULL_RETURN_MEMERR(list);
2831
2832 ext->callout_list = list;
2833 ext->callout_list_alloc = INIT_CALLOUT_LIST_NUM;
2834 ext->callout_num = 0;
2835 }
2836
2837 num = ext->callout_num + 1;
2838 if (num > ext->callout_list_alloc) {
2839 int alloc = ext->callout_list_alloc * 2;
2840 list = (CalloutListEntry* )xrealloc(ext->callout_list,
2841 sizeof(CalloutListEntry) * alloc);
2842 CHECK_NULL_RETURN_MEMERR(list);
2843
2844 ext->callout_list = list;
2845 ext->callout_list_alloc = alloc;
2846 }
2847
2848 e = ext->callout_list + (num - 1);
2849
2850 e->flag = 0;
2851 e->of = 0;
2852 e->in = ONIG_CALLOUT_OF_CONTENTS;
2853 e->type = 0;
2854 e->tag_start = 0;
2855 e->tag_end = 0;
2856 e->start_func = 0;
2857 e->end_func = 0;
2858 e->u.arg.num = 0;
2859 e->u.arg.passed_num = 0;
2860
2861 ext->callout_num = num;
2862 *rnum = num;
2863 return ONIG_NORMAL;
2864 }
2865
2866 static int
node_new_callout(Node ** node,OnigCalloutOf callout_of,int num,int id,ParseEnv * env)2867 node_new_callout(Node** node, OnigCalloutOf callout_of, int num, int id,
2868 ParseEnv* env)
2869 {
2870 *node = node_new();
2871 CHECK_NULL_RETURN_MEMERR(*node);
2872
2873 NODE_SET_TYPE(*node, NODE_GIMMICK);
2874 GIMMICK_(*node)->id = id;
2875 GIMMICK_(*node)->num = num;
2876 GIMMICK_(*node)->type = GIMMICK_CALLOUT;
2877 GIMMICK_(*node)->detail_type = (int )callout_of;
2878
2879 return ONIG_NORMAL;
2880 }
2881 #endif
2882
2883 static int
make_text_segment(Node ** node,ParseEnv * env)2884 make_text_segment(Node** node, ParseEnv* env)
2885 {
2886 int r;
2887 int i;
2888 Node* x;
2889 Node* ns[2];
2890
2891 /* \X == (?>\O(?:\Y\O)*) */
2892
2893 ns[1] = NULL_NODE;
2894
2895 r = ONIGERR_MEMORY;
2896 ns[0] = node_new_anchor_with_options(ANCR_NO_TEXT_SEGMENT_BOUNDARY, env->options);
2897 if (IS_NULL(ns[0])) goto err;
2898
2899 r = node_new_true_anychar(&ns[1]);
2900 if (r != 0) goto err1;
2901
2902 x = make_list(2, ns);
2903 if (IS_NULL(x)) goto err;
2904 ns[0] = x;
2905 ns[1] = NULL_NODE;
2906
2907 x = node_new_quantifier(0, INFINITE_REPEAT, TRUE);
2908 if (IS_NULL(x)) goto err;
2909
2910 NODE_BODY(x) = ns[0];
2911 ns[0] = NULL_NODE;
2912 ns[1] = x;
2913
2914 r = node_new_true_anychar(&ns[0]);
2915 if (r != 0) goto err1;
2916
2917 x = make_list(2, ns);
2918 if (IS_NULL(x)) goto err;
2919
2920 ns[0] = x;
2921 ns[1] = NULL_NODE;
2922
2923 x = node_new_bag(BAG_STOP_BACKTRACK);
2924 if (IS_NULL(x)) goto err;
2925
2926 NODE_BODY(x) = ns[0];
2927
2928 *node = x;
2929 return ONIG_NORMAL;
2930
2931 err:
2932 r = ONIGERR_MEMORY;
2933 err1:
2934 for (i = 0; i < 2; i++) onig_node_free(ns[i]);
2935 return r;
2936 }
2937
2938 static int
make_absent_engine(Node ** node,int pre_save_right_id,Node * absent,Node * step_one,int lower,int upper,int possessive,int is_range_cutter,ParseEnv * env)2939 make_absent_engine(Node** node, int pre_save_right_id, Node* absent,
2940 Node* step_one, int lower, int upper, int possessive,
2941 int is_range_cutter, ParseEnv* env)
2942 {
2943 int r;
2944 int i;
2945 int id;
2946 Node* x;
2947 Node* ns[4];
2948
2949 for (i = 0; i < 4; i++) ns[i] = NULL_NODE;
2950
2951 ns[1] = absent;
2952 ns[3] = step_one; /* for err */
2953 r = node_new_save_gimmick(&ns[0], SAVE_S, env);
2954 if (r != 0) goto err;
2955
2956 id = GIMMICK_(ns[0])->id;
2957 r = node_new_update_var_gimmick(&ns[2], UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK,
2958 id, env);
2959 if (r != 0) goto err;
2960
2961 if (is_range_cutter != 0)
2962 NODE_STATUS_ADD(ns[2], ABSENT_WITH_SIDE_EFFECTS);
2963
2964 r = node_new_fail(&ns[3], env);
2965 if (r != 0) goto err;
2966
2967 x = make_list(4, ns);
2968 if (IS_NULL(x)) goto err0;
2969
2970 ns[0] = x;
2971 ns[1] = step_one;
2972 ns[2] = ns[3] = NULL_NODE;
2973
2974 x = make_alt(2, ns);
2975 if (IS_NULL(x)) goto err0;
2976
2977 ns[0] = x;
2978
2979 x = node_new_quantifier(lower, upper, FALSE);
2980 if (IS_NULL(x)) goto err0;
2981
2982 NODE_BODY(x) = ns[0];
2983 ns[0] = x;
2984
2985 if (possessive != 0) {
2986 x = node_new_bag(BAG_STOP_BACKTRACK);
2987 if (IS_NULL(x)) goto err0;
2988
2989 NODE_BODY(x) = ns[0];
2990 ns[0] = x;
2991 }
2992
2993 r = node_new_update_var_gimmick(&ns[1], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
2994 pre_save_right_id, env);
2995 if (r != 0) goto err;
2996
2997 r = node_new_fail(&ns[2], env);
2998 if (r != 0) goto err;
2999
3000 x = make_list(2, ns + 1);
3001 if (IS_NULL(x)) goto err0;
3002
3003 ns[1] = x; ns[2] = NULL_NODE;
3004
3005 x = make_alt(2, ns);
3006 if (IS_NULL(x)) goto err0;
3007
3008 if (is_range_cutter != FALSE)
3009 NODE_STATUS_ADD(x, SUPER);
3010
3011 *node = x;
3012 return ONIG_NORMAL;
3013
3014 err0:
3015 r = ONIGERR_MEMORY;
3016 err:
3017 for (i = 0; i < 4; i++) onig_node_free(ns[i]);
3018 return r;
3019 }
3020
3021 static int
make_absent_tail(Node ** node1,Node ** node2,int pre_save_right_id,ParseEnv * env)3022 make_absent_tail(Node** node1, Node** node2, int pre_save_right_id,
3023 ParseEnv* env)
3024 {
3025 int r;
3026 int id;
3027 Node* save;
3028 Node* x;
3029 Node* ns[2];
3030
3031 *node1 = *node2 = NULL_NODE;
3032 save = ns[0] = ns[1] = NULL_NODE;
3033
3034 r = node_new_save_gimmick(&save, SAVE_RIGHT_RANGE, env);
3035 if (r != 0) goto err;
3036
3037 id = GIMMICK_(save)->id;
3038 r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
3039 id, env);
3040 if (r != 0) goto err;
3041
3042 r = node_new_fail(&ns[1], env);
3043 if (r != 0) goto err;
3044
3045 x = make_list(2, ns);
3046 if (IS_NULL(x)) goto err0;
3047
3048 ns[0] = NULL_NODE; ns[1] = x;
3049
3050 r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
3051 pre_save_right_id, env);
3052 if (r != 0) goto err;
3053
3054 x = make_alt(2, ns);
3055 if (IS_NULL(x)) goto err0;
3056
3057 *node1 = save;
3058 *node2 = x;
3059 return ONIG_NORMAL;
3060
3061 err0:
3062 r = ONIGERR_MEMORY;
3063 err:
3064 onig_node_free(save);
3065 onig_node_free(ns[0]);
3066 onig_node_free(ns[1]);
3067 return r;
3068 }
3069
3070 static int
make_range_clear(Node ** node,ParseEnv * env)3071 make_range_clear(Node** node, ParseEnv* env)
3072 {
3073 int r;
3074 int id;
3075 Node* save;
3076 Node* x;
3077 Node* ns[2];
3078
3079 *node = NULL_NODE;
3080 save = ns[0] = ns[1] = NULL_NODE;
3081
3082 r = node_new_save_gimmick(&save, SAVE_RIGHT_RANGE, env);
3083 if (r != 0) goto err;
3084
3085 id = GIMMICK_(save)->id;
3086 r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
3087 id, env);
3088 if (r != 0) goto err;
3089
3090 r = node_new_fail(&ns[1], env);
3091 if (r != 0) goto err;
3092
3093 x = make_list(2, ns);
3094 if (IS_NULL(x)) goto err0;
3095
3096 ns[0] = NULL_NODE; ns[1] = x;
3097
3098 #define ID_NOT_USED_DONT_CARE_ME 0
3099
3100 r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_INIT,
3101 ID_NOT_USED_DONT_CARE_ME, env);
3102 if (r != 0) goto err;
3103 NODE_STATUS_ADD(ns[0], ABSENT_WITH_SIDE_EFFECTS);
3104
3105 x = make_alt(2, ns);
3106 if (IS_NULL(x)) goto err0;
3107
3108 NODE_STATUS_ADD(x, SUPER);
3109
3110 ns[0] = save;
3111 ns[1] = x;
3112 save = NULL_NODE;
3113 x = make_list(2, ns);
3114 if (IS_NULL(x)) goto err0;
3115
3116 *node = x;
3117 return ONIG_NORMAL;
3118
3119 err0:
3120 r = ONIGERR_MEMORY;
3121 err:
3122 onig_node_free(save);
3123 onig_node_free(ns[0]);
3124 onig_node_free(ns[1]);
3125 return r;
3126 }
3127
3128 static int
is_simple_one_char_repeat(Node * node,Node ** rquant,Node ** rbody,int * is_possessive,ParseEnv * env)3129 is_simple_one_char_repeat(Node* node, Node** rquant, Node** rbody,
3130 int* is_possessive, ParseEnv* env)
3131 {
3132 Node* quant;
3133 Node* body;
3134
3135 *rquant = *rbody = 0;
3136 *is_possessive = 0;
3137
3138 if (NODE_TYPE(node) == NODE_QUANT) {
3139 quant = node;
3140 }
3141 else {
3142 if (NODE_TYPE(node) == NODE_BAG) {
3143 BagNode* en = BAG_(node);
3144 if (en->type == BAG_STOP_BACKTRACK) {
3145 *is_possessive = 1;
3146 quant = NODE_BAG_BODY(en);
3147 if (NODE_TYPE(quant) != NODE_QUANT)
3148 return 0;
3149 }
3150 else
3151 return 0;
3152 }
3153 else
3154 return 0;
3155 }
3156
3157 if (QUANT_(quant)->greedy == 0)
3158 return 0;
3159
3160 body = NODE_BODY(quant);
3161 switch (NODE_TYPE(body)) {
3162 case NODE_STRING:
3163 {
3164 int len;
3165 StrNode* sn = STR_(body);
3166 UChar *s = sn->s;
3167
3168 len = 0;
3169 while (s < sn->end) {
3170 s += enclen(env->enc, s);
3171 len++;
3172 }
3173 if (len != 1)
3174 return 0;
3175 }
3176
3177 case NODE_CCLASS:
3178 break;
3179
3180 default:
3181 return 0;
3182 break;
3183 }
3184
3185 if (node != quant) {
3186 NODE_BODY(node) = 0;
3187 onig_node_free(node);
3188 }
3189 NODE_BODY(quant) = NULL_NODE;
3190 *rquant = quant;
3191 *rbody = body;
3192 return 1;
3193 }
3194
3195 static int
make_absent_tree_for_simple_one_char_repeat(Node ** node,Node * absent,Node * quant,Node * body,int possessive,ParseEnv * env)3196 make_absent_tree_for_simple_one_char_repeat(Node** node,
3197 Node* absent, Node* quant, Node* body, int possessive, ParseEnv* env)
3198 {
3199 int r;
3200 int i;
3201 int id1;
3202 int lower, upper;
3203 Node* x;
3204 Node* ns[4];
3205
3206 *node = NULL_NODE;
3207 r = ONIGERR_MEMORY;
3208 ns[0] = ns[1] = NULL_NODE;
3209 ns[2] = body, ns[3] = absent;
3210
3211 lower = QUANT_(quant)->lower;
3212 upper = QUANT_(quant)->upper;
3213
3214 r = node_new_save_gimmick(&ns[0], SAVE_RIGHT_RANGE, env);
3215 if (r != 0) goto err;
3216
3217 id1 = GIMMICK_(ns[0])->id;
3218
3219 r = make_absent_engine(&ns[1], id1, absent, body, lower, upper, possessive,
3220 FALSE, env);
3221 if (r != 0) goto err;
3222
3223 ns[2] = ns[3] = NULL_NODE;
3224
3225 r = node_new_update_var_gimmick(&ns[2], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
3226 id1, env);
3227 if (r != 0) goto err;
3228
3229 x = make_list(3, ns);
3230 if (IS_NULL(x)) goto err0;
3231
3232 *node = x;
3233 return ONIG_NORMAL;
3234
3235 err0:
3236 r = ONIGERR_MEMORY;
3237 err:
3238 for (i = 0; i < 4; i++) onig_node_free(ns[i]);
3239 return r;
3240 }
3241
3242 static int
make_absent_tree(Node ** node,Node * absent,Node * expr,int is_range_cutter,ParseEnv * env)3243 make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter,
3244 ParseEnv* env)
3245 {
3246 int r;
3247 int i;
3248 int id1, id2;
3249 int possessive;
3250 Node* x;
3251 Node* ns[7];
3252
3253 r = ONIGERR_MEMORY;
3254 for (i = 0; i < 7; i++) ns[i] = NULL_NODE;
3255 ns[4] = expr; ns[5] = absent;
3256
3257 if (is_range_cutter == 0) {
3258 Node* quant;
3259 Node* body;
3260
3261 if (expr == NULL_NODE) {
3262 /* default expr \O* */
3263 quant = node_new_quantifier(0, INFINITE_REPEAT, FALSE);
3264 if (IS_NULL(quant)) goto err0;
3265
3266 r = node_new_true_anychar(&body);
3267 if (r != 0) {
3268 onig_node_free(quant);
3269 goto err;
3270 }
3271 possessive = 0;
3272 goto simple;
3273 }
3274 else {
3275 if (is_simple_one_char_repeat(expr, &quant, &body, &possessive, env)) {
3276 simple:
3277 r = make_absent_tree_for_simple_one_char_repeat(node, absent, quant,
3278 body, possessive, env);
3279 onig_node_free(quant);
3280 if (r != 0) {
3281 ns[4] = NULL_NODE;
3282 onig_node_free(body);
3283 goto err;
3284 }
3285
3286 return ONIG_NORMAL;
3287 }
3288 }
3289 }
3290
3291 r = node_new_save_gimmick(&ns[0], SAVE_RIGHT_RANGE, env);
3292 if (r != 0) goto err;
3293
3294 id1 = GIMMICK_(ns[0])->id;
3295
3296 r = node_new_save_gimmick(&ns[1], SAVE_S, env);
3297 if (r != 0) goto err;
3298
3299 id2 = GIMMICK_(ns[1])->id;
3300
3301 r = node_new_true_anychar(&ns[3]);
3302 if (r != 0) goto err;
3303
3304 possessive = 1;
3305 r = make_absent_engine(&ns[2], id1, absent, ns[3], 0, INFINITE_REPEAT,
3306 possessive, is_range_cutter, env);
3307 if (r != 0) goto err;
3308
3309 ns[3] = NULL_NODE;
3310 ns[5] = NULL_NODE;
3311
3312 r = node_new_update_var_gimmick(&ns[3], UPDATE_VAR_S_FROM_STACK, id2, env);
3313 if (r != 0) goto err;
3314
3315 if (is_range_cutter != 0) {
3316 x = make_list(4, ns);
3317 if (IS_NULL(x)) goto err0;
3318 }
3319 else {
3320 r = make_absent_tail(&ns[5], &ns[6], id1, env);
3321 if (r != 0) goto err;
3322
3323 x = make_list(7, ns);
3324 if (IS_NULL(x)) goto err0;
3325 }
3326
3327 *node = x;
3328 return ONIG_NORMAL;
3329
3330 err0:
3331 r = ONIGERR_MEMORY;
3332 err:
3333 for (i = 0; i < 7; i++) onig_node_free(ns[i]);
3334 return r;
3335 }
3336
3337 extern int
onig_node_str_cat(Node * node,const UChar * s,const UChar * end)3338 onig_node_str_cat(Node* node, const UChar* s, const UChar* end)
3339 {
3340 int addlen = (int )(end - s);
3341
3342 if (addlen > 0) {
3343 int len = (int )(STR_(node)->end - STR_(node)->s);
3344
3345 if (STR_(node)->capacity > 0 || (len + addlen > NODE_STRING_BUF_SIZE - 1)) {
3346 UChar* p;
3347 int capa = len + addlen + NODE_STRING_MARGIN;
3348
3349 if (capa <= STR_(node)->capacity) {
3350 onig_strcpy(STR_(node)->s + len, s, end);
3351 }
3352 else {
3353 if (STR_(node)->s == STR_(node)->buf)
3354 p = strcat_capa_from_static(STR_(node)->s, STR_(node)->end,
3355 s, end, capa);
3356 else
3357 p = strcat_capa(STR_(node)->s, STR_(node)->end, s, end, capa);
3358
3359 CHECK_NULL_RETURN_MEMERR(p);
3360 STR_(node)->s = p;
3361 STR_(node)->capacity = capa;
3362 }
3363 }
3364 else {
3365 onig_strcpy(STR_(node)->s + len, s, end);
3366 }
3367 STR_(node)->end = STR_(node)->s + len + addlen;
3368 }
3369
3370 return 0;
3371 }
3372
3373 extern int
onig_node_str_set(Node * node,const UChar * s,const UChar * end,int need_free)3374 onig_node_str_set(Node* node, const UChar* s, const UChar* end, int need_free)
3375 {
3376 onig_node_str_clear(node, need_free);
3377 return onig_node_str_cat(node, s, end);
3378 }
3379
3380 static int
node_str_cat_char(Node * node,UChar c)3381 node_str_cat_char(Node* node, UChar c)
3382 {
3383 UChar s[1];
3384
3385 s[0] = c;
3386 return onig_node_str_cat(node, s, s + 1);
3387 }
3388
3389 extern void
onig_node_str_clear(Node * node,int need_free)3390 onig_node_str_clear(Node* node, int need_free)
3391 {
3392 if (need_free != 0 &&
3393 STR_(node)->capacity != 0 &&
3394 IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) {
3395 xfree(STR_(node)->s);
3396 }
3397
3398 STR_(node)->flag = 0;
3399 STR_(node)->s = STR_(node)->buf;
3400 STR_(node)->end = STR_(node)->buf;
3401 STR_(node)->capacity = 0;
3402 }
3403
3404 static int
node_set_str(Node * node,const UChar * s,const UChar * end)3405 node_set_str(Node* node, const UChar* s, const UChar* end)
3406 {
3407 int r;
3408
3409 NODE_SET_TYPE(node, NODE_STRING);
3410 STR_(node)->flag = 0;
3411 STR_(node)->s = STR_(node)->buf;
3412 STR_(node)->end = STR_(node)->buf;
3413 STR_(node)->capacity = 0;
3414
3415 r = onig_node_str_cat(node, s, end);
3416 return r;
3417 }
3418
3419 static Node*
node_new_str(const UChar * s,const UChar * end)3420 node_new_str(const UChar* s, const UChar* end)
3421 {
3422 int r;
3423 Node* node = node_new();
3424 CHECK_NULL_RETURN(node);
3425
3426 r = node_set_str(node, s, end);
3427 if (r != 0) {
3428 onig_node_free(node);
3429 return NULL;
3430 }
3431
3432 return node;
3433 }
3434
3435 static int
node_reset_str(Node * node,const UChar * s,const UChar * end)3436 node_reset_str(Node* node, const UChar* s, const UChar* end)
3437 {
3438 node_free_body(node);
3439 return node_set_str(node, s, end);
3440 }
3441
3442 extern int
onig_node_reset_empty(Node * node)3443 onig_node_reset_empty(Node* node)
3444 {
3445 return node_reset_str(node, NULL, NULL);
3446 }
3447
3448 extern Node*
onig_node_new_str(const UChar * s,const UChar * end)3449 onig_node_new_str(const UChar* s, const UChar* end)
3450 {
3451 return node_new_str(s, end);
3452 }
3453
3454 static Node*
node_new_str_with_options(const UChar * s,const UChar * end,OnigOptionType options)3455 node_new_str_with_options(const UChar* s, const UChar* end,
3456 OnigOptionType options)
3457 {
3458 Node* node;
3459 node = node_new_str(s, end);
3460
3461 if (OPTON_IGNORECASE(options))
3462 NODE_STATUS_ADD(node, IGNORECASE);
3463
3464 return node;
3465 }
3466
3467 static Node*
node_new_str_crude(UChar * s,UChar * end,OnigOptionType options)3468 node_new_str_crude(UChar* s, UChar* end, OnigOptionType options)
3469 {
3470 Node* node = node_new_str_with_options(s, end, options);
3471 CHECK_NULL_RETURN(node);
3472 NODE_STRING_SET_CRUDE(node);
3473 return node;
3474 }
3475
3476 static Node*
node_new_empty(void)3477 node_new_empty(void)
3478 {
3479 return node_new_str(NULL, NULL);
3480 }
3481
3482 static Node*
node_new_str_crude_char(UChar c,OnigOptionType options)3483 node_new_str_crude_char(UChar c, OnigOptionType options)
3484 {
3485 int i;
3486 UChar p[1];
3487 Node* node;
3488
3489 p[0] = c;
3490 node = node_new_str_crude(p, p + 1, options);
3491
3492 /* clear buf tail */
3493 for (i = 1; i < NODE_STRING_BUF_SIZE; i++)
3494 STR_(node)->buf[i] = '\0';
3495
3496 return node;
3497 }
3498
3499 static Node*
str_node_split_last_char(Node * node,OnigEncoding enc)3500 str_node_split_last_char(Node* node, OnigEncoding enc)
3501 {
3502 const UChar *p;
3503 Node* rn;
3504 StrNode* sn;
3505
3506 sn = STR_(node);
3507 rn = NULL_NODE;
3508 if (sn->end > sn->s) {
3509 p = onigenc_get_prev_char_head(enc, sn->s, sn->end);
3510 if (p && p > sn->s) { /* can be split. */
3511 rn = node_new_str(p, sn->end);
3512 CHECK_NULL_RETURN(rn);
3513
3514 sn->end = (UChar* )p;
3515 STR_(rn)->flag = sn->flag;
3516 NODE_STATUS(rn) = NODE_STATUS(node);
3517 }
3518 }
3519
3520 return rn;
3521 }
3522
3523 static int
str_node_can_be_split(Node * node,OnigEncoding enc)3524 str_node_can_be_split(Node* node, OnigEncoding enc)
3525 {
3526 StrNode* sn = STR_(node);
3527 if (sn->end > sn->s) {
3528 return ((enclen(enc, sn->s) < sn->end - sn->s) ? 1 : 0);
3529 }
3530 return 0;
3531 }
3532
3533 static int
scan_number(UChar ** src,const UChar * end,OnigEncoding enc)3534 scan_number(UChar** src, const UChar* end, OnigEncoding enc)
3535 {
3536 int num, val;
3537 OnigCodePoint c;
3538 UChar* p = *src;
3539 PFETCH_READY;
3540
3541 num = 0;
3542 while (! PEND) {
3543 PFETCH(c);
3544 if (IS_CODE_DIGIT_ASCII(enc, c)) {
3545 val = (int )DIGITVAL(c);
3546 if ((ONIG_INT_MAX - val) / 10 < num)
3547 return -1; /* overflow */
3548
3549 num = num * 10 + val;
3550 }
3551 else {
3552 PUNFETCH;
3553 break;
3554 }
3555 }
3556 *src = p;
3557 return num;
3558 }
3559
3560 static int
scan_hexadecimal_number(UChar ** src,UChar * end,int minlen,int maxlen,OnigEncoding enc,OnigCodePoint * rcode)3561 scan_hexadecimal_number(UChar** src, UChar* end, int minlen, int maxlen,
3562 OnigEncoding enc, OnigCodePoint* rcode)
3563 {
3564 OnigCodePoint code;
3565 OnigCodePoint c;
3566 unsigned int val;
3567 int n;
3568 UChar* p = *src;
3569 PFETCH_READY;
3570
3571 code = 0;
3572 n = 0;
3573 while (! PEND && n < maxlen) {
3574 PFETCH(c);
3575 if (IS_CODE_XDIGIT_ASCII(enc, c)) {
3576 n++;
3577 val = (unsigned int )XDIGITVAL(enc, c);
3578 if ((UINT_MAX - val) / 16UL < code)
3579 return ONIGERR_TOO_BIG_NUMBER; /* overflow */
3580
3581 code = (code << 4) + val;
3582 }
3583 else {
3584 PUNFETCH;
3585 break;
3586 }
3587 }
3588
3589 if (n < minlen)
3590 return ONIGERR_INVALID_CODE_POINT_VALUE;
3591
3592 *rcode = code;
3593 *src = p;
3594 return ONIG_NORMAL;
3595 }
3596
3597 static int
scan_octal_number(UChar ** src,UChar * end,int minlen,int maxlen,OnigEncoding enc,OnigCodePoint * rcode)3598 scan_octal_number(UChar** src, UChar* end, int minlen, int maxlen,
3599 OnigEncoding enc, OnigCodePoint* rcode)
3600 {
3601 OnigCodePoint code;
3602 OnigCodePoint c;
3603 unsigned int val;
3604 int n;
3605 UChar* p = *src;
3606 PFETCH_READY;
3607
3608 code = 0;
3609 n = 0;
3610 while (! PEND && n < maxlen) {
3611 PFETCH(c);
3612 if (IS_CODE_DIGIT_ASCII(enc, c) && c < '8') {
3613 n++;
3614 val = (unsigned int )ODIGITVAL(c);
3615 if ((UINT_MAX - val) / 8UL < code)
3616 return ONIGERR_TOO_BIG_NUMBER; /* overflow */
3617
3618 code = (code << 3) + val;
3619 }
3620 else {
3621 PUNFETCH;
3622 break;
3623 }
3624 }
3625
3626 if (n < minlen)
3627 return ONIGERR_INVALID_CODE_POINT_VALUE;
3628
3629 *rcode = code;
3630 *src = p;
3631 return ONIG_NORMAL;
3632 }
3633
3634 static int
scan_number_of_base(UChar ** src,UChar * end,int minlen,OnigEncoding enc,OnigCodePoint * rcode,int base)3635 scan_number_of_base(UChar** src, UChar* end, int minlen,
3636 OnigEncoding enc, OnigCodePoint* rcode, int base)
3637 {
3638 int r;
3639
3640 if (base == 16)
3641 r = scan_hexadecimal_number(src, end, minlen, 8, enc, rcode);
3642 else if (base == 8)
3643 r = scan_octal_number(src, end, minlen, 11, enc, rcode);
3644 else
3645 r = ONIGERR_INVALID_CODE_POINT_VALUE;
3646
3647 return r;
3648 }
3649
/* A space or a newline separates adjacent entries of a '}'-terminated
   code point sequence. */
#define IS_CODE_POINT_DIVIDE(c)  ((c) == ' ' || (c) == '\n')

/* Range-tracking state used by check_code_point_sequence_cc(). */
enum CPS_STATE {
  CPS_EMPTY = 0,  /* no code point is available to start a range */
  CPS_START = 1,  /* a code point was just read; a following '-' may start a range */
  CPS_RANGE = 2   /* '-' was read; the range's end code point is still expected */
};
3657
3658 static int
check_code_point_sequence_cc(UChar * p,UChar * end,int base,OnigEncoding enc,int state)3659 check_code_point_sequence_cc(UChar* p, UChar* end, int base,
3660 OnigEncoding enc, int state)
3661 {
3662 int r;
3663 int n;
3664 int end_digit;
3665 OnigCodePoint code;
3666 OnigCodePoint c;
3667 PFETCH_READY;
3668
3669 end_digit = FALSE;
3670 n = 0;
3671 while (! PEND) {
3672 start:
3673 PFETCH(c);
3674 if (c == '}') {
3675 end_char:
3676 if (state == CPS_RANGE) return ONIGERR_INVALID_CODE_POINT_VALUE;
3677 return n;
3678 }
3679
3680 if (IS_CODE_POINT_DIVIDE(c)) {
3681 while (! PEND) {
3682 PFETCH(c);
3683 if (! IS_CODE_POINT_DIVIDE(c)) break;
3684 }
3685 if (IS_CODE_POINT_DIVIDE(c))
3686 return ONIGERR_INVALID_CODE_POINT_VALUE;
3687 }
3688 else if (c == '-') {
3689 range:
3690 if (state != CPS_START) return ONIGERR_INVALID_CODE_POINT_VALUE;
3691 if (PEND) return ONIGERR_INVALID_CODE_POINT_VALUE;
3692 end_digit = FALSE;
3693 state = CPS_RANGE;
3694 goto start;
3695 }
3696 else if (end_digit == TRUE) {
3697 if (base == 16) {
3698 if (IS_CODE_XDIGIT_ASCII(enc, c))
3699 return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
3700 }
3701 else if (base == 8) {
3702 if (IS_CODE_DIGIT_ASCII(enc, c) && c < '8')
3703 return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
3704 }
3705
3706 return ONIGERR_INVALID_CODE_POINT_VALUE;
3707 }
3708
3709 if (c == '}') goto end_char;
3710 if (c == '-') goto range;
3711
3712 PUNFETCH;
3713 r = scan_number_of_base(&p, end, 1, enc, &code, base);
3714 if (r != 0) return r;
3715 n++;
3716 end_digit = TRUE;
3717 state = (state == CPS_RANGE) ? CPS_EMPTY : CPS_START;
3718 }
3719
3720 return ONIGERR_INVALID_CODE_POINT_VALUE;
3721 }
3722
3723 static int
check_code_point_sequence(UChar * p,UChar * end,int base,OnigEncoding enc)3724 check_code_point_sequence(UChar* p, UChar* end, int base, OnigEncoding enc)
3725 {
3726 int r;
3727 int n;
3728 int end_digit;
3729 OnigCodePoint code;
3730 OnigCodePoint c;
3731 PFETCH_READY;
3732
3733 end_digit = FALSE;
3734 n = 0;
3735 while (! PEND) {
3736 PFETCH(c);
3737 if (c == '}') {
3738 end_char:
3739 return n;
3740 }
3741
3742 if (IS_CODE_POINT_DIVIDE(c)) {
3743 while (! PEND) {
3744 PFETCH(c);
3745 if (! IS_CODE_POINT_DIVIDE(c)) break;
3746 }
3747 if (IS_CODE_POINT_DIVIDE(c))
3748 return ONIGERR_INVALID_CODE_POINT_VALUE;
3749 }
3750 else if (end_digit == TRUE) {
3751 if (base == 16) {
3752 if (IS_CODE_XDIGIT_ASCII(enc, c))
3753 return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
3754 }
3755 else if (base == 8) {
3756 if (IS_CODE_DIGIT_ASCII(enc, c) && c < '8')
3757 return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
3758 }
3759
3760 return ONIGERR_INVALID_CODE_POINT_VALUE;
3761 }
3762
3763 if (c == '}') goto end_char;
3764
3765 PUNFETCH;
3766 r = scan_number_of_base(&p, end, 1, enc, &code, base);
3767 if (r != 0) return r;
3768 n++;
3769 end_digit = TRUE;
3770 }
3771
3772 return ONIGERR_INVALID_CODE_POINT_VALUE;
3773 }
3774
3775 static int
get_next_code_point(UChar ** src,UChar * end,int base,OnigEncoding enc,int in_cc,OnigCodePoint * rcode)3776 get_next_code_point(UChar** src, UChar* end, int base, OnigEncoding enc, int in_cc, OnigCodePoint* rcode)
3777 {
3778 int r;
3779 OnigCodePoint c;
3780 UChar* p = *src;
3781 PFETCH_READY;
3782
3783 while (! PEND) {
3784 PFETCH(c);
3785 if (! IS_CODE_POINT_DIVIDE(c)) {
3786 if (c == '}') {
3787 *src = p;
3788 return 1; /* end of sequence */
3789 }
3790 else if (c == '-' && in_cc == TRUE) {
3791 *src = p;
3792 return 2; /* range */
3793 }
3794 PUNFETCH;
3795 break;
3796 }
3797 else {
3798 if (PEND)
3799 return ONIGERR_INVALID_CODE_POINT_VALUE;
3800 }
3801 }
3802
3803 r = scan_number_of_base(&p, end, 1, enc, rcode, base);
3804 if (r != 0) return r;
3805
3806 *src = p;
3807 return ONIG_NORMAL;
3808 }
3809
3810
/* Write SIZE_CODE_POINT bytes taken from the object `code` at byte offset
   `pos` of `bbuf`.  `code` must be an lvalue because its address is taken. */
#define BB_WRITE_CODE_POINT(bbuf,pos,code) \
    BB_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT)
3813
3814 /* data format:
3815 [n][from-1][to-1][from-2][to-2] ... [from-n][to-n]
3816 (all data size is OnigCodePoint)
3817 */
3818 static int
new_code_range(BBuf ** pbuf)3819 new_code_range(BBuf** pbuf)
3820 {
3821 #define INIT_MULTI_BYTE_RANGE_SIZE (SIZE_CODE_POINT * 5)
3822 int r;
3823 OnigCodePoint n;
3824 BBuf* bbuf;
3825
3826 bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf));
3827 CHECK_NULL_RETURN_MEMERR(bbuf);
3828 r = BB_INIT(bbuf, INIT_MULTI_BYTE_RANGE_SIZE);
3829 if (r != 0) {
3830 xfree(bbuf);
3831 *pbuf = 0;
3832 return r;
3833 }
3834
3835 n = 0;
3836 BB_WRITE_CODE_POINT(bbuf, 0, n);
3837 return 0;
3838 }
3839
3840 static int
add_code_range_to_buf(BBuf ** pbuf,OnigCodePoint from,OnigCodePoint to)3841 add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to)
3842 {
3843 int r, inc_n, pos;
3844 int low, high, bound, x;
3845 OnigCodePoint n, *data;
3846 BBuf* bbuf;
3847
3848 if (from > to) {
3849 n = from; from = to; to = n;
3850 }
3851
3852 if (IS_NULL(*pbuf)) {
3853 r = new_code_range(pbuf);
3854 if (r != 0) return r;
3855 bbuf = *pbuf;
3856 n = 0;
3857 }
3858 else {
3859 bbuf = *pbuf;
3860 GET_CODE_POINT(n, bbuf->p);
3861 }
3862 data = (OnigCodePoint* )(bbuf->p);
3863 data++;
3864
3865 for (low = 0, bound = n; low < bound; ) {
3866 x = (low + bound) >> 1;
3867 if (from > data[x*2 + 1])
3868 low = x + 1;
3869 else
3870 bound = x;
3871 }
3872
3873 high = (to == ~((OnigCodePoint )0)) ? n : low;
3874 for (bound = n; high < bound; ) {
3875 x = (high + bound) >> 1;
3876 if (to + 1 >= data[x*2])
3877 high = x + 1;
3878 else
3879 bound = x;
3880 }
3881
3882 inc_n = low + 1 - high;
3883 if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM)
3884 return ONIGERR_TOO_MANY_MULTI_BYTE_RANGES;
3885
3886 if (inc_n != 1) {
3887 if (from > data[low*2])
3888 from = data[low*2];
3889 if (to < data[(high - 1)*2 + 1])
3890 to = data[(high - 1)*2 + 1];
3891 }
3892
3893 if (inc_n != 0 && (OnigCodePoint )high < n) {
3894 int from_pos = SIZE_CODE_POINT * (1 + high * 2);
3895 int to_pos = SIZE_CODE_POINT * (1 + (low + 1) * 2);
3896 int size = (n - high) * 2 * SIZE_CODE_POINT;
3897
3898 if (inc_n > 0) {
3899 BB_MOVE_RIGHT(bbuf, from_pos, to_pos, size);
3900 }
3901 else {
3902 BB_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos);
3903 }
3904 }
3905
3906 pos = SIZE_CODE_POINT * (1 + low * 2);
3907 BB_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2);
3908 BB_WRITE_CODE_POINT(bbuf, pos, from);
3909 BB_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to);
3910 n += inc_n;
3911 BB_WRITE_CODE_POINT(bbuf, 0, n);
3912
3913 return 0;
3914 }
3915
3916 static int
add_code_range(BBuf ** pbuf,ParseEnv * env,OnigCodePoint from,OnigCodePoint to)3917 add_code_range(BBuf** pbuf, ParseEnv* env, OnigCodePoint from, OnigCodePoint to)
3918 {
3919 if (from > to) {
3920 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
3921 return 0;
3922 else
3923 return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
3924 }
3925
3926 return add_code_range_to_buf(pbuf, from, to);
3927 }
3928
3929 static int
not_code_range_buf(OnigEncoding enc,BBuf * bbuf,BBuf ** pbuf)3930 not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf)
3931 {
3932 int r, i, n;
3933 OnigCodePoint pre, from, *data, to = 0;
3934
3935 *pbuf = (BBuf* )NULL;
3936 if (IS_NULL(bbuf)) {
3937 set_all:
3938 return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
3939 }
3940
3941 data = (OnigCodePoint* )(bbuf->p);
3942 GET_CODE_POINT(n, data);
3943 data++;
3944 if (n <= 0) goto set_all;
3945
3946 r = 0;
3947 pre = MBCODE_START_POS(enc);
3948 for (i = 0; i < n; i++) {
3949 from = data[i*2];
3950 to = data[i*2+1];
3951 if (pre <= from - 1) {
3952 r = add_code_range_to_buf(pbuf, pre, from - 1);
3953 if (r != 0) {
3954 bbuf_free(*pbuf);
3955 return r;
3956 }
3957 }
3958 if (to == ~((OnigCodePoint )0)) break;
3959 pre = to + 1;
3960 }
3961 if (to < ~((OnigCodePoint )0)) {
3962 r = add_code_range_to_buf(pbuf, to + 1, ~((OnigCodePoint )0));
3963 if (r != 0) bbuf_free(*pbuf);
3964 }
3965 return r;
3966 }
3967
/* Exchange two (range buffer, negation flag) pairs in place.  All four
   arguments must be assignable lvalues. */
#define SWAP_BB_NOT(bbuf1, not1, bbuf2, not2) do {\
  BBuf *tbuf; \
  int  tnot; \
  tnot = not1;  not1  = not2;  not2  = tnot; \
  tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; \
} while (0)
3974
3975 static int
or_code_range_buf(OnigEncoding enc,BBuf * bbuf1,int not1,BBuf * bbuf2,int not2,BBuf ** pbuf)3976 or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,
3977 BBuf* bbuf2, int not2, BBuf** pbuf)
3978 {
3979 int r;
3980 OnigCodePoint i, n1, *data1;
3981 OnigCodePoint from, to;
3982
3983 *pbuf = (BBuf* )NULL;
3984 if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) {
3985 if (not1 != 0 || not2 != 0)
3986 return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
3987 return 0;
3988 }
3989
3990 r = 0;
3991 if (IS_NULL(bbuf2))
3992 SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);
3993
3994 if (IS_NULL(bbuf1)) {
3995 if (not1 != 0) {
3996 return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
3997 }
3998 else {
3999 if (not2 == 0) {
4000 return bbuf_clone(pbuf, bbuf2);
4001 }
4002 else {
4003 return not_code_range_buf(enc, bbuf2, pbuf);
4004 }
4005 }
4006 }
4007
4008 if (not1 != 0)
4009 SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);
4010
4011 data1 = (OnigCodePoint* )(bbuf1->p);
4012 GET_CODE_POINT(n1, data1);
4013 data1++;
4014
4015 if (not2 == 0 && not1 == 0) { /* 1 OR 2 */
4016 r = bbuf_clone(pbuf, bbuf2);
4017 }
4018 else if (not1 == 0) { /* 1 OR (not 2) */
4019 r = not_code_range_buf(enc, bbuf2, pbuf);
4020 }
4021 if (r != 0) return r;
4022
4023 for (i = 0; i < n1; i++) {
4024 from = data1[i*2];
4025 to = data1[i*2+1];
4026 r = add_code_range_to_buf(pbuf, from, to);
4027 if (r != 0) return r;
4028 }
4029 return 0;
4030 }
4031
4032 static int
and_code_range1(BBuf ** pbuf,OnigCodePoint from1,OnigCodePoint to1,OnigCodePoint * data,int n)4033 and_code_range1(BBuf** pbuf, OnigCodePoint from1, OnigCodePoint to1,
4034 OnigCodePoint* data, int n)
4035 {
4036 int i, r;
4037 OnigCodePoint from2, to2;
4038
4039 for (i = 0; i < n; i++) {
4040 from2 = data[i*2];
4041 to2 = data[i*2+1];
4042 if (from2 < from1) {
4043 if (to2 < from1) continue;
4044 else {
4045 from1 = to2 + 1;
4046 }
4047 }
4048 else if (from2 <= to1) {
4049 if (to2 < to1) {
4050 if (from1 <= from2 - 1) {
4051 r = add_code_range_to_buf(pbuf, from1, from2-1);
4052 if (r != 0) return r;
4053 }
4054 from1 = to2 + 1;
4055 }
4056 else {
4057 to1 = from2 - 1;
4058 }
4059 }
4060 else {
4061 from1 = from2;
4062 }
4063 if (from1 > to1) break;
4064 }
4065 if (from1 <= to1) {
4066 r = add_code_range_to_buf(pbuf, from1, to1);
4067 if (r != 0) return r;
4068 }
4069 return 0;
4070 }
4071
4072 static int
and_code_range_buf(BBuf * bbuf1,int not1,BBuf * bbuf2,int not2,BBuf ** pbuf)4073 and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)
4074 {
4075 int r;
4076 OnigCodePoint i, j, n1, n2, *data1, *data2;
4077 OnigCodePoint from, to, from1, to1, from2, to2;
4078
4079 *pbuf = (BBuf* )NULL;
4080 if (IS_NULL(bbuf1)) {
4081 if (not1 != 0 && IS_NOT_NULL(bbuf2)) /* not1 != 0 -> not2 == 0 */
4082 return bbuf_clone(pbuf, bbuf2);
4083 return 0;
4084 }
4085 else if (IS_NULL(bbuf2)) {
4086 if (not2 != 0)
4087 return bbuf_clone(pbuf, bbuf1);
4088 return 0;
4089 }
4090
4091 if (not1 != 0)
4092 SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);
4093
4094 data1 = (OnigCodePoint* )(bbuf1->p);
4095 data2 = (OnigCodePoint* )(bbuf2->p);
4096 GET_CODE_POINT(n1, data1);
4097 GET_CODE_POINT(n2, data2);
4098 data1++;
4099 data2++;
4100
4101 if (not2 == 0 && not1 == 0) { /* 1 AND 2 */
4102 for (i = 0; i < n1; i++) {
4103 from1 = data1[i*2];
4104 to1 = data1[i*2+1];
4105 for (j = 0; j < n2; j++) {
4106 from2 = data2[j*2];
4107 to2 = data2[j*2+1];
4108 if (from2 > to1) break;
4109 if (to2 < from1) continue;
4110 from = MAX(from1, from2);
4111 to = MIN(to1, to2);
4112 r = add_code_range_to_buf(pbuf, from, to);
4113 if (r != 0) return r;
4114 }
4115 }
4116 }
4117 else if (not1 == 0) { /* 1 AND (not 2) */
4118 for (i = 0; i < n1; i++) {
4119 from1 = data1[i*2];
4120 to1 = data1[i*2+1];
4121 r = and_code_range1(pbuf, from1, to1, data2, n2);
4122 if (r != 0) return r;
4123 }
4124 }
4125
4126 return 0;
4127 }
4128
4129 static int
and_cclass(CClassNode * dest,CClassNode * cc,OnigEncoding enc)4130 and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
4131 {
4132 int r, not1, not2;
4133 BBuf *buf1, *buf2, *pbuf;
4134 BitSetRef bsr1, bsr2;
4135 BitSet bs1, bs2;
4136
4137 not1 = IS_NCCLASS_NOT(dest);
4138 bsr1 = dest->bs;
4139 buf1 = dest->mbuf;
4140 not2 = IS_NCCLASS_NOT(cc);
4141 bsr2 = cc->bs;
4142 buf2 = cc->mbuf;
4143
4144 if (not1 != 0) {
4145 bitset_invert_to(bsr1, bs1);
4146 bsr1 = bs1;
4147 }
4148 if (not2 != 0) {
4149 bitset_invert_to(bsr2, bs2);
4150 bsr2 = bs2;
4151 }
4152 bitset_and(bsr1, bsr2);
4153 if (bsr1 != dest->bs) {
4154 bitset_copy(dest->bs, bsr1);
4155 }
4156 if (not1 != 0) {
4157 bitset_invert(dest->bs);
4158 }
4159
4160 if (! ONIGENC_IS_SINGLEBYTE(enc)) {
4161 if (not1 != 0 && not2 != 0) {
4162 r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf);
4163 }
4164 else {
4165 r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf);
4166 if (r == 0 && not1 != 0) {
4167 BBuf *tbuf;
4168 r = not_code_range_buf(enc, pbuf, &tbuf);
4169 if (r != 0) {
4170 bbuf_free(pbuf);
4171 return r;
4172 }
4173 bbuf_free(pbuf);
4174 pbuf = tbuf;
4175 }
4176 }
4177 if (r != 0) return r;
4178
4179 dest->mbuf = pbuf;
4180 bbuf_free(buf1);
4181 return r;
4182 }
4183 return 0;
4184 }
4185
4186 static int
or_cclass(CClassNode * dest,CClassNode * cc,OnigEncoding enc)4187 or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
4188 {
4189 int r, not1, not2;
4190 BBuf *buf1, *buf2, *pbuf;
4191 BitSetRef bsr1, bsr2;
4192 BitSet bs1, bs2;
4193
4194 not1 = IS_NCCLASS_NOT(dest);
4195 bsr1 = dest->bs;
4196 buf1 = dest->mbuf;
4197 not2 = IS_NCCLASS_NOT(cc);
4198 bsr2 = cc->bs;
4199 buf2 = cc->mbuf;
4200
4201 if (not1 != 0) {
4202 bitset_invert_to(bsr1, bs1);
4203 bsr1 = bs1;
4204 }
4205 if (not2 != 0) {
4206 bitset_invert_to(bsr2, bs2);
4207 bsr2 = bs2;
4208 }
4209 bitset_or(bsr1, bsr2);
4210 if (bsr1 != dest->bs) {
4211 bitset_copy(dest->bs, bsr1);
4212 }
4213 if (not1 != 0) {
4214 bitset_invert(dest->bs);
4215 }
4216
4217 if (! ONIGENC_IS_SINGLEBYTE(enc)) {
4218 if (not1 != 0 && not2 != 0) {
4219 r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf);
4220 }
4221 else {
4222 r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf);
4223 if (r == 0 && not1 != 0) {
4224 BBuf *tbuf;
4225 r = not_code_range_buf(enc, pbuf, &tbuf);
4226 if (r != 0) {
4227 bbuf_free(pbuf);
4228 return r;
4229 }
4230 bbuf_free(pbuf);
4231 pbuf = tbuf;
4232 }
4233 }
4234 if (r != 0) return r;
4235
4236 dest->mbuf = pbuf;
4237 bbuf_free(buf1);
4238 return r;
4239 }
4240 else
4241 return 0;
4242 }
4243
4244 static OnigCodePoint
conv_backslash_value(OnigCodePoint c,ParseEnv * env)4245 conv_backslash_value(OnigCodePoint c, ParseEnv* env)
4246 {
4247 if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) {
4248 switch (c) {
4249 case 'n': return '\n';
4250 case 't': return '\t';
4251 case 'r': return '\r';
4252 case 'f': return '\f';
4253 case 'a': return '\007';
4254 case 'b': return '\010';
4255 case 'e': return '\033';
4256 case 'v':
4257 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB))
4258 return '\v';
4259 break;
4260
4261 default:
4262 break;
4263 }
4264 }
4265 return c;
4266 }
4267
4268 static int
is_invalid_quantifier_target(Node * node)4269 is_invalid_quantifier_target(Node* node)
4270 {
4271 switch (NODE_TYPE(node)) {
4272 case NODE_ANCHOR:
4273 case NODE_GIMMICK:
4274 return 1;
4275 break;
4276
4277 case NODE_BAG:
4278 /* allow enclosed elements */
4279 /* return is_invalid_quantifier_target(NODE_BODY(node)); */
4280 break;
4281
4282 case NODE_LIST:
4283 do {
4284 if (! is_invalid_quantifier_target(NODE_CAR(node))) return 0;
4285 } while (IS_NOT_NULL(node = NODE_CDR(node)));
4286 return 0;
4287 break;
4288
4289 case NODE_ALT:
4290 do {
4291 if (is_invalid_quantifier_target(NODE_CAR(node))) return 1;
4292 } while (IS_NOT_NULL(node = NODE_CDR(node)));
4293 break;
4294
4295 default:
4296 break;
4297 }
4298 return 0;
4299 }
4300
4301 /* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */
4302 static int
quantifier_type_num(QuantNode * q)4303 quantifier_type_num(QuantNode* q)
4304 {
4305 if (q->greedy) {
4306 if (q->lower == 0) {
4307 if (q->upper == 1) return 0;
4308 else if (IS_INFINITE_REPEAT(q->upper)) return 1;
4309 }
4310 else if (q->lower == 1) {
4311 if (IS_INFINITE_REPEAT(q->upper)) return 2;
4312 }
4313 }
4314 else {
4315 if (q->lower == 0) {
4316 if (q->upper == 1) return 3;
4317 else if (IS_INFINITE_REPEAT(q->upper)) return 4;
4318 }
4319 else if (q->lower == 1) {
4320 if (IS_INFINITE_REPEAT(q->upper)) return 5;
4321 }
4322 }
4323 return -1;
4324 }
4325
4326
/* Actions that onig_reduce_nested_quantifier() can apply to a quantifier
   (parent) whose body is itself a quantifier (child). */
enum ReduceType {
  RQ_ASIS = 0, /* as is */
  RQ_DEL  = 1, /* delete parent */
  RQ_A,        /* to '*'    */
  RQ_P,        /* to '+'    */
  RQ_AQ,       /* to '*?'   */
  RQ_QQ,       /* to '??'   */
  RQ_P_QQ,     /* to '+)??' : child becomes '+', parent becomes '??' */
};
4336
/* Reduction to apply, indexed as [child type][parent type], where both
   indices are values returned by quantifier_type_num().
   The trailing comment on each row names the child quantifier. */
static enum ReduceType ReduceTypeTable[6][6] = {
  {RQ_DEL,  RQ_A,    RQ_A,   RQ_QQ,   RQ_AQ,   RQ_ASIS}, /* '?'  */
  {RQ_DEL,  RQ_DEL,  RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL},  /* '*'  */
  {RQ_A,    RQ_A,    RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL},  /* '+'  */
  {RQ_DEL,  RQ_AQ,   RQ_AQ,  RQ_DEL,  RQ_AQ,   RQ_AQ},   /* '??' */
  {RQ_DEL,  RQ_DEL,  RQ_DEL, RQ_DEL,  RQ_DEL,  RQ_DEL},  /* '*?' */
  {RQ_ASIS, RQ_A,    RQ_P,   RQ_AQ,   RQ_AQ,   RQ_DEL}   /* '+?' */
};
4345
4346 extern int
onig_reduce_nested_quantifier(Node * pnode)4347 onig_reduce_nested_quantifier(Node* pnode)
4348 {
4349 int pnum, cnum;
4350 QuantNode *p, *c;
4351 Node* cnode;
4352
4353 cnode = NODE_BODY(pnode);
4354
4355 p = QUANT_(pnode);
4356 c = QUANT_(cnode);
4357 pnum = quantifier_type_num(p);
4358 cnum = quantifier_type_num(c);
4359 if (pnum < 0 || cnum < 0) {
4360 if (p->lower == p->upper && c->lower == c->upper) {
4361 int n = onig_positive_int_multiply(p->lower, c->lower);
4362 if (n < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
4363
4364 p->lower = p->upper = n;
4365 NODE_BODY(pnode) = NODE_BODY(cnode);
4366 goto remove_cnode;
4367 }
4368
4369 return 0;
4370 }
4371
4372 switch(ReduceTypeTable[cnum][pnum]) {
4373 case RQ_DEL:
4374 *pnode = *cnode;
4375 goto remove_cnode;
4376 break;
4377 case RQ_A:
4378 NODE_BODY(pnode) = NODE_BODY(cnode);
4379 p->lower = 0; p->upper = INFINITE_REPEAT; p->greedy = 1;
4380 goto remove_cnode;
4381 break;
4382 case RQ_P:
4383 NODE_BODY(pnode) = NODE_BODY(cnode);
4384 p->lower = 1; p->upper = INFINITE_REPEAT; p->greedy = 1;
4385 goto remove_cnode;
4386 break;
4387 case RQ_AQ:
4388 NODE_BODY(pnode) = NODE_BODY(cnode);
4389 p->lower = 0; p->upper = INFINITE_REPEAT; p->greedy = 0;
4390 goto remove_cnode;
4391 break;
4392 case RQ_QQ:
4393 NODE_BODY(pnode) = NODE_BODY(cnode);
4394 p->lower = 0; p->upper = 1; p->greedy = 0;
4395 goto remove_cnode;
4396 break;
4397 case RQ_P_QQ:
4398 p->lower = 0; p->upper = 1; p->greedy = 0;
4399 c->lower = 1; c->upper = INFINITE_REPEAT; c->greedy = 1;
4400 break;
4401 case RQ_ASIS:
4402 break;
4403 }
4404
4405 return 0;
4406
4407 remove_cnode:
4408 NODE_BODY(cnode) = NULL_NODE;
4409 onig_node_free(cnode);
4410 return 0;
4411 }
4412
4413 static int
node_new_general_newline(Node ** node,ParseEnv * env)4414 node_new_general_newline(Node** node, ParseEnv* env)
4415 {
4416 int r;
4417 int dlen, alen;
4418 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN * 2];
4419 Node* crnl;
4420 Node* ncc;
4421 Node* x;
4422 CClassNode* cc;
4423
4424 dlen = ONIGENC_CODE_TO_MBC(env->enc, 0x0d, buf);
4425 if (dlen < 0) return dlen;
4426 alen = ONIGENC_CODE_TO_MBC(env->enc, NEWLINE_CODE, buf + dlen);
4427 if (alen < 0) return alen;
4428
4429 crnl = node_new_str_crude(buf, buf + dlen + alen, ONIG_OPTION_NONE);
4430 CHECK_NULL_RETURN_MEMERR(crnl);
4431
4432 ncc = node_new_cclass();
4433 if (IS_NULL(ncc)) goto err2;
4434
4435 cc = CCLASS_(ncc);
4436 if (dlen == 1) {
4437 bitset_set_range(cc->bs, NEWLINE_CODE, 0x0d);
4438 }
4439 else {
4440 r = add_code_range(&(cc->mbuf), env, NEWLINE_CODE, 0x0d);
4441 if (r != 0) {
4442 err1:
4443 onig_node_free(ncc);
4444 err2:
4445 onig_node_free(crnl);
4446 return ONIGERR_MEMORY;
4447 }
4448 }
4449
4450 if (ONIGENC_IS_UNICODE_ENCODING(env->enc)) {
4451 r = add_code_range(&(cc->mbuf), env, 0x85, 0x85);
4452 if (r != 0) goto err1;
4453 r = add_code_range(&(cc->mbuf), env, 0x2028, 0x2029);
4454 if (r != 0) goto err1;
4455 }
4456
4457 x = node_new_bag_if_else(crnl, NULL_NODE, ncc);
4458 if (IS_NULL(x)) goto err1;
4459
4460 *node = x;
4461 return 0;
4462 }
4463
/* Token kinds stored in PToken.type.  The first group lists the general
   tokens; the group after "in cc" lists the character-class tokens. */
enum TokenSyms {
  TK_EOT      = 0,   /* end of token */
  TK_CRUDE_BYTE = 1,
  TK_CHAR,
  TK_STRING,
  TK_CODE_POINT,
  TK_ANYCHAR,
  TK_CHAR_TYPE,
  TK_BACKREF,
  TK_CALL,
  TK_ANCHOR,
  TK_REPEAT,
  TK_INTERVAL,        /* set by fetch_interval() */
  TK_ANYCHAR_ANYTIME,  /* SQL '%' == .* */
  TK_ALT,
  TK_SUBEXP_OPEN,
  TK_SUBEXP_CLOSE,
  TK_OPEN_CC,
  TK_QUOTE_OPEN,
  TK_CHAR_PROPERTY,    /* \p{...}, \P{...} */
  TK_KEEP,             /* \K */
  TK_GENERAL_NEWLINE,  /* \R */
  TK_NO_NEWLINE,       /* \N */
  TK_TRUE_ANYCHAR,     /* \O */
  TK_TEXT_SEGMENT,     /* \X */

  /* in cc */
  TK_CC_CLOSE,
  TK_CC_RANGE,
  TK_CC_POSIX_BRACKET_OPEN,
  TK_CC_AND,           /* && */
  TK_CC_OPEN_CC        /* [ */
};
4497
/* One parsed token.  `type` says what was read; the member of `u` that
   carries the payload depends on the token kind (for example, TK_INTERVAL
   uses u.repeat -- see fetch_interval()). */
typedef struct {
  enum TokenSyms type;
  int code_point_continue;  /* cleared by ptoken_init() */
  int escaped;
  int base_num;   /* is number: 8, 16 (used in [....]) */
  UChar* backp;
  union {
    UChar* s;
    UChar byte;
    OnigCodePoint code;
    int   anchor;
    int   subtype;
    struct {
      int lower;
      int upper;
      int greedy;
      int possessive;
    } repeat;
    struct {
      int  num;
      int  ref1;
      int* refs;
      int  by_name;
#ifdef USE_BACKREF_WITH_LEVEL
      int  exist_level;
      int  level;   /* \k<name+n> */
#endif
    } backref;
    struct {
      UChar* name;
      UChar* name_end;
      int    gnum;
      int    by_number;
    } call;
    struct {
      int ctype;
      int not;
    } prop;
  } u;
} PToken;
4538
/* Prepare a token for use: only code_point_continue is reset; every other
   field is left untouched. */
static void
ptoken_init(PToken* tok)
{
  tok->code_point_continue = 0;
}
4544
4545 static int
fetch_interval(UChar ** src,UChar * end,PToken * tok,ParseEnv * env)4546 fetch_interval(UChar** src, UChar* end, PToken* tok, ParseEnv* env)
4547 {
4548 int low, up, syn_allow, non_low = 0;
4549 int r = 0;
4550 OnigCodePoint c;
4551 OnigEncoding enc = env->enc;
4552 UChar* p = *src;
4553 PFETCH_READY;
4554
4555 syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL);
4556
4557 if (PEND) {
4558 if (syn_allow)
4559 return 1; /* "....{" : OK! */
4560 else
4561 return ONIGERR_END_PATTERN_AT_LEFT_BRACE; /* "....{" syntax error */
4562 }
4563
4564 if (! syn_allow) {
4565 c = PPEEK;
4566 if (c == ')' || c == '(' || c == '|') {
4567 return ONIGERR_END_PATTERN_AT_LEFT_BRACE;
4568 }
4569 }
4570
4571 low = scan_number(&p, end, env->enc);
4572 if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
4573 if (low > ONIG_MAX_REPEAT_NUM)
4574 return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
4575
4576 if (p == *src) { /* can't read low */
4577 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV)) {
4578 /* allow {,n} as {0,n} */
4579 low = 0;
4580 non_low = 1;
4581 }
4582 else
4583 goto invalid;
4584 }
4585
4586 if (PEND) goto invalid;
4587 PFETCH(c);
4588 if (c == ',') {
4589 UChar* prev = p;
4590 up = scan_number(&p, end, env->enc);
4591 if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
4592 if (up > ONIG_MAX_REPEAT_NUM)
4593 return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
4594
4595 if (p == prev) {
4596 if (non_low != 0)
4597 goto invalid;
4598 up = INFINITE_REPEAT; /* {n,} : {n,infinite} */
4599 }
4600 }
4601 else {
4602 if (non_low != 0)
4603 goto invalid;
4604
4605 PUNFETCH;
4606 up = low; /* {n} : exact n times */
4607 r = 2; /* fixed */
4608 }
4609
4610 if (PEND) goto invalid;
4611 PFETCH(c);
4612 if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) {
4613 if (c != MC_ESC(env->syntax) || PEND) goto invalid;
4614 PFETCH(c);
4615 }
4616 if (c != '}') goto invalid;
4617
4618 if (!IS_INFINITE_REPEAT(up) && low > up) {
4619 /* {n,m}+ supported case */
4620 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL))
4621 return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE;
4622
4623 tok->u.repeat.possessive = 1;
4624 {
4625 int tmp;
4626 tmp = low; low = up; up = tmp;
4627 }
4628 }
4629 else
4630 tok->u.repeat.possessive = 0;
4631
4632 tok->type = TK_INTERVAL;
4633 tok->u.repeat.lower = low;
4634 tok->u.repeat.upper = up;
4635 *src = p;
4636 return r; /* 0: normal {n,m}, 2: fixed {n} */
4637
4638 invalid:
4639 if (syn_allow) {
4640 /* *src = p; */ /* !!! Don't do this line !!! */
4641 return 1; /* OK */
4642 }
4643 else
4644 return ONIGERR_INVALID_REPEAT_RANGE_PATTERN;
4645 }
4646
4647 /* \M-, \C-, \c, or \... */
4648 static int
fetch_escaped_value_raw(UChar ** src,UChar * end,ParseEnv * env,OnigCodePoint * val)4649 fetch_escaped_value_raw(UChar** src, UChar* end, ParseEnv* env,
4650 OnigCodePoint* val)
4651 {
4652 int v;
4653 OnigCodePoint c;
4654 OnigEncoding enc = env->enc;
4655 UChar* p = *src;
4656
4657 if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
4658
4659 PFETCH_S(c);
4660 switch (c) {
4661 case 'M':
4662 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META)) {
4663 if (PEND) return ONIGERR_END_PATTERN_AT_META;
4664 PFETCH_S(c);
4665 if (c != '-') return ONIGERR_META_CODE_SYNTAX;
4666 if (PEND) return ONIGERR_END_PATTERN_AT_META;
4667 PFETCH_S(c);
4668 if (c == MC_ESC(env->syntax)) {
4669 v = fetch_escaped_value_raw(&p, end, env, &c);
4670 if (v < 0) return v;
4671 }
4672 c = ((c & 0xff) | 0x80);
4673 }
4674 else
4675 goto backslash;
4676 break;
4677
4678 case 'C':
4679 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) {
4680 if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
4681 PFETCH_S(c);
4682 if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX;
4683 goto control;
4684 }
4685 else
4686 goto backslash;
4687
4688 case 'c':
4689 if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_C_CONTROL)) {
4690 control:
4691 if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
4692 PFETCH_S(c);
4693 if (c == '?') {
4694 c = 0177;
4695 }
4696 else {
4697 if (c == MC_ESC(env->syntax)) {
4698 v = fetch_escaped_value_raw(&p, end, env, &c);
4699 if (v < 0) return v;
4700 }
4701 c &= 0x9f;
4702 }
4703 break;
4704 }
4705 /* fall through */
4706
4707 default:
4708 {
4709 backslash:
4710 c = conv_backslash_value(c, env);
4711 }
4712 break;
4713 }
4714
4715 *src = p;
4716 *val = c;
4717 return 0;
4718 }
4719
4720 static int
fetch_escaped_value(UChar ** src,UChar * end,ParseEnv * env,OnigCodePoint * val)4721 fetch_escaped_value(UChar** src, UChar* end, ParseEnv* env, OnigCodePoint* val)
4722 {
4723 int r;
4724 int len;
4725
4726 r = fetch_escaped_value_raw(src, end, env, val);
4727 if (r != 0) return r;
4728
4729 len = ONIGENC_CODE_TO_MBCLEN(env->enc, *val);
4730 if (len < 0) return len;
4731
4732 return 0;
4733 }
4734
/* Forward declaration: the function has internal linkage, so its definition
   must appear elsewhere in this file. */
static int fetch_token(PToken* tok, UChar** src, UChar* end, ParseEnv* env);
4736
4737 static OnigCodePoint
get_name_end_code_point(OnigCodePoint start)4738 get_name_end_code_point(OnigCodePoint start)
4739 {
4740 switch (start) {
4741 case '<': return (OnigCodePoint )'>'; break;
4742 case '\'': return (OnigCodePoint )'\''; break;
4743 case '(': return (OnigCodePoint )')'; break;
4744 default:
4745 break;
4746 }
4747
4748 return (OnigCodePoint )0;
4749 }
4750
/* How a group reference was written, as reported by the name fetchers. */
enum REF_NUM {
  IS_NOT_NUM = 0,  /* not a number (a name) */
  IS_ABS_NUM = 1,  /* starts with a digit: absolute group number */
  IS_REL_NUM = 2   /* starts with '+' or '-': relative group number */
};
4756
#ifdef USE_BACKREF_WITH_LEVEL
/*
   Accepted forms:
   \k<name+n>, \k<name-n>
   \k<num+n>,  \k<num-n>
   \k<-num+n>, \k<-num-n>
   \k<+num+n>, \k<+num-n>

   Parse a group name or number with an optional trailing "+n" / "-n" level,
   up to the closing delimiter that matches start_code.

   On success:
     *rname_end  end of the name/number part (before any level suffix)
     *rback_num  signed group number when *num_type != IS_NOT_NUM
     *rlevel     signed level, written only when a level suffix was read
     *num_type   IS_NOT_NUM / IS_ABS_NUM / IS_REL_NUM
     *src        advanced past the closing delimiter
   Returns 1 when a level suffix exists, 0 when it does not, or an ONIGERR_*
   code.  For errors detected after the scan the offending text is recorded
   through onig_scan_env_set_error_string(); the early "empty name" and
   "number too big" returns skip that step.
*/
static int
fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
                      UChar** rname_end, ParseEnv* env,
                      int* rback_num, int* rlevel, enum REF_NUM* num_type)
{
  int r, sign, exist_level;
  int digit_count;
  OnigCodePoint end_code;
  OnigCodePoint c = 0;
  OnigEncoding enc = env->enc;
  UChar *name_end;
  UChar *pnum_head;
  UChar *p = *src;
  PFETCH_READY;

  *rback_num = 0;
  exist_level = 0;
  *num_type = IS_NOT_NUM;
  sign = 1;
  pnum_head = *src;

  end_code = get_name_end_code_point(start_code);

  digit_count = 0;
  name_end = end;
  r = 0;
  if (PEND) {
    return ONIGERR_EMPTY_GROUP_NAME;
  }
  else {
    /* the first character decides between name, absolute and relative number */
    PFETCH(c);
    if (c == end_code)
      return ONIGERR_EMPTY_GROUP_NAME;

    if (IS_CODE_DIGIT_ASCII(enc, c)) {
      *num_type = IS_ABS_NUM;
      digit_count++;
    }
    else if (c == '-') {
      *num_type = IS_REL_NUM;
      sign = -1;
      pnum_head = p;  /* digits start after the sign */
    }
    else if (c == '+') {
      *num_type = IS_REL_NUM;
      sign = 1;
      pnum_head = p;  /* digits start after the sign */
    }
    else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
      r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
    }
  }

  /* scan the rest of the name/number; errors are remembered in r and the
     scan keeps going until a terminator is found */
  while (!PEND) {
    name_end = p;
    PFETCH(c);
    if (c == end_code || c == ')' || c == '+' || c == '-') {
      /* a sign with no digits after it is not a number */
      if (*num_type != IS_NOT_NUM && digit_count == 0)
        r = ONIGERR_INVALID_GROUP_NAME;
      break;
    }

    if (*num_type != IS_NOT_NUM) {
      if (IS_CODE_DIGIT_ASCII(enc, c)) {
        digit_count++;
      }
      else {
        r = ONIGERR_INVALID_GROUP_NAME;
        *num_type = IS_NOT_NUM;
      }
    }
    else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
      r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
    }
  }

  if (r == 0 && c != end_code) {
    if (c == '+' || c == '-') {
      /* level suffix: sign followed by at least one digit */
      int level;
      int flag = (c == '-' ? -1 : 1);

      if (PEND) {
        r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
        goto end;
      }
      PFETCH(c);
      if (! IS_CODE_DIGIT_ASCII(enc, c)) goto err;
      PUNFETCH;
      level = scan_number(&p, end, enc);
      if (level < 0) return ONIGERR_TOO_BIG_NUMBER;
      *rlevel = (level * flag);
      exist_level = 1;

      if (!PEND) {
        PFETCH(c);
        if (c == end_code)
          goto end;
      }
    }

    /* reached when the closing delimiter is missing or misplaced */
  err:
    name_end = end;
    /* also the target of a backward jump from the `end` section below */
  err2:
    r = ONIGERR_INVALID_GROUP_NAME;
  }

 end:
  if (r == 0) {
    if (*num_type != IS_NOT_NUM) {
      /* re-read the digits between pnum_head and name_end as the number */
      *rback_num = scan_number(&pnum_head, name_end, enc);
      if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
      else if (*rback_num == 0) {
        /* a relative reference of zero is rejected */
        if (*num_type == IS_REL_NUM)
          goto err2;
      }

      *rback_num *= sign;
    }

    *rname_end = name_end;
    *src = p;
    return (exist_level ? 1 : 0);
  }
  else {
    onig_scan_env_set_error_string(env, r, *src, name_end);
    return r;
  }
}
#endif /* USE_BACKREF_WITH_LEVEL */
4893
4894 /*
4895 ref: 0 -> define name (don't allow number name)
4896 1 -> reference name (allow number name)
4897 */
4898 static int
fetch_name(OnigCodePoint start_code,UChar ** src,UChar * end,UChar ** rname_end,ParseEnv * env,int * rback_num,enum REF_NUM * num_type,int is_ref)4899 fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
4900 UChar** rname_end, ParseEnv* env, int* rback_num,
4901 enum REF_NUM* num_type, int is_ref)
4902 {
4903 int r, sign;
4904 int digit_count;
4905 OnigCodePoint end_code;
4906 OnigCodePoint c = 0;
4907 OnigEncoding enc = env->enc;
4908 UChar *name_end;
4909 UChar *pnum_head;
4910 UChar *p = *src;
4911
4912 *rback_num = 0;
4913
4914 end_code = get_name_end_code_point(start_code);
4915
4916 digit_count = 0;
4917 name_end = end;
4918 pnum_head = *src;
4919 r = 0;
4920 *num_type = IS_NOT_NUM;
4921 sign = 1;
4922 if (PEND) {
4923 return ONIGERR_EMPTY_GROUP_NAME;
4924 }
4925 else {
4926 PFETCH_S(c);
4927 if (c == end_code)
4928 return ONIGERR_EMPTY_GROUP_NAME;
4929
4930 if (IS_CODE_DIGIT_ASCII(enc, c)) {
4931 if (is_ref == TRUE)
4932 *num_type = IS_ABS_NUM;
4933 else {
4934 r = ONIGERR_INVALID_GROUP_NAME;
4935 }
4936 digit_count++;
4937 }
4938 else if (c == '-') {
4939 if (is_ref == TRUE) {
4940 *num_type = IS_REL_NUM;
4941 sign = -1;
4942 pnum_head = p;
4943 }
4944 else {
4945 r = ONIGERR_INVALID_GROUP_NAME;
4946 }
4947 }
4948 else if (c == '+') {
4949 if (is_ref == TRUE) {
4950 *num_type = IS_REL_NUM;
4951 sign = 1;
4952 pnum_head = p;
4953 }
4954 else {
4955 r = ONIGERR_INVALID_GROUP_NAME;
4956 }
4957 }
4958 else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
4959 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
4960 }
4961 }
4962
4963 if (r == 0) {
4964 while (!PEND) {
4965 name_end = p;
4966 PFETCH_S(c);
4967 if (c == end_code || c == ')') {
4968 if (*num_type != IS_NOT_NUM && digit_count == 0)
4969 r = ONIGERR_INVALID_GROUP_NAME;
4970 break;
4971 }
4972
4973 if (*num_type != IS_NOT_NUM) {
4974 if (IS_CODE_DIGIT_ASCII(enc, c)) {
4975 digit_count++;
4976 }
4977 else {
4978 if (!ONIGENC_IS_CODE_WORD(enc, c))
4979 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
4980 else
4981 r = ONIGERR_INVALID_GROUP_NAME;
4982
4983 *num_type = IS_NOT_NUM;
4984 }
4985 }
4986 else {
4987 if (!ONIGENC_IS_CODE_WORD(enc, c)) {
4988 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
4989 }
4990 }
4991 }
4992
4993 if (c != end_code) {
4994 r = ONIGERR_INVALID_GROUP_NAME;
4995 goto err;
4996 }
4997
4998 if (*num_type != IS_NOT_NUM) {
4999 *rback_num = scan_number(&pnum_head, name_end, enc);
5000 if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
5001 else if (*rback_num == 0) {
5002 if (*num_type == IS_REL_NUM) {
5003 r = ONIGERR_INVALID_GROUP_NAME;
5004 goto err;
5005 }
5006 }
5007
5008 *rback_num *= sign;
5009 }
5010
5011 *rname_end = name_end;
5012 *src = p;
5013 return 0;
5014 }
5015 else {
5016 while (!PEND) {
5017 name_end = p;
5018 PFETCH_S(c);
5019 if (c == end_code || c == ')')
5020 break;
5021 }
5022 if (PEND)
5023 name_end = end;
5024
5025 err:
5026 onig_scan_env_set_error_string(env, r, *src, name_end);
5027 return r;
5028 }
5029 }
5030
5031 static void
CC_ESC_WARN(ParseEnv * env,UChar * c)5032 CC_ESC_WARN(ParseEnv* env, UChar *c)
5033 {
5034 if (onig_warn == onig_null_warn) return ;
5035
5036 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) &&
5037 IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) {
5038 UChar buf[WARN_BUFSIZE];
5039 onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
5040 env->pattern, env->pattern_end,
5041 (UChar* )"character class has '%s' without escape",
5042 c);
5043 (*onig_warn)((char* )buf);
5044 }
5045 }
5046
5047 static void
CLOSE_BRACKET_WITHOUT_ESC_WARN(ParseEnv * env,UChar * c)5048 CLOSE_BRACKET_WITHOUT_ESC_WARN(ParseEnv* env, UChar* c)
5049 {
5050 if (onig_warn == onig_null_warn) return ;
5051
5052 if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) {
5053 UChar buf[WARN_BUFSIZE];
5054 onig_snprintf_with_pattern(buf, WARN_BUFSIZE, (env)->enc,
5055 (env)->pattern, (env)->pattern_end,
5056 (UChar* )"regular expression has '%s' without escape", c);
5057 (*onig_warn)((char* )buf);
5058 }
5059 }
5060
5061 static UChar*
find_str_position(OnigCodePoint s[],int n,UChar * from,UChar * to,UChar ** next,OnigEncoding enc)5062 find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,
5063 UChar **next, OnigEncoding enc)
5064 {
5065 int i;
5066 OnigCodePoint x;
5067 UChar *q;
5068 UChar *p = from;
5069
5070 while (p < to) {
5071 x = ONIGENC_MBC_TO_CODE(enc, p, to);
5072 q = p + enclen(enc, p);
5073 if (x == s[0]) {
5074 for (i = 1; i < n && q < to; i++) {
5075 x = ONIGENC_MBC_TO_CODE(enc, q, to);
5076 if (x != s[i]) break;
5077 q += enclen(enc, q);
5078 }
5079 if (i >= n) {
5080 if (IS_NOT_NULL(next))
5081 *next = q;
5082 return p;
5083 }
5084 }
5085 p = q;
5086 }
5087 return NULL_UCHARP;
5088 }
5089
5090 static int
str_exist_check_with_esc(OnigCodePoint s[],int n,UChar * from,UChar * to,OnigCodePoint bad,OnigEncoding enc,OnigSyntaxType * syn)5091 str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,
5092 OnigCodePoint bad, OnigEncoding enc, OnigSyntaxType* syn)
5093 {
5094 int i, in_esc;
5095 OnigCodePoint x;
5096 UChar *q;
5097 UChar *p = from;
5098
5099 in_esc = 0;
5100 while (p < to) {
5101 if (in_esc) {
5102 in_esc = 0;
5103 p += enclen(enc, p);
5104 }
5105 else {
5106 x = ONIGENC_MBC_TO_CODE(enc, p, to);
5107 q = p + enclen(enc, p);
5108 if (x == s[0]) {
5109 for (i = 1; i < n && q < to; i++) {
5110 x = ONIGENC_MBC_TO_CODE(enc, q, to);
5111 if (x != s[i]) break;
5112 q += enclen(enc, q);
5113 }
5114 if (i >= n) return 1;
5115 p += enclen(enc, p);
5116 }
5117 else {
5118 x = ONIGENC_MBC_TO_CODE(enc, p, to);
5119 if (x == bad) return 0;
5120 else if (x == MC_ESC(syn)) in_esc = 1;
5121 p = q;
5122 }
5123 }
5124 }
5125 return 0;
5126 }
5127
/*
  Fetch the next token from *src and store it in *tok.  This variant
  recognizes ']', '-', '[', '&&' and the backslash escapes handled in the
  switch below (']' yields TK_CC_CLOSE, '-' yields TK_CC_RANGE).

  tok   : token to fill in.  tok->code_point_continue and tok->base_num
          are also read on entry: when code_point_continue is non-zero,
          get_next_code_point() is asked for the next value first.
  src   : in/out scan position.  It is written back at the `end` label;
          the TK_EOT return and the error returns leave it untouched.
  end   : end of the pattern.
  env   : supplies the syntax (env->syntax) and the encoding (env->enc).
  state : only used to choose the state passed to
          check_code_point_sequence_cc() (CS_RANGE -> CPS_EMPTY,
          anything else -> CPS_START).

  Returns tok->type, or the error code of a failing helper / an ONIGERR_*
  code.
*/
static int
fetch_token_cc(PToken* tok, UChar** src, UChar* end, ParseEnv* env, int state)
{
  int r;
  OnigCodePoint code;
  OnigCodePoint c, c2;
  int mindigits, maxdigits;
  OnigSyntaxType* syn = env->syntax;
  OnigEncoding enc = env->enc;
  UChar* prev;
  UChar* p = *src;
  PFETCH_READY;

  /* A previous call left a brace code point sequence open. */
  if (tok->code_point_continue != 0) {
    r = get_next_code_point(&p, end, tok->base_num, enc, TRUE, &code);
    if (r == 1) {
      /* clear the flag and go on with the ordinary tokenizing below */
      tok->code_point_continue = 0;
    }
    else if (r == 2) {
      tok->type = TK_CC_RANGE;
      goto end;
    }
    else if (r == 0) {
      tok->type = TK_CODE_POINT;
      tok->u.code = code;
      goto end;
    }
    else
      return r; /* error */
  }

  if (PEND) {
    tok->type = TK_EOT;
    return tok->type;
  }

  /* Default: the fetched character is an ordinary, unescaped TK_CHAR. */
  PFETCH(c);
  tok->type = TK_CHAR;
  tok->base_num = 0;
  tok->u.code = c;
  tok->escaped = 0;

  if (c == ']') {
    tok->type = TK_CC_CLOSE;
  }
  else if (c == '-') {
    tok->type = TK_CC_RANGE;
  }
  else if (c == MC_ESC(syn)) {
    /* Without ONIG_SYN_BACKSLASH_ESCAPE_IN_CC the escape character is
       returned as a plain TK_CHAR. */
    if (! IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC))
      goto end;

    if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;

    PFETCH(c);
    tok->escaped = 1;
    tok->u.code = c;
    /* A case that only `break`s leaves the token as an escaped TK_CHAR
       holding the character that followed the escape. */
    switch (c) {
    case 'w':
      tok->type = TK_CHAR_TYPE;
      tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
      tok->u.prop.not = 0;
      break;
    case 'W':
      tok->type = TK_CHAR_TYPE;
      tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
      tok->u.prop.not = 1;
      break;
    case 'd':
      tok->type = TK_CHAR_TYPE;
      tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
      tok->u.prop.not = 0;
      break;
    case 'D':
      tok->type = TK_CHAR_TYPE;
      tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
      tok->u.prop.not = 1;
      break;
    case 's':
      tok->type = TK_CHAR_TYPE;
      tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
      tok->u.prop.not = 0;
      break;
    case 'S':
      tok->type = TK_CHAR_TYPE;
      tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
      tok->u.prop.not = 1;
      break;
    case 'h':
      if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
      tok->type = TK_CHAR_TYPE;
      tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
      tok->u.prop.not = 0;
      break;
    case 'H':
      if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
      tok->type = TK_CHAR_TYPE;
      tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
      tok->u.prop.not = 1;
      break;

    case 'p':
    case 'P':
      if (PEND) break;

      /* \p{ or \P{ : only the opening (and an optional '^') is consumed
         here; 'P' and '^' each invert the `not` flag. */
      c2 = PPEEK;
      if (c2 == '{' &&
          IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
        PINC;
        tok->type = TK_CHAR_PROPERTY;
        tok->u.prop.not = c == 'P';

        if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
          PFETCH(c2);
          if (c2 == '^') {
            tok->u.prop.not = tok->u.prop.not == 0;
          }
          else
            PUNFETCH;
        }
      }
      break;

    case 'o':
      if (PEND) break;

      /* \o{...} : octal code point, at most 11 digits.  prev stays on
         the '{' for the check at brace_code_point_entry. */
      prev = p;
      if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) {
        PINC;
        r = scan_octal_number(&p, end, 0, 11, enc, &code);
        if (r < 0) return r;
        if (!PEND) {
          /* a digit right after the scanned ones: value is too long */
          c2 = PPEEK;
          if (IS_CODE_DIGIT_ASCII(enc, c2))
            return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
        }

        tok->base_num = 8;
        goto brace_code_point_entry;
      }
      break;

    case 'x':
      if (PEND) break;

      /* \x{...} : hexadecimal code point, at most 8 digits;
         otherwise \xHH with up to 2 digits. */
      prev = p;
      if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
        PINC;
        r = scan_hexadecimal_number(&p, end, 0, 8, enc, &code);
        if (r < 0) return r;
        if (!PEND) {
          /* a hex digit right after the scanned ones: value is too long */
          c2 = PPEEK;
          if (IS_CODE_XDIGIT_ASCII(enc, c2))
            return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
        }

        tok->base_num = 16;
      brace_code_point_entry:
        /* shared by \o{ and \x{ ; prev points at the '{', so this test is
           true only if something was consumed after the '{' */
        if ((p > prev + enclen(enc, prev))) {
          if (PEND) return ONIGERR_INVALID_CODE_POINT_VALUE;
          if (PPEEK_IS('}')) {
            PINC;
          }
          else {
            /* Not closed right away: validate what follows and, if it is
               accepted, let the next calls continue the sequence. */
            int curr_state;

            curr_state = (state == CS_RANGE) ? CPS_EMPTY : CPS_START;
            r = check_code_point_sequence_cc(p, end, tok->base_num, enc,
                                             curr_state);
            if (r < 0) return r;
            if (r == 0) return ONIGERR_INVALID_CODE_POINT_VALUE;
            tok->code_point_continue = TRUE;
          }
          tok->type = TK_CODE_POINT;
          tok->u.code = code;
        }
        else {
          /* can't read nothing or invalid format */
          p = prev;
        }
      }
      else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
        r = scan_hexadecimal_number(&p, end, 0, 2, enc, &code);
        if (r < 0) return r;
        if (p == prev) { /* can't read nothing. */
          code = 0; /* but, it's not error */
        }
        tok->type = TK_CRUDE_BYTE;
        tok->base_num = 16;
        tok->u.byte = (UChar )code;
      }
      break;

    case 'u':
      if (PEND) break;
      prev = p;
      /* \uHHHH : exactly 4 hex digits requested from the scanner */
      if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
        mindigits = maxdigits = 4;
      u_hex_digits:
        /* also entered from case 'U' with mindigits = maxdigits = 8 */
        r = scan_hexadecimal_number(&p, end, mindigits, maxdigits, enc, &code);
        if (r < 0) return r;
        if (p == prev) { /* can't read nothing. */
          code = 0; /* but, it's not error */
        }
        tok->type = TK_CODE_POINT;
        tok->base_num = 16;
        tok->u.code = code;
      }
      break;

    case 'U':
      if (PEND) break;
      prev = p;
      /* \UHHHHHHHH : only with ONIG_SYN_PYTHON */
      if (IS_SYNTAX_BV(syn, ONIG_SYN_PYTHON)) {
        mindigits = maxdigits = 8;
        goto u_hex_digits;
      }
      break;

    case '0':
    case '1': case '2': case '3': case '4': case '5': case '6': case '7':
      /* octal escape of up to 3 digits giving one byte (value < 256) */
      if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
        /* presumably steps back onto the digit just fetched so that the
           scanner reads it again -- TODO confirm against PUNFETCH */
        PUNFETCH;
        prev = p;
        r = scan_octal_number(&p, end, 0, 3, enc, &code);
        if (r < 0) return r;
        if (code >= 256) return ONIGERR_TOO_BIG_NUMBER;
        if (p == prev) { /* can't read nothing. */
          code = 0; /* but, it's not error */
        }
        tok->type = TK_CRUDE_BYTE;
        tok->base_num = 8;
        tok->u.byte = (UChar )code;
      }
      break;

    default:
      /* Any other escape goes to fetch_escaped_value(); the token becomes
         TK_CODE_POINT only if the result differs from the character
         itself. */
      PUNFETCH;
      r = fetch_escaped_value(&p, end, env, &c2);
      if (r < 0) return r;
      if (tok->u.code != c2) {
        tok->u.code = c2;
        tok->type = TK_CODE_POINT;
      }
      break;
    }
  }
  else if (c == '[') {
    /* "[:" opens a POSIX bracket only if ":]" is found ahead by
       str_exist_check_with_esc(), which gives up at an unescaped ']'. */
    if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && (PPEEK_IS(':'))) {
      OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' };
      tok->backp = p; /* point at '[' is read */
      PINC;
      if (str_exist_check_with_esc(send, 2, p, end,
                                   (OnigCodePoint )']', enc, syn)) {
        tok->type = TK_CC_POSIX_BRACKET_OPEN;
      }
      else {
        /* presumably undoes the PINC over ':' -- TODO confirm */
        PUNFETCH;
        goto cc_in_cc;
      }
    }
    else {
    cc_in_cc:
      /* a nested class needs ONIG_SYN_OP2_CCLASS_SET_OP; otherwise '['
         stays a TK_CHAR and a warning may be issued */
      if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP)) {
        tok->type = TK_CC_OPEN_CC;
      }
      else {
        CC_ESC_WARN(env, (UChar* )"[");
      }
    }
  }
  else if (c == '&') {
    /* "&&" : a single '&' stays a TK_CHAR */
    if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP) &&
        !PEND && (PPEEK_IS('&'))) {
      PINC;
      tok->type = TK_CC_AND;
    }
  }

 end:
  /* publish the new scan position */
  *src = p;
  return tok->type;
}
5411
5412 static int
fetch_token(PToken * tok,UChar ** src,UChar * end,ParseEnv * env)5413 fetch_token(PToken* tok, UChar** src, UChar* end, ParseEnv* env)
5414 {
5415 int r;
5416 OnigCodePoint code;
5417 OnigCodePoint c;
5418 int mindigits, maxdigits;
5419 UChar* prev;
5420 int allow_num;
5421 OnigEncoding enc;
5422 OnigSyntaxType* syn;
5423 UChar* p;
5424
5425 enc = env->enc;
5426 syn = env->syntax;
5427 p = *src;
5428
5429 PFETCH_READY;
5430
5431 if (tok->code_point_continue != 0) {
5432 r = get_next_code_point(&p, end, tok->base_num, enc, FALSE, &code);
5433 if (r == 1) {
5434 tok->code_point_continue = 0;
5435 }
5436 else if (r == 0) {
5437 tok->type = TK_CODE_POINT;
5438 tok->u.code = code;
5439 goto out;
5440 }
5441 else
5442 return r; /* error */
5443 }
5444
5445 start:
5446 if (PEND) {
5447 tok->type = TK_EOT;
5448 return tok->type;
5449 }
5450
5451 tok->type = TK_STRING;
5452 tok->base_num = 0;
5453 tok->backp = p;
5454
5455 PFETCH(c);
5456 if (IS_MC_ESC_CODE(c, syn)) {
5457 if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
5458
5459 tok->backp = p;
5460 PFETCH(c);
5461
5462 tok->u.code = c;
5463 tok->escaped = 1;
5464 switch (c) {
5465 case '*':
5466 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF)) break;
5467 tok->type = TK_REPEAT;
5468 tok->u.repeat.lower = 0;
5469 tok->u.repeat.upper = INFINITE_REPEAT;
5470 goto greedy_check;
5471 break;
5472
5473 case '+':
5474 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break;
5475 tok->type = TK_REPEAT;
5476 tok->u.repeat.lower = 1;
5477 tok->u.repeat.upper = INFINITE_REPEAT;
5478 goto greedy_check;
5479 break;
5480
5481 case '?':
5482 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_QMARK_ZERO_ONE)) break;
5483 tok->type = TK_REPEAT;
5484 tok->u.repeat.lower = 0;
5485 tok->u.repeat.upper = 1;
5486 greedy_check:
5487 tok->u.repeat.possessive = 0;
5488 greedy_check2:
5489 if (!PEND && PPEEK_IS('?') &&
5490 IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY) &&
5491 tok->u.repeat.possessive == 0) {
5492 PFETCH(c);
5493 tok->u.repeat.greedy = 0;
5494 tok->u.repeat.possessive = 0;
5495 }
5496 else {
5497 possessive_check:
5498 tok->u.repeat.greedy = 1;
5499 if (!PEND && PPEEK_IS('+') &&
5500 ((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) &&
5501 tok->type != TK_INTERVAL) ||
5502 (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) &&
5503 tok->type == TK_INTERVAL)) &&
5504 tok->u.repeat.possessive == 0) {
5505 PFETCH(c);
5506 tok->u.repeat.possessive = 1;
5507 }
5508 }
5509 break;
5510
5511 case '{':
5512 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break;
5513 r = fetch_interval(&p, end, tok, env);
5514 if (r < 0) return r; /* error */
5515 if (r == 0) goto greedy_check2;
5516 else if (r == 2) { /* {n} */
5517 if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))
5518 goto possessive_check;
5519
5520 goto greedy_check2;
5521 }
5522 /* r == 1 : normal char */
5523 break;
5524
5525 case '|':
5526 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_VBAR_ALT)) break;
5527 tok->type = TK_ALT;
5528 break;
5529
5530 case '(':
5531 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
5532 tok->type = TK_SUBEXP_OPEN;
5533 break;
5534
5535 case ')':
5536 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
5537 tok->type = TK_SUBEXP_CLOSE;
5538 break;
5539
5540 case 'w':
5541 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
5542 tok->type = TK_CHAR_TYPE;
5543 tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
5544 tok->u.prop.not = 0;
5545 break;
5546
5547 case 'W':
5548 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
5549 tok->type = TK_CHAR_TYPE;
5550 tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
5551 tok->u.prop.not = 1;
5552 break;
5553
5554 case 'b':
5555 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
5556 tok->type = TK_ANCHOR;
5557 tok->u.anchor = ANCR_WORD_BOUNDARY;
5558 break;
5559
5560 case 'B':
5561 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
5562 tok->type = TK_ANCHOR;
5563 tok->u.anchor = ANCR_NO_WORD_BOUNDARY;
5564 break;
5565
5566 case 'y':
5567 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT)) break;
5568 tok->type = TK_ANCHOR;
5569 tok->u.anchor = ANCR_TEXT_SEGMENT_BOUNDARY;
5570 break;
5571
5572 case 'Y':
5573 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT)) break;
5574 tok->type = TK_ANCHOR;
5575 tok->u.anchor = ANCR_NO_TEXT_SEGMENT_BOUNDARY;
5576 break;
5577
5578 #ifdef USE_WORD_BEGIN_END
5579 case '<':
5580 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;
5581 tok->type = TK_ANCHOR;
5582 tok->u.anchor = ANCR_WORD_BEGIN;
5583 break;
5584
5585 case '>':
5586 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;
5587 tok->type = TK_ANCHOR;
5588 tok->u.anchor = ANCR_WORD_END;
5589 break;
5590 #endif
5591
5592 case 's':
5593 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
5594 tok->type = TK_CHAR_TYPE;
5595 tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
5596 tok->u.prop.not = 0;
5597 break;
5598
5599 case 'S':
5600 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
5601 tok->type = TK_CHAR_TYPE;
5602 tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
5603 tok->u.prop.not = 1;
5604 break;
5605
5606 case 'd':
5607 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
5608 tok->type = TK_CHAR_TYPE;
5609 tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
5610 tok->u.prop.not = 0;
5611 break;
5612
5613 case 'D':
5614 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
5615 tok->type = TK_CHAR_TYPE;
5616 tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
5617 tok->u.prop.not = 1;
5618 break;
5619
5620 case 'h':
5621 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
5622 tok->type = TK_CHAR_TYPE;
5623 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
5624 tok->u.prop.not = 0;
5625 break;
5626
5627 case 'H':
5628 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
5629 tok->type = TK_CHAR_TYPE;
5630 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
5631 tok->u.prop.not = 1;
5632 break;
5633
5634 case 'K':
5635 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP)) break;
5636 tok->type = TK_KEEP;
5637 break;
5638
5639 case 'R':
5640 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE)) break;
5641 tok->type = TK_GENERAL_NEWLINE;
5642 break;
5643
5644 case 'N':
5645 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT)) break;
5646 tok->type = TK_NO_NEWLINE;
5647 break;
5648
5649 case 'O':
5650 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT)) break;
5651 tok->type = TK_TRUE_ANYCHAR;
5652 break;
5653
5654 case 'X':
5655 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT)) break;
5656 tok->type = TK_TEXT_SEGMENT;
5657 break;
5658
5659 case 'A':
5660 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
5661 begin_buf:
5662 tok->type = TK_ANCHOR;
5663 tok->u.subtype = ANCR_BEGIN_BUF;
5664 break;
5665
5666 case 'Z':
5667 if (IS_SYNTAX_BV(syn, ONIG_SYN_PYTHON)) {
5668 goto end_buf;
5669 }
5670 else {
5671 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
5672 tok->type = TK_ANCHOR;
5673 tok->u.subtype = ANCR_SEMI_END_BUF;
5674 }
5675 break;
5676
5677 case 'z':
5678 if (IS_SYNTAX_BV(syn, ONIG_SYN_PYTHON))
5679 return ONIGERR_UNDEFINED_OPERATOR;
5680
5681 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
5682 end_buf:
5683 tok->type = TK_ANCHOR;
5684 tok->u.subtype = ANCR_END_BUF;
5685 break;
5686
5687 case 'G':
5688 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR)) break;
5689 tok->type = TK_ANCHOR;
5690 tok->u.subtype = ANCR_BEGIN_POSITION;
5691 break;
5692
5693 case '`':
5694 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;
5695 goto begin_buf;
5696 break;
5697
5698 case '\'':
5699 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;
5700 goto end_buf;
5701 break;
5702
5703 case 'o':
5704 if (PEND) break;
5705
5706 prev = p;
5707 if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) {
5708 PINC;
5709 r = scan_octal_number(&p, end, 0, 11, enc, &code);
5710 if (r < 0) return r;
5711 if (!PEND) {
5712 if (IS_CODE_DIGIT_ASCII(enc, PPEEK))
5713 return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
5714 }
5715
5716 tok->base_num = 8;
5717 goto brace_code_point_entry;
5718 }
5719 break;
5720
5721 case 'x':
5722 if (PEND) break;
5723
5724 prev = p;
5725 if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
5726 PINC;
5727 r = scan_hexadecimal_number(&p, end, 0, 8, enc, &code);
5728 if (r < 0) return r;
5729 if (!PEND) {
5730 if (IS_CODE_XDIGIT_ASCII(enc, PPEEK))
5731 return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
5732 }
5733
5734 tok->base_num = 16;
5735 brace_code_point_entry:
5736 if ((p > prev + enclen(enc, prev))) {
5737 if (PEND) return ONIGERR_INVALID_CODE_POINT_VALUE;
5738 if (PPEEK_IS('}')) {
5739 PINC;
5740 }
5741 else {
5742 r = check_code_point_sequence(p, end, tok->base_num, enc);
5743 if (r < 0) return r;
5744 if (r == 0) return ONIGERR_INVALID_CODE_POINT_VALUE;
5745 tok->code_point_continue = TRUE;
5746 }
5747 tok->type = TK_CODE_POINT;
5748 tok->u.code = code;
5749 }
5750 else {
5751 /* can't read nothing or invalid format */
5752 p = prev;
5753 }
5754 }
5755 else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
5756 r = scan_hexadecimal_number(&p, end, 0, 2, enc, &code);
5757 if (r < 0) return r;
5758 if (p == prev) { /* can't read nothing. */
5759 code = 0; /* but, it's not error */
5760 }
5761 tok->type = TK_CRUDE_BYTE;
5762 tok->base_num = 16;
5763 tok->u.byte = (UChar )code;
5764 }
5765 break;
5766
5767 case 'u':
5768 if (PEND) break;
5769 prev = p;
5770 mindigits = maxdigits = 4;
5771 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
5772 u_hex_digits:
5773 r = scan_hexadecimal_number(&p, end, mindigits, maxdigits, enc, &code);
5774 if (r < 0) return r;
5775 if (p == prev) { /* can't read nothing. */
5776 code = 0; /* but, it's not error */
5777 }
5778 tok->type = TK_CODE_POINT;
5779 tok->base_num = 16;
5780 tok->u.code = code;
5781 }
5782 break;
5783
5784 case 'U':
5785 if (PEND) break;
5786 prev = p;
5787 if (IS_SYNTAX_BV(syn, ONIG_SYN_PYTHON)) {
5788 mindigits = maxdigits = 8;
5789 goto u_hex_digits;
5790 }
5791 break;
5792
5793 case '1': case '2': case '3': case '4':
5794 case '5': case '6': case '7': case '8': case '9':
5795 PUNFETCH;
5796 prev = p;
5797 r = scan_number(&p, end, enc);
5798 if (r < 0 || r > ONIG_MAX_BACKREF_NUM) {
5799 goto skip_backref;
5800 }
5801
5802 if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) &&
5803 (r <= env->num_mem || r <= 9)) { /* This spec. from GNU regex */
5804 if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
5805 if (r > env->num_mem || IS_NULL(PARSEENV_MEMENV(env)[r].mem_node))
5806 return ONIGERR_INVALID_BACKREF;
5807 }
5808
5809 tok->type = TK_BACKREF;
5810 tok->u.backref.num = 1;
5811 tok->u.backref.ref1 = r;
5812 tok->u.backref.by_name = 0;
5813 #ifdef USE_BACKREF_WITH_LEVEL
5814 tok->u.backref.exist_level = 0;
5815 #endif
5816 break;
5817 }
5818
5819 skip_backref:
5820 if (c == '8' || c == '9') {
5821 /* normal char */
5822 p = prev; PINC;
5823 break;
5824 }
5825
5826 p = prev;
5827 /* fall through */
5828 case '0':
5829 if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
5830 prev = p;
5831 r = scan_octal_number(&p, end, 0, (c == '0' ? 2:3), enc, &code);
5832 if (r < 0 || r >= 256) return ONIGERR_TOO_BIG_NUMBER;
5833 if (p == prev) { /* can't read nothing. */
5834 code = 0; /* but, it's not error */
5835 }
5836 tok->type = TK_CRUDE_BYTE;
5837 tok->base_num = 8;
5838 tok->u.byte = (UChar )code;
5839 }
5840 else if (c != '0') {
5841 PINC;
5842 }
5843 break;
5844
5845 case 'k':
5846 if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) {
5847 PFETCH(c);
5848 if (c == '<' || c == '\'') {
5849 UChar* name_end;
5850 int* backs;
5851 int back_num;
5852 enum REF_NUM num_type;
5853
5854 allow_num = 1;
5855
5856 backref_start:
5857 prev = p;
5858
5859 #ifdef USE_BACKREF_WITH_LEVEL
5860 name_end = NULL_UCHARP; /* no need. escape gcc warning. */
5861 r = fetch_name_with_level((OnigCodePoint )c, &p, end, &name_end,
5862 env, &back_num, &tok->u.backref.level, &num_type);
5863 if (r == 1) tok->u.backref.exist_level = 1;
5864 else tok->u.backref.exist_level = 0;
5865 #else
5866 r = fetch_name(c, &p, end, &name_end, env, &back_num, &num_type, TRUE);
5867 #endif
5868 if (r < 0) return r;
5869
5870 if (num_type != IS_NOT_NUM) {
5871 if (allow_num == 0) return ONIGERR_INVALID_BACKREF;
5872
5873 if (num_type == IS_REL_NUM) {
5874 back_num = backref_rel_to_abs(back_num, env);
5875 }
5876 if (back_num <= 0)
5877 return ONIGERR_INVALID_BACKREF;
5878
5879 if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
5880 if (back_num > env->num_mem ||
5881 IS_NULL(PARSEENV_MEMENV(env)[back_num].mem_node))
5882 return ONIGERR_INVALID_BACKREF;
5883 }
5884 tok->type = TK_BACKREF;
5885 tok->u.backref.by_name = 0;
5886 tok->u.backref.num = 1;
5887 tok->u.backref.ref1 = back_num;
5888 }
5889 else {
5890 int num = name_to_group_numbers(env, prev, name_end, &backs);
5891 if (num <= 0) {
5892 return ONIGERR_UNDEFINED_NAME_REFERENCE;
5893 }
5894 if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
5895 int i;
5896 for (i = 0; i < num; i++) {
5897 if (backs[i] > env->num_mem ||
5898 IS_NULL(PARSEENV_MEMENV(env)[backs[i]].mem_node))
5899 return ONIGERR_INVALID_BACKREF;
5900 }
5901 }
5902
5903 tok->type = TK_BACKREF;
5904 tok->u.backref.by_name = 1;
5905 if (num == 1) {
5906 tok->u.backref.num = 1;
5907 tok->u.backref.ref1 = backs[0];
5908 }
5909 else {
5910 tok->u.backref.num = num;
5911 tok->u.backref.refs = backs;
5912 }
5913 }
5914 }
5915 else
5916 PUNFETCH;
5917 }
5918 break;
5919
5920 #ifdef USE_CALL
5921 case 'g':
5922 if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) {
5923 PFETCH(c);
5924 if (c == '<' || c == '\'') {
5925 int gnum;
5926 UChar* name_end;
5927 enum REF_NUM num_type;
5928
5929 allow_num = 1;
5930
5931 call_start:
5932 prev = p;
5933 r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env,
5934 &gnum, &num_type, TRUE);
5935 if (r < 0) return r;
5936
5937 if (num_type != IS_NOT_NUM) {
5938 if (allow_num == 0) return ONIGERR_UNDEFINED_GROUP_REFERENCE;
5939
5940 if (num_type == IS_REL_NUM) {
5941 gnum = backref_rel_to_abs(gnum, env);
5942 if (gnum < 0) {
5943 onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,
5944 prev, name_end);
5945 return ONIGERR_UNDEFINED_GROUP_REFERENCE;
5946 }
5947 }
5948 tok->u.call.by_number = 1;
5949 tok->u.call.gnum = gnum;
5950 }
5951 else {
5952 tok->u.call.by_number = 0;
5953 tok->u.call.gnum = 0;
5954 }
5955
5956 tok->type = TK_CALL;
5957 tok->u.call.name = prev;
5958 tok->u.call.name_end = name_end;
5959 }
5960 else
5961 PUNFETCH;
5962 }
5963 break;
5964 #endif
5965
5966 case 'Q':
5967 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE)) {
5968 tok->type = TK_QUOTE_OPEN;
5969 }
5970 break;
5971
5972 case 'p':
5973 case 'P':
5974 if (!PEND && PPEEK_IS('{') &&
5975 IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
5976 PINC;
5977 tok->type = TK_CHAR_PROPERTY;
5978 tok->u.prop.not = c == 'P';
5979
5980 if (!PEND &&
5981 IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
5982 PFETCH(c);
5983 if (c == '^') {
5984 tok->u.prop.not = tok->u.prop.not == 0;
5985 }
5986 else
5987 PUNFETCH;
5988 }
5989 }
5990 break;
5991
5992 default:
5993 {
5994 OnigCodePoint c2;
5995
5996 PUNFETCH;
5997 r = fetch_escaped_value(&p, end, env, &c2);
5998 if (r < 0) return r;
5999 if (tok->u.code != c2) {
6000 tok->type = TK_CODE_POINT;
6001 tok->u.code = c2;
6002 }
6003 else { /* string */
6004 p = tok->backp + enclen(enc, tok->backp);
6005 }
6006 }
6007 break;
6008 }
6009 }
6010 else {
6011 tok->u.code = c;
6012 tok->escaped = 0;
6013
6014 #ifdef USE_VARIABLE_META_CHARS
6015 if ((c != ONIG_INEFFECTIVE_META_CHAR) &&
6016 IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) {
6017 if (c == MC_ANYCHAR(syn))
6018 goto any_char;
6019 else if (c == MC_ANYTIME(syn))
6020 goto any_time;
6021 else if (c == MC_ZERO_OR_ONE_TIME(syn))
6022 goto zero_or_one_time;
6023 else if (c == MC_ONE_OR_MORE_TIME(syn))
6024 goto one_or_more_time;
6025 else if (c == MC_ANYCHAR_ANYTIME(syn)) {
6026 tok->type = TK_ANYCHAR_ANYTIME;
6027 goto out;
6028 }
6029 }
6030 #endif
6031
6032 switch (c) {
6033 case '.':
6034 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break;
6035 #ifdef USE_VARIABLE_META_CHARS
6036 any_char:
6037 #endif
6038 tok->type = TK_ANYCHAR;
6039 break;
6040
6041 case '*':
6042 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break;
6043 #ifdef USE_VARIABLE_META_CHARS
6044 any_time:
6045 #endif
6046 tok->type = TK_REPEAT;
6047 tok->u.repeat.lower = 0;
6048 tok->u.repeat.upper = INFINITE_REPEAT;
6049 goto greedy_check;
6050 break;
6051
6052 case '+':
6053 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break;
6054 #ifdef USE_VARIABLE_META_CHARS
6055 one_or_more_time:
6056 #endif
6057 tok->type = TK_REPEAT;
6058 tok->u.repeat.lower = 1;
6059 tok->u.repeat.upper = INFINITE_REPEAT;
6060 goto greedy_check;
6061 break;
6062
6063 case '?':
6064 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break;
6065 #ifdef USE_VARIABLE_META_CHARS
6066 zero_or_one_time:
6067 #endif
6068 tok->type = TK_REPEAT;
6069 tok->u.repeat.lower = 0;
6070 tok->u.repeat.upper = 1;
6071 goto greedy_check;
6072 break;
6073
6074 case '{':
6075 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break;
6076 r = fetch_interval(&p, end, tok, env);
6077 if (r < 0) return r; /* error */
6078 if (r == 0) goto greedy_check2;
6079 else if (r == 2) { /* {n} */
6080 if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))
6081 goto possessive_check;
6082
6083 goto greedy_check2;
6084 }
6085 /* r == 1 : normal char */
6086 break;
6087
6088 case '|':
6089 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_VBAR_ALT)) break;
6090 tok->type = TK_ALT;
6091 break;
6092
6093 case '(':
6094 if (!PEND && PPEEK_IS('?') &&
6095 IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
6096 prev = p;
6097 PINC;
6098 if (! PEND) {
6099 c = PPEEK;
6100 if (c == '#') {
6101 PFETCH(c);
6102 while (1) {
6103 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6104 PFETCH(c);
6105 if (c == MC_ESC(syn)) {
6106 if (! PEND) PFETCH(c);
6107 }
6108 else {
6109 if (c == ')') break;
6110 }
6111 }
6112 goto start;
6113 }
6114 else if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_PERL_SUBEXP_CALL)) {
6115 int gnum;
6116 UChar* name;
6117 UChar* name_end;
6118 enum REF_NUM num_type;
6119
6120 switch (c) {
6121 case '&':
6122 {
6123 PINC;
6124 name = p;
6125 r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env,
6126 &gnum, &num_type, FALSE);
6127 if (r < 0) return r;
6128
6129 tok->type = TK_CALL;
6130 tok->u.call.by_number = 0;
6131 tok->u.call.gnum = 0;
6132 tok->u.call.name = name;
6133 tok->u.call.name_end = name_end;
6134 }
6135 break;
6136
6137 case 'R':
6138 tok->type = TK_CALL;
6139 tok->u.call.by_number = 1;
6140 tok->u.call.gnum = 0;
6141 tok->u.call.name = p;
6142 PINC;
6143 if (! PPEEK_IS(')')) return ONIGERR_UNDEFINED_GROUP_OPTION;
6144 tok->u.call.name_end = p;
6145 break;
6146
6147 case '-':
6148 case '+':
6149 goto lparen_qmark_num;
6150 break;
6151 default:
6152 if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto lparen_qmark_end;
6153
6154 lparen_qmark_num:
6155 {
6156 name = p;
6157 r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env,
6158 &gnum, &num_type, TRUE);
6159 if (r < 0) return r;
6160
6161 if (num_type == IS_NOT_NUM) {
6162 return ONIGERR_INVALID_GROUP_NAME;
6163 }
6164 else {
6165 if (num_type == IS_REL_NUM) {
6166 gnum = backref_rel_to_abs(gnum, env);
6167 if (gnum < 0) {
6168 onig_scan_env_set_error_string(env,
6169 ONIGERR_UNDEFINED_NAME_REFERENCE, name, name_end);
6170 return ONIGERR_UNDEFINED_GROUP_REFERENCE;
6171 }
6172 }
6173 tok->u.call.by_number = 1;
6174 tok->u.call.gnum = gnum;
6175 }
6176
6177 tok->type = TK_CALL;
6178 tok->u.call.name = name;
6179 tok->u.call.name_end = name_end;
6180 }
6181 break;
6182 }
6183 }
6184 else if (c == 'P' &&
6185 IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_CAPITAL_P_NAME)) {
6186 PINC; /* skip 'P' */
6187 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6188 PFETCH(c);
6189 allow_num = 0;
6190 if (c == '=') {
6191 c = '(';
6192 goto backref_start;
6193 }
6194 else if (c == '>') {
6195 #ifdef USE_CALL
6196 c = '(';
6197 goto call_start;
6198 #else
6199 return ONIGERR_UNDEFINED_OPERATOR;
6200 #endif
6201 }
6202 else {
6203 p = prev;
6204 goto lparen_qmark_end2;
6205 }
6206 }
6207 }
6208 lparen_qmark_end:
6209 PUNFETCH;
6210 }
6211
6212 lparen_qmark_end2:
6213 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
6214 tok->type = TK_SUBEXP_OPEN;
6215 break;
6216
6217 case ')':
6218 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
6219 tok->type = TK_SUBEXP_CLOSE;
6220 break;
6221
6222 case '^':
6223 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
6224 tok->type = TK_ANCHOR;
6225 tok->u.subtype = (OPTON_SINGLELINE(env->options)
6226 ? ANCR_BEGIN_BUF : ANCR_BEGIN_LINE);
6227 break;
6228
6229 case '$':
6230 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
6231 tok->type = TK_ANCHOR;
6232 tok->u.subtype = (OPTON_SINGLELINE(env->options)
6233 ? ANCR_SEMI_END_BUF : ANCR_END_LINE);
6234 break;
6235
6236 case '[':
6237 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACKET_CC)) break;
6238 tok->type = TK_OPEN_CC;
6239 break;
6240
6241 case ']':
6242 if (*src > env->pattern) /* /].../ is allowed. */
6243 CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]");
6244 break;
6245
6246 case '#':
6247 if (OPTON_EXTEND(env->options)) {
6248 while (!PEND) {
6249 PFETCH(c);
6250 if (ONIGENC_IS_CODE_NEWLINE(enc, c))
6251 break;
6252 }
6253 goto start;
6254 break;
6255 }
6256 break;
6257
6258 case ' ': case '\t': case '\n': case '\r': case '\f':
6259 if (OPTON_EXTEND(env->options))
6260 goto start;
6261 break;
6262
6263 default:
6264 /* string */
6265 break;
6266 }
6267 }
6268
6269 out:
6270 *src = p;
6271 return tok->type;
6272 }
6273
6274 static int
add_ctype_to_cc_by_range(CClassNode * cc,int ctype ARG_UNUSED,int not,OnigEncoding enc ARG_UNUSED,OnigCodePoint sb_out,const OnigCodePoint mbr[])6275 add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,
6276 OnigEncoding enc ARG_UNUSED, OnigCodePoint sb_out,
6277 const OnigCodePoint mbr[])
6278 {
6279 int i, r;
6280 OnigCodePoint j;
6281
6282 int n = ONIGENC_CODE_RANGE_NUM(mbr);
6283
6284 if (not == 0) {
6285 for (i = 0; i < n; i++) {
6286 for (j = ONIGENC_CODE_RANGE_FROM(mbr, i);
6287 j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {
6288 if (j >= sb_out) {
6289 if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {
6290 r = add_code_range_to_buf(&(cc->mbuf), j,
6291 ONIGENC_CODE_RANGE_TO(mbr, i));
6292 if (r != 0) return r;
6293 i++;
6294 }
6295
6296 goto sb_end;
6297 }
6298 BITSET_SET_BIT(cc->bs, j);
6299 }
6300 }
6301
6302 sb_end:
6303 for ( ; i < n; i++) {
6304 r = add_code_range_to_buf(&(cc->mbuf),
6305 ONIGENC_CODE_RANGE_FROM(mbr, i),
6306 ONIGENC_CODE_RANGE_TO(mbr, i));
6307 if (r != 0) return r;
6308 }
6309 }
6310 else {
6311 OnigCodePoint prev = 0;
6312
6313 for (i = 0; i < n; i++) {
6314 for (j = prev; j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) {
6315 if (j >= sb_out) {
6316 goto sb_end2;
6317 }
6318 BITSET_SET_BIT(cc->bs, j);
6319 }
6320 prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
6321 }
6322 for (j = prev; j < sb_out; j++) {
6323 BITSET_SET_BIT(cc->bs, j);
6324 }
6325
6326 sb_end2:
6327 prev = sb_out;
6328
6329 for (i = 0; i < n; i++) {
6330 if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) {
6331 r = add_code_range_to_buf(&(cc->mbuf), prev,
6332 ONIGENC_CODE_RANGE_FROM(mbr, i) - 1);
6333 if (r != 0) return r;
6334 }
6335 prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
6336 if (prev == 0) goto end;
6337 }
6338
6339 r = add_code_range_to_buf(&(cc->mbuf), prev, MAX_CODE_POINT);
6340 if (r != 0) return r;
6341 }
6342
6343 end:
6344 return 0;
6345 }
6346
6347 static int
add_ctype_to_cc_by_range_limit(CClassNode * cc,int ctype ARG_UNUSED,int not,OnigEncoding enc ARG_UNUSED,OnigCodePoint sb_out,const OnigCodePoint mbr[],OnigCodePoint limit)6348 add_ctype_to_cc_by_range_limit(CClassNode* cc, int ctype ARG_UNUSED, int not,
6349 OnigEncoding enc ARG_UNUSED,
6350 OnigCodePoint sb_out,
6351 const OnigCodePoint mbr[], OnigCodePoint limit)
6352 {
6353 int i, r;
6354 OnigCodePoint j;
6355 OnigCodePoint from;
6356 OnigCodePoint to;
6357
6358 int n = ONIGENC_CODE_RANGE_NUM(mbr);
6359
6360 if (not == 0) {
6361 for (i = 0; i < n; i++) {
6362 for (j = ONIGENC_CODE_RANGE_FROM(mbr, i);
6363 j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {
6364 if (j > limit) goto end;
6365 if (j >= sb_out) {
6366 if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {
6367 to = ONIGENC_CODE_RANGE_TO(mbr, i);
6368 if (to > limit) to = limit;
6369 r = add_code_range_to_buf(&(cc->mbuf), j, to);
6370 if (r != 0) return r;
6371 i++;
6372 }
6373
6374 goto sb_end;
6375 }
6376 BITSET_SET_BIT(cc->bs, j);
6377 }
6378 }
6379
6380 sb_end:
6381 for ( ; i < n; i++) {
6382 from = ONIGENC_CODE_RANGE_FROM(mbr, i);
6383 to = ONIGENC_CODE_RANGE_TO(mbr, i);
6384 if (from > limit) break;
6385 if (to > limit) to = limit;
6386 r = add_code_range_to_buf(&(cc->mbuf), from, to);
6387 if (r != 0) return r;
6388 }
6389 }
6390 else {
6391 OnigCodePoint prev = 0;
6392
6393 for (i = 0; i < n; i++) {
6394 from = ONIGENC_CODE_RANGE_FROM(mbr, i);
6395 if (from > limit) {
6396 for (j = prev; j < sb_out; j++) {
6397 BITSET_SET_BIT(cc->bs, j);
6398 }
6399 goto sb_end2;
6400 }
6401 for (j = prev; j < from; j++) {
6402 if (j >= sb_out) goto sb_end2;
6403 BITSET_SET_BIT(cc->bs, j);
6404 }
6405 prev = ONIGENC_CODE_RANGE_TO(mbr, i);
6406 if (prev > limit) prev = limit;
6407 prev++;
6408 if (prev == 0) goto end;
6409 }
6410 for (j = prev; j < sb_out; j++) {
6411 BITSET_SET_BIT(cc->bs, j);
6412 }
6413
6414 sb_end2:
6415 prev = sb_out;
6416
6417 for (i = 0; i < n; i++) {
6418 from = ONIGENC_CODE_RANGE_FROM(mbr, i);
6419 if (from > limit) goto last;
6420
6421 if (prev < from) {
6422 r = add_code_range_to_buf(&(cc->mbuf), prev, from - 1);
6423 if (r != 0) return r;
6424 }
6425 prev = ONIGENC_CODE_RANGE_TO(mbr, i);
6426 if (prev > limit) prev = limit;
6427 prev++;
6428 if (prev == 0) goto end;
6429 }
6430
6431 last:
6432 r = add_code_range_to_buf(&(cc->mbuf), prev, MAX_CODE_POINT);
6433 if (r != 0) return r;
6434 }
6435
6436 end:
6437 return 0;
6438 }
6439
6440 static int
add_ctype_to_cc(CClassNode * cc,int ctype,int not,ParseEnv * env)6441 add_ctype_to_cc(CClassNode* cc, int ctype, int not, ParseEnv* env)
6442 {
6443 int c, r;
6444 int ascii_mode;
6445 int is_single;
6446 const OnigCodePoint *ranges;
6447 OnigCodePoint limit;
6448 OnigCodePoint sb_out;
6449 OnigEncoding enc = env->enc;
6450
6451 ascii_mode = OPTON_IS_ASCII_MODE_CTYPE(ctype, env->options);
6452
6453 r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);
6454 if (r == 0) {
6455 if (ascii_mode == 0)
6456 r = add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sb_out, ranges);
6457 else
6458 r = add_ctype_to_cc_by_range_limit(cc, ctype, not, env->enc, sb_out,
6459 ranges, ASCII_LIMIT);
6460 return r;
6461 }
6462 else if (r != ONIG_NO_SUPPORT_CONFIG) {
6463 return r;
6464 }
6465
6466 r = 0;
6467 is_single = ONIGENC_IS_SINGLEBYTE(enc);
6468 limit = ascii_mode ? ASCII_LIMIT : SINGLE_BYTE_SIZE;
6469
6470 switch (ctype) {
6471 case ONIGENC_CTYPE_ALPHA:
6472 case ONIGENC_CTYPE_BLANK:
6473 case ONIGENC_CTYPE_CNTRL:
6474 case ONIGENC_CTYPE_DIGIT:
6475 case ONIGENC_CTYPE_LOWER:
6476 case ONIGENC_CTYPE_PUNCT:
6477 case ONIGENC_CTYPE_SPACE:
6478 case ONIGENC_CTYPE_UPPER:
6479 case ONIGENC_CTYPE_XDIGIT:
6480 case ONIGENC_CTYPE_ASCII:
6481 case ONIGENC_CTYPE_ALNUM:
6482 if (not != 0) {
6483 for (c = 0; c < (int )limit; c++) {
6484 if (is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1) {
6485 if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
6486 BITSET_SET_BIT(cc->bs, c);
6487 }
6488 }
6489 for (c = limit; c < SINGLE_BYTE_SIZE; c++) {
6490 if (is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1)
6491 BITSET_SET_BIT(cc->bs, c);
6492 }
6493
6494 if (is_single == 0)
6495 ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
6496 }
6497 else {
6498 for (c = 0; c < (int )limit; c++) {
6499 if (is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1) {
6500 if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
6501 BITSET_SET_BIT(cc->bs, c);
6502 }
6503 }
6504 }
6505 break;
6506
6507 case ONIGENC_CTYPE_GRAPH:
6508 case ONIGENC_CTYPE_PRINT:
6509 case ONIGENC_CTYPE_WORD:
6510 if (not != 0) {
6511 for (c = 0; c < (int )limit; c++) {
6512 /* check invalid code point */
6513 if ((is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1)
6514 && ! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
6515 BITSET_SET_BIT(cc->bs, c);
6516 }
6517 for (c = limit; c < SINGLE_BYTE_SIZE; c++) {
6518 if (is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1)
6519 BITSET_SET_BIT(cc->bs, c);
6520 }
6521 if (ascii_mode != 0 && is_single == 0)
6522 ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
6523 }
6524 else {
6525 for (c = 0; c < (int )limit; c++) {
6526 if ((is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1)
6527 && ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
6528 BITSET_SET_BIT(cc->bs, c);
6529 }
6530 if (ascii_mode == 0 && is_single == 0)
6531 ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
6532 }
6533 break;
6534
6535 default:
6536 return ONIGERR_PARSER_BUG;
6537 break;
6538 }
6539
6540 return r;
6541 }
6542
6543 static int
prs_posix_bracket(CClassNode * cc,UChar ** src,UChar * end,ParseEnv * env)6544 prs_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ParseEnv* env)
6545 {
6546 #define POSIX_BRACKET_CHECK_LIMIT_LENGTH 20
6547 #define POSIX_BRACKET_NAME_MIN_LEN 4
6548
6549 static PosixBracketEntryType PBS[] = {
6550 { (UChar* )"alnum", ONIGENC_CTYPE_ALNUM, 5 },
6551 { (UChar* )"alpha", ONIGENC_CTYPE_ALPHA, 5 },
6552 { (UChar* )"blank", ONIGENC_CTYPE_BLANK, 5 },
6553 { (UChar* )"cntrl", ONIGENC_CTYPE_CNTRL, 5 },
6554 { (UChar* )"digit", ONIGENC_CTYPE_DIGIT, 5 },
6555 { (UChar* )"graph", ONIGENC_CTYPE_GRAPH, 5 },
6556 { (UChar* )"lower", ONIGENC_CTYPE_LOWER, 5 },
6557 { (UChar* )"print", ONIGENC_CTYPE_PRINT, 5 },
6558 { (UChar* )"punct", ONIGENC_CTYPE_PUNCT, 5 },
6559 { (UChar* )"space", ONIGENC_CTYPE_SPACE, 5 },
6560 { (UChar* )"upper", ONIGENC_CTYPE_UPPER, 5 },
6561 { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 },
6562 { (UChar* )"ascii", ONIGENC_CTYPE_ASCII, 5 },
6563 { (UChar* )"word", ONIGENC_CTYPE_WORD, 4 },
6564 { (UChar* )NULL, -1, 0 }
6565 };
6566
6567 PosixBracketEntryType *pb;
6568 int not, i, r;
6569 OnigCodePoint c;
6570 OnigEncoding enc = env->enc;
6571 UChar *p = *src;
6572
6573 if (PPEEK_IS('^')) {
6574 PINC_S;
6575 not = 1;
6576 }
6577 else
6578 not = 0;
6579
6580 if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MIN_LEN + 3)
6581 goto not_posix_bracket;
6582
6583 for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
6584 if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) {
6585 p = (UChar* )onigenc_step(enc, p, end, pb->len);
6586 if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0)
6587 return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
6588
6589 r = add_ctype_to_cc(cc, pb->ctype, not, env);
6590 if (r != 0) return r;
6591
6592 PINC_S; PINC_S;
6593 *src = p;
6594 return 0;
6595 }
6596 }
6597
6598 not_posix_bracket:
6599 c = 0;
6600 i = 0;
6601 while (!PEND && ((c = PPEEK) != ':') && c != ']') {
6602 PINC_S;
6603 if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break;
6604 }
6605 if (c == ':' && ! PEND) {
6606 PINC_S;
6607 if (! PEND) {
6608 PFETCH_S(c);
6609 if (c == ']')
6610 return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
6611 }
6612 }
6613
6614 return 1; /* 1: is not POSIX bracket, but no error. */
6615 }
6616
6617 static int
fetch_char_property_to_ctype(UChar ** src,UChar * end,ParseEnv * env)6618 fetch_char_property_to_ctype(UChar** src, UChar* end, ParseEnv* env)
6619 {
6620 int r;
6621 OnigCodePoint c;
6622 OnigEncoding enc;
6623 UChar *prev, *start, *p;
6624
6625 p = *src;
6626 enc = env->enc;
6627 r = ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
6628 start = prev = p;
6629
6630 while (!PEND) {
6631 prev = p;
6632 PFETCH_S(c);
6633 if (c == '}') {
6634 r = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, start, prev);
6635 if (r >= 0) {
6636 *src = p;
6637 }
6638 else {
6639 onig_scan_env_set_error_string(env, r, *src, prev);
6640 }
6641
6642 return r;
6643 }
6644 else if (c == '(' || c == ')' || c == '{' || c == '|') {
6645 break;
6646 }
6647 }
6648
6649 return r;
6650 }
6651
6652 static int
prs_char_property(Node ** np,PToken * tok,UChar ** src,UChar * end,ParseEnv * env)6653 prs_char_property(Node** np, PToken* tok, UChar** src, UChar* end,
6654 ParseEnv* env)
6655 {
6656 int r, ctype;
6657 CClassNode* cc;
6658
6659 ctype = fetch_char_property_to_ctype(src, end, env);
6660 if (ctype < 0) return ctype;
6661
6662 *np = node_new_cclass();
6663 CHECK_NULL_RETURN_MEMERR(*np);
6664 cc = CCLASS_(*np);
6665 r = add_ctype_to_cc(cc, ctype, FALSE, env);
6666 if (r != 0) return r;
6667 if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
6668
6669 return 0;
6670 }
6671
6672
6673 static int
cc_cprop_next(CClassNode * cc,OnigCodePoint * pcode,CVAL * val,CSTATE * state,ParseEnv * env)6674 cc_cprop_next(CClassNode* cc, OnigCodePoint* pcode, CVAL* val, CSTATE* state,
6675 ParseEnv* env)
6676 {
6677 int r;
6678
6679 if (*state == CS_RANGE)
6680 return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE;
6681
6682 if (*state == CS_VALUE) {
6683 if (*val == CV_SB)
6684 BITSET_SET_BIT(cc->bs, (int )(*pcode));
6685 else if (*val == CV_MB) {
6686 r = add_code_range(&(cc->mbuf), env, *pcode, *pcode);
6687 if (r < 0) return r;
6688 }
6689 }
6690
6691 *state = CS_VALUE;
6692 *val = CV_CPROP;
6693 return 0;
6694 }
6695
6696 static int
cc_char_next(CClassNode * cc,OnigCodePoint * from,OnigCodePoint to,int * from_raw,int to_raw,CVAL intype,CVAL * type,CSTATE * state,ParseEnv * env)6697 cc_char_next(CClassNode* cc, OnigCodePoint *from, OnigCodePoint to,
6698 int* from_raw, int to_raw, CVAL intype, CVAL* type,
6699 CSTATE* state, ParseEnv* env)
6700 {
6701 int r;
6702
6703 switch (*state) {
6704 case CS_VALUE:
6705 if (*type == CV_SB) {
6706 if (*from > 0xff)
6707 return ONIGERR_INVALID_CODE_POINT_VALUE;
6708
6709 BITSET_SET_BIT(cc->bs, (int )(*from));
6710 }
6711 else if (*type == CV_MB) {
6712 r = add_code_range(&(cc->mbuf), env, *from, *from);
6713 if (r < 0) return r;
6714 }
6715 break;
6716
6717 case CS_RANGE:
6718 if (intype == *type) {
6719 if (intype == CV_SB) {
6720 if (*from > 0xff || to > 0xff)
6721 return ONIGERR_INVALID_CODE_POINT_VALUE;
6722
6723 if (*from > to) {
6724 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
6725 goto ccs_range_end;
6726 else
6727 return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
6728 }
6729 bitset_set_range(cc->bs, (int )*from, (int )to);
6730 }
6731 else {
6732 r = add_code_range(&(cc->mbuf), env, *from, to);
6733 if (r < 0) return r;
6734 }
6735 }
6736 else {
6737 if (*from > to) {
6738 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
6739 goto ccs_range_end;
6740 else
6741 return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
6742 }
6743 bitset_set_range(cc->bs, (int )*from, (int )(to < 0xff ? to : 0xff));
6744 r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*from, to);
6745 if (r < 0) return r;
6746 }
6747 ccs_range_end:
6748 *state = CS_COMPLETE;
6749 break;
6750
6751 case CS_COMPLETE:
6752 case CS_START:
6753 *state = CS_VALUE;
6754 break;
6755
6756 default:
6757 break;
6758 }
6759
6760 *from_raw = to_raw;
6761 *from = to;
6762 *type = intype;
6763 return 0;
6764 }
6765
6766 static int
code_exist_check(OnigCodePoint c,UChar * from,UChar * end,int ignore_escaped,ParseEnv * env)6767 code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,
6768 ParseEnv* env)
6769 {
6770 int in_esc;
6771 OnigCodePoint code;
6772 OnigEncoding enc = env->enc;
6773 UChar* p = from;
6774
6775 in_esc = 0;
6776 while (! PEND) {
6777 if (ignore_escaped && in_esc) {
6778 in_esc = 0;
6779 }
6780 else {
6781 PFETCH_S(code);
6782 if (code == c) return 1;
6783 if (code == MC_ESC(env->syntax)) in_esc = 1;
6784 }
6785 }
6786 return 0;
6787 }
6788
6789 static int
prs_cc(Node ** np,PToken * tok,UChar ** src,UChar * end,ParseEnv * env)6790 prs_cc(Node** np, PToken* tok, UChar** src, UChar* end, ParseEnv* env)
6791 {
6792 int r, neg, len, fetched, and_start;
6793 OnigCodePoint in_code, curr_code;
6794 UChar *p;
6795 Node* node;
6796 CClassNode *cc, *prev_cc;
6797 CClassNode work_cc;
6798 int curr_raw, in_raw;
6799 CSTATE state;
6800 CVAL in_type;
6801 CVAL curr_type;
6802
6803 *np = NULL_NODE;
6804 INC_PARSE_DEPTH(env->parse_depth);
6805
6806 state = CS_START;
6807 prev_cc = (CClassNode* )NULL;
6808 r = fetch_token_cc(tok, src, end, env, state);
6809 if (r == TK_CHAR && tok->u.code == (OnigCodePoint )'^' && tok->escaped == 0) {
6810 neg = 1;
6811 r = fetch_token_cc(tok, src, end, env, state);
6812 }
6813 else {
6814 neg = 0;
6815 }
6816
6817 if (r < 0) return r;
6818 if (r == TK_CC_CLOSE) {
6819 if (! code_exist_check((OnigCodePoint )']',
6820 *src, env->pattern_end, 1, env))
6821 return ONIGERR_EMPTY_CHAR_CLASS;
6822
6823 CC_ESC_WARN(env, (UChar* )"]");
6824 r = tok->type = TK_CHAR; /* allow []...] */
6825 }
6826
6827 *np = node = node_new_cclass();
6828 CHECK_NULL_RETURN_MEMERR(node);
6829 cc = CCLASS_(node);
6830
6831 and_start = 0;
6832 curr_type = CV_UNDEF;
6833
6834 p = *src;
6835 while (r != TK_CC_CLOSE) {
6836 fetched = 0;
6837 switch (r) {
6838 case TK_CHAR:
6839 any_char_in:
6840 len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.code);
6841 if (len < 0) {
6842 r = len;
6843 goto err;
6844 }
6845 in_type = (len == 1) ? CV_SB : CV_MB;
6846 in_code = tok->u.code;
6847 in_raw = 0;
6848 goto val_entry2;
6849 break;
6850
6851 case TK_CRUDE_BYTE:
6852 /* tok->base_num != 0 : octal or hexadec. */
6853 if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base_num != 0) {
6854 int i, j;
6855 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
6856 UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN;
6857 UChar* psave = p;
6858 int base_num = tok->base_num;
6859
6860 buf[0] = tok->u.byte;
6861 for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) {
6862 r = fetch_token_cc(tok, &p, end, env, CS_COMPLETE);
6863 if (r < 0) goto err;
6864 if (r != TK_CRUDE_BYTE || tok->base_num != base_num) {
6865 fetched = 1;
6866 break;
6867 }
6868 buf[i] = tok->u.byte;
6869 }
6870
6871 if (i < ONIGENC_MBC_MINLEN(env->enc)) {
6872 r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
6873 goto err;
6874 }
6875
6876 /* clear buf tail */
6877 for (j = i; j < ONIGENC_CODE_TO_MBC_MAXLEN; j++) buf[j] = '\0';
6878
6879 len = enclen(env->enc, buf);
6880 if (i < len) {
6881 r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
6882 goto err;
6883 }
6884 else if (i > len) { /* fetch back */
6885 p = psave;
6886 for (i = 1; i < len; i++) {
6887 r = fetch_token_cc(tok, &p, end, env, CS_COMPLETE);
6888 if (r < 0) goto err;
6889 }
6890 fetched = 0;
6891 }
6892
6893 if (i == 1) {
6894 in_code = (OnigCodePoint )buf[0];
6895 goto crude_single;
6896 }
6897 else {
6898 in_code = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe);
6899 in_type = CV_MB;
6900 }
6901 }
6902 else {
6903 in_code = (OnigCodePoint )tok->u.byte;
6904 crude_single:
6905 in_type = CV_SB;
6906 }
6907 in_raw = 1;
6908 goto val_entry2;
6909 break;
6910
6911 case TK_CODE_POINT:
6912 in_code = tok->u.code;
6913 in_raw = 1;
6914 val_entry:
6915 len = ONIGENC_CODE_TO_MBCLEN(env->enc, in_code);
6916 if (len < 0) {
6917 if (state != CS_RANGE ||
6918 ! IS_SYNTAX_BV(env->syntax,
6919 ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC) ||
6920 in_code < 0x100 || ONIGENC_MBC_MAXLEN(env->enc) == 1) {
6921 r = len;
6922 goto err;
6923 }
6924 }
6925 in_type = (len == 1 ? CV_SB : CV_MB);
6926 val_entry2:
6927 r = cc_char_next(cc, &curr_code, in_code, &curr_raw, in_raw, in_type,
6928 &curr_type, &state, env);
6929 if (r != 0) goto err;
6930 break;
6931
6932 case TK_CC_POSIX_BRACKET_OPEN:
6933 r = prs_posix_bracket(cc, &p, end, env);
6934 if (r < 0) goto err;
6935 if (r == 1) { /* is not POSIX bracket */
6936 CC_ESC_WARN(env, (UChar* )"[");
6937 p = tok->backp;
6938 in_code = tok->u.code;
6939 in_raw = 0;
6940 goto val_entry;
6941 }
6942 goto next_cprop;
6943 break;
6944
6945 case TK_CHAR_TYPE:
6946 r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, env);
6947 if (r != 0) goto err;
6948
6949 next_cprop:
6950 r = cc_cprop_next(cc, &curr_code, &curr_type, &state, env);
6951 if (r != 0) goto err;
6952 break;
6953
6954 case TK_CHAR_PROPERTY:
6955 {
6956 int ctype = fetch_char_property_to_ctype(&p, end, env);
6957 if (ctype < 0) {
6958 r = ctype;
6959 goto err;
6960 }
6961 r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, env);
6962 if (r != 0) goto err;
6963 goto next_cprop;
6964 }
6965 break;
6966
6967 case TK_CC_RANGE:
6968 if (state == CS_VALUE) {
6969 r = fetch_token_cc(tok, &p, end, env, CS_RANGE);
6970 if (r < 0) goto err;
6971
6972 fetched = 1;
6973 if (r == TK_CC_CLOSE) { /* allow [x-] */
6974 range_end_val:
6975 in_code = (OnigCodePoint )'-';
6976 in_raw = 0;
6977 goto val_entry;
6978 }
6979 else if (r == TK_CC_AND) {
6980 CC_ESC_WARN(env, (UChar* )"-");
6981 goto range_end_val;
6982 }
6983
6984 if (curr_type == CV_CPROP) {
6985 r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;
6986 goto err;
6987 }
6988
6989 state = CS_RANGE;
6990 }
6991 else if (state == CS_START) {
6992 /* [-xa] is allowed */
6993 in_code = tok->u.code;
6994 in_raw = 0;
6995
6996 r = fetch_token_cc(tok, &p, end, env, CS_VALUE);
6997 if (r < 0) goto err;
6998
6999 fetched = 1;
7000 /* [--x] or [a&&-x] is warned. */
7001 if (r == TK_CC_RANGE || and_start != 0)
7002 CC_ESC_WARN(env, (UChar* )"-");
7003
7004 goto val_entry;
7005 }
7006 else if (state == CS_RANGE) {
7007 CC_ESC_WARN(env, (UChar* )"-");
7008 goto any_char_in; /* [!--] is allowed */
7009 }
7010 else { /* CS_COMPLETE */
7011 r = fetch_token_cc(tok, &p, end, env, CS_VALUE);
7012 if (r < 0) goto err;
7013
7014 fetched = 1;
7015 if (r == TK_CC_CLOSE)
7016 goto range_end_val; /* allow [a-b-] */
7017 else if (r == TK_CC_AND) {
7018 CC_ESC_WARN(env, (UChar* )"-");
7019 goto range_end_val;
7020 }
7021
7022 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) {
7023 CC_ESC_WARN(env, (UChar* )"-");
7024 goto range_end_val; /* [0-9-a] is allowed as [0-9\-a] */
7025 }
7026 r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;
7027 goto err;
7028 }
7029 break;
7030
7031 case TK_CC_OPEN_CC: /* [ */
7032 {
7033 Node *anode;
7034 CClassNode* acc;
7035
7036 if (state == CS_VALUE) {
7037 r = cc_char_next(cc, &curr_code, 0, &curr_raw, 0, curr_type, &curr_type,
7038 &state, env);
7039 if (r != 0) goto err;
7040 }
7041 state = CS_COMPLETE;
7042
7043 r = prs_cc(&anode, tok, &p, end, env);
7044 if (r != 0) {
7045 onig_node_free(anode);
7046 goto cc_open_err;
7047 }
7048 acc = CCLASS_(anode);
7049 r = or_cclass(cc, acc, env->enc);
7050 onig_node_free(anode);
7051
7052 cc_open_err:
7053 if (r != 0) goto err;
7054 }
7055 break;
7056
7057 case TK_CC_AND: /* && */
7058 {
7059 if (state == CS_VALUE) {
7060 r = cc_char_next(cc, &curr_code, 0, &curr_raw, 0, curr_type, &curr_type,
7061 &state, env);
7062 if (r != 0) goto err;
7063 }
7064 /* initialize local variables */
7065 and_start = 1;
7066 state = CS_START;
7067
7068 if (IS_NOT_NULL(prev_cc)) {
7069 r = and_cclass(prev_cc, cc, env->enc);
7070 if (r != 0) goto err;
7071 bbuf_free(cc->mbuf);
7072 }
7073 else {
7074 prev_cc = cc;
7075 cc = &work_cc;
7076 }
7077 initialize_cclass(cc);
7078 }
7079 break;
7080
7081 case TK_EOT:
7082 r = ONIGERR_PREMATURE_END_OF_CHAR_CLASS;
7083 goto err;
7084 break;
7085 default:
7086 r = ONIGERR_PARSER_BUG;
7087 goto err;
7088 break;
7089 }
7090
7091 if (fetched)
7092 r = tok->type;
7093 else {
7094 r = fetch_token_cc(tok, &p, end, env, state);
7095 if (r < 0) goto err;
7096 }
7097 }
7098
7099 if (state == CS_VALUE) {
7100 r = cc_char_next(cc, &curr_code, 0, &curr_raw, 0, curr_type, &curr_type,
7101 &state, env);
7102 if (r != 0) goto err;
7103 }
7104
7105 if (IS_NOT_NULL(prev_cc)) {
7106 r = and_cclass(prev_cc, cc, env->enc);
7107 if (r != 0) goto err;
7108 bbuf_free(cc->mbuf);
7109 cc = prev_cc;
7110 }
7111
7112 if (neg != 0)
7113 NCCLASS_SET_NOT(cc);
7114 else
7115 NCCLASS_CLEAR_NOT(cc);
7116 if (IS_NCCLASS_NOT(cc) &&
7117 IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) {
7118 int is_empty = (IS_NULL(cc->mbuf) ? 1 : 0);
7119 if (is_empty != 0)
7120 BITSET_IS_EMPTY(cc->bs, is_empty);
7121
7122 if (is_empty == 0) {
7123 if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) {
7124 if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1)
7125 BITSET_SET_BIT(cc->bs, NEWLINE_CODE);
7126 else
7127 add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE);
7128 }
7129 }
7130 }
7131 *src = p;
7132 DEC_PARSE_DEPTH(env->parse_depth);
7133 return 0;
7134
7135 err:
7136 if (cc != CCLASS_(*np))
7137 bbuf_free(cc->mbuf);
7138 return r;
7139 }
7140
/* Prototype only: the body of prs_alts() is not in this part of the file. */
static int prs_alts(Node** top, PToken* tok, int term,
                    UChar** src, UChar* end, ParseEnv* env, int group_head);
7143
7144 #ifdef USE_CALLOUT
7145
/* (?{...}[tag][X<>]) (?{{...}}[tag][X<>]) */
7147 static int
prs_callout_of_contents(Node ** np,int cterm,UChar ** src,UChar * end,ParseEnv * env)7148 prs_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end,
7149 ParseEnv* env)
7150 {
7151 int r;
7152 int i;
7153 int in;
7154 int num;
7155 OnigCodePoint c;
7156 UChar* code_start;
7157 UChar* code_end;
7158 UChar* contents;
7159 UChar* tag_start;
7160 UChar* tag_end;
7161 int brace_nest;
7162 CalloutListEntry* e;
7163 RegexExt* ext;
7164 OnigEncoding enc = env->enc;
7165 UChar* p = *src;
7166
7167 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
7168
7169 brace_nest = 0;
7170 while (PPEEK_IS('{')) {
7171 brace_nest++;
7172 PINC_S;
7173 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
7174 }
7175
7176 in = ONIG_CALLOUT_IN_PROGRESS;
7177 code_start = p;
7178 while (1) {
7179 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
7180
7181 code_end = p;
7182 PFETCH_S(c);
7183 if (c == '}') {
7184 i = brace_nest;
7185 while (i > 0) {
7186 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
7187 PFETCH_S(c);
7188 if (c == '}') i--;
7189 else break;
7190 }
7191 if (i == 0) break;
7192 }
7193 }
7194
7195 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7196
7197 PFETCH_S(c);
7198 if (c == '[') {
7199 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7200 tag_end = tag_start = p;
7201 while (! PEND) {
7202 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7203 tag_end = p;
7204 PFETCH_S(c);
7205 if (c == ']') break;
7206 }
7207 if (! is_allowed_callout_tag_name(enc, tag_start, tag_end))
7208 return ONIGERR_INVALID_CALLOUT_TAG_NAME;
7209
7210 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7211 PFETCH_S(c);
7212 }
7213 else {
7214 tag_start = tag_end = 0;
7215 }
7216
7217 if (c == 'X') {
7218 in |= ONIG_CALLOUT_IN_RETRACTION;
7219 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7220 PFETCH_S(c);
7221 }
7222 else if (c == '<') {
7223 in = ONIG_CALLOUT_IN_RETRACTION;
7224 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7225 PFETCH_S(c);
7226 }
7227 else if (c == '>') { /* no needs (default) */
7228 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7229 PFETCH_S(c);
7230 }
7231
7232 if (c != cterm)
7233 return ONIGERR_INVALID_CALLOUT_PATTERN;
7234
7235 r = reg_callout_list_entry(env, &num);
7236 if (r != 0) return r;
7237
7238 ext = onig_get_regex_ext(env->reg);
7239 CHECK_NULL_RETURN_MEMERR(ext);
7240 if (IS_NULL(ext->pattern)) {
7241 r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end);
7242 if (r != ONIG_NORMAL) return r;
7243 }
7244
7245 if (tag_start != tag_end) {
7246 r = callout_tag_entry(env, env->reg, tag_start, tag_end, num);
7247 if (r != ONIG_NORMAL) return r;
7248 }
7249
7250 contents = onigenc_strdup(enc, code_start, code_end);
7251 CHECK_NULL_RETURN_MEMERR(contents);
7252
7253 e = onig_reg_callout_list_at(env->reg, num);
7254 if (IS_NULL(e)) {
7255 xfree(contents);
7256 return ONIGERR_MEMORY;
7257 }
7258
7259 r = node_new_callout(np, ONIG_CALLOUT_OF_CONTENTS, num, ONIG_NON_NAME_ID, env);
7260 if (r != 0) {
7261 xfree(contents);
7262 return r;
7263 }
7264
7265 e->of = ONIG_CALLOUT_OF_CONTENTS;
7266 e->in = in;
7267 e->name_id = ONIG_NON_NAME_ID;
7268 e->u.content.start = contents;
7269 e->u.content.end = contents + (code_end - code_start);
7270
7271 *src = p;
7272 return 0;
7273 }
7274
7275 static long
prs_long(OnigEncoding enc,UChar * s,UChar * end,int sign_on,long max,long * rl)7276 prs_long(OnigEncoding enc, UChar* s, UChar* end, int sign_on, long max, long* rl)
7277 {
7278 long v;
7279 long d;
7280 int flag;
7281 UChar* p;
7282 OnigCodePoint c;
7283
7284 if (s >= end) return ONIGERR_INVALID_CALLOUT_ARG;
7285
7286 flag = 1;
7287 v = 0;
7288 p = s;
7289 while (p < end) {
7290 c = ONIGENC_MBC_TO_CODE(enc, p, end);
7291 p += ONIGENC_MBC_ENC_LEN(enc, p);
7292 if (c >= '0' && c <= '9') {
7293 d = (long )(c - '0');
7294 if (v > (max - d) / 10)
7295 return ONIGERR_INVALID_CALLOUT_ARG;
7296
7297 v = v * 10 + d;
7298 }
7299 else if (sign_on != 0 && (c == '-' || c == '+')) {
7300 if (c == '-') flag = -1;
7301 }
7302 else
7303 return ONIGERR_INVALID_CALLOUT_ARG;
7304
7305 sign_on = 0;
7306 }
7307
7308 *rl = flag * v;
7309 return ONIG_NORMAL;
7310 }
7311
7312 static void
clear_callout_args(int n,unsigned int types[],OnigValue vals[])7313 clear_callout_args(int n, unsigned int types[], OnigValue vals[])
7314 {
7315 int i;
7316
7317 for (i = 0; i < n; i++) {
7318 switch (types[i]) {
7319 case ONIG_TYPE_STRING:
7320 if (IS_NOT_NULL(vals[i].s.start))
7321 xfree(vals[i].s.start);
7322 break;
7323 default:
7324 break;
7325 }
7326 }
7327 }
7328
7329 static int
prs_callout_args(int skip_mode,int cterm,UChar ** src,UChar * end,int max_arg_num,unsigned int types[],OnigValue vals[],ParseEnv * env)7330 prs_callout_args(int skip_mode, int cterm, UChar** src, UChar* end,
7331 int max_arg_num, unsigned int types[], OnigValue vals[],
7332 ParseEnv* env)
7333 {
7334 #define MAX_CALLOUT_ARG_BYTE_LENGTH 128
7335
7336 int r;
7337 int n;
7338 int esc;
7339 int cn;
7340 UChar* s;
7341 UChar* e;
7342 UChar* eesc;
7343 OnigCodePoint c;
7344 UChar* bufend;
7345 UChar buf[MAX_CALLOUT_ARG_BYTE_LENGTH];
7346 OnigEncoding enc = env->enc;
7347 UChar* p = *src;
7348
7349 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
7350
7351 c = 0;
7352 n = 0;
7353 while (n < ONIG_CALLOUT_MAX_ARGS_NUM) {
7354 cn = 0;
7355 esc = 0;
7356 eesc = 0;
7357 bufend = buf;
7358 s = e = p;
7359 while (1) {
7360 if (PEND) {
7361 r = ONIGERR_INVALID_CALLOUT_PATTERN;
7362 goto err_clear;
7363 }
7364
7365 e = p;
7366 PFETCH_S(c);
7367 if (esc != 0) {
7368 esc = 0;
7369 if (c == '\\' || c == cterm || c == ',') {
7370 /* */
7371 }
7372 else {
7373 e = eesc;
7374 cn++;
7375 }
7376 goto add_char;
7377 }
7378 else {
7379 if (c == '\\') {
7380 esc = 1;
7381 eesc = e;
7382 }
7383 else if (c == cterm || c == ',')
7384 break;
7385 else {
7386 size_t clen;
7387
7388 add_char:
7389 if (skip_mode == FALSE) {
7390 clen = p - e;
7391 if (bufend + clen > buf + MAX_CALLOUT_ARG_BYTE_LENGTH) {
7392 r = ONIGERR_INVALID_CALLOUT_ARG; /* too long argument */
7393 goto err_clear;
7394 }
7395
7396 xmemcpy(bufend, e, clen);
7397 bufend += clen;
7398 }
7399 cn++;
7400 }
7401 }
7402 }
7403
7404 if (cn != 0) {
7405 if (max_arg_num >= 0 && n >= max_arg_num) {
7406 r = ONIGERR_INVALID_CALLOUT_ARG;
7407 goto err_clear;
7408 }
7409
7410 if (skip_mode == FALSE) {
7411 if ((types[n] & ONIG_TYPE_LONG) != 0) {
7412 int fixed = 0;
7413 if (cn > 0) {
7414 long rl;
7415 r = prs_long(enc, buf, bufend, 1, LONG_MAX, &rl);
7416 if (r == ONIG_NORMAL) {
7417 vals[n].l = rl;
7418 fixed = 1;
7419 types[n] = ONIG_TYPE_LONG;
7420 }
7421 }
7422
7423 if (fixed == 0) {
7424 types[n] = (types[n] & ~ONIG_TYPE_LONG);
7425 if (types[n] == ONIG_TYPE_VOID) {
7426 r = ONIGERR_INVALID_CALLOUT_ARG;
7427 goto err_clear;
7428 }
7429 }
7430 }
7431
7432 switch (types[n]) {
7433 case ONIG_TYPE_LONG:
7434 break;
7435
7436 case ONIG_TYPE_CHAR:
7437 if (cn != 1) {
7438 r = ONIGERR_INVALID_CALLOUT_ARG;
7439 goto err_clear;
7440 }
7441 vals[n].c = ONIGENC_MBC_TO_CODE(enc, buf, bufend);
7442 break;
7443
7444 case ONIG_TYPE_STRING:
7445 {
7446 UChar* rs = onigenc_strdup(enc, buf, bufend);
7447 if (IS_NULL(rs)) {
7448 r = ONIGERR_MEMORY; goto err_clear;
7449 }
7450 vals[n].s.start = rs;
7451 vals[n].s.end = rs + (e - s);
7452 }
7453 break;
7454
7455 case ONIG_TYPE_TAG:
7456 if (eesc != 0 || ! is_allowed_callout_tag_name(enc, s, e)) {
7457 r = ONIGERR_INVALID_CALLOUT_TAG_NAME;
7458 goto err_clear;
7459 }
7460
7461 vals[n].s.start = s;
7462 vals[n].s.end = e;
7463 break;
7464
7465 case ONIG_TYPE_VOID:
7466 case ONIG_TYPE_POINTER:
7467 r = ONIGERR_PARSER_BUG;
7468 goto err_clear;
7469 break;
7470 }
7471 }
7472
7473 n++;
7474 }
7475
7476 if (c == cterm) break;
7477 }
7478
7479 if (c != cterm) {
7480 r = ONIGERR_INVALID_CALLOUT_PATTERN;
7481 goto err_clear;
7482 }
7483
7484 *src = p;
7485 return n;
7486
7487 err_clear:
7488 if (skip_mode == FALSE)
7489 clear_callout_args(n, types, vals);
7490 return r;
7491 }
7492
7493 /* (*name[TAG]) (*name[TAG]{a,b,..}) */
7494 static int
prs_callout_of_name(Node ** np,int cterm,UChar ** src,UChar * end,ParseEnv * env)7495 prs_callout_of_name(Node** np, int cterm, UChar** src, UChar* end,
7496 ParseEnv* env)
7497 {
7498 int r;
7499 int i;
7500 int in;
7501 int num;
7502 int name_id;
7503 int arg_num;
7504 int max_arg_num;
7505 int opt_arg_num;
7506 int is_not_single;
7507 OnigCodePoint c;
7508 UChar* name_start;
7509 UChar* name_end;
7510 UChar* tag_start;
7511 UChar* tag_end;
7512 Node* node;
7513 CalloutListEntry* e;
7514 RegexExt* ext;
7515 unsigned int types[ONIG_CALLOUT_MAX_ARGS_NUM];
7516 OnigValue vals[ONIG_CALLOUT_MAX_ARGS_NUM];
7517 OnigEncoding enc = env->enc;
7518 UChar* p = *src;
7519
7520 /* PFETCH_READY; */
7521 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
7522
7523 node = 0;
7524 name_start = p;
7525 while (1) {
7526 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7527 name_end = p;
7528 PFETCH_S(c);
7529 if (c == cterm || c == '[' || c == '{') break;
7530 }
7531
7532 if (! is_allowed_callout_name(enc, name_start, name_end))
7533 return ONIGERR_INVALID_CALLOUT_NAME;
7534
7535 if (c == '[') {
7536 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7537 tag_end = tag_start = p;
7538 while (! PEND) {
7539 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7540 tag_end = p;
7541 PFETCH_S(c);
7542 if (c == ']') break;
7543 }
7544 if (! is_allowed_callout_tag_name(enc, tag_start, tag_end))
7545 return ONIGERR_INVALID_CALLOUT_TAG_NAME;
7546
7547 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7548 PFETCH_S(c);
7549 }
7550 else {
7551 tag_start = tag_end = 0;
7552 }
7553
7554 if (c == '{') {
7555 UChar* save;
7556
7557 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7558
7559 /* read for single check only */
7560 save = p;
7561 arg_num = prs_callout_args(TRUE, '}', &p, end, -1, NULL, NULL, env);
7562 if (arg_num < 0) return arg_num;
7563
7564 is_not_single = PPEEK_IS(cterm) ? 0 : 1;
7565 p = save;
7566 r = get_callout_name_id_by_name(enc, is_not_single, name_start, name_end,
7567 &name_id);
7568 if (r != ONIG_NORMAL) return r;
7569
7570 max_arg_num = get_callout_arg_num_by_name_id(name_id);
7571 for (i = 0; i < max_arg_num; i++) {
7572 types[i] = get_callout_arg_type_by_name_id(name_id, i);
7573 }
7574
7575 arg_num = prs_callout_args(FALSE, '}', &p, end, max_arg_num, types, vals, env);
7576 if (arg_num < 0) return arg_num;
7577
7578 if (PEND) {
7579 r = ONIGERR_END_PATTERN_IN_GROUP;
7580 goto err_clear;
7581 }
7582 PFETCH_S(c);
7583 }
7584 else {
7585 arg_num = 0;
7586
7587 is_not_single = 0;
7588 r = get_callout_name_id_by_name(enc, is_not_single, name_start, name_end,
7589 &name_id);
7590 if (r != ONIG_NORMAL) return r;
7591
7592 max_arg_num = get_callout_arg_num_by_name_id(name_id);
7593 for (i = 0; i < max_arg_num; i++) {
7594 types[i] = get_callout_arg_type_by_name_id(name_id, i);
7595 }
7596 }
7597
7598 in = onig_get_callout_in_by_name_id(name_id);
7599 opt_arg_num = get_callout_opt_arg_num_by_name_id(name_id);
7600 if (arg_num > max_arg_num || arg_num < (max_arg_num - opt_arg_num)) {
7601 r = ONIGERR_INVALID_CALLOUT_ARG;
7602 goto err_clear;
7603 }
7604
7605 if (c != cterm) {
7606 r = ONIGERR_INVALID_CALLOUT_PATTERN;
7607 goto err_clear;
7608 }
7609
7610 r = reg_callout_list_entry(env, &num);
7611 if (r != 0) goto err_clear;
7612
7613 ext = onig_get_regex_ext(env->reg);
7614 if (IS_NULL(ext)) {
7615 r = ONIGERR_MEMORY; goto err_clear;
7616 }
7617 if (IS_NULL(ext->pattern)) {
7618 r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end);
7619 if (r != ONIG_NORMAL) goto err_clear;
7620 }
7621
7622 if (tag_start != tag_end) {
7623 r = callout_tag_entry(env, env->reg, tag_start, tag_end, num);
7624 if (r != ONIG_NORMAL) goto err_clear;
7625 }
7626
7627 e = onig_reg_callout_list_at(env->reg, num);
7628 if (IS_NULL(e)) {
7629 r = ONIGERR_MEMORY; goto err_clear;
7630 }
7631
7632 r = node_new_callout(&node, ONIG_CALLOUT_OF_NAME, num, name_id, env);
7633 if (r != ONIG_NORMAL) goto err_clear;
7634
7635 e->of = ONIG_CALLOUT_OF_NAME;
7636 e->in = in;
7637 e->name_id = name_id;
7638 e->type = onig_get_callout_type_by_name_id(name_id);
7639 e->start_func = onig_get_callout_start_func_by_name_id(name_id);
7640 e->end_func = onig_get_callout_end_func_by_name_id(name_id);
7641 e->u.arg.num = max_arg_num;
7642 e->u.arg.passed_num = arg_num;
7643 for (i = 0; i < max_arg_num; i++) {
7644 e->u.arg.types[i] = types[i];
7645 if (i < arg_num)
7646 e->u.arg.vals[i] = vals[i];
7647 else
7648 e->u.arg.vals[i] = get_callout_opt_default_by_name_id(name_id, i);
7649 }
7650
7651 *np = node;
7652 *src = p;
7653 return 0;
7654
7655 err_clear:
7656 clear_callout_args(arg_num, types, vals);
7657 return r;
7658 }
7659 #endif
7660
7661 static int
prs_bag(Node ** np,PToken * tok,int term,UChar ** src,UChar * end,ParseEnv * env)7662 prs_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
7663 ParseEnv* env)
7664 {
7665 int r, num;
7666 Node *target;
7667 OnigOptionType option;
7668 OnigCodePoint c;
7669 int list_capture;
7670 OnigEncoding enc = env->enc;
7671
7672 UChar* p = *src;
7673 PFETCH_READY;
7674
7675 *np = NULL;
7676 if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
7677
7678 option = env->options;
7679 c = PPEEK;
7680 if (c == '?' && IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
7681 PINC;
7682 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7683
7684 PFETCH(c);
7685 switch (c) {
7686 case ':': /* (?:...) grouping only */
7687 group:
7688 r = fetch_token(tok, &p, end, env);
7689 if (r < 0) return r;
7690 r = prs_alts(np, tok, term, &p, end, env, FALSE);
7691 if (r < 0) return r;
7692 *src = p;
7693 return 1; /* group */
7694 break;
7695
7696 case '=':
7697 *np = node_new_anchor(ANCR_PREC_READ);
7698 break;
7699 case '!': /* preceding read */
7700 *np = node_new_anchor(ANCR_PREC_READ_NOT);
7701 break;
7702 case '>': /* (?>...) stop backtrack */
7703 *np = node_new_bag(BAG_STOP_BACKTRACK);
7704 break;
7705
7706 case '\'':
7707 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
7708 goto named_group1;
7709 }
7710 else
7711 return ONIGERR_UNDEFINED_GROUP_OPTION;
7712 break;
7713
7714 case '<': /* look behind (?<=...), (?<!...) */
7715 if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
7716 PFETCH(c);
7717 if (c == '=')
7718 *np = node_new_anchor(ANCR_LOOK_BEHIND);
7719 else if (c == '!')
7720 *np = node_new_anchor(ANCR_LOOK_BEHIND_NOT);
7721 else {
7722 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
7723 UChar *name;
7724 UChar *name_end;
7725 enum REF_NUM num_type;
7726
7727 PUNFETCH;
7728 c = '<';
7729
7730 named_group1:
7731 list_capture = 0;
7732
7733 #ifdef USE_CAPTURE_HISTORY
7734 named_group2:
7735 #endif
7736 name = p;
7737 r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num,
7738 &num_type, FALSE);
7739 if (r < 0) return r;
7740
7741 num = scan_env_add_mem_entry(env);
7742 if (num < 0) return num;
7743 if (list_capture != 0 && num >= (int )MEM_STATUS_BITS_NUM)
7744 return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
7745
7746 r = name_add(env->reg, name, name_end, num, env);
7747 if (r != 0) return r;
7748 *np = node_new_memory(1);
7749 CHECK_NULL_RETURN_MEMERR(*np);
7750 BAG_(*np)->m.regnum = num;
7751 if (list_capture != 0)
7752 MEM_STATUS_ON_SIMPLE(env->cap_history, num);
7753 env->num_named++;
7754 }
7755 else {
7756 return ONIGERR_UNDEFINED_GROUP_OPTION;
7757 }
7758 }
7759 break;
7760
7761 case '~':
7762 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP)) {
7763 Node* absent;
7764 Node* expr;
7765 int head_bar;
7766 int is_range_cutter;
7767
7768 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7769
7770 if (PPEEK_IS('|')) { /* (?~|generator|absent) */
7771 PINC;
7772 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7773
7774 head_bar = 1;
7775 if (PPEEK_IS(')')) { /* (?~|) : range clear */
7776 PINC;
7777 r = make_range_clear(np, env);
7778 if (r != 0) return r;
7779 goto end;
7780 }
7781 }
7782 else
7783 head_bar = 0;
7784
7785 r = fetch_token(tok, &p, end, env);
7786 if (r < 0) return r;
7787 r = prs_alts(&absent, tok, term, &p, end, env, TRUE);
7788 if (r < 0) {
7789 onig_node_free(absent);
7790 return r;
7791 }
7792
7793 expr = NULL_NODE;
7794 is_range_cutter = 0;
7795 if (head_bar != 0) {
7796 Node* top = absent;
7797 if (NODE_TYPE(top) != NODE_ALT || IS_NULL(NODE_CDR(top))) {
7798 expr = NULL_NODE;
7799 is_range_cutter = 1;
7800 /* return ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN; */
7801 }
7802 else {
7803 absent = NODE_CAR(top);
7804 expr = NODE_CDR(top);
7805 NODE_CAR(top) = NULL_NODE;
7806 NODE_CDR(top) = NULL_NODE;
7807 onig_node_free(top);
7808 if (IS_NULL(NODE_CDR(expr))) {
7809 top = expr;
7810 expr = NODE_CAR(top);
7811 NODE_CAR(top) = NULL_NODE;
7812 onig_node_free(top);
7813 }
7814 }
7815 }
7816
7817 r = make_absent_tree(np, absent, expr, is_range_cutter, env);
7818 if (r != 0) {
7819 return r;
7820 }
7821 goto end;
7822 }
7823 else {
7824 return ONIGERR_UNDEFINED_GROUP_OPTION;
7825 }
7826 break;
7827
7828 #ifdef USE_CALLOUT
7829 case '{':
7830 if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS))
7831 return ONIGERR_UNDEFINED_GROUP_OPTION;
7832
7833 r = prs_callout_of_contents(np, ')', &p, end, env);
7834 if (r != 0) return r;
7835
7836 goto end;
7837 break;
7838 #endif
7839
7840 case '(':
7841 /* (?()...) */
7842 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE)) {
7843 UChar *prev;
7844 Node* condition;
7845 int condition_is_checker;
7846
7847 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7848 PFETCH(c);
7849 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7850
7851 if (IS_CODE_DIGIT_ASCII(enc, c)
7852 || c == '-' || c == '+' || c == '<' || c == '\'') {
7853 #ifdef USE_BACKREF_WITH_LEVEL
7854 int exist_level;
7855 int level;
7856 #endif
7857 UChar* name_end;
7858 int back_num;
7859 enum REF_NUM num_type;
7860 int is_enclosed;
7861
7862 is_enclosed = (c == '<' || c == '\'') ? 1 : 0;
7863 if (! is_enclosed)
7864 PUNFETCH;
7865 prev = p;
7866 #ifdef USE_BACKREF_WITH_LEVEL
7867 exist_level = 0;
7868 name_end = NULL_UCHARP; /* no need. escape gcc warning. */
7869 r = fetch_name_with_level(
7870 (OnigCodePoint )(is_enclosed != 0 ? c : '('),
7871 &p, end, &name_end,
7872 env, &back_num, &level, &num_type);
7873 if (r == 1) exist_level = 1;
7874 #else
7875 r = fetch_name((OnigCodePoint )(is_enclosed != 0 ? c : '('),
7876 &p, end, &name_end, env, &back_num, &num_type, TRUE);
7877 #endif
7878 if (r < 0) {
7879 if (is_enclosed == 0) {
7880 goto any_condition;
7881 }
7882 else
7883 return r;
7884 }
7885
7886 condition_is_checker = 1;
7887 if (num_type != IS_NOT_NUM) {
7888 if (num_type == IS_REL_NUM) {
7889 back_num = backref_rel_to_abs(back_num, env);
7890 }
7891 if (back_num <= 0)
7892 return ONIGERR_INVALID_BACKREF;
7893
7894 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) {
7895 if (back_num > env->num_mem ||
7896 IS_NULL(PARSEENV_MEMENV(env)[back_num].mem_node))
7897 return ONIGERR_INVALID_BACKREF;
7898 }
7899
7900 condition = node_new_backref_checker(1, &back_num, FALSE,
7901 #ifdef USE_BACKREF_WITH_LEVEL
7902 exist_level, level,
7903 #endif
7904 env);
7905 }
7906 else {
7907 int num;
7908 int* backs;
7909
7910 num = name_to_group_numbers(env, prev, name_end, &backs);
7911 if (num <= 0) {
7912 return ONIGERR_UNDEFINED_NAME_REFERENCE;
7913 }
7914 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) {
7915 int i;
7916 for (i = 0; i < num; i++) {
7917 if (backs[i] > env->num_mem ||
7918 IS_NULL(PARSEENV_MEMENV(env)[backs[i]].mem_node))
7919 return ONIGERR_INVALID_BACKREF;
7920 }
7921 }
7922
7923 condition = node_new_backref_checker(num, backs, TRUE,
7924 #ifdef USE_BACKREF_WITH_LEVEL
7925 exist_level, level,
7926 #endif
7927 env);
7928 }
7929
7930 if (is_enclosed != 0) {
7931 if (PEND) goto err_if_else;
7932 PFETCH(c);
7933 if (c != ')') goto err_if_else;
7934 }
7935 }
7936 #ifdef USE_CALLOUT
7937 else if (c == '?') {
7938 if (IS_SYNTAX_OP2(env->syntax,
7939 ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS)) {
7940 if (! PEND && PPEEK_IS('{')) {
7941 /* condition part is callouts of contents: (?(?{...})THEN|ELSE) */
7942 condition_is_checker = 0;
7943 PFETCH(c);
7944 r = prs_callout_of_contents(&condition, ')', &p, end, env);
7945 if (r != 0) return r;
7946 goto end_condition;
7947 }
7948 }
7949 goto any_condition;
7950 }
7951 else if (c == '*' &&
7952 IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME)) {
7953 condition_is_checker = 0;
7954 r = prs_callout_of_name(&condition, ')', &p, end, env);
7955 if (r != 0) return r;
7956 goto end_condition;
7957 }
7958 #endif
7959 else {
7960 any_condition:
7961 PUNFETCH;
7962 condition_is_checker = 0;
7963 r = fetch_token(tok, &p, end, env);
7964 if (r < 0) return r;
7965 r = prs_alts(&condition, tok, term, &p, end, env, FALSE);
7966 if (r < 0) {
7967 onig_node_free(condition);
7968 return r;
7969 }
7970 }
7971
7972 #ifdef USE_CALLOUT
7973 end_condition:
7974 #endif
7975 CHECK_NULL_RETURN_MEMERR(condition);
7976
7977 if (PEND) {
7978 err_if_else:
7979 onig_node_free(condition);
7980 return ONIGERR_END_PATTERN_IN_GROUP;
7981 }
7982
7983 if (PPEEK_IS(')')) { /* case: empty body: make backref checker */
7984 if (condition_is_checker == 0) {
7985 onig_node_free(condition);
7986 return ONIGERR_INVALID_IF_ELSE_SYNTAX;
7987 }
7988 PFETCH(c);
7989 *np = condition;
7990 }
7991 else { /* if-else */
7992 int then_is_empty;
7993 Node *Then, *Else;
7994
7995 Then = 0;
7996 if (PPEEK_IS('|')) {
7997 PFETCH(c);
7998 then_is_empty = 1;
7999 }
8000 else
8001 then_is_empty = 0;
8002
8003 r = fetch_token(tok, &p, end, env);
8004 if (r < 0) {
8005 onig_node_free(condition);
8006 return r;
8007 }
8008 r = prs_alts(&target, tok, term, &p, end, env, TRUE);
8009 if (r < 0) {
8010 onig_node_free(condition);
8011 onig_node_free(target);
8012 return r;
8013 }
8014
8015 if (then_is_empty != 0) {
8016 Else = target;
8017 }
8018 else {
8019 if (NODE_TYPE(target) == NODE_ALT) {
8020 Then = NODE_CAR(target);
8021 if (NODE_CDR(NODE_CDR(target)) == NULL_NODE) {
8022 Else = NODE_CAR(NODE_CDR(target));
8023 cons_node_free_alone(NODE_CDR(target));
8024 }
8025 else {
8026 Else = NODE_CDR(target);
8027 }
8028 cons_node_free_alone(target);
8029 }
8030 else {
8031 Then = target;
8032 Else = 0;
8033 }
8034 }
8035
8036 *np = node_new_bag_if_else(condition, Then, Else);
8037 if (IS_NULL(*np)) {
8038 onig_node_free(condition);
8039 onig_node_free(Then);
8040 onig_node_free(Else);
8041 return ONIGERR_MEMORY;
8042 }
8043 }
8044 goto end;
8045 }
8046 else {
8047 return ONIGERR_UNDEFINED_GROUP_OPTION;
8048 }
8049 break;
8050
8051 #ifdef USE_CAPTURE_HISTORY
8052 case '@':
8053 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) {
8054 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
8055 PFETCH(c);
8056 if (c == '<' || c == '\'') {
8057 list_capture = 1;
8058 goto named_group2; /* (?@<name>...) */
8059 }
8060 PUNFETCH;
8061 }
8062
8063 *np = node_new_memory(0);
8064 CHECK_NULL_RETURN_MEMERR(*np);
8065 num = scan_env_add_mem_entry(env);
8066 if (num < 0) {
8067 return num;
8068 }
8069 else if (num >= (int )MEM_STATUS_BITS_NUM) {
8070 return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
8071 }
8072 BAG_(*np)->m.regnum = num;
8073 MEM_STATUS_ON_SIMPLE(env->cap_history, num);
8074 }
8075 else {
8076 return ONIGERR_UNDEFINED_GROUP_OPTION;
8077 }
8078 break;
8079 #endif
8080
8081 case 'P':
8082 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_CAPITAL_P_NAME)) {
8083 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
8084 PFETCH(c);
8085 if (c == '<') goto named_group1;
8086
8087 return ONIGERR_UNDEFINED_GROUP_OPTION;
8088 }
8089 /* else fall */
8090 case 'W': case 'D': case 'S':
8091 case 'y':
8092 if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_ONIGURUMA))
8093 return ONIGERR_UNDEFINED_GROUP_OPTION;
8094 /* else fall */
8095
8096 #ifdef USE_POSIXLINE_OPTION
8097 case 'p':
8098 #endif
8099 case 'a':
8100 case '-': case 'i': case 'm': case 's': case 'x':
8101 {
8102 int neg = 0;
8103
8104 while (1) {
8105 switch (c) {
8106 case ':':
8107 case ')':
8108 break;
8109
8110 case '-': neg = 1; break;
8111 case 'x': OPTION_NEGATE(option, ONIG_OPTION_EXTEND, neg); break;
8112 case 'i': OPTION_NEGATE(option, ONIG_OPTION_IGNORECASE, neg); break;
8113 case 's':
8114 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
8115 OPTION_NEGATE(option, ONIG_OPTION_MULTILINE, neg);
8116 }
8117 else
8118 return ONIGERR_UNDEFINED_GROUP_OPTION;
8119 break;
8120
8121 case 'm':
8122 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
8123 OPTION_NEGATE(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? TRUE : FALSE));
8124 }
8125 else if (IS_SYNTAX_OP2(env->syntax,
8126 ONIG_SYN_OP2_OPTION_ONIGURUMA|ONIG_SYN_OP2_OPTION_RUBY)) {
8127 OPTION_NEGATE(option, ONIG_OPTION_MULTILINE, neg);
8128 }
8129 else
8130 return ONIGERR_UNDEFINED_GROUP_OPTION;
8131 break;
8132 #ifdef USE_POSIXLINE_OPTION
8133 case 'p':
8134 OPTION_NEGATE(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg);
8135 break;
8136 #endif
8137 case 'W':
8138 if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_ONIGURUMA))
8139 return ONIGERR_UNDEFINED_GROUP_OPTION;
8140 OPTION_NEGATE(option, ONIG_OPTION_WORD_IS_ASCII, neg);
8141 break;
8142 case 'D':
8143 if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_ONIGURUMA))
8144 return ONIGERR_UNDEFINED_GROUP_OPTION;
8145 OPTION_NEGATE(option, ONIG_OPTION_DIGIT_IS_ASCII, neg);
8146 break;
8147 case 'S':
8148 if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_ONIGURUMA))
8149 return ONIGERR_UNDEFINED_GROUP_OPTION;
8150 OPTION_NEGATE(option, ONIG_OPTION_SPACE_IS_ASCII, neg);
8151 break;
8152 case 'P':
8153 if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_ONIGURUMA))
8154 return ONIGERR_UNDEFINED_GROUP_OPTION;
8155 OPTION_NEGATE(option, ONIG_OPTION_POSIX_IS_ASCII, neg);
8156 break;
8157
8158 case 'y': /* y{g}, y{w} */
8159 {
8160 if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_ONIGURUMA))
8161 return ONIGERR_UNDEFINED_GROUP_OPTION;
8162
8163 if (neg != 0) return ONIGERR_UNDEFINED_GROUP_OPTION;
8164
8165 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
8166 if (! PPEEK_IS('{')) return ONIGERR_UNDEFINED_GROUP_OPTION;
8167 PFETCH(c);
8168 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
8169 PFETCH(c);
8170 switch (c) {
8171 case 'g':
8172 if (! ONIGENC_IS_UNICODE_ENCODING(enc))
8173 return ONIGERR_UNDEFINED_GROUP_OPTION;
8174
8175 OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER, FALSE);
8176 OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_WORD, TRUE);
8177 break;
8178 #ifdef USE_UNICODE_WORD_BREAK
8179 case 'w':
8180 if (! ONIGENC_IS_UNICODE_ENCODING(enc))
8181 return ONIGERR_UNDEFINED_GROUP_OPTION;
8182
8183 OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_WORD, FALSE);
8184 OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER, TRUE);
8185 break;
8186 #endif
8187 default:
8188 return ONIGERR_UNDEFINED_GROUP_OPTION;
8189 break;
8190 }
8191 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
8192 PFETCH(c);
8193 if (c != '}')
8194 return ONIGERR_UNDEFINED_GROUP_OPTION;
8195 } /* case 'y' */
8196 break;
8197
8198 case 'a':
8199 if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_PYTHON))
8200 return ONIGERR_UNDEFINED_GROUP_OPTION;
8201
8202 OPTION_NEGATE(option, ONIG_OPTION_POSIX_IS_ASCII, neg);
8203 break;
8204
8205 default:
8206 return ONIGERR_UNDEFINED_GROUP_OPTION;
8207 }
8208
8209 if (c == ')') {
8210 *np = node_new_option(option);
8211 CHECK_NULL_RETURN_MEMERR(*np);
8212 *src = p;
8213 return 2; /* option only */
8214 }
8215 else if (c == ':') {
8216 OnigOptionType prev = env->options;
8217
8218 env->options = option;
8219 r = fetch_token(tok, &p, end, env);
8220 if (r < 0) return r;
8221 r = prs_alts(&target, tok, term, &p, end, env, FALSE);
8222 env->options = prev;
8223 if (r < 0) {
8224 onig_node_free(target);
8225 return r;
8226 }
8227 *np = node_new_option(option);
8228 CHECK_NULL_RETURN_MEMERR(*np);
8229 NODE_BODY(*np) = target;
8230 *src = p;
8231 return 0;
8232 }
8233
8234 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
8235 PFETCH(c);
8236 } /* while (1) */
8237 }
8238 break;
8239
8240 default:
8241 return ONIGERR_UNDEFINED_GROUP_OPTION;
8242 }
8243 }
8244 #ifdef USE_CALLOUT
8245 else if (c == '*' &&
8246 IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME)) {
8247 PINC;
8248 r = prs_callout_of_name(np, ')', &p, end, env);
8249 if (r != 0) return r;
8250
8251 goto end;
8252 }
8253 #endif
8254 else {
8255 if (OPTON_DONT_CAPTURE_GROUP(env->options))
8256 goto group;
8257
8258 *np = node_new_memory(0);
8259 CHECK_NULL_RETURN_MEMERR(*np);
8260 num = scan_env_add_mem_entry(env);
8261 if (num < 0) return num;
8262 BAG_(*np)->m.regnum = num;
8263 }
8264
8265 CHECK_NULL_RETURN_MEMERR(*np);
8266 r = fetch_token(tok, &p, end, env);
8267 if (r < 0) return r;
8268 r = prs_alts(&target, tok, term, &p, end, env, FALSE);
8269 if (r < 0) {
8270 onig_node_free(target);
8271 return r;
8272 }
8273
8274 NODE_BODY(*np) = target;
8275
8276 if (NODE_TYPE(*np) == NODE_BAG) {
8277 if (BAG_(*np)->type == BAG_MEMORY) {
8278 /* Don't move this to previous of prs_alts() */
8279 r = scan_env_set_mem_node(env, BAG_(*np)->m.regnum, *np);
8280 if (r != 0) return r;
8281 }
8282 }
8283
8284 end:
8285 *src = p;
8286 return 0;
8287 }
8288
/* Spellings of the quantifiers named in the nested-repeat warning of
   assign_quantifier_body(); indexed by the value quantifier_type_num()
   returned for the inner and the outer quantifier. */
static const char* PopularQStr[] = {
  "?", "*", "+", "??", "*?", "+?"
};

/* Replacement text printed by the same warning; indexed by the
   ReduceTypeTable[][] entry selected for the quantifier pair. */
static const char* ReduceQStr[] = {
  "", "", "*", "*?", "??", "+ and ??", "+? and ?"
};
8296
8297 static int
assign_quantifier_body(Node * qnode,Node * target,int group,ParseEnv * env)8298 assign_quantifier_body(Node* qnode, Node* target, int group, ParseEnv* env)
8299 {
8300 QuantNode* qn;
8301
8302 qn = QUANT_(qnode);
8303 if (qn->lower == 1 && qn->upper == 1)
8304 return 1;
8305
8306 switch (NODE_TYPE(target)) {
8307 case NODE_STRING:
8308 if (group == 0) {
8309 if (str_node_can_be_split(target, env->enc)) {
8310 Node* n = str_node_split_last_char(target, env->enc);
8311 if (IS_NOT_NULL(n)) {
8312 NODE_BODY(qnode) = n;
8313 return 2;
8314 }
8315 }
8316 }
8317 break;
8318
8319 case NODE_QUANT:
8320 { /* check redundant double repeat. */
8321 /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */
8322 QuantNode* qnt = QUANT_(target);
8323 int nestq_num = quantifier_type_num(qn);
8324 int targetq_num = quantifier_type_num(qnt);
8325
8326 #ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
8327 if (targetq_num >= 0 && nestq_num >= 0 &&
8328 IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {
8329 UChar buf[WARN_BUFSIZE];
8330
8331 switch(ReduceTypeTable[targetq_num][nestq_num]) {
8332 case RQ_ASIS:
8333 break;
8334
8335 case RQ_DEL:
8336 if (onig_verb_warn != onig_null_warn) {
8337 onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
8338 env->pattern, env->pattern_end,
8339 (UChar* )"redundant nested repeat operator");
8340 (*onig_verb_warn)((char* )buf);
8341 }
8342 goto warn_exit;
8343 break;
8344
8345 default:
8346 if (onig_verb_warn != onig_null_warn) {
8347 onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
8348 env->pattern, env->pattern_end,
8349 (UChar* )"nested repeat operator %s and %s was replaced with '%s'",
8350 PopularQStr[targetq_num], PopularQStr[nestq_num],
8351 ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);
8352 (*onig_verb_warn)((char* )buf);
8353 }
8354 goto warn_exit;
8355 break;
8356 }
8357 }
8358
8359 warn_exit:
8360 #endif
8361 if (targetq_num >= 0 && nestq_num < 0) {
8362 if (targetq_num == 1 || targetq_num == 2) { /* * or + */
8363 /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */
8364 if (! IS_INFINITE_REPEAT(qn->upper) && qn->upper > 1 && qn->greedy) {
8365 qn->upper = (qn->lower == 0 ? 1 : qn->lower);
8366 }
8367 }
8368 }
8369 else {
8370 int r;
8371
8372 NODE_BODY(qnode) = target;
8373 r = onig_reduce_nested_quantifier(qnode);
8374 return r;
8375 }
8376 }
8377 break;
8378
8379 default:
8380 break;
8381 }
8382
8383 NODE_BODY(qnode) = target;
8384 return 0;
8385 }
8386
8387
8388 #ifndef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
8389 static int
clear_not_flag_cclass(CClassNode * cc,OnigEncoding enc)8390 clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)
8391 {
8392 BBuf *tbuf;
8393 int r;
8394
8395 if (IS_NCCLASS_NOT(cc)) {
8396 bitset_invert(cc->bs);
8397
8398 if (! ONIGENC_IS_SINGLEBYTE(enc)) {
8399 r = not_code_range_buf(enc, cc->mbuf, &tbuf);
8400 if (r != 0) return r;
8401
8402 bbuf_free(cc->mbuf);
8403 cc->mbuf = tbuf;
8404 }
8405
8406 NCCLASS_CLEAR_NOT(cc);
8407 }
8408
8409 return 0;
8410 }
8411 #endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
8412
/*
 * Add the single code point `code` to the character class `cc`.
 * A code point that is encoded in exactly one byte (in an encoding whose
 * minimum character length is 1) is set in the bitset; every other one is
 * added to the multibyte range buffer.
 *
 * Note: `code` and `enc` are evaluated more than once, and the result of
 * add_code_range_to_buf() is discarded.
 */
#define ADD_CODE_INTO_CC(cc, code, enc) do {\
  if (ONIGENC_MBC_MINLEN(enc) > 1 || ONIGENC_CODE_TO_MBCLEN((enc), (code)) != 1) {\
    add_code_range_to_buf(&((cc)->mbuf), (code), (code));\
  }\
  else {\
    BITSET_SET_BIT((cc)->bs, (code));\
  }\
} while (0)
8421
8422 extern int
onig_new_cclass_with_code_list(Node ** rnode,OnigEncoding enc,int n,OnigCodePoint codes[])8423 onig_new_cclass_with_code_list(Node** rnode, OnigEncoding enc,
8424 int n, OnigCodePoint codes[])
8425 {
8426 int i;
8427 Node* node;
8428 CClassNode* cc;
8429
8430 *rnode = NULL_NODE;
8431
8432 node = node_new_cclass();
8433 CHECK_NULL_RETURN_MEMERR(node);
8434
8435 cc = CCLASS_(node);
8436
8437 for (i = 0; i < n; i++) {
8438 ADD_CODE_INTO_CC(cc, codes[i], enc);
8439 }
8440
8441 *rnode = node;
8442 return 0;
8443 }
8444
/* Argument block handed to i_apply_case_fold() through its void* parameter. */
typedef struct {
  ParseEnv*   env;       /* parse environment; supplies the encoding */
  CClassNode* cc;        /* character class that is tested and extended */
  Node*       alt_root;  /* NOTE(review): presumably the head of the alt list
                            built through ptail; not used by
                            i_apply_case_fold() itself */
  Node**      ptail;     /* slot that receives the next alt node; advanced to
                            that node's CDR after each append */
} IApplyCaseFoldArg;
8451
8452 static int
i_apply_case_fold(OnigCodePoint from,OnigCodePoint to[],int to_len,void * arg)8453 i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], int to_len,
8454 void* arg)
8455 {
8456 IApplyCaseFoldArg* iarg;
8457 ParseEnv* env;
8458 OnigEncoding enc;
8459 CClassNode* cc;
8460
8461 iarg = (IApplyCaseFoldArg* )arg;
8462 env = iarg->env;
8463 cc = iarg->cc;
8464 enc = env->enc;
8465
8466 if (to_len == 1) {
8467 int is_in = onig_is_code_in_cc(enc, from, cc);
8468 #ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
8469 if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) ||
8470 (is_in == 0 && IS_NCCLASS_NOT(cc))) {
8471 ADD_CODE_INTO_CC(cc, *to, enc);
8472 }
8473 #else
8474 if (is_in != 0) {
8475 if (ONIGENC_MBC_MINLEN(enc) > 1 ||
8476 ONIGENC_CODE_TO_MBCLEN(enc, *to) != 1) {
8477 if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, enc);
8478 add_code_range(&(cc->mbuf), env, *to, *to);
8479 }
8480 else {
8481 if (IS_NCCLASS_NOT(cc)) {
8482 BITSET_CLEAR_BIT(cc->bs, *to);
8483 }
8484 else
8485 BITSET_SET_BIT(cc->bs, *to);
8486 }
8487 }
8488 #endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
8489 }
8490 else {
8491 int r, i, len;
8492 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
8493
8494 if (onig_is_code_in_cc(enc, from, cc)
8495 #ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
8496 && !IS_NCCLASS_NOT(cc)
8497 #endif
8498 ) {
8499 int n, j, m, index;
8500 Node* list_node;
8501 Node* ns[3];
8502
8503 n = 0;
8504 for (i = 0; i < to_len; i++) {
8505 OnigCodePoint code;
8506 Node* csnode;
8507 CClassNode* cs_cc;
8508
8509 index = 0;
8510 if (ONIGENC_IS_UNICODE_ENCODING(enc) &&
8511 (index = onigenc_unicode_fold1_key(&to[i])) >= 0) {
8512 csnode = node_new_cclass();
8513 cs_cc = CCLASS_(csnode);
8514 if (IS_NULL(csnode)) {
8515 err_free_ns:
8516 for (j = 0; j < n; j++) onig_node_free(ns[j]);
8517 return ONIGERR_MEMORY;
8518 }
8519 m = FOLDS1_UNFOLDS_NUM(index);
8520 for (j = 0; j < m; j++) {
8521 code = FOLDS1_UNFOLDS(index)[j];
8522 ADD_CODE_INTO_CC(cs_cc, code, enc);
8523 }
8524 ADD_CODE_INTO_CC(cs_cc, to[i], enc);
8525 ns[n++] = csnode;
8526 }
8527 else {
8528 len = ONIGENC_CODE_TO_MBC(enc, to[i], buf);
8529 if (n == 0 || NODE_TYPE(ns[n-1]) != NODE_STRING) {
8530 csnode = node_new_str(buf, buf + len);
8531 if (IS_NULL(csnode)) goto err_free_ns;
8532
8533 if (index == 0)
8534 NODE_STATUS_ADD(csnode, IGNORECASE);
8535 else
8536 NODE_STRING_SET_CASE_EXPANDED(csnode);
8537
8538 ns[n++] = csnode;
8539 }
8540 else {
8541 r = onig_node_str_cat(ns[n-1], buf, buf + len);
8542 if (r < 0) goto err_free_ns;
8543 }
8544 }
8545 }
8546
8547 if (n == 1)
8548 list_node = ns[0];
8549 else
8550 list_node = make_list(n, ns);
8551
8552 *(iarg->ptail) = onig_node_new_alt(list_node, NULL_NODE);
8553 if (IS_NULL(*(iarg->ptail))) {
8554 onig_node_free(list_node);
8555 return ONIGERR_MEMORY;
8556 }
8557 iarg->ptail = &(NODE_CDR((*(iarg->ptail))));
8558 }
8559 }
8560
8561 return 0;
8562 }
8563
8564 static int
prs_exp(Node ** np,PToken * tok,int term,UChar ** src,UChar * end,ParseEnv * env,int group_head)8565 prs_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
8566 ParseEnv* env, int group_head)
8567 {
8568 int r, len, group;
8569 Node* qn;
8570 Node** tp;
8571 unsigned int parse_depth;
8572
8573 retry:
8574 group = 0;
8575 *np = NULL;
8576 if (tok->type == (enum TokenSyms )term)
8577 goto end_of_token;
8578
8579 parse_depth = env->parse_depth;
8580
8581 switch (tok->type) {
8582 case TK_ALT:
8583 case TK_EOT:
8584 end_of_token:
8585 *np = node_new_empty();
8586 CHECK_NULL_RETURN_MEMERR(*np);
8587 return tok->type;
8588 break;
8589
8590 case TK_SUBEXP_OPEN:
8591 r = prs_bag(np, tok, TK_SUBEXP_CLOSE, src, end, env);
8592 if (r < 0) return r;
8593 if (r == 1) { /* group */
8594 if (group_head == 0)
8595 group = 1;
8596 else {
8597 Node* target = *np;
8598 *np = node_new_group(target);
8599 if (IS_NULL(*np)) {
8600 onig_node_free(target);
8601 return ONIGERR_MEMORY;
8602 }
8603 group = 2;
8604 }
8605 }
8606 else if (r == 2) { /* option only */
8607 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ISOLATED_OPTION_CONTINUE_BRANCH)) {
8608 env->options = BAG_(*np)->o.options;
8609 r = fetch_token(tok, src, end, env);
8610 if (r < 0) return r;
8611 onig_node_free(*np);
8612 goto retry;
8613 }
8614 else {
8615 Node* target;
8616 OnigOptionType prev = env->options;
8617
8618 env->options = BAG_(*np)->o.options;
8619 r = fetch_token(tok, src, end, env);
8620 if (r < 0) return r;
8621 r = prs_alts(&target, tok, term, src, end, env, FALSE);
8622 env->options = prev;
8623 if (r < 0) {
8624 onig_node_free(target);
8625 return r;
8626 }
8627 NODE_BODY(*np) = target;
8628 }
8629 return tok->type;
8630 }
8631 break;
8632
8633 case TK_SUBEXP_CLOSE:
8634 if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP))
8635 return ONIGERR_UNMATCHED_CLOSE_PARENTHESIS;
8636
8637 if (tok->escaped) goto tk_crude_byte;
8638 else goto tk_byte;
8639 break;
8640
8641 case TK_STRING:
8642 tk_byte:
8643 {
8644 *np = node_new_str_with_options(tok->backp, *src, env->options);
8645 CHECK_NULL_RETURN_MEMERR(*np);
8646
8647 while (1) {
8648 r = fetch_token(tok, src, end, env);
8649 if (r < 0) return r;
8650 if (r != TK_STRING) break;
8651
8652 r = onig_node_str_cat(*np, tok->backp, *src);
8653 if (r < 0) return r;
8654 }
8655
8656 string_end:
8657 tp = np;
8658 goto repeat;
8659 }
8660 break;
8661
8662 case TK_CRUDE_BYTE:
8663 tk_crude_byte:
8664 {
8665 *np = node_new_str_crude_char(tok->u.byte, env->options);
8666 CHECK_NULL_RETURN_MEMERR(*np);
8667 len = 1;
8668 while (1) {
8669 if (len >= ONIGENC_MBC_MINLEN(env->enc)) {
8670 if (len == enclen(env->enc, STR_(*np)->s)) {
8671 r = fetch_token(tok, src, end, env);
8672 goto tk_crude_byte_end;
8673 }
8674 }
8675
8676 r = fetch_token(tok, src, end, env);
8677 if (r < 0) return r;
8678 if (r != TK_CRUDE_BYTE)
8679 return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
8680
8681 r = node_str_cat_char(*np, tok->u.byte);
8682 if (r < 0) return r;
8683
8684 len++;
8685 }
8686
8687 tk_crude_byte_end:
8688 if (! ONIGENC_IS_VALID_MBC_STRING(env->enc, STR_(*np)->s, STR_(*np)->end))
8689 return ONIGERR_INVALID_WIDE_CHAR_VALUE;
8690
8691 NODE_STRING_CLEAR_CRUDE(*np);
8692 goto string_end;
8693 }
8694 break;
8695
8696 case TK_CODE_POINT:
8697 {
8698 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
8699 len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.code);
8700 if (len < 0) return len;
8701 len = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf);
8702 #ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
8703 *np = node_new_str_crude(buf, buf + len, env->options);
8704 #else
8705 *np = node_new_str_with_options(buf, buf + len, env->options);
8706 #endif
8707 CHECK_NULL_RETURN_MEMERR(*np);
8708 }
8709 break;
8710
8711 case TK_QUOTE_OPEN:
8712 {
8713 OnigCodePoint end_op[2];
8714 UChar *qstart, *qend, *nextp;
8715
8716 end_op[0] = (OnigCodePoint )MC_ESC(env->syntax);
8717 end_op[1] = (OnigCodePoint )'E';
8718 qstart = *src;
8719 qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc);
8720 if (IS_NULL(qend)) {
8721 nextp = qend = end;
8722 }
8723 *np = node_new_str_with_options(qstart, qend, env->options);
8724 CHECK_NULL_RETURN_MEMERR(*np);
8725 *src = nextp;
8726 }
8727 break;
8728
8729 case TK_CHAR_TYPE:
8730 {
8731 switch (tok->u.prop.ctype) {
8732 case ONIGENC_CTYPE_WORD:
8733 *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not, env->options);
8734 CHECK_NULL_RETURN_MEMERR(*np);
8735 break;
8736
8737 case ONIGENC_CTYPE_SPACE:
8738 case ONIGENC_CTYPE_DIGIT:
8739 case ONIGENC_CTYPE_XDIGIT:
8740 {
8741 CClassNode* cc;
8742
8743 *np = node_new_cclass();
8744 CHECK_NULL_RETURN_MEMERR(*np);
8745 cc = CCLASS_(*np);
8746 r = add_ctype_to_cc(cc, tok->u.prop.ctype, FALSE, env);
8747 if (r != 0) {
8748 onig_node_free(*np);
8749 *np = NULL_NODE;
8750 return r;
8751 }
8752 if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
8753 }
8754 break;
8755
8756 default:
8757 return ONIGERR_PARSER_BUG;
8758 break;
8759 }
8760 }
8761 break;
8762
8763 case TK_CHAR_PROPERTY:
8764 r = prs_char_property(np, tok, src, end, env);
8765 if (r != 0) return r;
8766 break;
8767
8768 case TK_OPEN_CC:
8769 {
8770 CClassNode* cc;
8771
8772 r = prs_cc(np, tok, src, end, env);
8773 if (r != 0) return r;
8774
8775 cc = CCLASS_(*np);
8776 if (OPTON_IGNORECASE(env->options)) {
8777 IApplyCaseFoldArg iarg;
8778
8779 iarg.env = env;
8780 iarg.cc = cc;
8781 iarg.alt_root = NULL_NODE;
8782 iarg.ptail = &(iarg.alt_root);
8783
8784 r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag,
8785 i_apply_case_fold, &iarg);
8786 if (r != 0) {
8787 onig_node_free(iarg.alt_root);
8788 return r;
8789 }
8790 if (IS_NOT_NULL(iarg.alt_root)) {
8791 Node* work = onig_node_new_alt(*np, iarg.alt_root);
8792 if (IS_NULL(work)) {
8793 onig_node_free(iarg.alt_root);
8794 return ONIGERR_MEMORY;
8795 }
8796 *np = work;
8797 }
8798 }
8799 }
8800 break;
8801
8802 case TK_ANYCHAR:
8803 *np = node_new_anychar(env->options);
8804 CHECK_NULL_RETURN_MEMERR(*np);
8805 break;
8806
8807 case TK_ANYCHAR_ANYTIME:
8808 *np = node_new_anychar(env->options);
8809 CHECK_NULL_RETURN_MEMERR(*np);
8810 qn = node_new_quantifier(0, INFINITE_REPEAT, FALSE);
8811 CHECK_NULL_RETURN_MEMERR(qn);
8812 NODE_BODY(qn) = *np;
8813 *np = qn;
8814 break;
8815
8816 case TK_BACKREF:
8817 len = tok->u.backref.num;
8818 *np = node_new_backref(len,
8819 (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)),
8820 tok->u.backref.by_name,
8821 #ifdef USE_BACKREF_WITH_LEVEL
8822 tok->u.backref.exist_level,
8823 tok->u.backref.level,
8824 #endif
8825 env);
8826 CHECK_NULL_RETURN_MEMERR(*np);
8827 break;
8828
8829 #ifdef USE_CALL
8830 case TK_CALL:
8831 {
8832 int gnum = tok->u.call.gnum;
8833
8834 *np = node_new_call(tok->u.call.name, tok->u.call.name_end,
8835 gnum, tok->u.call.by_number);
8836 CHECK_NULL_RETURN_MEMERR(*np);
8837 env->num_call++;
8838 if (tok->u.call.by_number != 0 && gnum == 0) {
8839 env->has_call_zero = 1;
8840 }
8841 }
8842 break;
8843 #endif
8844
8845 case TK_ANCHOR:
8846 *np = node_new_anchor_with_options(tok->u.anchor, env->options);
8847 CHECK_NULL_RETURN_MEMERR(*np);
8848 break;
8849
8850 case TK_REPEAT:
8851 case TK_INTERVAL:
8852 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) {
8853 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS))
8854 return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED;
8855 else {
8856 *np = node_new_empty();
8857 CHECK_NULL_RETURN_MEMERR(*np);
8858 }
8859 }
8860 else {
8861 goto tk_byte;
8862 }
8863 break;
8864
8865 case TK_KEEP:
8866 r = node_new_keep(np, env);
8867 if (r < 0) return r;
8868 break;
8869
8870 case TK_GENERAL_NEWLINE:
8871 r = node_new_general_newline(np, env);
8872 if (r < 0) return r;
8873 break;
8874
8875 case TK_NO_NEWLINE:
8876 r = node_new_no_newline(np, env);
8877 if (r < 0) return r;
8878 break;
8879
8880 case TK_TRUE_ANYCHAR:
8881 r = node_new_true_anychar(np);
8882 if (r < 0) return r;
8883 break;
8884
8885 case TK_TEXT_SEGMENT:
8886 r = make_text_segment(np, env);
8887 if (r < 0) return r;
8888 break;
8889
8890 default:
8891 return ONIGERR_PARSER_BUG;
8892 break;
8893 }
8894
8895 {
8896 tp = np;
8897
8898 re_entry:
8899 r = fetch_token(tok, src, end, env);
8900 if (r < 0) return r;
8901
8902 repeat:
8903 if (r == TK_REPEAT || r == TK_INTERVAL) {
8904 Node* target;
8905
8906 if (is_invalid_quantifier_target(*tp))
8907 return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;
8908
8909 INC_PARSE_DEPTH(parse_depth);
8910
8911 qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,
8912 r == TK_INTERVAL);
8913 CHECK_NULL_RETURN_MEMERR(qn);
8914 QUANT_(qn)->greedy = tok->u.repeat.greedy;
8915 if (group == 2) {
8916 target = node_drop_group(*tp);
8917 *tp = NULL_NODE;
8918 }
8919 else {
8920 target = *tp;
8921 }
8922 r = assign_quantifier_body(qn, target, group, env);
8923 if (r < 0) {
8924 onig_node_free(qn);
8925 *tp = NULL_NODE;
8926 return r;
8927 }
8928
8929 if (tok->u.repeat.possessive != 0) {
8930 Node* en;
8931 en = node_new_bag(BAG_STOP_BACKTRACK);
8932 if (IS_NULL(en)) {
8933 onig_node_free(qn);
8934 return ONIGERR_MEMORY;
8935 }
8936 NODE_BODY(en) = qn;
8937 qn = en;
8938 }
8939
8940 if (r == 0) {
8941 *tp = qn;
8942 }
8943 else if (r == 1) { /* x{1,1} ==> x */
8944 onig_node_free(qn);
8945 *tp = target;
8946 }
8947 else if (r == 2) { /* split case: /abc+/ */
8948 Node *tmp;
8949
8950 *tp = node_new_list(*tp, NULL);
8951 if (IS_NULL(*tp)) {
8952 onig_node_free(qn);
8953 return ONIGERR_MEMORY;
8954 }
8955 tmp = NODE_CDR(*tp) = node_new_list(qn, NULL);
8956 if (IS_NULL(tmp)) {
8957 onig_node_free(qn);
8958 return ONIGERR_MEMORY;
8959 }
8960 tp = &(NODE_CAR(tmp));
8961 }
8962 group = 0;
8963 goto re_entry;
8964 }
8965 }
8966
8967 return r;
8968 }
8969
8970 static int
prs_branch(Node ** top,PToken * tok,int term,UChar ** src,UChar * end,ParseEnv * env,int group_head)8971 prs_branch(Node** top, PToken* tok, int term, UChar** src, UChar* end,
8972 ParseEnv* env, int group_head)
8973 {
8974 int r;
8975 Node *node, **headp;
8976
8977 *top = NULL;
8978 INC_PARSE_DEPTH(env->parse_depth);
8979
8980 r = prs_exp(&node, tok, term, src, end, env, group_head);
8981 if (r < 0) {
8982 onig_node_free(node);
8983 return r;
8984 }
8985
8986 if (r == TK_EOT || r == term || r == TK_ALT) {
8987 *top = node;
8988 }
8989 else {
8990 *top = node_new_list(node, NULL);
8991 if (IS_NULL(*top)) {
8992 onig_node_free(node);
8993 return ONIGERR_MEMORY;
8994 }
8995
8996 headp = &(NODE_CDR(*top));
8997 while (r != TK_EOT && r != term && r != TK_ALT) {
8998 r = prs_exp(&node, tok, term, src, end, env, FALSE);
8999 if (r < 0) {
9000 onig_node_free(node);
9001 return r;
9002 }
9003
9004 if (NODE_TYPE(node) == NODE_LIST) {
9005 *headp = node;
9006 while (IS_NOT_NULL(NODE_CDR(node))) node = NODE_CDR(node);
9007 headp = &(NODE_CDR(node));
9008 }
9009 else {
9010 *headp = node_new_list(node, NULL);
9011 headp = &(NODE_CDR(*headp));
9012 }
9013 }
9014 }
9015
9016 DEC_PARSE_DEPTH(env->parse_depth);
9017 return r;
9018 }
9019
9020 /* term_tok: TK_EOT or TK_SUBEXP_CLOSE */
9021 static int
prs_alts(Node ** top,PToken * tok,int term,UChar ** src,UChar * end,ParseEnv * env,int group_head)9022 prs_alts(Node** top, PToken* tok, int term, UChar** src, UChar* end,
9023 ParseEnv* env, int group_head)
9024 {
9025 int r;
9026 Node *node, **headp;
9027 OnigOptionType save_options;
9028
9029 *top = NULL;
9030 INC_PARSE_DEPTH(env->parse_depth);
9031 save_options = env->options;
9032
9033 r = prs_branch(&node, tok, term, src, end, env, group_head);
9034 if (r < 0) {
9035 onig_node_free(node);
9036 return r;
9037 }
9038
9039 if (r == term) {
9040 *top = node;
9041 }
9042 else if (r == TK_ALT) {
9043 *top = onig_node_new_alt(node, NULL);
9044 if (IS_NULL(*top)) {
9045 onig_node_free(node);
9046 return ONIGERR_MEMORY;
9047 }
9048
9049 headp = &(NODE_CDR(*top));
9050 while (r == TK_ALT) {
9051 r = fetch_token(tok, src, end, env);
9052 if (r < 0) return r;
9053 r = prs_branch(&node, tok, term, src, end, env, FALSE);
9054 if (r < 0) {
9055 onig_node_free(node);
9056 return r;
9057 }
9058 *headp = onig_node_new_alt(node, NULL);
9059 if (IS_NULL(*headp)) {
9060 onig_node_free(node);
9061 onig_node_free(*top);
9062 return ONIGERR_MEMORY;
9063 }
9064
9065 headp = &(NODE_CDR(*headp));
9066 }
9067
9068 if (tok->type != (enum TokenSyms )term)
9069 goto err;
9070 }
9071 else {
9072 onig_node_free(node);
9073 err:
9074 if (term == TK_SUBEXP_CLOSE)
9075 return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
9076 else
9077 return ONIGERR_PARSER_BUG;
9078 }
9079
9080 env->options = save_options;
9081 DEC_PARSE_DEPTH(env->parse_depth);
9082 return r;
9083 }
9084
9085 static int
prs_regexp(Node ** top,UChar ** src,UChar * end,ParseEnv * env)9086 prs_regexp(Node** top, UChar** src, UChar* end, ParseEnv* env)
9087 {
9088 int r;
9089 PToken tok;
9090
9091 ptoken_init(&tok);
9092 r = fetch_token(&tok, src, end, env);
9093 if (r < 0) return r;
9094 r = prs_alts(top, &tok, TK_EOT, src, end, env, FALSE);
9095 if (r < 0) return r;
9096
9097 return 0;
9098 }
9099
9100 #ifdef USE_CALL
9101 static int
make_call_zero_body(Node * node,ParseEnv * env,Node ** rnode)9102 make_call_zero_body(Node* node, ParseEnv* env, Node** rnode)
9103 {
9104 int r;
9105
9106 Node* x = node_new_memory(0 /* 0: is not named */);
9107 CHECK_NULL_RETURN_MEMERR(x);
9108
9109 NODE_BODY(x) = node;
9110 BAG_(x)->m.regnum = 0;
9111 r = scan_env_set_mem_node(env, 0, x);
9112 if (r != 0) {
9113 onig_node_free(x);
9114 return r;
9115 }
9116
9117 *rnode = x;
9118 return 0;
9119 }
9120 #endif
9121
9122 extern int
onig_parse_tree(Node ** root,const UChar * pattern,const UChar * end,regex_t * reg,ParseEnv * env)9123 onig_parse_tree(Node** root, const UChar* pattern, const UChar* end,
9124 regex_t* reg, ParseEnv* env)
9125 {
9126 int r;
9127 UChar* p;
9128 #ifdef USE_CALLOUT
9129 RegexExt* ext;
9130 #endif
9131
9132 reg->string_pool = 0;
9133 reg->string_pool_end = 0;
9134 reg->num_mem = 0;
9135 reg->num_repeat = 0;
9136 reg->num_empty_check = 0;
9137 reg->repeat_range_alloc = 0;
9138 reg->repeat_range = (RepeatRange* )NULL;
9139
9140 names_clear(reg);
9141
9142 scan_env_clear(env);
9143 env->options = reg->options;
9144 env->case_fold_flag = reg->case_fold_flag;
9145 env->enc = reg->enc;
9146 env->syntax = reg->syntax;
9147 env->pattern = (UChar* )pattern;
9148 env->pattern_end = (UChar* )end;
9149 env->reg = reg;
9150
9151 *root = NULL;
9152
9153 if (! ONIGENC_IS_VALID_MBC_STRING(env->enc, pattern, end))
9154 return ONIGERR_INVALID_WIDE_CHAR_VALUE;
9155
9156 p = (UChar* )pattern;
9157 r = prs_regexp(root, &p, (UChar* )end, env);
9158 if (r != 0) return r;
9159
9160 #ifdef USE_CALL
9161 if (env->has_call_zero != 0) {
9162 Node* zero_node;
9163 r = make_call_zero_body(*root, env, &zero_node);
9164 if (r != 0) return r;
9165
9166 *root = zero_node;
9167 }
9168 #endif
9169
9170 reg->num_mem = env->num_mem;
9171
9172 #ifdef USE_CALLOUT
9173 ext = reg->extp;
9174 if (IS_NOT_NULL(ext) && ext->callout_num > 0) {
9175 r = setup_ext_callout_list_values(reg);
9176 }
9177 #endif
9178
9179 return r;
9180 }
9181
9182 extern void
onig_scan_env_set_error_string(ParseEnv * env,int ecode ARG_UNUSED,UChar * arg,UChar * arg_end)9183 onig_scan_env_set_error_string(ParseEnv* env, int ecode ARG_UNUSED,
9184 UChar* arg, UChar* arg_end)
9185 {
9186 env->error = arg;
9187 env->error_end = arg_end;
9188 }
9189