1 #ifndef REGINT_H 2 #define REGINT_H 3 /********************************************************************** 4 regint.h - Oniguruma (regular expression library) 5 **********************************************************************/ 6 /*- 7 * Copyright (c) 2002-2021 K.Kosako 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /* for debug */ 33 /* #define ONIG_DEBUG_PARSE */ 34 /* #define ONIG_DEBUG_COMPILE */ 35 /* #define ONIG_DEBUG_SEARCH */ 36 /* #define ONIG_DEBUG_MATCH */ 37 /* #define ONIG_DEBUG_MATCH_COUNTER */ 38 /* #define ONIG_DEBUG_CALL */ 39 /* #define ONIG_DONT_OPTIMIZE */ 40 41 /* for byte-code statistical data. */ 42 /* #define ONIG_DEBUG_STATISTICS */ 43 44 #if defined(ONIG_DEBUG_PARSE) || defined(ONIG_DEBUG_MATCH) || \ 45 defined(ONIG_DEBUG_SEARCH) || defined(ONIG_DEBUG_COMPILE) || \ 46 defined(ONIG_DEBUG_MATCH_COUNTER) || defined(ONIG_DEBUG_CALL) || \ 47 defined(ONIG_DEBUG_STATISTICS) 48 #ifndef ONIG_DEBUG 49 #define ONIG_DEBUG 50 #define DBGFP stderr 51 #endif 52 #endif 53 54 #ifndef ONIG_DISABLE_DIRECT_THREADING 55 #ifdef __GNUC__ 56 #define USE_GOTO_LABELS_AS_VALUES 57 #endif 58 #endif 59 60 /* config */ 61 /* spec. config */ 62 #define USE_REGSET 63 #define USE_CALL 64 #define USE_CALLOUT 65 #define USE_BACKREF_WITH_LEVEL /* \k<name+n>, \k<name-n> */ 66 #define USE_RIGID_CHECK_CAPTURES_IN_EMPTY_REPEAT /* /(?:()|())*\2/ */ 67 #define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */ 68 #define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR 69 #define USE_RETRY_LIMIT 70 #ifdef USE_GOTO_LABELS_AS_VALUES 71 #define USE_THREADED_CODE 72 #define USE_DIRECT_THREADED_CODE 73 #endif 74 75 /* internal config */ 76 #define USE_CHECK_VALIDITY_OF_STRING_IN_TREE 77 #define USE_OP_PUSH_OR_JUMP_EXACT 78 #define USE_QUANT_PEEK_NEXT 79 #define USE_ST_LIBRARY 80 #define USE_TIMEOFDAY 81 #define USE_STRICT_POINTER_ADDRESS 82 #define USE_STRICT_POINTER_COMPARISON 83 84 #define USE_WORD_BEGIN_END /* "\<", "\>" */ 85 #define USE_CAPTURE_HISTORY 86 #define USE_VARIABLE_META_CHARS 87 #define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE 88 /* #define USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR */ 89 90 /* enabled by configure --enable-posix-api=yes */ 91 /* #define USE_POSIX_API */ 92 93 #define DEFAULT_PARSE_DEPTH_LIMIT 4096 94 #define INIT_MATCH_STACK_SIZE 160 95 #define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */ 96 #define DEFAULT_RETRY_LIMIT_IN_MATCH 10000000 97 #define DEFAULT_RETRY_LIMIT_IN_SEARCH 0 /* unlimited */ 98 #define DEFAULT_SUBEXP_CALL_LIMIT_IN_SEARCH 0 /* unlimited */ 99 #define DEFAULT_SUBEXP_CALL_MAX_NEST_LEVEL 20 100 101 102 #include "regenc.h" 103 104 #ifndef ONIG_NO_STANDARD_C_HEADERS 105 106 #include <stddef.h> 107 #include <stdarg.h> 108 #include <limits.h> 109 #include <stdlib.h> 110 #include <string.h> 111 #include <ctype.h> 112 113 #ifdef HAVE_STDINT_H 114 #include <stdint.h> 115 #endif 116 117 #if defined(HAVE_ALLOCA_H) && !defined(__GNUC__) 118 #include <alloca.h> 119 #endif 120 121 #ifdef HAVE_SYS_TYPES_H 122 #ifndef __BORLANDC__ 123 #include <sys/types.h> 124 #endif 125 #endif 126 127 #ifdef HAVE_INTTYPES_H 128 #include <inttypes.h> 129 #endif 130 131 #if defined(_WIN32) || defined(__BORLANDC__) 132 #include <malloc.h> 133 #endif 134 135 #if defined(ONIG_DEBUG) || defined(NEED_TO_INCLUDE_STDIO) 136 # include <stdio.h> 137 #endif 138 139 #ifdef ONIG_DEBUG_STATISTICS 140 #ifdef USE_TIMEOFDAY 141 142 #ifdef HAVE_SYS_TIME_H 143 #include <sys/time.h> 144 #endif 145 #ifdef HAVE_UNISTD_H 146 #include <unistd.h> 147 #endif 148 149 #else /* USE_TIMEOFDAY */ 150 151 #ifdef HAVE_SYS_TIMES_H 152 #include <sys/times.h> 153 #endif 154 155 #endif /* USE_TIMEOFDAY */ 156 #endif /* ONIG_DEBUG_STATISTICS */ 157 158 /* I don't think these x....'s need to be included in 159 ONIG_NO_STANDARD_C_HEADERS, but they are required by Issue #170 160 and do so since there is no problem. 161 */ 162 #ifndef xmemset 163 #define xmemset memset 164 #endif 165 166 #ifndef xmemcpy 167 #define xmemcpy memcpy 168 #endif 169 170 #ifndef xmemmove 171 #define xmemmove memmove 172 #endif 173 174 #endif /* ONIG_NO_STANDARD_C_HEADERS */ 175 176 177 #ifdef MIN 178 #undef MIN 179 #endif 180 #ifdef MAX 181 #undef MAX 182 #endif 183 184 #define MIN(a,b) (((a)>(b))?(b):(a)) 185 #define MAX(a,b) (((a)<(b))?(b):(a)) 186 187 #define IS_NULL(p) (((void*)(p)) == (void*)0) 188 #define IS_NOT_NULL(p) (((void*)(p)) != (void*)0) 189 #define CHECK_NULL_RETURN(p) if (IS_NULL(p)) return NULL 190 #define CHECK_NULL_RETURN_MEMERR(p) if (IS_NULL(p)) return ONIGERR_MEMORY 191 #define NULL_UCHARP ((UChar* )0) 192 193 #ifdef USE_STRICT_POINTER_COMPARISON 194 #define PTR_GE(p,q) ((p) != NULL && (p) >= (q)) 195 #else 196 #define PTR_GE(p,q) (p) >= (q) 197 #endif 198 199 #ifndef ONIG_INT_MAX 200 #define ONIG_INT_MAX INT_MAX 201 #endif 202 203 #define CHAR_MAP_SIZE 256 204 #define INFINITE_LEN ONIG_INFINITE_DISTANCE 205 #define STEP_BACK_MAX_CHAR_LEN 65535 /* INT_MAX is too big */ 206 #define LOOK_BEHIND_MAX_CHAR_LEN STEP_BACK_MAX_CHAR_LEN 207 208 /* escape other system UChar definition */ 209 #ifdef ONIG_ESCAPE_UCHAR_COLLISION 210 #undef ONIG_ESCAPE_UCHAR_COLLISION 211 #endif 212 213 #define xmalloc malloc 214 #define xrealloc realloc 215 #define xcalloc calloc 216 #define xfree free 217 218 #define st_init_table onig_st_init_table 219 #define st_init_table_with_size onig_st_init_table_with_size 220 #define st_init_numtable onig_st_init_numtable 221 #define st_init_numtable_with_size onig_st_init_numtable_with_size 222 #define st_init_strtable onig_st_init_strtable 223 #define st_init_strtable_with_size onig_st_init_strtable_with_size 224 #define st_delete onig_st_delete 225 #define st_delete_safe onig_st_delete_safe 226 #define st_insert onig_st_insert 227 #define st_lookup onig_st_lookup 228 #define st_foreach onig_st_foreach 229 #define st_add_direct onig_st_add_direct 230 #define st_free_table onig_st_free_table 231 #define st_cleanup_safe onig_st_cleanup_safe 232 #define st_copy onig_st_copy 233 #define st_nothing_key_clone onig_st_nothing_key_clone 234 #define st_nothing_key_free onig_st_nothing_key_free 235 /* */ 236 #define onig_st_is_member st_is_member 237 238 239 #if defined(_WIN32) && !defined(__GNUC__) 240 241 #ifndef xalloca 242 #define xalloca _alloca 243 #endif 244 #ifndef xvsnprintf 245 #define xvsnprintf(buf,size,fmt,args) _vsnprintf_s(buf,size,_TRUNCATE,fmt,args) 246 #endif 247 #ifndef xsnprintf 248 #define xsnprintf sprintf_s 249 #endif 250 #ifndef xstrcat 251 #define xstrcat(dest,src,size) strcat_s(dest,size,src) 252 #endif 253 254 #else 255 256 #ifndef xalloca 257 #define xalloca alloca 258 #endif 259 #ifndef xvsnprintf 260 #define xvsnprintf vsnprintf 261 #endif 262 #ifndef xsnprintf 263 #define xsnprintf snprintf 264 #endif 265 #ifndef xstrcat 266 #define xstrcat(dest,src,size) strcat(dest,src) 267 #endif 268 269 #endif /* defined(_WIN32) && !defined(__GNUC__) */ 270 271 272 #ifdef _WIN32 273 #ifdef _MSC_VER 274 275 #if _MSC_VER < 1300 276 typedef int intptr_t; 277 typedef unsigned int uintptr_t; 278 #endif 279 280 #if _MSC_VER < 1600 281 typedef __int32 int32_t; 282 typedef unsigned __int32 uint32_t; 283 typedef __int64 int64_t; 284 typedef unsigned __int64 uint64_t; 285 #endif 286 287 #endif 288 #endif /* _WIN32 */ 289 290 #if SIZEOF_VOIDP == SIZEOF_LONG 291 typedef unsigned long hash_data_type; 292 #elif SIZEOF_VOIDP == SIZEOF_LONG_LONG 293 typedef unsigned long long hash_data_type; 294 #endif 295 296 /* strend hash */ 297 typedef void* hash_table_type; 298 299 300 #ifdef USE_CALLOUT 301 302 typedef struct { 303 int flag; 304 OnigCalloutOf of; 305 int in; 306 int name_id; 307 const UChar* tag_start; 308 const UChar* tag_end; 309 OnigCalloutType type; 310 OnigCalloutFunc start_func; 311 OnigCalloutFunc end_func; 312 union { 313 struct { 314 const UChar* start; 315 const UChar* end; 316 } content; 317 struct { 318 int num; 319 int passed_num; 320 OnigType types[ONIG_CALLOUT_MAX_ARGS_NUM]; 321 OnigValue vals[ONIG_CALLOUT_MAX_ARGS_NUM]; 322 } arg; 323 } u; 324 } CalloutListEntry; 325 326 #endif 327 328 /* stack pop level */ 329 enum StackPopLevel { 330 STACK_POP_LEVEL_FREE = 0, 331 STACK_POP_LEVEL_MEM_START = 1, 332 STACK_POP_LEVEL_ALL = 2 333 }; 334 335 /* optimize flags */ 336 enum OptimizeType { 337 OPTIMIZE_NONE = 0, 338 OPTIMIZE_STR, /* Slow Search */ 339 OPTIMIZE_STR_FAST, /* Sunday quick search / BMH */ 340 OPTIMIZE_STR_FAST_STEP_FORWARD, /* Sunday quick search / BMH */ 341 OPTIMIZE_MAP /* char map */ 342 }; 343 344 /* bit status */ 345 typedef unsigned int MemStatusType; 346 347 #define MEM_STATUS_BITS_NUM (sizeof(MemStatusType) * 8) 348 #define MEM_STATUS_CLEAR(stats) (stats) = 0 349 #define MEM_STATUS_ON_ALL(stats) (stats) = ~((MemStatusType )0) 350 #define MEM_STATUS_AT(stats,n) \ 351 ((n) < (int )MEM_STATUS_BITS_NUM ? ((stats) & ((MemStatusType )1 << n)) : ((stats) & 1)) 352 #define MEM_STATUS_AT0(stats,n) \ 353 ((n) > 0 && (n) < (int )MEM_STATUS_BITS_NUM ? ((stats) & ((MemStatusType )1 << n)) : ((stats) & 1)) 354 355 #define MEM_STATUS_IS_ALL_ON(stats) (((stats) & 1) != 0) 356 357 #define MEM_STATUS_ON(stats,n) do {\ 358 if ((n) < (int )MEM_STATUS_BITS_NUM) {\ 359 if ((n) != 0)\ 360 (stats) |= ((MemStatusType )1 << (n));\ 361 }\ 362 else\ 363 (stats) |= 1;\ 364 } while (0) 365 366 #define MEM_STATUS_ON_SIMPLE(stats,n) do {\ 367 if ((n) < (int )MEM_STATUS_BITS_NUM)\ 368 (stats) |= ((MemStatusType )1 << (n));\ 369 } while (0) 370 371 #define MEM_STATUS_LIMIT_AT(stats,n) \ 372 ((n) < (int )MEM_STATUS_BITS_NUM ? ((stats) & ((MemStatusType )1 << n)) : 0) 373 #define MEM_STATUS_LIMIT_ON(stats,n) do {\ 374 if ((n) < (int )MEM_STATUS_BITS_NUM && (n) != 0) {\ 375 (stats) |= ((MemStatusType )1 << (n));\ 376 }\ 377 } while (0) 378 379 380 #define IS_CODE_WORD_ASCII(enc,code) \ 381 (ONIGENC_IS_CODE_ASCII(code) && ONIGENC_IS_CODE_WORD(enc,code)) 382 #define IS_CODE_DIGIT_ASCII(enc, code) \ 383 (ONIGENC_IS_CODE_ASCII(code) && ONIGENC_IS_CODE_DIGIT(enc,code)) 384 #define IS_CODE_XDIGIT_ASCII(enc, code) \ 385 (ONIGENC_IS_CODE_ASCII(code) && ONIGENC_IS_CODE_XDIGIT(enc,code)) 386 387 #define DIGITVAL(code) ((code) - '0') 388 #define ODIGITVAL(code) DIGITVAL(code) 389 #define XDIGITVAL(enc,code) \ 390 (IS_CODE_DIGIT_ASCII(enc,code) ? DIGITVAL(code) \ 391 : (ONIGENC_IS_CODE_UPPER(enc,code) ? (code) - 'A' + 10 : (code) - 'a' + 10)) 392 393 #define OPTON_CALLBACK_EACH_MATCH(option) \ 394 ((option) & ONIG_OPTION_CALLBACK_EACH_MATCH) 395 #define OPTON_FIND_LONGEST(option) ((option) & ONIG_OPTION_FIND_LONGEST) 396 #define OPTON_FIND_NOT_EMPTY(option) ((option) & ONIG_OPTION_FIND_NOT_EMPTY) 397 #define OPTON_NEGATE_SINGLELINE(option) ((option) & \ 398 ONIG_OPTION_NEGATE_SINGLELINE) 399 #define OPTON_DONT_CAPTURE_GROUP(option) ((option) & \ 400 ONIG_OPTION_DONT_CAPTURE_GROUP) 401 #define OPTON_CAPTURE_GROUP(option) ((option) & ONIG_OPTION_CAPTURE_GROUP) 402 #define OPTON_NOTBOL(option) ((option) & ONIG_OPTION_NOTBOL) 403 #define OPTON_NOTEOL(option) ((option) & ONIG_OPTION_NOTEOL) 404 #define OPTON_POSIX_REGION(option) ((option) & ONIG_OPTION_POSIX_REGION) 405 #define OPTON_CHECK_VALIDITY_OF_STRING(option) ((option) & \ 406 ONIG_OPTION_CHECK_VALIDITY_OF_STRING) 407 #define OPTON_NOT_BEGIN_STRING(option) ((option) & ONIG_OPTION_NOT_BEGIN_STRING) 408 #define OPTON_NOT_END_STRING(option) ((option) & ONIG_OPTION_NOT_END_STRING) 409 #define OPTON_NOT_BEGIN_POSITION(option) ((option) & ONIG_OPTION_NOT_BEGIN_POSITION) 410 411 412 #define INFINITE_REPEAT -1 413 #define IS_INFINITE_REPEAT(n) ((n) == INFINITE_REPEAT) 414 415 /* bitset */ 416 #define BITS_PER_BYTE 8 417 #define SINGLE_BYTE_SIZE (1 << BITS_PER_BYTE) 418 #define BITS_IN_ROOM 32 /* 4 * BITS_PER_BYTE */ 419 #define BITSET_REAL_SIZE (SINGLE_BYTE_SIZE / BITS_IN_ROOM) 420 421 typedef uint32_t Bits; 422 typedef Bits BitSet[BITSET_REAL_SIZE]; 423 typedef Bits* BitSetRef; 424 425 #define SIZE_BITSET sizeof(BitSet) 426 427 #define BITSET_CLEAR(bs) do {\ 428 int i;\ 429 for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { (bs)[i] = 0; } \ 430 } while (0) 431 432 #define BS_ROOM(bs,pos) (bs)[(unsigned int )(pos) >> 5] 433 #define BS_BIT(pos) (1u << ((unsigned int )(pos) & 0x1f)) 434 435 #define BITSET_AT(bs, pos) (BS_ROOM(bs,pos) & BS_BIT(pos)) 436 #define BITSET_SET_BIT(bs, pos) BS_ROOM(bs,pos) |= BS_BIT(pos) 437 #define BITSET_CLEAR_BIT(bs, pos) BS_ROOM(bs,pos) &= ~(BS_BIT(pos)) 438 #define BITSET_INVERT_BIT(bs, pos) BS_ROOM(bs,pos) ^= BS_BIT(pos) 439 440 /* has body */ 441 #define ANCR_PREC_READ (1<<0) 442 #define ANCR_PREC_READ_NOT (1<<1) 443 #define ANCR_LOOK_BEHIND (1<<2) 444 #define ANCR_LOOK_BEHIND_NOT (1<<3) 445 /* no body */ 446 #define ANCR_BEGIN_BUF (1<<4) 447 #define ANCR_BEGIN_LINE (1<<5) 448 #define ANCR_BEGIN_POSITION (1<<6) 449 #define ANCR_END_BUF (1<<7) 450 #define ANCR_SEMI_END_BUF (1<<8) 451 #define ANCR_END_LINE (1<<9) 452 #define ANCR_WORD_BOUNDARY (1<<10) 453 #define ANCR_NO_WORD_BOUNDARY (1<<11) 454 #define ANCR_WORD_BEGIN (1<<12) 455 #define ANCR_WORD_END (1<<13) 456 #define ANCR_ANYCHAR_INF (1<<14) 457 #define ANCR_ANYCHAR_INF_ML (1<<15) 458 #define ANCR_TEXT_SEGMENT_BOUNDARY (1<<16) 459 #define ANCR_NO_TEXT_SEGMENT_BOUNDARY (1<<17) 460 461 462 #define ANCHOR_HAS_BODY(a) ((a)->type < ANCR_BEGIN_BUF) 463 464 #define IS_WORD_ANCHOR_TYPE(type) \ 465 ((type) == ANCR_WORD_BOUNDARY || (type) == ANCR_NO_WORD_BOUNDARY || \ 466 (type) == ANCR_WORD_BEGIN || (type) == ANCR_WORD_END) 467 468 /* operation code */ 469 enum OpCode { 470 OP_FINISH = 0, /* matching process terminator (no more alternative) */ 471 OP_END = 1, /* pattern code terminator (success end) */ 472 OP_STR_1 = 2, /* single byte, N = 1 */ 473 OP_STR_2, /* single byte, N = 2 */ 474 OP_STR_3, /* single byte, N = 3 */ 475 OP_STR_4, /* single byte, N = 4 */ 476 OP_STR_5, /* single byte, N = 5 */ 477 OP_STR_N, /* single byte */ 478 OP_STR_MB2N1, /* mb-length = 2 N = 1 */ 479 OP_STR_MB2N2, /* mb-length = 2 N = 2 */ 480 OP_STR_MB2N3, /* mb-length = 2 N = 3 */ 481 OP_STR_MB2N, /* mb-length = 2 */ 482 OP_STR_MB3N, /* mb-length = 3 */ 483 OP_STR_MBN, /* other length */ 484 OP_CCLASS, 485 OP_CCLASS_MB, 486 OP_CCLASS_MIX, 487 OP_CCLASS_NOT, 488 OP_CCLASS_MB_NOT, 489 OP_CCLASS_MIX_NOT, 490 OP_ANYCHAR, /* "." */ 491 OP_ANYCHAR_ML, /* "." multi-line */ 492 OP_ANYCHAR_STAR, /* ".*" */ 493 OP_ANYCHAR_ML_STAR, /* ".*" multi-line */ 494 OP_ANYCHAR_STAR_PEEK_NEXT, 495 OP_ANYCHAR_ML_STAR_PEEK_NEXT, 496 OP_WORD, 497 OP_WORD_ASCII, 498 OP_NO_WORD, 499 OP_NO_WORD_ASCII, 500 OP_WORD_BOUNDARY, 501 OP_NO_WORD_BOUNDARY, 502 OP_WORD_BEGIN, 503 OP_WORD_END, 504 OP_TEXT_SEGMENT_BOUNDARY, 505 OP_BEGIN_BUF, 506 OP_END_BUF, 507 OP_BEGIN_LINE, 508 OP_END_LINE, 509 OP_SEMI_END_BUF, 510 OP_CHECK_POSITION, 511 OP_BACKREF1, 512 OP_BACKREF2, 513 OP_BACKREF_N, 514 OP_BACKREF_N_IC, 515 OP_BACKREF_MULTI, 516 OP_BACKREF_MULTI_IC, 517 #ifdef USE_BACKREF_WITH_LEVEL 518 OP_BACKREF_WITH_LEVEL, /* \k<xxx+n>, \k<xxx-n> */ 519 OP_BACKREF_WITH_LEVEL_IC, /* \k<xxx+n>, \k<xxx-n> */ 520 #endif 521 OP_BACKREF_CHECK, /* (?(n)), (?('name')) */ 522 #ifdef USE_BACKREF_WITH_LEVEL 523 OP_BACKREF_CHECK_WITH_LEVEL, /* (?(n-level)), (?('name-level')) */ 524 #endif 525 OP_MEM_START, 526 OP_MEM_START_PUSH, /* push back-tracker to stack */ 527 OP_MEM_END_PUSH, /* push back-tracker to stack */ 528 #ifdef USE_CALL 529 OP_MEM_END_PUSH_REC, /* push back-tracker to stack */ 530 #endif 531 OP_MEM_END, 532 #ifdef USE_CALL 533 OP_MEM_END_REC, /* push marker to stack */ 534 #endif 535 OP_FAIL, /* pop stack and move */ 536 OP_JUMP, 537 OP_PUSH, 538 OP_PUSH_SUPER, 539 OP_POP, 540 OP_POP_TO_MARK, 541 #ifdef USE_OP_PUSH_OR_JUMP_EXACT 542 OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */ 543 #endif 544 OP_PUSH_IF_PEEK_NEXT, /* if match exact then push, else none. */ 545 OP_REPEAT, /* {n,m} */ 546 OP_REPEAT_NG, /* {n,m}? (non greedy) */ 547 OP_REPEAT_INC, 548 OP_REPEAT_INC_NG, /* non greedy */ 549 OP_EMPTY_CHECK_START, /* null loop checker start */ 550 OP_EMPTY_CHECK_END, /* null loop checker end */ 551 OP_EMPTY_CHECK_END_MEMST, /* null loop checker end (with capture status) */ 552 #ifdef USE_CALL 553 OP_EMPTY_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */ 554 #endif 555 OP_MOVE, 556 OP_STEP_BACK_START, 557 OP_STEP_BACK_NEXT, 558 OP_CUT_TO_MARK, 559 OP_MARK, 560 OP_SAVE_VAL, 561 OP_UPDATE_VAR, 562 #ifdef USE_CALL 563 OP_CALL, /* \g<name> */ 564 OP_RETURN, 565 #endif 566 #ifdef USE_CALLOUT 567 OP_CALLOUT_CONTENTS, /* (?{...}) (?{{...}}) */ 568 OP_CALLOUT_NAME, /* (*name) (*name[tag](args...)) */ 569 #endif 570 }; 571 572 enum SaveType { 573 SAVE_KEEP = 0, /* SAVE S */ 574 SAVE_S = 1, 575 SAVE_RIGHT_RANGE = 2, 576 }; 577 578 enum UpdateVarType { 579 UPDATE_VAR_KEEP_FROM_STACK_LAST = 0, 580 UPDATE_VAR_S_FROM_STACK = 1, 581 UPDATE_VAR_RIGHT_RANGE_FROM_STACK = 2, 582 UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK = 3, 583 UPDATE_VAR_RIGHT_RANGE_TO_S = 4, 584 UPDATE_VAR_RIGHT_RANGE_INIT = 5, 585 }; 586 587 enum CheckPositionType { 588 CHECK_POSITION_SEARCH_START = 0, 589 CHECK_POSITION_CURRENT_RIGHT_RANGE = 1, 590 }; 591 592 enum TextSegmentBoundaryType { 593 EXTENDED_GRAPHEME_CLUSTER_BOUNDARY = 0, 594 WORD_BOUNDARY = 1, 595 }; 596 597 typedef int RelAddrType; 598 typedef int AbsAddrType; 599 typedef int LengthType; 600 typedef int RelPositionType; 601 typedef int RepeatNumType; 602 typedef int MemNumType; 603 typedef void* PointerType; 604 typedef int SaveType; 605 typedef int UpdateVarType; 606 typedef int ModeType; 607 608 #define SIZE_OPCODE 1 609 #define SIZE_RELADDR sizeof(RelAddrType) 610 #define SIZE_ABSADDR sizeof(AbsAddrType) 611 #define SIZE_LENGTH sizeof(LengthType) 612 #define SIZE_MEMNUM sizeof(MemNumType) 613 #define SIZE_REPEATNUM sizeof(RepeatNumType) 614 #define SIZE_OPTION sizeof(OnigOptionType) 615 #define SIZE_CODE_POINT sizeof(OnigCodePoint) 616 #define SIZE_POINTER sizeof(PointerType) 617 #define SIZE_SAVE_TYPE sizeof(SaveType) 618 #define SIZE_UPDATE_VAR_TYPE sizeof(UpdateVarType) 619 #define SIZE_MODE sizeof(ModeType) 620 621 /* code point's address must be aligned address. */ 622 #define GET_CODE_POINT(code,p) code = *((OnigCodePoint* )(p)) 623 624 625 /* op-code + arg size */ 626 627 /* for relative address increment to go next op. */ 628 #define SIZE_INC 1 629 630 #define OPSIZE_ANYCHAR_STAR 1 631 #define OPSIZE_ANYCHAR_STAR_PEEK_NEXT 1 632 #define OPSIZE_JUMP 1 633 #define OPSIZE_PUSH 1 634 #define OPSIZE_PUSH_SUPER 1 635 #define OPSIZE_POP 1 636 #define OPSIZE_POP_TO_MARK 1 637 #ifdef USE_OP_PUSH_OR_JUMP_EXACT 638 #define OPSIZE_PUSH_OR_JUMP_EXACT1 1 639 #endif 640 #define OPSIZE_PUSH_IF_PEEK_NEXT 1 641 #define OPSIZE_REPEAT 1 642 #define OPSIZE_REPEAT_INC 1 643 #define OPSIZE_REPEAT_INC_NG 1 644 #define OPSIZE_WORD_BOUNDARY 1 645 #define OPSIZE_BACKREF 1 646 #define OPSIZE_FAIL 1 647 #define OPSIZE_MEM_START 1 648 #define OPSIZE_MEM_START_PUSH 1 649 #define OPSIZE_MEM_END_PUSH 1 650 #define OPSIZE_MEM_END_PUSH_REC 1 651 #define OPSIZE_MEM_END 1 652 #define OPSIZE_MEM_END_REC 1 653 #define OPSIZE_EMPTY_CHECK_START 1 654 #define OPSIZE_EMPTY_CHECK_END 1 655 #define OPSIZE_CHECK_POSITION 1 656 #define OPSIZE_CALL 1 657 #define OPSIZE_RETURN 1 658 #define OPSIZE_MOVE 1 659 #define OPSIZE_STEP_BACK_START 1 660 #define OPSIZE_STEP_BACK_NEXT 1 661 #define OPSIZE_CUT_TO_MARK 1 662 #define OPSIZE_MARK 1 663 #define OPSIZE_SAVE_VAL 1 664 #define OPSIZE_UPDATE_VAR 1 665 666 #ifdef USE_CALLOUT 667 #define OPSIZE_CALLOUT_CONTENTS 1 668 #define OPSIZE_CALLOUT_NAME 1 669 #endif 670 671 672 #define MC_ESC(syn) (syn)->meta_char_table.esc 673 #define MC_ANYCHAR(syn) (syn)->meta_char_table.anychar 674 #define MC_ANYTIME(syn) (syn)->meta_char_table.anytime 675 #define MC_ZERO_OR_ONE_TIME(syn) (syn)->meta_char_table.zero_or_one_time 676 #define MC_ONE_OR_MORE_TIME(syn) (syn)->meta_char_table.one_or_more_time 677 #define MC_ANYCHAR_ANYTIME(syn) (syn)->meta_char_table.anychar_anytime 678 679 #define IS_MC_ESC_CODE(code, syn) \ 680 ((code) == MC_ESC(syn) && \ 681 !IS_SYNTAX_OP2((syn), ONIG_SYN_OP2_INEFFECTIVE_ESCAPE)) 682 683 684 #define SYN_POSIX_COMMON_OP \ 685 ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_POSIX_BRACKET | \ 686 ONIG_SYN_OP_DECIMAL_BACKREF | \ 687 ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_ASTERISK_ZERO_INF | \ 688 ONIG_SYN_OP_LINE_ANCHOR | \ 689 ONIG_SYN_OP_ESC_CONTROL_CHARS ) 690 691 #define SYN_GNU_REGEX_OP \ 692 ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | \ 693 ONIG_SYN_OP_POSIX_BRACKET | ONIG_SYN_OP_DECIMAL_BACKREF | \ 694 ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_LPAREN_SUBEXP | \ 695 ONIG_SYN_OP_VBAR_ALT | \ 696 ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF | \ 697 ONIG_SYN_OP_QMARK_ZERO_ONE | \ 698 ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR | ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR | \ 699 ONIG_SYN_OP_ESC_W_WORD | \ 700 ONIG_SYN_OP_ESC_B_WORD_BOUND | ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | \ 701 ONIG_SYN_OP_ESC_S_WHITE_SPACE | ONIG_SYN_OP_ESC_D_DIGIT | \ 702 ONIG_SYN_OP_LINE_ANCHOR ) 703 704 #define SYN_GNU_REGEX_BV \ 705 ( ONIG_SYN_CONTEXT_INDEP_ANCHORS | ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | \ 706 ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | ONIG_SYN_ALLOW_INVALID_INTERVAL | \ 707 ONIG_SYN_BACKSLASH_ESCAPE_IN_CC | ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC ) 708 709 710 #define NCCLASS_FLAGS(cc) ((cc)->flags) 711 #define NCCLASS_FLAG_SET(cc,flag) (NCCLASS_FLAGS(cc) |= (flag)) 712 #define NCCLASS_FLAG_CLEAR(cc,flag) (NCCLASS_FLAGS(cc) &= ~(flag)) 713 #define IS_NCCLASS_FLAG_ON(cc,flag) ((NCCLASS_FLAGS(cc) & (flag)) != 0) 714 715 /* cclass node */ 716 #define FLAG_NCCLASS_NOT (1<<0) 717 #define FLAG_NCCLASS_SHARE (1<<1) 718 719 #define NCCLASS_SET_NOT(nd) NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_NOT) 720 #define NCCLASS_CLEAR_NOT(nd) NCCLASS_FLAG_CLEAR(nd, FLAG_NCCLASS_NOT) 721 #define IS_NCCLASS_NOT(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_NOT) 722 723 724 typedef struct { 725 #ifdef USE_DIRECT_THREADED_CODE 726 const void* opaddr; 727 #else 728 enum OpCode opcode; 729 #endif 730 union { 731 struct { 732 UChar s[16]; /* Now used first 7 bytes only. */ 733 } exact; 734 struct { 735 UChar* s; 736 LengthType n; /* number of chars */ 737 } exact_n; /* EXACTN, EXACTN_IC, EXACTMB2N, EXACTMB3N */ 738 struct { 739 UChar* s; 740 LengthType n; /* number of chars */ 741 LengthType len; /* char byte length */ 742 } exact_len_n; /* EXACTMBN */ 743 struct { 744 BitSetRef bsp; 745 } cclass; 746 struct { 747 void* mb; 748 } cclass_mb; 749 struct { 750 void* mb; /* mb must be same position with cclass_mb for match_at(). */ 751 BitSetRef bsp; 752 } cclass_mix; 753 struct { 754 UChar c; 755 } anychar_star_peek_next; 756 struct { 757 ModeType mode; 758 } word_boundary; /* OP_WORD_BOUNDARY, OP_NO_WORD_BOUNDARY, OP_WORD_BEGIN, OP_WORD_END */ 759 struct { 760 enum TextSegmentBoundaryType type; 761 int not; 762 } text_segment_boundary; 763 struct { 764 enum CheckPositionType type; 765 } check_position; 766 struct { 767 union { 768 MemNumType n1; /* num == 1 */ 769 MemNumType* ns; /* num > 1 */ 770 }; 771 int num; 772 int nest_level; 773 } backref_general; /* BACKREF_MULTI, BACKREF_MULTI_IC, BACKREF_WITH_LEVEL, BACKREF_CHECK, BACKREF_CHECK_WITH_LEVEL, */ 774 struct { 775 MemNumType n1; 776 } backref_n; /* BACKREF_N, BACKREF_N_IC */ 777 struct { 778 MemNumType num; 779 } memory_start; /* MEMORY_START, MEMORY_START_PUSH */ 780 struct { 781 MemNumType num; 782 } memory_end; /* MEMORY_END, MEMORY_END_REC, MEMORY_END_PUSH, MEMORY_END_PUSH_REC */ 783 struct { 784 RelAddrType addr; 785 } jump; 786 struct { 787 RelAddrType addr; 788 } push; 789 struct { 790 RelAddrType addr; 791 UChar c; 792 } push_or_jump_exact1; 793 struct { 794 RelAddrType addr; 795 UChar c; 796 } push_if_peek_next; 797 struct { 798 MemNumType id; 799 } pop_to_mark; 800 struct { 801 MemNumType id; 802 RelAddrType addr; 803 } repeat; /* REPEAT, REPEAT_NG */ 804 struct { 805 MemNumType id; 806 } repeat_inc; /* REPEAT_INC, REPEAT_INC_NG */ 807 struct { 808 MemNumType mem; 809 } empty_check_start; 810 struct { 811 MemNumType mem; 812 MemStatusType empty_status_mem; 813 } empty_check_end; /* EMPTY_CHECK_END, EMPTY_CHECK_END_MEMST, EMPTY_CHECK_END_MEMST_PUSH */ 814 struct { 815 RelAddrType addr; 816 } prec_read_not_start; 817 struct { 818 LengthType len; 819 } look_behind; 820 struct { 821 LengthType len; 822 RelAddrType addr; 823 } look_behind_not_start; 824 struct { 825 RelPositionType n; /* char relative position */ 826 } move; 827 struct { 828 LengthType initial; /* char length */ 829 LengthType remaining; /* char length */ 830 RelAddrType addr; 831 } step_back_start; 832 struct { 833 MemNumType id; 834 int restore_pos; /* flag: restore current string position */ 835 } cut_to_mark; 836 struct { 837 MemNumType id; 838 int save_pos; /* flag: save current string position */ 839 } mark; 840 struct { 841 SaveType type; 842 MemNumType id; 843 } save_val; 844 struct { 845 UpdateVarType type; 846 MemNumType id; 847 int clear; /* UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK or UPDATE_VAR_RIGHT_RANGE_FROM_STACK */ 848 } update_var; 849 struct { 850 AbsAddrType addr; 851 #if defined(ONIG_DEBUG_MATCH_COUNTER) || defined(ONIG_DEBUG_CALL) 852 MemNumType called_mem; 853 #endif 854 } call; 855 #ifdef USE_CALLOUT 856 struct { 857 MemNumType num; 858 } callout_contents; 859 struct { 860 MemNumType num; 861 MemNumType id; 862 } callout_name; 863 #endif 864 }; 865 } Operation; 866 867 typedef struct { 868 const UChar* pattern; 869 const UChar* pattern_end; 870 #ifdef USE_CALLOUT 871 void* tag_table; 872 int callout_num; 873 int callout_list_alloc; 874 CalloutListEntry* callout_list; /* index: callout num */ 875 #endif 876 } RegexExt; 877 878 typedef struct { 879 int lower; 880 int upper; 881 union { 882 Operation* pcode; /* address of repeated body */ 883 int offset; 884 } u; 885 } RepeatRange; 886 887 struct re_pattern_buffer { 888 /* common members of BBuf(bytes-buffer) */ 889 Operation* ops; 890 #ifdef USE_DIRECT_THREADED_CODE 891 enum OpCode* ocs; 892 #endif 893 Operation* ops_curr; 894 unsigned int ops_used; /* used space for ops */ 895 unsigned int ops_alloc; /* allocated space for ops */ 896 unsigned char* string_pool; 897 unsigned char* string_pool_end; 898 899 int num_mem; /* used memory(...) num counted from 1 */ 900 int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */ 901 int num_empty_check; /* OP_EMPTY_CHECK_START/END id counter */ 902 int num_call; /* number of subexp call */ 903 MemStatusType capture_history; /* (?@...) flag (1-31) */ 904 MemStatusType push_mem_start; /* need backtrack flag */ 905 MemStatusType push_mem_end; /* need backtrack flag */ 906 int stack_pop_level; 907 int repeat_range_alloc; 908 RepeatRange* repeat_range; 909 910 OnigEncoding enc; 911 OnigOptionType options; 912 OnigSyntaxType* syntax; 913 OnigCaseFoldType case_fold_flag; 914 void* name_table; 915 916 /* optimization info (string search, char-map and anchors) */ 917 int optimize; /* optimize flag */ 918 int threshold_len; /* search str-length for apply optimize */ 919 int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */ 920 OnigLen anc_dist_min; /* (SEMI_)END_BUF anchor distance */ 921 OnigLen anc_dist_max; /* (SEMI_)END_BUF anchor distance */ 922 int sub_anchor; /* start-anchor for exact or map */ 923 unsigned char *exact; 924 unsigned char *exact_end; 925 unsigned char map[CHAR_MAP_SIZE]; /* used as BMH skip or char-map */ 926 int map_offset; 927 OnigLen dist_min; /* min-distance of exact or map */ 928 OnigLen dist_max; /* max-distance of exact or map */ 929 RegexExt* extp; 930 }; 931 932 #define COP(reg) ((reg)->ops_curr) 933 #define COP_CURR_OFFSET(reg) ((reg)->ops_used - 1) 934 #define COP_CURR_OFFSET_BYTES(reg, p) \ 935 ((int )((char* )(&((reg)->ops_curr->p)) - (char* )((reg)->ops))) 936 937 938 extern void onig_add_end_call(void (*func)(void)); 939 extern void onig_warning(const char* s); 940 extern UChar* onig_error_code_to_format P_((int code)); 941 extern void ONIG_VARIADIC_FUNC_ATTR onig_snprintf_with_pattern PV_((UChar buf[], int bufsize, OnigEncoding enc, UChar* pat, UChar* pat_end, const UChar *fmt, ...)); 942 extern int onig_compile P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo)); 943 extern int onig_is_code_in_cc_len P_((int enclen, OnigCodePoint code, void* /* CClassNode* */ cc)); 944 extern RegexExt* onig_get_regex_ext(regex_t* reg); 945 extern int onig_ext_set_pattern(regex_t* reg, const UChar* pattern, const UChar* pattern_end); 946 extern int onig_positive_int_multiply(int x, int y); 947 extern hash_table_type onig_st_init_strend_table_with_size P_((int size)); 948 extern int onig_st_lookup_strend P_((hash_table_type table, const UChar* str_key, const UChar* end_key, hash_data_type *value)); 949 extern int onig_st_insert_strend P_((hash_table_type table, const UChar* str_key, const UChar* end_key, hash_data_type value)); 950 951 #ifdef ONIG_DEBUG 952 953 #ifdef ONIG_DEBUG_COMPILE 954 extern void onig_print_compiled_byte_code_list(FILE* f, regex_t* reg); 955 #endif 956 957 #ifdef ONIG_DEBUG_STATISTICS 958 extern void onig_statistics_init P_((void)); 959 extern int onig_print_statistics P_((FILE* f)); 960 #endif 961 962 #endif /* ONIG_DEBUG */ 963 964 #ifdef USE_CALLOUT 965 966 extern OnigCalloutType onig_get_callout_type_by_name_id(int name_id); 967 extern OnigCalloutFunc onig_get_callout_start_func_by_name_id(int id); 968 extern OnigCalloutFunc onig_get_callout_end_func_by_name_id(int id); 969 extern int onig_callout_tag_table_free(void* table); 970 extern void onig_free_reg_callout_list(int n, CalloutListEntry* list); 971 extern CalloutListEntry* onig_reg_callout_list_at(regex_t* reg, int num); 972 extern OnigCalloutFunc onig_get_callout_start_func(regex_t* reg, int callout_num); 973 974 /* for definition of builtin callout */ 975 #define BC0_P(name, func) do {\ 976 int len = onigenc_str_bytelen_null(enc, (UChar* )name);\ 977 id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\ 978 (UChar* )(name), (UChar* )((name) + len),\ 979 ONIG_CALLOUT_IN_PROGRESS,\ 980 onig_builtin_ ## func, 0, 0, 0, 0, 0);\ 981 if (id < 0) return id;\ 982 } while(0) 983 984 #define BC0_R(name, func) do {\ 985 int len = onigenc_str_bytelen_null(enc, (UChar* )name);\ 986 id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\ 987 (UChar* )(name), (UChar* )((name) + len),\ 988 ONIG_CALLOUT_IN_RETRACTION,\ 989 onig_builtin_ ## func, 0, 0, 0, 0, 0);\ 990 if (id < 0) return id;\ 991 } while(0) 992 993 #define BC0_B(name, func) do {\ 994 int len = onigenc_str_bytelen_null(enc, (UChar* )name);\ 995 id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\ 996 (UChar* )(name), (UChar* )((name) + len),\ 997 ONIG_CALLOUT_IN_BOTH,\ 998 onig_builtin_ ## func, 0, 0, 0, 0, 0);\ 999 if (id < 0) return id;\ 1000 } while(0) 1001 1002 #define BC_P(name, func, na, ts) do {\ 1003 int len = onigenc_str_bytelen_null(enc, (UChar* )name);\ 1004 id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\ 1005 (UChar* )(name), (UChar* )((name) + len),\ 1006 ONIG_CALLOUT_IN_PROGRESS,\ 1007 onig_builtin_ ## func, 0, (na), (ts), 0, 0); \ 1008 if (id < 0) return id;\ 1009 } while(0) 1010 1011 #define BC_P_O(name, func, nts, ts, nopts, opts) do {\ 1012 int len = onigenc_str_bytelen_null(enc, (UChar* )name);\ 1013 id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\ 1014 (UChar* )(name), (UChar* )((name) + len),\ 1015 ONIG_CALLOUT_IN_PROGRESS,\ 1016 onig_builtin_ ## func, 0, (nts), (ts), (nopts), (opts));\ 1017 if (id < 0) return id;\ 1018 } while(0) 1019 1020 #define BC_B(name, func, na, ts) do {\ 1021 int len = onigenc_str_bytelen_null(enc, (UChar* )name);\ 1022 id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\ 1023 (UChar* )(name), (UChar* )((name) + len),\ 1024 ONIG_CALLOUT_IN_BOTH,\ 1025 onig_builtin_ ## func, 0, (na), (ts), 0, 0);\ 1026 if (id < 0) return id;\ 1027 } while(0) 1028 1029 #define BC_B_O(name, func, nts, ts, nopts, opts) do {\ 1030 int len = onigenc_str_bytelen_null(enc, (UChar* )name);\ 1031 id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\ 1032 (UChar* )(name), (UChar* )((name) + len),\ 1033 ONIG_CALLOUT_IN_BOTH,\ 1034 onig_builtin_ ## func, 0, (nts), (ts), (nopts), (opts));\ 1035 if (id < 0) return id;\ 1036 } while(0) 1037 1038 #endif /* USE_CALLOUT */ 1039 1040 1041 typedef int (*ONIGENC_INIT_PROPERTY_LIST_FUNC_TYPE)(void); 1042 1043 #endif /* REGINT_H */ 1044