/*    regexp.h
 *
 *    Copyright (C) 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2003,
 *    2005, 2006, 2007, 2008 by Larry Wall and others
 *
 *    You may distribute under the terms of either the GNU General Public
 *    License or the Artistic License, as specified in the README file.
 *
 */

/*
 * Definitions etc. for regexp(3) routines.
 *
 * Caveat:  this is V8 regexp(3) [actually, a reimplementation thereof],
 * not the System V one.
 */
#ifndef PLUGGABLE_RE_EXTENSION
/* we don't want to include this stuff if we are inside of
   an external regex engine based on the core one - like re 'debug'*/

#  include "utf8.h"

/* Signed offset type used for regnodes. */
typedef SSize_t regnode_offset;

/* Four one-byte metadata fields describing a regnode.
 * NOTE(review): the meaning of each field is not documented in this
 * header; confirm against the code that fills these in before relying
 * on the names alone. */
struct regnode_meta {
    U8 type;
    U8 arg_len;
    U8 arg_len_varies;
    U8 off_by_arg;
};

/* this ensures that on alignment sensitive platforms
 * this struct is aligned on 32 bit boundaries
 *
 * The U32 member 'data_u32' overlays the 'data' struct, whose members
 * are a one-byte union ('u_8'), a one-byte 'type' and a two-byte
 * 'next_off'. */
union regnode_head {
    struct {
        union {
            /* three names for the same first byte */
            U8 flags;
            U8 str_len_u8;
            U8 first_byte;
        } u_8;
        U8  type;
        U16 next_off;
    } data;
    U32 data_u32;
};

/* The generic regnode: nothing but the head declared above. */
struct regnode {
    union regnode_head head;
};

typedef struct regnode regnode;

/* Forward declarations so the structs below can refer to each other by
 * pointer. */
struct reg_substr_data;

struct reg_data;

struct regexp_engine;
struct regexp;

/* One substring constraint, kept in both a non-utf8 and a utf8 form. */
struct reg_substr_datum {
    SSize_t min_offset; /* min pos (in chars) that substr must appear */
    SSize_t max_offset; /* max pos (in chars) that substr must appear */
    SV *substr;         /* non-utf8 variant */
    SV *utf8_substr;    /* utf8 variant */
    SSize_t end_shift;  /* how many fixed chars must end the string */
};
/* A fixed set of three substring constraints; 'check_ix' selects one of
 * the entries of data[]. */
struct reg_substr_data {
    U8      check_ix;   /* index into data[] of check substr */
    struct reg_substr_datum data[3];    /* Actual array */
};

/* SV_SAVED_COPY expands to a complete member declaration (including the
 * trailing semicolon) when PERL_ANY_COW is defined, and to nothing
 * otherwise, so it must be written without a semicolon where it is used. */
#  ifdef PERL_ANY_COW
#    define SV_SAVED_COPY   SV *saved_copy; /* If non-NULL, SV which is COW from original */
#  else
#    define SV_SAVED_COPY
#  endif

/* offsets within a string of a particular /(.)/ capture
 * if you change this by adding
new non-temporary fields
 * then be sure to update Perl_rxres_save() in pp_ctl.c
 *
 * A value of -1 in 'start' or 'end' marks the capture as not valid; that
 * is the test RXp_OFFS_VALID() performs. */
typedef struct regexp_paren_pair {
    SSize_t start;
    SSize_t end;

    /* 'start_tmp' records a new opening position before the matching end
     * has been found, so that the old start and end values are still
     * valid, e.g.
     *    "abc" =~ /(.(?{print "[$1]"}))+/
     * outputs [][a][b]
     * This field is not part of the API.  */
    SSize_t start_tmp;
} regexp_paren_pair;

/* Convenience wrappers around the *_maybe_complement_2nd() inversion list
 * functions; only visible to the translation units named in the #if. */
#  if defined(PERL_IN_REGCOMP_ANY) || defined(PERL_IN_UTF8_C)
/* plain union / intersection: the second operand is NOT complemented */
#    define _invlist_union(a, b, output) _invlist_union_maybe_complement_2nd(a, b, FALSE, output)
#    define _invlist_intersection(a, b, output) _invlist_intersection_maybe_complement_2nd(a, b, FALSE, output)

/* Subtracting b from a leaves in a everything that was there that isn't in b,
 * that is the intersection of a with b's complement */
#    define _invlist_subtract(a, b, output) _invlist_intersection_maybe_complement_2nd(a, b, TRUE, output)
#  endif

/* record the position of a (?{...}) within a pattern */

struct reg_code_block {
    STRLEN start;
    STRLEN end;
    OP     *block;
    REGEXP *src_regex;
};

/* array of reg_code_block's plus header info */

struct reg_code_blocks {
    int refcnt; /* we may be pointed to from a regex and from the savestack */
    int  count;    /* how many code blocks */
    struct reg_code_block *cb; /* array of reg_code_block's */
};


/*
= for apidoc AyT||regexp
  The regexp/REGEXP struct, see L<perlreapi> for further documentation
  on the individual fields. The struct is ordered so that the most
  commonly used fields are placed at the start.

  Any patch that adds items to this struct will need to include
  changes to F<sv.c> (C<Perl_re_dup()>) and F<regcomp.c>
  (C<pregfree()>). This involves freeing or cloning items in the
  regexp's data array based on the data item's type.
*/

typedef struct regexp {
    _XPV_HEAD;
    const struct regexp_engine* engine; /* what engine created this regexp? */
    REGEXP *mother_re; /* what re is this a lightweight copy of? */
    HV *paren_names;   /* Optional hash of paren names */

    /*----------------------------------------------------------------------
     * Information about the match that the perl core uses to manage things
     */

    /* see comment in regcomp_internal.h about branch reset to understand
       the distinction between physical and logical capture buffers */
    U32 nparens;                /* physical number of capture buffers */
    U32 logical_nparens;        /* logical number of capture buffers */
    I32 *logical_to_parno;      /* map logical parno to first physical */
    I32 *parno_to_logical;      /* map every physical parno to logical */
    I32 *parno_to_logical_next; /* map every physical parno to the next
                                   physical with the same logical id */

    U32 extflags;      /* Flags used both externally and internally */
    SSize_t maxlen;    /* maximum possible number of chars in string to match */
    SSize_t minlen;    /* minimum possible number of chars in string to match */
    SSize_t minlenret; /* minimum possible number of chars in $& */
    STRLEN gofs;       /* chars left of pos that we search from */

    /* substring data about strings that must appear in
     * the final match, used for optimisations */
    struct reg_substr_data *substrs;

    /* private engine specific data */

    void *pprivate;    /* Data private to the regex engine which
                        * created this object. */
    U32 intflags;      /* Engine Specific Internal flags */

    /*----------------------------------------------------------------------
     * Data about the last/current match. These are modified during matching
     */

    U32 lastparen;           /* highest close paren matched ($+) */
    regexp_paren_pair *offs; /* Array of offsets for (@-) and (@+) */
    char **recurse_locinput; /* used to detect infinite recursion, XXX: move to internal */
    U32 lastcloseparen;      /* last close paren matched ($^N) */


    /*---------------------------------------------------------------------- */

    /* offset from wrapped to the start of precomp
     * (4-bit field, so the offset is at most 15) */
    PERL_BITFIELD32 pre_prefix:4;

    /* original flags used to compile the pattern, may differ from
     * extflags in various ways */
    PERL_BITFIELD32 compflags:9;

    /*---------------------------------------------------------------------- */

    char *subbeg;       /* saved or original string so \digit works forever. */
    /* no semicolon on the next line: the macro supplies its own, or expands
     * to nothing when PERL_ANY_COW is not defined */
    SV_SAVED_COPY       /* If non-NULL, SV which is COW from original */
    SSize_t sublen;     /* Length of string pointed by subbeg */
    SSize_t suboffset;  /* byte offset of subbeg from logical start of str */
    SSize_t subcoffset; /* suboffset equiv, but in chars (for @-/@+) */

    /*---------------------------------------------------------------------- */


    CV *qr_anoncv;      /* the anon sub wrapped round qr/(?{..})/ */
} regexp;


#define RXp_PAREN_NAMES(rx)    ((rx)->paren_names)

/* Start / end offset of capture 'n'.  The RXp_ forms take a 'regexp *';
 * the RX_ forms further down take the REGEXP SV and go through ReANY().
 * The expansions are plain member accesses, so they can also be assigned
 * to. */
#define RXp_OFFS_START(rx,n) \
     RXp_OFFSp(rx)[(n)].start

#define RXp_OFFS_END(rx,n) \
     RXp_OFFSp(rx)[(n)].end

/* True when neither the start nor the end of capture 'n' is -1. */
#define RXp_OFFS_VALID(rx,n) \
     (RXp_OFFSp(rx)[(n)].end != -1 && RXp_OFFSp(rx)[(n)].start != -1 )

#define RX_OFFS_START(rx_sv,n)  RXp_OFFS_START(ReANY(rx_sv),n)
#define RX_OFFS_END(rx_sv,n)    RXp_OFFS_END(ReANY(rx_sv),n)
#define RX_OFFS_VALID(rx_sv,n)  RXp_OFFS_VALID(ReANY(rx_sv),n)

/* used for high speed searches */
typedef struct re_scream_pos_data_s
{
    char **scream_olds;         /* match pos */
    SSize_t *scream_pos;        /* Internal iterator of scream.
*/
} re_scream_pos_data;

/* regexp_engine structure. This is the dispatch table for regexes.
 * Any regex engine implementation must be able to build one of these.
 *
 * Note that the 'dupe' slot only exists when USE_ITHREADS is defined, so
 * the position of 'op_comp' in the struct differs between threaded and
 * unthreaded builds.
 */
typedef struct regexp_engine {
    REGEXP* (*comp) (pTHX_ SV * const pattern, U32 flags);
    I32     (*exec) (pTHX_ REGEXP * const rx, char* stringarg, char* strend,
                     char* strbeg, SSize_t minend, SV* sv,
                     void* data, U32 flags);
    char*   (*intuit) (pTHX_
                       REGEXP * const rx,
                       SV *sv,
                       const char * const strbeg,
                       char *strpos,
                       char *strend,
                       const U32 flags,
                       re_scream_pos_data *data);
    SV*     (*checkstr) (pTHX_ REGEXP * const rx);
    void    (*rxfree) (pTHX_ REGEXP * const rx);

    /* numbered capture variable callbacks: 'paren' is one of the
     * RX_BUFF_IDX_* values, or >= 1 for $1, $2 etc. */
    void    (*numbered_buff_FETCH) (pTHX_ REGEXP * const rx, const I32 paren,
                                    SV * const sv);
    void    (*numbered_buff_STORE) (pTHX_ REGEXP * const rx, const I32 paren,
                                    SV const * const value);
    I32     (*numbered_buff_LENGTH) (pTHX_ REGEXP * const rx, const SV * const sv,
                                     const I32 paren);

    /* named capture callbacks: 'flags' carries the RXapif_* bits */
    SV*     (*named_buff) (pTHX_ REGEXP * const rx, SV * const key,
                           SV * const value, const U32 flags);
    SV*     (*named_buff_iter) (pTHX_ REGEXP * const rx, const SV * const lastkey,
                                const U32 flags);
    SV*     (*qr_package)(pTHX_ REGEXP * const rx);
#  ifdef USE_ITHREADS
    void*   (*dupe) (pTHX_ REGEXP * const rx, CLONE_PARAMS *param);
#  endif
    REGEXP* (*op_comp) (pTHX_ SV ** const patternp, int pat_count,
                        OP *expr, const struct regexp_engine* eng,
                        REGEXP *old_re,
                        bool *is_bare_re, U32 orig_rx_flags, U32 pm_flags);
} regexp_engine;

/*
  These are passed to the numbered capture variable callbacks as the
  paren name. >= 1 is reserved for actual numbered captures, i.e. $1,
  $2 etc.
*/
#  define RX_BUFF_IDX_CARET_PREMATCH  -5 /* ${^PREMATCH}  */
#  define RX_BUFF_IDX_CARET_POSTMATCH -4 /* ${^POSTMATCH} */
#  define RX_BUFF_IDX_CARET_FULLMATCH -3 /* ${^MATCH}     */
#  define RX_BUFF_IDX_PREMATCH        -2 /* $` */
#  define RX_BUFF_IDX_POSTMATCH       -1 /* $' */
#  define RX_BUFF_IDX_FULLMATCH        0 /* $& */

/*
  Flags that are passed to the named_buff and named_buff_iter
  callbacks above. Those routines are called from universal.c via the
  Tie::Hash::NamedCapture interface for %+ and %- and the re::
  functions in the same file.

  Each flag is a distinct single bit: 0x0001..0x0080 name the operation,
  0x0100/0x0200 name the hash, and 0x0400..0x1000 name the re:: function.
*/

/* The Tie::Hash::NamedCapture operation this is part of, if any */
#  define RXapif_FETCH     0x0001
#  define RXapif_STORE     0x0002
#  define RXapif_DELETE    0x0004
#  define RXapif_CLEAR     0x0008
#  define RXapif_EXISTS    0x0010
#  define RXapif_SCALAR    0x0020
#  define RXapif_FIRSTKEY  0x0040
#  define RXapif_NEXTKEY   0x0080

/* Whether %+ or %- is being operated on */
#  define RXapif_ONE       0x0100 /* %+ */
#  define RXapif_ALL       0x0200 /* %- */

/* Whether this is being called from a re:: function */
#  define RXapif_REGNAME         0x0400
#  define RXapif_REGNAMES        0x0800
#  define RXapif_REGNAMES_COUNT  0x1000

/*
=for apidoc Am|REGEXP *|SvRX|SV *sv

Convenience macro to get the REGEXP from a SV. This is approximately
equivalent to the following snippet:

    if (SvMAGICAL(sv))
        mg_get(sv);
    if (SvROK(sv))
        sv = MUTABLE_SV(SvRV(sv));
    if (SvTYPE(sv) == SVt_REGEXP)
        return (REGEXP*) sv;

C<NULL> will be returned if a REGEXP* is not found.

=for apidoc Am|bool|SvRXOK|SV* sv

Returns a boolean indicating whether the SV (or the one it references)
is a REGEXP.

If you want to do something with the REGEXP* later use SvRX instead
and check for NULL.

=cut
*/

/* Both macros call Perl_get_re_arg(); SvRXOK merely reduces the returned
 * pointer to a boolean. */
#  define SvRX(sv)   (Perl_get_re_arg(aTHX_ sv))
#  define SvRXOK(sv) cBOOL(Perl_get_re_arg(aTHX_ sv))


/* Flags stored in regexp->extflags
 * These are used by code external to the regexp engine
 *
 * Note that the flags whose names start with RXf_PMf_ are defined in
 * op_reg_common.h, being copied from the parallel flags of op_pmflags
 *
 * NOTE: if you modify any RXf flags you should run regen.pl or
 * regen/regcomp.pl so that regnodes.h is updated with the changes.
 *
 */

#  include "op_reg_common.h"

#  define RXf_PMf_STD_PMMOD (RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_FOLD|RXf_PMf_EXTENDED|RXf_PMf_EXTENDED_MORE|RXf_PMf_NOCAPTURE)

/* Expands to a run of 'case' labels, so it is only usable inside a
 * switch on a pattern modifier character.  'pmfl' is a pointer to the
 * flags word being built up; 'x_count' must be a modifiable lvalue, as it
 * is incremented each time an 'x' is seen.  The first 'x' sets
 * RXf_PMf_EXTENDED and clears RXf_PMf_EXTENDED_MORE; any later 'x' sets
 * both. */
#  define CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, x_count)                    \
    case IGNORE_PAT_MOD:    *(pmfl) |= RXf_PMf_FOLD;       break;          \
    case MULTILINE_PAT_MOD: *(pmfl) |= RXf_PMf_MULTILINE;  break;          \
    case SINGLE_PAT_MOD:    *(pmfl) |= RXf_PMf_SINGLELINE; break;          \
    case XTENDED_PAT_MOD:   if (x_count == 0) {                            \
                                *(pmfl) |= RXf_PMf_EXTENDED;               \
                                *(pmfl) &= ~RXf_PMf_EXTENDED_MORE;         \
                            }                                              \
                            else {                                         \
                                *(pmfl) |= RXf_PMf_EXTENDED                \
                                          |RXf_PMf_EXTENDED_MORE;          \
                            }                                              \
                            (x_count)++; break;                            \
    case NOCAPTURE_PAT_MOD: *(pmfl) |= RXf_PMf_NOCAPTURE; break;

/* Note, includes charset ones, assumes 0 is the default for them */
#  define STD_PMMOD_FLAGS_CLEAR(pmfl)                        \
    *(pmfl) &= ~(RXf_PMf_FOLD|RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_EXTENDED|RXf_PMf_EXTENDED_MORE|RXf_PMf_CHARSET|RXf_PMf_NOCAPTURE)

/* chars and strings used as regex pattern modifiers
 * Singular is a 'c'har, plural is a "string"
 *
 * NOTE, KEEPCOPY was originally 'k', but was changed to 'p' for preserve
 * for compatibility reasons with Regexp::Common which hijacked (?k:...)
 * for its own uses. So 'k' is out as well.
 */
#  define DEFAULT_PAT_MOD      '^'    /* Short for all the default modifiers */
#  define EXEC_PAT_MOD         'e'
#  define KEEPCOPY_PAT_MOD     'p'
#  define NOCAPTURE_PAT_MOD    'n'
#  define ONCE_PAT_MOD         'o'
#  define GLOBAL_PAT_MOD       'g'
#  define CONTINUE_PAT_MOD     'c'
#  define MULTILINE_PAT_MOD    'm'
#  define SINGLE_PAT_MOD       's'
#  define IGNORE_PAT_MOD       'i'
#  define XTENDED_PAT_MOD      'x'
#  define NONDESTRUCT_PAT_MOD  'r'
#  define LOCALE_PAT_MOD       'l'
#  define UNICODE_PAT_MOD      'u'
#  define DEPENDS_PAT_MOD      'd'
#  define ASCII_RESTRICT_PAT_MOD 'a'

/* String forms of the above, meant to be pasted together by adjacent
 * string literal concatenation. */
#  define ONCE_PAT_MODS        "o"
#  define KEEPCOPY_PAT_MODS    "p"
#  define NOCAPTURE_PAT_MODS   "n"
#  define EXEC_PAT_MODS        "e"
#  define LOOP_PAT_MODS        "gc"
#  define NONDESTRUCT_PAT_MODS "r"
#  define LOCALE_PAT_MODS      "l"
#  define UNICODE_PAT_MODS     "u"
#  define DEPENDS_PAT_MODS     "d"
#  define ASCII_RESTRICT_PAT_MODS "a"
#  define ASCII_MORE_RESTRICT_PAT_MODS "aa"

/* This string is expected by regcomp.c to be ordered so that the first
 * character is the flag in bit RXf_PMf_STD_PMMOD_SHIFT of extflags; the next
 * character is bit +1, etc.
 * ('x' appears twice; presumably one position each for RXf_PMf_EXTENDED
 * and RXf_PMf_EXTENDED_MORE -- confirm against op_reg_common.h.) */
#  define STD_PAT_MODS        "msixxn"

/* expands to "a" "d" "l" "u" */
#  define CHARSET_PAT_MODS    ASCII_RESTRICT_PAT_MODS DEPENDS_PAT_MODS LOCALE_PAT_MODS UNICODE_PAT_MODS

/* This string is expected by XS_re_regexp_pattern() in universal.c to be ordered
 * so that the first character is the flag in bit RXf_PMf_STD_PMMOD_SHIFT of
 * extflags; the next character is in bit +1, etc.
 */
#  define INT_PAT_MODS    STD_PAT_MODS    KEEPCOPY_PAT_MODS  /* "msixxn" "p" */

#  define EXT_PAT_MODS    ONCE_PAT_MODS   KEEPCOPY_PAT_MODS  NOCAPTURE_PAT_MODS /* "o" "p" "n" */
/* QR_PAT_MODS is "msixxn" "opn" "adlu"; note 'n' is contributed by both
 * STD_PAT_MODS and EXT_PAT_MODS */
#  define QR_PAT_MODS     STD_PAT_MODS    EXT_PAT_MODS       CHARSET_PAT_MODS
#  define M_PAT_MODS      QR_PAT_MODS     LOOP_PAT_MODS      /* QR_PAT_MODS + "gc" */
#  define S_PAT_MODS      M_PAT_MODS      EXEC_PAT_MODS      NONDESTRUCT_PAT_MODS /* M_PAT_MODS + "e" "r" */

/*
 * NOTE: if you modify any RXf flags you should run regen.pl or
 * regen/regcomp.pl so that regnodes.h is updated with the changes.
 *
 */

/*
  Set in Perl_pmruntime for a split. Will be used by regex engines to
  check whether they should set RXf_SKIPWHITE
*/
#  define RXf_SPLIT   RXf_PMf_SPLIT

/* Currently the regex flags occupy a single 32-bit word.  Not all bits are
 * currently used.  The lower bits are shared with their corresponding PMf flag
 * bits, up to but not including _RXf_PMf_SHIFT_NEXT.  The unused bits
 * immediately follow; finally the used RXf-only (unshared) bits, so that the
 * highest bit in the word is used.  This gathers all the unused bits as a pool
 * in the middle, like so: 11111111111111110000001111111111
 * where the '1's represent used bits, and the '0's unused.  This design allows
 * us to allocate off one end of the pool if we need to add a shared bit, and
 * off the other end if we need a non-shared bit, without disturbing the other
 * bits.  This maximizes the likelihood of being able to change things without
 * breaking binary compatibility.
 *
 * To add shared bits, do so in op_reg_common.h.  This should change
 * _RXf_PMf_SHIFT_NEXT so that things won't compile.  Then come to regexp.h and
 * op.h and adjust the constant adders in the definitions of RXf_BASE_SHIFT and
 * PMf_BASE_SHIFT down by the number of shared bits you added.  That's it.
 * Things should be binary compatible.
 But if either of these gets to having
 * to subtract rather than add, leave at 0 and instead adjust all the entries
 * that are in terms of it.  But if the first one of those is already
 * RXf_BASE_SHIFT+0, there are no bits left, and a redesign is in order.
 *
 * To remove unshared bits, just delete its entry.  If you're where breaking
 * binary compatibility is ok to do, you might want to adjust things to move
 * the newly opened space so that it gets absorbed into the common pool.
 *
 * To add unshared bits, first use up any gaps in the middle.  Otherwise,
 * allocate off the low end until you get to RXf_BASE_SHIFT+0.  If that isn't
 * enough, move RXf_BASE_SHIFT down (if possible) and add the new bit at the
 * other end instead; this preserves binary compatibility.
 *
 * For the regexp bits, PL_reg_extflags_name[] in regnodes.h has a comment
 * giving which bits are used/unused */

#  define RXf_BASE_SHIFT (_RXf_PMf_SHIFT_NEXT + 2)

/* The lowest offset assigned below is RXf_BASE_SHIFT+2; offsets +0 and +1
 * are not given a name in this header. */

/* What we have seen */
#  define RXf_NO_INPLACE_SUBST    (1U<<(RXf_BASE_SHIFT+2))
#  define RXf_EVAL_SEEN           (1U<<(RXf_BASE_SHIFT+3))

/* Special */
#  define RXf_UNBOUNDED_QUANTIFIER_SEEN   (1U<<(RXf_BASE_SHIFT+4))
#  define RXf_CHECK_ALL           (1U<<(RXf_BASE_SHIFT+5))

/* UTF8 related */
#  define RXf_MATCH_UTF8          (1U<<(RXf_BASE_SHIFT+6)) /* $1 etc are utf8 */

/* Intuit related */
#  define RXf_USE_INTUIT_NOML     (1U<<(RXf_BASE_SHIFT+7))
#  define RXf_USE_INTUIT_ML       (1U<<(RXf_BASE_SHIFT+8))
#  define RXf_INTUIT_TAIL         (1U<<(RXf_BASE_SHIFT+9))
/* mask covering both of the USE_INTUIT bits */
#  define RXf_USE_INTUIT          (RXf_USE_INTUIT_NOML|RXf_USE_INTUIT_ML)

/* Do we have some sort of anchor?
 */
#  define RXf_IS_ANCHORED         (1U<<(RXf_BASE_SHIFT+10))

/* Copy and tainted info */
#  define RXf_COPY_DONE           (1U<<(RXf_BASE_SHIFT+11))

/* post-execution: $1 et al are tainted */
#  define RXf_TAINTED_SEEN        (1U<<(RXf_BASE_SHIFT+12))
/* this pattern was tainted during compilation */
#  define RXf_TAINTED             (1U<<(RXf_BASE_SHIFT+13))

/* Flags indicating special patterns */
#  define RXf_START_ONLY          (1U<<(RXf_BASE_SHIFT+14)) /* Pattern is /^/ */
#  define RXf_SKIPWHITE           (1U<<(RXf_BASE_SHIFT+15)) /* Pattern is for a */
                                                            /*   split " " */
#  define RXf_WHITE               (1U<<(RXf_BASE_SHIFT+16)) /* Pattern is /\s+/ */
#  define RXf_NULL                (1U<<(RXf_BASE_SHIFT+17)) /* Pattern is // */

/* See comments at the beginning of these defines about adding bits.  The
 * highest bit position should be used, so that if RXf_BASE_SHIFT gets
 * increased, the #error below will be triggered so that you will be reminded
 * to adjust things at the other end to keep the bit positions unchanged
 *
 * The +17 in the check must match the largest offset used above (currently
 * RXf_NULL), since 1U<<32 or more does not fit in a 32-bit word. */
#  if RXf_BASE_SHIFT+17 > 31
#     error Too many RXf_PMf bits used. See comments at beginning of these for what to do
#  endif

/*
 * NOTE: if you modify any RXf flags you should run regen.pl or
 * regen/regcomp.pl so that regnodes.h is updated with the changes.
 *
 */

/* Naming convention used throughout: an RXp_ macro takes a 'regexp *'
 * (called 'prog' or 'rx'), while the matching RX_ macro takes the REGEXP SV
 * ('rx_sv') and reaches the struct through ReANY(). */

/* Taint tracking.  With NO_TAINT_SUPPORT the tests are the constant 0 and
 * the setters expand to NOOP; otherwise they test/set/clear RXf_TAINTED or
 * RXf_TAINTED_SEEN in extflags. */
#  ifdef NO_TAINT_SUPPORT
#    define RX_ISTAINTED(rx_sv)           0
#    define RXp_ISTAINTED(prog)           0
#    define RX_TAINT_on(rx_sv)            NOOP
#    define RXp_MATCH_TAINTED(prog)       0
#    define RX_MATCH_TAINTED(rx_sv)       0
#    define RXp_MATCH_TAINTED_on(prog)    NOOP
#    define RX_MATCH_TAINTED_on(rx_sv)    NOOP
#    define RXp_MATCH_TAINTED_off(prog)   NOOP
#    define RX_MATCH_TAINTED_off(rx_sv)   NOOP
#  else
#    define RX_ISTAINTED(rx_sv)           (RX_EXTFLAGS(rx_sv) & RXf_TAINTED)
#    define RXp_ISTAINTED(prog)           (RXp_EXTFLAGS(prog) & RXf_TAINTED)
#    define RX_TAINT_on(rx_sv)            (RX_EXTFLAGS(rx_sv) |= RXf_TAINTED)
#    define RXp_MATCH_TAINTED(prog)       (RXp_EXTFLAGS(prog) & RXf_TAINTED_SEEN)
#    define RX_MATCH_TAINTED(rx_sv)       (RX_EXTFLAGS(rx_sv) & RXf_TAINTED_SEEN)
#    define RXp_MATCH_TAINTED_on(prog)    (RXp_EXTFLAGS(prog) |= RXf_TAINTED_SEEN)
#    define RX_MATCH_TAINTED_on(rx_sv)    (RX_EXTFLAGS(rx_sv) |= RXf_TAINTED_SEEN)
#    define RXp_MATCH_TAINTED_off(prog)   (RXp_EXTFLAGS(prog) &= ~RXf_TAINTED_SEEN)
#    define RX_MATCH_TAINTED_off(rx_sv)   (RX_EXTFLAGS(rx_sv) &= ~RXf_TAINTED_SEEN)
#  endif

/* PREGf_CUTGROUP_SEEN is not defined in this header */
#  define RXp_HAS_CUTGROUP(prog)          ((prog)->intflags & PREGf_CUTGROUP_SEEN)

#  define RX_MATCH_TAINTED_set(rx_sv, t)  ((t) \
                                        ? RX_MATCH_TAINTED_on(rx_sv) \
                                        : RX_MATCH_TAINTED_off(rx_sv))

/* Test/set/clear RXf_COPY_DONE in extflags. */
#  define RXp_MATCH_COPIED(prog)          (RXp_EXTFLAGS(prog) & RXf_COPY_DONE)
#  define RX_MATCH_COPIED(rx_sv)          (RX_EXTFLAGS(rx_sv) & RXf_COPY_DONE)
#  define RXp_MATCH_COPIED_on(prog)       (RXp_EXTFLAGS(prog) |= RXf_COPY_DONE)
#  define RX_MATCH_COPIED_on(rx_sv)       (RX_EXTFLAGS(rx_sv) |= RXf_COPY_DONE)
#  define RXp_MATCH_COPIED_off(prog)      (RXp_EXTFLAGS(prog) &= ~RXf_COPY_DONE)
#  define RX_MATCH_COPIED_off(rx_sv)      (RX_EXTFLAGS(rx_sv) &= ~RXf_COPY_DONE)
#  define RX_MATCH_COPIED_set(rx_sv,t)    ((t) \
                                         ? RX_MATCH_COPIED_on(rx_sv) \
                                         : RX_MATCH_COPIED_off(rx_sv))

#  define RXp_EXTFLAGS(rx)                ((rx)->extflags)
#  define RXp_COMPFLAGS(rx)               ((rx)->compflags)

/* For source compatibility. We used to store these explicitly.  */
#  define RX_PRECOMP(rx_sv)              (RX_WRAPPED(rx_sv) \
                                            + ReANY(rx_sv)->pre_prefix)
#  define RX_PRECOMP_const(rx_sv)        (RX_WRAPPED_const(rx_sv) \
                                            + ReANY(rx_sv)->pre_prefix)
/* FIXME? Are we hardcoding too much here and constraining plugin extension
   writers? Specifically, the value 1 assumes that the wrapped version always
   has exactly one character at the end, a ')'. Will that always be true?  */
#  define RX_PRELEN(rx_sv)                (RX_WRAPLEN(rx_sv) \
                                            - ReANY(rx_sv)->pre_prefix - 1)

#  define RX_WRAPPED(rx_sv)               SvPVX(rx_sv)
#  define RX_WRAPPED_const(rx_sv)         SvPVX_const(rx_sv)
#  define RX_WRAPLEN(rx_sv)               SvCUR(rx_sv)
/* NOTE(review): struct regexp as declared in this header has no member
 * named 'check_substr'; presumably that name is a macro supplied by another
 * header -- confirm before using this accessor. */
#  define RX_CHECK_SUBSTR(rx_sv)          (ReANY(rx_sv)->check_substr)
#  define RX_REFCNT(rx_sv)                SvREFCNT(rx_sv)
#  define RX_EXTFLAGS(rx_sv)              RXp_EXTFLAGS(ReANY(rx_sv))
#  define RX_COMPFLAGS(rx_sv)             RXp_COMPFLAGS(ReANY(rx_sv))

/* Plain member accessors for struct regexp, in RXp_/RX_ pairs. */
#  define RXp_ENGINE(prog)                ((prog)->engine)
#  define RX_ENGINE(rx_sv)                (RXp_ENGINE(ReANY(rx_sv)))
#  define RXp_SUBBEG(prog)                ((prog)->subbeg)
#  define RX_SUBBEG(rx_sv)                (RXp_SUBBEG(ReANY(rx_sv)))
#  define RXp_SUBOFFSET(prog)             ((prog)->suboffset)
#  define RX_SUBOFFSET(rx_sv)             (RXp_SUBOFFSET(ReANY(rx_sv)))
#  define RXp_SUBCOFFSET(prog)            ((prog)->subcoffset)
#  define RX_SUBCOFFSET(rx_sv)            (RXp_SUBCOFFSET(ReANY(rx_sv)))
#  define RXp_OFFSp(prog)                 ((prog)->offs)
#  define RX_OFFSp(rx_sv)                 (RXp_OFFSp(ReANY(rx_sv)))
#  define RXp_LOGICAL_NPARENS(prog)       ((prog)->logical_nparens)
#  define RX_LOGICAL_NPARENS(rx_sv)       (RXp_LOGICAL_NPARENS(ReANY(rx_sv)))
#  define RXp_LOGICAL_TO_PARNO(prog)      ((prog)->logical_to_parno)
#  define RX_LOGICAL_TO_PARNO(rx_sv)      (RXp_LOGICAL_TO_PARNO(ReANY(rx_sv)))
#  define RXp_PARNO_TO_LOGICAL(prog)      ((prog)->parno_to_logical)
#  define RX_PARNO_TO_LOGICAL(rx_sv)      (RXp_PARNO_TO_LOGICAL(ReANY(rx_sv)))
#  define RXp_PARNO_TO_LOGICAL_NEXT(prog) ((prog)->parno_to_logical_next)
#  define RX_PARNO_TO_LOGICAL_NEXT(rx_sv) (RXp_PARNO_TO_LOGICAL_NEXT(ReANY(rx_sv)))
#  define RXp_NPARENS(prog)               ((prog)->nparens)
#  define RX_NPARENS(rx_sv)               (RXp_NPARENS(ReANY(rx_sv)))
#  define RXp_SUBLEN(prog)                ((prog)->sublen)
#  define RX_SUBLEN(rx_sv)                (RXp_SUBLEN(ReANY(rx_sv)))
#  define RXp_MINLEN(prog)                ((prog)->minlen)
#  define RX_MINLEN(rx_sv)                (RXp_MINLEN(ReANY(rx_sv)))
#  define RXp_MINLENRET(prog)             ((prog)->minlenret)
#  define RX_MINLENRET(rx_sv)             (RXp_MINLENRET(ReANY(rx_sv)))
#  define RXp_GOFS(prog)                  ((prog)->gofs)
#  define RX_GOFS(rx_sv)                  (RXp_GOFS(ReANY(rx_sv)))
#  define RXp_LASTPAREN(prog)             ((prog)->lastparen)
#  define RX_LASTPAREN(rx_sv)             (RXp_LASTPAREN(ReANY(rx_sv)))
#  define RXp_LASTCLOSEPAREN(prog)        ((prog)->lastcloseparen)
#  define RX_LASTCLOSEPAREN(rx_sv)        (RXp_LASTCLOSEPAREN(ReANY(rx_sv)))
/* the saved_copy member only exists when PERL_ANY_COW is defined (it is
 * declared through SV_SAVED_COPY) */
#  define RXp_SAVED_COPY(prog)            ((prog)->saved_copy)
#  define RX_SAVED_COPY(rx_sv)            (RXp_SAVED_COPY(ReANY(rx_sv)))
#  define RXp_SUBSTRS(prog)               ((prog)->substrs)
#  define RX_SUBSTRS(rx_sv)               (RXp_SUBSTRS(ReANY(rx_sv)))
#  define RXp_PPRIVATE(prog)              ((prog)->pprivate)
#  define RX_PPRIVATE(rx_sv)              (RXp_PPRIVATE(ReANY(rx_sv)))
#  define RXp_QR_ANONCV(prog)             ((prog)->qr_anoncv)
#  define RX_QR_ANONCV(rx_sv)             (RXp_QR_ANONCV(ReANY(rx_sv)))
#  define RXp_MOTHER_RE(prog)             ((prog)->mother_re)
#  define RX_MOTHER_RE(rx_sv)             (RXp_MOTHER_RE(ReANY(rx_sv)))
#  define RXp_PRE_PREFIX(prog)            ((prog)->pre_prefix)
#  define RX_PRE_PREFIX(rx_sv)            (RXp_PRE_PREFIX(ReANY(rx_sv)))

/* last match was zero-length */
/* (gofs is a STRLEN, hence the cast to SSize_t before it is added to the
 * signed start offset) */
#  define RXp_ZERO_LEN(prog) \
        (RXp_OFFS_START(prog,0) + (SSize_t)RXp_GOFS(prog) \
          == RXp_OFFS_END(prog,0))
#  define RX_ZERO_LEN(rx_sv)      (RXp_ZERO_LEN(ReANY(rx_sv)))

#endif /*
PLUGGABLE_RE_EXTENSION */ 624 625 /* Stuff that needs to be included in the pluggable extension goes below here */ 626 627 #ifdef PERL_ANY_COW 628 # define RXp_MATCH_COPY_FREE(prog) \ 629 STMT_START { \ 630 if (RXp_SAVED_COPY(prog)) { \ 631 SV_CHECK_THINKFIRST_COW_DROP(RXp_SAVED_COPY(prog)); \ 632 } \ 633 if (RXp_MATCH_COPIED(prog)) { \ 634 Safefree(RXp_SUBBEG(prog)); \ 635 RXp_MATCH_COPIED_off(prog); \ 636 } \ 637 } STMT_END 638 #else 639 # define RXp_MATCH_COPY_FREE(prog) \ 640 STMT_START { \ 641 if (RXp_MATCH_COPIED(prog)) { \ 642 Safefree(RXp_SUBBEG(prog)); \ 643 RXp_MATCH_COPIED_off(prog); \ 644 } \ 645 } STMT_END 646 #endif 647 #define RX_MATCH_COPY_FREE(rx_sv) RXp_MATCH_COPY_FREE(ReANY(rx_sv)) 648 649 #define RXp_MATCH_UTF8(prog) (RXp_EXTFLAGS(prog) & RXf_MATCH_UTF8) 650 #define RX_MATCH_UTF8(rx_sv) (RX_EXTFLAGS(rx_sv) & RXf_MATCH_UTF8) 651 #define RXp_MATCH_UTF8_on(prog) (RXp_EXTFLAGS(prog) |= RXf_MATCH_UTF8) 652 #define RX_MATCH_UTF8_on(rx_sv) (RXp_MATCH_UTF8_on(ReANY(rx_sv))) 653 #define RXp_MATCH_UTF8_off(prog) (RXp_EXTFLAGS(prog) &= ~RXf_MATCH_UTF8) 654 #define RX_MATCH_UTF8_off(rx_sv) (RXp_MATCH_UTF8_off(ReANY(rx_sv)) 655 #define RXp_MATCH_UTF8_set(prog, t) ((t) \ 656 ? RXp_MATCH_UTF8_on(prog) \ 657 : RXp_MATCH_UTF8_off(prog)) 658 #define RX_MATCH_UTF8_set(rx_sv, t) (RXp_MATCH_UTF8_set(ReANY(rx_sv), t)) 659 660 /* Whether the pattern stored at RX_WRAPPED is in UTF-8 */ 661 #define RX_UTF8(rx_sv) SvUTF8(rx_sv) 662 663 664 /* bits in flags arg of Perl_regexec_flags() */ 665 666 #define REXEC_COPY_STR 0x01 /* Need to copy the string for captures. */ 667 #define REXEC_CHECKED 0x02 /* re_intuit_start() already called. */ 668 #define REXEC_SCREAM 0x04 /* currently unused. 
 */
#define REXEC_IGNOREPOS 0x08    /* use stringarg, not pos(), for \G match */
#define REXEC_NOT_FIRST 0x10    /* This is another iteration of //g:
                                   no need to copy string again */

                                /* under REXEC_COPY_STR, it's ok for the
                                   engine (modulo PL_sawamperand etc)
                                   to skip copying: ... */
#define REXEC_COPY_SKIP_PRE  0x20   /* ...the $` part of the string, or */
#define REXEC_COPY_SKIP_POST 0x40   /* ...the $' part of the string */
#define REXEC_FAIL_ON_UNDERFLOW 0x80 /* fail the match if $& would start before
                                        the start pos (so s/.\G// would fail
                                        on second iteration) */

/* Reference count helpers for REGEXP pointers.
 * With PERL_USE_GCC_BRACE_GROUPS the argument is evaluated exactly once
 * (it is copied into a local first).  ReREFCNT_inc then asserts that the
 * SV really is an SVt_REGEXP and yields the pointer; ReREFCNT_dec performs
 * no such assertion.  The fallback definitions simply forward to
 * SvREFCNT_inc() / SvREFCNT_dec(), with a cast on the inc result. */
#if defined(PERL_USE_GCC_BRACE_GROUPS)
#  define ReREFCNT_inc(re)                                              \
    ({                                                                  \
        /* This is here to generate a casting warning if incorrect.  */ \
        REGEXP *const _rerefcnt_inc = (re);                             \
        assert(SvTYPE(_rerefcnt_inc) == SVt_REGEXP);                    \
        SvREFCNT_inc(_rerefcnt_inc);                                    \
        _rerefcnt_inc;                                                  \
    })
#  define ReREFCNT_dec(re)                                              \
    ({                                                                  \
        /* This is here to generate a casting warning if incorrect.  */ \
        REGEXP *const _rerefcnt_dec = (re);                             \
        SvREFCNT_dec(_rerefcnt_dec);                                    \
    })
#else
#  define ReREFCNT_dec(re)        SvREFCNT_dec(re)
#  define ReREFCNT_inc(re)        ((REGEXP *) SvREFCNT_inc(re))
#endif
/* Obtain the struct behind a REGEXP pointer via Perl_ReANY(); this is what
 * every RX_ accessor goes through. */
#define ReANY(re)               Perl_ReANY((const REGEXP *)(re))

/* FIXME for plugins. */

#define FBMcf_TAIL_DOLLAR       1
#define FBMcf_TAIL_DOLLARM      2
#define FBMcf_TAIL_Z            4
#define FBMcf_TAIL_z            8
/* mask of all four FBMcf_TAIL_* bits */
#define FBMcf_TAIL              (FBMcf_TAIL_DOLLAR|FBMcf_TAIL_DOLLARM|FBMcf_TAIL_Z|FBMcf_TAIL_z)

#define FBMrf_MULTILINE 1

struct regmatch_state;
struct regmatch_slab;

/* like regmatch_info_aux, but contains extra fields only needed if the
 * pattern contains (?{}).
 If used, is snuck into the second slot in the
 * regmatch_state stack at the start of execution
 *
 * NOTE(review): sublen, suboffset and subcoffset are declared STRLEN here
 * but SSize_t in struct regexp, so saving and restoring them converts
 * between an unsigned and a signed type. */

typedef struct {
    regexp *rex;
    PMOP    *curpm;      /* saved PL_curpm */
#ifdef PERL_ANY_COW
    SV      *saved_copy; /* saved saved_copy field from rex */
#endif
    char    *subbeg;     /* saved subbeg  field from rex */
    STRLEN  sublen;      /* saved sublen  field from rex */
    STRLEN  suboffset;   /* saved suboffset  field from rex */
    STRLEN  subcoffset;  /* saved subcoffset  field from rex */
    SV      *sv;         /* $_  during (?{}) */
    MAGIC   *pos_magic;  /* pos() magic attached to $_ */
    SSize_t pos;         /* the original value of pos() in pos_magic */
    U8      pos_flags;   /* flags to be restored; currently only MGf_BYTES*/
} regmatch_info_aux_eval;


/* fields that logically live in regmatch_info, but which need cleaning
 * up on croak(), and so are instead snuck into the first slot in
 * the regmatch_state stack at the start of execution */

typedef struct {
    regmatch_info_aux_eval *info_aux_eval;
    struct regmatch_state *old_regmatch_state; /* saved PL_regmatch_state */
    struct regmatch_slab  *old_regmatch_slab;  /* saved PL_regmatch_slab */
    char *poscache;     /* S-L cache of fail positions of WHILEMs */
} regmatch_info_aux;


/*
=for apidoc Ay||regmatch_info
Some basic information about the current match that is created by
Perl_regexec_flags and then passed to regtry(), regmatch() etc.
It is allocated as a local var on the stack, so nothing should be
stored in it that needs preserving or clearing up on croak().
For that, see the info_aux and info_aux_eval members of the
regmatch_state union.

=cut
*/

typedef struct {
    REGEXP *prog;        /* the regex being executed */
    const char * strbeg; /* real start of string */
    char *strend;        /* one byte beyond last char of match string */
    char *till;          /* matches shorter than this fail (see minlen arg) */
    SV *sv;              /* the SV string currently being matched */
    char *ganch;         /* position of \G anchor */
    char *cutpoint;      /* (*COMMIT) position (if any) */
    regmatch_info_aux      *info_aux; /* extra fields that need cleanup */
    regmatch_info_aux_eval *info_aux_eval; /* extra saved state for (?{}) */
    I32  poscache_maxiter; /* how many whilems to do before S-L cache kicks in */
    I32  poscache_iter;    /* current countdown from _maxiter to zero */
    STRLEN poscache_size;  /* size of regmatch_info_aux.poscache */
    bool intuit;         /* re_intuit_start() is the top-level caller */
    bool is_utf8_pat;    /* regex is utf8 */
    bool is_utf8_target; /* string being matched is utf8 */
    bool warned; /* we have issued a recursion warning; no need for more */
} regmatch_info;


/* structures for holding and saving the state maintained by regmatch() */

/* Default of 10; the #ifndef lets a build override it by defining the macro
 * before this header is included. */
#ifndef MAX_RECURSE_EVAL_NOCHANGE_DEPTH
#  define MAX_RECURSE_EVAL_NOCHANGE_DEPTH 10
#endif

/* The +1 is because everything matches itself, which isn't included in
 * MAX_FOLD_FROMS; the +2 is based on the current Unicode standards needs, and
 * is unlikely to change.  An assertion should fail in regexec.c if it is too
 * low.  It is needed for certain edge cases involving multi-character folds
 * when the first component also participates in a fold individually.
*/
#define MAX_MATCHES (MAX_FOLD_FROMS + 1 + 2)

/* Embedded as the 'Binfo' member of the curlym and curly states in
 * regmatch_state below, next to the A and B nodes of /A*B/.
 * NOTE(review): the individual members other than 'count' are not
 * documented here; the mask/anded pairs look like "(value & mask) == anded"
 * style pre-filters - confirm against the code that fills them in. */
struct next_matchable_info {
    U8     first_byte_mask;
    U8     first_byte_anded;
    U32    mask32;
    U32    anded32;
    PERL_INT_FAST8_T count; /* Negative means not initialized */
    PERL_UINT_FAST8_T min_length;
    PERL_UINT_FAST8_T max_length;
    PERL_UINT_FAST8_T initial_definitive;
    PERL_UINT_FAST8_T initial_exact;
    PERL_UINT_FAST8_T lengths[MAX_MATCHES]; /* one entry per possible match,
                                             * sized by MAX_MATCHES */

    /* The size is from trial and error, and could change with new Unicode
     * standards, in which case there is an assertion that should start
     * failing.  This size could be calculated in one of the regen scripts
     * dealing with Unicode, but khw thinks the likelihood of it changing is
     * low enough that it isn't worth the effort. */
    U8 matches[18];
};

/* Type of the 'cp' and 'lastcp' members of the states below; see the NOTE
 * on 'cp' and 'lastcp' inside regmatch_state for how they are set. */
typedef I32 CHECKPOINT;

/* One slot of the regmatch_state stack: a few fields common to every state,
 * followed by the union 'u' holding one member per kind of saved state.
 * Several members deliberately share their leading fields (see the
 * "must match" comments), so the order of members inside each struct must
 * not be changed. */
typedef struct regmatch_state {
    int resume_state;           /* where to jump to on return */
    char *locinput;             /* where to backtrack in string on failure */
    char *loceol;               /* NOTE(review): undocumented here; by its
                                 * name presumably an end limit that goes
                                 * with locinput - confirm in regexec.c */
    U8 *sr0;                    /* position of start of script run, or NULL */

    union {

        /* the 'info_aux' and 'info_aux_eval' union members are cuckoos in
         * the nest. They aren't saved backtrack state; rather they
         * represent one or two extra chunks of data that need allocating
         * at the start of a match. These fields would logically live in
         * the regmatch_info struct, except that is allocated on the
         * C stack, and these fields are all things that require cleanup
         * after a croak(), when the stack is lost.
         * As a convenience, we just use the first 1 or 2 regmatch_state
         * slots to store this info, as we will be allocating a slab of
         * these anyway. Otherwise we'd have to malloc and then free them,
         * or allocate them on the save stack (where they will get
         * realloced if the save stack grows).
         * info_aux contains the extra fields that are always needed;
         * info_aux_eval contains extra fields that are only needed if
         * the pattern contains code blocks.
         * We split them into two separate structs to avoid increasing
         * the size of the union.
         */

        regmatch_info_aux info_aux;

        regmatch_info_aux_eval info_aux_eval;

        /* this is a fake union member that matches the first element
         * of each member that needs to store positive backtrack
         * information */
        struct {
            struct regmatch_state *prev_yes_state;
        } yes;


        /* NOTE: Regarding 'cp' and 'lastcp' in the following structs...
         *
         * In the majority of cases we use 'cp' for the "normal"
         * checkpoint for paren saves, and 'lastcp' for the additional
         * paren saves that are done only under RE_PESSIMISTIC_PARENS.
         *
         * There may be a few cases where both are used always.
         * Regardless they tend to be used something like this:
         *
         *   ST.cp = regcppush(rex, 0, maxopenparen);
         *   REGCP_SET(ST.lastcp);
         *
         * thus ST.cp holds the checkpoint from before we push parens,
         * and ST.lastcp holds the checkpoint from afterwards.
         */

        /* branchlike members */
        /* this is a fake union member that matches the first elements
         * of each member that needs to behave like a branch */
        struct {
            /* this first element must match u.yes */
            struct regmatch_state *prev_yes_state;
            U32         lastparen;
            U32         lastcloseparen;
            CHECKPOINT  cp;     /* see note above "struct branchlike" */
            CHECKPOINT  lastcp; /* see note above "struct branchlike" */
            U16         before_paren;
            U16         after_paren;

        } branchlike;

        struct {
            /* the first elements must match u.branchlike */
            struct regmatch_state *prev_yes_state;
            U32         lastparen;
            U32         lastcloseparen;
            CHECKPOINT  cp;     /* see note above "struct branchlike" */
            CHECKPOINT  lastcp; /* see note above "struct branchlike" */
            U16         before_paren;
            U16         after_paren;

            regnode *next_branch;   /* next branch node */
        } branch;

        struct {
            /* the first elements must match u.branchlike */
            struct regmatch_state *prev_yes_state;
            U32         lastparen;
            U32         lastcloseparen;
            CHECKPOINT  cp;     /* see note above "struct branchlike" */
            CHECKPOINT  lastcp; /* see note above "struct branchlike" */
            U16         before_paren;
            U16         after_paren;

            U32         accepted;   /* how many accepting states left */
            bool        longfold;   /* saw a fold with a 1->n char mapping */
            U16         *jump;      /* positive offsets from me */
            /* NOTE(review): the two j_* arrays are undocumented here;
             * presumably the per-jump counterparts of before_paren and
             * after_paren above - confirm in regexec.c */
            U16         *j_before_paren;
            U16         *j_after_paren;
            regnode     *me;        /* Which node am I - needed for jump tries */
            U8          *firstpos;  /* pos in string of first trie match */
            U32         firstchars; /* len in chars of firstpos from start */
            U16         nextword;   /* next word to try */
            U16         topword;    /* longest accepted word */
        } trie;

        /* special types - these members are used to store state for special
           regops like eval, if/then, lookaround and the markpoint state */
        struct {
            /* this first element must match u.yes */
            struct regmatch_state *prev_yes_state;
            struct regmatch_state *prev_curlyx;
            struct regmatch_state *prev_eval;
            REGEXP      *prev_rex;
            CHECKPOINT  cp;     /* see note above "struct branchlike" */
            CHECKPOINT  lastcp; /* see note above "struct branchlike" */
            U32         close_paren; /* which close bracket is our end (+1) */
            regnode     *B;     /* the node following us */
            char        *prev_recurse_locinput;
        } eval;

        struct {
            /* this first element must match u.yes */
            struct regmatch_state *prev_yes_state;
            I32 wanted;
            I32 logical;        /* saved copy of 'logical' var */
            U8  count;          /* number of beginning positions */
            char *start;
            char *end;
            regnode *me;        /* the IFMATCH/SUSPEND/UNLESSM node */
            char *prev_match_end;
        } ifmatch; /* and SUSPEND/UNLESSM */

        struct {
            /* this first element must match u.yes */
            struct regmatch_state *prev_yes_state;
            struct regmatch_state *prev_mark;
            SV *mark_name;
            char *mark_loc;
        } mark;

        /* unlike most members, this one does not begin with prev_yes_state */
        struct {
            int val;
        } keeper;

        /* quantifiers - these members are used for storing state for
           the regops used to implement quantifiers */
        struct {
            /* this first element must match u.yes */
            struct regmatch_state *prev_yes_state;
            struct regmatch_state *prev_curlyx; /* previous cur_curlyx */
            regnode     *me;    /* the CURLYX node */
            regnode     *B;     /* the B node in /A*B/ */
            CHECKPOINT  cp;     /* see note above "struct branchlike" */
            CHECKPOINT  lastcp; /* see note above "struct branchlike" */
            bool        minmod;
            int         parenfloor; /* how far back to strip paren data */

            /* these two are modified by WHILEM */
            int         count;    /* how many instances of A we've matched */
            char        *lastloc; /* where previous A matched (0-len detect) */
        } curlyx;

        struct {
            /* this first element must match u.yes */
            struct regmatch_state *prev_yes_state;
            struct regmatch_state *save_curlyx;
            CHECKPOINT  cp;     /* see note above "struct branchlike" */
            CHECKPOINT  lastcp; /* see note above "struct branchlike" */
            char        *save_lastloc;  /* previous curlyx.lastloc */
            I32         cache_offset;
            I32         cache_mask;
        } whilem;

        struct {
            /* this first element must match u.yes */
            struct regmatch_state *prev_yes_state;
            U32         lastparen;
            U32         lastcloseparen;
            CHECKPOINT  cp;     /* see note above "struct branchlike" */
            CHECKPOINT  lastcp; /* see note above "struct branchlike" */
            I32 alen;           /* length of first-matched A string */
            I32 count;
            bool minmod;
            regnode *A, *B;     /* the nodes corresponding to /A*B/ */
            regnode *me;        /* the curlym node */
            struct next_matchable_info Binfo;
        } curlym;

        /* unlike most members, this one does not begin with prev_yes_state */
        struct {
            U32 paren;
            U32         lastparen;
            U32         lastcloseparen;
            CHECKPOINT  cp;     /* see note above "struct branchlike" */
            CHECKPOINT  lastcp; /* see note above "struct branchlike" */
            char *maxpos;       /* highest possible point in string to match */
            char *oldloc;       /* the previous locinput */
            int count;
            int min, max;       /* {m,n} */
            regnode *A, *B;     /* the nodes corresponding to /A*B/ */
            struct next_matchable_info Binfo;
        } curly; /* and CURLYN/PLUS/STAR */

        struct {
            CHECKPOINT cp;
            CHECKPOINT lastcp;
        } backref; /* REF and friends */
    } u;
} regmatch_state;



/* how many regmatch_state structs to allocate as a single slab.
 * We do it in 4K blocks for efficiency. The "3" is 2 for the next/prev
 * pointers, plus 1 for any mythical malloc overhead. */

#define PERL_REGMATCH_SLAB_SLOTS \
    ((4096 - 3 * sizeof (void*)) / sizeof(regmatch_state))

/* A slab of PERL_REGMATCH_SLAB_SLOTS regmatch_state slots, linked to its
 * neighbouring slabs through the prev and next pointers. */
typedef struct regmatch_slab {
    regmatch_state states[PERL_REGMATCH_SLAB_SLOTS];
    struct regmatch_slab *prev, *next;
} regmatch_slab;


/* NOTE(review): not referenced anywhere in this part of the header; its
 * meaning has to be checked at the places that use it. */
#define REG_FETCH_ABSOLUTE 1

/*
 * ex: set ts=8 sts=4 sw=4 et:
 */