xref: /openbsd/gnu/usr.bin/perl/regexp.h (revision e0680481)
1 /*    regexp.h
2  *
3  *    Copyright (C) 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2003,
4  *    2005, 2006, 2007, 2008 by Larry Wall and others
5  *
6  *    You may distribute under the terms of either the GNU General Public
7  *    License or the Artistic License, as specified in the README file.
8  *
9  */
10 
11 /*
12  * Definitions etc. for regexp(3) routines.
13  *
14  * Caveat:  this is V8 regexp(3) [actually, a reimplementation thereof],
15  * not the System V one.
16  */
17 #ifndef PLUGGABLE_RE_EXTENSION
18 /* we don't want to include this stuff if we are inside of
19    an external regex engine based on the core one - like re 'debug'*/
20 
21 #  include "utf8.h"
22 
23 typedef SSize_t regnode_offset;
24 
25 struct regnode_meta {
26     U8 type;
27     U8 arg_len;
28     U8 arg_len_varies;
29     U8 off_by_arg;
30 };
31 
32 /* this ensures that on alignment sensitive platforms
33  * this struct is aligned on 32 bit boundaries */
34 union regnode_head {
35     struct {
36         union {
37             U8 flags;
38             U8 str_len_u8;
39             U8 first_byte;
40         } u_8;
41         U8  type;
42         U16 next_off;
43     } data;
44     U32 data_u32;
45 };
46 
47 struct regnode {
48     union regnode_head head;
49 };
50 
51 typedef struct regnode regnode;
52 
53 struct reg_substr_data;
54 
55 struct reg_data;
56 
57 struct regexp_engine;
58 struct regexp;
59 
60 struct reg_substr_datum {
61     SSize_t min_offset; /* min pos (in chars) that substr must appear */
62     SSize_t max_offset; /* max pos (in chars) that substr must appear */
63     SV *substr;		/* non-utf8 variant */
64     SV *utf8_substr;	/* utf8 variant */
65     SSize_t end_shift;  /* how many fixed chars must end the string */
66 };
67 struct reg_substr_data {
68     U8      check_ix;   /* index into data[] of check substr */
69     struct reg_substr_datum data[3];	/* Actual array */
70 };
71 
72 #  ifdef PERL_ANY_COW
73 #    define SV_SAVED_COPY   SV *saved_copy; /* If non-NULL, SV which is COW from original */
74 #  else
75 #    define SV_SAVED_COPY
76 #  endif
77 
78 /* offsets within a string of a particular /(.)/ capture
79  * if you change this by adding new non-temporary fields
80  * then be sure to update Perl_rxres_save() in pp_ctl.c */
81 typedef struct regexp_paren_pair {
82     SSize_t start;
83     SSize_t end;
84 
85     /* 'start_tmp' records a new opening position before the matching end
86      * has been found, so that the old start and end values are still
87      * valid, e.g.
88      *	  "abc" =~ /(.(?{print "[$1]"}))+/
89      * outputs [][a][b]
90      * This field is not part of the API.  */
91     SSize_t start_tmp;
92 } regexp_paren_pair;
93 
94 #  if defined(PERL_IN_REGCOMP_ANY) || defined(PERL_IN_UTF8_C)
95 #    define _invlist_union(a, b, output) _invlist_union_maybe_complement_2nd(a, b, FALSE, output)
96 #    define _invlist_intersection(a, b, output) _invlist_intersection_maybe_complement_2nd(a, b, FALSE, output)
97 
98 /* Subtracting b from a leaves in a everything that was there that isn't in b,
99  * that is the intersection of a with b's complement */
100 #    define _invlist_subtract(a, b, output) _invlist_intersection_maybe_complement_2nd(a, b, TRUE, output)
101 #  endif
102 
103 /* record the position of a (?{...}) within a pattern */
104 
105 struct reg_code_block {
106     STRLEN start;
107     STRLEN end;
108     OP     *block;
109     REGEXP *src_regex;
110 };
111 
112 /* array of reg_code_block's plus header info */
113 
114 struct reg_code_blocks {
115     int refcnt; /* we may be pointed to from a regex and from the savestack */
116     int  count;    /* how many code blocks */
117     struct reg_code_block *cb; /* array of reg_code_block's */
118 };
119 
120 
121 /*
122 = for apidoc AyT||regexp
123   The regexp/REGEXP struct, see L<perlreapi> for further documentation
124   on the individual fields. The struct is ordered so that the most
125   commonly used fields are placed at the start.
126 
127   Any patch that adds items to this struct will need to include
128   changes to F<sv.c> (C<Perl_re_dup()>) and F<regcomp.c>
129   (C<pregfree()>). This involves freeing or cloning items in the
130   regexp's data array based on the data item's type.
131 */
132 
133 typedef struct regexp {
134     _XPV_HEAD;
135     const struct regexp_engine* engine; /* what engine created this regexp? */
136     REGEXP *mother_re; /* what re is this a lightweight copy of? */
137     HV *paren_names;   /* Optional hash of paren names */
138 
139     /*----------------------------------------------------------------------
140      * Information about the match that the perl core uses to manage things
141      */
142 
143     /* see comment in regcomp_internal.h about branch reset to understand
144        the distinction between physical and logical capture buffers */
145     U32 nparens;                    /* physical number of capture buffers */
146     U32 logical_nparens;            /* logical number of capture buffers */
147     I32 *logical_to_parno;          /* map logical parno to first physical */
148     I32 *parno_to_logical;          /* map every physical parno to logical */
149     I32 *parno_to_logical_next;     /* map every physical parno to the next
150                                        physical with the same logical id */
151 
152     U32 extflags;      /* Flags used both externally and internally */
153     SSize_t maxlen;    /* maximum possible number of chars in string to match */
154     SSize_t minlen;    /* minimum possible number of chars in string to match */
155     SSize_t minlenret; /* minimum possible number of chars in $& */
156     STRLEN gofs;       /* chars left of pos that we search from */
157                        /* substring data about strings that must appear in
158                         * the final match, used for optimisations */
159 
160     struct reg_substr_data *substrs;
161 
162     /* private engine specific data */
163 
164     void *pprivate;    /* Data private to the regex engine which
165                         * created this object. */
166     U32 intflags;      /* Engine Specific Internal flags */
167 
168     /*----------------------------------------------------------------------
169      * Data about the last/current match. These are modified during matching
170      */
171 
172     U32 lastparen;           /* highest close paren matched ($+) */
173     regexp_paren_pair *offs; /* Array of offsets for (@-) and (@+) */
174     char **recurse_locinput; /* used to detect infinite recursion, XXX: move to internal */
175     U32 lastcloseparen;      /* last close paren matched ($^N) */
176 
177 
178     /*---------------------------------------------------------------------- */
179 
180     /* offset from wrapped to the start of precomp */
181     PERL_BITFIELD32 pre_prefix:4;
182 
183     /* original flags used to compile the pattern, may differ from
184      * extflags in various ways */
185     PERL_BITFIELD32 compflags:9;
186 
187     /*---------------------------------------------------------------------- */
188 
189     char *subbeg;       /* saved or original string so \digit works forever. */
190     SV_SAVED_COPY       /* If non-NULL, SV which is COW from original */
191     SSize_t sublen;     /* Length of string pointed by subbeg */
192     SSize_t suboffset;  /* byte offset of subbeg from logical start of str */
193     SSize_t subcoffset; /* suboffset equiv, but in chars (for @-/@+) */
194 
195     /*---------------------------------------------------------------------- */
196 
197 
198     CV *qr_anoncv;      /* the anon sub wrapped round qr/(?{..})/ */
199 } regexp;
200 
201 
202 #define RXp_PAREN_NAMES(rx) ((rx)->paren_names)
203 
204 #define RXp_OFFS_START(rx,n) \
205      RXp_OFFSp(rx)[(n)].start
206 
207 #define RXp_OFFS_END(rx,n) \
208      RXp_OFFSp(rx)[(n)].end
209 
210 #define RXp_OFFS_VALID(rx,n) \
211      (RXp_OFFSp(rx)[(n)].end != -1 && RXp_OFFSp(rx)[(n)].start != -1 )
212 
213 #define RX_OFFS_START(rx_sv,n)  RXp_OFFS_START(ReANY(rx_sv),n)
214 #define RX_OFFS_END(rx_sv,n)    RXp_OFFS_END(ReANY(rx_sv),n)
215 #define RX_OFFS_VALID(rx_sv,n)  RXp_OFFS_VALID(ReANY(rx_sv),n)
216 
217 /* used for high speed searches */
218 typedef struct re_scream_pos_data_s
219 {
220     char **scream_olds;		/* match pos */
221     SSize_t *scream_pos;	/* Internal iterator of scream. */
222 } re_scream_pos_data;
223 
224 /* regexp_engine structure. This is the dispatch table for regexes.
225  * Any regex engine implementation must be able to build one of these.
226  */
227 typedef struct regexp_engine {
228     REGEXP* (*comp) (pTHX_ SV * const pattern, U32 flags);
229     I32     (*exec) (pTHX_ REGEXP * const rx, char* stringarg, char* strend,
230                      char* strbeg, SSize_t minend, SV* sv,
231                      void* data, U32 flags);
232     char*   (*intuit) (pTHX_
233                         REGEXP * const rx,
234                         SV *sv,
235                         const char * const strbeg,
236                         char *strpos,
237                         char *strend,
238                         const U32 flags,
239                        re_scream_pos_data *data);
240     SV*     (*checkstr) (pTHX_ REGEXP * const rx);
241     void    (*rxfree) (pTHX_ REGEXP * const rx);
242     void    (*numbered_buff_FETCH) (pTHX_ REGEXP * const rx, const I32 paren,
243                                     SV * const sv);
244     void    (*numbered_buff_STORE) (pTHX_ REGEXP * const rx, const I32 paren,
245                                    SV const * const value);
246     I32     (*numbered_buff_LENGTH) (pTHX_ REGEXP * const rx, const SV * const sv,
247                                     const I32 paren);
248     SV*     (*named_buff) (pTHX_ REGEXP * const rx, SV * const key,
249                            SV * const value, const U32 flags);
250     SV*     (*named_buff_iter) (pTHX_ REGEXP * const rx, const SV * const lastkey,
251                                 const U32 flags);
252     SV*     (*qr_package)(pTHX_ REGEXP * const rx);
253 #  ifdef USE_ITHREADS
254     void*   (*dupe) (pTHX_ REGEXP * const rx, CLONE_PARAMS *param);
255 #  endif
256     REGEXP* (*op_comp) (pTHX_ SV ** const patternp, int pat_count,
257                     OP *expr, const struct regexp_engine* eng,
258                     REGEXP *old_re,
259                     bool *is_bare_re, U32 orig_rx_flags, U32 pm_flags);
260 } regexp_engine;
261 
262 /*
263   These are passed to the numbered capture variable callbacks as the
264   'paren' argument. Values >= 1 are reserved for actual numbered
265   captures, i.e. $1, $2 etc.
266 */
267 #  define RX_BUFF_IDX_CARET_PREMATCH  -5 /* ${^PREMATCH}  */
268 #  define RX_BUFF_IDX_CARET_POSTMATCH -4 /* ${^POSTMATCH} */
269 #  define RX_BUFF_IDX_CARET_FULLMATCH -3 /* ${^MATCH}     */
270 #  define RX_BUFF_IDX_PREMATCH        -2 /* $` */
271 #  define RX_BUFF_IDX_POSTMATCH       -1 /* $' */
272 #  define RX_BUFF_IDX_FULLMATCH        0 /* $& */
273 
274 /*
275   Flags that are passed to the named_buff and named_buff_iter
276   callbacks above. Those routines are called from universal.c via the
277   Tie::Hash::NamedCapture interface for %+ and %- and the re::
278   functions in the same file.
279 */
280 
281 /* The Tie::Hash::NamedCapture operation this is part of, if any */
282 #  define RXapif_FETCH     0x0001
283 #  define RXapif_STORE     0x0002
284 #  define RXapif_DELETE    0x0004
285 #  define RXapif_CLEAR     0x0008
286 #  define RXapif_EXISTS    0x0010
287 #  define RXapif_SCALAR    0x0020
288 #  define RXapif_FIRSTKEY  0x0040
289 #  define RXapif_NEXTKEY   0x0080
290 
291 /* Whether %+ or %- is being operated on */
292 #  define RXapif_ONE       0x0100 /* %+ */
293 #  define RXapif_ALL       0x0200 /* %- */
294 
295 /* Whether this is being called from a re:: function */
296 #  define RXapif_REGNAME         0x0400
297 #  define RXapif_REGNAMES        0x0800
298 #  define RXapif_REGNAMES_COUNT  0x1000
299 
300 /*
301 =for apidoc Am|REGEXP *|SvRX|SV *sv
302 
303 Convenience macro to get the REGEXP from a SV.  This is approximately
304 equivalent to the following snippet:
305 
306     if (SvMAGICAL(sv))
307         mg_get(sv);
308     if (SvROK(sv))
309         sv = MUTABLE_SV(SvRV(sv));
310     if (SvTYPE(sv) == SVt_REGEXP)
311         return (REGEXP*) sv;
312 
313 C<NULL> will be returned if a REGEXP* is not found.
314 
315 =for apidoc Am|bool|SvRXOK|SV* sv
316 
317 Returns a boolean indicating whether the SV (or the one it references)
318 is a REGEXP.
319 
320 If you want to do something with the REGEXP* later use SvRX instead
321 and check for NULL.
322 
323 =cut
324 */
325 
326 #  define SvRX(sv)   (Perl_get_re_arg(aTHX_ sv))
327 #  define SvRXOK(sv) cBOOL(Perl_get_re_arg(aTHX_ sv))
328 
329 
330 /* Flags stored in regexp->extflags
331  * These are used by code external to the regexp engine
332  *
333  * Note that the flags whose names start with RXf_PMf_ are defined in
334  * op_reg_common.h, being copied from the parallel flags of op_pmflags
335  *
336  * NOTE: if you modify any RXf flags you should run regen.pl or
337  * regen/regcomp.pl so that regnodes.h is updated with the changes.
338  *
339  */
340 
341 #  include "op_reg_common.h"
342 
343 #  define RXf_PMf_STD_PMMOD	(RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_FOLD|RXf_PMf_EXTENDED|RXf_PMf_EXTENDED_MORE|RXf_PMf_NOCAPTURE)
344 /* 'case' arms that OR each standard modifier's flag into *(pmfl); x_count counts the 'x's seen so far */
345 #  define CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, x_count)                       \
346     case IGNORE_PAT_MOD:    *(pmfl) |= RXf_PMf_FOLD;       break;           \
347     case MULTILINE_PAT_MOD: *(pmfl) |= RXf_PMf_MULTILINE;  break;           \
348     case SINGLE_PAT_MOD:    *(pmfl) |= RXf_PMf_SINGLELINE; break;           \
349     case XTENDED_PAT_MOD:   if ((x_count) == 0) { /* first 'x': /x only */  \
350                                 *(pmfl) |= RXf_PMf_EXTENDED;                \
351                                 *(pmfl) &= ~RXf_PMf_EXTENDED_MORE;          \
352                             }                                               \
353                             else {              /* repeated 'x': /xx */     \
354                                 *(pmfl) |= RXf_PMf_EXTENDED                 \
355                                           |RXf_PMf_EXTENDED_MORE;           \
356                             }                                               \
357                             (x_count)++; break;                             \
358     case NOCAPTURE_PAT_MOD: *(pmfl) |= RXf_PMf_NOCAPTURE; break;
359 
360 /* Note, includes charset ones, assumes 0 is the default for them */
361 #  define STD_PMMOD_FLAGS_CLEAR(pmfl)                        \
362     *(pmfl) &= ~(RXf_PMf_FOLD|RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_EXTENDED|RXf_PMf_EXTENDED_MORE|RXf_PMf_CHARSET|RXf_PMf_NOCAPTURE)
363 
364 /* chars and strings used as regex pattern modifiers
365  * Singular is a 'c'har, plural is a "string"
366  *
367  * NOTE, KEEPCOPY was originally 'k', but was changed to 'p' for preserve
368  * for compatibility reasons with Regexp::Common which hijacked (?k:...)
369  * for its own uses. So 'k' is out as well.
370  */
371 #  define DEFAULT_PAT_MOD      '^'    /* Short for all the default modifiers */
372 #  define EXEC_PAT_MOD         'e'
373 #  define KEEPCOPY_PAT_MOD     'p'
374 #  define NOCAPTURE_PAT_MOD    'n'
375 #  define ONCE_PAT_MOD         'o'
376 #  define GLOBAL_PAT_MOD       'g'
377 #  define CONTINUE_PAT_MOD     'c'
378 #  define MULTILINE_PAT_MOD    'm'
379 #  define SINGLE_PAT_MOD       's'
380 #  define IGNORE_PAT_MOD       'i'
381 #  define XTENDED_PAT_MOD      'x'
382 #  define NONDESTRUCT_PAT_MOD  'r'
383 #  define LOCALE_PAT_MOD       'l'
384 #  define UNICODE_PAT_MOD      'u'
385 #  define DEPENDS_PAT_MOD      'd'
386 #  define ASCII_RESTRICT_PAT_MOD 'a'
387 
388 #  define ONCE_PAT_MODS        "o"
389 #  define KEEPCOPY_PAT_MODS    "p"
390 #  define NOCAPTURE_PAT_MODS   "n"
391 #  define EXEC_PAT_MODS        "e"
392 #  define LOOP_PAT_MODS        "gc"
393 #  define NONDESTRUCT_PAT_MODS "r"
394 #  define LOCALE_PAT_MODS      "l"
395 #  define UNICODE_PAT_MODS     "u"
396 #  define DEPENDS_PAT_MODS     "d"
397 #  define ASCII_RESTRICT_PAT_MODS "a"
398 #  define ASCII_MORE_RESTRICT_PAT_MODS "aa"
399 
400 /* This string is expected by regcomp.c to be ordered so that the first
401  * character is the flag in bit RXf_PMf_STD_PMMOD_SHIFT of extflags; the next
402  * character is bit +1, etc. */
403 #  define STD_PAT_MODS        "msixxn"
404 
405 #  define CHARSET_PAT_MODS    ASCII_RESTRICT_PAT_MODS DEPENDS_PAT_MODS LOCALE_PAT_MODS UNICODE_PAT_MODS
406 
407 /* This string is expected by XS_re_regexp_pattern() in universal.c to be ordered
408  * so that the first character is the flag in bit RXf_PMf_STD_PMMOD_SHIFT of
409  * extflags; the next character is in bit +1, etc. */
410 #  define INT_PAT_MODS    STD_PAT_MODS    KEEPCOPY_PAT_MODS
411 
412 #  define EXT_PAT_MODS    ONCE_PAT_MODS   KEEPCOPY_PAT_MODS  NOCAPTURE_PAT_MODS
413 #  define QR_PAT_MODS     STD_PAT_MODS    EXT_PAT_MODS	   CHARSET_PAT_MODS
414 #  define M_PAT_MODS      QR_PAT_MODS     LOOP_PAT_MODS
415 #  define S_PAT_MODS      M_PAT_MODS      EXEC_PAT_MODS      NONDESTRUCT_PAT_MODS
416 
417 /*
418  * NOTE: if you modify any RXf flags you should run regen.pl or
419  * regen/regcomp.pl so that regnodes.h is updated with the changes.
420  *
421  */
422 
423 /*
424   Set in Perl_pmruntime for a split. Will be used by regex engines to
425   check whether they should set RXf_SKIPWHITE
426 */
427 #  define RXf_SPLIT   RXf_PMf_SPLIT
428 
429 /* Currently the regex flags occupy a single 32-bit word.  Not all bits are
430  * currently used.  The lower bits are shared with their corresponding PMf flag
431  * bits, up to but not including _RXf_PMf_SHIFT_NEXT.  The unused bits
432  * immediately follow; finally the used RXf-only (unshared) bits, so that the
433  * highest bit in the word is used.  This gathers all the unused bits as a pool
434  * in the middle, like so: 11111111111111110000001111111111
435  * where the '1's represent used bits, and the '0's unused.  This design allows
436  * us to allocate off one end of the pool if we need to add a shared bit, and
437  * off the other end if we need a non-shared bit, without disturbing the other
438  * bits.  This maximizes the likelihood of being able to change things without
439  * breaking binary compatibility.
440  *
441  * To add shared bits, do so in op_reg_common.h.  This should change
442  * _RXf_PMf_SHIFT_NEXT so that things won't compile.  Then come to regexp.h and
443  * op.h and adjust the constant adders in the definitions of RXf_BASE_SHIFT and
444  * PMf_BASE_SHIFT down by the number of shared bits you added.  That's it.
445  * Things should be binary compatible.  But if either of these gets to having
446  * to subtract rather than add, leave at 0 and instead adjust all the entries
447  * that are in terms of it.  But if the first one of those is already
448  * RXf_BASE_SHIFT+0, there are no bits left, and a redesign is in order.
449  *
450  * To remove an unshared bit, just delete its entry.  If breaking binary
451  * compatibility is acceptable, you might want to adjust things to move
452  * the newly opened space so that it gets absorbed into the common pool.
453  *
454  * To add unshared bits, first use up any gaps in the middle.  Otherwise,
455  * allocate off the low end until you get to RXf_BASE_SHIFT+0.  If that isn't
456  * enough, move RXf_BASE_SHIFT down (if possible) and add the new bit at the
457  * other end instead; this preserves binary compatibility.
458  *
459  * For the regexp bits, PL_reg_extflags_name[] in regnodes.h has a comment
460  * giving which bits are used/unused */
461 
462 #  define RXf_BASE_SHIFT (_RXf_PMf_SHIFT_NEXT + 2)
463 
464 /* What we have seen */
465 #  define RXf_NO_INPLACE_SUBST  (1U<<(RXf_BASE_SHIFT+2))
466 #  define RXf_EVAL_SEEN   	(1U<<(RXf_BASE_SHIFT+3))
467 
468 /* Special */
469 #  define RXf_UNBOUNDED_QUANTIFIER_SEEN   (1U<<(RXf_BASE_SHIFT+4))
470 #  define RXf_CHECK_ALL   	(1U<<(RXf_BASE_SHIFT+5))
471 
472 /* UTF8 related */
473 #  define RXf_MATCH_UTF8  	(1U<<(RXf_BASE_SHIFT+6)) /* $1 etc are utf8 */
474 
475 /* Intuit related */
476 #  define RXf_USE_INTUIT_NOML	(1U<<(RXf_BASE_SHIFT+7))
477 #  define RXf_USE_INTUIT_ML	(1U<<(RXf_BASE_SHIFT+8))
478 #  define RXf_INTUIT_TAIL 	(1U<<(RXf_BASE_SHIFT+9))
479 #  define RXf_USE_INTUIT        (RXf_USE_INTUIT_NOML|RXf_USE_INTUIT_ML)
480 
481 /* Do we have some sort of anchor? */
482 #  define RXf_IS_ANCHORED       (1U<<(RXf_BASE_SHIFT+10))
483 
484 /* Copy and tainted info */
485 #  define RXf_COPY_DONE   	(1U<<(RXf_BASE_SHIFT+11))
486 
487 /* post-execution: $1 et al are tainted */
488 #  define RXf_TAINTED_SEEN	(1U<<(RXf_BASE_SHIFT+12))
489 /* this pattern was tainted during compilation */
490 #  define RXf_TAINTED		(1U<<(RXf_BASE_SHIFT+13))
491 
492 /* Flags indicating special patterns */
493 #  define RXf_START_ONLY        (1U<<(RXf_BASE_SHIFT+14)) /* Pattern is /^/ */
494 #  define RXf_SKIPWHITE         (1U<<(RXf_BASE_SHIFT+15)) /* Pattern is for a */
495                                                           /* split " " */
496 #  define RXf_WHITE		(1U<<(RXf_BASE_SHIFT+16)) /* Pattern is /\s+/ */
497 #  define RXf_NULL		(1U<<(RXf_BASE_SHIFT+17)) /* Pattern is // */
498 
499 /* See comments at the beginning of these defines about adding bits.  The
500  * highest bit position should be used, so that if RXf_BASE_SHIFT gets
501  * increased, the #error below will be triggered so that you will be reminded
502  * to adjust things at the other end to keep the bit positions unchanged */
503 #  if RXf_BASE_SHIFT+17 > 31
504 #     error Too many RXf_PMf bits used.  See comments at beginning of these for what to do
505 #  endif
506 
507 /*
508  * NOTE: if you modify any RXf flags you should run regen.pl or
509  * regen/regcomp.pl so that regnodes.h is updated with the changes.
510  *
511  */
512 
513 #  ifdef NO_TAINT_SUPPORT
514 #    define RX_ISTAINTED(rx_sv)           0
515 #    define RXp_ISTAINTED(prog)           0
516 #    define RX_TAINT_on(rx_sv)            NOOP
517 #    define RXp_MATCH_TAINTED(prog)       0
518 #    define RX_MATCH_TAINTED(rx_sv)       0
519 #    define RXp_MATCH_TAINTED_on(prog)    NOOP
520 #    define RX_MATCH_TAINTED_on(rx_sv)    NOOP
521 #    define RXp_MATCH_TAINTED_off(prog)   NOOP
522 #    define RX_MATCH_TAINTED_off(rx_sv)   NOOP
523 #  else
524 #    define RX_ISTAINTED(rx_sv)           (RX_EXTFLAGS(rx_sv) & RXf_TAINTED)
525 #    define RXp_ISTAINTED(prog)           (RXp_EXTFLAGS(prog) & RXf_TAINTED)
526 #    define RX_TAINT_on(rx_sv)            (RX_EXTFLAGS(rx_sv) |= RXf_TAINTED)
527 #    define RXp_MATCH_TAINTED(prog)       (RXp_EXTFLAGS(prog) & RXf_TAINTED_SEEN)
528 #    define RX_MATCH_TAINTED(rx_sv)       (RX_EXTFLAGS(rx_sv) & RXf_TAINTED_SEEN)
529 #    define RXp_MATCH_TAINTED_on(prog)    (RXp_EXTFLAGS(prog) |= RXf_TAINTED_SEEN)
530 #    define RX_MATCH_TAINTED_on(rx_sv)    (RX_EXTFLAGS(rx_sv) |= RXf_TAINTED_SEEN)
531 #    define RXp_MATCH_TAINTED_off(prog)   (RXp_EXTFLAGS(prog) &= ~RXf_TAINTED_SEEN)
532 #    define RX_MATCH_TAINTED_off(rx_sv)   (RX_EXTFLAGS(rx_sv) &= ~RXf_TAINTED_SEEN)
533 #  endif
534 
535 #  define RXp_HAS_CUTGROUP(prog)          ((prog)->intflags & PREGf_CUTGROUP_SEEN)
536 
537 #  define RX_MATCH_TAINTED_set(rx_sv, t)  ((t) \
538                                         ? RX_MATCH_TAINTED_on(rx_sv) \
539                                         : RX_MATCH_TAINTED_off(rx_sv))
540 
541 #  define RXp_MATCH_COPIED(prog)          (RXp_EXTFLAGS(prog) & RXf_COPY_DONE)
542 #  define RX_MATCH_COPIED(rx_sv)          (RX_EXTFLAGS(rx_sv) & RXf_COPY_DONE)
543 #  define RXp_MATCH_COPIED_on(prog)       (RXp_EXTFLAGS(prog) |= RXf_COPY_DONE)
544 #  define RX_MATCH_COPIED_on(rx_sv)       (RX_EXTFLAGS(rx_sv) |= RXf_COPY_DONE)
545 #  define RXp_MATCH_COPIED_off(prog)      (RXp_EXTFLAGS(prog) &= ~RXf_COPY_DONE)
546 #  define RX_MATCH_COPIED_off(rx_sv)      (RX_EXTFLAGS(rx_sv) &= ~RXf_COPY_DONE)
547 #  define RX_MATCH_COPIED_set(rx_sv,t)    ((t) \
548                                          ? RX_MATCH_COPIED_on(rx_sv) \
549                                          : RX_MATCH_COPIED_off(rx_sv))
550 
551 #  define RXp_EXTFLAGS(rx)                ((rx)->extflags)
552 #  define RXp_COMPFLAGS(rx)               ((rx)->compflags)
553 
554 /* For source compatibility. We used to store these explicitly.  */
555 #  define RX_PRECOMP(rx_sv)              (RX_WRAPPED(rx_sv) \
556                                             + ReANY(rx_sv)->pre_prefix)
557 #  define RX_PRECOMP_const(rx_sv)        (RX_WRAPPED_const(rx_sv) \
558                                             + ReANY(rx_sv)->pre_prefix)
559 /* FIXME? Are we hardcoding too much here and constraining plugin extension
560    writers? Specifically, the value 1 assumes that the wrapped version always
561    has exactly one character at the end, a ')'. Will that always be true?  */
562 #  define RX_PRELEN(rx_sv)                (RX_WRAPLEN(rx_sv) \
563                                             - ReANY(rx_sv)->pre_prefix - 1)
564 
565 #  define RX_WRAPPED(rx_sv)               SvPVX(rx_sv)
566 #  define RX_WRAPPED_const(rx_sv)         SvPVX_const(rx_sv)
567 #  define RX_WRAPLEN(rx_sv)               SvCUR(rx_sv)
568 #  define RX_CHECK_SUBSTR(rx_sv)          (ReANY(rx_sv)->check_substr)
569 #  define RX_REFCNT(rx_sv)                SvREFCNT(rx_sv)
570 #  define RX_EXTFLAGS(rx_sv)              RXp_EXTFLAGS(ReANY(rx_sv))
571 #  define RX_COMPFLAGS(rx_sv)             RXp_COMPFLAGS(ReANY(rx_sv))
572 #  define RXp_ENGINE(prog)                ((prog)->engine)
573 #  define RX_ENGINE(rx_sv)                (RXp_ENGINE(ReANY(rx_sv)))
574 #  define RXp_SUBBEG(prog)                ((prog)->subbeg)
575 #  define RX_SUBBEG(rx_sv)                (RXp_SUBBEG(ReANY(rx_sv)))
576 #  define RXp_SUBOFFSET(prog)             ((prog)->suboffset)
577 #  define RX_SUBOFFSET(rx_sv)             (RXp_SUBOFFSET(ReANY(rx_sv)))
578 #  define RXp_SUBCOFFSET(prog)            ((prog)->subcoffset)
579 #  define RX_SUBCOFFSET(rx_sv)            (RXp_SUBCOFFSET(ReANY(rx_sv)))
580 #  define RXp_OFFSp(prog)                 ((prog)->offs)
581 #  define RX_OFFSp(rx_sv)                 (RXp_OFFSp(ReANY(rx_sv)))
582 #  define RXp_LOGICAL_NPARENS(prog)       ((prog)->logical_nparens)
583 #  define RX_LOGICAL_NPARENS(rx_sv)       (RXp_LOGICAL_NPARENS(ReANY(rx_sv)))
584 #  define RXp_LOGICAL_TO_PARNO(prog)      ((prog)->logical_to_parno)
585 #  define RX_LOGICAL_TO_PARNO(rx_sv)      (RXp_LOGICAL_TO_PARNO(ReANY(rx_sv)))
586 #  define RXp_PARNO_TO_LOGICAL(prog)      ((prog)->parno_to_logical)
587 #  define RX_PARNO_TO_LOGICAL(rx_sv)      (RXp_PARNO_TO_LOGICAL(ReANY(rx_sv)))
588 #  define RXp_PARNO_TO_LOGICAL_NEXT(prog) ((prog)->parno_to_logical_next)
589 #  define RX_PARNO_TO_LOGICAL_NEXT(rx_sv) (RXp_PARNO_TO_LOGICAL_NEXT(ReANY(rx_sv)))
590 #  define RXp_NPARENS(prog)               ((prog)->nparens)
591 #  define RX_NPARENS(rx_sv)               (RXp_NPARENS(ReANY(rx_sv)))
592 #  define RXp_SUBLEN(prog)                ((prog)->sublen)
593 #  define RX_SUBLEN(rx_sv)                (RXp_SUBLEN(ReANY(rx_sv)))
594 #  define RXp_MINLEN(prog)                ((prog)->minlen)
595 #  define RX_MINLEN(rx_sv)                (RXp_MINLEN(ReANY(rx_sv)))
596 #  define RXp_MINLENRET(prog)             ((prog)->minlenret)
597 #  define RX_MINLENRET(rx_sv)             (RXp_MINLENRET(ReANY(rx_sv)))
598 #  define RXp_GOFS(prog)                  ((prog)->gofs)
599 #  define RX_GOFS(rx_sv)                  (RXp_GOFS(ReANY(rx_sv)))
600 #  define RXp_LASTPAREN(prog)             ((prog)->lastparen)
601 #  define RX_LASTPAREN(rx_sv)             (RXp_LASTPAREN(ReANY(rx_sv)))
602 #  define RXp_LASTCLOSEPAREN(prog)        ((prog)->lastcloseparen)
603 #  define RX_LASTCLOSEPAREN(rx_sv)        (RXp_LASTCLOSEPAREN(ReANY(rx_sv)))
604 #  define RXp_SAVED_COPY(prog)            ((prog)->saved_copy)
605 #  define RX_SAVED_COPY(rx_sv)            (RXp_SAVED_COPY(ReANY(rx_sv)))
606 #  define RXp_SUBSTRS(prog)               ((prog)->substrs)
607 #  define RX_SUBSTRS(rx_sv)               (RXp_SUBSTRS(ReANY(rx_sv)))
608 #  define RXp_PPRIVATE(prog)              ((prog)->pprivate)
609 #  define RX_PPRIVATE(rx_sv)              (RXp_PPRIVATE(ReANY(rx_sv)))
610 #  define RXp_QR_ANONCV(prog)             ((prog)->qr_anoncv)
611 #  define RX_QR_ANONCV(rx_sv)             (RXp_QR_ANONCV(ReANY(rx_sv)))
612 #  define RXp_MOTHER_RE(prog)             ((prog)->mother_re)
613 #  define RX_MOTHER_RE(rx_sv)             (RXp_MOTHER_RE(ReANY(rx_sv)))
614 #  define RXp_PRE_PREFIX(prog)            ((prog)->pre_prefix)
615 #  define RX_PRE_PREFIX(rx_sv)            (RXp_PRE_PREFIX(ReANY(rx_sv)))
616 
617 /* last match was zero-length */
618 #  define RXp_ZERO_LEN(prog) \
619         (RXp_OFFS_START(prog,0) + (SSize_t)RXp_GOFS(prog) \
620           == RXp_OFFS_END(prog,0))
621 #  define RX_ZERO_LEN(rx_sv)              (RXp_ZERO_LEN(ReANY(rx_sv)))
622 
623 #endif /* PLUGGABLE_RE_EXTENSION */
624 
625 /* Stuff that needs to be included in the pluggable extension goes below here */
626 
627 #ifdef PERL_ANY_COW
628 #  define RXp_MATCH_COPY_FREE(prog)                                 \
629     STMT_START {                                                    \
630         if (RXp_SAVED_COPY(prog)) {                                 \
631             SV_CHECK_THINKFIRST_COW_DROP(RXp_SAVED_COPY(prog));     \
632         }                                                           \
633         if (RXp_MATCH_COPIED(prog)) {                               \
634             Safefree(RXp_SUBBEG(prog));                             \
635             RXp_MATCH_COPIED_off(prog);                             \
636         }                                                           \
637     } STMT_END
638 #else
639 #  define RXp_MATCH_COPY_FREE(prog)                     \
640     STMT_START {                                        \
641         if (RXp_MATCH_COPIED(prog)) {                   \
642             Safefree(RXp_SUBBEG(prog));                 \
643             RXp_MATCH_COPIED_off(prog);                 \
644         }                                               \
645     } STMT_END
646 #endif
647 #define RX_MATCH_COPY_FREE(rx_sv)       RXp_MATCH_COPY_FREE(ReANY(rx_sv))
648 /* Test/set/clear RXf_MATCH_UTF8 in extflags: RXp_* take a regexp pointer, RX_* go through ReANY() */
649 #define RXp_MATCH_UTF8(prog)            (RXp_EXTFLAGS(prog) & RXf_MATCH_UTF8)
650 #define RX_MATCH_UTF8(rx_sv)            (RX_EXTFLAGS(rx_sv) & RXf_MATCH_UTF8)
651 #define RXp_MATCH_UTF8_on(prog)         (RXp_EXTFLAGS(prog) |= RXf_MATCH_UTF8)
652 #define RX_MATCH_UTF8_on(rx_sv)         (RXp_MATCH_UTF8_on(ReANY(rx_sv)))
653 #define RXp_MATCH_UTF8_off(prog)        (RXp_EXTFLAGS(prog) &= ~RXf_MATCH_UTF8)
654 #define RX_MATCH_UTF8_off(rx_sv)        (RXp_MATCH_UTF8_off(ReANY(rx_sv)))
655 #define RXp_MATCH_UTF8_set(prog, t)     ((t) \
656                                         ? RXp_MATCH_UTF8_on(prog) \
657                                         : RXp_MATCH_UTF8_off(prog))
658 #define RX_MATCH_UTF8_set(rx_sv, t)     (RXp_MATCH_UTF8_set(ReANY(rx_sv), t))
659 
660 /* Whether the pattern stored at RX_WRAPPED is in UTF-8  */
661 #define RX_UTF8(rx_sv)                  SvUTF8(rx_sv)
662 
663 
664 /* bits in flags arg of Perl_regexec_flags() */
665 
666 #define REXEC_COPY_STR  0x01    /* Need to copy the string for captures. */
667 #define REXEC_CHECKED   0x02    /* re_intuit_start() already called. */
668 #define REXEC_SCREAM    0x04    /* currently unused. */
669 #define REXEC_IGNOREPOS 0x08    /* use stringarg, not pos(), for \G match */
670 #define REXEC_NOT_FIRST 0x10    /* This is another iteration of //g:
671                                    no need to copy string again */
672 
673                                      /* under REXEC_COPY_STR, it's ok for the
674                                         engine (modulo PL_sawamperand etc)
675                                         to skip copying: ... */
676 #define REXEC_COPY_SKIP_PRE  0x20    /* ...the $` part of the string, or */
677 #define REXEC_COPY_SKIP_POST 0x40    /* ...the $' part of the string */
678 #define REXEC_FAIL_ON_UNDERFLOW 0x80 /* fail the match if $& would start before
679                                         the start pos (so s/.\G// would fail
680                                         on second iteration) */
681 
/* Reference-count helpers for REGEXP pointers.
 *
 * When PERL_USE_GCC_BRACE_GROUPS is defined, both macros are statement
 * expressions that first assign 're' to a 'REGEXP *const' local, so that
 * passing some other pointer type provokes a compiler diagnostic and 're'
 * is evaluated only once.  ReREFCNT_inc additionally asserts that the
 * SvTYPE of the argument is SVt_REGEXP and yields the pointer as its
 * value.
 *
 * Otherwise the macros are direct wrappers around SvREFCNT_dec() and
 * SvREFCNT_inc(), with the result of the latter cast to 'REGEXP *'.
 */
#if defined(PERL_USE_GCC_BRACE_GROUPS)
#  define ReREFCNT_inc(re)						\
    ({									\
        /* This is here to generate a casting warning if incorrect.  */	\
        REGEXP *const _rerefcnt_inc = (re);				\
        assert(SvTYPE(_rerefcnt_inc) == SVt_REGEXP);			\
        SvREFCNT_inc(_rerefcnt_inc);					\
        _rerefcnt_inc;							\
    })
#  define ReREFCNT_dec(re)						\
    ({									\
        /* This is here to generate a casting warning if incorrect.  */	\
        REGEXP *const _rerefcnt_dec = (re);				\
        SvREFCNT_dec(_rerefcnt_dec);					\
    })
#else
#  define ReREFCNT_dec(re)	SvREFCNT_dec(re)
#  define ReREFCNT_inc(re)	((REGEXP *) SvREFCNT_inc(re))
#endif
/* Casts 're' to 'const REGEXP *' and hands it to Perl_ReANY(). */
#define ReANY(re)		Perl_ReANY((const REGEXP *)(re))
702 
703 /* FIXME for plugins. */
704 
/* FBMcf_* flag bits.  FBMcf_TAIL is the union of the four individual
 * FBMcf_TAIL_* bits. */
#define FBMcf_TAIL_DOLLAR       0x01
#define FBMcf_TAIL_DOLLARM      0x02
#define FBMcf_TAIL_Z            0x04
#define FBMcf_TAIL_z            0x08
#define FBMcf_TAIL              \
    (FBMcf_TAIL_DOLLAR | FBMcf_TAIL_DOLLARM | FBMcf_TAIL_Z | FBMcf_TAIL_z)

/* FBMrf_* flag bit. */
#define FBMrf_MULTILINE         0x01
712 
/* Forward declarations: both structs are defined further down in this
 * file, but pointers to them are needed before then. */
struct regmatch_state;
struct regmatch_slab;

/* Like regmatch_info_aux, but contains extra fields only needed if the
 * pattern contains (?{}). If used, it is snuck into the second slot in the
 * regmatch_state stack at the start of execution. */

typedef struct {
    regexp *rex;        /* the regexp whose fields are saved below */
    PMOP    *curpm;     /* saved PL_curpm */
#ifdef PERL_ANY_COW
    SV      *saved_copy; /* saved saved_copy field from rex */
#endif
    char    *subbeg;    /* saved subbeg     field from rex */
    STRLEN  sublen;     /* saved sublen     field from rex */
    STRLEN  suboffset;  /* saved suboffset  field from rex */
    STRLEN  subcoffset; /* saved subcoffset field from rex */
    SV      *sv;        /* $_  during (?{}) */
    MAGIC   *pos_magic; /* pos() magic attached to $_ */
    SSize_t pos;        /* the original value of pos() in pos_magic */
    U8      pos_flags;  /* flags to be restored; currently only MGf_BYTES */
} regmatch_info_aux_eval;
735 
736 
/* Fields that logically live in regmatch_info, but which need cleaning
 * up on croak(), and so are instead snuck into the first slot in
 * the regmatch_state stack at the start of execution. */

typedef struct {
    regmatch_info_aux_eval *info_aux_eval; /* extra state for (?{}) patterns */
    struct regmatch_state *old_regmatch_state; /* saved PL_regmatch_state */
    struct regmatch_slab  *old_regmatch_slab;  /* saved PL_regmatch_slab */
    char *poscache;	/* S-L cache of fail positions of WHILEMs */
} regmatch_info_aux;
747 
748 
/*
=for apidoc Ay||regmatch_info
Some basic information about the current match that is created by
Perl_regexec_flags and then passed to regtry(), regmatch() etc.
It is allocated as a local var on the stack, so nothing should be
stored in it that needs preserving or clearing up on croak().
For that, see the info_aux and info_aux_eval members of the
regmatch_state union.

=cut
*/

typedef struct {
    REGEXP *prog;        /* the regex being executed */
    const char * strbeg; /* real start of string */
    char *strend;        /* one byte beyond last char of match string */
    char *till;          /* matches shorter than this fail (see minlen arg) */
    SV *sv;              /* the SV string currently being matched */
    char *ganch;         /* position of \G anchor */
    char *cutpoint;      /* (*COMMIT) position (if any) */
    regmatch_info_aux      *info_aux; /* extra fields that need cleanup */
    regmatch_info_aux_eval *info_aux_eval; /* extra saved state for (?{}) */
    I32  poscache_maxiter; /* how many whilems to do before S-L cache kicks in */
    I32  poscache_iter;    /* current countdown from _maxiter to zero */
    STRLEN poscache_size;  /* size of regmatch_info_aux.poscache */
    bool intuit;    /* re_intuit_start() is the top-level caller */
    bool is_utf8_pat;    /* regex is utf8 */
    bool is_utf8_target; /* string being matched is utf8 */
    bool warned; /* we have issued a recursion warning; no need for more */
} regmatch_info;
779 
780 
781 /* structures for holding and saving the state maintained by regmatch() */
782 
/* Defaults to 10; a definition made before this point takes precedence. */
#if !defined(MAX_RECURSE_EVAL_NOCHANGE_DEPTH)
#  define MAX_RECURSE_EVAL_NOCHANGE_DEPTH 10
#endif
786 
/* The +1 is because everything matches itself, which isn't included in
 * MAX_FOLD_FROMS; the +2 is based on the current Unicode standard's needs,
 * and is unlikely to change.  An assertion should fail in regexec.c if it is
 * too low.  It is needed for certain edge cases involving multi-character
 * folds when the first component also participates in a fold individually. */
#define MAX_MATCHES (MAX_FOLD_FROMS + 1 + 2)

/* NOTE(review): this struct is embedded as 'Binfo' in the curly and curlym
 * members of regmatch_state, next to the A and B nodes of /A*B/; presumably
 * it describes what can match next at B -- confirm against regexec.c. */
struct next_matchable_info {
    U8     first_byte_mask;
    U8     first_byte_anded;
    U32    mask32;
    U32    anded32;
    PERL_INT_FAST8_T count; /* Negative means not initialized */
    PERL_UINT_FAST8_T min_length;
    PERL_UINT_FAST8_T max_length;
    PERL_UINT_FAST8_T initial_definitive;
    PERL_UINT_FAST8_T initial_exact;
    PERL_UINT_FAST8_T lengths[MAX_MATCHES]; /* one slot per possible match */

    /* The size is from trial and error, and could change with new Unicode
     * standards, in which case there is an assertion that should start
     * failing.  This size could be calculated in one of the regen scripts
     * dealing with Unicode, but khw thinks the likelihood of it changing is
     * low enough that it isn't worth the effort. */
    U8 matches[18];
};
813 
/* Type of the 'cp' and 'lastcp' checkpoint fields used below. */
typedef I32 CHECKPOINT;

/* One slot of state saved by regmatch().  A few common fields are followed
 * by the union 'u', which has one member per kind of saved state.  Members
 * that store positive backtrack information start with 'prev_yes_state' so
 * that they can all be reached through u.yes; branch-like members share the
 * longer common prefix laid out in u.branchlike.  Field order within those
 * prefixes must therefore not be changed. */
typedef struct regmatch_state {
    int resume_state;		/* where to jump to on return */
    char *locinput;		/* where to backtrack in string on failure */
    char *loceol;
    U8 *sr0;                    /* position of start of script run, or NULL */

    union {

        /* the 'info_aux' and 'info_aux_eval' union members are cuckoos in
         * the nest. They aren't saved backtrack state; rather they
         * represent one or two extra chunks of data that need allocating
         * at the start of a match. These fields would logically live in
         * the regmatch_info struct, except that is allocated on the
         * C stack, and these fields are all things that require cleanup
         * after a croak(), when the stack is lost.
         * As a convenience, we just use the first 1 or 2 regmatch_state
         * slots to store this info, as we will be allocating a slab of
         * these anyway. Otherwise we'd have to malloc and then free them,
         * or allocate them on the save stack (where they will get
         * realloced if the save stack grows).
         * info_aux contains the extra fields that are always needed;
         * info_aux_eval contains extra fields that are only needed if
         * the pattern contains code blocks.
         * We split them into two separate structs to avoid increasing
         * the size of the union.
         */

        regmatch_info_aux info_aux;

        regmatch_info_aux_eval info_aux_eval;

        /* this is a fake union member that matches the first element
         * of each member that needs to store positive backtrack
         * information */
        struct {
            struct regmatch_state *prev_yes_state;
        } yes;


        /* NOTE: Regarding 'cp' and 'lastcp' in the following structs...
         *
         * In the majority of cases we use 'cp' for the "normal"
         * checkpoint for paren saves, and 'lastcp' for the additional
         * paren saves that are done only under RE_PESSIMISTIC_PARENS.
         *
         * There may be a few cases where both are used always.
         * Regardless they tend to be used something like this:
         *
         *   ST.cp = regcppush(rex, 0, maxopenparen);
         *   REGCP_SET(ST.lastcp);
         *
         * thus ST.cp holds the checkpoint from before we push parens,
         * and ST.lastcp holds the checkpoint from afterwards.
         */

        /* branchlike members */
        /* this is a fake union member that matches the first elements
         * of each member that needs to behave like a branch */
        struct {
            /* this first element must match u.yes */
            struct regmatch_state *prev_yes_state;
            U32         lastparen;
            U32         lastcloseparen;
            CHECKPOINT  cp;         /* see note above "struct branchlike" */
            CHECKPOINT  lastcp;     /* see note above "struct branchlike" */
            U16         before_paren;
            U16         after_paren;

        } branchlike;

        struct {
            /* the first elements must match u.branchlike */
            struct regmatch_state *prev_yes_state;
            U32         lastparen;
            U32         lastcloseparen;
            CHECKPOINT  cp;         /* see note above "struct branchlike" */
            CHECKPOINT  lastcp;     /* see note above "struct branchlike" */
            U16         before_paren;
            U16         after_paren;

            regnode *next_branch;   /* next branch node */
        } branch;

        struct {
            /* the first elements must match u.branchlike */
            struct regmatch_state *prev_yes_state;
            U32         lastparen;
            U32         lastcloseparen;
            CHECKPOINT  cp;         /* see note above "struct branchlike" */
            CHECKPOINT  lastcp;     /* see note above "struct branchlike" */
            U16         before_paren;
            U16         after_paren;

            U32         accepted;   /* how many accepting states left */
            bool        longfold;   /* saw a fold with a 1->n char mapping */
            U16         *jump;      /* positive offsets from me */
            U16         *j_before_paren;
            U16         *j_after_paren;
            regnode     *me;        /* Which node am I - needed for jump tries */
            U8          *firstpos;  /* pos in string of first trie match */
            U32         firstchars; /* len in chars of firstpos from start */
            U16         nextword;   /* next word to try */
            U16         topword;    /* longest accepted word */
        } trie;

        /* special types - these members are used to store state for special
           regops like eval, if/then, lookaround and the markpoint state */
        struct {
            /* this first element must match u.yes */
            struct regmatch_state *prev_yes_state;
            struct regmatch_state *prev_curlyx;
            struct regmatch_state *prev_eval;
            REGEXP	*prev_rex;
            CHECKPOINT  cp;             /* see note above "struct branchlike" */
            CHECKPOINT  lastcp;         /* see note above "struct branchlike" */
            U32         close_paren;    /* which close bracket is our end (+1) */
            regnode     *B;             /* the node following us  */
            char        *prev_recurse_locinput;
        } eval;

        struct {
            /* this first element must match u.yes */
            struct regmatch_state *prev_yes_state;
            I32     wanted;
            I32     logical;    /* saved copy of 'logical' var */
            U8      count;      /* number of beginning positions */
            char    *start;
            char    *end;
            regnode *me;        /* the IFMATCH/SUSPEND/UNLESSM node  */
            char    *prev_match_end;
        } ifmatch;              /* and SUSPEND/UNLESSM */

        struct {
            /* this first element must match u.yes */
            struct regmatch_state *prev_yes_state;
            struct regmatch_state *prev_mark;
            SV      *mark_name;
            char    *mark_loc;
        } mark;

        struct {
            int val;
        } keeper;

        /* quantifiers - these members are used for storing state for
           the regops used to implement quantifiers */
        struct {
            /* this first element must match u.yes */
            struct regmatch_state *prev_yes_state;
            struct regmatch_state *prev_curlyx; /* previous cur_curlyx */
            regnode     *me;        /* the CURLYX node  */
            regnode     *B;         /* the B node in /A*B/  */
            CHECKPOINT  cp;         /* see note above "struct branchlike" */
            CHECKPOINT  lastcp;     /* see note above "struct branchlike" */
            bool	minmod;
            int         parenfloor; /* how far back to strip paren data */

            /* these two are modified by WHILEM */
            int         count;      /* how many instances of A we've matched */
            char        *lastloc;   /* where previous A matched (0-len detect) */
        } curlyx;

        struct {
            /* this first element must match u.yes */
            struct regmatch_state *prev_yes_state;
            struct regmatch_state *save_curlyx;
            CHECKPOINT  cp;             /* see note above "struct branchlike" */
            CHECKPOINT  lastcp;         /* see note above "struct branchlike" */
            char        *save_lastloc;  /* previous curlyx.lastloc */
            I32		cache_offset;
            I32		cache_mask;
        } whilem;

        struct {
            /* this first element must match u.yes */
            struct regmatch_state *prev_yes_state;
            U32         lastparen;
            U32         lastcloseparen;
            CHECKPOINT  cp;         /* see note above "struct branchlike" */
            CHECKPOINT  lastcp;     /* see note above "struct branchlike" */
            I32         alen;       /* length of first-matched A string */
            I32         count;
            bool        minmod;
            regnode     *A, *B;     /* the nodes corresponding to /A*B/  */
            regnode     *me;        /* the curlym node */
            struct next_matchable_info Binfo;
        } curlym;

        struct {
            U32         paren;
            U32         lastparen;
            U32         lastcloseparen;
            CHECKPOINT  cp;         /* see note above "struct branchlike" */
            CHECKPOINT  lastcp;     /* see note above "struct branchlike" */
            char        *maxpos;    /* highest possible point in string to match */
            char        *oldloc;    /* the previous locinput */
            int         count;
            int         min, max;   /* {m,n} */
            regnode     *A, *B;     /* the nodes corresponding to /A*B/  */
            struct next_matchable_info Binfo;
        } curly; /* and CURLYN/PLUS/STAR */

        struct {
            CHECKPOINT  cp;
            CHECKPOINT  lastcp;
        } backref; /* REF and friends */
    } u;
} regmatch_state;
1024 
1025 
1026 
1027 /* how many regmatch_state structs to allocate as a single slab.
1028  * We do it in 4K blocks for efficiency. The "3" is 2 for the next/prev
1029  * pointers, plus 1 for any mythical malloc overhead. */
1030 
1031 #define PERL_REGMATCH_SLAB_SLOTS \
1032     ((4096 - 3 * sizeof (void*)) / sizeof(regmatch_state))
1033 
1034 typedef struct regmatch_slab {
1035     regmatch_state states[PERL_REGMATCH_SLAB_SLOTS];
1036     struct regmatch_slab *prev, *next;
1037 } regmatch_slab;
1038 
1039 
1040 #define REG_FETCH_ABSOLUTE 1
1041 
1042 /*
1043  * ex: set ts=8 sts=4 sw=4 et:
1044  */
1045