1 /* regexec.c 2 */ 3 4 /* 5 * "One Ring to rule them all, One Ring to find them..." 6 */ 7 8 /* NOTE: this is derived from Henry Spencer's regexp code, and should not 9 * confused with the original package (see point 3 below). Thanks, Henry! 10 */ 11 12 /* Additional note: this code is very heavily munged from Henry's version 13 * in places. In some spots I've traded clarity for efficiency, so don't 14 * blame Henry for some of the lack of readability. 15 */ 16 17 /* The names of the functions have been changed from regcomp and 18 * regexec to pregcomp and pregexec in order to avoid conflicts 19 * with the POSIX routines of the same names. 20 */ 21 22 #ifdef PERL_EXT_RE_BUILD 23 /* need to replace pregcomp et al, so enable that */ 24 # ifndef PERL_IN_XSUB_RE 25 # define PERL_IN_XSUB_RE 26 # endif 27 /* need access to debugger hooks */ 28 # if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING) 29 # define DEBUGGING 30 # endif 31 #endif 32 33 #ifdef PERL_IN_XSUB_RE 34 /* We *really* need to overwrite these symbols: */ 35 # define Perl_regexec_flags my_regexec 36 # define Perl_regdump my_regdump 37 # define Perl_regprop my_regprop 38 # define Perl_re_intuit_start my_re_intuit_start 39 /* *These* symbols are masked to allow static link. */ 40 # define Perl_pregexec my_pregexec 41 # define Perl_reginitcolors my_reginitcolors 42 43 # define PERL_NO_GET_CONTEXT 44 #endif 45 46 /*SUPPRESS 112*/ 47 /* 48 * pregcomp and pregexec -- regsub and regerror are not used in perl 49 * 50 * Copyright (c) 1986 by University of Toronto. 51 * Written by Henry Spencer. Not derived from licensed software. 52 * 53 * Permission is granted to anyone to use this software for any 54 * purpose on any computer system, and to redistribute it freely, 55 * subject to the following restrictions: 56 * 57 * 1. The author is not responsible for the consequences of use of 58 * this software, no matter how awful, even if they arise 59 * from defects in it. 60 * 61 * 2. The origin of this software must not be misrepresented, either 62 * by explicit claim or by omission. 63 * 64 * 3. Altered versions must be plainly marked as such, and must not 65 * be misrepresented as being the original software. 66 * 67 **** Alterations to Henry's code are... 68 **** 69 **** Copyright (c) 1991-2001, Larry Wall 70 **** 71 **** You may distribute under the terms of either the GNU General Public 72 **** License or the Artistic License, as specified in the README file. 73 * 74 * Beware that some of this code is subtly aware of the way operator 75 * precedence is structured in regular expressions. Serious changes in 76 * regular-expression syntax might require a total rethink. 77 */ 78 #include "EXTERN.h" 79 #define PERL_IN_REGEXEC_C 80 #include "perl.h" 81 82 #ifdef PERL_IN_XSUB_RE 83 # if defined(PERL_CAPI) || defined(PERL_OBJECT) 84 # include "XSUB.h" 85 # endif 86 #endif 87 88 #include "regcomp.h" 89 90 #define RF_tainted 1 /* tainted information used? */ 91 #define RF_warned 2 /* warned about big count? */ 92 #define RF_evaled 4 /* Did an EVAL with setting? */ 93 #define RF_utf8 8 /* String contains multibyte chars? */ 94 95 #define UTF (PL_reg_flags & RF_utf8) 96 97 #define RS_init 1 /* eval environment created */ 98 #define RS_set 2 /* replsv value is set */ 99 100 #ifndef STATIC 101 #define STATIC static 102 #endif 103 104 /* 105 * Forwards. 106 */ 107 108 #define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c) : ANYOF_BITMAP_TEST(p,c)) 109 #ifdef DEBUGGING 110 # define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch(*av_fetch((AV*)SvRV((SV*)PL_regdata->data[ARG2(f)]),0,FALSE),p)) 111 #else 112 # define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p)) 113 #endif 114 115 #define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv)) 116 #define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b) 117 118 #define reghop_c(pos,off) ((char*)reghop((U8*)pos, off)) 119 #define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off)) 120 #define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off)) 121 #define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off)) 122 #define HOPc(pos,off) ((char*)HOP(pos,off)) 123 #define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off)) 124 125 static void restore_pos(pTHXo_ void *arg); 126 127 128 STATIC CHECKPOINT 129 S_regcppush(pTHX_ I32 parenfloor) 130 { 131 int retval = PL_savestack_ix; 132 #define REGCP_PAREN_ELEMS 4 133 int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS; 134 int p; 135 136 #define REGCP_OTHER_ELEMS 5 137 SSCHECK(paren_elems_to_push + REGCP_OTHER_ELEMS); 138 for (p = PL_regsize; p > parenfloor; p--) { 139 /* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */ 140 SSPUSHINT(PL_regendp[p]); 141 SSPUSHINT(PL_regstartp[p]); 142 SSPUSHPTR(PL_reg_start_tmp[p]); 143 SSPUSHINT(p); 144 } 145 /* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */ 146 SSPUSHINT(PL_regsize); 147 SSPUSHINT(*PL_reglastparen); 148 SSPUSHPTR(PL_reginput); 149 #define REGCP_FRAME_ELEMS 2 150 /* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and 151 * are needed for the regexp context stack bookkeeping. */ 152 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS); 153 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */ 154 155 return retval; 156 } 157 158 /* These are needed since we do not localize EVAL nodes: */ 159 # define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \ 160 " Setting an EVAL scope, savestack=%"IVdf"\n", \ 161 (IV)PL_savestack_ix)); cp = PL_savestack_ix 162 163 # define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \ 164 PerlIO_printf(Perl_debug_log, \ 165 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \ 166 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp) 167 168 STATIC char * 169 S_regcppop(pTHX) 170 { 171 I32 i; 172 U32 paren = 0; 173 char *input; 174 I32 tmps; 175 176 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */ 177 i = SSPOPINT; 178 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */ 179 i = SSPOPINT; /* Parentheses elements to pop. */ 180 input = (char *) SSPOPPTR; 181 *PL_reglastparen = SSPOPINT; 182 PL_regsize = SSPOPINT; 183 184 /* Now restore the parentheses context. */ 185 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS); 186 i > 0; i -= REGCP_PAREN_ELEMS) { 187 paren = (U32)SSPOPINT; 188 PL_reg_start_tmp[paren] = (char *) SSPOPPTR; 189 PL_regstartp[paren] = SSPOPINT; 190 tmps = SSPOPINT; 191 if (paren <= *PL_reglastparen) 192 PL_regendp[paren] = tmps; 193 DEBUG_r( 194 PerlIO_printf(Perl_debug_log, 195 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n", 196 (UV)paren, (IV)PL_regstartp[paren], 197 (IV)(PL_reg_start_tmp[paren] - PL_bostr), 198 (IV)PL_regendp[paren], 199 (paren > *PL_reglastparen ? "(no)" : "")); 200 ); 201 } 202 DEBUG_r( 203 if (*PL_reglastparen + 1 <= PL_regnpar) { 204 PerlIO_printf(Perl_debug_log, 205 " restoring \\%"IVdf"..\\%"IVdf" to undef\n", 206 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar); 207 } 208 ); 209 #if 1 210 /* It would seem that the similar code in regtry() 211 * already takes care of this, and in fact it is in 212 * a better location to since this code can #if 0-ed out 213 * but the code in regtry() is needed or otherwise tests 214 * requiring null fields (pat.t#187 and split.t#{13,14} 215 * (as of patchlevel 7877) will fail. Then again, 216 * this code seems to be necessary or otherwise 217 * building DynaLoader will fail: 218 * "Error: '*' not in typemap in DynaLoader.xs, line 164" 219 * --jhi */ 220 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) { 221 if (paren > PL_regsize) 222 PL_regstartp[paren] = -1; 223 PL_regendp[paren] = -1; 224 } 225 #endif 226 return input; 227 } 228 229 STATIC char * 230 S_regcp_set_to(pTHX_ I32 ss) 231 { 232 I32 tmp = PL_savestack_ix; 233 234 PL_savestack_ix = ss; 235 regcppop(); 236 PL_savestack_ix = tmp; 237 return Nullch; 238 } 239 240 typedef struct re_cc_state 241 { 242 I32 ss; 243 regnode *node; 244 struct re_cc_state *prev; 245 CURCUR *cc; 246 regexp *re; 247 } re_cc_state; 248 249 #define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */ 250 251 #define TRYPAREN(paren, n, input) { \ 252 if (paren) { \ 253 if (n) { \ 254 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \ 255 PL_regendp[paren] = input - PL_bostr; \ 256 } \ 257 else \ 258 PL_regendp[paren] = -1; \ 259 } \ 260 if (regmatch(next)) \ 261 sayYES; \ 262 if (paren && n) \ 263 PL_regendp[paren] = -1; \ 264 } 265 266 267 /* 268 * pregexec and friends 269 */ 270 271 /* 272 - pregexec - match a regexp against a string 273 */ 274 I32 275 Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend, 276 char *strbeg, I32 minend, SV *screamer, U32 nosave) 277 /* strend: pointer to null at end of string */ 278 /* strbeg: real beginning of string */ 279 /* minend: end of match must be >=minend after stringarg. */ 280 /* nosave: For optimizations. */ 281 { 282 return 283 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL, 284 nosave ? 0 : REXEC_COPY_STR); 285 } 286 287 STATIC void 288 S_cache_re(pTHX_ regexp *prog) 289 { 290 PL_regprecomp = prog->precomp; /* Needed for FAIL. */ 291 #ifdef DEBUGGING 292 PL_regprogram = prog->program; 293 #endif 294 PL_regnpar = prog->nparens; 295 PL_regdata = prog->data; 296 PL_reg_re = prog; 297 } 298 299 /* 300 * Need to implement the following flags for reg_anch: 301 * 302 * USE_INTUIT_NOML - Useful to call re_intuit_start() first 303 * USE_INTUIT_ML 304 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer 305 * INTUIT_AUTORITATIVE_ML 306 * INTUIT_ONCE_NOML - Intuit can match in one location only. 307 * INTUIT_ONCE_ML 308 * 309 * Another flag for this function: SECOND_TIME (so that float substrs 310 * with giant delta may be not rechecked). 311 */ 312 313 /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */ 314 315 /* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend. 316 Otherwise, only SvCUR(sv) is used to get strbeg. */ 317 318 /* XXXX We assume that strpos is strbeg unless sv. */ 319 320 /* XXXX Some places assume that there is a fixed substring. 321 An update may be needed if optimizer marks as "INTUITable" 322 RExen without fixed substrings. Similarly, it is assumed that 323 lengths of all the strings are no more than minlen, thus they 324 cannot come from lookahead. 325 (Or minlen should take into account lookahead.) */ 326 327 /* A failure to find a constant substring means that there is no need to make 328 an expensive call to REx engine, thus we celebrate a failure. Similarly, 329 finding a substring too deep into the string means that less calls to 330 regtry() should be needed. 331 332 REx compiler's optimizer found 4 possible hints: 333 a) Anchored substring; 334 b) Fixed substring; 335 c) Whether we are anchored (beginning-of-line or \G); 336 d) First node (of those at offset 0) which may distingush positions; 337 We use a)b)d) and multiline-part of c), and try to find a position in the 338 string which does not contradict any of them. 339 */ 340 341 /* Most of decisions we do here should have been done at compile time. 342 The nodes of the REx which we used for the search should have been 343 deleted from the finite automaton. */ 344 345 char * 346 Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, 347 char *strend, U32 flags, re_scream_pos_data *data) 348 { 349 register I32 start_shift; 350 /* Should be nonnegative! */ 351 register I32 end_shift; 352 register char *s; 353 register SV *check; 354 char *strbeg; 355 char *t; 356 I32 ml_anch; 357 char *tmp; 358 register char *other_last = Nullch; /* other substr checked before this */ 359 char *check_at; /* check substr found at this pos */ 360 #ifdef DEBUGGING 361 char *i_strpos = strpos; 362 #endif 363 364 DEBUG_r( if (!PL_colorset) reginitcolors() ); 365 DEBUG_r(PerlIO_printf(Perl_debug_log, 366 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n", 367 PL_colors[4],PL_colors[5],PL_colors[0], 368 prog->precomp, 369 PL_colors[1], 370 (strlen(prog->precomp) > 60 ? "..." : ""), 371 PL_colors[0], 372 (int)(strend - strpos > 60 ? 60 : strend - strpos), 373 strpos, PL_colors[1], 374 (strend - strpos > 60 ? "..." : "")) 375 ); 376 377 if (prog->minlen > strend - strpos) { 378 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n")); 379 goto fail; 380 } 381 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos; 382 check = prog->check_substr; 383 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */ 384 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE) 385 || ( (prog->reganch & ROPT_ANCH_BOL) 386 && !PL_multiline ) ); /* Check after \n? */ 387 388 if (!ml_anch) { 389 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */ 390 /* SvCUR is not set on references: SvRV and SvPVX overlap */ 391 && sv && !SvROK(sv) 392 && (strpos != strbeg)) { 393 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n")); 394 goto fail; 395 } 396 if (prog->check_offset_min == prog->check_offset_max) { 397 /* Substring at constant offset from beg-of-str... */ 398 I32 slen; 399 400 PL_regeol = strend; /* Used in HOP() */ 401 s = HOPc(strpos, prog->check_offset_min); 402 if (SvTAIL(check)) { 403 slen = SvCUR(check); /* >= 1 */ 404 405 if ( strend - s > slen || strend - s < slen - 1 406 || (strend - s == slen && strend[-1] != '\n')) { 407 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n")); 408 goto fail_finish; 409 } 410 /* Now should match s[0..slen-2] */ 411 slen--; 412 if (slen && (*SvPVX(check) != *s 413 || (slen > 1 414 && memNE(SvPVX(check), s, slen)))) { 415 report_neq: 416 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n")); 417 goto fail_finish; 418 } 419 } 420 else if (*SvPVX(check) != *s 421 || ((slen = SvCUR(check)) > 1 422 && memNE(SvPVX(check), s, slen))) 423 goto report_neq; 424 goto success_at_start; 425 } 426 } 427 /* Match is anchored, but substr is not anchored wrt beg-of-str. */ 428 s = strpos; 429 start_shift = prog->check_offset_min; /* okay to underestimate on CC */ 430 end_shift = prog->minlen - start_shift - 431 CHR_SVLEN(check) + (SvTAIL(check) != 0); 432 if (!ml_anch) { 433 I32 end = prog->check_offset_max + CHR_SVLEN(check) 434 - (SvTAIL(check) != 0); 435 I32 eshift = strend - s - end; 436 437 if (end_shift < eshift) 438 end_shift = eshift; 439 } 440 } 441 else { /* Can match at random position */ 442 ml_anch = 0; 443 s = strpos; 444 start_shift = prog->check_offset_min; /* okay to underestimate on CC */ 445 /* Should be nonnegative! */ 446 end_shift = prog->minlen - start_shift - 447 CHR_SVLEN(check) + (SvTAIL(check) != 0); 448 } 449 450 #ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */ 451 if (end_shift < 0) 452 Perl_croak(aTHX_ "panic: end_shift"); 453 #endif 454 455 restart: 456 other_last = Nullch; 457 458 /* Find a possible match in the region s..strend by looking for 459 the "check" substring in the region corrected by start/end_shift. */ 460 if (flags & REXEC_SCREAM) { 461 I32 p = -1; /* Internal iterator of scream. */ 462 I32 *pp = data ? data->scream_pos : &p; 463 464 if (PL_screamfirst[BmRARE(check)] >= 0 465 || ( BmRARE(check) == '\n' 466 && (BmPREVIOUS(check) == SvCUR(check) - 1) 467 && SvTAIL(check) )) 468 s = screaminstr(sv, check, 469 start_shift + (s - strbeg), end_shift, pp, 0); 470 else 471 goto fail_finish; 472 if (data) 473 *data->scream_olds = s; 474 } 475 else 476 s = fbm_instr((unsigned char*)s + start_shift, 477 (unsigned char*)strend - end_shift, 478 check, PL_multiline ? FBMrf_MULTILINE : 0); 479 480 /* Update the count-of-usability, remove useless subpatterns, 481 unshift s. */ 482 483 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s", 484 (s ? "Found" : "Did not find"), 485 ((check == prog->anchored_substr) ? "anchored" : "floating"), 486 PL_colors[0], 487 (int)(SvCUR(check) - (SvTAIL(check)!=0)), 488 SvPVX(check), 489 PL_colors[1], (SvTAIL(check) ? "$" : ""), 490 (s ? " at offset " : "...\n") ) ); 491 492 if (!s) 493 goto fail_finish; 494 495 check_at = s; 496 497 /* Finish the diagnostic message */ 498 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) ); 499 500 /* Got a candidate. Check MBOL anchoring, and the *other* substr. 501 Start with the other substr. 502 XXXX no SCREAM optimization yet - and a very coarse implementation 503 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will 504 *always* match. Probably should be marked during compile... 505 Probably it is right to do no SCREAM here... 506 */ 507 508 if (prog->float_substr && prog->anchored_substr) { 509 /* Take into account the "other" substring. */ 510 /* XXXX May be hopelessly wrong for UTF... */ 511 if (!other_last) 512 other_last = strpos; 513 if (check == prog->float_substr) { 514 do_other_anchored: 515 { 516 char *last = s - start_shift, *last1, *last2; 517 char *s1 = s; 518 519 tmp = PL_bostr; 520 t = s - prog->check_offset_max; 521 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */ 522 && (!(prog->reganch & ROPT_UTF8) 523 || (PL_bostr = strpos, /* Used in regcopmaybe() */ 524 (t = reghopmaybe_c(s, -(prog->check_offset_max))) 525 && t > strpos))) 526 /* EMPTY */; 527 else 528 t = strpos; 529 t += prog->anchored_offset; 530 if (t < other_last) /* These positions already checked */ 531 t = other_last; 532 PL_bostr = tmp; 533 last2 = last1 = strend - prog->minlen; 534 if (last < last1) 535 last1 = last; 536 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */ 537 /* On end-of-str: see comment below. */ 538 s = fbm_instr((unsigned char*)t, 539 (unsigned char*)last1 + prog->anchored_offset 540 + SvCUR(prog->anchored_substr) 541 - (SvTAIL(prog->anchored_substr)!=0), 542 prog->anchored_substr, PL_multiline ? FBMrf_MULTILINE : 0); 543 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s anchored substr `%s%.*s%s'%s", 544 (s ? "Found" : "Contradicts"), 545 PL_colors[0], 546 (int)(SvCUR(prog->anchored_substr) 547 - (SvTAIL(prog->anchored_substr)!=0)), 548 SvPVX(prog->anchored_substr), 549 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : ""))); 550 if (!s) { 551 if (last1 >= last2) { 552 DEBUG_r(PerlIO_printf(Perl_debug_log, 553 ", giving up...\n")); 554 goto fail_finish; 555 } 556 DEBUG_r(PerlIO_printf(Perl_debug_log, 557 ", trying floating at offset %ld...\n", 558 (long)(s1 + 1 - i_strpos))); 559 PL_regeol = strend; /* Used in HOP() */ 560 other_last = last1 + prog->anchored_offset + 1; 561 s = HOPc(last, 1); 562 goto restart; 563 } 564 else { 565 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n", 566 (long)(s - i_strpos))); 567 t = s - prog->anchored_offset; 568 other_last = s + 1; 569 s = s1; 570 if (t == strpos) 571 goto try_at_start; 572 goto try_at_offset; 573 } 574 } 575 } 576 else { /* Take into account the floating substring. */ 577 char *last, *last1; 578 char *s1 = s; 579 580 t = s - start_shift; 581 last1 = last = strend - prog->minlen + prog->float_min_offset; 582 if (last - t > prog->float_max_offset) 583 last = t + prog->float_max_offset; 584 s = t + prog->float_min_offset; 585 if (s < other_last) 586 s = other_last; 587 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */ 588 /* fbm_instr() takes into account exact value of end-of-str 589 if the check is SvTAIL(ed). Since false positives are OK, 590 and end-of-str is not later than strend we are OK. */ 591 s = fbm_instr((unsigned char*)s, 592 (unsigned char*)last + SvCUR(prog->float_substr) 593 - (SvTAIL(prog->float_substr)!=0), 594 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0); 595 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s", 596 (s ? "Found" : "Contradicts"), 597 PL_colors[0], 598 (int)(SvCUR(prog->float_substr) 599 - (SvTAIL(prog->float_substr)!=0)), 600 SvPVX(prog->float_substr), 601 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : ""))); 602 if (!s) { 603 if (last1 == last) { 604 DEBUG_r(PerlIO_printf(Perl_debug_log, 605 ", giving up...\n")); 606 goto fail_finish; 607 } 608 DEBUG_r(PerlIO_printf(Perl_debug_log, 609 ", trying anchored starting at offset %ld...\n", 610 (long)(s1 + 1 - i_strpos))); 611 other_last = last; 612 PL_regeol = strend; /* Used in HOP() */ 613 s = HOPc(t, 1); 614 goto restart; 615 } 616 else { 617 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n", 618 (long)(s - i_strpos))); 619 other_last = s; /* Fix this later. --Hugo */ 620 s = s1; 621 if (t == strpos) 622 goto try_at_start; 623 goto try_at_offset; 624 } 625 } 626 } 627 628 t = s - prog->check_offset_max; 629 tmp = PL_bostr; 630 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */ 631 && (!(prog->reganch & ROPT_UTF8) 632 || (PL_bostr = strpos, /* Used in regcopmaybe() */ 633 ((t = reghopmaybe_c(s, -(prog->check_offset_max))) 634 && t > strpos)))) { 635 PL_bostr = tmp; 636 /* Fixed substring is found far enough so that the match 637 cannot start at strpos. */ 638 try_at_offset: 639 if (ml_anch && t[-1] != '\n') { 640 /* Eventually fbm_*() should handle this, but often 641 anchored_offset is not 0, so this check will not be wasted. */ 642 /* XXXX In the code below we prefer to look for "^" even in 643 presence of anchored substrings. And we search even 644 beyond the found float position. These pessimizations 645 are historical artefacts only. */ 646 find_anchor: 647 while (t < strend - prog->minlen) { 648 if (*t == '\n') { 649 if (t < check_at - prog->check_offset_min) { 650 if (prog->anchored_substr) { 651 /* Since we moved from the found position, 652 we definitely contradict the found anchored 653 substr. Due to the above check we do not 654 contradict "check" substr. 655 Thus we can arrive here only if check substr 656 is float. Redo checking for "other"=="fixed". 657 */ 658 strpos = t + 1; 659 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n", 660 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset))); 661 goto do_other_anchored; 662 } 663 /* We don't contradict the found floating substring. */ 664 /* XXXX Why not check for STCLASS? */ 665 s = t + 1; 666 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n", 667 PL_colors[0],PL_colors[1], (long)(s - i_strpos))); 668 goto set_useful; 669 } 670 /* Position contradicts check-string */ 671 /* XXXX probably better to look for check-string 672 than for "\n", so one should lower the limit for t? */ 673 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n", 674 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos))); 675 other_last = strpos = s = t + 1; 676 goto restart; 677 } 678 t++; 679 } 680 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n", 681 PL_colors[0],PL_colors[1])); 682 goto fail_finish; 683 } 684 else { 685 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n", 686 PL_colors[0],PL_colors[1])); 687 } 688 s = t; 689 set_useful: 690 ++BmUSEFUL(prog->check_substr); /* hooray/5 */ 691 } 692 else { 693 PL_bostr = tmp; 694 /* The found string does not prohibit matching at strpos, 695 - no optimization of calling REx engine can be performed, 696 unless it was an MBOL and we are not after MBOL, 697 or a future STCLASS check will fail this. */ 698 try_at_start: 699 /* Even in this situation we may use MBOL flag if strpos is offset 700 wrt the start of the string. */ 701 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */ 702 && (strpos != strbeg) && strpos[-1] != '\n' 703 /* May be due to an implicit anchor of m{.*foo} */ 704 && !(prog->reganch & ROPT_IMPLICIT)) 705 { 706 t = strpos; 707 goto find_anchor; 708 } 709 DEBUG_r( if (ml_anch) 710 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n", 711 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]); 712 ); 713 success_at_start: 714 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */ 715 && prog->check_substr /* Could be deleted already */ 716 && --BmUSEFUL(prog->check_substr) < 0 717 && prog->check_substr == prog->float_substr) 718 { 719 /* If flags & SOMETHING - do not do it many times on the same match */ 720 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n")); 721 SvREFCNT_dec(prog->check_substr); 722 prog->check_substr = Nullsv; /* disable */ 723 prog->float_substr = Nullsv; /* clear */ 724 check = Nullsv; /* abort */ 725 s = strpos; 726 /* XXXX This is a remnant of the old implementation. It 727 looks wasteful, since now INTUIT can use many 728 other heuristics. */ 729 prog->reganch &= ~RE_USE_INTUIT; 730 } 731 else 732 s = strpos; 733 } 734 735 /* Last resort... */ 736 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */ 737 if (prog->regstclass) { 738 /* minlen == 0 is possible if regstclass is \b or \B, 739 and the fixed substr is ''$. 740 Since minlen is already taken into account, s+1 is before strend; 741 accidentally, minlen >= 1 guaranties no false positives at s + 1 742 even for \b or \B. But (minlen? 1 : 0) below assumes that 743 regstclass does not come from lookahead... */ 744 /* If regstclass takes bytelength more than 1: If charlength==1, OK. 745 This leaves EXACTF only, which is dealt with in find_byclass(). */ 746 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT 747 ? STR_LEN(prog->regstclass) 748 : 1); 749 char *endpos = (prog->anchored_substr || ml_anch) 750 ? s + (prog->minlen? cl_l : 0) 751 : (prog->float_substr ? check_at - start_shift + cl_l 752 : strend) ; 753 char *startpos = strbeg; 754 755 t = s; 756 if (prog->reganch & ROPT_UTF8) { 757 PL_regdata = prog->data; /* Used by REGINCLASS UTF logic */ 758 PL_bostr = startpos; 759 } 760 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1); 761 if (!s) { 762 #ifdef DEBUGGING 763 char *what; 764 #endif 765 if (endpos == strend) { 766 DEBUG_r( PerlIO_printf(Perl_debug_log, 767 "Could not match STCLASS...\n") ); 768 goto fail; 769 } 770 DEBUG_r( PerlIO_printf(Perl_debug_log, 771 "This position contradicts STCLASS...\n") ); 772 if ((prog->reganch & ROPT_ANCH) && !ml_anch) 773 goto fail; 774 /* Contradict one of substrings */ 775 if (prog->anchored_substr) { 776 if (prog->anchored_substr == check) { 777 DEBUG_r( what = "anchored" ); 778 hop_and_restart: 779 PL_regeol = strend; /* Used in HOP() */ 780 s = HOPc(t, 1); 781 if (s + start_shift + end_shift > strend) { 782 /* XXXX Should be taken into account earlier? */ 783 DEBUG_r( PerlIO_printf(Perl_debug_log, 784 "Could not match STCLASS...\n") ); 785 goto fail; 786 } 787 if (!check) 788 goto giveup; 789 DEBUG_r( PerlIO_printf(Perl_debug_log, 790 "Looking for %s substr starting at offset %ld...\n", 791 what, (long)(s + start_shift - i_strpos)) ); 792 goto restart; 793 } 794 /* Have both, check_string is floating */ 795 if (t + start_shift >= check_at) /* Contradicts floating=check */ 796 goto retry_floating_check; 797 /* Recheck anchored substring, but not floating... */ 798 s = check_at; 799 if (!check) 800 goto giveup; 801 DEBUG_r( PerlIO_printf(Perl_debug_log, 802 "Looking for anchored substr starting at offset %ld...\n", 803 (long)(other_last - i_strpos)) ); 804 goto do_other_anchored; 805 } 806 /* Another way we could have checked stclass at the 807 current position only: */ 808 if (ml_anch) { 809 s = t = t + 1; 810 if (!check) 811 goto giveup; 812 DEBUG_r( PerlIO_printf(Perl_debug_log, 813 "Looking for /%s^%s/m starting at offset %ld...\n", 814 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) ); 815 goto try_at_offset; 816 } 817 if (!prog->float_substr) /* Could have been deleted */ 818 goto fail; 819 /* Check is floating subtring. */ 820 retry_floating_check: 821 t = check_at - start_shift; 822 DEBUG_r( what = "floating" ); 823 goto hop_and_restart; 824 } 825 DEBUG_r( if (t != s) 826 PerlIO_printf(Perl_debug_log, 827 "By STCLASS: moving %ld --> %ld\n", 828 (long)(t - i_strpos), (long)(s - i_strpos)); 829 else 830 PerlIO_printf(Perl_debug_log, 831 "Does not contradict STCLASS...\n") ); 832 } 833 giveup: 834 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n", 835 PL_colors[4], (check ? "Guessed" : "Giving up"), 836 PL_colors[5], (long)(s - i_strpos)) ); 837 return s; 838 839 fail_finish: /* Substring not found */ 840 if (prog->check_substr) /* could be removed already */ 841 BmUSEFUL(prog->check_substr) += 5; /* hooray */ 842 fail: 843 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n", 844 PL_colors[4],PL_colors[5])); 845 return Nullch; 846 } 847 848 /* We know what class REx starts with. Try to find this position... */ 849 STATIC char * 850 S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun) 851 { 852 I32 doevery = (prog->reganch & ROPT_SKIP) == 0; 853 char *m; 854 STRLEN ln; 855 unsigned int c1; 856 unsigned int c2; 857 char *e; 858 register I32 tmp = 1; /* Scratch variable? */ 859 860 /* We know what class it must start with. */ 861 switch (OP(c)) { 862 case ANYOFUTF8: 863 while (s < strend) { 864 if (REGINCLASSUTF8(c, (U8*)s)) { 865 if (tmp && (norun || regtry(prog, s))) 866 goto got_it; 867 else 868 tmp = doevery; 869 } 870 else 871 tmp = 1; 872 s += UTF8SKIP(s); 873 } 874 break; 875 case ANYOF: 876 while (s < strend) { 877 if (REGINCLASS(c, *(U8*)s)) { 878 if (tmp && (norun || regtry(prog, s))) 879 goto got_it; 880 else 881 tmp = doevery; 882 } 883 else 884 tmp = 1; 885 s++; 886 } 887 break; 888 case EXACTF: 889 m = STRING(c); 890 ln = STR_LEN(c); 891 c1 = *(U8*)m; 892 c2 = PL_fold[c1]; 893 goto do_exactf; 894 case EXACTFL: 895 m = STRING(c); 896 ln = STR_LEN(c); 897 c1 = *(U8*)m; 898 c2 = PL_fold_locale[c1]; 899 do_exactf: 900 e = strend - ln; 901 902 if (norun && e < s) 903 e = s; /* Due to minlen logic of intuit() */ 904 /* Here it is NOT UTF! */ 905 if (c1 == c2) { 906 while (s <= e) { 907 if ( *(U8*)s == c1 908 && (ln == 1 || !(OP(c) == EXACTF 909 ? ibcmp(s, m, ln) 910 : ibcmp_locale(s, m, ln))) 911 && (norun || regtry(prog, s)) ) 912 goto got_it; 913 s++; 914 } 915 } else { 916 while (s <= e) { 917 if ( (*(U8*)s == c1 || *(U8*)s == c2) 918 && (ln == 1 || !(OP(c) == EXACTF 919 ? ibcmp(s, m, ln) 920 : ibcmp_locale(s, m, ln))) 921 && (norun || regtry(prog, s)) ) 922 goto got_it; 923 s++; 924 } 925 } 926 break; 927 case BOUNDL: 928 PL_reg_flags |= RF_tainted; 929 /* FALL THROUGH */ 930 case BOUND: 931 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n'; 932 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0); 933 while (s < strend) { 934 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) { 935 tmp = !tmp; 936 if ((norun || regtry(prog, s))) 937 goto got_it; 938 } 939 s++; 940 } 941 if ((!prog->minlen && tmp) && (norun || regtry(prog, s))) 942 goto got_it; 943 break; 944 case BOUNDLUTF8: 945 PL_reg_flags |= RF_tainted; 946 /* FALL THROUGH */ 947 case BOUNDUTF8: 948 if (s == startpos) 949 tmp = '\n'; 950 else { 951 U8 *r = reghop((U8*)s, -1); 952 953 tmp = (I32)utf8_to_uv(r, s - (char*)r, 0, 0); 954 } 955 tmp = ((OP(c) == BOUNDUTF8 ? 956 isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0); 957 while (s < strend) { 958 if (tmp == !(OP(c) == BOUNDUTF8 ? 959 swash_fetch(PL_utf8_alnum, (U8*)s) : 960 isALNUM_LC_utf8((U8*)s))) 961 { 962 tmp = !tmp; 963 if ((norun || regtry(prog, s))) 964 goto got_it; 965 } 966 s += UTF8SKIP(s); 967 } 968 if ((!prog->minlen && tmp) && (norun || regtry(prog, s))) 969 goto got_it; 970 break; 971 case NBOUNDL: 972 PL_reg_flags |= RF_tainted; 973 /* FALL THROUGH */ 974 case NBOUND: 975 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n'; 976 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0); 977 while (s < strend) { 978 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s))) 979 tmp = !tmp; 980 else if ((norun || regtry(prog, s))) 981 goto got_it; 982 s++; 983 } 984 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s))) 985 goto got_it; 986 break; 987 case NBOUNDLUTF8: 988 PL_reg_flags |= RF_tainted; 989 /* FALL THROUGH */ 990 case NBOUNDUTF8: 991 if (s == startpos) 992 tmp = '\n'; 993 else { 994 U8 *r = reghop((U8*)s, -1); 995 996 tmp = (I32)utf8_to_uv(r, s - (char*)r, 0, 0); 997 } 998 tmp = ((OP(c) == NBOUNDUTF8 ? 999 isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0); 1000 while (s < strend) { 1001 if (tmp == !(OP(c) == NBOUNDUTF8 ? 1002 swash_fetch(PL_utf8_alnum, (U8*)s) : 1003 isALNUM_LC_utf8((U8*)s))) 1004 tmp = !tmp; 1005 else if ((norun || regtry(prog, s))) 1006 goto got_it; 1007 s += UTF8SKIP(s); 1008 } 1009 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s))) 1010 goto got_it; 1011 break; 1012 case ALNUM: 1013 while (s < strend) { 1014 if (isALNUM(*s)) { 1015 if (tmp && (norun || regtry(prog, s))) 1016 goto got_it; 1017 else 1018 tmp = doevery; 1019 } 1020 else 1021 tmp = 1; 1022 s++; 1023 } 1024 break; 1025 case ALNUMUTF8: 1026 while (s < strend) { 1027 if (swash_fetch(PL_utf8_alnum, (U8*)s)) { 1028 if (tmp && (norun || regtry(prog, s))) 1029 goto got_it; 1030 else 1031 tmp = doevery; 1032 } 1033 else 1034 tmp = 1; 1035 s += UTF8SKIP(s); 1036 } 1037 break; 1038 case ALNUML: 1039 PL_reg_flags |= RF_tainted; 1040 while (s < strend) { 1041 if (isALNUM_LC(*s)) { 1042 if (tmp && (norun || regtry(prog, s))) 1043 goto got_it; 1044 else 1045 tmp = doevery; 1046 } 1047 else 1048 tmp = 1; 1049 s++; 1050 } 1051 break; 1052 case ALNUMLUTF8: 1053 PL_reg_flags |= RF_tainted; 1054 while (s < strend) { 1055 if (isALNUM_LC_utf8((U8*)s)) { 1056 if (tmp && (norun || regtry(prog, s))) 1057 goto got_it; 1058 else 1059 tmp = doevery; 1060 } 1061 else 1062 tmp = 1; 1063 s += UTF8SKIP(s); 1064 } 1065 break; 1066 case NALNUM: 1067 while (s < strend) { 1068 if (!isALNUM(*s)) { 1069 if (tmp && (norun || regtry(prog, s))) 1070 goto got_it; 1071 else 1072 tmp = doevery; 1073 } 1074 else 1075 tmp = 1; 1076 s++; 1077 } 1078 break; 1079 case NALNUMUTF8: 1080 while (s < strend) { 1081 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) { 1082 if (tmp && (norun || regtry(prog, s))) 1083 goto got_it; 1084 else 1085 tmp = doevery; 1086 } 1087 else 1088 tmp = 1; 1089 s += UTF8SKIP(s); 1090 } 1091 break; 1092 case NALNUML: 1093 PL_reg_flags |= RF_tainted; 1094 while (s < strend) { 1095 if (!isALNUM_LC(*s)) { 1096 if (tmp && (norun || regtry(prog, s))) 1097 goto got_it; 1098 else 1099 tmp = doevery; 1100 } 1101 else 1102 tmp = 1; 1103 s++; 1104 } 1105 break; 1106 case NALNUMLUTF8: 1107 PL_reg_flags |= RF_tainted; 1108 while (s < strend) { 1109 if (!isALNUM_LC_utf8((U8*)s)) { 1110 if (tmp && (norun || regtry(prog, s))) 1111 goto got_it; 1112 else 1113 tmp = doevery; 1114 } 1115 else 1116 tmp = 1; 1117 s += UTF8SKIP(s); 1118 } 1119 break; 1120 case SPACE: 1121 while (s < strend) { 1122 if (isSPACE(*s)) { 1123 if (tmp && (norun || regtry(prog, s))) 1124 goto got_it; 1125 else 1126 tmp = doevery; 1127 } 1128 else 1129 tmp = 1; 1130 s++; 1131 } 1132 break; 1133 case SPACEUTF8: 1134 while (s < strend) { 1135 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) { 1136 if (tmp && (norun || regtry(prog, s))) 1137 goto got_it; 1138 else 1139 tmp = doevery; 1140 } 1141 else 1142 tmp = 1; 1143 s += UTF8SKIP(s); 1144 } 1145 break; 1146 case SPACEL: 1147 PL_reg_flags |= RF_tainted; 1148 while (s < strend) { 1149 if (isSPACE_LC(*s)) { 1150 if (tmp && (norun || regtry(prog, s))) 1151 goto got_it; 1152 else 1153 tmp = doevery; 1154 } 1155 else 1156 tmp = 1; 1157 s++; 1158 } 1159 break; 1160 case SPACELUTF8: 1161 PL_reg_flags |= RF_tainted; 1162 while (s < strend) { 1163 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) { 1164 if (tmp && (norun || regtry(prog, s))) 1165 goto got_it; 1166 else 1167 tmp = doevery; 1168 } 1169 else 1170 tmp = 1; 1171 s += UTF8SKIP(s); 1172 } 1173 break; 1174 case NSPACE: 1175 while (s < strend) { 1176 if (!isSPACE(*s)) { 1177 if (tmp && (norun || regtry(prog, s))) 1178 goto got_it; 1179 else 1180 tmp = doevery; 1181 } 1182 else 1183 tmp = 1; 1184 s++; 1185 } 1186 break; 1187 case NSPACEUTF8: 1188 while (s < strend) { 1189 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) { 1190 if (tmp && (norun || regtry(prog, s))) 1191 goto got_it; 1192 else 1193 tmp = doevery; 1194 } 1195 else 1196 tmp = 1; 1197 s += UTF8SKIP(s); 1198 } 1199 break; 1200 case NSPACEL: 1201 PL_reg_flags |= RF_tainted; 1202 while (s < strend) { 1203 if (!isSPACE_LC(*s)) { 1204 if (tmp && (norun || regtry(prog, s))) 1205 goto got_it; 1206 else 1207 tmp = doevery; 1208 } 1209 else 1210 tmp = 1; 1211 s++; 1212 } 1213 break; 1214 case NSPACELUTF8: 1215 PL_reg_flags |= RF_tainted; 1216 while (s < strend) { 1217 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) { 1218 if (tmp && (norun || regtry(prog, s))) 1219 goto got_it; 1220 else 1221 tmp = doevery; 1222 } 1223 else 1224 tmp = 1; 1225 s += UTF8SKIP(s); 1226 } 1227 break; 1228 case DIGIT: 1229 while (s < strend) { 1230 if (isDIGIT(*s)) { 1231 if (tmp && (norun || regtry(prog, s))) 1232 goto got_it; 1233 else 1234 tmp = doevery; 1235 } 1236 else 1237 tmp = 1; 1238 s++; 1239 } 1240 break; 1241 case DIGITUTF8: 1242 while (s < strend) { 1243 if (swash_fetch(PL_utf8_digit,(U8*)s)) { 1244 if (tmp && (norun || regtry(prog, s))) 1245 goto got_it; 1246 else 1247 tmp = doevery; 1248 } 1249 else 1250 tmp = 1; 1251 s += UTF8SKIP(s); 1252 } 1253 break; 1254 case DIGITL: 1255 PL_reg_flags |= RF_tainted; 1256 while (s < strend) { 1257 if (isDIGIT_LC(*s)) { 1258 if (tmp && (norun || regtry(prog, s))) 1259 goto got_it; 1260 else 1261 tmp = doevery; 1262 } 1263 else 1264 tmp = 1; 1265 s++; 1266 } 1267 break; 1268 case DIGITLUTF8: 1269 PL_reg_flags |= RF_tainted; 1270 while (s < strend) { 1271 if (isDIGIT_LC_utf8((U8*)s)) { 1272 if (tmp && (norun || regtry(prog, s))) 1273 goto got_it; 1274 else 1275 tmp = doevery; 1276 } 1277 else 1278 tmp = 1; 1279 s += UTF8SKIP(s); 1280 } 1281 break; 1282 case NDIGIT: 1283 while (s < strend) { 1284 if (!isDIGIT(*s)) { 1285 if (tmp && (norun || regtry(prog, s))) 1286 goto got_it; 1287 else 1288 tmp = doevery; 1289 } 1290 else 1291 tmp = 1; 1292 s++; 1293 } 1294 break; 1295 case NDIGITUTF8: 1296 while (s < strend) { 1297 if (!swash_fetch(PL_utf8_digit,(U8*)s)) { 1298 if (tmp && (norun || regtry(prog, s))) 1299 goto got_it; 1300 else 1301 tmp = doevery; 1302 } 1303 else 1304 tmp = 1; 1305 s += UTF8SKIP(s); 1306 } 1307 break; 1308 case NDIGITL: 1309 PL_reg_flags |= RF_tainted; 1310 while (s < strend) { 1311 if (!isDIGIT_LC(*s)) { 1312 if (tmp && (norun || regtry(prog, s))) 1313 goto got_it; 1314 else 1315 tmp = doevery; 1316 } 1317 else 1318 tmp = 1; 1319 s++; 1320 } 1321 break; 1322 case NDIGITLUTF8: 1323 PL_reg_flags |= RF_tainted; 1324 while (s < strend) { 1325 if (!isDIGIT_LC_utf8((U8*)s)) { 1326 if (tmp && (norun || regtry(prog, s))) 1327 goto got_it; 1328 else 1329 tmp = doevery; 1330 } 1331 else 1332 tmp = 1; 1333 s += UTF8SKIP(s); 1334 } 1335 break; 1336 default: 1337 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c)); 1338 break; 1339 } 1340 return 0; 1341 got_it: 1342 return s; 1343 } 1344 1345 /* 1346 - regexec_flags - match a regexp against a string 1347 */ 1348 I32 1349 Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend, 1350 char *strbeg, I32 minend, SV *sv, void *data, U32 flags) 1351 /* strend: pointer to null at end of string */ 1352 /* strbeg: real beginning of string */ 1353 /* minend: end of match must be >=minend after stringarg. */ 1354 /* data: May be used for some additional optimizations. */ 1355 /* nosave: For optimizations. */ 1356 { 1357 register char *s; 1358 register regnode *c; 1359 register char *startpos = stringarg; 1360 I32 minlen; /* must match at least this many chars */ 1361 I32 dontbother = 0; /* how many characters not to try at end */ 1362 /* I32 start_shift = 0; */ /* Offset of the start to find 1363 constant substr. */ /* CC */ 1364 I32 end_shift = 0; /* Same for the end. */ /* CC */ 1365 I32 scream_pos = -1; /* Internal iterator of scream. */ 1366 char *scream_olds; 1367 SV* oreplsv = GvSV(PL_replgv); 1368 1369 PL_regcc = 0; 1370 1371 cache_re(prog); 1372 #ifdef DEBUGGING 1373 PL_regnarrate = PL_debug & 512; 1374 #endif 1375 1376 /* Be paranoid... */ 1377 if (prog == NULL || startpos == NULL) { 1378 Perl_croak(aTHX_ "NULL regexp parameter"); 1379 return 0; 1380 } 1381 1382 minlen = prog->minlen; 1383 if (strend - startpos < minlen) goto phooey; 1384 1385 if (startpos == strbeg) /* is ^ valid at stringarg? */ 1386 PL_regprev = '\n'; 1387 else { 1388 PL_regprev = (U32)stringarg[-1]; 1389 if (!PL_multiline && PL_regprev == '\n') 1390 PL_regprev = '\0'; /* force ^ to NOT match */ 1391 } 1392 1393 /* Check validity of program. */ 1394 if (UCHARAT(prog->program) != REG_MAGIC) { 1395 Perl_croak(aTHX_ "corrupted regexp program"); 1396 } 1397 1398 PL_reg_flags = 0; 1399 PL_reg_eval_set = 0; 1400 PL_reg_maxiter = 0; 1401 1402 if (prog->reganch & ROPT_UTF8) 1403 PL_reg_flags |= RF_utf8; 1404 1405 /* Mark beginning of line for ^ and lookbehind. */ 1406 PL_regbol = startpos; 1407 PL_bostr = strbeg; 1408 PL_reg_sv = sv; 1409 1410 /* Mark end of line for $ (and such) */ 1411 PL_regeol = strend; 1412 1413 /* see how far we have to get to not match where we matched before */ 1414 PL_regtill = startpos+minend; 1415 1416 /* We start without call_cc context. */ 1417 PL_reg_call_cc = 0; 1418 1419 /* If there is a "must appear" string, look for it. */ 1420 s = startpos; 1421 1422 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */ 1423 MAGIC *mg; 1424 1425 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */ 1426 PL_reg_ganch = startpos; 1427 else if (sv && SvTYPE(sv) >= SVt_PVMG 1428 && SvMAGIC(sv) 1429 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) { 1430 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */ 1431 if (prog->reganch & ROPT_ANCH_GPOS) { 1432 if (s > PL_reg_ganch) 1433 goto phooey; 1434 s = PL_reg_ganch; 1435 } 1436 } 1437 else /* pos() not defined */ 1438 PL_reg_ganch = strbeg; 1439 } 1440 1441 if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) { 1442 re_scream_pos_data d; 1443 1444 d.scream_olds = &scream_olds; 1445 d.scream_pos = &scream_pos; 1446 s = re_intuit_start(prog, sv, s, strend, flags, &d); 1447 if (!s) 1448 goto phooey; /* not present */ 1449 } 1450 1451 DEBUG_r( if (!PL_colorset) reginitcolors() ); 1452 DEBUG_r(PerlIO_printf(Perl_debug_log, 1453 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n", 1454 PL_colors[4],PL_colors[5],PL_colors[0], 1455 prog->precomp, 1456 PL_colors[1], 1457 (strlen(prog->precomp) > 60 ? "..." : ""), 1458 PL_colors[0], 1459 (int)(strend - startpos > 60 ? 60 : strend - startpos), 1460 startpos, PL_colors[1], 1461 (strend - startpos > 60 ? "..." : "")) 1462 ); 1463 1464 /* Simplest case: anchored match need be tried only once. */ 1465 /* [unless only anchor is BOL and multiline is set] */ 1466 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) { 1467 if (s == startpos && regtry(prog, startpos)) 1468 goto got_it; 1469 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT) 1470 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */ 1471 { 1472 char *end; 1473 1474 if (minlen) 1475 dontbother = minlen - 1; 1476 end = HOPc(strend, -dontbother) - 1; 1477 /* for multiline we only have to try after newlines */ 1478 if (prog->check_substr) { 1479 if (s == startpos) 1480 goto after_try; 1481 while (1) { 1482 if (regtry(prog, s)) 1483 goto got_it; 1484 after_try: 1485 if (s >= end) 1486 goto phooey; 1487 if (prog->reganch & RE_USE_INTUIT) { 1488 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL); 1489 if (!s) 1490 goto phooey; 1491 } 1492 else 1493 s++; 1494 } 1495 } else { 1496 if (s > startpos) 1497 s--; 1498 while (s < end) { 1499 if (*s++ == '\n') { /* don't need PL_utf8skip here */ 1500 if (regtry(prog, s)) 1501 goto got_it; 1502 } 1503 } 1504 } 1505 } 1506 goto phooey; 1507 } else if (prog->reganch & ROPT_ANCH_GPOS) { 1508 if (regtry(prog, PL_reg_ganch)) 1509 goto got_it; 1510 goto phooey; 1511 } 1512 1513 /* Messy cases: unanchored match. */ 1514 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) { 1515 /* we have /x+whatever/ */ 1516 /* it must be a one character string (XXXX Except UTF?) */ 1517 char ch = SvPVX(prog->anchored_substr)[0]; 1518 #ifdef DEBUGGING 1519 int did_match = 0; 1520 #endif 1521 1522 if (UTF) { 1523 while (s < strend) { 1524 if (*s == ch) { 1525 DEBUG_r( did_match = 1 ); 1526 if (regtry(prog, s)) goto got_it; 1527 s += UTF8SKIP(s); 1528 while (s < strend && *s == ch) 1529 s += UTF8SKIP(s); 1530 } 1531 s += UTF8SKIP(s); 1532 } 1533 } 1534 else { 1535 while (s < strend) { 1536 if (*s == ch) { 1537 DEBUG_r( did_match = 1 ); 1538 if (regtry(prog, s)) goto got_it; 1539 s++; 1540 while (s < strend && *s == ch) 1541 s++; 1542 } 1543 s++; 1544 } 1545 } 1546 DEBUG_r(did_match || 1547 PerlIO_printf(Perl_debug_log, 1548 "Did not find anchored character...\n")); 1549 } 1550 /*SUPPRESS 560*/ 1551 else if (prog->anchored_substr != Nullsv 1552 || (prog->float_substr != Nullsv 1553 && prog->float_max_offset < strend - s)) { 1554 SV *must = prog->anchored_substr 1555 ? prog->anchored_substr : prog->float_substr; 1556 I32 back_max = 1557 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset; 1558 I32 back_min = 1559 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset; 1560 char *last = HOPc(strend, /* Cannot start after this */ 1561 -(I32)(CHR_SVLEN(must) 1562 - (SvTAIL(must) != 0) + back_min)); 1563 char *last1; /* Last position checked before */ 1564 #ifdef DEBUGGING 1565 int did_match = 0; 1566 #endif 1567 1568 if (s > PL_bostr) 1569 last1 = HOPc(s, -1); 1570 else 1571 last1 = s - 1; /* bogus */ 1572 1573 /* XXXX check_substr already used to find `s', can optimize if 1574 check_substr==must. */ 1575 scream_pos = -1; 1576 dontbother = end_shift; 1577 strend = HOPc(strend, -dontbother); 1578 while ( (s <= last) && 1579 ((flags & REXEC_SCREAM) 1580 ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg, 1581 end_shift, &scream_pos, 0)) 1582 : (s = fbm_instr((unsigned char*)HOP(s, back_min), 1583 (unsigned char*)strend, must, 1584 PL_multiline ? FBMrf_MULTILINE : 0))) ) { 1585 DEBUG_r( did_match = 1 ); 1586 if (HOPc(s, -back_max) > last1) { 1587 last1 = HOPc(s, -back_min); 1588 s = HOPc(s, -back_max); 1589 } 1590 else { 1591 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1; 1592 1593 last1 = HOPc(s, -back_min); 1594 s = t; 1595 } 1596 if (UTF) { 1597 while (s <= last1) { 1598 if (regtry(prog, s)) 1599 goto got_it; 1600 s += UTF8SKIP(s); 1601 } 1602 } 1603 else { 1604 while (s <= last1) { 1605 if (regtry(prog, s)) 1606 goto got_it; 1607 s++; 1608 } 1609 } 1610 } 1611 DEBUG_r(did_match || 1612 PerlIO_printf(Perl_debug_log, "Did not find %s substr `%s%.*s%s'%s...\n", 1613 ((must == prog->anchored_substr) 1614 ? "anchored" : "floating"), 1615 PL_colors[0], 1616 (int)(SvCUR(must) - (SvTAIL(must)!=0)), 1617 SvPVX(must), 1618 PL_colors[1], (SvTAIL(must) ? "$" : ""))); 1619 goto phooey; 1620 } 1621 else if ((c = prog->regstclass)) { 1622 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT) 1623 /* don't bother with what can't match */ 1624 strend = HOPc(strend, -(minlen - 1)); 1625 if (find_byclass(prog, c, s, strend, startpos, 0)) 1626 goto got_it; 1627 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n")); 1628 } 1629 else { 1630 dontbother = 0; 1631 if (prog->float_substr != Nullsv) { /* Trim the end. */ 1632 char *last; 1633 1634 if (flags & REXEC_SCREAM) { 1635 last = screaminstr(sv, prog->float_substr, s - strbeg, 1636 end_shift, &scream_pos, 1); /* last one */ 1637 if (!last) 1638 last = scream_olds; /* Only one occurence. */ 1639 } 1640 else { 1641 STRLEN len; 1642 char *little = SvPV(prog->float_substr, len); 1643 1644 if (SvTAIL(prog->float_substr)) { 1645 if (memEQ(strend - len + 1, little, len - 1)) 1646 last = strend - len + 1; 1647 else if (!PL_multiline) 1648 last = memEQ(strend - len, little, len) 1649 ? strend - len : Nullch; 1650 else 1651 goto find_last; 1652 } else { 1653 find_last: 1654 if (len) 1655 last = rninstr(s, strend, little, little + len); 1656 else 1657 last = strend; /* matching `$' */ 1658 } 1659 } 1660 if (last == NULL) { 1661 DEBUG_r(PerlIO_printf(Perl_debug_log, 1662 "%sCan't trim the tail, match fails (should not happen)%s\n", 1663 PL_colors[4],PL_colors[5])); 1664 goto phooey; /* Should not happen! */ 1665 } 1666 dontbother = strend - last + prog->float_min_offset; 1667 } 1668 if (minlen && (dontbother < minlen)) 1669 dontbother = minlen - 1; 1670 strend -= dontbother; /* this one's always in bytes! */ 1671 /* We don't know much -- general case. */ 1672 if (UTF) { 1673 for (;;) { 1674 if (regtry(prog, s)) 1675 goto got_it; 1676 if (s >= strend) 1677 break; 1678 s += UTF8SKIP(s); 1679 }; 1680 } 1681 else { 1682 do { 1683 if (regtry(prog, s)) 1684 goto got_it; 1685 } while (s++ < strend); 1686 } 1687 } 1688 1689 /* Failure. */ 1690 goto phooey; 1691 1692 got_it: 1693 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted); 1694 1695 if (PL_reg_eval_set) { 1696 /* Preserve the current value of $^R */ 1697 if (oreplsv != GvSV(PL_replgv)) 1698 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is 1699 restored, the value remains 1700 the same. */ 1701 restore_pos(aTHXo_ 0); 1702 } 1703 1704 /* make sure $`, $&, $', and $digit will work later */ 1705 if ( !(flags & REXEC_NOT_FIRST) ) { 1706 if (RX_MATCH_COPIED(prog)) { 1707 Safefree(prog->subbeg); 1708 RX_MATCH_COPIED_off(prog); 1709 } 1710 if (flags & REXEC_COPY_STR) { 1711 I32 i = PL_regeol - startpos + (stringarg - strbeg); 1712 1713 s = savepvn(strbeg, i); 1714 prog->subbeg = s; 1715 prog->sublen = i; 1716 RX_MATCH_COPIED_on(prog); 1717 } 1718 else { 1719 prog->subbeg = strbeg; 1720 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */ 1721 } 1722 } 1723 1724 return 1; 1725 1726 phooey: 1727 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n", 1728 PL_colors[4],PL_colors[5])); 1729 if (PL_reg_eval_set) 1730 restore_pos(aTHXo_ 0); 1731 return 0; 1732 } 1733 1734 /* 1735 - regtry - try match at specific point 1736 */ 1737 STATIC I32 /* 0 failure, 1 success */ 1738 S_regtry(pTHX_ regexp *prog, char *startpos) 1739 { 1740 register I32 i; 1741 register I32 *sp; 1742 register I32 *ep; 1743 CHECKPOINT lastcp; 1744 1745 #ifdef DEBUGGING 1746 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */ 1747 #endif 1748 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) { 1749 MAGIC *mg; 1750 1751 PL_reg_eval_set = RS_init; 1752 DEBUG_r(DEBUG_s( 1753 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n", 1754 (IV)(PL_stack_sp - PL_stack_base)); 1755 )); 1756 SAVEI32(cxstack[cxstack_ix].blk_oldsp); 1757 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base; 1758 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */ 1759 SAVETMPS; 1760 /* Apparently this is not needed, judging by wantarray. */ 1761 /* SAVEI8(cxstack[cxstack_ix].blk_gimme); 1762 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */ 1763 1764 if (PL_reg_sv) { 1765 /* Make $_ available to executed code. */ 1766 if (PL_reg_sv != DEFSV) { 1767 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */ 1768 SAVESPTR(DEFSV); 1769 DEFSV = PL_reg_sv; 1770 } 1771 1772 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv) 1773 && (mg = mg_find(PL_reg_sv, 'g')))) { 1774 /* prepare for quick setting of pos */ 1775 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0); 1776 mg = mg_find(PL_reg_sv, 'g'); 1777 mg->mg_len = -1; 1778 } 1779 PL_reg_magic = mg; 1780 PL_reg_oldpos = mg->mg_len; 1781 SAVEDESTRUCTOR_X(restore_pos, 0); 1782 } 1783 if (!PL_reg_curpm) 1784 Newz(22,PL_reg_curpm, 1, PMOP); 1785 PL_reg_curpm->op_pmregexp = prog; 1786 PL_reg_oldcurpm = PL_curpm; 1787 PL_curpm = PL_reg_curpm; 1788 if (RX_MATCH_COPIED(prog)) { 1789 /* Here is a serious problem: we cannot rewrite subbeg, 1790 since it may be needed if this match fails. Thus 1791 $` inside (?{}) could fail... */ 1792 PL_reg_oldsaved = prog->subbeg; 1793 PL_reg_oldsavedlen = prog->sublen; 1794 RX_MATCH_COPIED_off(prog); 1795 } 1796 else 1797 PL_reg_oldsaved = Nullch; 1798 prog->subbeg = PL_bostr; 1799 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */ 1800 } 1801 prog->startp[0] = startpos - PL_bostr; 1802 PL_reginput = startpos; 1803 PL_regstartp = prog->startp; 1804 PL_regendp = prog->endp; 1805 PL_reglastparen = &prog->lastparen; 1806 prog->lastparen = 0; 1807 PL_regsize = 0; 1808 DEBUG_r(PL_reg_starttry = startpos); 1809 if (PL_reg_start_tmpl <= prog->nparens) { 1810 PL_reg_start_tmpl = prog->nparens*3/2 + 3; 1811 if(PL_reg_start_tmp) 1812 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*); 1813 else 1814 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*); 1815 } 1816 1817 /* XXXX What this code is doing here?!!! There should be no need 1818 to do this again and again, PL_reglastparen should take care of 1819 this! --ilya*/ 1820 1821 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code. 1822 * Actually, the code in regcppop() (which Ilya may be meaning by 1823 * PL_reglastparen), is not needed at all by the test suite 1824 * (op/regexp, op/pat, op/split), but that code is needed, oddly 1825 * enough, for building DynaLoader, or otherwise this 1826 * "Error: '*' not in typemap in DynaLoader.xs, line 164" 1827 * will happen. Meanwhile, this code *is* needed for the 1828 * above-mentioned test suite tests to succeed. The common theme 1829 * on those tests seems to be returning null fields from matches. 1830 * --jhi */ 1831 #if 1 1832 sp = prog->startp; 1833 ep = prog->endp; 1834 if (prog->nparens) { 1835 for (i = prog->nparens; i > *PL_reglastparen; i--) { 1836 *++sp = -1; 1837 *++ep = -1; 1838 } 1839 } 1840 #endif 1841 REGCP_SET(lastcp); 1842 if (regmatch(prog->program + 1)) { 1843 prog->endp[0] = PL_reginput - PL_bostr; 1844 return 1; 1845 } 1846 REGCP_UNWIND(lastcp); 1847 return 0; 1848 } 1849 1850 #define RE_UNWIND_BRANCH 1 1851 #define RE_UNWIND_BRANCHJ 2 1852 1853 union re_unwind_t; 1854 1855 typedef struct { /* XX: makes sense to enlarge it... */ 1856 I32 type; 1857 I32 prev; 1858 CHECKPOINT lastcp; 1859 } re_unwind_generic_t; 1860 1861 typedef struct { 1862 I32 type; 1863 I32 prev; 1864 CHECKPOINT lastcp; 1865 I32 lastparen; 1866 regnode *next; 1867 char *locinput; 1868 I32 nextchr; 1869 #ifdef DEBUGGING 1870 int regindent; 1871 #endif 1872 } re_unwind_branch_t; 1873 1874 typedef union re_unwind_t { 1875 I32 type; 1876 re_unwind_generic_t generic; 1877 re_unwind_branch_t branch; 1878 } re_unwind_t; 1879 1880 /* 1881 - regmatch - main matching routine 1882 * 1883 * Conceptually the strategy is simple: check to see whether the current 1884 * node matches, call self recursively to see whether the rest matches, 1885 * and then act accordingly. In practice we make some effort to avoid 1886 * recursion, in particular by going through "ordinary" nodes (that don't 1887 * need to know whether the rest of the match failed) by a loop instead of 1888 * by recursion. 1889 */ 1890 /* [lwall] I've hoisted the register declarations to the outer block in order to 1891 * maybe save a little bit of pushing and popping on the stack. It also takes 1892 * advantage of machines that use a register save mask on subroutine entry. 1893 */ 1894 STATIC I32 /* 0 failure, 1 success */ 1895 S_regmatch(pTHX_ regnode *prog) 1896 { 1897 register regnode *scan; /* Current node. */ 1898 regnode *next; /* Next node. */ 1899 regnode *inner; /* Next node in internal branch. */ 1900 register I32 nextchr; /* renamed nextchr - nextchar colides with 1901 function of same name */ 1902 register I32 n; /* no or next */ 1903 register I32 ln; /* len or last */ 1904 register char *s; /* operand or save */ 1905 register char *locinput = PL_reginput; 1906 register I32 c1, c2, paren; /* case fold search, parenth */ 1907 int minmod = 0, sw = 0, logical = 0; 1908 I32 unwind = 0; 1909 I32 firstcp = PL_savestack_ix; 1910 1911 #ifdef DEBUGGING 1912 PL_regindent++; 1913 #endif 1914 1915 /* Note that nextchr is a byte even in UTF */ 1916 nextchr = UCHARAT(locinput); 1917 scan = prog; 1918 while (scan != NULL) { 1919 #define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO) 1920 #if 1 1921 # define sayYES goto yes 1922 # define sayNO goto no 1923 # define sayYES_FINAL goto yes_final 1924 # define sayYES_LOUD goto yes_loud 1925 # define sayNO_FINAL goto no_final 1926 # define sayNO_SILENT goto do_no 1927 # define saySAME(x) if (x) goto yes; else goto no 1928 # define REPORT_CODE_OFF 24 1929 #else 1930 # define sayYES return 1 1931 # define sayNO return 0 1932 # define sayYES_FINAL return 1 1933 # define sayYES_LOUD return 1 1934 # define sayNO_FINAL return 0 1935 # define sayNO_SILENT return 0 1936 # define saySAME(x) return x 1937 #endif 1938 DEBUG_r( { 1939 SV *prop = sv_newmortal(); 1940 int docolor = *PL_colors[0]; 1941 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */ 1942 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput); 1943 /* The part of the string before starttry has one color 1944 (pref0_len chars), between starttry and current 1945 position another one (pref_len - pref0_len chars), 1946 after the current position the third one. 1947 We assume that pref0_len <= pref_len, otherwise we 1948 decrease pref0_len. */ 1949 int pref_len = (locinput - PL_bostr > (5 + taill) - l 1950 ? (5 + taill) - l : locinput - PL_bostr); 1951 int pref0_len = pref_len - (locinput - PL_reg_starttry); 1952 1953 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput) 1954 l = ( PL_regeol - locinput > (5 + taill) - pref_len 1955 ? (5 + taill) - pref_len : PL_regeol - locinput); 1956 if (pref0_len < 0) 1957 pref0_len = 0; 1958 if (pref0_len > pref_len) 1959 pref0_len = pref_len; 1960 regprop(prop, scan); 1961 PerlIO_printf(Perl_debug_log, 1962 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n", 1963 (IV)(locinput - PL_bostr), 1964 PL_colors[4], pref0_len, 1965 locinput - pref_len, PL_colors[5], 1966 PL_colors[2], pref_len - pref0_len, 1967 locinput - pref_len + pref0_len, PL_colors[3], 1968 (docolor ? "" : "> <"), 1969 PL_colors[0], l, locinput, PL_colors[1], 1970 15 - l - pref_len + 1, 1971 "", 1972 (IV)(scan - PL_regprogram), PL_regindent*2, "", 1973 SvPVX(prop)); 1974 } ); 1975 1976 next = scan + NEXT_OFF(scan); 1977 if (next == scan) 1978 next = NULL; 1979 1980 switch (OP(scan)) { 1981 case BOL: 1982 if (locinput == PL_bostr 1983 ? PL_regprev == '\n' 1984 : (PL_multiline && 1985 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') ) 1986 { 1987 /* regtill = regbol; */ 1988 break; 1989 } 1990 sayNO; 1991 case MBOL: 1992 if (locinput == PL_bostr 1993 ? PL_regprev == '\n' 1994 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') ) 1995 { 1996 break; 1997 } 1998 sayNO; 1999 case SBOL: 2000 if (locinput == PL_bostr) 2001 break; 2002 sayNO; 2003 case GPOS: 2004 if (locinput == PL_reg_ganch) 2005 break; 2006 sayNO; 2007 case EOL: 2008 if (PL_multiline) 2009 goto meol; 2010 else 2011 goto seol; 2012 case MEOL: 2013 meol: 2014 if ((nextchr || locinput < PL_regeol) && nextchr != '\n') 2015 sayNO; 2016 break; 2017 case SEOL: 2018 seol: 2019 if ((nextchr || locinput < PL_regeol) && nextchr != '\n') 2020 sayNO; 2021 if (PL_regeol - locinput > 1) 2022 sayNO; 2023 break; 2024 case EOS: 2025 if (PL_regeol != locinput) 2026 sayNO; 2027 break; 2028 case SANYUTF8: 2029 if (nextchr & 0x80) { 2030 locinput += PL_utf8skip[nextchr]; 2031 if (locinput > PL_regeol) 2032 sayNO; 2033 nextchr = UCHARAT(locinput); 2034 break; 2035 } 2036 if (!nextchr && locinput >= PL_regeol) 2037 sayNO; 2038 nextchr = UCHARAT(++locinput); 2039 break; 2040 case SANY: 2041 if (!nextchr && locinput >= PL_regeol) 2042 sayNO; 2043 nextchr = UCHARAT(++locinput); 2044 break; 2045 case ANYUTF8: 2046 if (nextchr & 0x80) { 2047 locinput += PL_utf8skip[nextchr]; 2048 if (locinput > PL_regeol) 2049 sayNO; 2050 nextchr = UCHARAT(locinput); 2051 break; 2052 } 2053 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n') 2054 sayNO; 2055 nextchr = UCHARAT(++locinput); 2056 break; 2057 case REG_ANY: 2058 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n') 2059 sayNO; 2060 nextchr = UCHARAT(++locinput); 2061 break; 2062 case EXACT: 2063 s = STRING(scan); 2064 ln = STR_LEN(scan); 2065 /* Inline the first character, for speed. */ 2066 if (UCHARAT(s) != nextchr) 2067 sayNO; 2068 if (PL_regeol - locinput < ln) 2069 sayNO; 2070 if (ln > 1 && memNE(s, locinput, ln)) 2071 sayNO; 2072 locinput += ln; 2073 nextchr = UCHARAT(locinput); 2074 break; 2075 case EXACTFL: 2076 PL_reg_flags |= RF_tainted; 2077 /* FALL THROUGH */ 2078 case EXACTF: 2079 s = STRING(scan); 2080 ln = STR_LEN(scan); 2081 2082 if (UTF) { 2083 char *l = locinput; 2084 char *e = s + ln; 2085 c1 = OP(scan) == EXACTF; 2086 while (s < e) { 2087 if (l >= PL_regeol) 2088 sayNO; 2089 if (utf8_to_uv((U8*)s, e - s, 0, 0) != 2090 (c1 ? 2091 toLOWER_utf8((U8*)l) : 2092 toLOWER_LC_utf8((U8*)l))) 2093 { 2094 sayNO; 2095 } 2096 s += UTF8SKIP(s); 2097 l += UTF8SKIP(l); 2098 } 2099 locinput = l; 2100 nextchr = UCHARAT(locinput); 2101 break; 2102 } 2103 2104 /* Inline the first character, for speed. */ 2105 if (UCHARAT(s) != nextchr && 2106 UCHARAT(s) != ((OP(scan) == EXACTF) 2107 ? PL_fold : PL_fold_locale)[nextchr]) 2108 sayNO; 2109 if (PL_regeol - locinput < ln) 2110 sayNO; 2111 if (ln > 1 && (OP(scan) == EXACTF 2112 ? ibcmp(s, locinput, ln) 2113 : ibcmp_locale(s, locinput, ln))) 2114 sayNO; 2115 locinput += ln; 2116 nextchr = UCHARAT(locinput); 2117 break; 2118 case ANYOFUTF8: 2119 if (!REGINCLASSUTF8(scan, (U8*)locinput)) 2120 sayNO; 2121 if (locinput >= PL_regeol) 2122 sayNO; 2123 locinput += PL_utf8skip[nextchr]; 2124 nextchr = UCHARAT(locinput); 2125 break; 2126 case ANYOF: 2127 if (nextchr < 0) 2128 nextchr = UCHARAT(locinput); 2129 if (!REGINCLASS(scan, nextchr)) 2130 sayNO; 2131 if (!nextchr && locinput >= PL_regeol) 2132 sayNO; 2133 nextchr = UCHARAT(++locinput); 2134 break; 2135 case ALNUML: 2136 PL_reg_flags |= RF_tainted; 2137 /* FALL THROUGH */ 2138 case ALNUM: 2139 if (!nextchr) 2140 sayNO; 2141 if (!(OP(scan) == ALNUM 2142 ? isALNUM(nextchr) : isALNUM_LC(nextchr))) 2143 sayNO; 2144 nextchr = UCHARAT(++locinput); 2145 break; 2146 case ALNUMLUTF8: 2147 PL_reg_flags |= RF_tainted; 2148 /* FALL THROUGH */ 2149 case ALNUMUTF8: 2150 if (!nextchr) 2151 sayNO; 2152 if (nextchr & 0x80) { 2153 if (!(OP(scan) == ALNUMUTF8 2154 ? swash_fetch(PL_utf8_alnum, (U8*)locinput) 2155 : isALNUM_LC_utf8((U8*)locinput))) 2156 { 2157 sayNO; 2158 } 2159 locinput += PL_utf8skip[nextchr]; 2160 nextchr = UCHARAT(locinput); 2161 break; 2162 } 2163 if (!(OP(scan) == ALNUMUTF8 2164 ? isALNUM(nextchr) : isALNUM_LC(nextchr))) 2165 sayNO; 2166 nextchr = UCHARAT(++locinput); 2167 break; 2168 case NALNUML: 2169 PL_reg_flags |= RF_tainted; 2170 /* FALL THROUGH */ 2171 case NALNUM: 2172 if (!nextchr && locinput >= PL_regeol) 2173 sayNO; 2174 if (OP(scan) == NALNUM 2175 ? isALNUM(nextchr) : isALNUM_LC(nextchr)) 2176 sayNO; 2177 nextchr = UCHARAT(++locinput); 2178 break; 2179 case NALNUMLUTF8: 2180 PL_reg_flags |= RF_tainted; 2181 /* FALL THROUGH */ 2182 case NALNUMUTF8: 2183 if (!nextchr && locinput >= PL_regeol) 2184 sayNO; 2185 if (nextchr & 0x80) { 2186 if (OP(scan) == NALNUMUTF8 2187 ? swash_fetch(PL_utf8_alnum, (U8*)locinput) 2188 : isALNUM_LC_utf8((U8*)locinput)) 2189 { 2190 sayNO; 2191 } 2192 locinput += PL_utf8skip[nextchr]; 2193 nextchr = UCHARAT(locinput); 2194 break; 2195 } 2196 if (OP(scan) == NALNUMUTF8 2197 ? isALNUM(nextchr) : isALNUM_LC(nextchr)) 2198 sayNO; 2199 nextchr = UCHARAT(++locinput); 2200 break; 2201 case BOUNDL: 2202 case NBOUNDL: 2203 PL_reg_flags |= RF_tainted; 2204 /* FALL THROUGH */ 2205 case BOUND: 2206 case NBOUND: 2207 /* was last char in word? */ 2208 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev; 2209 if (OP(scan) == BOUND || OP(scan) == NBOUND) { 2210 ln = isALNUM(ln); 2211 n = isALNUM(nextchr); 2212 } 2213 else { 2214 ln = isALNUM_LC(ln); 2215 n = isALNUM_LC(nextchr); 2216 } 2217 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL)) 2218 sayNO; 2219 break; 2220 case BOUNDLUTF8: 2221 case NBOUNDLUTF8: 2222 PL_reg_flags |= RF_tainted; 2223 /* FALL THROUGH */ 2224 case BOUNDUTF8: 2225 case NBOUNDUTF8: 2226 /* was last char in word? */ 2227 if (locinput == PL_regbol) 2228 ln = PL_regprev; 2229 else { 2230 U8 *r = reghop((U8*)locinput, -1); 2231 2232 ln = utf8_to_uv(r, s - (char*)r, 0, 0); 2233 } 2234 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) { 2235 ln = isALNUM_uni(ln); 2236 n = swash_fetch(PL_utf8_alnum, (U8*)locinput); 2237 } 2238 else { 2239 ln = isALNUM_LC_uni(ln); 2240 n = isALNUM_LC_utf8((U8*)locinput); 2241 } 2242 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8)) 2243 sayNO; 2244 break; 2245 case SPACEL: 2246 PL_reg_flags |= RF_tainted; 2247 /* FALL THROUGH */ 2248 case SPACE: 2249 if (!nextchr) 2250 sayNO; 2251 if (!(OP(scan) == SPACE 2252 ? isSPACE(nextchr) : isSPACE_LC(nextchr))) 2253 sayNO; 2254 nextchr = UCHARAT(++locinput); 2255 break; 2256 case SPACELUTF8: 2257 PL_reg_flags |= RF_tainted; 2258 /* FALL THROUGH */ 2259 case SPACEUTF8: 2260 if (!nextchr) 2261 sayNO; 2262 if (nextchr & 0x80) { 2263 if (!(OP(scan) == SPACEUTF8 2264 ? swash_fetch(PL_utf8_space, (U8*)locinput) 2265 : isSPACE_LC_utf8((U8*)locinput))) 2266 { 2267 sayNO; 2268 } 2269 locinput += PL_utf8skip[nextchr]; 2270 nextchr = UCHARAT(locinput); 2271 break; 2272 } 2273 if (!(OP(scan) == SPACEUTF8 2274 ? isSPACE(nextchr) : isSPACE_LC(nextchr))) 2275 sayNO; 2276 nextchr = UCHARAT(++locinput); 2277 break; 2278 case NSPACEL: 2279 PL_reg_flags |= RF_tainted; 2280 /* FALL THROUGH */ 2281 case NSPACE: 2282 if (!nextchr && locinput >= PL_regeol) 2283 sayNO; 2284 if (OP(scan) == NSPACE 2285 ? isSPACE(nextchr) : isSPACE_LC(nextchr)) 2286 sayNO; 2287 nextchr = UCHARAT(++locinput); 2288 break; 2289 case NSPACELUTF8: 2290 PL_reg_flags |= RF_tainted; 2291 /* FALL THROUGH */ 2292 case NSPACEUTF8: 2293 if (!nextchr && locinput >= PL_regeol) 2294 sayNO; 2295 if (nextchr & 0x80) { 2296 if (OP(scan) == NSPACEUTF8 2297 ? swash_fetch(PL_utf8_space, (U8*)locinput) 2298 : isSPACE_LC_utf8((U8*)locinput)) 2299 { 2300 sayNO; 2301 } 2302 locinput += PL_utf8skip[nextchr]; 2303 nextchr = UCHARAT(locinput); 2304 break; 2305 } 2306 if (OP(scan) == NSPACEUTF8 2307 ? isSPACE(nextchr) : isSPACE_LC(nextchr)) 2308 sayNO; 2309 nextchr = UCHARAT(++locinput); 2310 break; 2311 case DIGITL: 2312 PL_reg_flags |= RF_tainted; 2313 /* FALL THROUGH */ 2314 case DIGIT: 2315 if (!nextchr) 2316 sayNO; 2317 if (!(OP(scan) == DIGIT 2318 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))) 2319 sayNO; 2320 nextchr = UCHARAT(++locinput); 2321 break; 2322 case DIGITLUTF8: 2323 PL_reg_flags |= RF_tainted; 2324 /* FALL THROUGH */ 2325 case DIGITUTF8: 2326 if (!nextchr) 2327 sayNO; 2328 if (nextchr & 0x80) { 2329 if (!(OP(scan) == DIGITUTF8 2330 ? swash_fetch(PL_utf8_digit, (U8*)locinput) 2331 : isDIGIT_LC_utf8((U8*)locinput))) 2332 { 2333 sayNO; 2334 } 2335 locinput += PL_utf8skip[nextchr]; 2336 nextchr = UCHARAT(locinput); 2337 break; 2338 } 2339 if (!(OP(scan) == DIGITUTF8 2340 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))) 2341 sayNO; 2342 nextchr = UCHARAT(++locinput); 2343 break; 2344 case NDIGITL: 2345 PL_reg_flags |= RF_tainted; 2346 /* FALL THROUGH */ 2347 case NDIGIT: 2348 if (!nextchr && locinput >= PL_regeol) 2349 sayNO; 2350 if (OP(scan) == NDIGIT 2351 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)) 2352 sayNO; 2353 nextchr = UCHARAT(++locinput); 2354 break; 2355 case NDIGITLUTF8: 2356 PL_reg_flags |= RF_tainted; 2357 /* FALL THROUGH */ 2358 case NDIGITUTF8: 2359 if (!nextchr && locinput >= PL_regeol) 2360 sayNO; 2361 if (nextchr & 0x80) { 2362 if (OP(scan) == NDIGITUTF8 2363 ? swash_fetch(PL_utf8_digit, (U8*)locinput) 2364 : isDIGIT_LC_utf8((U8*)locinput)) 2365 { 2366 sayNO; 2367 } 2368 locinput += PL_utf8skip[nextchr]; 2369 nextchr = UCHARAT(locinput); 2370 break; 2371 } 2372 if (OP(scan) == NDIGITUTF8 2373 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)) 2374 sayNO; 2375 nextchr = UCHARAT(++locinput); 2376 break; 2377 case CLUMP: 2378 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput)) 2379 sayNO; 2380 locinput += PL_utf8skip[nextchr]; 2381 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput)) 2382 locinput += UTF8SKIP(locinput); 2383 if (locinput > PL_regeol) 2384 sayNO; 2385 nextchr = UCHARAT(locinput); 2386 break; 2387 case REFFL: 2388 PL_reg_flags |= RF_tainted; 2389 /* FALL THROUGH */ 2390 case REF: 2391 case REFF: 2392 n = ARG(scan); /* which paren pair */ 2393 ln = PL_regstartp[n]; 2394 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */ 2395 if (*PL_reglastparen < n || ln == -1) 2396 sayNO; /* Do not match unless seen CLOSEn. */ 2397 if (ln == PL_regendp[n]) 2398 break; 2399 2400 s = PL_bostr + ln; 2401 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */ 2402 char *l = locinput; 2403 char *e = PL_bostr + PL_regendp[n]; 2404 /* 2405 * Note that we can't do the "other character" lookup trick as 2406 * in the 8-bit case (no pun intended) because in Unicode we 2407 * have to map both upper and title case to lower case. 2408 */ 2409 if (OP(scan) == REFF) { 2410 while (s < e) { 2411 if (l >= PL_regeol) 2412 sayNO; 2413 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l)) 2414 sayNO; 2415 s += UTF8SKIP(s); 2416 l += UTF8SKIP(l); 2417 } 2418 } 2419 else { 2420 while (s < e) { 2421 if (l >= PL_regeol) 2422 sayNO; 2423 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l)) 2424 sayNO; 2425 s += UTF8SKIP(s); 2426 l += UTF8SKIP(l); 2427 } 2428 } 2429 locinput = l; 2430 nextchr = UCHARAT(locinput); 2431 break; 2432 } 2433 2434 /* Inline the first character, for speed. */ 2435 if (UCHARAT(s) != nextchr && 2436 (OP(scan) == REF || 2437 (UCHARAT(s) != ((OP(scan) == REFF 2438 ? PL_fold : PL_fold_locale)[nextchr])))) 2439 sayNO; 2440 ln = PL_regendp[n] - ln; 2441 if (locinput + ln > PL_regeol) 2442 sayNO; 2443 if (ln > 1 && (OP(scan) == REF 2444 ? memNE(s, locinput, ln) 2445 : (OP(scan) == REFF 2446 ? ibcmp(s, locinput, ln) 2447 : ibcmp_locale(s, locinput, ln)))) 2448 sayNO; 2449 locinput += ln; 2450 nextchr = UCHARAT(locinput); 2451 break; 2452 2453 case NOTHING: 2454 case TAIL: 2455 break; 2456 case BACK: 2457 break; 2458 case EVAL: 2459 { 2460 dSP; 2461 OP_4tree *oop = PL_op; 2462 COP *ocurcop = PL_curcop; 2463 SV **ocurpad = PL_curpad; 2464 SV *ret; 2465 2466 n = ARG(scan); 2467 PL_op = (OP_4tree*)PL_regdata->data[n]; 2468 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) ); 2469 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]); 2470 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr; 2471 2472 CALLRUNOPS(aTHX); /* Scalar context. */ 2473 SPAGAIN; 2474 ret = POPs; 2475 PUTBACK; 2476 2477 PL_op = oop; 2478 PL_curpad = ocurpad; 2479 PL_curcop = ocurcop; 2480 if (logical) { 2481 if (logical == 2) { /* Postponed subexpression. */ 2482 regexp *re; 2483 MAGIC *mg = Null(MAGIC*); 2484 re_cc_state state; 2485 CHECKPOINT cp, lastcp; 2486 2487 if(SvROK(ret) || SvRMAGICAL(ret)) { 2488 SV *sv = SvROK(ret) ? SvRV(ret) : ret; 2489 2490 if(SvMAGICAL(sv)) 2491 mg = mg_find(sv, 'r'); 2492 } 2493 if (mg) { 2494 re = (regexp *)mg->mg_obj; 2495 (void)ReREFCNT_inc(re); 2496 } 2497 else { 2498 STRLEN len; 2499 char *t = SvPV(ret, len); 2500 PMOP pm; 2501 char *oprecomp = PL_regprecomp; 2502 I32 osize = PL_regsize; 2503 I32 onpar = PL_regnpar; 2504 2505 pm.op_pmflags = 0; 2506 pm.op_pmdynflags = (UTF||DO_UTF8(ret) ? PMdf_UTF8 : 0); 2507 re = CALLREGCOMP(aTHX_ t, t + len, &pm); 2508 if (!(SvFLAGS(ret) 2509 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY))) 2510 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0); 2511 PL_regprecomp = oprecomp; 2512 PL_regsize = osize; 2513 PL_regnpar = onpar; 2514 } 2515 DEBUG_r( 2516 PerlIO_printf(Perl_debug_log, 2517 "Entering embedded `%s%.60s%s%s'\n", 2518 PL_colors[0], 2519 re->precomp, 2520 PL_colors[1], 2521 (strlen(re->precomp) > 60 ? "..." : "")) 2522 ); 2523 state.node = next; 2524 state.prev = PL_reg_call_cc; 2525 state.cc = PL_regcc; 2526 state.re = PL_reg_re; 2527 2528 PL_regcc = 0; 2529 2530 cp = regcppush(0); /* Save *all* the positions. */ 2531 REGCP_SET(lastcp); 2532 cache_re(re); 2533 state.ss = PL_savestack_ix; 2534 *PL_reglastparen = 0; 2535 PL_reg_call_cc = &state; 2536 PL_reginput = locinput; 2537 2538 /* XXXX This is too dramatic a measure... */ 2539 PL_reg_maxiter = 0; 2540 2541 if (regmatch(re->program + 1)) { 2542 /* Even though we succeeded, we need to restore 2543 global variables, since we may be wrapped inside 2544 SUSPEND, thus the match may be not finished yet. */ 2545 2546 /* XXXX Do this only if SUSPENDed? */ 2547 PL_reg_call_cc = state.prev; 2548 PL_regcc = state.cc; 2549 PL_reg_re = state.re; 2550 cache_re(PL_reg_re); 2551 2552 /* XXXX This is too dramatic a measure... */ 2553 PL_reg_maxiter = 0; 2554 2555 /* These are needed even if not SUSPEND. */ 2556 ReREFCNT_dec(re); 2557 regcpblow(cp); 2558 sayYES; 2559 } 2560 ReREFCNT_dec(re); 2561 REGCP_UNWIND(lastcp); 2562 regcppop(); 2563 PL_reg_call_cc = state.prev; 2564 PL_regcc = state.cc; 2565 PL_reg_re = state.re; 2566 cache_re(PL_reg_re); 2567 2568 /* XXXX This is too dramatic a measure... */ 2569 PL_reg_maxiter = 0; 2570 2571 sayNO; 2572 } 2573 sw = SvTRUE(ret); 2574 logical = 0; 2575 } 2576 else 2577 sv_setsv(save_scalar(PL_replgv), ret); 2578 break; 2579 } 2580 case OPEN: 2581 n = ARG(scan); /* which paren pair */ 2582 PL_reg_start_tmp[n] = locinput; 2583 if (n > PL_regsize) 2584 PL_regsize = n; 2585 break; 2586 case CLOSE: 2587 n = ARG(scan); /* which paren pair */ 2588 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr; 2589 PL_regendp[n] = locinput - PL_bostr; 2590 if (n > *PL_reglastparen) 2591 *PL_reglastparen = n; 2592 break; 2593 case GROUPP: 2594 n = ARG(scan); /* which paren pair */ 2595 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1); 2596 break; 2597 case IFTHEN: 2598 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */ 2599 if (sw) 2600 next = NEXTOPER(NEXTOPER(scan)); 2601 else { 2602 next = scan + ARG(scan); 2603 if (OP(next) == IFTHEN) /* Fake one. */ 2604 next = NEXTOPER(NEXTOPER(next)); 2605 } 2606 break; 2607 case LOGICAL: 2608 logical = scan->flags; 2609 break; 2610 /******************************************************************* 2611 PL_regcc contains infoblock about the innermost (...)* loop, and 2612 a pointer to the next outer infoblock. 2613 2614 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM): 2615 2616 1) After matching X, regnode for CURLYX is processed; 2617 2618 2) This regnode creates infoblock on the stack, and calls 2619 regmatch() recursively with the starting point at WHILEM node; 2620 2621 3) Each hit of WHILEM node tries to match A and Z (in the order 2622 depending on the current iteration, min/max of {min,max} and 2623 greediness). The information about where are nodes for "A" 2624 and "Z" is read from the infoblock, as is info on how many times "A" 2625 was already matched, and greediness. 2626 2627 4) After A matches, the same WHILEM node is hit again. 2628 2629 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX 2630 of the same pair. Thus when WHILEM tries to match Z, it temporarily 2631 resets PL_regcc, since this Y(A)*Z can be a part of some other loop: 2632 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node 2633 of the external loop. 2634 2635 Currently present infoblocks form a tree with a stem formed by PL_curcc 2636 and whatever it mentions via ->next, and additional attached trees 2637 corresponding to temporarily unset infoblocks as in "5" above. 2638 2639 In the following picture infoblocks for outer loop of 2640 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block 2641 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed 2642 infoblocks are drawn below the "reset" infoblock. 2643 2644 In fact in the picture below we do not show failed matches for Z and T 2645 by WHILEM blocks. [We illustrate minimal matches, since for them it is 2646 more obvious *why* one needs to *temporary* unset infoblocks.] 2647 2648 Matched REx position InfoBlocks Comment 2649 (Y(A)*?Z)*?T x 2650 Y(A)*?Z)*?T x <- O 2651 Y (A)*?Z)*?T x <- O 2652 Y A)*?Z)*?T x <- O <- I 2653 YA )*?Z)*?T x <- O <- I 2654 YA A)*?Z)*?T x <- O <- I 2655 YAA )*?Z)*?T x <- O <- I 2656 YAA Z)*?T x <- O # Temporary unset I 2657 I 2658 2659 YAAZ Y(A)*?Z)*?T x <- O 2660 I 2661 2662 YAAZY (A)*?Z)*?T x <- O 2663 I 2664 2665 YAAZY A)*?Z)*?T x <- O <- I 2666 I 2667 2668 YAAZYA )*?Z)*?T x <- O <- I 2669 I 2670 2671 YAAZYA Z)*?T x <- O # Temporary unset I 2672 I,I 2673 2674 YAAZYAZ )*?T x <- O 2675 I,I 2676 2677 YAAZYAZ T x # Temporary unset O 2678 O 2679 I,I 2680 2681 YAAZYAZT x 2682 O 2683 I,I 2684 *******************************************************************/ 2685 case CURLYX: { 2686 CURCUR cc; 2687 CHECKPOINT cp = PL_savestack_ix; 2688 /* No need to save/restore up to this paren */ 2689 I32 parenfloor = scan->flags; 2690 2691 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */ 2692 next += ARG(next); 2693 cc.oldcc = PL_regcc; 2694 PL_regcc = &cc; 2695 /* XXXX Probably it is better to teach regpush to support 2696 parenfloor > PL_regsize... */ 2697 if (parenfloor > *PL_reglastparen) 2698 parenfloor = *PL_reglastparen; /* Pessimization... */ 2699 cc.parenfloor = parenfloor; 2700 cc.cur = -1; 2701 cc.min = ARG1(scan); 2702 cc.max = ARG2(scan); 2703 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS; 2704 cc.next = next; 2705 cc.minmod = minmod; 2706 cc.lastloc = 0; 2707 PL_reginput = locinput; 2708 n = regmatch(PREVOPER(next)); /* start on the WHILEM */ 2709 regcpblow(cp); 2710 PL_regcc = cc.oldcc; 2711 saySAME(n); 2712 } 2713 /* NOT REACHED */ 2714 case WHILEM: { 2715 /* 2716 * This is really hard to understand, because after we match 2717 * what we're trying to match, we must make sure the rest of 2718 * the REx is going to match for sure, and to do that we have 2719 * to go back UP the parse tree by recursing ever deeper. And 2720 * if it fails, we have to reset our parent's current state 2721 * that we can try again after backing off. 2722 */ 2723 2724 CHECKPOINT cp, lastcp; 2725 CURCUR* cc = PL_regcc; 2726 char *lastloc = cc->lastloc; /* Detection of 0-len. */ 2727 2728 n = cc->cur + 1; /* how many we know we matched */ 2729 PL_reginput = locinput; 2730 2731 DEBUG_r( 2732 PerlIO_printf(Perl_debug_log, 2733 "%*s %ld out of %ld..%ld cc=%lx\n", 2734 REPORT_CODE_OFF+PL_regindent*2, "", 2735 (long)n, (long)cc->min, 2736 (long)cc->max, (long)cc) 2737 ); 2738 2739 /* If degenerate scan matches "", assume scan done. */ 2740 2741 if (locinput == cc->lastloc && n >= cc->min) { 2742 PL_regcc = cc->oldcc; 2743 if (PL_regcc) 2744 ln = PL_regcc->cur; 2745 DEBUG_r( 2746 PerlIO_printf(Perl_debug_log, 2747 "%*s empty match detected, try continuation...\n", 2748 REPORT_CODE_OFF+PL_regindent*2, "") 2749 ); 2750 if (regmatch(cc->next)) 2751 sayYES; 2752 if (PL_regcc) 2753 PL_regcc->cur = ln; 2754 PL_regcc = cc; 2755 sayNO; 2756 } 2757 2758 /* First just match a string of min scans. */ 2759 2760 if (n < cc->min) { 2761 cc->cur = n; 2762 cc->lastloc = locinput; 2763 if (regmatch(cc->scan)) 2764 sayYES; 2765 cc->cur = n - 1; 2766 cc->lastloc = lastloc; 2767 sayNO; 2768 } 2769 2770 if (scan->flags) { 2771 /* Check whether we already were at this position. 2772 Postpone detection until we know the match is not 2773 *that* much linear. */ 2774 if (!PL_reg_maxiter) { 2775 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4); 2776 PL_reg_leftiter = PL_reg_maxiter; 2777 } 2778 if (PL_reg_leftiter-- == 0) { 2779 I32 size = (PL_reg_maxiter + 7)/8; 2780 if (PL_reg_poscache) { 2781 if (PL_reg_poscache_size < size) { 2782 Renew(PL_reg_poscache, size, char); 2783 PL_reg_poscache_size = size; 2784 } 2785 Zero(PL_reg_poscache, size, char); 2786 } 2787 else { 2788 PL_reg_poscache_size = size; 2789 Newz(29, PL_reg_poscache, size, char); 2790 } 2791 DEBUG_r( 2792 PerlIO_printf(Perl_debug_log, 2793 "%sDetected a super-linear match, switching on caching%s...\n", 2794 PL_colors[4], PL_colors[5]) 2795 ); 2796 } 2797 if (PL_reg_leftiter < 0) { 2798 I32 o = locinput - PL_bostr, b; 2799 2800 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4); 2801 b = o % 8; 2802 o /= 8; 2803 if (PL_reg_poscache[o] & (1<<b)) { 2804 DEBUG_r( 2805 PerlIO_printf(Perl_debug_log, 2806 "%*s already tried at this position...\n", 2807 REPORT_CODE_OFF+PL_regindent*2, "") 2808 ); 2809 sayNO_SILENT; 2810 } 2811 PL_reg_poscache[o] |= (1<<b); 2812 } 2813 } 2814 2815 /* Prefer next over scan for minimal matching. */ 2816 2817 if (cc->minmod) { 2818 PL_regcc = cc->oldcc; 2819 if (PL_regcc) 2820 ln = PL_regcc->cur; 2821 cp = regcppush(cc->parenfloor); 2822 REGCP_SET(lastcp); 2823 if (regmatch(cc->next)) { 2824 regcpblow(cp); 2825 sayYES; /* All done. */ 2826 } 2827 REGCP_UNWIND(lastcp); 2828 regcppop(); 2829 if (PL_regcc) 2830 PL_regcc->cur = ln; 2831 PL_regcc = cc; 2832 2833 if (n >= cc->max) { /* Maximum greed exceeded? */ 2834 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY 2835 && !(PL_reg_flags & RF_warned)) { 2836 PL_reg_flags |= RF_warned; 2837 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded", 2838 "Complex regular subexpression recursion", 2839 REG_INFTY - 1); 2840 } 2841 sayNO; 2842 } 2843 2844 DEBUG_r( 2845 PerlIO_printf(Perl_debug_log, 2846 "%*s trying longer...\n", 2847 REPORT_CODE_OFF+PL_regindent*2, "") 2848 ); 2849 /* Try scanning more and see if it helps. */ 2850 PL_reginput = locinput; 2851 cc->cur = n; 2852 cc->lastloc = locinput; 2853 cp = regcppush(cc->parenfloor); 2854 REGCP_SET(lastcp); 2855 if (regmatch(cc->scan)) { 2856 regcpblow(cp); 2857 sayYES; 2858 } 2859 REGCP_UNWIND(lastcp); 2860 regcppop(); 2861 cc->cur = n - 1; 2862 cc->lastloc = lastloc; 2863 sayNO; 2864 } 2865 2866 /* Prefer scan over next for maximal matching. */ 2867 2868 if (n < cc->max) { /* More greed allowed? */ 2869 cp = regcppush(cc->parenfloor); 2870 cc->cur = n; 2871 cc->lastloc = locinput; 2872 REGCP_SET(lastcp); 2873 if (regmatch(cc->scan)) { 2874 regcpblow(cp); 2875 sayYES; 2876 } 2877 REGCP_UNWIND(lastcp); 2878 regcppop(); /* Restore some previous $<digit>s? */ 2879 PL_reginput = locinput; 2880 DEBUG_r( 2881 PerlIO_printf(Perl_debug_log, 2882 "%*s failed, try continuation...\n", 2883 REPORT_CODE_OFF+PL_regindent*2, "") 2884 ); 2885 } 2886 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY 2887 && !(PL_reg_flags & RF_warned)) { 2888 PL_reg_flags |= RF_warned; 2889 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded", 2890 "Complex regular subexpression recursion", 2891 REG_INFTY - 1); 2892 } 2893 2894 /* Failed deeper matches of scan, so see if this one works. */ 2895 PL_regcc = cc->oldcc; 2896 if (PL_regcc) 2897 ln = PL_regcc->cur; 2898 if (regmatch(cc->next)) 2899 sayYES; 2900 if (PL_regcc) 2901 PL_regcc->cur = ln; 2902 PL_regcc = cc; 2903 cc->cur = n - 1; 2904 cc->lastloc = lastloc; 2905 sayNO; 2906 } 2907 /* NOT REACHED */ 2908 case BRANCHJ: 2909 next = scan + ARG(scan); 2910 if (next == scan) 2911 next = NULL; 2912 inner = NEXTOPER(NEXTOPER(scan)); 2913 goto do_branch; 2914 case BRANCH: 2915 inner = NEXTOPER(scan); 2916 do_branch: 2917 { 2918 CHECKPOINT lastcp; 2919 c1 = OP(scan); 2920 if (OP(next) != c1) /* No choice. */ 2921 next = inner; /* Avoid recursion. */ 2922 else { 2923 I32 lastparen = *PL_reglastparen; 2924 I32 unwind1; 2925 re_unwind_branch_t *uw; 2926 2927 /* Put unwinding data on stack */ 2928 unwind1 = SSNEWt(1,re_unwind_branch_t); 2929 uw = SSPTRt(unwind1,re_unwind_branch_t); 2930 uw->prev = unwind; 2931 unwind = unwind1; 2932 uw->type = ((c1 == BRANCH) 2933 ? RE_UNWIND_BRANCH 2934 : RE_UNWIND_BRANCHJ); 2935 uw->lastparen = lastparen; 2936 uw->next = next; 2937 uw->locinput = locinput; 2938 uw->nextchr = nextchr; 2939 #ifdef DEBUGGING 2940 uw->regindent = ++PL_regindent; 2941 #endif 2942 2943 REGCP_SET(uw->lastcp); 2944 2945 /* Now go into the first branch */ 2946 next = inner; 2947 } 2948 } 2949 break; 2950 case MINMOD: 2951 minmod = 1; 2952 break; 2953 case CURLYM: 2954 { 2955 I32 l = 0; 2956 CHECKPOINT lastcp; 2957 2958 /* We suppose that the next guy does not need 2959 backtracking: in particular, it is of constant length, 2960 and has no parenths to influence future backrefs. */ 2961 ln = ARG1(scan); /* min to match */ 2962 n = ARG2(scan); /* max to match */ 2963 paren = scan->flags; 2964 if (paren) { 2965 if (paren > PL_regsize) 2966 PL_regsize = paren; 2967 if (paren > *PL_reglastparen) 2968 *PL_reglastparen = paren; 2969 } 2970 scan = NEXTOPER(scan) + NODE_STEP_REGNODE; 2971 if (paren) 2972 scan += NEXT_OFF(scan); /* Skip former OPEN. */ 2973 PL_reginput = locinput; 2974 if (minmod) { 2975 minmod = 0; 2976 if (ln && regrepeat_hard(scan, ln, &l) < ln) 2977 sayNO; 2978 if (ln && l == 0 && n >= ln 2979 /* In fact, this is tricky. If paren, then the 2980 fact that we did/didnot match may influence 2981 future execution. */ 2982 && !(paren && ln == 0)) 2983 ln = n; 2984 locinput = PL_reginput; 2985 if (PL_regkind[(U8)OP(next)] == EXACT) { 2986 c1 = (U8)*STRING(next); 2987 if (OP(next) == EXACTF) 2988 c2 = PL_fold[c1]; 2989 else if (OP(next) == EXACTFL) 2990 c2 = PL_fold_locale[c1]; 2991 else 2992 c2 = c1; 2993 } 2994 else 2995 c1 = c2 = -1000; 2996 REGCP_SET(lastcp); 2997 /* This may be improved if l == 0. */ 2998 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */ 2999 /* If it could work, try it. */ 3000 if (c1 == -1000 || 3001 UCHARAT(PL_reginput) == c1 || 3002 UCHARAT(PL_reginput) == c2) 3003 { 3004 if (paren) { 3005 if (n) { 3006 PL_regstartp[paren] = 3007 HOPc(PL_reginput, -l) - PL_bostr; 3008 PL_regendp[paren] = PL_reginput - PL_bostr; 3009 } 3010 else 3011 PL_regendp[paren] = -1; 3012 } 3013 if (regmatch(next)) 3014 sayYES; 3015 REGCP_UNWIND(lastcp); 3016 } 3017 /* Couldn't or didn't -- move forward. */ 3018 PL_reginput = locinput; 3019 if (regrepeat_hard(scan, 1, &l)) { 3020 ln++; 3021 locinput = PL_reginput; 3022 } 3023 else 3024 sayNO; 3025 } 3026 } 3027 else { 3028 n = regrepeat_hard(scan, n, &l); 3029 if (n != 0 && l == 0 3030 /* In fact, this is tricky. If paren, then the 3031 fact that we did/didnot match may influence 3032 future execution. */ 3033 && !(paren && ln == 0)) 3034 ln = n; 3035 locinput = PL_reginput; 3036 DEBUG_r( 3037 PerlIO_printf(Perl_debug_log, 3038 "%*s matched %"IVdf" times, len=%"IVdf"...\n", 3039 (int)(REPORT_CODE_OFF+PL_regindent*2), "", 3040 (IV) n, (IV)l) 3041 ); 3042 if (n >= ln) { 3043 if (PL_regkind[(U8)OP(next)] == EXACT) { 3044 c1 = (U8)*STRING(next); 3045 if (OP(next) == EXACTF) 3046 c2 = PL_fold[c1]; 3047 else if (OP(next) == EXACTFL) 3048 c2 = PL_fold_locale[c1]; 3049 else 3050 c2 = c1; 3051 } 3052 else 3053 c1 = c2 = -1000; 3054 } 3055 REGCP_SET(lastcp); 3056 while (n >= ln) { 3057 /* If it could work, try it. */ 3058 if (c1 == -1000 || 3059 UCHARAT(PL_reginput) == c1 || 3060 UCHARAT(PL_reginput) == c2) 3061 { 3062 DEBUG_r( 3063 PerlIO_printf(Perl_debug_log, 3064 "%*s trying tail with n=%"IVdf"...\n", 3065 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n) 3066 ); 3067 if (paren) { 3068 if (n) { 3069 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr; 3070 PL_regendp[paren] = PL_reginput - PL_bostr; 3071 } 3072 else 3073 PL_regendp[paren] = -1; 3074 } 3075 if (regmatch(next)) 3076 sayYES; 3077 REGCP_UNWIND(lastcp); 3078 } 3079 /* Couldn't or didn't -- back up. */ 3080 n--; 3081 locinput = HOPc(locinput, -l); 3082 PL_reginput = locinput; 3083 } 3084 } 3085 sayNO; 3086 break; 3087 } 3088 case CURLYN: 3089 paren = scan->flags; /* Which paren to set */ 3090 if (paren > PL_regsize) 3091 PL_regsize = paren; 3092 if (paren > *PL_reglastparen) 3093 *PL_reglastparen = paren; 3094 ln = ARG1(scan); /* min to match */ 3095 n = ARG2(scan); /* max to match */ 3096 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE); 3097 goto repeat; 3098 case CURLY: 3099 paren = 0; 3100 ln = ARG1(scan); /* min to match */ 3101 n = ARG2(scan); /* max to match */ 3102 scan = NEXTOPER(scan) + NODE_STEP_REGNODE; 3103 goto repeat; 3104 case STAR: 3105 ln = 0; 3106 n = REG_INFTY; 3107 scan = NEXTOPER(scan); 3108 paren = 0; 3109 goto repeat; 3110 case PLUS: 3111 ln = 1; 3112 n = REG_INFTY; 3113 scan = NEXTOPER(scan); 3114 paren = 0; 3115 repeat: 3116 /* 3117 * Lookahead to avoid useless match attempts 3118 * when we know what character comes next. 3119 */ 3120 if (PL_regkind[(U8)OP(next)] == EXACT) { 3121 c1 = (U8)*STRING(next); 3122 if (OP(next) == EXACTF) 3123 c2 = PL_fold[c1]; 3124 else if (OP(next) == EXACTFL) 3125 c2 = PL_fold_locale[c1]; 3126 else 3127 c2 = c1; 3128 } 3129 else 3130 c1 = c2 = -1000; 3131 PL_reginput = locinput; 3132 if (minmod) { 3133 CHECKPOINT lastcp; 3134 minmod = 0; 3135 if (ln && regrepeat(scan, ln) < ln) 3136 sayNO; 3137 locinput = PL_reginput; 3138 REGCP_SET(lastcp); 3139 if (c1 != -1000) { 3140 char *e = locinput + n - ln; /* Should not check after this */ 3141 char *old = locinput; 3142 3143 if (e >= PL_regeol || (n == REG_INFTY)) 3144 e = PL_regeol - 1; 3145 while (1) { 3146 /* Find place 'next' could work */ 3147 if (c1 == c2) { 3148 while (locinput <= e && *locinput != c1) 3149 locinput++; 3150 } else { 3151 while (locinput <= e 3152 && *locinput != c1 3153 && *locinput != c2) 3154 locinput++; 3155 } 3156 if (locinput > e) 3157 sayNO; 3158 /* PL_reginput == old now */ 3159 if (locinput != old) { 3160 ln = 1; /* Did some */ 3161 if (regrepeat(scan, locinput - old) < 3162 locinput - old) 3163 sayNO; 3164 } 3165 /* PL_reginput == locinput now */ 3166 TRYPAREN(paren, ln, locinput); 3167 PL_reginput = locinput; /* Could be reset... */ 3168 REGCP_UNWIND(lastcp); 3169 /* Couldn't or didn't -- move forward. */ 3170 old = locinput++; 3171 } 3172 } 3173 else 3174 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */ 3175 /* If it could work, try it. */ 3176 if (c1 == -1000 || 3177 UCHARAT(PL_reginput) == c1 || 3178 UCHARAT(PL_reginput) == c2) 3179 { 3180 TRYPAREN(paren, n, PL_reginput); 3181 REGCP_UNWIND(lastcp); 3182 } 3183 /* Couldn't or didn't -- move forward. */ 3184 PL_reginput = locinput; 3185 if (regrepeat(scan, 1)) { 3186 ln++; 3187 locinput = PL_reginput; 3188 } 3189 else 3190 sayNO; 3191 } 3192 } 3193 else { 3194 CHECKPOINT lastcp; 3195 n = regrepeat(scan, n); 3196 locinput = PL_reginput; 3197 if (ln < n && PL_regkind[(U8)OP(next)] == EOL && 3198 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) { 3199 ln = n; /* why back off? */ 3200 /* ...because $ and \Z can match before *and* after 3201 newline at the end. Consider "\n\n" =~ /\n+\Z\n/. 3202 We should back off by one in this case. */ 3203 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS) 3204 ln--; 3205 } 3206 REGCP_SET(lastcp); 3207 if (paren) { 3208 while (n >= ln) { 3209 /* If it could work, try it. */ 3210 if (c1 == -1000 || 3211 UCHARAT(PL_reginput) == c1 || 3212 UCHARAT(PL_reginput) == c2) 3213 { 3214 TRYPAREN(paren, n, PL_reginput); 3215 REGCP_UNWIND(lastcp); 3216 } 3217 /* Couldn't or didn't -- back up. */ 3218 n--; 3219 PL_reginput = locinput = HOPc(locinput, -1); 3220 } 3221 } 3222 else { 3223 while (n >= ln) { 3224 /* If it could work, try it. */ 3225 if (c1 == -1000 || 3226 UCHARAT(PL_reginput) == c1 || 3227 UCHARAT(PL_reginput) == c2) 3228 { 3229 TRYPAREN(paren, n, PL_reginput); 3230 REGCP_UNWIND(lastcp); 3231 } 3232 /* Couldn't or didn't -- back up. */ 3233 n--; 3234 PL_reginput = locinput = HOPc(locinput, -1); 3235 } 3236 } 3237 } 3238 sayNO; 3239 break; 3240 case END: 3241 if (PL_reg_call_cc) { 3242 re_cc_state *cur_call_cc = PL_reg_call_cc; 3243 CURCUR *cctmp = PL_regcc; 3244 regexp *re = PL_reg_re; 3245 CHECKPOINT cp, lastcp; 3246 3247 cp = regcppush(0); /* Save *all* the positions. */ 3248 REGCP_SET(lastcp); 3249 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of 3250 the caller. */ 3251 PL_reginput = locinput; /* Make position available to 3252 the callcc. */ 3253 cache_re(PL_reg_call_cc->re); 3254 PL_regcc = PL_reg_call_cc->cc; 3255 PL_reg_call_cc = PL_reg_call_cc->prev; 3256 if (regmatch(cur_call_cc->node)) { 3257 PL_reg_call_cc = cur_call_cc; 3258 regcpblow(cp); 3259 sayYES; 3260 } 3261 REGCP_UNWIND(lastcp); 3262 regcppop(); 3263 PL_reg_call_cc = cur_call_cc; 3264 PL_regcc = cctmp; 3265 PL_reg_re = re; 3266 cache_re(re); 3267 3268 DEBUG_r( 3269 PerlIO_printf(Perl_debug_log, 3270 "%*s continuation failed...\n", 3271 REPORT_CODE_OFF+PL_regindent*2, "") 3272 ); 3273 sayNO_SILENT; 3274 } 3275 if (locinput < PL_regtill) { 3276 DEBUG_r(PerlIO_printf(Perl_debug_log, 3277 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n", 3278 PL_colors[4], 3279 (long)(locinput - PL_reg_starttry), 3280 (long)(PL_regtill - PL_reg_starttry), 3281 PL_colors[5])); 3282 sayNO_FINAL; /* Cannot match: too short. */ 3283 } 3284 PL_reginput = locinput; /* put where regtry can find it */ 3285 sayYES_FINAL; /* Success! */ 3286 case SUCCEED: 3287 PL_reginput = locinput; /* put where regtry can find it */ 3288 sayYES_LOUD; /* Success! */ 3289 case SUSPEND: 3290 n = 1; 3291 PL_reginput = locinput; 3292 goto do_ifmatch; 3293 case UNLESSM: 3294 n = 0; 3295 if (scan->flags) { 3296 if (UTF) { /* XXXX This is absolutely 3297 broken, we read before 3298 start of string. */ 3299 s = HOPMAYBEc(locinput, -scan->flags); 3300 if (!s) 3301 goto say_yes; 3302 PL_reginput = s; 3303 } 3304 else { 3305 if (locinput < PL_bostr + scan->flags) 3306 goto say_yes; 3307 PL_reginput = locinput - scan->flags; 3308 goto do_ifmatch; 3309 } 3310 } 3311 else 3312 PL_reginput = locinput; 3313 goto do_ifmatch; 3314 case IFMATCH: 3315 n = 1; 3316 if (scan->flags) { 3317 if (UTF) { /* XXXX This is absolutely 3318 broken, we read before 3319 start of string. */ 3320 s = HOPMAYBEc(locinput, -scan->flags); 3321 if (!s || s < PL_bostr) 3322 goto say_no; 3323 PL_reginput = s; 3324 } 3325 else { 3326 if (locinput < PL_bostr + scan->flags) 3327 goto say_no; 3328 PL_reginput = locinput - scan->flags; 3329 goto do_ifmatch; 3330 } 3331 } 3332 else 3333 PL_reginput = locinput; 3334 3335 do_ifmatch: 3336 inner = NEXTOPER(NEXTOPER(scan)); 3337 if (regmatch(inner) != n) { 3338 say_no: 3339 if (logical) { 3340 logical = 0; 3341 sw = 0; 3342 goto do_longjump; 3343 } 3344 else 3345 sayNO; 3346 } 3347 say_yes: 3348 if (logical) { 3349 logical = 0; 3350 sw = 1; 3351 } 3352 if (OP(scan) == SUSPEND) { 3353 locinput = PL_reginput; 3354 nextchr = UCHARAT(locinput); 3355 } 3356 /* FALL THROUGH. */ 3357 case LONGJMP: 3358 do_longjump: 3359 next = scan + ARG(scan); 3360 if (next == scan) 3361 next = NULL; 3362 break; 3363 default: 3364 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n", 3365 PTR2UV(scan), OP(scan)); 3366 Perl_croak(aTHX_ "regexp memory corruption"); 3367 } 3368 reenter: 3369 scan = next; 3370 } 3371 3372 /* 3373 * We get here only if there's trouble -- normally "case END" is 3374 * the terminating point. 3375 */ 3376 Perl_croak(aTHX_ "corrupted regexp pointers"); 3377 /*NOTREACHED*/ 3378 sayNO; 3379 3380 yes_loud: 3381 DEBUG_r( 3382 PerlIO_printf(Perl_debug_log, 3383 "%*s %scould match...%s\n", 3384 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5]) 3385 ); 3386 goto yes; 3387 yes_final: 3388 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n", 3389 PL_colors[4],PL_colors[5])); 3390 yes: 3391 #ifdef DEBUGGING 3392 PL_regindent--; 3393 #endif 3394 3395 #if 0 /* Breaks $^R */ 3396 if (unwind) 3397 regcpblow(firstcp); 3398 #endif 3399 return 1; 3400 3401 no: 3402 DEBUG_r( 3403 PerlIO_printf(Perl_debug_log, 3404 "%*s %sfailed...%s\n", 3405 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5]) 3406 ); 3407 goto do_no; 3408 no_final: 3409 do_no: 3410 if (unwind) { 3411 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t); 3412 3413 switch (uw->type) { 3414 case RE_UNWIND_BRANCH: 3415 case RE_UNWIND_BRANCHJ: 3416 { 3417 re_unwind_branch_t *uwb = &(uw->branch); 3418 I32 lastparen = uwb->lastparen; 3419 3420 REGCP_UNWIND(uwb->lastcp); 3421 for (n = *PL_reglastparen; n > lastparen; n--) 3422 PL_regendp[n] = -1; 3423 *PL_reglastparen = n; 3424 scan = next = uwb->next; 3425 if ( !scan || 3426 OP(scan) != (uwb->type == RE_UNWIND_BRANCH 3427 ? BRANCH : BRANCHJ) ) { /* Failure */ 3428 unwind = uwb->prev; 3429 #ifdef DEBUGGING 3430 PL_regindent--; 3431 #endif 3432 goto do_no; 3433 } 3434 /* Have more choice yet. Reuse the same uwb. */ 3435 /*SUPPRESS 560*/ 3436 if ((n = (uwb->type == RE_UNWIND_BRANCH 3437 ? NEXT_OFF(next) : ARG(next)))) 3438 next += n; 3439 else 3440 next = NULL; /* XXXX Needn't unwinding in this case... */ 3441 uwb->next = next; 3442 next = NEXTOPER(scan); 3443 if (uwb->type == RE_UNWIND_BRANCHJ) 3444 next = NEXTOPER(next); 3445 locinput = uwb->locinput; 3446 nextchr = uwb->nextchr; 3447 #ifdef DEBUGGING 3448 PL_regindent = uwb->regindent; 3449 #endif 3450 3451 goto reenter; 3452 } 3453 /* NOT REACHED */ 3454 default: 3455 Perl_croak(aTHX_ "regexp unwind memory corruption"); 3456 } 3457 /* NOT REACHED */ 3458 } 3459 #ifdef DEBUGGING 3460 PL_regindent--; 3461 #endif 3462 return 0; 3463 } 3464 3465 /* 3466 - regrepeat - repeatedly match something simple, report how many 3467 */ 3468 /* 3469 * [This routine now assumes that it will only match on things of length 1. 3470 * That was true before, but now we assume scan - reginput is the count, 3471 * rather than incrementing count on every character. [Er, except utf8.]] 3472 */ 3473 STATIC I32 3474 S_regrepeat(pTHX_ regnode *p, I32 max) 3475 { 3476 register char *scan; 3477 register I32 c; 3478 register char *loceol = PL_regeol; 3479 register I32 hardcount = 0; 3480 3481 scan = PL_reginput; 3482 if (max != REG_INFTY && max < loceol - scan) 3483 loceol = scan + max; 3484 switch (OP(p)) { 3485 case REG_ANY: 3486 while (scan < loceol && *scan != '\n') 3487 scan++; 3488 break; 3489 case SANY: 3490 scan = loceol; 3491 break; 3492 case ANYUTF8: 3493 loceol = PL_regeol; 3494 while (scan < loceol && *scan != '\n') { 3495 scan += UTF8SKIP(scan); 3496 hardcount++; 3497 } 3498 break; 3499 case SANYUTF8: 3500 loceol = PL_regeol; 3501 while (scan < loceol) { 3502 scan += UTF8SKIP(scan); 3503 hardcount++; 3504 } 3505 break; 3506 case EXACT: /* length of string is 1 */ 3507 c = (U8)*STRING(p); 3508 while (scan < loceol && UCHARAT(scan) == c) 3509 scan++; 3510 break; 3511 case EXACTF: /* length of string is 1 */ 3512 c = (U8)*STRING(p); 3513 while (scan < loceol && 3514 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c])) 3515 scan++; 3516 break; 3517 case EXACTFL: /* length of string is 1 */ 3518 PL_reg_flags |= RF_tainted; 3519 c = (U8)*STRING(p); 3520 while (scan < loceol && 3521 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c])) 3522 scan++; 3523 break; 3524 case ANYOFUTF8: 3525 loceol = PL_regeol; 3526 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) { 3527 scan += UTF8SKIP(scan); 3528 hardcount++; 3529 } 3530 break; 3531 case ANYOF: 3532 while (scan < loceol && REGINCLASS(p, *scan)) 3533 scan++; 3534 break; 3535 case ALNUM: 3536 while (scan < loceol && isALNUM(*scan)) 3537 scan++; 3538 break; 3539 case ALNUMUTF8: 3540 loceol = PL_regeol; 3541 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) { 3542 scan += UTF8SKIP(scan); 3543 hardcount++; 3544 } 3545 break; 3546 case ALNUML: 3547 PL_reg_flags |= RF_tainted; 3548 while (scan < loceol && isALNUM_LC(*scan)) 3549 scan++; 3550 break; 3551 case ALNUMLUTF8: 3552 PL_reg_flags |= RF_tainted; 3553 loceol = PL_regeol; 3554 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) { 3555 scan += UTF8SKIP(scan); 3556 hardcount++; 3557 } 3558 break; 3559 break; 3560 case NALNUM: 3561 while (scan < loceol && !isALNUM(*scan)) 3562 scan++; 3563 break; 3564 case NALNUMUTF8: 3565 loceol = PL_regeol; 3566 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) { 3567 scan += UTF8SKIP(scan); 3568 hardcount++; 3569 } 3570 break; 3571 case NALNUML: 3572 PL_reg_flags |= RF_tainted; 3573 while (scan < loceol && !isALNUM_LC(*scan)) 3574 scan++; 3575 break; 3576 case NALNUMLUTF8: 3577 PL_reg_flags |= RF_tainted; 3578 loceol = PL_regeol; 3579 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) { 3580 scan += UTF8SKIP(scan); 3581 hardcount++; 3582 } 3583 break; 3584 case SPACE: 3585 while (scan < loceol && isSPACE(*scan)) 3586 scan++; 3587 break; 3588 case SPACEUTF8: 3589 loceol = PL_regeol; 3590 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) { 3591 scan += UTF8SKIP(scan); 3592 hardcount++; 3593 } 3594 break; 3595 case SPACEL: 3596 PL_reg_flags |= RF_tainted; 3597 while (scan < loceol && isSPACE_LC(*scan)) 3598 scan++; 3599 break; 3600 case SPACELUTF8: 3601 PL_reg_flags |= RF_tainted; 3602 loceol = PL_regeol; 3603 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) { 3604 scan += UTF8SKIP(scan); 3605 hardcount++; 3606 } 3607 break; 3608 case NSPACE: 3609 while (scan < loceol && !isSPACE(*scan)) 3610 scan++; 3611 break; 3612 case NSPACEUTF8: 3613 loceol = PL_regeol; 3614 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) { 3615 scan += UTF8SKIP(scan); 3616 hardcount++; 3617 } 3618 break; 3619 case NSPACEL: 3620 PL_reg_flags |= RF_tainted; 3621 while (scan < loceol && !isSPACE_LC(*scan)) 3622 scan++; 3623 break; 3624 case NSPACELUTF8: 3625 PL_reg_flags |= RF_tainted; 3626 loceol = PL_regeol; 3627 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) { 3628 scan += UTF8SKIP(scan); 3629 hardcount++; 3630 } 3631 break; 3632 case DIGIT: 3633 while (scan < loceol && isDIGIT(*scan)) 3634 scan++; 3635 break; 3636 case DIGITUTF8: 3637 loceol = PL_regeol; 3638 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) { 3639 scan += UTF8SKIP(scan); 3640 hardcount++; 3641 } 3642 break; 3643 break; 3644 case NDIGIT: 3645 while (scan < loceol && !isDIGIT(*scan)) 3646 scan++; 3647 break; 3648 case NDIGITUTF8: 3649 loceol = PL_regeol; 3650 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) { 3651 scan += UTF8SKIP(scan); 3652 hardcount++; 3653 } 3654 break; 3655 default: /* Called on something of 0 width. */ 3656 break; /* So match right here or not at all. */ 3657 } 3658 3659 if (hardcount) 3660 c = hardcount; 3661 else 3662 c = scan - PL_reginput; 3663 PL_reginput = scan; 3664 3665 DEBUG_r( 3666 { 3667 SV *prop = sv_newmortal(); 3668 3669 regprop(prop, p); 3670 PerlIO_printf(Perl_debug_log, 3671 "%*s %s can match %"IVdf" times out of %"IVdf"...\n", 3672 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max); 3673 }); 3674 3675 return(c); 3676 } 3677 3678 /* 3679 - regrepeat_hard - repeatedly match something, report total lenth and length 3680 * 3681 * The repeater is supposed to have constant length. 3682 */ 3683 3684 STATIC I32 3685 S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp) 3686 { 3687 register char *scan; 3688 register char *start; 3689 register char *loceol = PL_regeol; 3690 I32 l = 0; 3691 I32 count = 0, res = 1; 3692 3693 if (!max) 3694 return 0; 3695 3696 start = PL_reginput; 3697 if (UTF) { 3698 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) { 3699 if (!count++) { 3700 l = 0; 3701 while (start < PL_reginput) { 3702 l++; 3703 start += UTF8SKIP(start); 3704 } 3705 *lp = l; 3706 if (l == 0) 3707 return max; 3708 } 3709 if (count == max) 3710 return count; 3711 } 3712 } 3713 else { 3714 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) { 3715 if (!count++) { 3716 *lp = l = PL_reginput - start; 3717 if (max != REG_INFTY && l*max < loceol - scan) 3718 loceol = scan + l*max; 3719 if (l == 0) 3720 return max; 3721 } 3722 } 3723 } 3724 if (!res) 3725 PL_reginput = scan; 3726 3727 return count; 3728 } 3729 3730 /* 3731 - reginclass - determine if a character falls into a character class 3732 */ 3733 3734 STATIC bool 3735 S_reginclass(pTHX_ register regnode *p, register I32 c) 3736 { 3737 char flags = ANYOF_FLAGS(p); 3738 bool match = FALSE; 3739 3740 c &= 0xFF; 3741 if (ANYOF_BITMAP_TEST(p, c)) 3742 match = TRUE; 3743 else if (flags & ANYOF_FOLD) { 3744 I32 cf; 3745 if (flags & ANYOF_LOCALE) { 3746 PL_reg_flags |= RF_tainted; 3747 cf = PL_fold_locale[c]; 3748 } 3749 else 3750 cf = PL_fold[c]; 3751 if (ANYOF_BITMAP_TEST(p, cf)) 3752 match = TRUE; 3753 } 3754 3755 if (!match && (flags & ANYOF_CLASS)) { 3756 PL_reg_flags |= RF_tainted; 3757 if ( 3758 (ANYOF_CLASS_TEST(p, ANYOF_ALNUM) && isALNUM_LC(c)) || 3759 (ANYOF_CLASS_TEST(p, ANYOF_NALNUM) && !isALNUM_LC(c)) || 3760 (ANYOF_CLASS_TEST(p, ANYOF_SPACE) && isSPACE_LC(c)) || 3761 (ANYOF_CLASS_TEST(p, ANYOF_NSPACE) && !isSPACE_LC(c)) || 3762 (ANYOF_CLASS_TEST(p, ANYOF_DIGIT) && isDIGIT_LC(c)) || 3763 (ANYOF_CLASS_TEST(p, ANYOF_NDIGIT) && !isDIGIT_LC(c)) || 3764 (ANYOF_CLASS_TEST(p, ANYOF_ALNUMC) && isALNUMC_LC(c)) || 3765 (ANYOF_CLASS_TEST(p, ANYOF_NALNUMC) && !isALNUMC_LC(c)) || 3766 (ANYOF_CLASS_TEST(p, ANYOF_ALPHA) && isALPHA_LC(c)) || 3767 (ANYOF_CLASS_TEST(p, ANYOF_NALPHA) && !isALPHA_LC(c)) || 3768 (ANYOF_CLASS_TEST(p, ANYOF_ASCII) && isASCII(c)) || 3769 (ANYOF_CLASS_TEST(p, ANYOF_NASCII) && !isASCII(c)) || 3770 (ANYOF_CLASS_TEST(p, ANYOF_CNTRL) && isCNTRL_LC(c)) || 3771 (ANYOF_CLASS_TEST(p, ANYOF_NCNTRL) && !isCNTRL_LC(c)) || 3772 (ANYOF_CLASS_TEST(p, ANYOF_GRAPH) && isGRAPH_LC(c)) || 3773 (ANYOF_CLASS_TEST(p, ANYOF_NGRAPH) && !isGRAPH_LC(c)) || 3774 (ANYOF_CLASS_TEST(p, ANYOF_LOWER) && isLOWER_LC(c)) || 3775 (ANYOF_CLASS_TEST(p, ANYOF_NLOWER) && !isLOWER_LC(c)) || 3776 (ANYOF_CLASS_TEST(p, ANYOF_PRINT) && isPRINT_LC(c)) || 3777 (ANYOF_CLASS_TEST(p, ANYOF_NPRINT) && !isPRINT_LC(c)) || 3778 (ANYOF_CLASS_TEST(p, ANYOF_PUNCT) && isPUNCT_LC(c)) || 3779 (ANYOF_CLASS_TEST(p, ANYOF_NPUNCT) && !isPUNCT_LC(c)) || 3780 (ANYOF_CLASS_TEST(p, ANYOF_UPPER) && isUPPER_LC(c)) || 3781 (ANYOF_CLASS_TEST(p, ANYOF_NUPPER) && !isUPPER_LC(c)) || 3782 (ANYOF_CLASS_TEST(p, ANYOF_XDIGIT) && isXDIGIT(c)) || 3783 (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c)) || 3784 (ANYOF_CLASS_TEST(p, ANYOF_PSXSPC) && isPSXSPC(c)) || 3785 (ANYOF_CLASS_TEST(p, ANYOF_NPSXSPC) && !isPSXSPC(c)) || 3786 (ANYOF_CLASS_TEST(p, ANYOF_BLANK) && isBLANK(c)) || 3787 (ANYOF_CLASS_TEST(p, ANYOF_NBLANK) && !isBLANK(c)) 3788 ) /* How's that for a conditional? */ 3789 { 3790 match = TRUE; 3791 } 3792 } 3793 3794 return (flags & ANYOF_INVERT) ? !match : match; 3795 } 3796 3797 STATIC bool 3798 S_reginclassutf8(pTHX_ regnode *f, U8 *p) 3799 { 3800 char flags = ARG1(f); 3801 bool match = FALSE; 3802 #ifdef DEBUGGING 3803 SV *rv = (SV*)PL_regdata->data[ARG2(f)]; 3804 AV *av = (AV*)SvRV((SV*)rv); 3805 SV *sw = *av_fetch(av, 0, FALSE); 3806 SV *lv = *av_fetch(av, 1, FALSE); 3807 #else 3808 SV *sw = (SV*)PL_regdata->data[ARG2(f)]; 3809 #endif 3810 3811 if (swash_fetch(sw, p)) 3812 match = TRUE; 3813 else if (flags & ANYOF_FOLD) { 3814 U8 tmpbuf[UTF8_MAXLEN+1]; 3815 if (flags & ANYOF_LOCALE) { 3816 PL_reg_flags |= RF_tainted; 3817 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p)); 3818 } 3819 else 3820 uv_to_utf8(tmpbuf, toLOWER_utf8(p)); 3821 if (swash_fetch(sw, tmpbuf)) 3822 match = TRUE; 3823 } 3824 3825 /* UTF8 combined with ANYOF_CLASS is ill-defined. */ 3826 3827 return (flags & ANYOF_INVERT) ? !match : match; 3828 } 3829 3830 STATIC U8 * 3831 S_reghop(pTHX_ U8 *s, I32 off) 3832 { 3833 if (off >= 0) { 3834 while (off-- && s < (U8*)PL_regeol) 3835 s += UTF8SKIP(s); 3836 } 3837 else { 3838 while (off++) { 3839 if (s > (U8*)PL_bostr) { 3840 s--; 3841 if (*s & 0x80) { 3842 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80) 3843 s--; 3844 } /* XXX could check well-formedness here */ 3845 } 3846 } 3847 } 3848 return s; 3849 } 3850 3851 STATIC U8 * 3852 S_reghopmaybe(pTHX_ U8* s, I32 off) 3853 { 3854 if (off >= 0) { 3855 while (off-- && s < (U8*)PL_regeol) 3856 s += UTF8SKIP(s); 3857 if (off >= 0) 3858 return 0; 3859 } 3860 else { 3861 while (off++) { 3862 if (s > (U8*)PL_bostr) { 3863 s--; 3864 if (*s & 0x80) { 3865 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80) 3866 s--; 3867 } /* XXX could check well-formedness here */ 3868 } 3869 else 3870 break; 3871 } 3872 if (off <= 0) 3873 return 0; 3874 } 3875 return s; 3876 } 3877 3878 #ifdef PERL_OBJECT 3879 #include "XSUB.h" 3880 #endif 3881 3882 static void 3883 restore_pos(pTHXo_ void *arg) 3884 { 3885 if (PL_reg_eval_set) { 3886 if (PL_reg_oldsaved) { 3887 PL_reg_re->subbeg = PL_reg_oldsaved; 3888 PL_reg_re->sublen = PL_reg_oldsavedlen; 3889 RX_MATCH_COPIED_on(PL_reg_re); 3890 } 3891 PL_reg_magic->mg_len = PL_reg_oldpos; 3892 PL_reg_eval_set = 0; 3893 PL_curpm = PL_reg_oldcurpm; 3894 } 3895 } 3896