1 /* toke.c 2 * 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others 5 * 6 * You may distribute under the terms of either the GNU General Public 7 * License or the Artistic License, as specified in the README file. 8 * 9 */ 10 11 /* 12 * 'It all comes from here, the stench and the peril.' --Frodo 13 * 14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"] 15 */ 16 17 /* 18 * This file is the lexer for Perl. It's closely linked to the 19 * parser, perly.y. 20 * 21 * The main routine is yylex(), which returns the next token. 22 */ 23 24 /* 25 =head1 Lexer interface 26 This is the lower layer of the Perl parser, managing characters and tokens. 27 28 =for apidoc AmnU|yy_parser *|PL_parser 29 30 Pointer to a structure encapsulating the state of the parsing operation 31 currently in progress. The pointer can be locally changed to perform 32 a nested parse without interfering with the state of an outer parse. 33 Individual members of C<PL_parser> have their own documentation. 34 35 =cut 36 */ 37 38 #include "EXTERN.h" 39 #define PERL_IN_TOKE_C 40 #include "perl.h" 41 #include "invlist_inline.h" 42 43 #define new_constant(a,b,c,d,e,f,g, h) \ 44 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g, h) 45 46 #define pl_yylval (PL_parser->yylval) 47 48 /* XXX temporary backwards compatibility */ 49 #define PL_lex_brackets (PL_parser->lex_brackets) 50 #define PL_lex_allbrackets (PL_parser->lex_allbrackets) 51 #define PL_lex_fakeeof (PL_parser->lex_fakeeof) 52 #define PL_lex_brackstack (PL_parser->lex_brackstack) 53 #define PL_lex_casemods (PL_parser->lex_casemods) 54 #define PL_lex_casestack (PL_parser->lex_casestack) 55 #define PL_lex_dojoin (PL_parser->lex_dojoin) 56 #define PL_lex_formbrack (PL_parser->lex_formbrack) 57 #define PL_lex_inpat (PL_parser->lex_inpat) 58 #define PL_lex_inwhat (PL_parser->lex_inwhat) 59 #define PL_lex_op (PL_parser->lex_op) 60 #define PL_lex_repl (PL_parser->lex_repl) 61 #define PL_lex_starts (PL_parser->lex_starts) 62 #define PL_lex_stuff (PL_parser->lex_stuff) 63 #define PL_multi_start (PL_parser->multi_start) 64 #define PL_multi_open (PL_parser->multi_open) 65 #define PL_multi_close (PL_parser->multi_close) 66 #define PL_preambled (PL_parser->preambled) 67 #define PL_linestr (PL_parser->linestr) 68 #define PL_expect (PL_parser->expect) 69 #define PL_copline (PL_parser->copline) 70 #define PL_bufptr (PL_parser->bufptr) 71 #define PL_oldbufptr (PL_parser->oldbufptr) 72 #define PL_oldoldbufptr (PL_parser->oldoldbufptr) 73 #define PL_linestart (PL_parser->linestart) 74 #define PL_bufend (PL_parser->bufend) 75 #define PL_last_uni (PL_parser->last_uni) 76 #define PL_last_lop (PL_parser->last_lop) 77 #define PL_last_lop_op (PL_parser->last_lop_op) 78 #define PL_lex_state (PL_parser->lex_state) 79 #define PL_rsfp (PL_parser->rsfp) 80 #define PL_rsfp_filters (PL_parser->rsfp_filters) 81 #define PL_in_my (PL_parser->in_my) 82 #define PL_in_my_stash (PL_parser->in_my_stash) 83 #define PL_tokenbuf (PL_parser->tokenbuf) 84 #define PL_multi_end (PL_parser->multi_end) 85 #define PL_error_count (PL_parser->error_count) 86 87 # define PL_nexttoke (PL_parser->nexttoke) 88 # define PL_nexttype (PL_parser->nexttype) 89 # define PL_nextval (PL_parser->nextval) 90 91 92 #define SvEVALED(sv) \ 93 (SvTYPE(sv) >= SVt_PVNV \ 94 && ((XPVIV*)SvANY(sv))->xiv_u.xivu_eval_seen) 95 96 static const char ident_too_long[] = "Identifier too long"; 97 static const char ident_var_zero_multi_digit[] = "Numeric variables with more than one digit may not start with '0'"; 98 99 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke] 100 101 #define XENUMMASK 0x3f 102 #define XFAKEEOF 0x40 103 #define XFAKEBRACK 0x80 104 105 #ifdef USE_UTF8_SCRIPTS 106 # define UTF cBOOL(!IN_BYTES) 107 #else 108 # define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8))) 109 #endif 110 111 /* The maximum number of characters preceding the unrecognized one to display */ 112 #define UNRECOGNIZED_PRECEDE_COUNT 10 113 114 /* In variables named $^X, these are the legal values for X. 115 * 1999-02-27 mjd-perl-patch@plover.com */ 116 #define isCONTROLVAR(x) (isUPPER(x) || memCHRs("[\\]^_?", (x))) 117 118 #define SPACE_OR_TAB(c) isBLANK_A(c) 119 120 #define HEXFP_PEEK(s) \ 121 (((s[0] == '.') && \ 122 (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \ 123 isALPHA_FOLD_EQ(s[0], 'p')) 124 125 /* LEX_* are values for PL_lex_state, the state of the lexer. 126 * They are arranged oddly so that the guard on the switch statement 127 * can get by with a single comparison (if the compiler is smart enough). 128 * 129 * These values refer to the various states within a sublex parse, 130 * i.e. within a double quotish string 131 */ 132 133 /* #define LEX_NOTPARSING 11 is done in perl.h. */ 134 135 #define LEX_NORMAL 10 /* normal code (ie not within "...") */ 136 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */ 137 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */ 138 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */ 139 #define LEX_INTERPSTART 6 /* expecting the start of a $var */ 140 141 /* at end of code, eg "$x" followed by: */ 142 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */ 143 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */ 144 145 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of 146 string or after \E, $foo, etc */ 147 #define LEX_INTERPCONST 2 /* NOT USED */ 148 #define LEX_FORMLINE 1 /* expecting a format line */ 149 150 /* returned to yyl_try() to request it to retry the parse loop, expected to only 151 be returned directly by yyl_fake_eof(), but functions that call yyl_fake_eof() 152 can also return it. 153 154 yylex (aka Perl_yylex) returns 0 on EOF rather than returning -1, 155 other token values are 258 or higher (see perly.h), so -1 should be 156 a safe value here. 157 */ 158 #define YYL_RETRY (-1) 159 160 #ifdef DEBUGGING 161 static const char* const lex_state_names[] = { 162 "KNOWNEXT", 163 "FORMLINE", 164 "INTERPCONST", 165 "INTERPCONCAT", 166 "INTERPENDMAYBE", 167 "INTERPEND", 168 "INTERPSTART", 169 "INTERPPUSH", 170 "INTERPCASEMOD", 171 "INTERPNORMAL", 172 "NORMAL" 173 }; 174 #endif 175 176 #include "keywords.h" 177 178 /* CLINE is a macro that ensures PL_copline has a sane value */ 179 180 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline)) 181 182 /* 183 * Convenience functions to return different tokens and prime the 184 * lexer for the next token. They all take an argument. 185 * 186 * TOKEN : generic token (used for '(', DOLSHARP, etc) 187 * OPERATOR : generic operator 188 * AOPERATOR : assignment operator 189 * PREBLOCK : beginning the block after an if, while, foreach, ... 190 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref) 191 * PREREF : *EXPR where EXPR is not a simple identifier 192 * TERM : expression term 193 * POSTDEREF : postfix dereference (->$* ->@[...] etc.) 194 * LOOPX : loop exiting command (goto, last, dump, etc) 195 * FTST : file test operator 196 * FUN0 : zero-argument function 197 * FUN0OP : zero-argument function, with its op created in this file 198 * FUN1 : not used, except for not, which isn't a UNIOP 199 * BOop : bitwise or or xor 200 * BAop : bitwise and 201 * BCop : bitwise complement 202 * SHop : shift operator 203 * PWop : power operator 204 * PMop : pattern-matching operator 205 * Aop : addition-level operator 206 * AopNOASSIGN : addition-level operator that is never part of .= 207 * Mop : multiplication-level operator 208 * ChEop : chaining equality-testing operator 209 * NCEop : non-chaining comparison operator at equality precedence 210 * ChRop : chaining relational operator <= != gt 211 * NCRop : non-chaining relational operator isa 212 * 213 * Also see LOP and lop() below. 214 */ 215 216 #ifdef DEBUGGING /* Serve -DT. */ 217 # define REPORT(retval) tokereport((I32)retval, &pl_yylval) 218 #else 219 # define REPORT(retval) (retval) 220 #endif 221 222 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval)) 223 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval)) 224 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval)) 225 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval)) 226 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval)) 227 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval)) 228 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval)) 229 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1])) 230 #define LOOPX(f) return (PL_bufptr = force_word(s,BAREWORD,TRUE,FALSE), \ 231 pl_yylval.ival=f, \ 232 PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \ 233 REPORT((int)LOOPEX)) 234 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP)) 235 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0)) 236 #define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP)) 237 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1)) 238 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP)) 239 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP)) 240 #define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \ 241 REPORT(PERLY_TILDE) 242 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP)) 243 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP)) 244 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP)) 245 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP)) 246 #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP)) 247 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP)) 248 #define ChEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHEQOP)) 249 #define NCEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCEQOP)) 250 #define ChRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHRELOP)) 251 #define NCRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCRELOP)) 252 253 /* This bit of chicanery makes a unary function followed by 254 * a parenthesis into a function with one argument, highest precedence. 255 * The UNIDOR macro is for unary functions that can be followed by the // 256 * operator (such as C<shift // 0>). 257 */ 258 #define UNI3(f,x,have_x) { \ 259 pl_yylval.ival = f; \ 260 if (have_x) PL_expect = x; \ 261 PL_bufptr = s; \ 262 PL_last_uni = PL_oldbufptr; \ 263 PL_last_lop_op = (f) < 0 ? -(f) : (f); \ 264 if (*s == '(') \ 265 return REPORT( (int)FUNC1 ); \ 266 s = skipspace(s); \ 267 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \ 268 } 269 #define UNI(f) UNI3(f,XTERM,1) 270 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1) 271 #define UNIPROTO(f,optional) { \ 272 if (optional) PL_last_uni = PL_oldbufptr; \ 273 OPERATOR(f); \ 274 } 275 276 #define UNIBRACK(f) UNI3(f,0,0) 277 278 /* return has special case parsing. 279 * 280 * List operators have low precedence. Functions have high precedence. 281 * Every built in, *except return*, if written with () around its arguments, is 282 * parsed as a function. Hence every other list built in: 283 * 284 * $ perl -lwe 'sub foo { join 2,4,6 * 1.5 } print for foo()' # join 2,4,9 285 * 429 286 * $ perl -lwe 'sub foo { join(2,4,6) * 1.5 } print for foo()' # 426 * 1.5 287 * 639 288 * $ perl -lwe 'sub foo { join+(2,4,6) * 1.5 } print for foo()' 289 * Useless use of a constant (2) in void context at -e line 1. 290 * Useless use of a constant (4) in void context at -e line 1. 291 * 292 * $ 293 * 294 * empty line output because C<(2, 4, 6) * 1.5> is the comma operator, not a 295 * list. * forces scalar context, 6 * 1.5 is 9, and join(9) is the empty string. 296 * 297 * Whereas return: 298 * 299 * $ perl -lwe 'sub foo { return 2,4,6 * 1.5 } print for foo()' 300 * 2 301 * 4 302 * 9 303 * $ perl -lwe 'sub foo { return(2,4,6) * 1.5 } print for foo()' 304 * Useless use of a constant (2) in void context at -e line 1. 305 * Useless use of a constant (4) in void context at -e line 1. 306 * 9 307 * $ perl -lwe 'sub foo { return+(2,4,6) * 1.5 } print for foo()' 308 * Useless use of a constant (2) in void context at -e line 1. 309 * Useless use of a constant (4) in void context at -e line 1. 310 * 9 311 * $ 312 * 313 * and: 314 * $ perl -lwe 'sub foo { return(2,4,6) } print for foo()' 315 * 2 316 * 4 317 * 6 318 * 319 * This last example is what we expect, but it's clearly inconsistent with how 320 * C<return(2,4,6) * 1.5> *ought* to behave, if the rules were consistently 321 * followed. 322 * 323 * 324 * Perl 3 attempted to be consistent: 325 * 326 * The rules are more consistent about where parens are needed and 327 * where they are not. In particular, unary operators and list operators now 328 * behave like functions if they're called like functions. 329 * 330 * However, the behaviour for return was reverted to the "old" parsing with 331 * patches 9-12: 332 * 333 * The construct 334 * return (1,2,3); 335 * did not do what was expected, since return was swallowing the 336 * parens in order to consider itself a function. The solution, 337 * since return never wants any trailing expression such as 338 * return (1,2,3) + 2; 339 * is to simply make return an exception to the paren-makes-a-function 340 * rule, and treat it the way it always was, so that it doesn't 341 * strip the parens. 342 * 343 * To demonstrate the special-case parsing, replace OLDLOP(OP_RETURN); with 344 * LOP(OP_RETURN, XTERM); 345 * 346 * and constructs such as 347 * 348 * return (Internals::V())[2] 349 * 350 * turn into syntax errors 351 */ 352 353 #define OLDLOP(f) \ 354 do { \ 355 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \ 356 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \ 357 pl_yylval.ival = (f); \ 358 PL_expect = XTERM; \ 359 PL_bufptr = s; \ 360 return (int)LSTOP; \ 361 } while(0) 362 363 #define COPLINE_INC_WITH_HERELINES \ 364 STMT_START { \ 365 CopLINE_inc(PL_curcop); \ 366 if (PL_parser->herelines) \ 367 CopLINE(PL_curcop) += PL_parser->herelines, \ 368 PL_parser->herelines = 0; \ 369 } STMT_END 370 /* Called after scan_str to update CopLINE(PL_curcop), but only when there 371 * is no sublex_push to follow. */ 372 #define COPLINE_SET_FROM_MULTI_END \ 373 STMT_START { \ 374 CopLINE_set(PL_curcop, PL_multi_end); \ 375 if (PL_multi_end != PL_multi_start) \ 376 PL_parser->herelines = 0; \ 377 } STMT_END 378 379 380 /* A file-local structure for passing around information about subroutines and 381 * related definable words */ 382 struct code { 383 SV *sv; 384 CV *cv; 385 GV *gv, **gvp; 386 OP *rv2cv_op; 387 PADOFFSET off; 388 bool lex; 389 }; 390 391 static const struct code no_code = { NULL, NULL, NULL, NULL, NULL, 0, FALSE }; 392 393 #ifdef DEBUGGING 394 395 /* how to interpret the pl_yylval associated with the token */ 396 enum token_type { 397 TOKENTYPE_NONE, 398 TOKENTYPE_IVAL, 399 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */ 400 TOKENTYPE_PVAL, 401 TOKENTYPE_OPVAL 402 }; 403 404 #define DEBUG_TOKEN(Type, Name) \ 405 { Name, TOKENTYPE_##Type, #Name } 406 407 static struct debug_tokens { 408 const int token; 409 enum token_type type; 410 const char *name; 411 } const debug_tokens[] = 412 { 413 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" }, 414 { ANDAND, TOKENTYPE_NONE, "ANDAND" }, 415 { ANDOP, TOKENTYPE_NONE, "ANDOP" }, 416 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" }, 417 { ANON_SIGSUB, TOKENTYPE_IVAL, "ANON_SIGSUB" }, 418 { ARROW, TOKENTYPE_NONE, "ARROW" }, 419 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" }, 420 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" }, 421 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" }, 422 { CATCH, TOKENTYPE_IVAL, "CATCH" }, 423 { CHEQOP, TOKENTYPE_OPNUM, "CHEQOP" }, 424 { CHRELOP, TOKENTYPE_OPNUM, "CHRELOP" }, 425 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" }, 426 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" }, 427 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" }, 428 { DO, TOKENTYPE_NONE, "DO" }, 429 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" }, 430 { DORDOR, TOKENTYPE_NONE, "DORDOR" }, 431 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" }, 432 { ELSE, TOKENTYPE_NONE, "ELSE" }, 433 { ELSIF, TOKENTYPE_IVAL, "ELSIF" }, 434 { FOR, TOKENTYPE_IVAL, "FOR" }, 435 { FORMAT, TOKENTYPE_NONE, "FORMAT" }, 436 { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" }, 437 { FORMRBRACK, TOKENTYPE_NONE, "FORMRBRACK" }, 438 { FUNC, TOKENTYPE_OPNUM, "FUNC" }, 439 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" }, 440 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" }, 441 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" }, 442 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" }, 443 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" }, 444 { GIVEN, TOKENTYPE_IVAL, "GIVEN" }, 445 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" }, 446 { IF, TOKENTYPE_IVAL, "IF" }, 447 { LABEL, TOKENTYPE_OPVAL, "LABEL" }, 448 { LOCAL, TOKENTYPE_IVAL, "LOCAL" }, 449 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" }, 450 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" }, 451 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" }, 452 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" }, 453 { METHOD, TOKENTYPE_OPVAL, "METHOD" }, 454 { MULOP, TOKENTYPE_OPNUM, "MULOP" }, 455 { MY, TOKENTYPE_IVAL, "MY" }, 456 { NCEQOP, TOKENTYPE_OPNUM, "NCEQOP" }, 457 { NCRELOP, TOKENTYPE_OPNUM, "NCRELOP" }, 458 { NOAMP, TOKENTYPE_NONE, "NOAMP" }, 459 { NOTOP, TOKENTYPE_NONE, "NOTOP" }, 460 { OROP, TOKENTYPE_IVAL, "OROP" }, 461 { OROR, TOKENTYPE_NONE, "OROR" }, 462 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" }, 463 DEBUG_TOKEN (IVAL, PERLY_AMPERSAND), 464 DEBUG_TOKEN (IVAL, PERLY_BRACE_CLOSE), 465 DEBUG_TOKEN (IVAL, PERLY_BRACE_OPEN), 466 DEBUG_TOKEN (IVAL, PERLY_BRACKET_CLOSE), 467 DEBUG_TOKEN (IVAL, PERLY_BRACKET_OPEN), 468 DEBUG_TOKEN (IVAL, PERLY_COLON), 469 DEBUG_TOKEN (IVAL, PERLY_COMMA), 470 DEBUG_TOKEN (IVAL, PERLY_DOT), 471 DEBUG_TOKEN (IVAL, PERLY_EQUAL_SIGN), 472 DEBUG_TOKEN (IVAL, PERLY_EXCLAMATION_MARK), 473 DEBUG_TOKEN (IVAL, PERLY_MINUS), 474 DEBUG_TOKEN (IVAL, PERLY_PAREN_OPEN), 475 DEBUG_TOKEN (IVAL, PERLY_PERCENT_SIGN), 476 DEBUG_TOKEN (IVAL, PERLY_PLUS), 477 DEBUG_TOKEN (IVAL, PERLY_QUESTION_MARK), 478 DEBUG_TOKEN (IVAL, PERLY_SEMICOLON), 479 DEBUG_TOKEN (IVAL, PERLY_SLASH), 480 DEBUG_TOKEN (IVAL, PERLY_SNAIL), 481 DEBUG_TOKEN (IVAL, PERLY_STAR), 482 DEBUG_TOKEN (IVAL, PERLY_TILDE), 483 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" }, 484 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" }, 485 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" }, 486 { POSTJOIN, TOKENTYPE_NONE, "POSTJOIN" }, 487 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" }, 488 { POSTINC, TOKENTYPE_NONE, "POSTINC" }, 489 { POWOP, TOKENTYPE_OPNUM, "POWOP" }, 490 { PREDEC, TOKENTYPE_NONE, "PREDEC" }, 491 { PREINC, TOKENTYPE_NONE, "PREINC" }, 492 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" }, 493 { QWLIST, TOKENTYPE_OPVAL, "QWLIST" }, 494 { REFGEN, TOKENTYPE_NONE, "REFGEN" }, 495 { REQUIRE, TOKENTYPE_NONE, "REQUIRE" }, 496 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" }, 497 { SIGSUB, TOKENTYPE_NONE, "SIGSUB" }, 498 { SUB, TOKENTYPE_NONE, "SUB" }, 499 { SUBLEXEND, TOKENTYPE_NONE, "SUBLEXEND" }, 500 { SUBLEXSTART, TOKENTYPE_NONE, "SUBLEXSTART" }, 501 { THING, TOKENTYPE_OPVAL, "THING" }, 502 { TRY, TOKENTYPE_IVAL, "TRY" }, 503 { UMINUS, TOKENTYPE_NONE, "UMINUS" }, 504 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" }, 505 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" }, 506 { UNLESS, TOKENTYPE_IVAL, "UNLESS" }, 507 { UNTIL, TOKENTYPE_IVAL, "UNTIL" }, 508 { USE, TOKENTYPE_IVAL, "USE" }, 509 { WHEN, TOKENTYPE_IVAL, "WHEN" }, 510 { WHILE, TOKENTYPE_IVAL, "WHILE" }, 511 { BAREWORD, TOKENTYPE_OPVAL, "BAREWORD" }, 512 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" }, 513 { 0, TOKENTYPE_NONE, NULL } 514 }; 515 516 #undef DEBUG_TOKEN 517 518 /* dump the returned token in rv, plus any optional arg in pl_yylval */ 519 520 STATIC int 521 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp) 522 { 523 PERL_ARGS_ASSERT_TOKEREPORT; 524 525 if (DEBUG_T_TEST) { 526 const char *name = NULL; 527 enum token_type type = TOKENTYPE_NONE; 528 const struct debug_tokens *p; 529 SV* const report = newSVpvs("<== "); 530 531 for (p = debug_tokens; p->token; p++) { 532 if (p->token == (int)rv) { 533 name = p->name; 534 type = p->type; 535 break; 536 } 537 } 538 if (name) 539 Perl_sv_catpv(aTHX_ report, name); 540 else if (isGRAPH(rv)) 541 { 542 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv); 543 if ((char)rv == 'p') 544 sv_catpvs(report, " (pending identifier)"); 545 } 546 else if (!rv) 547 sv_catpvs(report, "EOF"); 548 else 549 Perl_sv_catpvf(aTHX_ report, "?? %" IVdf, (IV)rv); 550 switch (type) { 551 case TOKENTYPE_NONE: 552 break; 553 case TOKENTYPE_IVAL: 554 Perl_sv_catpvf(aTHX_ report, "(ival=%" IVdf ")", (IV)lvalp->ival); 555 break; 556 case TOKENTYPE_OPNUM: 557 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)", 558 PL_op_name[lvalp->ival]); 559 break; 560 case TOKENTYPE_PVAL: 561 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval); 562 break; 563 case TOKENTYPE_OPVAL: 564 if (lvalp->opval) { 565 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)", 566 PL_op_name[lvalp->opval->op_type]); 567 if (lvalp->opval->op_type == OP_CONST) { 568 Perl_sv_catpvf(aTHX_ report, " %s", 569 SvPEEK(cSVOPx_sv(lvalp->opval))); 570 } 571 572 } 573 else 574 sv_catpvs(report, "(opval=null)"); 575 break; 576 } 577 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report)); 578 }; 579 return (int)rv; 580 } 581 582 583 /* print the buffer with suitable escapes */ 584 585 STATIC void 586 S_printbuf(pTHX_ const char *const fmt, const char *const s) 587 { 588 SV* const tmp = newSVpvs(""); 589 590 PERL_ARGS_ASSERT_PRINTBUF; 591 592 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); /* fmt checked by caller */ 593 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60)); 594 GCC_DIAG_RESTORE_STMT; 595 SvREFCNT_dec(tmp); 596 } 597 598 #endif 599 600 /* 601 * S_ao 602 * 603 * This subroutine looks for an '=' next to the operator that has just been 604 * parsed and turns it into an ASSIGNOP if it finds one. 605 */ 606 607 STATIC int 608 S_ao(pTHX_ int toketype) 609 { 610 if (*PL_bufptr == '=') { 611 PL_bufptr++; 612 613 switch (toketype) { 614 case ANDAND: pl_yylval.ival = OP_ANDASSIGN; break; 615 case OROR: pl_yylval.ival = OP_ORASSIGN; break; 616 case DORDOR: pl_yylval.ival = OP_DORASSIGN; break; 617 } 618 619 toketype = ASSIGNOP; 620 } 621 return REPORT(toketype); 622 } 623 624 /* 625 * S_no_op 626 * When Perl expects an operator and finds something else, no_op 627 * prints the warning. It always prints "<something> found where 628 * operator expected. It prints "Missing semicolon on previous line?" 629 * if the surprise occurs at the start of the line. "do you need to 630 * predeclare ..." is printed out for code like "sub bar; foo bar $x" 631 * where the compiler doesn't know if foo is a method call or a function. 632 * It prints "Missing operator before end of line" if there's nothing 633 * after the missing operator, or "... before <...>" if there is something 634 * after the missing operator. 635 * 636 * PL_bufptr is expected to point to the start of the thing that was found, 637 * and s after the next token or partial token. 638 */ 639 640 STATIC void 641 S_no_op(pTHX_ const char *const what, char *s) 642 { 643 char * const oldbp = PL_bufptr; 644 const bool is_first = (PL_oldbufptr == PL_linestart); 645 646 PERL_ARGS_ASSERT_NO_OP; 647 648 if (!s) 649 s = oldbp; 650 else 651 PL_bufptr = s; 652 yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0); 653 if (ckWARN_d(WARN_SYNTAX)) { 654 if (is_first) 655 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 656 "\t(Missing semicolon on previous line?)\n"); 657 else if (PL_oldoldbufptr && isIDFIRST_lazy_if_safe(PL_oldoldbufptr, 658 PL_bufend, 659 UTF)) 660 { 661 const char *t; 662 for (t = PL_oldoldbufptr; 663 (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) || *t == ':'); 664 t += UTF ? UTF8SKIP(t) : 1) 665 { 666 NOOP; 667 } 668 if (t < PL_bufptr && isSPACE(*t)) 669 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 670 "\t(Do you need to predeclare %" UTF8f "?)\n", 671 UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr)); 672 } 673 else { 674 assert(s >= oldbp); 675 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 676 "\t(Missing operator before %" UTF8f "?)\n", 677 UTF8fARG(UTF, s - oldbp, oldbp)); 678 } 679 } 680 PL_bufptr = oldbp; 681 } 682 683 /* 684 * S_missingterm 685 * Complain about missing quote/regexp/heredoc terminator. 686 * If it's called with NULL then it cauterizes the line buffer. 687 * If we're in a delimited string and the delimiter is a control 688 * character, it's reformatted into a two-char sequence like ^C. 689 * This is fatal. 690 */ 691 692 STATIC void 693 S_missingterm(pTHX_ char *s, STRLEN len) 694 { 695 char tmpbuf[UTF8_MAXBYTES + 1]; 696 char q; 697 bool uni = FALSE; 698 if (s) { 699 char * const nl = (char *) my_memrchr(s, '\n', len); 700 if (nl) { 701 *nl = '\0'; 702 len = nl - s; 703 } 704 uni = UTF; 705 } 706 else if (PL_multi_close < 32) { 707 *tmpbuf = '^'; 708 tmpbuf[1] = (char)toCTRL(PL_multi_close); 709 tmpbuf[2] = '\0'; 710 s = tmpbuf; 711 len = 2; 712 } 713 else { 714 if (! UTF && LIKELY(PL_multi_close < 256)) { 715 *tmpbuf = (char)PL_multi_close; 716 tmpbuf[1] = '\0'; 717 len = 1; 718 } 719 else { 720 char *end = (char *)uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close); 721 *end = '\0'; 722 len = end - tmpbuf; 723 uni = TRUE; 724 } 725 s = tmpbuf; 726 } 727 q = memchr(s, '"', len) ? '\'' : '"'; 728 Perl_croak(aTHX_ "Can't find string terminator %c%" UTF8f "%c" 729 " anywhere before EOF", q, UTF8fARG(uni, len, s), q); 730 } 731 732 #include "feature.h" 733 734 /* 735 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and 736 * utf16-to-utf8-reversed. 737 */ 738 739 #ifdef PERL_CR_FILTER 740 static void 741 strip_return(SV *sv) 742 { 743 const char *s = SvPVX_const(sv); 744 const char * const e = s + SvCUR(sv); 745 746 PERL_ARGS_ASSERT_STRIP_RETURN; 747 748 /* outer loop optimized to do nothing if there are no CR-LFs */ 749 while (s < e) { 750 if (*s++ == '\r' && *s == '\n') { 751 /* hit a CR-LF, need to copy the rest */ 752 char *d = s - 1; 753 *d++ = *s++; 754 while (s < e) { 755 if (*s == '\r' && s[1] == '\n') 756 s++; 757 *d++ = *s++; 758 } 759 SvCUR(sv) -= s - d; 760 return; 761 } 762 } 763 } 764 765 STATIC I32 766 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen) 767 { 768 const I32 count = FILTER_READ(idx+1, sv, maxlen); 769 if (count > 0 && !maxlen) 770 strip_return(sv); 771 return count; 772 } 773 #endif 774 775 /* 776 =for apidoc lex_start 777 778 Creates and initialises a new lexer/parser state object, supplying 779 a context in which to lex and parse from a new source of Perl code. 780 A pointer to the new state object is placed in L</PL_parser>. An entry 781 is made on the save stack so that upon unwinding, the new state object 782 will be destroyed and the former value of L</PL_parser> will be restored. 783 Nothing else need be done to clean up the parsing context. 784 785 The code to be parsed comes from C<line> and C<rsfp>. C<line>, if 786 non-null, provides a string (in SV form) containing code to be parsed. 787 A copy of the string is made, so subsequent modification of C<line> 788 does not affect parsing. C<rsfp>, if non-null, provides an input stream 789 from which code will be read to be parsed. If both are non-null, the 790 code in C<line> comes first and must consist of complete lines of input, 791 and C<rsfp> supplies the remainder of the source. 792 793 The C<flags> parameter is reserved for future use. Currently it is only 794 used by perl internally, so extensions should always pass zero. 795 796 =cut 797 */ 798 799 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it 800 can share filters with the current parser. 801 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the 802 caller, hence isn't owned by the parser, so shouldn't be closed on parser 803 destruction. This is used to handle the case of defaulting to reading the 804 script from the standard input because no filename was given on the command 805 line (without getting confused by situation where STDIN has been closed, so 806 the script handle is opened on fd 0) */ 807 808 void 809 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags) 810 { 811 const char *s = NULL; 812 yy_parser *parser, *oparser; 813 814 if (flags && flags & ~LEX_START_FLAGS) 815 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start"); 816 817 /* create and initialise a parser */ 818 819 Newxz(parser, 1, yy_parser); 820 parser->old_parser = oparser = PL_parser; 821 PL_parser = parser; 822 823 parser->stack = NULL; 824 parser->stack_max1 = NULL; 825 parser->ps = NULL; 826 827 /* on scope exit, free this parser and restore any outer one */ 828 SAVEPARSER(parser); 829 parser->saved_curcop = PL_curcop; 830 831 /* initialise lexer state */ 832 833 parser->nexttoke = 0; 834 parser->error_count = oparser ? oparser->error_count : 0; 835 parser->copline = parser->preambling = NOLINE; 836 parser->lex_state = LEX_NORMAL; 837 parser->expect = XSTATE; 838 parser->rsfp = rsfp; 839 parser->recheck_utf8_validity = TRUE; 840 parser->rsfp_filters = 841 !(flags & LEX_START_SAME_FILTER) || !oparser 842 ? NULL 843 : MUTABLE_AV(SvREFCNT_inc( 844 oparser->rsfp_filters 845 ? oparser->rsfp_filters 846 : (oparser->rsfp_filters = newAV()) 847 )); 848 849 Newx(parser->lex_brackstack, 120, char); 850 Newx(parser->lex_casestack, 12, char); 851 *parser->lex_casestack = '\0'; 852 Newxz(parser->lex_shared, 1, LEXSHARED); 853 854 if (line) { 855 STRLEN len; 856 const U8* first_bad_char_loc; 857 858 s = SvPV_const(line, len); 859 860 if ( SvUTF8(line) 861 && UNLIKELY(! is_utf8_string_loc((U8 *) s, 862 SvCUR(line), 863 &first_bad_char_loc))) 864 { 865 _force_out_malformed_utf8_message(first_bad_char_loc, 866 (U8 *) s + SvCUR(line), 867 0, 868 1 /* 1 means die */ ); 869 NOT_REACHED; /* NOTREACHED */ 870 } 871 872 parser->linestr = flags & LEX_START_COPIED 873 ? SvREFCNT_inc_simple_NN(line) 874 : newSVpvn_flags(s, len, SvUTF8(line)); 875 if (!rsfp) 876 sv_catpvs(parser->linestr, "\n;"); 877 } else { 878 parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2); 879 } 880 881 parser->oldoldbufptr = 882 parser->oldbufptr = 883 parser->bufptr = 884 parser->linestart = SvPVX(parser->linestr); 885 parser->bufend = parser->bufptr + SvCUR(parser->linestr); 886 parser->last_lop = parser->last_uni = NULL; 887 888 STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES 889 |LEX_DONT_CLOSE_RSFP)); 890 parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES 891 |LEX_DONT_CLOSE_RSFP)); 892 893 parser->in_pod = parser->filtered = 0; 894 } 895 896 897 /* delete a parser object */ 898 899 void 900 Perl_parser_free(pTHX_ const yy_parser *parser) 901 { 902 PERL_ARGS_ASSERT_PARSER_FREE; 903 904 PL_curcop = parser->saved_curcop; 905 SvREFCNT_dec(parser->linestr); 906 907 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP) 908 PerlIO_clearerr(parser->rsfp); 909 else if (parser->rsfp && (!parser->old_parser 910 || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp))) 911 PerlIO_close(parser->rsfp); 912 SvREFCNT_dec(parser->rsfp_filters); 913 SvREFCNT_dec(parser->lex_stuff); 914 SvREFCNT_dec(parser->lex_sub_repl); 915 916 Safefree(parser->lex_brackstack); 917 Safefree(parser->lex_casestack); 918 Safefree(parser->lex_shared); 919 PL_parser = parser->old_parser; 920 Safefree(parser); 921 } 922 923 void 924 Perl_parser_free_nexttoke_ops(pTHX_ yy_parser *parser, OPSLAB *slab) 925 { 926 I32 nexttoke = parser->nexttoke; 927 PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS; 928 while (nexttoke--) { 929 if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff) 930 && parser->nextval[nexttoke].opval 931 && parser->nextval[nexttoke].opval->op_slabbed 932 && OpSLAB(parser->nextval[nexttoke].opval) == slab) { 933 op_free(parser->nextval[nexttoke].opval); 934 parser->nextval[nexttoke].opval = NULL; 935 } 936 } 937 } 938 939 940 /* 941 =for apidoc AmnxUN|SV *|PL_parser-E<gt>linestr 942 943 Buffer scalar containing the chunk currently under consideration of the 944 text currently being lexed. This is always a plain string scalar (for 945 which C<SvPOK> is true). It is not intended to be used as a scalar by 946 normal scalar means; instead refer to the buffer directly by the pointer 947 variables described below. 948 949 The lexer maintains various C<char*> pointers to things in the 950 C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever 951 reallocated, all of these pointers must be updated. Don't attempt to 952 do this manually, but rather use L</lex_grow_linestr> if you need to 953 reallocate the buffer. 954 955 The content of the text chunk in the buffer is commonly exactly one 956 complete line of input, up to and including a newline terminator, 957 but there are situations where it is otherwise. The octets of the 958 buffer may be intended to be interpreted as either UTF-8 or Latin-1. 959 The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8> 960 flag on this scalar, which may disagree with it. 961 962 For direct examination of the buffer, the variable 963 L</PL_parser-E<gt>bufend> points to the end of the buffer. The current 964 lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use 965 of these pointers is usually preferable to examination of the scalar 966 through normal scalar means. 967 968 =for apidoc AmnxUN|char *|PL_parser-E<gt>bufend 969 970 Direct pointer to the end of the chunk of text currently being lexed, the 971 end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr) 972 + SvCUR(PL_parser-E<gt>linestr)>. A C<NUL> character (zero octet) is 973 always located at the end of the buffer, and does not count as part of 974 the buffer's contents. 975 976 =for apidoc AmnxUN|char *|PL_parser-E<gt>bufptr 977 978 Points to the current position of lexing inside the lexer buffer. 979 Characters around this point may be freely examined, within 980 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and 981 L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be 982 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>. 983 984 Lexing code (whether in the Perl core or not) moves this pointer past 985 the characters that it consumes. It is also expected to perform some 986 bookkeeping whenever a newline character is consumed. This movement 987 can be more conveniently performed by the function L</lex_read_to>, 988 which handles newlines appropriately. 989 990 Interpretation of the buffer's octets can be abstracted out by 991 using the slightly higher-level functions L</lex_peek_unichar> and 992 L</lex_read_unichar>. 993 994 =for apidoc AmnxUN|char *|PL_parser-E<gt>linestart 995 996 Points to the start of the current line inside the lexer buffer. 997 This is useful for indicating at which column an error occurred, and 998 not much else. This must be updated by any lexing code that consumes 999 a newline; the function L</lex_read_to> handles this detail. 1000 1001 =cut 1002 */ 1003 1004 /* 1005 =for apidoc lex_bufutf8 1006 1007 Indicates whether the octets in the lexer buffer 1008 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding 1009 of Unicode characters. If not, they should be interpreted as Latin-1 1010 characters. This is analogous to the C<SvUTF8> flag for scalars. 1011 1012 In UTF-8 mode, it is not guaranteed that the lexer buffer actually 1013 contains valid UTF-8. Lexing code must be robust in the face of invalid 1014 encoding. 1015 1016 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar 1017 is significant, but not the whole story regarding the input character 1018 encoding. Normally, when a file is being read, the scalar contains octets 1019 and its C<SvUTF8> flag is off, but the octets should be interpreted as 1020 UTF-8 if the C<use utf8> pragma is in effect. During a string eval, 1021 however, the scalar may have the C<SvUTF8> flag on, and in this case its 1022 octets should be interpreted as UTF-8 unless the C<use bytes> pragma 1023 is in effect. This logic may change in the future; use this function 1024 instead of implementing the logic yourself. 1025 1026 =cut 1027 */ 1028 1029 bool 1030 Perl_lex_bufutf8(pTHX) 1031 { 1032 return UTF; 1033 } 1034 1035 /* 1036 =for apidoc lex_grow_linestr 1037 1038 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate 1039 at least C<len> octets (including terminating C<NUL>). Returns a 1040 pointer to the reallocated buffer. This is necessary before making 1041 any direct modification of the buffer that would increase its length. 1042 L</lex_stuff_pvn> provides a more convenient way to insert text into 1043 the buffer. 1044 1045 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>; 1046 this function updates all of the lexer's variables that point directly 1047 into the buffer. 1048 1049 =cut 1050 */ 1051 1052 char * 1053 Perl_lex_grow_linestr(pTHX_ STRLEN len) 1054 { 1055 SV *linestr; 1056 char *buf; 1057 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos; 1058 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos; 1059 bool current; 1060 1061 linestr = PL_parser->linestr; 1062 buf = SvPVX(linestr); 1063 if (len <= SvLEN(linestr)) 1064 return buf; 1065 1066 /* Is the lex_shared linestr SV the same as the current linestr SV? 1067 * Only in this case does re_eval_start need adjusting, since it 1068 * points within lex_shared->ls_linestr's buffer */ 1069 current = ( !PL_parser->lex_shared->ls_linestr 1070 || linestr == PL_parser->lex_shared->ls_linestr); 1071 1072 bufend_pos = PL_parser->bufend - buf; 1073 bufptr_pos = PL_parser->bufptr - buf; 1074 oldbufptr_pos = PL_parser->oldbufptr - buf; 1075 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf; 1076 linestart_pos = PL_parser->linestart - buf; 1077 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0; 1078 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0; 1079 re_eval_start_pos = (current && PL_parser->lex_shared->re_eval_start) ? 1080 PL_parser->lex_shared->re_eval_start - buf : 0; 1081 1082 buf = sv_grow(linestr, len); 1083 1084 PL_parser->bufend = buf + bufend_pos; 1085 PL_parser->bufptr = buf + bufptr_pos; 1086 PL_parser->oldbufptr = buf + oldbufptr_pos; 1087 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos; 1088 PL_parser->linestart = buf + linestart_pos; 1089 if (PL_parser->last_uni) 1090 PL_parser->last_uni = buf + last_uni_pos; 1091 if (PL_parser->last_lop) 1092 PL_parser->last_lop = buf + last_lop_pos; 1093 if (current && PL_parser->lex_shared->re_eval_start) 1094 PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos; 1095 return buf; 1096 } 1097 1098 /* 1099 =for apidoc lex_stuff_pvn 1100 1101 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>), 1102 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>), 1103 reallocating the buffer if necessary. This means that lexing code that 1104 runs later will see the characters as if they had appeared in the input. 1105 It is not recommended to do this as part of normal parsing, and most 1106 uses of this facility run the risk of the inserted characters being 1107 interpreted in an unintended manner. 1108 1109 The string to be inserted is represented by C<len> octets starting 1110 at C<pv>. These octets are interpreted as either UTF-8 or Latin-1, 1111 according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>. 1112 The characters are recoded for the lexer buffer, according to how the 1113 buffer is currently being interpreted (L</lex_bufutf8>). If a string 1114 to be inserted is available as a Perl scalar, the L</lex_stuff_sv> 1115 function is more convenient. 1116 1117 =for apidoc Amnh||LEX_STUFF_UTF8 1118 1119 =cut 1120 */ 1121 1122 void 1123 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags) 1124 { 1125 char *bufptr; 1126 PERL_ARGS_ASSERT_LEX_STUFF_PVN; 1127 if (flags & ~(LEX_STUFF_UTF8)) 1128 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn"); 1129 if (UTF) { 1130 if (flags & LEX_STUFF_UTF8) { 1131 goto plain_copy; 1132 } else { 1133 STRLEN highhalf = variant_under_utf8_count((U8 *) pv, 1134 (U8 *) pv + len); 1135 const char *p, *e = pv+len;; 1136 if (!highhalf) 1137 goto plain_copy; 1138 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf); 1139 bufptr = PL_parser->bufptr; 1140 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char); 1141 SvCUR_set(PL_parser->linestr, 1142 SvCUR(PL_parser->linestr) + len+highhalf); 1143 PL_parser->bufend += len+highhalf; 1144 for (p = pv; p != e; p++) { 1145 append_utf8_from_native_byte(*p, (U8 **) &bufptr); 1146 } 1147 } 1148 } else { 1149 if (flags & LEX_STUFF_UTF8) { 1150 STRLEN highhalf = 0; 1151 const char *p, *e = pv+len; 1152 for (p = pv; p != e; p++) { 1153 U8 c = (U8)*p; 1154 if (UTF8_IS_ABOVE_LATIN1(c)) { 1155 Perl_croak(aTHX_ "Lexing code attempted to stuff " 1156 "non-Latin-1 character into Latin-1 input"); 1157 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) { 1158 p++; 1159 highhalf++; 1160 } else assert(UTF8_IS_INVARIANT(c)); 1161 } 1162 if (!highhalf) 1163 goto plain_copy; 1164 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf); 1165 bufptr = PL_parser->bufptr; 1166 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char); 1167 SvCUR_set(PL_parser->linestr, 1168 SvCUR(PL_parser->linestr) + len-highhalf); 1169 PL_parser->bufend += len-highhalf; 1170 p = pv; 1171 while (p < e) { 1172 if (UTF8_IS_INVARIANT(*p)) { 1173 *bufptr++ = *p; 1174 p++; 1175 } 1176 else { 1177 assert(p < e -1 ); 1178 *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1)); 1179 p += 2; 1180 } 1181 } 1182 } else { 1183 plain_copy: 1184 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len); 1185 bufptr = PL_parser->bufptr; 1186 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char); 1187 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len); 1188 PL_parser->bufend += len; 1189 Copy(pv, bufptr, len, char); 1190 } 1191 } 1192 } 1193 1194 /* 1195 =for apidoc lex_stuff_pv 1196 1197 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>), 1198 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>), 1199 reallocating the buffer if necessary. This means that lexing code that 1200 runs later will see the characters as if they had appeared in the input. 1201 It is not recommended to do this as part of normal parsing, and most 1202 uses of this facility run the risk of the inserted characters being 1203 interpreted in an unintended manner. 1204 1205 The string to be inserted is represented by octets starting at C<pv> 1206 and continuing to the first nul. These octets are interpreted as either 1207 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set 1208 in C<flags>. The characters are recoded for the lexer buffer, according 1209 to how the buffer is currently being interpreted (L</lex_bufutf8>). 1210 If it is not convenient to nul-terminate a string to be inserted, the 1211 L</lex_stuff_pvn> function is more appropriate. 1212 1213 =cut 1214 */ 1215 1216 void 1217 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags) 1218 { 1219 PERL_ARGS_ASSERT_LEX_STUFF_PV; 1220 lex_stuff_pvn(pv, strlen(pv), flags); 1221 } 1222 1223 /* 1224 =for apidoc lex_stuff_sv 1225 1226 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>), 1227 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>), 1228 reallocating the buffer if necessary. This means that lexing code that 1229 runs later will see the characters as if they had appeared in the input. 1230 It is not recommended to do this as part of normal parsing, and most 1231 uses of this facility run the risk of the inserted characters being 1232 interpreted in an unintended manner. 1233 1234 The string to be inserted is the string value of C<sv>. The characters 1235 are recoded for the lexer buffer, according to how the buffer is currently 1236 being interpreted (L</lex_bufutf8>). If a string to be inserted is 1237 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the 1238 need to construct a scalar. 1239 1240 =cut 1241 */ 1242 1243 void 1244 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags) 1245 { 1246 char *pv; 1247 STRLEN len; 1248 PERL_ARGS_ASSERT_LEX_STUFF_SV; 1249 if (flags) 1250 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv"); 1251 pv = SvPV(sv, len); 1252 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0)); 1253 } 1254 1255 /* 1256 =for apidoc lex_unstuff 1257 1258 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to 1259 C<ptr>. Text following C<ptr> will be moved, and the buffer shortened. 1260 This hides the discarded text from any lexing code that runs later, 1261 as if the text had never appeared. 1262 1263 This is not the normal way to consume lexed text. For that, use 1264 L</lex_read_to>. 1265 1266 =cut 1267 */ 1268 1269 void 1270 Perl_lex_unstuff(pTHX_ char *ptr) 1271 { 1272 char *buf, *bufend; 1273 STRLEN unstuff_len; 1274 PERL_ARGS_ASSERT_LEX_UNSTUFF; 1275 buf = PL_parser->bufptr; 1276 if (ptr < buf) 1277 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff"); 1278 if (ptr == buf) 1279 return; 1280 bufend = PL_parser->bufend; 1281 if (ptr > bufend) 1282 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff"); 1283 unstuff_len = ptr - buf; 1284 Move(ptr, buf, bufend+1-ptr, char); 1285 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len); 1286 PL_parser->bufend = bufend - unstuff_len; 1287 } 1288 1289 /* 1290 =for apidoc lex_read_to 1291 1292 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up 1293 to C<ptr>. This advances L</PL_parser-E<gt>bufptr> to match C<ptr>, 1294 performing the correct bookkeeping whenever a newline character is passed. 1295 This is the normal way to consume lexed text. 1296 1297 Interpretation of the buffer's octets can be abstracted out by 1298 using the slightly higher-level functions L</lex_peek_unichar> and 1299 L</lex_read_unichar>. 1300 1301 =cut 1302 */ 1303 1304 void 1305 Perl_lex_read_to(pTHX_ char *ptr) 1306 { 1307 char *s; 1308 PERL_ARGS_ASSERT_LEX_READ_TO; 1309 s = PL_parser->bufptr; 1310 if (ptr < s || ptr > PL_parser->bufend) 1311 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to"); 1312 for (; s != ptr; s++) 1313 if (*s == '\n') { 1314 COPLINE_INC_WITH_HERELINES; 1315 PL_parser->linestart = s+1; 1316 } 1317 PL_parser->bufptr = ptr; 1318 } 1319 1320 /* 1321 =for apidoc lex_discard_to 1322 1323 Discards the first part of the L</PL_parser-E<gt>linestr> buffer, 1324 up to C<ptr>. The remaining content of the buffer will be moved, and 1325 all pointers into the buffer updated appropriately. C<ptr> must not 1326 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>: 1327 it is not permitted to discard text that has yet to be lexed. 1328 1329 Normally it is not necessarily to do this directly, because it suffices to 1330 use the implicit discarding behaviour of L</lex_next_chunk> and things 1331 based on it. However, if a token stretches across multiple lines, 1332 and the lexing code has kept multiple lines of text in the buffer for 1333 that purpose, then after completion of the token it would be wise to 1334 explicitly discard the now-unneeded earlier lines, to avoid future 1335 multi-line tokens growing the buffer without bound. 1336 1337 =cut 1338 */ 1339 1340 void 1341 Perl_lex_discard_to(pTHX_ char *ptr) 1342 { 1343 char *buf; 1344 STRLEN discard_len; 1345 PERL_ARGS_ASSERT_LEX_DISCARD_TO; 1346 buf = SvPVX(PL_parser->linestr); 1347 if (ptr < buf) 1348 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to"); 1349 if (ptr == buf) 1350 return; 1351 if (ptr > PL_parser->bufptr) 1352 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to"); 1353 discard_len = ptr - buf; 1354 if (PL_parser->oldbufptr < ptr) 1355 PL_parser->oldbufptr = ptr; 1356 if (PL_parser->oldoldbufptr < ptr) 1357 PL_parser->oldoldbufptr = ptr; 1358 if (PL_parser->last_uni && PL_parser->last_uni < ptr) 1359 PL_parser->last_uni = NULL; 1360 if (PL_parser->last_lop && PL_parser->last_lop < ptr) 1361 PL_parser->last_lop = NULL; 1362 Move(ptr, buf, PL_parser->bufend+1-ptr, char); 1363 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len); 1364 PL_parser->bufend -= discard_len; 1365 PL_parser->bufptr -= discard_len; 1366 PL_parser->oldbufptr -= discard_len; 1367 PL_parser->oldoldbufptr -= discard_len; 1368 if (PL_parser->last_uni) 1369 PL_parser->last_uni -= discard_len; 1370 if (PL_parser->last_lop) 1371 PL_parser->last_lop -= discard_len; 1372 } 1373 1374 void 1375 Perl_notify_parser_that_changed_to_utf8(pTHX) 1376 { 1377 /* Called when $^H is changed to indicate that HINT_UTF8 has changed from 1378 * off to on. At compile time, this has the effect of entering a 'use 1379 * utf8' section. This means that any input was not previously checked for 1380 * UTF-8 (because it was off), but now we do need to check it, or our 1381 * assumptions about the input being sane could be wrong, and we could 1382 * segfault. This routine just sets a flag so that the next time we look 1383 * at the input we do the well-formed UTF-8 check. If we aren't in the 1384 * proper phase, there may not be a parser object, but if there is, setting 1385 * the flag is harmless */ 1386 1387 if (PL_parser) { 1388 PL_parser->recheck_utf8_validity = TRUE; 1389 } 1390 } 1391 1392 /* 1393 =for apidoc lex_next_chunk 1394 1395 Reads in the next chunk of text to be lexed, appending it to 1396 L</PL_parser-E<gt>linestr>. This should be called when lexing code has 1397 looked to the end of the current chunk and wants to know more. It is 1398 usual, but not necessary, for lexing to have consumed the entirety of 1399 the current chunk at this time. 1400 1401 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current 1402 chunk (i.e., the current chunk has been entirely consumed), normally the 1403 current chunk will be discarded at the same time that the new chunk is 1404 read in. If C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, the current chunk 1405 will not be discarded. If the current chunk has not been entirely 1406 consumed, then it will not be discarded regardless of the flag. 1407 1408 Returns true if some new text was added to the buffer, or false if the 1409 buffer has reached the end of the input text. 1410 1411 =for apidoc Amnh||LEX_KEEP_PREVIOUS 1412 1413 =cut 1414 */ 1415 1416 #define LEX_FAKE_EOF 0x80000000 1417 #define LEX_NO_TERM 0x40000000 /* here-doc */ 1418 1419 bool 1420 Perl_lex_next_chunk(pTHX_ U32 flags) 1421 { 1422 SV *linestr; 1423 char *buf; 1424 STRLEN old_bufend_pos, new_bufend_pos; 1425 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos; 1426 STRLEN linestart_pos, last_uni_pos, last_lop_pos; 1427 bool got_some_for_debugger = 0; 1428 bool got_some; 1429 1430 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM)) 1431 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk"); 1432 if (!(flags & LEX_NO_TERM) && PL_lex_inwhat) 1433 return FALSE; 1434 linestr = PL_parser->linestr; 1435 buf = SvPVX(linestr); 1436 if (!(flags & LEX_KEEP_PREVIOUS) 1437 && PL_parser->bufptr == PL_parser->bufend) 1438 { 1439 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0; 1440 linestart_pos = 0; 1441 if (PL_parser->last_uni != PL_parser->bufend) 1442 PL_parser->last_uni = NULL; 1443 if (PL_parser->last_lop != PL_parser->bufend) 1444 PL_parser->last_lop = NULL; 1445 last_uni_pos = last_lop_pos = 0; 1446 *buf = 0; 1447 SvCUR_set(linestr, 0); 1448 } else { 1449 old_bufend_pos = PL_parser->bufend - buf; 1450 bufptr_pos = PL_parser->bufptr - buf; 1451 oldbufptr_pos = PL_parser->oldbufptr - buf; 1452 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf; 1453 linestart_pos = PL_parser->linestart - buf; 1454 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0; 1455 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0; 1456 } 1457 if (flags & LEX_FAKE_EOF) { 1458 goto eof; 1459 } else if (!PL_parser->rsfp && !PL_parser->filtered) { 1460 got_some = 0; 1461 } else if (filter_gets(linestr, old_bufend_pos)) { 1462 got_some = 1; 1463 got_some_for_debugger = 1; 1464 } else if (flags & LEX_NO_TERM) { 1465 got_some = 0; 1466 } else { 1467 if (!SvPOK(linestr)) /* can get undefined by filter_gets */ 1468 SvPVCLEAR(linestr); 1469 eof: 1470 /* End of real input. Close filehandle (unless it was STDIN), 1471 * then add implicit termination. 1472 */ 1473 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP) 1474 PerlIO_clearerr(PL_parser->rsfp); 1475 else if (PL_parser->rsfp) 1476 (void)PerlIO_close(PL_parser->rsfp); 1477 PL_parser->rsfp = NULL; 1478 PL_parser->in_pod = PL_parser->filtered = 0; 1479 if (!PL_in_eval && PL_minus_p) { 1480 sv_catpvs(linestr, 1481 /*{*/";}continue{print or die qq(-p destination: $!\\n);}"); 1482 PL_minus_n = PL_minus_p = 0; 1483 } else if (!PL_in_eval && PL_minus_n) { 1484 sv_catpvs(linestr, /*{*/";}"); 1485 PL_minus_n = 0; 1486 } else 1487 sv_catpvs(linestr, ";"); 1488 got_some = 1; 1489 } 1490 buf = SvPVX(linestr); 1491 new_bufend_pos = SvCUR(linestr); 1492 PL_parser->bufend = buf + new_bufend_pos; 1493 PL_parser->bufptr = buf + bufptr_pos; 1494 1495 if (UTF) { 1496 const U8* first_bad_char_loc; 1497 if (UNLIKELY(! is_utf8_string_loc( 1498 (U8 *) PL_parser->bufptr, 1499 PL_parser->bufend - PL_parser->bufptr, 1500 &first_bad_char_loc))) 1501 { 1502 _force_out_malformed_utf8_message(first_bad_char_loc, 1503 (U8 *) PL_parser->bufend, 1504 0, 1505 1 /* 1 means die */ ); 1506 NOT_REACHED; /* NOTREACHED */ 1507 } 1508 } 1509 1510 PL_parser->oldbufptr = buf + oldbufptr_pos; 1511 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos; 1512 PL_parser->linestart = buf + linestart_pos; 1513 if (PL_parser->last_uni) 1514 PL_parser->last_uni = buf + last_uni_pos; 1515 if (PL_parser->last_lop) 1516 PL_parser->last_lop = buf + last_lop_pos; 1517 if (PL_parser->preambling != NOLINE) { 1518 CopLINE_set(PL_curcop, PL_parser->preambling + 1); 1519 PL_parser->preambling = NOLINE; 1520 } 1521 if ( got_some_for_debugger 1522 && PERLDB_LINE_OR_SAVESRC 1523 && PL_curstash != PL_debstash) 1524 { 1525 /* debugger active and we're not compiling the debugger code, 1526 * so store the line into the debugger's array of lines 1527 */ 1528 update_debugger_info(NULL, buf+old_bufend_pos, 1529 new_bufend_pos-old_bufend_pos); 1530 } 1531 return got_some; 1532 } 1533 1534 /* 1535 =for apidoc lex_peek_unichar 1536 1537 Looks ahead one (Unicode) character in the text currently being lexed. 1538 Returns the codepoint (unsigned integer value) of the next character, 1539 or -1 if lexing has reached the end of the input text. To consume the 1540 peeked character, use L</lex_read_unichar>. 1541 1542 If the next character is in (or extends into) the next chunk of input 1543 text, the next chunk will be read in. Normally the current chunk will be 1544 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> 1545 bit set, then the current chunk will not be discarded. 1546 1547 If the input is being interpreted as UTF-8 and a UTF-8 encoding error 1548 is encountered, an exception is generated. 1549 1550 =cut 1551 */ 1552 1553 I32 1554 Perl_lex_peek_unichar(pTHX_ U32 flags) 1555 { 1556 char *s, *bufend; 1557 if (flags & ~(LEX_KEEP_PREVIOUS)) 1558 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar"); 1559 s = PL_parser->bufptr; 1560 bufend = PL_parser->bufend; 1561 if (UTF) { 1562 U8 head; 1563 I32 unichar; 1564 STRLEN len, retlen; 1565 if (s == bufend) { 1566 if (!lex_next_chunk(flags)) 1567 return -1; 1568 s = PL_parser->bufptr; 1569 bufend = PL_parser->bufend; 1570 } 1571 head = (U8)*s; 1572 if (UTF8_IS_INVARIANT(head)) 1573 return head; 1574 if (UTF8_IS_START(head)) { 1575 len = UTF8SKIP(&head); 1576 while ((STRLEN)(bufend-s) < len) { 1577 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS)) 1578 break; 1579 s = PL_parser->bufptr; 1580 bufend = PL_parser->bufend; 1581 } 1582 } 1583 unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY); 1584 if (retlen == (STRLEN)-1) { 1585 _force_out_malformed_utf8_message((U8 *) s, 1586 (U8 *) bufend, 1587 0, 1588 1 /* 1 means die */ ); 1589 NOT_REACHED; /* NOTREACHED */ 1590 } 1591 return unichar; 1592 } else { 1593 if (s == bufend) { 1594 if (!lex_next_chunk(flags)) 1595 return -1; 1596 s = PL_parser->bufptr; 1597 } 1598 return (U8)*s; 1599 } 1600 } 1601 1602 /* 1603 =for apidoc lex_read_unichar 1604 1605 Reads the next (Unicode) character in the text currently being lexed. 1606 Returns the codepoint (unsigned integer value) of the character read, 1607 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1 1608 if lexing has reached the end of the input text. To non-destructively 1609 examine the next character, use L</lex_peek_unichar> instead. 1610 1611 If the next character is in (or extends into) the next chunk of input 1612 text, the next chunk will be read in. Normally the current chunk will be 1613 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> 1614 bit set, then the current chunk will not be discarded. 1615 1616 If the input is being interpreted as UTF-8 and a UTF-8 encoding error 1617 is encountered, an exception is generated. 1618 1619 =cut 1620 */ 1621 1622 I32 1623 Perl_lex_read_unichar(pTHX_ U32 flags) 1624 { 1625 I32 c; 1626 if (flags & ~(LEX_KEEP_PREVIOUS)) 1627 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar"); 1628 c = lex_peek_unichar(flags); 1629 if (c != -1) { 1630 if (c == '\n') 1631 COPLINE_INC_WITH_HERELINES; 1632 if (UTF) 1633 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr); 1634 else 1635 ++(PL_parser->bufptr); 1636 } 1637 return c; 1638 } 1639 1640 /* 1641 =for apidoc lex_read_space 1642 1643 Reads optional spaces, in Perl style, in the text currently being 1644 lexed. The spaces may include ordinary whitespace characters and 1645 Perl-style comments. C<#line> directives are processed if encountered. 1646 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points 1647 at a non-space character (or the end of the input text). 1648 1649 If spaces extend into the next chunk of input text, the next chunk will 1650 be read in. Normally the current chunk will be discarded at the same 1651 time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, then the current 1652 chunk will not be discarded. 1653 1654 =cut 1655 */ 1656 1657 #define LEX_NO_INCLINE 0x40000000 1658 #define LEX_NO_NEXT_CHUNK 0x80000000 1659 1660 void 1661 Perl_lex_read_space(pTHX_ U32 flags) 1662 { 1663 char *s, *bufend; 1664 const bool can_incline = !(flags & LEX_NO_INCLINE); 1665 bool need_incline = 0; 1666 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE)) 1667 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space"); 1668 s = PL_parser->bufptr; 1669 bufend = PL_parser->bufend; 1670 while (1) { 1671 char c = *s; 1672 if (c == '#') { 1673 do { 1674 c = *++s; 1675 } while (!(c == '\n' || (c == 0 && s == bufend))); 1676 } else if (c == '\n') { 1677 s++; 1678 if (can_incline) { 1679 PL_parser->linestart = s; 1680 if (s == bufend) 1681 need_incline = 1; 1682 else 1683 incline(s, bufend); 1684 } 1685 } else if (isSPACE(c)) { 1686 s++; 1687 } else if (c == 0 && s == bufend) { 1688 bool got_more; 1689 line_t l; 1690 if (flags & LEX_NO_NEXT_CHUNK) 1691 break; 1692 PL_parser->bufptr = s; 1693 l = CopLINE(PL_curcop); 1694 CopLINE(PL_curcop) += PL_parser->herelines + 1; 1695 got_more = lex_next_chunk(flags); 1696 CopLINE_set(PL_curcop, l); 1697 s = PL_parser->bufptr; 1698 bufend = PL_parser->bufend; 1699 if (!got_more) 1700 break; 1701 if (can_incline && need_incline && PL_parser->rsfp) { 1702 incline(s, bufend); 1703 need_incline = 0; 1704 } 1705 } else if (!c) { 1706 s++; 1707 } else { 1708 break; 1709 } 1710 } 1711 PL_parser->bufptr = s; 1712 } 1713 1714 /* 1715 1716 =for apidoc validate_proto 1717 1718 This function performs syntax checking on a prototype, C<proto>. 1719 If C<warn> is true, any illegal characters or mismatched brackets 1720 will trigger illegalproto warnings, declaring that they were 1721 detected in the prototype for C<name>. 1722 1723 The return value is C<true> if this is a valid prototype, and 1724 C<false> if it is not, regardless of whether C<warn> was C<true> or 1725 C<false>. 1726 1727 Note that C<NULL> is a valid C<proto> and will always return C<true>. 1728 1729 =cut 1730 1731 */ 1732 1733 bool 1734 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn, bool curstash) 1735 { 1736 STRLEN len, origlen; 1737 char *p; 1738 bool bad_proto = FALSE; 1739 bool in_brackets = FALSE; 1740 bool after_slash = FALSE; 1741 char greedy_proto = ' '; 1742 bool proto_after_greedy_proto = FALSE; 1743 bool must_be_last = FALSE; 1744 bool underscore = FALSE; 1745 bool bad_proto_after_underscore = FALSE; 1746 1747 PERL_ARGS_ASSERT_VALIDATE_PROTO; 1748 1749 if (!proto) 1750 return TRUE; 1751 1752 p = SvPV(proto, len); 1753 origlen = len; 1754 for (; len--; p++) { 1755 if (!isSPACE(*p)) { 1756 if (must_be_last) 1757 proto_after_greedy_proto = TRUE; 1758 if (underscore) { 1759 if (!memCHRs(";@%", *p)) 1760 bad_proto_after_underscore = TRUE; 1761 underscore = FALSE; 1762 } 1763 if (!memCHRs("$@%*;[]&\\_+", *p) || *p == '\0') { 1764 bad_proto = TRUE; 1765 } 1766 else { 1767 if (*p == '[') 1768 in_brackets = TRUE; 1769 else if (*p == ']') 1770 in_brackets = FALSE; 1771 else if ((*p == '@' || *p == '%') 1772 && !after_slash 1773 && !in_brackets ) 1774 { 1775 must_be_last = TRUE; 1776 greedy_proto = *p; 1777 } 1778 else if (*p == '_') 1779 underscore = TRUE; 1780 } 1781 if (*p == '\\') 1782 after_slash = TRUE; 1783 else 1784 after_slash = FALSE; 1785 } 1786 } 1787 1788 if (warn) { 1789 SV *tmpsv = newSVpvs_flags("", SVs_TEMP); 1790 p -= origlen; 1791 p = SvUTF8(proto) 1792 ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8), 1793 origlen, UNI_DISPLAY_ISPRINT) 1794 : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII); 1795 1796 if (curstash && !memchr(SvPVX(name), ':', SvCUR(name))) { 1797 SV *name2 = sv_2mortal(newSVsv(PL_curstname)); 1798 sv_catpvs(name2, "::"); 1799 sv_catsv(name2, (SV *)name); 1800 name = name2; 1801 } 1802 1803 if (proto_after_greedy_proto) 1804 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), 1805 "Prototype after '%c' for %" SVf " : %s", 1806 greedy_proto, SVfARG(name), p); 1807 if (in_brackets) 1808 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), 1809 "Missing ']' in prototype for %" SVf " : %s", 1810 SVfARG(name), p); 1811 if (bad_proto) 1812 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), 1813 "Illegal character in prototype for %" SVf " : %s", 1814 SVfARG(name), p); 1815 if (bad_proto_after_underscore) 1816 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), 1817 "Illegal character after '_' in prototype for %" SVf " : %s", 1818 SVfARG(name), p); 1819 } 1820 1821 return (! (proto_after_greedy_proto || bad_proto) ); 1822 } 1823 1824 /* 1825 * S_incline 1826 * This subroutine has nothing to do with tilting, whether at windmills 1827 * or pinball tables. Its name is short for "increment line". It 1828 * increments the current line number in CopLINE(PL_curcop) and checks 1829 * to see whether the line starts with a comment of the form 1830 * # line 500 "foo.pm" 1831 * If so, it sets the current line number and file to the values in the comment. 1832 */ 1833 1834 STATIC void 1835 S_incline(pTHX_ const char *s, const char *end) 1836 { 1837 const char *t; 1838 const char *n; 1839 const char *e; 1840 line_t line_num; 1841 UV uv; 1842 1843 PERL_ARGS_ASSERT_INCLINE; 1844 1845 assert(end >= s); 1846 1847 COPLINE_INC_WITH_HERELINES; 1848 if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL 1849 && s+1 == PL_bufend && *s == ';') { 1850 /* fake newline in string eval */ 1851 CopLINE_dec(PL_curcop); 1852 return; 1853 } 1854 if (*s++ != '#') 1855 return; 1856 while (SPACE_OR_TAB(*s)) 1857 s++; 1858 if (memBEGINs(s, (STRLEN) (end - s), "line")) 1859 s += sizeof("line") - 1; 1860 else 1861 return; 1862 if (SPACE_OR_TAB(*s)) 1863 s++; 1864 else 1865 return; 1866 while (SPACE_OR_TAB(*s)) 1867 s++; 1868 if (!isDIGIT(*s)) 1869 return; 1870 1871 n = s; 1872 while (isDIGIT(*s)) 1873 s++; 1874 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0') 1875 return; 1876 while (SPACE_OR_TAB(*s)) 1877 s++; 1878 if (*s == '"' && (t = (char *) memchr(s+1, '"', end - s))) { 1879 s++; 1880 e = t + 1; 1881 } 1882 else { 1883 t = s; 1884 while (*t && !isSPACE(*t)) 1885 t++; 1886 e = t; 1887 } 1888 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f') 1889 e++; 1890 if (*e != '\n' && *e != '\0') 1891 return; /* false alarm */ 1892 1893 if (!grok_atoUV(n, &uv, &e)) 1894 return; 1895 line_num = ((line_t)uv) - 1; 1896 1897 if (t - s > 0) { 1898 const STRLEN len = t - s; 1899 1900 if (!PL_rsfp && !PL_parser->filtered) { 1901 /* must copy *{"::_<(eval N)[oldfilename:L]"} 1902 * to *{"::_<newfilename"} */ 1903 /* However, the long form of evals is only turned on by the 1904 debugger - usually they're "(eval %lu)" */ 1905 GV * const cfgv = CopFILEGV(PL_curcop); 1906 if (cfgv) { 1907 char smallbuf[128]; 1908 STRLEN tmplen2 = len; 1909 char *tmpbuf2; 1910 GV *gv2; 1911 1912 if (tmplen2 + 2 <= sizeof smallbuf) 1913 tmpbuf2 = smallbuf; 1914 else 1915 Newx(tmpbuf2, tmplen2 + 2, char); 1916 1917 tmpbuf2[0] = '_'; 1918 tmpbuf2[1] = '<'; 1919 1920 memcpy(tmpbuf2 + 2, s, tmplen2); 1921 tmplen2 += 2; 1922 1923 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE); 1924 if (!isGV(gv2)) { 1925 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE); 1926 /* adjust ${"::_<newfilename"} to store the new file name */ 1927 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2); 1928 /* The line number may differ. If that is the case, 1929 alias the saved lines that are in the array. 1930 Otherwise alias the whole array. */ 1931 if (CopLINE(PL_curcop) == line_num) { 1932 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv))); 1933 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv))); 1934 } 1935 else if (GvAV(cfgv)) { 1936 AV * const av = GvAV(cfgv); 1937 const line_t start = CopLINE(PL_curcop)+1; 1938 SSize_t items = AvFILLp(av) - start; 1939 if (items > 0) { 1940 AV * const av2 = GvAVn(gv2); 1941 SV **svp = AvARRAY(av) + start; 1942 Size_t l = line_num+1; 1943 while (items-- && l < SSize_t_MAX && l == (line_t)l) 1944 av_store(av2, (SSize_t)l++, SvREFCNT_inc(*svp++)); 1945 } 1946 } 1947 } 1948 1949 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2); 1950 } 1951 } 1952 CopFILE_free(PL_curcop); 1953 CopFILE_setn(PL_curcop, s, len); 1954 } 1955 CopLINE_set(PL_curcop, line_num); 1956 } 1957 1958 STATIC void 1959 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len) 1960 { 1961 AV *av = CopFILEAVx(PL_curcop); 1962 if (av) { 1963 SV * sv; 1964 if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG); 1965 else { 1966 sv = *av_fetch(av, 0, 1); 1967 SvUPGRADE(sv, SVt_PVMG); 1968 } 1969 if (!SvPOK(sv)) SvPVCLEAR(sv); 1970 if (orig_sv) 1971 sv_catsv(sv, orig_sv); 1972 else 1973 sv_catpvn(sv, buf, len); 1974 if (!SvIOK(sv)) { 1975 (void)SvIOK_on(sv); 1976 SvIV_set(sv, 0); 1977 } 1978 if (PL_parser->preambling == NOLINE) 1979 av_store(av, CopLINE(PL_curcop), sv); 1980 } 1981 } 1982 1983 /* 1984 * skipspace 1985 * Called to gobble the appropriate amount and type of whitespace. 1986 * Skips comments as well. 1987 * Returns the next character after the whitespace that is skipped. 1988 * 1989 * peekspace 1990 * Same thing, but look ahead without incrementing line numbers or 1991 * adjusting PL_linestart. 1992 */ 1993 1994 #define skipspace(s) skipspace_flags(s, 0) 1995 #define peekspace(s) skipspace_flags(s, LEX_NO_INCLINE) 1996 1997 char * 1998 Perl_skipspace_flags(pTHX_ char *s, U32 flags) 1999 { 2000 PERL_ARGS_ASSERT_SKIPSPACE_FLAGS; 2001 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { 2002 while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s)) 2003 s++; 2004 } else { 2005 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr); 2006 PL_bufptr = s; 2007 lex_read_space(flags | LEX_KEEP_PREVIOUS | 2008 (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ? 2009 LEX_NO_NEXT_CHUNK : 0)); 2010 s = PL_bufptr; 2011 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos; 2012 if (PL_linestart > PL_bufptr) 2013 PL_bufptr = PL_linestart; 2014 return s; 2015 } 2016 return s; 2017 } 2018 2019 /* 2020 * S_check_uni 2021 * Check the unary operators to ensure there's no ambiguity in how they're 2022 * used. An ambiguous piece of code would be: 2023 * rand + 5 2024 * This doesn't mean rand() + 5. Because rand() is a unary operator, 2025 * the +5 is its argument. 2026 */ 2027 2028 STATIC void 2029 S_check_uni(pTHX) 2030 { 2031 const char *s; 2032 2033 if (PL_oldoldbufptr != PL_last_uni) 2034 return; 2035 while (isSPACE(*PL_last_uni)) 2036 PL_last_uni++; 2037 s = PL_last_uni; 2038 while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-') 2039 s += UTF ? UTF8SKIP(s) : 1; 2040 if (s < PL_bufptr && memchr(s, '(', PL_bufptr - s)) 2041 return; 2042 2043 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), 2044 "Warning: Use of \"%" UTF8f "\" without parentheses is ambiguous", 2045 UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni)); 2046 } 2047 2048 /* 2049 * LOP : macro to build a list operator. Its behaviour has been replaced 2050 * with a subroutine, S_lop() for which LOP is just another name. 2051 */ 2052 2053 #define LOP(f,x) return lop(f,x,s) 2054 2055 /* 2056 * S_lop 2057 * Build a list operator (or something that might be one). The rules: 2058 * - if we have a next token, then it's a list operator (no parens) for 2059 * which the next token has already been parsed; e.g., 2060 * sort foo @args 2061 * sort foo (@args) 2062 * - if the next thing is an opening paren, then it's a function 2063 * - else it's a list operator 2064 */ 2065 2066 STATIC I32 2067 S_lop(pTHX_ I32 f, U8 x, char *s) 2068 { 2069 PERL_ARGS_ASSERT_LOP; 2070 2071 pl_yylval.ival = f; 2072 CLINE; 2073 PL_bufptr = s; 2074 PL_last_lop = PL_oldbufptr; 2075 PL_last_lop_op = (OPCODE)f; 2076 if (PL_nexttoke) 2077 goto lstop; 2078 PL_expect = x; 2079 if (*s == '(') 2080 return REPORT(FUNC); 2081 s = skipspace(s); 2082 if (*s == '(') 2083 return REPORT(FUNC); 2084 else { 2085 lstop: 2086 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 2087 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 2088 return REPORT(LSTOP); 2089 } 2090 } 2091 2092 /* 2093 * S_force_next 2094 * When the lexer realizes it knows the next token (for instance, 2095 * it is reordering tokens for the parser) then it can call S_force_next 2096 * to know what token to return the next time the lexer is called. Caller 2097 * will need to set PL_nextval[] and possibly PL_expect to ensure 2098 * the lexer handles the token correctly. 2099 */ 2100 2101 STATIC void 2102 S_force_next(pTHX_ I32 type) 2103 { 2104 #ifdef DEBUGGING 2105 if (DEBUG_T_TEST) { 2106 PerlIO_printf(Perl_debug_log, "### forced token:\n"); 2107 tokereport(type, &NEXTVAL_NEXTTOKE); 2108 } 2109 #endif 2110 assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype)); 2111 PL_nexttype[PL_nexttoke] = type; 2112 PL_nexttoke++; 2113 } 2114 2115 /* 2116 * S_postderef 2117 * 2118 * This subroutine handles postfix deref syntax after the arrow has already 2119 * been emitted. @* $* etc. are emitted as two separate tokens right here. 2120 * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits 2121 * only the first, leaving yylex to find the next. 2122 */ 2123 2124 static int 2125 S_postderef(pTHX_ int const funny, char const next) 2126 { 2127 assert(funny == DOLSHARP 2128 || funny == PERLY_DOLLAR 2129 || funny == PERLY_SNAIL 2130 || funny == PERLY_PERCENT_SIGN 2131 || funny == PERLY_AMPERSAND 2132 || funny == PERLY_STAR 2133 ); 2134 if (next == '*') { 2135 PL_expect = XOPERATOR; 2136 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) { 2137 assert(PERLY_SNAIL == funny || PERLY_DOLLAR == funny || DOLSHARP == funny); 2138 PL_lex_state = LEX_INTERPEND; 2139 if (PERLY_SNAIL == funny) 2140 force_next(POSTJOIN); 2141 } 2142 force_next(PERLY_STAR); 2143 PL_bufptr+=2; 2144 } 2145 else { 2146 if (PERLY_SNAIL == funny && PL_lex_state == LEX_INTERPNORMAL 2147 && !PL_lex_brackets) 2148 PL_lex_dojoin = 2; 2149 PL_expect = XOPERATOR; 2150 PL_bufptr++; 2151 } 2152 return funny; 2153 } 2154 2155 void 2156 Perl_yyunlex(pTHX) 2157 { 2158 int yyc = PL_parser->yychar; 2159 if (yyc != YYEMPTY) { 2160 if (yyc) { 2161 NEXTVAL_NEXTTOKE = PL_parser->yylval; 2162 if (yyc == PERLY_BRACE_OPEN || yyc == HASHBRACK || yyc == PERLY_BRACKET_OPEN) { 2163 PL_lex_allbrackets--; 2164 PL_lex_brackets--; 2165 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16); 2166 } else if (yyc == PERLY_PAREN_OPEN) { 2167 PL_lex_allbrackets--; 2168 yyc |= (2<<24); 2169 } 2170 force_next(yyc); 2171 } 2172 PL_parser->yychar = YYEMPTY; 2173 } 2174 } 2175 2176 STATIC SV * 2177 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len) 2178 { 2179 SV * const sv = newSVpvn_utf8(start, len, 2180 ! IN_BYTES 2181 && UTF 2182 && len != 0 2183 && is_utf8_non_invariant_string((const U8*)start, len)); 2184 return sv; 2185 } 2186 2187 /* 2188 * S_force_word 2189 * When the lexer knows the next thing is a word (for instance, it has 2190 * just seen -> and it knows that the next char is a word char, then 2191 * it calls S_force_word to stick the next word into the PL_nexttoke/val 2192 * lookahead. 2193 * 2194 * Arguments: 2195 * char *start : buffer position (must be within PL_linestr) 2196 * int token : PL_next* will be this type of bare word 2197 * (e.g., METHOD,BAREWORD) 2198 * int check_keyword : if true, Perl checks to make sure the word isn't 2199 * a keyword (do this if the word is a label, e.g. goto FOO) 2200 * int allow_pack : if true, : characters will also be allowed (require, 2201 * use, etc. do this) 2202 */ 2203 2204 STATIC char * 2205 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack) 2206 { 2207 char *s; 2208 STRLEN len; 2209 2210 PERL_ARGS_ASSERT_FORCE_WORD; 2211 2212 start = skipspace(start); 2213 s = start; 2214 if ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) 2215 || (allow_pack && *s == ':' && s[1] == ':') ) 2216 { 2217 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len); 2218 if (check_keyword) { 2219 char *s2 = PL_tokenbuf; 2220 STRLEN len2 = len; 2221 if (allow_pack && memBEGINPs(s2, len, "CORE::")) { 2222 s2 += sizeof("CORE::") - 1; 2223 len2 -= sizeof("CORE::") - 1; 2224 } 2225 if (keyword(s2, len2, 0)) 2226 return start; 2227 } 2228 if (token == METHOD) { 2229 s = skipspace(s); 2230 if (*s == '(') 2231 PL_expect = XTERM; 2232 else { 2233 PL_expect = XOPERATOR; 2234 } 2235 } 2236 NEXTVAL_NEXTTOKE.opval 2237 = newSVOP(OP_CONST,0, 2238 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len)); 2239 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE; 2240 force_next(token); 2241 } 2242 return s; 2243 } 2244 2245 /* 2246 * S_force_ident 2247 * Called when the lexer wants $foo *foo &foo etc, but the program 2248 * text only contains the "foo" portion. The first argument is a pointer 2249 * to the "foo", and the second argument is the type symbol to prefix. 2250 * Forces the next token to be a "BAREWORD". 2251 * Creates the symbol if it didn't already exist (via gv_fetchpv()). 2252 */ 2253 2254 STATIC void 2255 S_force_ident(pTHX_ const char *s, int kind) 2256 { 2257 PERL_ARGS_ASSERT_FORCE_IDENT; 2258 2259 if (s[0]) { 2260 const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */ 2261 OP* const o = newSVOP(OP_CONST, 0, newSVpvn_flags(s, len, 2262 UTF ? SVf_UTF8 : 0)); 2263 NEXTVAL_NEXTTOKE.opval = o; 2264 force_next(BAREWORD); 2265 if (kind) { 2266 o->op_private = OPpCONST_ENTERED; 2267 /* XXX see note in pp_entereval() for why we forgo typo 2268 warnings if the symbol must be introduced in an eval. 2269 GSAR 96-10-12 */ 2270 gv_fetchpvn_flags(s, len, 2271 (PL_in_eval ? GV_ADDMULTI 2272 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ), 2273 kind == PERLY_DOLLAR ? SVt_PV : 2274 kind == PERLY_SNAIL ? SVt_PVAV : 2275 kind == PERLY_PERCENT_SIGN ? SVt_PVHV : 2276 SVt_PVGV 2277 ); 2278 } 2279 } 2280 } 2281 2282 static void 2283 S_force_ident_maybe_lex(pTHX_ char pit) 2284 { 2285 NEXTVAL_NEXTTOKE.ival = pit; 2286 force_next('p'); 2287 } 2288 2289 NV 2290 Perl_str_to_version(pTHX_ SV *sv) 2291 { 2292 NV retval = 0.0; 2293 NV nshift = 1.0; 2294 STRLEN len; 2295 const char *start = SvPV_const(sv,len); 2296 const char * const end = start + len; 2297 const bool utf = cBOOL(SvUTF8(sv)); 2298 2299 PERL_ARGS_ASSERT_STR_TO_VERSION; 2300 2301 while (start < end) { 2302 STRLEN skip; 2303 UV n; 2304 if (utf) 2305 n = utf8n_to_uvchr((U8*)start, len, &skip, 0); 2306 else { 2307 n = *(U8*)start; 2308 skip = 1; 2309 } 2310 retval += ((NV)n)/nshift; 2311 start += skip; 2312 nshift *= 1000; 2313 } 2314 return retval; 2315 } 2316 2317 /* 2318 * S_force_version 2319 * Forces the next token to be a version number. 2320 * If the next token appears to be an invalid version number, (e.g. "v2b"), 2321 * and if "guessing" is TRUE, then no new token is created (and the caller 2322 * must use an alternative parsing method). 2323 */ 2324 2325 STATIC char * 2326 S_force_version(pTHX_ char *s, int guessing) 2327 { 2328 OP *version = NULL; 2329 char *d; 2330 2331 PERL_ARGS_ASSERT_FORCE_VERSION; 2332 2333 s = skipspace(s); 2334 2335 d = s; 2336 if (*d == 'v') 2337 d++; 2338 if (isDIGIT(*d)) { 2339 while (isDIGIT(*d) || *d == '_' || *d == '.') 2340 d++; 2341 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) { 2342 SV *ver; 2343 s = scan_num(s, &pl_yylval); 2344 version = pl_yylval.opval; 2345 ver = cSVOPx(version)->op_sv; 2346 if (SvPOK(ver) && !SvNIOK(ver)) { 2347 SvUPGRADE(ver, SVt_PVNV); 2348 SvNV_set(ver, str_to_version(ver)); 2349 SvNOK_on(ver); /* hint that it is a version */ 2350 } 2351 } 2352 else if (guessing) { 2353 return s; 2354 } 2355 } 2356 2357 /* NOTE: The parser sees the package name and the VERSION swapped */ 2358 NEXTVAL_NEXTTOKE.opval = version; 2359 force_next(BAREWORD); 2360 2361 return s; 2362 } 2363 2364 /* 2365 * S_force_strict_version 2366 * Forces the next token to be a version number using strict syntax rules. 2367 */ 2368 2369 STATIC char * 2370 S_force_strict_version(pTHX_ char *s) 2371 { 2372 OP *version = NULL; 2373 const char *errstr = NULL; 2374 2375 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION; 2376 2377 while (isSPACE(*s)) /* leading whitespace */ 2378 s++; 2379 2380 if (is_STRICT_VERSION(s,&errstr)) { 2381 SV *ver = newSV_type(SVt_NULL); 2382 s = (char *)scan_version(s, ver, 0); 2383 version = newSVOP(OP_CONST, 0, ver); 2384 } 2385 else if ((*s != ';' && *s != '{' && *s != '}' ) 2386 && (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' ))) 2387 { 2388 PL_bufptr = s; 2389 if (errstr) 2390 yyerror(errstr); /* version required */ 2391 return s; 2392 } 2393 2394 /* NOTE: The parser sees the package name and the VERSION swapped */ 2395 NEXTVAL_NEXTTOKE.opval = version; 2396 force_next(BAREWORD); 2397 2398 return s; 2399 } 2400 2401 /* 2402 * S_tokeq 2403 * Turns any \\ into \ in a quoted string passed in in 'sv', returning 'sv', 2404 * modified as necessary. However, if HINT_NEW_STRING is on, 'sv' is 2405 * unchanged, and a new SV containing the modified input is returned. 2406 */ 2407 2408 STATIC SV * 2409 S_tokeq(pTHX_ SV *sv) 2410 { 2411 char *s; 2412 char *send; 2413 char *d; 2414 SV *pv = sv; 2415 2416 PERL_ARGS_ASSERT_TOKEQ; 2417 2418 assert (SvPOK(sv)); 2419 assert (SvLEN(sv)); 2420 assert (!SvIsCOW(sv)); 2421 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */ 2422 goto finish; 2423 s = SvPVX(sv); 2424 send = SvEND(sv); 2425 /* This is relying on the SV being "well formed" with a trailing '\0' */ 2426 while (s < send && !(*s == '\\' && s[1] == '\\')) 2427 s++; 2428 if (s == send) 2429 goto finish; 2430 d = s; 2431 if ( PL_hints & HINT_NEW_STRING ) { 2432 pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv), 2433 SVs_TEMP | SvUTF8(sv)); 2434 } 2435 while (s < send) { 2436 if (*s == '\\') { 2437 if (s + 1 < send && (s[1] == '\\')) 2438 s++; /* all that, just for this */ 2439 } 2440 *d++ = *s++; 2441 } 2442 *d = '\0'; 2443 SvCUR_set(sv, d - SvPVX_const(sv)); 2444 finish: 2445 if ( PL_hints & HINT_NEW_STRING ) 2446 return new_constant(NULL, 0, "q", sv, pv, "q", 1, NULL); 2447 return sv; 2448 } 2449 2450 /* 2451 * Now come three functions related to double-quote context, 2452 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when 2453 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They 2454 * interact with PL_lex_state, and create fake ( ... ) argument lists 2455 * to handle functions and concatenation. 2456 * For example, 2457 * "foo\lbar" 2458 * is tokenised as 2459 * stringify ( const[foo] concat lcfirst ( const[bar] ) ) 2460 */ 2461 2462 /* 2463 * S_sublex_start 2464 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST). 2465 * 2466 * Pattern matching will set PL_lex_op to the pattern-matching op to 2467 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise). 2468 * 2469 * OP_CONST is easy--just make the new op and return. 2470 * 2471 * Everything else becomes a FUNC. 2472 * 2473 * Sets PL_lex_state to LEX_INTERPPUSH unless ival was OP_NULL or we 2474 * had an OP_CONST. This just sets us up for a 2475 * call to S_sublex_push(). 2476 */ 2477 2478 STATIC I32 2479 S_sublex_start(pTHX) 2480 { 2481 const I32 op_type = pl_yylval.ival; 2482 2483 if (op_type == OP_NULL) { 2484 pl_yylval.opval = PL_lex_op; 2485 PL_lex_op = NULL; 2486 return THING; 2487 } 2488 if (op_type == OP_CONST) { 2489 SV *sv = PL_lex_stuff; 2490 PL_lex_stuff = NULL; 2491 sv = tokeq(sv); 2492 2493 if (SvTYPE(sv) == SVt_PVIV) { 2494 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */ 2495 STRLEN len; 2496 const char * const p = SvPV_const(sv, len); 2497 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv)); 2498 SvREFCNT_dec(sv); 2499 sv = nsv; 2500 } 2501 pl_yylval.opval = newSVOP(op_type, 0, sv); 2502 return THING; 2503 } 2504 2505 PL_parser->lex_super_state = PL_lex_state; 2506 PL_parser->lex_sub_inwhat = (U16)op_type; 2507 PL_parser->lex_sub_op = PL_lex_op; 2508 PL_parser->sub_no_recover = FALSE; 2509 PL_parser->sub_error_count = PL_error_count; 2510 PL_lex_state = LEX_INTERPPUSH; 2511 2512 PL_expect = XTERM; 2513 if (PL_lex_op) { 2514 pl_yylval.opval = PL_lex_op; 2515 PL_lex_op = NULL; 2516 return PMFUNC; 2517 } 2518 else 2519 return FUNC; 2520 } 2521 2522 /* 2523 * S_sublex_push 2524 * Create a new scope to save the lexing state. The scope will be 2525 * ended in S_sublex_done. Returns a '(', starting the function arguments 2526 * to the uc, lc, etc. found before. 2527 * Sets PL_lex_state to LEX_INTERPCONCAT. 2528 */ 2529 2530 STATIC I32 2531 S_sublex_push(pTHX) 2532 { 2533 LEXSHARED *shared; 2534 const bool is_heredoc = PL_multi_close == '<'; 2535 ENTER; 2536 2537 PL_lex_state = PL_parser->lex_super_state; 2538 SAVEI8(PL_lex_dojoin); 2539 SAVEI32(PL_lex_brackets); 2540 SAVEI32(PL_lex_allbrackets); 2541 SAVEI32(PL_lex_formbrack); 2542 SAVEI8(PL_lex_fakeeof); 2543 SAVEI32(PL_lex_casemods); 2544 SAVEI32(PL_lex_starts); 2545 SAVEI8(PL_lex_state); 2546 SAVESPTR(PL_lex_repl); 2547 SAVEVPTR(PL_lex_inpat); 2548 SAVEI16(PL_lex_inwhat); 2549 if (is_heredoc) 2550 { 2551 SAVECOPLINE(PL_curcop); 2552 SAVEI32(PL_multi_end); 2553 SAVEI32(PL_parser->herelines); 2554 PL_parser->herelines = 0; 2555 } 2556 SAVEIV(PL_multi_close); 2557 SAVEPPTR(PL_bufptr); 2558 SAVEPPTR(PL_bufend); 2559 SAVEPPTR(PL_oldbufptr); 2560 SAVEPPTR(PL_oldoldbufptr); 2561 SAVEPPTR(PL_last_lop); 2562 SAVEPPTR(PL_last_uni); 2563 SAVEPPTR(PL_linestart); 2564 SAVESPTR(PL_linestr); 2565 SAVEGENERICPV(PL_lex_brackstack); 2566 SAVEGENERICPV(PL_lex_casestack); 2567 SAVEGENERICPV(PL_parser->lex_shared); 2568 SAVEBOOL(PL_parser->lex_re_reparsing); 2569 SAVEI32(PL_copline); 2570 2571 /* The here-doc parser needs to be able to peek into outer lexing 2572 scopes to find the body of the here-doc. So we put PL_linestr and 2573 PL_bufptr into lex_shared, to 'share' those values. 2574 */ 2575 PL_parser->lex_shared->ls_linestr = PL_linestr; 2576 PL_parser->lex_shared->ls_bufptr = PL_bufptr; 2577 2578 PL_linestr = PL_lex_stuff; 2579 PL_lex_repl = PL_parser->lex_sub_repl; 2580 PL_lex_stuff = NULL; 2581 PL_parser->lex_sub_repl = NULL; 2582 2583 /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets 2584 set for an inner quote-like operator and then an error causes scope- 2585 popping. We must not have a PL_lex_stuff value left dangling, as 2586 that breaks assumptions elsewhere. See bug #123617. */ 2587 SAVEGENERICSV(PL_lex_stuff); 2588 SAVEGENERICSV(PL_parser->lex_sub_repl); 2589 2590 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart 2591 = SvPVX(PL_linestr); 2592 PL_bufend += SvCUR(PL_linestr); 2593 PL_last_lop = PL_last_uni = NULL; 2594 SAVEFREESV(PL_linestr); 2595 if (PL_lex_repl) SAVEFREESV(PL_lex_repl); 2596 2597 PL_lex_dojoin = FALSE; 2598 PL_lex_brackets = PL_lex_formbrack = 0; 2599 PL_lex_allbrackets = 0; 2600 PL_lex_fakeeof = LEX_FAKEEOF_NEVER; 2601 Newx(PL_lex_brackstack, 120, char); 2602 Newx(PL_lex_casestack, 12, char); 2603 PL_lex_casemods = 0; 2604 *PL_lex_casestack = '\0'; 2605 PL_lex_starts = 0; 2606 PL_lex_state = LEX_INTERPCONCAT; 2607 if (is_heredoc) 2608 CopLINE_set(PL_curcop, (line_t)PL_multi_start); 2609 PL_copline = NOLINE; 2610 2611 Newxz(shared, 1, LEXSHARED); 2612 shared->ls_prev = PL_parser->lex_shared; 2613 PL_parser->lex_shared = shared; 2614 2615 PL_lex_inwhat = PL_parser->lex_sub_inwhat; 2616 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS; 2617 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST) 2618 PL_lex_inpat = PL_parser->lex_sub_op; 2619 else 2620 PL_lex_inpat = NULL; 2621 2622 PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING); 2623 PL_in_eval &= ~EVAL_RE_REPARSING; 2624 2625 return SUBLEXSTART; 2626 } 2627 2628 /* 2629 * S_sublex_done 2630 * Restores lexer state after a S_sublex_push. 2631 */ 2632 2633 STATIC I32 2634 S_sublex_done(pTHX) 2635 { 2636 if (!PL_lex_starts++) { 2637 SV * const sv = newSVpvs(""); 2638 if (SvUTF8(PL_linestr)) 2639 SvUTF8_on(sv); 2640 PL_expect = XOPERATOR; 2641 pl_yylval.opval = newSVOP(OP_CONST, 0, sv); 2642 return THING; 2643 } 2644 2645 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */ 2646 PL_lex_state = LEX_INTERPCASEMOD; 2647 return yylex(); 2648 } 2649 2650 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */ 2651 assert(PL_lex_inwhat != OP_TRANSR); 2652 if (PL_lex_repl) { 2653 assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS); 2654 PL_linestr = PL_lex_repl; 2655 PL_lex_inpat = 0; 2656 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr); 2657 PL_bufend += SvCUR(PL_linestr); 2658 PL_last_lop = PL_last_uni = NULL; 2659 PL_lex_dojoin = FALSE; 2660 PL_lex_brackets = 0; 2661 PL_lex_allbrackets = 0; 2662 PL_lex_fakeeof = LEX_FAKEEOF_NEVER; 2663 PL_lex_casemods = 0; 2664 *PL_lex_casestack = '\0'; 2665 PL_lex_starts = 0; 2666 if (SvEVALED(PL_lex_repl)) { 2667 PL_lex_state = LEX_INTERPNORMAL; 2668 PL_lex_starts++; 2669 /* we don't clear PL_lex_repl here, so that we can check later 2670 whether this is an evalled subst; that means we rely on the 2671 logic to ensure sublex_done() is called again only via the 2672 branch (in yylex()) that clears PL_lex_repl, else we'll loop */ 2673 } 2674 else { 2675 PL_lex_state = LEX_INTERPCONCAT; 2676 PL_lex_repl = NULL; 2677 } 2678 if (SvTYPE(PL_linestr) >= SVt_PVNV) { 2679 CopLINE(PL_curcop) += 2680 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xnv_lines 2681 + PL_parser->herelines; 2682 PL_parser->herelines = 0; 2683 } 2684 return PERLY_SLASH; 2685 } 2686 else { 2687 const line_t l = CopLINE(PL_curcop); 2688 LEAVE; 2689 if (PL_parser->sub_error_count != PL_error_count) { 2690 if (PL_parser->sub_no_recover) { 2691 yyquit(); 2692 NOT_REACHED; 2693 } 2694 } 2695 if (PL_multi_close == '<') 2696 PL_parser->herelines += l - PL_multi_end; 2697 PL_bufend = SvPVX(PL_linestr); 2698 PL_bufend += SvCUR(PL_linestr); 2699 PL_expect = XOPERATOR; 2700 return SUBLEXEND; 2701 } 2702 } 2703 2704 HV * 2705 Perl_load_charnames(pTHX_ SV * char_name, const char * context, 2706 const STRLEN context_len, const char ** error_msg) 2707 { 2708 /* Load the official _charnames module if not already there. The 2709 * parameters are just to give info for any error messages generated: 2710 * char_name a name to look up which is the reason for loading this 2711 * context 'char_name' in the context in the input in which it appears 2712 * context_len how many bytes 'context' occupies 2713 * error_msg *error_msg will be set to any error 2714 * 2715 * Returns the ^H table if success; otherwise NULL */ 2716 2717 unsigned int i; 2718 HV * table; 2719 SV **cvp; 2720 SV * res; 2721 2722 PERL_ARGS_ASSERT_LOAD_CHARNAMES; 2723 2724 /* This loop is executed 1 1/2 times. On the first time through, if it 2725 * isn't already loaded, try loading it, and iterate just once to see if it 2726 * worked. */ 2727 for (i = 0; i < 2; i++) { 2728 table = GvHV(PL_hintgv); /* ^H */ 2729 2730 if ( table 2731 && (PL_hints & HINT_LOCALIZE_HH) 2732 && (cvp = hv_fetchs(table, "charnames", FALSE)) 2733 && SvOK(*cvp)) 2734 { 2735 return table; /* Quit if already loaded */ 2736 } 2737 2738 if (i == 0) { 2739 Perl_load_module(aTHX_ 2740 0, 2741 newSVpvs("_charnames"), 2742 2743 /* version parameter; no need to specify it, as if we get too early 2744 * a version, will fail anyway, not being able to find 'charnames' 2745 * */ 2746 NULL, 2747 newSVpvs(":full"), 2748 newSVpvs(":short"), 2749 NULL); 2750 } 2751 } 2752 2753 /* Here, it failed; new_constant will give appropriate error messages */ 2754 *error_msg = NULL; 2755 res = new_constant( NULL, 0, "charnames", char_name, NULL, 2756 context, context_len, error_msg); 2757 SvREFCNT_dec(res); 2758 2759 return NULL; 2760 } 2761 2762 STATIC SV* 2763 S_get_and_check_backslash_N_name_wrapper(pTHX_ const char* s, const char* const e) 2764 { 2765 /* This justs wraps get_and_check_backslash_N_name() to output any error 2766 * message it returns. */ 2767 2768 const char * error_msg = NULL; 2769 SV * result; 2770 2771 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME_WRAPPER; 2772 2773 /* charnames doesn't work well if there have been errors found */ 2774 if (PL_error_count > 0) { 2775 return NULL; 2776 } 2777 2778 result = get_and_check_backslash_N_name(s, e, cBOOL(UTF), &error_msg); 2779 2780 if (error_msg) { 2781 yyerror_pv(error_msg, UTF ? SVf_UTF8 : 0); 2782 } 2783 2784 return result; 2785 } 2786 2787 SV* 2788 Perl_get_and_check_backslash_N_name(pTHX_ const char* s, 2789 const char* e, 2790 const bool is_utf8, 2791 const char ** error_msg) 2792 { 2793 /* <s> points to first character of interior of \N{}, <e> to one beyond the 2794 * interior, hence to the "}". Finds what the name resolves to, returning 2795 * an SV* containing it; NULL if no valid one found. 2796 * 2797 * 'is_utf8' is TRUE if we know we want the result to be UTF-8 even if it 2798 * doesn't have to be. */ 2799 2800 SV* char_name; 2801 SV* res; 2802 HV * table; 2803 SV **cvp; 2804 SV *cv; 2805 SV *rv; 2806 HV *stash; 2807 2808 /* Points to the beginning of the \N{... so that any messages include the 2809 * context of what's failing*/ 2810 const char* context = s - 3; 2811 STRLEN context_len = e - context + 1; /* include all of \N{...} */ 2812 2813 2814 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME; 2815 2816 assert(e >= s); 2817 assert(s > (char *) 3); 2818 2819 while (s < e && isBLANK(*s)) { 2820 s++; 2821 } 2822 2823 while (s < e && isBLANK(*(e - 1))) { 2824 e--; 2825 } 2826 2827 char_name = newSVpvn_flags(s, e - s, (is_utf8) ? SVf_UTF8 : 0); 2828 2829 if (!SvCUR(char_name)) { 2830 SvREFCNT_dec_NN(char_name); 2831 /* diag_listed_as: Unknown charname '%s' */ 2832 *error_msg = Perl_form(aTHX_ "Unknown charname ''"); 2833 return NULL; 2834 } 2835 2836 /* Autoload the charnames module */ 2837 2838 table = load_charnames(char_name, context, context_len, error_msg); 2839 if (table == NULL) { 2840 return NULL; 2841 } 2842 2843 *error_msg = NULL; 2844 res = new_constant( NULL, 0, "charnames", char_name, NULL, 2845 context, context_len, error_msg); 2846 if (*error_msg) { 2847 *error_msg = Perl_form(aTHX_ "Unknown charname '%s'", SvPVX(char_name)); 2848 2849 SvREFCNT_dec(res); 2850 return NULL; 2851 } 2852 2853 /* See if the charnames handler is the Perl core's, and if so, we can skip 2854 * the validation needed for a user-supplied one, as Perl's does its own 2855 * validation. */ 2856 cvp = hv_fetchs(table, "charnames", FALSE); 2857 if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv), 2858 SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL)) 2859 { 2860 const char * const name = HvNAME(stash); 2861 if (memEQs(name, HvNAMELEN(stash), "_charnames")) { 2862 return res; 2863 } 2864 } 2865 2866 /* Here, it isn't Perl's charname handler. We can't rely on a 2867 * user-supplied handler to validate the input name. For non-ut8 input, 2868 * look to see that the first character is legal. Then loop through the 2869 * rest checking that each is a continuation */ 2870 2871 /* This code makes the reasonable assumption that the only Latin1-range 2872 * characters that begin a character name alias are alphabetic, otherwise 2873 * would have to create a isCHARNAME_BEGIN macro */ 2874 2875 if (! is_utf8) { 2876 if (! isALPHAU(*s)) { 2877 goto bad_charname; 2878 } 2879 s++; 2880 while (s < e) { 2881 if (! isCHARNAME_CONT(*s)) { 2882 goto bad_charname; 2883 } 2884 if (*s == ' ' && *(s-1) == ' ') { 2885 goto multi_spaces; 2886 } 2887 s++; 2888 } 2889 } 2890 else { 2891 /* Similarly for utf8. For invariants can check directly; for other 2892 * Latin1, can calculate their code point and check; otherwise use an 2893 * inversion list */ 2894 if (UTF8_IS_INVARIANT(*s)) { 2895 if (! isALPHAU(*s)) { 2896 goto bad_charname; 2897 } 2898 s++; 2899 } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) { 2900 if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) { 2901 goto bad_charname; 2902 } 2903 s += 2; 2904 } 2905 else { 2906 if (! _invlist_contains_cp(PL_utf8_charname_begin, 2907 utf8_to_uvchr_buf((U8 *) s, 2908 (U8 *) e, 2909 NULL))) 2910 { 2911 goto bad_charname; 2912 } 2913 s += UTF8SKIP(s); 2914 } 2915 2916 while (s < e) { 2917 if (UTF8_IS_INVARIANT(*s)) { 2918 if (! isCHARNAME_CONT(*s)) { 2919 goto bad_charname; 2920 } 2921 if (*s == ' ' && *(s-1) == ' ') { 2922 goto multi_spaces; 2923 } 2924 s++; 2925 } 2926 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) { 2927 if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) 2928 { 2929 goto bad_charname; 2930 } 2931 s += 2; 2932 } 2933 else { 2934 if (! _invlist_contains_cp(PL_utf8_charname_continue, 2935 utf8_to_uvchr_buf((U8 *) s, 2936 (U8 *) e, 2937 NULL))) 2938 { 2939 goto bad_charname; 2940 } 2941 s += UTF8SKIP(s); 2942 } 2943 } 2944 } 2945 if (*(s-1) == ' ') { 2946 /* diag_listed_as: charnames alias definitions may not contain 2947 trailing white-space; marked by <-- HERE in %s 2948 */ 2949 *error_msg = Perl_form(aTHX_ 2950 "charnames alias definitions may not contain trailing " 2951 "white-space; marked by <-- HERE in %.*s<-- HERE %.*s", 2952 (int)(s - context + 1), context, 2953 (int)(e - s + 1), s + 1); 2954 return NULL; 2955 } 2956 2957 if (SvUTF8(res)) { /* Don't accept malformed charname value */ 2958 const U8* first_bad_char_loc; 2959 STRLEN len; 2960 const char* const str = SvPV_const(res, len); 2961 if (UNLIKELY(! is_utf8_string_loc((U8 *) str, len, 2962 &first_bad_char_loc))) 2963 { 2964 _force_out_malformed_utf8_message(first_bad_char_loc, 2965 (U8 *) PL_parser->bufend, 2966 0, 2967 0 /* 0 means don't die */ ); 2968 /* diag_listed_as: Malformed UTF-8 returned by \N{%s} 2969 immediately after '%s' */ 2970 *error_msg = Perl_form(aTHX_ 2971 "Malformed UTF-8 returned by %.*s immediately after '%.*s'", 2972 (int) context_len, context, 2973 (int) ((char *) first_bad_char_loc - str), str); 2974 return NULL; 2975 } 2976 } 2977 2978 return res; 2979 2980 bad_charname: { 2981 2982 /* The final %.*s makes sure that should the trailing NUL be missing 2983 * that this print won't run off the end of the string */ 2984 /* diag_listed_as: Invalid character in \N{...}; marked by <-- HERE 2985 in \N{%s} */ 2986 *error_msg = Perl_form(aTHX_ 2987 "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s", 2988 (int)(s - context + 1), context, 2989 (int)(e - s + 1), s + 1); 2990 return NULL; 2991 } 2992 2993 multi_spaces: 2994 /* diag_listed_as: charnames alias definitions may not contain a 2995 sequence of multiple spaces; marked by <-- HERE 2996 in %s */ 2997 *error_msg = Perl_form(aTHX_ 2998 "charnames alias definitions may not contain a sequence of " 2999 "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s", 3000 (int)(s - context + 1), context, 3001 (int)(e - s + 1), s + 1); 3002 return NULL; 3003 } 3004 3005 /* 3006 scan_const 3007 3008 Extracts the next constant part of a pattern, double-quoted string, 3009 or transliteration. This is terrifying code. 3010 3011 For example, in parsing the double-quoted string "ab\x63$d", it would 3012 stop at the '$' and return an OP_CONST containing 'abc'. 3013 3014 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's 3015 processing a pattern (PL_lex_inpat is true), a transliteration 3016 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string. 3017 3018 Returns a pointer to the character scanned up to. If this is 3019 advanced from the start pointer supplied (i.e. if anything was 3020 successfully parsed), will leave an OP_CONST for the substring scanned 3021 in pl_yylval. Caller must intuit reason for not parsing further 3022 by looking at the next characters herself. 3023 3024 In patterns: 3025 expand: 3026 \N{FOO} => \N{U+hex_for_character_FOO} 3027 (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...}) 3028 3029 pass through: 3030 all other \-char, including \N and \N{ apart from \N{ABC} 3031 3032 stops on: 3033 @ and $ where it appears to be a var, but not for $ as tail anchor 3034 \l \L \u \U \Q \E 3035 (?{ or (??{ 3036 3037 In transliterations: 3038 characters are VERY literal, except for - not at the start or end 3039 of the string, which indicates a range. However some backslash sequences 3040 are recognized: \r, \n, and the like 3041 \007 \o{}, \x{}, \N{} 3042 If all elements in the transliteration are below 256, 3043 scan_const expands the range to the full set of intermediate 3044 characters. If the range is in utf8, the hyphen is replaced with 3045 a certain range mark which will be handled by pmtrans() in op.c. 3046 3047 In double-quoted strings: 3048 backslashes: 3049 all those recognized in transliterations 3050 deprecated backrefs: \1 (in substitution replacements) 3051 case and quoting: \U \Q \E 3052 stops on @ and $ 3053 3054 scan_const does *not* construct ops to handle interpolated strings. 3055 It stops processing as soon as it finds an embedded $ or @ variable 3056 and leaves it to the caller to work out what's going on. 3057 3058 embedded arrays (whether in pattern or not) could be: 3059 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-. 3060 3061 $ in double-quoted strings must be the symbol of an embedded scalar. 3062 3063 $ in pattern could be $foo or could be tail anchor. Assumption: 3064 it's a tail anchor if $ is the last thing in the string, or if it's 3065 followed by one of "()| \r\n\t" 3066 3067 \1 (backreferences) are turned into $1 in substitutions 3068 3069 The structure of the code is 3070 while (there's a character to process) { 3071 handle transliteration ranges 3072 skip regexp comments /(?#comment)/ and codes /(?{code})/ 3073 skip #-initiated comments in //x patterns 3074 check for embedded arrays 3075 check for embedded scalars 3076 if (backslash) { 3077 deprecate \1 in substitution replacements 3078 handle string-changing backslashes \l \U \Q \E, etc. 3079 switch (what was escaped) { 3080 handle \- in a transliteration (becomes a literal -) 3081 if a pattern and not \N{, go treat as regular character 3082 handle \132 (octal characters) 3083 handle \x15 and \x{1234} (hex characters) 3084 handle \N{name} (named characters, also \N{3,5} in a pattern) 3085 handle \cV (control characters) 3086 handle printf-style backslashes (\f, \r, \n, etc) 3087 } (end switch) 3088 continue 3089 } (end if backslash) 3090 handle regular character 3091 } (end while character to read) 3092 3093 */ 3094 3095 STATIC char * 3096 S_scan_const(pTHX_ char *start) 3097 { 3098 const char * const send = PL_bufend;/* end of the constant */ 3099 SV *sv = newSV(send - start); /* sv for the constant. See note below 3100 on sizing. */ 3101 char *s = start; /* start of the constant */ 3102 char *d = SvPVX(sv); /* destination for copies */ 3103 bool dorange = FALSE; /* are we in a translit range? */ 3104 bool didrange = FALSE; /* did we just finish a range? */ 3105 bool in_charclass = FALSE; /* within /[...]/ */ 3106 const bool s_is_utf8 = cBOOL(UTF); /* Is the source string assumed to be 3107 UTF8? But, this can show as true 3108 when the source isn't utf8, as for 3109 example when it is entirely composed 3110 of hex constants */ 3111 bool d_is_utf8 = FALSE; /* Output constant is UTF8 */ 3112 STRLEN utf8_variant_count = 0; /* When not in UTF-8, this counts the 3113 number of characters found so far 3114 that will expand (into 2 bytes) 3115 should we have to convert to 3116 UTF-8) */ 3117 SV *res; /* result from charnames */ 3118 STRLEN offset_to_max = 0; /* The offset in the output to where the range 3119 high-end character is temporarily placed */ 3120 3121 /* Does something require special handling in tr/// ? This avoids extra 3122 * work in a less likely case. As such, khw didn't feel it was worth 3123 * adding any branches to the more mainline code to handle this, which 3124 * means that this doesn't get set in some circumstances when things like 3125 * \x{100} get expanded out. As a result there needs to be extra testing 3126 * done in the tr code */ 3127 bool has_above_latin1 = FALSE; 3128 3129 /* Note on sizing: The scanned constant is placed into sv, which is 3130 * initialized by newSV() assuming one byte of output for every byte of 3131 * input. This routine expects newSV() to allocate an extra byte for a 3132 * trailing NUL, which this routine will append if it gets to the end of 3133 * the input. There may be more bytes of input than output (eg., \N{LATIN 3134 * CAPITAL LETTER A}), or more output than input if the constant ends up 3135 * recoded to utf8, but each time a construct is found that might increase 3136 * the needed size, SvGROW() is called. Its size parameter each time is 3137 * based on the best guess estimate at the time, namely the length used so 3138 * far, plus the length the current construct will occupy, plus room for 3139 * the trailing NUL, plus one byte for every input byte still unscanned */ 3140 3141 UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses 3142 before set */ 3143 #ifdef EBCDIC 3144 int backslash_N = 0; /* ? was the character from \N{} */ 3145 int non_portable_endpoint = 0; /* ? In a range is an endpoint 3146 platform-specific like \x65 */ 3147 #endif 3148 3149 PERL_ARGS_ASSERT_SCAN_CONST; 3150 3151 assert(PL_lex_inwhat != OP_TRANSR); 3152 3153 /* Protect sv from errors and fatal warnings. */ 3154 ENTER_with_name("scan_const"); 3155 SAVEFREESV(sv); 3156 3157 /* A bunch of code in the loop below assumes that if s[n] exists and is not 3158 * NUL, then s[n+1] exists. This assertion makes sure that assumption is 3159 * valid */ 3160 assert(*send == '\0'); 3161 3162 while (s < send 3163 || dorange /* Handle tr/// range at right edge of input */ 3164 ) { 3165 3166 /* get transliterations out of the way (they're most literal) */ 3167 if (PL_lex_inwhat == OP_TRANS) { 3168 3169 /* But there isn't any special handling necessary unless there is a 3170 * range, so for most cases we just drop down and handle the value 3171 * as any other. There are two exceptions. 3172 * 3173 * 1. A hyphen indicates that we are actually going to have a 3174 * range. In this case, skip the '-', set a flag, then drop 3175 * down to handle what should be the end range value. 3176 * 2. After we've handled that value, the next time through, that 3177 * flag is set and we fix up the range. 3178 * 3179 * Ranges entirely within Latin1 are expanded out entirely, in 3180 * order to make the transliteration a simple table look-up. 3181 * Ranges that extend above Latin1 have to be done differently, so 3182 * there is no advantage to expanding them here, so they are 3183 * stored here as Min, RANGE_INDICATOR, Max. 'RANGE_INDICATOR' is 3184 * a byte that can't occur in legal UTF-8, and hence can signify a 3185 * hyphen without any possible ambiguity. On EBCDIC machines, if 3186 * the range is expressed as Unicode, the Latin1 portion is 3187 * expanded out even if the range extends above Latin1. This is 3188 * because each code point in it has to be processed here 3189 * individually to get its native translation */ 3190 3191 if (! dorange) { 3192 3193 /* Here, we don't think we're in a range. If the new character 3194 * is not a hyphen; or if it is a hyphen, but it's too close to 3195 * either edge to indicate a range, or if we haven't output any 3196 * characters yet then it's a regular character. */ 3197 if (*s != '-' || s >= send - 1 || s == start || d == SvPVX(sv)) 3198 { 3199 3200 /* A regular character. Process like any other, but first 3201 * clear any flags */ 3202 didrange = FALSE; 3203 dorange = FALSE; 3204 #ifdef EBCDIC 3205 non_portable_endpoint = 0; 3206 backslash_N = 0; 3207 #endif 3208 /* The tests here for being above Latin1 and similar ones 3209 * in the following 'else' suffice to find all such 3210 * occurences in the constant, except those added by a 3211 * backslash escape sequence, like \x{100}. Mostly, those 3212 * set 'has_above_latin1' as appropriate */ 3213 if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) { 3214 has_above_latin1 = TRUE; 3215 } 3216 3217 /* Drops down to generic code to process current byte */ 3218 } 3219 else { /* Is a '-' in the context where it means a range */ 3220 if (didrange) { /* Something like y/A-C-Z// */ 3221 Perl_croak(aTHX_ "Ambiguous range in transliteration" 3222 " operator"); 3223 } 3224 3225 dorange = TRUE; 3226 3227 s++; /* Skip past the hyphen */ 3228 3229 /* d now points to where the end-range character will be 3230 * placed. Drop down to get that character. We'll finish 3231 * processing the range the next time through the loop */ 3232 3233 if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) { 3234 has_above_latin1 = TRUE; 3235 } 3236 3237 /* Drops down to generic code to process current byte */ 3238 } 3239 } /* End of not a range */ 3240 else { 3241 /* Here we have parsed a range. Now must handle it. At this 3242 * point: 3243 * 'sv' is a SV* that contains the output string we are 3244 * constructing. The final two characters in that string 3245 * are the range start and range end, in order. 3246 * 'd' points to just beyond the range end in the 'sv' string, 3247 * where we would next place something 3248 */ 3249 char * max_ptr; 3250 char * min_ptr; 3251 IV range_min; 3252 IV range_max; /* last character in range */ 3253 STRLEN grow; 3254 Size_t offset_to_min = 0; 3255 Size_t extras = 0; 3256 #ifdef EBCDIC 3257 bool convert_unicode; 3258 IV real_range_max = 0; 3259 #endif 3260 /* Get the code point values of the range ends. */ 3261 max_ptr = (d_is_utf8) ? (char *) utf8_hop( (U8*) d, -1) : d - 1; 3262 offset_to_max = max_ptr - SvPVX_const(sv); 3263 if (d_is_utf8) { 3264 /* We know the utf8 is valid, because we just constructed 3265 * it ourselves in previous loop iterations */ 3266 min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1); 3267 range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL); 3268 range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL); 3269 3270 /* This compensates for not all code setting 3271 * 'has_above_latin1', so that we don't skip stuff that 3272 * should be executed */ 3273 if (range_max > 255) { 3274 has_above_latin1 = TRUE; 3275 } 3276 } 3277 else { 3278 min_ptr = max_ptr - 1; 3279 range_min = * (U8*) min_ptr; 3280 range_max = * (U8*) max_ptr; 3281 } 3282 3283 /* If the range is just a single code point, like tr/a-a/.../, 3284 * that code point is already in the output, twice. We can 3285 * just back up over the second instance and avoid all the rest 3286 * of the work. But if it is a variant character, it's been 3287 * counted twice, so decrement. (This unlikely scenario is 3288 * special cased, like the one for a range of 2 code points 3289 * below, only because the main-line code below needs a range 3290 * of 3 or more to work without special casing. Might as well 3291 * get it out of the way now.) */ 3292 if (UNLIKELY(range_max == range_min)) { 3293 d = max_ptr; 3294 if (! d_is_utf8 && ! UVCHR_IS_INVARIANT(range_max)) { 3295 utf8_variant_count--; 3296 } 3297 goto range_done; 3298 } 3299 3300 #ifdef EBCDIC 3301 /* On EBCDIC platforms, we may have to deal with portable 3302 * ranges. These happen if at least one range endpoint is a 3303 * Unicode value (\N{...}), or if the range is a subset of 3304 * [A-Z] or [a-z], and both ends are literal characters, 3305 * like 'A', and not like \x{C1} */ 3306 convert_unicode = 3307 cBOOL(backslash_N) /* \N{} forces Unicode, 3308 hence portable range */ 3309 || ( ! non_portable_endpoint 3310 && (( isLOWER_A(range_min) && isLOWER_A(range_max)) 3311 || (isUPPER_A(range_min) && isUPPER_A(range_max)))); 3312 if (convert_unicode) { 3313 3314 /* Special handling is needed for these portable ranges. 3315 * They are defined to be in Unicode terms, which includes 3316 * all the Unicode code points between the end points. 3317 * Convert to Unicode to get the Unicode range. Later we 3318 * will convert each code point in the range back to 3319 * native. */ 3320 range_min = NATIVE_TO_UNI(range_min); 3321 range_max = NATIVE_TO_UNI(range_max); 3322 } 3323 #endif 3324 3325 if (range_min > range_max) { 3326 #ifdef EBCDIC 3327 if (convert_unicode) { 3328 /* Need to convert back to native for meaningful 3329 * messages for this platform */ 3330 range_min = UNI_TO_NATIVE(range_min); 3331 range_max = UNI_TO_NATIVE(range_max); 3332 } 3333 #endif 3334 /* Use the characters themselves for the error message if 3335 * ASCII printables; otherwise some visible representation 3336 * of them */ 3337 if (isPRINT_A(range_min) && isPRINT_A(range_max)) { 3338 Perl_croak(aTHX_ 3339 "Invalid range \"%c-%c\" in transliteration operator", 3340 (char)range_min, (char)range_max); 3341 } 3342 #ifdef EBCDIC 3343 else if (convert_unicode) { 3344 /* diag_listed_as: Invalid range "%s" in transliteration operator */ 3345 Perl_croak(aTHX_ 3346 "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04" 3347 UVXf "}\" in transliteration operator", 3348 range_min, range_max); 3349 } 3350 #endif 3351 else { 3352 /* diag_listed_as: Invalid range "%s" in transliteration operator */ 3353 Perl_croak(aTHX_ 3354 "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\"" 3355 " in transliteration operator", 3356 range_min, range_max); 3357 } 3358 } 3359 3360 /* If the range is exactly two code points long, they are 3361 * already both in the output */ 3362 if (UNLIKELY(range_min + 1 == range_max)) { 3363 goto range_done; 3364 } 3365 3366 /* Here the range contains at least 3 code points */ 3367 3368 if (d_is_utf8) { 3369 3370 /* If everything in the transliteration is below 256, we 3371 * can avoid special handling later. A translation table 3372 * for each of those bytes is created by op.c. So we 3373 * expand out all ranges to their constituent code points. 3374 * But if we've encountered something above 255, the 3375 * expanding won't help, so skip doing that. But if it's 3376 * EBCDIC, we may have to look at each character below 256 3377 * if we have to convert to/from Unicode values */ 3378 if ( has_above_latin1 3379 #ifdef EBCDIC 3380 && (range_min > 255 || ! convert_unicode) 3381 #endif 3382 ) { 3383 const STRLEN off = d - SvPVX(sv); 3384 const STRLEN extra = 1 + (send - s) + 1; 3385 char *e; 3386 3387 /* Move the high character one byte to the right; then 3388 * insert between it and the range begin, an illegal 3389 * byte which serves to indicate this is a range (using 3390 * a '-' would be ambiguous). */ 3391 3392 if (off + extra > SvLEN(sv)) { 3393 d = off + SvGROW(sv, off + extra); 3394 max_ptr = d - off + offset_to_max; 3395 } 3396 3397 e = d++; 3398 while (e-- > max_ptr) { 3399 *(e + 1) = *e; 3400 } 3401 *(e + 1) = (char) RANGE_INDICATOR; 3402 goto range_done; 3403 } 3404 3405 /* Here, we're going to expand out the range. For EBCDIC 3406 * the range can extend above 255 (not so in ASCII), so 3407 * for EBCDIC, split it into the parts above and below 3408 * 255/256 */ 3409 #ifdef EBCDIC 3410 if (range_max > 255) { 3411 real_range_max = range_max; 3412 range_max = 255; 3413 } 3414 #endif 3415 } 3416 3417 /* Here we need to expand out the string to contain each 3418 * character in the range. Grow the output to handle this. 3419 * For non-UTF8, we need a byte for each code point in the 3420 * range, minus the three that we've already allocated for: the 3421 * hyphen, the min, and the max. For UTF-8, we need this 3422 * plus an extra byte for each code point that occupies two 3423 * bytes (is variant) when in UTF-8 (except we've already 3424 * allocated for the end points, including if they are 3425 * variants). For ASCII platforms and Unicode ranges on EBCDIC 3426 * platforms, it's easy to calculate a precise number. To 3427 * start, we count the variants in the range, which we need 3428 * elsewhere in this function anyway. (For the case where it 3429 * isn't easy to calculate, 'extras' has been initialized to 0, 3430 * and the calculation is done in a loop further down.) */ 3431 #ifdef EBCDIC 3432 if (convert_unicode) 3433 #endif 3434 { 3435 /* This is executed unconditionally on ASCII, and for 3436 * Unicode ranges on EBCDIC. Under these conditions, all 3437 * code points above a certain value are variant; and none 3438 * under that value are. We just need to find out how much 3439 * of the range is above that value. We don't count the 3440 * end points here, as they will already have been counted 3441 * as they were parsed. */ 3442 if (range_min >= UTF_CONTINUATION_MARK) { 3443 3444 /* The whole range is made up of variants */ 3445 extras = (range_max - 1) - (range_min + 1) + 1; 3446 } 3447 else if (range_max >= UTF_CONTINUATION_MARK) { 3448 3449 /* Only the higher portion of the range is variants */ 3450 extras = (range_max - 1) - UTF_CONTINUATION_MARK + 1; 3451 } 3452 3453 utf8_variant_count += extras; 3454 } 3455 3456 /* The base growth is the number of code points in the range, 3457 * not including the endpoints, which have already been sized 3458 * for (and output). We don't subtract for the hyphen, as it 3459 * has been parsed but not output, and the SvGROW below is 3460 * based only on what's been output plus what's left to parse. 3461 * */ 3462 grow = (range_max - 1) - (range_min + 1) + 1; 3463 3464 if (d_is_utf8) { 3465 #ifdef EBCDIC 3466 /* In some cases in EBCDIC, we haven't yet calculated a 3467 * precise amount needed for the UTF-8 variants. Just 3468 * assume the worst case, that everything will expand by a 3469 * byte */ 3470 if (! convert_unicode) { 3471 grow *= 2; 3472 } 3473 else 3474 #endif 3475 { 3476 /* Otherwise we know exactly how many variants there 3477 * are in the range. */ 3478 grow += extras; 3479 } 3480 } 3481 3482 /* Grow, but position the output to overwrite the range min end 3483 * point, because in some cases we overwrite that */ 3484 SvCUR_set(sv, d - SvPVX_const(sv)); 3485 offset_to_min = min_ptr - SvPVX_const(sv); 3486 3487 /* See Note on sizing above. */ 3488 d = offset_to_min + SvGROW(sv, SvCUR(sv) 3489 + (send - s) 3490 + grow 3491 + 1 /* Trailing NUL */ ); 3492 3493 /* Now, we can expand out the range. */ 3494 #ifdef EBCDIC 3495 if (convert_unicode) { 3496 SSize_t i; 3497 3498 /* Recall that the min and max are now in Unicode terms, so 3499 * we have to convert each character to its native 3500 * equivalent */ 3501 if (d_is_utf8) { 3502 for (i = range_min; i <= range_max; i++) { 3503 append_utf8_from_native_byte( 3504 LATIN1_TO_NATIVE((U8) i), 3505 (U8 **) &d); 3506 } 3507 } 3508 else { 3509 for (i = range_min; i <= range_max; i++) { 3510 *d++ = (char)LATIN1_TO_NATIVE((U8) i); 3511 } 3512 } 3513 } 3514 else 3515 #endif 3516 /* Always gets run for ASCII, and sometimes for EBCDIC. */ 3517 { 3518 /* Here, no conversions are necessary, which means that the 3519 * first character in the range is already in 'd' and 3520 * valid, so we can skip overwriting it */ 3521 if (d_is_utf8) { 3522 SSize_t i; 3523 d += UTF8SKIP(d); 3524 for (i = range_min + 1; i <= range_max; i++) { 3525 append_utf8_from_native_byte((U8) i, (U8 **) &d); 3526 } 3527 } 3528 else { 3529 SSize_t i; 3530 d++; 3531 assert(range_min + 1 <= range_max); 3532 for (i = range_min + 1; i < range_max; i++) { 3533 #ifdef EBCDIC 3534 /* In this case on EBCDIC, we haven't calculated 3535 * the variants. Do it here, as we go along */ 3536 if (! UVCHR_IS_INVARIANT(i)) { 3537 utf8_variant_count++; 3538 } 3539 #endif 3540 *d++ = (char)i; 3541 } 3542 3543 /* The range_max is done outside the loop so as to 3544 * avoid having to special case not incrementing 3545 * 'utf8_variant_count' on EBCDIC (it's already been 3546 * counted when originally parsed) */ 3547 *d++ = (char) range_max; 3548 } 3549 } 3550 3551 #ifdef EBCDIC 3552 /* If the original range extended above 255, add in that 3553 * portion. */ 3554 if (real_range_max) { 3555 *d++ = (char) UTF8_TWO_BYTE_HI(0x100); 3556 *d++ = (char) UTF8_TWO_BYTE_LO(0x100); 3557 if (real_range_max > 0x100) { 3558 if (real_range_max > 0x101) { 3559 *d++ = (char) RANGE_INDICATOR; 3560 } 3561 d = (char*)uvchr_to_utf8((U8*)d, real_range_max); 3562 } 3563 } 3564 #endif 3565 3566 range_done: 3567 /* mark the range as done, and continue */ 3568 didrange = TRUE; 3569 dorange = FALSE; 3570 #ifdef EBCDIC 3571 non_portable_endpoint = 0; 3572 backslash_N = 0; 3573 #endif 3574 continue; 3575 } /* End of is a range */ 3576 } /* End of transliteration. Joins main code after these else's */ 3577 else if (*s == '[' && PL_lex_inpat && !in_charclass) { 3578 char *s1 = s-1; 3579 int esc = 0; 3580 while (s1 >= start && *s1-- == '\\') 3581 esc = !esc; 3582 if (!esc) 3583 in_charclass = TRUE; 3584 } 3585 else if (*s == ']' && PL_lex_inpat && in_charclass) { 3586 char *s1 = s-1; 3587 int esc = 0; 3588 while (s1 >= start && *s1-- == '\\') 3589 esc = !esc; 3590 if (!esc) 3591 in_charclass = FALSE; 3592 } 3593 /* skip for regexp comments /(?#comment)/, except for the last 3594 * char, which will be done separately. Stop on (?{..}) and 3595 * friends */ 3596 else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) { 3597 if (s[2] == '#') { 3598 if (s_is_utf8) { 3599 PERL_UINT_FAST8_T len = UTF8SKIP(s); 3600 3601 while (s + len < send && *s != ')') { 3602 Copy(s, d, len, U8); 3603 d += len; 3604 s += len; 3605 len = UTF8_SAFE_SKIP(s, send); 3606 } 3607 } 3608 else while (s+1 < send && *s != ')') { 3609 *d++ = *s++; 3610 } 3611 } 3612 else if (!PL_lex_casemods 3613 && ( s[2] == '{' /* This should match regcomp.c */ 3614 || (s[2] == '?' && s[3] == '{'))) 3615 { 3616 break; 3617 } 3618 } 3619 /* likewise skip #-initiated comments in //x patterns */ 3620 else if (*s == '#' 3621 && PL_lex_inpat 3622 && !in_charclass 3623 && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) 3624 { 3625 while (s < send && *s != '\n') 3626 *d++ = *s++; 3627 } 3628 /* no further processing of single-quoted regex */ 3629 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') 3630 goto default_action; 3631 3632 /* check for embedded arrays 3633 * (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-) 3634 */ 3635 else if (*s == '@' && s[1]) { 3636 if (UTF 3637 ? isIDFIRST_utf8_safe(s+1, send) 3638 : isWORDCHAR_A(s[1])) 3639 { 3640 break; 3641 } 3642 if (memCHRs(":'{$", s[1])) 3643 break; 3644 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-')) 3645 break; /* in regexp, neither @+ nor @- are interpolated */ 3646 } 3647 /* check for embedded scalars. only stop if we're sure it's a 3648 * variable. */ 3649 else if (*s == '$') { 3650 if (!PL_lex_inpat) /* not a regexp, so $ must be var */ 3651 break; 3652 if (s + 1 < send && !memCHRs("()| \r\n\t", s[1])) { 3653 if (s[1] == '\\') { 3654 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 3655 "Possible unintended interpolation of $\\ in regex"); 3656 } 3657 break; /* in regexp, $ might be tail anchor */ 3658 } 3659 } 3660 3661 /* End of else if chain - OP_TRANS rejoin rest */ 3662 3663 if (UNLIKELY(s >= send)) { 3664 assert(s == send); 3665 break; 3666 } 3667 3668 /* backslashes */ 3669 if (*s == '\\' && s+1 < send) { 3670 char* bslash = s; /* point to beginning \ */ 3671 char* rbrace; /* point to ending '}' */ 3672 char* e; /* 1 past the meat (non-blanks) before the 3673 brace */ 3674 s++; 3675 3676 /* warn on \1 - \9 in substitution replacements, but note that \11 3677 * is an octal; and \19 is \1 followed by '9' */ 3678 if (PL_lex_inwhat == OP_SUBST 3679 && !PL_lex_inpat 3680 && isDIGIT(*s) 3681 && *s != '0' 3682 && !isDIGIT(s[1])) 3683 { 3684 /* diag_listed_as: \%d better written as $%d */ 3685 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s); 3686 s = bslash; 3687 *s = '$'; 3688 break; 3689 } 3690 3691 /* string-change backslash escapes */ 3692 if (PL_lex_inwhat != OP_TRANS && *s && memCHRs("lLuUEQF", *s)) { 3693 s = bslash; 3694 break; 3695 } 3696 /* In a pattern, process \N, but skip any other backslash escapes. 3697 * This is because we don't want to translate an escape sequence 3698 * into a meta symbol and have the regex compiler use the meta 3699 * symbol meaning, e.g. \x{2E} would be confused with a dot. But 3700 * in spite of this, we do have to process \N here while the proper 3701 * charnames handler is in scope. See bugs #56444 and #62056. 3702 * 3703 * There is a complication because \N in a pattern may also stand 3704 * for 'match a non-nl', and not mean a charname, in which case its 3705 * processing should be deferred to the regex compiler. To be a 3706 * charname it must be followed immediately by a '{', and not look 3707 * like \N followed by a curly quantifier, i.e., not something like 3708 * \N{3,}. regcurly returns a boolean indicating if it is a legal 3709 * quantifier */ 3710 else if (PL_lex_inpat 3711 && (*s != 'N' 3712 || s[1] != '{' 3713 || regcurly(s + 1, send, NULL))) 3714 { 3715 *d++ = '\\'; 3716 goto default_action; 3717 } 3718 3719 switch (*s) { 3720 default: 3721 { 3722 if ((isALPHANUMERIC(*s))) 3723 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), 3724 "Unrecognized escape \\%c passed through", 3725 *s); 3726 /* default action is to copy the quoted character */ 3727 goto default_action; 3728 } 3729 3730 /* eg. \132 indicates the octal constant 0132 */ 3731 case '0': case '1': case '2': case '3': 3732 case '4': case '5': case '6': case '7': 3733 { 3734 I32 flags = PERL_SCAN_SILENT_ILLDIGIT 3735 | PERL_SCAN_NOTIFY_ILLDIGIT; 3736 STRLEN len = 3; 3737 uv = grok_oct(s, &len, &flags, NULL); 3738 s += len; 3739 if ( (flags & PERL_SCAN_NOTIFY_ILLDIGIT) 3740 && s < send 3741 && isDIGIT(*s) /* like \08, \178 */ 3742 && ckWARN(WARN_MISC)) 3743 { 3744 Perl_warner(aTHX_ packWARN(WARN_MISC), "%s", 3745 form_alien_digit_msg(8, len, s, send, UTF, FALSE)); 3746 } 3747 } 3748 goto NUM_ESCAPE_INSERT; 3749 3750 /* eg. \o{24} indicates the octal constant \024 */ 3751 case 'o': 3752 { 3753 const char* error; 3754 3755 if (! grok_bslash_o(&s, send, 3756 &uv, &error, 3757 NULL, 3758 FALSE, /* Not strict */ 3759 FALSE, /* No illegal cp's */ 3760 UTF)) 3761 { 3762 yyerror(error); 3763 uv = 0; /* drop through to ensure range ends are set */ 3764 } 3765 goto NUM_ESCAPE_INSERT; 3766 } 3767 3768 /* eg. \x24 indicates the hex constant 0x24 */ 3769 case 'x': 3770 { 3771 const char* error; 3772 3773 if (! grok_bslash_x(&s, send, 3774 &uv, &error, 3775 NULL, 3776 FALSE, /* Not strict */ 3777 FALSE, /* No illegal cp's */ 3778 UTF)) 3779 { 3780 yyerror(error); 3781 uv = 0; /* drop through to ensure range ends are set */ 3782 } 3783 } 3784 3785 NUM_ESCAPE_INSERT: 3786 /* Insert oct or hex escaped character. */ 3787 3788 /* Here uv is the ordinal of the next character being added */ 3789 if (UVCHR_IS_INVARIANT(uv)) { 3790 *d++ = (char) uv; 3791 } 3792 else { 3793 if (!d_is_utf8 && uv > 255) { 3794 3795 /* Here, 'uv' won't fit unless we convert to UTF-8. 3796 * If we've only seen invariants so far, all we have to 3797 * do is turn on the flag */ 3798 if (utf8_variant_count == 0) { 3799 SvUTF8_on(sv); 3800 } 3801 else { 3802 SvCUR_set(sv, d - SvPVX_const(sv)); 3803 SvPOK_on(sv); 3804 *d = '\0'; 3805 3806 sv_utf8_upgrade_flags_grow( 3807 sv, 3808 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, 3809 3810 /* Since we're having to grow here, 3811 * make sure we have enough room for 3812 * this escape and a NUL, so the 3813 * code immediately below won't have 3814 * to actually grow again */ 3815 UVCHR_SKIP(uv) 3816 + (STRLEN)(send - s) + 1); 3817 d = SvPVX(sv) + SvCUR(sv); 3818 } 3819 3820 has_above_latin1 = TRUE; 3821 d_is_utf8 = TRUE; 3822 } 3823 3824 if (! d_is_utf8) { 3825 *d++ = (char)uv; 3826 utf8_variant_count++; 3827 } 3828 else { 3829 /* Usually, there will already be enough room in 'sv' 3830 * since such escapes are likely longer than any UTF-8 3831 * sequence they can end up as. This isn't the case on 3832 * EBCDIC where \x{40000000} contains 12 bytes, and the 3833 * UTF-8 for it contains 14. And, we have to allow for 3834 * a trailing NUL. It probably can't happen on ASCII 3835 * platforms, but be safe. See Note on sizing above. */ 3836 const STRLEN needed = d - SvPVX(sv) 3837 + UVCHR_SKIP(uv) 3838 + (send - s) 3839 + 1; 3840 if (UNLIKELY(needed > SvLEN(sv))) { 3841 SvCUR_set(sv, d - SvPVX_const(sv)); 3842 d = SvCUR(sv) + SvGROW(sv, needed); 3843 } 3844 3845 d = (char*) uvchr_to_utf8_flags((U8*)d, uv, 3846 (ckWARN(WARN_PORTABLE)) 3847 ? UNICODE_WARN_PERL_EXTENDED 3848 : 0); 3849 } 3850 } 3851 #ifdef EBCDIC 3852 non_portable_endpoint++; 3853 #endif 3854 continue; 3855 3856 case 'N': 3857 /* In a non-pattern \N must be like \N{U+0041}, or it can be a 3858 * named character, like \N{LATIN SMALL LETTER A}, or a named 3859 * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND 3860 * GRAVE} (except y/// can't handle the latter, croaking). For 3861 * convenience all three forms are referred to as "named 3862 * characters" below. 3863 * 3864 * For patterns, \N also can mean to match a non-newline. Code 3865 * before this 'switch' statement should already have handled 3866 * this situation, and hence this code only has to deal with 3867 * the named character cases. 3868 * 3869 * For non-patterns, the named characters are converted to 3870 * their string equivalents. In patterns, named characters are 3871 * not converted to their ultimate forms for the same reasons 3872 * that other escapes aren't (mainly that the ultimate 3873 * character could be considered a meta-symbol by the regex 3874 * compiler). Instead, they are converted to the \N{U+...} 3875 * form to get the value from the charnames that is in effect 3876 * right now, while preserving the fact that it was a named 3877 * character, so that the regex compiler knows this. 3878 * 3879 * The structure of this section of code (besides checking for 3880 * errors and upgrading to utf8) is: 3881 * If the named character is of the form \N{U+...}, pass it 3882 * through if a pattern; otherwise convert the code point 3883 * to utf8 3884 * Otherwise must be some \N{NAME}: convert to 3885 * \N{U+c1.c2...} if a pattern; otherwise convert to utf8 3886 * 3887 * Transliteration is an exception. The conversion to utf8 is 3888 * only done if the code point requires it to be representable. 3889 * 3890 * Here, 's' points to the 'N'; the test below is guaranteed to 3891 * succeed if we are being called on a pattern, as we already 3892 * know from a test above that the next character is a '{'. A 3893 * non-pattern \N must mean 'named character', which requires 3894 * braces */ 3895 s++; 3896 if (*s != '{') { 3897 yyerror("Missing braces on \\N{}"); 3898 *d++ = '\0'; 3899 continue; 3900 } 3901 s++; 3902 3903 /* If there is no matching '}', it is an error. */ 3904 if (! (rbrace = (char *) memchr(s, '}', send - s))) { 3905 if (! PL_lex_inpat) { 3906 yyerror("Missing right brace on \\N{}"); 3907 } else { 3908 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N"); 3909 } 3910 yyquit(); /* Have exhausted the input. */ 3911 } 3912 3913 /* Here it looks like a named character */ 3914 while (s < rbrace && isBLANK(*s)) { 3915 s++; 3916 } 3917 3918 e = rbrace; 3919 while (s < e && isBLANK(*(e - 1))) { 3920 e--; 3921 } 3922 3923 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */ 3924 s += 2; /* Skip to next char after the 'U+' */ 3925 if (PL_lex_inpat) { 3926 3927 /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */ 3928 /* Check the syntax. */ 3929 if (!isXDIGIT(*s)) { 3930 bad_NU: 3931 yyerror( 3932 "Invalid hexadecimal number in \\N{U+...}" 3933 ); 3934 s = rbrace + 1; 3935 *d++ = '\0'; 3936 continue; 3937 } 3938 while (++s < e) { 3939 if (isXDIGIT(*s)) 3940 continue; 3941 else if ((*s == '.' || *s == '_') 3942 && isXDIGIT(s[1])) 3943 continue; 3944 goto bad_NU; 3945 } 3946 3947 /* Pass everything through unchanged. 3948 * +1 is to include the '}' */ 3949 Copy(bslash, d, rbrace - bslash + 1, char); 3950 d += rbrace - bslash + 1; 3951 } 3952 else { /* Not a pattern: convert the hex to string */ 3953 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES 3954 | PERL_SCAN_SILENT_ILLDIGIT 3955 | PERL_SCAN_SILENT_OVERFLOW 3956 | PERL_SCAN_DISALLOW_PREFIX; 3957 STRLEN len = e - s; 3958 3959 uv = grok_hex(s, &len, &flags, NULL); 3960 if (len == 0 || (len != (STRLEN)(e - s))) 3961 goto bad_NU; 3962 3963 if ( uv > MAX_LEGAL_CP 3964 || (flags & PERL_SCAN_GREATER_THAN_UV_MAX)) 3965 { 3966 yyerror(form_cp_too_large_msg(16, s, len, 0)); 3967 uv = 0; /* drop through to ensure range ends are 3968 set */ 3969 } 3970 3971 /* For non-tr///, if the destination is not in utf8, 3972 * unconditionally recode it to be so. This is 3973 * because \N{} implies Unicode semantics, and scalars 3974 * have to be in utf8 to guarantee those semantics. 3975 * tr/// doesn't care about Unicode rules, so no need 3976 * there to upgrade to UTF-8 for small enough code 3977 * points */ 3978 if (! d_is_utf8 && ( uv > 0xFF 3979 || PL_lex_inwhat != OP_TRANS)) 3980 { 3981 /* See Note on sizing above. */ 3982 const STRLEN extra = OFFUNISKIP(uv) + (send - rbrace) + 1; 3983 3984 SvCUR_set(sv, d - SvPVX_const(sv)); 3985 SvPOK_on(sv); 3986 *d = '\0'; 3987 3988 if (utf8_variant_count == 0) { 3989 SvUTF8_on(sv); 3990 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra); 3991 } 3992 else { 3993 sv_utf8_upgrade_flags_grow( 3994 sv, 3995 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, 3996 extra); 3997 d = SvPVX(sv) + SvCUR(sv); 3998 } 3999 4000 d_is_utf8 = TRUE; 4001 has_above_latin1 = TRUE; 4002 } 4003 4004 /* Add the (Unicode) code point to the output. */ 4005 if (! d_is_utf8 || OFFUNI_IS_INVARIANT(uv)) { 4006 *d++ = (char) LATIN1_TO_NATIVE(uv); 4007 } 4008 else { 4009 d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 4010 (ckWARN(WARN_PORTABLE)) 4011 ? UNICODE_WARN_PERL_EXTENDED 4012 : 0); 4013 } 4014 } 4015 } 4016 else /* Here is \N{NAME} but not \N{U+...}. */ 4017 if (! (res = get_and_check_backslash_N_name_wrapper(s, e))) 4018 { /* Failed. We should die eventually, but for now use a NUL 4019 to keep parsing */ 4020 *d++ = '\0'; 4021 } 4022 else { /* Successfully evaluated the name */ 4023 STRLEN len; 4024 const char *str = SvPV_const(res, len); 4025 if (PL_lex_inpat) { 4026 4027 if (! len) { /* The name resolved to an empty string */ 4028 const char empty_N[] = "\\N{_}"; 4029 Copy(empty_N, d, sizeof(empty_N) - 1, char); 4030 d += sizeof(empty_N) - 1; 4031 } 4032 else { 4033 /* In order to not lose information for the regex 4034 * compiler, pass the result in the specially made 4035 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are 4036 * the code points in hex of each character 4037 * returned by charnames */ 4038 4039 const char *str_end = str + len; 4040 const STRLEN off = d - SvPVX_const(sv); 4041 4042 if (! SvUTF8(res)) { 4043 /* For the non-UTF-8 case, we can determine the 4044 * exact length needed without having to parse 4045 * through the string. Each character takes up 4046 * 2 hex digits plus either a trailing dot or 4047 * the "}" */ 4048 const char initial_text[] = "\\N{U+"; 4049 const STRLEN initial_len = sizeof(initial_text) 4050 - 1; 4051 d = off + SvGROW(sv, off 4052 + 3 * len 4053 4054 /* +1 for trailing NUL */ 4055 + initial_len + 1 4056 4057 + (STRLEN)(send - rbrace)); 4058 Copy(initial_text, d, initial_len, char); 4059 d += initial_len; 4060 while (str < str_end) { 4061 char hex_string[4]; 4062 int len = 4063 my_snprintf(hex_string, 4064 sizeof(hex_string), 4065 "%02X.", 4066 4067 /* The regex compiler is 4068 * expecting Unicode, not 4069 * native */ 4070 NATIVE_TO_LATIN1(*str)); 4071 PERL_MY_SNPRINTF_POST_GUARD(len, 4072 sizeof(hex_string)); 4073 Copy(hex_string, d, 3, char); 4074 d += 3; 4075 str++; 4076 } 4077 d--; /* Below, we will overwrite the final 4078 dot with a right brace */ 4079 } 4080 else { 4081 STRLEN char_length; /* cur char's byte length */ 4082 4083 /* and the number of bytes after this is 4084 * translated into hex digits */ 4085 STRLEN output_length; 4086 4087 /* 2 hex per byte; 2 chars for '\N'; 2 chars 4088 * for max('U+', '.'); and 1 for NUL */ 4089 char hex_string[2 * UTF8_MAXBYTES + 5]; 4090 4091 /* Get the first character of the result. */ 4092 U32 uv = utf8n_to_uvchr((U8 *) str, 4093 len, 4094 &char_length, 4095 UTF8_ALLOW_ANYUV); 4096 /* Convert first code point to Unicode hex, 4097 * including the boiler plate before it. */ 4098 output_length = 4099 my_snprintf(hex_string, sizeof(hex_string), 4100 "\\N{U+%X", 4101 (unsigned int) NATIVE_TO_UNI(uv)); 4102 4103 /* Make sure there is enough space to hold it */ 4104 d = off + SvGROW(sv, off 4105 + output_length 4106 + (STRLEN)(send - rbrace) 4107 + 2); /* '}' + NUL */ 4108 /* And output it */ 4109 Copy(hex_string, d, output_length, char); 4110 d += output_length; 4111 4112 /* For each subsequent character, append dot and 4113 * its Unicode code point in hex */ 4114 while ((str += char_length) < str_end) { 4115 const STRLEN off = d - SvPVX_const(sv); 4116 U32 uv = utf8n_to_uvchr((U8 *) str, 4117 str_end - str, 4118 &char_length, 4119 UTF8_ALLOW_ANYUV); 4120 output_length = 4121 my_snprintf(hex_string, 4122 sizeof(hex_string), 4123 ".%X", 4124 (unsigned int) NATIVE_TO_UNI(uv)); 4125 4126 d = off + SvGROW(sv, off 4127 + output_length 4128 + (STRLEN)(send - rbrace) 4129 + 2); /* '}' + NUL */ 4130 Copy(hex_string, d, output_length, char); 4131 d += output_length; 4132 } 4133 } 4134 4135 *d++ = '}'; /* Done. Add the trailing brace */ 4136 } 4137 } 4138 else { /* Here, not in a pattern. Convert the name to a 4139 * string. */ 4140 4141 if (PL_lex_inwhat == OP_TRANS) { 4142 str = SvPV_const(res, len); 4143 if (len > ((SvUTF8(res)) 4144 ? UTF8SKIP(str) 4145 : 1U)) 4146 { 4147 yyerror(Perl_form(aTHX_ 4148 "%.*s must not be a named sequence" 4149 " in transliteration operator", 4150 /* +1 to include the "}" */ 4151 (int) (rbrace + 1 - start), start)); 4152 *d++ = '\0'; 4153 goto end_backslash_N; 4154 } 4155 4156 if (SvUTF8(res) && UTF8_IS_ABOVE_LATIN1(*str)) { 4157 has_above_latin1 = TRUE; 4158 } 4159 4160 } 4161 else if (! SvUTF8(res)) { 4162 /* Make sure \N{} return is UTF-8. This is because 4163 * \N{} implies Unicode semantics, and scalars have 4164 * to be in utf8 to guarantee those semantics; but 4165 * not needed in tr/// */ 4166 sv_utf8_upgrade_flags(res, 0); 4167 str = SvPV_const(res, len); 4168 } 4169 4170 /* Upgrade destination to be utf8 if this new 4171 * component is */ 4172 if (! d_is_utf8 && SvUTF8(res)) { 4173 /* See Note on sizing above. */ 4174 const STRLEN extra = len + (send - s) + 1; 4175 4176 SvCUR_set(sv, d - SvPVX_const(sv)); 4177 SvPOK_on(sv); 4178 *d = '\0'; 4179 4180 if (utf8_variant_count == 0) { 4181 SvUTF8_on(sv); 4182 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra); 4183 } 4184 else { 4185 sv_utf8_upgrade_flags_grow(sv, 4186 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, 4187 extra); 4188 d = SvPVX(sv) + SvCUR(sv); 4189 } 4190 d_is_utf8 = TRUE; 4191 } else if (len > (STRLEN)(e - s + 4)) { /* +4 is for \N{} */ 4192 4193 /* See Note on sizing above. (NOTE: SvCUR() is not 4194 * set correctly here). */ 4195 const STRLEN extra = len + (send - rbrace) + 1; 4196 const STRLEN off = d - SvPVX_const(sv); 4197 d = off + SvGROW(sv, off + extra); 4198 } 4199 Copy(str, d, len, char); 4200 d += len; 4201 } 4202 4203 SvREFCNT_dec(res); 4204 4205 } /* End \N{NAME} */ 4206 4207 end_backslash_N: 4208 #ifdef EBCDIC 4209 backslash_N++; /* \N{} is defined to be Unicode */ 4210 #endif 4211 s = rbrace + 1; /* Point to just after the '}' */ 4212 continue; 4213 4214 /* \c is a control character */ 4215 case 'c': 4216 s++; 4217 if (s < send) { 4218 const char * message; 4219 4220 if (! grok_bslash_c(*s, (U8 *) d, &message, NULL)) { 4221 yyerror(message); 4222 yyquit(); /* Have always immediately croaked on 4223 errors in this */ 4224 } 4225 d++; 4226 } 4227 else { 4228 yyerror("Missing control char name in \\c"); 4229 yyquit(); /* Are at end of input, no sense continuing */ 4230 } 4231 #ifdef EBCDIC 4232 non_portable_endpoint++; 4233 #endif 4234 break; 4235 4236 /* printf-style backslashes, formfeeds, newlines, etc */ 4237 case 'b': 4238 *d++ = '\b'; 4239 break; 4240 case 'n': 4241 *d++ = '\n'; 4242 break; 4243 case 'r': 4244 *d++ = '\r'; 4245 break; 4246 case 'f': 4247 *d++ = '\f'; 4248 break; 4249 case 't': 4250 *d++ = '\t'; 4251 break; 4252 case 'e': 4253 *d++ = ESC_NATIVE; 4254 break; 4255 case 'a': 4256 *d++ = '\a'; 4257 break; 4258 } /* end switch */ 4259 4260 s++; 4261 continue; 4262 } /* end if (backslash) */ 4263 4264 default_action: 4265 /* Just copy the input to the output, though we may have to convert 4266 * to/from UTF-8. 4267 * 4268 * If the input has the same representation in UTF-8 as not, it will be 4269 * a single byte, and we don't care about UTF8ness; just copy the byte */ 4270 if (NATIVE_BYTE_IS_INVARIANT((U8)(*s))) { 4271 *d++ = *s++; 4272 } 4273 else if (! s_is_utf8 && ! d_is_utf8) { 4274 /* If neither source nor output is UTF-8, is also a single byte, 4275 * just copy it; but this byte counts should we later have to 4276 * convert to UTF-8 */ 4277 *d++ = *s++; 4278 utf8_variant_count++; 4279 } 4280 else if (s_is_utf8 && d_is_utf8) { /* Both UTF-8, can just copy */ 4281 const STRLEN len = UTF8SKIP(s); 4282 4283 /* We expect the source to have already been checked for 4284 * malformedness */ 4285 assert(isUTF8_CHAR((U8 *) s, (U8 *) send)); 4286 4287 Copy(s, d, len, U8); 4288 d += len; 4289 s += len; 4290 } 4291 else if (s_is_utf8) { /* UTF8ness matters: convert output to utf8 */ 4292 STRLEN need = send - s + 1; /* See Note on sizing above. */ 4293 4294 SvCUR_set(sv, d - SvPVX_const(sv)); 4295 SvPOK_on(sv); 4296 *d = '\0'; 4297 4298 if (utf8_variant_count == 0) { 4299 SvUTF8_on(sv); 4300 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + need); 4301 } 4302 else { 4303 sv_utf8_upgrade_flags_grow(sv, 4304 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, 4305 need); 4306 d = SvPVX(sv) + SvCUR(sv); 4307 } 4308 d_is_utf8 = TRUE; 4309 goto default_action; /* Redo, having upgraded so both are UTF-8 */ 4310 } 4311 else { /* UTF8ness matters: convert this non-UTF8 source char to 4312 UTF-8 for output. It will occupy 2 bytes, but don't include 4313 the input byte since we haven't incremented 's' yet. See 4314 Note on sizing above. */ 4315 const STRLEN off = d - SvPVX(sv); 4316 const STRLEN extra = 2 + (send - s - 1) + 1; 4317 if (off + extra > SvLEN(sv)) { 4318 d = off + SvGROW(sv, off + extra); 4319 } 4320 *d++ = UTF8_EIGHT_BIT_HI(*s); 4321 *d++ = UTF8_EIGHT_BIT_LO(*s); 4322 s++; 4323 } 4324 } /* while loop to process each character */ 4325 4326 { 4327 const STRLEN off = d - SvPVX(sv); 4328 4329 /* See if room for the terminating NUL */ 4330 if (UNLIKELY(off >= SvLEN(sv))) { 4331 4332 #ifndef DEBUGGING 4333 4334 if (off > SvLEN(sv)) 4335 #endif 4336 Perl_croak(aTHX_ "panic: constant overflowed allocated space," 4337 " %" UVuf " >= %" UVuf, (UV)off, (UV)SvLEN(sv)); 4338 4339 /* Whew! Here we don't have room for the terminating NUL, but 4340 * everything else so far has fit. It's not too late to grow 4341 * to fit the NUL and continue on. But it is a bug, as the code 4342 * above was supposed to have made room for this, so under 4343 * DEBUGGING builds, we panic anyway. */ 4344 d = off + SvGROW(sv, off + 1); 4345 } 4346 } 4347 4348 /* terminate the string and set up the sv */ 4349 *d = '\0'; 4350 SvCUR_set(sv, d - SvPVX_const(sv)); 4351 4352 SvPOK_on(sv); 4353 if (d_is_utf8) { 4354 SvUTF8_on(sv); 4355 } 4356 4357 /* shrink the sv if we allocated more than we used */ 4358 if (SvCUR(sv) + 5 < SvLEN(sv)) { 4359 SvPV_shrink_to_cur(sv); 4360 } 4361 4362 /* return the substring (via pl_yylval) only if we parsed anything */ 4363 if (s > start) { 4364 char *s2 = start; 4365 for (; s2 < s; s2++) { 4366 if (*s2 == '\n') 4367 COPLINE_INC_WITH_HERELINES; 4368 } 4369 SvREFCNT_inc_simple_void_NN(sv); 4370 if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING )) 4371 && ! PL_parser->lex_re_reparsing) 4372 { 4373 const char *const key = PL_lex_inpat ? "qr" : "q"; 4374 const STRLEN keylen = PL_lex_inpat ? 2 : 1; 4375 const char *type; 4376 STRLEN typelen; 4377 4378 if (PL_lex_inwhat == OP_TRANS) { 4379 type = "tr"; 4380 typelen = 2; 4381 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) { 4382 type = "s"; 4383 typelen = 1; 4384 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') { 4385 type = "q"; 4386 typelen = 1; 4387 } else { 4388 type = "qq"; 4389 typelen = 2; 4390 } 4391 4392 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL, 4393 type, typelen, NULL); 4394 } 4395 pl_yylval.opval = newSVOP(OP_CONST, 0, sv); 4396 } 4397 LEAVE_with_name("scan_const"); 4398 return s; 4399 } 4400 4401 /* S_intuit_more 4402 * Returns TRUE if there's more to the expression (e.g., a subscript), 4403 * FALSE otherwise. 4404 * 4405 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/ 4406 * 4407 * ->[ and ->{ return TRUE 4408 * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled 4409 * { and [ outside a pattern are always subscripts, so return TRUE 4410 * if we're outside a pattern and it's not { or [, then return FALSE 4411 * if we're in a pattern and the first char is a { 4412 * {4,5} (any digits around the comma) returns FALSE 4413 * if we're in a pattern and the first char is a [ 4414 * [] returns FALSE 4415 * [SOMETHING] has a funky algorithm to decide whether it's a 4416 * character class or not. It has to deal with things like 4417 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/ 4418 * anything else returns TRUE 4419 */ 4420 4421 /* This is the one truly awful dwimmer necessary to conflate C and sed. */ 4422 4423 STATIC int 4424 S_intuit_more(pTHX_ char *s, char *e) 4425 { 4426 PERL_ARGS_ASSERT_INTUIT_MORE; 4427 4428 if (PL_lex_brackets) 4429 return TRUE; 4430 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{')) 4431 return TRUE; 4432 if (*s == '-' && s[1] == '>' 4433 && FEATURE_POSTDEREF_QQ_IS_ENABLED 4434 && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*'))) 4435 ||(s[2] == '@' && memCHRs("*[{",s[3])) )) 4436 return TRUE; 4437 if (*s != '{' && *s != '[') 4438 return FALSE; 4439 PL_parser->sub_no_recover = TRUE; 4440 if (!PL_lex_inpat) 4441 return TRUE; 4442 4443 /* In a pattern, so maybe we have {n,m}. */ 4444 if (*s == '{') { 4445 if (regcurly(s, e, NULL)) { 4446 return FALSE; 4447 } 4448 return TRUE; 4449 } 4450 4451 /* On the other hand, maybe we have a character class */ 4452 4453 s++; 4454 if (*s == ']' || *s == '^') 4455 return FALSE; 4456 else { 4457 /* this is terrifying, and it works */ 4458 int weight; 4459 char seen[256]; 4460 const char * const send = (char *) memchr(s, ']', e - s); 4461 unsigned char un_char, last_un_char; 4462 char tmpbuf[sizeof PL_tokenbuf * 4]; 4463 4464 if (!send) /* has to be an expression */ 4465 return TRUE; 4466 weight = 2; /* let's weigh the evidence */ 4467 4468 if (*s == '$') 4469 weight -= 3; 4470 else if (isDIGIT(*s)) { 4471 if (s[1] != ']') { 4472 if (isDIGIT(s[1]) && s[2] == ']') 4473 weight -= 10; 4474 } 4475 else 4476 weight -= 100; 4477 } 4478 Zero(seen,256,char); 4479 un_char = 255; 4480 for (; s < send; s++) { 4481 last_un_char = un_char; 4482 un_char = (unsigned char)*s; 4483 switch (*s) { 4484 case '@': 4485 case '&': 4486 case '$': 4487 weight -= seen[un_char] * 10; 4488 if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) { 4489 int len; 4490 scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE); 4491 len = (int)strlen(tmpbuf); 4492 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 4493 UTF ? SVf_UTF8 : 0, SVt_PV)) 4494 weight -= 100; 4495 else 4496 weight -= 10; 4497 } 4498 else if (*s == '$' 4499 && s[1] 4500 && memCHRs("[#!%*<>()-=",s[1])) 4501 { 4502 if (/*{*/ memCHRs("])} =",s[2])) 4503 weight -= 10; 4504 else 4505 weight -= 1; 4506 } 4507 break; 4508 case '\\': 4509 un_char = 254; 4510 if (s[1]) { 4511 if (memCHRs("wds]",s[1])) 4512 weight += 100; 4513 else if (seen[(U8)'\''] || seen[(U8)'"']) 4514 weight += 1; 4515 else if (memCHRs("rnftbxcav",s[1])) 4516 weight += 40; 4517 else if (isDIGIT(s[1])) { 4518 weight += 40; 4519 while (s[1] && isDIGIT(s[1])) 4520 s++; 4521 } 4522 } 4523 else 4524 weight += 100; 4525 break; 4526 case '-': 4527 if (s[1] == '\\') 4528 weight += 50; 4529 if (memCHRs("aA01! ",last_un_char)) 4530 weight += 30; 4531 if (memCHRs("zZ79~",s[1])) 4532 weight += 30; 4533 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$')) 4534 weight -= 5; /* cope with negative subscript */ 4535 break; 4536 default: 4537 if (!isWORDCHAR(last_un_char) 4538 && !(last_un_char == '$' || last_un_char == '@' 4539 || last_un_char == '&') 4540 && isALPHA(*s) && s[1] && isALPHA(s[1])) { 4541 char *d = s; 4542 while (isALPHA(*s)) 4543 s++; 4544 if (keyword(d, s - d, 0)) 4545 weight -= 150; 4546 } 4547 if (un_char == last_un_char + 1) 4548 weight += 5; 4549 weight -= seen[un_char]; 4550 break; 4551 } 4552 seen[un_char]++; 4553 } 4554 if (weight >= 0) /* probably a character class */ 4555 return FALSE; 4556 } 4557 4558 return TRUE; 4559 } 4560 4561 /* 4562 * S_intuit_method 4563 * 4564 * Does all the checking to disambiguate 4565 * foo bar 4566 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise 4567 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args). 4568 * 4569 * First argument is the stuff after the first token, e.g. "bar". 4570 * 4571 * Not a method if foo is a filehandle. 4572 * Not a method if foo is a subroutine prototyped to take a filehandle. 4573 * Not a method if it's really "Foo $bar" 4574 * Method if it's "foo $bar" 4575 * Not a method if it's really "print foo $bar" 4576 * Method if it's really "foo package::" (interpreted as package->foo) 4577 * Not a method if bar is known to be a subroutine ("sub bar; foo bar") 4578 * Not a method if bar is a filehandle or package, but is quoted with 4579 * => 4580 */ 4581 4582 STATIC int 4583 S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv) 4584 { 4585 char *s = start + (*start == '$'); 4586 char tmpbuf[sizeof PL_tokenbuf]; 4587 STRLEN len; 4588 GV* indirgv; 4589 /* Mustn't actually add anything to a symbol table. 4590 But also don't want to "initialise" any placeholder 4591 constants that might already be there into full 4592 blown PVGVs with attached PVCV. */ 4593 GV * const gv = 4594 ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL; 4595 4596 PERL_ARGS_ASSERT_INTUIT_METHOD; 4597 4598 if (!FEATURE_INDIRECT_IS_ENABLED) 4599 return 0; 4600 4601 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv)) 4602 return 0; 4603 if (cv && SvPOK(cv)) { 4604 const char *proto = CvPROTO(cv); 4605 if (proto) { 4606 while (*proto && (isSPACE(*proto) || *proto == ';')) 4607 proto++; 4608 if (*proto == '*') 4609 return 0; 4610 } 4611 } 4612 4613 if (*start == '$') { 4614 SSize_t start_off = start - SvPVX(PL_linestr); 4615 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY 4616 || isUPPER(*PL_tokenbuf)) 4617 return 0; 4618 /* this could be $# */ 4619 if (isSPACE(*s)) 4620 s = skipspace(s); 4621 PL_bufptr = SvPVX(PL_linestr) + start_off; 4622 PL_expect = XREF; 4623 return *s == '(' ? FUNCMETH : METHOD; 4624 } 4625 4626 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len); 4627 /* start is the beginning of the possible filehandle/object, 4628 * and s is the end of it 4629 * tmpbuf is a copy of it (but with single quotes as double colons) 4630 */ 4631 4632 if (!keyword(tmpbuf, len, 0)) { 4633 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') { 4634 len -= 2; 4635 tmpbuf[len] = '\0'; 4636 goto bare_package; 4637 } 4638 indirgv = gv_fetchpvn_flags(tmpbuf, len, 4639 GV_NOADD_NOINIT|( UTF ? SVf_UTF8 : 0 ), 4640 SVt_PVCV); 4641 if (indirgv && SvTYPE(indirgv) != SVt_NULL 4642 && (!isGV(indirgv) || GvCVu(indirgv))) 4643 return 0; 4644 /* filehandle or package name makes it a method */ 4645 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) { 4646 s = skipspace(s); 4647 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>') 4648 return 0; /* no assumptions -- "=>" quotes bareword */ 4649 bare_package: 4650 NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0, 4651 S_newSV_maybe_utf8(aTHX_ tmpbuf, len)); 4652 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE; 4653 PL_expect = XTERM; 4654 force_next(BAREWORD); 4655 PL_bufptr = s; 4656 return *s == '(' ? FUNCMETH : METHOD; 4657 } 4658 } 4659 return 0; 4660 } 4661 4662 /* Encoded script support. filter_add() effectively inserts a 4663 * 'pre-processing' function into the current source input stream. 4664 * Note that the filter function only applies to the current source file 4665 * (e.g., it will not affect files 'require'd or 'use'd by this one). 4666 * 4667 * The datasv parameter (which may be NULL) can be used to pass 4668 * private data to this instance of the filter. The filter function 4669 * can recover the SV using the FILTER_DATA macro and use it to 4670 * store private buffers and state information. 4671 * 4672 * The supplied datasv parameter is upgraded to a PVIO type 4673 * and the IoDIRP/IoANY field is used to store the function pointer, 4674 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such. 4675 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for 4676 * private use must be set using malloc'd pointers. 4677 */ 4678 4679 SV * 4680 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv) 4681 { 4682 if (!funcp) 4683 return NULL; 4684 4685 if (!PL_parser) 4686 return NULL; 4687 4688 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) 4689 Perl_croak(aTHX_ "Source filters apply only to byte streams"); 4690 4691 if (!PL_rsfp_filters) 4692 PL_rsfp_filters = newAV(); 4693 if (!datasv) 4694 datasv = newSV(0); 4695 SvUPGRADE(datasv, SVt_PVIO); 4696 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */ 4697 IoFLAGS(datasv) |= IOf_FAKE_DIRP; 4698 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n", 4699 FPTR2DPTR(void *, IoANY(datasv)), 4700 SvPV_nolen(datasv))); 4701 av_unshift(PL_rsfp_filters, 1); 4702 av_store(PL_rsfp_filters, 0, datasv) ; 4703 if ( 4704 !PL_parser->filtered 4705 && PL_parser->lex_flags & LEX_EVALBYTES 4706 && PL_bufptr < PL_bufend 4707 ) { 4708 const char *s = PL_bufptr; 4709 while (s < PL_bufend) { 4710 if (*s == '\n') { 4711 SV *linestr = PL_parser->linestr; 4712 char *buf = SvPVX(linestr); 4713 STRLEN const bufptr_pos = PL_parser->bufptr - buf; 4714 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf; 4715 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf; 4716 STRLEN const linestart_pos = PL_parser->linestart - buf; 4717 STRLEN const last_uni_pos = 4718 PL_parser->last_uni ? PL_parser->last_uni - buf : 0; 4719 STRLEN const last_lop_pos = 4720 PL_parser->last_lop ? PL_parser->last_lop - buf : 0; 4721 av_push(PL_rsfp_filters, linestr); 4722 PL_parser->linestr = 4723 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr)); 4724 buf = SvPVX(PL_parser->linestr); 4725 PL_parser->bufend = buf + SvCUR(PL_parser->linestr); 4726 PL_parser->bufptr = buf + bufptr_pos; 4727 PL_parser->oldbufptr = buf + oldbufptr_pos; 4728 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos; 4729 PL_parser->linestart = buf + linestart_pos; 4730 if (PL_parser->last_uni) 4731 PL_parser->last_uni = buf + last_uni_pos; 4732 if (PL_parser->last_lop) 4733 PL_parser->last_lop = buf + last_lop_pos; 4734 SvLEN_set(linestr, SvCUR(linestr)); 4735 SvCUR_set(linestr, s - SvPVX(linestr)); 4736 PL_parser->filtered = 1; 4737 break; 4738 } 4739 s++; 4740 } 4741 } 4742 return(datasv); 4743 } 4744 4745 /* 4746 =for apidoc_section $filters 4747 =for apidoc filter_del 4748 4749 Delete most recently added instance of the filter function argument 4750 4751 =cut 4752 */ 4753 4754 void 4755 Perl_filter_del(pTHX_ filter_t funcp) 4756 { 4757 SV *datasv; 4758 4759 PERL_ARGS_ASSERT_FILTER_DEL; 4760 4761 #ifdef DEBUGGING 4762 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", 4763 FPTR2DPTR(void*, funcp))); 4764 #endif 4765 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0) 4766 return; 4767 /* if filter is on top of stack (usual case) just pop it off */ 4768 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters)); 4769 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) { 4770 sv_free(av_pop(PL_rsfp_filters)); 4771 4772 return; 4773 } 4774 /* we need to search for the correct entry and clear it */ 4775 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)"); 4776 } 4777 4778 4779 /* Invoke the idxth filter function for the current rsfp. */ 4780 /* maxlen 0 = read one text line */ 4781 I32 4782 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen) 4783 { 4784 filter_t funcp; 4785 I32 ret; 4786 SV *datasv = NULL; 4787 /* This API is bad. It should have been using unsigned int for maxlen. 4788 Not sure if we want to change the API, but if not we should sanity 4789 check the value here. */ 4790 unsigned int correct_length = maxlen < 0 ? PERL_INT_MAX : maxlen; 4791 4792 PERL_ARGS_ASSERT_FILTER_READ; 4793 4794 if (!PL_parser || !PL_rsfp_filters) 4795 return -1; 4796 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */ 4797 /* Provide a default input filter to make life easy. */ 4798 /* Note that we append to the line. This is handy. */ 4799 DEBUG_P(PerlIO_printf(Perl_debug_log, 4800 "filter_read %d: from rsfp\n", idx)); 4801 if (correct_length) { 4802 /* Want a block */ 4803 int len ; 4804 const int old_len = SvCUR(buf_sv); 4805 4806 /* ensure buf_sv is large enough */ 4807 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ; 4808 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, 4809 correct_length)) <= 0) { 4810 if (PerlIO_error(PL_rsfp)) 4811 return -1; /* error */ 4812 else 4813 return 0 ; /* end of file */ 4814 } 4815 SvCUR_set(buf_sv, old_len + len) ; 4816 SvPVX(buf_sv)[old_len + len] = '\0'; 4817 } else { 4818 /* Want a line */ 4819 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) { 4820 if (PerlIO_error(PL_rsfp)) 4821 return -1; /* error */ 4822 else 4823 return 0 ; /* end of file */ 4824 } 4825 } 4826 return SvCUR(buf_sv); 4827 } 4828 /* Skip this filter slot if filter has been deleted */ 4829 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) { 4830 DEBUG_P(PerlIO_printf(Perl_debug_log, 4831 "filter_read %d: skipped (filter deleted)\n", 4832 idx)); 4833 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */ 4834 } 4835 if (SvTYPE(datasv) != SVt_PVIO) { 4836 if (correct_length) { 4837 /* Want a block */ 4838 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv); 4839 if (!remainder) return 0; /* eof */ 4840 if (correct_length > remainder) correct_length = remainder; 4841 sv_catpvn(buf_sv, SvEND(datasv), correct_length); 4842 SvCUR_set(datasv, SvCUR(datasv) + correct_length); 4843 } else { 4844 /* Want a line */ 4845 const char *s = SvEND(datasv); 4846 const char *send = SvPVX(datasv) + SvLEN(datasv); 4847 while (s < send) { 4848 if (*s == '\n') { 4849 s++; 4850 break; 4851 } 4852 s++; 4853 } 4854 if (s == send) return 0; /* eof */ 4855 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv)); 4856 SvCUR_set(datasv, s-SvPVX(datasv)); 4857 } 4858 return SvCUR(buf_sv); 4859 } 4860 /* Get function pointer hidden within datasv */ 4861 funcp = DPTR2FPTR(filter_t, IoANY(datasv)); 4862 DEBUG_P(PerlIO_printf(Perl_debug_log, 4863 "filter_read %d: via function %p (%s)\n", 4864 idx, (void*)datasv, SvPV_nolen_const(datasv))); 4865 /* Call function. The function is expected to */ 4866 /* call "FILTER_READ(idx+1, buf_sv)" first. */ 4867 /* Return: <0:error, =0:eof, >0:not eof */ 4868 ENTER; 4869 save_scalar(PL_errgv); 4870 ret = (*funcp)(aTHX_ idx, buf_sv, correct_length); 4871 LEAVE; 4872 return ret; 4873 } 4874 4875 STATIC char * 4876 S_filter_gets(pTHX_ SV *sv, STRLEN append) 4877 { 4878 PERL_ARGS_ASSERT_FILTER_GETS; 4879 4880 #ifdef PERL_CR_FILTER 4881 if (!PL_rsfp_filters) { 4882 filter_add(S_cr_textfilter,NULL); 4883 } 4884 #endif 4885 if (PL_rsfp_filters) { 4886 if (!append) 4887 SvCUR_set(sv, 0); /* start with empty line */ 4888 if (FILTER_READ(0, sv, 0) > 0) 4889 return ( SvPVX(sv) ) ; 4890 else 4891 return NULL ; 4892 } 4893 else 4894 return (sv_gets(sv, PL_rsfp, append)); 4895 } 4896 4897 STATIC HV * 4898 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len) 4899 { 4900 GV *gv; 4901 4902 PERL_ARGS_ASSERT_FIND_IN_MY_STASH; 4903 4904 if (memEQs(pkgname, len, "__PACKAGE__")) 4905 return PL_curstash; 4906 4907 if (len > 2 4908 && (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') 4909 && (gv = gv_fetchpvn_flags(pkgname, 4910 len, 4911 ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV))) 4912 { 4913 return GvHV(gv); /* Foo:: */ 4914 } 4915 4916 /* use constant CLASS => 'MyClass' */ 4917 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV); 4918 if (gv && GvCV(gv)) { 4919 SV * const sv = cv_const_sv(GvCV(gv)); 4920 if (sv) 4921 return gv_stashsv(sv, 0); 4922 } 4923 4924 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0); 4925 } 4926 4927 4928 STATIC char * 4929 S_tokenize_use(pTHX_ int is_use, char *s) { 4930 PERL_ARGS_ASSERT_TOKENIZE_USE; 4931 4932 if (PL_expect != XSTATE) 4933 /* diag_listed_as: "use" not allowed in expression */ 4934 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression", 4935 is_use ? "use" : "no")); 4936 PL_expect = XTERM; 4937 s = skipspace(s); 4938 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) { 4939 s = force_version(s, TRUE); 4940 if (*s == ';' || *s == '}' 4941 || (s = skipspace(s), (*s == ';' || *s == '}'))) { 4942 NEXTVAL_NEXTTOKE.opval = NULL; 4943 force_next(BAREWORD); 4944 } 4945 else if (*s == 'v') { 4946 s = force_word(s,BAREWORD,FALSE,TRUE); 4947 s = force_version(s, FALSE); 4948 } 4949 } 4950 else { 4951 s = force_word(s,BAREWORD,FALSE,TRUE); 4952 s = force_version(s, FALSE); 4953 } 4954 pl_yylval.ival = is_use; 4955 return s; 4956 } 4957 #ifdef DEBUGGING 4958 static const char* const exp_name[] = 4959 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK", 4960 "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF", 4961 "SIGVAR", "TERMORDORDOR" 4962 }; 4963 #endif 4964 4965 #define word_takes_any_delimiter(p,l) S_word_takes_any_delimiter(p,l) 4966 STATIC bool 4967 S_word_takes_any_delimiter(char *p, STRLEN len) 4968 { 4969 return (len == 1 && memCHRs("msyq", p[0])) 4970 || (len == 2 4971 && ((p[0] == 't' && p[1] == 'r') 4972 || (p[0] == 'q' && memCHRs("qwxr", p[1])))); 4973 } 4974 4975 static void 4976 S_check_scalar_slice(pTHX_ char *s) 4977 { 4978 s++; 4979 while (SPACE_OR_TAB(*s)) s++; 4980 if (*s == 'q' && s[1] == 'w' && !isWORDCHAR_lazy_if_safe(s+2, 4981 PL_bufend, 4982 UTF)) 4983 { 4984 return; 4985 } 4986 while ( isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) 4987 || (*s && memCHRs(" \t$#+-'\"", *s))) 4988 { 4989 s += UTF ? UTF8SKIP(s) : 1; 4990 } 4991 if (*s == '}' || *s == ']') 4992 pl_yylval.ival = OPpSLICEWARNING; 4993 } 4994 4995 #define lex_token_boundary() S_lex_token_boundary(aTHX) 4996 static void 4997 S_lex_token_boundary(pTHX) 4998 { 4999 PL_oldoldbufptr = PL_oldbufptr; 5000 PL_oldbufptr = PL_bufptr; 5001 } 5002 5003 #define vcs_conflict_marker(s) S_vcs_conflict_marker(aTHX_ s) 5004 static char * 5005 S_vcs_conflict_marker(pTHX_ char *s) 5006 { 5007 lex_token_boundary(); 5008 PL_bufptr = s; 5009 yyerror("Version control conflict marker"); 5010 while (s < PL_bufend && *s != '\n') 5011 s++; 5012 return s; 5013 } 5014 5015 static int 5016 yyl_sigvar(pTHX_ char *s) 5017 { 5018 /* we expect the sigil and optional var name part of a 5019 * signature element here. Since a '$' is not necessarily 5020 * followed by a var name, handle it specially here; the general 5021 * yylex code would otherwise try to interpret whatever follows 5022 * as a var; e.g. ($, ...) would be seen as the var '$,' 5023 */ 5024 5025 U8 sigil; 5026 5027 s = skipspace(s); 5028 sigil = *s++; 5029 PL_bufptr = s; /* for error reporting */ 5030 switch (sigil) { 5031 case '$': 5032 case '@': 5033 case '%': 5034 /* spot stuff that looks like an prototype */ 5035 if (memCHRs("$:@%&*;\\[]", *s)) { 5036 yyerror("Illegal character following sigil in a subroutine signature"); 5037 break; 5038 } 5039 /* '$#' is banned, while '$ # comment' isn't */ 5040 if (*s == '#') { 5041 yyerror("'#' not allowed immediately following a sigil in a subroutine signature"); 5042 break; 5043 } 5044 s = skipspace(s); 5045 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) { 5046 char *dest = PL_tokenbuf + 1; 5047 /* read var name, including sigil, into PL_tokenbuf */ 5048 PL_tokenbuf[0] = sigil; 5049 parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1, 5050 0, cBOOL(UTF), FALSE, FALSE); 5051 *dest = '\0'; 5052 assert(PL_tokenbuf[1]); /* we have a variable name */ 5053 } 5054 else { 5055 *PL_tokenbuf = 0; 5056 PL_in_my = 0; 5057 } 5058 5059 s = skipspace(s); 5060 /* parse the = for the default ourselves to avoid '+=' etc being accepted here 5061 * as the ASSIGNOP, and exclude other tokens that start with = 5062 */ 5063 if (*s == '=' && (!s[1] || memCHRs("=~>", s[1]) == 0)) { 5064 /* save now to report with the same context as we did when 5065 * all ASSIGNOPS were accepted */ 5066 PL_oldbufptr = s; 5067 5068 ++s; 5069 NEXTVAL_NEXTTOKE.ival = 0; 5070 force_next(ASSIGNOP); 5071 PL_expect = XTERM; 5072 } 5073 else if (*s == ',' || *s == ')') { 5074 PL_expect = XOPERATOR; 5075 } 5076 else { 5077 /* make sure the context shows the unexpected character and 5078 * hopefully a bit more */ 5079 if (*s) ++s; 5080 while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')') 5081 s++; 5082 PL_bufptr = s; /* for error reporting */ 5083 yyerror("Illegal operator following parameter in a subroutine signature"); 5084 PL_in_my = 0; 5085 } 5086 if (*PL_tokenbuf) { 5087 NEXTVAL_NEXTTOKE.ival = sigil; 5088 force_next('p'); /* force a signature pending identifier */ 5089 } 5090 break; 5091 5092 case ')': 5093 PL_expect = XBLOCK; 5094 break; 5095 case ',': /* handle ($a,,$b) */ 5096 break; 5097 5098 default: 5099 PL_in_my = 0; 5100 yyerror("A signature parameter must start with '$', '@' or '%'"); 5101 /* very crude error recovery: skip to likely next signature 5102 * element */ 5103 while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')') 5104 s++; 5105 break; 5106 } 5107 5108 switch (sigil) { 5109 case ',': TOKEN (PERLY_COMMA); 5110 case '$': TOKEN (PERLY_DOLLAR); 5111 case '@': TOKEN (PERLY_SNAIL); 5112 case '%': TOKEN (PERLY_PERCENT_SIGN); 5113 case ')': TOKEN (PERLY_PAREN_CLOSE); 5114 default: TOKEN (sigil); 5115 } 5116 } 5117 5118 static int 5119 yyl_dollar(pTHX_ char *s) 5120 { 5121 CLINE; 5122 5123 if (PL_expect == XPOSTDEREF) { 5124 if (s[1] == '#') { 5125 s++; 5126 POSTDEREF(DOLSHARP); 5127 } 5128 POSTDEREF(PERLY_DOLLAR); 5129 } 5130 5131 if ( s[1] == '#' 5132 && ( isIDFIRST_lazy_if_safe(s+2, PL_bufend, UTF) 5133 || memCHRs("{$:+-@", s[2]))) 5134 { 5135 PL_tokenbuf[0] = '@'; 5136 s = scan_ident(s + 1, PL_tokenbuf + 1, 5137 sizeof PL_tokenbuf - 1, FALSE); 5138 if (PL_expect == XOPERATOR) { 5139 char *d = s; 5140 if (PL_bufptr > s) { 5141 d = PL_bufptr-1; 5142 PL_bufptr = PL_oldbufptr; 5143 } 5144 no_op("Array length", d); 5145 } 5146 if (!PL_tokenbuf[1]) 5147 PREREF(DOLSHARP); 5148 PL_expect = XOPERATOR; 5149 force_ident_maybe_lex('#'); 5150 TOKEN(DOLSHARP); 5151 } 5152 5153 PL_tokenbuf[0] = '$'; 5154 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE); 5155 if (PL_expect == XOPERATOR) { 5156 char *d = s; 5157 if (PL_bufptr > s) { 5158 d = PL_bufptr-1; 5159 PL_bufptr = PL_oldbufptr; 5160 } 5161 no_op("Scalar", d); 5162 } 5163 if (!PL_tokenbuf[1]) { 5164 if (s == PL_bufend) 5165 yyerror("Final $ should be \\$ or $name"); 5166 PREREF(PERLY_DOLLAR); 5167 } 5168 5169 { 5170 const char tmp = *s; 5171 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) 5172 s = skipspace(s); 5173 5174 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) 5175 && intuit_more(s, PL_bufend)) { 5176 if (*s == '[') { 5177 PL_tokenbuf[0] = '@'; 5178 if (ckWARN(WARN_SYNTAX)) { 5179 char *t = s+1; 5180 5181 while ( t < PL_bufend ) { 5182 if (isSPACE(*t)) { 5183 do { t += UTF ? UTF8SKIP(t) : 1; } while (t < PL_bufend && isSPACE(*t)); 5184 /* consumed one or more space chars */ 5185 } else if (*t == '$' || *t == '@') { 5186 /* could be more than one '$' like $$ref or @$ref */ 5187 do { t++; } while (t < PL_bufend && *t == '$'); 5188 5189 /* could be an abigail style identifier like $ foo */ 5190 while (t < PL_bufend && *t == ' ') t++; 5191 5192 /* strip off the name of the var */ 5193 while (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) 5194 t += UTF ? UTF8SKIP(t) : 1; 5195 /* consumed a varname */ 5196 } else if (isDIGIT(*t)) { 5197 /* deal with hex constants like 0x11 */ 5198 if (t[0] == '0' && t[1] == 'x') { 5199 t += 2; 5200 while (t < PL_bufend && isXDIGIT(*t)) t++; 5201 } else { 5202 /* deal with decimal/octal constants like 1 and 0123 */ 5203 do { t++; } while (isDIGIT(*t)); 5204 if (t<PL_bufend && *t == '.') { 5205 do { t++; } while (isDIGIT(*t)); 5206 } 5207 } 5208 /* consumed a number */ 5209 } else { 5210 /* not a var nor a space nor a number */ 5211 break; 5212 } 5213 } 5214 if (t < PL_bufend && *t++ == ',') { 5215 PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */ 5216 while (t < PL_bufend && *t != ']') 5217 t++; 5218 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 5219 "Multidimensional syntax %" UTF8f " not supported", 5220 UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr)); 5221 } 5222 } 5223 } 5224 else if (*s == '{') { 5225 char *t; 5226 PL_tokenbuf[0] = '%'; 5227 if ( strEQ(PL_tokenbuf+1, "SIG") 5228 && ckWARN(WARN_SYNTAX) 5229 && (t = (char *) memchr(s, '}', PL_bufend - s)) 5230 && (t = (char *) memchr(t, '=', PL_bufend - t))) 5231 { 5232 char tmpbuf[sizeof PL_tokenbuf]; 5233 do { 5234 t++; 5235 } while (isSPACE(*t)); 5236 if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) { 5237 STRLEN len; 5238 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, 5239 &len); 5240 while (isSPACE(*t)) 5241 t++; 5242 if ( *t == ';' 5243 && get_cvn_flags(tmpbuf, len, UTF 5244 ? SVf_UTF8 5245 : 0)) 5246 { 5247 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 5248 "You need to quote \"%" UTF8f "\"", 5249 UTF8fARG(UTF, len, tmpbuf)); 5250 } 5251 } 5252 } 5253 } 5254 } 5255 5256 PL_expect = XOPERATOR; 5257 if ((PL_lex_state == LEX_NORMAL || PL_lex_brackets) && isSPACE((char)tmp)) { 5258 const bool islop = (PL_last_lop == PL_oldoldbufptr); 5259 if (!islop || PL_last_lop_op == OP_GREPSTART) 5260 PL_expect = XOPERATOR; 5261 else if (memCHRs("$@\"'`q", *s)) 5262 PL_expect = XTERM; /* e.g. print $fh "foo" */ 5263 else if ( memCHRs("&*<%", *s) 5264 && isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF)) 5265 { 5266 PL_expect = XTERM; /* e.g. print $fh &sub */ 5267 } 5268 else if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) { 5269 char tmpbuf[sizeof PL_tokenbuf]; 5270 int t2; 5271 STRLEN len; 5272 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len); 5273 if ((t2 = keyword(tmpbuf, len, 0))) { 5274 /* binary operators exclude handle interpretations */ 5275 switch (t2) { 5276 case -KEY_x: 5277 case -KEY_eq: 5278 case -KEY_ne: 5279 case -KEY_gt: 5280 case -KEY_lt: 5281 case -KEY_ge: 5282 case -KEY_le: 5283 case -KEY_cmp: 5284 break; 5285 default: 5286 PL_expect = XTERM; /* e.g. print $fh length() */ 5287 break; 5288 } 5289 } 5290 else { 5291 PL_expect = XTERM; /* e.g. print $fh subr() */ 5292 } 5293 } 5294 else if (isDIGIT(*s)) 5295 PL_expect = XTERM; /* e.g. print $fh 3 */ 5296 else if (*s == '.' && isDIGIT(s[1])) 5297 PL_expect = XTERM; /* e.g. print $fh .3 */ 5298 else if ((*s == '?' || *s == '-' || *s == '+') 5299 && !isSPACE(s[1]) && s[1] != '=') 5300 PL_expect = XTERM; /* e.g. print $fh -1 */ 5301 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '=' 5302 && s[1] != '/') 5303 PL_expect = XTERM; /* e.g. print $fh /.../ 5304 XXX except DORDOR operator 5305 */ 5306 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) 5307 && s[2] != '=') 5308 PL_expect = XTERM; /* print $fh <<"EOF" */ 5309 } 5310 } 5311 force_ident_maybe_lex('$'); 5312 TOKEN(PERLY_DOLLAR); 5313 } 5314 5315 static int 5316 yyl_sub(pTHX_ char *s, const int key) 5317 { 5318 char * const tmpbuf = PL_tokenbuf + 1; 5319 bool have_name, have_proto; 5320 STRLEN len; 5321 SV *format_name = NULL; 5322 bool is_sigsub = FEATURE_SIGNATURES_IS_ENABLED; 5323 5324 SSize_t off = s-SvPVX(PL_linestr); 5325 char *d; 5326 5327 s = skipspace(s); /* can move PL_linestr */ 5328 5329 d = SvPVX(PL_linestr)+off; 5330 5331 SAVEBOOL(PL_parser->sig_seen); 5332 PL_parser->sig_seen = FALSE; 5333 5334 if ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) 5335 || *s == '\'' 5336 || (*s == ':' && s[1] == ':')) 5337 { 5338 5339 PL_expect = XATTRBLOCK; 5340 d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE, 5341 &len); 5342 if (key == KEY_format) 5343 format_name = S_newSV_maybe_utf8(aTHX_ s, d - s); 5344 *PL_tokenbuf = '&'; 5345 if (memchr(tmpbuf, ':', len) || key != KEY_sub 5346 || pad_findmy_pvn( 5347 PL_tokenbuf, len + 1, 0 5348 ) != NOT_IN_PAD) 5349 sv_setpvn(PL_subname, tmpbuf, len); 5350 else { 5351 sv_setsv(PL_subname,PL_curstname); 5352 sv_catpvs(PL_subname,"::"); 5353 sv_catpvn(PL_subname,tmpbuf,len); 5354 } 5355 if (SvUTF8(PL_linestr)) 5356 SvUTF8_on(PL_subname); 5357 have_name = TRUE; 5358 5359 s = skipspace(d); 5360 } 5361 else { 5362 if (key == KEY_my || key == KEY_our || key==KEY_state) { 5363 *d = '\0'; 5364 /* diag_listed_as: Missing name in "%s sub" */ 5365 Perl_croak(aTHX_ 5366 "Missing name in \"%s\"", PL_bufptr); 5367 } 5368 PL_expect = XATTRTERM; 5369 sv_setpvs(PL_subname,"?"); 5370 have_name = FALSE; 5371 } 5372 5373 if (key == KEY_format) { 5374 if (format_name) { 5375 NEXTVAL_NEXTTOKE.opval 5376 = newSVOP(OP_CONST,0, format_name); 5377 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE; 5378 force_next(BAREWORD); 5379 } 5380 PREBLOCK(FORMAT); 5381 } 5382 5383 /* Look for a prototype */ 5384 if (*s == '(' && !is_sigsub) { 5385 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 5386 if (!s) 5387 Perl_croak(aTHX_ "Prototype not terminated"); 5388 COPLINE_SET_FROM_MULTI_END; 5389 (void)validate_proto(PL_subname, PL_lex_stuff, 5390 ckWARN(WARN_ILLEGALPROTO), 0); 5391 have_proto = TRUE; 5392 5393 s = skipspace(s); 5394 } 5395 else 5396 have_proto = FALSE; 5397 5398 if ( !(*s == ':' && s[1] != ':') 5399 && (*s != '{' && *s != '(') && key != KEY_format) 5400 { 5401 assert(key == KEY_sub || key == KEY_AUTOLOAD || 5402 key == KEY_DESTROY || key == KEY_BEGIN || 5403 key == KEY_UNITCHECK || key == KEY_CHECK || 5404 key == KEY_INIT || key == KEY_END || 5405 key == KEY_my || key == KEY_state || 5406 key == KEY_our); 5407 if (!have_name) 5408 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine"); 5409 else if (*s != ';' && *s != '}') 5410 Perl_croak(aTHX_ "Illegal declaration of subroutine %" SVf, SVfARG(PL_subname)); 5411 } 5412 5413 if (have_proto) { 5414 NEXTVAL_NEXTTOKE.opval = 5415 newSVOP(OP_CONST, 0, PL_lex_stuff); 5416 PL_lex_stuff = NULL; 5417 force_next(THING); 5418 } 5419 if (!have_name) { 5420 if (PL_curstash) 5421 sv_setpvs(PL_subname, "__ANON__"); 5422 else 5423 sv_setpvs(PL_subname, "__ANON__::__ANON__"); 5424 if (is_sigsub) 5425 TOKEN(ANON_SIGSUB); 5426 else 5427 TOKEN(ANONSUB); 5428 } 5429 force_ident_maybe_lex('&'); 5430 if (is_sigsub) 5431 TOKEN(SIGSUB); 5432 else 5433 TOKEN(SUB); 5434 } 5435 5436 static int 5437 yyl_interpcasemod(pTHX_ char *s) 5438 { 5439 #ifdef DEBUGGING 5440 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\') 5441 Perl_croak(aTHX_ 5442 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u", 5443 PL_bufptr, PL_bufend, *PL_bufptr); 5444 #endif 5445 5446 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') { 5447 /* if at a \E */ 5448 if (PL_lex_casemods) { 5449 const char oldmod = PL_lex_casestack[--PL_lex_casemods]; 5450 PL_lex_casestack[PL_lex_casemods] = '\0'; 5451 5452 if (PL_bufptr != PL_bufend 5453 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q' 5454 || oldmod == 'F')) { 5455 PL_bufptr += 2; 5456 PL_lex_state = LEX_INTERPCONCAT; 5457 } 5458 PL_lex_allbrackets--; 5459 return REPORT(PERLY_PAREN_CLOSE); 5460 } 5461 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) { 5462 /* Got an unpaired \E */ 5463 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), 5464 "Useless use of \\E"); 5465 } 5466 if (PL_bufptr != PL_bufend) 5467 PL_bufptr += 2; 5468 PL_lex_state = LEX_INTERPCONCAT; 5469 return yylex(); 5470 } 5471 else { 5472 DEBUG_T({ 5473 PerlIO_printf(Perl_debug_log, "### Saw case modifier\n"); 5474 }); 5475 s = PL_bufptr + 1; 5476 if (s[1] == '\\' && s[2] == 'E') { 5477 PL_bufptr = s + 3; 5478 PL_lex_state = LEX_INTERPCONCAT; 5479 return yylex(); 5480 } 5481 else { 5482 I32 tmp; 5483 if ( memBEGINs(s, (STRLEN) (PL_bufend - s), "L\\u") 5484 || memBEGINs(s, (STRLEN) (PL_bufend - s), "U\\l")) 5485 { 5486 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */ 5487 } 5488 if ((*s == 'L' || *s == 'U' || *s == 'F') 5489 && (strpbrk(PL_lex_casestack, "LUF"))) 5490 { 5491 PL_lex_casestack[--PL_lex_casemods] = '\0'; 5492 PL_lex_allbrackets--; 5493 return REPORT(PERLY_PAREN_CLOSE); 5494 } 5495 if (PL_lex_casemods > 10) 5496 Renew(PL_lex_casestack, PL_lex_casemods + 2, char); 5497 PL_lex_casestack[PL_lex_casemods++] = *s; 5498 PL_lex_casestack[PL_lex_casemods] = '\0'; 5499 PL_lex_state = LEX_INTERPCONCAT; 5500 NEXTVAL_NEXTTOKE.ival = 0; 5501 force_next((2<<24)|PERLY_PAREN_OPEN); 5502 if (*s == 'l') 5503 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST; 5504 else if (*s == 'u') 5505 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST; 5506 else if (*s == 'L') 5507 NEXTVAL_NEXTTOKE.ival = OP_LC; 5508 else if (*s == 'U') 5509 NEXTVAL_NEXTTOKE.ival = OP_UC; 5510 else if (*s == 'Q') 5511 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA; 5512 else if (*s == 'F') 5513 NEXTVAL_NEXTTOKE.ival = OP_FC; 5514 else 5515 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s); 5516 PL_bufptr = s + 1; 5517 } 5518 force_next(FUNC); 5519 if (PL_lex_starts) { 5520 s = PL_bufptr; 5521 PL_lex_starts = 0; 5522 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ 5523 if (PL_lex_casemods == 1 && PL_lex_inpat) 5524 TOKEN(PERLY_COMMA); 5525 else 5526 AopNOASSIGN(OP_CONCAT); 5527 } 5528 else 5529 return yylex(); 5530 } 5531 } 5532 5533 static int 5534 yyl_secondclass_keyword(pTHX_ char *s, STRLEN len, int key, I32 *orig_keyword, 5535 GV **pgv, GV ***pgvp) 5536 { 5537 GV *ogv = NULL; /* override (winner) */ 5538 GV *hgv = NULL; /* hidden (loser) */ 5539 GV *gv = *pgv; 5540 5541 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) { 5542 CV *cv; 5543 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 5544 (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL, 5545 SVt_PVCV)) 5546 && (cv = GvCVu(gv))) 5547 { 5548 if (GvIMPORTED_CV(gv)) 5549 ogv = gv; 5550 else if (! CvMETHOD(cv)) 5551 hgv = gv; 5552 } 5553 if (!ogv 5554 && (*pgvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf, len, FALSE)) 5555 && (gv = **pgvp) 5556 && (isGV_with_GP(gv) 5557 ? GvCVu(gv) && GvIMPORTED_CV(gv) 5558 : SvPCS_IMPORTED(gv) 5559 && (gv_init(gv, PL_globalstash, PL_tokenbuf, 5560 len, 0), 1))) 5561 { 5562 ogv = gv; 5563 } 5564 } 5565 5566 *pgv = gv; 5567 5568 if (ogv) { 5569 *orig_keyword = key; 5570 return 0; /* overridden by import or by GLOBAL */ 5571 } 5572 else if (gv && !*pgvp 5573 && -key==KEY_lock /* XXX generalizable kludge */ 5574 && GvCVu(gv)) 5575 { 5576 return 0; /* any sub overrides "weak" keyword */ 5577 } 5578 else { /* no override */ 5579 key = -key; 5580 if (key == KEY_dump) { 5581 Perl_croak(aTHX_ "dump() must be written as CORE::dump() as of Perl 5.30"); 5582 } 5583 *pgv = NULL; 5584 *pgvp = 0; 5585 if (hgv && key != KEY_x) /* never ambiguous */ 5586 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 5587 "Ambiguous call resolved as CORE::%s(), " 5588 "qualify as such or use &", 5589 GvENAME(hgv)); 5590 return key; 5591 } 5592 } 5593 5594 static int 5595 yyl_qw(pTHX_ char *s, STRLEN len) 5596 { 5597 OP *words = NULL; 5598 5599 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 5600 if (!s) 5601 missingterm(NULL, 0); 5602 5603 COPLINE_SET_FROM_MULTI_END; 5604 PL_expect = XOPERATOR; 5605 if (SvCUR(PL_lex_stuff)) { 5606 int warned_comma = !ckWARN(WARN_QW); 5607 int warned_comment = warned_comma; 5608 char *d = SvPV_force(PL_lex_stuff, len); 5609 while (len) { 5610 for (; isSPACE(*d) && len; --len, ++d) 5611 /**/; 5612 if (len) { 5613 SV *sv; 5614 const char *b = d; 5615 if (!warned_comma || !warned_comment) { 5616 for (; !isSPACE(*d) && len; --len, ++d) { 5617 if (!warned_comma && *d == ',') { 5618 Perl_warner(aTHX_ packWARN(WARN_QW), 5619 "Possible attempt to separate words with commas"); 5620 ++warned_comma; 5621 } 5622 else if (!warned_comment && *d == '#') { 5623 Perl_warner(aTHX_ packWARN(WARN_QW), 5624 "Possible attempt to put comments in qw() list"); 5625 ++warned_comment; 5626 } 5627 } 5628 } 5629 else { 5630 for (; !isSPACE(*d) && len; --len, ++d) 5631 /**/; 5632 } 5633 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff)); 5634 words = op_append_elem(OP_LIST, words, 5635 newSVOP(OP_CONST, 0, tokeq(sv))); 5636 } 5637 } 5638 } 5639 if (!words) 5640 words = newNULLLIST(); 5641 SvREFCNT_dec_NN(PL_lex_stuff); 5642 PL_lex_stuff = NULL; 5643 PL_expect = XOPERATOR; 5644 pl_yylval.opval = sawparens(words); 5645 TOKEN(QWLIST); 5646 } 5647 5648 static int 5649 yyl_hyphen(pTHX_ char *s) 5650 { 5651 if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) { 5652 I32 ftst = 0; 5653 char tmp; 5654 5655 s++; 5656 PL_bufptr = s; 5657 tmp = *s++; 5658 5659 while (s < PL_bufend && SPACE_OR_TAB(*s)) 5660 s++; 5661 5662 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=>")) { 5663 s = force_word(PL_bufptr,BAREWORD,FALSE,FALSE); 5664 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } ); 5665 OPERATOR(PERLY_MINUS); /* unary minus */ 5666 } 5667 switch (tmp) { 5668 case 'r': ftst = OP_FTEREAD; break; 5669 case 'w': ftst = OP_FTEWRITE; break; 5670 case 'x': ftst = OP_FTEEXEC; break; 5671 case 'o': ftst = OP_FTEOWNED; break; 5672 case 'R': ftst = OP_FTRREAD; break; 5673 case 'W': ftst = OP_FTRWRITE; break; 5674 case 'X': ftst = OP_FTREXEC; break; 5675 case 'O': ftst = OP_FTROWNED; break; 5676 case 'e': ftst = OP_FTIS; break; 5677 case 'z': ftst = OP_FTZERO; break; 5678 case 's': ftst = OP_FTSIZE; break; 5679 case 'f': ftst = OP_FTFILE; break; 5680 case 'd': ftst = OP_FTDIR; break; 5681 case 'l': ftst = OP_FTLINK; break; 5682 case 'p': ftst = OP_FTPIPE; break; 5683 case 'S': ftst = OP_FTSOCK; break; 5684 case 'u': ftst = OP_FTSUID; break; 5685 case 'g': ftst = OP_FTSGID; break; 5686 case 'k': ftst = OP_FTSVTX; break; 5687 case 'b': ftst = OP_FTBLK; break; 5688 case 'c': ftst = OP_FTCHR; break; 5689 case 't': ftst = OP_FTTTY; break; 5690 case 'T': ftst = OP_FTTEXT; break; 5691 case 'B': ftst = OP_FTBINARY; break; 5692 case 'M': case 'A': case 'C': 5693 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV); 5694 switch (tmp) { 5695 case 'M': ftst = OP_FTMTIME; break; 5696 case 'A': ftst = OP_FTATIME; break; 5697 case 'C': ftst = OP_FTCTIME; break; 5698 default: break; 5699 } 5700 break; 5701 default: 5702 break; 5703 } 5704 if (ftst) { 5705 PL_last_uni = PL_oldbufptr; 5706 PL_last_lop_op = (OPCODE)ftst; 5707 DEBUG_T( { 5708 PerlIO_printf(Perl_debug_log, "### Saw file test %c\n", (int)tmp); 5709 } ); 5710 FTST(ftst); 5711 } 5712 else { 5713 /* Assume it was a minus followed by a one-letter named 5714 * subroutine call (or a -bareword), then. */ 5715 DEBUG_T( { 5716 PerlIO_printf(Perl_debug_log, 5717 "### '-%c' looked like a file test but was not\n", 5718 (int) tmp); 5719 } ); 5720 s = --PL_bufptr; 5721 } 5722 } 5723 { 5724 const char tmp = *s++; 5725 if (*s == tmp) { 5726 s++; 5727 if (PL_expect == XOPERATOR) 5728 TERM(POSTDEC); 5729 else 5730 OPERATOR(PREDEC); 5731 } 5732 else if (*s == '>') { 5733 s++; 5734 s = skipspace(s); 5735 if (((*s == '$' || *s == '&') && s[1] == '*') 5736 ||(*s == '$' && s[1] == '#' && s[2] == '*') 5737 ||((*s == '@' || *s == '%') && memCHRs("*[{", s[1])) 5738 ||(*s == '*' && (s[1] == '*' || s[1] == '{')) 5739 ) 5740 { 5741 PL_expect = XPOSTDEREF; 5742 TOKEN(ARROW); 5743 } 5744 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) { 5745 s = force_word(s,METHOD,FALSE,TRUE); 5746 TOKEN(ARROW); 5747 } 5748 else if (*s == '$') 5749 OPERATOR(ARROW); 5750 else 5751 TERM(ARROW); 5752 } 5753 if (PL_expect == XOPERATOR) { 5754 if (*s == '=' 5755 && !PL_lex_allbrackets 5756 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 5757 { 5758 s--; 5759 TOKEN(0); 5760 } 5761 Aop(OP_SUBTRACT); 5762 } 5763 else { 5764 if (isSPACE(*s) || !isSPACE(*PL_bufptr)) 5765 check_uni(); 5766 OPERATOR(PERLY_MINUS); /* unary minus */ 5767 } 5768 } 5769 } 5770 5771 static int 5772 yyl_plus(pTHX_ char *s) 5773 { 5774 const char tmp = *s++; 5775 if (*s == tmp) { 5776 s++; 5777 if (PL_expect == XOPERATOR) 5778 TERM(POSTINC); 5779 else 5780 OPERATOR(PREINC); 5781 } 5782 if (PL_expect == XOPERATOR) { 5783 if (*s == '=' 5784 && !PL_lex_allbrackets 5785 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 5786 { 5787 s--; 5788 TOKEN(0); 5789 } 5790 Aop(OP_ADD); 5791 } 5792 else { 5793 if (isSPACE(*s) || !isSPACE(*PL_bufptr)) 5794 check_uni(); 5795 OPERATOR(PERLY_PLUS); 5796 } 5797 } 5798 5799 static int 5800 yyl_star(pTHX_ char *s) 5801 { 5802 if (PL_expect == XPOSTDEREF) 5803 POSTDEREF(PERLY_STAR); 5804 5805 if (PL_expect != XOPERATOR) { 5806 s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE); 5807 PL_expect = XOPERATOR; 5808 force_ident(PL_tokenbuf, PERLY_STAR); 5809 if (!*PL_tokenbuf) 5810 PREREF(PERLY_STAR); 5811 TERM(PERLY_STAR); 5812 } 5813 5814 s++; 5815 if (*s == '*') { 5816 s++; 5817 if (*s == '=' && !PL_lex_allbrackets 5818 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 5819 { 5820 s -= 2; 5821 TOKEN(0); 5822 } 5823 PWop(OP_POW); 5824 } 5825 5826 if (*s == '=' 5827 && !PL_lex_allbrackets 5828 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 5829 { 5830 s--; 5831 TOKEN(0); 5832 } 5833 5834 Mop(OP_MULTIPLY); 5835 } 5836 5837 static int 5838 yyl_percent(pTHX_ char *s) 5839 { 5840 if (PL_expect == XOPERATOR) { 5841 if (s[1] == '=' 5842 && !PL_lex_allbrackets 5843 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 5844 { 5845 TOKEN(0); 5846 } 5847 ++s; 5848 Mop(OP_MODULO); 5849 } 5850 else if (PL_expect == XPOSTDEREF) 5851 POSTDEREF(PERLY_PERCENT_SIGN); 5852 5853 PL_tokenbuf[0] = '%'; 5854 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE); 5855 pl_yylval.ival = 0; 5856 if (!PL_tokenbuf[1]) { 5857 PREREF(PERLY_PERCENT_SIGN); 5858 } 5859 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) 5860 && intuit_more(s, PL_bufend)) { 5861 if (*s == '[') 5862 PL_tokenbuf[0] = '@'; 5863 } 5864 PL_expect = XOPERATOR; 5865 force_ident_maybe_lex('%'); 5866 TERM(PERLY_PERCENT_SIGN); 5867 } 5868 5869 static int 5870 yyl_caret(pTHX_ char *s) 5871 { 5872 char *d = s; 5873 const bool bof = cBOOL(FEATURE_BITWISE_IS_ENABLED); 5874 if (bof && s[1] == '.') 5875 s++; 5876 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 5877 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) 5878 { 5879 s = d; 5880 TOKEN(0); 5881 } 5882 s++; 5883 BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR); 5884 } 5885 5886 static int 5887 yyl_colon(pTHX_ char *s) 5888 { 5889 OP *attrs; 5890 5891 switch (PL_expect) { 5892 case XOPERATOR: 5893 if (!PL_in_my || (PL_lex_state != LEX_NORMAL && !PL_lex_brackets)) 5894 break; 5895 PL_bufptr = s; /* update in case we back off */ 5896 if (*s == '=') { 5897 Perl_croak(aTHX_ 5898 "Use of := for an empty attribute list is not allowed"); 5899 } 5900 goto grabattrs; 5901 case XATTRBLOCK: 5902 PL_expect = XBLOCK; 5903 goto grabattrs; 5904 case XATTRTERM: 5905 PL_expect = XTERMBLOCK; 5906 grabattrs: 5907 /* NB: as well as parsing normal attributes, we also end up 5908 * here if there is something looking like attributes 5909 * following a signature (which is illegal, but used to be 5910 * legal in 5.20..5.26). If the latter, we still parse the 5911 * attributes so that error messages(s) are less confusing, 5912 * but ignore them (parser->sig_seen). 5913 */ 5914 s = skipspace(s); 5915 attrs = NULL; 5916 while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) { 5917 bool sig = PL_parser->sig_seen; 5918 I32 tmp; 5919 SV *sv; 5920 STRLEN len; 5921 char *d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len); 5922 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) { 5923 if (tmp < 0) tmp = -tmp; 5924 switch (tmp) { 5925 case KEY_or: 5926 case KEY_and: 5927 case KEY_for: 5928 case KEY_foreach: 5929 case KEY_unless: 5930 case KEY_if: 5931 case KEY_while: 5932 case KEY_until: 5933 goto got_attrs; 5934 default: 5935 break; 5936 } 5937 } 5938 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0); 5939 if (*d == '(') { 5940 d = scan_str(d,TRUE,TRUE,FALSE,NULL); 5941 if (!d) { 5942 if (attrs) 5943 op_free(attrs); 5944 sv_free(sv); 5945 Perl_croak(aTHX_ "Unterminated attribute parameter in attribute list"); 5946 } 5947 COPLINE_SET_FROM_MULTI_END; 5948 } 5949 if (PL_lex_stuff) { 5950 sv_catsv(sv, PL_lex_stuff); 5951 attrs = op_append_elem(OP_LIST, attrs, 5952 newSVOP(OP_CONST, 0, sv)); 5953 SvREFCNT_dec_NN(PL_lex_stuff); 5954 PL_lex_stuff = NULL; 5955 } 5956 else { 5957 /* NOTE: any CV attrs applied here need to be part of 5958 the CVf_BUILTIN_ATTRS define in cv.h! */ 5959 if (!PL_in_my && memEQs(SvPVX(sv), len, "lvalue")) { 5960 sv_free(sv); 5961 if (!sig) 5962 CvLVALUE_on(PL_compcv); 5963 } 5964 else if (!PL_in_my && memEQs(SvPVX(sv), len, "method")) { 5965 sv_free(sv); 5966 if (!sig) 5967 CvMETHOD_on(PL_compcv); 5968 } 5969 else if (!PL_in_my && memEQs(SvPVX(sv), len, "const")) { 5970 sv_free(sv); 5971 if (!sig) { 5972 Perl_ck_warner_d(aTHX_ 5973 packWARN(WARN_EXPERIMENTAL__CONST_ATTR), 5974 ":const is experimental" 5975 ); 5976 CvANONCONST_on(PL_compcv); 5977 if (!CvANON(PL_compcv)) 5978 yyerror(":const is not permitted on named " 5979 "subroutines"); 5980 } 5981 } 5982 /* After we've set the flags, it could be argued that 5983 we don't need to do the attributes.pm-based setting 5984 process, and shouldn't bother appending recognized 5985 flags. To experiment with that, uncomment the 5986 following "else". (Note that's already been 5987 uncommented. That keeps the above-applied built-in 5988 attributes from being intercepted (and possibly 5989 rejected) by a package's attribute routines, but is 5990 justified by the performance win for the common case 5991 of applying only built-in attributes.) */ 5992 else 5993 attrs = op_append_elem(OP_LIST, attrs, 5994 newSVOP(OP_CONST, 0, 5995 sv)); 5996 } 5997 s = skipspace(d); 5998 if (*s == ':' && s[1] != ':') 5999 s = skipspace(s+1); 6000 else if (s == d) 6001 break; /* require real whitespace or :'s */ 6002 /* XXX losing whitespace on sequential attributes here */ 6003 } 6004 6005 if (*s != ';' 6006 && *s != '}' 6007 && !(PL_expect == XOPERATOR 6008 ? (*s == '=' || *s == ')') 6009 : (*s == '{' || *s == '('))) 6010 { 6011 const char q = ((*s == '\'') ? '"' : '\''); 6012 /* If here for an expression, and parsed no attrs, back off. */ 6013 if (PL_expect == XOPERATOR && !attrs) { 6014 s = PL_bufptr; 6015 break; 6016 } 6017 /* MUST advance bufptr here to avoid bogus "at end of line" 6018 context messages from yyerror(). 6019 */ 6020 PL_bufptr = s; 6021 yyerror( (const char *) 6022 (*s 6023 ? Perl_form(aTHX_ "Invalid separator character " 6024 "%c%c%c in attribute list", q, *s, q) 6025 : "Unterminated attribute list" ) ); 6026 if (attrs) 6027 op_free(attrs); 6028 OPERATOR(PERLY_COLON); 6029 } 6030 6031 got_attrs: 6032 if (PL_parser->sig_seen) { 6033 /* see comment about about sig_seen and parser error 6034 * handling */ 6035 if (attrs) 6036 op_free(attrs); 6037 Perl_croak(aTHX_ "Subroutine attributes must come " 6038 "before the signature"); 6039 } 6040 if (attrs) { 6041 NEXTVAL_NEXTTOKE.opval = attrs; 6042 force_next(THING); 6043 } 6044 TOKEN(COLONATTR); 6045 } 6046 6047 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) { 6048 s--; 6049 TOKEN(0); 6050 } 6051 6052 PL_lex_allbrackets--; 6053 OPERATOR(PERLY_COLON); 6054 } 6055 6056 static int 6057 yyl_subproto(pTHX_ char *s, CV *cv) 6058 { 6059 STRLEN protolen = CvPROTOLEN(cv); 6060 const char *proto = CvPROTO(cv); 6061 bool optional; 6062 6063 proto = S_strip_spaces(aTHX_ proto, &protolen); 6064 if (!protolen) 6065 TERM(FUNC0SUB); 6066 if ((optional = *proto == ';')) { 6067 do { 6068 proto++; 6069 } while (*proto == ';'); 6070 } 6071 6072 if ( 6073 ( 6074 ( 6075 *proto == '$' || *proto == '_' 6076 || *proto == '*' || *proto == '+' 6077 ) 6078 && proto[1] == '\0' 6079 ) 6080 || ( 6081 *proto == '\\' && proto[1] && proto[2] == '\0' 6082 ) 6083 ) { 6084 UNIPROTO(UNIOPSUB,optional); 6085 } 6086 6087 if (*proto == '\\' && proto[1] == '[') { 6088 const char *p = proto + 2; 6089 while(*p && *p != ']') 6090 ++p; 6091 if(*p == ']' && !p[1]) 6092 UNIPROTO(UNIOPSUB,optional); 6093 } 6094 6095 if (*proto == '&' && *s == '{') { 6096 if (PL_curstash) 6097 sv_setpvs(PL_subname, "__ANON__"); 6098 else 6099 sv_setpvs(PL_subname, "__ANON__::__ANON__"); 6100 if (!PL_lex_allbrackets 6101 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 6102 { 6103 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 6104 } 6105 PREBLOCK(LSTOPSUB); 6106 } 6107 6108 return KEY_NULL; 6109 } 6110 6111 static int 6112 yyl_leftcurly(pTHX_ char *s, const U8 formbrack) 6113 { 6114 char *d; 6115 if (PL_lex_brackets > 100) { 6116 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); 6117 } 6118 6119 switch (PL_expect) { 6120 case XTERM: 6121 case XTERMORDORDOR: 6122 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; 6123 PL_lex_allbrackets++; 6124 OPERATOR(HASHBRACK); 6125 case XOPERATOR: 6126 while (s < PL_bufend && SPACE_OR_TAB(*s)) 6127 s++; 6128 d = s; 6129 PL_tokenbuf[0] = '\0'; 6130 if (d < PL_bufend && *d == '-') { 6131 PL_tokenbuf[0] = '-'; 6132 d++; 6133 while (d < PL_bufend && SPACE_OR_TAB(*d)) 6134 d++; 6135 } 6136 if (d < PL_bufend && isIDFIRST_lazy_if_safe(d, PL_bufend, UTF)) { 6137 STRLEN len; 6138 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, 6139 FALSE, &len); 6140 while (d < PL_bufend && SPACE_OR_TAB(*d)) 6141 d++; 6142 if (*d == '}') { 6143 const char minus = (PL_tokenbuf[0] == '-'); 6144 s = force_word(s + minus, BAREWORD, FALSE, TRUE); 6145 if (minus) 6146 force_next(PERLY_MINUS); 6147 } 6148 } 6149 /* FALLTHROUGH */ 6150 case XATTRTERM: 6151 case XTERMBLOCK: 6152 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; 6153 PL_lex_allbrackets++; 6154 PL_expect = XSTATE; 6155 break; 6156 case XATTRBLOCK: 6157 case XBLOCK: 6158 PL_lex_brackstack[PL_lex_brackets++] = XSTATE; 6159 PL_lex_allbrackets++; 6160 PL_expect = XSTATE; 6161 break; 6162 case XBLOCKTERM: 6163 PL_lex_brackstack[PL_lex_brackets++] = XTERM; 6164 PL_lex_allbrackets++; 6165 PL_expect = XSTATE; 6166 break; 6167 default: { 6168 const char *t; 6169 if (PL_oldoldbufptr == PL_last_lop) 6170 PL_lex_brackstack[PL_lex_brackets++] = XTERM; 6171 else 6172 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; 6173 PL_lex_allbrackets++; 6174 s = skipspace(s); 6175 if (*s == '}') { 6176 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) { 6177 PL_expect = XTERM; 6178 /* This hack is to get the ${} in the message. */ 6179 PL_bufptr = s+1; 6180 yyerror("syntax error"); 6181 break; 6182 } 6183 OPERATOR(HASHBRACK); 6184 } 6185 if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) { 6186 /* ${...} or @{...} etc., but not print {...} 6187 * Skip the disambiguation and treat this as a block. 6188 */ 6189 goto block_expectation; 6190 } 6191 /* This hack serves to disambiguate a pair of curlies 6192 * as being a block or an anon hash. Normally, expectation 6193 * determines that, but in cases where we're not in a 6194 * position to expect anything in particular (like inside 6195 * eval"") we have to resolve the ambiguity. This code 6196 * covers the case where the first term in the curlies is a 6197 * quoted string. Most other cases need to be explicitly 6198 * disambiguated by prepending a "+" before the opening 6199 * curly in order to force resolution as an anon hash. 6200 * 6201 * XXX should probably propagate the outer expectation 6202 * into eval"" to rely less on this hack, but that could 6203 * potentially break current behavior of eval"". 6204 * GSAR 97-07-21 6205 */ 6206 t = s; 6207 if (*s == '\'' || *s == '"' || *s == '`') { 6208 /* common case: get past first string, handling escapes */ 6209 for (t++; t < PL_bufend && *t != *s;) 6210 if (*t++ == '\\') 6211 t++; 6212 t++; 6213 } 6214 else if (*s == 'q') { 6215 if (++t < PL_bufend 6216 && (!isWORDCHAR(*t) 6217 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend 6218 && !isWORDCHAR(*t)))) 6219 { 6220 /* skip q//-like construct */ 6221 const char *tmps; 6222 char open, close, term; 6223 I32 brackets = 1; 6224 6225 while (t < PL_bufend && isSPACE(*t)) 6226 t++; 6227 /* check for q => */ 6228 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') { 6229 OPERATOR(HASHBRACK); 6230 } 6231 term = *t; 6232 open = term; 6233 if (term && (tmps = memCHRs("([{< )]}> )]}>",term))) 6234 term = tmps[5]; 6235 close = term; 6236 if (open == close) 6237 for (t++; t < PL_bufend; t++) { 6238 if (*t == '\\' && t+1 < PL_bufend && open != '\\') 6239 t++; 6240 else if (*t == open) 6241 break; 6242 } 6243 else { 6244 for (t++; t < PL_bufend; t++) { 6245 if (*t == '\\' && t+1 < PL_bufend) 6246 t++; 6247 else if (*t == close && --brackets <= 0) 6248 break; 6249 else if (*t == open) 6250 brackets++; 6251 } 6252 } 6253 t++; 6254 } 6255 else 6256 /* skip plain q word */ 6257 while ( t < PL_bufend 6258 && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) 6259 { 6260 t += UTF ? UTF8SKIP(t) : 1; 6261 } 6262 } 6263 else if (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) { 6264 t += UTF ? UTF8SKIP(t) : 1; 6265 while ( t < PL_bufend 6266 && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) 6267 { 6268 t += UTF ? UTF8SKIP(t) : 1; 6269 } 6270 } 6271 while (t < PL_bufend && isSPACE(*t)) 6272 t++; 6273 /* if comma follows first term, call it an anon hash */ 6274 /* XXX it could be a comma expression with loop modifiers */ 6275 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s))) 6276 || (*t == '=' && t[1] == '>'))) 6277 OPERATOR(HASHBRACK); 6278 if (PL_expect == XREF) { 6279 block_expectation: 6280 /* If there is an opening brace or 'sub:', treat it 6281 as a term to make ${{...}}{k} and &{sub:attr...} 6282 dwim. Otherwise, treat it as a statement, so 6283 map {no strict; ...} works. 6284 */ 6285 s = skipspace(s); 6286 if (*s == '{') { 6287 PL_expect = XTERM; 6288 break; 6289 } 6290 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "sub")) { 6291 PL_bufptr = s; 6292 d = s + 3; 6293 d = skipspace(d); 6294 s = PL_bufptr; 6295 if (*d == ':') { 6296 PL_expect = XTERM; 6297 break; 6298 } 6299 } 6300 PL_expect = XSTATE; 6301 } 6302 else { 6303 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE; 6304 PL_expect = XSTATE; 6305 } 6306 } 6307 break; 6308 } 6309 6310 pl_yylval.ival = CopLINE(PL_curcop); 6311 PL_copline = NOLINE; /* invalidate current command line number */ 6312 TOKEN(formbrack ? PERLY_EQUAL_SIGN : PERLY_BRACE_OPEN); 6313 } 6314 6315 static int 6316 yyl_rightcurly(pTHX_ char *s, const U8 formbrack) 6317 { 6318 assert(s != PL_bufend); 6319 s++; 6320 6321 if (PL_lex_brackets <= 0) 6322 /* diag_listed_as: Unmatched right %s bracket */ 6323 yyerror("Unmatched right curly bracket"); 6324 else 6325 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets]; 6326 6327 PL_lex_allbrackets--; 6328 6329 if (PL_lex_state == LEX_INTERPNORMAL) { 6330 if (PL_lex_brackets == 0) { 6331 if (PL_expect & XFAKEBRACK) { 6332 PL_expect &= XENUMMASK; 6333 PL_lex_state = LEX_INTERPEND; 6334 PL_bufptr = s; 6335 return yylex(); /* ignore fake brackets */ 6336 } 6337 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr 6338 && SvEVALED(PL_lex_repl)) 6339 PL_lex_state = LEX_INTERPEND; 6340 else if (*s == '-' && s[1] == '>') 6341 PL_lex_state = LEX_INTERPENDMAYBE; 6342 else if (*s != '[' && *s != '{') 6343 PL_lex_state = LEX_INTERPEND; 6344 } 6345 } 6346 6347 if (PL_expect & XFAKEBRACK) { 6348 PL_expect &= XENUMMASK; 6349 PL_bufptr = s; 6350 return yylex(); /* ignore fake brackets */ 6351 } 6352 6353 force_next(formbrack ? PERLY_DOT : PERLY_BRACE_CLOSE); 6354 if (formbrack) LEAVE_with_name("lex_format"); 6355 if (formbrack == 2) { /* means . where arguments were expected */ 6356 force_next(PERLY_SEMICOLON); 6357 TOKEN(FORMRBRACK); 6358 } 6359 6360 TOKEN(PERLY_SEMICOLON); 6361 } 6362 6363 static int 6364 yyl_ampersand(pTHX_ char *s) 6365 { 6366 if (PL_expect == XPOSTDEREF) 6367 POSTDEREF(PERLY_AMPERSAND); 6368 6369 s++; 6370 if (*s++ == '&') { 6371 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 6372 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) { 6373 s -= 2; 6374 TOKEN(0); 6375 } 6376 AOPERATOR(ANDAND); 6377 } 6378 s--; 6379 6380 if (PL_expect == XOPERATOR) { 6381 char *d; 6382 bool bof; 6383 if ( PL_bufptr == PL_linestart 6384 && ckWARN(WARN_SEMICOLON) 6385 && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) 6386 { 6387 CopLINE_dec(PL_curcop); 6388 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi); 6389 CopLINE_inc(PL_curcop); 6390 } 6391 d = s; 6392 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') 6393 s++; 6394 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 6395 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) { 6396 s = d; 6397 s--; 6398 TOKEN(0); 6399 } 6400 if (d == s) 6401 BAop(bof ? OP_NBIT_AND : OP_BIT_AND); 6402 else 6403 BAop(OP_SBIT_AND); 6404 } 6405 6406 PL_tokenbuf[0] = '&'; 6407 s = scan_ident(s - 1, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE); 6408 pl_yylval.ival = (OPpENTERSUB_AMPER<<8); 6409 6410 if (PL_tokenbuf[1]) 6411 force_ident_maybe_lex('&'); 6412 else 6413 PREREF(PERLY_AMPERSAND); 6414 6415 TERM(PERLY_AMPERSAND); 6416 } 6417 6418 static int 6419 yyl_verticalbar(pTHX_ char *s) 6420 { 6421 char *d; 6422 bool bof; 6423 6424 s++; 6425 if (*s++ == '|') { 6426 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 6427 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) { 6428 s -= 2; 6429 TOKEN(0); 6430 } 6431 AOPERATOR(OROR); 6432 } 6433 6434 s--; 6435 d = s; 6436 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') 6437 s++; 6438 6439 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 6440 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) { 6441 s = d - 1; 6442 TOKEN(0); 6443 } 6444 6445 BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR); 6446 } 6447 6448 static int 6449 yyl_bang(pTHX_ char *s) 6450 { 6451 const char tmp = *s++; 6452 if (tmp == '=') { 6453 /* was this !=~ where !~ was meant? 6454 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */ 6455 6456 if (*s == '~' && ckWARN(WARN_SYNTAX)) { 6457 const char *t = s+1; 6458 6459 while (t < PL_bufend && isSPACE(*t)) 6460 ++t; 6461 6462 if (*t == '/' || *t == '?' 6463 || ((*t == 'm' || *t == 's' || *t == 'y') 6464 && !isWORDCHAR(t[1])) 6465 || (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2]))) 6466 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 6467 "!=~ should be !~"); 6468 } 6469 6470 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6471 s -= 2; 6472 TOKEN(0); 6473 } 6474 6475 ChEop(OP_NE); 6476 } 6477 6478 if (tmp == '~') 6479 PMop(OP_NOT); 6480 6481 s--; 6482 OPERATOR(PERLY_EXCLAMATION_MARK); 6483 } 6484 6485 static int 6486 yyl_snail(pTHX_ char *s) 6487 { 6488 if (PL_expect == XPOSTDEREF) 6489 POSTDEREF(PERLY_SNAIL); 6490 PL_tokenbuf[0] = '@'; 6491 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE); 6492 if (PL_expect == XOPERATOR) { 6493 char *d = s; 6494 if (PL_bufptr > s) { 6495 d = PL_bufptr-1; 6496 PL_bufptr = PL_oldbufptr; 6497 } 6498 no_op("Array", d); 6499 } 6500 pl_yylval.ival = 0; 6501 if (!PL_tokenbuf[1]) { 6502 PREREF(PERLY_SNAIL); 6503 } 6504 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) 6505 s = skipspace(s); 6506 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) 6507 && intuit_more(s, PL_bufend)) 6508 { 6509 if (*s == '{') 6510 PL_tokenbuf[0] = '%'; 6511 6512 /* Warn about @ where they meant $. */ 6513 if (*s == '[' || *s == '{') { 6514 if (ckWARN(WARN_SYNTAX)) { 6515 S_check_scalar_slice(aTHX_ s); 6516 } 6517 } 6518 } 6519 PL_expect = XOPERATOR; 6520 force_ident_maybe_lex('@'); 6521 TERM(PERLY_SNAIL); 6522 } 6523 6524 static int 6525 yyl_slash(pTHX_ char *s) 6526 { 6527 if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') { 6528 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 6529 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) 6530 TOKEN(0); 6531 s += 2; 6532 AOPERATOR(DORDOR); 6533 } 6534 else if (PL_expect == XOPERATOR) { 6535 s++; 6536 if (*s == '=' && !PL_lex_allbrackets 6537 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 6538 { 6539 s--; 6540 TOKEN(0); 6541 } 6542 Mop(OP_DIVIDE); 6543 } 6544 else { 6545 /* Disable warning on "study /blah/" */ 6546 if ( PL_oldoldbufptr == PL_last_uni 6547 && ( *PL_last_uni != 's' || s - PL_last_uni < 5 6548 || memNE(PL_last_uni, "study", 5) 6549 || isWORDCHAR_lazy_if_safe(PL_last_uni+5, PL_bufend, UTF) 6550 )) 6551 check_uni(); 6552 s = scan_pat(s,OP_MATCH); 6553 TERM(sublex_start()); 6554 } 6555 } 6556 6557 static int 6558 yyl_leftsquare(pTHX_ char *s) 6559 { 6560 if (PL_lex_brackets > 100) 6561 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); 6562 PL_lex_brackstack[PL_lex_brackets++] = 0; 6563 PL_lex_allbrackets++; 6564 s++; 6565 OPERATOR(PERLY_BRACKET_OPEN); 6566 } 6567 6568 static int 6569 yyl_rightsquare(pTHX_ char *s) 6570 { 6571 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF) 6572 TOKEN(0); 6573 s++; 6574 if (PL_lex_brackets <= 0) 6575 /* diag_listed_as: Unmatched right %s bracket */ 6576 yyerror("Unmatched right square bracket"); 6577 else 6578 --PL_lex_brackets; 6579 PL_lex_allbrackets--; 6580 if (PL_lex_state == LEX_INTERPNORMAL) { 6581 if (PL_lex_brackets == 0) { 6582 if (*s == '-' && s[1] == '>') 6583 PL_lex_state = LEX_INTERPENDMAYBE; 6584 else if (*s != '[' && *s != '{') 6585 PL_lex_state = LEX_INTERPEND; 6586 } 6587 } 6588 TERM(PERLY_BRACKET_CLOSE); 6589 } 6590 6591 static int 6592 yyl_tilde(pTHX_ char *s) 6593 { 6594 bool bof; 6595 if (s[1] == '~' && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)) { 6596 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 6597 TOKEN(0); 6598 s += 2; 6599 Perl_ck_warner_d(aTHX_ 6600 packWARN(WARN_EXPERIMENTAL__SMARTMATCH), 6601 "Smartmatch is experimental"); 6602 NCEop(OP_SMARTMATCH); 6603 } 6604 s++; 6605 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') { 6606 s++; 6607 BCop(OP_SCOMPLEMENT); 6608 } 6609 BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT); 6610 } 6611 6612 static int 6613 yyl_leftparen(pTHX_ char *s) 6614 { 6615 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr) 6616 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */ 6617 else 6618 PL_expect = XTERM; 6619 s = skipspace(s); 6620 PL_lex_allbrackets++; 6621 TOKEN(PERLY_PAREN_OPEN); 6622 } 6623 6624 static int 6625 yyl_rightparen(pTHX_ char *s) 6626 { 6627 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) 6628 TOKEN(0); 6629 s++; 6630 PL_lex_allbrackets--; 6631 s = skipspace(s); 6632 if (*s == '{') 6633 PREBLOCK(PERLY_PAREN_CLOSE); 6634 TERM(PERLY_PAREN_CLOSE); 6635 } 6636 6637 static int 6638 yyl_leftpointy(pTHX_ char *s) 6639 { 6640 char tmp; 6641 6642 if (PL_expect != XOPERATOR) { 6643 if (s[1] != '<' && !memchr(s,'>', PL_bufend - s)) 6644 check_uni(); 6645 if (s[1] == '<' && s[2] != '>') 6646 s = scan_heredoc(s); 6647 else 6648 s = scan_inputsymbol(s); 6649 PL_expect = XOPERATOR; 6650 TOKEN(sublex_start()); 6651 } 6652 6653 s++; 6654 6655 tmp = *s++; 6656 if (tmp == '<') { 6657 if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { 6658 s -= 2; 6659 TOKEN(0); 6660 } 6661 SHop(OP_LEFT_SHIFT); 6662 } 6663 if (tmp == '=') { 6664 tmp = *s++; 6665 if (tmp == '>') { 6666 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6667 s -= 3; 6668 TOKEN(0); 6669 } 6670 NCEop(OP_NCMP); 6671 } 6672 s--; 6673 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6674 s -= 2; 6675 TOKEN(0); 6676 } 6677 ChRop(OP_LE); 6678 } 6679 6680 s--; 6681 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6682 s--; 6683 TOKEN(0); 6684 } 6685 6686 ChRop(OP_LT); 6687 } 6688 6689 static int 6690 yyl_rightpointy(pTHX_ char *s) 6691 { 6692 const char tmp = *s++; 6693 6694 if (tmp == '>') { 6695 if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { 6696 s -= 2; 6697 TOKEN(0); 6698 } 6699 SHop(OP_RIGHT_SHIFT); 6700 } 6701 else if (tmp == '=') { 6702 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6703 s -= 2; 6704 TOKEN(0); 6705 } 6706 ChRop(OP_GE); 6707 } 6708 6709 s--; 6710 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6711 s--; 6712 TOKEN(0); 6713 } 6714 6715 ChRop(OP_GT); 6716 } 6717 6718 static int 6719 yyl_sglquote(pTHX_ char *s) 6720 { 6721 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 6722 if (!s) 6723 missingterm(NULL, 0); 6724 COPLINE_SET_FROM_MULTI_END; 6725 DEBUG_T( { printbuf("### Saw string before %s\n", s); } ); 6726 if (PL_expect == XOPERATOR) { 6727 no_op("String",s); 6728 } 6729 pl_yylval.ival = OP_CONST; 6730 TERM(sublex_start()); 6731 } 6732 6733 static int 6734 yyl_dblquote(pTHX_ char *s) 6735 { 6736 char *d; 6737 STRLEN len; 6738 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 6739 DEBUG_T( { 6740 if (s) 6741 printbuf("### Saw string before %s\n", s); 6742 else 6743 PerlIO_printf(Perl_debug_log, 6744 "### Saw unterminated string\n"); 6745 } ); 6746 if (PL_expect == XOPERATOR) { 6747 no_op("String",s); 6748 } 6749 if (!s) 6750 missingterm(NULL, 0); 6751 pl_yylval.ival = OP_CONST; 6752 /* FIXME. I think that this can be const if char *d is replaced by 6753 more localised variables. */ 6754 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) { 6755 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) { 6756 pl_yylval.ival = OP_STRINGIFY; 6757 break; 6758 } 6759 } 6760 if (pl_yylval.ival == OP_CONST) 6761 COPLINE_SET_FROM_MULTI_END; 6762 TERM(sublex_start()); 6763 } 6764 6765 static int 6766 yyl_backtick(pTHX_ char *s) 6767 { 6768 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 6769 DEBUG_T( { 6770 if (s) 6771 printbuf("### Saw backtick string before %s\n", s); 6772 else 6773 PerlIO_printf(Perl_debug_log, 6774 "### Saw unterminated backtick string\n"); 6775 } ); 6776 if (PL_expect == XOPERATOR) 6777 no_op("Backticks",s); 6778 if (!s) 6779 missingterm(NULL, 0); 6780 pl_yylval.ival = OP_BACKTICK; 6781 TERM(sublex_start()); 6782 } 6783 6784 static int 6785 yyl_backslash(pTHX_ char *s) 6786 { 6787 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr && isDIGIT(*s)) 6788 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression", 6789 *s, *s); 6790 if (PL_expect == XOPERATOR) 6791 no_op("Backslash",s); 6792 OPERATOR(REFGEN); 6793 } 6794 6795 static void 6796 yyl_data_handle(pTHX) 6797 { 6798 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash 6799 ? PL_curstash 6800 : PL_defstash; 6801 GV *gv = (GV *)*hv_fetchs(stash, "DATA", 1); 6802 6803 if (!isGV(gv)) 6804 gv_init(gv,stash,"DATA",4,0); 6805 6806 GvMULTI_on(gv); 6807 if (!GvIO(gv)) 6808 GvIOp(gv) = newIO(); 6809 IoIFP(GvIOp(gv)) = PL_rsfp; 6810 6811 /* Mark this internal pseudo-handle as clean */ 6812 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT; 6813 if ((PerlIO*)PL_rsfp == PerlIO_stdin()) 6814 IoTYPE(GvIOp(gv)) = IoTYPE_STD; 6815 else 6816 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY; 6817 6818 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS) 6819 /* if the script was opened in binmode, we need to revert 6820 * it to text mode for compatibility; but only iff it has CRs 6821 * XXX this is a questionable hack at best. */ 6822 if (PL_bufend-PL_bufptr > 2 6823 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r') 6824 { 6825 Off_t loc = 0; 6826 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) { 6827 loc = PerlIO_tell(PL_rsfp); 6828 (void)PerlIO_seek(PL_rsfp, 0L, 0); 6829 } 6830 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) { 6831 if (loc > 0) 6832 PerlIO_seek(PL_rsfp, loc, 0); 6833 } 6834 } 6835 #endif 6836 6837 #ifdef PERLIO_LAYERS 6838 if (!IN_BYTES) { 6839 if (UTF) 6840 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8"); 6841 } 6842 #endif 6843 6844 PL_rsfp = NULL; 6845 } 6846 6847 PERL_STATIC_NO_RET void yyl_croak_unrecognised(pTHX_ char*) 6848 __attribute__noreturn__; 6849 6850 PERL_STATIC_NO_RET void 6851 yyl_croak_unrecognised(pTHX_ char *s) 6852 { 6853 SV *dsv = newSVpvs_flags("", SVs_TEMP); 6854 const char *c; 6855 char *d; 6856 STRLEN len; 6857 6858 if (UTF) { 6859 STRLEN skiplen = UTF8SKIP(s); 6860 STRLEN stravail = PL_bufend - s; 6861 c = sv_uni_display(dsv, newSVpvn_flags(s, 6862 skiplen > stravail ? stravail : skiplen, 6863 SVs_TEMP | SVf_UTF8), 6864 10, UNI_DISPLAY_ISPRINT); 6865 } 6866 else { 6867 c = Perl_form(aTHX_ "\\x%02X", (unsigned char)*s); 6868 } 6869 6870 if (s >= PL_linestart) { 6871 d = PL_linestart; 6872 } 6873 else { 6874 /* somehow (probably due to a parse failure), PL_linestart has advanced 6875 * pass PL_bufptr, get a reasonable beginning of line 6876 */ 6877 d = s; 6878 while (d > SvPVX(PL_linestr) && d[-1] && d[-1] != '\n') 6879 --d; 6880 } 6881 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) d, (U8 *) s) : (STRLEN) (s - d); 6882 if (len > UNRECOGNIZED_PRECEDE_COUNT) { 6883 d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)d) : s - UNRECOGNIZED_PRECEDE_COUNT; 6884 } 6885 6886 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %" UTF8f "<-- HERE near column %d", c, 6887 UTF8fARG(UTF, (s - d), d), 6888 (int) len + 1); 6889 } 6890 6891 static int 6892 yyl_require(pTHX_ char *s, I32 orig_keyword) 6893 { 6894 s = skipspace(s); 6895 if (isDIGIT(*s)) { 6896 s = force_version(s, FALSE); 6897 } 6898 else if (*s != 'v' || !isDIGIT(s[1]) 6899 || (s = force_version(s, TRUE), *s == 'v')) 6900 { 6901 *PL_tokenbuf = '\0'; 6902 s = force_word(s,BAREWORD,TRUE,TRUE); 6903 if (isIDFIRST_lazy_if_safe(PL_tokenbuf, 6904 PL_tokenbuf + sizeof(PL_tokenbuf), 6905 UTF)) 6906 { 6907 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), 6908 GV_ADD | (UTF ? SVf_UTF8 : 0)); 6909 } 6910 else if (*s == '<') 6911 yyerror("<> at require-statement should be quotes"); 6912 } 6913 6914 if (orig_keyword == KEY_require) 6915 pl_yylval.ival = 1; 6916 else 6917 pl_yylval.ival = 0; 6918 6919 PL_expect = PL_nexttoke ? XOPERATOR : XTERM; 6920 PL_bufptr = s; 6921 PL_last_uni = PL_oldbufptr; 6922 PL_last_lop_op = OP_REQUIRE; 6923 s = skipspace(s); 6924 return REPORT( (int)REQUIRE ); 6925 } 6926 6927 static int 6928 yyl_foreach(pTHX_ char *s) 6929 { 6930 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 6931 return REPORT(0); 6932 pl_yylval.ival = CopLINE(PL_curcop); 6933 s = skipspace(s); 6934 if (PL_expect == XSTATE && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) { 6935 char *p = s; 6936 SSize_t s_off = s - SvPVX(PL_linestr); 6937 bool paren_is_valid = FALSE; 6938 bool maybe_package = FALSE; 6939 bool saw_core = FALSE; 6940 bool core_valid = FALSE; 6941 6942 if (UNLIKELY(memBEGINPs(p, (STRLEN) (PL_bufend - p), "CORE::"))) { 6943 saw_core = TRUE; 6944 p += 6; 6945 } 6946 if (LIKELY(memBEGINPs(p, (STRLEN) (PL_bufend - p), "my"))) { 6947 core_valid = TRUE; 6948 paren_is_valid = TRUE; 6949 if (isSPACE(p[2])) { 6950 p = skipspace(p + 3); 6951 maybe_package = TRUE; 6952 } 6953 else { 6954 p += 2; 6955 } 6956 } 6957 else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "our")) { 6958 core_valid = TRUE; 6959 if (isSPACE(p[3])) { 6960 p = skipspace(p + 4); 6961 maybe_package = TRUE; 6962 } 6963 else { 6964 p += 3; 6965 } 6966 } 6967 else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "state")) { 6968 core_valid = TRUE; 6969 if (isSPACE(p[5])) { 6970 p = skipspace(p + 6); 6971 } 6972 else { 6973 p += 5; 6974 } 6975 } 6976 if (saw_core && !core_valid) { 6977 Perl_croak(aTHX_ "Missing $ on loop variable"); 6978 } 6979 6980 if (maybe_package && !saw_core) { 6981 /* skip optional package name, as in "for my abc $x (..)" */ 6982 if (UNLIKELY(isIDFIRST_lazy_if_safe(p, PL_bufend, UTF))) { 6983 STRLEN len; 6984 p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len); 6985 p = skipspace(p); 6986 paren_is_valid = FALSE; 6987 } 6988 } 6989 6990 if (UNLIKELY(paren_is_valid && *p == '(')) { 6991 Perl_ck_warner_d(aTHX_ 6992 packWARN(WARN_EXPERIMENTAL__FOR_LIST), 6993 "for my (...) is experimental"); 6994 } 6995 else if (UNLIKELY(*p != '$' && *p != '\\')) { 6996 /* "for myfoo (" will end up here, but with p pointing at the 'f' */ 6997 Perl_croak(aTHX_ "Missing $ on loop variable"); 6998 } 6999 /* The buffer may have been reallocated, update s */ 7000 s = SvPVX(PL_linestr) + s_off; 7001 } 7002 OPERATOR(FOR); 7003 } 7004 7005 static int 7006 yyl_do(pTHX_ char *s, I32 orig_keyword) 7007 { 7008 s = skipspace(s); 7009 if (*s == '{') 7010 PRETERMBLOCK(DO); 7011 if (*s != '\'') { 7012 char *d; 7013 STRLEN len; 7014 *PL_tokenbuf = '&'; 7015 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, 7016 1, &len); 7017 if (len && memNEs(PL_tokenbuf+1, len, "CORE") 7018 && !keyword(PL_tokenbuf + 1, len, 0)) { 7019 SSize_t off = s-SvPVX(PL_linestr); 7020 d = skipspace(d); 7021 s = SvPVX(PL_linestr)+off; 7022 if (*d == '(') { 7023 force_ident_maybe_lex('&'); 7024 s = d; 7025 } 7026 } 7027 } 7028 if (orig_keyword == KEY_do) 7029 pl_yylval.ival = 1; 7030 else 7031 pl_yylval.ival = 0; 7032 OPERATOR(DO); 7033 } 7034 7035 static int 7036 yyl_my(pTHX_ char *s, I32 my) 7037 { 7038 if (PL_in_my) { 7039 PL_bufptr = s; 7040 yyerror(Perl_form(aTHX_ 7041 "Can't redeclare \"%s\" in \"%s\"", 7042 my == KEY_my ? "my" : 7043 my == KEY_state ? "state" : "our", 7044 PL_in_my == KEY_my ? "my" : 7045 PL_in_my == KEY_state ? "state" : "our")); 7046 } 7047 PL_in_my = (U16)my; 7048 s = skipspace(s); 7049 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) { 7050 STRLEN len; 7051 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len); 7052 if (memEQs(PL_tokenbuf, len, "sub")) 7053 return yyl_sub(aTHX_ s, my); 7054 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len); 7055 if (!PL_in_my_stash) { 7056 char tmpbuf[1024]; 7057 int i; 7058 PL_bufptr = s; 7059 i = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf); 7060 PERL_MY_SNPRINTF_POST_GUARD(i, sizeof(tmpbuf)); 7061 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0); 7062 } 7063 } 7064 else if (*s == '\\') { 7065 if (!FEATURE_MYREF_IS_ENABLED) 7066 Perl_croak(aTHX_ "The experimental declared_refs " 7067 "feature is not enabled"); 7068 Perl_ck_warner_d(aTHX_ 7069 packWARN(WARN_EXPERIMENTAL__DECLARED_REFS), 7070 "Declaring references is experimental"); 7071 } 7072 OPERATOR(MY); 7073 } 7074 7075 static int yyl_try(pTHX_ char*); 7076 7077 static bool 7078 yyl_eol_needs_semicolon(pTHX_ char **ps) 7079 { 7080 char *s = *ps; 7081 if (PL_lex_state != LEX_NORMAL 7082 || (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) 7083 { 7084 const bool in_comment = *s == '#'; 7085 char *d; 7086 if (*s == '#' && s == PL_linestart && PL_in_eval 7087 && !PL_rsfp && !PL_parser->filtered) { 7088 /* handle eval qq[#line 1 "foo"\n ...] */ 7089 CopLINE_dec(PL_curcop); 7090 incline(s, PL_bufend); 7091 } 7092 d = s; 7093 while (d < PL_bufend && *d != '\n') 7094 d++; 7095 if (d < PL_bufend) 7096 d++; 7097 s = d; 7098 if (in_comment && d == PL_bufend 7099 && PL_lex_state == LEX_INTERPNORMAL 7100 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr 7101 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--; 7102 else 7103 incline(s, PL_bufend); 7104 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { 7105 PL_lex_state = LEX_FORMLINE; 7106 force_next(FORMRBRACK); 7107 *ps = s; 7108 return TRUE; 7109 } 7110 } 7111 else { 7112 while (s < PL_bufend && *s != '\n') 7113 s++; 7114 if (s < PL_bufend) { 7115 s++; 7116 if (s < PL_bufend) 7117 incline(s, PL_bufend); 7118 } 7119 } 7120 *ps = s; 7121 return FALSE; 7122 } 7123 7124 static int 7125 yyl_fake_eof(pTHX_ U32 fake_eof, bool bof, char *s) 7126 { 7127 char *d; 7128 7129 goto start; 7130 7131 do { 7132 fake_eof = 0; 7133 bof = cBOOL(PL_rsfp); 7134 start: 7135 7136 PL_bufptr = PL_bufend; 7137 COPLINE_INC_WITH_HERELINES; 7138 if (!lex_next_chunk(fake_eof)) { 7139 CopLINE_dec(PL_curcop); 7140 s = PL_bufptr; 7141 TOKEN(PERLY_SEMICOLON); /* not infinite loop because rsfp is NULL now */ 7142 } 7143 CopLINE_dec(PL_curcop); 7144 s = PL_bufptr; 7145 /* If it looks like the start of a BOM or raw UTF-16, 7146 * check if it in fact is. */ 7147 if (bof && PL_rsfp 7148 && ( *s == 0 7149 || *(U8*)s == BOM_UTF8_FIRST_BYTE 7150 || *(U8*)s >= 0xFE 7151 || s[1] == 0)) 7152 { 7153 Off_t offset = (IV)PerlIO_tell(PL_rsfp); 7154 bof = (offset == (Off_t)SvCUR(PL_linestr)); 7155 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS) 7156 /* offset may include swallowed CR */ 7157 if (!bof) 7158 bof = (offset == (Off_t)SvCUR(PL_linestr)+1); 7159 #endif 7160 if (bof) { 7161 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 7162 s = swallow_bom((U8*)s); 7163 } 7164 } 7165 if (PL_parser->in_pod) { 7166 /* Incest with pod. */ 7167 if ( memBEGINPs(s, (STRLEN) (PL_bufend - s), "=cut") 7168 && !isALPHA(s[4])) 7169 { 7170 SvPVCLEAR(PL_linestr); 7171 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); 7172 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 7173 PL_last_lop = PL_last_uni = NULL; 7174 PL_parser->in_pod = 0; 7175 } 7176 } 7177 if (PL_rsfp || PL_parser->filtered) 7178 incline(s, PL_bufend); 7179 } while (PL_parser->in_pod); 7180 7181 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s; 7182 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 7183 PL_last_lop = PL_last_uni = NULL; 7184 if (CopLINE(PL_curcop) == 1) { 7185 while (s < PL_bufend && isSPACE(*s)) 7186 s++; 7187 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */ 7188 s++; 7189 d = NULL; 7190 if (!PL_in_eval) { 7191 if (*s == '#' && *(s+1) == '!') 7192 d = s + 2; 7193 #ifdef ALTERNATE_SHEBANG 7194 else { 7195 static char const as[] = ALTERNATE_SHEBANG; 7196 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1)) 7197 d = s + (sizeof(as) - 1); 7198 } 7199 #endif /* ALTERNATE_SHEBANG */ 7200 } 7201 if (d) { 7202 char *ipath; 7203 char *ipathend; 7204 7205 while (isSPACE(*d)) 7206 d++; 7207 ipath = d; 7208 while (*d && !isSPACE(*d)) 7209 d++; 7210 ipathend = d; 7211 7212 #ifdef ARG_ZERO_IS_SCRIPT 7213 if (ipathend > ipath) { 7214 /* 7215 * HP-UX (at least) sets argv[0] to the script name, 7216 * which makes $^X incorrect. And Digital UNIX and Linux, 7217 * at least, set argv[0] to the basename of the Perl 7218 * interpreter. So, having found "#!", we'll set it right. 7219 */ 7220 SV* copfilesv = CopFILESV(PL_curcop); 7221 if (copfilesv) { 7222 SV * const x = 7223 GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL, 7224 SVt_PV)); /* $^X */ 7225 assert(SvPOK(x) || SvGMAGICAL(x)); 7226 if (sv_eq(x, copfilesv)) { 7227 sv_setpvn(x, ipath, ipathend - ipath); 7228 SvSETMAGIC(x); 7229 } 7230 else { 7231 STRLEN blen; 7232 STRLEN llen; 7233 const char *bstart = SvPV_const(copfilesv, blen); 7234 const char * const lstart = SvPV_const(x, llen); 7235 if (llen < blen) { 7236 bstart += blen - llen; 7237 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') { 7238 sv_setpvn(x, ipath, ipathend - ipath); 7239 SvSETMAGIC(x); 7240 } 7241 } 7242 } 7243 } 7244 else { 7245 /* Anything to do if no copfilesv? */ 7246 } 7247 TAINT_NOT; /* $^X is always tainted, but that's OK */ 7248 } 7249 #endif /* ARG_ZERO_IS_SCRIPT */ 7250 7251 /* 7252 * Look for options. 7253 */ 7254 d = instr(s,"perl -"); 7255 if (!d) { 7256 d = instr(s,"perl"); 7257 #if defined(DOSISH) 7258 /* avoid getting into infinite loops when shebang 7259 * line contains "Perl" rather than "perl" */ 7260 if (!d) { 7261 for (d = ipathend-4; d >= ipath; --d) { 7262 if (isALPHA_FOLD_EQ(*d, 'p') 7263 && !ibcmp(d, "perl", 4)) 7264 { 7265 break; 7266 } 7267 } 7268 if (d < ipath) 7269 d = NULL; 7270 } 7271 #endif 7272 } 7273 #ifdef ALTERNATE_SHEBANG 7274 /* 7275 * If the ALTERNATE_SHEBANG on this system starts with a 7276 * character that can be part of a Perl expression, then if 7277 * we see it but not "perl", we're probably looking at the 7278 * start of Perl code, not a request to hand off to some 7279 * other interpreter. Similarly, if "perl" is there, but 7280 * not in the first 'word' of the line, we assume the line 7281 * contains the start of the Perl program. 7282 */ 7283 if (d && *s != '#') { 7284 const char *c = ipath; 7285 while (*c && !memCHRs("; \t\r\n\f\v#", *c)) 7286 c++; 7287 if (c < d) 7288 d = NULL; /* "perl" not in first word; ignore */ 7289 else 7290 *s = '#'; /* Don't try to parse shebang line */ 7291 } 7292 #endif /* ALTERNATE_SHEBANG */ 7293 if (!d 7294 && *s == '#' 7295 && ipathend > ipath 7296 && !PL_minus_c 7297 && !instr(s,"indir") 7298 && instr(PL_origargv[0],"perl")) 7299 { 7300 char **newargv; 7301 7302 *ipathend = '\0'; 7303 s = ipathend + 1; 7304 while (s < PL_bufend && isSPACE(*s)) 7305 s++; 7306 if (s < PL_bufend) { 7307 Newx(newargv,PL_origargc+3,char*); 7308 newargv[1] = s; 7309 while (s < PL_bufend && !isSPACE(*s)) 7310 s++; 7311 *s = '\0'; 7312 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*); 7313 } 7314 else 7315 newargv = PL_origargv; 7316 newargv[0] = ipath; 7317 PERL_FPU_PRE_EXEC 7318 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv)); 7319 PERL_FPU_POST_EXEC 7320 Perl_croak(aTHX_ "Can't exec %s", ipath); 7321 } 7322 if (d) { 7323 while (*d && !isSPACE(*d)) 7324 d++; 7325 while (SPACE_OR_TAB(*d)) 7326 d++; 7327 7328 if (*d++ == '-') { 7329 const bool switches_done = PL_doswitches; 7330 const U32 oldpdb = PL_perldb; 7331 const bool oldn = PL_minus_n; 7332 const bool oldp = PL_minus_p; 7333 const char *d1 = d; 7334 7335 do { 7336 bool baduni = FALSE; 7337 if (*d1 == 'C') { 7338 const char *d2 = d1 + 1; 7339 if (parse_unicode_opts((const char **)&d2) 7340 != PL_unicode) 7341 baduni = TRUE; 7342 } 7343 if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) { 7344 const char * const m = d1; 7345 while (*d1 && !isSPACE(*d1)) 7346 d1++; 7347 Perl_croak(aTHX_ "Too late for \"-%.*s\" option", 7348 (int)(d1 - m), m); 7349 } 7350 d1 = moreswitches(d1); 7351 } while (d1); 7352 if (PL_doswitches && !switches_done) { 7353 int argc = PL_origargc; 7354 char **argv = PL_origargv; 7355 do { 7356 argc--,argv++; 7357 } while (argc && argv[0][0] == '-' && argv[0][1]); 7358 init_argv_symbols(argc,argv); 7359 } 7360 if ( (PERLDB_LINE_OR_SAVESRC && !oldpdb) 7361 || ((PL_minus_n || PL_minus_p) && !(oldn || oldp))) 7362 /* if we have already added "LINE: while (<>) {", 7363 we must not do it again */ 7364 { 7365 SvPVCLEAR(PL_linestr); 7366 PL_bufptr = PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); 7367 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 7368 PL_last_lop = PL_last_uni = NULL; 7369 PL_preambled = FALSE; 7370 if (PERLDB_LINE_OR_SAVESRC) 7371 (void)gv_fetchfile(PL_origfilename); 7372 return YYL_RETRY; 7373 } 7374 } 7375 } 7376 } 7377 } 7378 7379 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { 7380 PL_lex_state = LEX_FORMLINE; 7381 force_next(FORMRBRACK); 7382 TOKEN(PERLY_SEMICOLON); 7383 } 7384 7385 PL_bufptr = s; 7386 return YYL_RETRY; 7387 } 7388 7389 static int 7390 yyl_fatcomma(pTHX_ char *s, STRLEN len) 7391 { 7392 CLINE; 7393 pl_yylval.opval 7394 = newSVOP(OP_CONST, 0, 7395 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len)); 7396 pl_yylval.opval->op_private = OPpCONST_BARE; 7397 TERM(BAREWORD); 7398 } 7399 7400 static int 7401 yyl_safe_bareword(pTHX_ char *s, const char lastchar) 7402 { 7403 if ((lastchar == '*' || lastchar == '%' || lastchar == '&') 7404 && PL_parser->saw_infix_sigil) 7405 { 7406 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), 7407 "Operator or semicolon missing before %c%" UTF8f, 7408 lastchar, 7409 UTF8fARG(UTF, strlen(PL_tokenbuf), 7410 PL_tokenbuf)); 7411 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), 7412 "Ambiguous use of %c resolved as operator %c", 7413 lastchar, lastchar); 7414 } 7415 TOKEN(BAREWORD); 7416 } 7417 7418 static int 7419 yyl_constant_op(pTHX_ char *s, SV *sv, CV *cv, OP *rv2cv_op, PADOFFSET off) 7420 { 7421 if (sv) { 7422 op_free(rv2cv_op); 7423 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv); 7424 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv); 7425 if (SvTYPE(sv) == SVt_PVAV) 7426 pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS, 7427 pl_yylval.opval); 7428 else { 7429 pl_yylval.opval->op_private = 0; 7430 pl_yylval.opval->op_folded = 1; 7431 pl_yylval.opval->op_flags |= OPf_SPECIAL; 7432 } 7433 TOKEN(BAREWORD); 7434 } 7435 7436 op_free(pl_yylval.opval); 7437 pl_yylval.opval = 7438 off ? newCVREF(0, rv2cv_op) : rv2cv_op; 7439 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN; 7440 PL_last_lop = PL_oldbufptr; 7441 PL_last_lop_op = OP_ENTERSUB; 7442 7443 /* Is there a prototype? */ 7444 if (SvPOK(cv)) { 7445 int k = yyl_subproto(aTHX_ s, cv); 7446 if (k != KEY_NULL) 7447 return k; 7448 } 7449 7450 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval; 7451 PL_expect = XTERM; 7452 force_next(off ? PRIVATEREF : BAREWORD); 7453 if (!PL_lex_allbrackets 7454 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 7455 { 7456 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 7457 } 7458 7459 TOKEN(NOAMP); 7460 } 7461 7462 /* Honour "reserved word" warnings, and enforce strict subs */ 7463 static void 7464 yyl_strictwarn_bareword(pTHX_ const char lastchar) 7465 { 7466 /* after "print" and similar functions (corresponding to 7467 * "F? L" in opcode.pl), whatever wasn't already parsed as 7468 * a filehandle should be subject to "strict subs". 7469 * Likewise for the optional indirect-object argument to system 7470 * or exec, which can't be a bareword */ 7471 if ((PL_last_lop_op == OP_PRINT 7472 || PL_last_lop_op == OP_PRTF 7473 || PL_last_lop_op == OP_SAY 7474 || PL_last_lop_op == OP_SYSTEM 7475 || PL_last_lop_op == OP_EXEC) 7476 && (PL_hints & HINT_STRICT_SUBS)) 7477 { 7478 pl_yylval.opval->op_private |= OPpCONST_STRICT; 7479 } 7480 7481 if (lastchar != '-' && ckWARN(WARN_RESERVED)) { 7482 char *d = PL_tokenbuf; 7483 while (isLOWER(*d)) 7484 d++; 7485 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0)) { 7486 /* PL_warn_reserved is constant */ 7487 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); 7488 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved, 7489 PL_tokenbuf); 7490 GCC_DIAG_RESTORE_STMT; 7491 } 7492 } 7493 } 7494 7495 static int 7496 yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 orig_keyword, struct code c) 7497 { 7498 int pkgname = 0; 7499 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]); 7500 bool safebw; 7501 bool no_op_error = FALSE; 7502 /* Use this var to track whether intuit_method has been 7503 called. intuit_method returns 0 or > 255. */ 7504 int key = 1; 7505 7506 if (PL_expect == XOPERATOR) { 7507 if (PL_bufptr == PL_linestart) { 7508 CopLINE_dec(PL_curcop); 7509 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi); 7510 CopLINE_inc(PL_curcop); 7511 } 7512 else 7513 /* We want to call no_op with s pointing after the 7514 bareword, so defer it. But we want it to come 7515 before the Bad name croak. */ 7516 no_op_error = TRUE; 7517 } 7518 7519 /* Get the rest if it looks like a package qualifier */ 7520 7521 if (*s == '\'' || (*s == ':' && s[1] == ':')) { 7522 STRLEN morelen; 7523 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len, 7524 TRUE, &morelen); 7525 if (no_op_error) { 7526 no_op("Bareword",s); 7527 no_op_error = FALSE; 7528 } 7529 if (!morelen) 7530 Perl_croak(aTHX_ "Bad name after %" UTF8f "%s", 7531 UTF8fARG(UTF, len, PL_tokenbuf), 7532 *s == '\'' ? "'" : "::"); 7533 len += morelen; 7534 pkgname = 1; 7535 } 7536 7537 if (no_op_error) 7538 no_op("Bareword",s); 7539 7540 /* See if the name is "Foo::", 7541 in which case Foo is a bareword 7542 (and a package name). */ 7543 7544 if (len > 2 && PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':') { 7545 if (ckWARN(WARN_BAREWORD) 7546 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV)) 7547 Perl_warner(aTHX_ packWARN(WARN_BAREWORD), 7548 "Bareword \"%" UTF8f 7549 "\" refers to nonexistent package", 7550 UTF8fARG(UTF, len, PL_tokenbuf)); 7551 len -= 2; 7552 PL_tokenbuf[len] = '\0'; 7553 c.gv = NULL; 7554 c.gvp = 0; 7555 safebw = TRUE; 7556 } 7557 else { 7558 safebw = FALSE; 7559 } 7560 7561 /* if we saw a global override before, get the right name */ 7562 7563 if (!c.sv) 7564 c.sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len); 7565 if (c.gvp) { 7566 SV *sv = newSVpvs("CORE::GLOBAL::"); 7567 sv_catsv(sv, c.sv); 7568 SvREFCNT_dec(c.sv); 7569 c.sv = sv; 7570 } 7571 7572 /* Presume this is going to be a bareword of some sort. */ 7573 CLINE; 7574 pl_yylval.opval = newSVOP(OP_CONST, 0, c.sv); 7575 pl_yylval.opval->op_private = OPpCONST_BARE; 7576 7577 /* And if "Foo::", then that's what it certainly is. */ 7578 if (safebw) 7579 return yyl_safe_bareword(aTHX_ s, lastchar); 7580 7581 if (!c.off) { 7582 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(c.sv)); 7583 const_op->op_private = OPpCONST_BARE; 7584 c.rv2cv_op = newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op); 7585 c.cv = c.lex 7586 ? isGV(c.gv) 7587 ? GvCV(c.gv) 7588 : SvROK(c.gv) && SvTYPE(SvRV(c.gv)) == SVt_PVCV 7589 ? (CV *)SvRV(c.gv) 7590 : ((CV *)c.gv) 7591 : rv2cv_op_cv(c.rv2cv_op, RV2CVOPCV_RETURN_STUB); 7592 } 7593 7594 /* See if it's the indirect object for a list operator. */ 7595 7596 if (PL_oldoldbufptr 7597 && PL_oldoldbufptr < PL_bufptr 7598 && (PL_oldoldbufptr == PL_last_lop 7599 || PL_oldoldbufptr == PL_last_uni) 7600 && /* NO SKIPSPACE BEFORE HERE! */ 7601 (PL_expect == XREF 7602 || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) 7603 == OA_FILEREF)) 7604 { 7605 bool immediate_paren = *s == '('; 7606 SSize_t s_off; 7607 7608 /* (Now we can afford to cross potential line boundary.) */ 7609 s = skipspace(s); 7610 7611 /* intuit_method() can indirectly call lex_next_chunk(), 7612 * invalidating s 7613 */ 7614 s_off = s - SvPVX(PL_linestr); 7615 /* Two barewords in a row may indicate method call. */ 7616 if ( ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) 7617 || *s == '$') 7618 && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv))) 7619 { 7620 /* the code at method: doesn't use s */ 7621 goto method; 7622 } 7623 s = SvPVX(PL_linestr) + s_off; 7624 7625 if (((PL_opargs[PL_last_lop_op] >> OASHIFT) & 7) == OA_FILEREF 7626 && !immediate_paren && !c.cv 7627 && !FEATURE_BAREWORD_FILEHANDLES_IS_ENABLED) { 7628 no_bareword_filehandle(PL_tokenbuf); 7629 } 7630 7631 /* If not a declared subroutine, it's an indirect object. */ 7632 /* (But it's an indir obj regardless for sort.) */ 7633 /* Also, if "_" follows a filetest operator, it's a bareword */ 7634 7635 if ( 7636 ( !immediate_paren && (PL_last_lop_op == OP_SORT 7637 || (!c.cv 7638 && (PL_last_lop_op != OP_MAPSTART 7639 && PL_last_lop_op != OP_GREPSTART)))) 7640 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0' 7641 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) 7642 == OA_FILESTATOP)) 7643 ) 7644 { 7645 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR; 7646 yyl_strictwarn_bareword(aTHX_ lastchar); 7647 op_free(c.rv2cv_op); 7648 return yyl_safe_bareword(aTHX_ s, lastchar); 7649 } 7650 } 7651 7652 PL_expect = XOPERATOR; 7653 s = skipspace(s); 7654 7655 /* Is this a word before a => operator? */ 7656 if (*s == '=' && s[1] == '>' && !pkgname) { 7657 op_free(c.rv2cv_op); 7658 CLINE; 7659 if (c.gvp || (c.lex && !c.off)) { 7660 assert (cSVOPx(pl_yylval.opval)->op_sv == c.sv); 7661 /* This is our own scalar, created a few lines 7662 above, so this is safe. */ 7663 SvREADONLY_off(c.sv); 7664 sv_setpv(c.sv, PL_tokenbuf); 7665 if (UTF && !IN_BYTES 7666 && is_utf8_string((U8*)PL_tokenbuf, len)) 7667 SvUTF8_on(c.sv); 7668 SvREADONLY_on(c.sv); 7669 } 7670 TERM(BAREWORD); 7671 } 7672 7673 /* If followed by a paren, it's certainly a subroutine. */ 7674 if (*s == '(') { 7675 CLINE; 7676 if (c.cv) { 7677 char *d = s + 1; 7678 while (SPACE_OR_TAB(*d)) 7679 d++; 7680 if (*d == ')' && (c.sv = cv_const_sv_or_av(c.cv))) 7681 return yyl_constant_op(aTHX_ d + 1, c.sv, c.cv, c.rv2cv_op, c.off); 7682 } 7683 NEXTVAL_NEXTTOKE.opval = 7684 c.off ? c.rv2cv_op : pl_yylval.opval; 7685 if (c.off) 7686 op_free(pl_yylval.opval), force_next(PRIVATEREF); 7687 else op_free(c.rv2cv_op), force_next(BAREWORD); 7688 pl_yylval.ival = 0; 7689 TOKEN(PERLY_AMPERSAND); 7690 } 7691 7692 /* If followed by var or block, call it a method (unless sub) */ 7693 7694 if ((*s == '$' || *s == '{') && !c.cv && FEATURE_INDIRECT_IS_ENABLED) { 7695 op_free(c.rv2cv_op); 7696 PL_last_lop = PL_oldbufptr; 7697 PL_last_lop_op = OP_METHOD; 7698 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 7699 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 7700 PL_expect = XBLOCKTERM; 7701 PL_bufptr = s; 7702 return REPORT(METHOD); 7703 } 7704 7705 /* If followed by a bareword, see if it looks like indir obj. */ 7706 7707 if ( key == 1 7708 && !orig_keyword 7709 && (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '$') 7710 && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv))) 7711 { 7712 method: 7713 if (c.lex && !c.off) { 7714 assert(cSVOPx(pl_yylval.opval)->op_sv == c.sv); 7715 SvREADONLY_off(c.sv); 7716 sv_setpvn(c.sv, PL_tokenbuf, len); 7717 if (UTF && !IN_BYTES 7718 && is_utf8_string((U8*)PL_tokenbuf, len)) 7719 SvUTF8_on(c.sv); 7720 else SvUTF8_off(c.sv); 7721 } 7722 op_free(c.rv2cv_op); 7723 if (key == METHOD && !PL_lex_allbrackets 7724 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 7725 { 7726 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 7727 } 7728 return REPORT(key); 7729 } 7730 7731 /* Not a method, so call it a subroutine (if defined) */ 7732 7733 if (c.cv) { 7734 /* Check for a constant sub */ 7735 c.sv = cv_const_sv_or_av(c.cv); 7736 return yyl_constant_op(aTHX_ s, c.sv, c.cv, c.rv2cv_op, c.off); 7737 } 7738 7739 /* Call it a bare word */ 7740 7741 if (PL_hints & HINT_STRICT_SUBS) 7742 pl_yylval.opval->op_private |= OPpCONST_STRICT; 7743 else 7744 yyl_strictwarn_bareword(aTHX_ lastchar); 7745 7746 op_free(c.rv2cv_op); 7747 7748 return yyl_safe_bareword(aTHX_ s, lastchar); 7749 } 7750 7751 static int 7752 yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct code c) 7753 { 7754 switch (key) { 7755 default: /* not a keyword */ 7756 return yyl_just_a_word(aTHX_ s, len, orig_keyword, c); 7757 7758 case KEY___FILE__: 7759 FUN0OP( newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0)) ); 7760 7761 case KEY___LINE__: 7762 FUN0OP( 7763 newSVOP(OP_CONST, 0, 7764 Perl_newSVpvf(aTHX_ "%" IVdf, (IV)CopLINE(PL_curcop))) 7765 ); 7766 7767 case KEY___PACKAGE__: 7768 FUN0OP( 7769 newSVOP(OP_CONST, 0, (PL_curstash 7770 ? newSVhek(HvNAME_HEK(PL_curstash)) 7771 : &PL_sv_undef)) 7772 ); 7773 7774 case KEY___DATA__: 7775 case KEY___END__: 7776 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) 7777 yyl_data_handle(aTHX); 7778 return yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s); 7779 7780 case KEY___SUB__: 7781 FUN0OP(CvCLONE(PL_compcv) 7782 ? newOP(OP_RUNCV, 0) 7783 : newPVOP(OP_RUNCV,0,NULL)); 7784 7785 case KEY_AUTOLOAD: 7786 case KEY_DESTROY: 7787 case KEY_BEGIN: 7788 case KEY_UNITCHECK: 7789 case KEY_CHECK: 7790 case KEY_INIT: 7791 case KEY_END: 7792 if (PL_expect == XSTATE) 7793 return yyl_sub(aTHX_ PL_bufptr, key); 7794 return yyl_just_a_word(aTHX_ s, len, orig_keyword, c); 7795 7796 case KEY_abs: 7797 UNI(OP_ABS); 7798 7799 case KEY_alarm: 7800 UNI(OP_ALARM); 7801 7802 case KEY_accept: 7803 LOP(OP_ACCEPT,XTERM); 7804 7805 case KEY_and: 7806 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC) 7807 return REPORT(0); 7808 OPERATOR(ANDOP); 7809 7810 case KEY_atan2: 7811 LOP(OP_ATAN2,XTERM); 7812 7813 case KEY_bind: 7814 LOP(OP_BIND,XTERM); 7815 7816 case KEY_binmode: 7817 LOP(OP_BINMODE,XTERM); 7818 7819 case KEY_bless: 7820 LOP(OP_BLESS,XTERM); 7821 7822 case KEY_break: 7823 FUN0(OP_BREAK); 7824 7825 case KEY_catch: 7826 Perl_ck_warner_d(aTHX_ 7827 packWARN(WARN_EXPERIMENTAL__TRY), "try/catch is experimental"); 7828 PREBLOCK(CATCH); 7829 7830 case KEY_chop: 7831 UNI(OP_CHOP); 7832 7833 case KEY_continue: 7834 /* We have to disambiguate the two senses of 7835 "continue". If the next token is a '{' then 7836 treat it as the start of a continue block; 7837 otherwise treat it as a control operator. 7838 */ 7839 s = skipspace(s); 7840 if (*s == '{') 7841 PREBLOCK(CONTINUE); 7842 else 7843 FUN0(OP_CONTINUE); 7844 7845 case KEY_chdir: 7846 /* may use HOME */ 7847 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV); 7848 UNI(OP_CHDIR); 7849 7850 case KEY_close: 7851 UNI(OP_CLOSE); 7852 7853 case KEY_closedir: 7854 UNI(OP_CLOSEDIR); 7855 7856 case KEY_cmp: 7857 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 7858 return REPORT(0); 7859 NCEop(OP_SCMP); 7860 7861 case KEY_caller: 7862 UNI(OP_CALLER); 7863 7864 case KEY_crypt: 7865 7866 LOP(OP_CRYPT,XTERM); 7867 7868 case KEY_chmod: 7869 LOP(OP_CHMOD,XTERM); 7870 7871 case KEY_chown: 7872 LOP(OP_CHOWN,XTERM); 7873 7874 case KEY_connect: 7875 LOP(OP_CONNECT,XTERM); 7876 7877 case KEY_chr: 7878 UNI(OP_CHR); 7879 7880 case KEY_cos: 7881 UNI(OP_COS); 7882 7883 case KEY_chroot: 7884 UNI(OP_CHROOT); 7885 7886 case KEY_default: 7887 PREBLOCK(DEFAULT); 7888 7889 case KEY_defer: 7890 Perl_ck_warner_d(aTHX_ 7891 packWARN(WARN_EXPERIMENTAL__DEFER), "defer is experimental"); 7892 PREBLOCK(DEFER); 7893 7894 case KEY_do: 7895 return yyl_do(aTHX_ s, orig_keyword); 7896 7897 case KEY_die: 7898 PL_hints |= HINT_BLOCK_SCOPE; 7899 LOP(OP_DIE,XTERM); 7900 7901 case KEY_defined: 7902 UNI(OP_DEFINED); 7903 7904 case KEY_delete: 7905 UNI(OP_DELETE); 7906 7907 case KEY_dbmopen: 7908 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"), 7909 STR_WITH_LEN("NDBM_File::"), 7910 STR_WITH_LEN("DB_File::"), 7911 STR_WITH_LEN("GDBM_File::"), 7912 STR_WITH_LEN("SDBM_File::"), 7913 STR_WITH_LEN("ODBM_File::"), 7914 NULL); 7915 LOP(OP_DBMOPEN,XTERM); 7916 7917 case KEY_dbmclose: 7918 UNI(OP_DBMCLOSE); 7919 7920 case KEY_dump: 7921 LOOPX(OP_DUMP); 7922 7923 case KEY_else: 7924 PREBLOCK(ELSE); 7925 7926 case KEY_elsif: 7927 pl_yylval.ival = CopLINE(PL_curcop); 7928 OPERATOR(ELSIF); 7929 7930 case KEY_eq: 7931 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 7932 return REPORT(0); 7933 ChEop(OP_SEQ); 7934 7935 case KEY_exists: 7936 UNI(OP_EXISTS); 7937 7938 case KEY_exit: 7939 UNI(OP_EXIT); 7940 7941 case KEY_eval: 7942 s = skipspace(s); 7943 if (*s == '{') { /* block eval */ 7944 PL_expect = XTERMBLOCK; 7945 UNIBRACK(OP_ENTERTRY); 7946 } 7947 else { /* string eval */ 7948 PL_expect = XTERM; 7949 UNIBRACK(OP_ENTEREVAL); 7950 } 7951 7952 case KEY_evalbytes: 7953 PL_expect = XTERM; 7954 UNIBRACK(-OP_ENTEREVAL); 7955 7956 case KEY_eof: 7957 UNI(OP_EOF); 7958 7959 case KEY_exp: 7960 UNI(OP_EXP); 7961 7962 case KEY_each: 7963 UNI(OP_EACH); 7964 7965 case KEY_exec: 7966 LOP(OP_EXEC,XREF); 7967 7968 case KEY_endhostent: 7969 FUN0(OP_EHOSTENT); 7970 7971 case KEY_endnetent: 7972 FUN0(OP_ENETENT); 7973 7974 case KEY_endservent: 7975 FUN0(OP_ESERVENT); 7976 7977 case KEY_endprotoent: 7978 FUN0(OP_EPROTOENT); 7979 7980 case KEY_endpwent: 7981 FUN0(OP_EPWENT); 7982 7983 case KEY_endgrent: 7984 FUN0(OP_EGRENT); 7985 7986 case KEY_finally: 7987 Perl_ck_warner_d(aTHX_ 7988 packWARN(WARN_EXPERIMENTAL__TRY), "try/catch/finally is experimental"); 7989 PREBLOCK(FINALLY); 7990 7991 case KEY_for: 7992 case KEY_foreach: 7993 return yyl_foreach(aTHX_ s); 7994 7995 case KEY_formline: 7996 LOP(OP_FORMLINE,XTERM); 7997 7998 case KEY_fork: 7999 FUN0(OP_FORK); 8000 8001 case KEY_fc: 8002 UNI(OP_FC); 8003 8004 case KEY_fcntl: 8005 LOP(OP_FCNTL,XTERM); 8006 8007 case KEY_fileno: 8008 UNI(OP_FILENO); 8009 8010 case KEY_flock: 8011 LOP(OP_FLOCK,XTERM); 8012 8013 case KEY_gt: 8014 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 8015 return REPORT(0); 8016 ChRop(OP_SGT); 8017 8018 case KEY_ge: 8019 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 8020 return REPORT(0); 8021 ChRop(OP_SGE); 8022 8023 case KEY_grep: 8024 LOP(OP_GREPSTART, XREF); 8025 8026 case KEY_goto: 8027 LOOPX(OP_GOTO); 8028 8029 case KEY_gmtime: 8030 UNI(OP_GMTIME); 8031 8032 case KEY_getc: 8033 UNIDOR(OP_GETC); 8034 8035 case KEY_getppid: 8036 FUN0(OP_GETPPID); 8037 8038 case KEY_getpgrp: 8039 UNI(OP_GETPGRP); 8040 8041 case KEY_getpriority: 8042 LOP(OP_GETPRIORITY,XTERM); 8043 8044 case KEY_getprotobyname: 8045 UNI(OP_GPBYNAME); 8046 8047 case KEY_getprotobynumber: 8048 LOP(OP_GPBYNUMBER,XTERM); 8049 8050 case KEY_getprotoent: 8051 FUN0(OP_GPROTOENT); 8052 8053 case KEY_getpwent: 8054 FUN0(OP_GPWENT); 8055 8056 case KEY_getpwnam: 8057 UNI(OP_GPWNAM); 8058 8059 case KEY_getpwuid: 8060 UNI(OP_GPWUID); 8061 8062 case KEY_getpeername: 8063 UNI(OP_GETPEERNAME); 8064 8065 case KEY_gethostbyname: 8066 UNI(OP_GHBYNAME); 8067 8068 case KEY_gethostbyaddr: 8069 LOP(OP_GHBYADDR,XTERM); 8070 8071 case KEY_gethostent: 8072 FUN0(OP_GHOSTENT); 8073 8074 case KEY_getnetbyname: 8075 UNI(OP_GNBYNAME); 8076 8077 case KEY_getnetbyaddr: 8078 LOP(OP_GNBYADDR,XTERM); 8079 8080 case KEY_getnetent: 8081 FUN0(OP_GNETENT); 8082 8083 case KEY_getservbyname: 8084 LOP(OP_GSBYNAME,XTERM); 8085 8086 case KEY_getservbyport: 8087 LOP(OP_GSBYPORT,XTERM); 8088 8089 case KEY_getservent: 8090 FUN0(OP_GSERVENT); 8091 8092 case KEY_getsockname: 8093 UNI(OP_GETSOCKNAME); 8094 8095 case KEY_getsockopt: 8096 LOP(OP_GSOCKOPT,XTERM); 8097 8098 case KEY_getgrent: 8099 FUN0(OP_GGRENT); 8100 8101 case KEY_getgrnam: 8102 UNI(OP_GGRNAM); 8103 8104 case KEY_getgrgid: 8105 UNI(OP_GGRGID); 8106 8107 case KEY_getlogin: 8108 FUN0(OP_GETLOGIN); 8109 8110 case KEY_given: 8111 pl_yylval.ival = CopLINE(PL_curcop); 8112 Perl_ck_warner_d(aTHX_ packWARN(WARN_EXPERIMENTAL__SMARTMATCH), 8113 "given is experimental"); 8114 OPERATOR(GIVEN); 8115 8116 case KEY_glob: 8117 LOP( orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB, XTERM ); 8118 8119 case KEY_hex: 8120 UNI(OP_HEX); 8121 8122 case KEY_if: 8123 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 8124 return REPORT(0); 8125 pl_yylval.ival = CopLINE(PL_curcop); 8126 OPERATOR(IF); 8127 8128 case KEY_index: 8129 LOP(OP_INDEX,XTERM); 8130 8131 case KEY_int: 8132 UNI(OP_INT); 8133 8134 case KEY_ioctl: 8135 LOP(OP_IOCTL,XTERM); 8136 8137 case KEY_isa: 8138 NCRop(OP_ISA); 8139 8140 case KEY_join: 8141 LOP(OP_JOIN,XTERM); 8142 8143 case KEY_keys: 8144 UNI(OP_KEYS); 8145 8146 case KEY_kill: 8147 LOP(OP_KILL,XTERM); 8148 8149 case KEY_last: 8150 LOOPX(OP_LAST); 8151 8152 case KEY_lc: 8153 UNI(OP_LC); 8154 8155 case KEY_lcfirst: 8156 UNI(OP_LCFIRST); 8157 8158 case KEY_local: 8159 OPERATOR(LOCAL); 8160 8161 case KEY_length: 8162 UNI(OP_LENGTH); 8163 8164 case KEY_lt: 8165 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 8166 return REPORT(0); 8167 ChRop(OP_SLT); 8168 8169 case KEY_le: 8170 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 8171 return REPORT(0); 8172 ChRop(OP_SLE); 8173 8174 case KEY_localtime: 8175 UNI(OP_LOCALTIME); 8176 8177 case KEY_log: 8178 UNI(OP_LOG); 8179 8180 case KEY_link: 8181 LOP(OP_LINK,XTERM); 8182 8183 case KEY_listen: 8184 LOP(OP_LISTEN,XTERM); 8185 8186 case KEY_lock: 8187 UNI(OP_LOCK); 8188 8189 case KEY_lstat: 8190 UNI(OP_LSTAT); 8191 8192 case KEY_m: 8193 s = scan_pat(s,OP_MATCH); 8194 TERM(sublex_start()); 8195 8196 case KEY_map: 8197 LOP(OP_MAPSTART, XREF); 8198 8199 case KEY_mkdir: 8200 LOP(OP_MKDIR,XTERM); 8201 8202 case KEY_msgctl: 8203 LOP(OP_MSGCTL,XTERM); 8204 8205 case KEY_msgget: 8206 LOP(OP_MSGGET,XTERM); 8207 8208 case KEY_msgrcv: 8209 LOP(OP_MSGRCV,XTERM); 8210 8211 case KEY_msgsnd: 8212 LOP(OP_MSGSND,XTERM); 8213 8214 case KEY_our: 8215 case KEY_my: 8216 case KEY_state: 8217 return yyl_my(aTHX_ s, key); 8218 8219 case KEY_next: 8220 LOOPX(OP_NEXT); 8221 8222 case KEY_ne: 8223 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 8224 return REPORT(0); 8225 ChEop(OP_SNE); 8226 8227 case KEY_no: 8228 s = tokenize_use(0, s); 8229 TOKEN(USE); 8230 8231 case KEY_not: 8232 if (*s == '(' || (s = skipspace(s), *s == '(')) 8233 FUN1(OP_NOT); 8234 else { 8235 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 8236 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 8237 OPERATOR(NOTOP); 8238 } 8239 8240 case KEY_open: 8241 s = skipspace(s); 8242 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) { 8243 const char *t; 8244 char *d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len); 8245 for (t=d; isSPACE(*t);) 8246 t++; 8247 if ( *t && memCHRs("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE) 8248 /* [perl #16184] */ 8249 && !(t[0] == '=' && t[1] == '>') 8250 && !(t[0] == ':' && t[1] == ':') 8251 && !keyword(s, d-s, 0) 8252 ) { 8253 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE), 8254 "Precedence problem: open %" UTF8f " should be open(%" UTF8f ")", 8255 UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s)); 8256 } 8257 } 8258 LOP(OP_OPEN,XTERM); 8259 8260 case KEY_or: 8261 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC) 8262 return REPORT(0); 8263 pl_yylval.ival = OP_OR; 8264 OPERATOR(OROP); 8265 8266 case KEY_ord: 8267 UNI(OP_ORD); 8268 8269 case KEY_oct: 8270 UNI(OP_OCT); 8271 8272 case KEY_opendir: 8273 LOP(OP_OPEN_DIR,XTERM); 8274 8275 case KEY_print: 8276 checkcomma(s,PL_tokenbuf,"filehandle"); 8277 LOP(OP_PRINT,XREF); 8278 8279 case KEY_printf: 8280 checkcomma(s,PL_tokenbuf,"filehandle"); 8281 LOP(OP_PRTF,XREF); 8282 8283 case KEY_prototype: 8284 UNI(OP_PROTOTYPE); 8285 8286 case KEY_push: 8287 LOP(OP_PUSH,XTERM); 8288 8289 case KEY_pop: 8290 UNIDOR(OP_POP); 8291 8292 case KEY_pos: 8293 UNIDOR(OP_POS); 8294 8295 case KEY_pack: 8296 LOP(OP_PACK,XTERM); 8297 8298 case KEY_package: 8299 s = force_word(s,BAREWORD,FALSE,TRUE); 8300 s = skipspace(s); 8301 s = force_strict_version(s); 8302 PREBLOCK(PACKAGE); 8303 8304 case KEY_pipe: 8305 LOP(OP_PIPE_OP,XTERM); 8306 8307 case KEY_q: 8308 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 8309 if (!s) 8310 missingterm(NULL, 0); 8311 COPLINE_SET_FROM_MULTI_END; 8312 pl_yylval.ival = OP_CONST; 8313 TERM(sublex_start()); 8314 8315 case KEY_quotemeta: 8316 UNI(OP_QUOTEMETA); 8317 8318 case KEY_qw: 8319 return yyl_qw(aTHX_ s, len); 8320 8321 case KEY_qq: 8322 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 8323 if (!s) 8324 missingterm(NULL, 0); 8325 pl_yylval.ival = OP_STRINGIFY; 8326 if (SvIVX(PL_lex_stuff) == '\'') 8327 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */ 8328 TERM(sublex_start()); 8329 8330 case KEY_qr: 8331 s = scan_pat(s,OP_QR); 8332 TERM(sublex_start()); 8333 8334 case KEY_qx: 8335 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 8336 if (!s) 8337 missingterm(NULL, 0); 8338 pl_yylval.ival = OP_BACKTICK; 8339 TERM(sublex_start()); 8340 8341 case KEY_return: 8342 OLDLOP(OP_RETURN); 8343 8344 case KEY_require: 8345 return yyl_require(aTHX_ s, orig_keyword); 8346 8347 case KEY_reset: 8348 UNI(OP_RESET); 8349 8350 case KEY_redo: 8351 LOOPX(OP_REDO); 8352 8353 case KEY_rename: 8354 LOP(OP_RENAME,XTERM); 8355 8356 case KEY_rand: 8357 UNI(OP_RAND); 8358 8359 case KEY_rmdir: 8360 UNI(OP_RMDIR); 8361 8362 case KEY_rindex: 8363 LOP(OP_RINDEX,XTERM); 8364 8365 case KEY_read: 8366 LOP(OP_READ,XTERM); 8367 8368 case KEY_readdir: 8369 UNI(OP_READDIR); 8370 8371 case KEY_readline: 8372 UNIDOR(OP_READLINE); 8373 8374 case KEY_readpipe: 8375 UNIDOR(OP_BACKTICK); 8376 8377 case KEY_rewinddir: 8378 UNI(OP_REWINDDIR); 8379 8380 case KEY_recv: 8381 LOP(OP_RECV,XTERM); 8382 8383 case KEY_reverse: 8384 LOP(OP_REVERSE,XTERM); 8385 8386 case KEY_readlink: 8387 UNIDOR(OP_READLINK); 8388 8389 case KEY_ref: 8390 UNI(OP_REF); 8391 8392 case KEY_s: 8393 s = scan_subst(s); 8394 if (pl_yylval.opval) 8395 TERM(sublex_start()); 8396 else 8397 TOKEN(1); /* force error */ 8398 8399 case KEY_say: 8400 checkcomma(s,PL_tokenbuf,"filehandle"); 8401 LOP(OP_SAY,XREF); 8402 8403 case KEY_chomp: 8404 UNI(OP_CHOMP); 8405 8406 case KEY_scalar: 8407 UNI(OP_SCALAR); 8408 8409 case KEY_select: 8410 LOP(OP_SELECT,XTERM); 8411 8412 case KEY_seek: 8413 LOP(OP_SEEK,XTERM); 8414 8415 case KEY_semctl: 8416 LOP(OP_SEMCTL,XTERM); 8417 8418 case KEY_semget: 8419 LOP(OP_SEMGET,XTERM); 8420 8421 case KEY_semop: 8422 LOP(OP_SEMOP,XTERM); 8423 8424 case KEY_send: 8425 LOP(OP_SEND,XTERM); 8426 8427 case KEY_setpgrp: 8428 LOP(OP_SETPGRP,XTERM); 8429 8430 case KEY_setpriority: 8431 LOP(OP_SETPRIORITY,XTERM); 8432 8433 case KEY_sethostent: 8434 UNI(OP_SHOSTENT); 8435 8436 case KEY_setnetent: 8437 UNI(OP_SNETENT); 8438 8439 case KEY_setservent: 8440 UNI(OP_SSERVENT); 8441 8442 case KEY_setprotoent: 8443 UNI(OP_SPROTOENT); 8444 8445 case KEY_setpwent: 8446 FUN0(OP_SPWENT); 8447 8448 case KEY_setgrent: 8449 FUN0(OP_SGRENT); 8450 8451 case KEY_seekdir: 8452 LOP(OP_SEEKDIR,XTERM); 8453 8454 case KEY_setsockopt: 8455 LOP(OP_SSOCKOPT,XTERM); 8456 8457 case KEY_shift: 8458 UNIDOR(OP_SHIFT); 8459 8460 case KEY_shmctl: 8461 LOP(OP_SHMCTL,XTERM); 8462 8463 case KEY_shmget: 8464 LOP(OP_SHMGET,XTERM); 8465 8466 case KEY_shmread: 8467 LOP(OP_SHMREAD,XTERM); 8468 8469 case KEY_shmwrite: 8470 LOP(OP_SHMWRITE,XTERM); 8471 8472 case KEY_shutdown: 8473 LOP(OP_SHUTDOWN,XTERM); 8474 8475 case KEY_sin: 8476 UNI(OP_SIN); 8477 8478 case KEY_sleep: 8479 UNI(OP_SLEEP); 8480 8481 case KEY_socket: 8482 LOP(OP_SOCKET,XTERM); 8483 8484 case KEY_socketpair: 8485 LOP(OP_SOCKPAIR,XTERM); 8486 8487 case KEY_sort: 8488 checkcomma(s,PL_tokenbuf,"subroutine name"); 8489 s = skipspace(s); 8490 PL_expect = XTERM; 8491 s = force_word(s,BAREWORD,TRUE,TRUE); 8492 LOP(OP_SORT,XREF); 8493 8494 case KEY_split: 8495 LOP(OP_SPLIT,XTERM); 8496 8497 case KEY_sprintf: 8498 LOP(OP_SPRINTF,XTERM); 8499 8500 case KEY_splice: 8501 LOP(OP_SPLICE,XTERM); 8502 8503 case KEY_sqrt: 8504 UNI(OP_SQRT); 8505 8506 case KEY_srand: 8507 UNI(OP_SRAND); 8508 8509 case KEY_stat: 8510 UNI(OP_STAT); 8511 8512 case KEY_study: 8513 UNI(OP_STUDY); 8514 8515 case KEY_substr: 8516 LOP(OP_SUBSTR,XTERM); 8517 8518 case KEY_format: 8519 case KEY_sub: 8520 return yyl_sub(aTHX_ s, key); 8521 8522 case KEY_system: 8523 LOP(OP_SYSTEM,XREF); 8524 8525 case KEY_symlink: 8526 LOP(OP_SYMLINK,XTERM); 8527 8528 case KEY_syscall: 8529 LOP(OP_SYSCALL,XTERM); 8530 8531 case KEY_sysopen: 8532 LOP(OP_SYSOPEN,XTERM); 8533 8534 case KEY_sysseek: 8535 LOP(OP_SYSSEEK,XTERM); 8536 8537 case KEY_sysread: 8538 LOP(OP_SYSREAD,XTERM); 8539 8540 case KEY_syswrite: 8541 LOP(OP_SYSWRITE,XTERM); 8542 8543 case KEY_tr: 8544 case KEY_y: 8545 s = scan_trans(s); 8546 TERM(sublex_start()); 8547 8548 case KEY_tell: 8549 UNI(OP_TELL); 8550 8551 case KEY_telldir: 8552 UNI(OP_TELLDIR); 8553 8554 case KEY_tie: 8555 LOP(OP_TIE,XTERM); 8556 8557 case KEY_tied: 8558 UNI(OP_TIED); 8559 8560 case KEY_time: 8561 FUN0(OP_TIME); 8562 8563 case KEY_times: 8564 FUN0(OP_TMS); 8565 8566 case KEY_truncate: 8567 LOP(OP_TRUNCATE,XTERM); 8568 8569 case KEY_try: 8570 pl_yylval.ival = CopLINE(PL_curcop); 8571 Perl_ck_warner_d(aTHX_ 8572 packWARN(WARN_EXPERIMENTAL__TRY), "try/catch is experimental"); 8573 PREBLOCK(TRY); 8574 8575 case KEY_uc: 8576 UNI(OP_UC); 8577 8578 case KEY_ucfirst: 8579 UNI(OP_UCFIRST); 8580 8581 case KEY_untie: 8582 UNI(OP_UNTIE); 8583 8584 case KEY_until: 8585 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 8586 return REPORT(0); 8587 pl_yylval.ival = CopLINE(PL_curcop); 8588 OPERATOR(UNTIL); 8589 8590 case KEY_unless: 8591 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 8592 return REPORT(0); 8593 pl_yylval.ival = CopLINE(PL_curcop); 8594 OPERATOR(UNLESS); 8595 8596 case KEY_unlink: 8597 LOP(OP_UNLINK,XTERM); 8598 8599 case KEY_undef: 8600 UNIDOR(OP_UNDEF); 8601 8602 case KEY_unpack: 8603 LOP(OP_UNPACK,XTERM); 8604 8605 case KEY_utime: 8606 LOP(OP_UTIME,XTERM); 8607 8608 case KEY_umask: 8609 UNIDOR(OP_UMASK); 8610 8611 case KEY_unshift: 8612 LOP(OP_UNSHIFT,XTERM); 8613 8614 case KEY_use: 8615 s = tokenize_use(1, s); 8616 TOKEN(USE); 8617 8618 case KEY_values: 8619 UNI(OP_VALUES); 8620 8621 case KEY_vec: 8622 LOP(OP_VEC,XTERM); 8623 8624 case KEY_when: 8625 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 8626 return REPORT(0); 8627 pl_yylval.ival = CopLINE(PL_curcop); 8628 Perl_ck_warner_d(aTHX_ 8629 packWARN(WARN_EXPERIMENTAL__SMARTMATCH), 8630 "when is experimental"); 8631 OPERATOR(WHEN); 8632 8633 case KEY_while: 8634 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 8635 return REPORT(0); 8636 pl_yylval.ival = CopLINE(PL_curcop); 8637 OPERATOR(WHILE); 8638 8639 case KEY_warn: 8640 PL_hints |= HINT_BLOCK_SCOPE; 8641 LOP(OP_WARN,XTERM); 8642 8643 case KEY_wait: 8644 FUN0(OP_WAIT); 8645 8646 case KEY_waitpid: 8647 LOP(OP_WAITPID,XTERM); 8648 8649 case KEY_wantarray: 8650 FUN0(OP_WANTARRAY); 8651 8652 case KEY_write: 8653 /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and 8654 * we use the same number on EBCDIC */ 8655 gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV); 8656 UNI(OP_ENTERWRITE); 8657 8658 case KEY_x: 8659 if (PL_expect == XOPERATOR) { 8660 if (*s == '=' && !PL_lex_allbrackets 8661 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 8662 { 8663 return REPORT(0); 8664 } 8665 Mop(OP_REPEAT); 8666 } 8667 check_uni(); 8668 return yyl_just_a_word(aTHX_ s, len, orig_keyword, c); 8669 8670 case KEY_xor: 8671 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC) 8672 return REPORT(0); 8673 pl_yylval.ival = OP_XOR; 8674 OPERATOR(OROP); 8675 } 8676 } 8677 8678 static int 8679 yyl_key_core(pTHX_ char *s, STRLEN len, struct code c) 8680 { 8681 I32 key = 0; 8682 I32 orig_keyword = 0; 8683 STRLEN olen = len; 8684 char *d = s; 8685 s += 2; 8686 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len); 8687 if ((*s == ':' && s[1] == ':') 8688 || (!(key = keyword(PL_tokenbuf, len, 1)) && *s == '\'')) 8689 { 8690 Copy(PL_bufptr, PL_tokenbuf, olen, char); 8691 return yyl_just_a_word(aTHX_ d, olen, 0, c); 8692 } 8693 if (!key) 8694 Perl_croak(aTHX_ "CORE::%" UTF8f " is not a keyword", 8695 UTF8fARG(UTF, len, PL_tokenbuf)); 8696 if (key < 0) 8697 key = -key; 8698 else if (key == KEY_require || key == KEY_do 8699 || key == KEY_glob) 8700 /* that's a way to remember we saw "CORE::" */ 8701 orig_keyword = key; 8702 8703 /* Known to be a reserved word at this point */ 8704 return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c); 8705 } 8706 8707 static int 8708 yyl_keylookup(pTHX_ char *s, GV *gv) 8709 { 8710 STRLEN len; 8711 bool anydelim; 8712 I32 key; 8713 struct code c = no_code; 8714 I32 orig_keyword = 0; 8715 char *d; 8716 8717 c.gv = gv; 8718 8719 PL_bufptr = s; 8720 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len); 8721 8722 /* Some keywords can be followed by any delimiter, including ':' */ 8723 anydelim = word_takes_any_delimiter(PL_tokenbuf, len); 8724 8725 /* x::* is just a word, unless x is "CORE" */ 8726 if (!anydelim && *s == ':' && s[1] == ':') { 8727 if (memEQs(PL_tokenbuf, len, "CORE")) 8728 return yyl_key_core(aTHX_ s, len, c); 8729 return yyl_just_a_word(aTHX_ s, len, 0, c); 8730 } 8731 8732 d = s; 8733 while (d < PL_bufend && isSPACE(*d)) 8734 d++; /* no comments skipped here, or s### is misparsed */ 8735 8736 /* Is this a word before a => operator? */ 8737 if (*d == '=' && d[1] == '>') { 8738 return yyl_fatcomma(aTHX_ s, len); 8739 } 8740 8741 /* Check for plugged-in keyword */ 8742 { 8743 OP *o; 8744 int result; 8745 char *saved_bufptr = PL_bufptr; 8746 PL_bufptr = s; 8747 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o); 8748 s = PL_bufptr; 8749 if (result == KEYWORD_PLUGIN_DECLINE) { 8750 /* not a plugged-in keyword */ 8751 PL_bufptr = saved_bufptr; 8752 } else if (result == KEYWORD_PLUGIN_STMT) { 8753 pl_yylval.opval = o; 8754 CLINE; 8755 if (!PL_nexttoke) PL_expect = XSTATE; 8756 return REPORT(PLUGSTMT); 8757 } else if (result == KEYWORD_PLUGIN_EXPR) { 8758 pl_yylval.opval = o; 8759 CLINE; 8760 if (!PL_nexttoke) PL_expect = XOPERATOR; 8761 return REPORT(PLUGEXPR); 8762 } else { 8763 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'", PL_tokenbuf); 8764 } 8765 } 8766 8767 /* Is this a label? */ 8768 if (!anydelim && PL_expect == XSTATE 8769 && d < PL_bufend && *d == ':' && *(d + 1) != ':') { 8770 s = d + 1; 8771 pl_yylval.opval = 8772 newSVOP(OP_CONST, 0, 8773 newSVpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0)); 8774 CLINE; 8775 TOKEN(LABEL); 8776 } 8777 8778 /* Check for lexical sub */ 8779 if (PL_expect != XOPERATOR) { 8780 char tmpbuf[sizeof PL_tokenbuf + 1]; 8781 *tmpbuf = '&'; 8782 Copy(PL_tokenbuf, tmpbuf+1, len, char); 8783 c.off = pad_findmy_pvn(tmpbuf, len+1, 0); 8784 if (c.off != NOT_IN_PAD) { 8785 assert(c.off); /* we assume this is boolean-true below */ 8786 if (PAD_COMPNAME_FLAGS_isOUR(c.off)) { 8787 HV * const stash = PAD_COMPNAME_OURSTASH(c.off); 8788 HEK * const stashname = HvNAME_HEK(stash); 8789 c.sv = newSVhek(stashname); 8790 sv_catpvs(c.sv, "::"); 8791 sv_catpvn_flags(c.sv, PL_tokenbuf, len, 8792 (UTF ? SV_CATUTF8 : SV_CATBYTES)); 8793 c.gv = gv_fetchsv(c.sv, GV_NOADD_NOINIT | SvUTF8(c.sv), 8794 SVt_PVCV); 8795 c.off = 0; 8796 if (!c.gv) { 8797 sv_free(c.sv); 8798 c.sv = NULL; 8799 return yyl_just_a_word(aTHX_ s, len, 0, c); 8800 } 8801 } 8802 else { 8803 c.rv2cv_op = newOP(OP_PADANY, 0); 8804 c.rv2cv_op->op_targ = c.off; 8805 c.cv = find_lexical_cv(c.off); 8806 } 8807 c.lex = TRUE; 8808 return yyl_just_a_word(aTHX_ s, len, 0, c); 8809 } 8810 c.off = 0; 8811 } 8812 8813 /* Check for built-in keyword */ 8814 key = keyword(PL_tokenbuf, len, 0); 8815 8816 if (key < 0) 8817 key = yyl_secondclass_keyword(aTHX_ s, len, key, &orig_keyword, &c.gv, &c.gvp); 8818 8819 if (key && key != KEY___DATA__ && key != KEY___END__ 8820 && (!anydelim || *s != '#')) { 8821 /* no override, and not s### either; skipspace is safe here 8822 * check for => on following line */ 8823 bool arrow; 8824 STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr); 8825 STRLEN soff = s - SvPVX(PL_linestr); 8826 s = peekspace(s); 8827 arrow = *s == '=' && s[1] == '>'; 8828 PL_bufptr = SvPVX(PL_linestr) + bufoff; 8829 s = SvPVX(PL_linestr) + soff; 8830 if (arrow) 8831 return yyl_fatcomma(aTHX_ s, len); 8832 } 8833 8834 return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c); 8835 } 8836 8837 static int 8838 yyl_try(pTHX_ char *s) 8839 { 8840 char *d; 8841 GV *gv = NULL; 8842 int tok; 8843 8844 retry: 8845 switch (*s) { 8846 default: 8847 if (UTF ? isIDFIRST_utf8_safe(s, PL_bufend) : isALNUMC(*s)) { 8848 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY) 8849 return tok; 8850 goto retry_bufptr; 8851 } 8852 yyl_croak_unrecognised(aTHX_ s); 8853 8854 case 4: 8855 case 26: 8856 /* emulate EOF on ^D or ^Z */ 8857 if ((tok = yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s)) != YYL_RETRY) 8858 return tok; 8859 retry_bufptr: 8860 s = PL_bufptr; 8861 goto retry; 8862 8863 case 0: 8864 if ((!PL_rsfp || PL_lex_inwhat) 8865 && (!PL_parser->filtered || s+1 < PL_bufend)) { 8866 PL_last_uni = 0; 8867 PL_last_lop = 0; 8868 if (PL_lex_brackets 8869 && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) 8870 { 8871 yyerror((const char *) 8872 (PL_lex_formbrack 8873 ? "Format not terminated" 8874 : "Missing right curly or square bracket")); 8875 } 8876 DEBUG_T({ 8877 PerlIO_printf(Perl_debug_log, "### Tokener got EOF\n"); 8878 }); 8879 TOKEN(0); 8880 } 8881 if (s++ < PL_bufend) 8882 goto retry; /* ignore stray nulls */ 8883 PL_last_uni = 0; 8884 PL_last_lop = 0; 8885 if (!PL_in_eval && !PL_preambled) { 8886 PL_preambled = TRUE; 8887 if (PL_perldb) { 8888 /* Generate a string of Perl code to load the debugger. 8889 * If PERL5DB is set, it will return the contents of that, 8890 * otherwise a compile-time require of perl5db.pl. */ 8891 8892 const char * const pdb = PerlEnv_getenv("PERL5DB"); 8893 8894 if (pdb) { 8895 sv_setpv(PL_linestr, pdb); 8896 sv_catpvs(PL_linestr,";"); 8897 } else { 8898 SETERRNO(0,SS_NORMAL); 8899 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };"); 8900 } 8901 PL_parser->preambling = CopLINE(PL_curcop); 8902 } else 8903 SvPVCLEAR(PL_linestr); 8904 if (PL_preambleav) { 8905 SV **svp = AvARRAY(PL_preambleav); 8906 SV **const end = svp + AvFILLp(PL_preambleav); 8907 while(svp <= end) { 8908 sv_catsv(PL_linestr, *svp); 8909 ++svp; 8910 sv_catpvs(PL_linestr, ";"); 8911 } 8912 sv_free(MUTABLE_SV(PL_preambleav)); 8913 PL_preambleav = NULL; 8914 } 8915 if (PL_minus_E) 8916 sv_catpvs(PL_linestr, 8917 "use feature ':" STRINGIFY(PERL_REVISION) "." STRINGIFY(PERL_VERSION) "';"); 8918 if (PL_minus_n || PL_minus_p) { 8919 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/); 8920 if (PL_minus_l) 8921 sv_catpvs(PL_linestr,"chomp;"); 8922 if (PL_minus_a) { 8923 if (PL_minus_F) { 8924 if ( ( *PL_splitstr == '/' 8925 || *PL_splitstr == '\'' 8926 || *PL_splitstr == '"') 8927 && strchr(PL_splitstr + 1, *PL_splitstr)) 8928 { 8929 /* strchr is ok, because -F pattern can't contain 8930 * embeddded NULs */ 8931 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr); 8932 } 8933 else { 8934 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL 8935 bytes can be used as quoting characters. :-) */ 8936 const char *splits = PL_splitstr; 8937 sv_catpvs(PL_linestr, "our @F=split(q\0"); 8938 do { 8939 /* Need to \ \s */ 8940 if (*splits == '\\') 8941 sv_catpvn(PL_linestr, splits, 1); 8942 sv_catpvn(PL_linestr, splits, 1); 8943 } while (*splits++); 8944 /* This loop will embed the trailing NUL of 8945 PL_linestr as the last thing it does before 8946 terminating. */ 8947 sv_catpvs(PL_linestr, ");"); 8948 } 8949 } 8950 else 8951 sv_catpvs(PL_linestr,"our @F=split(' ');"); 8952 } 8953 } 8954 sv_catpvs(PL_linestr, "\n"); 8955 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); 8956 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 8957 PL_last_lop = PL_last_uni = NULL; 8958 if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash) 8959 update_debugger_info(PL_linestr, NULL, 0); 8960 goto retry; 8961 } 8962 if ((tok = yyl_fake_eof(aTHX_ 0, cBOOL(PL_rsfp), s)) != YYL_RETRY) 8963 return tok; 8964 goto retry_bufptr; 8965 8966 case '\r': 8967 #ifdef PERL_STRICT_CR 8968 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r'); 8969 Perl_croak(aTHX_ 8970 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n"); 8971 #endif 8972 case ' ': case '\t': case '\f': case '\v': 8973 s++; 8974 goto retry; 8975 8976 case '#': 8977 case '\n': { 8978 const bool needs_semicolon = yyl_eol_needs_semicolon(aTHX_ &s); 8979 if (needs_semicolon) 8980 TOKEN(PERLY_SEMICOLON); 8981 else 8982 goto retry; 8983 } 8984 8985 case '-': 8986 return yyl_hyphen(aTHX_ s); 8987 8988 case '+': 8989 return yyl_plus(aTHX_ s); 8990 8991 case '*': 8992 return yyl_star(aTHX_ s); 8993 8994 case '%': 8995 return yyl_percent(aTHX_ s); 8996 8997 case '^': 8998 return yyl_caret(aTHX_ s); 8999 9000 case '[': 9001 return yyl_leftsquare(aTHX_ s); 9002 9003 case '~': 9004 return yyl_tilde(aTHX_ s); 9005 9006 case ',': 9007 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) 9008 TOKEN(0); 9009 s++; 9010 OPERATOR(PERLY_COMMA); 9011 case ':': 9012 if (s[1] == ':') 9013 return yyl_just_a_word(aTHX_ s, 0, 0, no_code); 9014 return yyl_colon(aTHX_ s + 1); 9015 9016 case '(': 9017 return yyl_leftparen(aTHX_ s + 1); 9018 9019 case ';': 9020 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 9021 TOKEN(0); 9022 CLINE; 9023 s++; 9024 PL_expect = XSTATE; 9025 TOKEN(PERLY_SEMICOLON); 9026 9027 case ')': 9028 return yyl_rightparen(aTHX_ s); 9029 9030 case ']': 9031 return yyl_rightsquare(aTHX_ s); 9032 9033 case '{': 9034 return yyl_leftcurly(aTHX_ s + 1, 0); 9035 9036 case '}': 9037 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF) 9038 TOKEN(0); 9039 return yyl_rightcurly(aTHX_ s, 0); 9040 9041 case '&': 9042 return yyl_ampersand(aTHX_ s); 9043 9044 case '|': 9045 return yyl_verticalbar(aTHX_ s); 9046 9047 case '=': 9048 if (s[1] == '=' && (s == PL_linestart || s[-1] == '\n') 9049 && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "=====")) 9050 { 9051 s = vcs_conflict_marker(s + 7); 9052 goto retry; 9053 } 9054 9055 s++; 9056 { 9057 const char tmp = *s++; 9058 if (tmp == '=') { 9059 if (!PL_lex_allbrackets 9060 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 9061 { 9062 s -= 2; 9063 TOKEN(0); 9064 } 9065 ChEop(OP_EQ); 9066 } 9067 if (tmp == '>') { 9068 if (!PL_lex_allbrackets 9069 && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) 9070 { 9071 s -= 2; 9072 TOKEN(0); 9073 } 9074 OPERATOR(PERLY_COMMA); 9075 } 9076 if (tmp == '~') 9077 PMop(OP_MATCH); 9078 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX) 9079 && memCHRs("+-*/%.^&|<",tmp)) 9080 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 9081 "Reversed %c= operator",(int)tmp); 9082 s--; 9083 if (PL_expect == XSTATE 9084 && isALPHA(tmp) 9085 && (s == PL_linestart+1 || s[-2] == '\n') ) 9086 { 9087 if ( (PL_in_eval && !PL_rsfp && !PL_parser->filtered) 9088 || PL_lex_state != LEX_NORMAL) 9089 { 9090 d = PL_bufend; 9091 while (s < d) { 9092 if (*s++ == '\n') { 9093 incline(s, PL_bufend); 9094 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=cut")) 9095 { 9096 s = (char *) memchr(s,'\n', d - s); 9097 if (s) 9098 s++; 9099 else 9100 s = d; 9101 incline(s, PL_bufend); 9102 goto retry; 9103 } 9104 } 9105 } 9106 goto retry; 9107 } 9108 s = PL_bufend; 9109 PL_parser->in_pod = 1; 9110 goto retry; 9111 } 9112 } 9113 if (PL_expect == XBLOCK) { 9114 const char *t = s; 9115 #ifdef PERL_STRICT_CR 9116 while (SPACE_OR_TAB(*t)) 9117 #else 9118 while (SPACE_OR_TAB(*t) || *t == '\r') 9119 #endif 9120 t++; 9121 if (*t == '\n' || *t == '#') { 9122 ENTER_with_name("lex_format"); 9123 SAVEI8(PL_parser->form_lex_state); 9124 SAVEI32(PL_lex_formbrack); 9125 PL_parser->form_lex_state = PL_lex_state; 9126 PL_lex_formbrack = PL_lex_brackets + 1; 9127 PL_parser->sub_error_count = PL_error_count; 9128 return yyl_leftcurly(aTHX_ s, 1); 9129 } 9130 } 9131 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { 9132 s--; 9133 TOKEN(0); 9134 } 9135 pl_yylval.ival = 0; 9136 OPERATOR(ASSIGNOP); 9137 9138 case '!': 9139 return yyl_bang(aTHX_ s + 1); 9140 9141 case '<': 9142 if (s[1] == '<' && (s == PL_linestart || s[-1] == '\n') 9143 && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "<<<<<")) 9144 { 9145 s = vcs_conflict_marker(s + 7); 9146 goto retry; 9147 } 9148 return yyl_leftpointy(aTHX_ s); 9149 9150 case '>': 9151 if (s[1] == '>' && (s == PL_linestart || s[-1] == '\n') 9152 && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), ">>>>>")) 9153 { 9154 s = vcs_conflict_marker(s + 7); 9155 goto retry; 9156 } 9157 return yyl_rightpointy(aTHX_ s + 1); 9158 9159 case '$': 9160 return yyl_dollar(aTHX_ s); 9161 9162 case '@': 9163 return yyl_snail(aTHX_ s); 9164 9165 case '/': /* may be division, defined-or, or pattern */ 9166 return yyl_slash(aTHX_ s); 9167 9168 case '?': /* conditional */ 9169 s++; 9170 if (!PL_lex_allbrackets 9171 && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) 9172 { 9173 s--; 9174 TOKEN(0); 9175 } 9176 PL_lex_allbrackets++; 9177 OPERATOR(PERLY_QUESTION_MARK); 9178 9179 case '.': 9180 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack 9181 #ifdef PERL_STRICT_CR 9182 && s[1] == '\n' 9183 #else 9184 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n')) 9185 #endif 9186 && (s == PL_linestart || s[-1] == '\n') ) 9187 { 9188 PL_expect = XSTATE; 9189 /* formbrack==2 means dot seen where arguments expected */ 9190 return yyl_rightcurly(aTHX_ s, 2); 9191 } 9192 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') { 9193 s += 3; 9194 OPERATOR(YADAYADA); 9195 } 9196 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) { 9197 char tmp = *s++; 9198 if (*s == tmp) { 9199 if (!PL_lex_allbrackets 9200 && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) 9201 { 9202 s--; 9203 TOKEN(0); 9204 } 9205 s++; 9206 if (*s == tmp) { 9207 s++; 9208 pl_yylval.ival = OPf_SPECIAL; 9209 } 9210 else 9211 pl_yylval.ival = 0; 9212 OPERATOR(DOTDOT); 9213 } 9214 if (*s == '=' && !PL_lex_allbrackets 9215 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 9216 { 9217 s--; 9218 TOKEN(0); 9219 } 9220 Aop(OP_CONCAT); 9221 } 9222 /* FALLTHROUGH */ 9223 case '0': case '1': case '2': case '3': case '4': 9224 case '5': case '6': case '7': case '8': case '9': 9225 s = scan_num(s, &pl_yylval); 9226 DEBUG_T( { printbuf("### Saw number in %s\n", s); } ); 9227 if (PL_expect == XOPERATOR) 9228 no_op("Number",s); 9229 TERM(THING); 9230 9231 case '\'': 9232 return yyl_sglquote(aTHX_ s); 9233 9234 case '"': 9235 return yyl_dblquote(aTHX_ s); 9236 9237 case '`': 9238 return yyl_backtick(aTHX_ s); 9239 9240 case '\\': 9241 return yyl_backslash(aTHX_ s + 1); 9242 9243 case 'v': 9244 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) { 9245 char *start = s + 2; 9246 while (isDIGIT(*start) || *start == '_') 9247 start++; 9248 if (*start == '.' && isDIGIT(start[1])) { 9249 s = scan_num(s, &pl_yylval); 9250 TERM(THING); 9251 } 9252 else if ((*start == ':' && start[1] == ':') 9253 || (PL_expect == XSTATE && *start == ':')) { 9254 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY) 9255 return tok; 9256 goto retry_bufptr; 9257 } 9258 else if (PL_expect == XSTATE) { 9259 d = start; 9260 while (d < PL_bufend && isSPACE(*d)) d++; 9261 if (*d == ':') { 9262 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY) 9263 return tok; 9264 goto retry_bufptr; 9265 } 9266 } 9267 /* avoid v123abc() or $h{v1}, allow C<print v10;> */ 9268 if (!isALPHA(*start) && (PL_expect == XTERM 9269 || PL_expect == XREF || PL_expect == XSTATE 9270 || PL_expect == XTERMORDORDOR)) { 9271 GV *const gv = gv_fetchpvn_flags(s, start - s, 9272 UTF ? SVf_UTF8 : 0, SVt_PVCV); 9273 if (!gv) { 9274 s = scan_num(s, &pl_yylval); 9275 TERM(THING); 9276 } 9277 } 9278 } 9279 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY) 9280 return tok; 9281 goto retry_bufptr; 9282 9283 case 'x': 9284 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) { 9285 s++; 9286 Mop(OP_REPEAT); 9287 } 9288 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY) 9289 return tok; 9290 goto retry_bufptr; 9291 9292 case '_': 9293 case 'a': case 'A': 9294 case 'b': case 'B': 9295 case 'c': case 'C': 9296 case 'd': case 'D': 9297 case 'e': case 'E': 9298 case 'f': case 'F': 9299 case 'g': case 'G': 9300 case 'h': case 'H': 9301 case 'i': case 'I': 9302 case 'j': case 'J': 9303 case 'k': case 'K': 9304 case 'l': case 'L': 9305 case 'm': case 'M': 9306 case 'n': case 'N': 9307 case 'o': case 'O': 9308 case 'p': case 'P': 9309 case 'q': case 'Q': 9310 case 'r': case 'R': 9311 case 's': case 'S': 9312 case 't': case 'T': 9313 case 'u': case 'U': 9314 case 'V': 9315 case 'w': case 'W': 9316 case 'X': 9317 case 'y': case 'Y': 9318 case 'z': case 'Z': 9319 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY) 9320 return tok; 9321 goto retry_bufptr; 9322 } 9323 } 9324 9325 9326 /* 9327 yylex 9328 9329 Works out what to call the token just pulled out of the input 9330 stream. The yacc parser takes care of taking the ops we return and 9331 stitching them into a tree. 9332 9333 Returns: 9334 The type of the next token 9335 9336 Structure: 9337 Check if we have already built the token; if so, use it. 9338 Switch based on the current state: 9339 - if we have a case modifier in a string, deal with that 9340 - handle other cases of interpolation inside a string 9341 - scan the next line if we are inside a format 9342 In the normal state, switch on the next character: 9343 - default: 9344 if alphabetic, go to key lookup 9345 unrecognized character - croak 9346 - 0/4/26: handle end-of-line or EOF 9347 - cases for whitespace 9348 - \n and #: handle comments and line numbers 9349 - various operators, brackets and sigils 9350 - numbers 9351 - quotes 9352 - 'v': vstrings (or go to key lookup) 9353 - 'x' repetition operator (or go to key lookup) 9354 - other ASCII alphanumerics (key lookup begins here): 9355 word before => ? 9356 keyword plugin 9357 scan built-in keyword (but do nothing with it yet) 9358 check for statement label 9359 check for lexical subs 9360 return yyl_just_a_word if there is one 9361 see whether built-in keyword is overridden 9362 switch on keyword number: 9363 - default: return yyl_just_a_word: 9364 not a built-in keyword; handle bareword lookup 9365 disambiguate between method and sub call 9366 fall back to bareword 9367 - cases for built-in keywords 9368 */ 9369 9370 int 9371 Perl_yylex(pTHX) 9372 { 9373 char *s = PL_bufptr; 9374 9375 if (UNLIKELY(PL_parser->recheck_utf8_validity)) { 9376 const U8* first_bad_char_loc; 9377 if (UTF && UNLIKELY(! is_utf8_string_loc((U8 *) PL_bufptr, 9378 PL_bufend - PL_bufptr, 9379 &first_bad_char_loc))) 9380 { 9381 _force_out_malformed_utf8_message(first_bad_char_loc, 9382 (U8 *) PL_bufend, 9383 0, 9384 1 /* 1 means die */ ); 9385 NOT_REACHED; /* NOTREACHED */ 9386 } 9387 PL_parser->recheck_utf8_validity = FALSE; 9388 } 9389 DEBUG_T( { 9390 SV* tmp = newSVpvs(""); 9391 PerlIO_printf(Perl_debug_log, "### %" IVdf ":LEX_%s/X%s %s\n", 9392 (IV)CopLINE(PL_curcop), 9393 lex_state_names[PL_lex_state], 9394 exp_name[PL_expect], 9395 pv_display(tmp, s, strlen(s), 0, 60)); 9396 SvREFCNT_dec(tmp); 9397 } ); 9398 9399 /* when we've already built the next token, just pull it out of the queue */ 9400 if (PL_nexttoke) { 9401 PL_nexttoke--; 9402 pl_yylval = PL_nextval[PL_nexttoke]; 9403 { 9404 I32 next_type; 9405 next_type = PL_nexttype[PL_nexttoke]; 9406 if (next_type & (7<<24)) { 9407 if (next_type & (1<<24)) { 9408 if (PL_lex_brackets > 100) 9409 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); 9410 PL_lex_brackstack[PL_lex_brackets++] = 9411 (char) ((U8) (next_type >> 16)); 9412 } 9413 if (next_type & (2<<24)) 9414 PL_lex_allbrackets++; 9415 if (next_type & (4<<24)) 9416 PL_lex_allbrackets--; 9417 next_type &= 0xffff; 9418 } 9419 return REPORT(next_type == 'p' ? pending_ident() : next_type); 9420 } 9421 } 9422 9423 switch (PL_lex_state) { 9424 case LEX_NORMAL: 9425 case LEX_INTERPNORMAL: 9426 break; 9427 9428 /* interpolated case modifiers like \L \U, including \Q and \E. 9429 when we get here, PL_bufptr is at the \ 9430 */ 9431 case LEX_INTERPCASEMOD: 9432 /* handle \E or end of string */ 9433 return yyl_interpcasemod(aTHX_ s); 9434 9435 case LEX_INTERPPUSH: 9436 return REPORT(sublex_push()); 9437 9438 case LEX_INTERPSTART: 9439 if (PL_bufptr == PL_bufend) 9440 return REPORT(sublex_done()); 9441 DEBUG_T({ 9442 if(*PL_bufptr != '(') 9443 PerlIO_printf(Perl_debug_log, "### Interpolated variable\n"); 9444 }); 9445 PL_expect = XTERM; 9446 /* for /@a/, we leave the joining for the regex engine to do 9447 * (unless we're within \Q etc) */ 9448 PL_lex_dojoin = (*PL_bufptr == '@' 9449 && (!PL_lex_inpat || PL_lex_casemods)); 9450 PL_lex_state = LEX_INTERPNORMAL; 9451 if (PL_lex_dojoin) { 9452 NEXTVAL_NEXTTOKE.ival = 0; 9453 force_next(PERLY_COMMA); 9454 force_ident("\"", PERLY_DOLLAR); 9455 NEXTVAL_NEXTTOKE.ival = 0; 9456 force_next(PERLY_DOLLAR); 9457 NEXTVAL_NEXTTOKE.ival = 0; 9458 force_next((2<<24)|PERLY_PAREN_OPEN); 9459 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */ 9460 force_next(FUNC); 9461 } 9462 /* Convert (?{...}) and friends to 'do {...}' */ 9463 if (PL_lex_inpat && *PL_bufptr == '(') { 9464 PL_parser->lex_shared->re_eval_start = PL_bufptr; 9465 PL_bufptr += 2; 9466 if (*PL_bufptr != '{') 9467 PL_bufptr++; 9468 PL_expect = XTERMBLOCK; 9469 force_next(DO); 9470 } 9471 9472 if (PL_lex_starts++) { 9473 s = PL_bufptr; 9474 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ 9475 if (!PL_lex_casemods && PL_lex_inpat) 9476 TOKEN(PERLY_COMMA); 9477 else 9478 AopNOASSIGN(OP_CONCAT); 9479 } 9480 return yylex(); 9481 9482 case LEX_INTERPENDMAYBE: 9483 if (intuit_more(PL_bufptr, PL_bufend)) { 9484 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */ 9485 break; 9486 } 9487 /* FALLTHROUGH */ 9488 9489 case LEX_INTERPEND: 9490 if (PL_lex_dojoin) { 9491 const U8 dojoin_was = PL_lex_dojoin; 9492 PL_lex_dojoin = FALSE; 9493 PL_lex_state = LEX_INTERPCONCAT; 9494 PL_lex_allbrackets--; 9495 return REPORT(dojoin_was == 1 ? (int)PERLY_PAREN_CLOSE : (int)POSTJOIN); 9496 } 9497 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl 9498 && SvEVALED(PL_lex_repl)) 9499 { 9500 if (PL_bufptr != PL_bufend) 9501 Perl_croak(aTHX_ "Bad evalled substitution pattern"); 9502 PL_lex_repl = NULL; 9503 } 9504 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets 9505 re_eval_str. If the here-doc body's length equals the previous 9506 value of re_eval_start, re_eval_start will now be null. So 9507 check re_eval_str as well. */ 9508 if (PL_parser->lex_shared->re_eval_start 9509 || PL_parser->lex_shared->re_eval_str) { 9510 SV *sv; 9511 if (*PL_bufptr != ')') 9512 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'"); 9513 PL_bufptr++; 9514 /* having compiled a (?{..}) expression, return the original 9515 * text too, as a const */ 9516 if (PL_parser->lex_shared->re_eval_str) { 9517 sv = PL_parser->lex_shared->re_eval_str; 9518 PL_parser->lex_shared->re_eval_str = NULL; 9519 SvCUR_set(sv, 9520 PL_bufptr - PL_parser->lex_shared->re_eval_start); 9521 SvPV_shrink_to_cur(sv); 9522 } 9523 else sv = newSVpvn(PL_parser->lex_shared->re_eval_start, 9524 PL_bufptr - PL_parser->lex_shared->re_eval_start); 9525 NEXTVAL_NEXTTOKE.opval = 9526 newSVOP(OP_CONST, 0, 9527 sv); 9528 force_next(THING); 9529 PL_parser->lex_shared->re_eval_start = NULL; 9530 PL_expect = XTERM; 9531 return REPORT(PERLY_COMMA); 9532 } 9533 9534 /* FALLTHROUGH */ 9535 case LEX_INTERPCONCAT: 9536 #ifdef DEBUGGING 9537 if (PL_lex_brackets) 9538 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld", 9539 (long) PL_lex_brackets); 9540 #endif 9541 if (PL_bufptr == PL_bufend) 9542 return REPORT(sublex_done()); 9543 9544 /* m'foo' still needs to be parsed for possible (?{...}) */ 9545 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) { 9546 SV *sv = newSVsv(PL_linestr); 9547 sv = tokeq(sv); 9548 pl_yylval.opval = newSVOP(OP_CONST, 0, sv); 9549 s = PL_bufend; 9550 } 9551 else { 9552 int save_error_count = PL_error_count; 9553 9554 s = scan_const(PL_bufptr); 9555 9556 /* Set flag if this was a pattern and there were errors. op.c will 9557 * refuse to compile a pattern with this flag set. Otherwise, we 9558 * could get segfaults, etc. */ 9559 if (PL_lex_inpat && PL_error_count > save_error_count) { 9560 ((PMOP*)PL_lex_inpat)->op_pmflags |= PMf_HAS_ERROR; 9561 } 9562 if (*s == '\\') 9563 PL_lex_state = LEX_INTERPCASEMOD; 9564 else 9565 PL_lex_state = LEX_INTERPSTART; 9566 } 9567 9568 if (s != PL_bufptr) { 9569 NEXTVAL_NEXTTOKE = pl_yylval; 9570 PL_expect = XTERM; 9571 force_next(THING); 9572 if (PL_lex_starts++) { 9573 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ 9574 if (!PL_lex_casemods && PL_lex_inpat) 9575 TOKEN(PERLY_COMMA); 9576 else 9577 AopNOASSIGN(OP_CONCAT); 9578 } 9579 else { 9580 PL_bufptr = s; 9581 return yylex(); 9582 } 9583 } 9584 9585 return yylex(); 9586 case LEX_FORMLINE: 9587 if (PL_parser->sub_error_count != PL_error_count) { 9588 /* There was an error parsing a formline, which tends to 9589 mess up the parser. 9590 Unlike interpolated sub-parsing, we can't treat any of 9591 these as recoverable, so no need to check sub_no_recover. 9592 */ 9593 yyquit(); 9594 } 9595 assert(PL_lex_formbrack); 9596 s = scan_formline(PL_bufptr); 9597 if (!PL_lex_formbrack) 9598 return yyl_rightcurly(aTHX_ s, 1); 9599 PL_bufptr = s; 9600 return yylex(); 9601 } 9602 9603 /* We really do *not* want PL_linestr ever becoming a COW. */ 9604 assert (!SvIsCOW(PL_linestr)); 9605 s = PL_bufptr; 9606 PL_oldoldbufptr = PL_oldbufptr; 9607 PL_oldbufptr = s; 9608 9609 if (PL_in_my == KEY_sigvar) { 9610 PL_parser->saw_infix_sigil = 0; 9611 return yyl_sigvar(aTHX_ s); 9612 } 9613 9614 { 9615 /* yyl_try() and its callees might consult PL_parser->saw_infix_sigil. 9616 On its return, we then need to set it to indicate whether the token 9617 we just encountered was an infix operator that (if we hadn't been 9618 expecting an operator) have been a sigil. 9619 */ 9620 bool expected_operator = (PL_expect == XOPERATOR); 9621 int ret = yyl_try(aTHX_ s); 9622 switch (pl_yylval.ival) { 9623 case OP_BIT_AND: 9624 case OP_MODULO: 9625 case OP_MULTIPLY: 9626 case OP_NBIT_AND: 9627 if (expected_operator) { 9628 PL_parser->saw_infix_sigil = 1; 9629 break; 9630 } 9631 /* FALLTHROUGH */ 9632 default: 9633 PL_parser->saw_infix_sigil = 0; 9634 } 9635 return ret; 9636 } 9637 } 9638 9639 9640 /* 9641 S_pending_ident 9642 9643 Looks up an identifier in the pad or in a package 9644 9645 PL_in_my == KEY_sigvar indicates that this is a subroutine signature variable 9646 rather than a plain pad var. 9647 9648 Returns: 9649 PRIVATEREF if this is a lexical name. 9650 BAREWORD if this belongs to a package. 9651 9652 Structure: 9653 if we're in a my declaration 9654 croak if they tried to say my($foo::bar) 9655 build the ops for a my() declaration 9656 if it's an access to a my() variable 9657 build ops for access to a my() variable 9658 if in a dq string, and they've said @foo and we can't find @foo 9659 warn 9660 build ops for a bareword 9661 */ 9662 9663 static int 9664 S_pending_ident(pTHX) 9665 { 9666 PADOFFSET tmp = 0; 9667 const char pit = (char)pl_yylval.ival; 9668 const STRLEN tokenbuf_len = strlen(PL_tokenbuf); 9669 /* All routes through this function want to know if there is a colon. */ 9670 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len); 9671 9672 DEBUG_T({ PerlIO_printf(Perl_debug_log, 9673 "### Pending identifier '%s'\n", PL_tokenbuf); }); 9674 assert(tokenbuf_len >= 2); 9675 9676 /* if we're in a my(), we can't allow dynamics here. 9677 $foo'bar has already been turned into $foo::bar, so 9678 just check for colons. 9679 9680 if it's a legal name, the OP is a PADANY. 9681 */ 9682 if (PL_in_my) { 9683 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */ 9684 if (has_colon) 9685 /* diag_listed_as: No package name allowed for variable %s 9686 in "our" */ 9687 yyerror_pv(Perl_form(aTHX_ "No package name allowed for " 9688 "%s %s in \"our\"", 9689 *PL_tokenbuf=='&' ? "subroutine" : "variable", 9690 PL_tokenbuf), UTF ? SVf_UTF8 : 0); 9691 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0); 9692 } 9693 else { 9694 OP *o; 9695 if (has_colon) { 9696 /* "my" variable %s can't be in a package */ 9697 /* PL_no_myglob is constant */ 9698 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); 9699 yyerror_pv(Perl_form(aTHX_ PL_no_myglob, 9700 PL_in_my == KEY_my ? "my" : "state", 9701 *PL_tokenbuf == '&' ? "subroutine" : "variable", 9702 PL_tokenbuf), 9703 UTF ? SVf_UTF8 : 0); 9704 GCC_DIAG_RESTORE_STMT; 9705 } 9706 9707 if (PL_in_my == KEY_sigvar) { 9708 /* A signature 'padop' needs in addition, an op_first to 9709 * point to a child sigdefelem, and an extra field to hold 9710 * the signature index. We can achieve both by using an 9711 * UNOP_AUX and (ab)using the op_aux field to hold the 9712 * index. If we ever need more fields, use a real malloced 9713 * aux strut instead. 9714 */ 9715 o = newUNOP_AUX(OP_ARGELEM, 0, NULL, 9716 INT2PTR(UNOP_AUX_item *, 9717 (PL_parser->sig_elems))); 9718 o->op_private |= ( PL_tokenbuf[0] == '$' ? OPpARGELEM_SV 9719 : PL_tokenbuf[0] == '@' ? OPpARGELEM_AV 9720 : OPpARGELEM_HV); 9721 } 9722 else 9723 o = newOP(OP_PADANY, 0); 9724 o->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 9725 UTF ? SVf_UTF8 : 0); 9726 if (PL_in_my == KEY_sigvar) 9727 PL_in_my = 0; 9728 9729 pl_yylval.opval = o; 9730 return PRIVATEREF; 9731 } 9732 } 9733 9734 /* 9735 build the ops for accesses to a my() variable. 9736 */ 9737 9738 if (!has_colon) { 9739 if (!PL_in_my) 9740 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len, 9741 0); 9742 if (tmp != NOT_IN_PAD) { 9743 /* might be an "our" variable" */ 9744 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) { 9745 /* build ops for a bareword */ 9746 HV * const stash = PAD_COMPNAME_OURSTASH(tmp); 9747 HEK * const stashname = HvNAME_HEK(stash); 9748 SV * const sym = newSVhek(stashname); 9749 sv_catpvs(sym, "::"); 9750 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, (UTF ? SV_CATUTF8 : SV_CATBYTES )); 9751 pl_yylval.opval = newSVOP(OP_CONST, 0, sym); 9752 pl_yylval.opval->op_private = OPpCONST_ENTERED; 9753 if (pit != '&') 9754 gv_fetchsv(sym, 9755 GV_ADDMULTI, 9756 ((PL_tokenbuf[0] == '$') ? SVt_PV 9757 : (PL_tokenbuf[0] == '@') ? SVt_PVAV 9758 : SVt_PVHV)); 9759 return BAREWORD; 9760 } 9761 9762 pl_yylval.opval = newOP(OP_PADANY, 0); 9763 pl_yylval.opval->op_targ = tmp; 9764 return PRIVATEREF; 9765 } 9766 } 9767 9768 /* 9769 Whine if they've said @foo or @foo{key} in a doublequoted string, 9770 and @foo (or %foo) isn't a variable we can find in the symbol 9771 table. 9772 */ 9773 if (ckWARN(WARN_AMBIGUOUS) 9774 && pit == '@' 9775 && PL_lex_state != LEX_NORMAL 9776 && !PL_lex_brackets) 9777 { 9778 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, 9779 ( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG, 9780 SVt_PVAV); 9781 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv))) 9782 ) 9783 { 9784 /* Downgraded from fatal to warning 20000522 mjd */ 9785 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 9786 "Possible unintended interpolation of %" UTF8f 9787 " in string", 9788 UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf)); 9789 } 9790 } 9791 9792 /* build ops for a bareword */ 9793 pl_yylval.opval = newSVOP(OP_CONST, 0, 9794 newSVpvn_flags(PL_tokenbuf + 1, 9795 tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, 9796 UTF ? SVf_UTF8 : 0 )); 9797 pl_yylval.opval->op_private = OPpCONST_ENTERED; 9798 if (pit != '&') 9799 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, 9800 (PL_in_eval ? GV_ADDMULTI : GV_ADD) 9801 | ( UTF ? SVf_UTF8 : 0 ), 9802 ((PL_tokenbuf[0] == '$') ? SVt_PV 9803 : (PL_tokenbuf[0] == '@') ? SVt_PVAV 9804 : SVt_PVHV)); 9805 return BAREWORD; 9806 } 9807 9808 STATIC void 9809 S_checkcomma(pTHX_ const char *s, const char *name, const char *what) 9810 { 9811 PERL_ARGS_ASSERT_CHECKCOMMA; 9812 9813 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */ 9814 if (ckWARN(WARN_SYNTAX)) { 9815 int level = 1; 9816 const char *w; 9817 for (w = s+2; *w && level; w++) { 9818 if (*w == '(') 9819 ++level; 9820 else if (*w == ')') 9821 --level; 9822 } 9823 while (isSPACE(*w)) 9824 ++w; 9825 /* the list of chars below is for end of statements or 9826 * block / parens, boolean operators (&&, ||, //) and branch 9827 * constructs (or, and, if, until, unless, while, err, for). 9828 * Not a very solid hack... */ 9829 if (!*w || !memCHRs(";&/|})]oaiuwef!=", *w)) 9830 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 9831 "%s (...) interpreted as function",name); 9832 } 9833 } 9834 while (s < PL_bufend && isSPACE(*s)) 9835 s++; 9836 if (*s == '(') 9837 s++; 9838 while (s < PL_bufend && isSPACE(*s)) 9839 s++; 9840 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) { 9841 const char * const w = s; 9842 s += UTF ? UTF8SKIP(s) : 1; 9843 while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)) 9844 s += UTF ? UTF8SKIP(s) : 1; 9845 while (s < PL_bufend && isSPACE(*s)) 9846 s++; 9847 if (*s == ',') { 9848 GV* gv; 9849 if (keyword(w, s - w, 0)) 9850 return; 9851 9852 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV); 9853 if (gv && GvCVu(gv)) 9854 return; 9855 if (s - w <= 254) { 9856 PADOFFSET off; 9857 char tmpbuf[256]; 9858 Copy(w, tmpbuf+1, s - w, char); 9859 *tmpbuf = '&'; 9860 off = pad_findmy_pvn(tmpbuf, s-w+1, 0); 9861 if (off != NOT_IN_PAD) return; 9862 } 9863 Perl_croak(aTHX_ "No comma allowed after %s", what); 9864 } 9865 } 9866 } 9867 9868 /* S_new_constant(): do any overload::constant lookup. 9869 9870 Either returns sv, or mortalizes/frees sv and returns a new SV*. 9871 Best used as sv=new_constant(..., sv, ...). 9872 If s, pv are NULL, calls subroutine with one argument, 9873 and <type> is used with error messages only. 9874 <type> is assumed to be well formed UTF-8. 9875 9876 If error_msg is not NULL, *error_msg will be set to any error encountered. 9877 Otherwise yyerror() will be used to output it */ 9878 9879 STATIC SV * 9880 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen, 9881 SV *sv, SV *pv, const char *type, STRLEN typelen, 9882 const char ** error_msg) 9883 { 9884 dSP; 9885 HV * table = GvHV(PL_hintgv); /* ^H */ 9886 SV *res; 9887 SV *errsv = NULL; 9888 SV **cvp; 9889 SV *cv, *typesv; 9890 const char *why1 = "", *why2 = "", *why3 = ""; 9891 const char * optional_colon = ":"; /* Only some messages have a colon */ 9892 char *msg; 9893 9894 PERL_ARGS_ASSERT_NEW_CONSTANT; 9895 /* We assume that this is true: */ 9896 assert(type || s); 9897 9898 sv_2mortal(sv); /* Parent created it permanently */ 9899 9900 if ( ! table 9901 || ! (PL_hints & HINT_LOCALIZE_HH)) 9902 { 9903 why1 = "unknown"; 9904 optional_colon = ""; 9905 goto report; 9906 } 9907 9908 cvp = hv_fetch(table, key, keylen, FALSE); 9909 if (!cvp || !SvOK(*cvp)) { 9910 why1 = "$^H{"; 9911 why2 = key; 9912 why3 = "} is not defined"; 9913 goto report; 9914 } 9915 9916 cv = *cvp; 9917 if (!pv && s) 9918 pv = newSVpvn_flags(s, len, SVs_TEMP); 9919 if (type && pv) 9920 typesv = newSVpvn_flags(type, typelen, SVs_TEMP); 9921 else 9922 typesv = &PL_sv_undef; 9923 9924 PUSHSTACKi(PERLSI_OVERLOAD); 9925 ENTER ; 9926 SAVETMPS; 9927 9928 PUSHMARK(SP) ; 9929 EXTEND(sp, 3); 9930 if (pv) 9931 PUSHs(pv); 9932 PUSHs(sv); 9933 if (pv) 9934 PUSHs(typesv); 9935 PUTBACK; 9936 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL)); 9937 9938 SPAGAIN ; 9939 9940 /* Check the eval first */ 9941 if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) { 9942 STRLEN errlen; 9943 const char * errstr; 9944 sv_catpvs(errsv, "Propagated"); 9945 errstr = SvPV_const(errsv, errlen); 9946 yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */ 9947 (void)POPs; 9948 res = SvREFCNT_inc_simple_NN(sv); 9949 } 9950 else { 9951 res = POPs; 9952 SvREFCNT_inc_simple_void_NN(res); 9953 } 9954 9955 PUTBACK ; 9956 FREETMPS ; 9957 LEAVE ; 9958 POPSTACK; 9959 9960 if (SvOK(res)) { 9961 return res; 9962 } 9963 9964 sv = res; 9965 (void)sv_2mortal(sv); 9966 9967 why1 = "Call to &{$^H{"; 9968 why2 = key; 9969 why3 = "}} did not return a defined value"; 9970 9971 report: 9972 9973 msg = Perl_form(aTHX_ "Constant(%.*s)%s %s%s%s", 9974 (int)(type ? typelen : len), 9975 (type ? type: s), 9976 optional_colon, 9977 why1, why2, why3); 9978 if (error_msg) { 9979 *error_msg = msg; 9980 } 9981 else { 9982 yyerror_pv(msg, UTF ? SVf_UTF8 : 0); 9983 } 9984 return SvREFCNT_inc_simple_NN(sv); 9985 } 9986 9987 PERL_STATIC_INLINE void 9988 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, 9989 bool is_utf8, bool check_dollar, bool tick_warn) 9990 { 9991 int saw_tick = 0; 9992 const char *olds = *s; 9993 PERL_ARGS_ASSERT_PARSE_IDENT; 9994 9995 while (*s < PL_bufend) { 9996 if (*d >= e) 9997 Perl_croak(aTHX_ "%s", ident_too_long); 9998 if (is_utf8 && isIDFIRST_utf8_safe(*s, PL_bufend)) { 9999 /* The UTF-8 case must come first, otherwise things 10000 * like c\N{COMBINING TILDE} would start failing, as the 10001 * isWORDCHAR_A case below would gobble the 'c' up. 10002 */ 10003 10004 char *t = *s + UTF8SKIP(*s); 10005 while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) { 10006 t += UTF8SKIP(t); 10007 } 10008 if (*d + (t - *s) > e) 10009 Perl_croak(aTHX_ "%s", ident_too_long); 10010 Copy(*s, *d, t - *s, char); 10011 *d += t - *s; 10012 *s = t; 10013 } 10014 else if ( isWORDCHAR_A(**s) ) { 10015 do { 10016 *(*d)++ = *(*s)++; 10017 } while (isWORDCHAR_A(**s) && *d < e); 10018 } 10019 else if ( allow_package 10020 && **s == '\'' 10021 && isIDFIRST_lazy_if_safe((*s)+1, PL_bufend, is_utf8)) 10022 { 10023 *(*d)++ = ':'; 10024 *(*d)++ = ':'; 10025 (*s)++; 10026 saw_tick++; 10027 } 10028 else if (allow_package && **s == ':' && (*s)[1] == ':' 10029 /* Disallow things like Foo::$bar. For the curious, this is 10030 * the code path that triggers the "Bad name after" warning 10031 * when looking for barewords. 10032 */ 10033 && !(check_dollar && (*s)[2] == '$')) { 10034 *(*d)++ = *(*s)++; 10035 *(*d)++ = *(*s)++; 10036 } 10037 else 10038 break; 10039 } 10040 if (UNLIKELY(tick_warn && saw_tick && PL_lex_state == LEX_INTERPNORMAL 10041 && !PL_lex_brackets && ckWARN(WARN_SYNTAX))) { 10042 char *this_d; 10043 char *d2; 10044 Newx(this_d, *s - olds + saw_tick + 2, char); /* +2 for $# */ 10045 d2 = this_d; 10046 SAVEFREEPV(this_d); 10047 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 10048 "Old package separator used in string"); 10049 if (olds[-1] == '#') 10050 *d2++ = olds[-2]; 10051 *d2++ = olds[-1]; 10052 while (olds < *s) { 10053 if (*olds == '\'') { 10054 *d2++ = '\\'; 10055 *d2++ = *olds++; 10056 } 10057 else 10058 *d2++ = *olds++; 10059 } 10060 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 10061 "\t(Did you mean \"%" UTF8f "\" instead?)\n", 10062 UTF8fARG(is_utf8, d2-this_d, this_d)); 10063 } 10064 return; 10065 } 10066 10067 /* Returns a NUL terminated string, with the length of the string written to 10068 *slp 10069 */ 10070 char * 10071 Perl_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp) 10072 { 10073 char *d = dest; 10074 char * const e = d + destlen - 3; /* two-character token, ending NUL */ 10075 bool is_utf8 = cBOOL(UTF); 10076 10077 PERL_ARGS_ASSERT_SCAN_WORD; 10078 10079 parse_ident(&s, &d, e, allow_package, is_utf8, TRUE, FALSE); 10080 *d = '\0'; 10081 *slp = d - dest; 10082 return s; 10083 } 10084 10085 /* Is the byte 'd' a legal single character identifier name? 'u' is true 10086 * iff Unicode semantics are to be used. The legal ones are any of: 10087 * a) all ASCII characters except: 10088 * 1) control and space-type ones, like NUL, SOH, \t, and SPACE; 10089 * 2) '{' 10090 * The final case currently doesn't get this far in the program, so we 10091 * don't test for it. If that were to change, it would be ok to allow it. 10092 * b) When not under Unicode rules, any upper Latin1 character 10093 * c) Otherwise, when unicode rules are used, all XIDS characters. 10094 * 10095 * Because all ASCII characters have the same representation whether 10096 * encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and 10097 * '{' without knowing if is UTF-8 or not. */ 10098 #define VALID_LEN_ONE_IDENT(s, e, is_utf8) \ 10099 (isGRAPH_A(*(s)) || ((is_utf8) \ 10100 ? isIDFIRST_utf8_safe(s, e) \ 10101 : (isGRAPH_L1(*s) \ 10102 && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD))))) 10103 10104 STATIC char * 10105 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) 10106 { 10107 I32 herelines = PL_parser->herelines; 10108 SSize_t bracket = -1; 10109 char funny = *s++; 10110 char *d = dest; 10111 char * const e = d + destlen - 3; /* two-character token, ending NUL */ 10112 bool is_utf8 = cBOOL(UTF); 10113 I32 orig_copline = 0, tmp_copline = 0; 10114 10115 PERL_ARGS_ASSERT_SCAN_IDENT; 10116 10117 if (isSPACE(*s) || !*s) 10118 s = skipspace(s); 10119 if (isDIGIT(*s)) { /* handle $0 and $1 $2 and $10 and etc */ 10120 bool is_zero= *s == '0' ? TRUE : FALSE; 10121 char *digit_start= d; 10122 *d++ = *s++; 10123 while (s < PL_bufend && isDIGIT(*s)) { 10124 if (d >= e) 10125 Perl_croak(aTHX_ "%s", ident_too_long); 10126 *d++ = *s++; 10127 } 10128 if (is_zero && d - digit_start > 1) 10129 Perl_croak(aTHX_ ident_var_zero_multi_digit); 10130 } 10131 else { /* See if it is a "normal" identifier */ 10132 parse_ident(&s, &d, e, 1, is_utf8, FALSE, TRUE); 10133 } 10134 *d = '\0'; 10135 d = dest; 10136 if (*d) { 10137 /* Either a digit variable, or parse_ident() found an identifier 10138 (anything valid as a bareword), so job done and return. */ 10139 if (PL_lex_state != LEX_NORMAL) 10140 PL_lex_state = LEX_INTERPENDMAYBE; 10141 return s; 10142 } 10143 10144 /* Here, it is not a run-of-the-mill identifier name */ 10145 10146 if (*s == '$' && s[1] 10147 && ( isIDFIRST_lazy_if_safe(s+1, PL_bufend, is_utf8) 10148 || isDIGIT_A((U8)s[1]) 10149 || s[1] == '$' 10150 || s[1] == '{' 10151 || memBEGINs(s+1, (STRLEN) (PL_bufend - (s+1)), "::")) ) 10152 { 10153 /* Dereferencing a value in a scalar variable. 10154 The alternatives are different syntaxes for a scalar variable. 10155 Using ' as a leading package separator isn't allowed. :: is. */ 10156 return s; 10157 } 10158 /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...} */ 10159 if (*s == '{') { 10160 bracket = s - SvPVX(PL_linestr); 10161 s++; 10162 orig_copline = CopLINE(PL_curcop); 10163 if (s < PL_bufend && isSPACE(*s)) { 10164 s = skipspace(s); 10165 } 10166 } 10167 if ((s <= PL_bufend - ((is_utf8) 10168 ? UTF8SKIP(s) 10169 : 1)) 10170 && VALID_LEN_ONE_IDENT(s, PL_bufend, is_utf8)) 10171 { 10172 if (is_utf8) { 10173 const STRLEN skip = UTF8SKIP(s); 10174 STRLEN i; 10175 d[skip] = '\0'; 10176 for ( i = 0; i < skip; i++ ) 10177 d[i] = *s++; 10178 } 10179 else { 10180 *d = *s++; 10181 /* special case to handle ${10}, ${11} the same way we handle ${1} etc */ 10182 if (isDIGIT(*d)) { 10183 bool is_zero= *d == '0' ? TRUE : FALSE; 10184 char *digit_start= d; 10185 while (s < PL_bufend && isDIGIT(*s)) { 10186 d++; 10187 if (d >= e) 10188 Perl_croak(aTHX_ "%s", ident_too_long); 10189 *d= *s++; 10190 } 10191 if (is_zero && d - digit_start > 1) 10192 Perl_croak(aTHX_ ident_var_zero_multi_digit); 10193 } 10194 d[1] = '\0'; 10195 } 10196 } 10197 /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */ 10198 if (*d == '^' && *s && isCONTROLVAR(*s)) { 10199 *d = toCTRL(*s); 10200 s++; 10201 } 10202 /* Warn about ambiguous code after unary operators if {...} notation isn't 10203 used. There's no difference in ambiguity; it's merely a heuristic 10204 about when not to warn. */ 10205 else if (ck_uni && bracket == -1) 10206 check_uni(); 10207 if (bracket != -1) { 10208 bool skip; 10209 char *s2; 10210 /* If we were processing {...} notation then... */ 10211 if (isIDFIRST_lazy_if_safe(d, e, is_utf8) 10212 || (!isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */ 10213 && isWORDCHAR(*s)) 10214 ) { 10215 /* note we have to check for a normal identifier first, 10216 * as it handles utf8 symbols, and only after that has 10217 * been ruled out can we look at the caret words */ 10218 if (isIDFIRST_lazy_if_safe(d, e, is_utf8) ) { 10219 /* if it starts as a valid identifier, assume that it is one. 10220 (the later check for } being at the expected point will trap 10221 cases where this doesn't pan out.) */ 10222 d += is_utf8 ? UTF8SKIP(d) : 1; 10223 parse_ident(&s, &d, e, 1, is_utf8, TRUE, TRUE); 10224 *d = '\0'; 10225 } 10226 else { /* caret word: ${^Foo} ${^CAPTURE[0]} */ 10227 d++; 10228 while (isWORDCHAR(*s) && d < e) { 10229 *d++ = *s++; 10230 } 10231 if (d >= e) 10232 Perl_croak(aTHX_ "%s", ident_too_long); 10233 *d = '\0'; 10234 } 10235 tmp_copline = CopLINE(PL_curcop); 10236 if (s < PL_bufend && isSPACE(*s)) { 10237 s = skipspace(s); 10238 } 10239 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) { 10240 /* ${foo[0]} and ${foo{bar}} and ${^CAPTURE[0]} notation. */ 10241 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) { 10242 const char * const brack = 10243 (const char *) 10244 ((*s == '[') ? "[...]" : "{...}"); 10245 orig_copline = CopLINE(PL_curcop); 10246 CopLINE_set(PL_curcop, tmp_copline); 10247 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */ 10248 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 10249 "Ambiguous use of %c{%s%s} resolved to %c%s%s", 10250 funny, dest, brack, funny, dest, brack); 10251 CopLINE_set(PL_curcop, orig_copline); 10252 } 10253 bracket++; 10254 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK); 10255 PL_lex_allbrackets++; 10256 return s; 10257 } 10258 } 10259 10260 if ( !tmp_copline ) 10261 tmp_copline = CopLINE(PL_curcop); 10262 if ((skip = s < PL_bufend && isSPACE(*s))) { 10263 /* Avoid incrementing line numbers or resetting PL_linestart, 10264 in case we have to back up. */ 10265 STRLEN s_off = s - SvPVX(PL_linestr); 10266 s2 = peekspace(s); 10267 s = SvPVX(PL_linestr) + s_off; 10268 } 10269 else 10270 s2 = s; 10271 10272 /* Expect to find a closing } after consuming any trailing whitespace. 10273 */ 10274 if (*s2 == '}') { 10275 /* Now increment line numbers if applicable. */ 10276 if (skip) 10277 s = skipspace(s); 10278 s++; 10279 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) { 10280 PL_lex_state = LEX_INTERPEND; 10281 PL_expect = XREF; 10282 } 10283 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) { 10284 if (ckWARN(WARN_AMBIGUOUS) 10285 && (keyword(dest, d - dest, 0) 10286 || get_cvn_flags(dest, d - dest, is_utf8 10287 ? SVf_UTF8 10288 : 0))) 10289 { 10290 SV *tmp = newSVpvn_flags( dest, d - dest, 10291 SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) ); 10292 if (funny == '#') 10293 funny = '@'; 10294 orig_copline = CopLINE(PL_curcop); 10295 CopLINE_set(PL_curcop, tmp_copline); 10296 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 10297 "Ambiguous use of %c{%" SVf "} resolved to %c%" SVf, 10298 funny, SVfARG(tmp), funny, SVfARG(tmp)); 10299 CopLINE_set(PL_curcop, orig_copline); 10300 } 10301 } 10302 } 10303 else { 10304 /* Didn't find the closing } at the point we expected, so restore 10305 state such that the next thing to process is the opening { and */ 10306 s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */ 10307 CopLINE_set(PL_curcop, orig_copline); 10308 PL_parser->herelines = herelines; 10309 *dest = '\0'; 10310 PL_parser->sub_no_recover = TRUE; 10311 } 10312 } 10313 else if ( PL_lex_state == LEX_INTERPNORMAL 10314 && !PL_lex_brackets 10315 && !intuit_more(s, PL_bufend)) 10316 PL_lex_state = LEX_INTERPEND; 10317 return s; 10318 } 10319 10320 static bool 10321 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) { 10322 10323 /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag 10324 * found in the parse starting at 's', based on the subset that are valid 10325 * in this context input to this routine in 'valid_flags'. Advances s. 10326 * Returns TRUE if the input should be treated as a valid flag, so the next 10327 * char may be as well; otherwise FALSE. 'charset' should point to a NUL 10328 * upon first call on the current regex. This routine will set it to any 10329 * charset modifier found. The caller shouldn't change it. This way, 10330 * another charset modifier encountered in the parse can be detected as an 10331 * error, as we have decided to allow only one */ 10332 10333 const char c = **s; 10334 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1; 10335 10336 if ( charlen != 1 || ! strchr(valid_flags, c) ) { 10337 if (isWORDCHAR_lazy_if_safe( *s, PL_bufend, UTF)) { 10338 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s), 10339 UTF ? SVf_UTF8 : 0); 10340 (*s) += charlen; 10341 /* Pretend that it worked, so will continue processing before 10342 * dieing */ 10343 return TRUE; 10344 } 10345 return FALSE; 10346 } 10347 10348 switch (c) { 10349 10350 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count); 10351 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break; 10352 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break; 10353 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break; 10354 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break; 10355 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break; 10356 case LOCALE_PAT_MOD: 10357 if (*charset) { 10358 goto multiple_charsets; 10359 } 10360 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET); 10361 *charset = c; 10362 break; 10363 case UNICODE_PAT_MOD: 10364 if (*charset) { 10365 goto multiple_charsets; 10366 } 10367 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET); 10368 *charset = c; 10369 break; 10370 case ASCII_RESTRICT_PAT_MOD: 10371 if (! *charset) { 10372 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET); 10373 } 10374 else { 10375 10376 /* Error if previous modifier wasn't an 'a', but if it was, see 10377 * if, and accept, a second occurrence (only) */ 10378 if (*charset != 'a' 10379 || get_regex_charset(*pmfl) 10380 != REGEX_ASCII_RESTRICTED_CHARSET) 10381 { 10382 goto multiple_charsets; 10383 } 10384 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET); 10385 } 10386 *charset = c; 10387 break; 10388 case DEPENDS_PAT_MOD: 10389 if (*charset) { 10390 goto multiple_charsets; 10391 } 10392 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET); 10393 *charset = c; 10394 break; 10395 } 10396 10397 (*s)++; 10398 return TRUE; 10399 10400 multiple_charsets: 10401 if (*charset != c) { 10402 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c)); 10403 } 10404 else if (c == 'a') { 10405 /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */ 10406 yyerror("Regexp modifier \"/a\" may appear a maximum of twice"); 10407 } 10408 else { 10409 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c)); 10410 } 10411 10412 /* Pretend that it worked, so will continue processing before dieing */ 10413 (*s)++; 10414 return TRUE; 10415 } 10416 10417 STATIC char * 10418 S_scan_pat(pTHX_ char *start, I32 type) 10419 { 10420 PMOP *pm; 10421 char *s; 10422 const char * const valid_flags = 10423 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS); 10424 char charset = '\0'; /* character set modifier */ 10425 unsigned int x_mod_count = 0; 10426 10427 PERL_ARGS_ASSERT_SCAN_PAT; 10428 10429 s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL); 10430 if (!s) 10431 Perl_croak(aTHX_ "Search pattern not terminated"); 10432 10433 pm = (PMOP*)newPMOP(type, 0); 10434 if (PL_multi_open == '?') { 10435 /* This is the only point in the code that sets PMf_ONCE: */ 10436 pm->op_pmflags |= PMf_ONCE; 10437 10438 /* Hence it's safe to do this bit of PMOP book-keeping here, which 10439 allows us to restrict the list needed by reset to just the ?? 10440 matches. */ 10441 assert(type != OP_TRANS); 10442 if (PL_curstash) { 10443 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab); 10444 U32 elements; 10445 if (!mg) { 10446 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0, 10447 0); 10448 } 10449 elements = mg->mg_len / sizeof(PMOP**); 10450 Renewc(mg->mg_ptr, elements + 1, PMOP*, char); 10451 ((PMOP**)mg->mg_ptr) [elements++] = pm; 10452 mg->mg_len = elements * sizeof(PMOP**); 10453 PmopSTASH_set(pm,PL_curstash); 10454 } 10455 } 10456 10457 /* if qr/...(?{..}).../, then need to parse the pattern within a new 10458 * anon CV. False positives like qr/[(?{]/ are harmless */ 10459 10460 if (type == OP_QR) { 10461 STRLEN len; 10462 char *e, *p = SvPV(PL_lex_stuff, len); 10463 e = p + len; 10464 for (; p < e; p++) { 10465 if (p[0] == '(' && p[1] == '?' 10466 && (p[2] == '{' || (p[2] == '?' && p[3] == '{'))) 10467 { 10468 pm->op_pmflags |= PMf_HAS_CV; 10469 break; 10470 } 10471 } 10472 pm->op_pmflags |= PMf_IS_QR; 10473 } 10474 10475 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), 10476 &s, &charset, &x_mod_count)) 10477 {}; 10478 /* issue a warning if /c is specified,but /g is not */ 10479 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL)) 10480 { 10481 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), 10482 "Use of /c modifier is meaningless without /g" ); 10483 } 10484 10485 PL_lex_op = (OP*)pm; 10486 pl_yylval.ival = OP_MATCH; 10487 return s; 10488 } 10489 10490 STATIC char * 10491 S_scan_subst(pTHX_ char *start) 10492 { 10493 char *s; 10494 PMOP *pm; 10495 I32 first_start; 10496 line_t first_line; 10497 line_t linediff = 0; 10498 I32 es = 0; 10499 char charset = '\0'; /* character set modifier */ 10500 unsigned int x_mod_count = 0; 10501 char *t; 10502 10503 PERL_ARGS_ASSERT_SCAN_SUBST; 10504 10505 pl_yylval.ival = OP_NULL; 10506 10507 s = scan_str(start, TRUE, FALSE, FALSE, &t); 10508 10509 if (!s) 10510 Perl_croak(aTHX_ "Substitution pattern not terminated"); 10511 10512 s = t; 10513 10514 first_start = PL_multi_start; 10515 first_line = CopLINE(PL_curcop); 10516 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 10517 if (!s) { 10518 SvREFCNT_dec_NN(PL_lex_stuff); 10519 PL_lex_stuff = NULL; 10520 Perl_croak(aTHX_ "Substitution replacement not terminated"); 10521 } 10522 PL_multi_start = first_start; /* so whole substitution is taken together */ 10523 10524 pm = (PMOP*)newPMOP(OP_SUBST, 0); 10525 10526 10527 while (*s) { 10528 if (*s == EXEC_PAT_MOD) { 10529 s++; 10530 es++; 10531 } 10532 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), 10533 &s, &charset, &x_mod_count)) 10534 { 10535 break; 10536 } 10537 } 10538 10539 if ((pm->op_pmflags & PMf_CONTINUE)) { 10540 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" ); 10541 } 10542 10543 if (es) { 10544 SV * const repl = newSVpvs(""); 10545 10546 PL_multi_end = 0; 10547 pm->op_pmflags |= PMf_EVAL; 10548 for (; es > 1; es--) { 10549 sv_catpvs(repl, "eval "); 10550 } 10551 sv_catpvs(repl, "do {"); 10552 sv_catsv(repl, PL_parser->lex_sub_repl); 10553 sv_catpvs(repl, "}"); 10554 SvREFCNT_dec(PL_parser->lex_sub_repl); 10555 PL_parser->lex_sub_repl = repl; 10556 } 10557 10558 10559 linediff = CopLINE(PL_curcop) - first_line; 10560 if (linediff) 10561 CopLINE_set(PL_curcop, first_line); 10562 10563 if (linediff || es) { 10564 /* the IVX field indicates that the replacement string is a s///e; 10565 * the NVX field indicates how many src code lines the replacement 10566 * spreads over */ 10567 sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV); 10568 ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = linediff; 10569 ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen = 10570 cBOOL(es); 10571 } 10572 10573 PL_lex_op = (OP*)pm; 10574 pl_yylval.ival = OP_SUBST; 10575 return s; 10576 } 10577 10578 STATIC char * 10579 S_scan_trans(pTHX_ char *start) 10580 { 10581 char* s; 10582 OP *o; 10583 U8 squash; 10584 U8 del; 10585 U8 complement; 10586 bool nondestruct = 0; 10587 char *t; 10588 10589 PERL_ARGS_ASSERT_SCAN_TRANS; 10590 10591 pl_yylval.ival = OP_NULL; 10592 10593 s = scan_str(start,FALSE,FALSE,FALSE,&t); 10594 if (!s) 10595 Perl_croak(aTHX_ "Transliteration pattern not terminated"); 10596 10597 s = t; 10598 10599 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 10600 if (!s) { 10601 SvREFCNT_dec_NN(PL_lex_stuff); 10602 PL_lex_stuff = NULL; 10603 Perl_croak(aTHX_ "Transliteration replacement not terminated"); 10604 } 10605 10606 complement = del = squash = 0; 10607 while (1) { 10608 switch (*s) { 10609 case 'c': 10610 complement = OPpTRANS_COMPLEMENT; 10611 break; 10612 case 'd': 10613 del = OPpTRANS_DELETE; 10614 break; 10615 case 's': 10616 squash = OPpTRANS_SQUASH; 10617 break; 10618 case 'r': 10619 nondestruct = 1; 10620 break; 10621 default: 10622 goto no_more; 10623 } 10624 s++; 10625 } 10626 no_more: 10627 10628 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL); 10629 o->op_private &= ~OPpTRANS_ALL; 10630 o->op_private |= del|squash|complement; 10631 10632 PL_lex_op = o; 10633 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS; 10634 10635 10636 return s; 10637 } 10638 10639 /* scan_heredoc 10640 Takes a pointer to the first < in <<FOO. 10641 Returns a pointer to the byte following <<FOO. 10642 10643 This function scans a heredoc, which involves different methods 10644 depending on whether we are in a string eval, quoted construct, etc. 10645 This is because PL_linestr could containing a single line of input, or 10646 a whole string being evalled, or the contents of the current quote- 10647 like operator. 10648 10649 The two basic methods are: 10650 - Steal lines from the input stream 10651 - Scan the heredoc in PL_linestr and remove it therefrom 10652 10653 In a file scope or filtered eval, the first method is used; in a 10654 string eval, the second. 10655 10656 In a quote-like operator, we have to choose between the two, 10657 depending on where we can find a newline. We peek into outer lex- 10658 ing scopes until we find one with a newline in it. If we reach the 10659 outermost lexing scope and it is a file, we use the stream method. 10660 Otherwise it is treated as an eval. 10661 */ 10662 10663 STATIC char * 10664 S_scan_heredoc(pTHX_ char *s) 10665 { 10666 I32 op_type = OP_SCALAR; 10667 I32 len; 10668 SV *tmpstr; 10669 char term; 10670 char *d; 10671 char *e; 10672 char *peek; 10673 char *indent = 0; 10674 I32 indent_len = 0; 10675 bool indented = FALSE; 10676 const bool infile = PL_rsfp || PL_parser->filtered; 10677 const line_t origline = CopLINE(PL_curcop); 10678 LEXSHARED *shared = PL_parser->lex_shared; 10679 10680 PERL_ARGS_ASSERT_SCAN_HEREDOC; 10681 10682 s += 2; 10683 d = PL_tokenbuf + 1; 10684 e = PL_tokenbuf + sizeof PL_tokenbuf - 1; 10685 *PL_tokenbuf = '\n'; 10686 peek = s; 10687 10688 if (*peek == '~') { 10689 indented = TRUE; 10690 peek++; s++; 10691 } 10692 10693 while (SPACE_OR_TAB(*peek)) 10694 peek++; 10695 10696 if (*peek == '`' || *peek == '\'' || *peek =='"') { 10697 s = peek; 10698 term = *s++; 10699 s = delimcpy(d, e, s, PL_bufend, term, &len); 10700 if (s == PL_bufend) 10701 Perl_croak(aTHX_ "Unterminated delimiter for here document"); 10702 d += len; 10703 s++; 10704 } 10705 else { 10706 if (*s == '\\') 10707 /* <<\FOO is equivalent to <<'FOO' */ 10708 s++, term = '\''; 10709 else 10710 term = '"'; 10711 10712 if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)) 10713 Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden"); 10714 10715 peek = s; 10716 10717 while (isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF)) { 10718 peek += UTF ? UTF8SKIP(peek) : 1; 10719 } 10720 10721 len = (peek - s >= e - d) ? (e - d) : (peek - s); 10722 Copy(s, d, len, char); 10723 s += len; 10724 d += len; 10725 } 10726 10727 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1) 10728 Perl_croak(aTHX_ "Delimiter for here document is too long"); 10729 10730 *d++ = '\n'; 10731 *d = '\0'; 10732 len = d - PL_tokenbuf; 10733 10734 #ifndef PERL_STRICT_CR 10735 d = (char *) memchr(s, '\r', PL_bufend - s); 10736 if (d) { 10737 char * const olds = s; 10738 s = d; 10739 while (s < PL_bufend) { 10740 if (*s == '\r') { 10741 *d++ = '\n'; 10742 if (*++s == '\n') 10743 s++; 10744 } 10745 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */ 10746 *d++ = *s++; 10747 s++; 10748 } 10749 else 10750 *d++ = *s++; 10751 } 10752 *d = '\0'; 10753 PL_bufend = d; 10754 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr)); 10755 s = olds; 10756 } 10757 #endif 10758 10759 tmpstr = newSV_type(SVt_PVIV); 10760 SvGROW(tmpstr, 80); 10761 if (term == '\'') { 10762 op_type = OP_CONST; 10763 SvIV_set(tmpstr, -1); 10764 } 10765 else if (term == '`') { 10766 op_type = OP_BACKTICK; 10767 SvIV_set(tmpstr, '\\'); 10768 } 10769 10770 PL_multi_start = origline + 1 + PL_parser->herelines; 10771 PL_multi_open = PL_multi_close = '<'; 10772 10773 /* inside a string eval or quote-like operator */ 10774 if (!infile || PL_lex_inwhat) { 10775 SV *linestr; 10776 char *bufend; 10777 char * const olds = s; 10778 PERL_CONTEXT * const cx = CX_CUR(); 10779 /* These two fields are not set until an inner lexing scope is 10780 entered. But we need them set here. */ 10781 shared->ls_bufptr = s; 10782 shared->ls_linestr = PL_linestr; 10783 10784 if (PL_lex_inwhat) { 10785 /* Look for a newline. If the current buffer does not have one, 10786 peek into the line buffer of the parent lexing scope, going 10787 up as many levels as necessary to find one with a newline 10788 after bufptr. 10789 */ 10790 while (!(s = (char *)memchr( 10791 (void *)shared->ls_bufptr, '\n', 10792 SvEND(shared->ls_linestr)-shared->ls_bufptr 10793 ))) 10794 { 10795 shared = shared->ls_prev; 10796 /* shared is only null if we have gone beyond the outermost 10797 lexing scope. In a file, we will have broken out of the 10798 loop in the previous iteration. In an eval, the string buf- 10799 fer ends with "\n;", so the while condition above will have 10800 evaluated to false. So shared can never be null. Or so you 10801 might think. Odd syntax errors like s;@{<<; can gobble up 10802 the implicit semicolon at the end of a flie, causing the 10803 file handle to be closed even when we are not in a string 10804 eval. So shared may be null in that case. 10805 (Closing '>>}' here to balance the earlier open brace for 10806 editors that look for matched pairs.) */ 10807 if (UNLIKELY(!shared)) 10808 goto interminable; 10809 /* A LEXSHARED struct with a null ls_prev pointer is the outer- 10810 most lexing scope. In a file, shared->ls_linestr at that 10811 level is just one line, so there is no body to steal. */ 10812 if (infile && !shared->ls_prev) { 10813 s = olds; 10814 goto streaming; 10815 } 10816 } 10817 } 10818 else { /* eval or we've already hit EOF */ 10819 s = (char*)memchr((void*)s, '\n', PL_bufend - s); 10820 if (!s) 10821 goto interminable; 10822 } 10823 10824 linestr = shared->ls_linestr; 10825 bufend = SvEND(linestr); 10826 d = s; 10827 if (indented) { 10828 char *myolds = s; 10829 10830 while (s < bufend - len + 1) { 10831 if (*s++ == '\n') 10832 ++PL_parser->herelines; 10833 10834 if (memEQ(s, PL_tokenbuf + 1, len - 1)) { 10835 char *backup = s; 10836 indent_len = 0; 10837 10838 /* Only valid if it's preceded by whitespace only */ 10839 while (backup != myolds && --backup >= myolds) { 10840 if (! SPACE_OR_TAB(*backup)) { 10841 break; 10842 } 10843 indent_len++; 10844 } 10845 10846 /* No whitespace or all! */ 10847 if (backup == s || *backup == '\n') { 10848 Newx(indent, indent_len + 1, char); 10849 memcpy(indent, backup + 1, indent_len); 10850 indent[indent_len] = 0; 10851 s--; /* before our delimiter */ 10852 PL_parser->herelines--; /* this line doesn't count */ 10853 break; 10854 } 10855 } 10856 } 10857 } 10858 else { 10859 while (s < bufend - len + 1 10860 && memNE(s,PL_tokenbuf,len) ) 10861 { 10862 if (*s++ == '\n') 10863 ++PL_parser->herelines; 10864 } 10865 } 10866 10867 if (s >= bufend - len + 1) { 10868 goto interminable; 10869 } 10870 10871 sv_setpvn(tmpstr,d+1,s-d); 10872 s += len - 1; 10873 /* the preceding stmt passes a newline */ 10874 PL_parser->herelines++; 10875 10876 /* s now points to the newline after the heredoc terminator. 10877 d points to the newline before the body of the heredoc. 10878 */ 10879 10880 /* We are going to modify linestr in place here, so set 10881 aside copies of the string if necessary for re-evals or 10882 (caller $n)[6]. */ 10883 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we 10884 check shared->re_eval_str. */ 10885 if (shared->re_eval_start || shared->re_eval_str) { 10886 /* Set aside the rest of the regexp */ 10887 if (!shared->re_eval_str) 10888 shared->re_eval_str = 10889 newSVpvn(shared->re_eval_start, 10890 bufend - shared->re_eval_start); 10891 shared->re_eval_start -= s-d; 10892 } 10893 10894 if (cxstack_ix >= 0 10895 && CxTYPE(cx) == CXt_EVAL 10896 && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL 10897 && cx->blk_eval.cur_text == linestr) 10898 { 10899 cx->blk_eval.cur_text = newSVsv(linestr); 10900 cx->blk_u16 |= 0x40; /* indicate cur_text is ref counted */ 10901 } 10902 10903 /* Copy everything from s onwards back to d. */ 10904 Move(s,d,bufend-s + 1,char); 10905 SvCUR_set(linestr, SvCUR(linestr) - (s-d)); 10906 /* Setting PL_bufend only applies when we have not dug deeper 10907 into other scopes, because sublex_done sets PL_bufend to 10908 SvEND(PL_linestr). */ 10909 if (shared == PL_parser->lex_shared) 10910 PL_bufend = SvEND(linestr); 10911 s = olds; 10912 } 10913 else { 10914 SV *linestr_save; 10915 char *oldbufptr_save; 10916 char *oldoldbufptr_save; 10917 streaming: 10918 SvPVCLEAR(tmpstr); /* avoid "uninitialized" warning */ 10919 term = PL_tokenbuf[1]; 10920 len--; 10921 linestr_save = PL_linestr; /* must restore this afterwards */ 10922 d = s; /* and this */ 10923 oldbufptr_save = PL_oldbufptr; 10924 oldoldbufptr_save = PL_oldoldbufptr; 10925 PL_linestr = newSVpvs(""); 10926 PL_bufend = SvPVX(PL_linestr); 10927 10928 while (1) { 10929 PL_bufptr = PL_bufend; 10930 CopLINE_set(PL_curcop, 10931 origline + 1 + PL_parser->herelines); 10932 10933 if ( !lex_next_chunk(LEX_NO_TERM) 10934 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) 10935 { 10936 /* Simply freeing linestr_save might seem simpler here, as it 10937 does not matter what PL_linestr points to, since we are 10938 about to croak; but in a quote-like op, linestr_save 10939 will have been prospectively freed already, via 10940 SAVEFREESV(PL_linestr) in sublex_push, so it's easier to 10941 restore PL_linestr. */ 10942 SvREFCNT_dec_NN(PL_linestr); 10943 PL_linestr = linestr_save; 10944 PL_oldbufptr = oldbufptr_save; 10945 PL_oldoldbufptr = oldoldbufptr_save; 10946 goto interminable; 10947 } 10948 10949 CopLINE_set(PL_curcop, origline); 10950 10951 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') { 10952 s = lex_grow_linestr(SvLEN(PL_linestr) + 3); 10953 /* ^That should be enough to avoid this needing to grow: */ 10954 sv_catpvs(PL_linestr, "\n\0"); 10955 assert(s == SvPVX(PL_linestr)); 10956 PL_bufend = SvEND(PL_linestr); 10957 } 10958 10959 s = PL_bufptr; 10960 PL_parser->herelines++; 10961 PL_last_lop = PL_last_uni = NULL; 10962 10963 #ifndef PERL_STRICT_CR 10964 if (PL_bufend - PL_linestart >= 2) { 10965 if ( (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') 10966 || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r')) 10967 { 10968 PL_bufend[-2] = '\n'; 10969 PL_bufend--; 10970 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr)); 10971 } 10972 else if (PL_bufend[-1] == '\r') 10973 PL_bufend[-1] = '\n'; 10974 } 10975 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r') 10976 PL_bufend[-1] = '\n'; 10977 #endif 10978 10979 if (indented && (PL_bufend-s) >= len) { 10980 char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len)); 10981 10982 if (found) { 10983 char *backup = found; 10984 indent_len = 0; 10985 10986 /* Only valid if it's preceded by whitespace only */ 10987 while (backup != s && --backup >= s) { 10988 if (! SPACE_OR_TAB(*backup)) { 10989 break; 10990 } 10991 indent_len++; 10992 } 10993 10994 /* All whitespace or none! */ 10995 if (backup == found || SPACE_OR_TAB(*backup)) { 10996 Newx(indent, indent_len + 1, char); 10997 memcpy(indent, backup, indent_len); 10998 indent[indent_len] = 0; 10999 SvREFCNT_dec(PL_linestr); 11000 PL_linestr = linestr_save; 11001 PL_linestart = SvPVX(linestr_save); 11002 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 11003 PL_oldbufptr = oldbufptr_save; 11004 PL_oldoldbufptr = oldoldbufptr_save; 11005 s = d; 11006 break; 11007 } 11008 } 11009 11010 /* Didn't find it */ 11011 sv_catsv(tmpstr,PL_linestr); 11012 } 11013 else { 11014 if (*s == term && PL_bufend-s >= len 11015 && memEQ(s,PL_tokenbuf + 1,len)) 11016 { 11017 SvREFCNT_dec(PL_linestr); 11018 PL_linestr = linestr_save; 11019 PL_linestart = SvPVX(linestr_save); 11020 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 11021 PL_oldbufptr = oldbufptr_save; 11022 PL_oldoldbufptr = oldoldbufptr_save; 11023 s = d; 11024 break; 11025 } 11026 else { 11027 sv_catsv(tmpstr,PL_linestr); 11028 } 11029 } 11030 } /* while (1) */ 11031 } 11032 11033 PL_multi_end = origline + PL_parser->herelines; 11034 11035 if (indented && indent) { 11036 STRLEN linecount = 1; 11037 STRLEN herelen = SvCUR(tmpstr); 11038 char *ss = SvPVX(tmpstr); 11039 char *se = ss + herelen; 11040 SV *newstr = newSV(herelen+1); 11041 SvPOK_on(newstr); 11042 11043 /* Trim leading whitespace */ 11044 while (ss < se) { 11045 /* newline only? Copy and move on */ 11046 if (*ss == '\n') { 11047 sv_catpvs(newstr,"\n"); 11048 ss++; 11049 linecount++; 11050 11051 /* Found our indentation? Strip it */ 11052 } 11053 else if (se - ss >= indent_len 11054 && memEQ(ss, indent, indent_len)) 11055 { 11056 STRLEN le = 0; 11057 ss += indent_len; 11058 11059 while ((ss + le) < se && *(ss + le) != '\n') 11060 le++; 11061 11062 sv_catpvn(newstr, ss, le); 11063 ss += le; 11064 11065 /* Line doesn't begin with our indentation? Croak */ 11066 } 11067 else { 11068 Safefree(indent); 11069 Perl_croak(aTHX_ 11070 "Indentation on line %d of here-doc doesn't match delimiter", 11071 (int)linecount 11072 ); 11073 } 11074 } /* while */ 11075 11076 /* avoid sv_setsv() as we dont wan't to COW here */ 11077 sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr)); 11078 Safefree(indent); 11079 SvREFCNT_dec_NN(newstr); 11080 } 11081 11082 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) { 11083 SvPV_shrink_to_cur(tmpstr); 11084 } 11085 11086 if (!IN_BYTES) { 11087 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr))) 11088 SvUTF8_on(tmpstr); 11089 } 11090 11091 PL_lex_stuff = tmpstr; 11092 pl_yylval.ival = op_type; 11093 return s; 11094 11095 interminable: 11096 if (indent) 11097 Safefree(indent); 11098 SvREFCNT_dec(tmpstr); 11099 CopLINE_set(PL_curcop, origline); 11100 missingterm(PL_tokenbuf + 1, sizeof(PL_tokenbuf) - 1); 11101 } 11102 11103 11104 /* scan_inputsymbol 11105 takes: position of first '<' in input buffer 11106 returns: position of first char following the matching '>' in 11107 input buffer 11108 side-effects: pl_yylval and lex_op are set. 11109 11110 This code handles: 11111 11112 <> read from ARGV 11113 <<>> read from ARGV without magic open 11114 <FH> read from filehandle 11115 <pkg::FH> read from package qualified filehandle 11116 <pkg'FH> read from package qualified filehandle 11117 <$fh> read from filehandle in $fh 11118 <*.h> filename glob 11119 11120 */ 11121 11122 STATIC char * 11123 S_scan_inputsymbol(pTHX_ char *start) 11124 { 11125 char *s = start; /* current position in buffer */ 11126 char *end; 11127 I32 len; 11128 bool nomagicopen = FALSE; 11129 char *d = PL_tokenbuf; /* start of temp holding space */ 11130 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */ 11131 11132 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL; 11133 11134 end = (char *) memchr(s, '\n', PL_bufend - s); 11135 if (!end) 11136 end = PL_bufend; 11137 if (s[1] == '<' && s[2] == '>' && s[3] == '>') { 11138 nomagicopen = TRUE; 11139 *d = '\0'; 11140 len = 0; 11141 s += 3; 11142 } 11143 else 11144 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */ 11145 11146 /* die if we didn't have space for the contents of the <>, 11147 or if it didn't end, or if we see a newline 11148 */ 11149 11150 if (len >= (I32)sizeof PL_tokenbuf) 11151 Perl_croak(aTHX_ "Excessively long <> operator"); 11152 if (s >= end) 11153 Perl_croak(aTHX_ "Unterminated <> operator"); 11154 11155 s++; 11156 11157 /* check for <$fh> 11158 Remember, only scalar variables are interpreted as filehandles by 11159 this code. Anything more complex (e.g., <$fh{$num}>) will be 11160 treated as a glob() call. 11161 This code makes use of the fact that except for the $ at the front, 11162 a scalar variable and a filehandle look the same. 11163 */ 11164 if (*d == '$' && d[1]) d++; 11165 11166 /* allow <Pkg'VALUE> or <Pkg::VALUE> */ 11167 while (isWORDCHAR_lazy_if_safe(d, e, UTF) || *d == '\'' || *d == ':') { 11168 d += UTF ? UTF8SKIP(d) : 1; 11169 } 11170 11171 /* If we've tried to read what we allow filehandles to look like, and 11172 there's still text left, then it must be a glob() and not a getline. 11173 Use scan_str to pull out the stuff between the <> and treat it 11174 as nothing more than a string. 11175 */ 11176 11177 if (d - PL_tokenbuf != len) { 11178 pl_yylval.ival = OP_GLOB; 11179 s = scan_str(start,FALSE,FALSE,FALSE,NULL); 11180 if (!s) 11181 Perl_croak(aTHX_ "Glob not terminated"); 11182 return s; 11183 } 11184 else { 11185 bool readline_overriden = FALSE; 11186 GV *gv_readline; 11187 /* we're in a filehandle read situation */ 11188 d = PL_tokenbuf; 11189 11190 /* turn <> into <ARGV> */ 11191 if (!len) 11192 Copy("ARGV",d,5,char); 11193 11194 /* Check whether readline() is overriden */ 11195 if ((gv_readline = gv_override("readline",8))) 11196 readline_overriden = TRUE; 11197 11198 /* if <$fh>, create the ops to turn the variable into a 11199 filehandle 11200 */ 11201 if (*d == '$') { 11202 /* try to find it in the pad for this block, otherwise find 11203 add symbol table ops 11204 */ 11205 const PADOFFSET tmp = pad_findmy_pvn(d, len, 0); 11206 if (tmp != NOT_IN_PAD) { 11207 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) { 11208 HV * const stash = PAD_COMPNAME_OURSTASH(tmp); 11209 HEK * const stashname = HvNAME_HEK(stash); 11210 SV * const sym = sv_2mortal(newSVhek(stashname)); 11211 sv_catpvs(sym, "::"); 11212 sv_catpv(sym, d+1); 11213 d = SvPVX(sym); 11214 goto intro_sym; 11215 } 11216 else { 11217 OP * const o = newOP(OP_PADSV, 0); 11218 o->op_targ = tmp; 11219 PL_lex_op = readline_overriden 11220 ? newUNOP(OP_ENTERSUB, OPf_STACKED, 11221 op_append_elem(OP_LIST, o, 11222 newCVREF(0, newGVOP(OP_GV,0,gv_readline)))) 11223 : newUNOP(OP_READLINE, 0, o); 11224 } 11225 } 11226 else { 11227 GV *gv; 11228 ++d; 11229 intro_sym: 11230 gv = gv_fetchpv(d, 11231 GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ), 11232 SVt_PV); 11233 PL_lex_op = readline_overriden 11234 ? newUNOP(OP_ENTERSUB, OPf_STACKED, 11235 op_append_elem(OP_LIST, 11236 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)), 11237 newCVREF(0, newGVOP(OP_GV, 0, gv_readline)))) 11238 : newUNOP(OP_READLINE, 0, 11239 newUNOP(OP_RV2SV, 0, 11240 newGVOP(OP_GV, 0, gv))); 11241 } 11242 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */ 11243 pl_yylval.ival = OP_NULL; 11244 } 11245 11246 /* If it's none of the above, it must be a literal filehandle 11247 (<Foo::BAR> or <FOO>) so build a simple readline OP */ 11248 else { 11249 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO); 11250 PL_lex_op = readline_overriden 11251 ? newUNOP(OP_ENTERSUB, OPf_STACKED, 11252 op_append_elem(OP_LIST, 11253 newGVOP(OP_GV, 0, gv), 11254 newCVREF(0, newGVOP(OP_GV, 0, gv_readline)))) 11255 : newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv)); 11256 pl_yylval.ival = OP_NULL; 11257 11258 /* leave the token generation above to avoid confusing the parser */ 11259 if (!FEATURE_BAREWORD_FILEHANDLES_IS_ENABLED) { 11260 no_bareword_filehandle(d); 11261 } 11262 } 11263 } 11264 11265 return s; 11266 } 11267 11268 11269 /* scan_str 11270 takes: 11271 start position in buffer 11272 keep_bracketed_quoted preserve \ quoting of embedded delimiters, but 11273 only if they are of the open/close form 11274 keep_delims preserve the delimiters around the string 11275 re_reparse compiling a run-time /(?{})/: 11276 collapse // to /, and skip encoding src 11277 delimp if non-null, this is set to the position of 11278 the closing delimiter, or just after it if 11279 the closing and opening delimiters differ 11280 (i.e., the opening delimiter of a substitu- 11281 tion replacement) 11282 returns: position to continue reading from buffer 11283 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and 11284 updates the read buffer. 11285 11286 This subroutine pulls a string out of the input. It is called for: 11287 q single quotes q(literal text) 11288 ' single quotes 'literal text' 11289 qq double quotes qq(interpolate $here please) 11290 " double quotes "interpolate $here please" 11291 qx backticks qx(/bin/ls -l) 11292 ` backticks `/bin/ls -l` 11293 qw quote words @EXPORT_OK = qw( func() $spam ) 11294 m// regexp match m/this/ 11295 s/// regexp substitute s/this/that/ 11296 tr/// string transliterate tr/this/that/ 11297 y/// string transliterate y/this/that/ 11298 ($*@) sub prototypes sub foo ($) 11299 (stuff) sub attr parameters sub foo : attr(stuff) 11300 <> readline or globs <FOO>, <>, <$fh>, or <*.c> 11301 11302 In most of these cases (all but <>, patterns and transliterate) 11303 yylex() calls scan_str(). m// makes yylex() call scan_pat() which 11304 calls scan_str(). s/// makes yylex() call scan_subst() which calls 11305 scan_str(). tr/// and y/// make yylex() call scan_trans() which 11306 calls scan_str(). 11307 11308 It skips whitespace before the string starts, and treats the first 11309 character as the delimiter. If the delimiter is one of ([{< then 11310 the corresponding "close" character )]}> is used as the closing 11311 delimiter. It allows quoting of delimiters, and if the string has 11312 balanced delimiters ([{<>}]) it allows nesting. 11313 11314 On success, the SV with the resulting string is put into lex_stuff or, 11315 if that is already non-NULL, into lex_repl. The second case occurs only 11316 when parsing the RHS of the special constructs s/// and tr/// (y///). 11317 For convenience, the terminating delimiter character is stuffed into 11318 SvIVX of the SV. 11319 */ 11320 11321 char * 11322 Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse, 11323 char **delimp 11324 ) 11325 { 11326 SV *sv; /* scalar value: string */ 11327 char *s = start; /* current position in the buffer */ 11328 char *to; /* current position in the sv's data */ 11329 int brackets = 1; /* bracket nesting level */ 11330 bool d_is_utf8 = FALSE; /* is there any utf8 content? */ 11331 UV open_delim_code; /* code point */ 11332 char open_delim_str[UTF8_MAXBYTES+1]; 11333 STRLEN delim_byte_len; /* each delimiter currently is the same number 11334 of bytes */ 11335 line_t herelines; 11336 11337 /* The only non-UTF character that isn't a stand alone grapheme is 11338 * white-space, hence can't be a delimiter. */ 11339 const char * non_grapheme_msg = "Use of unassigned code point or" 11340 " non-standalone grapheme for a delimiter" 11341 " is not allowed"; 11342 PERL_ARGS_ASSERT_SCAN_STR; 11343 11344 /* skip space before the delimiter */ 11345 if (isSPACE(*s)) { /* skipspace can change the buffer 's' is in, so 11346 'start' also has to change */ 11347 s = start = skipspace(s); 11348 } 11349 11350 /* mark where we are, in case we need to report errors */ 11351 CLINE; 11352 11353 /* after skipping whitespace, the next character is the delimiter */ 11354 if (! UTF || UTF8_IS_INVARIANT(*s)) { 11355 open_delim_code = (U8) *s; 11356 open_delim_str[0] = *s; 11357 delim_byte_len = 1; 11358 } 11359 else { 11360 open_delim_code = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, 11361 &delim_byte_len); 11362 if (UNLIKELY(! is_grapheme((U8 *) start, 11363 (U8 *) s, 11364 (U8 *) PL_bufend, 11365 open_delim_code))) 11366 { 11367 yyerror(non_grapheme_msg); 11368 } 11369 11370 Copy(s, open_delim_str, delim_byte_len, char); 11371 } 11372 open_delim_str[delim_byte_len] = '\0'; /* Only for safety */ 11373 11374 11375 /* mark where we are */ 11376 PL_multi_start = CopLINE(PL_curcop); 11377 PL_multi_open = open_delim_code; 11378 herelines = PL_parser->herelines; 11379 11380 const char * legal_paired_opening_delims; 11381 const char * legal_paired_closing_delims; 11382 const char * deprecated_opening_delims; 11383 if (FEATURE_MORE_DELIMS_IS_ENABLED) { 11384 if (UTF) { 11385 legal_paired_opening_delims = EXTRA_OPENING_UTF8_BRACKETS; 11386 legal_paired_closing_delims = EXTRA_CLOSING_UTF8_BRACKETS; 11387 11388 /* We are deprecating using a closing delimiter as the opening, in 11389 * case we want in the future to accept them reversed. The string 11390 * may include ones that are legal, but the code below won't look 11391 * at this string unless it didn't find a legal opening one */ 11392 deprecated_opening_delims = DEPRECATED_OPENING_UTF8_BRACKETS; 11393 } 11394 else { 11395 legal_paired_opening_delims = EXTRA_OPENING_NON_UTF8_BRACKETS; 11396 legal_paired_closing_delims = EXTRA_CLOSING_NON_UTF8_BRACKETS; 11397 deprecated_opening_delims = DEPRECATED_OPENING_NON_UTF8_BRACKETS; 11398 } 11399 } 11400 else { 11401 legal_paired_opening_delims = "([{<"; 11402 legal_paired_closing_delims = ")]}>"; 11403 deprecated_opening_delims = (UTF) 11404 ? DEPRECATED_OPENING_UTF8_BRACKETS 11405 : DEPRECATED_OPENING_NON_UTF8_BRACKETS; 11406 } 11407 11408 const char * legal_paired_opening_delims_end = legal_paired_opening_delims 11409 + strlen(legal_paired_opening_delims); 11410 const char * deprecated_delims_end = deprecated_opening_delims 11411 + strlen(deprecated_opening_delims); 11412 11413 const char * close_delim_str = open_delim_str; 11414 UV close_delim_code = open_delim_code; 11415 11416 /* If the delimiter has a mirror-image closing one, get it */ 11417 const char *tmps = ninstr(legal_paired_opening_delims, 11418 legal_paired_opening_delims_end, 11419 open_delim_str, open_delim_str + delim_byte_len); 11420 if (tmps) { 11421 /* Here, there is a paired delimiter, and tmps points to its position 11422 in the string of the accepted opening paired delimiters. The 11423 corresponding position in the string of closing ones is the 11424 beginning of the paired mate. Both contain the same number of 11425 bytes. */ 11426 close_delim_str = legal_paired_closing_delims 11427 + (tmps - legal_paired_opening_delims); 11428 11429 /* The list of paired delimiters contains all the ASCII ones that have 11430 * always been legal, and no other ASCIIs. Don't raise a message if 11431 * using one of these */ 11432 if (! isASCII(open_delim_code)) { 11433 Perl_ck_warner_d(aTHX_ 11434 packWARN(WARN_EXPERIMENTAL__EXTRA_PAIRED_DELIMITERS), 11435 "Use of '%" UTF8f "' is experimental as a string delimiter", 11436 UTF8fARG(UTF, delim_byte_len, open_delim_str)); 11437 } 11438 11439 close_delim_code = (UTF) 11440 ? valid_utf8_to_uvchr((U8 *) close_delim_str, NULL) 11441 : * (U8 *) close_delim_str; 11442 } 11443 else { /* Here, the delimiter isn't paired, hence the close is the same as 11444 the open; and has aready been set up. But make sure it isn't 11445 deprecated to use this particular delimiter, as we plan 11446 eventually to make it paired. */ 11447 if (ninstr(deprecated_opening_delims, deprecated_delims_end, 11448 open_delim_str, open_delim_str + delim_byte_len)) 11449 { 11450 Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED), 11451 "Use of '%" UTF8f "' is deprecated as a string delimiter", 11452 UTF8fARG(UTF, delim_byte_len, open_delim_str)); 11453 } 11454 11455 /* Note that a NUL may be used as a delimiter, and this happens when 11456 * delimitting an empty string, and no special handling for it is 11457 * needed, as ninstr() calls are used */ 11458 } 11459 11460 PL_multi_close = close_delim_code; 11461 11462 if (PL_multi_open == PL_multi_close) { 11463 keep_bracketed_quoted = FALSE; 11464 } 11465 11466 /* create a new SV to hold the contents. 79 is the SV's initial length. 11467 What a random number. */ 11468 sv = newSV_type(SVt_PVIV); 11469 SvGROW(sv, 79); 11470 SvIV_set(sv, close_delim_code); 11471 (void)SvPOK_only(sv); /* validate pointer */ 11472 11473 /* move past delimiter and try to read a complete string */ 11474 if (keep_delims) 11475 sv_catpvn(sv, s, delim_byte_len); 11476 s += delim_byte_len; 11477 for (;;) { 11478 /* extend sv if need be */ 11479 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1); 11480 /* set 'to' to the next character in the sv's string */ 11481 to = SvPVX(sv)+SvCUR(sv); 11482 11483 /* read until we run out of string, or we find the closing delimiter */ 11484 while (s < PL_bufend) { 11485 /* embedded newlines increment the line count */ 11486 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered) 11487 COPLINE_INC_WITH_HERELINES; 11488 11489 /* backslashes can escape the closing delimiter */ 11490 if ( *s == '\\' && s < PL_bufend - delim_byte_len 11491 11492 /* ... but not if the delimiter itself is a backslash */ 11493 && close_delim_code != '\\') 11494 { 11495 /* Here, we have an escaping backslash. If we're supposed to 11496 * discard those that escape the closing delimiter, just 11497 * discard this one */ 11498 if ( ! keep_bracketed_quoted 11499 && ( memEQ(s + 1, open_delim_str, delim_byte_len) 11500 || ( PL_multi_open == PL_multi_close 11501 && re_reparse && s[1] == '\\') 11502 || memEQ(s + 1, close_delim_str, delim_byte_len))) 11503 { 11504 s++; 11505 } 11506 else /* any other escapes are simply copied straight through */ 11507 *to++ = *s++; 11508 } 11509 else if ( s < PL_bufend - (delim_byte_len - 1) 11510 && memEQ(s, close_delim_str, delim_byte_len) 11511 && --brackets <= 0) 11512 { 11513 /* Found unescaped closing delimiter, unnested if we care about 11514 * that; so are done. 11515 * 11516 * In the case of the opening and closing delimiters being 11517 * different, we have to deal with nesting; the conditional 11518 * above makes sure we don't get here until the nesting level, 11519 * 'brackets', is back down to zero. In the other case, 11520 * nesting isn't an issue, and 'brackets' never can get 11521 * incremented above 0, so will come here at the first closing 11522 * delimiter. 11523 * 11524 * Only grapheme delimiters are legal. */ 11525 if ( UTF /* All Non-UTF-8's are graphemes */ 11526 && UNLIKELY(! is_grapheme((U8 *) start, 11527 (U8 *) s, 11528 (U8 *) PL_bufend, 11529 close_delim_code))) 11530 { 11531 yyerror(non_grapheme_msg); 11532 } 11533 11534 break; 11535 } 11536 /* No nesting if open eq close */ 11537 else if ( PL_multi_open != PL_multi_close 11538 && s < PL_bufend - (delim_byte_len - 1) 11539 && memEQ(s, open_delim_str, delim_byte_len)) 11540 { 11541 brackets++; 11542 } 11543 11544 /* Here, still in the middle of the string; copy this character */ 11545 if (! UTF || UTF8_IS_INVARIANT((U8) *s)) { 11546 *to++ = *s++; 11547 } 11548 else { 11549 size_t this_char_len = UTF8SKIP(s); 11550 Copy(s, to, this_char_len, char); 11551 s += this_char_len; 11552 to += this_char_len; 11553 11554 d_is_utf8 = TRUE; 11555 } 11556 } /* End of loop through buffer */ 11557 11558 /* Here, found end of the string, OR ran out of buffer: terminate the 11559 * copied string and update the sv's end-of-string */ 11560 *to = '\0'; 11561 SvCUR_set(sv, to - SvPVX_const(sv)); 11562 11563 /* 11564 * this next chunk reads more into the buffer if we're not done yet 11565 */ 11566 11567 if (s < PL_bufend) 11568 break; /* handle case where we are done yet :-) */ 11569 11570 #ifndef PERL_STRICT_CR 11571 if (to - SvPVX_const(sv) >= 2) { 11572 if ( (to[-2] == '\r' && to[-1] == '\n') 11573 || (to[-2] == '\n' && to[-1] == '\r')) 11574 { 11575 to[-2] = '\n'; 11576 to--; 11577 SvCUR_set(sv, to - SvPVX_const(sv)); 11578 } 11579 else if (to[-1] == '\r') 11580 to[-1] = '\n'; 11581 } 11582 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r') 11583 to[-1] = '\n'; 11584 #endif 11585 11586 /* if we're out of file, or a read fails, bail and reset the current 11587 line marker so we can report where the unterminated string began 11588 */ 11589 COPLINE_INC_WITH_HERELINES; 11590 PL_bufptr = PL_bufend; 11591 if (!lex_next_chunk(0)) { 11592 sv_free(sv); 11593 CopLINE_set(PL_curcop, (line_t)PL_multi_start); 11594 return NULL; 11595 } 11596 s = start = PL_bufptr; 11597 } /* End of infinite loop */ 11598 11599 /* at this point, we have successfully read the delimited string */ 11600 11601 if (keep_delims) 11602 sv_catpvn(sv, s, delim_byte_len); 11603 s += delim_byte_len; 11604 11605 if (d_is_utf8) 11606 SvUTF8_on(sv); 11607 11608 PL_multi_end = CopLINE(PL_curcop); 11609 CopLINE_set(PL_curcop, PL_multi_start); 11610 PL_parser->herelines = herelines; 11611 11612 /* if we allocated too much space, give some back */ 11613 if (SvCUR(sv) + 5 < SvLEN(sv)) { 11614 SvLEN_set(sv, SvCUR(sv) + 1); 11615 SvPV_shrink_to_cur(sv); 11616 } 11617 11618 /* decide whether this is the first or second quoted string we've read 11619 for this op 11620 */ 11621 11622 if (PL_lex_stuff) 11623 PL_parser->lex_sub_repl = sv; 11624 else 11625 PL_lex_stuff = sv; 11626 if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-delim_byte_len : s; 11627 return s; 11628 } 11629 11630 /* 11631 scan_num 11632 takes: pointer to position in buffer 11633 returns: pointer to new position in buffer 11634 side-effects: builds ops for the constant in pl_yylval.op 11635 11636 Read a number in any of the formats that Perl accepts: 11637 11638 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12. 11639 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34 11640 0b[01](_?[01])* binary integers 11641 0o?[0-7](_?[0-7])* octal integers 11642 0x[0-9A-Fa-f](_?[0-9A-Fa-f])* hexadecimal integers 11643 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+ hexadecimal floats 11644 11645 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the 11646 thing it reads. 11647 11648 If it reads a number without a decimal point or an exponent, it will 11649 try converting the number to an integer and see if it can do so 11650 without loss of precision. 11651 */ 11652 11653 char * 11654 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) 11655 { 11656 const char *s = start; /* current position in buffer */ 11657 char *d; /* destination in temp buffer */ 11658 char *e; /* end of temp buffer */ 11659 NV nv; /* number read, as a double */ 11660 SV *sv = NULL; /* place to put the converted number */ 11661 bool floatit; /* boolean: int or float? */ 11662 const char *lastub = NULL; /* position of last underbar */ 11663 static const char* const number_too_long = "Number too long"; 11664 bool warned_about_underscore = 0; 11665 I32 shift; /* shift per digit for hex/oct/bin, hoisted here for fp */ 11666 #define WARN_ABOUT_UNDERSCORE() \ 11667 do { \ 11668 if (!warned_about_underscore) { \ 11669 warned_about_underscore = 1; \ 11670 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), \ 11671 "Misplaced _ in number"); \ 11672 } \ 11673 } while(0) 11674 /* Hexadecimal floating point. 11675 * 11676 * In many places (where we have quads and NV is IEEE 754 double) 11677 * we can fit the mantissa bits of a NV into an unsigned quad. 11678 * (Note that UVs might not be quads even when we have quads.) 11679 * This will not work everywhere, though (either no quads, or 11680 * using long doubles), in which case we have to resort to NV, 11681 * which will probably mean horrible loss of precision due to 11682 * multiple fp operations. */ 11683 bool hexfp = FALSE; 11684 int total_bits = 0; 11685 int significant_bits = 0; 11686 #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t) 11687 # define HEXFP_UQUAD 11688 Uquad_t hexfp_uquad = 0; 11689 int hexfp_frac_bits = 0; 11690 #else 11691 # define HEXFP_NV 11692 NV hexfp_nv = 0.0; 11693 #endif 11694 NV hexfp_mult = 1.0; 11695 UV high_non_zero = 0; /* highest digit */ 11696 int non_zero_integer_digits = 0; 11697 bool new_octal = FALSE; /* octal with "0o" prefix */ 11698 11699 PERL_ARGS_ASSERT_SCAN_NUM; 11700 11701 /* We use the first character to decide what type of number this is */ 11702 11703 switch (*s) { 11704 default: 11705 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s); 11706 11707 /* if it starts with a 0, it could be an octal number, a decimal in 11708 0.13 disguise, or a hexadecimal number, or a binary number. */ 11709 case '0': 11710 { 11711 /* variables: 11712 u holds the "number so far" 11713 overflowed was the number more than we can hold? 11714 11715 Shift is used when we add a digit. It also serves as an "are 11716 we in octal/hex/binary?" indicator to disallow hex characters 11717 when in octal mode. 11718 */ 11719 NV n = 0.0; 11720 UV u = 0; 11721 bool overflowed = FALSE; 11722 bool just_zero = TRUE; /* just plain 0 or binary number? */ 11723 bool has_digs = FALSE; 11724 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 }; 11725 static const char* const bases[5] = 11726 { "", "binary", "", "octal", "hexadecimal" }; 11727 static const char* const Bases[5] = 11728 { "", "Binary", "", "Octal", "Hexadecimal" }; 11729 static const char* const maxima[5] = 11730 { "", 11731 "0b11111111111111111111111111111111", 11732 "", 11733 "037777777777", 11734 "0xffffffff" }; 11735 11736 /* check for hex */ 11737 if (isALPHA_FOLD_EQ(s[1], 'x')) { 11738 shift = 4; 11739 s += 2; 11740 just_zero = FALSE; 11741 } else if (isALPHA_FOLD_EQ(s[1], 'b')) { 11742 shift = 1; 11743 s += 2; 11744 just_zero = FALSE; 11745 } 11746 /* check for a decimal in disguise */ 11747 else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e')) 11748 goto decimal; 11749 /* so it must be octal */ 11750 else { 11751 shift = 3; 11752 s++; 11753 if (isALPHA_FOLD_EQ(*s, 'o')) { 11754 s++; 11755 just_zero = FALSE; 11756 new_octal = TRUE; 11757 } 11758 } 11759 11760 if (*s == '_') { 11761 WARN_ABOUT_UNDERSCORE(); 11762 lastub = s++; 11763 } 11764 11765 /* read the rest of the number */ 11766 for (;;) { 11767 /* x is used in the overflow test, 11768 b is the digit we're adding on. */ 11769 UV x, b; 11770 11771 switch (*s) { 11772 11773 /* if we don't mention it, we're done */ 11774 default: 11775 goto out; 11776 11777 /* _ are ignored -- but warned about if consecutive */ 11778 case '_': 11779 if (lastub && s == lastub + 1) 11780 WARN_ABOUT_UNDERSCORE(); 11781 lastub = s++; 11782 break; 11783 11784 /* 8 and 9 are not octal */ 11785 case '8': case '9': 11786 if (shift == 3) 11787 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s)); 11788 /* FALLTHROUGH */ 11789 11790 /* octal digits */ 11791 case '2': case '3': case '4': 11792 case '5': case '6': case '7': 11793 if (shift == 1) 11794 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s)); 11795 /* FALLTHROUGH */ 11796 11797 case '0': case '1': 11798 b = *s++ & 15; /* ASCII digit -> value of digit */ 11799 goto digit; 11800 11801 /* hex digits */ 11802 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 11803 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 11804 /* make sure they said 0x */ 11805 if (shift != 4) 11806 goto out; 11807 b = (*s++ & 7) + 9; 11808 11809 /* Prepare to put the digit we have onto the end 11810 of the number so far. We check for overflows. 11811 */ 11812 11813 digit: 11814 just_zero = FALSE; 11815 has_digs = TRUE; 11816 if (!overflowed) { 11817 assert(shift >= 0); 11818 x = u << shift; /* make room for the digit */ 11819 11820 total_bits += shift; 11821 11822 if ((x >> shift) != u 11823 && !(PL_hints & HINT_NEW_BINARY)) { 11824 overflowed = TRUE; 11825 n = (NV) u; 11826 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW), 11827 "Integer overflow in %s number", 11828 bases[shift]); 11829 } else 11830 u = x | b; /* add the digit to the end */ 11831 } 11832 if (overflowed) { 11833 n *= nvshift[shift]; 11834 /* If an NV has not enough bits in its 11835 * mantissa to represent an UV this summing of 11836 * small low-order numbers is a waste of time 11837 * (because the NV cannot preserve the 11838 * low-order bits anyway): we could just 11839 * remember when did we overflow and in the 11840 * end just multiply n by the right 11841 * amount. */ 11842 n += (NV) b; 11843 } 11844 11845 if (high_non_zero == 0 && b > 0) 11846 high_non_zero = b; 11847 11848 if (high_non_zero) 11849 non_zero_integer_digits++; 11850 11851 /* this could be hexfp, but peek ahead 11852 * to avoid matching ".." */ 11853 if (UNLIKELY(HEXFP_PEEK(s))) { 11854 goto out; 11855 } 11856 11857 break; 11858 } 11859 } 11860 11861 /* if we get here, we had success: make a scalar value from 11862 the number. 11863 */ 11864 out: 11865 11866 /* final misplaced underbar check */ 11867 if (s[-1] == '_') 11868 WARN_ABOUT_UNDERSCORE(); 11869 11870 if (UNLIKELY(HEXFP_PEEK(s))) { 11871 /* Do sloppy (on the underbars) but quick detection 11872 * (and value construction) for hexfp, the decimal 11873 * detection will shortly be more thorough with the 11874 * underbar checks. */ 11875 const char* h = s; 11876 significant_bits = non_zero_integer_digits * shift; 11877 #ifdef HEXFP_UQUAD 11878 hexfp_uquad = u; 11879 #else /* HEXFP_NV */ 11880 hexfp_nv = u; 11881 #endif 11882 /* Ignore the leading zero bits of 11883 * the high (first) non-zero digit. */ 11884 if (high_non_zero) { 11885 if (high_non_zero < 0x8) 11886 significant_bits--; 11887 if (high_non_zero < 0x4) 11888 significant_bits--; 11889 if (high_non_zero < 0x2) 11890 significant_bits--; 11891 } 11892 11893 if (*h == '.') { 11894 #ifdef HEXFP_NV 11895 NV nv_mult = 1.0; 11896 #endif 11897 bool accumulate = TRUE; 11898 U8 b; 11899 int lim = 1 << shift; 11900 for (h++; ((isXDIGIT(*h) && (b = XDIGIT_VALUE(*h)) < lim) || 11901 *h == '_'); h++) { 11902 if (isXDIGIT(*h)) { 11903 significant_bits += shift; 11904 #ifdef HEXFP_UQUAD 11905 if (accumulate) { 11906 if (significant_bits < NV_MANT_DIG) { 11907 /* We are in the long "run" of xdigits, 11908 * accumulate the full four bits. */ 11909 assert(shift >= 0); 11910 hexfp_uquad <<= shift; 11911 hexfp_uquad |= b; 11912 hexfp_frac_bits += shift; 11913 } else if (significant_bits - shift < NV_MANT_DIG) { 11914 /* We are at a hexdigit either at, 11915 * or straddling, the edge of mantissa. 11916 * We will try grabbing as many as 11917 * possible bits. */ 11918 int tail = 11919 significant_bits - NV_MANT_DIG; 11920 if (tail <= 0) 11921 tail += shift; 11922 assert(tail >= 0); 11923 hexfp_uquad <<= tail; 11924 assert((shift - tail) >= 0); 11925 hexfp_uquad |= b >> (shift - tail); 11926 hexfp_frac_bits += tail; 11927 11928 /* Ignore the trailing zero bits 11929 * of the last non-zero xdigit. 11930 * 11931 * The assumption here is that if 11932 * one has input of e.g. the xdigit 11933 * eight (0x8), there is only one 11934 * bit being input, not the full 11935 * four bits. Conversely, if one 11936 * specifies a zero xdigit, the 11937 * assumption is that one really 11938 * wants all those bits to be zero. */ 11939 if (b) { 11940 if ((b & 0x1) == 0x0) { 11941 significant_bits--; 11942 if ((b & 0x2) == 0x0) { 11943 significant_bits--; 11944 if ((b & 0x4) == 0x0) { 11945 significant_bits--; 11946 } 11947 } 11948 } 11949 } 11950 11951 accumulate = FALSE; 11952 } 11953 } else { 11954 /* Keep skipping the xdigits, and 11955 * accumulating the significant bits, 11956 * but do not shift the uquad 11957 * (which would catastrophically drop 11958 * high-order bits) or accumulate the 11959 * xdigits anymore. */ 11960 } 11961 #else /* HEXFP_NV */ 11962 if (accumulate) { 11963 nv_mult /= nvshift[shift]; 11964 if (nv_mult > 0.0) 11965 hexfp_nv += b * nv_mult; 11966 else 11967 accumulate = FALSE; 11968 } 11969 #endif 11970 } 11971 if (significant_bits >= NV_MANT_DIG) 11972 accumulate = FALSE; 11973 } 11974 } 11975 11976 if ((total_bits > 0 || significant_bits > 0) && 11977 isALPHA_FOLD_EQ(*h, 'p')) { 11978 bool negexp = FALSE; 11979 h++; 11980 if (*h == '+') 11981 h++; 11982 else if (*h == '-') { 11983 negexp = TRUE; 11984 h++; 11985 } 11986 if (isDIGIT(*h)) { 11987 I32 hexfp_exp = 0; 11988 while (isDIGIT(*h) || *h == '_') { 11989 if (isDIGIT(*h)) { 11990 hexfp_exp *= 10; 11991 hexfp_exp += *h - '0'; 11992 #ifdef NV_MIN_EXP 11993 if (negexp 11994 && -hexfp_exp < NV_MIN_EXP - 1) { 11995 /* NOTE: this means that the exponent 11996 * underflow warning happens for 11997 * the IEEE 754 subnormals (denormals), 11998 * because DBL_MIN_EXP etc are the lowest 11999 * possible binary (or, rather, DBL_RADIX-base) 12000 * exponent for normals, not subnormals. 12001 * 12002 * This may or may not be a good thing. */ 12003 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW), 12004 "Hexadecimal float: exponent underflow"); 12005 break; 12006 } 12007 #endif 12008 #ifdef NV_MAX_EXP 12009 if (!negexp 12010 && hexfp_exp > NV_MAX_EXP - 1) { 12011 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW), 12012 "Hexadecimal float: exponent overflow"); 12013 break; 12014 } 12015 #endif 12016 } 12017 h++; 12018 } 12019 if (negexp) 12020 hexfp_exp = -hexfp_exp; 12021 #ifdef HEXFP_UQUAD 12022 hexfp_exp -= hexfp_frac_bits; 12023 #endif 12024 hexfp_mult = Perl_pow(2.0, hexfp_exp); 12025 hexfp = TRUE; 12026 goto decimal; 12027 } 12028 } 12029 } 12030 12031 if (!just_zero && !has_digs) { 12032 /* 0x, 0o or 0b with no digits, treat it as an error. 12033 Originally this backed up the parse before the b or 12034 x, but that has the potential for silent changes in 12035 behaviour, like for: "0x.3" and "0x+$foo". 12036 */ 12037 const char *d = s; 12038 char *oldbp = PL_bufptr; 12039 if (*d) ++d; /* so the user sees the bad non-digit */ 12040 PL_bufptr = (char *)d; /* so yyerror reports the context */ 12041 yyerror(Perl_form(aTHX_ "No digits found for %s literal", 12042 bases[shift])); 12043 PL_bufptr = oldbp; 12044 } 12045 12046 if (overflowed) { 12047 if (n > 4294967295.0) 12048 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 12049 "%s number > %s non-portable", 12050 Bases[shift], 12051 new_octal ? "0o37777777777" : maxima[shift]); 12052 sv = newSVnv(n); 12053 } 12054 else { 12055 #if UVSIZE > 4 12056 if (u > 0xffffffff) 12057 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 12058 "%s number > %s non-portable", 12059 Bases[shift], 12060 new_octal ? "0o37777777777" : maxima[shift]); 12061 #endif 12062 sv = newSVuv(u); 12063 } 12064 if (just_zero && (PL_hints & HINT_NEW_INTEGER)) 12065 sv = new_constant(start, s - start, "integer", 12066 sv, NULL, NULL, 0, NULL); 12067 else if (PL_hints & HINT_NEW_BINARY) 12068 sv = new_constant(start, s - start, "binary", 12069 sv, NULL, NULL, 0, NULL); 12070 } 12071 break; 12072 12073 /* 12074 handle decimal numbers. 12075 we're also sent here when we read a 0 as the first digit 12076 */ 12077 case '1': case '2': case '3': case '4': case '5': 12078 case '6': case '7': case '8': case '9': case '.': 12079 decimal: 12080 d = PL_tokenbuf; 12081 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */ 12082 floatit = FALSE; 12083 if (hexfp) { 12084 floatit = TRUE; 12085 *d++ = '0'; 12086 switch (shift) { 12087 case 4: 12088 *d++ = 'x'; 12089 s = start + 2; 12090 break; 12091 case 3: 12092 if (new_octal) { 12093 *d++ = 'o'; 12094 s = start + 2; 12095 break; 12096 } 12097 s = start + 1; 12098 break; 12099 case 1: 12100 *d++ = 'b'; 12101 s = start + 2; 12102 break; 12103 default: 12104 NOT_REACHED; /* NOTREACHED */ 12105 } 12106 } 12107 12108 /* read next group of digits and _ and copy into d */ 12109 while (isDIGIT(*s) 12110 || *s == '_' 12111 || UNLIKELY(hexfp && isXDIGIT(*s))) 12112 { 12113 /* skip underscores, checking for misplaced ones 12114 if -w is on 12115 */ 12116 if (*s == '_') { 12117 if (lastub && s == lastub + 1) 12118 WARN_ABOUT_UNDERSCORE(); 12119 lastub = s++; 12120 } 12121 else { 12122 /* check for end of fixed-length buffer */ 12123 if (d >= e) 12124 Perl_croak(aTHX_ "%s", number_too_long); 12125 /* if we're ok, copy the character */ 12126 *d++ = *s++; 12127 } 12128 } 12129 12130 /* final misplaced underbar check */ 12131 if (lastub && s == lastub + 1) 12132 WARN_ABOUT_UNDERSCORE(); 12133 12134 /* read a decimal portion if there is one. avoid 12135 3..5 being interpreted as the number 3. followed 12136 by .5 12137 */ 12138 if (*s == '.' && s[1] != '.') { 12139 floatit = TRUE; 12140 *d++ = *s++; 12141 12142 if (*s == '_') { 12143 WARN_ABOUT_UNDERSCORE(); 12144 lastub = s; 12145 } 12146 12147 /* copy, ignoring underbars, until we run out of digits. 12148 */ 12149 for (; isDIGIT(*s) 12150 || *s == '_' 12151 || UNLIKELY(hexfp && isXDIGIT(*s)); 12152 s++) 12153 { 12154 /* fixed length buffer check */ 12155 if (d >= e) 12156 Perl_croak(aTHX_ "%s", number_too_long); 12157 if (*s == '_') { 12158 if (lastub && s == lastub + 1) 12159 WARN_ABOUT_UNDERSCORE(); 12160 lastub = s; 12161 } 12162 else 12163 *d++ = *s; 12164 } 12165 /* fractional part ending in underbar? */ 12166 if (s[-1] == '_') 12167 WARN_ABOUT_UNDERSCORE(); 12168 if (*s == '.' && isDIGIT(s[1])) { 12169 /* oops, it's really a v-string, but without the "v" */ 12170 s = start; 12171 goto vstring; 12172 } 12173 } 12174 12175 /* read exponent part, if present */ 12176 if ((isALPHA_FOLD_EQ(*s, 'e') 12177 || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p'))) 12178 && memCHRs("+-0123456789_", s[1])) 12179 { 12180 int exp_digits = 0; 12181 const char *save_s = s; 12182 char * save_d = d; 12183 12184 /* regardless of whether user said 3E5 or 3e5, use lower 'e', 12185 ditto for p (hexfloats) */ 12186 if ((isALPHA_FOLD_EQ(*s, 'e'))) { 12187 /* At least some Mach atof()s don't grok 'E' */ 12188 *d++ = 'e'; 12189 } 12190 else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) { 12191 *d++ = 'p'; 12192 } 12193 12194 s++; 12195 12196 12197 /* stray preinitial _ */ 12198 if (*s == '_') { 12199 WARN_ABOUT_UNDERSCORE(); 12200 lastub = s++; 12201 } 12202 12203 /* allow positive or negative exponent */ 12204 if (*s == '+' || *s == '-') 12205 *d++ = *s++; 12206 12207 /* stray initial _ */ 12208 if (*s == '_') { 12209 WARN_ABOUT_UNDERSCORE(); 12210 lastub = s++; 12211 } 12212 12213 /* read digits of exponent */ 12214 while (isDIGIT(*s) || *s == '_') { 12215 if (isDIGIT(*s)) { 12216 ++exp_digits; 12217 if (d >= e) 12218 Perl_croak(aTHX_ "%s", number_too_long); 12219 *d++ = *s++; 12220 } 12221 else { 12222 if (((lastub && s == lastub + 1) 12223 || (!isDIGIT(s[1]) && s[1] != '_'))) 12224 WARN_ABOUT_UNDERSCORE(); 12225 lastub = s++; 12226 } 12227 } 12228 12229 if (!exp_digits) { 12230 /* no exponent digits, the [eEpP] could be for something else, 12231 * though in practice we don't get here for p since that's preparsed 12232 * earlier, and results in only the 0xX being consumed, so behave similarly 12233 * for decimal floats and consume only the D.DD, leaving the [eE] to the 12234 * next token. 12235 */ 12236 s = save_s; 12237 d = save_d; 12238 } 12239 else { 12240 floatit = TRUE; 12241 } 12242 } 12243 12244 12245 /* 12246 We try to do an integer conversion first if no characters 12247 indicating "float" have been found. 12248 */ 12249 12250 if (!floatit) { 12251 UV uv; 12252 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv); 12253 12254 if (flags == IS_NUMBER_IN_UV) { 12255 if (uv <= IV_MAX) 12256 sv = newSViv(uv); /* Prefer IVs over UVs. */ 12257 else 12258 sv = newSVuv(uv); 12259 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) { 12260 if (uv <= (UV) IV_MIN) 12261 sv = newSViv(-(IV)uv); 12262 else 12263 floatit = TRUE; 12264 } else 12265 floatit = TRUE; 12266 } 12267 if (floatit) { 12268 /* terminate the string */ 12269 *d = '\0'; 12270 if (UNLIKELY(hexfp)) { 12271 # ifdef NV_MANT_DIG 12272 if (significant_bits > NV_MANT_DIG) 12273 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW), 12274 "Hexadecimal float: mantissa overflow"); 12275 # endif 12276 #ifdef HEXFP_UQUAD 12277 nv = hexfp_uquad * hexfp_mult; 12278 #else /* HEXFP_NV */ 12279 nv = hexfp_nv * hexfp_mult; 12280 #endif 12281 } else { 12282 nv = Atof(PL_tokenbuf); 12283 } 12284 sv = newSVnv(nv); 12285 } 12286 12287 if ( floatit 12288 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) { 12289 const char *const key = floatit ? "float" : "integer"; 12290 const STRLEN keylen = floatit ? 5 : 7; 12291 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf, 12292 key, keylen, sv, NULL, NULL, 0, NULL); 12293 } 12294 break; 12295 12296 /* if it starts with a v, it could be a v-string */ 12297 case 'v': 12298 vstring: 12299 sv = newSV(5); /* preallocate storage space */ 12300 ENTER_with_name("scan_vstring"); 12301 SAVEFREESV(sv); 12302 s = scan_vstring(s, PL_bufend, sv); 12303 SvREFCNT_inc_simple_void_NN(sv); 12304 LEAVE_with_name("scan_vstring"); 12305 break; 12306 } 12307 12308 /* make the op for the constant and return */ 12309 12310 if (sv) 12311 lvalp->opval = newSVOP(OP_CONST, 0, sv); 12312 else 12313 lvalp->opval = NULL; 12314 12315 return (char *)s; 12316 } 12317 12318 STATIC char * 12319 S_scan_formline(pTHX_ char *s) 12320 { 12321 SV * const stuff = newSVpvs(""); 12322 bool needargs = FALSE; 12323 bool eofmt = FALSE; 12324 12325 PERL_ARGS_ASSERT_SCAN_FORMLINE; 12326 12327 while (!needargs) { 12328 char *eol; 12329 if (*s == '.') { 12330 char *t = s+1; 12331 #ifdef PERL_STRICT_CR 12332 while (SPACE_OR_TAB(*t)) 12333 t++; 12334 #else 12335 while (SPACE_OR_TAB(*t) || *t == '\r') 12336 t++; 12337 #endif 12338 if (*t == '\n' || t == PL_bufend) { 12339 eofmt = TRUE; 12340 break; 12341 } 12342 } 12343 eol = (char *) memchr(s,'\n',PL_bufend-s); 12344 if (!eol++) 12345 eol = PL_bufend; 12346 if (*s != '#') { 12347 char *t; 12348 for (t = s; t < eol; t++) { 12349 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) { 12350 needargs = FALSE; 12351 goto enough; /* ~~ must be first line in formline */ 12352 } 12353 if (*t == '@' || *t == '^') 12354 needargs = TRUE; 12355 } 12356 if (eol > s) { 12357 sv_catpvn(stuff, s, eol-s); 12358 #ifndef PERL_STRICT_CR 12359 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') { 12360 char *end = SvPVX(stuff) + SvCUR(stuff); 12361 end[-2] = '\n'; 12362 end[-1] = '\0'; 12363 SvCUR_set(stuff, SvCUR(stuff) - 1); 12364 } 12365 #endif 12366 } 12367 else 12368 break; 12369 } 12370 s = (char*)eol; 12371 if ((PL_rsfp || PL_parser->filtered) 12372 && PL_parser->form_lex_state == LEX_NORMAL) { 12373 bool got_some; 12374 PL_bufptr = PL_bufend; 12375 COPLINE_INC_WITH_HERELINES; 12376 got_some = lex_next_chunk(0); 12377 CopLINE_dec(PL_curcop); 12378 s = PL_bufptr; 12379 if (!got_some) 12380 break; 12381 } 12382 incline(s, PL_bufend); 12383 } 12384 enough: 12385 if (!SvCUR(stuff) || needargs) 12386 PL_lex_state = PL_parser->form_lex_state; 12387 if (SvCUR(stuff)) { 12388 PL_expect = XSTATE; 12389 if (needargs) { 12390 const char *s2 = s; 12391 while (isSPACE(*s2) && *s2 != '\n') 12392 s2++; 12393 if (*s2 == '{') { 12394 PL_expect = XTERMBLOCK; 12395 NEXTVAL_NEXTTOKE.ival = 0; 12396 force_next(DO); 12397 } 12398 NEXTVAL_NEXTTOKE.ival = 0; 12399 force_next(FORMLBRACK); 12400 } 12401 if (!IN_BYTES) { 12402 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff))) 12403 SvUTF8_on(stuff); 12404 } 12405 NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0, stuff); 12406 force_next(THING); 12407 } 12408 else { 12409 SvREFCNT_dec(stuff); 12410 if (eofmt) 12411 PL_lex_formbrack = 0; 12412 } 12413 return s; 12414 } 12415 12416 I32 12417 Perl_start_subparse(pTHX_ I32 is_format, U32 flags) 12418 { 12419 const I32 oldsavestack_ix = PL_savestack_ix; 12420 CV* const outsidecv = PL_compcv; 12421 12422 SAVEI32(PL_subline); 12423 save_item(PL_subname); 12424 SAVESPTR(PL_compcv); 12425 12426 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV)); 12427 CvFLAGS(PL_compcv) |= flags; 12428 12429 PL_subline = CopLINE(PL_curcop); 12430 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB); 12431 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv)); 12432 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax; 12433 if (outsidecv && CvPADLIST(outsidecv)) 12434 CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id; 12435 12436 return oldsavestack_ix; 12437 } 12438 12439 12440 /* Do extra initialisation of a CV (typically one just created by 12441 * start_subparse()) if that CV is for a named sub 12442 */ 12443 12444 void 12445 Perl_init_named_cv(pTHX_ CV *cv, OP *nameop) 12446 { 12447 PERL_ARGS_ASSERT_INIT_NAMED_CV; 12448 12449 if (nameop->op_type == OP_CONST) { 12450 const char *const name = SvPV_nolen_const(((SVOP*)nameop)->op_sv); 12451 if ( strEQ(name, "BEGIN") 12452 || strEQ(name, "END") 12453 || strEQ(name, "INIT") 12454 || strEQ(name, "CHECK") 12455 || strEQ(name, "UNITCHECK") 12456 ) 12457 CvSPECIAL_on(cv); 12458 } 12459 else 12460 /* State subs inside anonymous subs need to be 12461 clonable themselves. */ 12462 if ( CvANON(CvOUTSIDE(cv)) 12463 || CvCLONE(CvOUTSIDE(cv)) 12464 || !PadnameIsSTATE(PadlistNAMESARRAY(CvPADLIST( 12465 CvOUTSIDE(cv) 12466 ))[nameop->op_targ]) 12467 ) 12468 CvCLONE_on(cv); 12469 } 12470 12471 12472 static int 12473 S_yywarn(pTHX_ const char *const s, U32 flags) 12474 { 12475 PERL_ARGS_ASSERT_YYWARN; 12476 12477 PL_in_eval |= EVAL_WARNONLY; 12478 yyerror_pv(s, flags); 12479 return 0; 12480 } 12481 12482 void 12483 Perl_abort_execution(pTHX_ const char * const msg, const char * const name) 12484 { 12485 PERL_ARGS_ASSERT_ABORT_EXECUTION; 12486 12487 if (PL_minus_c) 12488 Perl_croak(aTHX_ "%s%s had compilation errors.\n", msg, name); 12489 else { 12490 Perl_croak(aTHX_ 12491 "%sExecution of %s aborted due to compilation errors.\n", msg, name); 12492 } 12493 NOT_REACHED; /* NOTREACHED */ 12494 } 12495 12496 void 12497 Perl_yyquit(pTHX) 12498 { 12499 /* Called, after at least one error has been found, to abort the parse now, 12500 * instead of trying to forge ahead */ 12501 12502 yyerror_pvn(NULL, 0, 0); 12503 } 12504 12505 int 12506 Perl_yyerror(pTHX_ const char *const s) 12507 { 12508 PERL_ARGS_ASSERT_YYERROR; 12509 return yyerror_pvn(s, strlen(s), 0); 12510 } 12511 12512 int 12513 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags) 12514 { 12515 PERL_ARGS_ASSERT_YYERROR_PV; 12516 return yyerror_pvn(s, strlen(s), flags); 12517 } 12518 12519 int 12520 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags) 12521 { 12522 const char *context = NULL; 12523 int contlen = -1; 12524 SV *msg; 12525 SV * const where_sv = newSVpvs_flags("", SVs_TEMP); 12526 int yychar = PL_parser->yychar; 12527 12528 /* Output error message 's' with length 'len'. 'flags' are SV flags that 12529 * apply. If the number of errors found is large enough, it abandons 12530 * parsing. If 's' is NULL, there is no message, and it abandons 12531 * processing unconditionally */ 12532 12533 if (s != NULL) { 12534 if (!yychar || (yychar == PERLY_SEMICOLON && !PL_rsfp)) 12535 sv_catpvs(where_sv, "at EOF"); 12536 else if ( PL_oldoldbufptr 12537 && PL_bufptr > PL_oldoldbufptr 12538 && PL_bufptr - PL_oldoldbufptr < 200 12539 && PL_oldoldbufptr != PL_oldbufptr 12540 && PL_oldbufptr != PL_bufptr) 12541 { 12542 while (isSPACE(*PL_oldoldbufptr)) 12543 PL_oldoldbufptr++; 12544 context = PL_oldoldbufptr; 12545 contlen = PL_bufptr - PL_oldoldbufptr; 12546 } 12547 else if ( PL_oldbufptr 12548 && PL_bufptr > PL_oldbufptr 12549 && PL_bufptr - PL_oldbufptr < 200 12550 && PL_oldbufptr != PL_bufptr) 12551 { 12552 while (isSPACE(*PL_oldbufptr)) 12553 PL_oldbufptr++; 12554 context = PL_oldbufptr; 12555 contlen = PL_bufptr - PL_oldbufptr; 12556 } 12557 else if (yychar > 255) 12558 sv_catpvs(where_sv, "next token ???"); 12559 else if (yychar == YYEMPTY) { 12560 if (PL_lex_state == LEX_NORMAL) 12561 sv_catpvs(where_sv, "at end of line"); 12562 else if (PL_lex_inpat) 12563 sv_catpvs(where_sv, "within pattern"); 12564 else 12565 sv_catpvs(where_sv, "within string"); 12566 } 12567 else { 12568 sv_catpvs(where_sv, "next char "); 12569 if (yychar < 32) 12570 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar)); 12571 else if (isPRINT_LC(yychar)) { 12572 const char string = yychar; 12573 sv_catpvn(where_sv, &string, 1); 12574 } 12575 else 12576 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255); 12577 } 12578 msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP); 12579 Perl_sv_catpvf(aTHX_ msg, " at %s line %" IVdf ", ", 12580 OutCopFILE(PL_curcop), 12581 (IV)(PL_parser->preambling == NOLINE 12582 ? CopLINE(PL_curcop) 12583 : PL_parser->preambling)); 12584 if (context) 12585 Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n", 12586 UTF8fARG(UTF, contlen, context)); 12587 else 12588 Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv)); 12589 if ( PL_multi_start < PL_multi_end 12590 && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) 12591 { 12592 Perl_sv_catpvf(aTHX_ msg, 12593 " (Might be a runaway multi-line %c%c string starting on" 12594 " line %" IVdf ")\n", 12595 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start); 12596 PL_multi_end = 0; 12597 } 12598 if (PL_in_eval & EVAL_WARNONLY) { 12599 PL_in_eval &= ~EVAL_WARNONLY; 12600 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg)); 12601 } 12602 else { 12603 qerror(msg); 12604 } 12605 } 12606 if (s == NULL || PL_error_count >= 10) { 12607 const char * msg = ""; 12608 const char * const name = OutCopFILE(PL_curcop); 12609 12610 if (PL_in_eval) { 12611 SV * errsv = ERRSV; 12612 if (SvCUR(errsv)) { 12613 msg = Perl_form(aTHX_ "%" SVf, SVfARG(errsv)); 12614 } 12615 } 12616 12617 if (s == NULL) { 12618 abort_execution(msg, name); 12619 } 12620 else { 12621 Perl_croak(aTHX_ "%s%s has too many errors.\n", msg, name); 12622 } 12623 } 12624 PL_in_my = 0; 12625 PL_in_my_stash = NULL; 12626 return 0; 12627 } 12628 12629 STATIC char* 12630 S_swallow_bom(pTHX_ U8 *s) 12631 { 12632 const STRLEN slen = SvCUR(PL_linestr); 12633 12634 PERL_ARGS_ASSERT_SWALLOW_BOM; 12635 12636 switch (s[0]) { 12637 case 0xFF: 12638 if (s[1] == 0xFE) { 12639 /* UTF-16 little-endian? (or UTF-32LE?) */ 12640 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */ 12641 /* diag_listed_as: Unsupported script encoding %s */ 12642 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE"); 12643 #ifndef PERL_NO_UTF16_FILTER 12644 #ifdef DEBUGGING 12645 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n"); 12646 #endif 12647 s += 2; 12648 if (PL_bufend > (char*)s) { 12649 s = add_utf16_textfilter(s, TRUE); 12650 } 12651 #else 12652 /* diag_listed_as: Unsupported script encoding %s */ 12653 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE"); 12654 #endif 12655 } 12656 break; 12657 case 0xFE: 12658 if (s[1] == 0xFF) { /* UTF-16 big-endian? */ 12659 #ifndef PERL_NO_UTF16_FILTER 12660 #ifdef DEBUGGING 12661 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n"); 12662 #endif 12663 s += 2; 12664 if (PL_bufend > (char *)s) { 12665 s = add_utf16_textfilter(s, FALSE); 12666 } 12667 #else 12668 /* diag_listed_as: Unsupported script encoding %s */ 12669 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE"); 12670 #endif 12671 } 12672 break; 12673 case BOM_UTF8_FIRST_BYTE: { 12674 if (memBEGINs(s+1, slen - 1, BOM_UTF8_TAIL)) { 12675 #ifdef DEBUGGING 12676 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n"); 12677 #endif 12678 s += sizeof(BOM_UTF8) - 1; /* UTF-8 */ 12679 } 12680 break; 12681 } 12682 case 0: 12683 if (slen > 3) { 12684 if (s[1] == 0) { 12685 if (s[2] == 0xFE && s[3] == 0xFF) { 12686 /* UTF-32 big-endian */ 12687 /* diag_listed_as: Unsupported script encoding %s */ 12688 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE"); 12689 } 12690 } 12691 else if (s[2] == 0 && s[3] != 0) { 12692 /* Leading bytes 12693 * 00 xx 00 xx 12694 * are a good indicator of UTF-16BE. */ 12695 #ifndef PERL_NO_UTF16_FILTER 12696 #ifdef DEBUGGING 12697 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n"); 12698 #endif 12699 s = add_utf16_textfilter(s, FALSE); 12700 #else 12701 /* diag_listed_as: Unsupported script encoding %s */ 12702 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE"); 12703 #endif 12704 } 12705 } 12706 break; 12707 12708 default: 12709 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) { 12710 /* Leading bytes 12711 * xx 00 xx 00 12712 * are a good indicator of UTF-16LE. */ 12713 #ifndef PERL_NO_UTF16_FILTER 12714 #ifdef DEBUGGING 12715 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n"); 12716 #endif 12717 s = add_utf16_textfilter(s, TRUE); 12718 #else 12719 /* diag_listed_as: Unsupported script encoding %s */ 12720 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE"); 12721 #endif 12722 } 12723 } 12724 return (char*)s; 12725 } 12726 12727 12728 #ifndef PERL_NO_UTF16_FILTER 12729 static I32 12730 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen) 12731 { 12732 SV *const filter = FILTER_DATA(idx); 12733 /* We re-use this each time round, throwing the contents away before we 12734 return. */ 12735 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter)); 12736 SV *const utf8_buffer = filter; 12737 IV status = IoPAGE(filter); 12738 const bool reverse = cBOOL(IoLINES(filter)); 12739 I32 retval; 12740 12741 PERL_ARGS_ASSERT_UTF16_TEXTFILTER; 12742 12743 /* As we're automatically added, at the lowest level, and hence only called 12744 from this file, we can be sure that we're not called in block mode. Hence 12745 don't bother writing code to deal with block mode. */ 12746 if (maxlen) { 12747 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen); 12748 } 12749 if (status < 0) { 12750 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%" IVdf ")", status); 12751 } 12752 DEBUG_P(PerlIO_printf(Perl_debug_log, 12753 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n", 12754 FPTR2DPTR(void *, S_utf16_textfilter), 12755 reverse ? 'l' : 'b', idx, maxlen, status, 12756 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer))); 12757 12758 while (1) { 12759 STRLEN chars; 12760 STRLEN have; 12761 Size_t newlen; 12762 U8 *end; 12763 /* First, look in our buffer of existing UTF-8 data: */ 12764 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer)); 12765 12766 if (nl) { 12767 ++nl; 12768 } else if (status == 0) { 12769 /* EOF */ 12770 IoPAGE(filter) = 0; 12771 nl = SvEND(utf8_buffer); 12772 } 12773 if (nl) { 12774 STRLEN got = nl - SvPVX(utf8_buffer); 12775 /* Did we have anything to append? */ 12776 retval = got != 0; 12777 sv_catpvn(sv, SvPVX(utf8_buffer), got); 12778 /* Everything else in this code works just fine if SVp_POK isn't 12779 set. This, however, needs it, and we need it to work, else 12780 we loop infinitely because the buffer is never consumed. */ 12781 sv_chop(utf8_buffer, nl); 12782 break; 12783 } 12784 12785 /* OK, not a complete line there, so need to read some more UTF-16. 12786 Read an extra octect if the buffer currently has an odd number. */ 12787 while (1) { 12788 if (status <= 0) 12789 break; 12790 if (SvCUR(utf16_buffer) >= 2) { 12791 /* Location of the high octet of the last complete code point. 12792 Gosh, UTF-16 is a pain. All the benefits of variable length, 12793 *coupled* with all the benefits of partial reads and 12794 endianness. */ 12795 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer) 12796 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2)); 12797 12798 if (*last_hi < 0xd8 || *last_hi > 0xdb) { 12799 break; 12800 } 12801 12802 /* We have the first half of a surrogate. Read more. */ 12803 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi)); 12804 } 12805 12806 status = FILTER_READ(idx + 1, utf16_buffer, 12807 160 + (SvCUR(utf16_buffer) & 1)); 12808 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%" IVdf " SvCUR(sv)=%" UVuf "\n", status, (UV)SvCUR(utf16_buffer))); 12809 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);}); 12810 if (status < 0) { 12811 /* Error */ 12812 IoPAGE(filter) = status; 12813 return status; 12814 } 12815 } 12816 12817 /* 'chars' isn't quite the right name, as code points above 0xFFFF 12818 * require 4 bytes per char */ 12819 chars = SvCUR(utf16_buffer) >> 1; 12820 have = SvCUR(utf8_buffer); 12821 12822 /* Assume the worst case size as noted by the functions: twice the 12823 * number of input bytes */ 12824 SvGROW(utf8_buffer, have + chars * 4 + 1); 12825 12826 if (reverse) { 12827 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer), 12828 (U8*)SvPVX_const(utf8_buffer) + have, 12829 chars * 2, &newlen); 12830 } else { 12831 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer), 12832 (U8*)SvPVX_const(utf8_buffer) + have, 12833 chars * 2, &newlen); 12834 } 12835 SvCUR_set(utf8_buffer, have + newlen); 12836 *end = '\0'; 12837 12838 /* No need to keep this SV "well-formed" with a '\0' after the end, as 12839 it's private to us, and utf16_to_utf8{,reversed} take a 12840 (pointer,length) pair, rather than a NUL-terminated string. */ 12841 if(SvCUR(utf16_buffer) & 1) { 12842 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1]; 12843 SvCUR_set(utf16_buffer, 1); 12844 } else { 12845 SvCUR_set(utf16_buffer, 0); 12846 } 12847 } 12848 DEBUG_P(PerlIO_printf(Perl_debug_log, 12849 "utf16_textfilter: returns, status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n", 12850 status, 12851 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer))); 12852 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);}); 12853 return retval; 12854 } 12855 12856 static U8 * 12857 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed) 12858 { 12859 SV *filter = filter_add(S_utf16_textfilter, NULL); 12860 12861 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER; 12862 12863 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s)); 12864 SvPVCLEAR(filter); 12865 IoLINES(filter) = reversed; 12866 IoPAGE(filter) = 1; /* Not EOF */ 12867 12868 /* Sadly, we have to return a valid pointer, come what may, so we have to 12869 ignore any error return from this. */ 12870 SvCUR_set(PL_linestr, 0); 12871 if (FILTER_READ(0, PL_linestr, 0)) { 12872 SvUTF8_on(PL_linestr); 12873 } else { 12874 SvUTF8_on(PL_linestr); 12875 } 12876 PL_bufend = SvEND(PL_linestr); 12877 return (U8*)SvPVX(PL_linestr); 12878 } 12879 #endif 12880 12881 /* 12882 =for apidoc scan_vstring 12883 12884 Returns a pointer to the next character after the parsed 12885 vstring, as well as updating the passed in sv. 12886 12887 Function must be called like 12888 12889 sv = sv_2mortal(newSV(5)); 12890 s = scan_vstring(s,e,sv); 12891 12892 where s and e are the start and end of the string. 12893 The sv should already be large enough to store the vstring 12894 passed in, for performance reasons. 12895 12896 This function may croak if fatal warnings are enabled in the 12897 calling scope, hence the sv_2mortal in the example (to prevent 12898 a leak). Make sure to do SvREFCNT_inc afterwards if you use 12899 sv_2mortal. 12900 12901 =cut 12902 */ 12903 12904 char * 12905 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv) 12906 { 12907 const char *pos = s; 12908 const char *start = s; 12909 12910 PERL_ARGS_ASSERT_SCAN_VSTRING; 12911 12912 if (*pos == 'v') pos++; /* get past 'v' */ 12913 while (pos < e && (isDIGIT(*pos) || *pos == '_')) 12914 pos++; 12915 if ( *pos != '.') { 12916 /* this may not be a v-string if followed by => */ 12917 const char *next = pos; 12918 while (next < e && isSPACE(*next)) 12919 ++next; 12920 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) { 12921 /* return string not v-string */ 12922 sv_setpvn(sv,(char *)s,pos-s); 12923 return (char *)pos; 12924 } 12925 } 12926 12927 if (!isALPHA(*pos)) { 12928 U8 tmpbuf[UTF8_MAXBYTES+1]; 12929 12930 if (*s == 'v') 12931 s++; /* get past 'v' */ 12932 12933 SvPVCLEAR(sv); 12934 12935 for (;;) { 12936 /* this is atoi() that tolerates underscores */ 12937 U8 *tmpend; 12938 UV rev = 0; 12939 const char *end = pos; 12940 UV mult = 1; 12941 while (--end >= s) { 12942 if (*end != '_') { 12943 const UV orev = rev; 12944 rev += (*end - '0') * mult; 12945 mult *= 10; 12946 if (orev > rev) 12947 /* diag_listed_as: Integer overflow in %s number */ 12948 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW), 12949 "Integer overflow in decimal number"); 12950 } 12951 } 12952 12953 /* Append native character for the rev point */ 12954 tmpend = uvchr_to_utf8(tmpbuf, rev); 12955 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf); 12956 if (!UVCHR_IS_INVARIANT(rev)) 12957 SvUTF8_on(sv); 12958 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1])) 12959 s = ++pos; 12960 else { 12961 s = pos; 12962 break; 12963 } 12964 while (pos < e && (isDIGIT(*pos) || *pos == '_')) 12965 pos++; 12966 } 12967 SvPOK_on(sv); 12968 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start); 12969 SvRMAGICAL_on(sv); 12970 } 12971 return (char *)s; 12972 } 12973 12974 int 12975 Perl_keyword_plugin_standard(pTHX_ 12976 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr) 12977 { 12978 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD; 12979 PERL_UNUSED_CONTEXT; 12980 PERL_UNUSED_ARG(keyword_ptr); 12981 PERL_UNUSED_ARG(keyword_len); 12982 PERL_UNUSED_ARG(op_ptr); 12983 return KEYWORD_PLUGIN_DECLINE; 12984 } 12985 12986 /* 12987 =for apidoc_section $lexer 12988 =for apidoc wrap_keyword_plugin 12989 12990 Puts a C function into the chain of keyword plugins. This is the 12991 preferred way to manipulate the L</PL_keyword_plugin> variable. 12992 C<new_plugin> is a pointer to the C function that is to be added to the 12993 keyword plugin chain, and C<old_plugin_p> points to the storage location 12994 where a pointer to the next function in the chain will be stored. The 12995 value of C<new_plugin> is written into the L</PL_keyword_plugin> variable, 12996 while the value previously stored there is written to C<*old_plugin_p>. 12997 12998 L</PL_keyword_plugin> is global to an entire process, and a module wishing 12999 to hook keyword parsing may find itself invoked more than once per 13000 process, typically in different threads. To handle that situation, this 13001 function is idempotent. The location C<*old_plugin_p> must initially 13002 (once per process) contain a null pointer. A C variable of static 13003 duration (declared at file scope, typically also marked C<static> to give 13004 it internal linkage) will be implicitly initialised appropriately, if it 13005 does not have an explicit initialiser. This function will only actually 13006 modify the plugin chain if it finds C<*old_plugin_p> to be null. This 13007 function is also thread safe on the small scale. It uses appropriate 13008 locking to avoid race conditions in accessing L</PL_keyword_plugin>. 13009 13010 When this function is called, the function referenced by C<new_plugin> 13011 must be ready to be called, except for C<*old_plugin_p> being unfilled. 13012 In a threading situation, C<new_plugin> may be called immediately, even 13013 before this function has returned. C<*old_plugin_p> will always be 13014 appropriately set before C<new_plugin> is called. If C<new_plugin> 13015 decides not to do anything special with the identifier that it is given 13016 (which is the usual case for most calls to a keyword plugin), it must 13017 chain the plugin function referenced by C<*old_plugin_p>. 13018 13019 Taken all together, XS code to install a keyword plugin should typically 13020 look something like this: 13021 13022 static Perl_keyword_plugin_t next_keyword_plugin; 13023 static OP *my_keyword_plugin(pTHX_ 13024 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr) 13025 { 13026 if (memEQs(keyword_ptr, keyword_len, 13027 "my_new_keyword")) { 13028 ... 13029 } else { 13030 return next_keyword_plugin(aTHX_ 13031 keyword_ptr, keyword_len, op_ptr); 13032 } 13033 } 13034 BOOT: 13035 wrap_keyword_plugin(my_keyword_plugin, 13036 &next_keyword_plugin); 13037 13038 Direct access to L</PL_keyword_plugin> should be avoided. 13039 13040 =cut 13041 */ 13042 13043 void 13044 Perl_wrap_keyword_plugin(pTHX_ 13045 Perl_keyword_plugin_t new_plugin, Perl_keyword_plugin_t *old_plugin_p) 13046 { 13047 13048 PERL_UNUSED_CONTEXT; 13049 PERL_ARGS_ASSERT_WRAP_KEYWORD_PLUGIN; 13050 if (*old_plugin_p) return; 13051 KEYWORD_PLUGIN_MUTEX_LOCK; 13052 if (!*old_plugin_p) { 13053 *old_plugin_p = PL_keyword_plugin; 13054 PL_keyword_plugin = new_plugin; 13055 } 13056 KEYWORD_PLUGIN_MUTEX_UNLOCK; 13057 } 13058 13059 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p) 13060 static void 13061 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof) 13062 { 13063 SAVEI32(PL_lex_brackets); 13064 if (PL_lex_brackets > 100) 13065 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); 13066 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF; 13067 SAVEI32(PL_lex_allbrackets); 13068 PL_lex_allbrackets = 0; 13069 SAVEI8(PL_lex_fakeeof); 13070 PL_lex_fakeeof = (U8)fakeeof; 13071 if(yyparse(gramtype) && !PL_parser->error_count) 13072 qerror(Perl_mess(aTHX_ "Parse error")); 13073 } 13074 13075 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p) 13076 static OP * 13077 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof) 13078 { 13079 OP *o; 13080 ENTER; 13081 SAVEVPTR(PL_eval_root); 13082 PL_eval_root = NULL; 13083 parse_recdescent(gramtype, fakeeof); 13084 o = PL_eval_root; 13085 LEAVE; 13086 return o; 13087 } 13088 13089 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f) 13090 static OP * 13091 S_parse_expr(pTHX_ I32 fakeeof, U32 flags) 13092 { 13093 OP *exprop; 13094 if (flags & ~PARSE_OPTIONAL) 13095 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr"); 13096 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof); 13097 if (!exprop && !(flags & PARSE_OPTIONAL)) { 13098 if (!PL_parser->error_count) 13099 qerror(Perl_mess(aTHX_ "Parse error")); 13100 exprop = newOP(OP_NULL, 0); 13101 } 13102 return exprop; 13103 } 13104 13105 /* 13106 =for apidoc parse_arithexpr 13107 13108 Parse a Perl arithmetic expression. This may contain operators of precedence 13109 down to the bit shift operators. The expression must be followed (and thus 13110 terminated) either by a comparison or lower-precedence operator or by 13111 something that would normally terminate an expression such as semicolon. 13112 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional, 13113 otherwise it is mandatory. It is up to the caller to ensure that the 13114 dynamic parser state (L</PL_parser> et al) is correctly set to reflect 13115 the source of the code to be parsed and the lexical context for the 13116 expression. 13117 13118 The op tree representing the expression is returned. If an optional 13119 expression is absent, a null pointer is returned, otherwise the pointer 13120 will be non-null. 13121 13122 If an error occurs in parsing or compilation, in most cases a valid op 13123 tree is returned anyway. The error is reflected in the parser state, 13124 normally resulting in a single exception at the top level of parsing 13125 which covers all the compilation errors that occurred. Some compilation 13126 errors, however, will throw an exception immediately. 13127 13128 =for apidoc Amnh||PARSE_OPTIONAL 13129 13130 =cut 13131 13132 */ 13133 13134 OP * 13135 Perl_parse_arithexpr(pTHX_ U32 flags) 13136 { 13137 return parse_expr(LEX_FAKEEOF_COMPARE, flags); 13138 } 13139 13140 /* 13141 =for apidoc parse_termexpr 13142 13143 Parse a Perl term expression. This may contain operators of precedence 13144 down to the assignment operators. The expression must be followed (and thus 13145 terminated) either by a comma or lower-precedence operator or by 13146 something that would normally terminate an expression such as semicolon. 13147 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional, 13148 otherwise it is mandatory. It is up to the caller to ensure that the 13149 dynamic parser state (L</PL_parser> et al) is correctly set to reflect 13150 the source of the code to be parsed and the lexical context for the 13151 expression. 13152 13153 The op tree representing the expression is returned. If an optional 13154 expression is absent, a null pointer is returned, otherwise the pointer 13155 will be non-null. 13156 13157 If an error occurs in parsing or compilation, in most cases a valid op 13158 tree is returned anyway. The error is reflected in the parser state, 13159 normally resulting in a single exception at the top level of parsing 13160 which covers all the compilation errors that occurred. Some compilation 13161 errors, however, will throw an exception immediately. 13162 13163 =cut 13164 */ 13165 13166 OP * 13167 Perl_parse_termexpr(pTHX_ U32 flags) 13168 { 13169 return parse_expr(LEX_FAKEEOF_COMMA, flags); 13170 } 13171 13172 /* 13173 =for apidoc parse_listexpr 13174 13175 Parse a Perl list expression. This may contain operators of precedence 13176 down to the comma operator. The expression must be followed (and thus 13177 terminated) either by a low-precedence logic operator such as C<or> or by 13178 something that would normally terminate an expression such as semicolon. 13179 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional, 13180 otherwise it is mandatory. It is up to the caller to ensure that the 13181 dynamic parser state (L</PL_parser> et al) is correctly set to reflect 13182 the source of the code to be parsed and the lexical context for the 13183 expression. 13184 13185 The op tree representing the expression is returned. If an optional 13186 expression is absent, a null pointer is returned, otherwise the pointer 13187 will be non-null. 13188 13189 If an error occurs in parsing or compilation, in most cases a valid op 13190 tree is returned anyway. The error is reflected in the parser state, 13191 normally resulting in a single exception at the top level of parsing 13192 which covers all the compilation errors that occurred. Some compilation 13193 errors, however, will throw an exception immediately. 13194 13195 =cut 13196 */ 13197 13198 OP * 13199 Perl_parse_listexpr(pTHX_ U32 flags) 13200 { 13201 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags); 13202 } 13203 13204 /* 13205 =for apidoc parse_fullexpr 13206 13207 Parse a single complete Perl expression. This allows the full 13208 expression grammar, including the lowest-precedence operators such 13209 as C<or>. The expression must be followed (and thus terminated) by a 13210 token that an expression would normally be terminated by: end-of-file, 13211 closing bracketing punctuation, semicolon, or one of the keywords that 13212 signals a postfix expression-statement modifier. If C<flags> has the 13213 C<PARSE_OPTIONAL> bit set, then the expression is optional, otherwise it is 13214 mandatory. It is up to the caller to ensure that the dynamic parser 13215 state (L</PL_parser> et al) is correctly set to reflect the source of 13216 the code to be parsed and the lexical context for the expression. 13217 13218 The op tree representing the expression is returned. If an optional 13219 expression is absent, a null pointer is returned, otherwise the pointer 13220 will be non-null. 13221 13222 If an error occurs in parsing or compilation, in most cases a valid op 13223 tree is returned anyway. The error is reflected in the parser state, 13224 normally resulting in a single exception at the top level of parsing 13225 which covers all the compilation errors that occurred. Some compilation 13226 errors, however, will throw an exception immediately. 13227 13228 =cut 13229 */ 13230 13231 OP * 13232 Perl_parse_fullexpr(pTHX_ U32 flags) 13233 { 13234 return parse_expr(LEX_FAKEEOF_NONEXPR, flags); 13235 } 13236 13237 /* 13238 =for apidoc parse_block 13239 13240 Parse a single complete Perl code block. This consists of an opening 13241 brace, a sequence of statements, and a closing brace. The block 13242 constitutes a lexical scope, so C<my> variables and various compile-time 13243 effects can be contained within it. It is up to the caller to ensure 13244 that the dynamic parser state (L</PL_parser> et al) is correctly set to 13245 reflect the source of the code to be parsed and the lexical context for 13246 the statement. 13247 13248 The op tree representing the code block is returned. This is always a 13249 real op, never a null pointer. It will normally be a C<lineseq> list, 13250 including C<nextstate> or equivalent ops. No ops to construct any kind 13251 of runtime scope are included by virtue of it being a block. 13252 13253 If an error occurs in parsing or compilation, in most cases a valid op 13254 tree (most likely null) is returned anyway. The error is reflected in 13255 the parser state, normally resulting in a single exception at the top 13256 level of parsing which covers all the compilation errors that occurred. 13257 Some compilation errors, however, will throw an exception immediately. 13258 13259 The C<flags> parameter is reserved for future use, and must always 13260 be zero. 13261 13262 =cut 13263 */ 13264 13265 OP * 13266 Perl_parse_block(pTHX_ U32 flags) 13267 { 13268 if (flags) 13269 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block"); 13270 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER); 13271 } 13272 13273 /* 13274 =for apidoc parse_barestmt 13275 13276 Parse a single unadorned Perl statement. This may be a normal imperative 13277 statement or a declaration that has compile-time effect. It does not 13278 include any label or other affixture. It is up to the caller to ensure 13279 that the dynamic parser state (L</PL_parser> et al) is correctly set to 13280 reflect the source of the code to be parsed and the lexical context for 13281 the statement. 13282 13283 The op tree representing the statement is returned. This may be a 13284 null pointer if the statement is null, for example if it was actually 13285 a subroutine definition (which has compile-time side effects). If not 13286 null, it will be ops directly implementing the statement, suitable to 13287 pass to L</newSTATEOP>. It will not normally include a C<nextstate> or 13288 equivalent op (except for those embedded in a scope contained entirely 13289 within the statement). 13290 13291 If an error occurs in parsing or compilation, in most cases a valid op 13292 tree (most likely null) is returned anyway. The error is reflected in 13293 the parser state, normally resulting in a single exception at the top 13294 level of parsing which covers all the compilation errors that occurred. 13295 Some compilation errors, however, will throw an exception immediately. 13296 13297 The C<flags> parameter is reserved for future use, and must always 13298 be zero. 13299 13300 =cut 13301 */ 13302 13303 OP * 13304 Perl_parse_barestmt(pTHX_ U32 flags) 13305 { 13306 if (flags) 13307 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt"); 13308 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER); 13309 } 13310 13311 /* 13312 =for apidoc parse_label 13313 13314 Parse a single label, possibly optional, of the type that may prefix a 13315 Perl statement. It is up to the caller to ensure that the dynamic parser 13316 state (L</PL_parser> et al) is correctly set to reflect the source of 13317 the code to be parsed. If C<flags> has the C<PARSE_OPTIONAL> bit set, then the 13318 label is optional, otherwise it is mandatory. 13319 13320 The name of the label is returned in the form of a fresh scalar. If an 13321 optional label is absent, a null pointer is returned. 13322 13323 If an error occurs in parsing, which can only occur if the label is 13324 mandatory, a valid label is returned anyway. The error is reflected in 13325 the parser state, normally resulting in a single exception at the top 13326 level of parsing which covers all the compilation errors that occurred. 13327 13328 =cut 13329 */ 13330 13331 SV * 13332 Perl_parse_label(pTHX_ U32 flags) 13333 { 13334 if (flags & ~PARSE_OPTIONAL) 13335 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label"); 13336 if (PL_nexttoke) { 13337 PL_parser->yychar = yylex(); 13338 if (PL_parser->yychar == LABEL) { 13339 SV * const labelsv = cSVOPx(pl_yylval.opval)->op_sv; 13340 PL_parser->yychar = YYEMPTY; 13341 cSVOPx(pl_yylval.opval)->op_sv = NULL; 13342 op_free(pl_yylval.opval); 13343 return labelsv; 13344 } else { 13345 yyunlex(); 13346 goto no_label; 13347 } 13348 } else { 13349 char *s, *t; 13350 STRLEN wlen, bufptr_pos; 13351 lex_read_space(0); 13352 t = s = PL_bufptr; 13353 if (!isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) 13354 goto no_label; 13355 t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen); 13356 if (word_takes_any_delimiter(s, wlen)) 13357 goto no_label; 13358 bufptr_pos = s - SvPVX(PL_linestr); 13359 PL_bufptr = t; 13360 lex_read_space(LEX_KEEP_PREVIOUS); 13361 t = PL_bufptr; 13362 s = SvPVX(PL_linestr) + bufptr_pos; 13363 if (t[0] == ':' && t[1] != ':') { 13364 PL_oldoldbufptr = PL_oldbufptr; 13365 PL_oldbufptr = s; 13366 PL_bufptr = t+1; 13367 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0); 13368 } else { 13369 PL_bufptr = s; 13370 no_label: 13371 if (flags & PARSE_OPTIONAL) { 13372 return NULL; 13373 } else { 13374 qerror(Perl_mess(aTHX_ "Parse error")); 13375 return newSVpvs("x"); 13376 } 13377 } 13378 } 13379 } 13380 13381 /* 13382 =for apidoc parse_fullstmt 13383 13384 Parse a single complete Perl statement. This may be a normal imperative 13385 statement or a declaration that has compile-time effect, and may include 13386 optional labels. It is up to the caller to ensure that the dynamic 13387 parser state (L</PL_parser> et al) is correctly set to reflect the source 13388 of the code to be parsed and the lexical context for the statement. 13389 13390 The op tree representing the statement is returned. This may be a 13391 null pointer if the statement is null, for example if it was actually 13392 a subroutine definition (which has compile-time side effects). If not 13393 null, it will be the result of a L</newSTATEOP> call, normally including 13394 a C<nextstate> or equivalent op. 13395 13396 If an error occurs in parsing or compilation, in most cases a valid op 13397 tree (most likely null) is returned anyway. The error is reflected in 13398 the parser state, normally resulting in a single exception at the top 13399 level of parsing which covers all the compilation errors that occurred. 13400 Some compilation errors, however, will throw an exception immediately. 13401 13402 The C<flags> parameter is reserved for future use, and must always 13403 be zero. 13404 13405 =cut 13406 */ 13407 13408 OP * 13409 Perl_parse_fullstmt(pTHX_ U32 flags) 13410 { 13411 if (flags) 13412 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt"); 13413 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER); 13414 } 13415 13416 /* 13417 =for apidoc parse_stmtseq 13418 13419 Parse a sequence of zero or more Perl statements. These may be normal 13420 imperative statements, including optional labels, or declarations 13421 that have compile-time effect, or any mixture thereof. The statement 13422 sequence ends when a closing brace or end-of-file is encountered in a 13423 place where a new statement could have validly started. It is up to 13424 the caller to ensure that the dynamic parser state (L</PL_parser> et al) 13425 is correctly set to reflect the source of the code to be parsed and the 13426 lexical context for the statements. 13427 13428 The op tree representing the statement sequence is returned. This may 13429 be a null pointer if the statements were all null, for example if there 13430 were no statements or if there were only subroutine definitions (which 13431 have compile-time side effects). If not null, it will be a C<lineseq> 13432 list, normally including C<nextstate> or equivalent ops. 13433 13434 If an error occurs in parsing or compilation, in most cases a valid op 13435 tree is returned anyway. The error is reflected in the parser state, 13436 normally resulting in a single exception at the top level of parsing 13437 which covers all the compilation errors that occurred. Some compilation 13438 errors, however, will throw an exception immediately. 13439 13440 The C<flags> parameter is reserved for future use, and must always 13441 be zero. 13442 13443 =cut 13444 */ 13445 13446 OP * 13447 Perl_parse_stmtseq(pTHX_ U32 flags) 13448 { 13449 OP *stmtseqop; 13450 I32 c; 13451 if (flags) 13452 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq"); 13453 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING); 13454 c = lex_peek_unichar(0); 13455 if (c != -1 && c != /*{*/'}') 13456 qerror(Perl_mess(aTHX_ "Parse error")); 13457 return stmtseqop; 13458 } 13459 13460 /* 13461 =for apidoc parse_subsignature 13462 13463 Parse a subroutine signature declaration. This is the contents of the 13464 parentheses following a named or anonymous subroutine declaration when the 13465 C<signatures> feature is enabled. Note that this function neither expects 13466 nor consumes the opening and closing parentheses around the signature; it 13467 is the caller's job to handle these. 13468 13469 This function must only be called during parsing of a subroutine; after 13470 L</start_subparse> has been called. It might allocate lexical variables on 13471 the pad for the current subroutine. 13472 13473 The op tree to unpack the arguments from the stack at runtime is returned. 13474 This op tree should appear at the beginning of the compiled function. The 13475 caller may wish to use L</op_append_list> to build their function body 13476 after it, or splice it together with the body before calling L</newATTRSUB>. 13477 13478 The C<flags> parameter is reserved for future use, and must always 13479 be zero. 13480 13481 =cut 13482 */ 13483 13484 OP * 13485 Perl_parse_subsignature(pTHX_ U32 flags) 13486 { 13487 if (flags) 13488 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_subsignature"); 13489 return parse_recdescent_for_op(GRAMSUBSIGNATURE, LEX_FAKEEOF_NONEXPR); 13490 } 13491 13492 /* 13493 * ex: set ts=8 sts=4 sw=4 et: 13494 */ 13495