1 /* toke.c 2 * 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others 5 * 6 * You may distribute under the terms of either the GNU General Public 7 * License or the Artistic License, as specified in the README file. 8 * 9 */ 10 11 /* 12 * 'It all comes from here, the stench and the peril.' --Frodo 13 * 14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"] 15 */ 16 17 /* 18 * This file is the lexer for Perl. It's closely linked to the 19 * parser, perly.y. 20 * 21 * The main routine is yylex(), which returns the next token. 22 */ 23 24 /* 25 =head1 Lexer interface 26 27 This is the lower layer of the Perl parser, managing characters and tokens. 28 29 =for apidoc AmU|yy_parser *|PL_parser 30 31 Pointer to a structure encapsulating the state of the parsing operation 32 currently in progress. The pointer can be locally changed to perform 33 a nested parse without interfering with the state of an outer parse. 34 Individual members of C<PL_parser> have their own documentation. 35 36 =cut 37 */ 38 39 #include "EXTERN.h" 40 #define PERL_IN_TOKE_C 41 #include "perl.h" 42 #include "dquote_static.c" 43 44 #define new_constant(a,b,c,d,e,f,g) \ 45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g) 46 47 #define pl_yylval (PL_parser->yylval) 48 49 /* XXX temporary backwards compatibility */ 50 #define PL_lex_brackets (PL_parser->lex_brackets) 51 #define PL_lex_allbrackets (PL_parser->lex_allbrackets) 52 #define PL_lex_fakeeof (PL_parser->lex_fakeeof) 53 #define PL_lex_brackstack (PL_parser->lex_brackstack) 54 #define PL_lex_casemods (PL_parser->lex_casemods) 55 #define PL_lex_casestack (PL_parser->lex_casestack) 56 #define PL_lex_defer (PL_parser->lex_defer) 57 #define PL_lex_dojoin (PL_parser->lex_dojoin) 58 #define PL_lex_expect (PL_parser->lex_expect) 59 #define PL_lex_formbrack (PL_parser->lex_formbrack) 60 #define PL_lex_inpat (PL_parser->lex_inpat) 61 #define PL_lex_inwhat (PL_parser->lex_inwhat) 62 #define PL_lex_op (PL_parser->lex_op) 63 #define PL_lex_repl (PL_parser->lex_repl) 64 #define PL_lex_starts (PL_parser->lex_starts) 65 #define PL_lex_stuff (PL_parser->lex_stuff) 66 #define PL_multi_start (PL_parser->multi_start) 67 #define PL_multi_open (PL_parser->multi_open) 68 #define PL_multi_close (PL_parser->multi_close) 69 #define PL_preambled (PL_parser->preambled) 70 #define PL_sublex_info (PL_parser->sublex_info) 71 #define PL_linestr (PL_parser->linestr) 72 #define PL_expect (PL_parser->expect) 73 #define PL_copline (PL_parser->copline) 74 #define PL_bufptr (PL_parser->bufptr) 75 #define PL_oldbufptr (PL_parser->oldbufptr) 76 #define PL_oldoldbufptr (PL_parser->oldoldbufptr) 77 #define PL_linestart (PL_parser->linestart) 78 #define PL_bufend (PL_parser->bufend) 79 #define PL_last_uni (PL_parser->last_uni) 80 #define PL_last_lop (PL_parser->last_lop) 81 #define PL_last_lop_op (PL_parser->last_lop_op) 82 #define PL_lex_state (PL_parser->lex_state) 83 #define PL_rsfp (PL_parser->rsfp) 84 #define PL_rsfp_filters (PL_parser->rsfp_filters) 85 #define PL_in_my (PL_parser->in_my) 86 #define PL_in_my_stash (PL_parser->in_my_stash) 87 #define PL_tokenbuf (PL_parser->tokenbuf) 88 #define PL_multi_end (PL_parser->multi_end) 89 #define PL_error_count (PL_parser->error_count) 90 91 #ifdef PERL_MAD 92 # define PL_endwhite (PL_parser->endwhite) 93 # define PL_faketokens (PL_parser->faketokens) 94 # define PL_lasttoke (PL_parser->lasttoke) 95 # define PL_nextwhite (PL_parser->nextwhite) 96 # define PL_realtokenstart (PL_parser->realtokenstart) 97 # define PL_skipwhite (PL_parser->skipwhite) 98 # define PL_thisclose (PL_parser->thisclose) 99 # define PL_thismad (PL_parser->thismad) 100 # define PL_thisopen (PL_parser->thisopen) 101 # define PL_thisstuff (PL_parser->thisstuff) 102 # define PL_thistoken (PL_parser->thistoken) 103 # define PL_thiswhite (PL_parser->thiswhite) 104 # define PL_thiswhite (PL_parser->thiswhite) 105 # define PL_nexttoke (PL_parser->nexttoke) 106 # define PL_curforce (PL_parser->curforce) 107 #else 108 # define PL_nexttoke (PL_parser->nexttoke) 109 # define PL_nexttype (PL_parser->nexttype) 110 # define PL_nextval (PL_parser->nextval) 111 #endif 112 113 static const char* const ident_too_long = "Identifier too long"; 114 115 #ifdef PERL_MAD 116 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; } 117 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val 118 #else 119 # define CURMAD(slot,sv) 120 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke] 121 #endif 122 123 #define XENUMMASK 0x3f 124 #define XFAKEEOF 0x40 125 #define XFAKEBRACK 0x80 126 127 #ifdef USE_UTF8_SCRIPTS 128 # define UTF (!IN_BYTES) 129 #else 130 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8))) 131 #endif 132 133 /* The maximum number of characters preceding the unrecognized one to display */ 134 #define UNRECOGNIZED_PRECEDE_COUNT 10 135 136 /* In variables named $^X, these are the legal values for X. 137 * 1999-02-27 mjd-perl-patch@plover.com */ 138 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x))) 139 140 #define SPACE_OR_TAB(c) isBLANK_A(c) 141 142 /* LEX_* are values for PL_lex_state, the state of the lexer. 143 * They are arranged oddly so that the guard on the switch statement 144 * can get by with a single comparison (if the compiler is smart enough). 145 * 146 * These values refer to the various states within a sublex parse, 147 * i.e. within a double quotish string 148 */ 149 150 /* #define LEX_NOTPARSING 11 is done in perl.h. */ 151 152 #define LEX_NORMAL 10 /* normal code (ie not within "...") */ 153 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */ 154 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */ 155 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */ 156 #define LEX_INTERPSTART 6 /* expecting the start of a $var */ 157 158 /* at end of code, eg "$x" followed by: */ 159 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */ 160 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */ 161 162 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of 163 string or after \E, $foo, etc */ 164 #define LEX_INTERPCONST 2 /* NOT USED */ 165 #define LEX_FORMLINE 1 /* expecting a format line */ 166 #define LEX_KNOWNEXT 0 /* next token known; just return it */ 167 168 169 #ifdef DEBUGGING 170 static const char* const lex_state_names[] = { 171 "KNOWNEXT", 172 "FORMLINE", 173 "INTERPCONST", 174 "INTERPCONCAT", 175 "INTERPENDMAYBE", 176 "INTERPEND", 177 "INTERPSTART", 178 "INTERPPUSH", 179 "INTERPCASEMOD", 180 "INTERPNORMAL", 181 "NORMAL" 182 }; 183 #endif 184 185 #include "keywords.h" 186 187 /* CLINE is a macro that ensures PL_copline has a sane value */ 188 189 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline)) 190 191 #ifdef PERL_MAD 192 # define SKIPSPACE0(s) skipspace0(s) 193 # define SKIPSPACE1(s) skipspace1(s) 194 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv) 195 # define PEEKSPACE(s) skipspace2(s,0) 196 #else 197 # define SKIPSPACE0(s) skipspace(s) 198 # define SKIPSPACE1(s) skipspace(s) 199 # define SKIPSPACE2(s,tsv) skipspace(s) 200 # define PEEKSPACE(s) skipspace(s) 201 #endif 202 203 /* 204 * Convenience functions to return different tokens and prime the 205 * lexer for the next token. They all take an argument. 206 * 207 * TOKEN : generic token (used for '(', DOLSHARP, etc) 208 * OPERATOR : generic operator 209 * AOPERATOR : assignment operator 210 * PREBLOCK : beginning the block after an if, while, foreach, ... 211 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref) 212 * PREREF : *EXPR where EXPR is not a simple identifier 213 * TERM : expression term 214 * POSTDEREF : postfix dereference (->$* ->@[...] etc.) 215 * LOOPX : loop exiting command (goto, last, dump, etc) 216 * FTST : file test operator 217 * FUN0 : zero-argument function 218 * FUN0OP : zero-argument function, with its op created in this file 219 * FUN1 : not used, except for not, which isn't a UNIOP 220 * BOop : bitwise or or xor 221 * BAop : bitwise and 222 * SHop : shift operator 223 * PWop : power operator 224 * PMop : pattern-matching operator 225 * Aop : addition-level operator 226 * Mop : multiplication-level operator 227 * Eop : equality-testing operator 228 * Rop : relational operator <= != gt 229 * 230 * Also see LOP and lop() below. 231 */ 232 233 #ifdef DEBUGGING /* Serve -DT. */ 234 # define REPORT(retval) tokereport((I32)retval, &pl_yylval) 235 #else 236 # define REPORT(retval) (retval) 237 #endif 238 239 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval)) 240 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval)) 241 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval))) 242 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval)) 243 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval)) 244 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval)) 245 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval)) 246 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1])) 247 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX)) 248 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP)) 249 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0)) 250 #define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP)) 251 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1)) 252 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP))) 253 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP))) 254 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP))) 255 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP))) 256 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP)) 257 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP))) 258 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP))) 259 #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP)) 260 #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP)) 261 262 /* This bit of chicanery makes a unary function followed by 263 * a parenthesis into a function with one argument, highest precedence. 264 * The UNIDOR macro is for unary functions that can be followed by the // 265 * operator (such as C<shift // 0>). 266 */ 267 #define UNI3(f,x,have_x) { \ 268 pl_yylval.ival = f; \ 269 if (have_x) PL_expect = x; \ 270 PL_bufptr = s; \ 271 PL_last_uni = PL_oldbufptr; \ 272 PL_last_lop_op = f; \ 273 if (*s == '(') \ 274 return REPORT( (int)FUNC1 ); \ 275 s = PEEKSPACE(s); \ 276 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \ 277 } 278 #define UNI(f) UNI3(f,XTERM,1) 279 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1) 280 #define UNIPROTO(f,optional) { \ 281 if (optional) PL_last_uni = PL_oldbufptr; \ 282 OPERATOR(f); \ 283 } 284 285 #define UNIBRACK(f) UNI3(f,0,0) 286 287 /* grandfather return to old style */ 288 #define OLDLOP(f) \ 289 do { \ 290 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \ 291 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \ 292 pl_yylval.ival = (f); \ 293 PL_expect = XTERM; \ 294 PL_bufptr = s; \ 295 return (int)LSTOP; \ 296 } while(0) 297 298 #define COPLINE_INC_WITH_HERELINES \ 299 STMT_START { \ 300 CopLINE_inc(PL_curcop); \ 301 if (PL_parser->herelines) \ 302 CopLINE(PL_curcop) += PL_parser->herelines, \ 303 PL_parser->herelines = 0; \ 304 } STMT_END 305 /* Called after scan_str to update CopLINE(PL_curcop), but only when there 306 * is no sublex_push to follow. */ 307 #define COPLINE_SET_FROM_MULTI_END \ 308 STMT_START { \ 309 CopLINE_set(PL_curcop, PL_multi_end); \ 310 if (PL_multi_end != PL_multi_start) \ 311 PL_parser->herelines = 0; \ 312 } STMT_END 313 314 315 #ifdef DEBUGGING 316 317 /* how to interpret the pl_yylval associated with the token */ 318 enum token_type { 319 TOKENTYPE_NONE, 320 TOKENTYPE_IVAL, 321 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */ 322 TOKENTYPE_PVAL, 323 TOKENTYPE_OPVAL 324 }; 325 326 static struct debug_tokens { 327 const int token; 328 enum token_type type; 329 const char *name; 330 } const debug_tokens[] = 331 { 332 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" }, 333 { ANDAND, TOKENTYPE_NONE, "ANDAND" }, 334 { ANDOP, TOKENTYPE_NONE, "ANDOP" }, 335 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" }, 336 { ARROW, TOKENTYPE_NONE, "ARROW" }, 337 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" }, 338 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" }, 339 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" }, 340 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" }, 341 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" }, 342 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" }, 343 { DO, TOKENTYPE_NONE, "DO" }, 344 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" }, 345 { DORDOR, TOKENTYPE_NONE, "DORDOR" }, 346 { DOROP, TOKENTYPE_OPNUM, "DOROP" }, 347 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" }, 348 { ELSE, TOKENTYPE_NONE, "ELSE" }, 349 { ELSIF, TOKENTYPE_IVAL, "ELSIF" }, 350 { EQOP, TOKENTYPE_OPNUM, "EQOP" }, 351 { FOR, TOKENTYPE_IVAL, "FOR" }, 352 { FORMAT, TOKENTYPE_NONE, "FORMAT" }, 353 { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" }, 354 { FORMRBRACK, TOKENTYPE_NONE, "FORMRBRACK" }, 355 { FUNC, TOKENTYPE_OPNUM, "FUNC" }, 356 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" }, 357 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" }, 358 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" }, 359 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" }, 360 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" }, 361 { GIVEN, TOKENTYPE_IVAL, "GIVEN" }, 362 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" }, 363 { IF, TOKENTYPE_IVAL, "IF" }, 364 { LABEL, TOKENTYPE_PVAL, "LABEL" }, 365 { LOCAL, TOKENTYPE_IVAL, "LOCAL" }, 366 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" }, 367 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" }, 368 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" }, 369 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" }, 370 { METHOD, TOKENTYPE_OPVAL, "METHOD" }, 371 { MULOP, TOKENTYPE_OPNUM, "MULOP" }, 372 { MY, TOKENTYPE_IVAL, "MY" }, 373 { NOAMP, TOKENTYPE_NONE, "NOAMP" }, 374 { NOTOP, TOKENTYPE_NONE, "NOTOP" }, 375 { OROP, TOKENTYPE_IVAL, "OROP" }, 376 { OROR, TOKENTYPE_NONE, "OROR" }, 377 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" }, 378 { PEG, TOKENTYPE_NONE, "PEG" }, 379 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" }, 380 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" }, 381 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" }, 382 { POSTJOIN, TOKENTYPE_NONE, "POSTJOIN" }, 383 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" }, 384 { POSTINC, TOKENTYPE_NONE, "POSTINC" }, 385 { POWOP, TOKENTYPE_OPNUM, "POWOP" }, 386 { PREDEC, TOKENTYPE_NONE, "PREDEC" }, 387 { PREINC, TOKENTYPE_NONE, "PREINC" }, 388 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" }, 389 { QWLIST, TOKENTYPE_OPVAL, "QWLIST" }, 390 { REFGEN, TOKENTYPE_NONE, "REFGEN" }, 391 { RELOP, TOKENTYPE_OPNUM, "RELOP" }, 392 { REQUIRE, TOKENTYPE_NONE, "REQUIRE" }, 393 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" }, 394 { SUB, TOKENTYPE_NONE, "SUB" }, 395 { THING, TOKENTYPE_OPVAL, "THING" }, 396 { UMINUS, TOKENTYPE_NONE, "UMINUS" }, 397 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" }, 398 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" }, 399 { UNLESS, TOKENTYPE_IVAL, "UNLESS" }, 400 { UNTIL, TOKENTYPE_IVAL, "UNTIL" }, 401 { USE, TOKENTYPE_IVAL, "USE" }, 402 { WHEN, TOKENTYPE_IVAL, "WHEN" }, 403 { WHILE, TOKENTYPE_IVAL, "WHILE" }, 404 { WORD, TOKENTYPE_OPVAL, "WORD" }, 405 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" }, 406 { 0, TOKENTYPE_NONE, NULL } 407 }; 408 409 /* dump the returned token in rv, plus any optional arg in pl_yylval */ 410 411 STATIC int 412 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp) 413 { 414 dVAR; 415 416 PERL_ARGS_ASSERT_TOKEREPORT; 417 418 if (DEBUG_T_TEST) { 419 const char *name = NULL; 420 enum token_type type = TOKENTYPE_NONE; 421 const struct debug_tokens *p; 422 SV* const report = newSVpvs("<== "); 423 424 for (p = debug_tokens; p->token; p++) { 425 if (p->token == (int)rv) { 426 name = p->name; 427 type = p->type; 428 break; 429 } 430 } 431 if (name) 432 Perl_sv_catpv(aTHX_ report, name); 433 else if ((char)rv > ' ' && (char)rv <= '~') 434 { 435 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv); 436 if ((char)rv == 'p') 437 sv_catpvs(report, " (pending identifier)"); 438 } 439 else if (!rv) 440 sv_catpvs(report, "EOF"); 441 else 442 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv); 443 switch (type) { 444 case TOKENTYPE_NONE: 445 break; 446 case TOKENTYPE_IVAL: 447 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival); 448 break; 449 case TOKENTYPE_OPNUM: 450 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)", 451 PL_op_name[lvalp->ival]); 452 break; 453 case TOKENTYPE_PVAL: 454 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval); 455 break; 456 case TOKENTYPE_OPVAL: 457 if (lvalp->opval) { 458 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)", 459 PL_op_name[lvalp->opval->op_type]); 460 if (lvalp->opval->op_type == OP_CONST) { 461 Perl_sv_catpvf(aTHX_ report, " %s", 462 SvPEEK(cSVOPx_sv(lvalp->opval))); 463 } 464 465 } 466 else 467 sv_catpvs(report, "(opval=null)"); 468 break; 469 } 470 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report)); 471 }; 472 return (int)rv; 473 } 474 475 476 /* print the buffer with suitable escapes */ 477 478 STATIC void 479 S_printbuf(pTHX_ const char *const fmt, const char *const s) 480 { 481 SV* const tmp = newSVpvs(""); 482 483 PERL_ARGS_ASSERT_PRINTBUF; 484 485 GCC_DIAG_IGNORE(-Wformat-nonliteral); /* fmt checked by caller */ 486 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60)); 487 GCC_DIAG_RESTORE; 488 SvREFCNT_dec(tmp); 489 } 490 491 #endif 492 493 static int 494 S_deprecate_commaless_var_list(pTHX) { 495 PL_expect = XTERM; 496 deprecate("comma-less variable list"); 497 return REPORT(','); /* grandfather non-comma-format format */ 498 } 499 500 /* 501 * S_ao 502 * 503 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR 504 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN 505 */ 506 507 STATIC int 508 S_ao(pTHX_ int toketype) 509 { 510 dVAR; 511 if (*PL_bufptr == '=') { 512 PL_bufptr++; 513 if (toketype == ANDAND) 514 pl_yylval.ival = OP_ANDASSIGN; 515 else if (toketype == OROR) 516 pl_yylval.ival = OP_ORASSIGN; 517 else if (toketype == DORDOR) 518 pl_yylval.ival = OP_DORASSIGN; 519 toketype = ASSIGNOP; 520 } 521 return toketype; 522 } 523 524 /* 525 * S_no_op 526 * When Perl expects an operator and finds something else, no_op 527 * prints the warning. It always prints "<something> found where 528 * operator expected. It prints "Missing semicolon on previous line?" 529 * if the surprise occurs at the start of the line. "do you need to 530 * predeclare ..." is printed out for code like "sub bar; foo bar $x" 531 * where the compiler doesn't know if foo is a method call or a function. 532 * It prints "Missing operator before end of line" if there's nothing 533 * after the missing operator, or "... before <...>" if there is something 534 * after the missing operator. 535 */ 536 537 STATIC void 538 S_no_op(pTHX_ const char *const what, char *s) 539 { 540 dVAR; 541 char * const oldbp = PL_bufptr; 542 const bool is_first = (PL_oldbufptr == PL_linestart); 543 544 PERL_ARGS_ASSERT_NO_OP; 545 546 if (!s) 547 s = oldbp; 548 else 549 PL_bufptr = s; 550 yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0); 551 if (ckWARN_d(WARN_SYNTAX)) { 552 if (is_first) 553 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 554 "\t(Missing semicolon on previous line?)\n"); 555 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) { 556 const char *t; 557 for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':'); 558 t += UTF ? UTF8SKIP(t) : 1) 559 NOOP; 560 if (t < PL_bufptr && isSPACE(*t)) 561 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 562 "\t(Do you need to predeclare %"UTF8f"?)\n", 563 UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr)); 564 } 565 else { 566 assert(s >= oldbp); 567 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 568 "\t(Missing operator before %"UTF8f"?)\n", 569 UTF8fARG(UTF, s - oldbp, oldbp)); 570 } 571 } 572 PL_bufptr = oldbp; 573 } 574 575 /* 576 * S_missingterm 577 * Complain about missing quote/regexp/heredoc terminator. 578 * If it's called with NULL then it cauterizes the line buffer. 579 * If we're in a delimited string and the delimiter is a control 580 * character, it's reformatted into a two-char sequence like ^C. 581 * This is fatal. 582 */ 583 584 STATIC void 585 S_missingterm(pTHX_ char *s) 586 { 587 dVAR; 588 char tmpbuf[3]; 589 char q; 590 if (s) { 591 char * const nl = strrchr(s,'\n'); 592 if (nl) 593 *nl = '\0'; 594 } 595 else if ((U8) PL_multi_close < 32) { 596 *tmpbuf = '^'; 597 tmpbuf[1] = (char)toCTRL(PL_multi_close); 598 tmpbuf[2] = '\0'; 599 s = tmpbuf; 600 } 601 else { 602 *tmpbuf = (char)PL_multi_close; 603 tmpbuf[1] = '\0'; 604 s = tmpbuf; 605 } 606 q = strchr(s,'"') ? '\'' : '"'; 607 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q); 608 } 609 610 #include "feature.h" 611 612 /* 613 * Check whether the named feature is enabled. 614 */ 615 bool 616 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen) 617 { 618 dVAR; 619 char he_name[8 + MAX_FEATURE_LEN] = "feature_"; 620 621 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED; 622 623 assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM); 624 625 if (namelen > MAX_FEATURE_LEN) 626 return FALSE; 627 memcpy(&he_name[8], name, namelen); 628 629 return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0, 630 REFCOUNTED_HE_EXISTS)); 631 } 632 633 /* 634 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and 635 * utf16-to-utf8-reversed. 636 */ 637 638 #ifdef PERL_CR_FILTER 639 static void 640 strip_return(SV *sv) 641 { 642 const char *s = SvPVX_const(sv); 643 const char * const e = s + SvCUR(sv); 644 645 PERL_ARGS_ASSERT_STRIP_RETURN; 646 647 /* outer loop optimized to do nothing if there are no CR-LFs */ 648 while (s < e) { 649 if (*s++ == '\r' && *s == '\n') { 650 /* hit a CR-LF, need to copy the rest */ 651 char *d = s - 1; 652 *d++ = *s++; 653 while (s < e) { 654 if (*s == '\r' && s[1] == '\n') 655 s++; 656 *d++ = *s++; 657 } 658 SvCUR(sv) -= s - d; 659 return; 660 } 661 } 662 } 663 664 STATIC I32 665 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen) 666 { 667 const I32 count = FILTER_READ(idx+1, sv, maxlen); 668 if (count > 0 && !maxlen) 669 strip_return(sv); 670 return count; 671 } 672 #endif 673 674 /* 675 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags 676 677 Creates and initialises a new lexer/parser state object, supplying 678 a context in which to lex and parse from a new source of Perl code. 679 A pointer to the new state object is placed in L</PL_parser>. An entry 680 is made on the save stack so that upon unwinding the new state object 681 will be destroyed and the former value of L</PL_parser> will be restored. 682 Nothing else need be done to clean up the parsing context. 683 684 The code to be parsed comes from I<line> and I<rsfp>. I<line>, if 685 non-null, provides a string (in SV form) containing code to be parsed. 686 A copy of the string is made, so subsequent modification of I<line> 687 does not affect parsing. I<rsfp>, if non-null, provides an input stream 688 from which code will be read to be parsed. If both are non-null, the 689 code in I<line> comes first and must consist of complete lines of input, 690 and I<rsfp> supplies the remainder of the source. 691 692 The I<flags> parameter is reserved for future use. Currently it is only 693 used by perl internally, so extensions should always pass zero. 694 695 =cut 696 */ 697 698 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it 699 can share filters with the current parser. 700 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the 701 caller, hence isn't owned by the parser, so shouldn't be closed on parser 702 destruction. This is used to handle the case of defaulting to reading the 703 script from the standard input because no filename was given on the command 704 line (without getting confused by situation where STDIN has been closed, so 705 the script handle is opened on fd 0) */ 706 707 void 708 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags) 709 { 710 dVAR; 711 const char *s = NULL; 712 yy_parser *parser, *oparser; 713 if (flags && flags & ~LEX_START_FLAGS) 714 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start"); 715 716 /* create and initialise a parser */ 717 718 Newxz(parser, 1, yy_parser); 719 parser->old_parser = oparser = PL_parser; 720 PL_parser = parser; 721 722 parser->stack = NULL; 723 parser->ps = NULL; 724 parser->stack_size = 0; 725 726 /* on scope exit, free this parser and restore any outer one */ 727 SAVEPARSER(parser); 728 parser->saved_curcop = PL_curcop; 729 730 /* initialise lexer state */ 731 732 #ifdef PERL_MAD 733 parser->curforce = -1; 734 #else 735 parser->nexttoke = 0; 736 #endif 737 parser->error_count = oparser ? oparser->error_count : 0; 738 parser->copline = parser->preambling = NOLINE; 739 parser->lex_state = LEX_NORMAL; 740 parser->expect = XSTATE; 741 parser->rsfp = rsfp; 742 parser->rsfp_filters = 743 !(flags & LEX_START_SAME_FILTER) || !oparser 744 ? NULL 745 : MUTABLE_AV(SvREFCNT_inc( 746 oparser->rsfp_filters 747 ? oparser->rsfp_filters 748 : (oparser->rsfp_filters = newAV()) 749 )); 750 751 Newx(parser->lex_brackstack, 120, char); 752 Newx(parser->lex_casestack, 12, char); 753 *parser->lex_casestack = '\0'; 754 Newxz(parser->lex_shared, 1, LEXSHARED); 755 756 if (line) { 757 STRLEN len; 758 s = SvPV_const(line, len); 759 parser->linestr = flags & LEX_START_COPIED 760 ? SvREFCNT_inc_simple_NN(line) 761 : newSVpvn_flags(s, len, SvUTF8(line)); 762 sv_catpvn(parser->linestr, "\n;", rsfp ? 1 : 2); 763 } else { 764 parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2); 765 } 766 parser->oldoldbufptr = 767 parser->oldbufptr = 768 parser->bufptr = 769 parser->linestart = SvPVX(parser->linestr); 770 parser->bufend = parser->bufptr + SvCUR(parser->linestr); 771 parser->last_lop = parser->last_uni = NULL; 772 773 assert(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES 774 |LEX_DONT_CLOSE_RSFP)); 775 parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES 776 |LEX_DONT_CLOSE_RSFP)); 777 778 parser->in_pod = parser->filtered = 0; 779 } 780 781 782 /* delete a parser object */ 783 784 void 785 Perl_parser_free(pTHX_ const yy_parser *parser) 786 { 787 PERL_ARGS_ASSERT_PARSER_FREE; 788 789 PL_curcop = parser->saved_curcop; 790 SvREFCNT_dec(parser->linestr); 791 792 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP) 793 PerlIO_clearerr(parser->rsfp); 794 else if (parser->rsfp && (!parser->old_parser || 795 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp))) 796 PerlIO_close(parser->rsfp); 797 SvREFCNT_dec(parser->rsfp_filters); 798 SvREFCNT_dec(parser->lex_stuff); 799 SvREFCNT_dec(parser->sublex_info.repl); 800 801 Safefree(parser->lex_brackstack); 802 Safefree(parser->lex_casestack); 803 Safefree(parser->lex_shared); 804 PL_parser = parser->old_parser; 805 Safefree(parser); 806 } 807 808 void 809 Perl_parser_free_nexttoke_ops(pTHX_ yy_parser *parser, OPSLAB *slab) 810 { 811 #ifdef PERL_MAD 812 I32 nexttoke = parser->lasttoke; 813 #else 814 I32 nexttoke = parser->nexttoke; 815 #endif 816 PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS; 817 while (nexttoke--) { 818 #ifdef PERL_MAD 819 if (S_is_opval_token(parser->nexttoke[nexttoke].next_type 820 & 0xffff) 821 && parser->nexttoke[nexttoke].next_val.opval 822 && parser->nexttoke[nexttoke].next_val.opval->op_slabbed 823 && OpSLAB(parser->nexttoke[nexttoke].next_val.opval) == slab) { 824 op_free(parser->nexttoke[nexttoke].next_val.opval); 825 parser->nexttoke[nexttoke].next_val.opval = NULL; 826 } 827 #else 828 if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff) 829 && parser->nextval[nexttoke].opval 830 && parser->nextval[nexttoke].opval->op_slabbed 831 && OpSLAB(parser->nextval[nexttoke].opval) == slab) { 832 op_free(parser->nextval[nexttoke].opval); 833 parser->nextval[nexttoke].opval = NULL; 834 } 835 #endif 836 } 837 } 838 839 840 /* 841 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr 842 843 Buffer scalar containing the chunk currently under consideration of the 844 text currently being lexed. This is always a plain string scalar (for 845 which C<SvPOK> is true). It is not intended to be used as a scalar by 846 normal scalar means; instead refer to the buffer directly by the pointer 847 variables described below. 848 849 The lexer maintains various C<char*> pointers to things in the 850 C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever 851 reallocated, all of these pointers must be updated. Don't attempt to 852 do this manually, but rather use L</lex_grow_linestr> if you need to 853 reallocate the buffer. 854 855 The content of the text chunk in the buffer is commonly exactly one 856 complete line of input, up to and including a newline terminator, 857 but there are situations where it is otherwise. The octets of the 858 buffer may be intended to be interpreted as either UTF-8 or Latin-1. 859 The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8> 860 flag on this scalar, which may disagree with it. 861 862 For direct examination of the buffer, the variable 863 L</PL_parser-E<gt>bufend> points to the end of the buffer. The current 864 lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use 865 of these pointers is usually preferable to examination of the scalar 866 through normal scalar means. 867 868 =for apidoc AmxU|char *|PL_parser-E<gt>bufend 869 870 Direct pointer to the end of the chunk of text currently being lexed, the 871 end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr) 872 + SvCUR(PL_parser-E<gt>linestr)>. A C<NUL> character (zero octet) is 873 always located at the end of the buffer, and does not count as part of 874 the buffer's contents. 875 876 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr 877 878 Points to the current position of lexing inside the lexer buffer. 879 Characters around this point may be freely examined, within 880 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and 881 L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be 882 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>. 883 884 Lexing code (whether in the Perl core or not) moves this pointer past 885 the characters that it consumes. It is also expected to perform some 886 bookkeeping whenever a newline character is consumed. This movement 887 can be more conveniently performed by the function L</lex_read_to>, 888 which handles newlines appropriately. 889 890 Interpretation of the buffer's octets can be abstracted out by 891 using the slightly higher-level functions L</lex_peek_unichar> and 892 L</lex_read_unichar>. 893 894 =for apidoc AmxU|char *|PL_parser-E<gt>linestart 895 896 Points to the start of the current line inside the lexer buffer. 897 This is useful for indicating at which column an error occurred, and 898 not much else. This must be updated by any lexing code that consumes 899 a newline; the function L</lex_read_to> handles this detail. 900 901 =cut 902 */ 903 904 /* 905 =for apidoc Amx|bool|lex_bufutf8 906 907 Indicates whether the octets in the lexer buffer 908 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding 909 of Unicode characters. If not, they should be interpreted as Latin-1 910 characters. This is analogous to the C<SvUTF8> flag for scalars. 911 912 In UTF-8 mode, it is not guaranteed that the lexer buffer actually 913 contains valid UTF-8. Lexing code must be robust in the face of invalid 914 encoding. 915 916 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar 917 is significant, but not the whole story regarding the input character 918 encoding. Normally, when a file is being read, the scalar contains octets 919 and its C<SvUTF8> flag is off, but the octets should be interpreted as 920 UTF-8 if the C<use utf8> pragma is in effect. During a string eval, 921 however, the scalar may have the C<SvUTF8> flag on, and in this case its 922 octets should be interpreted as UTF-8 unless the C<use bytes> pragma 923 is in effect. This logic may change in the future; use this function 924 instead of implementing the logic yourself. 925 926 =cut 927 */ 928 929 bool 930 Perl_lex_bufutf8(pTHX) 931 { 932 return UTF; 933 } 934 935 /* 936 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len 937 938 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate 939 at least I<len> octets (including terminating C<NUL>). Returns a 940 pointer to the reallocated buffer. This is necessary before making 941 any direct modification of the buffer that would increase its length. 942 L</lex_stuff_pvn> provides a more convenient way to insert text into 943 the buffer. 944 945 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>; 946 this function updates all of the lexer's variables that point directly 947 into the buffer. 948 949 =cut 950 */ 951 952 char * 953 Perl_lex_grow_linestr(pTHX_ STRLEN len) 954 { 955 SV *linestr; 956 char *buf; 957 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos; 958 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos; 959 linestr = PL_parser->linestr; 960 buf = SvPVX(linestr); 961 if (len <= SvLEN(linestr)) 962 return buf; 963 bufend_pos = PL_parser->bufend - buf; 964 bufptr_pos = PL_parser->bufptr - buf; 965 oldbufptr_pos = PL_parser->oldbufptr - buf; 966 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf; 967 linestart_pos = PL_parser->linestart - buf; 968 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0; 969 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0; 970 re_eval_start_pos = PL_parser->lex_shared->re_eval_start ? 971 PL_parser->lex_shared->re_eval_start - buf : 0; 972 973 buf = sv_grow(linestr, len); 974 975 PL_parser->bufend = buf + bufend_pos; 976 PL_parser->bufptr = buf + bufptr_pos; 977 PL_parser->oldbufptr = buf + oldbufptr_pos; 978 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos; 979 PL_parser->linestart = buf + linestart_pos; 980 if (PL_parser->last_uni) 981 PL_parser->last_uni = buf + last_uni_pos; 982 if (PL_parser->last_lop) 983 PL_parser->last_lop = buf + last_lop_pos; 984 if (PL_parser->lex_shared->re_eval_start) 985 PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos; 986 return buf; 987 } 988 989 /* 990 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags 991 992 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>), 993 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>), 994 reallocating the buffer if necessary. This means that lexing code that 995 runs later will see the characters as if they had appeared in the input. 996 It is not recommended to do this as part of normal parsing, and most 997 uses of this facility run the risk of the inserted characters being 998 interpreted in an unintended manner. 999 1000 The string to be inserted is represented by I<len> octets starting 1001 at I<pv>. These octets are interpreted as either UTF-8 or Latin-1, 1002 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>. 1003 The characters are recoded for the lexer buffer, according to how the 1004 buffer is currently being interpreted (L</lex_bufutf8>). If a string 1005 to be inserted is available as a Perl scalar, the L</lex_stuff_sv> 1006 function is more convenient. 1007 1008 =cut 1009 */ 1010 1011 void 1012 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags) 1013 { 1014 dVAR; 1015 char *bufptr; 1016 PERL_ARGS_ASSERT_LEX_STUFF_PVN; 1017 if (flags & ~(LEX_STUFF_UTF8)) 1018 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn"); 1019 if (UTF) { 1020 if (flags & LEX_STUFF_UTF8) { 1021 goto plain_copy; 1022 } else { 1023 STRLEN highhalf = 0; /* Count of variants */ 1024 const char *p, *e = pv+len; 1025 for (p = pv; p != e; p++) { 1026 if (! UTF8_IS_INVARIANT(*p)) { 1027 highhalf++; 1028 } 1029 } 1030 if (!highhalf) 1031 goto plain_copy; 1032 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf); 1033 bufptr = PL_parser->bufptr; 1034 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char); 1035 SvCUR_set(PL_parser->linestr, 1036 SvCUR(PL_parser->linestr) + len+highhalf); 1037 PL_parser->bufend += len+highhalf; 1038 for (p = pv; p != e; p++) { 1039 U8 c = (U8)*p; 1040 if (! UTF8_IS_INVARIANT(c)) { 1041 *bufptr++ = UTF8_TWO_BYTE_HI(c); 1042 *bufptr++ = UTF8_TWO_BYTE_LO(c); 1043 } else { 1044 *bufptr++ = (char)c; 1045 } 1046 } 1047 } 1048 } else { 1049 if (flags & LEX_STUFF_UTF8) { 1050 STRLEN highhalf = 0; 1051 const char *p, *e = pv+len; 1052 for (p = pv; p != e; p++) { 1053 U8 c = (U8)*p; 1054 if (UTF8_IS_ABOVE_LATIN1(c)) { 1055 Perl_croak(aTHX_ "Lexing code attempted to stuff " 1056 "non-Latin-1 character into Latin-1 input"); 1057 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) { 1058 p++; 1059 highhalf++; 1060 } else if (! UTF8_IS_INVARIANT(c)) { 1061 /* malformed UTF-8 */ 1062 ENTER; 1063 SAVESPTR(PL_warnhook); 1064 PL_warnhook = PERL_WARNHOOK_FATAL; 1065 utf8n_to_uvchr((U8*)p, e-p, NULL, 0); 1066 LEAVE; 1067 } 1068 } 1069 if (!highhalf) 1070 goto plain_copy; 1071 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf); 1072 bufptr = PL_parser->bufptr; 1073 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char); 1074 SvCUR_set(PL_parser->linestr, 1075 SvCUR(PL_parser->linestr) + len-highhalf); 1076 PL_parser->bufend += len-highhalf; 1077 p = pv; 1078 while (p < e) { 1079 if (UTF8_IS_INVARIANT(*p)) { 1080 *bufptr++ = *p; 1081 p++; 1082 } 1083 else { 1084 assert(p < e -1 ); 1085 *bufptr++ = TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1)); 1086 p += 2; 1087 } 1088 } 1089 } else { 1090 plain_copy: 1091 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len); 1092 bufptr = PL_parser->bufptr; 1093 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char); 1094 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len); 1095 PL_parser->bufend += len; 1096 Copy(pv, bufptr, len, char); 1097 } 1098 } 1099 } 1100 1101 /* 1102 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags 1103 1104 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>), 1105 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>), 1106 reallocating the buffer if necessary. This means that lexing code that 1107 runs later will see the characters as if they had appeared in the input. 1108 It is not recommended to do this as part of normal parsing, and most 1109 uses of this facility run the risk of the inserted characters being 1110 interpreted in an unintended manner. 1111 1112 The string to be inserted is represented by octets starting at I<pv> 1113 and continuing to the first nul. These octets are interpreted as either 1114 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set 1115 in I<flags>. The characters are recoded for the lexer buffer, according 1116 to how the buffer is currently being interpreted (L</lex_bufutf8>). 1117 If it is not convenient to nul-terminate a string to be inserted, the 1118 L</lex_stuff_pvn> function is more appropriate. 1119 1120 =cut 1121 */ 1122 1123 void 1124 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags) 1125 { 1126 PERL_ARGS_ASSERT_LEX_STUFF_PV; 1127 lex_stuff_pvn(pv, strlen(pv), flags); 1128 } 1129 1130 /* 1131 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags 1132 1133 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>), 1134 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>), 1135 reallocating the buffer if necessary. This means that lexing code that 1136 runs later will see the characters as if they had appeared in the input. 1137 It is not recommended to do this as part of normal parsing, and most 1138 uses of this facility run the risk of the inserted characters being 1139 interpreted in an unintended manner. 1140 1141 The string to be inserted is the string value of I<sv>. The characters 1142 are recoded for the lexer buffer, according to how the buffer is currently 1143 being interpreted (L</lex_bufutf8>). If a string to be inserted is 1144 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the 1145 need to construct a scalar. 1146 1147 =cut 1148 */ 1149 1150 void 1151 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags) 1152 { 1153 char *pv; 1154 STRLEN len; 1155 PERL_ARGS_ASSERT_LEX_STUFF_SV; 1156 if (flags) 1157 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv"); 1158 pv = SvPV(sv, len); 1159 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0)); 1160 } 1161 1162 /* 1163 =for apidoc Amx|void|lex_unstuff|char *ptr 1164 1165 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to 1166 I<ptr>. Text following I<ptr> will be moved, and the buffer shortened. 1167 This hides the discarded text from any lexing code that runs later, 1168 as if the text had never appeared. 1169 1170 This is not the normal way to consume lexed text. For that, use 1171 L</lex_read_to>. 1172 1173 =cut 1174 */ 1175 1176 void 1177 Perl_lex_unstuff(pTHX_ char *ptr) 1178 { 1179 char *buf, *bufend; 1180 STRLEN unstuff_len; 1181 PERL_ARGS_ASSERT_LEX_UNSTUFF; 1182 buf = PL_parser->bufptr; 1183 if (ptr < buf) 1184 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff"); 1185 if (ptr == buf) 1186 return; 1187 bufend = PL_parser->bufend; 1188 if (ptr > bufend) 1189 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff"); 1190 unstuff_len = ptr - buf; 1191 Move(ptr, buf, bufend+1-ptr, char); 1192 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len); 1193 PL_parser->bufend = bufend - unstuff_len; 1194 } 1195 1196 /* 1197 =for apidoc Amx|void|lex_read_to|char *ptr 1198 1199 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up 1200 to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>, 1201 performing the correct bookkeeping whenever a newline character is passed. 1202 This is the normal way to consume lexed text. 1203 1204 Interpretation of the buffer's octets can be abstracted out by 1205 using the slightly higher-level functions L</lex_peek_unichar> and 1206 L</lex_read_unichar>. 1207 1208 =cut 1209 */ 1210 1211 void 1212 Perl_lex_read_to(pTHX_ char *ptr) 1213 { 1214 char *s; 1215 PERL_ARGS_ASSERT_LEX_READ_TO; 1216 s = PL_parser->bufptr; 1217 if (ptr < s || ptr > PL_parser->bufend) 1218 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to"); 1219 for (; s != ptr; s++) 1220 if (*s == '\n') { 1221 COPLINE_INC_WITH_HERELINES; 1222 PL_parser->linestart = s+1; 1223 } 1224 PL_parser->bufptr = ptr; 1225 } 1226 1227 /* 1228 =for apidoc Amx|void|lex_discard_to|char *ptr 1229 1230 Discards the first part of the L</PL_parser-E<gt>linestr> buffer, 1231 up to I<ptr>. The remaining content of the buffer will be moved, and 1232 all pointers into the buffer updated appropriately. I<ptr> must not 1233 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>: 1234 it is not permitted to discard text that has yet to be lexed. 1235 1236 Normally it is not necessarily to do this directly, because it suffices to 1237 use the implicit discarding behaviour of L</lex_next_chunk> and things 1238 based on it. However, if a token stretches across multiple lines, 1239 and the lexing code has kept multiple lines of text in the buffer for 1240 that purpose, then after completion of the token it would be wise to 1241 explicitly discard the now-unneeded earlier lines, to avoid future 1242 multi-line tokens growing the buffer without bound. 1243 1244 =cut 1245 */ 1246 1247 void 1248 Perl_lex_discard_to(pTHX_ char *ptr) 1249 { 1250 char *buf; 1251 STRLEN discard_len; 1252 PERL_ARGS_ASSERT_LEX_DISCARD_TO; 1253 buf = SvPVX(PL_parser->linestr); 1254 if (ptr < buf) 1255 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to"); 1256 if (ptr == buf) 1257 return; 1258 if (ptr > PL_parser->bufptr) 1259 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to"); 1260 discard_len = ptr - buf; 1261 if (PL_parser->oldbufptr < ptr) 1262 PL_parser->oldbufptr = ptr; 1263 if (PL_parser->oldoldbufptr < ptr) 1264 PL_parser->oldoldbufptr = ptr; 1265 if (PL_parser->last_uni && PL_parser->last_uni < ptr) 1266 PL_parser->last_uni = NULL; 1267 if (PL_parser->last_lop && PL_parser->last_lop < ptr) 1268 PL_parser->last_lop = NULL; 1269 Move(ptr, buf, PL_parser->bufend+1-ptr, char); 1270 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len); 1271 PL_parser->bufend -= discard_len; 1272 PL_parser->bufptr -= discard_len; 1273 PL_parser->oldbufptr -= discard_len; 1274 PL_parser->oldoldbufptr -= discard_len; 1275 if (PL_parser->last_uni) 1276 PL_parser->last_uni -= discard_len; 1277 if (PL_parser->last_lop) 1278 PL_parser->last_lop -= discard_len; 1279 } 1280 1281 /* 1282 =for apidoc Amx|bool|lex_next_chunk|U32 flags 1283 1284 Reads in the next chunk of text to be lexed, appending it to 1285 L</PL_parser-E<gt>linestr>. This should be called when lexing code has 1286 looked to the end of the current chunk and wants to know more. It is 1287 usual, but not necessary, for lexing to have consumed the entirety of 1288 the current chunk at this time. 1289 1290 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current 1291 chunk (i.e., the current chunk has been entirely consumed), normally the 1292 current chunk will be discarded at the same time that the new chunk is 1293 read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk 1294 will not be discarded. If the current chunk has not been entirely 1295 consumed, then it will not be discarded regardless of the flag. 1296 1297 Returns true if some new text was added to the buffer, or false if the 1298 buffer has reached the end of the input text. 1299 1300 =cut 1301 */ 1302 1303 #define LEX_FAKE_EOF 0x80000000 1304 #define LEX_NO_TERM 0x40000000 1305 1306 bool 1307 Perl_lex_next_chunk(pTHX_ U32 flags) 1308 { 1309 SV *linestr; 1310 char *buf; 1311 STRLEN old_bufend_pos, new_bufend_pos; 1312 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos; 1313 STRLEN linestart_pos, last_uni_pos, last_lop_pos; 1314 bool got_some_for_debugger = 0; 1315 bool got_some; 1316 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM)) 1317 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk"); 1318 linestr = PL_parser->linestr; 1319 buf = SvPVX(linestr); 1320 if (!(flags & LEX_KEEP_PREVIOUS) && 1321 PL_parser->bufptr == PL_parser->bufend) { 1322 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0; 1323 linestart_pos = 0; 1324 if (PL_parser->last_uni != PL_parser->bufend) 1325 PL_parser->last_uni = NULL; 1326 if (PL_parser->last_lop != PL_parser->bufend) 1327 PL_parser->last_lop = NULL; 1328 last_uni_pos = last_lop_pos = 0; 1329 *buf = 0; 1330 SvCUR(linestr) = 0; 1331 } else { 1332 old_bufend_pos = PL_parser->bufend - buf; 1333 bufptr_pos = PL_parser->bufptr - buf; 1334 oldbufptr_pos = PL_parser->oldbufptr - buf; 1335 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf; 1336 linestart_pos = PL_parser->linestart - buf; 1337 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0; 1338 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0; 1339 } 1340 if (flags & LEX_FAKE_EOF) { 1341 goto eof; 1342 } else if (!PL_parser->rsfp && !PL_parser->filtered) { 1343 got_some = 0; 1344 } else if (filter_gets(linestr, old_bufend_pos)) { 1345 got_some = 1; 1346 got_some_for_debugger = 1; 1347 } else if (flags & LEX_NO_TERM) { 1348 got_some = 0; 1349 } else { 1350 if (!SvPOK(linestr)) /* can get undefined by filter_gets */ 1351 sv_setpvs(linestr, ""); 1352 eof: 1353 /* End of real input. Close filehandle (unless it was STDIN), 1354 * then add implicit termination. 1355 */ 1356 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP) 1357 PerlIO_clearerr(PL_parser->rsfp); 1358 else if (PL_parser->rsfp) 1359 (void)PerlIO_close(PL_parser->rsfp); 1360 PL_parser->rsfp = NULL; 1361 PL_parser->in_pod = PL_parser->filtered = 0; 1362 #ifdef PERL_MAD 1363 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n)) 1364 PL_faketokens = 1; 1365 #endif 1366 if (!PL_in_eval && PL_minus_p) { 1367 sv_catpvs(linestr, 1368 /*{*/";}continue{print or die qq(-p destination: $!\\n);}"); 1369 PL_minus_n = PL_minus_p = 0; 1370 } else if (!PL_in_eval && PL_minus_n) { 1371 sv_catpvs(linestr, /*{*/";}"); 1372 PL_minus_n = 0; 1373 } else 1374 sv_catpvs(linestr, ";"); 1375 got_some = 1; 1376 } 1377 buf = SvPVX(linestr); 1378 new_bufend_pos = SvCUR(linestr); 1379 PL_parser->bufend = buf + new_bufend_pos; 1380 PL_parser->bufptr = buf + bufptr_pos; 1381 PL_parser->oldbufptr = buf + oldbufptr_pos; 1382 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos; 1383 PL_parser->linestart = buf + linestart_pos; 1384 if (PL_parser->last_uni) 1385 PL_parser->last_uni = buf + last_uni_pos; 1386 if (PL_parser->last_lop) 1387 PL_parser->last_lop = buf + last_lop_pos; 1388 if (PL_parser->preambling != NOLINE) { 1389 CopLINE_set(PL_curcop, PL_parser->preambling + 1); 1390 PL_parser->preambling = NOLINE; 1391 } 1392 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) && 1393 PL_curstash != PL_debstash) { 1394 /* debugger active and we're not compiling the debugger code, 1395 * so store the line into the debugger's array of lines 1396 */ 1397 update_debugger_info(NULL, buf+old_bufend_pos, 1398 new_bufend_pos-old_bufend_pos); 1399 } 1400 return got_some; 1401 } 1402 1403 /* 1404 =for apidoc Amx|I32|lex_peek_unichar|U32 flags 1405 1406 Looks ahead one (Unicode) character in the text currently being lexed. 1407 Returns the codepoint (unsigned integer value) of the next character, 1408 or -1 if lexing has reached the end of the input text. To consume the 1409 peeked character, use L</lex_read_unichar>. 1410 1411 If the next character is in (or extends into) the next chunk of input 1412 text, the next chunk will be read in. Normally the current chunk will be 1413 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> 1414 then the current chunk will not be discarded. 1415 1416 If the input is being interpreted as UTF-8 and a UTF-8 encoding error 1417 is encountered, an exception is generated. 1418 1419 =cut 1420 */ 1421 1422 I32 1423 Perl_lex_peek_unichar(pTHX_ U32 flags) 1424 { 1425 dVAR; 1426 char *s, *bufend; 1427 if (flags & ~(LEX_KEEP_PREVIOUS)) 1428 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar"); 1429 s = PL_parser->bufptr; 1430 bufend = PL_parser->bufend; 1431 if (UTF) { 1432 U8 head; 1433 I32 unichar; 1434 STRLEN len, retlen; 1435 if (s == bufend) { 1436 if (!lex_next_chunk(flags)) 1437 return -1; 1438 s = PL_parser->bufptr; 1439 bufend = PL_parser->bufend; 1440 } 1441 head = (U8)*s; 1442 if (UTF8_IS_INVARIANT(head)) 1443 return head; 1444 if (UTF8_IS_START(head)) { 1445 len = UTF8SKIP(&head); 1446 while ((STRLEN)(bufend-s) < len) { 1447 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS)) 1448 break; 1449 s = PL_parser->bufptr; 1450 bufend = PL_parser->bufend; 1451 } 1452 } 1453 unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY); 1454 if (retlen == (STRLEN)-1) { 1455 /* malformed UTF-8 */ 1456 ENTER; 1457 SAVESPTR(PL_warnhook); 1458 PL_warnhook = PERL_WARNHOOK_FATAL; 1459 utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0); 1460 LEAVE; 1461 } 1462 return unichar; 1463 } else { 1464 if (s == bufend) { 1465 if (!lex_next_chunk(flags)) 1466 return -1; 1467 s = PL_parser->bufptr; 1468 } 1469 return (U8)*s; 1470 } 1471 } 1472 1473 /* 1474 =for apidoc Amx|I32|lex_read_unichar|U32 flags 1475 1476 Reads the next (Unicode) character in the text currently being lexed. 1477 Returns the codepoint (unsigned integer value) of the character read, 1478 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1 1479 if lexing has reached the end of the input text. To non-destructively 1480 examine the next character, use L</lex_peek_unichar> instead. 1481 1482 If the next character is in (or extends into) the next chunk of input 1483 text, the next chunk will be read in. Normally the current chunk will be 1484 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> 1485 then the current chunk will not be discarded. 1486 1487 If the input is being interpreted as UTF-8 and a UTF-8 encoding error 1488 is encountered, an exception is generated. 1489 1490 =cut 1491 */ 1492 1493 I32 1494 Perl_lex_read_unichar(pTHX_ U32 flags) 1495 { 1496 I32 c; 1497 if (flags & ~(LEX_KEEP_PREVIOUS)) 1498 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar"); 1499 c = lex_peek_unichar(flags); 1500 if (c != -1) { 1501 if (c == '\n') 1502 COPLINE_INC_WITH_HERELINES; 1503 if (UTF) 1504 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr); 1505 else 1506 ++(PL_parser->bufptr); 1507 } 1508 return c; 1509 } 1510 1511 /* 1512 =for apidoc Amx|void|lex_read_space|U32 flags 1513 1514 Reads optional spaces, in Perl style, in the text currently being 1515 lexed. The spaces may include ordinary whitespace characters and 1516 Perl-style comments. C<#line> directives are processed if encountered. 1517 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points 1518 at a non-space character (or the end of the input text). 1519 1520 If spaces extend into the next chunk of input text, the next chunk will 1521 be read in. Normally the current chunk will be discarded at the same 1522 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current 1523 chunk will not be discarded. 1524 1525 =cut 1526 */ 1527 1528 #define LEX_NO_INCLINE 0x40000000 1529 #define LEX_NO_NEXT_CHUNK 0x80000000 1530 1531 void 1532 Perl_lex_read_space(pTHX_ U32 flags) 1533 { 1534 char *s, *bufend; 1535 const bool can_incline = !(flags & LEX_NO_INCLINE); 1536 bool need_incline = 0; 1537 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE)) 1538 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space"); 1539 #ifdef PERL_MAD 1540 if (PL_skipwhite) { 1541 sv_free(PL_skipwhite); 1542 PL_skipwhite = NULL; 1543 } 1544 if (PL_madskills) 1545 PL_skipwhite = newSVpvs(""); 1546 #endif /* PERL_MAD */ 1547 s = PL_parser->bufptr; 1548 bufend = PL_parser->bufend; 1549 while (1) { 1550 char c = *s; 1551 if (c == '#') { 1552 do { 1553 c = *++s; 1554 } while (!(c == '\n' || (c == 0 && s == bufend))); 1555 } else if (c == '\n') { 1556 s++; 1557 if (can_incline) { 1558 PL_parser->linestart = s; 1559 if (s == bufend) 1560 need_incline = 1; 1561 else 1562 incline(s); 1563 } 1564 } else if (isSPACE(c)) { 1565 s++; 1566 } else if (c == 0 && s == bufend) { 1567 bool got_more; 1568 line_t l; 1569 #ifdef PERL_MAD 1570 if (PL_madskills) 1571 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr); 1572 #endif /* PERL_MAD */ 1573 if (flags & LEX_NO_NEXT_CHUNK) 1574 break; 1575 PL_parser->bufptr = s; 1576 l = CopLINE(PL_curcop); 1577 CopLINE(PL_curcop) += PL_parser->herelines + 1; 1578 got_more = lex_next_chunk(flags); 1579 CopLINE_set(PL_curcop, l); 1580 s = PL_parser->bufptr; 1581 bufend = PL_parser->bufend; 1582 if (!got_more) 1583 break; 1584 if (can_incline && need_incline && PL_parser->rsfp) { 1585 incline(s); 1586 need_incline = 0; 1587 } 1588 } else { 1589 break; 1590 } 1591 } 1592 #ifdef PERL_MAD 1593 if (PL_madskills) 1594 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr); 1595 #endif /* PERL_MAD */ 1596 PL_parser->bufptr = s; 1597 } 1598 1599 /* 1600 1601 =for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn 1602 1603 This function performs syntax checking on a prototype, C<proto>. 1604 If C<warn> is true, any illegal characters or mismatched brackets 1605 will trigger illegalproto warnings, declaring that they were 1606 detected in the prototype for C<name>. 1607 1608 The return value is C<true> if this is a valid prototype, and 1609 C<false> if it is not, regardless of whether C<warn> was C<true> or 1610 C<false>. 1611 1612 Note that C<NULL> is a valid C<proto> and will always return C<true>. 1613 1614 =cut 1615 1616 */ 1617 1618 bool 1619 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn) 1620 { 1621 STRLEN len, origlen; 1622 char *p = proto ? SvPV(proto, len) : NULL; 1623 bool bad_proto = FALSE; 1624 bool in_brackets = FALSE; 1625 bool after_slash = FALSE; 1626 char greedy_proto = ' '; 1627 bool proto_after_greedy_proto = FALSE; 1628 bool must_be_last = FALSE; 1629 bool underscore = FALSE; 1630 bool bad_proto_after_underscore = FALSE; 1631 1632 PERL_ARGS_ASSERT_VALIDATE_PROTO; 1633 1634 if (!proto) 1635 return TRUE; 1636 1637 origlen = len; 1638 for (; len--; p++) { 1639 if (!isSPACE(*p)) { 1640 if (must_be_last) 1641 proto_after_greedy_proto = TRUE; 1642 if (underscore) { 1643 if (!strchr(";@%", *p)) 1644 bad_proto_after_underscore = TRUE; 1645 underscore = FALSE; 1646 } 1647 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') { 1648 bad_proto = TRUE; 1649 } 1650 else { 1651 if (*p == '[') 1652 in_brackets = TRUE; 1653 else if (*p == ']') 1654 in_brackets = FALSE; 1655 else if ((*p == '@' || *p == '%') && 1656 !after_slash && 1657 !in_brackets ) { 1658 must_be_last = TRUE; 1659 greedy_proto = *p; 1660 } 1661 else if (*p == '_') 1662 underscore = TRUE; 1663 } 1664 if (*p == '\\') 1665 after_slash = TRUE; 1666 else 1667 after_slash = FALSE; 1668 } 1669 } 1670 1671 if (warn) { 1672 SV *tmpsv = newSVpvs_flags("", SVs_TEMP); 1673 p -= origlen; 1674 p = SvUTF8(proto) 1675 ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8), 1676 origlen, UNI_DISPLAY_ISPRINT) 1677 : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII); 1678 1679 if (proto_after_greedy_proto) 1680 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), 1681 "Prototype after '%c' for %"SVf" : %s", 1682 greedy_proto, SVfARG(name), p); 1683 if (in_brackets) 1684 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), 1685 "Missing ']' in prototype for %"SVf" : %s", 1686 SVfARG(name), p); 1687 if (bad_proto) 1688 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), 1689 "Illegal character in prototype for %"SVf" : %s", 1690 SVfARG(name), p); 1691 if (bad_proto_after_underscore) 1692 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), 1693 "Illegal character after '_' in prototype for %"SVf" : %s", 1694 SVfARG(name), p); 1695 } 1696 1697 return (! (proto_after_greedy_proto || bad_proto) ); 1698 } 1699 1700 /* 1701 * S_incline 1702 * This subroutine has nothing to do with tilting, whether at windmills 1703 * or pinball tables. Its name is short for "increment line". It 1704 * increments the current line number in CopLINE(PL_curcop) and checks 1705 * to see whether the line starts with a comment of the form 1706 * # line 500 "foo.pm" 1707 * If so, it sets the current line number and file to the values in the comment. 1708 */ 1709 1710 STATIC void 1711 S_incline(pTHX_ const char *s) 1712 { 1713 dVAR; 1714 const char *t; 1715 const char *n; 1716 const char *e; 1717 line_t line_num; 1718 1719 PERL_ARGS_ASSERT_INCLINE; 1720 1721 COPLINE_INC_WITH_HERELINES; 1722 if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL 1723 && s+1 == PL_bufend && *s == ';') { 1724 /* fake newline in string eval */ 1725 CopLINE_dec(PL_curcop); 1726 return; 1727 } 1728 if (*s++ != '#') 1729 return; 1730 while (SPACE_OR_TAB(*s)) 1731 s++; 1732 if (strnEQ(s, "line", 4)) 1733 s += 4; 1734 else 1735 return; 1736 if (SPACE_OR_TAB(*s)) 1737 s++; 1738 else 1739 return; 1740 while (SPACE_OR_TAB(*s)) 1741 s++; 1742 if (!isDIGIT(*s)) 1743 return; 1744 1745 n = s; 1746 while (isDIGIT(*s)) 1747 s++; 1748 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0') 1749 return; 1750 while (SPACE_OR_TAB(*s)) 1751 s++; 1752 if (*s == '"' && (t = strchr(s+1, '"'))) { 1753 s++; 1754 e = t + 1; 1755 } 1756 else { 1757 t = s; 1758 while (!isSPACE(*t)) 1759 t++; 1760 e = t; 1761 } 1762 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f') 1763 e++; 1764 if (*e != '\n' && *e != '\0') 1765 return; /* false alarm */ 1766 1767 line_num = atoi(n)-1; 1768 1769 if (t - s > 0) { 1770 const STRLEN len = t - s; 1771 1772 if (!PL_rsfp && !PL_parser->filtered) { 1773 /* must copy *{"::_<(eval N)[oldfilename:L]"} 1774 * to *{"::_<newfilename"} */ 1775 /* However, the long form of evals is only turned on by the 1776 debugger - usually they're "(eval %lu)" */ 1777 GV * const cfgv = CopFILEGV(PL_curcop); 1778 if (cfgv) { 1779 char smallbuf[128]; 1780 STRLEN tmplen2 = len; 1781 char *tmpbuf2; 1782 GV *gv2; 1783 1784 if (tmplen2 + 2 <= sizeof smallbuf) 1785 tmpbuf2 = smallbuf; 1786 else 1787 Newx(tmpbuf2, tmplen2 + 2, char); 1788 1789 tmpbuf2[0] = '_'; 1790 tmpbuf2[1] = '<'; 1791 1792 memcpy(tmpbuf2 + 2, s, tmplen2); 1793 tmplen2 += 2; 1794 1795 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE); 1796 if (!isGV(gv2)) { 1797 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE); 1798 /* adjust ${"::_<newfilename"} to store the new file name */ 1799 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2); 1800 /* The line number may differ. If that is the case, 1801 alias the saved lines that are in the array. 1802 Otherwise alias the whole array. */ 1803 if (CopLINE(PL_curcop) == line_num) { 1804 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv))); 1805 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv))); 1806 } 1807 else if (GvAV(cfgv)) { 1808 AV * const av = GvAV(cfgv); 1809 const I32 start = CopLINE(PL_curcop)+1; 1810 I32 items = AvFILLp(av) - start; 1811 if (items > 0) { 1812 AV * const av2 = GvAVn(gv2); 1813 SV **svp = AvARRAY(av) + start; 1814 I32 l = (I32)line_num+1; 1815 while (items--) 1816 av_store(av2, l++, SvREFCNT_inc(*svp++)); 1817 } 1818 } 1819 } 1820 1821 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2); 1822 } 1823 } 1824 CopFILE_free(PL_curcop); 1825 CopFILE_setn(PL_curcop, s, len); 1826 } 1827 CopLINE_set(PL_curcop, line_num); 1828 } 1829 1830 #define skipspace(s) skipspace_flags(s, 0) 1831 1832 #ifdef PERL_MAD 1833 /* skip space before PL_thistoken */ 1834 1835 STATIC char * 1836 S_skipspace0(pTHX_ char *s) 1837 { 1838 PERL_ARGS_ASSERT_SKIPSPACE0; 1839 1840 s = skipspace(s); 1841 if (!PL_madskills) 1842 return s; 1843 if (PL_skipwhite) { 1844 if (!PL_thiswhite) 1845 PL_thiswhite = newSVpvs(""); 1846 sv_catsv(PL_thiswhite, PL_skipwhite); 1847 sv_free(PL_skipwhite); 1848 PL_skipwhite = 0; 1849 } 1850 PL_realtokenstart = s - SvPVX(PL_linestr); 1851 return s; 1852 } 1853 1854 /* skip space after PL_thistoken */ 1855 1856 STATIC char * 1857 S_skipspace1(pTHX_ char *s) 1858 { 1859 const char *start = s; 1860 I32 startoff = start - SvPVX(PL_linestr); 1861 1862 PERL_ARGS_ASSERT_SKIPSPACE1; 1863 1864 s = skipspace(s); 1865 if (!PL_madskills) 1866 return s; 1867 start = SvPVX(PL_linestr) + startoff; 1868 if (!PL_thistoken && PL_realtokenstart >= 0) { 1869 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart; 1870 PL_thistoken = newSVpvn(tstart, start - tstart); 1871 } 1872 PL_realtokenstart = -1; 1873 if (PL_skipwhite) { 1874 if (!PL_nextwhite) 1875 PL_nextwhite = newSVpvs(""); 1876 sv_catsv(PL_nextwhite, PL_skipwhite); 1877 sv_free(PL_skipwhite); 1878 PL_skipwhite = 0; 1879 } 1880 return s; 1881 } 1882 1883 STATIC char * 1884 S_skipspace2(pTHX_ char *s, SV **svp) 1885 { 1886 char *start; 1887 const I32 startoff = s - SvPVX(PL_linestr); 1888 1889 PERL_ARGS_ASSERT_SKIPSPACE2; 1890 1891 s = skipspace(s); 1892 if (!PL_madskills || !svp) 1893 return s; 1894 start = SvPVX(PL_linestr) + startoff; 1895 if (!PL_thistoken && PL_realtokenstart >= 0) { 1896 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart; 1897 PL_thistoken = newSVpvn(tstart, start - tstart); 1898 PL_realtokenstart = -1; 1899 } 1900 if (PL_skipwhite) { 1901 if (!*svp) 1902 *svp = newSVpvs(""); 1903 sv_setsv(*svp, PL_skipwhite); 1904 sv_free(PL_skipwhite); 1905 PL_skipwhite = 0; 1906 } 1907 1908 return s; 1909 } 1910 #endif 1911 1912 STATIC void 1913 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len) 1914 { 1915 AV *av = CopFILEAVx(PL_curcop); 1916 if (av) { 1917 SV * sv; 1918 if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG); 1919 else { 1920 sv = *av_fetch(av, 0, 1); 1921 SvUPGRADE(sv, SVt_PVMG); 1922 } 1923 if (!SvPOK(sv)) sv_setpvs(sv,""); 1924 if (orig_sv) 1925 sv_catsv(sv, orig_sv); 1926 else 1927 sv_catpvn(sv, buf, len); 1928 if (!SvIOK(sv)) { 1929 (void)SvIOK_on(sv); 1930 SvIV_set(sv, 0); 1931 } 1932 if (PL_parser->preambling == NOLINE) 1933 av_store(av, CopLINE(PL_curcop), sv); 1934 } 1935 } 1936 1937 /* 1938 * S_skipspace 1939 * Called to gobble the appropriate amount and type of whitespace. 1940 * Skips comments as well. 1941 */ 1942 1943 STATIC char * 1944 S_skipspace_flags(pTHX_ char *s, U32 flags) 1945 { 1946 #ifdef PERL_MAD 1947 char *start = s; 1948 #endif /* PERL_MAD */ 1949 PERL_ARGS_ASSERT_SKIPSPACE_FLAGS; 1950 #ifdef PERL_MAD 1951 if (PL_skipwhite) { 1952 sv_free(PL_skipwhite); 1953 PL_skipwhite = NULL; 1954 } 1955 #endif /* PERL_MAD */ 1956 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { 1957 while (s < PL_bufend && SPACE_OR_TAB(*s)) 1958 s++; 1959 } else { 1960 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr); 1961 PL_bufptr = s; 1962 lex_read_space(flags | LEX_KEEP_PREVIOUS | 1963 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ? 1964 LEX_NO_NEXT_CHUNK : 0)); 1965 s = PL_bufptr; 1966 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos; 1967 if (PL_linestart > PL_bufptr) 1968 PL_bufptr = PL_linestart; 1969 return s; 1970 } 1971 #ifdef PERL_MAD 1972 if (PL_madskills) 1973 PL_skipwhite = newSVpvn(start, s-start); 1974 #endif /* PERL_MAD */ 1975 return s; 1976 } 1977 1978 /* 1979 * S_check_uni 1980 * Check the unary operators to ensure there's no ambiguity in how they're 1981 * used. An ambiguous piece of code would be: 1982 * rand + 5 1983 * This doesn't mean rand() + 5. Because rand() is a unary operator, 1984 * the +5 is its argument. 1985 */ 1986 1987 STATIC void 1988 S_check_uni(pTHX) 1989 { 1990 dVAR; 1991 const char *s; 1992 const char *t; 1993 1994 if (PL_oldoldbufptr != PL_last_uni) 1995 return; 1996 while (isSPACE(*PL_last_uni)) 1997 PL_last_uni++; 1998 s = PL_last_uni; 1999 while (isWORDCHAR_lazy_if(s,UTF) || *s == '-') 2000 s++; 2001 if ((t = strchr(s, '(')) && t < PL_bufptr) 2002 return; 2003 2004 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), 2005 "Warning: Use of \"%.*s\" without parentheses is ambiguous", 2006 (int)(s - PL_last_uni), PL_last_uni); 2007 } 2008 2009 /* 2010 * LOP : macro to build a list operator. Its behaviour has been replaced 2011 * with a subroutine, S_lop() for which LOP is just another name. 2012 */ 2013 2014 #define LOP(f,x) return lop(f,x,s) 2015 2016 /* 2017 * S_lop 2018 * Build a list operator (or something that might be one). The rules: 2019 * - if we have a next token, then it's a list operator [why?] 2020 * - if the next thing is an opening paren, then it's a function 2021 * - else it's a list operator 2022 */ 2023 2024 STATIC I32 2025 S_lop(pTHX_ I32 f, int x, char *s) 2026 { 2027 dVAR; 2028 2029 PERL_ARGS_ASSERT_LOP; 2030 2031 pl_yylval.ival = f; 2032 CLINE; 2033 PL_expect = x; 2034 PL_bufptr = s; 2035 PL_last_lop = PL_oldbufptr; 2036 PL_last_lop_op = (OPCODE)f; 2037 #ifdef PERL_MAD 2038 if (PL_lasttoke) 2039 goto lstop; 2040 #else 2041 if (PL_nexttoke) 2042 goto lstop; 2043 #endif 2044 if (*s == '(') 2045 return REPORT(FUNC); 2046 s = PEEKSPACE(s); 2047 if (*s == '(') 2048 return REPORT(FUNC); 2049 else { 2050 lstop: 2051 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 2052 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 2053 return REPORT(LSTOP); 2054 } 2055 } 2056 2057 #ifdef PERL_MAD 2058 /* 2059 * S_start_force 2060 * Sets up for an eventual force_next(). start_force(0) basically does 2061 * an unshift, while start_force(-1) does a push. yylex removes items 2062 * on the "pop" end. 2063 */ 2064 2065 STATIC void 2066 S_start_force(pTHX_ int where) 2067 { 2068 int i; 2069 2070 if (where < 0) /* so people can duplicate start_force(PL_curforce) */ 2071 where = PL_lasttoke; 2072 assert(PL_curforce < 0 || PL_curforce == where); 2073 if (PL_curforce != where) { 2074 for (i = PL_lasttoke; i > where; --i) { 2075 PL_nexttoke[i] = PL_nexttoke[i-1]; 2076 } 2077 PL_lasttoke++; 2078 } 2079 if (PL_curforce < 0) /* in case of duplicate start_force() */ 2080 Zero(&PL_nexttoke[where], 1, NEXTTOKE); 2081 PL_curforce = where; 2082 if (PL_nextwhite) { 2083 if (PL_madskills) 2084 curmad('^', newSVpvs("")); 2085 CURMAD('_', PL_nextwhite); 2086 } 2087 } 2088 2089 STATIC void 2090 S_curmad(pTHX_ char slot, SV *sv) 2091 { 2092 MADPROP **where; 2093 2094 if (!sv) 2095 return; 2096 if (PL_curforce < 0) 2097 where = &PL_thismad; 2098 else 2099 where = &PL_nexttoke[PL_curforce].next_mad; 2100 2101 if (PL_faketokens) 2102 sv_setpvs(sv, ""); 2103 else { 2104 if (!IN_BYTES) { 2105 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv))) 2106 SvUTF8_on(sv); 2107 else if (PL_encoding) { 2108 sv_recode_to_utf8(sv, PL_encoding); 2109 } 2110 } 2111 } 2112 2113 /* keep a slot open for the head of the list? */ 2114 if (slot != '_' && *where && (*where)->mad_key == '^') { 2115 (*where)->mad_key = slot; 2116 sv_free(MUTABLE_SV(((*where)->mad_val))); 2117 (*where)->mad_val = (void*)sv; 2118 } 2119 else 2120 addmad(newMADsv(slot, sv), where, 0); 2121 } 2122 #else 2123 # define start_force(where) NOOP 2124 # define curmad(slot, sv) NOOP 2125 #endif 2126 2127 /* 2128 * S_force_next 2129 * When the lexer realizes it knows the next token (for instance, 2130 * it is reordering tokens for the parser) then it can call S_force_next 2131 * to know what token to return the next time the lexer is called. Caller 2132 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD), 2133 * and possibly PL_expect to ensure the lexer handles the token correctly. 2134 */ 2135 2136 STATIC void 2137 S_force_next(pTHX_ I32 type) 2138 { 2139 dVAR; 2140 #ifdef DEBUGGING 2141 if (DEBUG_T_TEST) { 2142 PerlIO_printf(Perl_debug_log, "### forced token:\n"); 2143 tokereport(type, &NEXTVAL_NEXTTOKE); 2144 } 2145 #endif 2146 #ifdef PERL_MAD 2147 if (PL_curforce < 0) 2148 start_force(PL_lasttoke); 2149 PL_nexttoke[PL_curforce].next_type = type; 2150 if (PL_lex_state != LEX_KNOWNEXT) 2151 PL_lex_defer = PL_lex_state; 2152 PL_lex_state = LEX_KNOWNEXT; 2153 PL_lex_expect = PL_expect; 2154 PL_curforce = -1; 2155 #else 2156 PL_nexttype[PL_nexttoke] = type; 2157 PL_nexttoke++; 2158 if (PL_lex_state != LEX_KNOWNEXT) { 2159 PL_lex_defer = PL_lex_state; 2160 PL_lex_expect = PL_expect; 2161 PL_lex_state = LEX_KNOWNEXT; 2162 } 2163 #endif 2164 } 2165 2166 /* 2167 * S_postderef 2168 * 2169 * This subroutine handles postfix deref syntax after the arrow has already 2170 * been emitted. @* $* etc. are emitted as two separate token right here. 2171 * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits 2172 * only the first, leaving yylex to find the next. 2173 */ 2174 2175 static int 2176 S_postderef(pTHX_ int const funny, char const next) 2177 { 2178 dVAR; 2179 assert(funny == DOLSHARP || strchr("$@%&*", funny)); 2180 assert(strchr("*[{", next)); 2181 if (next == '*') { 2182 PL_expect = XOPERATOR; 2183 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) { 2184 assert('@' == funny || '$' == funny || DOLSHARP == funny); 2185 PL_lex_state = LEX_INTERPEND; 2186 start_force(PL_curforce); 2187 force_next(POSTJOIN); 2188 } 2189 start_force(PL_curforce); 2190 force_next(next); 2191 PL_bufptr+=2; 2192 } 2193 else { 2194 if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL 2195 && !PL_lex_brackets) 2196 PL_lex_dojoin = 2; 2197 PL_expect = XOPERATOR; 2198 PL_bufptr++; 2199 } 2200 return funny; 2201 } 2202 2203 void 2204 Perl_yyunlex(pTHX) 2205 { 2206 int yyc = PL_parser->yychar; 2207 if (yyc != YYEMPTY) { 2208 if (yyc) { 2209 start_force(-1); 2210 NEXTVAL_NEXTTOKE = PL_parser->yylval; 2211 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) { 2212 PL_lex_allbrackets--; 2213 PL_lex_brackets--; 2214 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16); 2215 } else if (yyc == '('/*)*/) { 2216 PL_lex_allbrackets--; 2217 yyc |= (2<<24); 2218 } 2219 force_next(yyc); 2220 } 2221 PL_parser->yychar = YYEMPTY; 2222 } 2223 } 2224 2225 STATIC SV * 2226 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len) 2227 { 2228 dVAR; 2229 SV * const sv = newSVpvn_utf8(start, len, 2230 !IN_BYTES 2231 && UTF 2232 && !is_ascii_string((const U8*)start, len) 2233 && is_utf8_string((const U8*)start, len)); 2234 return sv; 2235 } 2236 2237 /* 2238 * S_force_word 2239 * When the lexer knows the next thing is a word (for instance, it has 2240 * just seen -> and it knows that the next char is a word char, then 2241 * it calls S_force_word to stick the next word into the PL_nexttoke/val 2242 * lookahead. 2243 * 2244 * Arguments: 2245 * char *start : buffer position (must be within PL_linestr) 2246 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD) 2247 * int check_keyword : if true, Perl checks to make sure the word isn't 2248 * a keyword (do this if the word is a label, e.g. goto FOO) 2249 * int allow_pack : if true, : characters will also be allowed (require, 2250 * use, etc. do this) 2251 * int allow_initial_tick : used by the "sub" lexer only. 2252 */ 2253 2254 STATIC char * 2255 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack) 2256 { 2257 dVAR; 2258 char *s; 2259 STRLEN len; 2260 2261 PERL_ARGS_ASSERT_FORCE_WORD; 2262 2263 start = SKIPSPACE1(start); 2264 s = start; 2265 if (isIDFIRST_lazy_if(s,UTF) || 2266 (allow_pack && *s == ':') ) 2267 { 2268 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len); 2269 if (check_keyword) { 2270 char *s2 = PL_tokenbuf; 2271 STRLEN len2 = len; 2272 if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6)) 2273 s2 += 6, len2 -= 6; 2274 if (keyword(s2, len2, 0)) 2275 return start; 2276 } 2277 start_force(PL_curforce); 2278 if (PL_madskills) 2279 curmad('X', newSVpvn(start,s-start)); 2280 if (token == METHOD) { 2281 s = SKIPSPACE1(s); 2282 if (*s == '(') 2283 PL_expect = XTERM; 2284 else { 2285 PL_expect = XOPERATOR; 2286 } 2287 } 2288 if (PL_madskills) 2289 curmad('g', newSVpvs( "forced" )); 2290 NEXTVAL_NEXTTOKE.opval 2291 = (OP*)newSVOP(OP_CONST,0, 2292 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len)); 2293 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE; 2294 force_next(token); 2295 } 2296 return s; 2297 } 2298 2299 /* 2300 * S_force_ident 2301 * Called when the lexer wants $foo *foo &foo etc, but the program 2302 * text only contains the "foo" portion. The first argument is a pointer 2303 * to the "foo", and the second argument is the type symbol to prefix. 2304 * Forces the next token to be a "WORD". 2305 * Creates the symbol if it didn't already exist (via gv_fetchpv()). 2306 */ 2307 2308 STATIC void 2309 S_force_ident(pTHX_ const char *s, int kind) 2310 { 2311 dVAR; 2312 2313 PERL_ARGS_ASSERT_FORCE_IDENT; 2314 2315 if (s[0]) { 2316 const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */ 2317 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len, 2318 UTF ? SVf_UTF8 : 0)); 2319 start_force(PL_curforce); 2320 NEXTVAL_NEXTTOKE.opval = o; 2321 force_next(WORD); 2322 if (kind) { 2323 o->op_private = OPpCONST_ENTERED; 2324 /* XXX see note in pp_entereval() for why we forgo typo 2325 warnings if the symbol must be introduced in an eval. 2326 GSAR 96-10-12 */ 2327 gv_fetchpvn_flags(s, len, 2328 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) 2329 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ), 2330 kind == '$' ? SVt_PV : 2331 kind == '@' ? SVt_PVAV : 2332 kind == '%' ? SVt_PVHV : 2333 SVt_PVGV 2334 ); 2335 } 2336 } 2337 } 2338 2339 static void 2340 S_force_ident_maybe_lex(pTHX_ char pit) 2341 { 2342 start_force(PL_curforce); 2343 NEXTVAL_NEXTTOKE.ival = pit; 2344 force_next('p'); 2345 } 2346 2347 NV 2348 Perl_str_to_version(pTHX_ SV *sv) 2349 { 2350 NV retval = 0.0; 2351 NV nshift = 1.0; 2352 STRLEN len; 2353 const char *start = SvPV_const(sv,len); 2354 const char * const end = start + len; 2355 const bool utf = SvUTF8(sv) ? TRUE : FALSE; 2356 2357 PERL_ARGS_ASSERT_STR_TO_VERSION; 2358 2359 while (start < end) { 2360 STRLEN skip; 2361 UV n; 2362 if (utf) 2363 n = utf8n_to_uvchr((U8*)start, len, &skip, 0); 2364 else { 2365 n = *(U8*)start; 2366 skip = 1; 2367 } 2368 retval += ((NV)n)/nshift; 2369 start += skip; 2370 nshift *= 1000; 2371 } 2372 return retval; 2373 } 2374 2375 /* 2376 * S_force_version 2377 * Forces the next token to be a version number. 2378 * If the next token appears to be an invalid version number, (e.g. "v2b"), 2379 * and if "guessing" is TRUE, then no new token is created (and the caller 2380 * must use an alternative parsing method). 2381 */ 2382 2383 STATIC char * 2384 S_force_version(pTHX_ char *s, int guessing) 2385 { 2386 dVAR; 2387 OP *version = NULL; 2388 char *d; 2389 #ifdef PERL_MAD 2390 I32 startoff = s - SvPVX(PL_linestr); 2391 #endif 2392 2393 PERL_ARGS_ASSERT_FORCE_VERSION; 2394 2395 s = SKIPSPACE1(s); 2396 2397 d = s; 2398 if (*d == 'v') 2399 d++; 2400 if (isDIGIT(*d)) { 2401 while (isDIGIT(*d) || *d == '_' || *d == '.') 2402 d++; 2403 #ifdef PERL_MAD 2404 if (PL_madskills) { 2405 start_force(PL_curforce); 2406 curmad('X', newSVpvn(s,d-s)); 2407 } 2408 #endif 2409 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) { 2410 SV *ver; 2411 s = scan_num(s, &pl_yylval); 2412 version = pl_yylval.opval; 2413 ver = cSVOPx(version)->op_sv; 2414 if (SvPOK(ver) && !SvNIOK(ver)) { 2415 SvUPGRADE(ver, SVt_PVNV); 2416 SvNV_set(ver, str_to_version(ver)); 2417 SvNOK_on(ver); /* hint that it is a version */ 2418 } 2419 } 2420 else if (guessing) { 2421 #ifdef PERL_MAD 2422 if (PL_madskills) { 2423 sv_free(PL_nextwhite); /* let next token collect whitespace */ 2424 PL_nextwhite = 0; 2425 s = SvPVX(PL_linestr) + startoff; 2426 } 2427 #endif 2428 return s; 2429 } 2430 } 2431 2432 #ifdef PERL_MAD 2433 if (PL_madskills && !version) { 2434 sv_free(PL_nextwhite); /* let next token collect whitespace */ 2435 PL_nextwhite = 0; 2436 s = SvPVX(PL_linestr) + startoff; 2437 } 2438 #endif 2439 /* NOTE: The parser sees the package name and the VERSION swapped */ 2440 start_force(PL_curforce); 2441 NEXTVAL_NEXTTOKE.opval = version; 2442 force_next(WORD); 2443 2444 return s; 2445 } 2446 2447 /* 2448 * S_force_strict_version 2449 * Forces the next token to be a version number using strict syntax rules. 2450 */ 2451 2452 STATIC char * 2453 S_force_strict_version(pTHX_ char *s) 2454 { 2455 dVAR; 2456 OP *version = NULL; 2457 #ifdef PERL_MAD 2458 I32 startoff = s - SvPVX(PL_linestr); 2459 #endif 2460 const char *errstr = NULL; 2461 2462 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION; 2463 2464 while (isSPACE(*s)) /* leading whitespace */ 2465 s++; 2466 2467 if (is_STRICT_VERSION(s,&errstr)) { 2468 SV *ver = newSV(0); 2469 s = (char *)scan_version(s, ver, 0); 2470 version = newSVOP(OP_CONST, 0, ver); 2471 } 2472 else if ( (*s != ';' && *s != '{' && *s != '}' ) && 2473 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' ))) 2474 { 2475 PL_bufptr = s; 2476 if (errstr) 2477 yyerror(errstr); /* version required */ 2478 return s; 2479 } 2480 2481 #ifdef PERL_MAD 2482 if (PL_madskills && !version) { 2483 sv_free(PL_nextwhite); /* let next token collect whitespace */ 2484 PL_nextwhite = 0; 2485 s = SvPVX(PL_linestr) + startoff; 2486 } 2487 #endif 2488 /* NOTE: The parser sees the package name and the VERSION swapped */ 2489 start_force(PL_curforce); 2490 NEXTVAL_NEXTTOKE.opval = version; 2491 force_next(WORD); 2492 2493 return s; 2494 } 2495 2496 /* 2497 * S_tokeq 2498 * Tokenize a quoted string passed in as an SV. It finds the next 2499 * chunk, up to end of string or a backslash. It may make a new 2500 * SV containing that chunk (if HINT_NEW_STRING is on). It also 2501 * turns \\ into \. 2502 */ 2503 2504 STATIC SV * 2505 S_tokeq(pTHX_ SV *sv) 2506 { 2507 dVAR; 2508 char *s; 2509 char *send; 2510 char *d; 2511 SV *pv = sv; 2512 2513 PERL_ARGS_ASSERT_TOKEQ; 2514 2515 assert (SvPOK(sv)); 2516 assert (SvLEN(sv)); 2517 assert (!SvIsCOW(sv)); 2518 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */ 2519 goto finish; 2520 s = SvPVX(sv); 2521 send = SvEND(sv); 2522 /* This is relying on the SV being "well formed" with a trailing '\0' */ 2523 while (s < send && !(*s == '\\' && s[1] == '\\')) 2524 s++; 2525 if (s == send) 2526 goto finish; 2527 d = s; 2528 if ( PL_hints & HINT_NEW_STRING ) { 2529 pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv), 2530 SVs_TEMP | SvUTF8(sv)); 2531 } 2532 while (s < send) { 2533 if (*s == '\\') { 2534 if (s + 1 < send && (s[1] == '\\')) 2535 s++; /* all that, just for this */ 2536 } 2537 *d++ = *s++; 2538 } 2539 *d = '\0'; 2540 SvCUR_set(sv, d - SvPVX_const(sv)); 2541 finish: 2542 if ( PL_hints & HINT_NEW_STRING ) 2543 return new_constant(NULL, 0, "q", sv, pv, "q", 1); 2544 return sv; 2545 } 2546 2547 /* 2548 * Now come three functions related to double-quote context, 2549 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when 2550 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They 2551 * interact with PL_lex_state, and create fake ( ... ) argument lists 2552 * to handle functions and concatenation. 2553 * For example, 2554 * "foo\lbar" 2555 * is tokenised as 2556 * stringify ( const[foo] concat lcfirst ( const[bar] ) ) 2557 */ 2558 2559 /* 2560 * S_sublex_start 2561 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST). 2562 * 2563 * Pattern matching will set PL_lex_op to the pattern-matching op to 2564 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise). 2565 * 2566 * OP_CONST and OP_READLINE are easy--just make the new op and return. 2567 * 2568 * Everything else becomes a FUNC. 2569 * 2570 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we 2571 * had an OP_CONST or OP_READLINE). This just sets us up for a 2572 * call to S_sublex_push(). 2573 */ 2574 2575 STATIC I32 2576 S_sublex_start(pTHX) 2577 { 2578 dVAR; 2579 const I32 op_type = pl_yylval.ival; 2580 2581 if (op_type == OP_NULL) { 2582 pl_yylval.opval = PL_lex_op; 2583 PL_lex_op = NULL; 2584 return THING; 2585 } 2586 if (op_type == OP_CONST) { 2587 SV *sv = tokeq(PL_lex_stuff); 2588 2589 if (SvTYPE(sv) == SVt_PVIV) { 2590 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */ 2591 STRLEN len; 2592 const char * const p = SvPV_const(sv, len); 2593 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv)); 2594 SvREFCNT_dec(sv); 2595 sv = nsv; 2596 } 2597 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv); 2598 PL_lex_stuff = NULL; 2599 return THING; 2600 } 2601 2602 PL_sublex_info.super_state = PL_lex_state; 2603 PL_sublex_info.sub_inwhat = (U16)op_type; 2604 PL_sublex_info.sub_op = PL_lex_op; 2605 PL_lex_state = LEX_INTERPPUSH; 2606 2607 PL_expect = XTERM; 2608 if (PL_lex_op) { 2609 pl_yylval.opval = PL_lex_op; 2610 PL_lex_op = NULL; 2611 return PMFUNC; 2612 } 2613 else 2614 return FUNC; 2615 } 2616 2617 /* 2618 * S_sublex_push 2619 * Create a new scope to save the lexing state. The scope will be 2620 * ended in S_sublex_done. Returns a '(', starting the function arguments 2621 * to the uc, lc, etc. found before. 2622 * Sets PL_lex_state to LEX_INTERPCONCAT. 2623 */ 2624 2625 STATIC I32 2626 S_sublex_push(pTHX) 2627 { 2628 dVAR; 2629 LEXSHARED *shared; 2630 const bool is_heredoc = PL_multi_close == '<'; 2631 ENTER; 2632 2633 PL_lex_state = PL_sublex_info.super_state; 2634 SAVEI8(PL_lex_dojoin); 2635 SAVEI32(PL_lex_brackets); 2636 SAVEI32(PL_lex_allbrackets); 2637 SAVEI32(PL_lex_formbrack); 2638 SAVEI8(PL_lex_fakeeof); 2639 SAVEI32(PL_lex_casemods); 2640 SAVEI32(PL_lex_starts); 2641 SAVEI8(PL_lex_state); 2642 SAVESPTR(PL_lex_repl); 2643 SAVEVPTR(PL_lex_inpat); 2644 SAVEI16(PL_lex_inwhat); 2645 if (is_heredoc) 2646 { 2647 SAVECOPLINE(PL_curcop); 2648 SAVEI32(PL_multi_end); 2649 SAVEI32(PL_parser->herelines); 2650 PL_parser->herelines = 0; 2651 } 2652 SAVEI8(PL_multi_close); 2653 SAVEPPTR(PL_bufptr); 2654 SAVEPPTR(PL_bufend); 2655 SAVEPPTR(PL_oldbufptr); 2656 SAVEPPTR(PL_oldoldbufptr); 2657 SAVEPPTR(PL_last_lop); 2658 SAVEPPTR(PL_last_uni); 2659 SAVEPPTR(PL_linestart); 2660 SAVESPTR(PL_linestr); 2661 SAVEGENERICPV(PL_lex_brackstack); 2662 SAVEGENERICPV(PL_lex_casestack); 2663 SAVEGENERICPV(PL_parser->lex_shared); 2664 SAVEBOOL(PL_parser->lex_re_reparsing); 2665 SAVEI32(PL_copline); 2666 2667 /* The here-doc parser needs to be able to peek into outer lexing 2668 scopes to find the body of the here-doc. So we put PL_linestr and 2669 PL_bufptr into lex_shared, to ‘share’ those values. 2670 */ 2671 PL_parser->lex_shared->ls_linestr = PL_linestr; 2672 PL_parser->lex_shared->ls_bufptr = PL_bufptr; 2673 2674 PL_linestr = PL_lex_stuff; 2675 PL_lex_repl = PL_sublex_info.repl; 2676 PL_lex_stuff = NULL; 2677 PL_sublex_info.repl = NULL; 2678 2679 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart 2680 = SvPVX(PL_linestr); 2681 PL_bufend += SvCUR(PL_linestr); 2682 PL_last_lop = PL_last_uni = NULL; 2683 SAVEFREESV(PL_linestr); 2684 if (PL_lex_repl) SAVEFREESV(PL_lex_repl); 2685 2686 PL_lex_dojoin = FALSE; 2687 PL_lex_brackets = PL_lex_formbrack = 0; 2688 PL_lex_allbrackets = 0; 2689 PL_lex_fakeeof = LEX_FAKEEOF_NEVER; 2690 Newx(PL_lex_brackstack, 120, char); 2691 Newx(PL_lex_casestack, 12, char); 2692 PL_lex_casemods = 0; 2693 *PL_lex_casestack = '\0'; 2694 PL_lex_starts = 0; 2695 PL_lex_state = LEX_INTERPCONCAT; 2696 if (is_heredoc) 2697 CopLINE_set(PL_curcop, (line_t)PL_multi_start); 2698 PL_copline = NOLINE; 2699 2700 Newxz(shared, 1, LEXSHARED); 2701 shared->ls_prev = PL_parser->lex_shared; 2702 PL_parser->lex_shared = shared; 2703 2704 PL_lex_inwhat = PL_sublex_info.sub_inwhat; 2705 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS; 2706 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST) 2707 PL_lex_inpat = PL_sublex_info.sub_op; 2708 else 2709 PL_lex_inpat = NULL; 2710 2711 PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING); 2712 PL_in_eval &= ~EVAL_RE_REPARSING; 2713 2714 return '('; 2715 } 2716 2717 /* 2718 * S_sublex_done 2719 * Restores lexer state after a S_sublex_push. 2720 */ 2721 2722 STATIC I32 2723 S_sublex_done(pTHX) 2724 { 2725 dVAR; 2726 if (!PL_lex_starts++) { 2727 SV * const sv = newSVpvs(""); 2728 if (SvUTF8(PL_linestr)) 2729 SvUTF8_on(sv); 2730 PL_expect = XOPERATOR; 2731 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv); 2732 return THING; 2733 } 2734 2735 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */ 2736 PL_lex_state = LEX_INTERPCASEMOD; 2737 return yylex(); 2738 } 2739 2740 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */ 2741 assert(PL_lex_inwhat != OP_TRANSR); 2742 if (PL_lex_repl) { 2743 assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS); 2744 PL_linestr = PL_lex_repl; 2745 PL_lex_inpat = 0; 2746 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr); 2747 PL_bufend += SvCUR(PL_linestr); 2748 PL_last_lop = PL_last_uni = NULL; 2749 PL_lex_dojoin = FALSE; 2750 PL_lex_brackets = 0; 2751 PL_lex_allbrackets = 0; 2752 PL_lex_fakeeof = LEX_FAKEEOF_NEVER; 2753 PL_lex_casemods = 0; 2754 *PL_lex_casestack = '\0'; 2755 PL_lex_starts = 0; 2756 if (SvEVALED(PL_lex_repl)) { 2757 PL_lex_state = LEX_INTERPNORMAL; 2758 PL_lex_starts++; 2759 /* we don't clear PL_lex_repl here, so that we can check later 2760 whether this is an evalled subst; that means we rely on the 2761 logic to ensure sublex_done() is called again only via the 2762 branch (in yylex()) that clears PL_lex_repl, else we'll loop */ 2763 } 2764 else { 2765 PL_lex_state = LEX_INTERPCONCAT; 2766 PL_lex_repl = NULL; 2767 } 2768 if (SvTYPE(PL_linestr) >= SVt_PVNV) { 2769 CopLINE(PL_curcop) += 2770 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xpad_cop_seq.xlow 2771 + PL_parser->herelines; 2772 PL_parser->herelines = 0; 2773 } 2774 return ','; 2775 } 2776 else { 2777 const line_t l = CopLINE(PL_curcop); 2778 #ifdef PERL_MAD 2779 if (PL_madskills) { 2780 if (PL_thiswhite) { 2781 if (!PL_endwhite) 2782 PL_endwhite = newSVpvs(""); 2783 sv_catsv(PL_endwhite, PL_thiswhite); 2784 PL_thiswhite = 0; 2785 } 2786 if (PL_thistoken) 2787 sv_setpvs(PL_thistoken,""); 2788 else 2789 PL_realtokenstart = -1; 2790 } 2791 #endif 2792 LEAVE; 2793 if (PL_multi_close == '<') 2794 PL_parser->herelines += l - PL_multi_end; 2795 PL_bufend = SvPVX(PL_linestr); 2796 PL_bufend += SvCUR(PL_linestr); 2797 PL_expect = XOPERATOR; 2798 PL_sublex_info.sub_inwhat = 0; 2799 return ')'; 2800 } 2801 } 2802 2803 PERL_STATIC_INLINE SV* 2804 S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) 2805 { 2806 /* <s> points to first character of interior of \N{}, <e> to one beyond the 2807 * interior, hence to the "}". Finds what the name resolves to, returning 2808 * an SV* containing it; NULL if no valid one found */ 2809 2810 SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0); 2811 2812 HV * table; 2813 SV **cvp; 2814 SV *cv; 2815 SV *rv; 2816 HV *stash; 2817 const U8* first_bad_char_loc; 2818 const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */ 2819 2820 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME; 2821 2822 if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr, 2823 e - backslash_ptr, 2824 &first_bad_char_loc)) 2825 { 2826 /* If warnings are on, this will print a more detailed analysis of what 2827 * is wrong than the error message below */ 2828 utf8n_to_uvchr(first_bad_char_loc, 2829 e - ((char *) first_bad_char_loc), 2830 NULL, 0); 2831 2832 /* We deliberately don't try to print the malformed character, which 2833 * might not print very well; it also may be just the first of many 2834 * malformations, so don't print what comes after it */ 2835 yyerror(Perl_form(aTHX_ 2836 "Malformed UTF-8 character immediately after '%.*s'", 2837 (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr)); 2838 return NULL; 2839 } 2840 2841 res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr, 2842 /* include the <}> */ 2843 e - backslash_ptr + 1); 2844 if (! SvPOK(res)) { 2845 SvREFCNT_dec_NN(res); 2846 return NULL; 2847 } 2848 2849 /* See if the charnames handler is the Perl core's, and if so, we can skip 2850 * the validation needed for a user-supplied one, as Perl's does its own 2851 * validation. */ 2852 table = GvHV(PL_hintgv); /* ^H */ 2853 cvp = hv_fetchs(table, "charnames", FALSE); 2854 if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv), 2855 SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL)) 2856 { 2857 const char * const name = HvNAME(stash); 2858 if (HvNAMELEN(stash) == sizeof("_charnames")-1 2859 && strEQ(name, "_charnames")) { 2860 return res; 2861 } 2862 } 2863 2864 /* Here, it isn't Perl's charname handler. We can't rely on a 2865 * user-supplied handler to validate the input name. For non-ut8 input, 2866 * look to see that the first character is legal. Then loop through the 2867 * rest checking that each is a continuation */ 2868 2869 /* This code needs to be sync'ed with a regex in _charnames.pm which does 2870 * the same thing */ 2871 2872 if (! UTF) { 2873 if (! isALPHAU(*s)) { 2874 goto bad_charname; 2875 } 2876 s++; 2877 while (s < e) { 2878 if (! isCHARNAME_CONT(*s)) { 2879 goto bad_charname; 2880 } 2881 if (*s == ' ' && *(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) { 2882 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), 2883 "A sequence of multiple spaces in a charnames " 2884 "alias definition is deprecated"); 2885 } 2886 s++; 2887 } 2888 if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) { 2889 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), 2890 "Trailing white-space in a charnames alias " 2891 "definition is deprecated"); 2892 } 2893 } 2894 else { 2895 /* Similarly for utf8. For invariants can check directly; for other 2896 * Latin1, can calculate their code point and check; otherwise use a 2897 * swash */ 2898 if (UTF8_IS_INVARIANT(*s)) { 2899 if (! isALPHAU(*s)) { 2900 goto bad_charname; 2901 } 2902 s++; 2903 } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) { 2904 if (! isALPHAU(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1)))) { 2905 goto bad_charname; 2906 } 2907 s += 2; 2908 } 2909 else { 2910 if (! PL_utf8_charname_begin) { 2911 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST; 2912 PL_utf8_charname_begin = _core_swash_init("utf8", 2913 "_Perl_Charname_Begin", 2914 &PL_sv_undef, 2915 1, 0, NULL, &flags); 2916 } 2917 if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) { 2918 goto bad_charname; 2919 } 2920 s += UTF8SKIP(s); 2921 } 2922 2923 while (s < e) { 2924 if (UTF8_IS_INVARIANT(*s)) { 2925 if (! isCHARNAME_CONT(*s)) { 2926 goto bad_charname; 2927 } 2928 if (*s == ' ' && *(s-1) == ' ' 2929 && ckWARN_d(WARN_DEPRECATED)) { 2930 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), 2931 "A sequence of multiple spaces in a charnam" 2932 "es alias definition is deprecated"); 2933 } 2934 s++; 2935 } 2936 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) { 2937 if (! isCHARNAME_CONT(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1)))) 2938 { 2939 goto bad_charname; 2940 } 2941 s += 2; 2942 } 2943 else { 2944 if (! PL_utf8_charname_continue) { 2945 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST; 2946 PL_utf8_charname_continue = _core_swash_init("utf8", 2947 "_Perl_Charname_Continue", 2948 &PL_sv_undef, 2949 1, 0, NULL, &flags); 2950 } 2951 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) { 2952 goto bad_charname; 2953 } 2954 s += UTF8SKIP(s); 2955 } 2956 } 2957 if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) { 2958 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), 2959 "Trailing white-space in a charnames alias " 2960 "definition is deprecated"); 2961 } 2962 } 2963 2964 if (SvUTF8(res)) { /* Don't accept malformed input */ 2965 const U8* first_bad_char_loc; 2966 STRLEN len; 2967 const char* const str = SvPV_const(res, len); 2968 if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) { 2969 /* If warnings are on, this will print a more detailed analysis of 2970 * what is wrong than the error message below */ 2971 utf8n_to_uvchr(first_bad_char_loc, 2972 (char *) first_bad_char_loc - str, 2973 NULL, 0); 2974 2975 /* We deliberately don't try to print the malformed character, 2976 * which might not print very well; it also may be just the first 2977 * of many malformations, so don't print what comes after it */ 2978 yyerror_pv( 2979 Perl_form(aTHX_ 2980 "Malformed UTF-8 returned by %.*s immediately after '%.*s'", 2981 (int) (e - backslash_ptr + 1), backslash_ptr, 2982 (int) ((char *) first_bad_char_loc - str), str 2983 ), 2984 SVf_UTF8); 2985 return NULL; 2986 } 2987 } 2988 2989 return res; 2990 2991 bad_charname: { 2992 int bad_char_size = ((UTF) ? UTF8SKIP(s) : 1); 2993 2994 /* The final %.*s makes sure that should the trailing NUL be missing 2995 * that this print won't run off the end of the string */ 2996 yyerror_pv( 2997 Perl_form(aTHX_ 2998 "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s", 2999 (int)(s - backslash_ptr + bad_char_size), backslash_ptr, 3000 (int)(e - s + bad_char_size), s + bad_char_size 3001 ), 3002 UTF ? SVf_UTF8 : 0); 3003 return NULL; 3004 } 3005 } 3006 3007 /* 3008 scan_const 3009 3010 Extracts the next constant part of a pattern, double-quoted string, 3011 or transliteration. This is terrifying code. 3012 3013 For example, in parsing the double-quoted string "ab\x63$d", it would 3014 stop at the '$' and return an OP_CONST containing 'abc'. 3015 3016 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's 3017 processing a pattern (PL_lex_inpat is true), a transliteration 3018 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string. 3019 3020 Returns a pointer to the character scanned up to. If this is 3021 advanced from the start pointer supplied (i.e. if anything was 3022 successfully parsed), will leave an OP_CONST for the substring scanned 3023 in pl_yylval. Caller must intuit reason for not parsing further 3024 by looking at the next characters herself. 3025 3026 In patterns: 3027 expand: 3028 \N{FOO} => \N{U+hex_for_character_FOO} 3029 (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...}) 3030 3031 pass through: 3032 all other \-char, including \N and \N{ apart from \N{ABC} 3033 3034 stops on: 3035 @ and $ where it appears to be a var, but not for $ as tail anchor 3036 \l \L \u \U \Q \E 3037 (?{ or (??{ 3038 3039 3040 In transliterations: 3041 characters are VERY literal, except for - not at the start or end 3042 of the string, which indicates a range. If the range is in bytes, 3043 scan_const expands the range to the full set of intermediate 3044 characters. If the range is in utf8, the hyphen is replaced with 3045 a certain range mark which will be handled by pmtrans() in op.c. 3046 3047 In double-quoted strings: 3048 backslashes: 3049 double-quoted style: \r and \n 3050 constants: \x31, etc. 3051 deprecated backrefs: \1 (in substitution replacements) 3052 case and quoting: \U \Q \E 3053 stops on @ and $ 3054 3055 scan_const does *not* construct ops to handle interpolated strings. 3056 It stops processing as soon as it finds an embedded $ or @ variable 3057 and leaves it to the caller to work out what's going on. 3058 3059 embedded arrays (whether in pattern or not) could be: 3060 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-. 3061 3062 $ in double-quoted strings must be the symbol of an embedded scalar. 3063 3064 $ in pattern could be $foo or could be tail anchor. Assumption: 3065 it's a tail anchor if $ is the last thing in the string, or if it's 3066 followed by one of "()| \r\n\t" 3067 3068 \1 (backreferences) are turned into $1 in substitutions 3069 3070 The structure of the code is 3071 while (there's a character to process) { 3072 handle transliteration ranges 3073 skip regexp comments /(?#comment)/ and codes /(?{code})/ 3074 skip #-initiated comments in //x patterns 3075 check for embedded arrays 3076 check for embedded scalars 3077 if (backslash) { 3078 deprecate \1 in substitution replacements 3079 handle string-changing backslashes \l \U \Q \E, etc. 3080 switch (what was escaped) { 3081 handle \- in a transliteration (becomes a literal -) 3082 if a pattern and not \N{, go treat as regular character 3083 handle \132 (octal characters) 3084 handle \x15 and \x{1234} (hex characters) 3085 handle \N{name} (named characters, also \N{3,5} in a pattern) 3086 handle \cV (control characters) 3087 handle printf-style backslashes (\f, \r, \n, etc) 3088 } (end switch) 3089 continue 3090 } (end if backslash) 3091 handle regular character 3092 } (end while character to read) 3093 3094 */ 3095 3096 STATIC char * 3097 S_scan_const(pTHX_ char *start) 3098 { 3099 dVAR; 3100 char *send = PL_bufend; /* end of the constant */ 3101 SV *sv = newSV(send - start); /* sv for the constant. See 3102 note below on sizing. */ 3103 char *s = start; /* start of the constant */ 3104 char *d = SvPVX(sv); /* destination for copies */ 3105 bool dorange = FALSE; /* are we in a translit range? */ 3106 bool didrange = FALSE; /* did we just finish a range? */ 3107 bool in_charclass = FALSE; /* within /[...]/ */ 3108 bool has_utf8 = FALSE; /* Output constant is UTF8 */ 3109 bool this_utf8 = cBOOL(UTF); /* Is the source string assumed 3110 to be UTF8? But, this can 3111 show as true when the source 3112 isn't utf8, as for example 3113 when it is entirely composed 3114 of hex constants */ 3115 SV *res; /* result from charnames */ 3116 3117 /* Note on sizing: The scanned constant is placed into sv, which is 3118 * initialized by newSV() assuming one byte of output for every byte of 3119 * input. This routine expects newSV() to allocate an extra byte for a 3120 * trailing NUL, which this routine will append if it gets to the end of 3121 * the input. There may be more bytes of input than output (eg., \N{LATIN 3122 * CAPITAL LETTER A}), or more output than input if the constant ends up 3123 * recoded to utf8, but each time a construct is found that might increase 3124 * the needed size, SvGROW() is called. Its size parameter each time is 3125 * based on the best guess estimate at the time, namely the length used so 3126 * far, plus the length the current construct will occupy, plus room for 3127 * the trailing NUL, plus one byte for every input byte still unscanned */ 3128 3129 UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses 3130 before set */ 3131 #ifdef EBCDIC 3132 UV literal_endpoint = 0; 3133 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */ 3134 #endif 3135 3136 PERL_ARGS_ASSERT_SCAN_CONST; 3137 3138 assert(PL_lex_inwhat != OP_TRANSR); 3139 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) { 3140 /* If we are doing a trans and we know we want UTF8 set expectation */ 3141 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF); 3142 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF); 3143 } 3144 3145 /* Protect sv from errors and fatal warnings. */ 3146 ENTER_with_name("scan_const"); 3147 SAVEFREESV(sv); 3148 3149 while (s < send || dorange) { 3150 3151 /* get transliterations out of the way (they're most literal) */ 3152 if (PL_lex_inwhat == OP_TRANS) { 3153 /* expand a range A-Z to the full set of characters. AIE! */ 3154 if (dorange) { 3155 I32 i; /* current expanded character */ 3156 I32 min; /* first character in range */ 3157 I32 max; /* last character in range */ 3158 3159 #ifdef EBCDIC 3160 UV uvmax = 0; 3161 #endif 3162 3163 if (has_utf8 3164 #ifdef EBCDIC 3165 && !native_range 3166 #endif 3167 ) { 3168 char * const c = (char*)utf8_hop((U8*)d, -1); 3169 char *e = d++; 3170 while (e-- > c) 3171 *(e + 1) = *e; 3172 *c = (char) ILLEGAL_UTF8_BYTE; 3173 /* mark the range as done, and continue */ 3174 dorange = FALSE; 3175 didrange = TRUE; 3176 continue; 3177 } 3178 3179 i = d - SvPVX_const(sv); /* remember current offset */ 3180 #ifdef EBCDIC 3181 SvGROW(sv, 3182 SvLEN(sv) + (has_utf8 ? 3183 (512 - UTF_CONTINUATION_MARK + 3184 UNISKIP(0x100)) 3185 : 256)); 3186 /* How many two-byte within 0..255: 128 in UTF-8, 3187 * 96 in UTF-8-mod. */ 3188 #else 3189 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */ 3190 #endif 3191 d = SvPVX(sv) + i; /* refresh d after realloc */ 3192 #ifdef EBCDIC 3193 if (has_utf8) { 3194 int j; 3195 for (j = 0; j <= 1; j++) { 3196 char * const c = (char*)utf8_hop((U8*)d, -1); 3197 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0); 3198 if (j) 3199 min = (U8)uv; 3200 else if (uv < 256) 3201 max = (U8)uv; 3202 else { 3203 max = (U8)0xff; /* only to \xff */ 3204 uvmax = uv; /* \x{100} to uvmax */ 3205 } 3206 d = c; /* eat endpoint chars */ 3207 } 3208 } 3209 else { 3210 #endif 3211 d -= 2; /* eat the first char and the - */ 3212 min = (U8)*d; /* first char in range */ 3213 max = (U8)d[1]; /* last char in range */ 3214 #ifdef EBCDIC 3215 } 3216 #endif 3217 3218 if (min > max) { 3219 Perl_croak(aTHX_ 3220 "Invalid range \"%c-%c\" in transliteration operator", 3221 (char)min, (char)max); 3222 } 3223 3224 #ifdef EBCDIC 3225 if (literal_endpoint == 2 && 3226 ((isLOWER_A(min) && isLOWER_A(max)) || 3227 (isUPPER_A(min) && isUPPER_A(max)))) 3228 { 3229 for (i = min; i <= max; i++) { 3230 if (isALPHA_A(i)) 3231 *d++ = i; 3232 } 3233 } 3234 else 3235 #endif 3236 for (i = min; i <= max; i++) 3237 #ifdef EBCDIC 3238 if (has_utf8) { 3239 append_utf8_from_native_byte(i, &d); 3240 } 3241 else 3242 #endif 3243 *d++ = (char)i; 3244 3245 #ifdef EBCDIC 3246 if (uvmax) { 3247 d = (char*)uvchr_to_utf8((U8*)d, 0x100); 3248 if (uvmax > 0x101) 3249 *d++ = (char) ILLEGAL_UTF8_BYTE; 3250 if (uvmax > 0x100) 3251 d = (char*)uvchr_to_utf8((U8*)d, uvmax); 3252 } 3253 #endif 3254 3255 /* mark the range as done, and continue */ 3256 dorange = FALSE; 3257 didrange = TRUE; 3258 #ifdef EBCDIC 3259 literal_endpoint = 0; 3260 #endif 3261 continue; 3262 } 3263 3264 /* range begins (ignore - as first or last char) */ 3265 else if (*s == '-' && s+1 < send && s != start) { 3266 if (didrange) { 3267 Perl_croak(aTHX_ "Ambiguous range in transliteration operator"); 3268 } 3269 if (has_utf8 3270 #ifdef EBCDIC 3271 && !native_range 3272 #endif 3273 ) { 3274 *d++ = (char) ILLEGAL_UTF8_BYTE; /* use illegal utf8 byte--see pmtrans */ 3275 s++; 3276 continue; 3277 } 3278 dorange = TRUE; 3279 s++; 3280 } 3281 else { 3282 didrange = FALSE; 3283 #ifdef EBCDIC 3284 literal_endpoint = 0; 3285 native_range = TRUE; 3286 #endif 3287 } 3288 } 3289 3290 /* if we get here, we're not doing a transliteration */ 3291 3292 else if (*s == '[' && PL_lex_inpat && !in_charclass) { 3293 char *s1 = s-1; 3294 int esc = 0; 3295 while (s1 >= start && *s1-- == '\\') 3296 esc = !esc; 3297 if (!esc) 3298 in_charclass = TRUE; 3299 } 3300 3301 else if (*s == ']' && PL_lex_inpat && in_charclass) { 3302 char *s1 = s-1; 3303 int esc = 0; 3304 while (s1 >= start && *s1-- == '\\') 3305 esc = !esc; 3306 if (!esc) 3307 in_charclass = FALSE; 3308 } 3309 3310 /* skip for regexp comments /(?#comment)/, except for the last 3311 * char, which will be done separately. 3312 * Stop on (?{..}) and friends */ 3313 3314 else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) { 3315 if (s[2] == '#') { 3316 while (s+1 < send && *s != ')') 3317 *d++ = *s++; 3318 } 3319 else if (!PL_lex_casemods && 3320 ( s[2] == '{' /* This should match regcomp.c */ 3321 || (s[2] == '?' && s[3] == '{'))) 3322 { 3323 break; 3324 } 3325 } 3326 3327 /* likewise skip #-initiated comments in //x patterns */ 3328 else if (*s == '#' && PL_lex_inpat && !in_charclass && 3329 ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) { 3330 while (s+1 < send && *s != '\n') 3331 *d++ = *s++; 3332 } 3333 3334 /* no further processing of single-quoted regex */ 3335 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') 3336 goto default_action; 3337 3338 /* check for embedded arrays 3339 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-) 3340 */ 3341 else if (*s == '@' && s[1]) { 3342 if (isWORDCHAR_lazy_if(s+1,UTF)) 3343 break; 3344 if (strchr(":'{$", s[1])) 3345 break; 3346 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-')) 3347 break; /* in regexp, neither @+ nor @- are interpolated */ 3348 } 3349 3350 /* check for embedded scalars. only stop if we're sure it's a 3351 variable. 3352 */ 3353 else if (*s == '$') { 3354 if (!PL_lex_inpat) /* not a regexp, so $ must be var */ 3355 break; 3356 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) { 3357 if (s[1] == '\\') { 3358 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 3359 "Possible unintended interpolation of $\\ in regex"); 3360 } 3361 break; /* in regexp, $ might be tail anchor */ 3362 } 3363 } 3364 3365 /* End of else if chain - OP_TRANS rejoin rest */ 3366 3367 /* backslashes */ 3368 if (*s == '\\' && s+1 < send) { 3369 char* e; /* Can be used for ending '}', etc. */ 3370 3371 s++; 3372 3373 /* warn on \1 - \9 in substitution replacements, but note that \11 3374 * is an octal; and \19 is \1 followed by '9' */ 3375 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat && 3376 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1])) 3377 { 3378 /* diag_listed_as: \%d better written as $%d */ 3379 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s); 3380 *--s = '$'; 3381 break; 3382 } 3383 3384 /* string-change backslash escapes */ 3385 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) { 3386 --s; 3387 break; 3388 } 3389 /* In a pattern, process \N, but skip any other backslash escapes. 3390 * This is because we don't want to translate an escape sequence 3391 * into a meta symbol and have the regex compiler use the meta 3392 * symbol meaning, e.g. \x{2E} would be confused with a dot. But 3393 * in spite of this, we do have to process \N here while the proper 3394 * charnames handler is in scope. See bugs #56444 and #62056. 3395 * There is a complication because \N in a pattern may also stand 3396 * for 'match a non-nl', and not mean a charname, in which case its 3397 * processing should be deferred to the regex compiler. To be a 3398 * charname it must be followed immediately by a '{', and not look 3399 * like \N followed by a curly quantifier, i.e., not something like 3400 * \N{3,}. regcurly returns a boolean indicating if it is a legal 3401 * quantifier */ 3402 else if (PL_lex_inpat 3403 && (*s != 'N' 3404 || s[1] != '{' 3405 || regcurly(s + 1, FALSE))) 3406 { 3407 *d++ = '\\'; 3408 goto default_action; 3409 } 3410 3411 switch (*s) { 3412 3413 /* quoted - in transliterations */ 3414 case '-': 3415 if (PL_lex_inwhat == OP_TRANS) { 3416 *d++ = *s++; 3417 continue; 3418 } 3419 /* FALL THROUGH */ 3420 default: 3421 { 3422 if ((isALPHANUMERIC(*s))) 3423 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), 3424 "Unrecognized escape \\%c passed through", 3425 *s); 3426 /* default action is to copy the quoted character */ 3427 goto default_action; 3428 } 3429 3430 /* eg. \132 indicates the octal constant 0132 */ 3431 case '0': case '1': case '2': case '3': 3432 case '4': case '5': case '6': case '7': 3433 { 3434 I32 flags = PERL_SCAN_SILENT_ILLDIGIT; 3435 STRLEN len = 3; 3436 uv = grok_oct(s, &len, &flags, NULL); 3437 s += len; 3438 if (len < 3 && s < send && isDIGIT(*s) 3439 && ckWARN(WARN_MISC)) 3440 { 3441 Perl_warner(aTHX_ packWARN(WARN_MISC), 3442 "%s", form_short_octal_warning(s, len)); 3443 } 3444 } 3445 goto NUM_ESCAPE_INSERT; 3446 3447 /* eg. \o{24} indicates the octal constant \024 */ 3448 case 'o': 3449 { 3450 const char* error; 3451 3452 bool valid = grok_bslash_o(&s, &uv, &error, 3453 TRUE, /* Output warning */ 3454 FALSE, /* Not strict */ 3455 TRUE, /* Output warnings for 3456 non-portables */ 3457 UTF); 3458 if (! valid) { 3459 yyerror(error); 3460 continue; 3461 } 3462 goto NUM_ESCAPE_INSERT; 3463 } 3464 3465 /* eg. \x24 indicates the hex constant 0x24 */ 3466 case 'x': 3467 { 3468 const char* error; 3469 3470 bool valid = grok_bslash_x(&s, &uv, &error, 3471 TRUE, /* Output warning */ 3472 FALSE, /* Not strict */ 3473 TRUE, /* Output warnings for 3474 non-portables */ 3475 UTF); 3476 if (! valid) { 3477 yyerror(error); 3478 continue; 3479 } 3480 } 3481 3482 NUM_ESCAPE_INSERT: 3483 /* Insert oct or hex escaped character. There will always be 3484 * enough room in sv since such escapes will be longer than any 3485 * UTF-8 sequence they can end up as, except if they force us 3486 * to recode the rest of the string into utf8 */ 3487 3488 /* Here uv is the ordinal of the next character being added */ 3489 if (!UVCHR_IS_INVARIANT(uv)) { 3490 if (!has_utf8 && uv > 255) { 3491 /* Might need to recode whatever we have accumulated so 3492 * far if it contains any chars variant in utf8 or 3493 * utf-ebcdic. */ 3494 3495 SvCUR_set(sv, d - SvPVX_const(sv)); 3496 SvPOK_on(sv); 3497 *d = '\0'; 3498 /* See Note on sizing above. */ 3499 sv_utf8_upgrade_flags_grow(sv, 3500 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, 3501 UNISKIP(uv) + (STRLEN)(send - s) + 1); 3502 d = SvPVX(sv) + SvCUR(sv); 3503 has_utf8 = TRUE; 3504 } 3505 3506 if (has_utf8) { 3507 d = (char*)uvchr_to_utf8((U8*)d, uv); 3508 if (PL_lex_inwhat == OP_TRANS && 3509 PL_sublex_info.sub_op) { 3510 PL_sublex_info.sub_op->op_private |= 3511 (PL_lex_repl ? OPpTRANS_FROM_UTF 3512 : OPpTRANS_TO_UTF); 3513 } 3514 #ifdef EBCDIC 3515 if (uv > 255 && !dorange) 3516 native_range = FALSE; 3517 #endif 3518 } 3519 else { 3520 *d++ = (char)uv; 3521 } 3522 } 3523 else { 3524 *d++ = (char) uv; 3525 } 3526 continue; 3527 3528 case 'N': 3529 /* In a non-pattern \N must be a named character, like \N{LATIN 3530 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can 3531 * mean to match a non-newline. For non-patterns, named 3532 * characters are converted to their string equivalents. In 3533 * patterns, named characters are not converted to their 3534 * ultimate forms for the same reasons that other escapes 3535 * aren't. Instead, they are converted to the \N{U+...} form 3536 * to get the value from the charnames that is in effect right 3537 * now, while preserving the fact that it was a named character 3538 * so that the regex compiler knows this */ 3539 3540 /* The structure of this section of code (besides checking for 3541 * errors and upgrading to utf8) is: 3542 * Further disambiguate between the two meanings of \N, and if 3543 * not a charname, go process it elsewhere 3544 * If of form \N{U+...}, pass it through if a pattern; 3545 * otherwise convert to utf8 3546 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a 3547 * pattern; otherwise convert to utf8 */ 3548 3549 /* Here, s points to the 'N'; the test below is guaranteed to 3550 * succeed if we are being called on a pattern as we already 3551 * know from a test above that the next character is a '{'. 3552 * On a non-pattern \N must mean 'named sequence, which 3553 * requires braces */ 3554 s++; 3555 if (*s != '{') { 3556 yyerror("Missing braces on \\N{}"); 3557 continue; 3558 } 3559 s++; 3560 3561 /* If there is no matching '}', it is an error. */ 3562 if (! (e = strchr(s, '}'))) { 3563 if (! PL_lex_inpat) { 3564 yyerror("Missing right brace on \\N{}"); 3565 } else { 3566 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N"); 3567 } 3568 continue; 3569 } 3570 3571 /* Here it looks like a named character */ 3572 3573 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */ 3574 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES 3575 | PERL_SCAN_DISALLOW_PREFIX; 3576 STRLEN len; 3577 3578 /* For \N{U+...}, the '...' is a unicode value even on 3579 * EBCDIC machines */ 3580 s += 2; /* Skip to next char after the 'U+' */ 3581 len = e - s; 3582 uv = grok_hex(s, &len, &flags, NULL); 3583 if (len == 0 || len != (STRLEN)(e - s)) { 3584 yyerror("Invalid hexadecimal number in \\N{U+...}"); 3585 s = e + 1; 3586 continue; 3587 } 3588 3589 if (PL_lex_inpat) { 3590 3591 /* On non-EBCDIC platforms, pass through to the regex 3592 * compiler unchanged. The reason we evaluated the 3593 * number above is to make sure there wasn't a syntax 3594 * error. But on EBCDIC we convert to native so 3595 * downstream code can continue to assume it's native 3596 */ 3597 s -= 5; /* Include the '\N{U+' */ 3598 #ifdef EBCDIC 3599 d += my_snprintf(d, e - s + 1 + 1, /* includes the } 3600 and the \0 */ 3601 "\\N{U+%X}", 3602 (unsigned int) UNI_TO_NATIVE(uv)); 3603 #else 3604 Copy(s, d, e - s + 1, char); /* 1 = include the } */ 3605 d += e - s + 1; 3606 #endif 3607 } 3608 else { /* Not a pattern: convert the hex to string */ 3609 3610 /* If destination is not in utf8, unconditionally 3611 * recode it to be so. This is because \N{} implies 3612 * Unicode semantics, and scalars have to be in utf8 3613 * to guarantee those semantics */ 3614 if (! has_utf8) { 3615 SvCUR_set(sv, d - SvPVX_const(sv)); 3616 SvPOK_on(sv); 3617 *d = '\0'; 3618 /* See Note on sizing above. */ 3619 sv_utf8_upgrade_flags_grow( 3620 sv, 3621 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, 3622 UNISKIP(uv) + (STRLEN)(send - e) + 1); 3623 d = SvPVX(sv) + SvCUR(sv); 3624 has_utf8 = TRUE; 3625 } 3626 3627 /* Add the (Unicode) code point to the output. */ 3628 if (UNI_IS_INVARIANT(uv)) { 3629 *d++ = (char) LATIN1_TO_NATIVE(uv); 3630 } 3631 else { 3632 d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0); 3633 } 3634 } 3635 } 3636 else /* Here is \N{NAME} but not \N{U+...}. */ 3637 if ((res = get_and_check_backslash_N_name(s, e))) 3638 { 3639 STRLEN len; 3640 const char *str = SvPV_const(res, len); 3641 if (PL_lex_inpat) { 3642 3643 if (! len) { /* The name resolved to an empty string */ 3644 Copy("\\N{}", d, 4, char); 3645 d += 4; 3646 } 3647 else { 3648 /* In order to not lose information for the regex 3649 * compiler, pass the result in the specially made 3650 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are 3651 * the code points in hex of each character 3652 * returned by charnames */ 3653 3654 const char *str_end = str + len; 3655 const STRLEN off = d - SvPVX_const(sv); 3656 3657 if (! SvUTF8(res)) { 3658 /* For the non-UTF-8 case, we can determine the 3659 * exact length needed without having to parse 3660 * through the string. Each character takes up 3661 * 2 hex digits plus either a trailing dot or 3662 * the "}" */ 3663 d = off + SvGROW(sv, off 3664 + 3 * len 3665 + 6 /* For the "\N{U+", and 3666 trailing NUL */ 3667 + (STRLEN)(send - e)); 3668 Copy("\\N{U+", d, 5, char); 3669 d += 5; 3670 while (str < str_end) { 3671 char hex_string[4]; 3672 my_snprintf(hex_string, sizeof(hex_string), 3673 "%02X.", (U8) *str); 3674 Copy(hex_string, d, 3, char); 3675 d += 3; 3676 str++; 3677 } 3678 d--; /* We will overwrite below the final 3679 dot with a right brace */ 3680 } 3681 else { 3682 STRLEN char_length; /* cur char's byte length */ 3683 3684 /* and the number of bytes after this is 3685 * translated into hex digits */ 3686 STRLEN output_length; 3687 3688 /* 2 hex per byte; 2 chars for '\N'; 2 chars 3689 * for max('U+', '.'); and 1 for NUL */ 3690 char hex_string[2 * UTF8_MAXBYTES + 5]; 3691 3692 /* Get the first character of the result. */ 3693 U32 uv = utf8n_to_uvchr((U8 *) str, 3694 len, 3695 &char_length, 3696 UTF8_ALLOW_ANYUV); 3697 /* Convert first code point to hex, including 3698 * the boiler plate before it. */ 3699 output_length = 3700 my_snprintf(hex_string, sizeof(hex_string), 3701 "\\N{U+%X", 3702 (unsigned int) uv); 3703 3704 /* Make sure there is enough space to hold it */ 3705 d = off + SvGROW(sv, off 3706 + output_length 3707 + (STRLEN)(send - e) 3708 + 2); /* '}' + NUL */ 3709 /* And output it */ 3710 Copy(hex_string, d, output_length, char); 3711 d += output_length; 3712 3713 /* For each subsequent character, append dot and 3714 * its ordinal in hex */ 3715 while ((str += char_length) < str_end) { 3716 const STRLEN off = d - SvPVX_const(sv); 3717 U32 uv = utf8n_to_uvchr((U8 *) str, 3718 str_end - str, 3719 &char_length, 3720 UTF8_ALLOW_ANYUV); 3721 output_length = 3722 my_snprintf(hex_string, 3723 sizeof(hex_string), 3724 ".%X", 3725 (unsigned int) uv); 3726 3727 d = off + SvGROW(sv, off 3728 + output_length 3729 + (STRLEN)(send - e) 3730 + 2); /* '}' + NUL */ 3731 Copy(hex_string, d, output_length, char); 3732 d += output_length; 3733 } 3734 } 3735 3736 *d++ = '}'; /* Done. Add the trailing brace */ 3737 } 3738 } 3739 else { /* Here, not in a pattern. Convert the name to a 3740 * string. */ 3741 3742 /* If destination is not in utf8, unconditionally 3743 * recode it to be so. This is because \N{} implies 3744 * Unicode semantics, and scalars have to be in utf8 3745 * to guarantee those semantics */ 3746 if (! has_utf8) { 3747 SvCUR_set(sv, d - SvPVX_const(sv)); 3748 SvPOK_on(sv); 3749 *d = '\0'; 3750 /* See Note on sizing above. */ 3751 sv_utf8_upgrade_flags_grow(sv, 3752 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, 3753 len + (STRLEN)(send - s) + 1); 3754 d = SvPVX(sv) + SvCUR(sv); 3755 has_utf8 = TRUE; 3756 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */ 3757 3758 /* See Note on sizing above. (NOTE: SvCUR() is not 3759 * set correctly here). */ 3760 const STRLEN off = d - SvPVX_const(sv); 3761 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1); 3762 } 3763 Copy(str, d, len, char); 3764 d += len; 3765 } 3766 3767 SvREFCNT_dec(res); 3768 3769 } /* End \N{NAME} */ 3770 #ifdef EBCDIC 3771 if (!dorange) 3772 native_range = FALSE; /* \N{} is defined to be Unicode */ 3773 #endif 3774 s = e + 1; /* Point to just after the '}' */ 3775 continue; 3776 3777 /* \c is a control character */ 3778 case 'c': 3779 s++; 3780 if (s < send) { 3781 *d++ = grok_bslash_c(*s++, 1); 3782 } 3783 else { 3784 yyerror("Missing control char name in \\c"); 3785 } 3786 continue; 3787 3788 /* printf-style backslashes, formfeeds, newlines, etc */ 3789 case 'b': 3790 *d++ = '\b'; 3791 break; 3792 case 'n': 3793 *d++ = '\n'; 3794 break; 3795 case 'r': 3796 *d++ = '\r'; 3797 break; 3798 case 'f': 3799 *d++ = '\f'; 3800 break; 3801 case 't': 3802 *d++ = '\t'; 3803 break; 3804 case 'e': 3805 *d++ = ASCII_TO_NATIVE('\033'); 3806 break; 3807 case 'a': 3808 *d++ = '\a'; 3809 break; 3810 } /* end switch */ 3811 3812 s++; 3813 continue; 3814 } /* end if (backslash) */ 3815 #ifdef EBCDIC 3816 else 3817 literal_endpoint++; 3818 #endif 3819 3820 default_action: 3821 /* If we started with encoded form, or already know we want it, 3822 then encode the next character */ 3823 if (! NATIVE_BYTE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) { 3824 STRLEN len = 1; 3825 3826 3827 /* One might think that it is wasted effort in the case of the 3828 * source being utf8 (this_utf8 == TRUE) to take the next character 3829 * in the source, convert it to an unsigned value, and then convert 3830 * it back again. But the source has not been validated here. The 3831 * routine that does the conversion checks for errors like 3832 * malformed utf8 */ 3833 3834 const UV nextuv = (this_utf8) 3835 ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) 3836 : (UV) ((U8) *s); 3837 const STRLEN need = UNISKIP(nextuv); 3838 if (!has_utf8) { 3839 SvCUR_set(sv, d - SvPVX_const(sv)); 3840 SvPOK_on(sv); 3841 *d = '\0'; 3842 /* See Note on sizing above. */ 3843 sv_utf8_upgrade_flags_grow(sv, 3844 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, 3845 need + (STRLEN)(send - s) + 1); 3846 d = SvPVX(sv) + SvCUR(sv); 3847 has_utf8 = TRUE; 3848 } else if (need > len) { 3849 /* encoded value larger than old, may need extra space (NOTE: 3850 * SvCUR() is not set correctly here). See Note on sizing 3851 * above. */ 3852 const STRLEN off = d - SvPVX_const(sv); 3853 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off; 3854 } 3855 s += len; 3856 3857 d = (char*)uvchr_to_utf8((U8*)d, nextuv); 3858 #ifdef EBCDIC 3859 if (uv > 255 && !dorange) 3860 native_range = FALSE; 3861 #endif 3862 } 3863 else { 3864 *d++ = *s++; 3865 } 3866 } /* while loop to process each character */ 3867 3868 /* terminate the string and set up the sv */ 3869 *d = '\0'; 3870 SvCUR_set(sv, d - SvPVX_const(sv)); 3871 if (SvCUR(sv) >= SvLEN(sv)) 3872 Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf 3873 " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv)); 3874 3875 SvPOK_on(sv); 3876 if (PL_encoding && !has_utf8) { 3877 sv_recode_to_utf8(sv, PL_encoding); 3878 if (SvUTF8(sv)) 3879 has_utf8 = TRUE; 3880 } 3881 if (has_utf8) { 3882 SvUTF8_on(sv); 3883 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) { 3884 PL_sublex_info.sub_op->op_private |= 3885 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF); 3886 } 3887 } 3888 3889 /* shrink the sv if we allocated more than we used */ 3890 if (SvCUR(sv) + 5 < SvLEN(sv)) { 3891 SvPV_shrink_to_cur(sv); 3892 } 3893 3894 /* return the substring (via pl_yylval) only if we parsed anything */ 3895 if (s > start) { 3896 char *s2 = start; 3897 for (; s2 < s; s2++) { 3898 if (*s2 == '\n') 3899 COPLINE_INC_WITH_HERELINES; 3900 } 3901 SvREFCNT_inc_simple_void_NN(sv); 3902 if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING )) 3903 && ! PL_parser->lex_re_reparsing) 3904 { 3905 const char *const key = PL_lex_inpat ? "qr" : "q"; 3906 const STRLEN keylen = PL_lex_inpat ? 2 : 1; 3907 const char *type; 3908 STRLEN typelen; 3909 3910 if (PL_lex_inwhat == OP_TRANS) { 3911 type = "tr"; 3912 typelen = 2; 3913 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) { 3914 type = "s"; 3915 typelen = 1; 3916 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') { 3917 type = "q"; 3918 typelen = 1; 3919 } else { 3920 type = "qq"; 3921 typelen = 2; 3922 } 3923 3924 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL, 3925 type, typelen); 3926 } 3927 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv); 3928 } 3929 LEAVE_with_name("scan_const"); 3930 return s; 3931 } 3932 3933 /* S_intuit_more 3934 * Returns TRUE if there's more to the expression (e.g., a subscript), 3935 * FALSE otherwise. 3936 * 3937 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/ 3938 * 3939 * ->[ and ->{ return TRUE 3940 * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled 3941 * { and [ outside a pattern are always subscripts, so return TRUE 3942 * if we're outside a pattern and it's not { or [, then return FALSE 3943 * if we're in a pattern and the first char is a { 3944 * {4,5} (any digits around the comma) returns FALSE 3945 * if we're in a pattern and the first char is a [ 3946 * [] returns FALSE 3947 * [SOMETHING] has a funky algorithm to decide whether it's a 3948 * character class or not. It has to deal with things like 3949 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/ 3950 * anything else returns TRUE 3951 */ 3952 3953 /* This is the one truly awful dwimmer necessary to conflate C and sed. */ 3954 3955 STATIC int 3956 S_intuit_more(pTHX_ char *s) 3957 { 3958 dVAR; 3959 3960 PERL_ARGS_ASSERT_INTUIT_MORE; 3961 3962 if (PL_lex_brackets) 3963 return TRUE; 3964 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{')) 3965 return TRUE; 3966 if (*s == '-' && s[1] == '>' 3967 && FEATURE_POSTDEREF_QQ_IS_ENABLED 3968 && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*'))) 3969 ||(s[2] == '@' && strchr("*[{",s[3])) )) 3970 return TRUE; 3971 if (*s != '{' && *s != '[') 3972 return FALSE; 3973 if (!PL_lex_inpat) 3974 return TRUE; 3975 3976 /* In a pattern, so maybe we have {n,m}. */ 3977 if (*s == '{') { 3978 if (regcurly(s, FALSE)) { 3979 return FALSE; 3980 } 3981 return TRUE; 3982 } 3983 3984 /* On the other hand, maybe we have a character class */ 3985 3986 s++; 3987 if (*s == ']' || *s == '^') 3988 return FALSE; 3989 else { 3990 /* this is terrifying, and it works */ 3991 int weight; 3992 char seen[256]; 3993 const char * const send = strchr(s,']'); 3994 unsigned char un_char, last_un_char; 3995 char tmpbuf[sizeof PL_tokenbuf * 4]; 3996 3997 if (!send) /* has to be an expression */ 3998 return TRUE; 3999 weight = 2; /* let's weigh the evidence */ 4000 4001 if (*s == '$') 4002 weight -= 3; 4003 else if (isDIGIT(*s)) { 4004 if (s[1] != ']') { 4005 if (isDIGIT(s[1]) && s[2] == ']') 4006 weight -= 10; 4007 } 4008 else 4009 weight -= 100; 4010 } 4011 Zero(seen,256,char); 4012 un_char = 255; 4013 for (; s < send; s++) { 4014 last_un_char = un_char; 4015 un_char = (unsigned char)*s; 4016 switch (*s) { 4017 case '@': 4018 case '&': 4019 case '$': 4020 weight -= seen[un_char] * 10; 4021 if (isWORDCHAR_lazy_if(s+1,UTF)) { 4022 int len; 4023 char *tmp = PL_bufend; 4024 PL_bufend = (char*)send; 4025 scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE); 4026 PL_bufend = tmp; 4027 len = (int)strlen(tmpbuf); 4028 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 4029 UTF ? SVf_UTF8 : 0, SVt_PV)) 4030 weight -= 100; 4031 else 4032 weight -= 10; 4033 } 4034 else if (*s == '$' && s[1] && 4035 strchr("[#!%*<>()-=",s[1])) { 4036 if (/*{*/ strchr("])} =",s[2])) 4037 weight -= 10; 4038 else 4039 weight -= 1; 4040 } 4041 break; 4042 case '\\': 4043 un_char = 254; 4044 if (s[1]) { 4045 if (strchr("wds]",s[1])) 4046 weight += 100; 4047 else if (seen[(U8)'\''] || seen[(U8)'"']) 4048 weight += 1; 4049 else if (strchr("rnftbxcav",s[1])) 4050 weight += 40; 4051 else if (isDIGIT(s[1])) { 4052 weight += 40; 4053 while (s[1] && isDIGIT(s[1])) 4054 s++; 4055 } 4056 } 4057 else 4058 weight += 100; 4059 break; 4060 case '-': 4061 if (s[1] == '\\') 4062 weight += 50; 4063 if (strchr("aA01! ",last_un_char)) 4064 weight += 30; 4065 if (strchr("zZ79~",s[1])) 4066 weight += 30; 4067 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$')) 4068 weight -= 5; /* cope with negative subscript */ 4069 break; 4070 default: 4071 if (!isWORDCHAR(last_un_char) 4072 && !(last_un_char == '$' || last_un_char == '@' 4073 || last_un_char == '&') 4074 && isALPHA(*s) && s[1] && isALPHA(s[1])) { 4075 char *d = s; 4076 while (isALPHA(*s)) 4077 s++; 4078 if (keyword(d, s - d, 0)) 4079 weight -= 150; 4080 } 4081 if (un_char == last_un_char + 1) 4082 weight += 5; 4083 weight -= seen[un_char]; 4084 break; 4085 } 4086 seen[un_char]++; 4087 } 4088 if (weight >= 0) /* probably a character class */ 4089 return FALSE; 4090 } 4091 4092 return TRUE; 4093 } 4094 4095 /* 4096 * S_intuit_method 4097 * 4098 * Does all the checking to disambiguate 4099 * foo bar 4100 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise 4101 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args). 4102 * 4103 * First argument is the stuff after the first token, e.g. "bar". 4104 * 4105 * Not a method if foo is a filehandle. 4106 * Not a method if foo is a subroutine prototyped to take a filehandle. 4107 * Not a method if it's really "Foo $bar" 4108 * Method if it's "foo $bar" 4109 * Not a method if it's really "print foo $bar" 4110 * Method if it's really "foo package::" (interpreted as package->foo) 4111 * Not a method if bar is known to be a subroutine ("sub bar; foo bar") 4112 * Not a method if bar is a filehandle or package, but is quoted with 4113 * => 4114 */ 4115 4116 STATIC int 4117 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv) 4118 { 4119 dVAR; 4120 char *s = start + (*start == '$'); 4121 char tmpbuf[sizeof PL_tokenbuf]; 4122 STRLEN len; 4123 GV* indirgv; 4124 #ifdef PERL_MAD 4125 int soff; 4126 #endif 4127 4128 PERL_ARGS_ASSERT_INTUIT_METHOD; 4129 4130 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv)) 4131 return 0; 4132 if (cv && SvPOK(cv)) { 4133 const char *proto = CvPROTO(cv); 4134 if (proto) { 4135 while (*proto && (isSPACE(*proto) || *proto == ';')) 4136 proto++; 4137 if (*proto == '*') 4138 return 0; 4139 } 4140 } 4141 4142 if (*start == '$') { 4143 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY || 4144 isUPPER(*PL_tokenbuf)) 4145 return 0; 4146 #ifdef PERL_MAD 4147 len = start - SvPVX(PL_linestr); 4148 #endif 4149 s = PEEKSPACE(s); 4150 #ifdef PERL_MAD 4151 start = SvPVX(PL_linestr) + len; 4152 #endif 4153 PL_bufptr = start; 4154 PL_expect = XREF; 4155 return *s == '(' ? FUNCMETH : METHOD; 4156 } 4157 4158 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len); 4159 /* start is the beginning of the possible filehandle/object, 4160 * and s is the end of it 4161 * tmpbuf is a copy of it (but with single quotes as double colons) 4162 */ 4163 4164 if (!keyword(tmpbuf, len, 0)) { 4165 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') { 4166 len -= 2; 4167 tmpbuf[len] = '\0'; 4168 #ifdef PERL_MAD 4169 soff = s - SvPVX(PL_linestr); 4170 #endif 4171 goto bare_package; 4172 } 4173 indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV); 4174 if (indirgv && GvCVu(indirgv)) 4175 return 0; 4176 /* filehandle or package name makes it a method */ 4177 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) { 4178 #ifdef PERL_MAD 4179 soff = s - SvPVX(PL_linestr); 4180 #endif 4181 s = PEEKSPACE(s); 4182 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>') 4183 return 0; /* no assumptions -- "=>" quotes bareword */ 4184 bare_package: 4185 start_force(PL_curforce); 4186 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, 4187 S_newSV_maybe_utf8(aTHX_ tmpbuf, len)); 4188 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE; 4189 if (PL_madskills) 4190 curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start, 4191 ( UTF ? SVf_UTF8 : 0 ))); 4192 PL_expect = XTERM; 4193 force_next(WORD); 4194 PL_bufptr = s; 4195 #ifdef PERL_MAD 4196 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */ 4197 #endif 4198 return *s == '(' ? FUNCMETH : METHOD; 4199 } 4200 } 4201 return 0; 4202 } 4203 4204 /* Encoded script support. filter_add() effectively inserts a 4205 * 'pre-processing' function into the current source input stream. 4206 * Note that the filter function only applies to the current source file 4207 * (e.g., it will not affect files 'require'd or 'use'd by this one). 4208 * 4209 * The datasv parameter (which may be NULL) can be used to pass 4210 * private data to this instance of the filter. The filter function 4211 * can recover the SV using the FILTER_DATA macro and use it to 4212 * store private buffers and state information. 4213 * 4214 * The supplied datasv parameter is upgraded to a PVIO type 4215 * and the IoDIRP/IoANY field is used to store the function pointer, 4216 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such. 4217 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for 4218 * private use must be set using malloc'd pointers. 4219 */ 4220 4221 SV * 4222 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv) 4223 { 4224 dVAR; 4225 if (!funcp) 4226 return NULL; 4227 4228 if (!PL_parser) 4229 return NULL; 4230 4231 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) 4232 Perl_croak(aTHX_ "Source filters apply only to byte streams"); 4233 4234 if (!PL_rsfp_filters) 4235 PL_rsfp_filters = newAV(); 4236 if (!datasv) 4237 datasv = newSV(0); 4238 SvUPGRADE(datasv, SVt_PVIO); 4239 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */ 4240 IoFLAGS(datasv) |= IOf_FAKE_DIRP; 4241 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n", 4242 FPTR2DPTR(void *, IoANY(datasv)), 4243 SvPV_nolen(datasv))); 4244 av_unshift(PL_rsfp_filters, 1); 4245 av_store(PL_rsfp_filters, 0, datasv) ; 4246 if ( 4247 !PL_parser->filtered 4248 && PL_parser->lex_flags & LEX_EVALBYTES 4249 && PL_bufptr < PL_bufend 4250 ) { 4251 const char *s = PL_bufptr; 4252 while (s < PL_bufend) { 4253 if (*s == '\n') { 4254 SV *linestr = PL_parser->linestr; 4255 char *buf = SvPVX(linestr); 4256 STRLEN const bufptr_pos = PL_parser->bufptr - buf; 4257 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf; 4258 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf; 4259 STRLEN const linestart_pos = PL_parser->linestart - buf; 4260 STRLEN const last_uni_pos = 4261 PL_parser->last_uni ? PL_parser->last_uni - buf : 0; 4262 STRLEN const last_lop_pos = 4263 PL_parser->last_lop ? PL_parser->last_lop - buf : 0; 4264 av_push(PL_rsfp_filters, linestr); 4265 PL_parser->linestr = 4266 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr)); 4267 buf = SvPVX(PL_parser->linestr); 4268 PL_parser->bufend = buf + SvCUR(PL_parser->linestr); 4269 PL_parser->bufptr = buf + bufptr_pos; 4270 PL_parser->oldbufptr = buf + oldbufptr_pos; 4271 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos; 4272 PL_parser->linestart = buf + linestart_pos; 4273 if (PL_parser->last_uni) 4274 PL_parser->last_uni = buf + last_uni_pos; 4275 if (PL_parser->last_lop) 4276 PL_parser->last_lop = buf + last_lop_pos; 4277 SvLEN(linestr) = SvCUR(linestr); 4278 SvCUR(linestr) = s-SvPVX(linestr); 4279 PL_parser->filtered = 1; 4280 break; 4281 } 4282 s++; 4283 } 4284 } 4285 return(datasv); 4286 } 4287 4288 4289 /* Delete most recently added instance of this filter function. */ 4290 void 4291 Perl_filter_del(pTHX_ filter_t funcp) 4292 { 4293 dVAR; 4294 SV *datasv; 4295 4296 PERL_ARGS_ASSERT_FILTER_DEL; 4297 4298 #ifdef DEBUGGING 4299 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", 4300 FPTR2DPTR(void*, funcp))); 4301 #endif 4302 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0) 4303 return; 4304 /* if filter is on top of stack (usual case) just pop it off */ 4305 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters)); 4306 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) { 4307 sv_free(av_pop(PL_rsfp_filters)); 4308 4309 return; 4310 } 4311 /* we need to search for the correct entry and clear it */ 4312 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)"); 4313 } 4314 4315 4316 /* Invoke the idxth filter function for the current rsfp. */ 4317 /* maxlen 0 = read one text line */ 4318 I32 4319 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen) 4320 { 4321 dVAR; 4322 filter_t funcp; 4323 SV *datasv = NULL; 4324 /* This API is bad. It should have been using unsigned int for maxlen. 4325 Not sure if we want to change the API, but if not we should sanity 4326 check the value here. */ 4327 unsigned int correct_length = maxlen < 0 ? PERL_INT_MAX : maxlen; 4328 4329 PERL_ARGS_ASSERT_FILTER_READ; 4330 4331 if (!PL_parser || !PL_rsfp_filters) 4332 return -1; 4333 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */ 4334 /* Provide a default input filter to make life easy. */ 4335 /* Note that we append to the line. This is handy. */ 4336 DEBUG_P(PerlIO_printf(Perl_debug_log, 4337 "filter_read %d: from rsfp\n", idx)); 4338 if (correct_length) { 4339 /* Want a block */ 4340 int len ; 4341 const int old_len = SvCUR(buf_sv); 4342 4343 /* ensure buf_sv is large enough */ 4344 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ; 4345 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, 4346 correct_length)) <= 0) { 4347 if (PerlIO_error(PL_rsfp)) 4348 return -1; /* error */ 4349 else 4350 return 0 ; /* end of file */ 4351 } 4352 SvCUR_set(buf_sv, old_len + len) ; 4353 SvPVX(buf_sv)[old_len + len] = '\0'; 4354 } else { 4355 /* Want a line */ 4356 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) { 4357 if (PerlIO_error(PL_rsfp)) 4358 return -1; /* error */ 4359 else 4360 return 0 ; /* end of file */ 4361 } 4362 } 4363 return SvCUR(buf_sv); 4364 } 4365 /* Skip this filter slot if filter has been deleted */ 4366 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) { 4367 DEBUG_P(PerlIO_printf(Perl_debug_log, 4368 "filter_read %d: skipped (filter deleted)\n", 4369 idx)); 4370 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */ 4371 } 4372 if (SvTYPE(datasv) != SVt_PVIO) { 4373 if (correct_length) { 4374 /* Want a block */ 4375 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv); 4376 if (!remainder) return 0; /* eof */ 4377 if (correct_length > remainder) correct_length = remainder; 4378 sv_catpvn(buf_sv, SvEND(datasv), correct_length); 4379 SvCUR_set(datasv, SvCUR(datasv) + correct_length); 4380 } else { 4381 /* Want a line */ 4382 const char *s = SvEND(datasv); 4383 const char *send = SvPVX(datasv) + SvLEN(datasv); 4384 while (s < send) { 4385 if (*s == '\n') { 4386 s++; 4387 break; 4388 } 4389 s++; 4390 } 4391 if (s == send) return 0; /* eof */ 4392 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv)); 4393 SvCUR_set(datasv, s-SvPVX(datasv)); 4394 } 4395 return SvCUR(buf_sv); 4396 } 4397 /* Get function pointer hidden within datasv */ 4398 funcp = DPTR2FPTR(filter_t, IoANY(datasv)); 4399 DEBUG_P(PerlIO_printf(Perl_debug_log, 4400 "filter_read %d: via function %p (%s)\n", 4401 idx, (void*)datasv, SvPV_nolen_const(datasv))); 4402 /* Call function. The function is expected to */ 4403 /* call "FILTER_READ(idx+1, buf_sv)" first. */ 4404 /* Return: <0:error, =0:eof, >0:not eof */ 4405 return (*funcp)(aTHX_ idx, buf_sv, correct_length); 4406 } 4407 4408 STATIC char * 4409 S_filter_gets(pTHX_ SV *sv, STRLEN append) 4410 { 4411 dVAR; 4412 4413 PERL_ARGS_ASSERT_FILTER_GETS; 4414 4415 #ifdef PERL_CR_FILTER 4416 if (!PL_rsfp_filters) { 4417 filter_add(S_cr_textfilter,NULL); 4418 } 4419 #endif 4420 if (PL_rsfp_filters) { 4421 if (!append) 4422 SvCUR_set(sv, 0); /* start with empty line */ 4423 if (FILTER_READ(0, sv, 0) > 0) 4424 return ( SvPVX(sv) ) ; 4425 else 4426 return NULL ; 4427 } 4428 else 4429 return (sv_gets(sv, PL_rsfp, append)); 4430 } 4431 4432 STATIC HV * 4433 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len) 4434 { 4435 dVAR; 4436 GV *gv; 4437 4438 PERL_ARGS_ASSERT_FIND_IN_MY_STASH; 4439 4440 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__")) 4441 return PL_curstash; 4442 4443 if (len > 2 && 4444 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') && 4445 (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV))) 4446 { 4447 return GvHV(gv); /* Foo:: */ 4448 } 4449 4450 /* use constant CLASS => 'MyClass' */ 4451 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV); 4452 if (gv && GvCV(gv)) { 4453 SV * const sv = cv_const_sv(GvCV(gv)); 4454 if (sv) 4455 pkgname = SvPV_const(sv, len); 4456 } 4457 4458 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0); 4459 } 4460 4461 #ifdef PERL_MAD 4462 /* 4463 * Perl_madlex 4464 * The intent of this yylex wrapper is to minimize the changes to the 4465 * tokener when we aren't interested in collecting madprops. It remains 4466 * to be seen how successful this strategy will be... 4467 */ 4468 4469 int 4470 Perl_madlex(pTHX) 4471 { 4472 int optype; 4473 char *s = PL_bufptr; 4474 4475 /* make sure PL_thiswhite is initialized */ 4476 PL_thiswhite = 0; 4477 PL_thismad = 0; 4478 4479 /* previous token ate up our whitespace? */ 4480 if (!PL_lasttoke && PL_nextwhite) { 4481 PL_thiswhite = PL_nextwhite; 4482 PL_nextwhite = 0; 4483 } 4484 4485 /* isolate the token, and figure out where it is without whitespace */ 4486 PL_realtokenstart = -1; 4487 PL_thistoken = 0; 4488 optype = yylex(); 4489 s = PL_bufptr; 4490 assert(PL_curforce < 0); 4491 4492 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */ 4493 if (!PL_thistoken) { 4494 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop)) 4495 PL_thistoken = newSVpvs(""); 4496 else { 4497 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart; 4498 PL_thistoken = newSVpvn(tstart, s - tstart); 4499 } 4500 } 4501 if (PL_thismad) /* install head */ 4502 CURMAD('X', PL_thistoken); 4503 } 4504 4505 /* last whitespace of a sublex? */ 4506 if (optype == ')' && PL_endwhite) { 4507 CURMAD('X', PL_endwhite); 4508 } 4509 4510 if (!PL_thismad) { 4511 4512 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */ 4513 if (!PL_thiswhite && !PL_endwhite && !optype) { 4514 sv_free(PL_thistoken); 4515 PL_thistoken = 0; 4516 return 0; 4517 } 4518 4519 /* put off final whitespace till peg */ 4520 if (optype == ';' && !PL_rsfp && !PL_parser->filtered) { 4521 PL_nextwhite = PL_thiswhite; 4522 PL_thiswhite = 0; 4523 } 4524 else if (PL_thisopen) { 4525 CURMAD('q', PL_thisopen); 4526 if (PL_thistoken) 4527 sv_free(PL_thistoken); 4528 PL_thistoken = 0; 4529 } 4530 else { 4531 /* Store actual token text as madprop X */ 4532 CURMAD('X', PL_thistoken); 4533 } 4534 4535 if (PL_thiswhite) { 4536 /* add preceding whitespace as madprop _ */ 4537 CURMAD('_', PL_thiswhite); 4538 } 4539 4540 if (PL_thisstuff) { 4541 /* add quoted material as madprop = */ 4542 CURMAD('=', PL_thisstuff); 4543 } 4544 4545 if (PL_thisclose) { 4546 /* add terminating quote as madprop Q */ 4547 CURMAD('Q', PL_thisclose); 4548 } 4549 } 4550 4551 /* special processing based on optype */ 4552 4553 switch (optype) { 4554 4555 /* opval doesn't need a TOKEN since it can already store mp */ 4556 case WORD: 4557 case METHOD: 4558 case FUNCMETH: 4559 case THING: 4560 case PMFUNC: 4561 case PRIVATEREF: 4562 case FUNC0SUB: 4563 case UNIOPSUB: 4564 case LSTOPSUB: 4565 if (pl_yylval.opval) 4566 append_madprops(PL_thismad, pl_yylval.opval, 0); 4567 PL_thismad = 0; 4568 return optype; 4569 4570 /* fake EOF */ 4571 case 0: 4572 optype = PEG; 4573 if (PL_endwhite) { 4574 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0); 4575 PL_endwhite = 0; 4576 } 4577 break; 4578 4579 /* pval */ 4580 case LABEL: 4581 break; 4582 4583 case ']': 4584 case '}': 4585 if (PL_faketokens) 4586 break; 4587 /* remember any fake bracket that lexer is about to discard */ 4588 if (PL_lex_brackets == 1 && 4589 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK)) 4590 { 4591 s = PL_bufptr; 4592 while (s < PL_bufend && (*s == ' ' || *s == '\t')) 4593 s++; 4594 if (*s == '}') { 4595 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr); 4596 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0); 4597 PL_thiswhite = 0; 4598 PL_bufptr = s - 1; 4599 break; /* don't bother looking for trailing comment */ 4600 } 4601 else 4602 s = PL_bufptr; 4603 } 4604 if (optype == ']') 4605 break; 4606 /* FALLTHROUGH */ 4607 4608 /* attach a trailing comment to its statement instead of next token */ 4609 case ';': 4610 if (PL_faketokens) 4611 break; 4612 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) { 4613 s = PL_bufptr; 4614 while (s < PL_bufend && (*s == ' ' || *s == '\t')) 4615 s++; 4616 if (*s == '\n' || *s == '#') { 4617 while (s < PL_bufend && *s != '\n') 4618 s++; 4619 if (s < PL_bufend) 4620 s++; 4621 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr); 4622 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0); 4623 PL_thiswhite = 0; 4624 PL_bufptr = s; 4625 } 4626 } 4627 break; 4628 4629 /* ival */ 4630 default: 4631 break; 4632 4633 } 4634 4635 /* Create new token struct. Note: opvals return early above. */ 4636 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad); 4637 PL_thismad = 0; 4638 return optype; 4639 } 4640 #endif 4641 4642 STATIC char * 4643 S_tokenize_use(pTHX_ int is_use, char *s) { 4644 dVAR; 4645 4646 PERL_ARGS_ASSERT_TOKENIZE_USE; 4647 4648 if (PL_expect != XSTATE) 4649 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression", 4650 is_use ? "use" : "no")); 4651 PL_expect = XTERM; 4652 s = SKIPSPACE1(s); 4653 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) { 4654 s = force_version(s, TRUE); 4655 if (*s == ';' || *s == '}' 4656 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) { 4657 start_force(PL_curforce); 4658 NEXTVAL_NEXTTOKE.opval = NULL; 4659 force_next(WORD); 4660 } 4661 else if (*s == 'v') { 4662 s = force_word(s,WORD,FALSE,TRUE); 4663 s = force_version(s, FALSE); 4664 } 4665 } 4666 else { 4667 s = force_word(s,WORD,FALSE,TRUE); 4668 s = force_version(s, FALSE); 4669 } 4670 pl_yylval.ival = is_use; 4671 return s; 4672 } 4673 #ifdef DEBUGGING 4674 static const char* const exp_name[] = 4675 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK", 4676 "ATTRTERM", "TERMBLOCK", "POSTDEREF", "TERMORDORDOR" 4677 }; 4678 #endif 4679 4680 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l) 4681 STATIC bool 4682 S_word_takes_any_delimeter(char *p, STRLEN len) 4683 { 4684 return (len == 1 && strchr("msyq", p[0])) || 4685 (len == 2 && ( 4686 (p[0] == 't' && p[1] == 'r') || 4687 (p[0] == 'q' && strchr("qwxr", p[1])))); 4688 } 4689 4690 static void 4691 S_check_scalar_slice(pTHX_ char *s) 4692 { 4693 s++; 4694 while (*s == ' ' || *s == '\t') s++; 4695 if (*s == 'q' && s[1] == 'w' 4696 && !isWORDCHAR_lazy_if(s+2,UTF)) 4697 return; 4698 while (*s && (isWORDCHAR_lazy_if(s,UTF) || strchr(" \t$#+-'\"", *s))) 4699 s += UTF ? UTF8SKIP(s) : 1; 4700 if (*s == '}' || *s == ']') 4701 pl_yylval.ival = OPpSLICEWARNING; 4702 } 4703 4704 /* 4705 yylex 4706 4707 Works out what to call the token just pulled out of the input 4708 stream. The yacc parser takes care of taking the ops we return and 4709 stitching them into a tree. 4710 4711 Returns: 4712 The type of the next token 4713 4714 Structure: 4715 Switch based on the current state: 4716 - if we already built the token before, use it 4717 - if we have a case modifier in a string, deal with that 4718 - handle other cases of interpolation inside a string 4719 - scan the next line if we are inside a format 4720 In the normal state switch on the next character: 4721 - default: 4722 if alphabetic, go to key lookup 4723 unrecoginized character - croak 4724 - 0/4/26: handle end-of-line or EOF 4725 - cases for whitespace 4726 - \n and #: handle comments and line numbers 4727 - various operators, brackets and sigils 4728 - numbers 4729 - quotes 4730 - 'v': vstrings (or go to key lookup) 4731 - 'x' repetition operator (or go to key lookup) 4732 - other ASCII alphanumerics (key lookup begins here): 4733 word before => ? 4734 keyword plugin 4735 scan built-in keyword (but do nothing with it yet) 4736 check for statement label 4737 check for lexical subs 4738 goto just_a_word if there is one 4739 see whether built-in keyword is overridden 4740 switch on keyword number: 4741 - default: just_a_word: 4742 not a built-in keyword; handle bareword lookup 4743 disambiguate between method and sub call 4744 fall back to bareword 4745 - cases for built-in keywords 4746 */ 4747 4748 4749 int 4750 Perl_yylex(pTHX) 4751 { 4752 dVAR; 4753 char *s = PL_bufptr; 4754 char *d; 4755 STRLEN len; 4756 bool bof = FALSE; 4757 const bool saw_infix_sigil = cBOOL(PL_parser->saw_infix_sigil); 4758 U8 formbrack = 0; 4759 U32 fake_eof = 0; 4760 4761 /* orig_keyword, gvp, and gv are initialized here because 4762 * jump to the label just_a_word_zero can bypass their 4763 * initialization later. */ 4764 I32 orig_keyword = 0; 4765 GV *gv = NULL; 4766 GV **gvp = NULL; 4767 4768 DEBUG_T( { 4769 SV* tmp = newSVpvs(""); 4770 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n", 4771 (IV)CopLINE(PL_curcop), 4772 lex_state_names[PL_lex_state], 4773 exp_name[PL_expect], 4774 pv_display(tmp, s, strlen(s), 0, 60)); 4775 SvREFCNT_dec(tmp); 4776 } ); 4777 4778 switch (PL_lex_state) { 4779 case LEX_NORMAL: 4780 case LEX_INTERPNORMAL: 4781 break; 4782 4783 /* when we've already built the next token, just pull it out of the queue */ 4784 case LEX_KNOWNEXT: 4785 #ifdef PERL_MAD 4786 PL_lasttoke--; 4787 pl_yylval = PL_nexttoke[PL_lasttoke].next_val; 4788 if (PL_madskills) { 4789 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad; 4790 PL_nexttoke[PL_lasttoke].next_mad = 0; 4791 if (PL_thismad && PL_thismad->mad_key == '_') { 4792 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val); 4793 PL_thismad->mad_val = 0; 4794 mad_free(PL_thismad); 4795 PL_thismad = 0; 4796 } 4797 } 4798 if (!PL_lasttoke) { 4799 PL_lex_state = PL_lex_defer; 4800 PL_expect = PL_lex_expect; 4801 PL_lex_defer = LEX_NORMAL; 4802 if (!PL_nexttoke[PL_lasttoke].next_type) 4803 return yylex(); 4804 } 4805 #else 4806 PL_nexttoke--; 4807 pl_yylval = PL_nextval[PL_nexttoke]; 4808 if (!PL_nexttoke) { 4809 PL_lex_state = PL_lex_defer; 4810 PL_expect = PL_lex_expect; 4811 PL_lex_defer = LEX_NORMAL; 4812 } 4813 #endif 4814 { 4815 I32 next_type; 4816 #ifdef PERL_MAD 4817 next_type = PL_nexttoke[PL_lasttoke].next_type; 4818 #else 4819 next_type = PL_nexttype[PL_nexttoke]; 4820 #endif 4821 if (next_type & (7<<24)) { 4822 if (next_type & (1<<24)) { 4823 if (PL_lex_brackets > 100) 4824 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); 4825 PL_lex_brackstack[PL_lex_brackets++] = 4826 (char) ((next_type >> 16) & 0xff); 4827 } 4828 if (next_type & (2<<24)) 4829 PL_lex_allbrackets++; 4830 if (next_type & (4<<24)) 4831 PL_lex_allbrackets--; 4832 next_type &= 0xffff; 4833 } 4834 return REPORT(next_type == 'p' ? pending_ident() : next_type); 4835 } 4836 4837 /* interpolated case modifiers like \L \U, including \Q and \E. 4838 when we get here, PL_bufptr is at the \ 4839 */ 4840 case LEX_INTERPCASEMOD: 4841 #ifdef DEBUGGING 4842 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\') 4843 Perl_croak(aTHX_ 4844 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u", 4845 PL_bufptr, PL_bufend, *PL_bufptr); 4846 #endif 4847 /* handle \E or end of string */ 4848 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') { 4849 /* if at a \E */ 4850 if (PL_lex_casemods) { 4851 const char oldmod = PL_lex_casestack[--PL_lex_casemods]; 4852 PL_lex_casestack[PL_lex_casemods] = '\0'; 4853 4854 if (PL_bufptr != PL_bufend 4855 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q' 4856 || oldmod == 'F')) { 4857 PL_bufptr += 2; 4858 PL_lex_state = LEX_INTERPCONCAT; 4859 #ifdef PERL_MAD 4860 if (PL_madskills) 4861 PL_thistoken = newSVpvs("\\E"); 4862 #endif 4863 } 4864 PL_lex_allbrackets--; 4865 return REPORT(')'); 4866 } 4867 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) { 4868 /* Got an unpaired \E */ 4869 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), 4870 "Useless use of \\E"); 4871 } 4872 #ifdef PERL_MAD 4873 while (PL_bufptr != PL_bufend && 4874 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') { 4875 if (PL_madskills) { 4876 if (!PL_thiswhite) 4877 PL_thiswhite = newSVpvs(""); 4878 sv_catpvn(PL_thiswhite, PL_bufptr, 2); 4879 } 4880 PL_bufptr += 2; 4881 } 4882 #else 4883 if (PL_bufptr != PL_bufend) 4884 PL_bufptr += 2; 4885 #endif 4886 PL_lex_state = LEX_INTERPCONCAT; 4887 return yylex(); 4888 } 4889 else { 4890 DEBUG_T({ PerlIO_printf(Perl_debug_log, 4891 "### Saw case modifier\n"); }); 4892 s = PL_bufptr + 1; 4893 if (s[1] == '\\' && s[2] == 'E') { 4894 #ifdef PERL_MAD 4895 if (PL_madskills) { 4896 if (!PL_thiswhite) 4897 PL_thiswhite = newSVpvs(""); 4898 sv_catpvn(PL_thiswhite, PL_bufptr, 4); 4899 } 4900 #endif 4901 PL_bufptr = s + 3; 4902 PL_lex_state = LEX_INTERPCONCAT; 4903 return yylex(); 4904 } 4905 else { 4906 I32 tmp; 4907 if (!PL_madskills) /* when just compiling don't need correct */ 4908 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3)) 4909 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */ 4910 if ((*s == 'L' || *s == 'U' || *s == 'F') && 4911 (strchr(PL_lex_casestack, 'L') 4912 || strchr(PL_lex_casestack, 'U') 4913 || strchr(PL_lex_casestack, 'F'))) { 4914 PL_lex_casestack[--PL_lex_casemods] = '\0'; 4915 PL_lex_allbrackets--; 4916 return REPORT(')'); 4917 } 4918 if (PL_lex_casemods > 10) 4919 Renew(PL_lex_casestack, PL_lex_casemods + 2, char); 4920 PL_lex_casestack[PL_lex_casemods++] = *s; 4921 PL_lex_casestack[PL_lex_casemods] = '\0'; 4922 PL_lex_state = LEX_INTERPCONCAT; 4923 start_force(PL_curforce); 4924 NEXTVAL_NEXTTOKE.ival = 0; 4925 force_next((2<<24)|'('); 4926 start_force(PL_curforce); 4927 if (*s == 'l') 4928 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST; 4929 else if (*s == 'u') 4930 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST; 4931 else if (*s == 'L') 4932 NEXTVAL_NEXTTOKE.ival = OP_LC; 4933 else if (*s == 'U') 4934 NEXTVAL_NEXTTOKE.ival = OP_UC; 4935 else if (*s == 'Q') 4936 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA; 4937 else if (*s == 'F') 4938 NEXTVAL_NEXTTOKE.ival = OP_FC; 4939 else 4940 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s); 4941 if (PL_madskills) { 4942 SV* const tmpsv = newSVpvs("\\ "); 4943 /* replace the space with the character we want to escape 4944 */ 4945 SvPVX(tmpsv)[1] = *s; 4946 curmad('_', tmpsv); 4947 } 4948 PL_bufptr = s + 1; 4949 } 4950 force_next(FUNC); 4951 if (PL_lex_starts) { 4952 s = PL_bufptr; 4953 PL_lex_starts = 0; 4954 #ifdef PERL_MAD 4955 if (PL_madskills) { 4956 if (PL_thistoken) 4957 sv_free(PL_thistoken); 4958 PL_thistoken = newSVpvs(""); 4959 } 4960 #endif 4961 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ 4962 if (PL_lex_casemods == 1 && PL_lex_inpat) 4963 OPERATOR(','); 4964 else 4965 Aop(OP_CONCAT); 4966 } 4967 else 4968 return yylex(); 4969 } 4970 4971 case LEX_INTERPPUSH: 4972 return REPORT(sublex_push()); 4973 4974 case LEX_INTERPSTART: 4975 if (PL_bufptr == PL_bufend) 4976 return REPORT(sublex_done()); 4977 DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log, 4978 "### Interpolated variable\n"); }); 4979 PL_expect = XTERM; 4980 /* for /@a/, we leave the joining for the regex engine to do 4981 * (unless we're within \Q etc) */ 4982 PL_lex_dojoin = (*PL_bufptr == '@' 4983 && (!PL_lex_inpat || PL_lex_casemods)); 4984 PL_lex_state = LEX_INTERPNORMAL; 4985 if (PL_lex_dojoin) { 4986 start_force(PL_curforce); 4987 NEXTVAL_NEXTTOKE.ival = 0; 4988 force_next(','); 4989 start_force(PL_curforce); 4990 force_ident("\"", '$'); 4991 start_force(PL_curforce); 4992 NEXTVAL_NEXTTOKE.ival = 0; 4993 force_next('$'); 4994 start_force(PL_curforce); 4995 NEXTVAL_NEXTTOKE.ival = 0; 4996 force_next((2<<24)|'('); 4997 start_force(PL_curforce); 4998 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */ 4999 force_next(FUNC); 5000 } 5001 /* Convert (?{...}) and friends to 'do {...}' */ 5002 if (PL_lex_inpat && *PL_bufptr == '(') { 5003 PL_parser->lex_shared->re_eval_start = PL_bufptr; 5004 PL_bufptr += 2; 5005 if (*PL_bufptr != '{') 5006 PL_bufptr++; 5007 start_force(PL_curforce); 5008 /* XXX probably need a CURMAD(something) here */ 5009 PL_expect = XTERMBLOCK; 5010 force_next(DO); 5011 } 5012 5013 if (PL_lex_starts++) { 5014 s = PL_bufptr; 5015 #ifdef PERL_MAD 5016 if (PL_madskills) { 5017 if (PL_thistoken) 5018 sv_free(PL_thistoken); 5019 PL_thistoken = newSVpvs(""); 5020 } 5021 #endif 5022 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ 5023 if (!PL_lex_casemods && PL_lex_inpat) 5024 OPERATOR(','); 5025 else 5026 Aop(OP_CONCAT); 5027 } 5028 return yylex(); 5029 5030 case LEX_INTERPENDMAYBE: 5031 if (intuit_more(PL_bufptr)) { 5032 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */ 5033 break; 5034 } 5035 /* FALL THROUGH */ 5036 5037 case LEX_INTERPEND: 5038 if (PL_lex_dojoin) { 5039 const U8 dojoin_was = PL_lex_dojoin; 5040 PL_lex_dojoin = FALSE; 5041 PL_lex_state = LEX_INTERPCONCAT; 5042 #ifdef PERL_MAD 5043 if (PL_madskills) { 5044 if (PL_thistoken) 5045 sv_free(PL_thistoken); 5046 PL_thistoken = newSVpvs(""); 5047 } 5048 #endif 5049 PL_lex_allbrackets--; 5050 return REPORT(dojoin_was == 1 ? ')' : POSTJOIN); 5051 } 5052 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl 5053 && SvEVALED(PL_lex_repl)) 5054 { 5055 if (PL_bufptr != PL_bufend) 5056 Perl_croak(aTHX_ "Bad evalled substitution pattern"); 5057 PL_lex_repl = NULL; 5058 } 5059 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets 5060 re_eval_str. If the here-doc body’s length equals the previous 5061 value of re_eval_start, re_eval_start will now be null. So 5062 check re_eval_str as well. */ 5063 if (PL_parser->lex_shared->re_eval_start 5064 || PL_parser->lex_shared->re_eval_str) { 5065 SV *sv; 5066 if (*PL_bufptr != ')') 5067 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'"); 5068 PL_bufptr++; 5069 /* having compiled a (?{..}) expression, return the original 5070 * text too, as a const */ 5071 if (PL_parser->lex_shared->re_eval_str) { 5072 sv = PL_parser->lex_shared->re_eval_str; 5073 PL_parser->lex_shared->re_eval_str = NULL; 5074 SvCUR_set(sv, 5075 PL_bufptr - PL_parser->lex_shared->re_eval_start); 5076 SvPV_shrink_to_cur(sv); 5077 } 5078 else sv = newSVpvn(PL_parser->lex_shared->re_eval_start, 5079 PL_bufptr - PL_parser->lex_shared->re_eval_start); 5080 start_force(PL_curforce); 5081 /* XXX probably need a CURMAD(something) here */ 5082 NEXTVAL_NEXTTOKE.opval = 5083 (OP*)newSVOP(OP_CONST, 0, 5084 sv); 5085 force_next(THING); 5086 PL_parser->lex_shared->re_eval_start = NULL; 5087 PL_expect = XTERM; 5088 return REPORT(','); 5089 } 5090 5091 /* FALLTHROUGH */ 5092 case LEX_INTERPCONCAT: 5093 #ifdef DEBUGGING 5094 if (PL_lex_brackets) 5095 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld", 5096 (long) PL_lex_brackets); 5097 #endif 5098 if (PL_bufptr == PL_bufend) 5099 return REPORT(sublex_done()); 5100 5101 /* m'foo' still needs to be parsed for possible (?{...}) */ 5102 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) { 5103 SV *sv = newSVsv(PL_linestr); 5104 sv = tokeq(sv); 5105 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv); 5106 s = PL_bufend; 5107 } 5108 else { 5109 s = scan_const(PL_bufptr); 5110 if (*s == '\\') 5111 PL_lex_state = LEX_INTERPCASEMOD; 5112 else 5113 PL_lex_state = LEX_INTERPSTART; 5114 } 5115 5116 if (s != PL_bufptr) { 5117 start_force(PL_curforce); 5118 if (PL_madskills) { 5119 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr)); 5120 } 5121 NEXTVAL_NEXTTOKE = pl_yylval; 5122 PL_expect = XTERM; 5123 force_next(THING); 5124 if (PL_lex_starts++) { 5125 #ifdef PERL_MAD 5126 if (PL_madskills) { 5127 if (PL_thistoken) 5128 sv_free(PL_thistoken); 5129 PL_thistoken = newSVpvs(""); 5130 } 5131 #endif 5132 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ 5133 if (!PL_lex_casemods && PL_lex_inpat) 5134 OPERATOR(','); 5135 else 5136 Aop(OP_CONCAT); 5137 } 5138 else { 5139 PL_bufptr = s; 5140 return yylex(); 5141 } 5142 } 5143 5144 return yylex(); 5145 case LEX_FORMLINE: 5146 s = scan_formline(PL_bufptr); 5147 if (!PL_lex_formbrack) 5148 { 5149 formbrack = 1; 5150 goto rightbracket; 5151 } 5152 PL_bufptr = s; 5153 return yylex(); 5154 } 5155 5156 /* We really do *not* want PL_linestr ever becoming a COW. */ 5157 assert (!SvIsCOW(PL_linestr)); 5158 s = PL_bufptr; 5159 PL_oldoldbufptr = PL_oldbufptr; 5160 PL_oldbufptr = s; 5161 PL_parser->saw_infix_sigil = 0; 5162 5163 retry: 5164 #ifdef PERL_MAD 5165 if (PL_thistoken) { 5166 sv_free(PL_thistoken); 5167 PL_thistoken = 0; 5168 } 5169 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */ 5170 #endif 5171 switch (*s) { 5172 default: 5173 if (UTF ? isIDFIRST_utf8((U8*)s) : isALNUMC(*s)) 5174 goto keylookup; 5175 { 5176 SV *dsv = newSVpvs_flags("", SVs_TEMP); 5177 const char *c = UTF ? sv_uni_display(dsv, newSVpvn_flags(s, 5178 UTF8SKIP(s), 5179 SVs_TEMP | SVf_UTF8), 5180 10, UNI_DISPLAY_ISPRINT) 5181 : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s); 5182 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart); 5183 if (len > UNRECOGNIZED_PRECEDE_COUNT) { 5184 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT; 5185 } else { 5186 d = PL_linestart; 5187 } 5188 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %"UTF8f"<-- HERE near column %d", c, 5189 UTF8fARG(UTF, (s - d), d), 5190 (int) len + 1); 5191 } 5192 case 4: 5193 case 26: 5194 goto fake_eof; /* emulate EOF on ^D or ^Z */ 5195 case 0: 5196 #ifdef PERL_MAD 5197 if (PL_madskills) 5198 PL_faketokens = 0; 5199 #endif 5200 if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) { 5201 PL_last_uni = 0; 5202 PL_last_lop = 0; 5203 if (PL_lex_brackets && 5204 PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) { 5205 yyerror((const char *) 5206 (PL_lex_formbrack 5207 ? "Format not terminated" 5208 : "Missing right curly or square bracket")); 5209 } 5210 DEBUG_T( { PerlIO_printf(Perl_debug_log, 5211 "### Tokener got EOF\n"); 5212 } ); 5213 TOKEN(0); 5214 } 5215 if (s++ < PL_bufend) 5216 goto retry; /* ignore stray nulls */ 5217 PL_last_uni = 0; 5218 PL_last_lop = 0; 5219 if (!PL_in_eval && !PL_preambled) { 5220 PL_preambled = TRUE; 5221 #ifdef PERL_MAD 5222 if (PL_madskills) 5223 PL_faketokens = 1; 5224 #endif 5225 if (PL_perldb) { 5226 /* Generate a string of Perl code to load the debugger. 5227 * If PERL5DB is set, it will return the contents of that, 5228 * otherwise a compile-time require of perl5db.pl. */ 5229 5230 const char * const pdb = PerlEnv_getenv("PERL5DB"); 5231 5232 if (pdb) { 5233 sv_setpv(PL_linestr, pdb); 5234 sv_catpvs(PL_linestr,";"); 5235 } else { 5236 SETERRNO(0,SS_NORMAL); 5237 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };"); 5238 } 5239 PL_parser->preambling = CopLINE(PL_curcop); 5240 } else 5241 sv_setpvs(PL_linestr,""); 5242 if (PL_preambleav) { 5243 SV **svp = AvARRAY(PL_preambleav); 5244 SV **const end = svp + AvFILLp(PL_preambleav); 5245 while(svp <= end) { 5246 sv_catsv(PL_linestr, *svp); 5247 ++svp; 5248 sv_catpvs(PL_linestr, ";"); 5249 } 5250 sv_free(MUTABLE_SV(PL_preambleav)); 5251 PL_preambleav = NULL; 5252 } 5253 if (PL_minus_E) 5254 sv_catpvs(PL_linestr, 5255 "use feature ':5." STRINGIFY(PERL_VERSION) "';"); 5256 if (PL_minus_n || PL_minus_p) { 5257 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/); 5258 if (PL_minus_l) 5259 sv_catpvs(PL_linestr,"chomp;"); 5260 if (PL_minus_a) { 5261 if (PL_minus_F) { 5262 if ((*PL_splitstr == '/' || *PL_splitstr == '\'' 5263 || *PL_splitstr == '"') 5264 && strchr(PL_splitstr + 1, *PL_splitstr)) 5265 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr); 5266 else { 5267 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL 5268 bytes can be used as quoting characters. :-) */ 5269 const char *splits = PL_splitstr; 5270 sv_catpvs(PL_linestr, "our @F=split(q\0"); 5271 do { 5272 /* Need to \ \s */ 5273 if (*splits == '\\') 5274 sv_catpvn(PL_linestr, splits, 1); 5275 sv_catpvn(PL_linestr, splits, 1); 5276 } while (*splits++); 5277 /* This loop will embed the trailing NUL of 5278 PL_linestr as the last thing it does before 5279 terminating. */ 5280 sv_catpvs(PL_linestr, ");"); 5281 } 5282 } 5283 else 5284 sv_catpvs(PL_linestr,"our @F=split(' ');"); 5285 } 5286 } 5287 sv_catpvs(PL_linestr, "\n"); 5288 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); 5289 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 5290 PL_last_lop = PL_last_uni = NULL; 5291 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash) 5292 update_debugger_info(PL_linestr, NULL, 0); 5293 goto retry; 5294 } 5295 do { 5296 fake_eof = 0; 5297 bof = PL_rsfp ? TRUE : FALSE; 5298 if (0) { 5299 fake_eof: 5300 fake_eof = LEX_FAKE_EOF; 5301 } 5302 PL_bufptr = PL_bufend; 5303 COPLINE_INC_WITH_HERELINES; 5304 if (!lex_next_chunk(fake_eof)) { 5305 CopLINE_dec(PL_curcop); 5306 s = PL_bufptr; 5307 TOKEN(';'); /* not infinite loop because rsfp is NULL now */ 5308 } 5309 CopLINE_dec(PL_curcop); 5310 #ifdef PERL_MAD 5311 if (!PL_rsfp) 5312 PL_realtokenstart = -1; 5313 #endif 5314 s = PL_bufptr; 5315 /* If it looks like the start of a BOM or raw UTF-16, 5316 * check if it in fact is. */ 5317 if (bof && PL_rsfp && 5318 (*s == 0 || 5319 *(U8*)s == BOM_UTF8_FIRST_BYTE || 5320 *(U8*)s >= 0xFE || 5321 s[1] == 0)) { 5322 Off_t offset = (IV)PerlIO_tell(PL_rsfp); 5323 bof = (offset == (Off_t)SvCUR(PL_linestr)); 5324 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS) 5325 /* offset may include swallowed CR */ 5326 if (!bof) 5327 bof = (offset == (Off_t)SvCUR(PL_linestr)+1); 5328 #endif 5329 if (bof) { 5330 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 5331 s = swallow_bom((U8*)s); 5332 } 5333 } 5334 if (PL_parser->in_pod) { 5335 /* Incest with pod. */ 5336 #ifdef PERL_MAD 5337 if (PL_madskills) 5338 sv_catsv(PL_thiswhite, PL_linestr); 5339 #endif 5340 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) { 5341 sv_setpvs(PL_linestr, ""); 5342 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); 5343 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 5344 PL_last_lop = PL_last_uni = NULL; 5345 PL_parser->in_pod = 0; 5346 } 5347 } 5348 if (PL_rsfp || PL_parser->filtered) 5349 incline(s); 5350 } while (PL_parser->in_pod); 5351 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s; 5352 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 5353 PL_last_lop = PL_last_uni = NULL; 5354 if (CopLINE(PL_curcop) == 1) { 5355 while (s < PL_bufend && isSPACE(*s)) 5356 s++; 5357 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */ 5358 s++; 5359 #ifdef PERL_MAD 5360 if (PL_madskills) 5361 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart); 5362 #endif 5363 d = NULL; 5364 if (!PL_in_eval) { 5365 if (*s == '#' && *(s+1) == '!') 5366 d = s + 2; 5367 #ifdef ALTERNATE_SHEBANG 5368 else { 5369 static char const as[] = ALTERNATE_SHEBANG; 5370 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1)) 5371 d = s + (sizeof(as) - 1); 5372 } 5373 #endif /* ALTERNATE_SHEBANG */ 5374 } 5375 if (d) { 5376 char *ipath; 5377 char *ipathend; 5378 5379 while (isSPACE(*d)) 5380 d++; 5381 ipath = d; 5382 while (*d && !isSPACE(*d)) 5383 d++; 5384 ipathend = d; 5385 5386 #ifdef ARG_ZERO_IS_SCRIPT 5387 if (ipathend > ipath) { 5388 /* 5389 * HP-UX (at least) sets argv[0] to the script name, 5390 * which makes $^X incorrect. And Digital UNIX and Linux, 5391 * at least, set argv[0] to the basename of the Perl 5392 * interpreter. So, having found "#!", we'll set it right. 5393 */ 5394 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL, 5395 SVt_PV)); /* $^X */ 5396 assert(SvPOK(x) || SvGMAGICAL(x)); 5397 if (sv_eq(x, CopFILESV(PL_curcop))) { 5398 sv_setpvn(x, ipath, ipathend - ipath); 5399 SvSETMAGIC(x); 5400 } 5401 else { 5402 STRLEN blen; 5403 STRLEN llen; 5404 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen); 5405 const char * const lstart = SvPV_const(x,llen); 5406 if (llen < blen) { 5407 bstart += blen - llen; 5408 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') { 5409 sv_setpvn(x, ipath, ipathend - ipath); 5410 SvSETMAGIC(x); 5411 } 5412 } 5413 } 5414 TAINT_NOT; /* $^X is always tainted, but that's OK */ 5415 } 5416 #endif /* ARG_ZERO_IS_SCRIPT */ 5417 5418 /* 5419 * Look for options. 5420 */ 5421 d = instr(s,"perl -"); 5422 if (!d) { 5423 d = instr(s,"perl"); 5424 #if defined(DOSISH) 5425 /* avoid getting into infinite loops when shebang 5426 * line contains "Perl" rather than "perl" */ 5427 if (!d) { 5428 for (d = ipathend-4; d >= ipath; --d) { 5429 if ((*d == 'p' || *d == 'P') 5430 && !ibcmp(d, "perl", 4)) 5431 { 5432 break; 5433 } 5434 } 5435 if (d < ipath) 5436 d = NULL; 5437 } 5438 #endif 5439 } 5440 #ifdef ALTERNATE_SHEBANG 5441 /* 5442 * If the ALTERNATE_SHEBANG on this system starts with a 5443 * character that can be part of a Perl expression, then if 5444 * we see it but not "perl", we're probably looking at the 5445 * start of Perl code, not a request to hand off to some 5446 * other interpreter. Similarly, if "perl" is there, but 5447 * not in the first 'word' of the line, we assume the line 5448 * contains the start of the Perl program. 5449 */ 5450 if (d && *s != '#') { 5451 const char *c = ipath; 5452 while (*c && !strchr("; \t\r\n\f\v#", *c)) 5453 c++; 5454 if (c < d) 5455 d = NULL; /* "perl" not in first word; ignore */ 5456 else 5457 *s = '#'; /* Don't try to parse shebang line */ 5458 } 5459 #endif /* ALTERNATE_SHEBANG */ 5460 if (!d && 5461 *s == '#' && 5462 ipathend > ipath && 5463 !PL_minus_c && 5464 !instr(s,"indir") && 5465 instr(PL_origargv[0],"perl")) 5466 { 5467 dVAR; 5468 char **newargv; 5469 5470 *ipathend = '\0'; 5471 s = ipathend + 1; 5472 while (s < PL_bufend && isSPACE(*s)) 5473 s++; 5474 if (s < PL_bufend) { 5475 Newx(newargv,PL_origargc+3,char*); 5476 newargv[1] = s; 5477 while (s < PL_bufend && !isSPACE(*s)) 5478 s++; 5479 *s = '\0'; 5480 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*); 5481 } 5482 else 5483 newargv = PL_origargv; 5484 newargv[0] = ipath; 5485 PERL_FPU_PRE_EXEC 5486 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv)); 5487 PERL_FPU_POST_EXEC 5488 Perl_croak(aTHX_ "Can't exec %s", ipath); 5489 } 5490 if (d) { 5491 while (*d && !isSPACE(*d)) 5492 d++; 5493 while (SPACE_OR_TAB(*d)) 5494 d++; 5495 5496 if (*d++ == '-') { 5497 const bool switches_done = PL_doswitches; 5498 const U32 oldpdb = PL_perldb; 5499 const bool oldn = PL_minus_n; 5500 const bool oldp = PL_minus_p; 5501 const char *d1 = d; 5502 5503 do { 5504 bool baduni = FALSE; 5505 if (*d1 == 'C') { 5506 const char *d2 = d1 + 1; 5507 if (parse_unicode_opts((const char **)&d2) 5508 != PL_unicode) 5509 baduni = TRUE; 5510 } 5511 if (baduni || *d1 == 'M' || *d1 == 'm') { 5512 const char * const m = d1; 5513 while (*d1 && !isSPACE(*d1)) 5514 d1++; 5515 Perl_croak(aTHX_ "Too late for \"-%.*s\" option", 5516 (int)(d1 - m), m); 5517 } 5518 d1 = moreswitches(d1); 5519 } while (d1); 5520 if (PL_doswitches && !switches_done) { 5521 int argc = PL_origargc; 5522 char **argv = PL_origargv; 5523 do { 5524 argc--,argv++; 5525 } while (argc && argv[0][0] == '-' && argv[0][1]); 5526 init_argv_symbols(argc,argv); 5527 } 5528 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) || 5529 ((PL_minus_n || PL_minus_p) && !(oldn || oldp))) 5530 /* if we have already added "LINE: while (<>) {", 5531 we must not do it again */ 5532 { 5533 sv_setpvs(PL_linestr, ""); 5534 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); 5535 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 5536 PL_last_lop = PL_last_uni = NULL; 5537 PL_preambled = FALSE; 5538 if (PERLDB_LINE || PERLDB_SAVESRC) 5539 (void)gv_fetchfile(PL_origfilename); 5540 goto retry; 5541 } 5542 } 5543 } 5544 } 5545 } 5546 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { 5547 PL_lex_state = LEX_FORMLINE; 5548 start_force(PL_curforce); 5549 NEXTVAL_NEXTTOKE.ival = 0; 5550 force_next(FORMRBRACK); 5551 TOKEN(';'); 5552 } 5553 goto retry; 5554 case '\r': 5555 #ifdef PERL_STRICT_CR 5556 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r'); 5557 Perl_croak(aTHX_ 5558 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n"); 5559 #endif 5560 case ' ': case '\t': case '\f': case 013: 5561 #ifdef PERL_MAD 5562 PL_realtokenstart = -1; 5563 if (PL_madskills) { 5564 if (!PL_thiswhite) 5565 PL_thiswhite = newSVpvs(""); 5566 sv_catpvn(PL_thiswhite, s, 1); 5567 } 5568 #endif 5569 s++; 5570 goto retry; 5571 case '#': 5572 case '\n': 5573 #ifdef PERL_MAD 5574 PL_realtokenstart = -1; 5575 if (PL_madskills) 5576 PL_faketokens = 0; 5577 #endif 5578 if (PL_lex_state != LEX_NORMAL || 5579 (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) { 5580 if (*s == '#' && s == PL_linestart && PL_in_eval 5581 && !PL_rsfp && !PL_parser->filtered) { 5582 /* handle eval qq[#line 1 "foo"\n ...] */ 5583 CopLINE_dec(PL_curcop); 5584 incline(s); 5585 } 5586 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) { 5587 s = SKIPSPACE0(s); 5588 if (!PL_in_eval || PL_rsfp || PL_parser->filtered) 5589 incline(s); 5590 } 5591 else { 5592 const bool in_comment = *s == '#'; 5593 d = s; 5594 while (d < PL_bufend && *d != '\n') 5595 d++; 5596 if (d < PL_bufend) 5597 d++; 5598 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */ 5599 Perl_croak(aTHX_ "panic: input overflow, %p > %p", 5600 d, PL_bufend); 5601 #ifdef PERL_MAD 5602 if (PL_madskills) 5603 PL_thiswhite = newSVpvn(s, d - s); 5604 #endif 5605 s = d; 5606 if (in_comment && d == PL_bufend 5607 && PL_lex_state == LEX_INTERPNORMAL 5608 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr 5609 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--; 5610 else incline(s); 5611 } 5612 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { 5613 PL_lex_state = LEX_FORMLINE; 5614 start_force(PL_curforce); 5615 NEXTVAL_NEXTTOKE.ival = 0; 5616 force_next(FORMRBRACK); 5617 TOKEN(';'); 5618 } 5619 } 5620 else { 5621 #ifdef PERL_MAD 5622 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) { 5623 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') { 5624 PL_faketokens = 0; 5625 s = SKIPSPACE0(s); 5626 TOKEN(PEG); /* make sure any #! line is accessible */ 5627 } 5628 s = SKIPSPACE0(s); 5629 } 5630 else { 5631 #endif 5632 if (PL_madskills) d = s; 5633 while (s < PL_bufend && *s != '\n') 5634 s++; 5635 if (s < PL_bufend) 5636 { 5637 s++; 5638 if (s < PL_bufend) 5639 incline(s); 5640 } 5641 else if (s > PL_bufend) /* Found by Ilya: feed random input to Perl. */ 5642 Perl_croak(aTHX_ "panic: input overflow"); 5643 #ifdef PERL_MAD 5644 if (PL_madskills && CopLINE(PL_curcop) >= 1) { 5645 if (!PL_thiswhite) 5646 PL_thiswhite = newSVpvs(""); 5647 if (CopLINE(PL_curcop) == 1) { 5648 sv_setpvs(PL_thiswhite, ""); 5649 PL_faketokens = 0; 5650 } 5651 sv_catpvn(PL_thiswhite, d, s - d); 5652 } 5653 } 5654 #endif 5655 } 5656 goto retry; 5657 case '-': 5658 if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) { 5659 I32 ftst = 0; 5660 char tmp; 5661 5662 s++; 5663 PL_bufptr = s; 5664 tmp = *s++; 5665 5666 while (s < PL_bufend && SPACE_OR_TAB(*s)) 5667 s++; 5668 5669 if (strnEQ(s,"=>",2)) { 5670 s = force_word(PL_bufptr,WORD,FALSE,FALSE); 5671 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } ); 5672 OPERATOR('-'); /* unary minus */ 5673 } 5674 switch (tmp) { 5675 case 'r': ftst = OP_FTEREAD; break; 5676 case 'w': ftst = OP_FTEWRITE; break; 5677 case 'x': ftst = OP_FTEEXEC; break; 5678 case 'o': ftst = OP_FTEOWNED; break; 5679 case 'R': ftst = OP_FTRREAD; break; 5680 case 'W': ftst = OP_FTRWRITE; break; 5681 case 'X': ftst = OP_FTREXEC; break; 5682 case 'O': ftst = OP_FTROWNED; break; 5683 case 'e': ftst = OP_FTIS; break; 5684 case 'z': ftst = OP_FTZERO; break; 5685 case 's': ftst = OP_FTSIZE; break; 5686 case 'f': ftst = OP_FTFILE; break; 5687 case 'd': ftst = OP_FTDIR; break; 5688 case 'l': ftst = OP_FTLINK; break; 5689 case 'p': ftst = OP_FTPIPE; break; 5690 case 'S': ftst = OP_FTSOCK; break; 5691 case 'u': ftst = OP_FTSUID; break; 5692 case 'g': ftst = OP_FTSGID; break; 5693 case 'k': ftst = OP_FTSVTX; break; 5694 case 'b': ftst = OP_FTBLK; break; 5695 case 'c': ftst = OP_FTCHR; break; 5696 case 't': ftst = OP_FTTTY; break; 5697 case 'T': ftst = OP_FTTEXT; break; 5698 case 'B': ftst = OP_FTBINARY; break; 5699 case 'M': case 'A': case 'C': 5700 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV); 5701 switch (tmp) { 5702 case 'M': ftst = OP_FTMTIME; break; 5703 case 'A': ftst = OP_FTATIME; break; 5704 case 'C': ftst = OP_FTCTIME; break; 5705 default: break; 5706 } 5707 break; 5708 default: 5709 break; 5710 } 5711 if (ftst) { 5712 PL_last_uni = PL_oldbufptr; 5713 PL_last_lop_op = (OPCODE)ftst; 5714 DEBUG_T( { PerlIO_printf(Perl_debug_log, 5715 "### Saw file test %c\n", (int)tmp); 5716 } ); 5717 FTST(ftst); 5718 } 5719 else { 5720 /* Assume it was a minus followed by a one-letter named 5721 * subroutine call (or a -bareword), then. */ 5722 DEBUG_T( { PerlIO_printf(Perl_debug_log, 5723 "### '-%c' looked like a file test but was not\n", 5724 (int) tmp); 5725 } ); 5726 s = --PL_bufptr; 5727 } 5728 } 5729 { 5730 const char tmp = *s++; 5731 if (*s == tmp) { 5732 s++; 5733 if (PL_expect == XOPERATOR) 5734 TERM(POSTDEC); 5735 else 5736 OPERATOR(PREDEC); 5737 } 5738 else if (*s == '>') { 5739 s++; 5740 s = SKIPSPACE1(s); 5741 if (FEATURE_POSTDEREF_IS_ENABLED && ( 5742 ((*s == '$' || *s == '&') && s[1] == '*') 5743 ||(*s == '$' && s[1] == '#' && s[2] == '*') 5744 ||((*s == '@' || *s == '%') && strchr("*[{", s[1])) 5745 ||(*s == '*' && (s[1] == '*' || s[1] == '{')) 5746 )) 5747 { 5748 Perl_ck_warner_d(aTHX_ 5749 packWARN(WARN_EXPERIMENTAL__POSTDEREF), 5750 "Postfix dereference is experimental" 5751 ); 5752 PL_expect = XPOSTDEREF; 5753 TOKEN(ARROW); 5754 } 5755 if (isIDFIRST_lazy_if(s,UTF)) { 5756 s = force_word(s,METHOD,FALSE,TRUE); 5757 TOKEN(ARROW); 5758 } 5759 else if (*s == '$') 5760 OPERATOR(ARROW); 5761 else 5762 TERM(ARROW); 5763 } 5764 if (PL_expect == XOPERATOR) { 5765 if (*s == '=' && !PL_lex_allbrackets && 5766 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { 5767 s--; 5768 TOKEN(0); 5769 } 5770 Aop(OP_SUBTRACT); 5771 } 5772 else { 5773 if (isSPACE(*s) || !isSPACE(*PL_bufptr)) 5774 check_uni(); 5775 OPERATOR('-'); /* unary minus */ 5776 } 5777 } 5778 5779 case '+': 5780 { 5781 const char tmp = *s++; 5782 if (*s == tmp) { 5783 s++; 5784 if (PL_expect == XOPERATOR) 5785 TERM(POSTINC); 5786 else 5787 OPERATOR(PREINC); 5788 } 5789 if (PL_expect == XOPERATOR) { 5790 if (*s == '=' && !PL_lex_allbrackets && 5791 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { 5792 s--; 5793 TOKEN(0); 5794 } 5795 Aop(OP_ADD); 5796 } 5797 else { 5798 if (isSPACE(*s) || !isSPACE(*PL_bufptr)) 5799 check_uni(); 5800 OPERATOR('+'); 5801 } 5802 } 5803 5804 case '*': 5805 if (PL_expect == XPOSTDEREF) POSTDEREF('*'); 5806 if (PL_expect != XOPERATOR) { 5807 s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE); 5808 PL_expect = XOPERATOR; 5809 force_ident(PL_tokenbuf, '*'); 5810 if (!*PL_tokenbuf) 5811 PREREF('*'); 5812 TERM('*'); 5813 } 5814 s++; 5815 if (*s == '*') { 5816 s++; 5817 if (*s == '=' && !PL_lex_allbrackets && 5818 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { 5819 s -= 2; 5820 TOKEN(0); 5821 } 5822 PWop(OP_POW); 5823 } 5824 if (*s == '=' && !PL_lex_allbrackets && 5825 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { 5826 s--; 5827 TOKEN(0); 5828 } 5829 PL_parser->saw_infix_sigil = 1; 5830 Mop(OP_MULTIPLY); 5831 5832 case '%': 5833 { 5834 if (PL_expect == XOPERATOR) { 5835 if (s[1] == '=' && !PL_lex_allbrackets && 5836 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 5837 TOKEN(0); 5838 ++s; 5839 PL_parser->saw_infix_sigil = 1; 5840 Mop(OP_MODULO); 5841 } 5842 else if (PL_expect == XPOSTDEREF) POSTDEREF('%'); 5843 PL_tokenbuf[0] = '%'; 5844 s = scan_ident(s, PL_tokenbuf + 1, 5845 sizeof PL_tokenbuf - 1, FALSE); 5846 pl_yylval.ival = 0; 5847 if (!PL_tokenbuf[1]) { 5848 PREREF('%'); 5849 } 5850 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) { 5851 if (*s == '[') 5852 PL_tokenbuf[0] = '@'; 5853 } 5854 PL_expect = XOPERATOR; 5855 force_ident_maybe_lex('%'); 5856 TERM('%'); 5857 } 5858 case '^': 5859 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 5860 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) 5861 TOKEN(0); 5862 s++; 5863 BOop(OP_BIT_XOR); 5864 case '[': 5865 if (PL_lex_brackets > 100) 5866 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); 5867 PL_lex_brackstack[PL_lex_brackets++] = 0; 5868 PL_lex_allbrackets++; 5869 { 5870 const char tmp = *s++; 5871 OPERATOR(tmp); 5872 } 5873 case '~': 5874 if (s[1] == '~' 5875 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)) 5876 { 5877 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 5878 TOKEN(0); 5879 s += 2; 5880 Perl_ck_warner_d(aTHX_ 5881 packWARN(WARN_EXPERIMENTAL__SMARTMATCH), 5882 "Smartmatch is experimental"); 5883 Eop(OP_SMARTMATCH); 5884 } 5885 s++; 5886 OPERATOR('~'); 5887 case ',': 5888 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) 5889 TOKEN(0); 5890 s++; 5891 OPERATOR(','); 5892 case ':': 5893 if (s[1] == ':') { 5894 len = 0; 5895 goto just_a_word_zero_gv; 5896 } 5897 s++; 5898 switch (PL_expect) { 5899 OP *attrs; 5900 #ifdef PERL_MAD 5901 I32 stuffstart; 5902 #endif 5903 case XOPERATOR: 5904 if (!PL_in_my || PL_lex_state != LEX_NORMAL) 5905 break; 5906 PL_bufptr = s; /* update in case we back off */ 5907 if (*s == '=') { 5908 Perl_croak(aTHX_ 5909 "Use of := for an empty attribute list is not allowed"); 5910 } 5911 goto grabattrs; 5912 case XATTRBLOCK: 5913 PL_expect = XBLOCK; 5914 goto grabattrs; 5915 case XATTRTERM: 5916 PL_expect = XTERMBLOCK; 5917 grabattrs: 5918 #ifdef PERL_MAD 5919 stuffstart = s - SvPVX(PL_linestr) - 1; 5920 #endif 5921 s = PEEKSPACE(s); 5922 attrs = NULL; 5923 while (isIDFIRST_lazy_if(s,UTF)) { 5924 I32 tmp; 5925 SV *sv; 5926 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len); 5927 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) { 5928 if (tmp < 0) tmp = -tmp; 5929 switch (tmp) { 5930 case KEY_or: 5931 case KEY_and: 5932 case KEY_for: 5933 case KEY_foreach: 5934 case KEY_unless: 5935 case KEY_if: 5936 case KEY_while: 5937 case KEY_until: 5938 goto got_attrs; 5939 default: 5940 break; 5941 } 5942 } 5943 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0); 5944 if (*d == '(') { 5945 d = scan_str(d,TRUE,TRUE,FALSE,FALSE,NULL); 5946 COPLINE_SET_FROM_MULTI_END; 5947 if (!d) { 5948 /* MUST advance bufptr here to avoid bogus 5949 "at end of line" context messages from yyerror(). 5950 */ 5951 PL_bufptr = s + len; 5952 yyerror("Unterminated attribute parameter in attribute list"); 5953 if (attrs) 5954 op_free(attrs); 5955 sv_free(sv); 5956 return REPORT(0); /* EOF indicator */ 5957 } 5958 } 5959 if (PL_lex_stuff) { 5960 sv_catsv(sv, PL_lex_stuff); 5961 attrs = op_append_elem(OP_LIST, attrs, 5962 newSVOP(OP_CONST, 0, sv)); 5963 SvREFCNT_dec(PL_lex_stuff); 5964 PL_lex_stuff = NULL; 5965 } 5966 else { 5967 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) { 5968 sv_free(sv); 5969 if (PL_in_my == KEY_our) { 5970 deprecate(":unique"); 5971 } 5972 else 5973 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables"); 5974 } 5975 5976 /* NOTE: any CV attrs applied here need to be part of 5977 the CVf_BUILTIN_ATTRS define in cv.h! */ 5978 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) { 5979 sv_free(sv); 5980 CvLVALUE_on(PL_compcv); 5981 } 5982 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) { 5983 sv_free(sv); 5984 deprecate(":locked"); 5985 } 5986 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) { 5987 sv_free(sv); 5988 CvMETHOD_on(PL_compcv); 5989 } 5990 /* After we've set the flags, it could be argued that 5991 we don't need to do the attributes.pm-based setting 5992 process, and shouldn't bother appending recognized 5993 flags. To experiment with that, uncomment the 5994 following "else". (Note that's already been 5995 uncommented. That keeps the above-applied built-in 5996 attributes from being intercepted (and possibly 5997 rejected) by a package's attribute routines, but is 5998 justified by the performance win for the common case 5999 of applying only built-in attributes.) */ 6000 else 6001 attrs = op_append_elem(OP_LIST, attrs, 6002 newSVOP(OP_CONST, 0, 6003 sv)); 6004 } 6005 s = PEEKSPACE(d); 6006 if (*s == ':' && s[1] != ':') 6007 s = PEEKSPACE(s+1); 6008 else if (s == d) 6009 break; /* require real whitespace or :'s */ 6010 /* XXX losing whitespace on sequential attributes here */ 6011 } 6012 { 6013 if (*s != ';' && *s != '}' && 6014 !(PL_expect == XOPERATOR 6015 ? (*s == '=' || *s == ')') 6016 : (*s == '{' || *s == '('))) { 6017 const char q = ((*s == '\'') ? '"' : '\''); 6018 /* If here for an expression, and parsed no attrs, back 6019 off. */ 6020 if (PL_expect == XOPERATOR && !attrs) { 6021 s = PL_bufptr; 6022 break; 6023 } 6024 /* MUST advance bufptr here to avoid bogus "at end of line" 6025 context messages from yyerror(). 6026 */ 6027 PL_bufptr = s; 6028 yyerror( (const char *) 6029 (*s 6030 ? Perl_form(aTHX_ "Invalid separator character " 6031 "%c%c%c in attribute list", q, *s, q) 6032 : "Unterminated attribute list" ) ); 6033 if (attrs) 6034 op_free(attrs); 6035 OPERATOR(':'); 6036 } 6037 } 6038 got_attrs: 6039 if (attrs) { 6040 start_force(PL_curforce); 6041 NEXTVAL_NEXTTOKE.opval = attrs; 6042 CURMAD('_', PL_nextwhite); 6043 force_next(THING); 6044 } 6045 #ifdef PERL_MAD 6046 if (PL_madskills) { 6047 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart, 6048 (s - SvPVX(PL_linestr)) - stuffstart); 6049 } 6050 #endif 6051 TOKEN(COLONATTR); 6052 } 6053 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) { 6054 s--; 6055 TOKEN(0); 6056 } 6057 PL_lex_allbrackets--; 6058 OPERATOR(':'); 6059 case '(': 6060 s++; 6061 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr) 6062 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */ 6063 else 6064 PL_expect = XTERM; 6065 s = SKIPSPACE1(s); 6066 PL_lex_allbrackets++; 6067 TOKEN('('); 6068 case ';': 6069 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 6070 TOKEN(0); 6071 CLINE; 6072 s++; 6073 OPERATOR(';'); 6074 case ')': 6075 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) 6076 TOKEN(0); 6077 s++; 6078 PL_lex_allbrackets--; 6079 s = SKIPSPACE1(s); 6080 if (*s == '{') 6081 PREBLOCK(')'); 6082 TERM(')'); 6083 case ']': 6084 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF) 6085 TOKEN(0); 6086 s++; 6087 if (PL_lex_brackets <= 0) 6088 /* diag_listed_as: Unmatched right %s bracket */ 6089 yyerror("Unmatched right square bracket"); 6090 else 6091 --PL_lex_brackets; 6092 PL_lex_allbrackets--; 6093 if (PL_lex_state == LEX_INTERPNORMAL) { 6094 if (PL_lex_brackets == 0) { 6095 if (*s == '-' && s[1] == '>') 6096 PL_lex_state = LEX_INTERPENDMAYBE; 6097 else if (*s != '[' && *s != '{') 6098 PL_lex_state = LEX_INTERPEND; 6099 } 6100 } 6101 TERM(']'); 6102 case '{': 6103 s++; 6104 leftbracket: 6105 if (PL_lex_brackets > 100) { 6106 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); 6107 } 6108 switch (PL_expect) { 6109 case XTERM: 6110 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; 6111 PL_lex_allbrackets++; 6112 OPERATOR(HASHBRACK); 6113 case XOPERATOR: 6114 while (s < PL_bufend && SPACE_OR_TAB(*s)) 6115 s++; 6116 d = s; 6117 PL_tokenbuf[0] = '\0'; 6118 if (d < PL_bufend && *d == '-') { 6119 PL_tokenbuf[0] = '-'; 6120 d++; 6121 while (d < PL_bufend && SPACE_OR_TAB(*d)) 6122 d++; 6123 } 6124 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) { 6125 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, 6126 FALSE, &len); 6127 while (d < PL_bufend && SPACE_OR_TAB(*d)) 6128 d++; 6129 if (*d == '}') { 6130 const char minus = (PL_tokenbuf[0] == '-'); 6131 s = force_word(s + minus, WORD, FALSE, TRUE); 6132 if (minus) 6133 force_next('-'); 6134 } 6135 } 6136 /* FALL THROUGH */ 6137 case XATTRBLOCK: 6138 case XBLOCK: 6139 PL_lex_brackstack[PL_lex_brackets++] = XSTATE; 6140 PL_lex_allbrackets++; 6141 PL_expect = XSTATE; 6142 break; 6143 case XATTRTERM: 6144 case XTERMBLOCK: 6145 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; 6146 PL_lex_allbrackets++; 6147 PL_expect = XSTATE; 6148 break; 6149 default: { 6150 const char *t; 6151 if (PL_oldoldbufptr == PL_last_lop) 6152 PL_lex_brackstack[PL_lex_brackets++] = XTERM; 6153 else 6154 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; 6155 PL_lex_allbrackets++; 6156 s = SKIPSPACE1(s); 6157 if (*s == '}') { 6158 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) { 6159 PL_expect = XTERM; 6160 /* This hack is to get the ${} in the message. */ 6161 PL_bufptr = s+1; 6162 yyerror("syntax error"); 6163 break; 6164 } 6165 OPERATOR(HASHBRACK); 6166 } 6167 /* This hack serves to disambiguate a pair of curlies 6168 * as being a block or an anon hash. Normally, expectation 6169 * determines that, but in cases where we're not in a 6170 * position to expect anything in particular (like inside 6171 * eval"") we have to resolve the ambiguity. This code 6172 * covers the case where the first term in the curlies is a 6173 * quoted string. Most other cases need to be explicitly 6174 * disambiguated by prepending a "+" before the opening 6175 * curly in order to force resolution as an anon hash. 6176 * 6177 * XXX should probably propagate the outer expectation 6178 * into eval"" to rely less on this hack, but that could 6179 * potentially break current behavior of eval"". 6180 * GSAR 97-07-21 6181 */ 6182 t = s; 6183 if (*s == '\'' || *s == '"' || *s == '`') { 6184 /* common case: get past first string, handling escapes */ 6185 for (t++; t < PL_bufend && *t != *s;) 6186 if (*t++ == '\\' && (*t == '\\' || *t == *s)) 6187 t++; 6188 t++; 6189 } 6190 else if (*s == 'q') { 6191 if (++t < PL_bufend 6192 && (!isWORDCHAR(*t) 6193 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend 6194 && !isWORDCHAR(*t)))) 6195 { 6196 /* skip q//-like construct */ 6197 const char *tmps; 6198 char open, close, term; 6199 I32 brackets = 1; 6200 6201 while (t < PL_bufend && isSPACE(*t)) 6202 t++; 6203 /* check for q => */ 6204 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') { 6205 OPERATOR(HASHBRACK); 6206 } 6207 term = *t; 6208 open = term; 6209 if (term && (tmps = strchr("([{< )]}> )]}>",term))) 6210 term = tmps[5]; 6211 close = term; 6212 if (open == close) 6213 for (t++; t < PL_bufend; t++) { 6214 if (*t == '\\' && t+1 < PL_bufend && open != '\\') 6215 t++; 6216 else if (*t == open) 6217 break; 6218 } 6219 else { 6220 for (t++; t < PL_bufend; t++) { 6221 if (*t == '\\' && t+1 < PL_bufend) 6222 t++; 6223 else if (*t == close && --brackets <= 0) 6224 break; 6225 else if (*t == open) 6226 brackets++; 6227 } 6228 } 6229 t++; 6230 } 6231 else 6232 /* skip plain q word */ 6233 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF)) 6234 t += UTF8SKIP(t); 6235 } 6236 else if (isWORDCHAR_lazy_if(t,UTF)) { 6237 t += UTF8SKIP(t); 6238 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF)) 6239 t += UTF8SKIP(t); 6240 } 6241 while (t < PL_bufend && isSPACE(*t)) 6242 t++; 6243 /* if comma follows first term, call it an anon hash */ 6244 /* XXX it could be a comma expression with loop modifiers */ 6245 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s))) 6246 || (*t == '=' && t[1] == '>'))) 6247 OPERATOR(HASHBRACK); 6248 if (PL_expect == XREF) 6249 PL_expect = XTERM; 6250 else { 6251 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE; 6252 PL_expect = XSTATE; 6253 } 6254 } 6255 break; 6256 } 6257 pl_yylval.ival = CopLINE(PL_curcop); 6258 if (isSPACE(*s) || *s == '#') 6259 PL_copline = NOLINE; /* invalidate current command line number */ 6260 TOKEN(formbrack ? '=' : '{'); 6261 case '}': 6262 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF) 6263 TOKEN(0); 6264 rightbracket: 6265 s++; 6266 if (PL_lex_brackets <= 0) 6267 /* diag_listed_as: Unmatched right %s bracket */ 6268 yyerror("Unmatched right curly bracket"); 6269 else 6270 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets]; 6271 PL_lex_allbrackets--; 6272 if (PL_lex_state == LEX_INTERPNORMAL) { 6273 if (PL_lex_brackets == 0) { 6274 if (PL_expect & XFAKEBRACK) { 6275 PL_expect &= XENUMMASK; 6276 PL_lex_state = LEX_INTERPEND; 6277 PL_bufptr = s; 6278 #if 0 6279 if (PL_madskills) { 6280 if (!PL_thiswhite) 6281 PL_thiswhite = newSVpvs(""); 6282 sv_catpvs(PL_thiswhite,"}"); 6283 } 6284 #endif 6285 return yylex(); /* ignore fake brackets */ 6286 } 6287 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr 6288 && SvEVALED(PL_lex_repl)) 6289 PL_lex_state = LEX_INTERPEND; 6290 else if (*s == '-' && s[1] == '>') 6291 PL_lex_state = LEX_INTERPENDMAYBE; 6292 else if (*s != '[' && *s != '{') 6293 PL_lex_state = LEX_INTERPEND; 6294 } 6295 } 6296 if (PL_expect & XFAKEBRACK) { 6297 PL_expect &= XENUMMASK; 6298 PL_bufptr = s; 6299 return yylex(); /* ignore fake brackets */ 6300 } 6301 start_force(PL_curforce); 6302 if (PL_madskills) { 6303 curmad('X', newSVpvn(s-1,1)); 6304 CURMAD('_', PL_thiswhite); 6305 } 6306 force_next(formbrack ? '.' : '}'); 6307 if (formbrack) LEAVE; 6308 #ifdef PERL_MAD 6309 if (PL_madskills && !PL_thistoken) 6310 PL_thistoken = newSVpvs(""); 6311 #endif 6312 if (formbrack == 2) { /* means . where arguments were expected */ 6313 start_force(PL_curforce); 6314 force_next(';'); 6315 TOKEN(FORMRBRACK); 6316 } 6317 TOKEN(';'); 6318 case '&': 6319 if (PL_expect == XPOSTDEREF) POSTDEREF('&'); 6320 s++; 6321 if (*s++ == '&') { 6322 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 6323 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) { 6324 s -= 2; 6325 TOKEN(0); 6326 } 6327 AOPERATOR(ANDAND); 6328 } 6329 s--; 6330 if (PL_expect == XOPERATOR) { 6331 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON) 6332 && isIDFIRST_lazy_if(s,UTF)) 6333 { 6334 CopLINE_dec(PL_curcop); 6335 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi); 6336 CopLINE_inc(PL_curcop); 6337 } 6338 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 6339 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) { 6340 s--; 6341 TOKEN(0); 6342 } 6343 PL_parser->saw_infix_sigil = 1; 6344 BAop(OP_BIT_AND); 6345 } 6346 6347 PL_tokenbuf[0] = '&'; 6348 s = scan_ident(s - 1, PL_tokenbuf + 1, 6349 sizeof PL_tokenbuf - 1, TRUE); 6350 if (PL_tokenbuf[1]) { 6351 PL_expect = XOPERATOR; 6352 force_ident_maybe_lex('&'); 6353 } 6354 else 6355 PREREF('&'); 6356 pl_yylval.ival = (OPpENTERSUB_AMPER<<8); 6357 TERM('&'); 6358 6359 case '|': 6360 s++; 6361 if (*s++ == '|') { 6362 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 6363 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) { 6364 s -= 2; 6365 TOKEN(0); 6366 } 6367 AOPERATOR(OROR); 6368 } 6369 s--; 6370 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 6371 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) { 6372 s--; 6373 TOKEN(0); 6374 } 6375 BOop(OP_BIT_OR); 6376 case '=': 6377 s++; 6378 { 6379 const char tmp = *s++; 6380 if (tmp == '=') { 6381 if (!PL_lex_allbrackets && 6382 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6383 s -= 2; 6384 TOKEN(0); 6385 } 6386 Eop(OP_EQ); 6387 } 6388 if (tmp == '>') { 6389 if (!PL_lex_allbrackets && 6390 PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) { 6391 s -= 2; 6392 TOKEN(0); 6393 } 6394 OPERATOR(','); 6395 } 6396 if (tmp == '~') 6397 PMop(OP_MATCH); 6398 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX) 6399 && strchr("+-*/%.^&|<",tmp)) 6400 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 6401 "Reversed %c= operator",(int)tmp); 6402 s--; 6403 if (PL_expect == XSTATE && isALPHA(tmp) && 6404 (s == PL_linestart+1 || s[-2] == '\n') ) 6405 { 6406 if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered) 6407 || PL_lex_state != LEX_NORMAL) { 6408 d = PL_bufend; 6409 while (s < d) { 6410 if (*s++ == '\n') { 6411 incline(s); 6412 if (strnEQ(s,"=cut",4)) { 6413 s = strchr(s,'\n'); 6414 if (s) 6415 s++; 6416 else 6417 s = d; 6418 incline(s); 6419 goto retry; 6420 } 6421 } 6422 } 6423 goto retry; 6424 } 6425 #ifdef PERL_MAD 6426 if (PL_madskills) { 6427 if (!PL_thiswhite) 6428 PL_thiswhite = newSVpvs(""); 6429 sv_catpvn(PL_thiswhite, PL_linestart, 6430 PL_bufend - PL_linestart); 6431 } 6432 #endif 6433 s = PL_bufend; 6434 PL_parser->in_pod = 1; 6435 goto retry; 6436 } 6437 } 6438 if (PL_expect == XBLOCK) { 6439 const char *t = s; 6440 #ifdef PERL_STRICT_CR 6441 while (SPACE_OR_TAB(*t)) 6442 #else 6443 while (SPACE_OR_TAB(*t) || *t == '\r') 6444 #endif 6445 t++; 6446 if (*t == '\n' || *t == '#') { 6447 formbrack = 1; 6448 ENTER; 6449 SAVEI8(PL_parser->form_lex_state); 6450 SAVEI32(PL_lex_formbrack); 6451 PL_parser->form_lex_state = PL_lex_state; 6452 PL_lex_formbrack = PL_lex_brackets + 1; 6453 goto leftbracket; 6454 } 6455 } 6456 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { 6457 s--; 6458 TOKEN(0); 6459 } 6460 pl_yylval.ival = 0; 6461 OPERATOR(ASSIGNOP); 6462 case '!': 6463 s++; 6464 { 6465 const char tmp = *s++; 6466 if (tmp == '=') { 6467 /* was this !=~ where !~ was meant? 6468 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */ 6469 6470 if (*s == '~' && ckWARN(WARN_SYNTAX)) { 6471 const char *t = s+1; 6472 6473 while (t < PL_bufend && isSPACE(*t)) 6474 ++t; 6475 6476 if (*t == '/' || *t == '?' || 6477 ((*t == 'm' || *t == 's' || *t == 'y') 6478 && !isWORDCHAR(t[1])) || 6479 (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2]))) 6480 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 6481 "!=~ should be !~"); 6482 } 6483 if (!PL_lex_allbrackets && 6484 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6485 s -= 2; 6486 TOKEN(0); 6487 } 6488 Eop(OP_NE); 6489 } 6490 if (tmp == '~') 6491 PMop(OP_NOT); 6492 } 6493 s--; 6494 OPERATOR('!'); 6495 case '<': 6496 if (PL_expect != XOPERATOR) { 6497 if (s[1] != '<' && !strchr(s,'>')) 6498 check_uni(); 6499 if (s[1] == '<') 6500 s = scan_heredoc(s); 6501 else 6502 s = scan_inputsymbol(s); 6503 PL_expect = XOPERATOR; 6504 TOKEN(sublex_start()); 6505 } 6506 s++; 6507 { 6508 char tmp = *s++; 6509 if (tmp == '<') { 6510 if (*s == '=' && !PL_lex_allbrackets && 6511 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { 6512 s -= 2; 6513 TOKEN(0); 6514 } 6515 SHop(OP_LEFT_SHIFT); 6516 } 6517 if (tmp == '=') { 6518 tmp = *s++; 6519 if (tmp == '>') { 6520 if (!PL_lex_allbrackets && 6521 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6522 s -= 3; 6523 TOKEN(0); 6524 } 6525 Eop(OP_NCMP); 6526 } 6527 s--; 6528 if (!PL_lex_allbrackets && 6529 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6530 s -= 2; 6531 TOKEN(0); 6532 } 6533 Rop(OP_LE); 6534 } 6535 } 6536 s--; 6537 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6538 s--; 6539 TOKEN(0); 6540 } 6541 Rop(OP_LT); 6542 case '>': 6543 s++; 6544 { 6545 const char tmp = *s++; 6546 if (tmp == '>') { 6547 if (*s == '=' && !PL_lex_allbrackets && 6548 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { 6549 s -= 2; 6550 TOKEN(0); 6551 } 6552 SHop(OP_RIGHT_SHIFT); 6553 } 6554 else if (tmp == '=') { 6555 if (!PL_lex_allbrackets && 6556 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6557 s -= 2; 6558 TOKEN(0); 6559 } 6560 Rop(OP_GE); 6561 } 6562 } 6563 s--; 6564 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6565 s--; 6566 TOKEN(0); 6567 } 6568 Rop(OP_GT); 6569 6570 case '$': 6571 CLINE; 6572 6573 if (PL_expect == XOPERATOR) { 6574 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) { 6575 return deprecate_commaless_var_list(); 6576 } 6577 } 6578 else if (PL_expect == XPOSTDEREF) { 6579 if (s[1] == '#') { 6580 s++; 6581 POSTDEREF(DOLSHARP); 6582 } 6583 POSTDEREF('$'); 6584 } 6585 6586 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) { 6587 PL_tokenbuf[0] = '@'; 6588 s = scan_ident(s + 1, PL_tokenbuf + 1, 6589 sizeof PL_tokenbuf - 1, FALSE); 6590 if (PL_expect == XOPERATOR) 6591 no_op("Array length", s); 6592 if (!PL_tokenbuf[1]) 6593 PREREF(DOLSHARP); 6594 PL_expect = XOPERATOR; 6595 force_ident_maybe_lex('#'); 6596 TOKEN(DOLSHARP); 6597 } 6598 6599 PL_tokenbuf[0] = '$'; 6600 s = scan_ident(s, PL_tokenbuf + 1, 6601 sizeof PL_tokenbuf - 1, FALSE); 6602 if (PL_expect == XOPERATOR) 6603 no_op("Scalar", s); 6604 if (!PL_tokenbuf[1]) { 6605 if (s == PL_bufend) 6606 yyerror("Final $ should be \\$ or $name"); 6607 PREREF('$'); 6608 } 6609 6610 d = s; 6611 { 6612 const char tmp = *s; 6613 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) 6614 s = SKIPSPACE1(s); 6615 6616 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) 6617 && intuit_more(s)) { 6618 if (*s == '[') { 6619 PL_tokenbuf[0] = '@'; 6620 if (ckWARN(WARN_SYNTAX)) { 6621 char *t = s+1; 6622 6623 while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$') 6624 t++; 6625 if (*t++ == ',') { 6626 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */ 6627 while (t < PL_bufend && *t != ']') 6628 t++; 6629 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 6630 "Multidimensional syntax %.*s not supported", 6631 (int)((t - PL_bufptr) + 1), PL_bufptr); 6632 } 6633 } 6634 } 6635 else if (*s == '{') { 6636 char *t; 6637 PL_tokenbuf[0] = '%'; 6638 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX) 6639 && (t = strchr(s, '}')) && (t = strchr(t, '='))) 6640 { 6641 char tmpbuf[sizeof PL_tokenbuf]; 6642 do { 6643 t++; 6644 } while (isSPACE(*t)); 6645 if (isIDFIRST_lazy_if(t,UTF)) { 6646 STRLEN len; 6647 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, 6648 &len); 6649 while (isSPACE(*t)) 6650 t++; 6651 if (*t == ';' 6652 && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0)) 6653 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 6654 "You need to quote \"%"UTF8f"\"", 6655 UTF8fARG(UTF, len, tmpbuf)); 6656 } 6657 } 6658 } 6659 } 6660 6661 PL_expect = XOPERATOR; 6662 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) { 6663 const bool islop = (PL_last_lop == PL_oldoldbufptr); 6664 if (!islop || PL_last_lop_op == OP_GREPSTART) 6665 PL_expect = XOPERATOR; 6666 else if (strchr("$@\"'`q", *s)) 6667 PL_expect = XTERM; /* e.g. print $fh "foo" */ 6668 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF)) 6669 PL_expect = XTERM; /* e.g. print $fh &sub */ 6670 else if (isIDFIRST_lazy_if(s,UTF)) { 6671 char tmpbuf[sizeof PL_tokenbuf]; 6672 int t2; 6673 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len); 6674 if ((t2 = keyword(tmpbuf, len, 0))) { 6675 /* binary operators exclude handle interpretations */ 6676 switch (t2) { 6677 case -KEY_x: 6678 case -KEY_eq: 6679 case -KEY_ne: 6680 case -KEY_gt: 6681 case -KEY_lt: 6682 case -KEY_ge: 6683 case -KEY_le: 6684 case -KEY_cmp: 6685 break; 6686 default: 6687 PL_expect = XTERM; /* e.g. print $fh length() */ 6688 break; 6689 } 6690 } 6691 else { 6692 PL_expect = XTERM; /* e.g. print $fh subr() */ 6693 } 6694 } 6695 else if (isDIGIT(*s)) 6696 PL_expect = XTERM; /* e.g. print $fh 3 */ 6697 else if (*s == '.' && isDIGIT(s[1])) 6698 PL_expect = XTERM; /* e.g. print $fh .3 */ 6699 else if ((*s == '?' || *s == '-' || *s == '+') 6700 && !isSPACE(s[1]) && s[1] != '=') 6701 PL_expect = XTERM; /* e.g. print $fh -1 */ 6702 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '=' 6703 && s[1] != '/') 6704 PL_expect = XTERM; /* e.g. print $fh /.../ 6705 XXX except DORDOR operator 6706 */ 6707 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) 6708 && s[2] != '=') 6709 PL_expect = XTERM; /* print $fh <<"EOF" */ 6710 } 6711 } 6712 force_ident_maybe_lex('$'); 6713 TOKEN('$'); 6714 6715 case '@': 6716 if (PL_expect == XOPERATOR) 6717 no_op("Array", s); 6718 else if (PL_expect == XPOSTDEREF) POSTDEREF('@'); 6719 PL_tokenbuf[0] = '@'; 6720 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE); 6721 pl_yylval.ival = 0; 6722 if (!PL_tokenbuf[1]) { 6723 PREREF('@'); 6724 } 6725 if (PL_lex_state == LEX_NORMAL) 6726 s = SKIPSPACE1(s); 6727 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) { 6728 if (*s == '{') 6729 PL_tokenbuf[0] = '%'; 6730 6731 /* Warn about @ where they meant $. */ 6732 if (*s == '[' || *s == '{') { 6733 if (ckWARN(WARN_SYNTAX)) { 6734 S_check_scalar_slice(aTHX_ s); 6735 } 6736 } 6737 } 6738 PL_expect = XOPERATOR; 6739 force_ident_maybe_lex('@'); 6740 TERM('@'); 6741 6742 case '/': /* may be division, defined-or, or pattern */ 6743 if (PL_expect == XTERMORDORDOR && s[1] == '/') { 6744 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 6745 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) 6746 TOKEN(0); 6747 s += 2; 6748 AOPERATOR(DORDOR); 6749 } 6750 case '?': /* may either be conditional or pattern */ 6751 if (PL_expect == XOPERATOR) { 6752 char tmp = *s++; 6753 if(tmp == '?') { 6754 if (!PL_lex_allbrackets && 6755 PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) { 6756 s--; 6757 TOKEN(0); 6758 } 6759 PL_lex_allbrackets++; 6760 OPERATOR('?'); 6761 } 6762 else { 6763 tmp = *s++; 6764 if(tmp == '/') { 6765 /* A // operator. */ 6766 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 6767 (*s == '=' ? LEX_FAKEEOF_ASSIGN : 6768 LEX_FAKEEOF_LOGIC)) { 6769 s -= 2; 6770 TOKEN(0); 6771 } 6772 AOPERATOR(DORDOR); 6773 } 6774 else { 6775 s--; 6776 if (*s == '=' && !PL_lex_allbrackets && 6777 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { 6778 s--; 6779 TOKEN(0); 6780 } 6781 Mop(OP_DIVIDE); 6782 } 6783 } 6784 } 6785 else { 6786 /* Disable warning on "study /blah/" */ 6787 if (PL_oldoldbufptr == PL_last_uni 6788 && (*PL_last_uni != 's' || s - PL_last_uni < 5 6789 || memNE(PL_last_uni, "study", 5) 6790 || isWORDCHAR_lazy_if(PL_last_uni+5,UTF) 6791 )) 6792 check_uni(); 6793 if (*s == '?') 6794 deprecate("?PATTERN? without explicit operator"); 6795 s = scan_pat(s,OP_MATCH); 6796 TERM(sublex_start()); 6797 } 6798 6799 case '.': 6800 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack 6801 #ifdef PERL_STRICT_CR 6802 && s[1] == '\n' 6803 #else 6804 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n')) 6805 #endif 6806 && (s == PL_linestart || s[-1] == '\n') ) 6807 { 6808 PL_expect = XSTATE; 6809 formbrack = 2; /* dot seen where arguments expected */ 6810 goto rightbracket; 6811 } 6812 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') { 6813 s += 3; 6814 OPERATOR(YADAYADA); 6815 } 6816 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) { 6817 char tmp = *s++; 6818 if (*s == tmp) { 6819 if (!PL_lex_allbrackets && 6820 PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) { 6821 s--; 6822 TOKEN(0); 6823 } 6824 s++; 6825 if (*s == tmp) { 6826 s++; 6827 pl_yylval.ival = OPf_SPECIAL; 6828 } 6829 else 6830 pl_yylval.ival = 0; 6831 OPERATOR(DOTDOT); 6832 } 6833 if (*s == '=' && !PL_lex_allbrackets && 6834 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { 6835 s--; 6836 TOKEN(0); 6837 } 6838 Aop(OP_CONCAT); 6839 } 6840 /* FALL THROUGH */ 6841 case '0': case '1': case '2': case '3': case '4': 6842 case '5': case '6': case '7': case '8': case '9': 6843 s = scan_num(s, &pl_yylval); 6844 DEBUG_T( { printbuf("### Saw number in %s\n", s); } ); 6845 if (PL_expect == XOPERATOR) 6846 no_op("Number",s); 6847 TERM(THING); 6848 6849 case '\'': 6850 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL); 6851 if (!s) 6852 missingterm(NULL); 6853 COPLINE_SET_FROM_MULTI_END; 6854 DEBUG_T( { printbuf("### Saw string before %s\n", s); } ); 6855 if (PL_expect == XOPERATOR) { 6856 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) { 6857 return deprecate_commaless_var_list(); 6858 } 6859 else 6860 no_op("String",s); 6861 } 6862 pl_yylval.ival = OP_CONST; 6863 TERM(sublex_start()); 6864 6865 case '"': 6866 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL); 6867 DEBUG_T( { 6868 if (s) 6869 printbuf("### Saw string before %s\n", s); 6870 else 6871 PerlIO_printf(Perl_debug_log, 6872 "### Saw unterminated string\n"); 6873 } ); 6874 if (PL_expect == XOPERATOR) { 6875 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) { 6876 return deprecate_commaless_var_list(); 6877 } 6878 else 6879 no_op("String",s); 6880 } 6881 if (!s) 6882 missingterm(NULL); 6883 pl_yylval.ival = OP_CONST; 6884 /* FIXME. I think that this can be const if char *d is replaced by 6885 more localised variables. */ 6886 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) { 6887 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) { 6888 pl_yylval.ival = OP_STRINGIFY; 6889 break; 6890 } 6891 } 6892 if (pl_yylval.ival == OP_CONST) 6893 COPLINE_SET_FROM_MULTI_END; 6894 TERM(sublex_start()); 6895 6896 case '`': 6897 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL); 6898 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } ); 6899 if (PL_expect == XOPERATOR) 6900 no_op("Backticks",s); 6901 if (!s) 6902 missingterm(NULL); 6903 pl_yylval.ival = OP_BACKTICK; 6904 TERM(sublex_start()); 6905 6906 case '\\': 6907 s++; 6908 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr 6909 && isDIGIT(*s)) 6910 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression", 6911 *s, *s); 6912 if (PL_expect == XOPERATOR) 6913 no_op("Backslash",s); 6914 OPERATOR(REFGEN); 6915 6916 case 'v': 6917 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) { 6918 char *start = s + 2; 6919 while (isDIGIT(*start) || *start == '_') 6920 start++; 6921 if (*start == '.' && isDIGIT(start[1])) { 6922 s = scan_num(s, &pl_yylval); 6923 TERM(THING); 6924 } 6925 else if ((*start == ':' && start[1] == ':') 6926 || (PL_expect == XSTATE && *start == ':')) 6927 goto keylookup; 6928 else if (PL_expect == XSTATE) { 6929 d = start; 6930 while (d < PL_bufend && isSPACE(*d)) d++; 6931 if (*d == ':') goto keylookup; 6932 } 6933 /* avoid v123abc() or $h{v1}, allow C<print v10;> */ 6934 if (!isALPHA(*start) && (PL_expect == XTERM 6935 || PL_expect == XREF || PL_expect == XSTATE 6936 || PL_expect == XTERMORDORDOR)) { 6937 GV *const gv = gv_fetchpvn_flags(s, start - s, 6938 UTF ? SVf_UTF8 : 0, SVt_PVCV); 6939 if (!gv) { 6940 s = scan_num(s, &pl_yylval); 6941 TERM(THING); 6942 } 6943 } 6944 } 6945 goto keylookup; 6946 case 'x': 6947 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) { 6948 s++; 6949 Mop(OP_REPEAT); 6950 } 6951 goto keylookup; 6952 6953 case '_': 6954 case 'a': case 'A': 6955 case 'b': case 'B': 6956 case 'c': case 'C': 6957 case 'd': case 'D': 6958 case 'e': case 'E': 6959 case 'f': case 'F': 6960 case 'g': case 'G': 6961 case 'h': case 'H': 6962 case 'i': case 'I': 6963 case 'j': case 'J': 6964 case 'k': case 'K': 6965 case 'l': case 'L': 6966 case 'm': case 'M': 6967 case 'n': case 'N': 6968 case 'o': case 'O': 6969 case 'p': case 'P': 6970 case 'q': case 'Q': 6971 case 'r': case 'R': 6972 case 's': case 'S': 6973 case 't': case 'T': 6974 case 'u': case 'U': 6975 case 'V': 6976 case 'w': case 'W': 6977 case 'X': 6978 case 'y': case 'Y': 6979 case 'z': case 'Z': 6980 6981 keylookup: { 6982 bool anydelim; 6983 bool lex; 6984 I32 tmp; 6985 SV *sv; 6986 CV *cv; 6987 PADOFFSET off; 6988 OP *rv2cv_op; 6989 6990 lex = FALSE; 6991 orig_keyword = 0; 6992 off = 0; 6993 sv = NULL; 6994 cv = NULL; 6995 gv = NULL; 6996 gvp = NULL; 6997 rv2cv_op = NULL; 6998 6999 PL_bufptr = s; 7000 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len); 7001 7002 /* Some keywords can be followed by any delimiter, including ':' */ 7003 anydelim = word_takes_any_delimeter(PL_tokenbuf, len); 7004 7005 /* x::* is just a word, unless x is "CORE" */ 7006 if (!anydelim && *s == ':' && s[1] == ':') { 7007 if (strEQ(PL_tokenbuf, "CORE")) goto case_KEY_CORE; 7008 goto just_a_word; 7009 } 7010 7011 d = s; 7012 while (d < PL_bufend && isSPACE(*d)) 7013 d++; /* no comments skipped here, or s### is misparsed */ 7014 7015 /* Is this a word before a => operator? */ 7016 if (*d == '=' && d[1] == '>') { 7017 fat_arrow: 7018 CLINE; 7019 pl_yylval.opval 7020 = (OP*)newSVOP(OP_CONST, 0, 7021 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len)); 7022 pl_yylval.opval->op_private = OPpCONST_BARE; 7023 TERM(WORD); 7024 } 7025 7026 /* Check for plugged-in keyword */ 7027 { 7028 OP *o; 7029 int result; 7030 char *saved_bufptr = PL_bufptr; 7031 PL_bufptr = s; 7032 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o); 7033 s = PL_bufptr; 7034 if (result == KEYWORD_PLUGIN_DECLINE) { 7035 /* not a plugged-in keyword */ 7036 PL_bufptr = saved_bufptr; 7037 } else if (result == KEYWORD_PLUGIN_STMT) { 7038 pl_yylval.opval = o; 7039 CLINE; 7040 PL_expect = XSTATE; 7041 return REPORT(PLUGSTMT); 7042 } else if (result == KEYWORD_PLUGIN_EXPR) { 7043 pl_yylval.opval = o; 7044 CLINE; 7045 PL_expect = XOPERATOR; 7046 return REPORT(PLUGEXPR); 7047 } else { 7048 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'", 7049 PL_tokenbuf); 7050 } 7051 } 7052 7053 /* Check for built-in keyword */ 7054 tmp = keyword(PL_tokenbuf, len, 0); 7055 7056 /* Is this a label? */ 7057 if (!anydelim && PL_expect == XSTATE 7058 && d < PL_bufend && *d == ':' && *(d + 1) != ':') { 7059 s = d + 1; 7060 pl_yylval.pval = savepvn(PL_tokenbuf, len+1); 7061 pl_yylval.pval[len] = '\0'; 7062 pl_yylval.pval[len+1] = UTF ? 1 : 0; 7063 CLINE; 7064 TOKEN(LABEL); 7065 } 7066 7067 /* Check for lexical sub */ 7068 if (PL_expect != XOPERATOR) { 7069 char tmpbuf[sizeof PL_tokenbuf + 1]; 7070 *tmpbuf = '&'; 7071 Copy(PL_tokenbuf, tmpbuf+1, len, char); 7072 off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0); 7073 if (off != NOT_IN_PAD) { 7074 assert(off); /* we assume this is boolean-true below */ 7075 if (PAD_COMPNAME_FLAGS_isOUR(off)) { 7076 HV * const stash = PAD_COMPNAME_OURSTASH(off); 7077 HEK * const stashname = HvNAME_HEK(stash); 7078 sv = newSVhek(stashname); 7079 sv_catpvs(sv, "::"); 7080 sv_catpvn_flags(sv, PL_tokenbuf, len, 7081 (UTF ? SV_CATUTF8 : SV_CATBYTES)); 7082 gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv), 7083 SVt_PVCV); 7084 off = 0; 7085 if (!gv) { 7086 sv_free(sv); 7087 sv = NULL; 7088 goto just_a_word; 7089 } 7090 } 7091 else { 7092 rv2cv_op = newOP(OP_PADANY, 0); 7093 rv2cv_op->op_targ = off; 7094 cv = find_lexical_cv(off); 7095 } 7096 lex = TRUE; 7097 goto just_a_word; 7098 } 7099 off = 0; 7100 } 7101 7102 if (tmp < 0) { /* second-class keyword? */ 7103 GV *ogv = NULL; /* override (winner) */ 7104 GV *hgv = NULL; /* hidden (loser) */ 7105 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) { 7106 CV *cv; 7107 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 7108 (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL, 7109 SVt_PVCV)) && 7110 (cv = GvCVu(gv))) 7111 { 7112 if (GvIMPORTED_CV(gv)) 7113 ogv = gv; 7114 else if (! CvMETHOD(cv)) 7115 hgv = gv; 7116 } 7117 if (!ogv && 7118 (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf, 7119 len, FALSE)) && 7120 (gv = *gvp) && ( 7121 isGV_with_GP(gv) 7122 ? GvCVu(gv) && GvIMPORTED_CV(gv) 7123 : SvPCS_IMPORTED(gv) 7124 && (gv_init(gv, PL_globalstash, PL_tokenbuf, 7125 len, 0), 1) 7126 )) 7127 { 7128 ogv = gv; 7129 } 7130 } 7131 if (ogv) { 7132 orig_keyword = tmp; 7133 tmp = 0; /* overridden by import or by GLOBAL */ 7134 } 7135 else if (gv && !gvp 7136 && -tmp==KEY_lock /* XXX generalizable kludge */ 7137 && GvCVu(gv)) 7138 { 7139 tmp = 0; /* any sub overrides "weak" keyword */ 7140 } 7141 else { /* no override */ 7142 tmp = -tmp; 7143 if (tmp == KEY_dump) { 7144 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), 7145 "dump() better written as CORE::dump()"); 7146 } 7147 gv = NULL; 7148 gvp = 0; 7149 if (hgv && tmp != KEY_x) /* never ambiguous */ 7150 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 7151 "Ambiguous call resolved as CORE::%s(), " 7152 "qualify as such or use &", 7153 GvENAME(hgv)); 7154 } 7155 } 7156 7157 if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__ 7158 && (!anydelim || *s != '#')) { 7159 /* no override, and not s### either; skipspace is safe here 7160 * check for => on following line */ 7161 bool arrow; 7162 STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr); 7163 STRLEN soff = s - SvPVX(PL_linestr); 7164 s = skipspace_flags(s, LEX_NO_INCLINE); 7165 arrow = *s == '=' && s[1] == '>'; 7166 PL_bufptr = SvPVX(PL_linestr) + bufoff; 7167 s = SvPVX(PL_linestr) + soff; 7168 if (arrow) 7169 goto fat_arrow; 7170 } 7171 7172 reserved_word: 7173 switch (tmp) { 7174 7175 default: /* not a keyword */ 7176 /* Trade off - by using this evil construction we can pull the 7177 variable gv into the block labelled keylookup. If not, then 7178 we have to give it function scope so that the goto from the 7179 earlier ':' case doesn't bypass the initialisation. */ 7180 if (0) { 7181 just_a_word_zero_gv: 7182 sv = NULL; 7183 cv = NULL; 7184 gv = NULL; 7185 gvp = NULL; 7186 rv2cv_op = NULL; 7187 orig_keyword = 0; 7188 lex = 0; 7189 off = 0; 7190 } 7191 just_a_word: { 7192 int pkgname = 0; 7193 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]); 7194 const char penultchar = 7195 lastchar && PL_bufptr - 2 >= PL_linestart 7196 ? PL_bufptr[-2] 7197 : 0; 7198 #ifdef PERL_MAD 7199 SV *nextPL_nextwhite = 0; 7200 #endif 7201 7202 7203 /* Get the rest if it looks like a package qualifier */ 7204 7205 if (*s == '\'' || (*s == ':' && s[1] == ':')) { 7206 STRLEN morelen; 7207 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len, 7208 TRUE, &morelen); 7209 if (!morelen) 7210 Perl_croak(aTHX_ "Bad name after %"UTF8f"%s", 7211 UTF8fARG(UTF, len, PL_tokenbuf), 7212 *s == '\'' ? "'" : "::"); 7213 len += morelen; 7214 pkgname = 1; 7215 } 7216 7217 if (PL_expect == XOPERATOR) { 7218 if (PL_bufptr == PL_linestart) { 7219 CopLINE_dec(PL_curcop); 7220 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi); 7221 CopLINE_inc(PL_curcop); 7222 } 7223 else 7224 no_op("Bareword",s); 7225 } 7226 7227 /* Look for a subroutine with this name in current package, 7228 unless this is a lexical sub, or name is "Foo::", 7229 in which case Foo is a bareword 7230 (and a package name). */ 7231 7232 if (len > 2 && !PL_madskills && 7233 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':') 7234 { 7235 if (ckWARN(WARN_BAREWORD) 7236 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV)) 7237 Perl_warner(aTHX_ packWARN(WARN_BAREWORD), 7238 "Bareword \"%"UTF8f"\" refers to nonexistent package", 7239 UTF8fARG(UTF, len, PL_tokenbuf)); 7240 len -= 2; 7241 PL_tokenbuf[len] = '\0'; 7242 gv = NULL; 7243 gvp = 0; 7244 } 7245 else { 7246 if (!lex && !gv) { 7247 /* Mustn't actually add anything to a symbol table. 7248 But also don't want to "initialise" any placeholder 7249 constants that might already be there into full 7250 blown PVGVs with attached PVCV. */ 7251 gv = gv_fetchpvn_flags(PL_tokenbuf, len, 7252 GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ), 7253 SVt_PVCV); 7254 } 7255 len = 0; 7256 } 7257 7258 /* if we saw a global override before, get the right name */ 7259 7260 if (!sv) 7261 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, 7262 len ? len : strlen(PL_tokenbuf)); 7263 if (gvp) { 7264 SV * const tmp_sv = sv; 7265 sv = newSVpvs("CORE::GLOBAL::"); 7266 sv_catsv(sv, tmp_sv); 7267 SvREFCNT_dec(tmp_sv); 7268 } 7269 7270 #ifdef PERL_MAD 7271 if (PL_madskills && !PL_thistoken) { 7272 char *start = SvPVX(PL_linestr) + PL_realtokenstart; 7273 PL_thistoken = newSVpvn(start,s - start); 7274 PL_realtokenstart = s - SvPVX(PL_linestr); 7275 } 7276 #endif 7277 7278 /* Presume this is going to be a bareword of some sort. */ 7279 CLINE; 7280 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv); 7281 pl_yylval.opval->op_private = OPpCONST_BARE; 7282 7283 /* And if "Foo::", then that's what it certainly is. */ 7284 if (len) 7285 goto safe_bareword; 7286 7287 if (!off) 7288 { 7289 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv)); 7290 const_op->op_private = OPpCONST_BARE; 7291 rv2cv_op = newCVREF(0, const_op); 7292 cv = lex ? GvCV(gv) : rv2cv_op_cv(rv2cv_op, 0); 7293 } 7294 7295 /* See if it's the indirect object for a list operator. */ 7296 7297 if (PL_oldoldbufptr && 7298 PL_oldoldbufptr < PL_bufptr && 7299 (PL_oldoldbufptr == PL_last_lop 7300 || PL_oldoldbufptr == PL_last_uni) && 7301 /* NO SKIPSPACE BEFORE HERE! */ 7302 (PL_expect == XREF || 7303 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF)) 7304 { 7305 bool immediate_paren = *s == '('; 7306 7307 /* (Now we can afford to cross potential line boundary.) */ 7308 s = SKIPSPACE2(s,nextPL_nextwhite); 7309 #ifdef PERL_MAD 7310 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */ 7311 #endif 7312 7313 /* Two barewords in a row may indicate method call. */ 7314 7315 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') && 7316 (tmp = intuit_method(s, gv, cv))) { 7317 op_free(rv2cv_op); 7318 if (tmp == METHOD && !PL_lex_allbrackets && 7319 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 7320 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 7321 return REPORT(tmp); 7322 } 7323 7324 /* If not a declared subroutine, it's an indirect object. */ 7325 /* (But it's an indir obj regardless for sort.) */ 7326 /* Also, if "_" follows a filetest operator, it's a bareword */ 7327 7328 if ( 7329 ( !immediate_paren && (PL_last_lop_op == OP_SORT || 7330 (!cv && 7331 (PL_last_lop_op != OP_MAPSTART && 7332 PL_last_lop_op != OP_GREPSTART)))) 7333 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0' 7334 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP)) 7335 ) 7336 { 7337 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR; 7338 goto bareword; 7339 } 7340 } 7341 7342 PL_expect = XOPERATOR; 7343 #ifdef PERL_MAD 7344 if (isSPACE(*s)) 7345 s = SKIPSPACE2(s,nextPL_nextwhite); 7346 PL_nextwhite = nextPL_nextwhite; 7347 #else 7348 s = skipspace(s); 7349 #endif 7350 7351 /* Is this a word before a => operator? */ 7352 if (*s == '=' && s[1] == '>' && !pkgname) { 7353 op_free(rv2cv_op); 7354 CLINE; 7355 /* This is our own scalar, created a few lines above, 7356 so this is safe. */ 7357 SvREADONLY_off(cSVOPx(pl_yylval.opval)->op_sv); 7358 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf); 7359 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len)) 7360 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv); 7361 SvREADONLY_on(cSVOPx(pl_yylval.opval)->op_sv); 7362 TERM(WORD); 7363 } 7364 7365 /* If followed by a paren, it's certainly a subroutine. */ 7366 if (*s == '(') { 7367 CLINE; 7368 if (cv) { 7369 d = s + 1; 7370 while (SPACE_OR_TAB(*d)) 7371 d++; 7372 if (*d == ')' && (sv = cv_const_sv_or_av(cv))) { 7373 s = d + 1; 7374 goto its_constant; 7375 } 7376 } 7377 #ifdef PERL_MAD 7378 if (PL_madskills) { 7379 PL_nextwhite = PL_thiswhite; 7380 PL_thiswhite = 0; 7381 } 7382 start_force(PL_curforce); 7383 #endif 7384 NEXTVAL_NEXTTOKE.opval = 7385 off ? rv2cv_op : pl_yylval.opval; 7386 PL_expect = XOPERATOR; 7387 #ifdef PERL_MAD 7388 if (PL_madskills) { 7389 PL_nextwhite = nextPL_nextwhite; 7390 curmad('X', PL_thistoken); 7391 PL_thistoken = newSVpvs(""); 7392 } 7393 #endif 7394 if (off) 7395 op_free(pl_yylval.opval), force_next(PRIVATEREF); 7396 else op_free(rv2cv_op), force_next(WORD); 7397 pl_yylval.ival = 0; 7398 TOKEN('&'); 7399 } 7400 7401 /* If followed by var or block, call it a method (unless sub) */ 7402 7403 if ((*s == '$' || *s == '{') && !cv) { 7404 op_free(rv2cv_op); 7405 PL_last_lop = PL_oldbufptr; 7406 PL_last_lop_op = OP_METHOD; 7407 if (!PL_lex_allbrackets && 7408 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 7409 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 7410 PREBLOCK(METHOD); 7411 } 7412 7413 /* If followed by a bareword, see if it looks like indir obj. */ 7414 7415 if (!orig_keyword 7416 && (isIDFIRST_lazy_if(s,UTF) || *s == '$') 7417 && (tmp = intuit_method(s, gv, cv))) { 7418 op_free(rv2cv_op); 7419 if (tmp == METHOD && !PL_lex_allbrackets && 7420 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 7421 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 7422 return REPORT(tmp); 7423 } 7424 7425 /* Not a method, so call it a subroutine (if defined) */ 7426 7427 if (cv) { 7428 if (lastchar == '-' && penultchar != '-') { 7429 const STRLEN l = len ? len : strlen(PL_tokenbuf); 7430 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), 7431 "Ambiguous use of -%"UTF8f" resolved as -&%"UTF8f"()", 7432 UTF8fARG(UTF, l, PL_tokenbuf), 7433 UTF8fARG(UTF, l, PL_tokenbuf)); 7434 } 7435 /* Check for a constant sub */ 7436 if ((sv = cv_const_sv_or_av(cv))) { 7437 its_constant: 7438 op_free(rv2cv_op); 7439 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv); 7440 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv); 7441 if (SvTYPE(sv) == SVt_PVAV) 7442 pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS, 7443 pl_yylval.opval); 7444 else { 7445 pl_yylval.opval->op_private = 0; 7446 pl_yylval.opval->op_folded = 1; 7447 pl_yylval.opval->op_flags |= OPf_SPECIAL; 7448 } 7449 TOKEN(WORD); 7450 } 7451 7452 op_free(pl_yylval.opval); 7453 pl_yylval.opval = 7454 off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op; 7455 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN; 7456 PL_last_lop = PL_oldbufptr; 7457 PL_last_lop_op = OP_ENTERSUB; 7458 /* Is there a prototype? */ 7459 if ( 7460 #ifdef PERL_MAD 7461 cv && 7462 #endif 7463 SvPOK(cv)) 7464 { 7465 STRLEN protolen = CvPROTOLEN(cv); 7466 const char *proto = CvPROTO(cv); 7467 bool optional; 7468 proto = S_strip_spaces(aTHX_ proto, &protolen); 7469 if (!protolen) 7470 TERM(FUNC0SUB); 7471 if ((optional = *proto == ';')) 7472 do 7473 proto++; 7474 while (*proto == ';'); 7475 if ( 7476 ( 7477 ( 7478 *proto == '$' || *proto == '_' 7479 || *proto == '*' || *proto == '+' 7480 ) 7481 && proto[1] == '\0' 7482 ) 7483 || ( 7484 *proto == '\\' && proto[1] && proto[2] == '\0' 7485 ) 7486 ) 7487 UNIPROTO(UNIOPSUB,optional); 7488 if (*proto == '\\' && proto[1] == '[') { 7489 const char *p = proto + 2; 7490 while(*p && *p != ']') 7491 ++p; 7492 if(*p == ']' && !p[1]) 7493 UNIPROTO(UNIOPSUB,optional); 7494 } 7495 if (*proto == '&' && *s == '{') { 7496 if (PL_curstash) 7497 sv_setpvs(PL_subname, "__ANON__"); 7498 else 7499 sv_setpvs(PL_subname, "__ANON__::__ANON__"); 7500 if (!PL_lex_allbrackets && 7501 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 7502 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 7503 PREBLOCK(LSTOPSUB); 7504 } 7505 } 7506 #ifdef PERL_MAD 7507 { 7508 if (PL_madskills) { 7509 PL_nextwhite = PL_thiswhite; 7510 PL_thiswhite = 0; 7511 } 7512 start_force(PL_curforce); 7513 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval; 7514 PL_expect = XTERM; 7515 if (PL_madskills) { 7516 PL_nextwhite = nextPL_nextwhite; 7517 curmad('X', PL_thistoken); 7518 PL_thistoken = newSVpvs(""); 7519 } 7520 force_next(off ? PRIVATEREF : WORD); 7521 if (!PL_lex_allbrackets && 7522 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 7523 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 7524 TOKEN(NOAMP); 7525 } 7526 } 7527 7528 /* Guess harder when madskills require "best effort". */ 7529 if (PL_madskills && (!gv || !GvCVu(gv))) { 7530 int probable_sub = 0; 7531 if (strchr("\"'`$@%0123456789!*+{[<", *s)) 7532 probable_sub = 1; 7533 else if (isALPHA(*s)) { 7534 char tmpbuf[1024]; 7535 STRLEN tmplen; 7536 d = s; 7537 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen); 7538 if (!keyword(tmpbuf, tmplen, 0)) 7539 probable_sub = 1; 7540 else { 7541 while (d < PL_bufend && isSPACE(*d)) 7542 d++; 7543 if (*d == '=' && d[1] == '>') 7544 probable_sub = 1; 7545 } 7546 } 7547 if (probable_sub) { 7548 gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), 7549 SVt_PVCV); 7550 op_free(pl_yylval.opval); 7551 pl_yylval.opval = 7552 off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op; 7553 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN; 7554 PL_last_lop = PL_oldbufptr; 7555 PL_last_lop_op = OP_ENTERSUB; 7556 PL_nextwhite = PL_thiswhite; 7557 PL_thiswhite = 0; 7558 start_force(PL_curforce); 7559 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval; 7560 PL_expect = XTERM; 7561 PL_nextwhite = nextPL_nextwhite; 7562 curmad('X', PL_thistoken); 7563 PL_thistoken = newSVpvs(""); 7564 force_next(off ? PRIVATEREF : WORD); 7565 if (!PL_lex_allbrackets && 7566 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 7567 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 7568 TOKEN(NOAMP); 7569 } 7570 #else 7571 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval; 7572 PL_expect = XTERM; 7573 force_next(off ? PRIVATEREF : WORD); 7574 if (!PL_lex_allbrackets && 7575 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 7576 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 7577 TOKEN(NOAMP); 7578 #endif 7579 } 7580 7581 /* Call it a bare word */ 7582 7583 if (PL_hints & HINT_STRICT_SUBS) 7584 pl_yylval.opval->op_private |= OPpCONST_STRICT; 7585 else { 7586 bareword: 7587 /* after "print" and similar functions (corresponding to 7588 * "F? L" in opcode.pl), whatever wasn't already parsed as 7589 * a filehandle should be subject to "strict subs". 7590 * Likewise for the optional indirect-object argument to system 7591 * or exec, which can't be a bareword */ 7592 if ((PL_last_lop_op == OP_PRINT 7593 || PL_last_lop_op == OP_PRTF 7594 || PL_last_lop_op == OP_SAY 7595 || PL_last_lop_op == OP_SYSTEM 7596 || PL_last_lop_op == OP_EXEC) 7597 && (PL_hints & HINT_STRICT_SUBS)) 7598 pl_yylval.opval->op_private |= OPpCONST_STRICT; 7599 if (lastchar != '-') { 7600 if (ckWARN(WARN_RESERVED)) { 7601 d = PL_tokenbuf; 7602 while (isLOWER(*d)) 7603 d++; 7604 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0)) 7605 { 7606 /* PL_warn_reserved is constant */ 7607 GCC_DIAG_IGNORE(-Wformat-nonliteral); 7608 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved, 7609 PL_tokenbuf); 7610 GCC_DIAG_RESTORE; 7611 } 7612 } 7613 } 7614 } 7615 op_free(rv2cv_op); 7616 7617 safe_bareword: 7618 if ((lastchar == '*' || lastchar == '%' || lastchar == '&') 7619 && saw_infix_sigil) { 7620 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), 7621 "Operator or semicolon missing before %c%"UTF8f, 7622 lastchar, 7623 UTF8fARG(UTF, strlen(PL_tokenbuf), 7624 PL_tokenbuf)); 7625 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), 7626 "Ambiguous use of %c resolved as operator %c", 7627 lastchar, lastchar); 7628 } 7629 TOKEN(WORD); 7630 } 7631 7632 case KEY___FILE__: 7633 FUN0OP( 7634 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0)) 7635 ); 7636 7637 case KEY___LINE__: 7638 FUN0OP( 7639 (OP*)newSVOP(OP_CONST, 0, 7640 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop))) 7641 ); 7642 7643 case KEY___PACKAGE__: 7644 FUN0OP( 7645 (OP*)newSVOP(OP_CONST, 0, 7646 (PL_curstash 7647 ? newSVhek(HvNAME_HEK(PL_curstash)) 7648 : &PL_sv_undef)) 7649 ); 7650 7651 case KEY___DATA__: 7652 case KEY___END__: { 7653 GV *gv; 7654 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) { 7655 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash 7656 ? PL_curstash 7657 : PL_defstash; 7658 gv = (GV *)*hv_fetchs(stash, "DATA", 1); 7659 if (!isGV(gv)) 7660 gv_init(gv,stash,"DATA",4,0); 7661 GvMULTI_on(gv); 7662 if (!GvIO(gv)) 7663 GvIOp(gv) = newIO(); 7664 IoIFP(GvIOp(gv)) = PL_rsfp; 7665 #if defined(HAS_FCNTL) && defined(F_SETFD) 7666 { 7667 const int fd = PerlIO_fileno(PL_rsfp); 7668 fcntl(fd,F_SETFD,fd >= 3); 7669 } 7670 #endif 7671 /* Mark this internal pseudo-handle as clean */ 7672 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT; 7673 if ((PerlIO*)PL_rsfp == PerlIO_stdin()) 7674 IoTYPE(GvIOp(gv)) = IoTYPE_STD; 7675 else 7676 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY; 7677 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS) 7678 /* if the script was opened in binmode, we need to revert 7679 * it to text mode for compatibility; but only iff it has CRs 7680 * XXX this is a questionable hack at best. */ 7681 if (PL_bufend-PL_bufptr > 2 7682 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r') 7683 { 7684 Off_t loc = 0; 7685 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) { 7686 loc = PerlIO_tell(PL_rsfp); 7687 (void)PerlIO_seek(PL_rsfp, 0L, 0); 7688 } 7689 #ifdef NETWARE 7690 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) { 7691 #else 7692 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) { 7693 #endif /* NETWARE */ 7694 if (loc > 0) 7695 PerlIO_seek(PL_rsfp, loc, 0); 7696 } 7697 } 7698 #endif 7699 #ifdef PERLIO_LAYERS 7700 if (!IN_BYTES) { 7701 if (UTF) 7702 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8"); 7703 else if (PL_encoding) { 7704 SV *name; 7705 dSP; 7706 ENTER; 7707 SAVETMPS; 7708 PUSHMARK(sp); 7709 XPUSHs(PL_encoding); 7710 PUTBACK; 7711 call_method("name", G_SCALAR); 7712 SPAGAIN; 7713 name = POPs; 7714 PUTBACK; 7715 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, 7716 Perl_form(aTHX_ ":encoding(%"SVf")", 7717 SVfARG(name))); 7718 FREETMPS; 7719 LEAVE; 7720 } 7721 } 7722 #endif 7723 #ifdef PERL_MAD 7724 if (PL_madskills) { 7725 if (PL_realtokenstart >= 0) { 7726 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart; 7727 if (!PL_endwhite) 7728 PL_endwhite = newSVpvs(""); 7729 sv_catsv(PL_endwhite, PL_thiswhite); 7730 PL_thiswhite = 0; 7731 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart); 7732 PL_realtokenstart = -1; 7733 } 7734 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite))) 7735 != NULL) ; 7736 } 7737 #endif 7738 PL_rsfp = NULL; 7739 } 7740 goto fake_eof; 7741 } 7742 7743 case KEY___SUB__: 7744 FUN0OP(newPVOP(OP_RUNCV,0,NULL)); 7745 7746 case KEY_AUTOLOAD: 7747 case KEY_DESTROY: 7748 case KEY_BEGIN: 7749 case KEY_UNITCHECK: 7750 case KEY_CHECK: 7751 case KEY_INIT: 7752 case KEY_END: 7753 if (PL_expect == XSTATE) { 7754 s = PL_bufptr; 7755 goto really_sub; 7756 } 7757 goto just_a_word; 7758 7759 case_KEY_CORE: 7760 { 7761 STRLEN olen = len; 7762 d = s; 7763 s += 2; 7764 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len); 7765 if ((*s == ':' && s[1] == ':') 7766 || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\'')) 7767 { 7768 s = d; 7769 len = olen; 7770 Copy(PL_bufptr, PL_tokenbuf, olen, char); 7771 goto just_a_word; 7772 } 7773 if (!tmp) 7774 Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword", 7775 UTF8fARG(UTF, len, PL_tokenbuf)); 7776 if (tmp < 0) 7777 tmp = -tmp; 7778 else if (tmp == KEY_require || tmp == KEY_do 7779 || tmp == KEY_glob) 7780 /* that's a way to remember we saw "CORE::" */ 7781 orig_keyword = tmp; 7782 goto reserved_word; 7783 } 7784 7785 case KEY_abs: 7786 UNI(OP_ABS); 7787 7788 case KEY_alarm: 7789 UNI(OP_ALARM); 7790 7791 case KEY_accept: 7792 LOP(OP_ACCEPT,XTERM); 7793 7794 case KEY_and: 7795 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC) 7796 return REPORT(0); 7797 OPERATOR(ANDOP); 7798 7799 case KEY_atan2: 7800 LOP(OP_ATAN2,XTERM); 7801 7802 case KEY_bind: 7803 LOP(OP_BIND,XTERM); 7804 7805 case KEY_binmode: 7806 LOP(OP_BINMODE,XTERM); 7807 7808 case KEY_bless: 7809 LOP(OP_BLESS,XTERM); 7810 7811 case KEY_break: 7812 FUN0(OP_BREAK); 7813 7814 case KEY_chop: 7815 UNI(OP_CHOP); 7816 7817 case KEY_continue: 7818 /* We have to disambiguate the two senses of 7819 "continue". If the next token is a '{' then 7820 treat it as the start of a continue block; 7821 otherwise treat it as a control operator. 7822 */ 7823 s = skipspace(s); 7824 if (*s == '{') 7825 PREBLOCK(CONTINUE); 7826 else 7827 FUN0(OP_CONTINUE); 7828 7829 case KEY_chdir: 7830 /* may use HOME */ 7831 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV); 7832 UNI(OP_CHDIR); 7833 7834 case KEY_close: 7835 UNI(OP_CLOSE); 7836 7837 case KEY_closedir: 7838 UNI(OP_CLOSEDIR); 7839 7840 case KEY_cmp: 7841 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 7842 return REPORT(0); 7843 Eop(OP_SCMP); 7844 7845 case KEY_caller: 7846 UNI(OP_CALLER); 7847 7848 case KEY_crypt: 7849 #ifdef FCRYPT 7850 if (!PL_cryptseen) { 7851 PL_cryptseen = TRUE; 7852 init_des(); 7853 } 7854 #endif 7855 LOP(OP_CRYPT,XTERM); 7856 7857 case KEY_chmod: 7858 LOP(OP_CHMOD,XTERM); 7859 7860 case KEY_chown: 7861 LOP(OP_CHOWN,XTERM); 7862 7863 case KEY_connect: 7864 LOP(OP_CONNECT,XTERM); 7865 7866 case KEY_chr: 7867 UNI(OP_CHR); 7868 7869 case KEY_cos: 7870 UNI(OP_COS); 7871 7872 case KEY_chroot: 7873 UNI(OP_CHROOT); 7874 7875 case KEY_default: 7876 PREBLOCK(DEFAULT); 7877 7878 case KEY_do: 7879 s = SKIPSPACE1(s); 7880 if (*s == '{') 7881 PRETERMBLOCK(DO); 7882 if (*s != '\'') { 7883 *PL_tokenbuf = '&'; 7884 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, 7885 1, &len); 7886 if (len && (len != 4 || strNE(PL_tokenbuf+1, "CORE")) 7887 && !keyword(PL_tokenbuf + 1, len, 0)) { 7888 d = SKIPSPACE1(d); 7889 if (*d == '(') { 7890 force_ident_maybe_lex('&'); 7891 s = d; 7892 } 7893 } 7894 } 7895 if (orig_keyword == KEY_do) { 7896 orig_keyword = 0; 7897 pl_yylval.ival = 1; 7898 } 7899 else 7900 pl_yylval.ival = 0; 7901 OPERATOR(DO); 7902 7903 case KEY_die: 7904 PL_hints |= HINT_BLOCK_SCOPE; 7905 LOP(OP_DIE,XTERM); 7906 7907 case KEY_defined: 7908 UNI(OP_DEFINED); 7909 7910 case KEY_delete: 7911 UNI(OP_DELETE); 7912 7913 case KEY_dbmopen: 7914 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"), 7915 STR_WITH_LEN("NDBM_File::"), 7916 STR_WITH_LEN("DB_File::"), 7917 STR_WITH_LEN("GDBM_File::"), 7918 STR_WITH_LEN("SDBM_File::"), 7919 STR_WITH_LEN("ODBM_File::"), 7920 NULL); 7921 LOP(OP_DBMOPEN,XTERM); 7922 7923 case KEY_dbmclose: 7924 UNI(OP_DBMCLOSE); 7925 7926 case KEY_dump: 7927 PL_expect = XOPERATOR; 7928 s = force_word(s,WORD,TRUE,FALSE); 7929 LOOPX(OP_DUMP); 7930 7931 case KEY_else: 7932 PREBLOCK(ELSE); 7933 7934 case KEY_elsif: 7935 pl_yylval.ival = CopLINE(PL_curcop); 7936 OPERATOR(ELSIF); 7937 7938 case KEY_eq: 7939 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 7940 return REPORT(0); 7941 Eop(OP_SEQ); 7942 7943 case KEY_exists: 7944 UNI(OP_EXISTS); 7945 7946 case KEY_exit: 7947 if (PL_madskills) 7948 UNI(OP_INT); 7949 UNI(OP_EXIT); 7950 7951 case KEY_eval: 7952 s = SKIPSPACE1(s); 7953 if (*s == '{') { /* block eval */ 7954 PL_expect = XTERMBLOCK; 7955 UNIBRACK(OP_ENTERTRY); 7956 } 7957 else { /* string eval */ 7958 PL_expect = XTERM; 7959 UNIBRACK(OP_ENTEREVAL); 7960 } 7961 7962 case KEY_evalbytes: 7963 PL_expect = XTERM; 7964 UNIBRACK(-OP_ENTEREVAL); 7965 7966 case KEY_eof: 7967 UNI(OP_EOF); 7968 7969 case KEY_exp: 7970 UNI(OP_EXP); 7971 7972 case KEY_each: 7973 UNI(OP_EACH); 7974 7975 case KEY_exec: 7976 LOP(OP_EXEC,XREF); 7977 7978 case KEY_endhostent: 7979 FUN0(OP_EHOSTENT); 7980 7981 case KEY_endnetent: 7982 FUN0(OP_ENETENT); 7983 7984 case KEY_endservent: 7985 FUN0(OP_ESERVENT); 7986 7987 case KEY_endprotoent: 7988 FUN0(OP_EPROTOENT); 7989 7990 case KEY_endpwent: 7991 FUN0(OP_EPWENT); 7992 7993 case KEY_endgrent: 7994 FUN0(OP_EGRENT); 7995 7996 case KEY_for: 7997 case KEY_foreach: 7998 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 7999 return REPORT(0); 8000 pl_yylval.ival = CopLINE(PL_curcop); 8001 s = SKIPSPACE1(s); 8002 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) { 8003 char *p = s; 8004 #ifdef PERL_MAD 8005 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */ 8006 #endif 8007 8008 if ((PL_bufend - p) >= 3 && 8009 strnEQ(p, "my", 2) && isSPACE(*(p + 2))) 8010 p += 2; 8011 else if ((PL_bufend - p) >= 4 && 8012 strnEQ(p, "our", 3) && isSPACE(*(p + 3))) 8013 p += 3; 8014 p = PEEKSPACE(p); 8015 /* skip optional package name, as in "for my abc $x (..)" */ 8016 if (isIDFIRST_lazy_if(p,UTF)) { 8017 p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len); 8018 p = PEEKSPACE(p); 8019 } 8020 if (*p != '$') 8021 Perl_croak(aTHX_ "Missing $ on loop variable"); 8022 #ifdef PERL_MAD 8023 s = SvPVX(PL_linestr) + soff; 8024 #endif 8025 } 8026 OPERATOR(FOR); 8027 8028 case KEY_formline: 8029 LOP(OP_FORMLINE,XTERM); 8030 8031 case KEY_fork: 8032 FUN0(OP_FORK); 8033 8034 case KEY_fc: 8035 UNI(OP_FC); 8036 8037 case KEY_fcntl: 8038 LOP(OP_FCNTL,XTERM); 8039 8040 case KEY_fileno: 8041 UNI(OP_FILENO); 8042 8043 case KEY_flock: 8044 LOP(OP_FLOCK,XTERM); 8045 8046 case KEY_gt: 8047 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 8048 return REPORT(0); 8049 Rop(OP_SGT); 8050 8051 case KEY_ge: 8052 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 8053 return REPORT(0); 8054 Rop(OP_SGE); 8055 8056 case KEY_grep: 8057 LOP(OP_GREPSTART, XREF); 8058 8059 case KEY_goto: 8060 PL_expect = XOPERATOR; 8061 s = force_word(s,WORD,TRUE,FALSE); 8062 LOOPX(OP_GOTO); 8063 8064 case KEY_gmtime: 8065 UNI(OP_GMTIME); 8066 8067 case KEY_getc: 8068 UNIDOR(OP_GETC); 8069 8070 case KEY_getppid: 8071 FUN0(OP_GETPPID); 8072 8073 case KEY_getpgrp: 8074 UNI(OP_GETPGRP); 8075 8076 case KEY_getpriority: 8077 LOP(OP_GETPRIORITY,XTERM); 8078 8079 case KEY_getprotobyname: 8080 UNI(OP_GPBYNAME); 8081 8082 case KEY_getprotobynumber: 8083 LOP(OP_GPBYNUMBER,XTERM); 8084 8085 case KEY_getprotoent: 8086 FUN0(OP_GPROTOENT); 8087 8088 case KEY_getpwent: 8089 FUN0(OP_GPWENT); 8090 8091 case KEY_getpwnam: 8092 UNI(OP_GPWNAM); 8093 8094 case KEY_getpwuid: 8095 UNI(OP_GPWUID); 8096 8097 case KEY_getpeername: 8098 UNI(OP_GETPEERNAME); 8099 8100 case KEY_gethostbyname: 8101 UNI(OP_GHBYNAME); 8102 8103 case KEY_gethostbyaddr: 8104 LOP(OP_GHBYADDR,XTERM); 8105 8106 case KEY_gethostent: 8107 FUN0(OP_GHOSTENT); 8108 8109 case KEY_getnetbyname: 8110 UNI(OP_GNBYNAME); 8111 8112 case KEY_getnetbyaddr: 8113 LOP(OP_GNBYADDR,XTERM); 8114 8115 case KEY_getnetent: 8116 FUN0(OP_GNETENT); 8117 8118 case KEY_getservbyname: 8119 LOP(OP_GSBYNAME,XTERM); 8120 8121 case KEY_getservbyport: 8122 LOP(OP_GSBYPORT,XTERM); 8123 8124 case KEY_getservent: 8125 FUN0(OP_GSERVENT); 8126 8127 case KEY_getsockname: 8128 UNI(OP_GETSOCKNAME); 8129 8130 case KEY_getsockopt: 8131 LOP(OP_GSOCKOPT,XTERM); 8132 8133 case KEY_getgrent: 8134 FUN0(OP_GGRENT); 8135 8136 case KEY_getgrnam: 8137 UNI(OP_GGRNAM); 8138 8139 case KEY_getgrgid: 8140 UNI(OP_GGRGID); 8141 8142 case KEY_getlogin: 8143 FUN0(OP_GETLOGIN); 8144 8145 case KEY_given: 8146 pl_yylval.ival = CopLINE(PL_curcop); 8147 Perl_ck_warner_d(aTHX_ 8148 packWARN(WARN_EXPERIMENTAL__SMARTMATCH), 8149 "given is experimental"); 8150 OPERATOR(GIVEN); 8151 8152 case KEY_glob: 8153 LOP( 8154 orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB, 8155 XTERM 8156 ); 8157 8158 case KEY_hex: 8159 UNI(OP_HEX); 8160 8161 case KEY_if: 8162 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 8163 return REPORT(0); 8164 pl_yylval.ival = CopLINE(PL_curcop); 8165 OPERATOR(IF); 8166 8167 case KEY_index: 8168 LOP(OP_INDEX,XTERM); 8169 8170 case KEY_int: 8171 UNI(OP_INT); 8172 8173 case KEY_ioctl: 8174 LOP(OP_IOCTL,XTERM); 8175 8176 case KEY_join: 8177 LOP(OP_JOIN,XTERM); 8178 8179 case KEY_keys: 8180 UNI(OP_KEYS); 8181 8182 case KEY_kill: 8183 LOP(OP_KILL,XTERM); 8184 8185 case KEY_last: 8186 PL_expect = XOPERATOR; 8187 s = force_word(s,WORD,TRUE,FALSE); 8188 LOOPX(OP_LAST); 8189 8190 case KEY_lc: 8191 UNI(OP_LC); 8192 8193 case KEY_lcfirst: 8194 UNI(OP_LCFIRST); 8195 8196 case KEY_local: 8197 pl_yylval.ival = 0; 8198 OPERATOR(LOCAL); 8199 8200 case KEY_length: 8201 UNI(OP_LENGTH); 8202 8203 case KEY_lt: 8204 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 8205 return REPORT(0); 8206 Rop(OP_SLT); 8207 8208 case KEY_le: 8209 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 8210 return REPORT(0); 8211 Rop(OP_SLE); 8212 8213 case KEY_localtime: 8214 UNI(OP_LOCALTIME); 8215 8216 case KEY_log: 8217 UNI(OP_LOG); 8218 8219 case KEY_link: 8220 LOP(OP_LINK,XTERM); 8221 8222 case KEY_listen: 8223 LOP(OP_LISTEN,XTERM); 8224 8225 case KEY_lock: 8226 UNI(OP_LOCK); 8227 8228 case KEY_lstat: 8229 UNI(OP_LSTAT); 8230 8231 case KEY_m: 8232 s = scan_pat(s,OP_MATCH); 8233 TERM(sublex_start()); 8234 8235 case KEY_map: 8236 LOP(OP_MAPSTART, XREF); 8237 8238 case KEY_mkdir: 8239 LOP(OP_MKDIR,XTERM); 8240 8241 case KEY_msgctl: 8242 LOP(OP_MSGCTL,XTERM); 8243 8244 case KEY_msgget: 8245 LOP(OP_MSGGET,XTERM); 8246 8247 case KEY_msgrcv: 8248 LOP(OP_MSGRCV,XTERM); 8249 8250 case KEY_msgsnd: 8251 LOP(OP_MSGSND,XTERM); 8252 8253 case KEY_our: 8254 case KEY_my: 8255 case KEY_state: 8256 PL_in_my = (U16)tmp; 8257 s = SKIPSPACE1(s); 8258 if (isIDFIRST_lazy_if(s,UTF)) { 8259 #ifdef PERL_MAD 8260 char* start = s; 8261 #endif 8262 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len); 8263 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3)) 8264 { 8265 if (!FEATURE_LEXSUBS_IS_ENABLED) 8266 Perl_croak(aTHX_ 8267 "Experimental \"%s\" subs not enabled", 8268 tmp == KEY_my ? "my" : 8269 tmp == KEY_state ? "state" : "our"); 8270 Perl_ck_warner_d(aTHX_ 8271 packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS), 8272 "The lexical_subs feature is experimental"); 8273 goto really_sub; 8274 } 8275 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len); 8276 if (!PL_in_my_stash) { 8277 char tmpbuf[1024]; 8278 PL_bufptr = s; 8279 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf); 8280 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0); 8281 } 8282 #ifdef PERL_MAD 8283 if (PL_madskills) { /* just add type to declarator token */ 8284 sv_catsv(PL_thistoken, PL_nextwhite); 8285 PL_nextwhite = 0; 8286 sv_catpvn(PL_thistoken, start, s - start); 8287 } 8288 #endif 8289 } 8290 pl_yylval.ival = 1; 8291 OPERATOR(MY); 8292 8293 case KEY_next: 8294 PL_expect = XOPERATOR; 8295 s = force_word(s,WORD,TRUE,FALSE); 8296 LOOPX(OP_NEXT); 8297 8298 case KEY_ne: 8299 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 8300 return REPORT(0); 8301 Eop(OP_SNE); 8302 8303 case KEY_no: 8304 s = tokenize_use(0, s); 8305 TERM(USE); 8306 8307 case KEY_not: 8308 if (*s == '(' || (s = SKIPSPACE1(s), *s == '(')) 8309 FUN1(OP_NOT); 8310 else { 8311 if (!PL_lex_allbrackets && 8312 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 8313 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 8314 OPERATOR(NOTOP); 8315 } 8316 8317 case KEY_open: 8318 s = SKIPSPACE1(s); 8319 if (isIDFIRST_lazy_if(s,UTF)) { 8320 const char *t; 8321 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, 8322 &len); 8323 for (t=d; isSPACE(*t);) 8324 t++; 8325 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE) 8326 /* [perl #16184] */ 8327 && !(t[0] == '=' && t[1] == '>') 8328 && !(t[0] == ':' && t[1] == ':') 8329 && !keyword(s, d-s, 0) 8330 ) { 8331 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE), 8332 "Precedence problem: open %"UTF8f" should be open(%"UTF8f")", 8333 UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s)); 8334 } 8335 } 8336 LOP(OP_OPEN,XTERM); 8337 8338 case KEY_or: 8339 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC) 8340 return REPORT(0); 8341 pl_yylval.ival = OP_OR; 8342 OPERATOR(OROP); 8343 8344 case KEY_ord: 8345 UNI(OP_ORD); 8346 8347 case KEY_oct: 8348 UNI(OP_OCT); 8349 8350 case KEY_opendir: 8351 LOP(OP_OPEN_DIR,XTERM); 8352 8353 case KEY_print: 8354 checkcomma(s,PL_tokenbuf,"filehandle"); 8355 LOP(OP_PRINT,XREF); 8356 8357 case KEY_printf: 8358 checkcomma(s,PL_tokenbuf,"filehandle"); 8359 LOP(OP_PRTF,XREF); 8360 8361 case KEY_prototype: 8362 UNI(OP_PROTOTYPE); 8363 8364 case KEY_push: 8365 LOP(OP_PUSH,XTERM); 8366 8367 case KEY_pop: 8368 UNIDOR(OP_POP); 8369 8370 case KEY_pos: 8371 UNIDOR(OP_POS); 8372 8373 case KEY_pack: 8374 LOP(OP_PACK,XTERM); 8375 8376 case KEY_package: 8377 s = force_word(s,WORD,FALSE,TRUE); 8378 s = SKIPSPACE1(s); 8379 s = force_strict_version(s); 8380 PL_lex_expect = XBLOCK; 8381 OPERATOR(PACKAGE); 8382 8383 case KEY_pipe: 8384 LOP(OP_PIPE_OP,XTERM); 8385 8386 case KEY_q: 8387 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL); 8388 if (!s) 8389 missingterm(NULL); 8390 COPLINE_SET_FROM_MULTI_END; 8391 pl_yylval.ival = OP_CONST; 8392 TERM(sublex_start()); 8393 8394 case KEY_quotemeta: 8395 UNI(OP_QUOTEMETA); 8396 8397 case KEY_qw: { 8398 OP *words = NULL; 8399 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL); 8400 if (!s) 8401 missingterm(NULL); 8402 COPLINE_SET_FROM_MULTI_END; 8403 PL_expect = XOPERATOR; 8404 if (SvCUR(PL_lex_stuff)) { 8405 int warned_comma = !ckWARN(WARN_QW); 8406 int warned_comment = warned_comma; 8407 d = SvPV_force(PL_lex_stuff, len); 8408 while (len) { 8409 for (; isSPACE(*d) && len; --len, ++d) 8410 /**/; 8411 if (len) { 8412 SV *sv; 8413 const char *b = d; 8414 if (!warned_comma || !warned_comment) { 8415 for (; !isSPACE(*d) && len; --len, ++d) { 8416 if (!warned_comma && *d == ',') { 8417 Perl_warner(aTHX_ packWARN(WARN_QW), 8418 "Possible attempt to separate words with commas"); 8419 ++warned_comma; 8420 } 8421 else if (!warned_comment && *d == '#') { 8422 Perl_warner(aTHX_ packWARN(WARN_QW), 8423 "Possible attempt to put comments in qw() list"); 8424 ++warned_comment; 8425 } 8426 } 8427 } 8428 else { 8429 for (; !isSPACE(*d) && len; --len, ++d) 8430 /**/; 8431 } 8432 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff)); 8433 words = op_append_elem(OP_LIST, words, 8434 newSVOP(OP_CONST, 0, tokeq(sv))); 8435 } 8436 } 8437 } 8438 if (!words) 8439 words = newNULLLIST(); 8440 if (PL_lex_stuff) { 8441 SvREFCNT_dec(PL_lex_stuff); 8442 PL_lex_stuff = NULL; 8443 } 8444 PL_expect = XOPERATOR; 8445 pl_yylval.opval = sawparens(words); 8446 TOKEN(QWLIST); 8447 } 8448 8449 case KEY_qq: 8450 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL); 8451 if (!s) 8452 missingterm(NULL); 8453 pl_yylval.ival = OP_STRINGIFY; 8454 if (SvIVX(PL_lex_stuff) == '\'') 8455 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */ 8456 TERM(sublex_start()); 8457 8458 case KEY_qr: 8459 s = scan_pat(s,OP_QR); 8460 TERM(sublex_start()); 8461 8462 case KEY_qx: 8463 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL); 8464 if (!s) 8465 missingterm(NULL); 8466 pl_yylval.ival = OP_BACKTICK; 8467 TERM(sublex_start()); 8468 8469 case KEY_return: 8470 OLDLOP(OP_RETURN); 8471 8472 case KEY_require: 8473 s = SKIPSPACE1(s); 8474 PL_expect = XOPERATOR; 8475 if (isDIGIT(*s)) { 8476 s = force_version(s, FALSE); 8477 } 8478 else if (*s != 'v' || !isDIGIT(s[1]) 8479 || (s = force_version(s, TRUE), *s == 'v')) 8480 { 8481 *PL_tokenbuf = '\0'; 8482 s = force_word(s,WORD,TRUE,TRUE); 8483 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF)) 8484 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), 8485 GV_ADD | (UTF ? SVf_UTF8 : 0)); 8486 else if (*s == '<') 8487 yyerror("<> should be quotes"); 8488 } 8489 if (orig_keyword == KEY_require) { 8490 orig_keyword = 0; 8491 pl_yylval.ival = 1; 8492 } 8493 else 8494 pl_yylval.ival = 0; 8495 PL_expect = XTERM; 8496 PL_bufptr = s; 8497 PL_last_uni = PL_oldbufptr; 8498 PL_last_lop_op = OP_REQUIRE; 8499 s = skipspace(s); 8500 return REPORT( (int)REQUIRE ); 8501 8502 case KEY_reset: 8503 UNI(OP_RESET); 8504 8505 case KEY_redo: 8506 PL_expect = XOPERATOR; 8507 s = force_word(s,WORD,TRUE,FALSE); 8508 LOOPX(OP_REDO); 8509 8510 case KEY_rename: 8511 LOP(OP_RENAME,XTERM); 8512 8513 case KEY_rand: 8514 UNI(OP_RAND); 8515 8516 case KEY_rmdir: 8517 UNI(OP_RMDIR); 8518 8519 case KEY_rindex: 8520 LOP(OP_RINDEX,XTERM); 8521 8522 case KEY_read: 8523 LOP(OP_READ,XTERM); 8524 8525 case KEY_readdir: 8526 UNI(OP_READDIR); 8527 8528 case KEY_readline: 8529 UNIDOR(OP_READLINE); 8530 8531 case KEY_readpipe: 8532 UNIDOR(OP_BACKTICK); 8533 8534 case KEY_rewinddir: 8535 UNI(OP_REWINDDIR); 8536 8537 case KEY_recv: 8538 LOP(OP_RECV,XTERM); 8539 8540 case KEY_reverse: 8541 LOP(OP_REVERSE,XTERM); 8542 8543 case KEY_readlink: 8544 UNIDOR(OP_READLINK); 8545 8546 case KEY_ref: 8547 UNI(OP_REF); 8548 8549 case KEY_s: 8550 s = scan_subst(s); 8551 if (pl_yylval.opval) 8552 TERM(sublex_start()); 8553 else 8554 TOKEN(1); /* force error */ 8555 8556 case KEY_say: 8557 checkcomma(s,PL_tokenbuf,"filehandle"); 8558 LOP(OP_SAY,XREF); 8559 8560 case KEY_chomp: 8561 UNI(OP_CHOMP); 8562 8563 case KEY_scalar: 8564 UNI(OP_SCALAR); 8565 8566 case KEY_select: 8567 LOP(OP_SELECT,XTERM); 8568 8569 case KEY_seek: 8570 LOP(OP_SEEK,XTERM); 8571 8572 case KEY_semctl: 8573 LOP(OP_SEMCTL,XTERM); 8574 8575 case KEY_semget: 8576 LOP(OP_SEMGET,XTERM); 8577 8578 case KEY_semop: 8579 LOP(OP_SEMOP,XTERM); 8580 8581 case KEY_send: 8582 LOP(OP_SEND,XTERM); 8583 8584 case KEY_setpgrp: 8585 LOP(OP_SETPGRP,XTERM); 8586 8587 case KEY_setpriority: 8588 LOP(OP_SETPRIORITY,XTERM); 8589 8590 case KEY_sethostent: 8591 UNI(OP_SHOSTENT); 8592 8593 case KEY_setnetent: 8594 UNI(OP_SNETENT); 8595 8596 case KEY_setservent: 8597 UNI(OP_SSERVENT); 8598 8599 case KEY_setprotoent: 8600 UNI(OP_SPROTOENT); 8601 8602 case KEY_setpwent: 8603 FUN0(OP_SPWENT); 8604 8605 case KEY_setgrent: 8606 FUN0(OP_SGRENT); 8607 8608 case KEY_seekdir: 8609 LOP(OP_SEEKDIR,XTERM); 8610 8611 case KEY_setsockopt: 8612 LOP(OP_SSOCKOPT,XTERM); 8613 8614 case KEY_shift: 8615 UNIDOR(OP_SHIFT); 8616 8617 case KEY_shmctl: 8618 LOP(OP_SHMCTL,XTERM); 8619 8620 case KEY_shmget: 8621 LOP(OP_SHMGET,XTERM); 8622 8623 case KEY_shmread: 8624 LOP(OP_SHMREAD,XTERM); 8625 8626 case KEY_shmwrite: 8627 LOP(OP_SHMWRITE,XTERM); 8628 8629 case KEY_shutdown: 8630 LOP(OP_SHUTDOWN,XTERM); 8631 8632 case KEY_sin: 8633 UNI(OP_SIN); 8634 8635 case KEY_sleep: 8636 UNI(OP_SLEEP); 8637 8638 case KEY_socket: 8639 LOP(OP_SOCKET,XTERM); 8640 8641 case KEY_socketpair: 8642 LOP(OP_SOCKPAIR,XTERM); 8643 8644 case KEY_sort: 8645 checkcomma(s,PL_tokenbuf,"subroutine name"); 8646 s = SKIPSPACE1(s); 8647 PL_expect = XTERM; 8648 s = force_word(s,WORD,TRUE,TRUE); 8649 LOP(OP_SORT,XREF); 8650 8651 case KEY_split: 8652 LOP(OP_SPLIT,XTERM); 8653 8654 case KEY_sprintf: 8655 LOP(OP_SPRINTF,XTERM); 8656 8657 case KEY_splice: 8658 LOP(OP_SPLICE,XTERM); 8659 8660 case KEY_sqrt: 8661 UNI(OP_SQRT); 8662 8663 case KEY_srand: 8664 UNI(OP_SRAND); 8665 8666 case KEY_stat: 8667 UNI(OP_STAT); 8668 8669 case KEY_study: 8670 UNI(OP_STUDY); 8671 8672 case KEY_substr: 8673 LOP(OP_SUBSTR,XTERM); 8674 8675 case KEY_format: 8676 case KEY_sub: 8677 really_sub: 8678 { 8679 char * const tmpbuf = PL_tokenbuf + 1; 8680 expectation attrful; 8681 bool have_name, have_proto; 8682 const int key = tmp; 8683 #ifndef PERL_MAD 8684 SV *format_name = NULL; 8685 #endif 8686 8687 #ifdef PERL_MAD 8688 SV *tmpwhite = 0; 8689 8690 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart; 8691 SV *subtoken = PL_madskills 8692 ? newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr)) 8693 : NULL; 8694 PL_thistoken = 0; 8695 8696 d = s; 8697 s = SKIPSPACE2(s,tmpwhite); 8698 #else 8699 d = s; 8700 s = skipspace(s); 8701 #endif 8702 8703 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' || 8704 (*s == ':' && s[1] == ':')) 8705 { 8706 #ifdef PERL_MAD 8707 SV *nametoke = NULL; 8708 #endif 8709 8710 PL_expect = XBLOCK; 8711 attrful = XATTRBLOCK; 8712 d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE, 8713 &len); 8714 #ifdef PERL_MAD 8715 if (PL_madskills) 8716 nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr)); 8717 #else 8718 if (key == KEY_format) 8719 format_name = S_newSV_maybe_utf8(aTHX_ s, d - s); 8720 #endif 8721 *PL_tokenbuf = '&'; 8722 if (memchr(tmpbuf, ':', len) || key != KEY_sub 8723 || pad_findmy_pvn( 8724 PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0 8725 ) != NOT_IN_PAD) 8726 sv_setpvn(PL_subname, tmpbuf, len); 8727 else { 8728 sv_setsv(PL_subname,PL_curstname); 8729 sv_catpvs(PL_subname,"::"); 8730 sv_catpvn(PL_subname,tmpbuf,len); 8731 } 8732 if (SvUTF8(PL_linestr)) 8733 SvUTF8_on(PL_subname); 8734 have_name = TRUE; 8735 8736 8737 #ifdef PERL_MAD 8738 start_force(0); 8739 CURMAD('X', nametoke); 8740 CURMAD('_', tmpwhite); 8741 force_ident_maybe_lex('&'); 8742 8743 s = SKIPSPACE2(d,tmpwhite); 8744 #else 8745 s = skipspace(d); 8746 #endif 8747 } 8748 else { 8749 if (key == KEY_my || key == KEY_our || key==KEY_state) 8750 { 8751 *d = '\0'; 8752 /* diag_listed_as: Missing name in "%s sub" */ 8753 Perl_croak(aTHX_ 8754 "Missing name in \"%s\"", PL_bufptr); 8755 } 8756 PL_expect = XTERMBLOCK; 8757 attrful = XATTRTERM; 8758 sv_setpvs(PL_subname,"?"); 8759 have_name = FALSE; 8760 } 8761 8762 if (key == KEY_format) { 8763 #ifdef PERL_MAD 8764 PL_thistoken = subtoken; 8765 s = d; 8766 #else 8767 if (format_name) { 8768 start_force(PL_curforce); 8769 NEXTVAL_NEXTTOKE.opval 8770 = (OP*)newSVOP(OP_CONST,0, format_name); 8771 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE; 8772 force_next(WORD); 8773 } 8774 #endif 8775 PREBLOCK(FORMAT); 8776 } 8777 8778 /* Look for a prototype */ 8779 if (*s == '(' && !FEATURE_SIGNATURES_IS_ENABLED) { 8780 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL); 8781 COPLINE_SET_FROM_MULTI_END; 8782 if (!s) 8783 Perl_croak(aTHX_ "Prototype not terminated"); 8784 (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO)); 8785 have_proto = TRUE; 8786 8787 #ifdef PERL_MAD 8788 start_force(0); 8789 CURMAD('q', PL_thisopen); 8790 CURMAD('_', tmpwhite); 8791 CURMAD('=', PL_thisstuff); 8792 CURMAD('Q', PL_thisclose); 8793 NEXTVAL_NEXTTOKE.opval = 8794 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff); 8795 PL_lex_stuff = NULL; 8796 force_next(THING); 8797 8798 s = SKIPSPACE2(s,tmpwhite); 8799 #else 8800 s = skipspace(s); 8801 #endif 8802 } 8803 else 8804 have_proto = FALSE; 8805 8806 if (*s == ':' && s[1] != ':') 8807 PL_expect = attrful; 8808 else if ((*s != '{' && *s != '(') && key == KEY_sub) { 8809 if (!have_name) 8810 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine"); 8811 else if (*s != ';' && *s != '}') 8812 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname)); 8813 } 8814 8815 #ifdef PERL_MAD 8816 start_force(0); 8817 if (tmpwhite) { 8818 if (PL_madskills) 8819 curmad('^', newSVpvs("")); 8820 CURMAD('_', tmpwhite); 8821 } 8822 force_next(0); 8823 8824 PL_thistoken = subtoken; 8825 PERL_UNUSED_VAR(have_proto); 8826 #else 8827 if (have_proto) { 8828 NEXTVAL_NEXTTOKE.opval = 8829 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff); 8830 PL_lex_stuff = NULL; 8831 force_next(THING); 8832 } 8833 #endif 8834 if (!have_name) { 8835 if (PL_curstash) 8836 sv_setpvs(PL_subname, "__ANON__"); 8837 else 8838 sv_setpvs(PL_subname, "__ANON__::__ANON__"); 8839 TOKEN(ANONSUB); 8840 } 8841 #ifndef PERL_MAD 8842 force_ident_maybe_lex('&'); 8843 #endif 8844 TOKEN(SUB); 8845 } 8846 8847 case KEY_system: 8848 LOP(OP_SYSTEM,XREF); 8849 8850 case KEY_symlink: 8851 LOP(OP_SYMLINK,XTERM); 8852 8853 case KEY_syscall: 8854 LOP(OP_SYSCALL,XTERM); 8855 8856 case KEY_sysopen: 8857 LOP(OP_SYSOPEN,XTERM); 8858 8859 case KEY_sysseek: 8860 LOP(OP_SYSSEEK,XTERM); 8861 8862 case KEY_sysread: 8863 LOP(OP_SYSREAD,XTERM); 8864 8865 case KEY_syswrite: 8866 LOP(OP_SYSWRITE,XTERM); 8867 8868 case KEY_tr: 8869 case KEY_y: 8870 s = scan_trans(s); 8871 TERM(sublex_start()); 8872 8873 case KEY_tell: 8874 UNI(OP_TELL); 8875 8876 case KEY_telldir: 8877 UNI(OP_TELLDIR); 8878 8879 case KEY_tie: 8880 LOP(OP_TIE,XTERM); 8881 8882 case KEY_tied: 8883 UNI(OP_TIED); 8884 8885 case KEY_time: 8886 FUN0(OP_TIME); 8887 8888 case KEY_times: 8889 FUN0(OP_TMS); 8890 8891 case KEY_truncate: 8892 LOP(OP_TRUNCATE,XTERM); 8893 8894 case KEY_uc: 8895 UNI(OP_UC); 8896 8897 case KEY_ucfirst: 8898 UNI(OP_UCFIRST); 8899 8900 case KEY_untie: 8901 UNI(OP_UNTIE); 8902 8903 case KEY_until: 8904 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 8905 return REPORT(0); 8906 pl_yylval.ival = CopLINE(PL_curcop); 8907 OPERATOR(UNTIL); 8908 8909 case KEY_unless: 8910 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 8911 return REPORT(0); 8912 pl_yylval.ival = CopLINE(PL_curcop); 8913 OPERATOR(UNLESS); 8914 8915 case KEY_unlink: 8916 LOP(OP_UNLINK,XTERM); 8917 8918 case KEY_undef: 8919 UNIDOR(OP_UNDEF); 8920 8921 case KEY_unpack: 8922 LOP(OP_UNPACK,XTERM); 8923 8924 case KEY_utime: 8925 LOP(OP_UTIME,XTERM); 8926 8927 case KEY_umask: 8928 UNIDOR(OP_UMASK); 8929 8930 case KEY_unshift: 8931 LOP(OP_UNSHIFT,XTERM); 8932 8933 case KEY_use: 8934 s = tokenize_use(1, s); 8935 OPERATOR(USE); 8936 8937 case KEY_values: 8938 UNI(OP_VALUES); 8939 8940 case KEY_vec: 8941 LOP(OP_VEC,XTERM); 8942 8943 case KEY_when: 8944 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 8945 return REPORT(0); 8946 pl_yylval.ival = CopLINE(PL_curcop); 8947 Perl_ck_warner_d(aTHX_ 8948 packWARN(WARN_EXPERIMENTAL__SMARTMATCH), 8949 "when is experimental"); 8950 OPERATOR(WHEN); 8951 8952 case KEY_while: 8953 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 8954 return REPORT(0); 8955 pl_yylval.ival = CopLINE(PL_curcop); 8956 OPERATOR(WHILE); 8957 8958 case KEY_warn: 8959 PL_hints |= HINT_BLOCK_SCOPE; 8960 LOP(OP_WARN,XTERM); 8961 8962 case KEY_wait: 8963 FUN0(OP_WAIT); 8964 8965 case KEY_waitpid: 8966 LOP(OP_WAITPID,XTERM); 8967 8968 case KEY_wantarray: 8969 FUN0(OP_WANTARRAY); 8970 8971 case KEY_write: 8972 /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and 8973 * we use the same number on EBCDIC */ 8974 gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV); 8975 UNI(OP_ENTERWRITE); 8976 8977 case KEY_x: 8978 if (PL_expect == XOPERATOR) { 8979 if (*s == '=' && !PL_lex_allbrackets && 8980 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 8981 return REPORT(0); 8982 Mop(OP_REPEAT); 8983 } 8984 check_uni(); 8985 goto just_a_word; 8986 8987 case KEY_xor: 8988 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC) 8989 return REPORT(0); 8990 pl_yylval.ival = OP_XOR; 8991 OPERATOR(OROP); 8992 } 8993 }} 8994 } 8995 8996 /* 8997 S_pending_ident 8998 8999 Looks up an identifier in the pad or in a package 9000 9001 Returns: 9002 PRIVATEREF if this is a lexical name. 9003 WORD if this belongs to a package. 9004 9005 Structure: 9006 if we're in a my declaration 9007 croak if they tried to say my($foo::bar) 9008 build the ops for a my() declaration 9009 if it's an access to a my() variable 9010 build ops for access to a my() variable 9011 if in a dq string, and they've said @foo and we can't find @foo 9012 warn 9013 build ops for a bareword 9014 */ 9015 9016 static int 9017 S_pending_ident(pTHX) 9018 { 9019 dVAR; 9020 PADOFFSET tmp = 0; 9021 const char pit = (char)pl_yylval.ival; 9022 const STRLEN tokenbuf_len = strlen(PL_tokenbuf); 9023 /* All routes through this function want to know if there is a colon. */ 9024 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len); 9025 9026 DEBUG_T({ PerlIO_printf(Perl_debug_log, 9027 "### Pending identifier '%s'\n", PL_tokenbuf); }); 9028 9029 /* if we're in a my(), we can't allow dynamics here. 9030 $foo'bar has already been turned into $foo::bar, so 9031 just check for colons. 9032 9033 if it's a legal name, the OP is a PADANY. 9034 */ 9035 if (PL_in_my) { 9036 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */ 9037 if (has_colon) 9038 yyerror_pv(Perl_form(aTHX_ "No package name allowed for " 9039 "variable %s in \"our\"", 9040 PL_tokenbuf), UTF ? SVf_UTF8 : 0); 9041 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0); 9042 } 9043 else { 9044 if (has_colon) { 9045 /* PL_no_myglob is constant */ 9046 GCC_DIAG_IGNORE(-Wformat-nonliteral); 9047 yyerror_pv(Perl_form(aTHX_ PL_no_myglob, 9048 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf), 9049 UTF ? SVf_UTF8 : 0); 9050 GCC_DIAG_RESTORE; 9051 } 9052 9053 pl_yylval.opval = newOP(OP_PADANY, 0); 9054 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 9055 UTF ? SVf_UTF8 : 0); 9056 return PRIVATEREF; 9057 } 9058 } 9059 9060 /* 9061 build the ops for accesses to a my() variable. 9062 */ 9063 9064 if (!has_colon) { 9065 if (!PL_in_my) 9066 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len, 9067 UTF ? SVf_UTF8 : 0); 9068 if (tmp != NOT_IN_PAD) { 9069 /* might be an "our" variable" */ 9070 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) { 9071 /* build ops for a bareword */ 9072 HV * const stash = PAD_COMPNAME_OURSTASH(tmp); 9073 HEK * const stashname = HvNAME_HEK(stash); 9074 SV * const sym = newSVhek(stashname); 9075 sv_catpvs(sym, "::"); 9076 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES )); 9077 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym); 9078 pl_yylval.opval->op_private = OPpCONST_ENTERED; 9079 if (pit != '&') 9080 gv_fetchsv(sym, 9081 (PL_in_eval 9082 ? (GV_ADDMULTI | GV_ADDINEVAL) 9083 : GV_ADDMULTI 9084 ), 9085 ((PL_tokenbuf[0] == '$') ? SVt_PV 9086 : (PL_tokenbuf[0] == '@') ? SVt_PVAV 9087 : SVt_PVHV)); 9088 return WORD; 9089 } 9090 9091 pl_yylval.opval = newOP(OP_PADANY, 0); 9092 pl_yylval.opval->op_targ = tmp; 9093 return PRIVATEREF; 9094 } 9095 } 9096 9097 /* 9098 Whine if they've said @foo in a doublequoted string, 9099 and @foo isn't a variable we can find in the symbol 9100 table. 9101 */ 9102 if (ckWARN(WARN_AMBIGUOUS) && 9103 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) { 9104 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 9105 ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV); 9106 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv))) 9107 /* DO NOT warn for @- and @+ */ 9108 && !( PL_tokenbuf[2] == '\0' && 9109 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' )) 9110 ) 9111 { 9112 /* Downgraded from fatal to warning 20000522 mjd */ 9113 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 9114 "Possible unintended interpolation of %"UTF8f 9115 " in string", 9116 UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf)); 9117 } 9118 } 9119 9120 /* build ops for a bareword */ 9121 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, 9122 newSVpvn_flags(PL_tokenbuf + 1, 9123 tokenbuf_len - 1, 9124 UTF ? SVf_UTF8 : 0 )); 9125 pl_yylval.opval->op_private = OPpCONST_ENTERED; 9126 if (pit != '&') 9127 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1, 9128 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD) 9129 | ( UTF ? SVf_UTF8 : 0 ), 9130 ((PL_tokenbuf[0] == '$') ? SVt_PV 9131 : (PL_tokenbuf[0] == '@') ? SVt_PVAV 9132 : SVt_PVHV)); 9133 return WORD; 9134 } 9135 9136 STATIC void 9137 S_checkcomma(pTHX_ const char *s, const char *name, const char *what) 9138 { 9139 dVAR; 9140 9141 PERL_ARGS_ASSERT_CHECKCOMMA; 9142 9143 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */ 9144 if (ckWARN(WARN_SYNTAX)) { 9145 int level = 1; 9146 const char *w; 9147 for (w = s+2; *w && level; w++) { 9148 if (*w == '(') 9149 ++level; 9150 else if (*w == ')') 9151 --level; 9152 } 9153 while (isSPACE(*w)) 9154 ++w; 9155 /* the list of chars below is for end of statements or 9156 * block / parens, boolean operators (&&, ||, //) and branch 9157 * constructs (or, and, if, until, unless, while, err, for). 9158 * Not a very solid hack... */ 9159 if (!*w || !strchr(";&/|})]oaiuwef!=", *w)) 9160 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 9161 "%s (...) interpreted as function",name); 9162 } 9163 } 9164 while (s < PL_bufend && isSPACE(*s)) 9165 s++; 9166 if (*s == '(') 9167 s++; 9168 while (s < PL_bufend && isSPACE(*s)) 9169 s++; 9170 if (isIDFIRST_lazy_if(s,UTF)) { 9171 const char * const w = s; 9172 s += UTF ? UTF8SKIP(s) : 1; 9173 while (isWORDCHAR_lazy_if(s,UTF)) 9174 s += UTF ? UTF8SKIP(s) : 1; 9175 while (s < PL_bufend && isSPACE(*s)) 9176 s++; 9177 if (*s == ',') { 9178 GV* gv; 9179 if (keyword(w, s - w, 0)) 9180 return; 9181 9182 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV); 9183 if (gv && GvCVu(gv)) 9184 return; 9185 Perl_croak(aTHX_ "No comma allowed after %s", what); 9186 } 9187 } 9188 } 9189 9190 /* S_new_constant(): do any overload::constant lookup. 9191 9192 Either returns sv, or mortalizes/frees sv and returns a new SV*. 9193 Best used as sv=new_constant(..., sv, ...). 9194 If s, pv are NULL, calls subroutine with one argument, 9195 and <type> is used with error messages only. 9196 <type> is assumed to be well formed UTF-8 */ 9197 9198 STATIC SV * 9199 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen, 9200 SV *sv, SV *pv, const char *type, STRLEN typelen) 9201 { 9202 dVAR; dSP; 9203 HV * table = GvHV(PL_hintgv); /* ^H */ 9204 SV *res; 9205 SV *errsv = NULL; 9206 SV **cvp; 9207 SV *cv, *typesv; 9208 const char *why1 = "", *why2 = "", *why3 = ""; 9209 9210 PERL_ARGS_ASSERT_NEW_CONSTANT; 9211 /* We assume that this is true: */ 9212 if (*key == 'c') { assert (strEQ(key, "charnames")); } 9213 assert(type || s); 9214 9215 /* charnames doesn't work well if there have been errors found */ 9216 if (PL_error_count > 0 && *key == 'c') 9217 { 9218 SvREFCNT_dec_NN(sv); 9219 return &PL_sv_undef; 9220 } 9221 9222 sv_2mortal(sv); /* Parent created it permanently */ 9223 if (!table 9224 || ! (PL_hints & HINT_LOCALIZE_HH) 9225 || ! (cvp = hv_fetch(table, key, keylen, FALSE)) 9226 || ! SvOK(*cvp)) 9227 { 9228 char *msg; 9229 9230 /* Here haven't found what we're looking for. If it is charnames, 9231 * perhaps it needs to be loaded. Try doing that before giving up */ 9232 if (*key == 'c') { 9233 Perl_load_module(aTHX_ 9234 0, 9235 newSVpvs("_charnames"), 9236 /* version parameter; no need to specify it, as if 9237 * we get too early a version, will fail anyway, 9238 * not being able to find '_charnames' */ 9239 NULL, 9240 newSVpvs(":full"), 9241 newSVpvs(":short"), 9242 NULL); 9243 assert(sp == PL_stack_sp); 9244 table = GvHV(PL_hintgv); 9245 if (table 9246 && (PL_hints & HINT_LOCALIZE_HH) 9247 && (cvp = hv_fetch(table, key, keylen, FALSE)) 9248 && SvOK(*cvp)) 9249 { 9250 goto now_ok; 9251 } 9252 } 9253 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) { 9254 msg = Perl_form(aTHX_ 9255 "Constant(%.*s) unknown", 9256 (int)(type ? typelen : len), 9257 (type ? type: s)); 9258 } 9259 else { 9260 why1 = "$^H{"; 9261 why2 = key; 9262 why3 = "} is not defined"; 9263 report: 9264 if (*key == 'c') { 9265 msg = Perl_form(aTHX_ 9266 /* The +3 is for '\N{'; -4 for that, plus '}' */ 9267 "Unknown charname '%.*s'", (int)typelen - 4, type + 3 9268 ); 9269 } 9270 else { 9271 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s", 9272 (int)(type ? typelen : len), 9273 (type ? type: s), why1, why2, why3); 9274 } 9275 } 9276 yyerror_pv(msg, UTF ? SVf_UTF8 : 0); 9277 return SvREFCNT_inc_simple_NN(sv); 9278 } 9279 now_ok: 9280 cv = *cvp; 9281 if (!pv && s) 9282 pv = newSVpvn_flags(s, len, SVs_TEMP); 9283 if (type && pv) 9284 typesv = newSVpvn_flags(type, typelen, SVs_TEMP); 9285 else 9286 typesv = &PL_sv_undef; 9287 9288 PUSHSTACKi(PERLSI_OVERLOAD); 9289 ENTER ; 9290 SAVETMPS; 9291 9292 PUSHMARK(SP) ; 9293 EXTEND(sp, 3); 9294 if (pv) 9295 PUSHs(pv); 9296 PUSHs(sv); 9297 if (pv) 9298 PUSHs(typesv); 9299 PUTBACK; 9300 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL)); 9301 9302 SPAGAIN ; 9303 9304 /* Check the eval first */ 9305 if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) { 9306 STRLEN errlen; 9307 const char * errstr; 9308 sv_catpvs(errsv, "Propagated"); 9309 errstr = SvPV_const(errsv, errlen); 9310 yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */ 9311 (void)POPs; 9312 res = SvREFCNT_inc_simple_NN(sv); 9313 } 9314 else { 9315 res = POPs; 9316 SvREFCNT_inc_simple_void_NN(res); 9317 } 9318 9319 PUTBACK ; 9320 FREETMPS ; 9321 LEAVE ; 9322 POPSTACK; 9323 9324 if (!SvOK(res)) { 9325 why1 = "Call to &{$^H{"; 9326 why2 = key; 9327 why3 = "}} did not return a defined value"; 9328 sv = res; 9329 (void)sv_2mortal(sv); 9330 goto report; 9331 } 9332 9333 return res; 9334 } 9335 9336 PERL_STATIC_INLINE void 9337 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool is_utf8) { 9338 dVAR; 9339 PERL_ARGS_ASSERT_PARSE_IDENT; 9340 9341 for (;;) { 9342 if (*d >= e) 9343 Perl_croak(aTHX_ "%s", ident_too_long); 9344 if (is_utf8 && isIDFIRST_utf8((U8*)*s)) { 9345 /* The UTF-8 case must come first, otherwise things 9346 * like c\N{COMBINING TILDE} would start failing, as the 9347 * isWORDCHAR_A case below would gobble the 'c' up. 9348 */ 9349 9350 char *t = *s + UTF8SKIP(*s); 9351 while (isIDCONT_utf8((U8*)t)) 9352 t += UTF8SKIP(t); 9353 if (*d + (t - *s) > e) 9354 Perl_croak(aTHX_ "%s", ident_too_long); 9355 Copy(*s, *d, t - *s, char); 9356 *d += t - *s; 9357 *s = t; 9358 } 9359 else if ( isWORDCHAR_A(**s) ) { 9360 do { 9361 *(*d)++ = *(*s)++; 9362 } while (isWORDCHAR_A(**s) && *d < e); 9363 } 9364 else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) { 9365 *(*d)++ = ':'; 9366 *(*d)++ = ':'; 9367 (*s)++; 9368 } 9369 else if (allow_package && **s == ':' && (*s)[1] == ':' 9370 /* Disallow things like Foo::$bar. For the curious, this is 9371 * the code path that triggers the "Bad name after" warning 9372 * when looking for barewords. 9373 */ 9374 && (*s)[2] != '$') { 9375 *(*d)++ = *(*s)++; 9376 *(*d)++ = *(*s)++; 9377 } 9378 else 9379 break; 9380 } 9381 return; 9382 } 9383 9384 /* Returns a NUL terminated string, with the length of the string written to 9385 *slp 9386 */ 9387 STATIC char * 9388 S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp) 9389 { 9390 dVAR; 9391 char *d = dest; 9392 char * const e = d + destlen - 3; /* two-character token, ending NUL */ 9393 bool is_utf8 = cBOOL(UTF); 9394 9395 PERL_ARGS_ASSERT_SCAN_WORD; 9396 9397 parse_ident(&s, &d, e, allow_package, is_utf8); 9398 *d = '\0'; 9399 *slp = d - dest; 9400 return s; 9401 } 9402 9403 STATIC char * 9404 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) 9405 { 9406 dVAR; 9407 I32 herelines = PL_parser->herelines; 9408 SSize_t bracket = -1; 9409 char funny = *s++; 9410 char *d = dest; 9411 char * const e = d + destlen - 3; /* two-character token, ending NUL */ 9412 bool is_utf8 = cBOOL(UTF); 9413 I32 orig_copline = 0, tmp_copline = 0; 9414 9415 PERL_ARGS_ASSERT_SCAN_IDENT; 9416 9417 if (isSPACE(*s)) 9418 s = PEEKSPACE(s); 9419 if (isDIGIT(*s)) { 9420 while (isDIGIT(*s)) { 9421 if (d >= e) 9422 Perl_croak(aTHX_ "%s", ident_too_long); 9423 *d++ = *s++; 9424 } 9425 } 9426 else { 9427 parse_ident(&s, &d, e, 1, is_utf8); 9428 } 9429 *d = '\0'; 9430 d = dest; 9431 if (*d) { 9432 /* Either a digit variable, or parse_ident() found an identifier 9433 (anything valid as a bareword), so job done and return. */ 9434 if (PL_lex_state != LEX_NORMAL) 9435 PL_lex_state = LEX_INTERPENDMAYBE; 9436 return s; 9437 } 9438 if (*s == '$' && s[1] && 9439 (isIDFIRST_lazy_if(s+1,is_utf8) 9440 || isDIGIT_A((U8)s[1]) 9441 || s[1] == '$' 9442 || s[1] == '{' 9443 || strnEQ(s+1,"::",2)) ) 9444 { 9445 /* Dereferencing a value in a scalar variable. 9446 The alternatives are different syntaxes for a scalar variable. 9447 Using ' as a leading package separator isn't allowed. :: is. */ 9448 return s; 9449 } 9450 /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...} */ 9451 if (*s == '{') { 9452 bracket = s - SvPVX(PL_linestr); 9453 s++; 9454 orig_copline = CopLINE(PL_curcop); 9455 if (s < PL_bufend && isSPACE(*s)) { 9456 s = PEEKSPACE(s); 9457 } 9458 } 9459 9460 /* Is the byte 'd' a legal single character identifier name? 'u' is true 9461 * iff Unicode semantics are to be used. The legal ones are any of: 9462 * a) ASCII digits 9463 * b) ASCII punctuation 9464 * c) When not under Unicode rules, any upper Latin1 character 9465 * d) \c?, \c\, \c^, \c_, and \cA..\cZ, minus the ones that have traditionally 9466 * been matched by \s on ASCII platforms. That is: \c?, plus 1-32, minus 9467 * the \s ones. */ 9468 #define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)(d)) \ 9469 || isDIGIT_A((U8)(d)) \ 9470 || (!(u) && !isASCII((U8)(d))) \ 9471 || ((((U8)(d)) < 32) \ 9472 && (((((U8)(d)) >= 14) \ 9473 || (((U8)(d)) <= 8 && (d) != 0) \ 9474 || (((U8)(d)) == 13)))) \ 9475 || (((U8)(d)) == toCTRL('?'))) 9476 if (s < PL_bufend 9477 && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(*s, is_utf8))) 9478 { 9479 if ( isCNTRL_A((U8)*s) ) { 9480 deprecate("literal control characters in variable names"); 9481 } 9482 9483 if (is_utf8) { 9484 const STRLEN skip = UTF8SKIP(s); 9485 STRLEN i; 9486 d[skip] = '\0'; 9487 for ( i = 0; i < skip; i++ ) 9488 d[i] = *s++; 9489 } 9490 else { 9491 *d = *s++; 9492 d[1] = '\0'; 9493 } 9494 } 9495 /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */ 9496 if (*d == '^' && *s && isCONTROLVAR(*s)) { 9497 *d = toCTRL(*s); 9498 s++; 9499 } 9500 /* Warn about ambiguous code after unary operators if {...} notation isn't 9501 used. There's no difference in ambiguity; it's merely a heuristic 9502 about when not to warn. */ 9503 else if (ck_uni && bracket == -1) 9504 check_uni(); 9505 if (bracket != -1) { 9506 /* If we were processing {...} notation then... */ 9507 if (isIDFIRST_lazy_if(d,is_utf8)) { 9508 /* if it starts as a valid identifier, assume that it is one. 9509 (the later check for } being at the expected point will trap 9510 cases where this doesn't pan out.) */ 9511 d += is_utf8 ? UTF8SKIP(d) : 1; 9512 parse_ident(&s, &d, e, 1, is_utf8); 9513 *d = '\0'; 9514 tmp_copline = CopLINE(PL_curcop); 9515 if (s < PL_bufend && isSPACE(*s)) { 9516 s = PEEKSPACE(s); 9517 } 9518 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) { 9519 /* ${foo[0]} and ${foo{bar}} notation. */ 9520 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) { 9521 const char * const brack = 9522 (const char *) 9523 ((*s == '[') ? "[...]" : "{...}"); 9524 orig_copline = CopLINE(PL_curcop); 9525 CopLINE_set(PL_curcop, tmp_copline); 9526 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */ 9527 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 9528 "Ambiguous use of %c{%s%s} resolved to %c%s%s", 9529 funny, dest, brack, funny, dest, brack); 9530 CopLINE_set(PL_curcop, orig_copline); 9531 } 9532 bracket++; 9533 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK); 9534 PL_lex_allbrackets++; 9535 return s; 9536 } 9537 } 9538 /* Handle extended ${^Foo} variables 9539 * 1999-02-27 mjd-perl-patch@plover.com */ 9540 else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */ 9541 && isWORDCHAR(*s)) 9542 { 9543 d++; 9544 while (isWORDCHAR(*s) && d < e) { 9545 *d++ = *s++; 9546 } 9547 if (d >= e) 9548 Perl_croak(aTHX_ "%s", ident_too_long); 9549 *d = '\0'; 9550 } 9551 9552 if ( !tmp_copline ) 9553 tmp_copline = CopLINE(PL_curcop); 9554 if (s < PL_bufend && isSPACE(*s)) { 9555 s = PEEKSPACE(s); 9556 } 9557 9558 /* Expect to find a closing } after consuming any trailing whitespace. 9559 */ 9560 if (*s == '}') { 9561 s++; 9562 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) { 9563 PL_lex_state = LEX_INTERPEND; 9564 PL_expect = XREF; 9565 } 9566 if (PL_lex_state == LEX_NORMAL) { 9567 if (ckWARN(WARN_AMBIGUOUS) && 9568 (keyword(dest, d - dest, 0) 9569 || get_cvn_flags(dest, d - dest, is_utf8 ? SVf_UTF8 : 0))) 9570 { 9571 SV *tmp = newSVpvn_flags( dest, d - dest, 9572 SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) ); 9573 if (funny == '#') 9574 funny = '@'; 9575 orig_copline = CopLINE(PL_curcop); 9576 CopLINE_set(PL_curcop, tmp_copline); 9577 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 9578 "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf, 9579 funny, tmp, funny, tmp); 9580 CopLINE_set(PL_curcop, orig_copline); 9581 } 9582 } 9583 } 9584 else { 9585 /* Didn't find the closing } at the point we expected, so restore 9586 state such that the next thing to process is the opening { and */ 9587 s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */ 9588 CopLINE_set(PL_curcop, orig_copline); 9589 PL_parser->herelines = herelines; 9590 *dest = '\0'; 9591 } 9592 } 9593 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s)) 9594 PL_lex_state = LEX_INTERPEND; 9595 return s; 9596 } 9597 9598 static bool 9599 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) { 9600 9601 /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in 9602 * the parse starting at 's', based on the subset that are valid in this 9603 * context input to this routine in 'valid_flags'. Advances s. Returns 9604 * TRUE if the input should be treated as a valid flag, so the next char 9605 * may be as well; otherwise FALSE. 'charset' should point to a NUL upon 9606 * first call on the current regex. This routine will set it to any 9607 * charset modifier found. The caller shouldn't change it. This way, 9608 * another charset modifier encountered in the parse can be detected as an 9609 * error, as we have decided to allow only one */ 9610 9611 const char c = **s; 9612 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1; 9613 9614 if ( charlen != 1 || ! strchr(valid_flags, c) ) { 9615 if (isWORDCHAR_lazy_if(*s, UTF)) { 9616 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s), 9617 UTF ? SVf_UTF8 : 0); 9618 (*s) += charlen; 9619 /* Pretend that it worked, so will continue processing before 9620 * dieing */ 9621 return TRUE; 9622 } 9623 return FALSE; 9624 } 9625 9626 switch (c) { 9627 9628 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl); 9629 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break; 9630 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break; 9631 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break; 9632 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break; 9633 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break; 9634 case LOCALE_PAT_MOD: 9635 if (*charset) { 9636 goto multiple_charsets; 9637 } 9638 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET); 9639 *charset = c; 9640 break; 9641 case UNICODE_PAT_MOD: 9642 if (*charset) { 9643 goto multiple_charsets; 9644 } 9645 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET); 9646 *charset = c; 9647 break; 9648 case ASCII_RESTRICT_PAT_MOD: 9649 if (! *charset) { 9650 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET); 9651 } 9652 else { 9653 9654 /* Error if previous modifier wasn't an 'a', but if it was, see 9655 * if, and accept, a second occurrence (only) */ 9656 if (*charset != 'a' 9657 || get_regex_charset(*pmfl) 9658 != REGEX_ASCII_RESTRICTED_CHARSET) 9659 { 9660 goto multiple_charsets; 9661 } 9662 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET); 9663 } 9664 *charset = c; 9665 break; 9666 case DEPENDS_PAT_MOD: 9667 if (*charset) { 9668 goto multiple_charsets; 9669 } 9670 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET); 9671 *charset = c; 9672 break; 9673 } 9674 9675 (*s)++; 9676 return TRUE; 9677 9678 multiple_charsets: 9679 if (*charset != c) { 9680 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c)); 9681 } 9682 else if (c == 'a') { 9683 /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */ 9684 yyerror("Regexp modifier \"/a\" may appear a maximum of twice"); 9685 } 9686 else { 9687 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c)); 9688 } 9689 9690 /* Pretend that it worked, so will continue processing before dieing */ 9691 (*s)++; 9692 return TRUE; 9693 } 9694 9695 STATIC char * 9696 S_scan_pat(pTHX_ char *start, I32 type) 9697 { 9698 dVAR; 9699 PMOP *pm; 9700 char *s; 9701 const char * const valid_flags = 9702 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS); 9703 char charset = '\0'; /* character set modifier */ 9704 #ifdef PERL_MAD 9705 char *modstart; 9706 #endif 9707 9708 PERL_ARGS_ASSERT_SCAN_PAT; 9709 9710 s = scan_str(start,!!PL_madskills,FALSE, (PL_in_eval & EVAL_RE_REPARSING), 9711 TRUE /* look for escaped bracketed metas */, NULL); 9712 9713 if (!s) { 9714 const char * const delimiter = skipspace(start); 9715 Perl_croak(aTHX_ 9716 (const char *) 9717 (*delimiter == '?' 9718 ? "Search pattern not terminated or ternary operator parsed as search pattern" 9719 : "Search pattern not terminated" )); 9720 } 9721 9722 pm = (PMOP*)newPMOP(type, 0); 9723 if (PL_multi_open == '?') { 9724 /* This is the only point in the code that sets PMf_ONCE: */ 9725 pm->op_pmflags |= PMf_ONCE; 9726 9727 /* Hence it's safe to do this bit of PMOP book-keeping here, which 9728 allows us to restrict the list needed by reset to just the ?? 9729 matches. */ 9730 assert(type != OP_TRANS); 9731 if (PL_curstash) { 9732 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab); 9733 U32 elements; 9734 if (!mg) { 9735 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0, 9736 0); 9737 } 9738 elements = mg->mg_len / sizeof(PMOP**); 9739 Renewc(mg->mg_ptr, elements + 1, PMOP*, char); 9740 ((PMOP**)mg->mg_ptr) [elements++] = pm; 9741 mg->mg_len = elements * sizeof(PMOP**); 9742 PmopSTASH_set(pm,PL_curstash); 9743 } 9744 } 9745 #ifdef PERL_MAD 9746 modstart = s; 9747 #endif 9748 9749 /* if qr/...(?{..}).../, then need to parse the pattern within a new 9750 * anon CV. False positives like qr/[(?{]/ are harmless */ 9751 9752 if (type == OP_QR) { 9753 STRLEN len; 9754 char *e, *p = SvPV(PL_lex_stuff, len); 9755 e = p + len; 9756 for (; p < e; p++) { 9757 if (p[0] == '(' && p[1] == '?' 9758 && (p[2] == '{' || (p[2] == '?' && p[3] == '{'))) 9759 { 9760 pm->op_pmflags |= PMf_HAS_CV; 9761 break; 9762 } 9763 } 9764 pm->op_pmflags |= PMf_IS_QR; 9765 } 9766 9767 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {}; 9768 #ifdef PERL_MAD 9769 if (PL_madskills && modstart != s) { 9770 SV* tmptoken = newSVpvn(modstart, s - modstart); 9771 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0); 9772 } 9773 #endif 9774 /* issue a warning if /c is specified,but /g is not */ 9775 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL)) 9776 { 9777 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), 9778 "Use of /c modifier is meaningless without /g" ); 9779 } 9780 9781 PL_lex_op = (OP*)pm; 9782 pl_yylval.ival = OP_MATCH; 9783 return s; 9784 } 9785 9786 STATIC char * 9787 S_scan_subst(pTHX_ char *start) 9788 { 9789 dVAR; 9790 char *s; 9791 PMOP *pm; 9792 I32 first_start; 9793 line_t first_line; 9794 I32 es = 0; 9795 char charset = '\0'; /* character set modifier */ 9796 #ifdef PERL_MAD 9797 char *modstart; 9798 #endif 9799 char *t; 9800 9801 PERL_ARGS_ASSERT_SCAN_SUBST; 9802 9803 pl_yylval.ival = OP_NULL; 9804 9805 s = scan_str(start,!!PL_madskills,FALSE,FALSE, 9806 TRUE /* look for escaped bracketed metas */, &t); 9807 9808 if (!s) 9809 Perl_croak(aTHX_ "Substitution pattern not terminated"); 9810 9811 s = t; 9812 #ifdef PERL_MAD 9813 if (PL_madskills) { 9814 CURMAD('q', PL_thisopen); 9815 CURMAD('_', PL_thiswhite); 9816 CURMAD('E', PL_thisstuff); 9817 CURMAD('Q', PL_thisclose); 9818 PL_realtokenstart = s - SvPVX(PL_linestr); 9819 } 9820 #endif 9821 9822 first_start = PL_multi_start; 9823 first_line = CopLINE(PL_curcop); 9824 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL); 9825 if (!s) { 9826 if (PL_lex_stuff) { 9827 SvREFCNT_dec(PL_lex_stuff); 9828 PL_lex_stuff = NULL; 9829 } 9830 Perl_croak(aTHX_ "Substitution replacement not terminated"); 9831 } 9832 PL_multi_start = first_start; /* so whole substitution is taken together */ 9833 9834 pm = (PMOP*)newPMOP(OP_SUBST, 0); 9835 9836 #ifdef PERL_MAD 9837 if (PL_madskills) { 9838 CURMAD('z', PL_thisopen); 9839 CURMAD('R', PL_thisstuff); 9840 CURMAD('Z', PL_thisclose); 9841 } 9842 modstart = s; 9843 #endif 9844 9845 while (*s) { 9846 if (*s == EXEC_PAT_MOD) { 9847 s++; 9848 es++; 9849 } 9850 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset)) 9851 { 9852 break; 9853 } 9854 } 9855 9856 #ifdef PERL_MAD 9857 if (PL_madskills) { 9858 if (modstart != s) 9859 curmad('m', newSVpvn(modstart, s - modstart)); 9860 append_madprops(PL_thismad, (OP*)pm, 0); 9861 PL_thismad = 0; 9862 } 9863 #endif 9864 if ((pm->op_pmflags & PMf_CONTINUE)) { 9865 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" ); 9866 } 9867 9868 if (es) { 9869 SV * const repl = newSVpvs(""); 9870 9871 PL_multi_end = 0; 9872 pm->op_pmflags |= PMf_EVAL; 9873 while (es-- > 0) { 9874 if (es) 9875 sv_catpvs(repl, "eval "); 9876 else 9877 sv_catpvs(repl, "do "); 9878 } 9879 sv_catpvs(repl, "{"); 9880 sv_catsv(repl, PL_sublex_info.repl); 9881 sv_catpvs(repl, "}"); 9882 SvEVALED_on(repl); 9883 SvREFCNT_dec(PL_sublex_info.repl); 9884 PL_sublex_info.repl = repl; 9885 } 9886 if (CopLINE(PL_curcop) != first_line) { 9887 sv_upgrade(PL_sublex_info.repl, SVt_PVNV); 9888 ((XPVNV*)SvANY(PL_sublex_info.repl))->xnv_u.xpad_cop_seq.xlow = 9889 CopLINE(PL_curcop) - first_line; 9890 CopLINE_set(PL_curcop, first_line); 9891 } 9892 9893 PL_lex_op = (OP*)pm; 9894 pl_yylval.ival = OP_SUBST; 9895 return s; 9896 } 9897 9898 STATIC char * 9899 S_scan_trans(pTHX_ char *start) 9900 { 9901 dVAR; 9902 char* s; 9903 OP *o; 9904 U8 squash; 9905 U8 del; 9906 U8 complement; 9907 bool nondestruct = 0; 9908 #ifdef PERL_MAD 9909 char *modstart; 9910 #endif 9911 char *t; 9912 9913 PERL_ARGS_ASSERT_SCAN_TRANS; 9914 9915 pl_yylval.ival = OP_NULL; 9916 9917 s = scan_str(start,!!PL_madskills,FALSE,FALSE,FALSE,&t); 9918 if (!s) 9919 Perl_croak(aTHX_ "Transliteration pattern not terminated"); 9920 9921 s = t; 9922 #ifdef PERL_MAD 9923 if (PL_madskills) { 9924 CURMAD('q', PL_thisopen); 9925 CURMAD('_', PL_thiswhite); 9926 CURMAD('E', PL_thisstuff); 9927 CURMAD('Q', PL_thisclose); 9928 PL_realtokenstart = s - SvPVX(PL_linestr); 9929 } 9930 #endif 9931 9932 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL); 9933 if (!s) { 9934 if (PL_lex_stuff) { 9935 SvREFCNT_dec(PL_lex_stuff); 9936 PL_lex_stuff = NULL; 9937 } 9938 Perl_croak(aTHX_ "Transliteration replacement not terminated"); 9939 } 9940 if (PL_madskills) { 9941 CURMAD('z', PL_thisopen); 9942 CURMAD('R', PL_thisstuff); 9943 CURMAD('Z', PL_thisclose); 9944 } 9945 9946 complement = del = squash = 0; 9947 #ifdef PERL_MAD 9948 modstart = s; 9949 #endif 9950 while (1) { 9951 switch (*s) { 9952 case 'c': 9953 complement = OPpTRANS_COMPLEMENT; 9954 break; 9955 case 'd': 9956 del = OPpTRANS_DELETE; 9957 break; 9958 case 's': 9959 squash = OPpTRANS_SQUASH; 9960 break; 9961 case 'r': 9962 nondestruct = 1; 9963 break; 9964 default: 9965 goto no_more; 9966 } 9967 s++; 9968 } 9969 no_more: 9970 9971 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL); 9972 o->op_private &= ~OPpTRANS_ALL; 9973 o->op_private |= del|squash|complement| 9974 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)| 9975 (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF : 0); 9976 9977 PL_lex_op = o; 9978 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS; 9979 9980 #ifdef PERL_MAD 9981 if (PL_madskills) { 9982 if (modstart != s) 9983 curmad('m', newSVpvn(modstart, s - modstart)); 9984 append_madprops(PL_thismad, o, 0); 9985 PL_thismad = 0; 9986 } 9987 #endif 9988 9989 return s; 9990 } 9991 9992 /* scan_heredoc 9993 Takes a pointer to the first < in <<FOO. 9994 Returns a pointer to the byte following <<FOO. 9995 9996 This function scans a heredoc, which involves different methods 9997 depending on whether we are in a string eval, quoted construct, etc. 9998 This is because PL_linestr could containing a single line of input, or 9999 a whole string being evalled, or the contents of the current quote- 10000 like operator. 10001 10002 The two basic methods are: 10003 - Steal lines from the input stream 10004 - Scan the heredoc in PL_linestr and remove it therefrom 10005 10006 In a file scope or filtered eval, the first method is used; in a 10007 string eval, the second. 10008 10009 In a quote-like operator, we have to choose between the two, 10010 depending on where we can find a newline. We peek into outer lex- 10011 ing scopes until we find one with a newline in it. If we reach the 10012 outermost lexing scope and it is a file, we use the stream method. 10013 Otherwise it is treated as an eval. 10014 */ 10015 10016 STATIC char * 10017 S_scan_heredoc(pTHX_ char *s) 10018 { 10019 dVAR; 10020 I32 op_type = OP_SCALAR; 10021 I32 len; 10022 SV *tmpstr; 10023 char term; 10024 char *d; 10025 char *e; 10026 char *peek; 10027 const bool infile = PL_rsfp || PL_parser->filtered; 10028 const line_t origline = CopLINE(PL_curcop); 10029 LEXSHARED *shared = PL_parser->lex_shared; 10030 #ifdef PERL_MAD 10031 I32 stuffstart = s - SvPVX(PL_linestr); 10032 char *tstart; 10033 10034 PL_realtokenstart = -1; 10035 #endif 10036 10037 PERL_ARGS_ASSERT_SCAN_HEREDOC; 10038 10039 s += 2; 10040 d = PL_tokenbuf + 1; 10041 e = PL_tokenbuf + sizeof PL_tokenbuf - 1; 10042 *PL_tokenbuf = '\n'; 10043 peek = s; 10044 while (SPACE_OR_TAB(*peek)) 10045 peek++; 10046 if (*peek == '`' || *peek == '\'' || *peek =='"') { 10047 s = peek; 10048 term = *s++; 10049 s = delimcpy(d, e, s, PL_bufend, term, &len); 10050 if (s == PL_bufend) 10051 Perl_croak(aTHX_ "Unterminated delimiter for here document"); 10052 d += len; 10053 s++; 10054 } 10055 else { 10056 if (*s == '\\') 10057 /* <<\FOO is equivalent to <<'FOO' */ 10058 s++, term = '\''; 10059 else 10060 term = '"'; 10061 if (!isWORDCHAR_lazy_if(s,UTF)) 10062 deprecate("bare << to mean <<\"\""); 10063 for (; isWORDCHAR_lazy_if(s,UTF); s++) { 10064 if (d < e) 10065 *d++ = *s; 10066 } 10067 } 10068 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1) 10069 Perl_croak(aTHX_ "Delimiter for here document is too long"); 10070 *d++ = '\n'; 10071 *d = '\0'; 10072 len = d - PL_tokenbuf; 10073 10074 #ifdef PERL_MAD 10075 if (PL_madskills) { 10076 tstart = PL_tokenbuf + 1; 10077 PL_thisclose = newSVpvn(tstart, len - 1); 10078 tstart = SvPVX(PL_linestr) + stuffstart; 10079 PL_thisopen = newSVpvn(tstart, s - tstart); 10080 stuffstart = s - SvPVX(PL_linestr); 10081 } 10082 #endif 10083 #ifndef PERL_STRICT_CR 10084 d = strchr(s, '\r'); 10085 if (d) { 10086 char * const olds = s; 10087 s = d; 10088 while (s < PL_bufend) { 10089 if (*s == '\r') { 10090 *d++ = '\n'; 10091 if (*++s == '\n') 10092 s++; 10093 } 10094 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */ 10095 *d++ = *s++; 10096 s++; 10097 } 10098 else 10099 *d++ = *s++; 10100 } 10101 *d = '\0'; 10102 PL_bufend = d; 10103 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr)); 10104 s = olds; 10105 } 10106 #endif 10107 #ifdef PERL_MAD 10108 if (PL_madskills) { 10109 tstart = SvPVX(PL_linestr) + stuffstart; 10110 if (PL_thisstuff) 10111 sv_catpvn(PL_thisstuff, tstart, s - tstart); 10112 else 10113 PL_thisstuff = newSVpvn(tstart, s - tstart); 10114 } 10115 10116 stuffstart = s - SvPVX(PL_linestr); 10117 #endif 10118 10119 tmpstr = newSV_type(SVt_PVIV); 10120 SvGROW(tmpstr, 80); 10121 if (term == '\'') { 10122 op_type = OP_CONST; 10123 SvIV_set(tmpstr, -1); 10124 } 10125 else if (term == '`') { 10126 op_type = OP_BACKTICK; 10127 SvIV_set(tmpstr, '\\'); 10128 } 10129 10130 PL_multi_start = origline + 1 + PL_parser->herelines; 10131 PL_multi_open = PL_multi_close = '<'; 10132 /* inside a string eval or quote-like operator */ 10133 if (!infile || PL_lex_inwhat) { 10134 SV *linestr; 10135 char *bufend; 10136 char * const olds = s; 10137 PERL_CONTEXT * const cx = &cxstack[cxstack_ix]; 10138 /* These two fields are not set until an inner lexing scope is 10139 entered. But we need them set here. */ 10140 shared->ls_bufptr = s; 10141 shared->ls_linestr = PL_linestr; 10142 if (PL_lex_inwhat) 10143 /* Look for a newline. If the current buffer does not have one, 10144 peek into the line buffer of the parent lexing scope, going 10145 up as many levels as necessary to find one with a newline 10146 after bufptr. 10147 */ 10148 while (!(s = (char *)memchr( 10149 (void *)shared->ls_bufptr, '\n', 10150 SvEND(shared->ls_linestr)-shared->ls_bufptr 10151 ))) { 10152 shared = shared->ls_prev; 10153 /* shared is only null if we have gone beyond the outermost 10154 lexing scope. In a file, we will have broken out of the 10155 loop in the previous iteration. In an eval, the string buf- 10156 fer ends with "\n;", so the while condition above will have 10157 evaluated to false. So shared can never be null. */ 10158 assert(shared); 10159 /* A LEXSHARED struct with a null ls_prev pointer is the outer- 10160 most lexing scope. In a file, shared->ls_linestr at that 10161 level is just one line, so there is no body to steal. */ 10162 if (infile && !shared->ls_prev) { 10163 s = olds; 10164 goto streaming; 10165 } 10166 } 10167 else { /* eval */ 10168 s = (char*)memchr((void*)s, '\n', PL_bufend - s); 10169 assert(s); 10170 } 10171 linestr = shared->ls_linestr; 10172 bufend = SvEND(linestr); 10173 d = s; 10174 while (s < bufend - len + 1 && 10175 memNE(s,PL_tokenbuf,len) ) { 10176 if (*s++ == '\n') 10177 ++PL_parser->herelines; 10178 } 10179 if (s >= bufend - len + 1) { 10180 goto interminable; 10181 } 10182 sv_setpvn(tmpstr,d+1,s-d); 10183 #ifdef PERL_MAD 10184 if (PL_madskills) { 10185 if (PL_thisstuff) 10186 sv_catpvn(PL_thisstuff, d + 1, s - d); 10187 else 10188 PL_thisstuff = newSVpvn(d + 1, s - d); 10189 stuffstart = s - SvPVX(PL_linestr); 10190 } 10191 #endif 10192 s += len - 1; 10193 /* the preceding stmt passes a newline */ 10194 PL_parser->herelines++; 10195 10196 /* s now points to the newline after the heredoc terminator. 10197 d points to the newline before the body of the heredoc. 10198 */ 10199 10200 /* We are going to modify linestr in place here, so set 10201 aside copies of the string if necessary for re-evals or 10202 (caller $n)[6]. */ 10203 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we 10204 check shared->re_eval_str. */ 10205 if (shared->re_eval_start || shared->re_eval_str) { 10206 /* Set aside the rest of the regexp */ 10207 if (!shared->re_eval_str) 10208 shared->re_eval_str = 10209 newSVpvn(shared->re_eval_start, 10210 bufend - shared->re_eval_start); 10211 shared->re_eval_start -= s-d; 10212 } 10213 if (cxstack_ix >= 0 && CxTYPE(cx) == CXt_EVAL && 10214 CxOLD_OP_TYPE(cx) == OP_ENTEREVAL && 10215 cx->blk_eval.cur_text == linestr) 10216 { 10217 cx->blk_eval.cur_text = newSVsv(linestr); 10218 SvSCREAM_on(cx->blk_eval.cur_text); 10219 } 10220 /* Copy everything from s onwards back to d. */ 10221 Move(s,d,bufend-s + 1,char); 10222 SvCUR_set(linestr, SvCUR(linestr) - (s-d)); 10223 /* Setting PL_bufend only applies when we have not dug deeper 10224 into other scopes, because sublex_done sets PL_bufend to 10225 SvEND(PL_linestr). */ 10226 if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr); 10227 s = olds; 10228 } 10229 else 10230 { 10231 SV *linestr_save; 10232 streaming: 10233 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */ 10234 term = PL_tokenbuf[1]; 10235 len--; 10236 linestr_save = PL_linestr; /* must restore this afterwards */ 10237 d = s; /* and this */ 10238 PL_linestr = newSVpvs(""); 10239 PL_bufend = SvPVX(PL_linestr); 10240 while (1) { 10241 #ifdef PERL_MAD 10242 if (PL_madskills) { 10243 tstart = SvPVX(PL_linestr) + stuffstart; 10244 if (PL_thisstuff) 10245 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart); 10246 else 10247 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart); 10248 } 10249 #endif 10250 PL_bufptr = PL_bufend; 10251 CopLINE_set(PL_curcop, 10252 origline + 1 + PL_parser->herelines); 10253 if (!lex_next_chunk(LEX_NO_TERM) 10254 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) { 10255 SvREFCNT_dec(linestr_save); 10256 goto interminable; 10257 } 10258 CopLINE_set(PL_curcop, origline); 10259 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') { 10260 s = lex_grow_linestr(SvLEN(PL_linestr) + 3); 10261 /* ^That should be enough to avoid this needing to grow: */ 10262 sv_catpvs(PL_linestr, "\n\0"); 10263 assert(s == SvPVX(PL_linestr)); 10264 PL_bufend = SvEND(PL_linestr); 10265 } 10266 s = PL_bufptr; 10267 #ifdef PERL_MAD 10268 stuffstart = s - SvPVX(PL_linestr); 10269 #endif 10270 PL_parser->herelines++; 10271 PL_last_lop = PL_last_uni = NULL; 10272 #ifndef PERL_STRICT_CR 10273 if (PL_bufend - PL_linestart >= 2) { 10274 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') || 10275 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r')) 10276 { 10277 PL_bufend[-2] = '\n'; 10278 PL_bufend--; 10279 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr)); 10280 } 10281 else if (PL_bufend[-1] == '\r') 10282 PL_bufend[-1] = '\n'; 10283 } 10284 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r') 10285 PL_bufend[-1] = '\n'; 10286 #endif 10287 if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) { 10288 SvREFCNT_dec(PL_linestr); 10289 PL_linestr = linestr_save; 10290 PL_linestart = SvPVX(linestr_save); 10291 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 10292 s = d; 10293 break; 10294 } 10295 else { 10296 sv_catsv(tmpstr,PL_linestr); 10297 } 10298 } 10299 } 10300 PL_multi_end = origline + PL_parser->herelines; 10301 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) { 10302 SvPV_shrink_to_cur(tmpstr); 10303 } 10304 if (!IN_BYTES) { 10305 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr))) 10306 SvUTF8_on(tmpstr); 10307 else if (PL_encoding) 10308 sv_recode_to_utf8(tmpstr, PL_encoding); 10309 } 10310 PL_lex_stuff = tmpstr; 10311 pl_yylval.ival = op_type; 10312 return s; 10313 10314 interminable: 10315 SvREFCNT_dec(tmpstr); 10316 CopLINE_set(PL_curcop, origline); 10317 missingterm(PL_tokenbuf + 1); 10318 } 10319 10320 /* scan_inputsymbol 10321 takes: current position in input buffer 10322 returns: new position in input buffer 10323 side-effects: pl_yylval and lex_op are set. 10324 10325 This code handles: 10326 10327 <> read from ARGV 10328 <FH> read from filehandle 10329 <pkg::FH> read from package qualified filehandle 10330 <pkg'FH> read from package qualified filehandle 10331 <$fh> read from filehandle in $fh 10332 <*.h> filename glob 10333 10334 */ 10335 10336 STATIC char * 10337 S_scan_inputsymbol(pTHX_ char *start) 10338 { 10339 dVAR; 10340 char *s = start; /* current position in buffer */ 10341 char *end; 10342 I32 len; 10343 char *d = PL_tokenbuf; /* start of temp holding space */ 10344 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */ 10345 10346 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL; 10347 10348 end = strchr(s, '\n'); 10349 if (!end) 10350 end = PL_bufend; 10351 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */ 10352 10353 /* die if we didn't have space for the contents of the <>, 10354 or if it didn't end, or if we see a newline 10355 */ 10356 10357 if (len >= (I32)sizeof PL_tokenbuf) 10358 Perl_croak(aTHX_ "Excessively long <> operator"); 10359 if (s >= end) 10360 Perl_croak(aTHX_ "Unterminated <> operator"); 10361 10362 s++; 10363 10364 /* check for <$fh> 10365 Remember, only scalar variables are interpreted as filehandles by 10366 this code. Anything more complex (e.g., <$fh{$num}>) will be 10367 treated as a glob() call. 10368 This code makes use of the fact that except for the $ at the front, 10369 a scalar variable and a filehandle look the same. 10370 */ 10371 if (*d == '$' && d[1]) d++; 10372 10373 /* allow <Pkg'VALUE> or <Pkg::VALUE> */ 10374 while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':')) 10375 d += UTF ? UTF8SKIP(d) : 1; 10376 10377 /* If we've tried to read what we allow filehandles to look like, and 10378 there's still text left, then it must be a glob() and not a getline. 10379 Use scan_str to pull out the stuff between the <> and treat it 10380 as nothing more than a string. 10381 */ 10382 10383 if (d - PL_tokenbuf != len) { 10384 pl_yylval.ival = OP_GLOB; 10385 s = scan_str(start,!!PL_madskills,FALSE,FALSE,FALSE,NULL); 10386 if (!s) 10387 Perl_croak(aTHX_ "Glob not terminated"); 10388 return s; 10389 } 10390 else { 10391 bool readline_overriden = FALSE; 10392 GV *gv_readline; 10393 /* we're in a filehandle read situation */ 10394 d = PL_tokenbuf; 10395 10396 /* turn <> into <ARGV> */ 10397 if (!len) 10398 Copy("ARGV",d,5,char); 10399 10400 /* Check whether readline() is overriden */ 10401 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV); 10402 if ((gv_readline = gv_override("readline",8))) 10403 readline_overriden = TRUE; 10404 10405 /* if <$fh>, create the ops to turn the variable into a 10406 filehandle 10407 */ 10408 if (*d == '$') { 10409 /* try to find it in the pad for this block, otherwise find 10410 add symbol table ops 10411 */ 10412 const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0); 10413 if (tmp != NOT_IN_PAD) { 10414 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) { 10415 HV * const stash = PAD_COMPNAME_OURSTASH(tmp); 10416 HEK * const stashname = HvNAME_HEK(stash); 10417 SV * const sym = sv_2mortal(newSVhek(stashname)); 10418 sv_catpvs(sym, "::"); 10419 sv_catpv(sym, d+1); 10420 d = SvPVX(sym); 10421 goto intro_sym; 10422 } 10423 else { 10424 OP * const o = newOP(OP_PADSV, 0); 10425 o->op_targ = tmp; 10426 PL_lex_op = readline_overriden 10427 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED, 10428 op_append_elem(OP_LIST, o, 10429 newCVREF(0, newGVOP(OP_GV,0,gv_readline)))) 10430 : (OP*)newUNOP(OP_READLINE, 0, o); 10431 } 10432 } 10433 else { 10434 GV *gv; 10435 ++d; 10436 intro_sym: 10437 gv = gv_fetchpv(d, 10438 (PL_in_eval 10439 ? (GV_ADDMULTI | GV_ADDINEVAL) 10440 : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ), 10441 SVt_PV); 10442 PL_lex_op = readline_overriden 10443 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED, 10444 op_append_elem(OP_LIST, 10445 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)), 10446 newCVREF(0, newGVOP(OP_GV, 0, gv_readline)))) 10447 : (OP*)newUNOP(OP_READLINE, 0, 10448 newUNOP(OP_RV2SV, 0, 10449 newGVOP(OP_GV, 0, gv))); 10450 } 10451 if (!readline_overriden) 10452 PL_lex_op->op_flags |= OPf_SPECIAL; 10453 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */ 10454 pl_yylval.ival = OP_NULL; 10455 } 10456 10457 /* If it's none of the above, it must be a literal filehandle 10458 (<Foo::BAR> or <FOO>) so build a simple readline OP */ 10459 else { 10460 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO); 10461 PL_lex_op = readline_overriden 10462 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED, 10463 op_append_elem(OP_LIST, 10464 newGVOP(OP_GV, 0, gv), 10465 newCVREF(0, newGVOP(OP_GV, 0, gv_readline)))) 10466 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv)); 10467 pl_yylval.ival = OP_NULL; 10468 } 10469 } 10470 10471 return s; 10472 } 10473 10474 10475 /* scan_str 10476 takes: 10477 start position in buffer 10478 keep_quoted preserve \ on the embedded delimiter(s) 10479 keep_delims preserve the delimiters around the string 10480 re_reparse compiling a run-time /(?{})/: 10481 collapse // to /, and skip encoding src 10482 deprecate_escaped_meta issue a deprecation warning for cer- 10483 tain paired metacharacters that appear 10484 escaped within it 10485 delimp if non-null, this is set to the position of 10486 the closing delimiter, or just after it if 10487 the closing and opening delimiters differ 10488 (i.e., the opening delimiter of a substitu- 10489 tion replacement) 10490 returns: position to continue reading from buffer 10491 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and 10492 updates the read buffer. 10493 10494 This subroutine pulls a string out of the input. It is called for: 10495 q single quotes q(literal text) 10496 ' single quotes 'literal text' 10497 qq double quotes qq(interpolate $here please) 10498 " double quotes "interpolate $here please" 10499 qx backticks qx(/bin/ls -l) 10500 ` backticks `/bin/ls -l` 10501 qw quote words @EXPORT_OK = qw( func() $spam ) 10502 m// regexp match m/this/ 10503 s/// regexp substitute s/this/that/ 10504 tr/// string transliterate tr/this/that/ 10505 y/// string transliterate y/this/that/ 10506 ($*@) sub prototypes sub foo ($) 10507 (stuff) sub attr parameters sub foo : attr(stuff) 10508 <> readline or globs <FOO>, <>, <$fh>, or <*.c> 10509 10510 In most of these cases (all but <>, patterns and transliterate) 10511 yylex() calls scan_str(). m// makes yylex() call scan_pat() which 10512 calls scan_str(). s/// makes yylex() call scan_subst() which calls 10513 scan_str(). tr/// and y/// make yylex() call scan_trans() which 10514 calls scan_str(). 10515 10516 It skips whitespace before the string starts, and treats the first 10517 character as the delimiter. If the delimiter is one of ([{< then 10518 the corresponding "close" character )]}> is used as the closing 10519 delimiter. It allows quoting of delimiters, and if the string has 10520 balanced delimiters ([{<>}]) it allows nesting. 10521 10522 On success, the SV with the resulting string is put into lex_stuff or, 10523 if that is already non-NULL, into lex_repl. The second case occurs only 10524 when parsing the RHS of the special constructs s/// and tr/// (y///). 10525 For convenience, the terminating delimiter character is stuffed into 10526 SvIVX of the SV. 10527 */ 10528 10529 STATIC char * 10530 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse, 10531 bool deprecate_escaped_meta, char **delimp 10532 ) 10533 { 10534 dVAR; 10535 SV *sv; /* scalar value: string */ 10536 const char *tmps; /* temp string, used for delimiter matching */ 10537 char *s = start; /* current position in the buffer */ 10538 char term; /* terminating character */ 10539 char *to; /* current position in the sv's data */ 10540 I32 brackets = 1; /* bracket nesting level */ 10541 bool has_utf8 = FALSE; /* is there any utf8 content? */ 10542 I32 termcode; /* terminating char. code */ 10543 U8 termstr[UTF8_MAXBYTES]; /* terminating string */ 10544 STRLEN termlen; /* length of terminating string */ 10545 int last_off = 0; /* last position for nesting bracket */ 10546 char *escaped_open = NULL; 10547 line_t herelines; 10548 #ifdef PERL_MAD 10549 int stuffstart; 10550 char *tstart; 10551 #endif 10552 10553 PERL_ARGS_ASSERT_SCAN_STR; 10554 10555 /* skip space before the delimiter */ 10556 if (isSPACE(*s)) { 10557 s = PEEKSPACE(s); 10558 } 10559 10560 #ifdef PERL_MAD 10561 if (PL_realtokenstart >= 0) { 10562 stuffstart = PL_realtokenstart; 10563 PL_realtokenstart = -1; 10564 } 10565 else 10566 stuffstart = start - SvPVX(PL_linestr); 10567 #endif 10568 /* mark where we are, in case we need to report errors */ 10569 CLINE; 10570 10571 /* after skipping whitespace, the next character is the terminator */ 10572 term = *s; 10573 if (!UTF) { 10574 termcode = termstr[0] = term; 10575 termlen = 1; 10576 } 10577 else { 10578 termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen); 10579 Copy(s, termstr, termlen, U8); 10580 if (!UTF8_IS_INVARIANT(term)) 10581 has_utf8 = TRUE; 10582 } 10583 10584 /* mark where we are */ 10585 PL_multi_start = CopLINE(PL_curcop); 10586 PL_multi_open = term; 10587 herelines = PL_parser->herelines; 10588 10589 /* find corresponding closing delimiter */ 10590 if (term && (tmps = strchr("([{< )]}> )]}>",term))) 10591 termcode = termstr[0] = term = tmps[5]; 10592 10593 PL_multi_close = term; 10594 10595 /* A warning is raised if the input parameter requires it for escaped (by a 10596 * backslash) paired metacharacters {} [] and () when the delimiters are 10597 * those same characters, and the backslash is ineffective. This doesn't 10598 * happen for <>, as they aren't metas. */ 10599 if (deprecate_escaped_meta 10600 && (PL_multi_open == PL_multi_close 10601 || PL_multi_open == '<' 10602 || ! ckWARN_d(WARN_DEPRECATED))) 10603 { 10604 deprecate_escaped_meta = FALSE; 10605 } 10606 10607 /* create a new SV to hold the contents. 79 is the SV's initial length. 10608 What a random number. */ 10609 sv = newSV_type(SVt_PVIV); 10610 SvGROW(sv, 80); 10611 SvIV_set(sv, termcode); 10612 (void)SvPOK_only(sv); /* validate pointer */ 10613 10614 /* move past delimiter and try to read a complete string */ 10615 if (keep_delims) 10616 sv_catpvn(sv, s, termlen); 10617 s += termlen; 10618 #ifdef PERL_MAD 10619 tstart = SvPVX(PL_linestr) + stuffstart; 10620 if (PL_madskills && !PL_thisopen && !keep_delims) { 10621 PL_thisopen = newSVpvn(tstart, s - tstart); 10622 stuffstart = s - SvPVX(PL_linestr); 10623 } 10624 #endif 10625 for (;;) { 10626 if (PL_encoding && !UTF && !re_reparse) { 10627 bool cont = TRUE; 10628 10629 while (cont) { 10630 int offset = s - SvPVX_const(PL_linestr); 10631 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr, 10632 &offset, (char*)termstr, termlen); 10633 const char *ns; 10634 char *svlast; 10635 10636 if (SvIsCOW(PL_linestr)) { 10637 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos; 10638 STRLEN oldoldbufptr_pos, linestart_pos, last_uni_pos; 10639 STRLEN last_lop_pos, re_eval_start_pos, s_pos; 10640 char *buf = SvPVX(PL_linestr); 10641 bufend_pos = PL_parser->bufend - buf; 10642 bufptr_pos = PL_parser->bufptr - buf; 10643 oldbufptr_pos = PL_parser->oldbufptr - buf; 10644 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf; 10645 linestart_pos = PL_parser->linestart - buf; 10646 last_uni_pos = PL_parser->last_uni 10647 ? PL_parser->last_uni - buf 10648 : 0; 10649 last_lop_pos = PL_parser->last_lop 10650 ? PL_parser->last_lop - buf 10651 : 0; 10652 re_eval_start_pos = 10653 PL_parser->lex_shared->re_eval_start ? 10654 PL_parser->lex_shared->re_eval_start - buf : 0; 10655 s_pos = s - buf; 10656 10657 sv_force_normal(PL_linestr); 10658 10659 buf = SvPVX(PL_linestr); 10660 PL_parser->bufend = buf + bufend_pos; 10661 PL_parser->bufptr = buf + bufptr_pos; 10662 PL_parser->oldbufptr = buf + oldbufptr_pos; 10663 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos; 10664 PL_parser->linestart = buf + linestart_pos; 10665 if (PL_parser->last_uni) 10666 PL_parser->last_uni = buf + last_uni_pos; 10667 if (PL_parser->last_lop) 10668 PL_parser->last_lop = buf + last_lop_pos; 10669 if (PL_parser->lex_shared->re_eval_start) 10670 PL_parser->lex_shared->re_eval_start = 10671 buf + re_eval_start_pos; 10672 s = buf + s_pos; 10673 } 10674 ns = SvPVX_const(PL_linestr) + offset; 10675 svlast = SvEND(sv) - 1; 10676 10677 for (; s < ns; s++) { 10678 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered) 10679 COPLINE_INC_WITH_HERELINES; 10680 } 10681 if (!found) 10682 goto read_more_line; 10683 else { 10684 /* handle quoted delimiters */ 10685 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') { 10686 const char *t; 10687 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';) 10688 t--; 10689 if ((svlast-1 - t) % 2) { 10690 if (!keep_quoted) { 10691 *(svlast-1) = term; 10692 *svlast = '\0'; 10693 SvCUR_set(sv, SvCUR(sv) - 1); 10694 } 10695 continue; 10696 } 10697 } 10698 if (PL_multi_open == PL_multi_close) { 10699 cont = FALSE; 10700 } 10701 else { 10702 const char *t; 10703 char *w; 10704 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) { 10705 /* At here, all closes are "was quoted" one, 10706 so we don't check PL_multi_close. */ 10707 if (*t == '\\') { 10708 if (!keep_quoted && *(t+1) == PL_multi_open) 10709 t++; 10710 else 10711 *w++ = *t++; 10712 } 10713 else if (*t == PL_multi_open) 10714 brackets++; 10715 10716 *w = *t; 10717 } 10718 if (w < t) { 10719 *w++ = term; 10720 *w = '\0'; 10721 SvCUR_set(sv, w - SvPVX_const(sv)); 10722 } 10723 last_off = w - SvPVX(sv); 10724 if (--brackets <= 0) 10725 cont = FALSE; 10726 } 10727 } 10728 } 10729 if (!keep_delims) { 10730 SvCUR_set(sv, SvCUR(sv) - 1); 10731 *SvEND(sv) = '\0'; 10732 } 10733 break; 10734 } 10735 10736 /* extend sv if need be */ 10737 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1); 10738 /* set 'to' to the next character in the sv's string */ 10739 to = SvPVX(sv)+SvCUR(sv); 10740 10741 /* if open delimiter is the close delimiter read unbridle */ 10742 if (PL_multi_open == PL_multi_close) { 10743 for (; s < PL_bufend; s++,to++) { 10744 /* embedded newlines increment the current line number */ 10745 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered) 10746 COPLINE_INC_WITH_HERELINES; 10747 /* handle quoted delimiters */ 10748 if (*s == '\\' && s+1 < PL_bufend && term != '\\') { 10749 if (!keep_quoted 10750 && (s[1] == term 10751 || (re_reparse && s[1] == '\\')) 10752 ) 10753 s++; 10754 /* any other quotes are simply copied straight through */ 10755 else 10756 *to++ = *s++; 10757 } 10758 /* terminate when run out of buffer (the for() condition), or 10759 have found the terminator */ 10760 else if (*s == term) { 10761 if (termlen == 1) 10762 break; 10763 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen)) 10764 break; 10765 } 10766 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) 10767 has_utf8 = TRUE; 10768 *to = *s; 10769 } 10770 } 10771 10772 /* if the terminator isn't the same as the start character (e.g., 10773 matched brackets), we have to allow more in the quoting, and 10774 be prepared for nested brackets. 10775 */ 10776 else { 10777 /* read until we run out of string, or we find the terminator */ 10778 for (; s < PL_bufend; s++,to++) { 10779 /* embedded newlines increment the line count */ 10780 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered) 10781 COPLINE_INC_WITH_HERELINES; 10782 /* backslashes can escape the open or closing characters */ 10783 if (*s == '\\' && s+1 < PL_bufend) { 10784 if (!keep_quoted && 10785 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close))) 10786 { 10787 s++; 10788 10789 /* Here, 'deprecate_escaped_meta' is true iff the 10790 * delimiters are paired metacharacters, and 's' points 10791 * to an occurrence of one of them within the string, 10792 * which was preceded by a backslash. If this is a 10793 * context where the delimiter is also a metacharacter, 10794 * the backslash is useless, and deprecated. () and [] 10795 * are meta in any context. {} are meta only when 10796 * appearing in a quantifier or in things like '\p{' 10797 * (but '\\p{' isn't meta). They also aren't meta 10798 * unless there is a matching closed, escaped char 10799 * later on within the string. If 's' points to an 10800 * open, set a flag; if to a close, test that flag, and 10801 * raise a warning if it was set */ 10802 10803 if (deprecate_escaped_meta) { 10804 if (*s == PL_multi_open) { 10805 if (*s != '{') { 10806 escaped_open = s; 10807 } 10808 /* Look for a closing '\}' */ 10809 else if (regcurly(s, TRUE)) { 10810 escaped_open = s; 10811 } 10812 /* Look for e.g. '\x{' */ 10813 else if (s - start > 2 10814 && _generic_isCC(*(s-2), 10815 _CC_BACKSLASH_FOO_LBRACE_IS_META)) 10816 { /* Exclude '\\x', '\\\\x', etc. */ 10817 char *lookbehind = s - 4; 10818 bool is_meta = TRUE; 10819 while (lookbehind >= start 10820 && *lookbehind == '\\') 10821 { 10822 is_meta = ! is_meta; 10823 lookbehind--; 10824 } 10825 if (is_meta) { 10826 escaped_open = s; 10827 } 10828 } 10829 } 10830 else if (escaped_open) { 10831 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), 10832 "Useless use of '\\'; doesn't escape metacharacter '%c'", PL_multi_open); 10833 escaped_open = NULL; 10834 } 10835 } 10836 } 10837 else 10838 *to++ = *s++; 10839 } 10840 /* allow nested opens and closes */ 10841 else if (*s == PL_multi_close && --brackets <= 0) 10842 break; 10843 else if (*s == PL_multi_open) 10844 brackets++; 10845 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) 10846 has_utf8 = TRUE; 10847 *to = *s; 10848 } 10849 } 10850 /* terminate the copied string and update the sv's end-of-string */ 10851 *to = '\0'; 10852 SvCUR_set(sv, to - SvPVX_const(sv)); 10853 10854 /* 10855 * this next chunk reads more into the buffer if we're not done yet 10856 */ 10857 10858 if (s < PL_bufend) 10859 break; /* handle case where we are done yet :-) */ 10860 10861 #ifndef PERL_STRICT_CR 10862 if (to - SvPVX_const(sv) >= 2) { 10863 if ((to[-2] == '\r' && to[-1] == '\n') || 10864 (to[-2] == '\n' && to[-1] == '\r')) 10865 { 10866 to[-2] = '\n'; 10867 to--; 10868 SvCUR_set(sv, to - SvPVX_const(sv)); 10869 } 10870 else if (to[-1] == '\r') 10871 to[-1] = '\n'; 10872 } 10873 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r') 10874 to[-1] = '\n'; 10875 #endif 10876 10877 read_more_line: 10878 /* if we're out of file, or a read fails, bail and reset the current 10879 line marker so we can report where the unterminated string began 10880 */ 10881 #ifdef PERL_MAD 10882 if (PL_madskills) { 10883 char * const tstart = SvPVX(PL_linestr) + stuffstart; 10884 if (PL_thisstuff) 10885 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart); 10886 else 10887 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart); 10888 } 10889 #endif 10890 COPLINE_INC_WITH_HERELINES; 10891 PL_bufptr = PL_bufend; 10892 if (!lex_next_chunk(0)) { 10893 sv_free(sv); 10894 CopLINE_set(PL_curcop, (line_t)PL_multi_start); 10895 return NULL; 10896 } 10897 s = PL_bufptr; 10898 #ifdef PERL_MAD 10899 stuffstart = 0; 10900 #endif 10901 } 10902 10903 /* at this point, we have successfully read the delimited string */ 10904 10905 if (!PL_encoding || UTF || re_reparse) { 10906 #ifdef PERL_MAD 10907 if (PL_madskills) { 10908 char * const tstart = SvPVX(PL_linestr) + stuffstart; 10909 const int len = s - tstart; 10910 if (PL_thisstuff) 10911 sv_catpvn(PL_thisstuff, tstart, len); 10912 else 10913 PL_thisstuff = newSVpvn(tstart, len); 10914 if (!PL_thisclose && !keep_delims) 10915 PL_thisclose = newSVpvn(s,termlen); 10916 } 10917 #endif 10918 10919 if (keep_delims) 10920 sv_catpvn(sv, s, termlen); 10921 s += termlen; 10922 } 10923 #ifdef PERL_MAD 10924 else { 10925 if (PL_madskills) { 10926 char * const tstart = SvPVX(PL_linestr) + stuffstart; 10927 const int len = s - tstart - termlen; 10928 if (PL_thisstuff) 10929 sv_catpvn(PL_thisstuff, tstart, len); 10930 else 10931 PL_thisstuff = newSVpvn(tstart, len); 10932 if (!PL_thisclose && !keep_delims) 10933 PL_thisclose = newSVpvn(s - termlen,termlen); 10934 } 10935 } 10936 #endif 10937 if (has_utf8 || (PL_encoding && !re_reparse)) 10938 SvUTF8_on(sv); 10939 10940 PL_multi_end = CopLINE(PL_curcop); 10941 CopLINE_set(PL_curcop, PL_multi_start); 10942 PL_parser->herelines = herelines; 10943 10944 /* if we allocated too much space, give some back */ 10945 if (SvCUR(sv) + 5 < SvLEN(sv)) { 10946 SvLEN_set(sv, SvCUR(sv) + 1); 10947 SvPV_renew(sv, SvLEN(sv)); 10948 } 10949 10950 /* decide whether this is the first or second quoted string we've read 10951 for this op 10952 */ 10953 10954 if (PL_lex_stuff) 10955 PL_sublex_info.repl = sv; 10956 else 10957 PL_lex_stuff = sv; 10958 if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s; 10959 return s; 10960 } 10961 10962 /* 10963 scan_num 10964 takes: pointer to position in buffer 10965 returns: pointer to new position in buffer 10966 side-effects: builds ops for the constant in pl_yylval.op 10967 10968 Read a number in any of the formats that Perl accepts: 10969 10970 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12. 10971 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34 10972 0b[01](_?[01])* 10973 0[0-7](_?[0-7])* 10974 0x[0-9A-Fa-f](_?[0-9A-Fa-f])* 10975 10976 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the 10977 thing it reads. 10978 10979 If it reads a number without a decimal point or an exponent, it will 10980 try converting the number to an integer and see if it can do so 10981 without loss of precision. 10982 */ 10983 10984 char * 10985 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) 10986 { 10987 dVAR; 10988 const char *s = start; /* current position in buffer */ 10989 char *d; /* destination in temp buffer */ 10990 char *e; /* end of temp buffer */ 10991 NV nv; /* number read, as a double */ 10992 SV *sv = NULL; /* place to put the converted number */ 10993 bool floatit; /* boolean: int or float? */ 10994 const char *lastub = NULL; /* position of last underbar */ 10995 static const char* const number_too_long = "Number too long"; 10996 10997 PERL_ARGS_ASSERT_SCAN_NUM; 10998 10999 /* We use the first character to decide what type of number this is */ 11000 11001 switch (*s) { 11002 default: 11003 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s); 11004 11005 /* if it starts with a 0, it could be an octal number, a decimal in 11006 0.13 disguise, or a hexadecimal number, or a binary number. */ 11007 case '0': 11008 { 11009 /* variables: 11010 u holds the "number so far" 11011 shift the power of 2 of the base 11012 (hex == 4, octal == 3, binary == 1) 11013 overflowed was the number more than we can hold? 11014 11015 Shift is used when we add a digit. It also serves as an "are 11016 we in octal/hex/binary?" indicator to disallow hex characters 11017 when in octal mode. 11018 */ 11019 NV n = 0.0; 11020 UV u = 0; 11021 I32 shift; 11022 bool overflowed = FALSE; 11023 bool just_zero = TRUE; /* just plain 0 or binary number? */ 11024 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 }; 11025 static const char* const bases[5] = 11026 { "", "binary", "", "octal", "hexadecimal" }; 11027 static const char* const Bases[5] = 11028 { "", "Binary", "", "Octal", "Hexadecimal" }; 11029 static const char* const maxima[5] = 11030 { "", 11031 "0b11111111111111111111111111111111", 11032 "", 11033 "037777777777", 11034 "0xffffffff" }; 11035 const char *base, *Base, *max; 11036 11037 /* check for hex */ 11038 if (s[1] == 'x' || s[1] == 'X') { 11039 shift = 4; 11040 s += 2; 11041 just_zero = FALSE; 11042 } else if (s[1] == 'b' || s[1] == 'B') { 11043 shift = 1; 11044 s += 2; 11045 just_zero = FALSE; 11046 } 11047 /* check for a decimal in disguise */ 11048 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E') 11049 goto decimal; 11050 /* so it must be octal */ 11051 else { 11052 shift = 3; 11053 s++; 11054 } 11055 11056 if (*s == '_') { 11057 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 11058 "Misplaced _ in number"); 11059 lastub = s++; 11060 } 11061 11062 base = bases[shift]; 11063 Base = Bases[shift]; 11064 max = maxima[shift]; 11065 11066 /* read the rest of the number */ 11067 for (;;) { 11068 /* x is used in the overflow test, 11069 b is the digit we're adding on. */ 11070 UV x, b; 11071 11072 switch (*s) { 11073 11074 /* if we don't mention it, we're done */ 11075 default: 11076 goto out; 11077 11078 /* _ are ignored -- but warned about if consecutive */ 11079 case '_': 11080 if (lastub && s == lastub + 1) 11081 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 11082 "Misplaced _ in number"); 11083 lastub = s++; 11084 break; 11085 11086 /* 8 and 9 are not octal */ 11087 case '8': case '9': 11088 if (shift == 3) 11089 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s)); 11090 /* FALL THROUGH */ 11091 11092 /* octal digits */ 11093 case '2': case '3': case '4': 11094 case '5': case '6': case '7': 11095 if (shift == 1) 11096 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s)); 11097 /* FALL THROUGH */ 11098 11099 case '0': case '1': 11100 b = *s++ & 15; /* ASCII digit -> value of digit */ 11101 goto digit; 11102 11103 /* hex digits */ 11104 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 11105 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 11106 /* make sure they said 0x */ 11107 if (shift != 4) 11108 goto out; 11109 b = (*s++ & 7) + 9; 11110 11111 /* Prepare to put the digit we have onto the end 11112 of the number so far. We check for overflows. 11113 */ 11114 11115 digit: 11116 just_zero = FALSE; 11117 if (!overflowed) { 11118 x = u << shift; /* make room for the digit */ 11119 11120 if ((x >> shift) != u 11121 && !(PL_hints & HINT_NEW_BINARY)) { 11122 overflowed = TRUE; 11123 n = (NV) u; 11124 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW), 11125 "Integer overflow in %s number", 11126 base); 11127 } else 11128 u = x | b; /* add the digit to the end */ 11129 } 11130 if (overflowed) { 11131 n *= nvshift[shift]; 11132 /* If an NV has not enough bits in its 11133 * mantissa to represent an UV this summing of 11134 * small low-order numbers is a waste of time 11135 * (because the NV cannot preserve the 11136 * low-order bits anyway): we could just 11137 * remember when did we overflow and in the 11138 * end just multiply n by the right 11139 * amount. */ 11140 n += (NV) b; 11141 } 11142 break; 11143 } 11144 } 11145 11146 /* if we get here, we had success: make a scalar value from 11147 the number. 11148 */ 11149 out: 11150 11151 /* final misplaced underbar check */ 11152 if (s[-1] == '_') { 11153 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number"); 11154 } 11155 11156 if (overflowed) { 11157 if (n > 4294967295.0) 11158 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 11159 "%s number > %s non-portable", 11160 Base, max); 11161 sv = newSVnv(n); 11162 } 11163 else { 11164 #if UVSIZE > 4 11165 if (u > 0xffffffff) 11166 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 11167 "%s number > %s non-portable", 11168 Base, max); 11169 #endif 11170 sv = newSVuv(u); 11171 } 11172 if (just_zero && (PL_hints & HINT_NEW_INTEGER)) 11173 sv = new_constant(start, s - start, "integer", 11174 sv, NULL, NULL, 0); 11175 else if (PL_hints & HINT_NEW_BINARY) 11176 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0); 11177 } 11178 break; 11179 11180 /* 11181 handle decimal numbers. 11182 we're also sent here when we read a 0 as the first digit 11183 */ 11184 case '1': case '2': case '3': case '4': case '5': 11185 case '6': case '7': case '8': case '9': case '.': 11186 decimal: 11187 d = PL_tokenbuf; 11188 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */ 11189 floatit = FALSE; 11190 11191 /* read next group of digits and _ and copy into d */ 11192 while (isDIGIT(*s) || *s == '_') { 11193 /* skip underscores, checking for misplaced ones 11194 if -w is on 11195 */ 11196 if (*s == '_') { 11197 if (lastub && s == lastub + 1) 11198 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 11199 "Misplaced _ in number"); 11200 lastub = s++; 11201 } 11202 else { 11203 /* check for end of fixed-length buffer */ 11204 if (d >= e) 11205 Perl_croak(aTHX_ "%s", number_too_long); 11206 /* if we're ok, copy the character */ 11207 *d++ = *s++; 11208 } 11209 } 11210 11211 /* final misplaced underbar check */ 11212 if (lastub && s == lastub + 1) { 11213 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number"); 11214 } 11215 11216 /* read a decimal portion if there is one. avoid 11217 3..5 being interpreted as the number 3. followed 11218 by .5 11219 */ 11220 if (*s == '.' && s[1] != '.') { 11221 floatit = TRUE; 11222 *d++ = *s++; 11223 11224 if (*s == '_') { 11225 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 11226 "Misplaced _ in number"); 11227 lastub = s; 11228 } 11229 11230 /* copy, ignoring underbars, until we run out of digits. 11231 */ 11232 for (; isDIGIT(*s) || *s == '_'; s++) { 11233 /* fixed length buffer check */ 11234 if (d >= e) 11235 Perl_croak(aTHX_ "%s", number_too_long); 11236 if (*s == '_') { 11237 if (lastub && s == lastub + 1) 11238 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 11239 "Misplaced _ in number"); 11240 lastub = s; 11241 } 11242 else 11243 *d++ = *s; 11244 } 11245 /* fractional part ending in underbar? */ 11246 if (s[-1] == '_') { 11247 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 11248 "Misplaced _ in number"); 11249 } 11250 if (*s == '.' && isDIGIT(s[1])) { 11251 /* oops, it's really a v-string, but without the "v" */ 11252 s = start; 11253 goto vstring; 11254 } 11255 } 11256 11257 /* read exponent part, if present */ 11258 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) { 11259 floatit = TRUE; 11260 s++; 11261 11262 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */ 11263 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */ 11264 11265 /* stray preinitial _ */ 11266 if (*s == '_') { 11267 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 11268 "Misplaced _ in number"); 11269 lastub = s++; 11270 } 11271 11272 /* allow positive or negative exponent */ 11273 if (*s == '+' || *s == '-') 11274 *d++ = *s++; 11275 11276 /* stray initial _ */ 11277 if (*s == '_') { 11278 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 11279 "Misplaced _ in number"); 11280 lastub = s++; 11281 } 11282 11283 /* read digits of exponent */ 11284 while (isDIGIT(*s) || *s == '_') { 11285 if (isDIGIT(*s)) { 11286 if (d >= e) 11287 Perl_croak(aTHX_ "%s", number_too_long); 11288 *d++ = *s++; 11289 } 11290 else { 11291 if (((lastub && s == lastub + 1) || 11292 (!isDIGIT(s[1]) && s[1] != '_'))) 11293 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 11294 "Misplaced _ in number"); 11295 lastub = s++; 11296 } 11297 } 11298 } 11299 11300 11301 /* 11302 We try to do an integer conversion first if no characters 11303 indicating "float" have been found. 11304 */ 11305 11306 if (!floatit) { 11307 UV uv; 11308 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv); 11309 11310 if (flags == IS_NUMBER_IN_UV) { 11311 if (uv <= IV_MAX) 11312 sv = newSViv(uv); /* Prefer IVs over UVs. */ 11313 else 11314 sv = newSVuv(uv); 11315 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) { 11316 if (uv <= (UV) IV_MIN) 11317 sv = newSViv(-(IV)uv); 11318 else 11319 floatit = TRUE; 11320 } else 11321 floatit = TRUE; 11322 } 11323 if (floatit) { 11324 STORE_NUMERIC_LOCAL_SET_STANDARD(); 11325 /* terminate the string */ 11326 *d = '\0'; 11327 nv = Atof(PL_tokenbuf); 11328 RESTORE_NUMERIC_LOCAL(); 11329 sv = newSVnv(nv); 11330 } 11331 11332 if ( floatit 11333 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) { 11334 const char *const key = floatit ? "float" : "integer"; 11335 const STRLEN keylen = floatit ? 5 : 7; 11336 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf, 11337 key, keylen, sv, NULL, NULL, 0); 11338 } 11339 break; 11340 11341 /* if it starts with a v, it could be a v-string */ 11342 case 'v': 11343 vstring: 11344 sv = newSV(5); /* preallocate storage space */ 11345 ENTER_with_name("scan_vstring"); 11346 SAVEFREESV(sv); 11347 s = scan_vstring(s, PL_bufend, sv); 11348 SvREFCNT_inc_simple_void_NN(sv); 11349 LEAVE_with_name("scan_vstring"); 11350 break; 11351 } 11352 11353 /* make the op for the constant and return */ 11354 11355 if (sv) 11356 lvalp->opval = newSVOP(OP_CONST, 0, sv); 11357 else 11358 lvalp->opval = NULL; 11359 11360 return (char *)s; 11361 } 11362 11363 STATIC char * 11364 S_scan_formline(pTHX_ char *s) 11365 { 11366 dVAR; 11367 char *eol; 11368 char *t; 11369 SV * const stuff = newSVpvs(""); 11370 bool needargs = FALSE; 11371 bool eofmt = FALSE; 11372 #ifdef PERL_MAD 11373 char *tokenstart = s; 11374 SV* savewhite = NULL; 11375 11376 if (PL_madskills) { 11377 savewhite = PL_thiswhite; 11378 PL_thiswhite = 0; 11379 } 11380 #endif 11381 11382 PERL_ARGS_ASSERT_SCAN_FORMLINE; 11383 11384 while (!needargs) { 11385 if (*s == '.') { 11386 t = s+1; 11387 #ifdef PERL_STRICT_CR 11388 while (SPACE_OR_TAB(*t)) 11389 t++; 11390 #else 11391 while (SPACE_OR_TAB(*t) || *t == '\r') 11392 t++; 11393 #endif 11394 if (*t == '\n' || t == PL_bufend) { 11395 eofmt = TRUE; 11396 break; 11397 } 11398 } 11399 eol = (char *) memchr(s,'\n',PL_bufend-s); 11400 if (!eol++) 11401 eol = PL_bufend; 11402 if (*s != '#') { 11403 for (t = s; t < eol; t++) { 11404 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) { 11405 needargs = FALSE; 11406 goto enough; /* ~~ must be first line in formline */ 11407 } 11408 if (*t == '@' || *t == '^') 11409 needargs = TRUE; 11410 } 11411 if (eol > s) { 11412 sv_catpvn(stuff, s, eol-s); 11413 #ifndef PERL_STRICT_CR 11414 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') { 11415 char *end = SvPVX(stuff) + SvCUR(stuff); 11416 end[-2] = '\n'; 11417 end[-1] = '\0'; 11418 SvCUR_set(stuff, SvCUR(stuff) - 1); 11419 } 11420 #endif 11421 } 11422 else 11423 break; 11424 } 11425 s = (char*)eol; 11426 if ((PL_rsfp || PL_parser->filtered) 11427 && PL_parser->form_lex_state == LEX_NORMAL) { 11428 bool got_some; 11429 #ifdef PERL_MAD 11430 if (PL_madskills) { 11431 if (PL_thistoken) 11432 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart); 11433 else 11434 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart); 11435 } 11436 #endif 11437 PL_bufptr = PL_bufend; 11438 COPLINE_INC_WITH_HERELINES; 11439 got_some = lex_next_chunk(0); 11440 CopLINE_dec(PL_curcop); 11441 s = PL_bufptr; 11442 #ifdef PERL_MAD 11443 tokenstart = PL_bufptr; 11444 #endif 11445 if (!got_some) 11446 break; 11447 } 11448 incline(s); 11449 } 11450 enough: 11451 if (!SvCUR(stuff) || needargs) 11452 PL_lex_state = PL_parser->form_lex_state; 11453 if (SvCUR(stuff)) { 11454 PL_expect = XSTATE; 11455 if (needargs) { 11456 const char *s2 = s; 11457 while (*s2 == '\r' || *s2 == ' ' || *s2 == '\t' || *s2 == '\f' 11458 || *s2 == 013) 11459 s2++; 11460 if (*s2 == '{') { 11461 start_force(PL_curforce); 11462 PL_expect = XTERMBLOCK; 11463 NEXTVAL_NEXTTOKE.ival = 0; 11464 force_next(DO); 11465 } 11466 start_force(PL_curforce); 11467 NEXTVAL_NEXTTOKE.ival = 0; 11468 force_next(FORMLBRACK); 11469 } 11470 if (!IN_BYTES) { 11471 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff))) 11472 SvUTF8_on(stuff); 11473 else if (PL_encoding) 11474 sv_recode_to_utf8(stuff, PL_encoding); 11475 } 11476 start_force(PL_curforce); 11477 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff); 11478 force_next(THING); 11479 } 11480 else { 11481 SvREFCNT_dec(stuff); 11482 if (eofmt) 11483 PL_lex_formbrack = 0; 11484 } 11485 #ifdef PERL_MAD 11486 if (PL_madskills) { 11487 if (PL_thistoken) 11488 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart); 11489 else 11490 PL_thistoken = newSVpvn(tokenstart, s - tokenstart); 11491 PL_thiswhite = savewhite; 11492 } 11493 #endif 11494 return s; 11495 } 11496 11497 I32 11498 Perl_start_subparse(pTHX_ I32 is_format, U32 flags) 11499 { 11500 dVAR; 11501 const I32 oldsavestack_ix = PL_savestack_ix; 11502 CV* const outsidecv = PL_compcv; 11503 11504 SAVEI32(PL_subline); 11505 save_item(PL_subname); 11506 SAVESPTR(PL_compcv); 11507 11508 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV)); 11509 CvFLAGS(PL_compcv) |= flags; 11510 11511 PL_subline = CopLINE(PL_curcop); 11512 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB); 11513 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv)); 11514 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax; 11515 if (outsidecv && CvPADLIST(outsidecv)) 11516 CvPADLIST(PL_compcv)->xpadl_outid = 11517 PadlistNAMES(CvPADLIST(outsidecv)); 11518 11519 return oldsavestack_ix; 11520 } 11521 11522 static int 11523 S_yywarn(pTHX_ const char *const s, U32 flags) 11524 { 11525 dVAR; 11526 11527 PERL_ARGS_ASSERT_YYWARN; 11528 11529 PL_in_eval |= EVAL_WARNONLY; 11530 yyerror_pv(s, flags); 11531 PL_in_eval &= ~EVAL_WARNONLY; 11532 return 0; 11533 } 11534 11535 int 11536 Perl_yyerror(pTHX_ const char *const s) 11537 { 11538 PERL_ARGS_ASSERT_YYERROR; 11539 return yyerror_pvn(s, strlen(s), 0); 11540 } 11541 11542 int 11543 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags) 11544 { 11545 PERL_ARGS_ASSERT_YYERROR_PV; 11546 return yyerror_pvn(s, strlen(s), flags); 11547 } 11548 11549 int 11550 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags) 11551 { 11552 dVAR; 11553 const char *context = NULL; 11554 int contlen = -1; 11555 SV *msg; 11556 SV * const where_sv = newSVpvs_flags("", SVs_TEMP); 11557 int yychar = PL_parser->yychar; 11558 11559 PERL_ARGS_ASSERT_YYERROR_PVN; 11560 11561 if (!yychar || (yychar == ';' && !PL_rsfp)) 11562 sv_catpvs(where_sv, "at EOF"); 11563 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr && 11564 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr && 11565 PL_oldbufptr != PL_bufptr) { 11566 /* 11567 Only for NetWare: 11568 The code below is removed for NetWare because it abends/crashes on NetWare 11569 when the script has error such as not having the closing quotes like: 11570 if ($var eq "value) 11571 Checking of white spaces is anyway done in NetWare code. 11572 */ 11573 #ifndef NETWARE 11574 while (isSPACE(*PL_oldoldbufptr)) 11575 PL_oldoldbufptr++; 11576 #endif 11577 context = PL_oldoldbufptr; 11578 contlen = PL_bufptr - PL_oldoldbufptr; 11579 } 11580 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr && 11581 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) { 11582 /* 11583 Only for NetWare: 11584 The code below is removed for NetWare because it abends/crashes on NetWare 11585 when the script has error such as not having the closing quotes like: 11586 if ($var eq "value) 11587 Checking of white spaces is anyway done in NetWare code. 11588 */ 11589 #ifndef NETWARE 11590 while (isSPACE(*PL_oldbufptr)) 11591 PL_oldbufptr++; 11592 #endif 11593 context = PL_oldbufptr; 11594 contlen = PL_bufptr - PL_oldbufptr; 11595 } 11596 else if (yychar > 255) 11597 sv_catpvs(where_sv, "next token ???"); 11598 else if (yychar == -2) { /* YYEMPTY */ 11599 if (PL_lex_state == LEX_NORMAL || 11600 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL)) 11601 sv_catpvs(where_sv, "at end of line"); 11602 else if (PL_lex_inpat) 11603 sv_catpvs(where_sv, "within pattern"); 11604 else 11605 sv_catpvs(where_sv, "within string"); 11606 } 11607 else { 11608 sv_catpvs(where_sv, "next char "); 11609 if (yychar < 32) 11610 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar)); 11611 else if (isPRINT_LC(yychar)) { 11612 const char string = yychar; 11613 sv_catpvn(where_sv, &string, 1); 11614 } 11615 else 11616 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255); 11617 } 11618 msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP); 11619 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ", 11620 OutCopFILE(PL_curcop), 11621 (IV)(PL_parser->preambling == NOLINE 11622 ? CopLINE(PL_curcop) 11623 : PL_parser->preambling)); 11624 if (context) 11625 Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n", 11626 UTF8fARG(UTF, contlen, context)); 11627 else 11628 Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv)); 11629 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) { 11630 Perl_sv_catpvf(aTHX_ msg, 11631 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n", 11632 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start); 11633 PL_multi_end = 0; 11634 } 11635 if (PL_in_eval & EVAL_WARNONLY) { 11636 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg)); 11637 } 11638 else 11639 qerror(msg); 11640 if (PL_error_count >= 10) { 11641 SV * errsv; 11642 if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv))) 11643 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n", 11644 SVfARG(errsv), OutCopFILE(PL_curcop)); 11645 else 11646 Perl_croak(aTHX_ "%s has too many errors.\n", 11647 OutCopFILE(PL_curcop)); 11648 } 11649 PL_in_my = 0; 11650 PL_in_my_stash = NULL; 11651 return 0; 11652 } 11653 11654 STATIC char* 11655 S_swallow_bom(pTHX_ U8 *s) 11656 { 11657 dVAR; 11658 const STRLEN slen = SvCUR(PL_linestr); 11659 11660 PERL_ARGS_ASSERT_SWALLOW_BOM; 11661 11662 switch (s[0]) { 11663 case 0xFF: 11664 if (s[1] == 0xFE) { 11665 /* UTF-16 little-endian? (or UTF-32LE?) */ 11666 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */ 11667 /* diag_listed_as: Unsupported script encoding %s */ 11668 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE"); 11669 #ifndef PERL_NO_UTF16_FILTER 11670 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n"); 11671 s += 2; 11672 if (PL_bufend > (char*)s) { 11673 s = add_utf16_textfilter(s, TRUE); 11674 } 11675 #else 11676 /* diag_listed_as: Unsupported script encoding %s */ 11677 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE"); 11678 #endif 11679 } 11680 break; 11681 case 0xFE: 11682 if (s[1] == 0xFF) { /* UTF-16 big-endian? */ 11683 #ifndef PERL_NO_UTF16_FILTER 11684 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n"); 11685 s += 2; 11686 if (PL_bufend > (char *)s) { 11687 s = add_utf16_textfilter(s, FALSE); 11688 } 11689 #else 11690 /* diag_listed_as: Unsupported script encoding %s */ 11691 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE"); 11692 #endif 11693 } 11694 break; 11695 case BOM_UTF8_FIRST_BYTE: { 11696 const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */ 11697 if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) { 11698 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n"); 11699 s += len + 1; /* UTF-8 */ 11700 } 11701 break; 11702 } 11703 case 0: 11704 if (slen > 3) { 11705 if (s[1] == 0) { 11706 if (s[2] == 0xFE && s[3] == 0xFF) { 11707 /* UTF-32 big-endian */ 11708 /* diag_listed_as: Unsupported script encoding %s */ 11709 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE"); 11710 } 11711 } 11712 else if (s[2] == 0 && s[3] != 0) { 11713 /* Leading bytes 11714 * 00 xx 00 xx 11715 * are a good indicator of UTF-16BE. */ 11716 #ifndef PERL_NO_UTF16_FILTER 11717 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n"); 11718 s = add_utf16_textfilter(s, FALSE); 11719 #else 11720 /* diag_listed_as: Unsupported script encoding %s */ 11721 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE"); 11722 #endif 11723 } 11724 } 11725 11726 default: 11727 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) { 11728 /* Leading bytes 11729 * xx 00 xx 00 11730 * are a good indicator of UTF-16LE. */ 11731 #ifndef PERL_NO_UTF16_FILTER 11732 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n"); 11733 s = add_utf16_textfilter(s, TRUE); 11734 #else 11735 /* diag_listed_as: Unsupported script encoding %s */ 11736 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE"); 11737 #endif 11738 } 11739 } 11740 return (char*)s; 11741 } 11742 11743 11744 #ifndef PERL_NO_UTF16_FILTER 11745 static I32 11746 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen) 11747 { 11748 dVAR; 11749 SV *const filter = FILTER_DATA(idx); 11750 /* We re-use this each time round, throwing the contents away before we 11751 return. */ 11752 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter)); 11753 SV *const utf8_buffer = filter; 11754 IV status = IoPAGE(filter); 11755 const bool reverse = cBOOL(IoLINES(filter)); 11756 I32 retval; 11757 11758 PERL_ARGS_ASSERT_UTF16_TEXTFILTER; 11759 11760 /* As we're automatically added, at the lowest level, and hence only called 11761 from this file, we can be sure that we're not called in block mode. Hence 11762 don't bother writing code to deal with block mode. */ 11763 if (maxlen) { 11764 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen); 11765 } 11766 if (status < 0) { 11767 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status); 11768 } 11769 DEBUG_P(PerlIO_printf(Perl_debug_log, 11770 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n", 11771 FPTR2DPTR(void *, S_utf16_textfilter), 11772 reverse ? 'l' : 'b', idx, maxlen, status, 11773 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer))); 11774 11775 while (1) { 11776 STRLEN chars; 11777 STRLEN have; 11778 I32 newlen; 11779 U8 *end; 11780 /* First, look in our buffer of existing UTF-8 data: */ 11781 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer)); 11782 11783 if (nl) { 11784 ++nl; 11785 } else if (status == 0) { 11786 /* EOF */ 11787 IoPAGE(filter) = 0; 11788 nl = SvEND(utf8_buffer); 11789 } 11790 if (nl) { 11791 STRLEN got = nl - SvPVX(utf8_buffer); 11792 /* Did we have anything to append? */ 11793 retval = got != 0; 11794 sv_catpvn(sv, SvPVX(utf8_buffer), got); 11795 /* Everything else in this code works just fine if SVp_POK isn't 11796 set. This, however, needs it, and we need it to work, else 11797 we loop infinitely because the buffer is never consumed. */ 11798 sv_chop(utf8_buffer, nl); 11799 break; 11800 } 11801 11802 /* OK, not a complete line there, so need to read some more UTF-16. 11803 Read an extra octect if the buffer currently has an odd number. */ 11804 while (1) { 11805 if (status <= 0) 11806 break; 11807 if (SvCUR(utf16_buffer) >= 2) { 11808 /* Location of the high octet of the last complete code point. 11809 Gosh, UTF-16 is a pain. All the benefits of variable length, 11810 *coupled* with all the benefits of partial reads and 11811 endianness. */ 11812 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer) 11813 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2)); 11814 11815 if (*last_hi < 0xd8 || *last_hi > 0xdb) { 11816 break; 11817 } 11818 11819 /* We have the first half of a surrogate. Read more. */ 11820 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi)); 11821 } 11822 11823 status = FILTER_READ(idx + 1, utf16_buffer, 11824 160 + (SvCUR(utf16_buffer) & 1)); 11825 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer))); 11826 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);}); 11827 if (status < 0) { 11828 /* Error */ 11829 IoPAGE(filter) = status; 11830 return status; 11831 } 11832 } 11833 11834 chars = SvCUR(utf16_buffer) >> 1; 11835 have = SvCUR(utf8_buffer); 11836 SvGROW(utf8_buffer, have + chars * 3 + 1); 11837 11838 if (reverse) { 11839 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer), 11840 (U8*)SvPVX_const(utf8_buffer) + have, 11841 chars * 2, &newlen); 11842 } else { 11843 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer), 11844 (U8*)SvPVX_const(utf8_buffer) + have, 11845 chars * 2, &newlen); 11846 } 11847 SvCUR_set(utf8_buffer, have + newlen); 11848 *end = '\0'; 11849 11850 /* No need to keep this SV "well-formed" with a '\0' after the end, as 11851 it's private to us, and utf16_to_utf8{,reversed} take a 11852 (pointer,length) pair, rather than a NUL-terminated string. */ 11853 if(SvCUR(utf16_buffer) & 1) { 11854 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1]; 11855 SvCUR_set(utf16_buffer, 1); 11856 } else { 11857 SvCUR_set(utf16_buffer, 0); 11858 } 11859 } 11860 DEBUG_P(PerlIO_printf(Perl_debug_log, 11861 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n", 11862 status, 11863 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer))); 11864 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);}); 11865 return retval; 11866 } 11867 11868 static U8 * 11869 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed) 11870 { 11871 SV *filter = filter_add(S_utf16_textfilter, NULL); 11872 11873 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER; 11874 11875 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s)); 11876 sv_setpvs(filter, ""); 11877 IoLINES(filter) = reversed; 11878 IoPAGE(filter) = 1; /* Not EOF */ 11879 11880 /* Sadly, we have to return a valid pointer, come what may, so we have to 11881 ignore any error return from this. */ 11882 SvCUR_set(PL_linestr, 0); 11883 if (FILTER_READ(0, PL_linestr, 0)) { 11884 SvUTF8_on(PL_linestr); 11885 } else { 11886 SvUTF8_on(PL_linestr); 11887 } 11888 PL_bufend = SvEND(PL_linestr); 11889 return (U8*)SvPVX(PL_linestr); 11890 } 11891 #endif 11892 11893 /* 11894 Returns a pointer to the next character after the parsed 11895 vstring, as well as updating the passed in sv. 11896 11897 Function must be called like 11898 11899 sv = sv_2mortal(newSV(5)); 11900 s = scan_vstring(s,e,sv); 11901 11902 where s and e are the start and end of the string. 11903 The sv should already be large enough to store the vstring 11904 passed in, for performance reasons. 11905 11906 This function may croak if fatal warnings are enabled in the 11907 calling scope, hence the sv_2mortal in the example (to prevent 11908 a leak). Make sure to do SvREFCNT_inc afterwards if you use 11909 sv_2mortal. 11910 11911 */ 11912 11913 char * 11914 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv) 11915 { 11916 dVAR; 11917 const char *pos = s; 11918 const char *start = s; 11919 11920 PERL_ARGS_ASSERT_SCAN_VSTRING; 11921 11922 if (*pos == 'v') pos++; /* get past 'v' */ 11923 while (pos < e && (isDIGIT(*pos) || *pos == '_')) 11924 pos++; 11925 if ( *pos != '.') { 11926 /* this may not be a v-string if followed by => */ 11927 const char *next = pos; 11928 while (next < e && isSPACE(*next)) 11929 ++next; 11930 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) { 11931 /* return string not v-string */ 11932 sv_setpvn(sv,(char *)s,pos-s); 11933 return (char *)pos; 11934 } 11935 } 11936 11937 if (!isALPHA(*pos)) { 11938 U8 tmpbuf[UTF8_MAXBYTES+1]; 11939 11940 if (*s == 'v') 11941 s++; /* get past 'v' */ 11942 11943 sv_setpvs(sv, ""); 11944 11945 for (;;) { 11946 /* this is atoi() that tolerates underscores */ 11947 U8 *tmpend; 11948 UV rev = 0; 11949 const char *end = pos; 11950 UV mult = 1; 11951 while (--end >= s) { 11952 if (*end != '_') { 11953 const UV orev = rev; 11954 rev += (*end - '0') * mult; 11955 mult *= 10; 11956 if (orev > rev) 11957 /* diag_listed_as: Integer overflow in %s number */ 11958 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW), 11959 "Integer overflow in decimal number"); 11960 } 11961 } 11962 #ifdef EBCDIC 11963 if (rev > 0x7FFFFFFF) 11964 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647"); 11965 #endif 11966 /* Append native character for the rev point */ 11967 tmpend = uvchr_to_utf8(tmpbuf, rev); 11968 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf); 11969 if (!UVCHR_IS_INVARIANT(rev)) 11970 SvUTF8_on(sv); 11971 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1])) 11972 s = ++pos; 11973 else { 11974 s = pos; 11975 break; 11976 } 11977 while (pos < e && (isDIGIT(*pos) || *pos == '_')) 11978 pos++; 11979 } 11980 SvPOK_on(sv); 11981 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start); 11982 SvRMAGICAL_on(sv); 11983 } 11984 return (char *)s; 11985 } 11986 11987 int 11988 Perl_keyword_plugin_standard(pTHX_ 11989 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr) 11990 { 11991 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD; 11992 PERL_UNUSED_CONTEXT; 11993 PERL_UNUSED_ARG(keyword_ptr); 11994 PERL_UNUSED_ARG(keyword_len); 11995 PERL_UNUSED_ARG(op_ptr); 11996 return KEYWORD_PLUGIN_DECLINE; 11997 } 11998 11999 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p) 12000 static void 12001 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof) 12002 { 12003 SAVEI32(PL_lex_brackets); 12004 if (PL_lex_brackets > 100) 12005 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); 12006 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF; 12007 SAVEI32(PL_lex_allbrackets); 12008 PL_lex_allbrackets = 0; 12009 SAVEI8(PL_lex_fakeeof); 12010 PL_lex_fakeeof = (U8)fakeeof; 12011 if(yyparse(gramtype) && !PL_parser->error_count) 12012 qerror(Perl_mess(aTHX_ "Parse error")); 12013 } 12014 12015 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p) 12016 static OP * 12017 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof) 12018 { 12019 OP *o; 12020 ENTER; 12021 SAVEVPTR(PL_eval_root); 12022 PL_eval_root = NULL; 12023 parse_recdescent(gramtype, fakeeof); 12024 o = PL_eval_root; 12025 LEAVE; 12026 return o; 12027 } 12028 12029 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f) 12030 static OP * 12031 S_parse_expr(pTHX_ I32 fakeeof, U32 flags) 12032 { 12033 OP *exprop; 12034 if (flags & ~PARSE_OPTIONAL) 12035 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr"); 12036 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof); 12037 if (!exprop && !(flags & PARSE_OPTIONAL)) { 12038 if (!PL_parser->error_count) 12039 qerror(Perl_mess(aTHX_ "Parse error")); 12040 exprop = newOP(OP_NULL, 0); 12041 } 12042 return exprop; 12043 } 12044 12045 /* 12046 =for apidoc Amx|OP *|parse_arithexpr|U32 flags 12047 12048 Parse a Perl arithmetic expression. This may contain operators of precedence 12049 down to the bit shift operators. The expression must be followed (and thus 12050 terminated) either by a comparison or lower-precedence operator or by 12051 something that would normally terminate an expression such as semicolon. 12052 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional, 12053 otherwise it is mandatory. It is up to the caller to ensure that the 12054 dynamic parser state (L</PL_parser> et al) is correctly set to reflect 12055 the source of the code to be parsed and the lexical context for the 12056 expression. 12057 12058 The op tree representing the expression is returned. If an optional 12059 expression is absent, a null pointer is returned, otherwise the pointer 12060 will be non-null. 12061 12062 If an error occurs in parsing or compilation, in most cases a valid op 12063 tree is returned anyway. The error is reflected in the parser state, 12064 normally resulting in a single exception at the top level of parsing 12065 which covers all the compilation errors that occurred. Some compilation 12066 errors, however, will throw an exception immediately. 12067 12068 =cut 12069 */ 12070 12071 OP * 12072 Perl_parse_arithexpr(pTHX_ U32 flags) 12073 { 12074 return parse_expr(LEX_FAKEEOF_COMPARE, flags); 12075 } 12076 12077 /* 12078 =for apidoc Amx|OP *|parse_termexpr|U32 flags 12079 12080 Parse a Perl term expression. This may contain operators of precedence 12081 down to the assignment operators. The expression must be followed (and thus 12082 terminated) either by a comma or lower-precedence operator or by 12083 something that would normally terminate an expression such as semicolon. 12084 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional, 12085 otherwise it is mandatory. It is up to the caller to ensure that the 12086 dynamic parser state (L</PL_parser> et al) is correctly set to reflect 12087 the source of the code to be parsed and the lexical context for the 12088 expression. 12089 12090 The op tree representing the expression is returned. If an optional 12091 expression is absent, a null pointer is returned, otherwise the pointer 12092 will be non-null. 12093 12094 If an error occurs in parsing or compilation, in most cases a valid op 12095 tree is returned anyway. The error is reflected in the parser state, 12096 normally resulting in a single exception at the top level of parsing 12097 which covers all the compilation errors that occurred. Some compilation 12098 errors, however, will throw an exception immediately. 12099 12100 =cut 12101 */ 12102 12103 OP * 12104 Perl_parse_termexpr(pTHX_ U32 flags) 12105 { 12106 return parse_expr(LEX_FAKEEOF_COMMA, flags); 12107 } 12108 12109 /* 12110 =for apidoc Amx|OP *|parse_listexpr|U32 flags 12111 12112 Parse a Perl list expression. This may contain operators of precedence 12113 down to the comma operator. The expression must be followed (and thus 12114 terminated) either by a low-precedence logic operator such as C<or> or by 12115 something that would normally terminate an expression such as semicolon. 12116 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional, 12117 otherwise it is mandatory. It is up to the caller to ensure that the 12118 dynamic parser state (L</PL_parser> et al) is correctly set to reflect 12119 the source of the code to be parsed and the lexical context for the 12120 expression. 12121 12122 The op tree representing the expression is returned. If an optional 12123 expression is absent, a null pointer is returned, otherwise the pointer 12124 will be non-null. 12125 12126 If an error occurs in parsing or compilation, in most cases a valid op 12127 tree is returned anyway. The error is reflected in the parser state, 12128 normally resulting in a single exception at the top level of parsing 12129 which covers all the compilation errors that occurred. Some compilation 12130 errors, however, will throw an exception immediately. 12131 12132 =cut 12133 */ 12134 12135 OP * 12136 Perl_parse_listexpr(pTHX_ U32 flags) 12137 { 12138 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags); 12139 } 12140 12141 /* 12142 =for apidoc Amx|OP *|parse_fullexpr|U32 flags 12143 12144 Parse a single complete Perl expression. This allows the full 12145 expression grammar, including the lowest-precedence operators such 12146 as C<or>. The expression must be followed (and thus terminated) by a 12147 token that an expression would normally be terminated by: end-of-file, 12148 closing bracketing punctuation, semicolon, or one of the keywords that 12149 signals a postfix expression-statement modifier. If I<flags> includes 12150 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is 12151 mandatory. It is up to the caller to ensure that the dynamic parser 12152 state (L</PL_parser> et al) is correctly set to reflect the source of 12153 the code to be parsed and the lexical context for the expression. 12154 12155 The op tree representing the expression is returned. If an optional 12156 expression is absent, a null pointer is returned, otherwise the pointer 12157 will be non-null. 12158 12159 If an error occurs in parsing or compilation, in most cases a valid op 12160 tree is returned anyway. The error is reflected in the parser state, 12161 normally resulting in a single exception at the top level of parsing 12162 which covers all the compilation errors that occurred. Some compilation 12163 errors, however, will throw an exception immediately. 12164 12165 =cut 12166 */ 12167 12168 OP * 12169 Perl_parse_fullexpr(pTHX_ U32 flags) 12170 { 12171 return parse_expr(LEX_FAKEEOF_NONEXPR, flags); 12172 } 12173 12174 /* 12175 =for apidoc Amx|OP *|parse_block|U32 flags 12176 12177 Parse a single complete Perl code block. This consists of an opening 12178 brace, a sequence of statements, and a closing brace. The block 12179 constitutes a lexical scope, so C<my> variables and various compile-time 12180 effects can be contained within it. It is up to the caller to ensure 12181 that the dynamic parser state (L</PL_parser> et al) is correctly set to 12182 reflect the source of the code to be parsed and the lexical context for 12183 the statement. 12184 12185 The op tree representing the code block is returned. This is always a 12186 real op, never a null pointer. It will normally be a C<lineseq> list, 12187 including C<nextstate> or equivalent ops. No ops to construct any kind 12188 of runtime scope are included by virtue of it being a block. 12189 12190 If an error occurs in parsing or compilation, in most cases a valid op 12191 tree (most likely null) is returned anyway. The error is reflected in 12192 the parser state, normally resulting in a single exception at the top 12193 level of parsing which covers all the compilation errors that occurred. 12194 Some compilation errors, however, will throw an exception immediately. 12195 12196 The I<flags> parameter is reserved for future use, and must always 12197 be zero. 12198 12199 =cut 12200 */ 12201 12202 OP * 12203 Perl_parse_block(pTHX_ U32 flags) 12204 { 12205 if (flags) 12206 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block"); 12207 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER); 12208 } 12209 12210 /* 12211 =for apidoc Amx|OP *|parse_barestmt|U32 flags 12212 12213 Parse a single unadorned Perl statement. This may be a normal imperative 12214 statement or a declaration that has compile-time effect. It does not 12215 include any label or other affixture. It is up to the caller to ensure 12216 that the dynamic parser state (L</PL_parser> et al) is correctly set to 12217 reflect the source of the code to be parsed and the lexical context for 12218 the statement. 12219 12220 The op tree representing the statement is returned. This may be a 12221 null pointer if the statement is null, for example if it was actually 12222 a subroutine definition (which has compile-time side effects). If not 12223 null, it will be ops directly implementing the statement, suitable to 12224 pass to L</newSTATEOP>. It will not normally include a C<nextstate> or 12225 equivalent op (except for those embedded in a scope contained entirely 12226 within the statement). 12227 12228 If an error occurs in parsing or compilation, in most cases a valid op 12229 tree (most likely null) is returned anyway. The error is reflected in 12230 the parser state, normally resulting in a single exception at the top 12231 level of parsing which covers all the compilation errors that occurred. 12232 Some compilation errors, however, will throw an exception immediately. 12233 12234 The I<flags> parameter is reserved for future use, and must always 12235 be zero. 12236 12237 =cut 12238 */ 12239 12240 OP * 12241 Perl_parse_barestmt(pTHX_ U32 flags) 12242 { 12243 if (flags) 12244 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt"); 12245 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER); 12246 } 12247 12248 /* 12249 =for apidoc Amx|SV *|parse_label|U32 flags 12250 12251 Parse a single label, possibly optional, of the type that may prefix a 12252 Perl statement. It is up to the caller to ensure that the dynamic parser 12253 state (L</PL_parser> et al) is correctly set to reflect the source of 12254 the code to be parsed. If I<flags> includes C<PARSE_OPTIONAL> then the 12255 label is optional, otherwise it is mandatory. 12256 12257 The name of the label is returned in the form of a fresh scalar. If an 12258 optional label is absent, a null pointer is returned. 12259 12260 If an error occurs in parsing, which can only occur if the label is 12261 mandatory, a valid label is returned anyway. The error is reflected in 12262 the parser state, normally resulting in a single exception at the top 12263 level of parsing which covers all the compilation errors that occurred. 12264 12265 =cut 12266 */ 12267 12268 SV * 12269 Perl_parse_label(pTHX_ U32 flags) 12270 { 12271 if (flags & ~PARSE_OPTIONAL) 12272 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label"); 12273 if (PL_lex_state == LEX_KNOWNEXT) { 12274 PL_parser->yychar = yylex(); 12275 if (PL_parser->yychar == LABEL) { 12276 char * const lpv = pl_yylval.pval; 12277 STRLEN llen = strlen(lpv); 12278 PL_parser->yychar = YYEMPTY; 12279 return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0); 12280 } else { 12281 yyunlex(); 12282 goto no_label; 12283 } 12284 } else { 12285 char *s, *t; 12286 STRLEN wlen, bufptr_pos; 12287 lex_read_space(0); 12288 t = s = PL_bufptr; 12289 if (!isIDFIRST_lazy_if(s, UTF)) 12290 goto no_label; 12291 t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen); 12292 if (word_takes_any_delimeter(s, wlen)) 12293 goto no_label; 12294 bufptr_pos = s - SvPVX(PL_linestr); 12295 PL_bufptr = t; 12296 lex_read_space(LEX_KEEP_PREVIOUS); 12297 t = PL_bufptr; 12298 s = SvPVX(PL_linestr) + bufptr_pos; 12299 if (t[0] == ':' && t[1] != ':') { 12300 PL_oldoldbufptr = PL_oldbufptr; 12301 PL_oldbufptr = s; 12302 PL_bufptr = t+1; 12303 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0); 12304 } else { 12305 PL_bufptr = s; 12306 no_label: 12307 if (flags & PARSE_OPTIONAL) { 12308 return NULL; 12309 } else { 12310 qerror(Perl_mess(aTHX_ "Parse error")); 12311 return newSVpvs("x"); 12312 } 12313 } 12314 } 12315 } 12316 12317 /* 12318 =for apidoc Amx|OP *|parse_fullstmt|U32 flags 12319 12320 Parse a single complete Perl statement. This may be a normal imperative 12321 statement or a declaration that has compile-time effect, and may include 12322 optional labels. It is up to the caller to ensure that the dynamic 12323 parser state (L</PL_parser> et al) is correctly set to reflect the source 12324 of the code to be parsed and the lexical context for the statement. 12325 12326 The op tree representing the statement is returned. This may be a 12327 null pointer if the statement is null, for example if it was actually 12328 a subroutine definition (which has compile-time side effects). If not 12329 null, it will be the result of a L</newSTATEOP> call, normally including 12330 a C<nextstate> or equivalent op. 12331 12332 If an error occurs in parsing or compilation, in most cases a valid op 12333 tree (most likely null) is returned anyway. The error is reflected in 12334 the parser state, normally resulting in a single exception at the top 12335 level of parsing which covers all the compilation errors that occurred. 12336 Some compilation errors, however, will throw an exception immediately. 12337 12338 The I<flags> parameter is reserved for future use, and must always 12339 be zero. 12340 12341 =cut 12342 */ 12343 12344 OP * 12345 Perl_parse_fullstmt(pTHX_ U32 flags) 12346 { 12347 if (flags) 12348 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt"); 12349 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER); 12350 } 12351 12352 /* 12353 =for apidoc Amx|OP *|parse_stmtseq|U32 flags 12354 12355 Parse a sequence of zero or more Perl statements. These may be normal 12356 imperative statements, including optional labels, or declarations 12357 that have compile-time effect, or any mixture thereof. The statement 12358 sequence ends when a closing brace or end-of-file is encountered in a 12359 place where a new statement could have validly started. It is up to 12360 the caller to ensure that the dynamic parser state (L</PL_parser> et al) 12361 is correctly set to reflect the source of the code to be parsed and the 12362 lexical context for the statements. 12363 12364 The op tree representing the statement sequence is returned. This may 12365 be a null pointer if the statements were all null, for example if there 12366 were no statements or if there were only subroutine definitions (which 12367 have compile-time side effects). If not null, it will be a C<lineseq> 12368 list, normally including C<nextstate> or equivalent ops. 12369 12370 If an error occurs in parsing or compilation, in most cases a valid op 12371 tree is returned anyway. The error is reflected in the parser state, 12372 normally resulting in a single exception at the top level of parsing 12373 which covers all the compilation errors that occurred. Some compilation 12374 errors, however, will throw an exception immediately. 12375 12376 The I<flags> parameter is reserved for future use, and must always 12377 be zero. 12378 12379 =cut 12380 */ 12381 12382 OP * 12383 Perl_parse_stmtseq(pTHX_ U32 flags) 12384 { 12385 OP *stmtseqop; 12386 I32 c; 12387 if (flags) 12388 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq"); 12389 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING); 12390 c = lex_peek_unichar(0); 12391 if (c != -1 && c != /*{*/'}') 12392 qerror(Perl_mess(aTHX_ "Parse error")); 12393 return stmtseqop; 12394 } 12395 12396 #define lex_token_boundary() S_lex_token_boundary(aTHX) 12397 static void 12398 S_lex_token_boundary(pTHX) 12399 { 12400 PL_oldoldbufptr = PL_oldbufptr; 12401 PL_oldbufptr = PL_bufptr; 12402 } 12403 12404 #define parse_opt_lexvar() S_parse_opt_lexvar(aTHX) 12405 static OP * 12406 S_parse_opt_lexvar(pTHX) 12407 { 12408 I32 sigil, c; 12409 char *s, *d; 12410 OP *var; 12411 lex_token_boundary(); 12412 sigil = lex_read_unichar(0); 12413 if (lex_peek_unichar(0) == '#') { 12414 qerror(Perl_mess(aTHX_ "Parse error")); 12415 return NULL; 12416 } 12417 lex_read_space(0); 12418 c = lex_peek_unichar(0); 12419 if (c == -1 || !(UTF ? isIDFIRST_uni(c) : isIDFIRST_A(c))) 12420 return NULL; 12421 s = PL_bufptr; 12422 d = PL_tokenbuf + 1; 12423 PL_tokenbuf[0] = (char)sigil; 12424 parse_ident(&s, &d, PL_tokenbuf + sizeof(PL_tokenbuf) - 1, 0, cBOOL(UTF)); 12425 PL_bufptr = s; 12426 if (d == PL_tokenbuf+1) 12427 return NULL; 12428 *d = 0; 12429 var = newOP(sigil == '$' ? OP_PADSV : sigil == '@' ? OP_PADAV : OP_PADHV, 12430 OPf_MOD | (OPpLVAL_INTRO<<8)); 12431 var->op_targ = allocmy(PL_tokenbuf, d - PL_tokenbuf, UTF ? SVf_UTF8 : 0); 12432 return var; 12433 } 12434 12435 OP * 12436 Perl_parse_subsignature(pTHX) 12437 { 12438 I32 c; 12439 int prev_type = 0, pos = 0, min_arity = 0, max_arity = 0; 12440 OP *initops = NULL; 12441 lex_read_space(0); 12442 c = lex_peek_unichar(0); 12443 while (c != /*(*/')') { 12444 switch (c) { 12445 case '$': { 12446 OP *var, *expr; 12447 if (prev_type == 2) 12448 qerror(Perl_mess(aTHX_ "Slurpy parameter not last")); 12449 var = parse_opt_lexvar(); 12450 expr = var ? 12451 newBINOP(OP_AELEM, 0, 12452 ref(newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv)), 12453 OP_RV2AV), 12454 newSVOP(OP_CONST, 0, newSViv(pos))) : 12455 NULL; 12456 lex_read_space(0); 12457 c = lex_peek_unichar(0); 12458 if (c == '=') { 12459 lex_token_boundary(); 12460 lex_read_unichar(0); 12461 lex_read_space(0); 12462 c = lex_peek_unichar(0); 12463 if (c == ',' || c == /*(*/')') { 12464 if (var) 12465 qerror(Perl_mess(aTHX_ "Optional parameter " 12466 "lacks default expression")); 12467 } else { 12468 OP *defexpr = parse_termexpr(0); 12469 if (defexpr->op_type == OP_UNDEF && 12470 !(defexpr->op_flags & OPf_KIDS)) { 12471 op_free(defexpr); 12472 } else { 12473 OP *ifop = 12474 newBINOP(OP_GE, 0, 12475 scalar(newUNOP(OP_RV2AV, 0, 12476 newGVOP(OP_GV, 0, PL_defgv))), 12477 newSVOP(OP_CONST, 0, newSViv(pos+1))); 12478 expr = var ? 12479 newCONDOP(0, ifop, expr, defexpr) : 12480 newLOGOP(OP_OR, 0, ifop, defexpr); 12481 } 12482 } 12483 prev_type = 1; 12484 } else { 12485 if (prev_type == 1) 12486 qerror(Perl_mess(aTHX_ "Mandatory parameter " 12487 "follows optional parameter")); 12488 prev_type = 0; 12489 min_arity = pos + 1; 12490 } 12491 if (var) expr = newASSIGNOP(OPf_STACKED, var, 0, expr); 12492 if (expr) 12493 initops = op_append_list(OP_LINESEQ, initops, 12494 newSTATEOP(0, NULL, expr)); 12495 max_arity = ++pos; 12496 } break; 12497 case '@': 12498 case '%': { 12499 OP *var; 12500 if (prev_type == 2) 12501 qerror(Perl_mess(aTHX_ "Slurpy parameter not last")); 12502 var = parse_opt_lexvar(); 12503 if (c == '%') { 12504 OP *chkop = newLOGOP((pos & 1) ? OP_OR : OP_AND, 0, 12505 newBINOP(OP_BIT_AND, 0, 12506 scalar(newUNOP(OP_RV2AV, 0, 12507 newGVOP(OP_GV, 0, PL_defgv))), 12508 newSVOP(OP_CONST, 0, newSViv(1))), 12509 newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0), 12510 newSVOP(OP_CONST, 0, 12511 newSVpvs("Odd name/value argument " 12512 "for subroutine")))); 12513 if (pos != min_arity) 12514 chkop = newLOGOP(OP_AND, 0, 12515 newBINOP(OP_GT, 0, 12516 scalar(newUNOP(OP_RV2AV, 0, 12517 newGVOP(OP_GV, 0, PL_defgv))), 12518 newSVOP(OP_CONST, 0, newSViv(pos))), 12519 chkop); 12520 initops = op_append_list(OP_LINESEQ, 12521 newSTATEOP(0, NULL, chkop), 12522 initops); 12523 } 12524 if (var) { 12525 OP *slice = pos ? 12526 op_prepend_elem(OP_ASLICE, 12527 newOP(OP_PUSHMARK, 0), 12528 newLISTOP(OP_ASLICE, 0, 12529 list(newRANGE(0, 12530 newSVOP(OP_CONST, 0, newSViv(pos)), 12531 newUNOP(OP_AV2ARYLEN, 0, 12532 ref(newUNOP(OP_RV2AV, 0, 12533 newGVOP(OP_GV, 0, PL_defgv)), 12534 OP_AV2ARYLEN)))), 12535 ref(newUNOP(OP_RV2AV, 0, 12536 newGVOP(OP_GV, 0, PL_defgv)), 12537 OP_ASLICE))) : 12538 newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv)); 12539 initops = op_append_list(OP_LINESEQ, initops, 12540 newSTATEOP(0, NULL, 12541 newASSIGNOP(OPf_STACKED, var, 0, slice))); 12542 } 12543 prev_type = 2; 12544 max_arity = -1; 12545 } break; 12546 default: 12547 parse_error: 12548 qerror(Perl_mess(aTHX_ "Parse error")); 12549 return NULL; 12550 } 12551 lex_read_space(0); 12552 c = lex_peek_unichar(0); 12553 switch (c) { 12554 case /*(*/')': break; 12555 case ',': 12556 do { 12557 lex_token_boundary(); 12558 lex_read_unichar(0); 12559 lex_read_space(0); 12560 c = lex_peek_unichar(0); 12561 } while (c == ','); 12562 break; 12563 default: 12564 goto parse_error; 12565 } 12566 } 12567 if (min_arity != 0) { 12568 initops = op_append_list(OP_LINESEQ, 12569 newSTATEOP(0, NULL, 12570 newLOGOP(OP_OR, 0, 12571 newBINOP(OP_GE, 0, 12572 scalar(newUNOP(OP_RV2AV, 0, 12573 newGVOP(OP_GV, 0, PL_defgv))), 12574 newSVOP(OP_CONST, 0, newSViv(min_arity))), 12575 newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0), 12576 newSVOP(OP_CONST, 0, 12577 newSVpvs("Too few arguments for subroutine"))))), 12578 initops); 12579 } 12580 if (max_arity != -1) { 12581 initops = op_append_list(OP_LINESEQ, 12582 newSTATEOP(0, NULL, 12583 newLOGOP(OP_OR, 0, 12584 newBINOP(OP_LE, 0, 12585 scalar(newUNOP(OP_RV2AV, 0, 12586 newGVOP(OP_GV, 0, PL_defgv))), 12587 newSVOP(OP_CONST, 0, newSViv(max_arity))), 12588 newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0), 12589 newSVOP(OP_CONST, 0, 12590 newSVpvs("Too many arguments for subroutine"))))), 12591 initops); 12592 } 12593 return initops; 12594 } 12595 12596 /* 12597 * Local variables: 12598 * c-indentation-style: bsd 12599 * c-basic-offset: 4 12600 * indent-tabs-mode: nil 12601 * End: 12602 * 12603 * ex: set ts=8 sts=4 sw=4 et: 12604 */ 12605