1 /* toke.c 2 * 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others 5 * 6 * You may distribute under the terms of either the GNU General Public 7 * License or the Artistic License, as specified in the README file. 8 * 9 */ 10 11 /* 12 * 'It all comes from here, the stench and the peril.' --Frodo 13 * 14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"] 15 */ 16 17 /* 18 * This file is the lexer for Perl. It's closely linked to the 19 * parser, perly.y. 20 * 21 * The main routine is yylex(), which returns the next token. 22 */ 23 24 /* 25 =head1 Lexer interface 26 27 This is the lower layer of the Perl parser, managing characters and tokens. 28 29 =for apidoc AmU|yy_parser *|PL_parser 30 31 Pointer to a structure encapsulating the state of the parsing operation 32 currently in progress. The pointer can be locally changed to perform 33 a nested parse without interfering with the state of an outer parse. 34 Individual members of C<PL_parser> have their own documentation. 35 36 =cut 37 */ 38 39 #include "EXTERN.h" 40 #define PERL_IN_TOKE_C 41 #include "perl.h" 42 43 #define new_constant(a,b,c,d,e,f,g) \ 44 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g) 45 46 #define pl_yylval (PL_parser->yylval) 47 48 /* YYINITDEPTH -- initial size of the parser's stacks. */ 49 #define YYINITDEPTH 200 50 51 /* XXX temporary backwards compatibility */ 52 #define PL_lex_brackets (PL_parser->lex_brackets) 53 #define PL_lex_brackstack (PL_parser->lex_brackstack) 54 #define PL_lex_casemods (PL_parser->lex_casemods) 55 #define PL_lex_casestack (PL_parser->lex_casestack) 56 #define PL_lex_defer (PL_parser->lex_defer) 57 #define PL_lex_dojoin (PL_parser->lex_dojoin) 58 #define PL_lex_expect (PL_parser->lex_expect) 59 #define PL_lex_formbrack (PL_parser->lex_formbrack) 60 #define PL_lex_inpat (PL_parser->lex_inpat) 61 #define PL_lex_inwhat (PL_parser->lex_inwhat) 62 #define PL_lex_op (PL_parser->lex_op) 63 #define PL_lex_repl (PL_parser->lex_repl) 64 #define PL_lex_starts (PL_parser->lex_starts) 65 #define PL_lex_stuff (PL_parser->lex_stuff) 66 #define PL_multi_start (PL_parser->multi_start) 67 #define PL_multi_open (PL_parser->multi_open) 68 #define PL_multi_close (PL_parser->multi_close) 69 #define PL_pending_ident (PL_parser->pending_ident) 70 #define PL_preambled (PL_parser->preambled) 71 #define PL_sublex_info (PL_parser->sublex_info) 72 #define PL_linestr (PL_parser->linestr) 73 #define PL_expect (PL_parser->expect) 74 #define PL_copline (PL_parser->copline) 75 #define PL_bufptr (PL_parser->bufptr) 76 #define PL_oldbufptr (PL_parser->oldbufptr) 77 #define PL_oldoldbufptr (PL_parser->oldoldbufptr) 78 #define PL_linestart (PL_parser->linestart) 79 #define PL_bufend (PL_parser->bufend) 80 #define PL_last_uni (PL_parser->last_uni) 81 #define PL_last_lop (PL_parser->last_lop) 82 #define PL_last_lop_op (PL_parser->last_lop_op) 83 #define PL_lex_state (PL_parser->lex_state) 84 #define PL_rsfp (PL_parser->rsfp) 85 #define PL_rsfp_filters (PL_parser->rsfp_filters) 86 #define PL_in_my (PL_parser->in_my) 87 #define PL_in_my_stash (PL_parser->in_my_stash) 88 #define PL_tokenbuf (PL_parser->tokenbuf) 89 #define PL_multi_end (PL_parser->multi_end) 90 #define PL_error_count (PL_parser->error_count) 91 92 #ifdef PERL_MAD 93 # define PL_endwhite (PL_parser->endwhite) 94 # define PL_faketokens (PL_parser->faketokens) 95 # define PL_lasttoke (PL_parser->lasttoke) 96 # define PL_nextwhite (PL_parser->nextwhite) 97 # define PL_realtokenstart (PL_parser->realtokenstart) 98 # define PL_skipwhite (PL_parser->skipwhite) 99 # define PL_thisclose (PL_parser->thisclose) 100 # define PL_thismad (PL_parser->thismad) 101 # define PL_thisopen (PL_parser->thisopen) 102 # define PL_thisstuff (PL_parser->thisstuff) 103 # define PL_thistoken (PL_parser->thistoken) 104 # define PL_thiswhite (PL_parser->thiswhite) 105 # define PL_thiswhite (PL_parser->thiswhite) 106 # define PL_nexttoke (PL_parser->nexttoke) 107 # define PL_curforce (PL_parser->curforce) 108 #else 109 # define PL_nexttoke (PL_parser->nexttoke) 110 # define PL_nexttype (PL_parser->nexttype) 111 # define PL_nextval (PL_parser->nextval) 112 #endif 113 114 /* This can't be done with embed.fnc, because struct yy_parser contains a 115 member named pending_ident, which clashes with the generated #define */ 116 static int 117 S_pending_ident(pTHX); 118 119 static const char ident_too_long[] = "Identifier too long"; 120 121 #ifdef PERL_MAD 122 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; } 123 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val 124 #else 125 # define CURMAD(slot,sv) 126 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke] 127 #endif 128 129 #define XFAKEBRACK 128 130 #define XENUMMASK 127 131 132 #ifdef USE_UTF8_SCRIPTS 133 # define UTF (!IN_BYTES) 134 #else 135 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8)) 136 #endif 137 138 /* The maximum number of characters preceding the unrecognized one to display */ 139 #define UNRECOGNIZED_PRECEDE_COUNT 10 140 141 /* In variables named $^X, these are the legal values for X. 142 * 1999-02-27 mjd-perl-patch@plover.com */ 143 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x))) 144 145 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t') 146 147 /* LEX_* are values for PL_lex_state, the state of the lexer. 148 * They are arranged oddly so that the guard on the switch statement 149 * can get by with a single comparison (if the compiler is smart enough). 150 */ 151 152 /* #define LEX_NOTPARSING 11 is done in perl.h. */ 153 154 #define LEX_NORMAL 10 /* normal code (ie not within "...") */ 155 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */ 156 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */ 157 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */ 158 #define LEX_INTERPSTART 6 /* expecting the start of a $var */ 159 160 /* at end of code, eg "$x" followed by: */ 161 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */ 162 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */ 163 164 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of 165 string or after \E, $foo, etc */ 166 #define LEX_INTERPCONST 2 /* NOT USED */ 167 #define LEX_FORMLINE 1 /* expecting a format line */ 168 #define LEX_KNOWNEXT 0 /* next token known; just return it */ 169 170 171 #ifdef DEBUGGING 172 static const char* const lex_state_names[] = { 173 "KNOWNEXT", 174 "FORMLINE", 175 "INTERPCONST", 176 "INTERPCONCAT", 177 "INTERPENDMAYBE", 178 "INTERPEND", 179 "INTERPSTART", 180 "INTERPPUSH", 181 "INTERPCASEMOD", 182 "INTERPNORMAL", 183 "NORMAL" 184 }; 185 #endif 186 187 #ifdef ff_next 188 #undef ff_next 189 #endif 190 191 #include "keywords.h" 192 193 /* CLINE is a macro that ensures PL_copline has a sane value */ 194 195 #ifdef CLINE 196 #undef CLINE 197 #endif 198 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline)) 199 200 #ifdef PERL_MAD 201 # define SKIPSPACE0(s) skipspace0(s) 202 # define SKIPSPACE1(s) skipspace1(s) 203 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv) 204 # define PEEKSPACE(s) skipspace2(s,0) 205 #else 206 # define SKIPSPACE0(s) skipspace(s) 207 # define SKIPSPACE1(s) skipspace(s) 208 # define SKIPSPACE2(s,tsv) skipspace(s) 209 # define PEEKSPACE(s) skipspace(s) 210 #endif 211 212 /* 213 * Convenience functions to return different tokens and prime the 214 * lexer for the next token. They all take an argument. 215 * 216 * TOKEN : generic token (used for '(', DOLSHARP, etc) 217 * OPERATOR : generic operator 218 * AOPERATOR : assignment operator 219 * PREBLOCK : beginning the block after an if, while, foreach, ... 220 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref) 221 * PREREF : *EXPR where EXPR is not a simple identifier 222 * TERM : expression term 223 * LOOPX : loop exiting command (goto, last, dump, etc) 224 * FTST : file test operator 225 * FUN0 : zero-argument function 226 * FUN1 : not used, except for not, which isn't a UNIOP 227 * BOop : bitwise or or xor 228 * BAop : bitwise and 229 * SHop : shift operator 230 * PWop : power operator 231 * PMop : pattern-matching operator 232 * Aop : addition-level operator 233 * Mop : multiplication-level operator 234 * Eop : equality-testing operator 235 * Rop : relational operator <= != gt 236 * 237 * Also see LOP and lop() below. 238 */ 239 240 #ifdef DEBUGGING /* Serve -DT. */ 241 # define REPORT(retval) tokereport((I32)retval, &pl_yylval) 242 #else 243 # define REPORT(retval) (retval) 244 #endif 245 246 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval)) 247 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval)) 248 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval))) 249 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval)) 250 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval)) 251 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval)) 252 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval)) 253 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX)) 254 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP)) 255 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0)) 256 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1)) 257 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP))) 258 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP))) 259 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP))) 260 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP))) 261 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP)) 262 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP))) 263 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP))) 264 #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP)) 265 #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP)) 266 267 /* This bit of chicanery makes a unary function followed by 268 * a parenthesis into a function with one argument, highest precedence. 269 * The UNIDOR macro is for unary functions that can be followed by the // 270 * operator (such as C<shift // 0>). 271 */ 272 #define UNI2(f,x) { \ 273 pl_yylval.ival = f; \ 274 PL_expect = x; \ 275 PL_bufptr = s; \ 276 PL_last_uni = PL_oldbufptr; \ 277 PL_last_lop_op = f; \ 278 if (*s == '(') \ 279 return REPORT( (int)FUNC1 ); \ 280 s = PEEKSPACE(s); \ 281 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \ 282 } 283 #define UNI(f) UNI2(f,XTERM) 284 #define UNIDOR(f) UNI2(f,XTERMORDORDOR) 285 286 #define UNIBRACK(f) { \ 287 pl_yylval.ival = f; \ 288 PL_bufptr = s; \ 289 PL_last_uni = PL_oldbufptr; \ 290 if (*s == '(') \ 291 return REPORT( (int)FUNC1 ); \ 292 s = PEEKSPACE(s); \ 293 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \ 294 } 295 296 /* grandfather return to old style */ 297 #define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP) 298 299 #ifdef DEBUGGING 300 301 /* how to interpret the pl_yylval associated with the token */ 302 enum token_type { 303 TOKENTYPE_NONE, 304 TOKENTYPE_IVAL, 305 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */ 306 TOKENTYPE_PVAL, 307 TOKENTYPE_OPVAL, 308 TOKENTYPE_GVVAL 309 }; 310 311 static struct debug_tokens { 312 const int token; 313 enum token_type type; 314 const char *name; 315 } const debug_tokens[] = 316 { 317 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" }, 318 { ANDAND, TOKENTYPE_NONE, "ANDAND" }, 319 { ANDOP, TOKENTYPE_NONE, "ANDOP" }, 320 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" }, 321 { ARROW, TOKENTYPE_NONE, "ARROW" }, 322 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" }, 323 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" }, 324 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" }, 325 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" }, 326 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" }, 327 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" }, 328 { DO, TOKENTYPE_NONE, "DO" }, 329 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" }, 330 { DORDOR, TOKENTYPE_NONE, "DORDOR" }, 331 { DOROP, TOKENTYPE_OPNUM, "DOROP" }, 332 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" }, 333 { ELSE, TOKENTYPE_NONE, "ELSE" }, 334 { ELSIF, TOKENTYPE_IVAL, "ELSIF" }, 335 { EQOP, TOKENTYPE_OPNUM, "EQOP" }, 336 { FOR, TOKENTYPE_IVAL, "FOR" }, 337 { FORMAT, TOKENTYPE_NONE, "FORMAT" }, 338 { FUNC, TOKENTYPE_OPNUM, "FUNC" }, 339 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" }, 340 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" }, 341 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" }, 342 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" }, 343 { GIVEN, TOKENTYPE_IVAL, "GIVEN" }, 344 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" }, 345 { IF, TOKENTYPE_IVAL, "IF" }, 346 { LABEL, TOKENTYPE_PVAL, "LABEL" }, 347 { LOCAL, TOKENTYPE_IVAL, "LOCAL" }, 348 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" }, 349 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" }, 350 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" }, 351 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" }, 352 { METHOD, TOKENTYPE_OPVAL, "METHOD" }, 353 { MULOP, TOKENTYPE_OPNUM, "MULOP" }, 354 { MY, TOKENTYPE_IVAL, "MY" }, 355 { MYSUB, TOKENTYPE_NONE, "MYSUB" }, 356 { NOAMP, TOKENTYPE_NONE, "NOAMP" }, 357 { NOTOP, TOKENTYPE_NONE, "NOTOP" }, 358 { OROP, TOKENTYPE_IVAL, "OROP" }, 359 { OROR, TOKENTYPE_NONE, "OROR" }, 360 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" }, 361 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" }, 362 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" }, 363 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" }, 364 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" }, 365 { POSTINC, TOKENTYPE_NONE, "POSTINC" }, 366 { POWOP, TOKENTYPE_OPNUM, "POWOP" }, 367 { PREDEC, TOKENTYPE_NONE, "PREDEC" }, 368 { PREINC, TOKENTYPE_NONE, "PREINC" }, 369 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" }, 370 { REFGEN, TOKENTYPE_NONE, "REFGEN" }, 371 { RELOP, TOKENTYPE_OPNUM, "RELOP" }, 372 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" }, 373 { SUB, TOKENTYPE_NONE, "SUB" }, 374 { THING, TOKENTYPE_OPVAL, "THING" }, 375 { UMINUS, TOKENTYPE_NONE, "UMINUS" }, 376 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" }, 377 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" }, 378 { UNLESS, TOKENTYPE_IVAL, "UNLESS" }, 379 { UNTIL, TOKENTYPE_IVAL, "UNTIL" }, 380 { USE, TOKENTYPE_IVAL, "USE" }, 381 { WHEN, TOKENTYPE_IVAL, "WHEN" }, 382 { WHILE, TOKENTYPE_IVAL, "WHILE" }, 383 { WORD, TOKENTYPE_OPVAL, "WORD" }, 384 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" }, 385 { 0, TOKENTYPE_NONE, NULL } 386 }; 387 388 /* dump the returned token in rv, plus any optional arg in pl_yylval */ 389 390 STATIC int 391 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp) 392 { 393 dVAR; 394 395 PERL_ARGS_ASSERT_TOKEREPORT; 396 397 if (DEBUG_T_TEST) { 398 const char *name = NULL; 399 enum token_type type = TOKENTYPE_NONE; 400 const struct debug_tokens *p; 401 SV* const report = newSVpvs("<== "); 402 403 for (p = debug_tokens; p->token; p++) { 404 if (p->token == (int)rv) { 405 name = p->name; 406 type = p->type; 407 break; 408 } 409 } 410 if (name) 411 Perl_sv_catpv(aTHX_ report, name); 412 else if ((char)rv > ' ' && (char)rv < '~') 413 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv); 414 else if (!rv) 415 sv_catpvs(report, "EOF"); 416 else 417 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv); 418 switch (type) { 419 case TOKENTYPE_NONE: 420 case TOKENTYPE_GVVAL: /* doesn't appear to be used */ 421 break; 422 case TOKENTYPE_IVAL: 423 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival); 424 break; 425 case TOKENTYPE_OPNUM: 426 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)", 427 PL_op_name[lvalp->ival]); 428 break; 429 case TOKENTYPE_PVAL: 430 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval); 431 break; 432 case TOKENTYPE_OPVAL: 433 if (lvalp->opval) { 434 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)", 435 PL_op_name[lvalp->opval->op_type]); 436 if (lvalp->opval->op_type == OP_CONST) { 437 Perl_sv_catpvf(aTHX_ report, " %s", 438 SvPEEK(cSVOPx_sv(lvalp->opval))); 439 } 440 441 } 442 else 443 sv_catpvs(report, "(opval=null)"); 444 break; 445 } 446 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report)); 447 }; 448 return (int)rv; 449 } 450 451 452 /* print the buffer with suitable escapes */ 453 454 STATIC void 455 S_printbuf(pTHX_ const char *const fmt, const char *const s) 456 { 457 SV* const tmp = newSVpvs(""); 458 459 PERL_ARGS_ASSERT_PRINTBUF; 460 461 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60)); 462 SvREFCNT_dec(tmp); 463 } 464 465 #endif 466 467 static int 468 S_deprecate_commaless_var_list(pTHX) { 469 PL_expect = XTERM; 470 deprecate("comma-less variable list"); 471 return REPORT(','); /* grandfather non-comma-format format */ 472 } 473 474 /* 475 * S_ao 476 * 477 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR 478 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN 479 */ 480 481 STATIC int 482 S_ao(pTHX_ int toketype) 483 { 484 dVAR; 485 if (*PL_bufptr == '=') { 486 PL_bufptr++; 487 if (toketype == ANDAND) 488 pl_yylval.ival = OP_ANDASSIGN; 489 else if (toketype == OROR) 490 pl_yylval.ival = OP_ORASSIGN; 491 else if (toketype == DORDOR) 492 pl_yylval.ival = OP_DORASSIGN; 493 toketype = ASSIGNOP; 494 } 495 return toketype; 496 } 497 498 /* 499 * S_no_op 500 * When Perl expects an operator and finds something else, no_op 501 * prints the warning. It always prints "<something> found where 502 * operator expected. It prints "Missing semicolon on previous line?" 503 * if the surprise occurs at the start of the line. "do you need to 504 * predeclare ..." is printed out for code like "sub bar; foo bar $x" 505 * where the compiler doesn't know if foo is a method call or a function. 506 * It prints "Missing operator before end of line" if there's nothing 507 * after the missing operator, or "... before <...>" if there is something 508 * after the missing operator. 509 */ 510 511 STATIC void 512 S_no_op(pTHX_ const char *const what, char *s) 513 { 514 dVAR; 515 char * const oldbp = PL_bufptr; 516 const bool is_first = (PL_oldbufptr == PL_linestart); 517 518 PERL_ARGS_ASSERT_NO_OP; 519 520 if (!s) 521 s = oldbp; 522 else 523 PL_bufptr = s; 524 yywarn(Perl_form(aTHX_ "%s found where operator expected", what)); 525 if (ckWARN_d(WARN_SYNTAX)) { 526 if (is_first) 527 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 528 "\t(Missing semicolon on previous line?)\n"); 529 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) { 530 const char *t; 531 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) 532 NOOP; 533 if (t < PL_bufptr && isSPACE(*t)) 534 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 535 "\t(Do you need to predeclare %.*s?)\n", 536 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr); 537 } 538 else { 539 assert(s >= oldbp); 540 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 541 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp); 542 } 543 } 544 PL_bufptr = oldbp; 545 } 546 547 /* 548 * S_missingterm 549 * Complain about missing quote/regexp/heredoc terminator. 550 * If it's called with NULL then it cauterizes the line buffer. 551 * If we're in a delimited string and the delimiter is a control 552 * character, it's reformatted into a two-char sequence like ^C. 553 * This is fatal. 554 */ 555 556 STATIC void 557 S_missingterm(pTHX_ char *s) 558 { 559 dVAR; 560 char tmpbuf[3]; 561 char q; 562 if (s) { 563 char * const nl = strrchr(s,'\n'); 564 if (nl) 565 *nl = '\0'; 566 } 567 else if (isCNTRL(PL_multi_close)) { 568 *tmpbuf = '^'; 569 tmpbuf[1] = (char)toCTRL(PL_multi_close); 570 tmpbuf[2] = '\0'; 571 s = tmpbuf; 572 } 573 else { 574 *tmpbuf = (char)PL_multi_close; 575 tmpbuf[1] = '\0'; 576 s = tmpbuf; 577 } 578 q = strchr(s,'"') ? '\'' : '"'; 579 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q); 580 } 581 582 #define FEATURE_IS_ENABLED(name) \ 583 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \ 584 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name))) 585 /* The longest string we pass in. */ 586 #define MAX_FEATURE_LEN (sizeof("unicode_strings")-1) 587 588 /* 589 * S_feature_is_enabled 590 * Check whether the named feature is enabled. 591 */ 592 STATIC bool 593 S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen) 594 { 595 dVAR; 596 HV * const hinthv = GvHV(PL_hintgv); 597 char he_name[8 + MAX_FEATURE_LEN] = "feature_"; 598 599 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED; 600 601 assert(namelen <= MAX_FEATURE_LEN); 602 memcpy(&he_name[8], name, namelen); 603 604 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen)); 605 } 606 607 /* 608 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and 609 * utf16-to-utf8-reversed. 610 */ 611 612 #ifdef PERL_CR_FILTER 613 static void 614 strip_return(SV *sv) 615 { 616 register const char *s = SvPVX_const(sv); 617 register const char * const e = s + SvCUR(sv); 618 619 PERL_ARGS_ASSERT_STRIP_RETURN; 620 621 /* outer loop optimized to do nothing if there are no CR-LFs */ 622 while (s < e) { 623 if (*s++ == '\r' && *s == '\n') { 624 /* hit a CR-LF, need to copy the rest */ 625 register char *d = s - 1; 626 *d++ = *s++; 627 while (s < e) { 628 if (*s == '\r' && s[1] == '\n') 629 s++; 630 *d++ = *s++; 631 } 632 SvCUR(sv) -= s - d; 633 return; 634 } 635 } 636 } 637 638 STATIC I32 639 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen) 640 { 641 const I32 count = FILTER_READ(idx+1, sv, maxlen); 642 if (count > 0 && !maxlen) 643 strip_return(sv); 644 return count; 645 } 646 #endif 647 648 649 650 /* 651 * Perl_lex_start 652 * 653 * Create a parser object and initialise its parser and lexer fields 654 * 655 * rsfp is the opened file handle to read from (if any), 656 * 657 * line holds any initial content already read from the file (or in 658 * the case of no file, such as an eval, the whole contents); 659 * 660 * new_filter indicates that this is a new file and it shouldn't inherit 661 * the filters from the current parser (ie require). 662 */ 663 664 void 665 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter) 666 { 667 dVAR; 668 const char *s = NULL; 669 STRLEN len; 670 yy_parser *parser, *oparser; 671 672 /* create and initialise a parser */ 673 674 Newxz(parser, 1, yy_parser); 675 parser->old_parser = oparser = PL_parser; 676 PL_parser = parser; 677 678 Newx(parser->stack, YYINITDEPTH, yy_stack_frame); 679 parser->ps = parser->stack; 680 parser->stack_size = YYINITDEPTH; 681 682 parser->stack->state = 0; 683 parser->yyerrstatus = 0; 684 parser->yychar = YYEMPTY; /* Cause a token to be read. */ 685 686 /* on scope exit, free this parser and restore any outer one */ 687 SAVEPARSER(parser); 688 parser->saved_curcop = PL_curcop; 689 690 /* initialise lexer state */ 691 692 #ifdef PERL_MAD 693 parser->curforce = -1; 694 #else 695 parser->nexttoke = 0; 696 #endif 697 parser->error_count = oparser ? oparser->error_count : 0; 698 parser->copline = NOLINE; 699 parser->lex_state = LEX_NORMAL; 700 parser->expect = XSTATE; 701 parser->rsfp = rsfp; 702 parser->rsfp_filters = (new_filter || !oparser) ? newAV() 703 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters)); 704 705 Newx(parser->lex_brackstack, 120, char); 706 Newx(parser->lex_casestack, 12, char); 707 *parser->lex_casestack = '\0'; 708 709 if (line) { 710 s = SvPV_const(line, len); 711 } else { 712 len = 0; 713 } 714 715 if (!len) { 716 parser->linestr = newSVpvs("\n;"); 717 } else if (SvREADONLY(line) || s[len-1] != ';') { 718 parser->linestr = newSVsv(line); 719 if (s[len-1] != ';') 720 sv_catpvs(parser->linestr, "\n;"); 721 } else { 722 SvTEMP_off(line); 723 SvREFCNT_inc_simple_void_NN(line); 724 parser->linestr = line; 725 } 726 parser->oldoldbufptr = 727 parser->oldbufptr = 728 parser->bufptr = 729 parser->linestart = SvPVX(parser->linestr); 730 parser->bufend = parser->bufptr + SvCUR(parser->linestr); 731 parser->last_lop = parser->last_uni = NULL; 732 } 733 734 735 /* delete a parser object */ 736 737 void 738 Perl_parser_free(pTHX_ const yy_parser *parser) 739 { 740 PERL_ARGS_ASSERT_PARSER_FREE; 741 742 PL_curcop = parser->saved_curcop; 743 SvREFCNT_dec(parser->linestr); 744 745 if (parser->rsfp == PerlIO_stdin()) 746 PerlIO_clearerr(parser->rsfp); 747 else if (parser->rsfp && (!parser->old_parser || 748 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp))) 749 PerlIO_close(parser->rsfp); 750 SvREFCNT_dec(parser->rsfp_filters); 751 752 Safefree(parser->stack); 753 Safefree(parser->lex_brackstack); 754 Safefree(parser->lex_casestack); 755 PL_parser = parser->old_parser; 756 Safefree(parser); 757 } 758 759 760 /* 761 * Perl_lex_end 762 * Finalizer for lexing operations. Must be called when the parser is 763 * done with the lexer. 764 */ 765 766 void 767 Perl_lex_end(pTHX) 768 { 769 dVAR; 770 PL_doextract = FALSE; 771 } 772 773 /* 774 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr 775 776 Buffer scalar containing the chunk currently under consideration of the 777 text currently being lexed. This is always a plain string scalar (for 778 which C<SvPOK> is true). It is not intended to be used as a scalar by 779 normal scalar means; instead refer to the buffer directly by the pointer 780 variables described below. 781 782 The lexer maintains various C<char*> pointers to things in the 783 C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever 784 reallocated, all of these pointers must be updated. Don't attempt to 785 do this manually, but rather use L</lex_grow_linestr> if you need to 786 reallocate the buffer. 787 788 The content of the text chunk in the buffer is commonly exactly one 789 complete line of input, up to and including a newline terminator, 790 but there are situations where it is otherwise. The octets of the 791 buffer may be intended to be interpreted as either UTF-8 or Latin-1. 792 The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8> 793 flag on this scalar, which may disagree with it. 794 795 For direct examination of the buffer, the variable 796 L</PL_parser-E<gt>bufend> points to the end of the buffer. The current 797 lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use 798 of these pointers is usually preferable to examination of the scalar 799 through normal scalar means. 800 801 =for apidoc AmxU|char *|PL_parser-E<gt>bufend 802 803 Direct pointer to the end of the chunk of text currently being lexed, the 804 end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr) 805 + SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is 806 always located at the end of the buffer, and does not count as part of 807 the buffer's contents. 808 809 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr 810 811 Points to the current position of lexing inside the lexer buffer. 812 Characters around this point may be freely examined, within 813 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and 814 L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be 815 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>. 816 817 Lexing code (whether in the Perl core or not) moves this pointer past 818 the characters that it consumes. It is also expected to perform some 819 bookkeeping whenever a newline character is consumed. This movement 820 can be more conveniently performed by the function L</lex_read_to>, 821 which handles newlines appropriately. 822 823 Interpretation of the buffer's octets can be abstracted out by 824 using the slightly higher-level functions L</lex_peek_unichar> and 825 L</lex_read_unichar>. 826 827 =for apidoc AmxU|char *|PL_parser-E<gt>linestart 828 829 Points to the start of the current line inside the lexer buffer. 830 This is useful for indicating at which column an error occurred, and 831 not much else. This must be updated by any lexing code that consumes 832 a newline; the function L</lex_read_to> handles this detail. 833 834 =cut 835 */ 836 837 /* 838 =for apidoc Amx|bool|lex_bufutf8 839 840 Indicates whether the octets in the lexer buffer 841 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding 842 of Unicode characters. If not, they should be interpreted as Latin-1 843 characters. This is analogous to the C<SvUTF8> flag for scalars. 844 845 In UTF-8 mode, it is not guaranteed that the lexer buffer actually 846 contains valid UTF-8. Lexing code must be robust in the face of invalid 847 encoding. 848 849 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar 850 is significant, but not the whole story regarding the input character 851 encoding. Normally, when a file is being read, the scalar contains octets 852 and its C<SvUTF8> flag is off, but the octets should be interpreted as 853 UTF-8 if the C<use utf8> pragma is in effect. During a string eval, 854 however, the scalar may have the C<SvUTF8> flag on, and in this case its 855 octets should be interpreted as UTF-8 unless the C<use bytes> pragma 856 is in effect. This logic may change in the future; use this function 857 instead of implementing the logic yourself. 858 859 =cut 860 */ 861 862 bool 863 Perl_lex_bufutf8(pTHX) 864 { 865 return UTF; 866 } 867 868 /* 869 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len 870 871 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate 872 at least I<len> octets (including terminating NUL). Returns a 873 pointer to the reallocated buffer. This is necessary before making 874 any direct modification of the buffer that would increase its length. 875 L</lex_stuff_pvn> provides a more convenient way to insert text into 876 the buffer. 877 878 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>; 879 this function updates all of the lexer's variables that point directly 880 into the buffer. 881 882 =cut 883 */ 884 885 char * 886 Perl_lex_grow_linestr(pTHX_ STRLEN len) 887 { 888 SV *linestr; 889 char *buf; 890 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos; 891 STRLEN linestart_pos, last_uni_pos, last_lop_pos; 892 linestr = PL_parser->linestr; 893 buf = SvPVX(linestr); 894 if (len <= SvLEN(linestr)) 895 return buf; 896 bufend_pos = PL_parser->bufend - buf; 897 bufptr_pos = PL_parser->bufptr - buf; 898 oldbufptr_pos = PL_parser->oldbufptr - buf; 899 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf; 900 linestart_pos = PL_parser->linestart - buf; 901 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0; 902 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0; 903 buf = sv_grow(linestr, len); 904 PL_parser->bufend = buf + bufend_pos; 905 PL_parser->bufptr = buf + bufptr_pos; 906 PL_parser->oldbufptr = buf + oldbufptr_pos; 907 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos; 908 PL_parser->linestart = buf + linestart_pos; 909 if (PL_parser->last_uni) 910 PL_parser->last_uni = buf + last_uni_pos; 911 if (PL_parser->last_lop) 912 PL_parser->last_lop = buf + last_lop_pos; 913 return buf; 914 } 915 916 /* 917 =for apidoc Amx|void|lex_stuff_pvn|char *pv|STRLEN len|U32 flags 918 919 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>), 920 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>), 921 reallocating the buffer if necessary. This means that lexing code that 922 runs later will see the characters as if they had appeared in the input. 923 It is not recommended to do this as part of normal parsing, and most 924 uses of this facility run the risk of the inserted characters being 925 interpreted in an unintended manner. 926 927 The string to be inserted is represented by I<len> octets starting 928 at I<pv>. These octets are interpreted as either UTF-8 or Latin-1, 929 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>. 930 The characters are recoded for the lexer buffer, according to how the 931 buffer is currently being interpreted (L</lex_bufutf8>). If a string 932 to be interpreted is available as a Perl scalar, the L</lex_stuff_sv> 933 function is more convenient. 934 935 =cut 936 */ 937 938 void 939 Perl_lex_stuff_pvn(pTHX_ char *pv, STRLEN len, U32 flags) 940 { 941 dVAR; 942 char *bufptr; 943 PERL_ARGS_ASSERT_LEX_STUFF_PVN; 944 if (flags & ~(LEX_STUFF_UTF8)) 945 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn"); 946 if (UTF) { 947 if (flags & LEX_STUFF_UTF8) { 948 goto plain_copy; 949 } else { 950 STRLEN highhalf = 0; 951 char *p, *e = pv+len; 952 for (p = pv; p != e; p++) 953 highhalf += !!(((U8)*p) & 0x80); 954 if (!highhalf) 955 goto plain_copy; 956 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf); 957 bufptr = PL_parser->bufptr; 958 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char); 959 SvCUR_set(PL_parser->linestr, 960 SvCUR(PL_parser->linestr) + len+highhalf); 961 PL_parser->bufend += len+highhalf; 962 for (p = pv; p != e; p++) { 963 U8 c = (U8)*p; 964 if (c & 0x80) { 965 *bufptr++ = (char)(0xc0 | (c >> 6)); 966 *bufptr++ = (char)(0x80 | (c & 0x3f)); 967 } else { 968 *bufptr++ = (char)c; 969 } 970 } 971 } 972 } else { 973 if (flags & LEX_STUFF_UTF8) { 974 STRLEN highhalf = 0; 975 char *p, *e = pv+len; 976 for (p = pv; p != e; p++) { 977 U8 c = (U8)*p; 978 if (c >= 0xc4) { 979 Perl_croak(aTHX_ "Lexing code attempted to stuff " 980 "non-Latin-1 character into Latin-1 input"); 981 } else if (c >= 0xc2 && p+1 != e && 982 (((U8)p[1]) & 0xc0) == 0x80) { 983 p++; 984 highhalf++; 985 } else if (c >= 0x80) { 986 /* malformed UTF-8 */ 987 ENTER; 988 SAVESPTR(PL_warnhook); 989 PL_warnhook = PERL_WARNHOOK_FATAL; 990 utf8n_to_uvuni((U8*)p, e-p, NULL, 0); 991 LEAVE; 992 } 993 } 994 if (!highhalf) 995 goto plain_copy; 996 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf); 997 bufptr = PL_parser->bufptr; 998 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char); 999 SvCUR_set(PL_parser->linestr, 1000 SvCUR(PL_parser->linestr) + len-highhalf); 1001 PL_parser->bufend += len-highhalf; 1002 for (p = pv; p != e; p++) { 1003 U8 c = (U8)*p; 1004 if (c & 0x80) { 1005 *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f)); 1006 p++; 1007 } else { 1008 *bufptr++ = (char)c; 1009 } 1010 } 1011 } else { 1012 plain_copy: 1013 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len); 1014 bufptr = PL_parser->bufptr; 1015 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char); 1016 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len); 1017 PL_parser->bufend += len; 1018 Copy(pv, bufptr, len, char); 1019 } 1020 } 1021 } 1022 1023 /* 1024 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags 1025 1026 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>), 1027 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>), 1028 reallocating the buffer if necessary. This means that lexing code that 1029 runs later will see the characters as if they had appeared in the input. 1030 It is not recommended to do this as part of normal parsing, and most 1031 uses of this facility run the risk of the inserted characters being 1032 interpreted in an unintended manner. 1033 1034 The string to be inserted is the string value of I<sv>. The characters 1035 are recoded for the lexer buffer, according to how the buffer is currently 1036 being interpreted (L</lex_bufutf8>). If a string to be interpreted is 1037 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the 1038 need to construct a scalar. 1039 1040 =cut 1041 */ 1042 1043 void 1044 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags) 1045 { 1046 char *pv; 1047 STRLEN len; 1048 PERL_ARGS_ASSERT_LEX_STUFF_SV; 1049 if (flags) 1050 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv"); 1051 pv = SvPV(sv, len); 1052 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0)); 1053 } 1054 1055 /* 1056 =for apidoc Amx|void|lex_unstuff|char *ptr 1057 1058 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to 1059 I<ptr>. Text following I<ptr> will be moved, and the buffer shortened. 1060 This hides the discarded text from any lexing code that runs later, 1061 as if the text had never appeared. 1062 1063 This is not the normal way to consume lexed text. For that, use 1064 L</lex_read_to>. 1065 1066 =cut 1067 */ 1068 1069 void 1070 Perl_lex_unstuff(pTHX_ char *ptr) 1071 { 1072 char *buf, *bufend; 1073 STRLEN unstuff_len; 1074 PERL_ARGS_ASSERT_LEX_UNSTUFF; 1075 buf = PL_parser->bufptr; 1076 if (ptr < buf) 1077 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff"); 1078 if (ptr == buf) 1079 return; 1080 bufend = PL_parser->bufend; 1081 if (ptr > bufend) 1082 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff"); 1083 unstuff_len = ptr - buf; 1084 Move(ptr, buf, bufend+1-ptr, char); 1085 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len); 1086 PL_parser->bufend = bufend - unstuff_len; 1087 } 1088 1089 /* 1090 =for apidoc Amx|void|lex_read_to|char *ptr 1091 1092 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up 1093 to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>, 1094 performing the correct bookkeeping whenever a newline character is passed. 1095 This is the normal way to consume lexed text. 1096 1097 Interpretation of the buffer's octets can be abstracted out by 1098 using the slightly higher-level functions L</lex_peek_unichar> and 1099 L</lex_read_unichar>. 1100 1101 =cut 1102 */ 1103 1104 void 1105 Perl_lex_read_to(pTHX_ char *ptr) 1106 { 1107 char *s; 1108 PERL_ARGS_ASSERT_LEX_READ_TO; 1109 s = PL_parser->bufptr; 1110 if (ptr < s || ptr > PL_parser->bufend) 1111 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to"); 1112 for (; s != ptr; s++) 1113 if (*s == '\n') { 1114 CopLINE_inc(PL_curcop); 1115 PL_parser->linestart = s+1; 1116 } 1117 PL_parser->bufptr = ptr; 1118 } 1119 1120 /* 1121 =for apidoc Amx|void|lex_discard_to|char *ptr 1122 1123 Discards the first part of the L</PL_parser-E<gt>linestr> buffer, 1124 up to I<ptr>. The remaining content of the buffer will be moved, and 1125 all pointers into the buffer updated appropriately. I<ptr> must not 1126 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>: 1127 it is not permitted to discard text that has yet to be lexed. 1128 1129 Normally it is not necessarily to do this directly, because it suffices to 1130 use the implicit discarding behaviour of L</lex_next_chunk> and things 1131 based on it. However, if a token stretches across multiple lines, 1132 and the lexing code has kept multiple lines of text in the buffer fof 1133 that purpose, then after completion of the token it would be wise to 1134 explicitly discard the now-unneeded earlier lines, to avoid future 1135 multi-line tokens growing the buffer without bound. 1136 1137 =cut 1138 */ 1139 1140 void 1141 Perl_lex_discard_to(pTHX_ char *ptr) 1142 { 1143 char *buf; 1144 STRLEN discard_len; 1145 PERL_ARGS_ASSERT_LEX_DISCARD_TO; 1146 buf = SvPVX(PL_parser->linestr); 1147 if (ptr < buf) 1148 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to"); 1149 if (ptr == buf) 1150 return; 1151 if (ptr > PL_parser->bufptr) 1152 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to"); 1153 discard_len = ptr - buf; 1154 if (PL_parser->oldbufptr < ptr) 1155 PL_parser->oldbufptr = ptr; 1156 if (PL_parser->oldoldbufptr < ptr) 1157 PL_parser->oldoldbufptr = ptr; 1158 if (PL_parser->last_uni && PL_parser->last_uni < ptr) 1159 PL_parser->last_uni = NULL; 1160 if (PL_parser->last_lop && PL_parser->last_lop < ptr) 1161 PL_parser->last_lop = NULL; 1162 Move(ptr, buf, PL_parser->bufend+1-ptr, char); 1163 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len); 1164 PL_parser->bufend -= discard_len; 1165 PL_parser->bufptr -= discard_len; 1166 PL_parser->oldbufptr -= discard_len; 1167 PL_parser->oldoldbufptr -= discard_len; 1168 if (PL_parser->last_uni) 1169 PL_parser->last_uni -= discard_len; 1170 if (PL_parser->last_lop) 1171 PL_parser->last_lop -= discard_len; 1172 } 1173 1174 /* 1175 =for apidoc Amx|bool|lex_next_chunk|U32 flags 1176 1177 Reads in the next chunk of text to be lexed, appending it to 1178 L</PL_parser-E<gt>linestr>. This should be called when lexing code has 1179 looked to the end of the current chunk and wants to know more. It is 1180 usual, but not necessary, for lexing to have consumed the entirety of 1181 the current chunk at this time. 1182 1183 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current 1184 chunk (i.e., the current chunk has been entirely consumed), normally the 1185 current chunk will be discarded at the same time that the new chunk is 1186 read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk 1187 will not be discarded. If the current chunk has not been entirely 1188 consumed, then it will not be discarded regardless of the flag. 1189 1190 Returns true if some new text was added to the buffer, or false if the 1191 buffer has reached the end of the input text. 1192 1193 =cut 1194 */ 1195 1196 #define LEX_FAKE_EOF 0x80000000 1197 1198 bool 1199 Perl_lex_next_chunk(pTHX_ U32 flags) 1200 { 1201 SV *linestr; 1202 char *buf; 1203 STRLEN old_bufend_pos, new_bufend_pos; 1204 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos; 1205 STRLEN linestart_pos, last_uni_pos, last_lop_pos; 1206 bool got_some_for_debugger = 0; 1207 bool got_some; 1208 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF)) 1209 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk"); 1210 linestr = PL_parser->linestr; 1211 buf = SvPVX(linestr); 1212 if (!(flags & LEX_KEEP_PREVIOUS) && 1213 PL_parser->bufptr == PL_parser->bufend) { 1214 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0; 1215 linestart_pos = 0; 1216 if (PL_parser->last_uni != PL_parser->bufend) 1217 PL_parser->last_uni = NULL; 1218 if (PL_parser->last_lop != PL_parser->bufend) 1219 PL_parser->last_lop = NULL; 1220 last_uni_pos = last_lop_pos = 0; 1221 *buf = 0; 1222 SvCUR(linestr) = 0; 1223 } else { 1224 old_bufend_pos = PL_parser->bufend - buf; 1225 bufptr_pos = PL_parser->bufptr - buf; 1226 oldbufptr_pos = PL_parser->oldbufptr - buf; 1227 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf; 1228 linestart_pos = PL_parser->linestart - buf; 1229 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0; 1230 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0; 1231 } 1232 if (flags & LEX_FAKE_EOF) { 1233 goto eof; 1234 } else if (!PL_parser->rsfp) { 1235 got_some = 0; 1236 } else if (filter_gets(linestr, old_bufend_pos)) { 1237 got_some = 1; 1238 got_some_for_debugger = 1; 1239 } else { 1240 if (!SvPOK(linestr)) /* can get undefined by filter_gets */ 1241 sv_setpvs(linestr, ""); 1242 eof: 1243 /* End of real input. Close filehandle (unless it was STDIN), 1244 * then add implicit termination. 1245 */ 1246 if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin()) 1247 PerlIO_clearerr(PL_parser->rsfp); 1248 else if (PL_parser->rsfp) 1249 (void)PerlIO_close(PL_parser->rsfp); 1250 PL_parser->rsfp = NULL; 1251 PL_doextract = FALSE; 1252 #ifdef PERL_MAD 1253 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n)) 1254 PL_faketokens = 1; 1255 #endif 1256 if (!PL_in_eval && PL_minus_p) { 1257 sv_catpvs(linestr, 1258 /*{*/";}continue{print or die qq(-p destination: $!\\n);}"); 1259 PL_minus_n = PL_minus_p = 0; 1260 } else if (!PL_in_eval && PL_minus_n) { 1261 sv_catpvs(linestr, /*{*/";}"); 1262 PL_minus_n = 0; 1263 } else 1264 sv_catpvs(linestr, ";"); 1265 got_some = 1; 1266 } 1267 buf = SvPVX(linestr); 1268 new_bufend_pos = SvCUR(linestr); 1269 PL_parser->bufend = buf + new_bufend_pos; 1270 PL_parser->bufptr = buf + bufptr_pos; 1271 PL_parser->oldbufptr = buf + oldbufptr_pos; 1272 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos; 1273 PL_parser->linestart = buf + linestart_pos; 1274 if (PL_parser->last_uni) 1275 PL_parser->last_uni = buf + last_uni_pos; 1276 if (PL_parser->last_lop) 1277 PL_parser->last_lop = buf + last_lop_pos; 1278 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) && 1279 PL_curstash != PL_debstash) { 1280 /* debugger active and we're not compiling the debugger code, 1281 * so store the line into the debugger's array of lines 1282 */ 1283 update_debugger_info(NULL, buf+old_bufend_pos, 1284 new_bufend_pos-old_bufend_pos); 1285 } 1286 return got_some; 1287 } 1288 1289 /* 1290 =for apidoc Amx|I32|lex_peek_unichar|U32 flags 1291 1292 Looks ahead one (Unicode) character in the text currently being lexed. 1293 Returns the codepoint (unsigned integer value) of the next character, 1294 or -1 if lexing has reached the end of the input text. To consume the 1295 peeked character, use L</lex_read_unichar>. 1296 1297 If the next character is in (or extends into) the next chunk of input 1298 text, the next chunk will be read in. Normally the current chunk will be 1299 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> 1300 then the current chunk will not be discarded. 1301 1302 If the input is being interpreted as UTF-8 and a UTF-8 encoding error 1303 is encountered, an exception is generated. 1304 1305 =cut 1306 */ 1307 1308 I32 1309 Perl_lex_peek_unichar(pTHX_ U32 flags) 1310 { 1311 dVAR; 1312 char *s, *bufend; 1313 if (flags & ~(LEX_KEEP_PREVIOUS)) 1314 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar"); 1315 s = PL_parser->bufptr; 1316 bufend = PL_parser->bufend; 1317 if (UTF) { 1318 U8 head; 1319 I32 unichar; 1320 STRLEN len, retlen; 1321 if (s == bufend) { 1322 if (!lex_next_chunk(flags)) 1323 return -1; 1324 s = PL_parser->bufptr; 1325 bufend = PL_parser->bufend; 1326 } 1327 head = (U8)*s; 1328 if (!(head & 0x80)) 1329 return head; 1330 if (head & 0x40) { 1331 len = PL_utf8skip[head]; 1332 while ((STRLEN)(bufend-s) < len) { 1333 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS)) 1334 break; 1335 s = PL_parser->bufptr; 1336 bufend = PL_parser->bufend; 1337 } 1338 } 1339 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY); 1340 if (retlen == (STRLEN)-1) { 1341 /* malformed UTF-8 */ 1342 ENTER; 1343 SAVESPTR(PL_warnhook); 1344 PL_warnhook = PERL_WARNHOOK_FATAL; 1345 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0); 1346 LEAVE; 1347 } 1348 return unichar; 1349 } else { 1350 if (s == bufend) { 1351 if (!lex_next_chunk(flags)) 1352 return -1; 1353 s = PL_parser->bufptr; 1354 } 1355 return (U8)*s; 1356 } 1357 } 1358 1359 /* 1360 =for apidoc Amx|I32|lex_read_unichar|U32 flags 1361 1362 Reads the next (Unicode) character in the text currently being lexed. 1363 Returns the codepoint (unsigned integer value) of the character read, 1364 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1 1365 if lexing has reached the end of the input text. To non-destructively 1366 examine the next character, use L</lex_peek_unichar> instead. 1367 1368 If the next character is in (or extends into) the next chunk of input 1369 text, the next chunk will be read in. Normally the current chunk will be 1370 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> 1371 then the current chunk will not be discarded. 1372 1373 If the input is being interpreted as UTF-8 and a UTF-8 encoding error 1374 is encountered, an exception is generated. 1375 1376 =cut 1377 */ 1378 1379 I32 1380 Perl_lex_read_unichar(pTHX_ U32 flags) 1381 { 1382 I32 c; 1383 if (flags & ~(LEX_KEEP_PREVIOUS)) 1384 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar"); 1385 c = lex_peek_unichar(flags); 1386 if (c != -1) { 1387 if (c == '\n') 1388 CopLINE_inc(PL_curcop); 1389 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr); 1390 } 1391 return c; 1392 } 1393 1394 /* 1395 =for apidoc Amx|void|lex_read_space|U32 flags 1396 1397 Reads optional spaces, in Perl style, in the text currently being 1398 lexed. The spaces may include ordinary whitespace characters and 1399 Perl-style comments. C<#line> directives are processed if encountered. 1400 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points 1401 at a non-space character (or the end of the input text). 1402 1403 If spaces extend into the next chunk of input text, the next chunk will 1404 be read in. Normally the current chunk will be discarded at the same 1405 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current 1406 chunk will not be discarded. 1407 1408 =cut 1409 */ 1410 1411 #define LEX_NO_NEXT_CHUNK 0x80000000 1412 1413 void 1414 Perl_lex_read_space(pTHX_ U32 flags) 1415 { 1416 char *s, *bufend; 1417 bool need_incline = 0; 1418 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK)) 1419 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space"); 1420 #ifdef PERL_MAD 1421 if (PL_skipwhite) { 1422 sv_free(PL_skipwhite); 1423 PL_skipwhite = NULL; 1424 } 1425 if (PL_madskills) 1426 PL_skipwhite = newSVpvs(""); 1427 #endif /* PERL_MAD */ 1428 s = PL_parser->bufptr; 1429 bufend = PL_parser->bufend; 1430 while (1) { 1431 char c = *s; 1432 if (c == '#') { 1433 do { 1434 c = *++s; 1435 } while (!(c == '\n' || (c == 0 && s == bufend))); 1436 } else if (c == '\n') { 1437 s++; 1438 PL_parser->linestart = s; 1439 if (s == bufend) 1440 need_incline = 1; 1441 else 1442 incline(s); 1443 } else if (isSPACE(c)) { 1444 s++; 1445 } else if (c == 0 && s == bufend) { 1446 bool got_more; 1447 #ifdef PERL_MAD 1448 if (PL_madskills) 1449 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr); 1450 #endif /* PERL_MAD */ 1451 if (flags & LEX_NO_NEXT_CHUNK) 1452 break; 1453 PL_parser->bufptr = s; 1454 CopLINE_inc(PL_curcop); 1455 got_more = lex_next_chunk(flags); 1456 CopLINE_dec(PL_curcop); 1457 s = PL_parser->bufptr; 1458 bufend = PL_parser->bufend; 1459 if (!got_more) 1460 break; 1461 if (need_incline && PL_parser->rsfp) { 1462 incline(s); 1463 need_incline = 0; 1464 } 1465 } else { 1466 break; 1467 } 1468 } 1469 #ifdef PERL_MAD 1470 if (PL_madskills) 1471 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr); 1472 #endif /* PERL_MAD */ 1473 PL_parser->bufptr = s; 1474 } 1475 1476 /* 1477 * S_incline 1478 * This subroutine has nothing to do with tilting, whether at windmills 1479 * or pinball tables. Its name is short for "increment line". It 1480 * increments the current line number in CopLINE(PL_curcop) and checks 1481 * to see whether the line starts with a comment of the form 1482 * # line 500 "foo.pm" 1483 * If so, it sets the current line number and file to the values in the comment. 1484 */ 1485 1486 STATIC void 1487 S_incline(pTHX_ const char *s) 1488 { 1489 dVAR; 1490 const char *t; 1491 const char *n; 1492 const char *e; 1493 1494 PERL_ARGS_ASSERT_INCLINE; 1495 1496 CopLINE_inc(PL_curcop); 1497 if (*s++ != '#') 1498 return; 1499 while (SPACE_OR_TAB(*s)) 1500 s++; 1501 if (strnEQ(s, "line", 4)) 1502 s += 4; 1503 else 1504 return; 1505 if (SPACE_OR_TAB(*s)) 1506 s++; 1507 else 1508 return; 1509 while (SPACE_OR_TAB(*s)) 1510 s++; 1511 if (!isDIGIT(*s)) 1512 return; 1513 1514 n = s; 1515 while (isDIGIT(*s)) 1516 s++; 1517 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0') 1518 return; 1519 while (SPACE_OR_TAB(*s)) 1520 s++; 1521 if (*s == '"' && (t = strchr(s+1, '"'))) { 1522 s++; 1523 e = t + 1; 1524 } 1525 else { 1526 t = s; 1527 while (!isSPACE(*t)) 1528 t++; 1529 e = t; 1530 } 1531 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f') 1532 e++; 1533 if (*e != '\n' && *e != '\0') 1534 return; /* false alarm */ 1535 1536 if (t - s > 0) { 1537 const STRLEN len = t - s; 1538 #ifndef USE_ITHREADS 1539 SV *const temp_sv = CopFILESV(PL_curcop); 1540 const char *cf; 1541 STRLEN tmplen; 1542 1543 if (temp_sv) { 1544 cf = SvPVX(temp_sv); 1545 tmplen = SvCUR(temp_sv); 1546 } else { 1547 cf = NULL; 1548 tmplen = 0; 1549 } 1550 1551 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) { 1552 /* must copy *{"::_<(eval N)[oldfilename:L]"} 1553 * to *{"::_<newfilename"} */ 1554 /* However, the long form of evals is only turned on by the 1555 debugger - usually they're "(eval %lu)" */ 1556 char smallbuf[128]; 1557 char *tmpbuf; 1558 GV **gvp; 1559 STRLEN tmplen2 = len; 1560 if (tmplen + 2 <= sizeof smallbuf) 1561 tmpbuf = smallbuf; 1562 else 1563 Newx(tmpbuf, tmplen + 2, char); 1564 tmpbuf[0] = '_'; 1565 tmpbuf[1] = '<'; 1566 memcpy(tmpbuf + 2, cf, tmplen); 1567 tmplen += 2; 1568 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE); 1569 if (gvp) { 1570 char *tmpbuf2; 1571 GV *gv2; 1572 1573 if (tmplen2 + 2 <= sizeof smallbuf) 1574 tmpbuf2 = smallbuf; 1575 else 1576 Newx(tmpbuf2, tmplen2 + 2, char); 1577 1578 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) { 1579 /* Either they malloc'd it, or we malloc'd it, 1580 so no prefix is present in ours. */ 1581 tmpbuf2[0] = '_'; 1582 tmpbuf2[1] = '<'; 1583 } 1584 1585 memcpy(tmpbuf2 + 2, s, tmplen2); 1586 tmplen2 += 2; 1587 1588 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE); 1589 if (!isGV(gv2)) { 1590 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE); 1591 /* adjust ${"::_<newfilename"} to store the new file name */ 1592 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2); 1593 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp))); 1594 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp))); 1595 } 1596 1597 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2); 1598 } 1599 if (tmpbuf != smallbuf) Safefree(tmpbuf); 1600 } 1601 #endif 1602 CopFILE_free(PL_curcop); 1603 CopFILE_setn(PL_curcop, s, len); 1604 } 1605 CopLINE_set(PL_curcop, atoi(n)-1); 1606 } 1607 1608 #ifdef PERL_MAD 1609 /* skip space before PL_thistoken */ 1610 1611 STATIC char * 1612 S_skipspace0(pTHX_ register char *s) 1613 { 1614 PERL_ARGS_ASSERT_SKIPSPACE0; 1615 1616 s = skipspace(s); 1617 if (!PL_madskills) 1618 return s; 1619 if (PL_skipwhite) { 1620 if (!PL_thiswhite) 1621 PL_thiswhite = newSVpvs(""); 1622 sv_catsv(PL_thiswhite, PL_skipwhite); 1623 sv_free(PL_skipwhite); 1624 PL_skipwhite = 0; 1625 } 1626 PL_realtokenstart = s - SvPVX(PL_linestr); 1627 return s; 1628 } 1629 1630 /* skip space after PL_thistoken */ 1631 1632 STATIC char * 1633 S_skipspace1(pTHX_ register char *s) 1634 { 1635 const char *start = s; 1636 I32 startoff = start - SvPVX(PL_linestr); 1637 1638 PERL_ARGS_ASSERT_SKIPSPACE1; 1639 1640 s = skipspace(s); 1641 if (!PL_madskills) 1642 return s; 1643 start = SvPVX(PL_linestr) + startoff; 1644 if (!PL_thistoken && PL_realtokenstart >= 0) { 1645 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart; 1646 PL_thistoken = newSVpvn(tstart, start - tstart); 1647 } 1648 PL_realtokenstart = -1; 1649 if (PL_skipwhite) { 1650 if (!PL_nextwhite) 1651 PL_nextwhite = newSVpvs(""); 1652 sv_catsv(PL_nextwhite, PL_skipwhite); 1653 sv_free(PL_skipwhite); 1654 PL_skipwhite = 0; 1655 } 1656 return s; 1657 } 1658 1659 STATIC char * 1660 S_skipspace2(pTHX_ register char *s, SV **svp) 1661 { 1662 char *start; 1663 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr); 1664 const I32 startoff = s - SvPVX(PL_linestr); 1665 1666 PERL_ARGS_ASSERT_SKIPSPACE2; 1667 1668 s = skipspace(s); 1669 PL_bufptr = SvPVX(PL_linestr) + bufptroff; 1670 if (!PL_madskills || !svp) 1671 return s; 1672 start = SvPVX(PL_linestr) + startoff; 1673 if (!PL_thistoken && PL_realtokenstart >= 0) { 1674 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart; 1675 PL_thistoken = newSVpvn(tstart, start - tstart); 1676 PL_realtokenstart = -1; 1677 } 1678 if (PL_skipwhite) { 1679 if (!*svp) 1680 *svp = newSVpvs(""); 1681 sv_setsv(*svp, PL_skipwhite); 1682 sv_free(PL_skipwhite); 1683 PL_skipwhite = 0; 1684 } 1685 1686 return s; 1687 } 1688 #endif 1689 1690 STATIC void 1691 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len) 1692 { 1693 AV *av = CopFILEAVx(PL_curcop); 1694 if (av) { 1695 SV * const sv = newSV_type(SVt_PVMG); 1696 if (orig_sv) 1697 sv_setsv(sv, orig_sv); 1698 else 1699 sv_setpvn(sv, buf, len); 1700 (void)SvIOK_on(sv); 1701 SvIV_set(sv, 0); 1702 av_store(av, (I32)CopLINE(PL_curcop), sv); 1703 } 1704 } 1705 1706 /* 1707 * S_skipspace 1708 * Called to gobble the appropriate amount and type of whitespace. 1709 * Skips comments as well. 1710 */ 1711 1712 STATIC char * 1713 S_skipspace(pTHX_ register char *s) 1714 { 1715 #ifdef PERL_MAD 1716 char *start = s; 1717 #endif /* PERL_MAD */ 1718 PERL_ARGS_ASSERT_SKIPSPACE; 1719 #ifdef PERL_MAD 1720 if (PL_skipwhite) { 1721 sv_free(PL_skipwhite); 1722 PL_skipwhite = NULL; 1723 } 1724 #endif /* PERL_MAD */ 1725 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { 1726 while (s < PL_bufend && SPACE_OR_TAB(*s)) 1727 s++; 1728 } else { 1729 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr); 1730 PL_bufptr = s; 1731 lex_read_space(LEX_KEEP_PREVIOUS | 1732 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ? 1733 LEX_NO_NEXT_CHUNK : 0)); 1734 s = PL_bufptr; 1735 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos; 1736 if (PL_linestart > PL_bufptr) 1737 PL_bufptr = PL_linestart; 1738 return s; 1739 } 1740 #ifdef PERL_MAD 1741 if (PL_madskills) 1742 PL_skipwhite = newSVpvn(start, s-start); 1743 #endif /* PERL_MAD */ 1744 return s; 1745 } 1746 1747 /* 1748 * S_check_uni 1749 * Check the unary operators to ensure there's no ambiguity in how they're 1750 * used. An ambiguous piece of code would be: 1751 * rand + 5 1752 * This doesn't mean rand() + 5. Because rand() is a unary operator, 1753 * the +5 is its argument. 1754 */ 1755 1756 STATIC void 1757 S_check_uni(pTHX) 1758 { 1759 dVAR; 1760 const char *s; 1761 const char *t; 1762 1763 if (PL_oldoldbufptr != PL_last_uni) 1764 return; 1765 while (isSPACE(*PL_last_uni)) 1766 PL_last_uni++; 1767 s = PL_last_uni; 1768 while (isALNUM_lazy_if(s,UTF) || *s == '-') 1769 s++; 1770 if ((t = strchr(s, '(')) && t < PL_bufptr) 1771 return; 1772 1773 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), 1774 "Warning: Use of \"%.*s\" without parentheses is ambiguous", 1775 (int)(s - PL_last_uni), PL_last_uni); 1776 } 1777 1778 /* 1779 * LOP : macro to build a list operator. Its behaviour has been replaced 1780 * with a subroutine, S_lop() for which LOP is just another name. 1781 */ 1782 1783 #define LOP(f,x) return lop(f,x,s) 1784 1785 /* 1786 * S_lop 1787 * Build a list operator (or something that might be one). The rules: 1788 * - if we have a next token, then it's a list operator [why?] 1789 * - if the next thing is an opening paren, then it's a function 1790 * - else it's a list operator 1791 */ 1792 1793 STATIC I32 1794 S_lop(pTHX_ I32 f, int x, char *s) 1795 { 1796 dVAR; 1797 1798 PERL_ARGS_ASSERT_LOP; 1799 1800 pl_yylval.ival = f; 1801 CLINE; 1802 PL_expect = x; 1803 PL_bufptr = s; 1804 PL_last_lop = PL_oldbufptr; 1805 PL_last_lop_op = (OPCODE)f; 1806 #ifdef PERL_MAD 1807 if (PL_lasttoke) 1808 return REPORT(LSTOP); 1809 #else 1810 if (PL_nexttoke) 1811 return REPORT(LSTOP); 1812 #endif 1813 if (*s == '(') 1814 return REPORT(FUNC); 1815 s = PEEKSPACE(s); 1816 if (*s == '(') 1817 return REPORT(FUNC); 1818 else 1819 return REPORT(LSTOP); 1820 } 1821 1822 #ifdef PERL_MAD 1823 /* 1824 * S_start_force 1825 * Sets up for an eventual force_next(). start_force(0) basically does 1826 * an unshift, while start_force(-1) does a push. yylex removes items 1827 * on the "pop" end. 1828 */ 1829 1830 STATIC void 1831 S_start_force(pTHX_ int where) 1832 { 1833 int i; 1834 1835 if (where < 0) /* so people can duplicate start_force(PL_curforce) */ 1836 where = PL_lasttoke; 1837 assert(PL_curforce < 0 || PL_curforce == where); 1838 if (PL_curforce != where) { 1839 for (i = PL_lasttoke; i > where; --i) { 1840 PL_nexttoke[i] = PL_nexttoke[i-1]; 1841 } 1842 PL_lasttoke++; 1843 } 1844 if (PL_curforce < 0) /* in case of duplicate start_force() */ 1845 Zero(&PL_nexttoke[where], 1, NEXTTOKE); 1846 PL_curforce = where; 1847 if (PL_nextwhite) { 1848 if (PL_madskills) 1849 curmad('^', newSVpvs("")); 1850 CURMAD('_', PL_nextwhite); 1851 } 1852 } 1853 1854 STATIC void 1855 S_curmad(pTHX_ char slot, SV *sv) 1856 { 1857 MADPROP **where; 1858 1859 if (!sv) 1860 return; 1861 if (PL_curforce < 0) 1862 where = &PL_thismad; 1863 else 1864 where = &PL_nexttoke[PL_curforce].next_mad; 1865 1866 if (PL_faketokens) 1867 sv_setpvs(sv, ""); 1868 else { 1869 if (!IN_BYTES) { 1870 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv))) 1871 SvUTF8_on(sv); 1872 else if (PL_encoding) { 1873 sv_recode_to_utf8(sv, PL_encoding); 1874 } 1875 } 1876 } 1877 1878 /* keep a slot open for the head of the list? */ 1879 if (slot != '_' && *where && (*where)->mad_key == '^') { 1880 (*where)->mad_key = slot; 1881 sv_free(MUTABLE_SV(((*where)->mad_val))); 1882 (*where)->mad_val = (void*)sv; 1883 } 1884 else 1885 addmad(newMADsv(slot, sv), where, 0); 1886 } 1887 #else 1888 # define start_force(where) NOOP 1889 # define curmad(slot, sv) NOOP 1890 #endif 1891 1892 /* 1893 * S_force_next 1894 * When the lexer realizes it knows the next token (for instance, 1895 * it is reordering tokens for the parser) then it can call S_force_next 1896 * to know what token to return the next time the lexer is called. Caller 1897 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD), 1898 * and possibly PL_expect to ensure the lexer handles the token correctly. 1899 */ 1900 1901 STATIC void 1902 S_force_next(pTHX_ I32 type) 1903 { 1904 dVAR; 1905 #ifdef DEBUGGING 1906 if (DEBUG_T_TEST) { 1907 PerlIO_printf(Perl_debug_log, "### forced token:\n"); 1908 tokereport(type, &NEXTVAL_NEXTTOKE); 1909 } 1910 #endif 1911 #ifdef PERL_MAD 1912 if (PL_curforce < 0) 1913 start_force(PL_lasttoke); 1914 PL_nexttoke[PL_curforce].next_type = type; 1915 if (PL_lex_state != LEX_KNOWNEXT) 1916 PL_lex_defer = PL_lex_state; 1917 PL_lex_state = LEX_KNOWNEXT; 1918 PL_lex_expect = PL_expect; 1919 PL_curforce = -1; 1920 #else 1921 PL_nexttype[PL_nexttoke] = type; 1922 PL_nexttoke++; 1923 if (PL_lex_state != LEX_KNOWNEXT) { 1924 PL_lex_defer = PL_lex_state; 1925 PL_lex_expect = PL_expect; 1926 PL_lex_state = LEX_KNOWNEXT; 1927 } 1928 #endif 1929 } 1930 1931 STATIC SV * 1932 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len) 1933 { 1934 dVAR; 1935 SV * const sv = newSVpvn_utf8(start, len, 1936 !IN_BYTES 1937 && UTF 1938 && !is_ascii_string((const U8*)start, len) 1939 && is_utf8_string((const U8*)start, len)); 1940 return sv; 1941 } 1942 1943 /* 1944 * S_force_word 1945 * When the lexer knows the next thing is a word (for instance, it has 1946 * just seen -> and it knows that the next char is a word char, then 1947 * it calls S_force_word to stick the next word into the PL_nexttoke/val 1948 * lookahead. 1949 * 1950 * Arguments: 1951 * char *start : buffer position (must be within PL_linestr) 1952 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD) 1953 * int check_keyword : if true, Perl checks to make sure the word isn't 1954 * a keyword (do this if the word is a label, e.g. goto FOO) 1955 * int allow_pack : if true, : characters will also be allowed (require, 1956 * use, etc. do this) 1957 * int allow_initial_tick : used by the "sub" lexer only. 1958 */ 1959 1960 STATIC char * 1961 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick) 1962 { 1963 dVAR; 1964 register char *s; 1965 STRLEN len; 1966 1967 PERL_ARGS_ASSERT_FORCE_WORD; 1968 1969 start = SKIPSPACE1(start); 1970 s = start; 1971 if (isIDFIRST_lazy_if(s,UTF) || 1972 (allow_pack && *s == ':') || 1973 (allow_initial_tick && *s == '\'') ) 1974 { 1975 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len); 1976 if (check_keyword && keyword(PL_tokenbuf, len, 0)) 1977 return start; 1978 start_force(PL_curforce); 1979 if (PL_madskills) 1980 curmad('X', newSVpvn(start,s-start)); 1981 if (token == METHOD) { 1982 s = SKIPSPACE1(s); 1983 if (*s == '(') 1984 PL_expect = XTERM; 1985 else { 1986 PL_expect = XOPERATOR; 1987 } 1988 } 1989 if (PL_madskills) 1990 curmad('g', newSVpvs( "forced" )); 1991 NEXTVAL_NEXTTOKE.opval 1992 = (OP*)newSVOP(OP_CONST,0, 1993 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len)); 1994 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE; 1995 force_next(token); 1996 } 1997 return s; 1998 } 1999 2000 /* 2001 * S_force_ident 2002 * Called when the lexer wants $foo *foo &foo etc, but the program 2003 * text only contains the "foo" portion. The first argument is a pointer 2004 * to the "foo", and the second argument is the type symbol to prefix. 2005 * Forces the next token to be a "WORD". 2006 * Creates the symbol if it didn't already exist (via gv_fetchpv()). 2007 */ 2008 2009 STATIC void 2010 S_force_ident(pTHX_ register const char *s, int kind) 2011 { 2012 dVAR; 2013 2014 PERL_ARGS_ASSERT_FORCE_IDENT; 2015 2016 if (*s) { 2017 const STRLEN len = strlen(s); 2018 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len)); 2019 start_force(PL_curforce); 2020 NEXTVAL_NEXTTOKE.opval = o; 2021 force_next(WORD); 2022 if (kind) { 2023 o->op_private = OPpCONST_ENTERED; 2024 /* XXX see note in pp_entereval() for why we forgo typo 2025 warnings if the symbol must be introduced in an eval. 2026 GSAR 96-10-12 */ 2027 gv_fetchpvn_flags(s, len, 2028 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) 2029 : GV_ADD, 2030 kind == '$' ? SVt_PV : 2031 kind == '@' ? SVt_PVAV : 2032 kind == '%' ? SVt_PVHV : 2033 SVt_PVGV 2034 ); 2035 } 2036 } 2037 } 2038 2039 NV 2040 Perl_str_to_version(pTHX_ SV *sv) 2041 { 2042 NV retval = 0.0; 2043 NV nshift = 1.0; 2044 STRLEN len; 2045 const char *start = SvPV_const(sv,len); 2046 const char * const end = start + len; 2047 const bool utf = SvUTF8(sv) ? TRUE : FALSE; 2048 2049 PERL_ARGS_ASSERT_STR_TO_VERSION; 2050 2051 while (start < end) { 2052 STRLEN skip; 2053 UV n; 2054 if (utf) 2055 n = utf8n_to_uvchr((U8*)start, len, &skip, 0); 2056 else { 2057 n = *(U8*)start; 2058 skip = 1; 2059 } 2060 retval += ((NV)n)/nshift; 2061 start += skip; 2062 nshift *= 1000; 2063 } 2064 return retval; 2065 } 2066 2067 /* 2068 * S_force_version 2069 * Forces the next token to be a version number. 2070 * If the next token appears to be an invalid version number, (e.g. "v2b"), 2071 * and if "guessing" is TRUE, then no new token is created (and the caller 2072 * must use an alternative parsing method). 2073 */ 2074 2075 STATIC char * 2076 S_force_version(pTHX_ char *s, int guessing) 2077 { 2078 dVAR; 2079 OP *version = NULL; 2080 char *d; 2081 #ifdef PERL_MAD 2082 I32 startoff = s - SvPVX(PL_linestr); 2083 #endif 2084 2085 PERL_ARGS_ASSERT_FORCE_VERSION; 2086 2087 s = SKIPSPACE1(s); 2088 2089 d = s; 2090 if (*d == 'v') 2091 d++; 2092 if (isDIGIT(*d)) { 2093 while (isDIGIT(*d) || *d == '_' || *d == '.') 2094 d++; 2095 #ifdef PERL_MAD 2096 if (PL_madskills) { 2097 start_force(PL_curforce); 2098 curmad('X', newSVpvn(s,d-s)); 2099 } 2100 #endif 2101 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) { 2102 SV *ver; 2103 #ifdef USE_LOCALE_NUMERIC 2104 char *loc = setlocale(LC_NUMERIC, "C"); 2105 #endif 2106 s = scan_num(s, &pl_yylval); 2107 #ifdef USE_LOCALE_NUMERIC 2108 setlocale(LC_NUMERIC, loc); 2109 #endif 2110 version = pl_yylval.opval; 2111 ver = cSVOPx(version)->op_sv; 2112 if (SvPOK(ver) && !SvNIOK(ver)) { 2113 SvUPGRADE(ver, SVt_PVNV); 2114 SvNV_set(ver, str_to_version(ver)); 2115 SvNOK_on(ver); /* hint that it is a version */ 2116 } 2117 } 2118 else if (guessing) { 2119 #ifdef PERL_MAD 2120 if (PL_madskills) { 2121 sv_free(PL_nextwhite); /* let next token collect whitespace */ 2122 PL_nextwhite = 0; 2123 s = SvPVX(PL_linestr) + startoff; 2124 } 2125 #endif 2126 return s; 2127 } 2128 } 2129 2130 #ifdef PERL_MAD 2131 if (PL_madskills && !version) { 2132 sv_free(PL_nextwhite); /* let next token collect whitespace */ 2133 PL_nextwhite = 0; 2134 s = SvPVX(PL_linestr) + startoff; 2135 } 2136 #endif 2137 /* NOTE: The parser sees the package name and the VERSION swapped */ 2138 start_force(PL_curforce); 2139 NEXTVAL_NEXTTOKE.opval = version; 2140 force_next(WORD); 2141 2142 return s; 2143 } 2144 2145 /* 2146 * S_force_strict_version 2147 * Forces the next token to be a version number using strict syntax rules. 2148 */ 2149 2150 STATIC char * 2151 S_force_strict_version(pTHX_ char *s) 2152 { 2153 dVAR; 2154 OP *version = NULL; 2155 #ifdef PERL_MAD 2156 I32 startoff = s - SvPVX(PL_linestr); 2157 #endif 2158 const char *errstr = NULL; 2159 2160 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION; 2161 2162 while (isSPACE(*s)) /* leading whitespace */ 2163 s++; 2164 2165 if (is_STRICT_VERSION(s,&errstr)) { 2166 SV *ver = newSV(0); 2167 s = (char *)scan_version(s, ver, 0); 2168 version = newSVOP(OP_CONST, 0, ver); 2169 } 2170 else if ( (*s != ';' && *s != '}' ) && (s = SKIPSPACE1(s), (*s != ';' && *s !='}' ))) { 2171 PL_bufptr = s; 2172 if (errstr) 2173 yyerror(errstr); /* version required */ 2174 return s; 2175 } 2176 2177 #ifdef PERL_MAD 2178 if (PL_madskills && !version) { 2179 sv_free(PL_nextwhite); /* let next token collect whitespace */ 2180 PL_nextwhite = 0; 2181 s = SvPVX(PL_linestr) + startoff; 2182 } 2183 #endif 2184 /* NOTE: The parser sees the package name and the VERSION swapped */ 2185 start_force(PL_curforce); 2186 NEXTVAL_NEXTTOKE.opval = version; 2187 force_next(WORD); 2188 2189 return s; 2190 } 2191 2192 /* 2193 * S_tokeq 2194 * Tokenize a quoted string passed in as an SV. It finds the next 2195 * chunk, up to end of string or a backslash. It may make a new 2196 * SV containing that chunk (if HINT_NEW_STRING is on). It also 2197 * turns \\ into \. 2198 */ 2199 2200 STATIC SV * 2201 S_tokeq(pTHX_ SV *sv) 2202 { 2203 dVAR; 2204 register char *s; 2205 register char *send; 2206 register char *d; 2207 STRLEN len = 0; 2208 SV *pv = sv; 2209 2210 PERL_ARGS_ASSERT_TOKEQ; 2211 2212 if (!SvLEN(sv)) 2213 goto finish; 2214 2215 s = SvPV_force(sv, len); 2216 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) 2217 goto finish; 2218 send = s + len; 2219 while (s < send && *s != '\\') 2220 s++; 2221 if (s == send) 2222 goto finish; 2223 d = s; 2224 if ( PL_hints & HINT_NEW_STRING ) { 2225 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv)); 2226 } 2227 while (s < send) { 2228 if (*s == '\\') { 2229 if (s + 1 < send && (s[1] == '\\')) 2230 s++; /* all that, just for this */ 2231 } 2232 *d++ = *s++; 2233 } 2234 *d = '\0'; 2235 SvCUR_set(sv, d - SvPVX_const(sv)); 2236 finish: 2237 if ( PL_hints & HINT_NEW_STRING ) 2238 return new_constant(NULL, 0, "q", sv, pv, "q", 1); 2239 return sv; 2240 } 2241 2242 /* 2243 * Now come three functions related to double-quote context, 2244 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when 2245 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They 2246 * interact with PL_lex_state, and create fake ( ... ) argument lists 2247 * to handle functions and concatenation. 2248 * They assume that whoever calls them will be setting up a fake 2249 * join call, because each subthing puts a ',' after it. This lets 2250 * "lower \luPpEr" 2251 * become 2252 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,) 2253 * 2254 * (I'm not sure whether the spurious commas at the end of lcfirst's 2255 * arguments and join's arguments are created or not). 2256 */ 2257 2258 /* 2259 * S_sublex_start 2260 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST). 2261 * 2262 * Pattern matching will set PL_lex_op to the pattern-matching op to 2263 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise). 2264 * 2265 * OP_CONST and OP_READLINE are easy--just make the new op and return. 2266 * 2267 * Everything else becomes a FUNC. 2268 * 2269 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we 2270 * had an OP_CONST or OP_READLINE). This just sets us up for a 2271 * call to S_sublex_push(). 2272 */ 2273 2274 STATIC I32 2275 S_sublex_start(pTHX) 2276 { 2277 dVAR; 2278 register const I32 op_type = pl_yylval.ival; 2279 2280 if (op_type == OP_NULL) { 2281 pl_yylval.opval = PL_lex_op; 2282 PL_lex_op = NULL; 2283 return THING; 2284 } 2285 if (op_type == OP_CONST || op_type == OP_READLINE) { 2286 SV *sv = tokeq(PL_lex_stuff); 2287 2288 if (SvTYPE(sv) == SVt_PVIV) { 2289 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */ 2290 STRLEN len; 2291 const char * const p = SvPV_const(sv, len); 2292 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv)); 2293 SvREFCNT_dec(sv); 2294 sv = nsv; 2295 } 2296 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv); 2297 PL_lex_stuff = NULL; 2298 /* Allow <FH> // "foo" */ 2299 if (op_type == OP_READLINE) 2300 PL_expect = XTERMORDORDOR; 2301 return THING; 2302 } 2303 else if (op_type == OP_BACKTICK && PL_lex_op) { 2304 /* readpipe() vas overriden */ 2305 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff); 2306 pl_yylval.opval = PL_lex_op; 2307 PL_lex_op = NULL; 2308 PL_lex_stuff = NULL; 2309 return THING; 2310 } 2311 2312 PL_sublex_info.super_state = PL_lex_state; 2313 PL_sublex_info.sub_inwhat = (U16)op_type; 2314 PL_sublex_info.sub_op = PL_lex_op; 2315 PL_lex_state = LEX_INTERPPUSH; 2316 2317 PL_expect = XTERM; 2318 if (PL_lex_op) { 2319 pl_yylval.opval = PL_lex_op; 2320 PL_lex_op = NULL; 2321 return PMFUNC; 2322 } 2323 else 2324 return FUNC; 2325 } 2326 2327 /* 2328 * S_sublex_push 2329 * Create a new scope to save the lexing state. The scope will be 2330 * ended in S_sublex_done. Returns a '(', starting the function arguments 2331 * to the uc, lc, etc. found before. 2332 * Sets PL_lex_state to LEX_INTERPCONCAT. 2333 */ 2334 2335 STATIC I32 2336 S_sublex_push(pTHX) 2337 { 2338 dVAR; 2339 ENTER; 2340 2341 PL_lex_state = PL_sublex_info.super_state; 2342 SAVEBOOL(PL_lex_dojoin); 2343 SAVEI32(PL_lex_brackets); 2344 SAVEI32(PL_lex_casemods); 2345 SAVEI32(PL_lex_starts); 2346 SAVEI8(PL_lex_state); 2347 SAVEVPTR(PL_lex_inpat); 2348 SAVEI16(PL_lex_inwhat); 2349 SAVECOPLINE(PL_curcop); 2350 SAVEPPTR(PL_bufptr); 2351 SAVEPPTR(PL_bufend); 2352 SAVEPPTR(PL_oldbufptr); 2353 SAVEPPTR(PL_oldoldbufptr); 2354 SAVEPPTR(PL_last_lop); 2355 SAVEPPTR(PL_last_uni); 2356 SAVEPPTR(PL_linestart); 2357 SAVESPTR(PL_linestr); 2358 SAVEGENERICPV(PL_lex_brackstack); 2359 SAVEGENERICPV(PL_lex_casestack); 2360 2361 PL_linestr = PL_lex_stuff; 2362 PL_lex_stuff = NULL; 2363 2364 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart 2365 = SvPVX(PL_linestr); 2366 PL_bufend += SvCUR(PL_linestr); 2367 PL_last_lop = PL_last_uni = NULL; 2368 SAVEFREESV(PL_linestr); 2369 2370 PL_lex_dojoin = FALSE; 2371 PL_lex_brackets = 0; 2372 Newx(PL_lex_brackstack, 120, char); 2373 Newx(PL_lex_casestack, 12, char); 2374 PL_lex_casemods = 0; 2375 *PL_lex_casestack = '\0'; 2376 PL_lex_starts = 0; 2377 PL_lex_state = LEX_INTERPCONCAT; 2378 CopLINE_set(PL_curcop, (line_t)PL_multi_start); 2379 2380 PL_lex_inwhat = PL_sublex_info.sub_inwhat; 2381 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST) 2382 PL_lex_inpat = PL_sublex_info.sub_op; 2383 else 2384 PL_lex_inpat = NULL; 2385 2386 return '('; 2387 } 2388 2389 /* 2390 * S_sublex_done 2391 * Restores lexer state after a S_sublex_push. 2392 */ 2393 2394 STATIC I32 2395 S_sublex_done(pTHX) 2396 { 2397 dVAR; 2398 if (!PL_lex_starts++) { 2399 SV * const sv = newSVpvs(""); 2400 if (SvUTF8(PL_linestr)) 2401 SvUTF8_on(sv); 2402 PL_expect = XOPERATOR; 2403 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv); 2404 return THING; 2405 } 2406 2407 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */ 2408 PL_lex_state = LEX_INTERPCASEMOD; 2409 return yylex(); 2410 } 2411 2412 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */ 2413 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) { 2414 PL_linestr = PL_lex_repl; 2415 PL_lex_inpat = 0; 2416 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr); 2417 PL_bufend += SvCUR(PL_linestr); 2418 PL_last_lop = PL_last_uni = NULL; 2419 SAVEFREESV(PL_linestr); 2420 PL_lex_dojoin = FALSE; 2421 PL_lex_brackets = 0; 2422 PL_lex_casemods = 0; 2423 *PL_lex_casestack = '\0'; 2424 PL_lex_starts = 0; 2425 if (SvEVALED(PL_lex_repl)) { 2426 PL_lex_state = LEX_INTERPNORMAL; 2427 PL_lex_starts++; 2428 /* we don't clear PL_lex_repl here, so that we can check later 2429 whether this is an evalled subst; that means we rely on the 2430 logic to ensure sublex_done() is called again only via the 2431 branch (in yylex()) that clears PL_lex_repl, else we'll loop */ 2432 } 2433 else { 2434 PL_lex_state = LEX_INTERPCONCAT; 2435 PL_lex_repl = NULL; 2436 } 2437 return ','; 2438 } 2439 else { 2440 #ifdef PERL_MAD 2441 if (PL_madskills) { 2442 if (PL_thiswhite) { 2443 if (!PL_endwhite) 2444 PL_endwhite = newSVpvs(""); 2445 sv_catsv(PL_endwhite, PL_thiswhite); 2446 PL_thiswhite = 0; 2447 } 2448 if (PL_thistoken) 2449 sv_setpvs(PL_thistoken,""); 2450 else 2451 PL_realtokenstart = -1; 2452 } 2453 #endif 2454 LEAVE; 2455 PL_bufend = SvPVX(PL_linestr); 2456 PL_bufend += SvCUR(PL_linestr); 2457 PL_expect = XOPERATOR; 2458 PL_sublex_info.sub_inwhat = 0; 2459 return ')'; 2460 } 2461 } 2462 2463 /* 2464 scan_const 2465 2466 Extracts a pattern, double-quoted string, or transliteration. This 2467 is terrifying code. 2468 2469 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's 2470 processing a pattern (PL_lex_inpat is true), a transliteration 2471 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string. 2472 2473 Returns a pointer to the character scanned up to. If this is 2474 advanced from the start pointer supplied (i.e. if anything was 2475 successfully parsed), will leave an OP for the substring scanned 2476 in pl_yylval. Caller must intuit reason for not parsing further 2477 by looking at the next characters herself. 2478 2479 In patterns: 2480 backslashes: 2481 constants: \N{NAME} only 2482 case and quoting: \U \Q \E 2483 stops on @ and $, but not for $ as tail anchor 2484 2485 In transliterations: 2486 characters are VERY literal, except for - not at the start or end 2487 of the string, which indicates a range. If the range is in bytes, 2488 scan_const expands the range to the full set of intermediate 2489 characters. If the range is in utf8, the hyphen is replaced with 2490 a certain range mark which will be handled by pmtrans() in op.c. 2491 2492 In double-quoted strings: 2493 backslashes: 2494 double-quoted style: \r and \n 2495 constants: \x31, etc. 2496 deprecated backrefs: \1 (in substitution replacements) 2497 case and quoting: \U \Q \E 2498 stops on @ and $ 2499 2500 scan_const does *not* construct ops to handle interpolated strings. 2501 It stops processing as soon as it finds an embedded $ or @ variable 2502 and leaves it to the caller to work out what's going on. 2503 2504 embedded arrays (whether in pattern or not) could be: 2505 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-. 2506 2507 $ in double-quoted strings must be the symbol of an embedded scalar. 2508 2509 $ in pattern could be $foo or could be tail anchor. Assumption: 2510 it's a tail anchor if $ is the last thing in the string, or if it's 2511 followed by one of "()| \r\n\t" 2512 2513 \1 (backreferences) are turned into $1 2514 2515 The structure of the code is 2516 while (there's a character to process) { 2517 handle transliteration ranges 2518 skip regexp comments /(?#comment)/ and codes /(?{code})/ 2519 skip #-initiated comments in //x patterns 2520 check for embedded arrays 2521 check for embedded scalars 2522 if (backslash) { 2523 deprecate \1 in substitution replacements 2524 handle string-changing backslashes \l \U \Q \E, etc. 2525 switch (what was escaped) { 2526 handle \- in a transliteration (becomes a literal -) 2527 if a pattern and not \N{, go treat as regular character 2528 handle \132 (octal characters) 2529 handle \x15 and \x{1234} (hex characters) 2530 handle \N{name} (named characters, also \N{3,5} in a pattern) 2531 handle \cV (control characters) 2532 handle printf-style backslashes (\f, \r, \n, etc) 2533 } (end switch) 2534 continue 2535 } (end if backslash) 2536 handle regular character 2537 } (end while character to read) 2538 2539 */ 2540 2541 STATIC char * 2542 S_scan_const(pTHX_ char *start) 2543 { 2544 dVAR; 2545 register char *send = PL_bufend; /* end of the constant */ 2546 SV *sv = newSV(send - start); /* sv for the constant. See 2547 note below on sizing. */ 2548 register char *s = start; /* start of the constant */ 2549 register char *d = SvPVX(sv); /* destination for copies */ 2550 bool dorange = FALSE; /* are we in a translit range? */ 2551 bool didrange = FALSE; /* did we just finish a range? */ 2552 I32 has_utf8 = FALSE; /* Output constant is UTF8 */ 2553 I32 this_utf8 = UTF; /* Is the source string assumed 2554 to be UTF8? But, this can 2555 show as true when the source 2556 isn't utf8, as for example 2557 when it is entirely composed 2558 of hex constants */ 2559 2560 /* Note on sizing: The scanned constant is placed into sv, which is 2561 * initialized by newSV() assuming one byte of output for every byte of 2562 * input. This routine expects newSV() to allocate an extra byte for a 2563 * trailing NUL, which this routine will append if it gets to the end of 2564 * the input. There may be more bytes of input than output (eg., \N{LATIN 2565 * CAPITAL LETTER A}), or more output than input if the constant ends up 2566 * recoded to utf8, but each time a construct is found that might increase 2567 * the needed size, SvGROW() is called. Its size parameter each time is 2568 * based on the best guess estimate at the time, namely the length used so 2569 * far, plus the length the current construct will occupy, plus room for 2570 * the trailing NUL, plus one byte for every input byte still unscanned */ 2571 2572 UV uv; 2573 #ifdef EBCDIC 2574 UV literal_endpoint = 0; 2575 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */ 2576 #endif 2577 2578 PERL_ARGS_ASSERT_SCAN_CONST; 2579 2580 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) { 2581 /* If we are doing a trans and we know we want UTF8 set expectation */ 2582 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF); 2583 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF); 2584 } 2585 2586 2587 while (s < send || dorange) { 2588 2589 /* get transliterations out of the way (they're most literal) */ 2590 if (PL_lex_inwhat == OP_TRANS) { 2591 /* expand a range A-Z to the full set of characters. AIE! */ 2592 if (dorange) { 2593 I32 i; /* current expanded character */ 2594 I32 min; /* first character in range */ 2595 I32 max; /* last character in range */ 2596 2597 #ifdef EBCDIC 2598 UV uvmax = 0; 2599 #endif 2600 2601 if (has_utf8 2602 #ifdef EBCDIC 2603 && !native_range 2604 #endif 2605 ) { 2606 char * const c = (char*)utf8_hop((U8*)d, -1); 2607 char *e = d++; 2608 while (e-- > c) 2609 *(e + 1) = *e; 2610 *c = (char)UTF_TO_NATIVE(0xff); 2611 /* mark the range as done, and continue */ 2612 dorange = FALSE; 2613 didrange = TRUE; 2614 continue; 2615 } 2616 2617 i = d - SvPVX_const(sv); /* remember current offset */ 2618 #ifdef EBCDIC 2619 SvGROW(sv, 2620 SvLEN(sv) + (has_utf8 ? 2621 (512 - UTF_CONTINUATION_MARK + 2622 UNISKIP(0x100)) 2623 : 256)); 2624 /* How many two-byte within 0..255: 128 in UTF-8, 2625 * 96 in UTF-8-mod. */ 2626 #else 2627 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */ 2628 #endif 2629 d = SvPVX(sv) + i; /* refresh d after realloc */ 2630 #ifdef EBCDIC 2631 if (has_utf8) { 2632 int j; 2633 for (j = 0; j <= 1; j++) { 2634 char * const c = (char*)utf8_hop((U8*)d, -1); 2635 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0); 2636 if (j) 2637 min = (U8)uv; 2638 else if (uv < 256) 2639 max = (U8)uv; 2640 else { 2641 max = (U8)0xff; /* only to \xff */ 2642 uvmax = uv; /* \x{100} to uvmax */ 2643 } 2644 d = c; /* eat endpoint chars */ 2645 } 2646 } 2647 else { 2648 #endif 2649 d -= 2; /* eat the first char and the - */ 2650 min = (U8)*d; /* first char in range */ 2651 max = (U8)d[1]; /* last char in range */ 2652 #ifdef EBCDIC 2653 } 2654 #endif 2655 2656 if (min > max) { 2657 Perl_croak(aTHX_ 2658 "Invalid range \"%c-%c\" in transliteration operator", 2659 (char)min, (char)max); 2660 } 2661 2662 #ifdef EBCDIC 2663 if (literal_endpoint == 2 && 2664 ((isLOWER(min) && isLOWER(max)) || 2665 (isUPPER(min) && isUPPER(max)))) { 2666 if (isLOWER(min)) { 2667 for (i = min; i <= max; i++) 2668 if (isLOWER(i)) 2669 *d++ = NATIVE_TO_NEED(has_utf8,i); 2670 } else { 2671 for (i = min; i <= max; i++) 2672 if (isUPPER(i)) 2673 *d++ = NATIVE_TO_NEED(has_utf8,i); 2674 } 2675 } 2676 else 2677 #endif 2678 for (i = min; i <= max; i++) 2679 #ifdef EBCDIC 2680 if (has_utf8) { 2681 const U8 ch = (U8)NATIVE_TO_UTF(i); 2682 if (UNI_IS_INVARIANT(ch)) 2683 *d++ = (U8)i; 2684 else { 2685 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch); 2686 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch); 2687 } 2688 } 2689 else 2690 #endif 2691 *d++ = (char)i; 2692 2693 #ifdef EBCDIC 2694 if (uvmax) { 2695 d = (char*)uvchr_to_utf8((U8*)d, 0x100); 2696 if (uvmax > 0x101) 2697 *d++ = (char)UTF_TO_NATIVE(0xff); 2698 if (uvmax > 0x100) 2699 d = (char*)uvchr_to_utf8((U8*)d, uvmax); 2700 } 2701 #endif 2702 2703 /* mark the range as done, and continue */ 2704 dorange = FALSE; 2705 didrange = TRUE; 2706 #ifdef EBCDIC 2707 literal_endpoint = 0; 2708 #endif 2709 continue; 2710 } 2711 2712 /* range begins (ignore - as first or last char) */ 2713 else if (*s == '-' && s+1 < send && s != start) { 2714 if (didrange) { 2715 Perl_croak(aTHX_ "Ambiguous range in transliteration operator"); 2716 } 2717 if (has_utf8 2718 #ifdef EBCDIC 2719 && !native_range 2720 #endif 2721 ) { 2722 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */ 2723 s++; 2724 continue; 2725 } 2726 dorange = TRUE; 2727 s++; 2728 } 2729 else { 2730 didrange = FALSE; 2731 #ifdef EBCDIC 2732 literal_endpoint = 0; 2733 native_range = TRUE; 2734 #endif 2735 } 2736 } 2737 2738 /* if we get here, we're not doing a transliteration */ 2739 2740 /* skip for regexp comments /(?#comment)/ and code /(?{code})/, 2741 except for the last char, which will be done separately. */ 2742 else if (*s == '(' && PL_lex_inpat && s[1] == '?') { 2743 if (s[2] == '#') { 2744 while (s+1 < send && *s != ')') 2745 *d++ = NATIVE_TO_NEED(has_utf8,*s++); 2746 } 2747 else if (s[2] == '{' /* This should match regcomp.c */ 2748 || (s[2] == '?' && s[3] == '{')) 2749 { 2750 I32 count = 1; 2751 char *regparse = s + (s[2] == '{' ? 3 : 4); 2752 char c; 2753 2754 while (count && (c = *regparse)) { 2755 if (c == '\\' && regparse[1]) 2756 regparse++; 2757 else if (c == '{') 2758 count++; 2759 else if (c == '}') 2760 count--; 2761 regparse++; 2762 } 2763 if (*regparse != ')') 2764 regparse--; /* Leave one char for continuation. */ 2765 while (s < regparse) 2766 *d++ = NATIVE_TO_NEED(has_utf8,*s++); 2767 } 2768 } 2769 2770 /* likewise skip #-initiated comments in //x patterns */ 2771 else if (*s == '#' && PL_lex_inpat && 2772 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) { 2773 while (s+1 < send && *s != '\n') 2774 *d++ = NATIVE_TO_NEED(has_utf8,*s++); 2775 } 2776 2777 /* check for embedded arrays 2778 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-) 2779 */ 2780 else if (*s == '@' && s[1]) { 2781 if (isALNUM_lazy_if(s+1,UTF)) 2782 break; 2783 if (strchr(":'{$", s[1])) 2784 break; 2785 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-')) 2786 break; /* in regexp, neither @+ nor @- are interpolated */ 2787 } 2788 2789 /* check for embedded scalars. only stop if we're sure it's a 2790 variable. 2791 */ 2792 else if (*s == '$') { 2793 if (!PL_lex_inpat) /* not a regexp, so $ must be var */ 2794 break; 2795 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) { 2796 if (s[1] == '\\') { 2797 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 2798 "Possible unintended interpolation of $\\ in regex"); 2799 } 2800 break; /* in regexp, $ might be tail anchor */ 2801 } 2802 } 2803 2804 /* End of else if chain - OP_TRANS rejoin rest */ 2805 2806 /* backslashes */ 2807 if (*s == '\\' && s+1 < send) { 2808 char* e; /* Can be used for ending '}', etc. */ 2809 2810 s++; 2811 2812 /* deprecate \1 in strings and substitution replacements */ 2813 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat && 2814 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1])) 2815 { 2816 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s); 2817 *--s = '$'; 2818 break; 2819 } 2820 2821 /* string-change backslash escapes */ 2822 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) { 2823 --s; 2824 break; 2825 } 2826 /* In a pattern, process \N, but skip any other backslash escapes. 2827 * This is because we don't want to translate an escape sequence 2828 * into a meta symbol and have the regex compiler use the meta 2829 * symbol meaning, e.g. \x{2E} would be confused with a dot. But 2830 * in spite of this, we do have to process \N here while the proper 2831 * charnames handler is in scope. See bugs #56444 and #62056. 2832 * There is a complication because \N in a pattern may also stand 2833 * for 'match a non-nl', and not mean a charname, in which case its 2834 * processing should be deferred to the regex compiler. To be a 2835 * charname it must be followed immediately by a '{', and not look 2836 * like \N followed by a curly quantifier, i.e., not something like 2837 * \N{3,}. regcurly returns a boolean indicating if it is a legal 2838 * quantifier */ 2839 else if (PL_lex_inpat 2840 && (*s != 'N' 2841 || s[1] != '{' 2842 || regcurly(s + 1))) 2843 { 2844 *d++ = NATIVE_TO_NEED(has_utf8,'\\'); 2845 goto default_action; 2846 } 2847 2848 switch (*s) { 2849 2850 /* quoted - in transliterations */ 2851 case '-': 2852 if (PL_lex_inwhat == OP_TRANS) { 2853 *d++ = *s++; 2854 continue; 2855 } 2856 /* FALL THROUGH */ 2857 default: 2858 { 2859 if ((isALPHA(*s) || isDIGIT(*s))) 2860 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), 2861 "Unrecognized escape \\%c passed through", 2862 *s); 2863 /* default action is to copy the quoted character */ 2864 goto default_action; 2865 } 2866 2867 /* eg. \132 indicates the octal constant 0x132 */ 2868 case '0': case '1': case '2': case '3': 2869 case '4': case '5': case '6': case '7': 2870 { 2871 I32 flags = 0; 2872 STRLEN len = 3; 2873 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL)); 2874 s += len; 2875 } 2876 goto NUM_ESCAPE_INSERT; 2877 2878 /* eg. \x24 indicates the hex constant 0x24 */ 2879 case 'x': 2880 ++s; 2881 if (*s == '{') { 2882 char* const e = strchr(s, '}'); 2883 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES | 2884 PERL_SCAN_DISALLOW_PREFIX; 2885 STRLEN len; 2886 2887 ++s; 2888 if (!e) { 2889 yyerror("Missing right brace on \\x{}"); 2890 continue; 2891 } 2892 len = e - s; 2893 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL)); 2894 s = e + 1; 2895 } 2896 else { 2897 { 2898 STRLEN len = 2; 2899 I32 flags = PERL_SCAN_DISALLOW_PREFIX; 2900 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL)); 2901 s += len; 2902 } 2903 } 2904 2905 NUM_ESCAPE_INSERT: 2906 /* Insert oct or hex escaped character. There will always be 2907 * enough room in sv since such escapes will be longer than any 2908 * UTF-8 sequence they can end up as, except if they force us 2909 * to recode the rest of the string into utf8 */ 2910 2911 /* Here uv is the ordinal of the next character being added in 2912 * unicode (converted from native). */ 2913 if (!UNI_IS_INVARIANT(uv)) { 2914 if (!has_utf8 && uv > 255) { 2915 /* Might need to recode whatever we have accumulated so 2916 * far if it contains any chars variant in utf8 or 2917 * utf-ebcdic. */ 2918 2919 SvCUR_set(sv, d - SvPVX_const(sv)); 2920 SvPOK_on(sv); 2921 *d = '\0'; 2922 /* See Note on sizing above. */ 2923 sv_utf8_upgrade_flags_grow(sv, 2924 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, 2925 UNISKIP(uv) + (STRLEN)(send - s) + 1); 2926 d = SvPVX(sv) + SvCUR(sv); 2927 has_utf8 = TRUE; 2928 } 2929 2930 if (has_utf8) { 2931 d = (char*)uvuni_to_utf8((U8*)d, uv); 2932 if (PL_lex_inwhat == OP_TRANS && 2933 PL_sublex_info.sub_op) { 2934 PL_sublex_info.sub_op->op_private |= 2935 (PL_lex_repl ? OPpTRANS_FROM_UTF 2936 : OPpTRANS_TO_UTF); 2937 } 2938 #ifdef EBCDIC 2939 if (uv > 255 && !dorange) 2940 native_range = FALSE; 2941 #endif 2942 } 2943 else { 2944 *d++ = (char)uv; 2945 } 2946 } 2947 else { 2948 *d++ = (char) uv; 2949 } 2950 continue; 2951 2952 case 'N': 2953 /* In a non-pattern \N must be a named character, like \N{LATIN 2954 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can 2955 * mean to match a non-newline. For non-patterns, named 2956 * characters are converted to their string equivalents. In 2957 * patterns, named characters are not converted to their 2958 * ultimate forms for the same reasons that other escapes 2959 * aren't. Instead, they are converted to the \N{U+...} form 2960 * to get the value from the charnames that is in effect right 2961 * now, while preserving the fact that it was a named character 2962 * so that the regex compiler knows this */ 2963 2964 /* This section of code doesn't generally use the 2965 * NATIVE_TO_NEED() macro to transform the input. I (khw) did 2966 * a close examination of this macro and determined it is a 2967 * no-op except on utfebcdic variant characters. Every 2968 * character generated by this that would normally need to be 2969 * enclosed by this macro is invariant, so the macro is not 2970 * needed, and would complicate use of copy(). There are other 2971 * parts of this file where the macro is used inconsistently, 2972 * but are saved by it being a no-op */ 2973 2974 /* The structure of this section of code (besides checking for 2975 * errors and upgrading to utf8) is: 2976 * Further disambiguate between the two meanings of \N, and if 2977 * not a charname, go process it elsewhere 2978 * If of form \N{U+...}, pass it through if a pattern; 2979 * otherwise convert to utf8 2980 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a 2981 * pattern; otherwise convert to utf8 */ 2982 2983 /* Here, s points to the 'N'; the test below is guaranteed to 2984 * succeed if we are being called on a pattern as we already 2985 * know from a test above that the next character is a '{'. 2986 * On a non-pattern \N must mean 'named sequence, which 2987 * requires braces */ 2988 s++; 2989 if (*s != '{') { 2990 yyerror("Missing braces on \\N{}"); 2991 continue; 2992 } 2993 s++; 2994 2995 /* If there is no matching '}', it is an error. */ 2996 if (! (e = strchr(s, '}'))) { 2997 if (! PL_lex_inpat) { 2998 yyerror("Missing right brace on \\N{}"); 2999 } else { 3000 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N."); 3001 } 3002 continue; 3003 } 3004 3005 /* Here it looks like a named character */ 3006 3007 if (PL_lex_inpat) { 3008 3009 /* XXX This block is temporary code. \N{} implies that the 3010 * pattern is to have Unicode semantics, and therefore 3011 * currently has to be encoded in utf8. By putting it in 3012 * utf8 now, we save a whole pass in the regular expression 3013 * compiler. Once that code is changed so Unicode 3014 * semantics doesn't necessarily have to be in utf8, this 3015 * block should be removed */ 3016 if (!has_utf8) { 3017 SvCUR_set(sv, d - SvPVX_const(sv)); 3018 SvPOK_on(sv); 3019 *d = '\0'; 3020 /* See Note on sizing above. */ 3021 sv_utf8_upgrade_flags_grow(sv, 3022 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, 3023 /* 5 = '\N{' + cur char + NUL */ 3024 (STRLEN)(send - s) + 5); 3025 d = SvPVX(sv) + SvCUR(sv); 3026 has_utf8 = TRUE; 3027 } 3028 } 3029 3030 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */ 3031 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES 3032 | PERL_SCAN_DISALLOW_PREFIX; 3033 STRLEN len; 3034 3035 /* For \N{U+...}, the '...' is a unicode value even on 3036 * EBCDIC machines */ 3037 s += 2; /* Skip to next char after the 'U+' */ 3038 len = e - s; 3039 uv = grok_hex(s, &len, &flags, NULL); 3040 if (len == 0 || len != (STRLEN)(e - s)) { 3041 yyerror("Invalid hexadecimal number in \\N{U+...}"); 3042 s = e + 1; 3043 continue; 3044 } 3045 3046 if (PL_lex_inpat) { 3047 3048 /* Pass through to the regex compiler unchanged. The 3049 * reason we evaluated the number above is to make sure 3050 * there wasn't a syntax error. */ 3051 s -= 5; /* Include the '\N{U+' */ 3052 Copy(s, d, e - s + 1, char); /* 1 = include the } */ 3053 d += e - s + 1; 3054 } 3055 else { /* Not a pattern: convert the hex to string */ 3056 3057 /* If destination is not in utf8, unconditionally 3058 * recode it to be so. This is because \N{} implies 3059 * Unicode semantics, and scalars have to be in utf8 3060 * to guarantee those semantics */ 3061 if (! has_utf8) { 3062 SvCUR_set(sv, d - SvPVX_const(sv)); 3063 SvPOK_on(sv); 3064 *d = '\0'; 3065 /* See Note on sizing above. */ 3066 sv_utf8_upgrade_flags_grow( 3067 sv, 3068 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, 3069 UNISKIP(uv) + (STRLEN)(send - e) + 1); 3070 d = SvPVX(sv) + SvCUR(sv); 3071 has_utf8 = TRUE; 3072 } 3073 3074 /* Add the string to the output */ 3075 if (UNI_IS_INVARIANT(uv)) { 3076 *d++ = (char) uv; 3077 } 3078 else d = (char*)uvuni_to_utf8((U8*)d, uv); 3079 } 3080 } 3081 else { /* Here is \N{NAME} but not \N{U+...}. */ 3082 3083 SV *res; /* result from charnames */ 3084 const char *str; /* the string in 'res' */ 3085 STRLEN len; /* its length */ 3086 3087 /* Get the value for NAME */ 3088 res = newSVpvn(s, e - s); 3089 res = new_constant( NULL, 0, "charnames", 3090 /* includes all of: \N{...} */ 3091 res, NULL, s - 3, e - s + 4 ); 3092 3093 /* Most likely res will be in utf8 already since the 3094 * standard charnames uses pack U, but a custom translator 3095 * can leave it otherwise, so make sure. XXX This can be 3096 * revisited to not have charnames use utf8 for characters 3097 * that don't need it when regexes don't have to be in utf8 3098 * for Unicode semantics. If doing so, remember EBCDIC */ 3099 sv_utf8_upgrade(res); 3100 str = SvPV_const(res, len); 3101 3102 /* Don't accept malformed input */ 3103 if (! is_utf8_string((U8 *) str, len)) { 3104 yyerror("Malformed UTF-8 returned by \\N"); 3105 } 3106 else if (PL_lex_inpat) { 3107 3108 if (! len) { /* The name resolved to an empty string */ 3109 Copy("\\N{}", d, 4, char); 3110 d += 4; 3111 } 3112 else { 3113 /* In order to not lose information for the regex 3114 * compiler, pass the result in the specially made 3115 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are 3116 * the code points in hex of each character 3117 * returned by charnames */ 3118 3119 const char *str_end = str + len; 3120 STRLEN char_length; /* cur char's byte length */ 3121 STRLEN output_length; /* and the number of bytes 3122 after this is translated 3123 into hex digits */ 3124 const STRLEN off = d - SvPVX_const(sv); 3125 3126 /* 2 hex per byte; 2 chars for '\N'; 2 chars for 3127 * max('U+', '.'); and 1 for NUL */ 3128 char hex_string[2 * UTF8_MAXBYTES + 5]; 3129 3130 /* Get the first character of the result. */ 3131 U32 uv = utf8n_to_uvuni((U8 *) str, 3132 len, 3133 &char_length, 3134 UTF8_ALLOW_ANYUV); 3135 3136 /* The call to is_utf8_string() above hopefully 3137 * guarantees that there won't be an error. But 3138 * it's easy here to make sure. The function just 3139 * above warns and returns 0 if invalid utf8, but 3140 * it can also return 0 if the input is validly a 3141 * NUL. Disambiguate */ 3142 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') { 3143 uv = UNICODE_REPLACEMENT; 3144 } 3145 3146 /* Convert first code point to hex, including the 3147 * boiler plate before it */ 3148 snprintf(hex_string, sizeof(hex_string), 3149 "\\N{U+%X", (unsigned int) uv); 3150 output_length = strlen(hex_string); 3151 3152 /* Make sure there is enough space to hold it */ 3153 d = off + SvGROW(sv, off 3154 + output_length 3155 + (STRLEN)(send - e) 3156 + 2); /* '}' + NUL */ 3157 /* And output it */ 3158 Copy(hex_string, d, output_length, char); 3159 d += output_length; 3160 3161 /* For each subsequent character, append dot and 3162 * its ordinal in hex */ 3163 while ((str += char_length) < str_end) { 3164 const STRLEN off = d - SvPVX_const(sv); 3165 U32 uv = utf8n_to_uvuni((U8 *) str, 3166 str_end - str, 3167 &char_length, 3168 UTF8_ALLOW_ANYUV); 3169 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') { 3170 uv = UNICODE_REPLACEMENT; 3171 } 3172 3173 snprintf(hex_string, sizeof(hex_string), 3174 ".%X", (unsigned int) uv); 3175 output_length = strlen(hex_string); 3176 3177 d = off + SvGROW(sv, off 3178 + output_length 3179 + (STRLEN)(send - e) 3180 + 2); /* '}' + NUL */ 3181 Copy(hex_string, d, output_length, char); 3182 d += output_length; 3183 } 3184 3185 *d++ = '}'; /* Done. Add the trailing brace */ 3186 } 3187 } 3188 else { /* Here, not in a pattern. Convert the name to a 3189 * string. */ 3190 3191 /* If destination is not in utf8, unconditionally 3192 * recode it to be so. This is because \N{} implies 3193 * Unicode semantics, and scalars have to be in utf8 3194 * to guarantee those semantics */ 3195 if (! has_utf8) { 3196 SvCUR_set(sv, d - SvPVX_const(sv)); 3197 SvPOK_on(sv); 3198 *d = '\0'; 3199 /* See Note on sizing above. */ 3200 sv_utf8_upgrade_flags_grow(sv, 3201 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, 3202 len + (STRLEN)(send - s) + 1); 3203 d = SvPVX(sv) + SvCUR(sv); 3204 has_utf8 = TRUE; 3205 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */ 3206 3207 /* See Note on sizing above. (NOTE: SvCUR() is not 3208 * set correctly here). */ 3209 const STRLEN off = d - SvPVX_const(sv); 3210 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1); 3211 } 3212 Copy(str, d, len, char); 3213 d += len; 3214 } 3215 SvREFCNT_dec(res); 3216 3217 /* Deprecate non-approved name syntax */ 3218 if (ckWARN_d(WARN_DEPRECATED)) { 3219 bool problematic = FALSE; 3220 char* i = s; 3221 3222 /* For non-ut8 input, look to see that the first 3223 * character is an alpha, then loop through the rest 3224 * checking that each is a continuation */ 3225 if (! this_utf8) { 3226 if (! isALPHAU(*i)) problematic = TRUE; 3227 else for (i = s + 1; i < e; i++) { 3228 if (isCHARNAME_CONT(*i)) continue; 3229 problematic = TRUE; 3230 break; 3231 } 3232 } 3233 else { 3234 /* Similarly for utf8. For invariants can check 3235 * directly. We accept anything above the latin1 3236 * range because it is immaterial to Perl if it is 3237 * correct or not, and is expensive to check. But 3238 * it is fairly easy in the latin1 range to convert 3239 * the variants into a single character and check 3240 * those */ 3241 if (UTF8_IS_INVARIANT(*i)) { 3242 if (! isALPHAU(*i)) problematic = TRUE; 3243 } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) { 3244 if (! isALPHAU(UNI_TO_NATIVE(UTF8_ACCUMULATE(*i, 3245 *(i+1))))) 3246 { 3247 problematic = TRUE; 3248 } 3249 } 3250 if (! problematic) for (i = s + UTF8SKIP(s); 3251 i < e; 3252 i+= UTF8SKIP(i)) 3253 { 3254 if (UTF8_IS_INVARIANT(*i)) { 3255 if (isCHARNAME_CONT(*i)) continue; 3256 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) { 3257 continue; 3258 } else if (isCHARNAME_CONT( 3259 UNI_TO_NATIVE( 3260 UTF8_ACCUMULATE(*i, *(i+1))))) 3261 { 3262 continue; 3263 } 3264 problematic = TRUE; 3265 break; 3266 } 3267 } 3268 if (problematic) { 3269 char *string; 3270 Newx(string, e - i + 1, char); 3271 Copy(i, string, e - i, char); 3272 string[e - i] = '\0'; 3273 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), 3274 "Deprecated character(s) in \\N{...} starting at '%s'", 3275 string); 3276 Safefree(string); 3277 } 3278 } 3279 } /* End \N{NAME} */ 3280 #ifdef EBCDIC 3281 if (!dorange) 3282 native_range = FALSE; /* \N{} is defined to be Unicode */ 3283 #endif 3284 s = e + 1; /* Point to just after the '}' */ 3285 continue; 3286 3287 /* \c is a control character */ 3288 case 'c': 3289 s++; 3290 if (s < send) { 3291 U8 c = *s++; 3292 #ifdef EBCDIC 3293 if (isLOWER(c)) 3294 c = toUPPER(c); 3295 #endif 3296 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c)); 3297 } 3298 else { 3299 yyerror("Missing control char name in \\c"); 3300 } 3301 continue; 3302 3303 /* printf-style backslashes, formfeeds, newlines, etc */ 3304 case 'b': 3305 *d++ = NATIVE_TO_NEED(has_utf8,'\b'); 3306 break; 3307 case 'n': 3308 *d++ = NATIVE_TO_NEED(has_utf8,'\n'); 3309 break; 3310 case 'r': 3311 *d++ = NATIVE_TO_NEED(has_utf8,'\r'); 3312 break; 3313 case 'f': 3314 *d++ = NATIVE_TO_NEED(has_utf8,'\f'); 3315 break; 3316 case 't': 3317 *d++ = NATIVE_TO_NEED(has_utf8,'\t'); 3318 break; 3319 case 'e': 3320 *d++ = ASCII_TO_NEED(has_utf8,'\033'); 3321 break; 3322 case 'a': 3323 *d++ = ASCII_TO_NEED(has_utf8,'\007'); 3324 break; 3325 } /* end switch */ 3326 3327 s++; 3328 continue; 3329 } /* end if (backslash) */ 3330 #ifdef EBCDIC 3331 else 3332 literal_endpoint++; 3333 #endif 3334 3335 default_action: 3336 /* If we started with encoded form, or already know we want it, 3337 then encode the next character */ 3338 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) { 3339 STRLEN len = 1; 3340 3341 3342 /* One might think that it is wasted effort in the case of the 3343 * source being utf8 (this_utf8 == TRUE) to take the next character 3344 * in the source, convert it to an unsigned value, and then convert 3345 * it back again. But the source has not been validated here. The 3346 * routine that does the conversion checks for errors like 3347 * malformed utf8 */ 3348 3349 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s); 3350 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv)); 3351 if (!has_utf8) { 3352 SvCUR_set(sv, d - SvPVX_const(sv)); 3353 SvPOK_on(sv); 3354 *d = '\0'; 3355 /* See Note on sizing above. */ 3356 sv_utf8_upgrade_flags_grow(sv, 3357 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, 3358 need + (STRLEN)(send - s) + 1); 3359 d = SvPVX(sv) + SvCUR(sv); 3360 has_utf8 = TRUE; 3361 } else if (need > len) { 3362 /* encoded value larger than old, may need extra space (NOTE: 3363 * SvCUR() is not set correctly here). See Note on sizing 3364 * above. */ 3365 const STRLEN off = d - SvPVX_const(sv); 3366 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off; 3367 } 3368 s += len; 3369 3370 d = (char*)uvchr_to_utf8((U8*)d, nextuv); 3371 #ifdef EBCDIC 3372 if (uv > 255 && !dorange) 3373 native_range = FALSE; 3374 #endif 3375 } 3376 else { 3377 *d++ = NATIVE_TO_NEED(has_utf8,*s++); 3378 } 3379 } /* while loop to process each character */ 3380 3381 /* terminate the string and set up the sv */ 3382 *d = '\0'; 3383 SvCUR_set(sv, d - SvPVX_const(sv)); 3384 if (SvCUR(sv) >= SvLEN(sv)) 3385 Perl_croak(aTHX_ "panic: constant overflowed allocated space"); 3386 3387 SvPOK_on(sv); 3388 if (PL_encoding && !has_utf8) { 3389 sv_recode_to_utf8(sv, PL_encoding); 3390 if (SvUTF8(sv)) 3391 has_utf8 = TRUE; 3392 } 3393 if (has_utf8) { 3394 SvUTF8_on(sv); 3395 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) { 3396 PL_sublex_info.sub_op->op_private |= 3397 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF); 3398 } 3399 } 3400 3401 /* shrink the sv if we allocated more than we used */ 3402 if (SvCUR(sv) + 5 < SvLEN(sv)) { 3403 SvPV_shrink_to_cur(sv); 3404 } 3405 3406 /* return the substring (via pl_yylval) only if we parsed anything */ 3407 if (s > PL_bufptr) { 3408 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) { 3409 const char *const key = PL_lex_inpat ? "qr" : "q"; 3410 const STRLEN keylen = PL_lex_inpat ? 2 : 1; 3411 const char *type; 3412 STRLEN typelen; 3413 3414 if (PL_lex_inwhat == OP_TRANS) { 3415 type = "tr"; 3416 typelen = 2; 3417 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) { 3418 type = "s"; 3419 typelen = 1; 3420 } else { 3421 type = "qq"; 3422 typelen = 2; 3423 } 3424 3425 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL, 3426 type, typelen); 3427 } 3428 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv); 3429 } else 3430 SvREFCNT_dec(sv); 3431 return s; 3432 } 3433 3434 /* S_intuit_more 3435 * Returns TRUE if there's more to the expression (e.g., a subscript), 3436 * FALSE otherwise. 3437 * 3438 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/ 3439 * 3440 * ->[ and ->{ return TRUE 3441 * { and [ outside a pattern are always subscripts, so return TRUE 3442 * if we're outside a pattern and it's not { or [, then return FALSE 3443 * if we're in a pattern and the first char is a { 3444 * {4,5} (any digits around the comma) returns FALSE 3445 * if we're in a pattern and the first char is a [ 3446 * [] returns FALSE 3447 * [SOMETHING] has a funky algorithm to decide whether it's a 3448 * character class or not. It has to deal with things like 3449 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/ 3450 * anything else returns TRUE 3451 */ 3452 3453 /* This is the one truly awful dwimmer necessary to conflate C and sed. */ 3454 3455 STATIC int 3456 S_intuit_more(pTHX_ register char *s) 3457 { 3458 dVAR; 3459 3460 PERL_ARGS_ASSERT_INTUIT_MORE; 3461 3462 if (PL_lex_brackets) 3463 return TRUE; 3464 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{')) 3465 return TRUE; 3466 if (*s != '{' && *s != '[') 3467 return FALSE; 3468 if (!PL_lex_inpat) 3469 return TRUE; 3470 3471 /* In a pattern, so maybe we have {n,m}. */ 3472 if (*s == '{') { 3473 s++; 3474 if (!isDIGIT(*s)) 3475 return TRUE; 3476 while (isDIGIT(*s)) 3477 s++; 3478 if (*s == ',') 3479 s++; 3480 while (isDIGIT(*s)) 3481 s++; 3482 if (*s == '}') 3483 return FALSE; 3484 return TRUE; 3485 3486 } 3487 3488 /* On the other hand, maybe we have a character class */ 3489 3490 s++; 3491 if (*s == ']' || *s == '^') 3492 return FALSE; 3493 else { 3494 /* this is terrifying, and it works */ 3495 int weight = 2; /* let's weigh the evidence */ 3496 char seen[256]; 3497 unsigned char un_char = 255, last_un_char; 3498 const char * const send = strchr(s,']'); 3499 char tmpbuf[sizeof PL_tokenbuf * 4]; 3500 3501 if (!send) /* has to be an expression */ 3502 return TRUE; 3503 3504 Zero(seen,256,char); 3505 if (*s == '$') 3506 weight -= 3; 3507 else if (isDIGIT(*s)) { 3508 if (s[1] != ']') { 3509 if (isDIGIT(s[1]) && s[2] == ']') 3510 weight -= 10; 3511 } 3512 else 3513 weight -= 100; 3514 } 3515 for (; s < send; s++) { 3516 last_un_char = un_char; 3517 un_char = (unsigned char)*s; 3518 switch (*s) { 3519 case '@': 3520 case '&': 3521 case '$': 3522 weight -= seen[un_char] * 10; 3523 if (isALNUM_lazy_if(s+1,UTF)) { 3524 int len; 3525 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE); 3526 len = (int)strlen(tmpbuf); 3527 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV)) 3528 weight -= 100; 3529 else 3530 weight -= 10; 3531 } 3532 else if (*s == '$' && s[1] && 3533 strchr("[#!%*<>()-=",s[1])) { 3534 if (/*{*/ strchr("])} =",s[2])) 3535 weight -= 10; 3536 else 3537 weight -= 1; 3538 } 3539 break; 3540 case '\\': 3541 un_char = 254; 3542 if (s[1]) { 3543 if (strchr("wds]",s[1])) 3544 weight += 100; 3545 else if (seen[(U8)'\''] || seen[(U8)'"']) 3546 weight += 1; 3547 else if (strchr("rnftbxcav",s[1])) 3548 weight += 40; 3549 else if (isDIGIT(s[1])) { 3550 weight += 40; 3551 while (s[1] && isDIGIT(s[1])) 3552 s++; 3553 } 3554 } 3555 else 3556 weight += 100; 3557 break; 3558 case '-': 3559 if (s[1] == '\\') 3560 weight += 50; 3561 if (strchr("aA01! ",last_un_char)) 3562 weight += 30; 3563 if (strchr("zZ79~",s[1])) 3564 weight += 30; 3565 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$')) 3566 weight -= 5; /* cope with negative subscript */ 3567 break; 3568 default: 3569 if (!isALNUM(last_un_char) 3570 && !(last_un_char == '$' || last_un_char == '@' 3571 || last_un_char == '&') 3572 && isALPHA(*s) && s[1] && isALPHA(s[1])) { 3573 char *d = tmpbuf; 3574 while (isALPHA(*s)) 3575 *d++ = *s++; 3576 *d = '\0'; 3577 if (keyword(tmpbuf, d - tmpbuf, 0)) 3578 weight -= 150; 3579 } 3580 if (un_char == last_un_char + 1) 3581 weight += 5; 3582 weight -= seen[un_char]; 3583 break; 3584 } 3585 seen[un_char]++; 3586 } 3587 if (weight >= 0) /* probably a character class */ 3588 return FALSE; 3589 } 3590 3591 return TRUE; 3592 } 3593 3594 /* 3595 * S_intuit_method 3596 * 3597 * Does all the checking to disambiguate 3598 * foo bar 3599 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise 3600 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args). 3601 * 3602 * First argument is the stuff after the first token, e.g. "bar". 3603 * 3604 * Not a method if bar is a filehandle. 3605 * Not a method if foo is a subroutine prototyped to take a filehandle. 3606 * Not a method if it's really "Foo $bar" 3607 * Method if it's "foo $bar" 3608 * Not a method if it's really "print foo $bar" 3609 * Method if it's really "foo package::" (interpreted as package->foo) 3610 * Not a method if bar is known to be a subroutine ("sub bar; foo bar") 3611 * Not a method if bar is a filehandle or package, but is quoted with 3612 * => 3613 */ 3614 3615 STATIC int 3616 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv) 3617 { 3618 dVAR; 3619 char *s = start + (*start == '$'); 3620 char tmpbuf[sizeof PL_tokenbuf]; 3621 STRLEN len; 3622 GV* indirgv; 3623 #ifdef PERL_MAD 3624 int soff; 3625 #endif 3626 3627 PERL_ARGS_ASSERT_INTUIT_METHOD; 3628 3629 if (gv) { 3630 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv)) 3631 return 0; 3632 if (cv) { 3633 if (SvPOK(cv)) { 3634 const char *proto = SvPVX_const(cv); 3635 if (proto) { 3636 if (*proto == ';') 3637 proto++; 3638 if (*proto == '*') 3639 return 0; 3640 } 3641 } 3642 } else 3643 gv = NULL; 3644 } 3645 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len); 3646 /* start is the beginning of the possible filehandle/object, 3647 * and s is the end of it 3648 * tmpbuf is a copy of it 3649 */ 3650 3651 if (*start == '$') { 3652 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY || 3653 isUPPER(*PL_tokenbuf)) 3654 return 0; 3655 #ifdef PERL_MAD 3656 len = start - SvPVX(PL_linestr); 3657 #endif 3658 s = PEEKSPACE(s); 3659 #ifdef PERL_MAD 3660 start = SvPVX(PL_linestr) + len; 3661 #endif 3662 PL_bufptr = start; 3663 PL_expect = XREF; 3664 return *s == '(' ? FUNCMETH : METHOD; 3665 } 3666 if (!keyword(tmpbuf, len, 0)) { 3667 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') { 3668 len -= 2; 3669 tmpbuf[len] = '\0'; 3670 #ifdef PERL_MAD 3671 soff = s - SvPVX(PL_linestr); 3672 #endif 3673 goto bare_package; 3674 } 3675 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV); 3676 if (indirgv && GvCVu(indirgv)) 3677 return 0; 3678 /* filehandle or package name makes it a method */ 3679 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) { 3680 #ifdef PERL_MAD 3681 soff = s - SvPVX(PL_linestr); 3682 #endif 3683 s = PEEKSPACE(s); 3684 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>') 3685 return 0; /* no assumptions -- "=>" quotes bearword */ 3686 bare_package: 3687 start_force(PL_curforce); 3688 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, 3689 S_newSV_maybe_utf8(aTHX_ tmpbuf, len)); 3690 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE; 3691 if (PL_madskills) 3692 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start)); 3693 PL_expect = XTERM; 3694 force_next(WORD); 3695 PL_bufptr = s; 3696 #ifdef PERL_MAD 3697 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */ 3698 #endif 3699 return *s == '(' ? FUNCMETH : METHOD; 3700 } 3701 } 3702 return 0; 3703 } 3704 3705 /* Encoded script support. filter_add() effectively inserts a 3706 * 'pre-processing' function into the current source input stream. 3707 * Note that the filter function only applies to the current source file 3708 * (e.g., it will not affect files 'require'd or 'use'd by this one). 3709 * 3710 * The datasv parameter (which may be NULL) can be used to pass 3711 * private data to this instance of the filter. The filter function 3712 * can recover the SV using the FILTER_DATA macro and use it to 3713 * store private buffers and state information. 3714 * 3715 * The supplied datasv parameter is upgraded to a PVIO type 3716 * and the IoDIRP/IoANY field is used to store the function pointer, 3717 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such. 3718 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for 3719 * private use must be set using malloc'd pointers. 3720 */ 3721 3722 SV * 3723 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv) 3724 { 3725 dVAR; 3726 if (!funcp) 3727 return NULL; 3728 3729 if (!PL_parser) 3730 return NULL; 3731 3732 if (!PL_rsfp_filters) 3733 PL_rsfp_filters = newAV(); 3734 if (!datasv) 3735 datasv = newSV(0); 3736 SvUPGRADE(datasv, SVt_PVIO); 3737 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */ 3738 IoFLAGS(datasv) |= IOf_FAKE_DIRP; 3739 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n", 3740 FPTR2DPTR(void *, IoANY(datasv)), 3741 SvPV_nolen(datasv))); 3742 av_unshift(PL_rsfp_filters, 1); 3743 av_store(PL_rsfp_filters, 0, datasv) ; 3744 return(datasv); 3745 } 3746 3747 3748 /* Delete most recently added instance of this filter function. */ 3749 void 3750 Perl_filter_del(pTHX_ filter_t funcp) 3751 { 3752 dVAR; 3753 SV *datasv; 3754 3755 PERL_ARGS_ASSERT_FILTER_DEL; 3756 3757 #ifdef DEBUGGING 3758 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", 3759 FPTR2DPTR(void*, funcp))); 3760 #endif 3761 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0) 3762 return; 3763 /* if filter is on top of stack (usual case) just pop it off */ 3764 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters)); 3765 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) { 3766 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP; 3767 IoANY(datasv) = (void *)NULL; 3768 sv_free(av_pop(PL_rsfp_filters)); 3769 3770 return; 3771 } 3772 /* we need to search for the correct entry and clear it */ 3773 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)"); 3774 } 3775 3776 3777 /* Invoke the idxth filter function for the current rsfp. */ 3778 /* maxlen 0 = read one text line */ 3779 I32 3780 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen) 3781 { 3782 dVAR; 3783 filter_t funcp; 3784 SV *datasv = NULL; 3785 /* This API is bad. It should have been using unsigned int for maxlen. 3786 Not sure if we want to change the API, but if not we should sanity 3787 check the value here. */ 3788 const unsigned int correct_length 3789 = maxlen < 0 ? 3790 #ifdef PERL_MICRO 3791 0x7FFFFFFF 3792 #else 3793 INT_MAX 3794 #endif 3795 : maxlen; 3796 3797 PERL_ARGS_ASSERT_FILTER_READ; 3798 3799 if (!PL_parser || !PL_rsfp_filters) 3800 return -1; 3801 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */ 3802 /* Provide a default input filter to make life easy. */ 3803 /* Note that we append to the line. This is handy. */ 3804 DEBUG_P(PerlIO_printf(Perl_debug_log, 3805 "filter_read %d: from rsfp\n", idx)); 3806 if (correct_length) { 3807 /* Want a block */ 3808 int len ; 3809 const int old_len = SvCUR(buf_sv); 3810 3811 /* ensure buf_sv is large enough */ 3812 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ; 3813 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, 3814 correct_length)) <= 0) { 3815 if (PerlIO_error(PL_rsfp)) 3816 return -1; /* error */ 3817 else 3818 return 0 ; /* end of file */ 3819 } 3820 SvCUR_set(buf_sv, old_len + len) ; 3821 SvPVX(buf_sv)[old_len + len] = '\0'; 3822 } else { 3823 /* Want a line */ 3824 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) { 3825 if (PerlIO_error(PL_rsfp)) 3826 return -1; /* error */ 3827 else 3828 return 0 ; /* end of file */ 3829 } 3830 } 3831 return SvCUR(buf_sv); 3832 } 3833 /* Skip this filter slot if filter has been deleted */ 3834 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) { 3835 DEBUG_P(PerlIO_printf(Perl_debug_log, 3836 "filter_read %d: skipped (filter deleted)\n", 3837 idx)); 3838 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */ 3839 } 3840 /* Get function pointer hidden within datasv */ 3841 funcp = DPTR2FPTR(filter_t, IoANY(datasv)); 3842 DEBUG_P(PerlIO_printf(Perl_debug_log, 3843 "filter_read %d: via function %p (%s)\n", 3844 idx, (void*)datasv, SvPV_nolen_const(datasv))); 3845 /* Call function. The function is expected to */ 3846 /* call "FILTER_READ(idx+1, buf_sv)" first. */ 3847 /* Return: <0:error, =0:eof, >0:not eof */ 3848 return (*funcp)(aTHX_ idx, buf_sv, correct_length); 3849 } 3850 3851 STATIC char * 3852 S_filter_gets(pTHX_ register SV *sv, STRLEN append) 3853 { 3854 dVAR; 3855 3856 PERL_ARGS_ASSERT_FILTER_GETS; 3857 3858 #ifdef PERL_CR_FILTER 3859 if (!PL_rsfp_filters) { 3860 filter_add(S_cr_textfilter,NULL); 3861 } 3862 #endif 3863 if (PL_rsfp_filters) { 3864 if (!append) 3865 SvCUR_set(sv, 0); /* start with empty line */ 3866 if (FILTER_READ(0, sv, 0) > 0) 3867 return ( SvPVX(sv) ) ; 3868 else 3869 return NULL ; 3870 } 3871 else 3872 return (sv_gets(sv, PL_rsfp, append)); 3873 } 3874 3875 STATIC HV * 3876 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len) 3877 { 3878 dVAR; 3879 GV *gv; 3880 3881 PERL_ARGS_ASSERT_FIND_IN_MY_STASH; 3882 3883 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__")) 3884 return PL_curstash; 3885 3886 if (len > 2 && 3887 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') && 3888 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV))) 3889 { 3890 return GvHV(gv); /* Foo:: */ 3891 } 3892 3893 /* use constant CLASS => 'MyClass' */ 3894 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV); 3895 if (gv && GvCV(gv)) { 3896 SV * const sv = cv_const_sv(GvCV(gv)); 3897 if (sv) 3898 pkgname = SvPV_const(sv, len); 3899 } 3900 3901 return gv_stashpvn(pkgname, len, 0); 3902 } 3903 3904 /* 3905 * S_readpipe_override 3906 * Check whether readpipe() is overriden, and generates the appropriate 3907 * optree, provided sublex_start() is called afterwards. 3908 */ 3909 STATIC void 3910 S_readpipe_override(pTHX) 3911 { 3912 GV **gvp; 3913 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV); 3914 pl_yylval.ival = OP_BACKTICK; 3915 if ((gv_readpipe 3916 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)) 3917 || 3918 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE)) 3919 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe) 3920 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))) 3921 { 3922 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED, 3923 append_elem(OP_LIST, 3924 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */ 3925 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe)))); 3926 } 3927 } 3928 3929 #ifdef PERL_MAD 3930 /* 3931 * Perl_madlex 3932 * The intent of this yylex wrapper is to minimize the changes to the 3933 * tokener when we aren't interested in collecting madprops. It remains 3934 * to be seen how successful this strategy will be... 3935 */ 3936 3937 int 3938 Perl_madlex(pTHX) 3939 { 3940 int optype; 3941 char *s = PL_bufptr; 3942 3943 /* make sure PL_thiswhite is initialized */ 3944 PL_thiswhite = 0; 3945 PL_thismad = 0; 3946 3947 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */ 3948 if (PL_pending_ident) 3949 return S_pending_ident(aTHX); 3950 3951 /* previous token ate up our whitespace? */ 3952 if (!PL_lasttoke && PL_nextwhite) { 3953 PL_thiswhite = PL_nextwhite; 3954 PL_nextwhite = 0; 3955 } 3956 3957 /* isolate the token, and figure out where it is without whitespace */ 3958 PL_realtokenstart = -1; 3959 PL_thistoken = 0; 3960 optype = yylex(); 3961 s = PL_bufptr; 3962 assert(PL_curforce < 0); 3963 3964 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */ 3965 if (!PL_thistoken) { 3966 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop)) 3967 PL_thistoken = newSVpvs(""); 3968 else { 3969 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart; 3970 PL_thistoken = newSVpvn(tstart, s - tstart); 3971 } 3972 } 3973 if (PL_thismad) /* install head */ 3974 CURMAD('X', PL_thistoken); 3975 } 3976 3977 /* last whitespace of a sublex? */ 3978 if (optype == ')' && PL_endwhite) { 3979 CURMAD('X', PL_endwhite); 3980 } 3981 3982 if (!PL_thismad) { 3983 3984 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */ 3985 if (!PL_thiswhite && !PL_endwhite && !optype) { 3986 sv_free(PL_thistoken); 3987 PL_thistoken = 0; 3988 return 0; 3989 } 3990 3991 /* put off final whitespace till peg */ 3992 if (optype == ';' && !PL_rsfp) { 3993 PL_nextwhite = PL_thiswhite; 3994 PL_thiswhite = 0; 3995 } 3996 else if (PL_thisopen) { 3997 CURMAD('q', PL_thisopen); 3998 if (PL_thistoken) 3999 sv_free(PL_thistoken); 4000 PL_thistoken = 0; 4001 } 4002 else { 4003 /* Store actual token text as madprop X */ 4004 CURMAD('X', PL_thistoken); 4005 } 4006 4007 if (PL_thiswhite) { 4008 /* add preceding whitespace as madprop _ */ 4009 CURMAD('_', PL_thiswhite); 4010 } 4011 4012 if (PL_thisstuff) { 4013 /* add quoted material as madprop = */ 4014 CURMAD('=', PL_thisstuff); 4015 } 4016 4017 if (PL_thisclose) { 4018 /* add terminating quote as madprop Q */ 4019 CURMAD('Q', PL_thisclose); 4020 } 4021 } 4022 4023 /* special processing based on optype */ 4024 4025 switch (optype) { 4026 4027 /* opval doesn't need a TOKEN since it can already store mp */ 4028 case WORD: 4029 case METHOD: 4030 case FUNCMETH: 4031 case THING: 4032 case PMFUNC: 4033 case PRIVATEREF: 4034 case FUNC0SUB: 4035 case UNIOPSUB: 4036 case LSTOPSUB: 4037 if (pl_yylval.opval) 4038 append_madprops(PL_thismad, pl_yylval.opval, 0); 4039 PL_thismad = 0; 4040 return optype; 4041 4042 /* fake EOF */ 4043 case 0: 4044 optype = PEG; 4045 if (PL_endwhite) { 4046 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0); 4047 PL_endwhite = 0; 4048 } 4049 break; 4050 4051 case ']': 4052 case '}': 4053 if (PL_faketokens) 4054 break; 4055 /* remember any fake bracket that lexer is about to discard */ 4056 if (PL_lex_brackets == 1 && 4057 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK)) 4058 { 4059 s = PL_bufptr; 4060 while (s < PL_bufend && (*s == ' ' || *s == '\t')) 4061 s++; 4062 if (*s == '}') { 4063 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr); 4064 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0); 4065 PL_thiswhite = 0; 4066 PL_bufptr = s - 1; 4067 break; /* don't bother looking for trailing comment */ 4068 } 4069 else 4070 s = PL_bufptr; 4071 } 4072 if (optype == ']') 4073 break; 4074 /* FALLTHROUGH */ 4075 4076 /* attach a trailing comment to its statement instead of next token */ 4077 case ';': 4078 if (PL_faketokens) 4079 break; 4080 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) { 4081 s = PL_bufptr; 4082 while (s < PL_bufend && (*s == ' ' || *s == '\t')) 4083 s++; 4084 if (*s == '\n' || *s == '#') { 4085 while (s < PL_bufend && *s != '\n') 4086 s++; 4087 if (s < PL_bufend) 4088 s++; 4089 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr); 4090 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0); 4091 PL_thiswhite = 0; 4092 PL_bufptr = s; 4093 } 4094 } 4095 break; 4096 4097 /* pval */ 4098 case LABEL: 4099 break; 4100 4101 /* ival */ 4102 default: 4103 break; 4104 4105 } 4106 4107 /* Create new token struct. Note: opvals return early above. */ 4108 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad); 4109 PL_thismad = 0; 4110 return optype; 4111 } 4112 #endif 4113 4114 STATIC char * 4115 S_tokenize_use(pTHX_ int is_use, char *s) { 4116 dVAR; 4117 4118 PERL_ARGS_ASSERT_TOKENIZE_USE; 4119 4120 if (PL_expect != XSTATE) 4121 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression", 4122 is_use ? "use" : "no")); 4123 s = SKIPSPACE1(s); 4124 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) { 4125 s = force_version(s, TRUE); 4126 if (*s == ';' || *s == '}' 4127 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) { 4128 start_force(PL_curforce); 4129 NEXTVAL_NEXTTOKE.opval = NULL; 4130 force_next(WORD); 4131 } 4132 else if (*s == 'v') { 4133 s = force_word(s,WORD,FALSE,TRUE,FALSE); 4134 s = force_version(s, FALSE); 4135 } 4136 } 4137 else { 4138 s = force_word(s,WORD,FALSE,TRUE,FALSE); 4139 s = force_version(s, FALSE); 4140 } 4141 pl_yylval.ival = is_use; 4142 return s; 4143 } 4144 #ifdef DEBUGGING 4145 static const char* const exp_name[] = 4146 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK", 4147 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR" 4148 }; 4149 #endif 4150 4151 /* 4152 yylex 4153 4154 Works out what to call the token just pulled out of the input 4155 stream. The yacc parser takes care of taking the ops we return and 4156 stitching them into a tree. 4157 4158 Returns: 4159 PRIVATEREF 4160 4161 Structure: 4162 if read an identifier 4163 if we're in a my declaration 4164 croak if they tried to say my($foo::bar) 4165 build the ops for a my() declaration 4166 if it's an access to a my() variable 4167 are we in a sort block? 4168 croak if my($a); $a <=> $b 4169 build ops for access to a my() variable 4170 if in a dq string, and they've said @foo and we can't find @foo 4171 croak 4172 build ops for a bareword 4173 if we already built the token before, use it. 4174 */ 4175 4176 4177 #ifdef __SC__ 4178 #pragma segment Perl_yylex 4179 #endif 4180 int 4181 Perl_yylex(pTHX) 4182 { 4183 dVAR; 4184 register char *s = PL_bufptr; 4185 register char *d; 4186 STRLEN len; 4187 bool bof = FALSE; 4188 U32 fake_eof = 0; 4189 4190 /* orig_keyword, gvp, and gv are initialized here because 4191 * jump to the label just_a_word_zero can bypass their 4192 * initialization later. */ 4193 I32 orig_keyword = 0; 4194 GV *gv = NULL; 4195 GV **gvp = NULL; 4196 4197 DEBUG_T( { 4198 SV* tmp = newSVpvs(""); 4199 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n", 4200 (IV)CopLINE(PL_curcop), 4201 lex_state_names[PL_lex_state], 4202 exp_name[PL_expect], 4203 pv_display(tmp, s, strlen(s), 0, 60)); 4204 SvREFCNT_dec(tmp); 4205 } ); 4206 /* check if there's an identifier for us to look at */ 4207 if (PL_pending_ident) 4208 return REPORT(S_pending_ident(aTHX)); 4209 4210 /* no identifier pending identification */ 4211 4212 switch (PL_lex_state) { 4213 #ifdef COMMENTARY 4214 case LEX_NORMAL: /* Some compilers will produce faster */ 4215 case LEX_INTERPNORMAL: /* code if we comment these out. */ 4216 break; 4217 #endif 4218 4219 /* when we've already built the next token, just pull it out of the queue */ 4220 case LEX_KNOWNEXT: 4221 #ifdef PERL_MAD 4222 PL_lasttoke--; 4223 pl_yylval = PL_nexttoke[PL_lasttoke].next_val; 4224 if (PL_madskills) { 4225 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad; 4226 PL_nexttoke[PL_lasttoke].next_mad = 0; 4227 if (PL_thismad && PL_thismad->mad_key == '_') { 4228 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val); 4229 PL_thismad->mad_val = 0; 4230 mad_free(PL_thismad); 4231 PL_thismad = 0; 4232 } 4233 } 4234 if (!PL_lasttoke) { 4235 PL_lex_state = PL_lex_defer; 4236 PL_expect = PL_lex_expect; 4237 PL_lex_defer = LEX_NORMAL; 4238 if (!PL_nexttoke[PL_lasttoke].next_type) 4239 return yylex(); 4240 } 4241 #else 4242 PL_nexttoke--; 4243 pl_yylval = PL_nextval[PL_nexttoke]; 4244 if (!PL_nexttoke) { 4245 PL_lex_state = PL_lex_defer; 4246 PL_expect = PL_lex_expect; 4247 PL_lex_defer = LEX_NORMAL; 4248 } 4249 #endif 4250 #ifdef PERL_MAD 4251 /* FIXME - can these be merged? */ 4252 return(PL_nexttoke[PL_lasttoke].next_type); 4253 #else 4254 return REPORT(PL_nexttype[PL_nexttoke]); 4255 #endif 4256 4257 /* interpolated case modifiers like \L \U, including \Q and \E. 4258 when we get here, PL_bufptr is at the \ 4259 */ 4260 case LEX_INTERPCASEMOD: 4261 #ifdef DEBUGGING 4262 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\') 4263 Perl_croak(aTHX_ "panic: INTERPCASEMOD"); 4264 #endif 4265 /* handle \E or end of string */ 4266 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') { 4267 /* if at a \E */ 4268 if (PL_lex_casemods) { 4269 const char oldmod = PL_lex_casestack[--PL_lex_casemods]; 4270 PL_lex_casestack[PL_lex_casemods] = '\0'; 4271 4272 if (PL_bufptr != PL_bufend 4273 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) { 4274 PL_bufptr += 2; 4275 PL_lex_state = LEX_INTERPCONCAT; 4276 #ifdef PERL_MAD 4277 if (PL_madskills) 4278 PL_thistoken = newSVpvs("\\E"); 4279 #endif 4280 } 4281 return REPORT(')'); 4282 } 4283 #ifdef PERL_MAD 4284 while (PL_bufptr != PL_bufend && 4285 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') { 4286 if (!PL_thiswhite) 4287 PL_thiswhite = newSVpvs(""); 4288 sv_catpvn(PL_thiswhite, PL_bufptr, 2); 4289 PL_bufptr += 2; 4290 } 4291 #else 4292 if (PL_bufptr != PL_bufend) 4293 PL_bufptr += 2; 4294 #endif 4295 PL_lex_state = LEX_INTERPCONCAT; 4296 return yylex(); 4297 } 4298 else { 4299 DEBUG_T({ PerlIO_printf(Perl_debug_log, 4300 "### Saw case modifier\n"); }); 4301 s = PL_bufptr + 1; 4302 if (s[1] == '\\' && s[2] == 'E') { 4303 #ifdef PERL_MAD 4304 if (!PL_thiswhite) 4305 PL_thiswhite = newSVpvs(""); 4306 sv_catpvn(PL_thiswhite, PL_bufptr, 4); 4307 #endif 4308 PL_bufptr = s + 3; 4309 PL_lex_state = LEX_INTERPCONCAT; 4310 return yylex(); 4311 } 4312 else { 4313 I32 tmp; 4314 if (!PL_madskills) /* when just compiling don't need correct */ 4315 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3)) 4316 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */ 4317 if ((*s == 'L' || *s == 'U') && 4318 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) { 4319 PL_lex_casestack[--PL_lex_casemods] = '\0'; 4320 return REPORT(')'); 4321 } 4322 if (PL_lex_casemods > 10) 4323 Renew(PL_lex_casestack, PL_lex_casemods + 2, char); 4324 PL_lex_casestack[PL_lex_casemods++] = *s; 4325 PL_lex_casestack[PL_lex_casemods] = '\0'; 4326 PL_lex_state = LEX_INTERPCONCAT; 4327 start_force(PL_curforce); 4328 NEXTVAL_NEXTTOKE.ival = 0; 4329 force_next('('); 4330 start_force(PL_curforce); 4331 if (*s == 'l') 4332 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST; 4333 else if (*s == 'u') 4334 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST; 4335 else if (*s == 'L') 4336 NEXTVAL_NEXTTOKE.ival = OP_LC; 4337 else if (*s == 'U') 4338 NEXTVAL_NEXTTOKE.ival = OP_UC; 4339 else if (*s == 'Q') 4340 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA; 4341 else 4342 Perl_croak(aTHX_ "panic: yylex"); 4343 if (PL_madskills) { 4344 SV* const tmpsv = newSVpvs("\\ "); 4345 /* replace the space with the character we want to escape 4346 */ 4347 SvPVX(tmpsv)[1] = *s; 4348 curmad('_', tmpsv); 4349 } 4350 PL_bufptr = s + 1; 4351 } 4352 force_next(FUNC); 4353 if (PL_lex_starts) { 4354 s = PL_bufptr; 4355 PL_lex_starts = 0; 4356 #ifdef PERL_MAD 4357 if (PL_madskills) { 4358 if (PL_thistoken) 4359 sv_free(PL_thistoken); 4360 PL_thistoken = newSVpvs(""); 4361 } 4362 #endif 4363 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ 4364 if (PL_lex_casemods == 1 && PL_lex_inpat) 4365 OPERATOR(','); 4366 else 4367 Aop(OP_CONCAT); 4368 } 4369 else 4370 return yylex(); 4371 } 4372 4373 case LEX_INTERPPUSH: 4374 return REPORT(sublex_push()); 4375 4376 case LEX_INTERPSTART: 4377 if (PL_bufptr == PL_bufend) 4378 return REPORT(sublex_done()); 4379 DEBUG_T({ PerlIO_printf(Perl_debug_log, 4380 "### Interpolated variable\n"); }); 4381 PL_expect = XTERM; 4382 PL_lex_dojoin = (*PL_bufptr == '@'); 4383 PL_lex_state = LEX_INTERPNORMAL; 4384 if (PL_lex_dojoin) { 4385 start_force(PL_curforce); 4386 NEXTVAL_NEXTTOKE.ival = 0; 4387 force_next(','); 4388 start_force(PL_curforce); 4389 force_ident("\"", '$'); 4390 start_force(PL_curforce); 4391 NEXTVAL_NEXTTOKE.ival = 0; 4392 force_next('$'); 4393 start_force(PL_curforce); 4394 NEXTVAL_NEXTTOKE.ival = 0; 4395 force_next('('); 4396 start_force(PL_curforce); 4397 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */ 4398 force_next(FUNC); 4399 } 4400 if (PL_lex_starts++) { 4401 s = PL_bufptr; 4402 #ifdef PERL_MAD 4403 if (PL_madskills) { 4404 if (PL_thistoken) 4405 sv_free(PL_thistoken); 4406 PL_thistoken = newSVpvs(""); 4407 } 4408 #endif 4409 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ 4410 if (!PL_lex_casemods && PL_lex_inpat) 4411 OPERATOR(','); 4412 else 4413 Aop(OP_CONCAT); 4414 } 4415 return yylex(); 4416 4417 case LEX_INTERPENDMAYBE: 4418 if (intuit_more(PL_bufptr)) { 4419 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */ 4420 break; 4421 } 4422 /* FALL THROUGH */ 4423 4424 case LEX_INTERPEND: 4425 if (PL_lex_dojoin) { 4426 PL_lex_dojoin = FALSE; 4427 PL_lex_state = LEX_INTERPCONCAT; 4428 #ifdef PERL_MAD 4429 if (PL_madskills) { 4430 if (PL_thistoken) 4431 sv_free(PL_thistoken); 4432 PL_thistoken = newSVpvs(""); 4433 } 4434 #endif 4435 return REPORT(')'); 4436 } 4437 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl 4438 && SvEVALED(PL_lex_repl)) 4439 { 4440 if (PL_bufptr != PL_bufend) 4441 Perl_croak(aTHX_ "Bad evalled substitution pattern"); 4442 PL_lex_repl = NULL; 4443 } 4444 /* FALLTHROUGH */ 4445 case LEX_INTERPCONCAT: 4446 #ifdef DEBUGGING 4447 if (PL_lex_brackets) 4448 Perl_croak(aTHX_ "panic: INTERPCONCAT"); 4449 #endif 4450 if (PL_bufptr == PL_bufend) 4451 return REPORT(sublex_done()); 4452 4453 if (SvIVX(PL_linestr) == '\'') { 4454 SV *sv = newSVsv(PL_linestr); 4455 if (!PL_lex_inpat) 4456 sv = tokeq(sv); 4457 else if ( PL_hints & HINT_NEW_RE ) 4458 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1); 4459 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv); 4460 s = PL_bufend; 4461 } 4462 else { 4463 s = scan_const(PL_bufptr); 4464 if (*s == '\\') 4465 PL_lex_state = LEX_INTERPCASEMOD; 4466 else 4467 PL_lex_state = LEX_INTERPSTART; 4468 } 4469 4470 if (s != PL_bufptr) { 4471 start_force(PL_curforce); 4472 if (PL_madskills) { 4473 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr)); 4474 } 4475 NEXTVAL_NEXTTOKE = pl_yylval; 4476 PL_expect = XTERM; 4477 force_next(THING); 4478 if (PL_lex_starts++) { 4479 #ifdef PERL_MAD 4480 if (PL_madskills) { 4481 if (PL_thistoken) 4482 sv_free(PL_thistoken); 4483 PL_thistoken = newSVpvs(""); 4484 } 4485 #endif 4486 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ 4487 if (!PL_lex_casemods && PL_lex_inpat) 4488 OPERATOR(','); 4489 else 4490 Aop(OP_CONCAT); 4491 } 4492 else { 4493 PL_bufptr = s; 4494 return yylex(); 4495 } 4496 } 4497 4498 return yylex(); 4499 case LEX_FORMLINE: 4500 PL_lex_state = LEX_NORMAL; 4501 s = scan_formline(PL_bufptr); 4502 if (!PL_lex_formbrack) 4503 goto rightbracket; 4504 OPERATOR(';'); 4505 } 4506 4507 s = PL_bufptr; 4508 PL_oldoldbufptr = PL_oldbufptr; 4509 PL_oldbufptr = s; 4510 4511 retry: 4512 #ifdef PERL_MAD 4513 if (PL_thistoken) { 4514 sv_free(PL_thistoken); 4515 PL_thistoken = 0; 4516 } 4517 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */ 4518 #endif 4519 switch (*s) { 4520 default: 4521 if (isIDFIRST_lazy_if(s,UTF)) 4522 goto keylookup; 4523 { 4524 unsigned char c = *s; 4525 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart); 4526 if (len > UNRECOGNIZED_PRECEDE_COUNT) { 4527 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT; 4528 } else { 4529 d = PL_linestart; 4530 } 4531 *s = '\0'; 4532 Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1); 4533 } 4534 case 4: 4535 case 26: 4536 goto fake_eof; /* emulate EOF on ^D or ^Z */ 4537 case 0: 4538 #ifdef PERL_MAD 4539 if (PL_madskills) 4540 PL_faketokens = 0; 4541 #endif 4542 if (!PL_rsfp) { 4543 PL_last_uni = 0; 4544 PL_last_lop = 0; 4545 if (PL_lex_brackets) { 4546 yyerror((const char *) 4547 (PL_lex_formbrack 4548 ? "Format not terminated" 4549 : "Missing right curly or square bracket")); 4550 } 4551 DEBUG_T( { PerlIO_printf(Perl_debug_log, 4552 "### Tokener got EOF\n"); 4553 } ); 4554 TOKEN(0); 4555 } 4556 if (s++ < PL_bufend) 4557 goto retry; /* ignore stray nulls */ 4558 PL_last_uni = 0; 4559 PL_last_lop = 0; 4560 if (!PL_in_eval && !PL_preambled) { 4561 PL_preambled = TRUE; 4562 #ifdef PERL_MAD 4563 if (PL_madskills) 4564 PL_faketokens = 1; 4565 #endif 4566 if (PL_perldb) { 4567 /* Generate a string of Perl code to load the debugger. 4568 * If PERL5DB is set, it will return the contents of that, 4569 * otherwise a compile-time require of perl5db.pl. */ 4570 4571 const char * const pdb = PerlEnv_getenv("PERL5DB"); 4572 4573 if (pdb) { 4574 sv_setpv(PL_linestr, pdb); 4575 sv_catpvs(PL_linestr,";"); 4576 } else { 4577 SETERRNO(0,SS_NORMAL); 4578 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };"); 4579 } 4580 } else 4581 sv_setpvs(PL_linestr,""); 4582 if (PL_preambleav) { 4583 SV **svp = AvARRAY(PL_preambleav); 4584 SV **const end = svp + AvFILLp(PL_preambleav); 4585 while(svp <= end) { 4586 sv_catsv(PL_linestr, *svp); 4587 ++svp; 4588 sv_catpvs(PL_linestr, ";"); 4589 } 4590 sv_free(MUTABLE_SV(PL_preambleav)); 4591 PL_preambleav = NULL; 4592 } 4593 if (PL_minus_E) 4594 sv_catpvs(PL_linestr, 4595 "use feature ':5." STRINGIFY(PERL_VERSION) "';"); 4596 if (PL_minus_n || PL_minus_p) { 4597 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/); 4598 if (PL_minus_l) 4599 sv_catpvs(PL_linestr,"chomp;"); 4600 if (PL_minus_a) { 4601 if (PL_minus_F) { 4602 if ((*PL_splitstr == '/' || *PL_splitstr == '\'' 4603 || *PL_splitstr == '"') 4604 && strchr(PL_splitstr + 1, *PL_splitstr)) 4605 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr); 4606 else { 4607 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL 4608 bytes can be used as quoting characters. :-) */ 4609 const char *splits = PL_splitstr; 4610 sv_catpvs(PL_linestr, "our @F=split(q\0"); 4611 do { 4612 /* Need to \ \s */ 4613 if (*splits == '\\') 4614 sv_catpvn(PL_linestr, splits, 1); 4615 sv_catpvn(PL_linestr, splits, 1); 4616 } while (*splits++); 4617 /* This loop will embed the trailing NUL of 4618 PL_linestr as the last thing it does before 4619 terminating. */ 4620 sv_catpvs(PL_linestr, ");"); 4621 } 4622 } 4623 else 4624 sv_catpvs(PL_linestr,"our @F=split(' ');"); 4625 } 4626 } 4627 sv_catpvs(PL_linestr, "\n"); 4628 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); 4629 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 4630 PL_last_lop = PL_last_uni = NULL; 4631 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash) 4632 update_debugger_info(PL_linestr, NULL, 0); 4633 goto retry; 4634 } 4635 do { 4636 fake_eof = 0; 4637 bof = PL_rsfp ? TRUE : FALSE; 4638 if (0) { 4639 fake_eof: 4640 fake_eof = LEX_FAKE_EOF; 4641 } 4642 PL_bufptr = PL_bufend; 4643 CopLINE_inc(PL_curcop); 4644 if (!lex_next_chunk(fake_eof)) { 4645 CopLINE_dec(PL_curcop); 4646 s = PL_bufptr; 4647 TOKEN(';'); /* not infinite loop because rsfp is NULL now */ 4648 } 4649 CopLINE_dec(PL_curcop); 4650 #ifdef PERL_MAD 4651 if (!PL_rsfp) 4652 PL_realtokenstart = -1; 4653 #endif 4654 s = PL_bufptr; 4655 /* If it looks like the start of a BOM or raw UTF-16, 4656 * check if it in fact is. */ 4657 if (bof && PL_rsfp && 4658 (*s == 0 || 4659 *(U8*)s == 0xEF || 4660 *(U8*)s >= 0xFE || 4661 s[1] == 0)) { 4662 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr); 4663 if (bof) { 4664 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 4665 s = swallow_bom((U8*)s); 4666 } 4667 } 4668 if (PL_doextract) { 4669 /* Incest with pod. */ 4670 #ifdef PERL_MAD 4671 if (PL_madskills) 4672 sv_catsv(PL_thiswhite, PL_linestr); 4673 #endif 4674 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) { 4675 sv_setpvs(PL_linestr, ""); 4676 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); 4677 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 4678 PL_last_lop = PL_last_uni = NULL; 4679 PL_doextract = FALSE; 4680 } 4681 } 4682 if (PL_rsfp) 4683 incline(s); 4684 } while (PL_doextract); 4685 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s; 4686 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 4687 PL_last_lop = PL_last_uni = NULL; 4688 if (CopLINE(PL_curcop) == 1) { 4689 while (s < PL_bufend && isSPACE(*s)) 4690 s++; 4691 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */ 4692 s++; 4693 #ifdef PERL_MAD 4694 if (PL_madskills) 4695 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart); 4696 #endif 4697 d = NULL; 4698 if (!PL_in_eval) { 4699 if (*s == '#' && *(s+1) == '!') 4700 d = s + 2; 4701 #ifdef ALTERNATE_SHEBANG 4702 else { 4703 static char const as[] = ALTERNATE_SHEBANG; 4704 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1)) 4705 d = s + (sizeof(as) - 1); 4706 } 4707 #endif /* ALTERNATE_SHEBANG */ 4708 } 4709 if (d) { 4710 char *ipath; 4711 char *ipathend; 4712 4713 while (isSPACE(*d)) 4714 d++; 4715 ipath = d; 4716 while (*d && !isSPACE(*d)) 4717 d++; 4718 ipathend = d; 4719 4720 #ifdef ARG_ZERO_IS_SCRIPT 4721 if (ipathend > ipath) { 4722 /* 4723 * HP-UX (at least) sets argv[0] to the script name, 4724 * which makes $^X incorrect. And Digital UNIX and Linux, 4725 * at least, set argv[0] to the basename of the Perl 4726 * interpreter. So, having found "#!", we'll set it right. 4727 */ 4728 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL, 4729 SVt_PV)); /* $^X */ 4730 assert(SvPOK(x) || SvGMAGICAL(x)); 4731 if (sv_eq(x, CopFILESV(PL_curcop))) { 4732 sv_setpvn(x, ipath, ipathend - ipath); 4733 SvSETMAGIC(x); 4734 } 4735 else { 4736 STRLEN blen; 4737 STRLEN llen; 4738 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen); 4739 const char * const lstart = SvPV_const(x,llen); 4740 if (llen < blen) { 4741 bstart += blen - llen; 4742 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') { 4743 sv_setpvn(x, ipath, ipathend - ipath); 4744 SvSETMAGIC(x); 4745 } 4746 } 4747 } 4748 TAINT_NOT; /* $^X is always tainted, but that's OK */ 4749 } 4750 #endif /* ARG_ZERO_IS_SCRIPT */ 4751 4752 /* 4753 * Look for options. 4754 */ 4755 d = instr(s,"perl -"); 4756 if (!d) { 4757 d = instr(s,"perl"); 4758 #if defined(DOSISH) 4759 /* avoid getting into infinite loops when shebang 4760 * line contains "Perl" rather than "perl" */ 4761 if (!d) { 4762 for (d = ipathend-4; d >= ipath; --d) { 4763 if ((*d == 'p' || *d == 'P') 4764 && !ibcmp(d, "perl", 4)) 4765 { 4766 break; 4767 } 4768 } 4769 if (d < ipath) 4770 d = NULL; 4771 } 4772 #endif 4773 } 4774 #ifdef ALTERNATE_SHEBANG 4775 /* 4776 * If the ALTERNATE_SHEBANG on this system starts with a 4777 * character that can be part of a Perl expression, then if 4778 * we see it but not "perl", we're probably looking at the 4779 * start of Perl code, not a request to hand off to some 4780 * other interpreter. Similarly, if "perl" is there, but 4781 * not in the first 'word' of the line, we assume the line 4782 * contains the start of the Perl program. 4783 */ 4784 if (d && *s != '#') { 4785 const char *c = ipath; 4786 while (*c && !strchr("; \t\r\n\f\v#", *c)) 4787 c++; 4788 if (c < d) 4789 d = NULL; /* "perl" not in first word; ignore */ 4790 else 4791 *s = '#'; /* Don't try to parse shebang line */ 4792 } 4793 #endif /* ALTERNATE_SHEBANG */ 4794 if (!d && 4795 *s == '#' && 4796 ipathend > ipath && 4797 !PL_minus_c && 4798 !instr(s,"indir") && 4799 instr(PL_origargv[0],"perl")) 4800 { 4801 dVAR; 4802 char **newargv; 4803 4804 *ipathend = '\0'; 4805 s = ipathend + 1; 4806 while (s < PL_bufend && isSPACE(*s)) 4807 s++; 4808 if (s < PL_bufend) { 4809 Newx(newargv,PL_origargc+3,char*); 4810 newargv[1] = s; 4811 while (s < PL_bufend && !isSPACE(*s)) 4812 s++; 4813 *s = '\0'; 4814 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*); 4815 } 4816 else 4817 newargv = PL_origargv; 4818 newargv[0] = ipath; 4819 PERL_FPU_PRE_EXEC 4820 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv)); 4821 PERL_FPU_POST_EXEC 4822 Perl_croak(aTHX_ "Can't exec %s", ipath); 4823 } 4824 if (d) { 4825 while (*d && !isSPACE(*d)) 4826 d++; 4827 while (SPACE_OR_TAB(*d)) 4828 d++; 4829 4830 if (*d++ == '-') { 4831 const bool switches_done = PL_doswitches; 4832 const U32 oldpdb = PL_perldb; 4833 const bool oldn = PL_minus_n; 4834 const bool oldp = PL_minus_p; 4835 const char *d1 = d; 4836 4837 do { 4838 bool baduni = FALSE; 4839 if (*d1 == 'C') { 4840 const char *d2 = d1 + 1; 4841 if (parse_unicode_opts((const char **)&d2) 4842 != PL_unicode) 4843 baduni = TRUE; 4844 } 4845 if (baduni || *d1 == 'M' || *d1 == 'm') { 4846 const char * const m = d1; 4847 while (*d1 && !isSPACE(*d1)) 4848 d1++; 4849 Perl_croak(aTHX_ "Too late for \"-%.*s\" option", 4850 (int)(d1 - m), m); 4851 } 4852 d1 = moreswitches(d1); 4853 } while (d1); 4854 if (PL_doswitches && !switches_done) { 4855 int argc = PL_origargc; 4856 char **argv = PL_origargv; 4857 do { 4858 argc--,argv++; 4859 } while (argc && argv[0][0] == '-' && argv[0][1]); 4860 init_argv_symbols(argc,argv); 4861 } 4862 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) || 4863 ((PL_minus_n || PL_minus_p) && !(oldn || oldp))) 4864 /* if we have already added "LINE: while (<>) {", 4865 we must not do it again */ 4866 { 4867 sv_setpvs(PL_linestr, ""); 4868 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); 4869 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 4870 PL_last_lop = PL_last_uni = NULL; 4871 PL_preambled = FALSE; 4872 if (PERLDB_LINE || PERLDB_SAVESRC) 4873 (void)gv_fetchfile(PL_origfilename); 4874 goto retry; 4875 } 4876 } 4877 } 4878 } 4879 } 4880 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { 4881 PL_bufptr = s; 4882 PL_lex_state = LEX_FORMLINE; 4883 return yylex(); 4884 } 4885 goto retry; 4886 case '\r': 4887 #ifdef PERL_STRICT_CR 4888 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r'); 4889 Perl_croak(aTHX_ 4890 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n"); 4891 #endif 4892 case ' ': case '\t': case '\f': case 013: 4893 #ifdef PERL_MAD 4894 PL_realtokenstart = -1; 4895 if (!PL_thiswhite) 4896 PL_thiswhite = newSVpvs(""); 4897 sv_catpvn(PL_thiswhite, s, 1); 4898 #endif 4899 s++; 4900 goto retry; 4901 case '#': 4902 case '\n': 4903 #ifdef PERL_MAD 4904 PL_realtokenstart = -1; 4905 if (PL_madskills) 4906 PL_faketokens = 0; 4907 #endif 4908 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) { 4909 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) { 4910 /* handle eval qq[#line 1 "foo"\n ...] */ 4911 CopLINE_dec(PL_curcop); 4912 incline(s); 4913 } 4914 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) { 4915 s = SKIPSPACE0(s); 4916 if (!PL_in_eval || PL_rsfp) 4917 incline(s); 4918 } 4919 else { 4920 d = s; 4921 while (d < PL_bufend && *d != '\n') 4922 d++; 4923 if (d < PL_bufend) 4924 d++; 4925 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */ 4926 Perl_croak(aTHX_ "panic: input overflow"); 4927 #ifdef PERL_MAD 4928 if (PL_madskills) 4929 PL_thiswhite = newSVpvn(s, d - s); 4930 #endif 4931 s = d; 4932 incline(s); 4933 } 4934 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { 4935 PL_bufptr = s; 4936 PL_lex_state = LEX_FORMLINE; 4937 return yylex(); 4938 } 4939 } 4940 else { 4941 #ifdef PERL_MAD 4942 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) { 4943 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') { 4944 PL_faketokens = 0; 4945 s = SKIPSPACE0(s); 4946 TOKEN(PEG); /* make sure any #! line is accessible */ 4947 } 4948 s = SKIPSPACE0(s); 4949 } 4950 else { 4951 /* if (PL_madskills && PL_lex_formbrack) { */ 4952 d = s; 4953 while (d < PL_bufend && *d != '\n') 4954 d++; 4955 if (d < PL_bufend) 4956 d++; 4957 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */ 4958 Perl_croak(aTHX_ "panic: input overflow"); 4959 if (PL_madskills && CopLINE(PL_curcop) >= 1) { 4960 if (!PL_thiswhite) 4961 PL_thiswhite = newSVpvs(""); 4962 if (CopLINE(PL_curcop) == 1) { 4963 sv_setpvs(PL_thiswhite, ""); 4964 PL_faketokens = 0; 4965 } 4966 sv_catpvn(PL_thiswhite, s, d - s); 4967 } 4968 s = d; 4969 /* } 4970 *s = '\0'; 4971 PL_bufend = s; */ 4972 } 4973 #else 4974 *s = '\0'; 4975 PL_bufend = s; 4976 #endif 4977 } 4978 goto retry; 4979 case '-': 4980 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) { 4981 I32 ftst = 0; 4982 char tmp; 4983 4984 s++; 4985 PL_bufptr = s; 4986 tmp = *s++; 4987 4988 while (s < PL_bufend && SPACE_OR_TAB(*s)) 4989 s++; 4990 4991 if (strnEQ(s,"=>",2)) { 4992 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE); 4993 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } ); 4994 OPERATOR('-'); /* unary minus */ 4995 } 4996 PL_last_uni = PL_oldbufptr; 4997 switch (tmp) { 4998 case 'r': ftst = OP_FTEREAD; break; 4999 case 'w': ftst = OP_FTEWRITE; break; 5000 case 'x': ftst = OP_FTEEXEC; break; 5001 case 'o': ftst = OP_FTEOWNED; break; 5002 case 'R': ftst = OP_FTRREAD; break; 5003 case 'W': ftst = OP_FTRWRITE; break; 5004 case 'X': ftst = OP_FTREXEC; break; 5005 case 'O': ftst = OP_FTROWNED; break; 5006 case 'e': ftst = OP_FTIS; break; 5007 case 'z': ftst = OP_FTZERO; break; 5008 case 's': ftst = OP_FTSIZE; break; 5009 case 'f': ftst = OP_FTFILE; break; 5010 case 'd': ftst = OP_FTDIR; break; 5011 case 'l': ftst = OP_FTLINK; break; 5012 case 'p': ftst = OP_FTPIPE; break; 5013 case 'S': ftst = OP_FTSOCK; break; 5014 case 'u': ftst = OP_FTSUID; break; 5015 case 'g': ftst = OP_FTSGID; break; 5016 case 'k': ftst = OP_FTSVTX; break; 5017 case 'b': ftst = OP_FTBLK; break; 5018 case 'c': ftst = OP_FTCHR; break; 5019 case 't': ftst = OP_FTTTY; break; 5020 case 'T': ftst = OP_FTTEXT; break; 5021 case 'B': ftst = OP_FTBINARY; break; 5022 case 'M': case 'A': case 'C': 5023 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV); 5024 switch (tmp) { 5025 case 'M': ftst = OP_FTMTIME; break; 5026 case 'A': ftst = OP_FTATIME; break; 5027 case 'C': ftst = OP_FTCTIME; break; 5028 default: break; 5029 } 5030 break; 5031 default: 5032 break; 5033 } 5034 if (ftst) { 5035 PL_last_lop_op = (OPCODE)ftst; 5036 DEBUG_T( { PerlIO_printf(Perl_debug_log, 5037 "### Saw file test %c\n", (int)tmp); 5038 } ); 5039 FTST(ftst); 5040 } 5041 else { 5042 /* Assume it was a minus followed by a one-letter named 5043 * subroutine call (or a -bareword), then. */ 5044 DEBUG_T( { PerlIO_printf(Perl_debug_log, 5045 "### '-%c' looked like a file test but was not\n", 5046 (int) tmp); 5047 } ); 5048 s = --PL_bufptr; 5049 } 5050 } 5051 { 5052 const char tmp = *s++; 5053 if (*s == tmp) { 5054 s++; 5055 if (PL_expect == XOPERATOR) 5056 TERM(POSTDEC); 5057 else 5058 OPERATOR(PREDEC); 5059 } 5060 else if (*s == '>') { 5061 s++; 5062 s = SKIPSPACE1(s); 5063 if (isIDFIRST_lazy_if(s,UTF)) { 5064 s = force_word(s,METHOD,FALSE,TRUE,FALSE); 5065 TOKEN(ARROW); 5066 } 5067 else if (*s == '$') 5068 OPERATOR(ARROW); 5069 else 5070 TERM(ARROW); 5071 } 5072 if (PL_expect == XOPERATOR) 5073 Aop(OP_SUBTRACT); 5074 else { 5075 if (isSPACE(*s) || !isSPACE(*PL_bufptr)) 5076 check_uni(); 5077 OPERATOR('-'); /* unary minus */ 5078 } 5079 } 5080 5081 case '+': 5082 { 5083 const char tmp = *s++; 5084 if (*s == tmp) { 5085 s++; 5086 if (PL_expect == XOPERATOR) 5087 TERM(POSTINC); 5088 else 5089 OPERATOR(PREINC); 5090 } 5091 if (PL_expect == XOPERATOR) 5092 Aop(OP_ADD); 5093 else { 5094 if (isSPACE(*s) || !isSPACE(*PL_bufptr)) 5095 check_uni(); 5096 OPERATOR('+'); 5097 } 5098 } 5099 5100 case '*': 5101 if (PL_expect != XOPERATOR) { 5102 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE); 5103 PL_expect = XOPERATOR; 5104 force_ident(PL_tokenbuf, '*'); 5105 if (!*PL_tokenbuf) 5106 PREREF('*'); 5107 TERM('*'); 5108 } 5109 s++; 5110 if (*s == '*') { 5111 s++; 5112 PWop(OP_POW); 5113 } 5114 Mop(OP_MULTIPLY); 5115 5116 case '%': 5117 if (PL_expect == XOPERATOR) { 5118 ++s; 5119 Mop(OP_MODULO); 5120 } 5121 PL_tokenbuf[0] = '%'; 5122 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, 5123 sizeof PL_tokenbuf - 1, FALSE); 5124 if (!PL_tokenbuf[1]) { 5125 PREREF('%'); 5126 } 5127 PL_pending_ident = '%'; 5128 TERM('%'); 5129 5130 case '^': 5131 s++; 5132 BOop(OP_BIT_XOR); 5133 case '[': 5134 PL_lex_brackets++; 5135 { 5136 const char tmp = *s++; 5137 OPERATOR(tmp); 5138 } 5139 case '~': 5140 if (s[1] == '~' 5141 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)) 5142 { 5143 s += 2; 5144 Eop(OP_SMARTMATCH); 5145 } 5146 case ',': 5147 { 5148 const char tmp = *s++; 5149 OPERATOR(tmp); 5150 } 5151 case ':': 5152 if (s[1] == ':') { 5153 len = 0; 5154 goto just_a_word_zero_gv; 5155 } 5156 s++; 5157 switch (PL_expect) { 5158 OP *attrs; 5159 #ifdef PERL_MAD 5160 I32 stuffstart; 5161 #endif 5162 case XOPERATOR: 5163 if (!PL_in_my || PL_lex_state != LEX_NORMAL) 5164 break; 5165 PL_bufptr = s; /* update in case we back off */ 5166 if (*s == '=') { 5167 deprecate(":= for an empty attribute list"); 5168 } 5169 goto grabattrs; 5170 case XATTRBLOCK: 5171 PL_expect = XBLOCK; 5172 goto grabattrs; 5173 case XATTRTERM: 5174 PL_expect = XTERMBLOCK; 5175 grabattrs: 5176 #ifdef PERL_MAD 5177 stuffstart = s - SvPVX(PL_linestr) - 1; 5178 #endif 5179 s = PEEKSPACE(s); 5180 attrs = NULL; 5181 while (isIDFIRST_lazy_if(s,UTF)) { 5182 I32 tmp; 5183 SV *sv; 5184 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len); 5185 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) { 5186 if (tmp < 0) tmp = -tmp; 5187 switch (tmp) { 5188 case KEY_or: 5189 case KEY_and: 5190 case KEY_for: 5191 case KEY_foreach: 5192 case KEY_unless: 5193 case KEY_if: 5194 case KEY_while: 5195 case KEY_until: 5196 goto got_attrs; 5197 default: 5198 break; 5199 } 5200 } 5201 sv = newSVpvn(s, len); 5202 if (*d == '(') { 5203 d = scan_str(d,TRUE,TRUE); 5204 if (!d) { 5205 /* MUST advance bufptr here to avoid bogus 5206 "at end of line" context messages from yyerror(). 5207 */ 5208 PL_bufptr = s + len; 5209 yyerror("Unterminated attribute parameter in attribute list"); 5210 if (attrs) 5211 op_free(attrs); 5212 sv_free(sv); 5213 return REPORT(0); /* EOF indicator */ 5214 } 5215 } 5216 if (PL_lex_stuff) { 5217 sv_catsv(sv, PL_lex_stuff); 5218 attrs = append_elem(OP_LIST, attrs, 5219 newSVOP(OP_CONST, 0, sv)); 5220 SvREFCNT_dec(PL_lex_stuff); 5221 PL_lex_stuff = NULL; 5222 } 5223 else { 5224 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) { 5225 sv_free(sv); 5226 if (PL_in_my == KEY_our) { 5227 deprecate(":unique"); 5228 } 5229 else 5230 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables"); 5231 } 5232 5233 /* NOTE: any CV attrs applied here need to be part of 5234 the CVf_BUILTIN_ATTRS define in cv.h! */ 5235 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) { 5236 sv_free(sv); 5237 CvLVALUE_on(PL_compcv); 5238 } 5239 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) { 5240 sv_free(sv); 5241 deprecate(":locked"); 5242 } 5243 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) { 5244 sv_free(sv); 5245 CvMETHOD_on(PL_compcv); 5246 } 5247 /* After we've set the flags, it could be argued that 5248 we don't need to do the attributes.pm-based setting 5249 process, and shouldn't bother appending recognized 5250 flags. To experiment with that, uncomment the 5251 following "else". (Note that's already been 5252 uncommented. That keeps the above-applied built-in 5253 attributes from being intercepted (and possibly 5254 rejected) by a package's attribute routines, but is 5255 justified by the performance win for the common case 5256 of applying only built-in attributes.) */ 5257 else 5258 attrs = append_elem(OP_LIST, attrs, 5259 newSVOP(OP_CONST, 0, 5260 sv)); 5261 } 5262 s = PEEKSPACE(d); 5263 if (*s == ':' && s[1] != ':') 5264 s = PEEKSPACE(s+1); 5265 else if (s == d) 5266 break; /* require real whitespace or :'s */ 5267 /* XXX losing whitespace on sequential attributes here */ 5268 } 5269 { 5270 const char tmp 5271 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */ 5272 if (*s != ';' && *s != '}' && *s != tmp 5273 && (tmp != '=' || *s != ')')) { 5274 const char q = ((*s == '\'') ? '"' : '\''); 5275 /* If here for an expression, and parsed no attrs, back 5276 off. */ 5277 if (tmp == '=' && !attrs) { 5278 s = PL_bufptr; 5279 break; 5280 } 5281 /* MUST advance bufptr here to avoid bogus "at end of line" 5282 context messages from yyerror(). 5283 */ 5284 PL_bufptr = s; 5285 yyerror( (const char *) 5286 (*s 5287 ? Perl_form(aTHX_ "Invalid separator character " 5288 "%c%c%c in attribute list", q, *s, q) 5289 : "Unterminated attribute list" ) ); 5290 if (attrs) 5291 op_free(attrs); 5292 OPERATOR(':'); 5293 } 5294 } 5295 got_attrs: 5296 if (attrs) { 5297 start_force(PL_curforce); 5298 NEXTVAL_NEXTTOKE.opval = attrs; 5299 CURMAD('_', PL_nextwhite); 5300 force_next(THING); 5301 } 5302 #ifdef PERL_MAD 5303 if (PL_madskills) { 5304 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart, 5305 (s - SvPVX(PL_linestr)) - stuffstart); 5306 } 5307 #endif 5308 TOKEN(COLONATTR); 5309 } 5310 OPERATOR(':'); 5311 case '(': 5312 s++; 5313 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr) 5314 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */ 5315 else 5316 PL_expect = XTERM; 5317 s = SKIPSPACE1(s); 5318 TOKEN('('); 5319 case ';': 5320 CLINE; 5321 { 5322 const char tmp = *s++; 5323 OPERATOR(tmp); 5324 } 5325 case ')': 5326 { 5327 const char tmp = *s++; 5328 s = SKIPSPACE1(s); 5329 if (*s == '{') 5330 PREBLOCK(tmp); 5331 TERM(tmp); 5332 } 5333 case ']': 5334 s++; 5335 if (PL_lex_brackets <= 0) 5336 yyerror("Unmatched right square bracket"); 5337 else 5338 --PL_lex_brackets; 5339 if (PL_lex_state == LEX_INTERPNORMAL) { 5340 if (PL_lex_brackets == 0) { 5341 if (*s == '-' && s[1] == '>') 5342 PL_lex_state = LEX_INTERPENDMAYBE; 5343 else if (*s != '[' && *s != '{') 5344 PL_lex_state = LEX_INTERPEND; 5345 } 5346 } 5347 TERM(']'); 5348 case '{': 5349 leftbracket: 5350 s++; 5351 if (PL_lex_brackets > 100) { 5352 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); 5353 } 5354 switch (PL_expect) { 5355 case XTERM: 5356 if (PL_lex_formbrack) { 5357 s--; 5358 PRETERMBLOCK(DO); 5359 } 5360 if (PL_oldoldbufptr == PL_last_lop) 5361 PL_lex_brackstack[PL_lex_brackets++] = XTERM; 5362 else 5363 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; 5364 OPERATOR(HASHBRACK); 5365 case XOPERATOR: 5366 while (s < PL_bufend && SPACE_OR_TAB(*s)) 5367 s++; 5368 d = s; 5369 PL_tokenbuf[0] = '\0'; 5370 if (d < PL_bufend && *d == '-') { 5371 PL_tokenbuf[0] = '-'; 5372 d++; 5373 while (d < PL_bufend && SPACE_OR_TAB(*d)) 5374 d++; 5375 } 5376 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) { 5377 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, 5378 FALSE, &len); 5379 while (d < PL_bufend && SPACE_OR_TAB(*d)) 5380 d++; 5381 if (*d == '}') { 5382 const char minus = (PL_tokenbuf[0] == '-'); 5383 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE); 5384 if (minus) 5385 force_next('-'); 5386 } 5387 } 5388 /* FALL THROUGH */ 5389 case XATTRBLOCK: 5390 case XBLOCK: 5391 PL_lex_brackstack[PL_lex_brackets++] = XSTATE; 5392 PL_expect = XSTATE; 5393 break; 5394 case XATTRTERM: 5395 case XTERMBLOCK: 5396 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; 5397 PL_expect = XSTATE; 5398 break; 5399 default: { 5400 const char *t; 5401 if (PL_oldoldbufptr == PL_last_lop) 5402 PL_lex_brackstack[PL_lex_brackets++] = XTERM; 5403 else 5404 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; 5405 s = SKIPSPACE1(s); 5406 if (*s == '}') { 5407 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) { 5408 PL_expect = XTERM; 5409 /* This hack is to get the ${} in the message. */ 5410 PL_bufptr = s+1; 5411 yyerror("syntax error"); 5412 break; 5413 } 5414 OPERATOR(HASHBRACK); 5415 } 5416 /* This hack serves to disambiguate a pair of curlies 5417 * as being a block or an anon hash. Normally, expectation 5418 * determines that, but in cases where we're not in a 5419 * position to expect anything in particular (like inside 5420 * eval"") we have to resolve the ambiguity. This code 5421 * covers the case where the first term in the curlies is a 5422 * quoted string. Most other cases need to be explicitly 5423 * disambiguated by prepending a "+" before the opening 5424 * curly in order to force resolution as an anon hash. 5425 * 5426 * XXX should probably propagate the outer expectation 5427 * into eval"" to rely less on this hack, but that could 5428 * potentially break current behavior of eval"". 5429 * GSAR 97-07-21 5430 */ 5431 t = s; 5432 if (*s == '\'' || *s == '"' || *s == '`') { 5433 /* common case: get past first string, handling escapes */ 5434 for (t++; t < PL_bufend && *t != *s;) 5435 if (*t++ == '\\' && (*t == '\\' || *t == *s)) 5436 t++; 5437 t++; 5438 } 5439 else if (*s == 'q') { 5440 if (++t < PL_bufend 5441 && (!isALNUM(*t) 5442 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend 5443 && !isALNUM(*t)))) 5444 { 5445 /* skip q//-like construct */ 5446 const char *tmps; 5447 char open, close, term; 5448 I32 brackets = 1; 5449 5450 while (t < PL_bufend && isSPACE(*t)) 5451 t++; 5452 /* check for q => */ 5453 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') { 5454 OPERATOR(HASHBRACK); 5455 } 5456 term = *t; 5457 open = term; 5458 if (term && (tmps = strchr("([{< )]}> )]}>",term))) 5459 term = tmps[5]; 5460 close = term; 5461 if (open == close) 5462 for (t++; t < PL_bufend; t++) { 5463 if (*t == '\\' && t+1 < PL_bufend && open != '\\') 5464 t++; 5465 else if (*t == open) 5466 break; 5467 } 5468 else { 5469 for (t++; t < PL_bufend; t++) { 5470 if (*t == '\\' && t+1 < PL_bufend) 5471 t++; 5472 else if (*t == close && --brackets <= 0) 5473 break; 5474 else if (*t == open) 5475 brackets++; 5476 } 5477 } 5478 t++; 5479 } 5480 else 5481 /* skip plain q word */ 5482 while (t < PL_bufend && isALNUM_lazy_if(t,UTF)) 5483 t += UTF8SKIP(t); 5484 } 5485 else if (isALNUM_lazy_if(t,UTF)) { 5486 t += UTF8SKIP(t); 5487 while (t < PL_bufend && isALNUM_lazy_if(t,UTF)) 5488 t += UTF8SKIP(t); 5489 } 5490 while (t < PL_bufend && isSPACE(*t)) 5491 t++; 5492 /* if comma follows first term, call it an anon hash */ 5493 /* XXX it could be a comma expression with loop modifiers */ 5494 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s))) 5495 || (*t == '=' && t[1] == '>'))) 5496 OPERATOR(HASHBRACK); 5497 if (PL_expect == XREF) 5498 PL_expect = XTERM; 5499 else { 5500 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE; 5501 PL_expect = XSTATE; 5502 } 5503 } 5504 break; 5505 } 5506 pl_yylval.ival = CopLINE(PL_curcop); 5507 if (isSPACE(*s) || *s == '#') 5508 PL_copline = NOLINE; /* invalidate current command line number */ 5509 TOKEN('{'); 5510 case '}': 5511 rightbracket: 5512 s++; 5513 if (PL_lex_brackets <= 0) 5514 yyerror("Unmatched right curly bracket"); 5515 else 5516 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets]; 5517 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL) 5518 PL_lex_formbrack = 0; 5519 if (PL_lex_state == LEX_INTERPNORMAL) { 5520 if (PL_lex_brackets == 0) { 5521 if (PL_expect & XFAKEBRACK) { 5522 PL_expect &= XENUMMASK; 5523 PL_lex_state = LEX_INTERPEND; 5524 PL_bufptr = s; 5525 #if 0 5526 if (PL_madskills) { 5527 if (!PL_thiswhite) 5528 PL_thiswhite = newSVpvs(""); 5529 sv_catpvs(PL_thiswhite,"}"); 5530 } 5531 #endif 5532 return yylex(); /* ignore fake brackets */ 5533 } 5534 if (*s == '-' && s[1] == '>') 5535 PL_lex_state = LEX_INTERPENDMAYBE; 5536 else if (*s != '[' && *s != '{') 5537 PL_lex_state = LEX_INTERPEND; 5538 } 5539 } 5540 if (PL_expect & XFAKEBRACK) { 5541 PL_expect &= XENUMMASK; 5542 PL_bufptr = s; 5543 return yylex(); /* ignore fake brackets */ 5544 } 5545 start_force(PL_curforce); 5546 if (PL_madskills) { 5547 curmad('X', newSVpvn(s-1,1)); 5548 CURMAD('_', PL_thiswhite); 5549 } 5550 force_next('}'); 5551 #ifdef PERL_MAD 5552 if (!PL_thistoken) 5553 PL_thistoken = newSVpvs(""); 5554 #endif 5555 TOKEN(';'); 5556 case '&': 5557 s++; 5558 if (*s++ == '&') 5559 AOPERATOR(ANDAND); 5560 s--; 5561 if (PL_expect == XOPERATOR) { 5562 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON) 5563 && isIDFIRST_lazy_if(s,UTF)) 5564 { 5565 CopLINE_dec(PL_curcop); 5566 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi); 5567 CopLINE_inc(PL_curcop); 5568 } 5569 BAop(OP_BIT_AND); 5570 } 5571 5572 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE); 5573 if (*PL_tokenbuf) { 5574 PL_expect = XOPERATOR; 5575 force_ident(PL_tokenbuf, '&'); 5576 } 5577 else 5578 PREREF('&'); 5579 pl_yylval.ival = (OPpENTERSUB_AMPER<<8); 5580 TERM('&'); 5581 5582 case '|': 5583 s++; 5584 if (*s++ == '|') 5585 AOPERATOR(OROR); 5586 s--; 5587 BOop(OP_BIT_OR); 5588 case '=': 5589 s++; 5590 { 5591 const char tmp = *s++; 5592 if (tmp == '=') 5593 Eop(OP_EQ); 5594 if (tmp == '>') 5595 OPERATOR(','); 5596 if (tmp == '~') 5597 PMop(OP_MATCH); 5598 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX) 5599 && strchr("+-*/%.^&|<",tmp)) 5600 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 5601 "Reversed %c= operator",(int)tmp); 5602 s--; 5603 if (PL_expect == XSTATE && isALPHA(tmp) && 5604 (s == PL_linestart+1 || s[-2] == '\n') ) 5605 { 5606 if (PL_in_eval && !PL_rsfp) { 5607 d = PL_bufend; 5608 while (s < d) { 5609 if (*s++ == '\n') { 5610 incline(s); 5611 if (strnEQ(s,"=cut",4)) { 5612 s = strchr(s,'\n'); 5613 if (s) 5614 s++; 5615 else 5616 s = d; 5617 incline(s); 5618 goto retry; 5619 } 5620 } 5621 } 5622 goto retry; 5623 } 5624 #ifdef PERL_MAD 5625 if (PL_madskills) { 5626 if (!PL_thiswhite) 5627 PL_thiswhite = newSVpvs(""); 5628 sv_catpvn(PL_thiswhite, PL_linestart, 5629 PL_bufend - PL_linestart); 5630 } 5631 #endif 5632 s = PL_bufend; 5633 PL_doextract = TRUE; 5634 goto retry; 5635 } 5636 } 5637 if (PL_lex_brackets < PL_lex_formbrack) { 5638 const char *t = s; 5639 #ifdef PERL_STRICT_CR 5640 while (SPACE_OR_TAB(*t)) 5641 #else 5642 while (SPACE_OR_TAB(*t) || *t == '\r') 5643 #endif 5644 t++; 5645 if (*t == '\n' || *t == '#') { 5646 s--; 5647 PL_expect = XBLOCK; 5648 goto leftbracket; 5649 } 5650 } 5651 pl_yylval.ival = 0; 5652 OPERATOR(ASSIGNOP); 5653 case '!': 5654 s++; 5655 { 5656 const char tmp = *s++; 5657 if (tmp == '=') { 5658 /* was this !=~ where !~ was meant? 5659 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */ 5660 5661 if (*s == '~' && ckWARN(WARN_SYNTAX)) { 5662 const char *t = s+1; 5663 5664 while (t < PL_bufend && isSPACE(*t)) 5665 ++t; 5666 5667 if (*t == '/' || *t == '?' || 5668 ((*t == 'm' || *t == 's' || *t == 'y') 5669 && !isALNUM(t[1])) || 5670 (*t == 't' && t[1] == 'r' && !isALNUM(t[2]))) 5671 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 5672 "!=~ should be !~"); 5673 } 5674 Eop(OP_NE); 5675 } 5676 if (tmp == '~') 5677 PMop(OP_NOT); 5678 } 5679 s--; 5680 OPERATOR('!'); 5681 case '<': 5682 if (PL_expect != XOPERATOR) { 5683 if (s[1] != '<' && !strchr(s,'>')) 5684 check_uni(); 5685 if (s[1] == '<') 5686 s = scan_heredoc(s); 5687 else 5688 s = scan_inputsymbol(s); 5689 TERM(sublex_start()); 5690 } 5691 s++; 5692 { 5693 char tmp = *s++; 5694 if (tmp == '<') 5695 SHop(OP_LEFT_SHIFT); 5696 if (tmp == '=') { 5697 tmp = *s++; 5698 if (tmp == '>') 5699 Eop(OP_NCMP); 5700 s--; 5701 Rop(OP_LE); 5702 } 5703 } 5704 s--; 5705 Rop(OP_LT); 5706 case '>': 5707 s++; 5708 { 5709 const char tmp = *s++; 5710 if (tmp == '>') 5711 SHop(OP_RIGHT_SHIFT); 5712 else if (tmp == '=') 5713 Rop(OP_GE); 5714 } 5715 s--; 5716 Rop(OP_GT); 5717 5718 case '$': 5719 CLINE; 5720 5721 if (PL_expect == XOPERATOR) { 5722 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) { 5723 return deprecate_commaless_var_list(); 5724 } 5725 } 5726 5727 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) { 5728 PL_tokenbuf[0] = '@'; 5729 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1, 5730 sizeof PL_tokenbuf - 1, FALSE); 5731 if (PL_expect == XOPERATOR) 5732 no_op("Array length", s); 5733 if (!PL_tokenbuf[1]) 5734 PREREF(DOLSHARP); 5735 PL_expect = XOPERATOR; 5736 PL_pending_ident = '#'; 5737 TOKEN(DOLSHARP); 5738 } 5739 5740 PL_tokenbuf[0] = '$'; 5741 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, 5742 sizeof PL_tokenbuf - 1, FALSE); 5743 if (PL_expect == XOPERATOR) 5744 no_op("Scalar", s); 5745 if (!PL_tokenbuf[1]) { 5746 if (s == PL_bufend) 5747 yyerror("Final $ should be \\$ or $name"); 5748 PREREF('$'); 5749 } 5750 5751 /* This kludge not intended to be bulletproof. */ 5752 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) { 5753 pl_yylval.opval = newSVOP(OP_CONST, 0, 5754 newSViv(CopARYBASE_get(&PL_compiling))); 5755 pl_yylval.opval->op_private = OPpCONST_ARYBASE; 5756 TERM(THING); 5757 } 5758 5759 d = s; 5760 { 5761 const char tmp = *s; 5762 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) 5763 s = SKIPSPACE1(s); 5764 5765 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) 5766 && intuit_more(s)) { 5767 if (*s == '[') { 5768 PL_tokenbuf[0] = '@'; 5769 if (ckWARN(WARN_SYNTAX)) { 5770 char *t = s+1; 5771 5772 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$') 5773 t++; 5774 if (*t++ == ',') { 5775 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */ 5776 while (t < PL_bufend && *t != ']') 5777 t++; 5778 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 5779 "Multidimensional syntax %.*s not supported", 5780 (int)((t - PL_bufptr) + 1), PL_bufptr); 5781 } 5782 } 5783 } 5784 else if (*s == '{') { 5785 char *t; 5786 PL_tokenbuf[0] = '%'; 5787 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX) 5788 && (t = strchr(s, '}')) && (t = strchr(t, '='))) 5789 { 5790 char tmpbuf[sizeof PL_tokenbuf]; 5791 do { 5792 t++; 5793 } while (isSPACE(*t)); 5794 if (isIDFIRST_lazy_if(t,UTF)) { 5795 STRLEN len; 5796 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, 5797 &len); 5798 while (isSPACE(*t)) 5799 t++; 5800 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0)) 5801 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 5802 "You need to quote \"%s\"", 5803 tmpbuf); 5804 } 5805 } 5806 } 5807 } 5808 5809 PL_expect = XOPERATOR; 5810 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) { 5811 const bool islop = (PL_last_lop == PL_oldoldbufptr); 5812 if (!islop || PL_last_lop_op == OP_GREPSTART) 5813 PL_expect = XOPERATOR; 5814 else if (strchr("$@\"'`q", *s)) 5815 PL_expect = XTERM; /* e.g. print $fh "foo" */ 5816 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF)) 5817 PL_expect = XTERM; /* e.g. print $fh &sub */ 5818 else if (isIDFIRST_lazy_if(s,UTF)) { 5819 char tmpbuf[sizeof PL_tokenbuf]; 5820 int t2; 5821 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len); 5822 if ((t2 = keyword(tmpbuf, len, 0))) { 5823 /* binary operators exclude handle interpretations */ 5824 switch (t2) { 5825 case -KEY_x: 5826 case -KEY_eq: 5827 case -KEY_ne: 5828 case -KEY_gt: 5829 case -KEY_lt: 5830 case -KEY_ge: 5831 case -KEY_le: 5832 case -KEY_cmp: 5833 break; 5834 default: 5835 PL_expect = XTERM; /* e.g. print $fh length() */ 5836 break; 5837 } 5838 } 5839 else { 5840 PL_expect = XTERM; /* e.g. print $fh subr() */ 5841 } 5842 } 5843 else if (isDIGIT(*s)) 5844 PL_expect = XTERM; /* e.g. print $fh 3 */ 5845 else if (*s == '.' && isDIGIT(s[1])) 5846 PL_expect = XTERM; /* e.g. print $fh .3 */ 5847 else if ((*s == '?' || *s == '-' || *s == '+') 5848 && !isSPACE(s[1]) && s[1] != '=') 5849 PL_expect = XTERM; /* e.g. print $fh -1 */ 5850 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '=' 5851 && s[1] != '/') 5852 PL_expect = XTERM; /* e.g. print $fh /.../ 5853 XXX except DORDOR operator 5854 */ 5855 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) 5856 && s[2] != '=') 5857 PL_expect = XTERM; /* print $fh <<"EOF" */ 5858 } 5859 } 5860 PL_pending_ident = '$'; 5861 TOKEN('$'); 5862 5863 case '@': 5864 if (PL_expect == XOPERATOR) 5865 no_op("Array", s); 5866 PL_tokenbuf[0] = '@'; 5867 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE); 5868 if (!PL_tokenbuf[1]) { 5869 PREREF('@'); 5870 } 5871 if (PL_lex_state == LEX_NORMAL) 5872 s = SKIPSPACE1(s); 5873 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) { 5874 if (*s == '{') 5875 PL_tokenbuf[0] = '%'; 5876 5877 /* Warn about @ where they meant $. */ 5878 if (*s == '[' || *s == '{') { 5879 if (ckWARN(WARN_SYNTAX)) { 5880 const char *t = s + 1; 5881 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t))) 5882 t++; 5883 if (*t == '}' || *t == ']') { 5884 t++; 5885 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */ 5886 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 5887 "Scalar value %.*s better written as $%.*s", 5888 (int)(t-PL_bufptr), PL_bufptr, 5889 (int)(t-PL_bufptr-1), PL_bufptr+1); 5890 } 5891 } 5892 } 5893 } 5894 PL_pending_ident = '@'; 5895 TERM('@'); 5896 5897 case '/': /* may be division, defined-or, or pattern */ 5898 if (PL_expect == XTERMORDORDOR && s[1] == '/') { 5899 s += 2; 5900 AOPERATOR(DORDOR); 5901 } 5902 case '?': /* may either be conditional or pattern */ 5903 if (PL_expect == XOPERATOR) { 5904 char tmp = *s++; 5905 if(tmp == '?') { 5906 OPERATOR('?'); 5907 } 5908 else { 5909 tmp = *s++; 5910 if(tmp == '/') { 5911 /* A // operator. */ 5912 AOPERATOR(DORDOR); 5913 } 5914 else { 5915 s--; 5916 Mop(OP_DIVIDE); 5917 } 5918 } 5919 } 5920 else { 5921 /* Disable warning on "study /blah/" */ 5922 if (PL_oldoldbufptr == PL_last_uni 5923 && (*PL_last_uni != 's' || s - PL_last_uni < 5 5924 || memNE(PL_last_uni, "study", 5) 5925 || isALNUM_lazy_if(PL_last_uni+5,UTF) 5926 )) 5927 check_uni(); 5928 s = scan_pat(s,OP_MATCH); 5929 TERM(sublex_start()); 5930 } 5931 5932 case '.': 5933 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack 5934 #ifdef PERL_STRICT_CR 5935 && s[1] == '\n' 5936 #else 5937 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n')) 5938 #endif 5939 && (s == PL_linestart || s[-1] == '\n') ) 5940 { 5941 PL_lex_formbrack = 0; 5942 PL_expect = XSTATE; 5943 goto rightbracket; 5944 } 5945 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') { 5946 s += 3; 5947 OPERATOR(YADAYADA); 5948 } 5949 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) { 5950 char tmp = *s++; 5951 if (*s == tmp) { 5952 s++; 5953 if (*s == tmp) { 5954 s++; 5955 pl_yylval.ival = OPf_SPECIAL; 5956 } 5957 else 5958 pl_yylval.ival = 0; 5959 OPERATOR(DOTDOT); 5960 } 5961 Aop(OP_CONCAT); 5962 } 5963 /* FALL THROUGH */ 5964 case '0': case '1': case '2': case '3': case '4': 5965 case '5': case '6': case '7': case '8': case '9': 5966 s = scan_num(s, &pl_yylval); 5967 DEBUG_T( { printbuf("### Saw number in %s\n", s); } ); 5968 if (PL_expect == XOPERATOR) 5969 no_op("Number",s); 5970 TERM(THING); 5971 5972 case '\'': 5973 s = scan_str(s,!!PL_madskills,FALSE); 5974 DEBUG_T( { printbuf("### Saw string before %s\n", s); } ); 5975 if (PL_expect == XOPERATOR) { 5976 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) { 5977 return deprecate_commaless_var_list(); 5978 } 5979 else 5980 no_op("String",s); 5981 } 5982 if (!s) 5983 missingterm(NULL); 5984 pl_yylval.ival = OP_CONST; 5985 TERM(sublex_start()); 5986 5987 case '"': 5988 s = scan_str(s,!!PL_madskills,FALSE); 5989 DEBUG_T( { printbuf("### Saw string before %s\n", s); } ); 5990 if (PL_expect == XOPERATOR) { 5991 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) { 5992 return deprecate_commaless_var_list(); 5993 } 5994 else 5995 no_op("String",s); 5996 } 5997 if (!s) 5998 missingterm(NULL); 5999 pl_yylval.ival = OP_CONST; 6000 /* FIXME. I think that this can be const if char *d is replaced by 6001 more localised variables. */ 6002 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) { 6003 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) { 6004 pl_yylval.ival = OP_STRINGIFY; 6005 break; 6006 } 6007 } 6008 TERM(sublex_start()); 6009 6010 case '`': 6011 s = scan_str(s,!!PL_madskills,FALSE); 6012 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } ); 6013 if (PL_expect == XOPERATOR) 6014 no_op("Backticks",s); 6015 if (!s) 6016 missingterm(NULL); 6017 readpipe_override(); 6018 TERM(sublex_start()); 6019 6020 case '\\': 6021 s++; 6022 if (PL_lex_inwhat && isDIGIT(*s)) 6023 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression", 6024 *s, *s); 6025 if (PL_expect == XOPERATOR) 6026 no_op("Backslash",s); 6027 OPERATOR(REFGEN); 6028 6029 case 'v': 6030 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) { 6031 char *start = s + 2; 6032 while (isDIGIT(*start) || *start == '_') 6033 start++; 6034 if (*start == '.' && isDIGIT(start[1])) { 6035 s = scan_num(s, &pl_yylval); 6036 TERM(THING); 6037 } 6038 /* avoid v123abc() or $h{v1}, allow C<print v10;> */ 6039 else if (!isALPHA(*start) && (PL_expect == XTERM 6040 || PL_expect == XREF || PL_expect == XSTATE 6041 || PL_expect == XTERMORDORDOR)) { 6042 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV); 6043 if (!gv) { 6044 s = scan_num(s, &pl_yylval); 6045 TERM(THING); 6046 } 6047 } 6048 } 6049 goto keylookup; 6050 case 'x': 6051 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) { 6052 s++; 6053 Mop(OP_REPEAT); 6054 } 6055 goto keylookup; 6056 6057 case '_': 6058 case 'a': case 'A': 6059 case 'b': case 'B': 6060 case 'c': case 'C': 6061 case 'd': case 'D': 6062 case 'e': case 'E': 6063 case 'f': case 'F': 6064 case 'g': case 'G': 6065 case 'h': case 'H': 6066 case 'i': case 'I': 6067 case 'j': case 'J': 6068 case 'k': case 'K': 6069 case 'l': case 'L': 6070 case 'm': case 'M': 6071 case 'n': case 'N': 6072 case 'o': case 'O': 6073 case 'p': case 'P': 6074 case 'q': case 'Q': 6075 case 'r': case 'R': 6076 case 's': case 'S': 6077 case 't': case 'T': 6078 case 'u': case 'U': 6079 case 'V': 6080 case 'w': case 'W': 6081 case 'X': 6082 case 'y': case 'Y': 6083 case 'z': case 'Z': 6084 6085 keylookup: { 6086 bool anydelim; 6087 I32 tmp; 6088 6089 orig_keyword = 0; 6090 gv = NULL; 6091 gvp = NULL; 6092 6093 PL_bufptr = s; 6094 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len); 6095 6096 /* Some keywords can be followed by any delimiter, including ':' */ 6097 anydelim = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) || 6098 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') || 6099 (PL_tokenbuf[0] == 'q' && 6100 strchr("qwxr", PL_tokenbuf[1]))))); 6101 6102 /* x::* is just a word, unless x is "CORE" */ 6103 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE")) 6104 goto just_a_word; 6105 6106 d = s; 6107 while (d < PL_bufend && isSPACE(*d)) 6108 d++; /* no comments skipped here, or s### is misparsed */ 6109 6110 /* Is this a word before a => operator? */ 6111 if (*d == '=' && d[1] == '>') { 6112 CLINE; 6113 pl_yylval.opval 6114 = (OP*)newSVOP(OP_CONST, 0, 6115 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len)); 6116 pl_yylval.opval->op_private = OPpCONST_BARE; 6117 TERM(WORD); 6118 } 6119 6120 /* Check for plugged-in keyword */ 6121 { 6122 OP *o; 6123 int result; 6124 char *saved_bufptr = PL_bufptr; 6125 PL_bufptr = s; 6126 result = CALL_FPTR(PL_keyword_plugin)(aTHX_ PL_tokenbuf, len, &o); 6127 s = PL_bufptr; 6128 if (result == KEYWORD_PLUGIN_DECLINE) { 6129 /* not a plugged-in keyword */ 6130 PL_bufptr = saved_bufptr; 6131 } else if (result == KEYWORD_PLUGIN_STMT) { 6132 pl_yylval.opval = o; 6133 CLINE; 6134 PL_expect = XSTATE; 6135 return REPORT(PLUGSTMT); 6136 } else if (result == KEYWORD_PLUGIN_EXPR) { 6137 pl_yylval.opval = o; 6138 CLINE; 6139 PL_expect = XOPERATOR; 6140 return REPORT(PLUGEXPR); 6141 } else { 6142 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'", 6143 PL_tokenbuf); 6144 } 6145 } 6146 6147 /* Check for built-in keyword */ 6148 tmp = keyword(PL_tokenbuf, len, 0); 6149 6150 /* Is this a label? */ 6151 if (!anydelim && PL_expect == XSTATE 6152 && d < PL_bufend && *d == ':' && *(d + 1) != ':') { 6153 s = d + 1; 6154 pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf); 6155 CLINE; 6156 TOKEN(LABEL); 6157 } 6158 6159 if (tmp < 0) { /* second-class keyword? */ 6160 GV *ogv = NULL; /* override (winner) */ 6161 GV *hgv = NULL; /* hidden (loser) */ 6162 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) { 6163 CV *cv; 6164 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) && 6165 (cv = GvCVu(gv))) 6166 { 6167 if (GvIMPORTED_CV(gv)) 6168 ogv = gv; 6169 else if (! CvMETHOD(cv)) 6170 hgv = gv; 6171 } 6172 if (!ogv && 6173 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) && 6174 (gv = *gvp) && isGV_with_GP(gv) && 6175 GvCVu(gv) && GvIMPORTED_CV(gv)) 6176 { 6177 ogv = gv; 6178 } 6179 } 6180 if (ogv) { 6181 orig_keyword = tmp; 6182 tmp = 0; /* overridden by import or by GLOBAL */ 6183 } 6184 else if (gv && !gvp 6185 && -tmp==KEY_lock /* XXX generalizable kludge */ 6186 && GvCVu(gv)) 6187 { 6188 tmp = 0; /* any sub overrides "weak" keyword */ 6189 } 6190 else { /* no override */ 6191 tmp = -tmp; 6192 if (tmp == KEY_dump) { 6193 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), 6194 "dump() better written as CORE::dump()"); 6195 } 6196 gv = NULL; 6197 gvp = 0; 6198 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */ 6199 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 6200 "Ambiguous call resolved as CORE::%s(), %s", 6201 GvENAME(hgv), "qualify as such or use &"); 6202 } 6203 } 6204 6205 reserved_word: 6206 switch (tmp) { 6207 6208 default: /* not a keyword */ 6209 /* Trade off - by using this evil construction we can pull the 6210 variable gv into the block labelled keylookup. If not, then 6211 we have to give it function scope so that the goto from the 6212 earlier ':' case doesn't bypass the initialisation. */ 6213 if (0) { 6214 just_a_word_zero_gv: 6215 gv = NULL; 6216 gvp = NULL; 6217 orig_keyword = 0; 6218 } 6219 just_a_word: { 6220 SV *sv; 6221 int pkgname = 0; 6222 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]); 6223 OP *rv2cv_op; 6224 CV *cv; 6225 #ifdef PERL_MAD 6226 SV *nextPL_nextwhite = 0; 6227 #endif 6228 6229 6230 /* Get the rest if it looks like a package qualifier */ 6231 6232 if (*s == '\'' || (*s == ':' && s[1] == ':')) { 6233 STRLEN morelen; 6234 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len, 6235 TRUE, &morelen); 6236 if (!morelen) 6237 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf, 6238 *s == '\'' ? "'" : "::"); 6239 len += morelen; 6240 pkgname = 1; 6241 } 6242 6243 if (PL_expect == XOPERATOR) { 6244 if (PL_bufptr == PL_linestart) { 6245 CopLINE_dec(PL_curcop); 6246 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi); 6247 CopLINE_inc(PL_curcop); 6248 } 6249 else 6250 no_op("Bareword",s); 6251 } 6252 6253 /* Look for a subroutine with this name in current package, 6254 unless name is "Foo::", in which case Foo is a bearword 6255 (and a package name). */ 6256 6257 if (len > 2 && !PL_madskills && 6258 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':') 6259 { 6260 if (ckWARN(WARN_BAREWORD) 6261 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV)) 6262 Perl_warner(aTHX_ packWARN(WARN_BAREWORD), 6263 "Bareword \"%s\" refers to nonexistent package", 6264 PL_tokenbuf); 6265 len -= 2; 6266 PL_tokenbuf[len] = '\0'; 6267 gv = NULL; 6268 gvp = 0; 6269 } 6270 else { 6271 if (!gv) { 6272 /* Mustn't actually add anything to a symbol table. 6273 But also don't want to "initialise" any placeholder 6274 constants that might already be there into full 6275 blown PVGVs with attached PVCV. */ 6276 gv = gv_fetchpvn_flags(PL_tokenbuf, len, 6277 GV_NOADD_NOINIT, SVt_PVCV); 6278 } 6279 len = 0; 6280 } 6281 6282 /* if we saw a global override before, get the right name */ 6283 6284 if (gvp) { 6285 sv = newSVpvs("CORE::GLOBAL::"); 6286 sv_catpv(sv,PL_tokenbuf); 6287 } 6288 else { 6289 /* If len is 0, newSVpv does strlen(), which is correct. 6290 If len is non-zero, then it will be the true length, 6291 and so the scalar will be created correctly. */ 6292 sv = newSVpv(PL_tokenbuf,len); 6293 } 6294 #ifdef PERL_MAD 6295 if (PL_madskills && !PL_thistoken) { 6296 char *start = SvPVX(PL_linestr) + PL_realtokenstart; 6297 PL_thistoken = newSVpvn(start,s - start); 6298 PL_realtokenstart = s - SvPVX(PL_linestr); 6299 } 6300 #endif 6301 6302 /* Presume this is going to be a bareword of some sort. */ 6303 6304 CLINE; 6305 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv); 6306 pl_yylval.opval->op_private = OPpCONST_BARE; 6307 /* UTF-8 package name? */ 6308 if (UTF && !IN_BYTES && 6309 is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv))) 6310 SvUTF8_on(sv); 6311 6312 /* And if "Foo::", then that's what it certainly is. */ 6313 6314 if (len) 6315 goto safe_bareword; 6316 6317 cv = NULL; 6318 { 6319 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv)); 6320 const_op->op_private = OPpCONST_BARE; 6321 rv2cv_op = newCVREF(0, const_op); 6322 } 6323 if (rv2cv_op->op_type == OP_RV2CV && 6324 (rv2cv_op->op_flags & OPf_KIDS)) { 6325 OP *rv_op = cUNOPx(rv2cv_op)->op_first; 6326 switch (rv_op->op_type) { 6327 case OP_CONST: { 6328 SV *sv = cSVOPx_sv(rv_op); 6329 if (SvROK(sv) && SvTYPE(SvRV(sv)) == SVt_PVCV) 6330 cv = (CV*)SvRV(sv); 6331 } break; 6332 case OP_GV: { 6333 GV *gv = cGVOPx_gv(rv_op); 6334 CV *maybe_cv = GvCVu(gv); 6335 if (maybe_cv && SvTYPE((SV*)maybe_cv) == SVt_PVCV) 6336 cv = maybe_cv; 6337 } break; 6338 } 6339 } 6340 6341 /* See if it's the indirect object for a list operator. */ 6342 6343 if (PL_oldoldbufptr && 6344 PL_oldoldbufptr < PL_bufptr && 6345 (PL_oldoldbufptr == PL_last_lop 6346 || PL_oldoldbufptr == PL_last_uni) && 6347 /* NO SKIPSPACE BEFORE HERE! */ 6348 (PL_expect == XREF || 6349 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF)) 6350 { 6351 bool immediate_paren = *s == '('; 6352 6353 /* (Now we can afford to cross potential line boundary.) */ 6354 s = SKIPSPACE2(s,nextPL_nextwhite); 6355 #ifdef PERL_MAD 6356 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */ 6357 #endif 6358 6359 /* Two barewords in a row may indicate method call. */ 6360 6361 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') && 6362 (tmp = intuit_method(s, gv, cv))) { 6363 op_free(rv2cv_op); 6364 return REPORT(tmp); 6365 } 6366 6367 /* If not a declared subroutine, it's an indirect object. */ 6368 /* (But it's an indir obj regardless for sort.) */ 6369 /* Also, if "_" follows a filetest operator, it's a bareword */ 6370 6371 if ( 6372 ( !immediate_paren && (PL_last_lop_op == OP_SORT || 6373 (!cv && 6374 (PL_last_lop_op != OP_MAPSTART && 6375 PL_last_lop_op != OP_GREPSTART)))) 6376 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0' 6377 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP)) 6378 ) 6379 { 6380 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR; 6381 goto bareword; 6382 } 6383 } 6384 6385 PL_expect = XOPERATOR; 6386 #ifdef PERL_MAD 6387 if (isSPACE(*s)) 6388 s = SKIPSPACE2(s,nextPL_nextwhite); 6389 PL_nextwhite = nextPL_nextwhite; 6390 #else 6391 s = skipspace(s); 6392 #endif 6393 6394 /* Is this a word before a => operator? */ 6395 if (*s == '=' && s[1] == '>' && !pkgname) { 6396 op_free(rv2cv_op); 6397 CLINE; 6398 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf); 6399 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len)) 6400 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv); 6401 TERM(WORD); 6402 } 6403 6404 /* If followed by a paren, it's certainly a subroutine. */ 6405 if (*s == '(') { 6406 CLINE; 6407 if (cv) { 6408 d = s + 1; 6409 while (SPACE_OR_TAB(*d)) 6410 d++; 6411 if (*d == ')' && (sv = cv_const_sv(cv))) { 6412 s = d + 1; 6413 goto its_constant; 6414 } 6415 } 6416 #ifdef PERL_MAD 6417 if (PL_madskills) { 6418 PL_nextwhite = PL_thiswhite; 6419 PL_thiswhite = 0; 6420 } 6421 start_force(PL_curforce); 6422 #endif 6423 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval; 6424 PL_expect = XOPERATOR; 6425 #ifdef PERL_MAD 6426 if (PL_madskills) { 6427 PL_nextwhite = nextPL_nextwhite; 6428 curmad('X', PL_thistoken); 6429 PL_thistoken = newSVpvs(""); 6430 } 6431 #endif 6432 op_free(rv2cv_op); 6433 force_next(WORD); 6434 pl_yylval.ival = 0; 6435 TOKEN('&'); 6436 } 6437 6438 /* If followed by var or block, call it a method (unless sub) */ 6439 6440 if ((*s == '$' || *s == '{') && !cv) { 6441 op_free(rv2cv_op); 6442 PL_last_lop = PL_oldbufptr; 6443 PL_last_lop_op = OP_METHOD; 6444 PREBLOCK(METHOD); 6445 } 6446 6447 /* If followed by a bareword, see if it looks like indir obj. */ 6448 6449 if (!orig_keyword 6450 && (isIDFIRST_lazy_if(s,UTF) || *s == '$') 6451 && (tmp = intuit_method(s, gv, cv))) { 6452 op_free(rv2cv_op); 6453 return REPORT(tmp); 6454 } 6455 6456 /* Not a method, so call it a subroutine (if defined) */ 6457 6458 if (cv) { 6459 if (lastchar == '-') 6460 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), 6461 "Ambiguous use of -%s resolved as -&%s()", 6462 PL_tokenbuf, PL_tokenbuf); 6463 /* Check for a constant sub */ 6464 if ((sv = cv_const_sv(cv))) { 6465 its_constant: 6466 op_free(rv2cv_op); 6467 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv); 6468 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv); 6469 pl_yylval.opval->op_private = 0; 6470 TOKEN(WORD); 6471 } 6472 6473 op_free(pl_yylval.opval); 6474 pl_yylval.opval = rv2cv_op; 6475 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN; 6476 PL_last_lop = PL_oldbufptr; 6477 PL_last_lop_op = OP_ENTERSUB; 6478 /* Is there a prototype? */ 6479 if ( 6480 #ifdef PERL_MAD 6481 cv && 6482 #endif 6483 SvPOK(cv)) 6484 { 6485 STRLEN protolen; 6486 const char *proto = SvPV_const(MUTABLE_SV(cv), protolen); 6487 if (!protolen) 6488 TERM(FUNC0SUB); 6489 if ((*proto == '$' || *proto == '_') && proto[1] == '\0') 6490 OPERATOR(UNIOPSUB); 6491 while (*proto == ';') 6492 proto++; 6493 if (*proto == '&' && *s == '{') { 6494 if (PL_curstash) 6495 sv_setpvs(PL_subname, "__ANON__"); 6496 else 6497 sv_setpvs(PL_subname, "__ANON__::__ANON__"); 6498 PREBLOCK(LSTOPSUB); 6499 } 6500 } 6501 #ifdef PERL_MAD 6502 { 6503 if (PL_madskills) { 6504 PL_nextwhite = PL_thiswhite; 6505 PL_thiswhite = 0; 6506 } 6507 start_force(PL_curforce); 6508 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval; 6509 PL_expect = XTERM; 6510 if (PL_madskills) { 6511 PL_nextwhite = nextPL_nextwhite; 6512 curmad('X', PL_thistoken); 6513 PL_thistoken = newSVpvs(""); 6514 } 6515 force_next(WORD); 6516 TOKEN(NOAMP); 6517 } 6518 } 6519 6520 /* Guess harder when madskills require "best effort". */ 6521 if (PL_madskills && (!gv || !GvCVu(gv))) { 6522 int probable_sub = 0; 6523 if (strchr("\"'`$@%0123456789!*+{[<", *s)) 6524 probable_sub = 1; 6525 else if (isALPHA(*s)) { 6526 char tmpbuf[1024]; 6527 STRLEN tmplen; 6528 d = s; 6529 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen); 6530 if (!keyword(tmpbuf, tmplen, 0)) 6531 probable_sub = 1; 6532 else { 6533 while (d < PL_bufend && isSPACE(*d)) 6534 d++; 6535 if (*d == '=' && d[1] == '>') 6536 probable_sub = 1; 6537 } 6538 } 6539 if (probable_sub) { 6540 gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV); 6541 op_free(pl_yylval.opval); 6542 pl_yylval.opval = rv2cv_op; 6543 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN; 6544 PL_last_lop = PL_oldbufptr; 6545 PL_last_lop_op = OP_ENTERSUB; 6546 PL_nextwhite = PL_thiswhite; 6547 PL_thiswhite = 0; 6548 start_force(PL_curforce); 6549 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval; 6550 PL_expect = XTERM; 6551 PL_nextwhite = nextPL_nextwhite; 6552 curmad('X', PL_thistoken); 6553 PL_thistoken = newSVpvs(""); 6554 force_next(WORD); 6555 TOKEN(NOAMP); 6556 } 6557 #else 6558 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval; 6559 PL_expect = XTERM; 6560 force_next(WORD); 6561 TOKEN(NOAMP); 6562 #endif 6563 } 6564 6565 /* Call it a bare word */ 6566 6567 if (PL_hints & HINT_STRICT_SUBS) 6568 pl_yylval.opval->op_private |= OPpCONST_STRICT; 6569 else { 6570 bareword: 6571 /* after "print" and similar functions (corresponding to 6572 * "F? L" in opcode.pl), whatever wasn't already parsed as 6573 * a filehandle should be subject to "strict subs". 6574 * Likewise for the optional indirect-object argument to system 6575 * or exec, which can't be a bareword */ 6576 if ((PL_last_lop_op == OP_PRINT 6577 || PL_last_lop_op == OP_PRTF 6578 || PL_last_lop_op == OP_SAY 6579 || PL_last_lop_op == OP_SYSTEM 6580 || PL_last_lop_op == OP_EXEC) 6581 && (PL_hints & HINT_STRICT_SUBS)) 6582 pl_yylval.opval->op_private |= OPpCONST_STRICT; 6583 if (lastchar != '-') { 6584 if (ckWARN(WARN_RESERVED)) { 6585 d = PL_tokenbuf; 6586 while (isLOWER(*d)) 6587 d++; 6588 if (!*d && !gv_stashpv(PL_tokenbuf, 0)) 6589 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved, 6590 PL_tokenbuf); 6591 } 6592 } 6593 } 6594 op_free(rv2cv_op); 6595 6596 safe_bareword: 6597 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) { 6598 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), 6599 "Operator or semicolon missing before %c%s", 6600 lastchar, PL_tokenbuf); 6601 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), 6602 "Ambiguous use of %c resolved as operator %c", 6603 lastchar, lastchar); 6604 } 6605 TOKEN(WORD); 6606 } 6607 6608 case KEY___FILE__: 6609 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, 6610 newSVpv(CopFILE(PL_curcop),0)); 6611 TERM(THING); 6612 6613 case KEY___LINE__: 6614 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, 6615 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop))); 6616 TERM(THING); 6617 6618 case KEY___PACKAGE__: 6619 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, 6620 (PL_curstash 6621 ? newSVhek(HvNAME_HEK(PL_curstash)) 6622 : &PL_sv_undef)); 6623 TERM(THING); 6624 6625 case KEY___DATA__: 6626 case KEY___END__: { 6627 GV *gv; 6628 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) { 6629 const char *pname = "main"; 6630 if (PL_tokenbuf[2] == 'D') 6631 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash); 6632 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD, 6633 SVt_PVIO); 6634 GvMULTI_on(gv); 6635 if (!GvIO(gv)) 6636 GvIOp(gv) = newIO(); 6637 IoIFP(GvIOp(gv)) = PL_rsfp; 6638 #if defined(HAS_FCNTL) && defined(F_SETFD) 6639 { 6640 const int fd = PerlIO_fileno(PL_rsfp); 6641 fcntl(fd,F_SETFD,fd >= 3); 6642 } 6643 #endif 6644 /* Mark this internal pseudo-handle as clean */ 6645 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT; 6646 if ((PerlIO*)PL_rsfp == PerlIO_stdin()) 6647 IoTYPE(GvIOp(gv)) = IoTYPE_STD; 6648 else 6649 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY; 6650 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS) 6651 /* if the script was opened in binmode, we need to revert 6652 * it to text mode for compatibility; but only iff it has CRs 6653 * XXX this is a questionable hack at best. */ 6654 if (PL_bufend-PL_bufptr > 2 6655 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r') 6656 { 6657 Off_t loc = 0; 6658 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) { 6659 loc = PerlIO_tell(PL_rsfp); 6660 (void)PerlIO_seek(PL_rsfp, 0L, 0); 6661 } 6662 #ifdef NETWARE 6663 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) { 6664 #else 6665 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) { 6666 #endif /* NETWARE */ 6667 #ifdef PERLIO_IS_STDIO /* really? */ 6668 # if defined(__BORLANDC__) 6669 /* XXX see note in do_binmode() */ 6670 ((FILE*)PL_rsfp)->flags &= ~_F_BIN; 6671 # endif 6672 #endif 6673 if (loc > 0) 6674 PerlIO_seek(PL_rsfp, loc, 0); 6675 } 6676 } 6677 #endif 6678 #ifdef PERLIO_LAYERS 6679 if (!IN_BYTES) { 6680 if (UTF) 6681 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8"); 6682 else if (PL_encoding) { 6683 SV *name; 6684 dSP; 6685 ENTER; 6686 SAVETMPS; 6687 PUSHMARK(sp); 6688 EXTEND(SP, 1); 6689 XPUSHs(PL_encoding); 6690 PUTBACK; 6691 call_method("name", G_SCALAR); 6692 SPAGAIN; 6693 name = POPs; 6694 PUTBACK; 6695 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, 6696 Perl_form(aTHX_ ":encoding(%"SVf")", 6697 SVfARG(name))); 6698 FREETMPS; 6699 LEAVE; 6700 } 6701 } 6702 #endif 6703 #ifdef PERL_MAD 6704 if (PL_madskills) { 6705 if (PL_realtokenstart >= 0) { 6706 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart; 6707 if (!PL_endwhite) 6708 PL_endwhite = newSVpvs(""); 6709 sv_catsv(PL_endwhite, PL_thiswhite); 6710 PL_thiswhite = 0; 6711 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart); 6712 PL_realtokenstart = -1; 6713 } 6714 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite))) 6715 != NULL) ; 6716 } 6717 #endif 6718 PL_rsfp = NULL; 6719 } 6720 goto fake_eof; 6721 } 6722 6723 case KEY_AUTOLOAD: 6724 case KEY_DESTROY: 6725 case KEY_BEGIN: 6726 case KEY_UNITCHECK: 6727 case KEY_CHECK: 6728 case KEY_INIT: 6729 case KEY_END: 6730 if (PL_expect == XSTATE) { 6731 s = PL_bufptr; 6732 goto really_sub; 6733 } 6734 goto just_a_word; 6735 6736 case KEY_CORE: 6737 if (*s == ':' && s[1] == ':') { 6738 s += 2; 6739 d = s; 6740 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len); 6741 if (!(tmp = keyword(PL_tokenbuf, len, 0))) 6742 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf); 6743 if (tmp < 0) 6744 tmp = -tmp; 6745 else if (tmp == KEY_require || tmp == KEY_do) 6746 /* that's a way to remember we saw "CORE::" */ 6747 orig_keyword = tmp; 6748 goto reserved_word; 6749 } 6750 goto just_a_word; 6751 6752 case KEY_abs: 6753 UNI(OP_ABS); 6754 6755 case KEY_alarm: 6756 UNI(OP_ALARM); 6757 6758 case KEY_accept: 6759 LOP(OP_ACCEPT,XTERM); 6760 6761 case KEY_and: 6762 OPERATOR(ANDOP); 6763 6764 case KEY_atan2: 6765 LOP(OP_ATAN2,XTERM); 6766 6767 case KEY_bind: 6768 LOP(OP_BIND,XTERM); 6769 6770 case KEY_binmode: 6771 LOP(OP_BINMODE,XTERM); 6772 6773 case KEY_bless: 6774 LOP(OP_BLESS,XTERM); 6775 6776 case KEY_break: 6777 FUN0(OP_BREAK); 6778 6779 case KEY_chop: 6780 UNI(OP_CHOP); 6781 6782 case KEY_continue: 6783 /* When 'use switch' is in effect, continue has a dual 6784 life as a control operator. */ 6785 { 6786 if (!FEATURE_IS_ENABLED("switch")) 6787 PREBLOCK(CONTINUE); 6788 else { 6789 /* We have to disambiguate the two senses of 6790 "continue". If the next token is a '{' then 6791 treat it as the start of a continue block; 6792 otherwise treat it as a control operator. 6793 */ 6794 s = skipspace(s); 6795 if (*s == '{') 6796 PREBLOCK(CONTINUE); 6797 else 6798 FUN0(OP_CONTINUE); 6799 } 6800 } 6801 6802 case KEY_chdir: 6803 /* may use HOME */ 6804 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV); 6805 UNI(OP_CHDIR); 6806 6807 case KEY_close: 6808 UNI(OP_CLOSE); 6809 6810 case KEY_closedir: 6811 UNI(OP_CLOSEDIR); 6812 6813 case KEY_cmp: 6814 Eop(OP_SCMP); 6815 6816 case KEY_caller: 6817 UNI(OP_CALLER); 6818 6819 case KEY_crypt: 6820 #ifdef FCRYPT 6821 if (!PL_cryptseen) { 6822 PL_cryptseen = TRUE; 6823 init_des(); 6824 } 6825 #endif 6826 LOP(OP_CRYPT,XTERM); 6827 6828 case KEY_chmod: 6829 LOP(OP_CHMOD,XTERM); 6830 6831 case KEY_chown: 6832 LOP(OP_CHOWN,XTERM); 6833 6834 case KEY_connect: 6835 LOP(OP_CONNECT,XTERM); 6836 6837 case KEY_chr: 6838 UNI(OP_CHR); 6839 6840 case KEY_cos: 6841 UNI(OP_COS); 6842 6843 case KEY_chroot: 6844 UNI(OP_CHROOT); 6845 6846 case KEY_default: 6847 PREBLOCK(DEFAULT); 6848 6849 case KEY_do: 6850 s = SKIPSPACE1(s); 6851 if (*s == '{') 6852 PRETERMBLOCK(DO); 6853 if (*s != '\'') 6854 s = force_word(s,WORD,TRUE,TRUE,FALSE); 6855 if (orig_keyword == KEY_do) { 6856 orig_keyword = 0; 6857 pl_yylval.ival = 1; 6858 } 6859 else 6860 pl_yylval.ival = 0; 6861 OPERATOR(DO); 6862 6863 case KEY_die: 6864 PL_hints |= HINT_BLOCK_SCOPE; 6865 LOP(OP_DIE,XTERM); 6866 6867 case KEY_defined: 6868 UNI(OP_DEFINED); 6869 6870 case KEY_delete: 6871 UNI(OP_DELETE); 6872 6873 case KEY_dbmopen: 6874 gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV); 6875 LOP(OP_DBMOPEN,XTERM); 6876 6877 case KEY_dbmclose: 6878 UNI(OP_DBMCLOSE); 6879 6880 case KEY_dump: 6881 s = force_word(s,WORD,TRUE,FALSE,FALSE); 6882 LOOPX(OP_DUMP); 6883 6884 case KEY_else: 6885 PREBLOCK(ELSE); 6886 6887 case KEY_elsif: 6888 pl_yylval.ival = CopLINE(PL_curcop); 6889 OPERATOR(ELSIF); 6890 6891 case KEY_eq: 6892 Eop(OP_SEQ); 6893 6894 case KEY_exists: 6895 UNI(OP_EXISTS); 6896 6897 case KEY_exit: 6898 if (PL_madskills) 6899 UNI(OP_INT); 6900 UNI(OP_EXIT); 6901 6902 case KEY_eval: 6903 s = SKIPSPACE1(s); 6904 if (*s == '{') { /* block eval */ 6905 PL_expect = XTERMBLOCK; 6906 UNIBRACK(OP_ENTERTRY); 6907 } 6908 else { /* string eval */ 6909 PL_expect = XTERM; 6910 UNIBRACK(OP_ENTEREVAL); 6911 } 6912 6913 case KEY_eof: 6914 UNI(OP_EOF); 6915 6916 case KEY_exp: 6917 UNI(OP_EXP); 6918 6919 case KEY_each: 6920 UNI(OP_EACH); 6921 6922 case KEY_exec: 6923 LOP(OP_EXEC,XREF); 6924 6925 case KEY_endhostent: 6926 FUN0(OP_EHOSTENT); 6927 6928 case KEY_endnetent: 6929 FUN0(OP_ENETENT); 6930 6931 case KEY_endservent: 6932 FUN0(OP_ESERVENT); 6933 6934 case KEY_endprotoent: 6935 FUN0(OP_EPROTOENT); 6936 6937 case KEY_endpwent: 6938 FUN0(OP_EPWENT); 6939 6940 case KEY_endgrent: 6941 FUN0(OP_EGRENT); 6942 6943 case KEY_for: 6944 case KEY_foreach: 6945 pl_yylval.ival = CopLINE(PL_curcop); 6946 s = SKIPSPACE1(s); 6947 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) { 6948 char *p = s; 6949 #ifdef PERL_MAD 6950 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */ 6951 #endif 6952 6953 if ((PL_bufend - p) >= 3 && 6954 strnEQ(p, "my", 2) && isSPACE(*(p + 2))) 6955 p += 2; 6956 else if ((PL_bufend - p) >= 4 && 6957 strnEQ(p, "our", 3) && isSPACE(*(p + 3))) 6958 p += 3; 6959 p = PEEKSPACE(p); 6960 if (isIDFIRST_lazy_if(p,UTF)) { 6961 p = scan_ident(p, PL_bufend, 6962 PL_tokenbuf, sizeof PL_tokenbuf, TRUE); 6963 p = PEEKSPACE(p); 6964 } 6965 if (*p != '$') 6966 Perl_croak(aTHX_ "Missing $ on loop variable"); 6967 #ifdef PERL_MAD 6968 s = SvPVX(PL_linestr) + soff; 6969 #endif 6970 } 6971 OPERATOR(FOR); 6972 6973 case KEY_formline: 6974 LOP(OP_FORMLINE,XTERM); 6975 6976 case KEY_fork: 6977 FUN0(OP_FORK); 6978 6979 case KEY_fcntl: 6980 LOP(OP_FCNTL,XTERM); 6981 6982 case KEY_fileno: 6983 UNI(OP_FILENO); 6984 6985 case KEY_flock: 6986 LOP(OP_FLOCK,XTERM); 6987 6988 case KEY_gt: 6989 Rop(OP_SGT); 6990 6991 case KEY_ge: 6992 Rop(OP_SGE); 6993 6994 case KEY_grep: 6995 LOP(OP_GREPSTART, XREF); 6996 6997 case KEY_goto: 6998 s = force_word(s,WORD,TRUE,FALSE,FALSE); 6999 LOOPX(OP_GOTO); 7000 7001 case KEY_gmtime: 7002 UNI(OP_GMTIME); 7003 7004 case KEY_getc: 7005 UNIDOR(OP_GETC); 7006 7007 case KEY_getppid: 7008 FUN0(OP_GETPPID); 7009 7010 case KEY_getpgrp: 7011 UNI(OP_GETPGRP); 7012 7013 case KEY_getpriority: 7014 LOP(OP_GETPRIORITY,XTERM); 7015 7016 case KEY_getprotobyname: 7017 UNI(OP_GPBYNAME); 7018 7019 case KEY_getprotobynumber: 7020 LOP(OP_GPBYNUMBER,XTERM); 7021 7022 case KEY_getprotoent: 7023 FUN0(OP_GPROTOENT); 7024 7025 case KEY_getpwent: 7026 FUN0(OP_GPWENT); 7027 7028 case KEY_getpwnam: 7029 UNI(OP_GPWNAM); 7030 7031 case KEY_getpwuid: 7032 UNI(OP_GPWUID); 7033 7034 case KEY_getpeername: 7035 UNI(OP_GETPEERNAME); 7036 7037 case KEY_gethostbyname: 7038 UNI(OP_GHBYNAME); 7039 7040 case KEY_gethostbyaddr: 7041 LOP(OP_GHBYADDR,XTERM); 7042 7043 case KEY_gethostent: 7044 FUN0(OP_GHOSTENT); 7045 7046 case KEY_getnetbyname: 7047 UNI(OP_GNBYNAME); 7048 7049 case KEY_getnetbyaddr: 7050 LOP(OP_GNBYADDR,XTERM); 7051 7052 case KEY_getnetent: 7053 FUN0(OP_GNETENT); 7054 7055 case KEY_getservbyname: 7056 LOP(OP_GSBYNAME,XTERM); 7057 7058 case KEY_getservbyport: 7059 LOP(OP_GSBYPORT,XTERM); 7060 7061 case KEY_getservent: 7062 FUN0(OP_GSERVENT); 7063 7064 case KEY_getsockname: 7065 UNI(OP_GETSOCKNAME); 7066 7067 case KEY_getsockopt: 7068 LOP(OP_GSOCKOPT,XTERM); 7069 7070 case KEY_getgrent: 7071 FUN0(OP_GGRENT); 7072 7073 case KEY_getgrnam: 7074 UNI(OP_GGRNAM); 7075 7076 case KEY_getgrgid: 7077 UNI(OP_GGRGID); 7078 7079 case KEY_getlogin: 7080 FUN0(OP_GETLOGIN); 7081 7082 case KEY_given: 7083 pl_yylval.ival = CopLINE(PL_curcop); 7084 OPERATOR(GIVEN); 7085 7086 case KEY_glob: 7087 LOP(OP_GLOB,XTERM); 7088 7089 case KEY_hex: 7090 UNI(OP_HEX); 7091 7092 case KEY_if: 7093 pl_yylval.ival = CopLINE(PL_curcop); 7094 OPERATOR(IF); 7095 7096 case KEY_index: 7097 LOP(OP_INDEX,XTERM); 7098 7099 case KEY_int: 7100 UNI(OP_INT); 7101 7102 case KEY_ioctl: 7103 LOP(OP_IOCTL,XTERM); 7104 7105 case KEY_join: 7106 LOP(OP_JOIN,XTERM); 7107 7108 case KEY_keys: 7109 UNI(OP_KEYS); 7110 7111 case KEY_kill: 7112 LOP(OP_KILL,XTERM); 7113 7114 case KEY_last: 7115 s = force_word(s,WORD,TRUE,FALSE,FALSE); 7116 LOOPX(OP_LAST); 7117 7118 case KEY_lc: 7119 UNI(OP_LC); 7120 7121 case KEY_lcfirst: 7122 UNI(OP_LCFIRST); 7123 7124 case KEY_local: 7125 pl_yylval.ival = 0; 7126 OPERATOR(LOCAL); 7127 7128 case KEY_length: 7129 UNI(OP_LENGTH); 7130 7131 case KEY_lt: 7132 Rop(OP_SLT); 7133 7134 case KEY_le: 7135 Rop(OP_SLE); 7136 7137 case KEY_localtime: 7138 UNI(OP_LOCALTIME); 7139 7140 case KEY_log: 7141 UNI(OP_LOG); 7142 7143 case KEY_link: 7144 LOP(OP_LINK,XTERM); 7145 7146 case KEY_listen: 7147 LOP(OP_LISTEN,XTERM); 7148 7149 case KEY_lock: 7150 UNI(OP_LOCK); 7151 7152 case KEY_lstat: 7153 UNI(OP_LSTAT); 7154 7155 case KEY_m: 7156 s = scan_pat(s,OP_MATCH); 7157 TERM(sublex_start()); 7158 7159 case KEY_map: 7160 LOP(OP_MAPSTART, XREF); 7161 7162 case KEY_mkdir: 7163 LOP(OP_MKDIR,XTERM); 7164 7165 case KEY_msgctl: 7166 LOP(OP_MSGCTL,XTERM); 7167 7168 case KEY_msgget: 7169 LOP(OP_MSGGET,XTERM); 7170 7171 case KEY_msgrcv: 7172 LOP(OP_MSGRCV,XTERM); 7173 7174 case KEY_msgsnd: 7175 LOP(OP_MSGSND,XTERM); 7176 7177 case KEY_our: 7178 case KEY_my: 7179 case KEY_state: 7180 PL_in_my = (U16)tmp; 7181 s = SKIPSPACE1(s); 7182 if (isIDFIRST_lazy_if(s,UTF)) { 7183 #ifdef PERL_MAD 7184 char* start = s; 7185 #endif 7186 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len); 7187 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3)) 7188 goto really_sub; 7189 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len); 7190 if (!PL_in_my_stash) { 7191 char tmpbuf[1024]; 7192 PL_bufptr = s; 7193 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf); 7194 yyerror(tmpbuf); 7195 } 7196 #ifdef PERL_MAD 7197 if (PL_madskills) { /* just add type to declarator token */ 7198 sv_catsv(PL_thistoken, PL_nextwhite); 7199 PL_nextwhite = 0; 7200 sv_catpvn(PL_thistoken, start, s - start); 7201 } 7202 #endif 7203 } 7204 pl_yylval.ival = 1; 7205 OPERATOR(MY); 7206 7207 case KEY_next: 7208 s = force_word(s,WORD,TRUE,FALSE,FALSE); 7209 LOOPX(OP_NEXT); 7210 7211 case KEY_ne: 7212 Eop(OP_SNE); 7213 7214 case KEY_no: 7215 s = tokenize_use(0, s); 7216 OPERATOR(USE); 7217 7218 case KEY_not: 7219 if (*s == '(' || (s = SKIPSPACE1(s), *s == '(')) 7220 FUN1(OP_NOT); 7221 else 7222 OPERATOR(NOTOP); 7223 7224 case KEY_open: 7225 s = SKIPSPACE1(s); 7226 if (isIDFIRST_lazy_if(s,UTF)) { 7227 const char *t; 7228 for (d = s; isALNUM_lazy_if(d,UTF);) 7229 d++; 7230 for (t=d; isSPACE(*t);) 7231 t++; 7232 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE) 7233 /* [perl #16184] */ 7234 && !(t[0] == '=' && t[1] == '>') 7235 ) { 7236 int parms_len = (int)(d-s); 7237 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE), 7238 "Precedence problem: open %.*s should be open(%.*s)", 7239 parms_len, s, parms_len, s); 7240 } 7241 } 7242 LOP(OP_OPEN,XTERM); 7243 7244 case KEY_or: 7245 pl_yylval.ival = OP_OR; 7246 OPERATOR(OROP); 7247 7248 case KEY_ord: 7249 UNI(OP_ORD); 7250 7251 case KEY_oct: 7252 UNI(OP_OCT); 7253 7254 case KEY_opendir: 7255 LOP(OP_OPEN_DIR,XTERM); 7256 7257 case KEY_print: 7258 checkcomma(s,PL_tokenbuf,"filehandle"); 7259 LOP(OP_PRINT,XREF); 7260 7261 case KEY_printf: 7262 checkcomma(s,PL_tokenbuf,"filehandle"); 7263 LOP(OP_PRTF,XREF); 7264 7265 case KEY_prototype: 7266 UNI(OP_PROTOTYPE); 7267 7268 case KEY_push: 7269 LOP(OP_PUSH,XTERM); 7270 7271 case KEY_pop: 7272 UNIDOR(OP_POP); 7273 7274 case KEY_pos: 7275 UNIDOR(OP_POS); 7276 7277 case KEY_pack: 7278 LOP(OP_PACK,XTERM); 7279 7280 case KEY_package: 7281 s = force_word(s,WORD,FALSE,TRUE,FALSE); 7282 s = SKIPSPACE1(s); 7283 s = force_strict_version(s); 7284 OPERATOR(PACKAGE); 7285 7286 case KEY_pipe: 7287 LOP(OP_PIPE_OP,XTERM); 7288 7289 case KEY_q: 7290 s = scan_str(s,!!PL_madskills,FALSE); 7291 if (!s) 7292 missingterm(NULL); 7293 pl_yylval.ival = OP_CONST; 7294 TERM(sublex_start()); 7295 7296 case KEY_quotemeta: 7297 UNI(OP_QUOTEMETA); 7298 7299 case KEY_qw: 7300 s = scan_str(s,!!PL_madskills,FALSE); 7301 if (!s) 7302 missingterm(NULL); 7303 PL_expect = XOPERATOR; 7304 force_next(')'); 7305 if (SvCUR(PL_lex_stuff)) { 7306 OP *words = NULL; 7307 int warned = 0; 7308 d = SvPV_force(PL_lex_stuff, len); 7309 while (len) { 7310 for (; isSPACE(*d) && len; --len, ++d) 7311 /**/; 7312 if (len) { 7313 SV *sv; 7314 const char *b = d; 7315 if (!warned && ckWARN(WARN_QW)) { 7316 for (; !isSPACE(*d) && len; --len, ++d) { 7317 if (*d == ',') { 7318 Perl_warner(aTHX_ packWARN(WARN_QW), 7319 "Possible attempt to separate words with commas"); 7320 ++warned; 7321 } 7322 else if (*d == '#') { 7323 Perl_warner(aTHX_ packWARN(WARN_QW), 7324 "Possible attempt to put comments in qw() list"); 7325 ++warned; 7326 } 7327 } 7328 } 7329 else { 7330 for (; !isSPACE(*d) && len; --len, ++d) 7331 /**/; 7332 } 7333 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff)); 7334 words = append_elem(OP_LIST, words, 7335 newSVOP(OP_CONST, 0, tokeq(sv))); 7336 } 7337 } 7338 if (words) { 7339 start_force(PL_curforce); 7340 NEXTVAL_NEXTTOKE.opval = words; 7341 force_next(THING); 7342 } 7343 } 7344 if (PL_lex_stuff) { 7345 SvREFCNT_dec(PL_lex_stuff); 7346 PL_lex_stuff = NULL; 7347 } 7348 PL_expect = XTERM; 7349 TOKEN('('); 7350 7351 case KEY_qq: 7352 s = scan_str(s,!!PL_madskills,FALSE); 7353 if (!s) 7354 missingterm(NULL); 7355 pl_yylval.ival = OP_STRINGIFY; 7356 if (SvIVX(PL_lex_stuff) == '\'') 7357 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */ 7358 TERM(sublex_start()); 7359 7360 case KEY_qr: 7361 s = scan_pat(s,OP_QR); 7362 TERM(sublex_start()); 7363 7364 case KEY_qx: 7365 s = scan_str(s,!!PL_madskills,FALSE); 7366 if (!s) 7367 missingterm(NULL); 7368 readpipe_override(); 7369 TERM(sublex_start()); 7370 7371 case KEY_return: 7372 OLDLOP(OP_RETURN); 7373 7374 case KEY_require: 7375 s = SKIPSPACE1(s); 7376 if (isDIGIT(*s)) { 7377 s = force_version(s, FALSE); 7378 } 7379 else if (*s != 'v' || !isDIGIT(s[1]) 7380 || (s = force_version(s, TRUE), *s == 'v')) 7381 { 7382 *PL_tokenbuf = '\0'; 7383 s = force_word(s,WORD,TRUE,TRUE,FALSE); 7384 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF)) 7385 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD); 7386 else if (*s == '<') 7387 yyerror("<> should be quotes"); 7388 } 7389 if (orig_keyword == KEY_require) { 7390 orig_keyword = 0; 7391 pl_yylval.ival = 1; 7392 } 7393 else 7394 pl_yylval.ival = 0; 7395 PL_expect = XTERM; 7396 PL_bufptr = s; 7397 PL_last_uni = PL_oldbufptr; 7398 PL_last_lop_op = OP_REQUIRE; 7399 s = skipspace(s); 7400 return REPORT( (int)REQUIRE ); 7401 7402 case KEY_reset: 7403 UNI(OP_RESET); 7404 7405 case KEY_redo: 7406 s = force_word(s,WORD,TRUE,FALSE,FALSE); 7407 LOOPX(OP_REDO); 7408 7409 case KEY_rename: 7410 LOP(OP_RENAME,XTERM); 7411 7412 case KEY_rand: 7413 UNI(OP_RAND); 7414 7415 case KEY_rmdir: 7416 UNI(OP_RMDIR); 7417 7418 case KEY_rindex: 7419 LOP(OP_RINDEX,XTERM); 7420 7421 case KEY_read: 7422 LOP(OP_READ,XTERM); 7423 7424 case KEY_readdir: 7425 UNI(OP_READDIR); 7426 7427 case KEY_readline: 7428 UNIDOR(OP_READLINE); 7429 7430 case KEY_readpipe: 7431 UNIDOR(OP_BACKTICK); 7432 7433 case KEY_rewinddir: 7434 UNI(OP_REWINDDIR); 7435 7436 case KEY_recv: 7437 LOP(OP_RECV,XTERM); 7438 7439 case KEY_reverse: 7440 LOP(OP_REVERSE,XTERM); 7441 7442 case KEY_readlink: 7443 UNIDOR(OP_READLINK); 7444 7445 case KEY_ref: 7446 UNI(OP_REF); 7447 7448 case KEY_s: 7449 s = scan_subst(s); 7450 if (pl_yylval.opval) 7451 TERM(sublex_start()); 7452 else 7453 TOKEN(1); /* force error */ 7454 7455 case KEY_say: 7456 checkcomma(s,PL_tokenbuf,"filehandle"); 7457 LOP(OP_SAY,XREF); 7458 7459 case KEY_chomp: 7460 UNI(OP_CHOMP); 7461 7462 case KEY_scalar: 7463 UNI(OP_SCALAR); 7464 7465 case KEY_select: 7466 LOP(OP_SELECT,XTERM); 7467 7468 case KEY_seek: 7469 LOP(OP_SEEK,XTERM); 7470 7471 case KEY_semctl: 7472 LOP(OP_SEMCTL,XTERM); 7473 7474 case KEY_semget: 7475 LOP(OP_SEMGET,XTERM); 7476 7477 case KEY_semop: 7478 LOP(OP_SEMOP,XTERM); 7479 7480 case KEY_send: 7481 LOP(OP_SEND,XTERM); 7482 7483 case KEY_setpgrp: 7484 LOP(OP_SETPGRP,XTERM); 7485 7486 case KEY_setpriority: 7487 LOP(OP_SETPRIORITY,XTERM); 7488 7489 case KEY_sethostent: 7490 UNI(OP_SHOSTENT); 7491 7492 case KEY_setnetent: 7493 UNI(OP_SNETENT); 7494 7495 case KEY_setservent: 7496 UNI(OP_SSERVENT); 7497 7498 case KEY_setprotoent: 7499 UNI(OP_SPROTOENT); 7500 7501 case KEY_setpwent: 7502 FUN0(OP_SPWENT); 7503 7504 case KEY_setgrent: 7505 FUN0(OP_SGRENT); 7506 7507 case KEY_seekdir: 7508 LOP(OP_SEEKDIR,XTERM); 7509 7510 case KEY_setsockopt: 7511 LOP(OP_SSOCKOPT,XTERM); 7512 7513 case KEY_shift: 7514 UNIDOR(OP_SHIFT); 7515 7516 case KEY_shmctl: 7517 LOP(OP_SHMCTL,XTERM); 7518 7519 case KEY_shmget: 7520 LOP(OP_SHMGET,XTERM); 7521 7522 case KEY_shmread: 7523 LOP(OP_SHMREAD,XTERM); 7524 7525 case KEY_shmwrite: 7526 LOP(OP_SHMWRITE,XTERM); 7527 7528 case KEY_shutdown: 7529 LOP(OP_SHUTDOWN,XTERM); 7530 7531 case KEY_sin: 7532 UNI(OP_SIN); 7533 7534 case KEY_sleep: 7535 UNI(OP_SLEEP); 7536 7537 case KEY_socket: 7538 LOP(OP_SOCKET,XTERM); 7539 7540 case KEY_socketpair: 7541 LOP(OP_SOCKPAIR,XTERM); 7542 7543 case KEY_sort: 7544 checkcomma(s,PL_tokenbuf,"subroutine name"); 7545 s = SKIPSPACE1(s); 7546 if (*s == ';' || *s == ')') /* probably a close */ 7547 Perl_croak(aTHX_ "sort is now a reserved word"); 7548 PL_expect = XTERM; 7549 s = force_word(s,WORD,TRUE,TRUE,FALSE); 7550 LOP(OP_SORT,XREF); 7551 7552 case KEY_split: 7553 LOP(OP_SPLIT,XTERM); 7554 7555 case KEY_sprintf: 7556 LOP(OP_SPRINTF,XTERM); 7557 7558 case KEY_splice: 7559 LOP(OP_SPLICE,XTERM); 7560 7561 case KEY_sqrt: 7562 UNI(OP_SQRT); 7563 7564 case KEY_srand: 7565 UNI(OP_SRAND); 7566 7567 case KEY_stat: 7568 UNI(OP_STAT); 7569 7570 case KEY_study: 7571 UNI(OP_STUDY); 7572 7573 case KEY_substr: 7574 LOP(OP_SUBSTR,XTERM); 7575 7576 case KEY_format: 7577 case KEY_sub: 7578 really_sub: 7579 { 7580 char tmpbuf[sizeof PL_tokenbuf]; 7581 SSize_t tboffset = 0; 7582 expectation attrful; 7583 bool have_name, have_proto; 7584 const int key = tmp; 7585 7586 #ifdef PERL_MAD 7587 SV *tmpwhite = 0; 7588 7589 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart; 7590 SV *subtoken = newSVpvn(tstart, s - tstart); 7591 PL_thistoken = 0; 7592 7593 d = s; 7594 s = SKIPSPACE2(s,tmpwhite); 7595 #else 7596 s = skipspace(s); 7597 #endif 7598 7599 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' || 7600 (*s == ':' && s[1] == ':')) 7601 { 7602 #ifdef PERL_MAD 7603 SV *nametoke = NULL; 7604 #endif 7605 7606 PL_expect = XBLOCK; 7607 attrful = XATTRBLOCK; 7608 /* remember buffer pos'n for later force_word */ 7609 tboffset = s - PL_oldbufptr; 7610 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len); 7611 #ifdef PERL_MAD 7612 if (PL_madskills) 7613 nametoke = newSVpvn(s, d - s); 7614 #endif 7615 if (memchr(tmpbuf, ':', len)) 7616 sv_setpvn(PL_subname, tmpbuf, len); 7617 else { 7618 sv_setsv(PL_subname,PL_curstname); 7619 sv_catpvs(PL_subname,"::"); 7620 sv_catpvn(PL_subname,tmpbuf,len); 7621 } 7622 have_name = TRUE; 7623 7624 #ifdef PERL_MAD 7625 7626 start_force(0); 7627 CURMAD('X', nametoke); 7628 CURMAD('_', tmpwhite); 7629 (void) force_word(PL_oldbufptr + tboffset, WORD, 7630 FALSE, TRUE, TRUE); 7631 7632 s = SKIPSPACE2(d,tmpwhite); 7633 #else 7634 s = skipspace(d); 7635 #endif 7636 } 7637 else { 7638 if (key == KEY_my) 7639 Perl_croak(aTHX_ "Missing name in \"my sub\""); 7640 PL_expect = XTERMBLOCK; 7641 attrful = XATTRTERM; 7642 sv_setpvs(PL_subname,"?"); 7643 have_name = FALSE; 7644 } 7645 7646 if (key == KEY_format) { 7647 if (*s == '=') 7648 PL_lex_formbrack = PL_lex_brackets + 1; 7649 #ifdef PERL_MAD 7650 PL_thistoken = subtoken; 7651 s = d; 7652 #else 7653 if (have_name) 7654 (void) force_word(PL_oldbufptr + tboffset, WORD, 7655 FALSE, TRUE, TRUE); 7656 #endif 7657 OPERATOR(FORMAT); 7658 } 7659 7660 /* Look for a prototype */ 7661 if (*s == '(') { 7662 char *p; 7663 bool bad_proto = FALSE; 7664 bool in_brackets = FALSE; 7665 char greedy_proto = ' '; 7666 bool proto_after_greedy_proto = FALSE; 7667 bool must_be_last = FALSE; 7668 bool underscore = FALSE; 7669 bool seen_underscore = FALSE; 7670 const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO); 7671 7672 s = scan_str(s,!!PL_madskills,FALSE); 7673 if (!s) 7674 Perl_croak(aTHX_ "Prototype not terminated"); 7675 /* strip spaces and check for bad characters */ 7676 d = SvPVX(PL_lex_stuff); 7677 tmp = 0; 7678 for (p = d; *p; ++p) { 7679 if (!isSPACE(*p)) { 7680 d[tmp++] = *p; 7681 7682 if (warnillegalproto) { 7683 if (must_be_last) 7684 proto_after_greedy_proto = TRUE; 7685 if (!strchr("$@%*;[]&\\_", *p)) { 7686 bad_proto = TRUE; 7687 } 7688 else { 7689 if ( underscore ) { 7690 if ( *p != ';' ) 7691 bad_proto = TRUE; 7692 underscore = FALSE; 7693 } 7694 if ( *p == '[' ) { 7695 in_brackets = TRUE; 7696 } 7697 else if ( *p == ']' ) { 7698 in_brackets = FALSE; 7699 } 7700 else if ( (*p == '@' || *p == '%') && 7701 ( tmp < 2 || d[tmp-2] != '\\' ) && 7702 !in_brackets ) { 7703 must_be_last = TRUE; 7704 greedy_proto = *p; 7705 } 7706 else if ( *p == '_' ) { 7707 underscore = seen_underscore = TRUE; 7708 } 7709 } 7710 } 7711 } 7712 } 7713 d[tmp] = '\0'; 7714 if (proto_after_greedy_proto) 7715 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), 7716 "Prototype after '%c' for %"SVf" : %s", 7717 greedy_proto, SVfARG(PL_subname), d); 7718 if (bad_proto) 7719 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), 7720 "Illegal character %sin prototype for %"SVf" : %s", 7721 seen_underscore ? "after '_' " : "", 7722 SVfARG(PL_subname), d); 7723 SvCUR_set(PL_lex_stuff, tmp); 7724 have_proto = TRUE; 7725 7726 #ifdef PERL_MAD 7727 start_force(0); 7728 CURMAD('q', PL_thisopen); 7729 CURMAD('_', tmpwhite); 7730 CURMAD('=', PL_thisstuff); 7731 CURMAD('Q', PL_thisclose); 7732 NEXTVAL_NEXTTOKE.opval = 7733 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff); 7734 PL_lex_stuff = NULL; 7735 force_next(THING); 7736 7737 s = SKIPSPACE2(s,tmpwhite); 7738 #else 7739 s = skipspace(s); 7740 #endif 7741 } 7742 else 7743 have_proto = FALSE; 7744 7745 if (*s == ':' && s[1] != ':') 7746 PL_expect = attrful; 7747 else if (*s != '{' && key == KEY_sub) { 7748 if (!have_name) 7749 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine"); 7750 else if (*s != ';' && *s != '}') 7751 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname)); 7752 } 7753 7754 #ifdef PERL_MAD 7755 start_force(0); 7756 if (tmpwhite) { 7757 if (PL_madskills) 7758 curmad('^', newSVpvs("")); 7759 CURMAD('_', tmpwhite); 7760 } 7761 force_next(0); 7762 7763 PL_thistoken = subtoken; 7764 #else 7765 if (have_proto) { 7766 NEXTVAL_NEXTTOKE.opval = 7767 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff); 7768 PL_lex_stuff = NULL; 7769 force_next(THING); 7770 } 7771 #endif 7772 if (!have_name) { 7773 if (PL_curstash) 7774 sv_setpvs(PL_subname, "__ANON__"); 7775 else 7776 sv_setpvs(PL_subname, "__ANON__::__ANON__"); 7777 TOKEN(ANONSUB); 7778 } 7779 #ifndef PERL_MAD 7780 (void) force_word(PL_oldbufptr + tboffset, WORD, 7781 FALSE, TRUE, TRUE); 7782 #endif 7783 if (key == KEY_my) 7784 TOKEN(MYSUB); 7785 TOKEN(SUB); 7786 } 7787 7788 case KEY_system: 7789 LOP(OP_SYSTEM,XREF); 7790 7791 case KEY_symlink: 7792 LOP(OP_SYMLINK,XTERM); 7793 7794 case KEY_syscall: 7795 LOP(OP_SYSCALL,XTERM); 7796 7797 case KEY_sysopen: 7798 LOP(OP_SYSOPEN,XTERM); 7799 7800 case KEY_sysseek: 7801 LOP(OP_SYSSEEK,XTERM); 7802 7803 case KEY_sysread: 7804 LOP(OP_SYSREAD,XTERM); 7805 7806 case KEY_syswrite: 7807 LOP(OP_SYSWRITE,XTERM); 7808 7809 case KEY_tr: 7810 s = scan_trans(s); 7811 TERM(sublex_start()); 7812 7813 case KEY_tell: 7814 UNI(OP_TELL); 7815 7816 case KEY_telldir: 7817 UNI(OP_TELLDIR); 7818 7819 case KEY_tie: 7820 LOP(OP_TIE,XTERM); 7821 7822 case KEY_tied: 7823 UNI(OP_TIED); 7824 7825 case KEY_time: 7826 FUN0(OP_TIME); 7827 7828 case KEY_times: 7829 FUN0(OP_TMS); 7830 7831 case KEY_truncate: 7832 LOP(OP_TRUNCATE,XTERM); 7833 7834 case KEY_uc: 7835 UNI(OP_UC); 7836 7837 case KEY_ucfirst: 7838 UNI(OP_UCFIRST); 7839 7840 case KEY_untie: 7841 UNI(OP_UNTIE); 7842 7843 case KEY_until: 7844 pl_yylval.ival = CopLINE(PL_curcop); 7845 OPERATOR(UNTIL); 7846 7847 case KEY_unless: 7848 pl_yylval.ival = CopLINE(PL_curcop); 7849 OPERATOR(UNLESS); 7850 7851 case KEY_unlink: 7852 LOP(OP_UNLINK,XTERM); 7853 7854 case KEY_undef: 7855 UNIDOR(OP_UNDEF); 7856 7857 case KEY_unpack: 7858 LOP(OP_UNPACK,XTERM); 7859 7860 case KEY_utime: 7861 LOP(OP_UTIME,XTERM); 7862 7863 case KEY_umask: 7864 UNIDOR(OP_UMASK); 7865 7866 case KEY_unshift: 7867 LOP(OP_UNSHIFT,XTERM); 7868 7869 case KEY_use: 7870 s = tokenize_use(1, s); 7871 OPERATOR(USE); 7872 7873 case KEY_values: 7874 UNI(OP_VALUES); 7875 7876 case KEY_vec: 7877 LOP(OP_VEC,XTERM); 7878 7879 case KEY_when: 7880 pl_yylval.ival = CopLINE(PL_curcop); 7881 OPERATOR(WHEN); 7882 7883 case KEY_while: 7884 pl_yylval.ival = CopLINE(PL_curcop); 7885 OPERATOR(WHILE); 7886 7887 case KEY_warn: 7888 PL_hints |= HINT_BLOCK_SCOPE; 7889 LOP(OP_WARN,XTERM); 7890 7891 case KEY_wait: 7892 FUN0(OP_WAIT); 7893 7894 case KEY_waitpid: 7895 LOP(OP_WAITPID,XTERM); 7896 7897 case KEY_wantarray: 7898 FUN0(OP_WANTARRAY); 7899 7900 case KEY_write: 7901 #ifdef EBCDIC 7902 { 7903 char ctl_l[2]; 7904 ctl_l[0] = toCTRL('L'); 7905 ctl_l[1] = '\0'; 7906 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV); 7907 } 7908 #else 7909 /* Make sure $^L is defined */ 7910 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV); 7911 #endif 7912 UNI(OP_ENTERWRITE); 7913 7914 case KEY_x: 7915 if (PL_expect == XOPERATOR) 7916 Mop(OP_REPEAT); 7917 check_uni(); 7918 goto just_a_word; 7919 7920 case KEY_xor: 7921 pl_yylval.ival = OP_XOR; 7922 OPERATOR(OROP); 7923 7924 case KEY_y: 7925 s = scan_trans(s); 7926 TERM(sublex_start()); 7927 } 7928 }} 7929 } 7930 #ifdef __SC__ 7931 #pragma segment Main 7932 #endif 7933 7934 static int 7935 S_pending_ident(pTHX) 7936 { 7937 dVAR; 7938 register char *d; 7939 PADOFFSET tmp = 0; 7940 /* pit holds the identifier we read and pending_ident is reset */ 7941 char pit = PL_pending_ident; 7942 const STRLEN tokenbuf_len = strlen(PL_tokenbuf); 7943 /* All routes through this function want to know if there is a colon. */ 7944 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len); 7945 PL_pending_ident = 0; 7946 7947 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */ 7948 DEBUG_T({ PerlIO_printf(Perl_debug_log, 7949 "### Pending identifier '%s'\n", PL_tokenbuf); }); 7950 7951 /* if we're in a my(), we can't allow dynamics here. 7952 $foo'bar has already been turned into $foo::bar, so 7953 just check for colons. 7954 7955 if it's a legal name, the OP is a PADANY. 7956 */ 7957 if (PL_in_my) { 7958 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */ 7959 if (has_colon) 7960 yyerror(Perl_form(aTHX_ "No package name allowed for " 7961 "variable %s in \"our\"", 7962 PL_tokenbuf)); 7963 tmp = allocmy(PL_tokenbuf, tokenbuf_len, 0); 7964 } 7965 else { 7966 if (has_colon) 7967 yyerror(Perl_form(aTHX_ PL_no_myglob, 7968 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf)); 7969 7970 pl_yylval.opval = newOP(OP_PADANY, 0); 7971 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 0); 7972 return PRIVATEREF; 7973 } 7974 } 7975 7976 /* 7977 build the ops for accesses to a my() variable. 7978 7979 Deny my($a) or my($b) in a sort block, *if* $a or $b is 7980 then used in a comparison. This catches most, but not 7981 all cases. For instance, it catches 7982 sort { my($a); $a <=> $b } 7983 but not 7984 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; } 7985 (although why you'd do that is anyone's guess). 7986 */ 7987 7988 if (!has_colon) { 7989 if (!PL_in_my) 7990 tmp = pad_findmy(PL_tokenbuf, tokenbuf_len, 0); 7991 if (tmp != NOT_IN_PAD) { 7992 /* might be an "our" variable" */ 7993 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) { 7994 /* build ops for a bareword */ 7995 HV * const stash = PAD_COMPNAME_OURSTASH(tmp); 7996 HEK * const stashname = HvNAME_HEK(stash); 7997 SV * const sym = newSVhek(stashname); 7998 sv_catpvs(sym, "::"); 7999 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1); 8000 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym); 8001 pl_yylval.opval->op_private = OPpCONST_ENTERED; 8002 gv_fetchsv(sym, 8003 (PL_in_eval 8004 ? (GV_ADDMULTI | GV_ADDINEVAL) 8005 : GV_ADDMULTI 8006 ), 8007 ((PL_tokenbuf[0] == '$') ? SVt_PV 8008 : (PL_tokenbuf[0] == '@') ? SVt_PVAV 8009 : SVt_PVHV)); 8010 return WORD; 8011 } 8012 8013 /* if it's a sort block and they're naming $a or $b */ 8014 if (PL_last_lop_op == OP_SORT && 8015 PL_tokenbuf[0] == '$' && 8016 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b') 8017 && !PL_tokenbuf[2]) 8018 { 8019 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart; 8020 d < PL_bufend && *d != '\n'; 8021 d++) 8022 { 8023 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) { 8024 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison", 8025 PL_tokenbuf); 8026 } 8027 } 8028 } 8029 8030 pl_yylval.opval = newOP(OP_PADANY, 0); 8031 pl_yylval.opval->op_targ = tmp; 8032 return PRIVATEREF; 8033 } 8034 } 8035 8036 /* 8037 Whine if they've said @foo in a doublequoted string, 8038 and @foo isn't a variable we can find in the symbol 8039 table. 8040 */ 8041 if (ckWARN(WARN_AMBIGUOUS) && 8042 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) { 8043 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0, 8044 SVt_PVAV); 8045 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv))) 8046 /* DO NOT warn for @- and @+ */ 8047 && !( PL_tokenbuf[2] == '\0' && 8048 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' )) 8049 ) 8050 { 8051 /* Downgraded from fatal to warning 20000522 mjd */ 8052 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 8053 "Possible unintended interpolation of %s in string", 8054 PL_tokenbuf); 8055 } 8056 } 8057 8058 /* build ops for a bareword */ 8059 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1, 8060 tokenbuf_len - 1)); 8061 pl_yylval.opval->op_private = OPpCONST_ENTERED; 8062 gv_fetchpvn_flags( 8063 PL_tokenbuf + 1, tokenbuf_len - 1, 8064 /* If the identifier refers to a stash, don't autovivify it. 8065 * Change 24660 had the side effect of causing symbol table 8066 * hashes to always be defined, even if they were freshly 8067 * created and the only reference in the entire program was 8068 * the single statement with the defined %foo::bar:: test. 8069 * It appears that all code in the wild doing this actually 8070 * wants to know whether sub-packages have been loaded, so 8071 * by avoiding auto-vivifying symbol tables, we ensure that 8072 * defined %foo::bar:: continues to be false, and the existing 8073 * tests still give the expected answers, even though what 8074 * they're actually testing has now changed subtly. 8075 */ 8076 (*PL_tokenbuf == '%' 8077 && *(d = PL_tokenbuf + tokenbuf_len - 1) == ':' 8078 && d[-1] == ':' 8079 ? 0 8080 : PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD), 8081 ((PL_tokenbuf[0] == '$') ? SVt_PV 8082 : (PL_tokenbuf[0] == '@') ? SVt_PVAV 8083 : SVt_PVHV)); 8084 return WORD; 8085 } 8086 8087 /* 8088 * The following code was generated by perl_keyword.pl. 8089 */ 8090 8091 I32 8092 Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords) 8093 { 8094 dVAR; 8095 8096 PERL_ARGS_ASSERT_KEYWORD; 8097 8098 switch (len) 8099 { 8100 case 1: /* 5 tokens of length 1 */ 8101 switch (name[0]) 8102 { 8103 case 'm': 8104 { /* m */ 8105 return KEY_m; 8106 } 8107 8108 case 'q': 8109 { /* q */ 8110 return KEY_q; 8111 } 8112 8113 case 's': 8114 { /* s */ 8115 return KEY_s; 8116 } 8117 8118 case 'x': 8119 { /* x */ 8120 return -KEY_x; 8121 } 8122 8123 case 'y': 8124 { /* y */ 8125 return KEY_y; 8126 } 8127 8128 default: 8129 goto unknown; 8130 } 8131 8132 case 2: /* 18 tokens of length 2 */ 8133 switch (name[0]) 8134 { 8135 case 'd': 8136 if (name[1] == 'o') 8137 { /* do */ 8138 return KEY_do; 8139 } 8140 8141 goto unknown; 8142 8143 case 'e': 8144 if (name[1] == 'q') 8145 { /* eq */ 8146 return -KEY_eq; 8147 } 8148 8149 goto unknown; 8150 8151 case 'g': 8152 switch (name[1]) 8153 { 8154 case 'e': 8155 { /* ge */ 8156 return -KEY_ge; 8157 } 8158 8159 case 't': 8160 { /* gt */ 8161 return -KEY_gt; 8162 } 8163 8164 default: 8165 goto unknown; 8166 } 8167 8168 case 'i': 8169 if (name[1] == 'f') 8170 { /* if */ 8171 return KEY_if; 8172 } 8173 8174 goto unknown; 8175 8176 case 'l': 8177 switch (name[1]) 8178 { 8179 case 'c': 8180 { /* lc */ 8181 return -KEY_lc; 8182 } 8183 8184 case 'e': 8185 { /* le */ 8186 return -KEY_le; 8187 } 8188 8189 case 't': 8190 { /* lt */ 8191 return -KEY_lt; 8192 } 8193 8194 default: 8195 goto unknown; 8196 } 8197 8198 case 'm': 8199 if (name[1] == 'y') 8200 { /* my */ 8201 return KEY_my; 8202 } 8203 8204 goto unknown; 8205 8206 case 'n': 8207 switch (name[1]) 8208 { 8209 case 'e': 8210 { /* ne */ 8211 return -KEY_ne; 8212 } 8213 8214 case 'o': 8215 { /* no */ 8216 return KEY_no; 8217 } 8218 8219 default: 8220 goto unknown; 8221 } 8222 8223 case 'o': 8224 if (name[1] == 'r') 8225 { /* or */ 8226 return -KEY_or; 8227 } 8228 8229 goto unknown; 8230 8231 case 'q': 8232 switch (name[1]) 8233 { 8234 case 'q': 8235 { /* qq */ 8236 return KEY_qq; 8237 } 8238 8239 case 'r': 8240 { /* qr */ 8241 return KEY_qr; 8242 } 8243 8244 case 'w': 8245 { /* qw */ 8246 return KEY_qw; 8247 } 8248 8249 case 'x': 8250 { /* qx */ 8251 return KEY_qx; 8252 } 8253 8254 default: 8255 goto unknown; 8256 } 8257 8258 case 't': 8259 if (name[1] == 'r') 8260 { /* tr */ 8261 return KEY_tr; 8262 } 8263 8264 goto unknown; 8265 8266 case 'u': 8267 if (name[1] == 'c') 8268 { /* uc */ 8269 return -KEY_uc; 8270 } 8271 8272 goto unknown; 8273 8274 default: 8275 goto unknown; 8276 } 8277 8278 case 3: /* 29 tokens of length 3 */ 8279 switch (name[0]) 8280 { 8281 case 'E': 8282 if (name[1] == 'N' && 8283 name[2] == 'D') 8284 { /* END */ 8285 return KEY_END; 8286 } 8287 8288 goto unknown; 8289 8290 case 'a': 8291 switch (name[1]) 8292 { 8293 case 'b': 8294 if (name[2] == 's') 8295 { /* abs */ 8296 return -KEY_abs; 8297 } 8298 8299 goto unknown; 8300 8301 case 'n': 8302 if (name[2] == 'd') 8303 { /* and */ 8304 return -KEY_and; 8305 } 8306 8307 goto unknown; 8308 8309 default: 8310 goto unknown; 8311 } 8312 8313 case 'c': 8314 switch (name[1]) 8315 { 8316 case 'h': 8317 if (name[2] == 'r') 8318 { /* chr */ 8319 return -KEY_chr; 8320 } 8321 8322 goto unknown; 8323 8324 case 'm': 8325 if (name[2] == 'p') 8326 { /* cmp */ 8327 return -KEY_cmp; 8328 } 8329 8330 goto unknown; 8331 8332 case 'o': 8333 if (name[2] == 's') 8334 { /* cos */ 8335 return -KEY_cos; 8336 } 8337 8338 goto unknown; 8339 8340 default: 8341 goto unknown; 8342 } 8343 8344 case 'd': 8345 if (name[1] == 'i' && 8346 name[2] == 'e') 8347 { /* die */ 8348 return -KEY_die; 8349 } 8350 8351 goto unknown; 8352 8353 case 'e': 8354 switch (name[1]) 8355 { 8356 case 'o': 8357 if (name[2] == 'f') 8358 { /* eof */ 8359 return -KEY_eof; 8360 } 8361 8362 goto unknown; 8363 8364 case 'x': 8365 if (name[2] == 'p') 8366 { /* exp */ 8367 return -KEY_exp; 8368 } 8369 8370 goto unknown; 8371 8372 default: 8373 goto unknown; 8374 } 8375 8376 case 'f': 8377 if (name[1] == 'o' && 8378 name[2] == 'r') 8379 { /* for */ 8380 return KEY_for; 8381 } 8382 8383 goto unknown; 8384 8385 case 'h': 8386 if (name[1] == 'e' && 8387 name[2] == 'x') 8388 { /* hex */ 8389 return -KEY_hex; 8390 } 8391 8392 goto unknown; 8393 8394 case 'i': 8395 if (name[1] == 'n' && 8396 name[2] == 't') 8397 { /* int */ 8398 return -KEY_int; 8399 } 8400 8401 goto unknown; 8402 8403 case 'l': 8404 if (name[1] == 'o' && 8405 name[2] == 'g') 8406 { /* log */ 8407 return -KEY_log; 8408 } 8409 8410 goto unknown; 8411 8412 case 'm': 8413 if (name[1] == 'a' && 8414 name[2] == 'p') 8415 { /* map */ 8416 return KEY_map; 8417 } 8418 8419 goto unknown; 8420 8421 case 'n': 8422 if (name[1] == 'o' && 8423 name[2] == 't') 8424 { /* not */ 8425 return -KEY_not; 8426 } 8427 8428 goto unknown; 8429 8430 case 'o': 8431 switch (name[1]) 8432 { 8433 case 'c': 8434 if (name[2] == 't') 8435 { /* oct */ 8436 return -KEY_oct; 8437 } 8438 8439 goto unknown; 8440 8441 case 'r': 8442 if (name[2] == 'd') 8443 { /* ord */ 8444 return -KEY_ord; 8445 } 8446 8447 goto unknown; 8448 8449 case 'u': 8450 if (name[2] == 'r') 8451 { /* our */ 8452 return KEY_our; 8453 } 8454 8455 goto unknown; 8456 8457 default: 8458 goto unknown; 8459 } 8460 8461 case 'p': 8462 if (name[1] == 'o') 8463 { 8464 switch (name[2]) 8465 { 8466 case 'p': 8467 { /* pop */ 8468 return -KEY_pop; 8469 } 8470 8471 case 's': 8472 { /* pos */ 8473 return KEY_pos; 8474 } 8475 8476 default: 8477 goto unknown; 8478 } 8479 } 8480 8481 goto unknown; 8482 8483 case 'r': 8484 if (name[1] == 'e' && 8485 name[2] == 'f') 8486 { /* ref */ 8487 return -KEY_ref; 8488 } 8489 8490 goto unknown; 8491 8492 case 's': 8493 switch (name[1]) 8494 { 8495 case 'a': 8496 if (name[2] == 'y') 8497 { /* say */ 8498 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0); 8499 } 8500 8501 goto unknown; 8502 8503 case 'i': 8504 if (name[2] == 'n') 8505 { /* sin */ 8506 return -KEY_sin; 8507 } 8508 8509 goto unknown; 8510 8511 case 'u': 8512 if (name[2] == 'b') 8513 { /* sub */ 8514 return KEY_sub; 8515 } 8516 8517 goto unknown; 8518 8519 default: 8520 goto unknown; 8521 } 8522 8523 case 't': 8524 if (name[1] == 'i' && 8525 name[2] == 'e') 8526 { /* tie */ 8527 return KEY_tie; 8528 } 8529 8530 goto unknown; 8531 8532 case 'u': 8533 if (name[1] == 's' && 8534 name[2] == 'e') 8535 { /* use */ 8536 return KEY_use; 8537 } 8538 8539 goto unknown; 8540 8541 case 'v': 8542 if (name[1] == 'e' && 8543 name[2] == 'c') 8544 { /* vec */ 8545 return -KEY_vec; 8546 } 8547 8548 goto unknown; 8549 8550 case 'x': 8551 if (name[1] == 'o' && 8552 name[2] == 'r') 8553 { /* xor */ 8554 return -KEY_xor; 8555 } 8556 8557 goto unknown; 8558 8559 default: 8560 goto unknown; 8561 } 8562 8563 case 4: /* 41 tokens of length 4 */ 8564 switch (name[0]) 8565 { 8566 case 'C': 8567 if (name[1] == 'O' && 8568 name[2] == 'R' && 8569 name[3] == 'E') 8570 { /* CORE */ 8571 return -KEY_CORE; 8572 } 8573 8574 goto unknown; 8575 8576 case 'I': 8577 if (name[1] == 'N' && 8578 name[2] == 'I' && 8579 name[3] == 'T') 8580 { /* INIT */ 8581 return KEY_INIT; 8582 } 8583 8584 goto unknown; 8585 8586 case 'b': 8587 if (name[1] == 'i' && 8588 name[2] == 'n' && 8589 name[3] == 'd') 8590 { /* bind */ 8591 return -KEY_bind; 8592 } 8593 8594 goto unknown; 8595 8596 case 'c': 8597 if (name[1] == 'h' && 8598 name[2] == 'o' && 8599 name[3] == 'p') 8600 { /* chop */ 8601 return -KEY_chop; 8602 } 8603 8604 goto unknown; 8605 8606 case 'd': 8607 if (name[1] == 'u' && 8608 name[2] == 'm' && 8609 name[3] == 'p') 8610 { /* dump */ 8611 return -KEY_dump; 8612 } 8613 8614 goto unknown; 8615 8616 case 'e': 8617 switch (name[1]) 8618 { 8619 case 'a': 8620 if (name[2] == 'c' && 8621 name[3] == 'h') 8622 { /* each */ 8623 return -KEY_each; 8624 } 8625 8626 goto unknown; 8627 8628 case 'l': 8629 if (name[2] == 's' && 8630 name[3] == 'e') 8631 { /* else */ 8632 return KEY_else; 8633 } 8634 8635 goto unknown; 8636 8637 case 'v': 8638 if (name[2] == 'a' && 8639 name[3] == 'l') 8640 { /* eval */ 8641 return KEY_eval; 8642 } 8643 8644 goto unknown; 8645 8646 case 'x': 8647 switch (name[2]) 8648 { 8649 case 'e': 8650 if (name[3] == 'c') 8651 { /* exec */ 8652 return -KEY_exec; 8653 } 8654 8655 goto unknown; 8656 8657 case 'i': 8658 if (name[3] == 't') 8659 { /* exit */ 8660 return -KEY_exit; 8661 } 8662 8663 goto unknown; 8664 8665 default: 8666 goto unknown; 8667 } 8668 8669 default: 8670 goto unknown; 8671 } 8672 8673 case 'f': 8674 if (name[1] == 'o' && 8675 name[2] == 'r' && 8676 name[3] == 'k') 8677 { /* fork */ 8678 return -KEY_fork; 8679 } 8680 8681 goto unknown; 8682 8683 case 'g': 8684 switch (name[1]) 8685 { 8686 case 'e': 8687 if (name[2] == 't' && 8688 name[3] == 'c') 8689 { /* getc */ 8690 return -KEY_getc; 8691 } 8692 8693 goto unknown; 8694 8695 case 'l': 8696 if (name[2] == 'o' && 8697 name[3] == 'b') 8698 { /* glob */ 8699 return KEY_glob; 8700 } 8701 8702 goto unknown; 8703 8704 case 'o': 8705 if (name[2] == 't' && 8706 name[3] == 'o') 8707 { /* goto */ 8708 return KEY_goto; 8709 } 8710 8711 goto unknown; 8712 8713 case 'r': 8714 if (name[2] == 'e' && 8715 name[3] == 'p') 8716 { /* grep */ 8717 return KEY_grep; 8718 } 8719 8720 goto unknown; 8721 8722 default: 8723 goto unknown; 8724 } 8725 8726 case 'j': 8727 if (name[1] == 'o' && 8728 name[2] == 'i' && 8729 name[3] == 'n') 8730 { /* join */ 8731 return -KEY_join; 8732 } 8733 8734 goto unknown; 8735 8736 case 'k': 8737 switch (name[1]) 8738 { 8739 case 'e': 8740 if (name[2] == 'y' && 8741 name[3] == 's') 8742 { /* keys */ 8743 return -KEY_keys; 8744 } 8745 8746 goto unknown; 8747 8748 case 'i': 8749 if (name[2] == 'l' && 8750 name[3] == 'l') 8751 { /* kill */ 8752 return -KEY_kill; 8753 } 8754 8755 goto unknown; 8756 8757 default: 8758 goto unknown; 8759 } 8760 8761 case 'l': 8762 switch (name[1]) 8763 { 8764 case 'a': 8765 if (name[2] == 's' && 8766 name[3] == 't') 8767 { /* last */ 8768 return KEY_last; 8769 } 8770 8771 goto unknown; 8772 8773 case 'i': 8774 if (name[2] == 'n' && 8775 name[3] == 'k') 8776 { /* link */ 8777 return -KEY_link; 8778 } 8779 8780 goto unknown; 8781 8782 case 'o': 8783 if (name[2] == 'c' && 8784 name[3] == 'k') 8785 { /* lock */ 8786 return -KEY_lock; 8787 } 8788 8789 goto unknown; 8790 8791 default: 8792 goto unknown; 8793 } 8794 8795 case 'n': 8796 if (name[1] == 'e' && 8797 name[2] == 'x' && 8798 name[3] == 't') 8799 { /* next */ 8800 return KEY_next; 8801 } 8802 8803 goto unknown; 8804 8805 case 'o': 8806 if (name[1] == 'p' && 8807 name[2] == 'e' && 8808 name[3] == 'n') 8809 { /* open */ 8810 return -KEY_open; 8811 } 8812 8813 goto unknown; 8814 8815 case 'p': 8816 switch (name[1]) 8817 { 8818 case 'a': 8819 if (name[2] == 'c' && 8820 name[3] == 'k') 8821 { /* pack */ 8822 return -KEY_pack; 8823 } 8824 8825 goto unknown; 8826 8827 case 'i': 8828 if (name[2] == 'p' && 8829 name[3] == 'e') 8830 { /* pipe */ 8831 return -KEY_pipe; 8832 } 8833 8834 goto unknown; 8835 8836 case 'u': 8837 if (name[2] == 's' && 8838 name[3] == 'h') 8839 { /* push */ 8840 return -KEY_push; 8841 } 8842 8843 goto unknown; 8844 8845 default: 8846 goto unknown; 8847 } 8848 8849 case 'r': 8850 switch (name[1]) 8851 { 8852 case 'a': 8853 if (name[2] == 'n' && 8854 name[3] == 'd') 8855 { /* rand */ 8856 return -KEY_rand; 8857 } 8858 8859 goto unknown; 8860 8861 case 'e': 8862 switch (name[2]) 8863 { 8864 case 'a': 8865 if (name[3] == 'd') 8866 { /* read */ 8867 return -KEY_read; 8868 } 8869 8870 goto unknown; 8871 8872 case 'c': 8873 if (name[3] == 'v') 8874 { /* recv */ 8875 return -KEY_recv; 8876 } 8877 8878 goto unknown; 8879 8880 case 'd': 8881 if (name[3] == 'o') 8882 { /* redo */ 8883 return KEY_redo; 8884 } 8885 8886 goto unknown; 8887 8888 default: 8889 goto unknown; 8890 } 8891 8892 default: 8893 goto unknown; 8894 } 8895 8896 case 's': 8897 switch (name[1]) 8898 { 8899 case 'e': 8900 switch (name[2]) 8901 { 8902 case 'e': 8903 if (name[3] == 'k') 8904 { /* seek */ 8905 return -KEY_seek; 8906 } 8907 8908 goto unknown; 8909 8910 case 'n': 8911 if (name[3] == 'd') 8912 { /* send */ 8913 return -KEY_send; 8914 } 8915 8916 goto unknown; 8917 8918 default: 8919 goto unknown; 8920 } 8921 8922 case 'o': 8923 if (name[2] == 'r' && 8924 name[3] == 't') 8925 { /* sort */ 8926 return KEY_sort; 8927 } 8928 8929 goto unknown; 8930 8931 case 'q': 8932 if (name[2] == 'r' && 8933 name[3] == 't') 8934 { /* sqrt */ 8935 return -KEY_sqrt; 8936 } 8937 8938 goto unknown; 8939 8940 case 't': 8941 if (name[2] == 'a' && 8942 name[3] == 't') 8943 { /* stat */ 8944 return -KEY_stat; 8945 } 8946 8947 goto unknown; 8948 8949 default: 8950 goto unknown; 8951 } 8952 8953 case 't': 8954 switch (name[1]) 8955 { 8956 case 'e': 8957 if (name[2] == 'l' && 8958 name[3] == 'l') 8959 { /* tell */ 8960 return -KEY_tell; 8961 } 8962 8963 goto unknown; 8964 8965 case 'i': 8966 switch (name[2]) 8967 { 8968 case 'e': 8969 if (name[3] == 'd') 8970 { /* tied */ 8971 return KEY_tied; 8972 } 8973 8974 goto unknown; 8975 8976 case 'm': 8977 if (name[3] == 'e') 8978 { /* time */ 8979 return -KEY_time; 8980 } 8981 8982 goto unknown; 8983 8984 default: 8985 goto unknown; 8986 } 8987 8988 default: 8989 goto unknown; 8990 } 8991 8992 case 'w': 8993 switch (name[1]) 8994 { 8995 case 'a': 8996 switch (name[2]) 8997 { 8998 case 'i': 8999 if (name[3] == 't') 9000 { /* wait */ 9001 return -KEY_wait; 9002 } 9003 9004 goto unknown; 9005 9006 case 'r': 9007 if (name[3] == 'n') 9008 { /* warn */ 9009 return -KEY_warn; 9010 } 9011 9012 goto unknown; 9013 9014 default: 9015 goto unknown; 9016 } 9017 9018 case 'h': 9019 if (name[2] == 'e' && 9020 name[3] == 'n') 9021 { /* when */ 9022 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0); 9023 } 9024 9025 goto unknown; 9026 9027 default: 9028 goto unknown; 9029 } 9030 9031 default: 9032 goto unknown; 9033 } 9034 9035 case 5: /* 39 tokens of length 5 */ 9036 switch (name[0]) 9037 { 9038 case 'B': 9039 if (name[1] == 'E' && 9040 name[2] == 'G' && 9041 name[3] == 'I' && 9042 name[4] == 'N') 9043 { /* BEGIN */ 9044 return KEY_BEGIN; 9045 } 9046 9047 goto unknown; 9048 9049 case 'C': 9050 if (name[1] == 'H' && 9051 name[2] == 'E' && 9052 name[3] == 'C' && 9053 name[4] == 'K') 9054 { /* CHECK */ 9055 return KEY_CHECK; 9056 } 9057 9058 goto unknown; 9059 9060 case 'a': 9061 switch (name[1]) 9062 { 9063 case 'l': 9064 if (name[2] == 'a' && 9065 name[3] == 'r' && 9066 name[4] == 'm') 9067 { /* alarm */ 9068 return -KEY_alarm; 9069 } 9070 9071 goto unknown; 9072 9073 case 't': 9074 if (name[2] == 'a' && 9075 name[3] == 'n' && 9076 name[4] == '2') 9077 { /* atan2 */ 9078 return -KEY_atan2; 9079 } 9080 9081 goto unknown; 9082 9083 default: 9084 goto unknown; 9085 } 9086 9087 case 'b': 9088 switch (name[1]) 9089 { 9090 case 'l': 9091 if (name[2] == 'e' && 9092 name[3] == 's' && 9093 name[4] == 's') 9094 { /* bless */ 9095 return -KEY_bless; 9096 } 9097 9098 goto unknown; 9099 9100 case 'r': 9101 if (name[2] == 'e' && 9102 name[3] == 'a' && 9103 name[4] == 'k') 9104 { /* break */ 9105 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0); 9106 } 9107 9108 goto unknown; 9109 9110 default: 9111 goto unknown; 9112 } 9113 9114 case 'c': 9115 switch (name[1]) 9116 { 9117 case 'h': 9118 switch (name[2]) 9119 { 9120 case 'd': 9121 if (name[3] == 'i' && 9122 name[4] == 'r') 9123 { /* chdir */ 9124 return -KEY_chdir; 9125 } 9126 9127 goto unknown; 9128 9129 case 'm': 9130 if (name[3] == 'o' && 9131 name[4] == 'd') 9132 { /* chmod */ 9133 return -KEY_chmod; 9134 } 9135 9136 goto unknown; 9137 9138 case 'o': 9139 switch (name[3]) 9140 { 9141 case 'm': 9142 if (name[4] == 'p') 9143 { /* chomp */ 9144 return -KEY_chomp; 9145 } 9146 9147 goto unknown; 9148 9149 case 'w': 9150 if (name[4] == 'n') 9151 { /* chown */ 9152 return -KEY_chown; 9153 } 9154 9155 goto unknown; 9156 9157 default: 9158 goto unknown; 9159 } 9160 9161 default: 9162 goto unknown; 9163 } 9164 9165 case 'l': 9166 if (name[2] == 'o' && 9167 name[3] == 's' && 9168 name[4] == 'e') 9169 { /* close */ 9170 return -KEY_close; 9171 } 9172 9173 goto unknown; 9174 9175 case 'r': 9176 if (name[2] == 'y' && 9177 name[3] == 'p' && 9178 name[4] == 't') 9179 { /* crypt */ 9180 return -KEY_crypt; 9181 } 9182 9183 goto unknown; 9184 9185 default: 9186 goto unknown; 9187 } 9188 9189 case 'e': 9190 if (name[1] == 'l' && 9191 name[2] == 's' && 9192 name[3] == 'i' && 9193 name[4] == 'f') 9194 { /* elsif */ 9195 return KEY_elsif; 9196 } 9197 9198 goto unknown; 9199 9200 case 'f': 9201 switch (name[1]) 9202 { 9203 case 'c': 9204 if (name[2] == 'n' && 9205 name[3] == 't' && 9206 name[4] == 'l') 9207 { /* fcntl */ 9208 return -KEY_fcntl; 9209 } 9210 9211 goto unknown; 9212 9213 case 'l': 9214 if (name[2] == 'o' && 9215 name[3] == 'c' && 9216 name[4] == 'k') 9217 { /* flock */ 9218 return -KEY_flock; 9219 } 9220 9221 goto unknown; 9222 9223 default: 9224 goto unknown; 9225 } 9226 9227 case 'g': 9228 if (name[1] == 'i' && 9229 name[2] == 'v' && 9230 name[3] == 'e' && 9231 name[4] == 'n') 9232 { /* given */ 9233 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0); 9234 } 9235 9236 goto unknown; 9237 9238 case 'i': 9239 switch (name[1]) 9240 { 9241 case 'n': 9242 if (name[2] == 'd' && 9243 name[3] == 'e' && 9244 name[4] == 'x') 9245 { /* index */ 9246 return -KEY_index; 9247 } 9248 9249 goto unknown; 9250 9251 case 'o': 9252 if (name[2] == 'c' && 9253 name[3] == 't' && 9254 name[4] == 'l') 9255 { /* ioctl */ 9256 return -KEY_ioctl; 9257 } 9258 9259 goto unknown; 9260 9261 default: 9262 goto unknown; 9263 } 9264 9265 case 'l': 9266 switch (name[1]) 9267 { 9268 case 'o': 9269 if (name[2] == 'c' && 9270 name[3] == 'a' && 9271 name[4] == 'l') 9272 { /* local */ 9273 return KEY_local; 9274 } 9275 9276 goto unknown; 9277 9278 case 's': 9279 if (name[2] == 't' && 9280 name[3] == 'a' && 9281 name[4] == 't') 9282 { /* lstat */ 9283 return -KEY_lstat; 9284 } 9285 9286 goto unknown; 9287 9288 default: 9289 goto unknown; 9290 } 9291 9292 case 'm': 9293 if (name[1] == 'k' && 9294 name[2] == 'd' && 9295 name[3] == 'i' && 9296 name[4] == 'r') 9297 { /* mkdir */ 9298 return -KEY_mkdir; 9299 } 9300 9301 goto unknown; 9302 9303 case 'p': 9304 if (name[1] == 'r' && 9305 name[2] == 'i' && 9306 name[3] == 'n' && 9307 name[4] == 't') 9308 { /* print */ 9309 return KEY_print; 9310 } 9311 9312 goto unknown; 9313 9314 case 'r': 9315 switch (name[1]) 9316 { 9317 case 'e': 9318 if (name[2] == 's' && 9319 name[3] == 'e' && 9320 name[4] == 't') 9321 { /* reset */ 9322 return -KEY_reset; 9323 } 9324 9325 goto unknown; 9326 9327 case 'm': 9328 if (name[2] == 'd' && 9329 name[3] == 'i' && 9330 name[4] == 'r') 9331 { /* rmdir */ 9332 return -KEY_rmdir; 9333 } 9334 9335 goto unknown; 9336 9337 default: 9338 goto unknown; 9339 } 9340 9341 case 's': 9342 switch (name[1]) 9343 { 9344 case 'e': 9345 if (name[2] == 'm' && 9346 name[3] == 'o' && 9347 name[4] == 'p') 9348 { /* semop */ 9349 return -KEY_semop; 9350 } 9351 9352 goto unknown; 9353 9354 case 'h': 9355 if (name[2] == 'i' && 9356 name[3] == 'f' && 9357 name[4] == 't') 9358 { /* shift */ 9359 return -KEY_shift; 9360 } 9361 9362 goto unknown; 9363 9364 case 'l': 9365 if (name[2] == 'e' && 9366 name[3] == 'e' && 9367 name[4] == 'p') 9368 { /* sleep */ 9369 return -KEY_sleep; 9370 } 9371 9372 goto unknown; 9373 9374 case 'p': 9375 if (name[2] == 'l' && 9376 name[3] == 'i' && 9377 name[4] == 't') 9378 { /* split */ 9379 return KEY_split; 9380 } 9381 9382 goto unknown; 9383 9384 case 'r': 9385 if (name[2] == 'a' && 9386 name[3] == 'n' && 9387 name[4] == 'd') 9388 { /* srand */ 9389 return -KEY_srand; 9390 } 9391 9392 goto unknown; 9393 9394 case 't': 9395 switch (name[2]) 9396 { 9397 case 'a': 9398 if (name[3] == 't' && 9399 name[4] == 'e') 9400 { /* state */ 9401 return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0); 9402 } 9403 9404 goto unknown; 9405 9406 case 'u': 9407 if (name[3] == 'd' && 9408 name[4] == 'y') 9409 { /* study */ 9410 return KEY_study; 9411 } 9412 9413 goto unknown; 9414 9415 default: 9416 goto unknown; 9417 } 9418 9419 default: 9420 goto unknown; 9421 } 9422 9423 case 't': 9424 if (name[1] == 'i' && 9425 name[2] == 'm' && 9426 name[3] == 'e' && 9427 name[4] == 's') 9428 { /* times */ 9429 return -KEY_times; 9430 } 9431 9432 goto unknown; 9433 9434 case 'u': 9435 switch (name[1]) 9436 { 9437 case 'm': 9438 if (name[2] == 'a' && 9439 name[3] == 's' && 9440 name[4] == 'k') 9441 { /* umask */ 9442 return -KEY_umask; 9443 } 9444 9445 goto unknown; 9446 9447 case 'n': 9448 switch (name[2]) 9449 { 9450 case 'd': 9451 if (name[3] == 'e' && 9452 name[4] == 'f') 9453 { /* undef */ 9454 return KEY_undef; 9455 } 9456 9457 goto unknown; 9458 9459 case 't': 9460 if (name[3] == 'i') 9461 { 9462 switch (name[4]) 9463 { 9464 case 'e': 9465 { /* untie */ 9466 return KEY_untie; 9467 } 9468 9469 case 'l': 9470 { /* until */ 9471 return KEY_until; 9472 } 9473 9474 default: 9475 goto unknown; 9476 } 9477 } 9478 9479 goto unknown; 9480 9481 default: 9482 goto unknown; 9483 } 9484 9485 case 't': 9486 if (name[2] == 'i' && 9487 name[3] == 'm' && 9488 name[4] == 'e') 9489 { /* utime */ 9490 return -KEY_utime; 9491 } 9492 9493 goto unknown; 9494 9495 default: 9496 goto unknown; 9497 } 9498 9499 case 'w': 9500 switch (name[1]) 9501 { 9502 case 'h': 9503 if (name[2] == 'i' && 9504 name[3] == 'l' && 9505 name[4] == 'e') 9506 { /* while */ 9507 return KEY_while; 9508 } 9509 9510 goto unknown; 9511 9512 case 'r': 9513 if (name[2] == 'i' && 9514 name[3] == 't' && 9515 name[4] == 'e') 9516 { /* write */ 9517 return -KEY_write; 9518 } 9519 9520 goto unknown; 9521 9522 default: 9523 goto unknown; 9524 } 9525 9526 default: 9527 goto unknown; 9528 } 9529 9530 case 6: /* 33 tokens of length 6 */ 9531 switch (name[0]) 9532 { 9533 case 'a': 9534 if (name[1] == 'c' && 9535 name[2] == 'c' && 9536 name[3] == 'e' && 9537 name[4] == 'p' && 9538 name[5] == 't') 9539 { /* accept */ 9540 return -KEY_accept; 9541 } 9542 9543 goto unknown; 9544 9545 case 'c': 9546 switch (name[1]) 9547 { 9548 case 'a': 9549 if (name[2] == 'l' && 9550 name[3] == 'l' && 9551 name[4] == 'e' && 9552 name[5] == 'r') 9553 { /* caller */ 9554 return -KEY_caller; 9555 } 9556 9557 goto unknown; 9558 9559 case 'h': 9560 if (name[2] == 'r' && 9561 name[3] == 'o' && 9562 name[4] == 'o' && 9563 name[5] == 't') 9564 { /* chroot */ 9565 return -KEY_chroot; 9566 } 9567 9568 goto unknown; 9569 9570 default: 9571 goto unknown; 9572 } 9573 9574 case 'd': 9575 if (name[1] == 'e' && 9576 name[2] == 'l' && 9577 name[3] == 'e' && 9578 name[4] == 't' && 9579 name[5] == 'e') 9580 { /* delete */ 9581 return KEY_delete; 9582 } 9583 9584 goto unknown; 9585 9586 case 'e': 9587 switch (name[1]) 9588 { 9589 case 'l': 9590 if (name[2] == 's' && 9591 name[3] == 'e' && 9592 name[4] == 'i' && 9593 name[5] == 'f') 9594 { /* elseif */ 9595 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif"); 9596 } 9597 9598 goto unknown; 9599 9600 case 'x': 9601 if (name[2] == 'i' && 9602 name[3] == 's' && 9603 name[4] == 't' && 9604 name[5] == 's') 9605 { /* exists */ 9606 return KEY_exists; 9607 } 9608 9609 goto unknown; 9610 9611 default: 9612 goto unknown; 9613 } 9614 9615 case 'f': 9616 switch (name[1]) 9617 { 9618 case 'i': 9619 if (name[2] == 'l' && 9620 name[3] == 'e' && 9621 name[4] == 'n' && 9622 name[5] == 'o') 9623 { /* fileno */ 9624 return -KEY_fileno; 9625 } 9626 9627 goto unknown; 9628 9629 case 'o': 9630 if (name[2] == 'r' && 9631 name[3] == 'm' && 9632 name[4] == 'a' && 9633 name[5] == 't') 9634 { /* format */ 9635 return KEY_format; 9636 } 9637 9638 goto unknown; 9639 9640 default: 9641 goto unknown; 9642 } 9643 9644 case 'g': 9645 if (name[1] == 'm' && 9646 name[2] == 't' && 9647 name[3] == 'i' && 9648 name[4] == 'm' && 9649 name[5] == 'e') 9650 { /* gmtime */ 9651 return -KEY_gmtime; 9652 } 9653 9654 goto unknown; 9655 9656 case 'l': 9657 switch (name[1]) 9658 { 9659 case 'e': 9660 if (name[2] == 'n' && 9661 name[3] == 'g' && 9662 name[4] == 't' && 9663 name[5] == 'h') 9664 { /* length */ 9665 return -KEY_length; 9666 } 9667 9668 goto unknown; 9669 9670 case 'i': 9671 if (name[2] == 's' && 9672 name[3] == 't' && 9673 name[4] == 'e' && 9674 name[5] == 'n') 9675 { /* listen */ 9676 return -KEY_listen; 9677 } 9678 9679 goto unknown; 9680 9681 default: 9682 goto unknown; 9683 } 9684 9685 case 'm': 9686 if (name[1] == 's' && 9687 name[2] == 'g') 9688 { 9689 switch (name[3]) 9690 { 9691 case 'c': 9692 if (name[4] == 't' && 9693 name[5] == 'l') 9694 { /* msgctl */ 9695 return -KEY_msgctl; 9696 } 9697 9698 goto unknown; 9699 9700 case 'g': 9701 if (name[4] == 'e' && 9702 name[5] == 't') 9703 { /* msgget */ 9704 return -KEY_msgget; 9705 } 9706 9707 goto unknown; 9708 9709 case 'r': 9710 if (name[4] == 'c' && 9711 name[5] == 'v') 9712 { /* msgrcv */ 9713 return -KEY_msgrcv; 9714 } 9715 9716 goto unknown; 9717 9718 case 's': 9719 if (name[4] == 'n' && 9720 name[5] == 'd') 9721 { /* msgsnd */ 9722 return -KEY_msgsnd; 9723 } 9724 9725 goto unknown; 9726 9727 default: 9728 goto unknown; 9729 } 9730 } 9731 9732 goto unknown; 9733 9734 case 'p': 9735 if (name[1] == 'r' && 9736 name[2] == 'i' && 9737 name[3] == 'n' && 9738 name[4] == 't' && 9739 name[5] == 'f') 9740 { /* printf */ 9741 return KEY_printf; 9742 } 9743 9744 goto unknown; 9745 9746 case 'r': 9747 switch (name[1]) 9748 { 9749 case 'e': 9750 switch (name[2]) 9751 { 9752 case 'n': 9753 if (name[3] == 'a' && 9754 name[4] == 'm' && 9755 name[5] == 'e') 9756 { /* rename */ 9757 return -KEY_rename; 9758 } 9759 9760 goto unknown; 9761 9762 case 't': 9763 if (name[3] == 'u' && 9764 name[4] == 'r' && 9765 name[5] == 'n') 9766 { /* return */ 9767 return KEY_return; 9768 } 9769 9770 goto unknown; 9771 9772 default: 9773 goto unknown; 9774 } 9775 9776 case 'i': 9777 if (name[2] == 'n' && 9778 name[3] == 'd' && 9779 name[4] == 'e' && 9780 name[5] == 'x') 9781 { /* rindex */ 9782 return -KEY_rindex; 9783 } 9784 9785 goto unknown; 9786 9787 default: 9788 goto unknown; 9789 } 9790 9791 case 's': 9792 switch (name[1]) 9793 { 9794 case 'c': 9795 if (name[2] == 'a' && 9796 name[3] == 'l' && 9797 name[4] == 'a' && 9798 name[5] == 'r') 9799 { /* scalar */ 9800 return KEY_scalar; 9801 } 9802 9803 goto unknown; 9804 9805 case 'e': 9806 switch (name[2]) 9807 { 9808 case 'l': 9809 if (name[3] == 'e' && 9810 name[4] == 'c' && 9811 name[5] == 't') 9812 { /* select */ 9813 return -KEY_select; 9814 } 9815 9816 goto unknown; 9817 9818 case 'm': 9819 switch (name[3]) 9820 { 9821 case 'c': 9822 if (name[4] == 't' && 9823 name[5] == 'l') 9824 { /* semctl */ 9825 return -KEY_semctl; 9826 } 9827 9828 goto unknown; 9829 9830 case 'g': 9831 if (name[4] == 'e' && 9832 name[5] == 't') 9833 { /* semget */ 9834 return -KEY_semget; 9835 } 9836 9837 goto unknown; 9838 9839 default: 9840 goto unknown; 9841 } 9842 9843 default: 9844 goto unknown; 9845 } 9846 9847 case 'h': 9848 if (name[2] == 'm') 9849 { 9850 switch (name[3]) 9851 { 9852 case 'c': 9853 if (name[4] == 't' && 9854 name[5] == 'l') 9855 { /* shmctl */ 9856 return -KEY_shmctl; 9857 } 9858 9859 goto unknown; 9860 9861 case 'g': 9862 if (name[4] == 'e' && 9863 name[5] == 't') 9864 { /* shmget */ 9865 return -KEY_shmget; 9866 } 9867 9868 goto unknown; 9869 9870 default: 9871 goto unknown; 9872 } 9873 } 9874 9875 goto unknown; 9876 9877 case 'o': 9878 if (name[2] == 'c' && 9879 name[3] == 'k' && 9880 name[4] == 'e' && 9881 name[5] == 't') 9882 { /* socket */ 9883 return -KEY_socket; 9884 } 9885 9886 goto unknown; 9887 9888 case 'p': 9889 if (name[2] == 'l' && 9890 name[3] == 'i' && 9891 name[4] == 'c' && 9892 name[5] == 'e') 9893 { /* splice */ 9894 return -KEY_splice; 9895 } 9896 9897 goto unknown; 9898 9899 case 'u': 9900 if (name[2] == 'b' && 9901 name[3] == 's' && 9902 name[4] == 't' && 9903 name[5] == 'r') 9904 { /* substr */ 9905 return -KEY_substr; 9906 } 9907 9908 goto unknown; 9909 9910 case 'y': 9911 if (name[2] == 's' && 9912 name[3] == 't' && 9913 name[4] == 'e' && 9914 name[5] == 'm') 9915 { /* system */ 9916 return -KEY_system; 9917 } 9918 9919 goto unknown; 9920 9921 default: 9922 goto unknown; 9923 } 9924 9925 case 'u': 9926 if (name[1] == 'n') 9927 { 9928 switch (name[2]) 9929 { 9930 case 'l': 9931 switch (name[3]) 9932 { 9933 case 'e': 9934 if (name[4] == 's' && 9935 name[5] == 's') 9936 { /* unless */ 9937 return KEY_unless; 9938 } 9939 9940 goto unknown; 9941 9942 case 'i': 9943 if (name[4] == 'n' && 9944 name[5] == 'k') 9945 { /* unlink */ 9946 return -KEY_unlink; 9947 } 9948 9949 goto unknown; 9950 9951 default: 9952 goto unknown; 9953 } 9954 9955 case 'p': 9956 if (name[3] == 'a' && 9957 name[4] == 'c' && 9958 name[5] == 'k') 9959 { /* unpack */ 9960 return -KEY_unpack; 9961 } 9962 9963 goto unknown; 9964 9965 default: 9966 goto unknown; 9967 } 9968 } 9969 9970 goto unknown; 9971 9972 case 'v': 9973 if (name[1] == 'a' && 9974 name[2] == 'l' && 9975 name[3] == 'u' && 9976 name[4] == 'e' && 9977 name[5] == 's') 9978 { /* values */ 9979 return -KEY_values; 9980 } 9981 9982 goto unknown; 9983 9984 default: 9985 goto unknown; 9986 } 9987 9988 case 7: /* 29 tokens of length 7 */ 9989 switch (name[0]) 9990 { 9991 case 'D': 9992 if (name[1] == 'E' && 9993 name[2] == 'S' && 9994 name[3] == 'T' && 9995 name[4] == 'R' && 9996 name[5] == 'O' && 9997 name[6] == 'Y') 9998 { /* DESTROY */ 9999 return KEY_DESTROY; 10000 } 10001 10002 goto unknown; 10003 10004 case '_': 10005 if (name[1] == '_' && 10006 name[2] == 'E' && 10007 name[3] == 'N' && 10008 name[4] == 'D' && 10009 name[5] == '_' && 10010 name[6] == '_') 10011 { /* __END__ */ 10012 return KEY___END__; 10013 } 10014 10015 goto unknown; 10016 10017 case 'b': 10018 if (name[1] == 'i' && 10019 name[2] == 'n' && 10020 name[3] == 'm' && 10021 name[4] == 'o' && 10022 name[5] == 'd' && 10023 name[6] == 'e') 10024 { /* binmode */ 10025 return -KEY_binmode; 10026 } 10027 10028 goto unknown; 10029 10030 case 'c': 10031 if (name[1] == 'o' && 10032 name[2] == 'n' && 10033 name[3] == 'n' && 10034 name[4] == 'e' && 10035 name[5] == 'c' && 10036 name[6] == 't') 10037 { /* connect */ 10038 return -KEY_connect; 10039 } 10040 10041 goto unknown; 10042 10043 case 'd': 10044 switch (name[1]) 10045 { 10046 case 'b': 10047 if (name[2] == 'm' && 10048 name[3] == 'o' && 10049 name[4] == 'p' && 10050 name[5] == 'e' && 10051 name[6] == 'n') 10052 { /* dbmopen */ 10053 return -KEY_dbmopen; 10054 } 10055 10056 goto unknown; 10057 10058 case 'e': 10059 if (name[2] == 'f') 10060 { 10061 switch (name[3]) 10062 { 10063 case 'a': 10064 if (name[4] == 'u' && 10065 name[5] == 'l' && 10066 name[6] == 't') 10067 { /* default */ 10068 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0); 10069 } 10070 10071 goto unknown; 10072 10073 case 'i': 10074 if (name[4] == 'n' && 10075 name[5] == 'e' && 10076 name[6] == 'd') 10077 { /* defined */ 10078 return KEY_defined; 10079 } 10080 10081 goto unknown; 10082 10083 default: 10084 goto unknown; 10085 } 10086 } 10087 10088 goto unknown; 10089 10090 default: 10091 goto unknown; 10092 } 10093 10094 case 'f': 10095 if (name[1] == 'o' && 10096 name[2] == 'r' && 10097 name[3] == 'e' && 10098 name[4] == 'a' && 10099 name[5] == 'c' && 10100 name[6] == 'h') 10101 { /* foreach */ 10102 return KEY_foreach; 10103 } 10104 10105 goto unknown; 10106 10107 case 'g': 10108 if (name[1] == 'e' && 10109 name[2] == 't' && 10110 name[3] == 'p') 10111 { 10112 switch (name[4]) 10113 { 10114 case 'g': 10115 if (name[5] == 'r' && 10116 name[6] == 'p') 10117 { /* getpgrp */ 10118 return -KEY_getpgrp; 10119 } 10120 10121 goto unknown; 10122 10123 case 'p': 10124 if (name[5] == 'i' && 10125 name[6] == 'd') 10126 { /* getppid */ 10127 return -KEY_getppid; 10128 } 10129 10130 goto unknown; 10131 10132 default: 10133 goto unknown; 10134 } 10135 } 10136 10137 goto unknown; 10138 10139 case 'l': 10140 if (name[1] == 'c' && 10141 name[2] == 'f' && 10142 name[3] == 'i' && 10143 name[4] == 'r' && 10144 name[5] == 's' && 10145 name[6] == 't') 10146 { /* lcfirst */ 10147 return -KEY_lcfirst; 10148 } 10149 10150 goto unknown; 10151 10152 case 'o': 10153 if (name[1] == 'p' && 10154 name[2] == 'e' && 10155 name[3] == 'n' && 10156 name[4] == 'd' && 10157 name[5] == 'i' && 10158 name[6] == 'r') 10159 { /* opendir */ 10160 return -KEY_opendir; 10161 } 10162 10163 goto unknown; 10164 10165 case 'p': 10166 if (name[1] == 'a' && 10167 name[2] == 'c' && 10168 name[3] == 'k' && 10169 name[4] == 'a' && 10170 name[5] == 'g' && 10171 name[6] == 'e') 10172 { /* package */ 10173 return KEY_package; 10174 } 10175 10176 goto unknown; 10177 10178 case 'r': 10179 if (name[1] == 'e') 10180 { 10181 switch (name[2]) 10182 { 10183 case 'a': 10184 if (name[3] == 'd' && 10185 name[4] == 'd' && 10186 name[5] == 'i' && 10187 name[6] == 'r') 10188 { /* readdir */ 10189 return -KEY_readdir; 10190 } 10191 10192 goto unknown; 10193 10194 case 'q': 10195 if (name[3] == 'u' && 10196 name[4] == 'i' && 10197 name[5] == 'r' && 10198 name[6] == 'e') 10199 { /* require */ 10200 return KEY_require; 10201 } 10202 10203 goto unknown; 10204 10205 case 'v': 10206 if (name[3] == 'e' && 10207 name[4] == 'r' && 10208 name[5] == 's' && 10209 name[6] == 'e') 10210 { /* reverse */ 10211 return -KEY_reverse; 10212 } 10213 10214 goto unknown; 10215 10216 default: 10217 goto unknown; 10218 } 10219 } 10220 10221 goto unknown; 10222 10223 case 's': 10224 switch (name[1]) 10225 { 10226 case 'e': 10227 switch (name[2]) 10228 { 10229 case 'e': 10230 if (name[3] == 'k' && 10231 name[4] == 'd' && 10232 name[5] == 'i' && 10233 name[6] == 'r') 10234 { /* seekdir */ 10235 return -KEY_seekdir; 10236 } 10237 10238 goto unknown; 10239 10240 case 't': 10241 if (name[3] == 'p' && 10242 name[4] == 'g' && 10243 name[5] == 'r' && 10244 name[6] == 'p') 10245 { /* setpgrp */ 10246 return -KEY_setpgrp; 10247 } 10248 10249 goto unknown; 10250 10251 default: 10252 goto unknown; 10253 } 10254 10255 case 'h': 10256 if (name[2] == 'm' && 10257 name[3] == 'r' && 10258 name[4] == 'e' && 10259 name[5] == 'a' && 10260 name[6] == 'd') 10261 { /* shmread */ 10262 return -KEY_shmread; 10263 } 10264 10265 goto unknown; 10266 10267 case 'p': 10268 if (name[2] == 'r' && 10269 name[3] == 'i' && 10270 name[4] == 'n' && 10271 name[5] == 't' && 10272 name[6] == 'f') 10273 { /* sprintf */ 10274 return -KEY_sprintf; 10275 } 10276 10277 goto unknown; 10278 10279 case 'y': 10280 switch (name[2]) 10281 { 10282 case 'm': 10283 if (name[3] == 'l' && 10284 name[4] == 'i' && 10285 name[5] == 'n' && 10286 name[6] == 'k') 10287 { /* symlink */ 10288 return -KEY_symlink; 10289 } 10290 10291 goto unknown; 10292 10293 case 's': 10294 switch (name[3]) 10295 { 10296 case 'c': 10297 if (name[4] == 'a' && 10298 name[5] == 'l' && 10299 name[6] == 'l') 10300 { /* syscall */ 10301 return -KEY_syscall; 10302 } 10303 10304 goto unknown; 10305 10306 case 'o': 10307 if (name[4] == 'p' && 10308 name[5] == 'e' && 10309 name[6] == 'n') 10310 { /* sysopen */ 10311 return -KEY_sysopen; 10312 } 10313 10314 goto unknown; 10315 10316 case 'r': 10317 if (name[4] == 'e' && 10318 name[5] == 'a' && 10319 name[6] == 'd') 10320 { /* sysread */ 10321 return -KEY_sysread; 10322 } 10323 10324 goto unknown; 10325 10326 case 's': 10327 if (name[4] == 'e' && 10328 name[5] == 'e' && 10329 name[6] == 'k') 10330 { /* sysseek */ 10331 return -KEY_sysseek; 10332 } 10333 10334 goto unknown; 10335 10336 default: 10337 goto unknown; 10338 } 10339 10340 default: 10341 goto unknown; 10342 } 10343 10344 default: 10345 goto unknown; 10346 } 10347 10348 case 't': 10349 if (name[1] == 'e' && 10350 name[2] == 'l' && 10351 name[3] == 'l' && 10352 name[4] == 'd' && 10353 name[5] == 'i' && 10354 name[6] == 'r') 10355 { /* telldir */ 10356 return -KEY_telldir; 10357 } 10358 10359 goto unknown; 10360 10361 case 'u': 10362 switch (name[1]) 10363 { 10364 case 'c': 10365 if (name[2] == 'f' && 10366 name[3] == 'i' && 10367 name[4] == 'r' && 10368 name[5] == 's' && 10369 name[6] == 't') 10370 { /* ucfirst */ 10371 return -KEY_ucfirst; 10372 } 10373 10374 goto unknown; 10375 10376 case 'n': 10377 if (name[2] == 's' && 10378 name[3] == 'h' && 10379 name[4] == 'i' && 10380 name[5] == 'f' && 10381 name[6] == 't') 10382 { /* unshift */ 10383 return -KEY_unshift; 10384 } 10385 10386 goto unknown; 10387 10388 default: 10389 goto unknown; 10390 } 10391 10392 case 'w': 10393 if (name[1] == 'a' && 10394 name[2] == 'i' && 10395 name[3] == 't' && 10396 name[4] == 'p' && 10397 name[5] == 'i' && 10398 name[6] == 'd') 10399 { /* waitpid */ 10400 return -KEY_waitpid; 10401 } 10402 10403 goto unknown; 10404 10405 default: 10406 goto unknown; 10407 } 10408 10409 case 8: /* 26 tokens of length 8 */ 10410 switch (name[0]) 10411 { 10412 case 'A': 10413 if (name[1] == 'U' && 10414 name[2] == 'T' && 10415 name[3] == 'O' && 10416 name[4] == 'L' && 10417 name[5] == 'O' && 10418 name[6] == 'A' && 10419 name[7] == 'D') 10420 { /* AUTOLOAD */ 10421 return KEY_AUTOLOAD; 10422 } 10423 10424 goto unknown; 10425 10426 case '_': 10427 if (name[1] == '_') 10428 { 10429 switch (name[2]) 10430 { 10431 case 'D': 10432 if (name[3] == 'A' && 10433 name[4] == 'T' && 10434 name[5] == 'A' && 10435 name[6] == '_' && 10436 name[7] == '_') 10437 { /* __DATA__ */ 10438 return KEY___DATA__; 10439 } 10440 10441 goto unknown; 10442 10443 case 'F': 10444 if (name[3] == 'I' && 10445 name[4] == 'L' && 10446 name[5] == 'E' && 10447 name[6] == '_' && 10448 name[7] == '_') 10449 { /* __FILE__ */ 10450 return -KEY___FILE__; 10451 } 10452 10453 goto unknown; 10454 10455 case 'L': 10456 if (name[3] == 'I' && 10457 name[4] == 'N' && 10458 name[5] == 'E' && 10459 name[6] == '_' && 10460 name[7] == '_') 10461 { /* __LINE__ */ 10462 return -KEY___LINE__; 10463 } 10464 10465 goto unknown; 10466 10467 default: 10468 goto unknown; 10469 } 10470 } 10471 10472 goto unknown; 10473 10474 case 'c': 10475 switch (name[1]) 10476 { 10477 case 'l': 10478 if (name[2] == 'o' && 10479 name[3] == 's' && 10480 name[4] == 'e' && 10481 name[5] == 'd' && 10482 name[6] == 'i' && 10483 name[7] == 'r') 10484 { /* closedir */ 10485 return -KEY_closedir; 10486 } 10487 10488 goto unknown; 10489 10490 case 'o': 10491 if (name[2] == 'n' && 10492 name[3] == 't' && 10493 name[4] == 'i' && 10494 name[5] == 'n' && 10495 name[6] == 'u' && 10496 name[7] == 'e') 10497 { /* continue */ 10498 return -KEY_continue; 10499 } 10500 10501 goto unknown; 10502 10503 default: 10504 goto unknown; 10505 } 10506 10507 case 'd': 10508 if (name[1] == 'b' && 10509 name[2] == 'm' && 10510 name[3] == 'c' && 10511 name[4] == 'l' && 10512 name[5] == 'o' && 10513 name[6] == 's' && 10514 name[7] == 'e') 10515 { /* dbmclose */ 10516 return -KEY_dbmclose; 10517 } 10518 10519 goto unknown; 10520 10521 case 'e': 10522 if (name[1] == 'n' && 10523 name[2] == 'd') 10524 { 10525 switch (name[3]) 10526 { 10527 case 'g': 10528 if (name[4] == 'r' && 10529 name[5] == 'e' && 10530 name[6] == 'n' && 10531 name[7] == 't') 10532 { /* endgrent */ 10533 return -KEY_endgrent; 10534 } 10535 10536 goto unknown; 10537 10538 case 'p': 10539 if (name[4] == 'w' && 10540 name[5] == 'e' && 10541 name[6] == 'n' && 10542 name[7] == 't') 10543 { /* endpwent */ 10544 return -KEY_endpwent; 10545 } 10546 10547 goto unknown; 10548 10549 default: 10550 goto unknown; 10551 } 10552 } 10553 10554 goto unknown; 10555 10556 case 'f': 10557 if (name[1] == 'o' && 10558 name[2] == 'r' && 10559 name[3] == 'm' && 10560 name[4] == 'l' && 10561 name[5] == 'i' && 10562 name[6] == 'n' && 10563 name[7] == 'e') 10564 { /* formline */ 10565 return -KEY_formline; 10566 } 10567 10568 goto unknown; 10569 10570 case 'g': 10571 if (name[1] == 'e' && 10572 name[2] == 't') 10573 { 10574 switch (name[3]) 10575 { 10576 case 'g': 10577 if (name[4] == 'r') 10578 { 10579 switch (name[5]) 10580 { 10581 case 'e': 10582 if (name[6] == 'n' && 10583 name[7] == 't') 10584 { /* getgrent */ 10585 return -KEY_getgrent; 10586 } 10587 10588 goto unknown; 10589 10590 case 'g': 10591 if (name[6] == 'i' && 10592 name[7] == 'd') 10593 { /* getgrgid */ 10594 return -KEY_getgrgid; 10595 } 10596 10597 goto unknown; 10598 10599 case 'n': 10600 if (name[6] == 'a' && 10601 name[7] == 'm') 10602 { /* getgrnam */ 10603 return -KEY_getgrnam; 10604 } 10605 10606 goto unknown; 10607 10608 default: 10609 goto unknown; 10610 } 10611 } 10612 10613 goto unknown; 10614 10615 case 'l': 10616 if (name[4] == 'o' && 10617 name[5] == 'g' && 10618 name[6] == 'i' && 10619 name[7] == 'n') 10620 { /* getlogin */ 10621 return -KEY_getlogin; 10622 } 10623 10624 goto unknown; 10625 10626 case 'p': 10627 if (name[4] == 'w') 10628 { 10629 switch (name[5]) 10630 { 10631 case 'e': 10632 if (name[6] == 'n' && 10633 name[7] == 't') 10634 { /* getpwent */ 10635 return -KEY_getpwent; 10636 } 10637 10638 goto unknown; 10639 10640 case 'n': 10641 if (name[6] == 'a' && 10642 name[7] == 'm') 10643 { /* getpwnam */ 10644 return -KEY_getpwnam; 10645 } 10646 10647 goto unknown; 10648 10649 case 'u': 10650 if (name[6] == 'i' && 10651 name[7] == 'd') 10652 { /* getpwuid */ 10653 return -KEY_getpwuid; 10654 } 10655 10656 goto unknown; 10657 10658 default: 10659 goto unknown; 10660 } 10661 } 10662 10663 goto unknown; 10664 10665 default: 10666 goto unknown; 10667 } 10668 } 10669 10670 goto unknown; 10671 10672 case 'r': 10673 if (name[1] == 'e' && 10674 name[2] == 'a' && 10675 name[3] == 'd') 10676 { 10677 switch (name[4]) 10678 { 10679 case 'l': 10680 if (name[5] == 'i' && 10681 name[6] == 'n') 10682 { 10683 switch (name[7]) 10684 { 10685 case 'e': 10686 { /* readline */ 10687 return -KEY_readline; 10688 } 10689 10690 case 'k': 10691 { /* readlink */ 10692 return -KEY_readlink; 10693 } 10694 10695 default: 10696 goto unknown; 10697 } 10698 } 10699 10700 goto unknown; 10701 10702 case 'p': 10703 if (name[5] == 'i' && 10704 name[6] == 'p' && 10705 name[7] == 'e') 10706 { /* readpipe */ 10707 return -KEY_readpipe; 10708 } 10709 10710 goto unknown; 10711 10712 default: 10713 goto unknown; 10714 } 10715 } 10716 10717 goto unknown; 10718 10719 case 's': 10720 switch (name[1]) 10721 { 10722 case 'e': 10723 if (name[2] == 't') 10724 { 10725 switch (name[3]) 10726 { 10727 case 'g': 10728 if (name[4] == 'r' && 10729 name[5] == 'e' && 10730 name[6] == 'n' && 10731 name[7] == 't') 10732 { /* setgrent */ 10733 return -KEY_setgrent; 10734 } 10735 10736 goto unknown; 10737 10738 case 'p': 10739 if (name[4] == 'w' && 10740 name[5] == 'e' && 10741 name[6] == 'n' && 10742 name[7] == 't') 10743 { /* setpwent */ 10744 return -KEY_setpwent; 10745 } 10746 10747 goto unknown; 10748 10749 default: 10750 goto unknown; 10751 } 10752 } 10753 10754 goto unknown; 10755 10756 case 'h': 10757 switch (name[2]) 10758 { 10759 case 'm': 10760 if (name[3] == 'w' && 10761 name[4] == 'r' && 10762 name[5] == 'i' && 10763 name[6] == 't' && 10764 name[7] == 'e') 10765 { /* shmwrite */ 10766 return -KEY_shmwrite; 10767 } 10768 10769 goto unknown; 10770 10771 case 'u': 10772 if (name[3] == 't' && 10773 name[4] == 'd' && 10774 name[5] == 'o' && 10775 name[6] == 'w' && 10776 name[7] == 'n') 10777 { /* shutdown */ 10778 return -KEY_shutdown; 10779 } 10780 10781 goto unknown; 10782 10783 default: 10784 goto unknown; 10785 } 10786 10787 case 'y': 10788 if (name[2] == 's' && 10789 name[3] == 'w' && 10790 name[4] == 'r' && 10791 name[5] == 'i' && 10792 name[6] == 't' && 10793 name[7] == 'e') 10794 { /* syswrite */ 10795 return -KEY_syswrite; 10796 } 10797 10798 goto unknown; 10799 10800 default: 10801 goto unknown; 10802 } 10803 10804 case 't': 10805 if (name[1] == 'r' && 10806 name[2] == 'u' && 10807 name[3] == 'n' && 10808 name[4] == 'c' && 10809 name[5] == 'a' && 10810 name[6] == 't' && 10811 name[7] == 'e') 10812 { /* truncate */ 10813 return -KEY_truncate; 10814 } 10815 10816 goto unknown; 10817 10818 default: 10819 goto unknown; 10820 } 10821 10822 case 9: /* 9 tokens of length 9 */ 10823 switch (name[0]) 10824 { 10825 case 'U': 10826 if (name[1] == 'N' && 10827 name[2] == 'I' && 10828 name[3] == 'T' && 10829 name[4] == 'C' && 10830 name[5] == 'H' && 10831 name[6] == 'E' && 10832 name[7] == 'C' && 10833 name[8] == 'K') 10834 { /* UNITCHECK */ 10835 return KEY_UNITCHECK; 10836 } 10837 10838 goto unknown; 10839 10840 case 'e': 10841 if (name[1] == 'n' && 10842 name[2] == 'd' && 10843 name[3] == 'n' && 10844 name[4] == 'e' && 10845 name[5] == 't' && 10846 name[6] == 'e' && 10847 name[7] == 'n' && 10848 name[8] == 't') 10849 { /* endnetent */ 10850 return -KEY_endnetent; 10851 } 10852 10853 goto unknown; 10854 10855 case 'g': 10856 if (name[1] == 'e' && 10857 name[2] == 't' && 10858 name[3] == 'n' && 10859 name[4] == 'e' && 10860 name[5] == 't' && 10861 name[6] == 'e' && 10862 name[7] == 'n' && 10863 name[8] == 't') 10864 { /* getnetent */ 10865 return -KEY_getnetent; 10866 } 10867 10868 goto unknown; 10869 10870 case 'l': 10871 if (name[1] == 'o' && 10872 name[2] == 'c' && 10873 name[3] == 'a' && 10874 name[4] == 'l' && 10875 name[5] == 't' && 10876 name[6] == 'i' && 10877 name[7] == 'm' && 10878 name[8] == 'e') 10879 { /* localtime */ 10880 return -KEY_localtime; 10881 } 10882 10883 goto unknown; 10884 10885 case 'p': 10886 if (name[1] == 'r' && 10887 name[2] == 'o' && 10888 name[3] == 't' && 10889 name[4] == 'o' && 10890 name[5] == 't' && 10891 name[6] == 'y' && 10892 name[7] == 'p' && 10893 name[8] == 'e') 10894 { /* prototype */ 10895 return KEY_prototype; 10896 } 10897 10898 goto unknown; 10899 10900 case 'q': 10901 if (name[1] == 'u' && 10902 name[2] == 'o' && 10903 name[3] == 't' && 10904 name[4] == 'e' && 10905 name[5] == 'm' && 10906 name[6] == 'e' && 10907 name[7] == 't' && 10908 name[8] == 'a') 10909 { /* quotemeta */ 10910 return -KEY_quotemeta; 10911 } 10912 10913 goto unknown; 10914 10915 case 'r': 10916 if (name[1] == 'e' && 10917 name[2] == 'w' && 10918 name[3] == 'i' && 10919 name[4] == 'n' && 10920 name[5] == 'd' && 10921 name[6] == 'd' && 10922 name[7] == 'i' && 10923 name[8] == 'r') 10924 { /* rewinddir */ 10925 return -KEY_rewinddir; 10926 } 10927 10928 goto unknown; 10929 10930 case 's': 10931 if (name[1] == 'e' && 10932 name[2] == 't' && 10933 name[3] == 'n' && 10934 name[4] == 'e' && 10935 name[5] == 't' && 10936 name[6] == 'e' && 10937 name[7] == 'n' && 10938 name[8] == 't') 10939 { /* setnetent */ 10940 return -KEY_setnetent; 10941 } 10942 10943 goto unknown; 10944 10945 case 'w': 10946 if (name[1] == 'a' && 10947 name[2] == 'n' && 10948 name[3] == 't' && 10949 name[4] == 'a' && 10950 name[5] == 'r' && 10951 name[6] == 'r' && 10952 name[7] == 'a' && 10953 name[8] == 'y') 10954 { /* wantarray */ 10955 return -KEY_wantarray; 10956 } 10957 10958 goto unknown; 10959 10960 default: 10961 goto unknown; 10962 } 10963 10964 case 10: /* 9 tokens of length 10 */ 10965 switch (name[0]) 10966 { 10967 case 'e': 10968 if (name[1] == 'n' && 10969 name[2] == 'd') 10970 { 10971 switch (name[3]) 10972 { 10973 case 'h': 10974 if (name[4] == 'o' && 10975 name[5] == 's' && 10976 name[6] == 't' && 10977 name[7] == 'e' && 10978 name[8] == 'n' && 10979 name[9] == 't') 10980 { /* endhostent */ 10981 return -KEY_endhostent; 10982 } 10983 10984 goto unknown; 10985 10986 case 's': 10987 if (name[4] == 'e' && 10988 name[5] == 'r' && 10989 name[6] == 'v' && 10990 name[7] == 'e' && 10991 name[8] == 'n' && 10992 name[9] == 't') 10993 { /* endservent */ 10994 return -KEY_endservent; 10995 } 10996 10997 goto unknown; 10998 10999 default: 11000 goto unknown; 11001 } 11002 } 11003 11004 goto unknown; 11005 11006 case 'g': 11007 if (name[1] == 'e' && 11008 name[2] == 't') 11009 { 11010 switch (name[3]) 11011 { 11012 case 'h': 11013 if (name[4] == 'o' && 11014 name[5] == 's' && 11015 name[6] == 't' && 11016 name[7] == 'e' && 11017 name[8] == 'n' && 11018 name[9] == 't') 11019 { /* gethostent */ 11020 return -KEY_gethostent; 11021 } 11022 11023 goto unknown; 11024 11025 case 's': 11026 switch (name[4]) 11027 { 11028 case 'e': 11029 if (name[5] == 'r' && 11030 name[6] == 'v' && 11031 name[7] == 'e' && 11032 name[8] == 'n' && 11033 name[9] == 't') 11034 { /* getservent */ 11035 return -KEY_getservent; 11036 } 11037 11038 goto unknown; 11039 11040 case 'o': 11041 if (name[5] == 'c' && 11042 name[6] == 'k' && 11043 name[7] == 'o' && 11044 name[8] == 'p' && 11045 name[9] == 't') 11046 { /* getsockopt */ 11047 return -KEY_getsockopt; 11048 } 11049 11050 goto unknown; 11051 11052 default: 11053 goto unknown; 11054 } 11055 11056 default: 11057 goto unknown; 11058 } 11059 } 11060 11061 goto unknown; 11062 11063 case 's': 11064 switch (name[1]) 11065 { 11066 case 'e': 11067 if (name[2] == 't') 11068 { 11069 switch (name[3]) 11070 { 11071 case 'h': 11072 if (name[4] == 'o' && 11073 name[5] == 's' && 11074 name[6] == 't' && 11075 name[7] == 'e' && 11076 name[8] == 'n' && 11077 name[9] == 't') 11078 { /* sethostent */ 11079 return -KEY_sethostent; 11080 } 11081 11082 goto unknown; 11083 11084 case 's': 11085 switch (name[4]) 11086 { 11087 case 'e': 11088 if (name[5] == 'r' && 11089 name[6] == 'v' && 11090 name[7] == 'e' && 11091 name[8] == 'n' && 11092 name[9] == 't') 11093 { /* setservent */ 11094 return -KEY_setservent; 11095 } 11096 11097 goto unknown; 11098 11099 case 'o': 11100 if (name[5] == 'c' && 11101 name[6] == 'k' && 11102 name[7] == 'o' && 11103 name[8] == 'p' && 11104 name[9] == 't') 11105 { /* setsockopt */ 11106 return -KEY_setsockopt; 11107 } 11108 11109 goto unknown; 11110 11111 default: 11112 goto unknown; 11113 } 11114 11115 default: 11116 goto unknown; 11117 } 11118 } 11119 11120 goto unknown; 11121 11122 case 'o': 11123 if (name[2] == 'c' && 11124 name[3] == 'k' && 11125 name[4] == 'e' && 11126 name[5] == 't' && 11127 name[6] == 'p' && 11128 name[7] == 'a' && 11129 name[8] == 'i' && 11130 name[9] == 'r') 11131 { /* socketpair */ 11132 return -KEY_socketpair; 11133 } 11134 11135 goto unknown; 11136 11137 default: 11138 goto unknown; 11139 } 11140 11141 default: 11142 goto unknown; 11143 } 11144 11145 case 11: /* 8 tokens of length 11 */ 11146 switch (name[0]) 11147 { 11148 case '_': 11149 if (name[1] == '_' && 11150 name[2] == 'P' && 11151 name[3] == 'A' && 11152 name[4] == 'C' && 11153 name[5] == 'K' && 11154 name[6] == 'A' && 11155 name[7] == 'G' && 11156 name[8] == 'E' && 11157 name[9] == '_' && 11158 name[10] == '_') 11159 { /* __PACKAGE__ */ 11160 return -KEY___PACKAGE__; 11161 } 11162 11163 goto unknown; 11164 11165 case 'e': 11166 if (name[1] == 'n' && 11167 name[2] == 'd' && 11168 name[3] == 'p' && 11169 name[4] == 'r' && 11170 name[5] == 'o' && 11171 name[6] == 't' && 11172 name[7] == 'o' && 11173 name[8] == 'e' && 11174 name[9] == 'n' && 11175 name[10] == 't') 11176 { /* endprotoent */ 11177 return -KEY_endprotoent; 11178 } 11179 11180 goto unknown; 11181 11182 case 'g': 11183 if (name[1] == 'e' && 11184 name[2] == 't') 11185 { 11186 switch (name[3]) 11187 { 11188 case 'p': 11189 switch (name[4]) 11190 { 11191 case 'e': 11192 if (name[5] == 'e' && 11193 name[6] == 'r' && 11194 name[7] == 'n' && 11195 name[8] == 'a' && 11196 name[9] == 'm' && 11197 name[10] == 'e') 11198 { /* getpeername */ 11199 return -KEY_getpeername; 11200 } 11201 11202 goto unknown; 11203 11204 case 'r': 11205 switch (name[5]) 11206 { 11207 case 'i': 11208 if (name[6] == 'o' && 11209 name[7] == 'r' && 11210 name[8] == 'i' && 11211 name[9] == 't' && 11212 name[10] == 'y') 11213 { /* getpriority */ 11214 return -KEY_getpriority; 11215 } 11216 11217 goto unknown; 11218 11219 case 'o': 11220 if (name[6] == 't' && 11221 name[7] == 'o' && 11222 name[8] == 'e' && 11223 name[9] == 'n' && 11224 name[10] == 't') 11225 { /* getprotoent */ 11226 return -KEY_getprotoent; 11227 } 11228 11229 goto unknown; 11230 11231 default: 11232 goto unknown; 11233 } 11234 11235 default: 11236 goto unknown; 11237 } 11238 11239 case 's': 11240 if (name[4] == 'o' && 11241 name[5] == 'c' && 11242 name[6] == 'k' && 11243 name[7] == 'n' && 11244 name[8] == 'a' && 11245 name[9] == 'm' && 11246 name[10] == 'e') 11247 { /* getsockname */ 11248 return -KEY_getsockname; 11249 } 11250 11251 goto unknown; 11252 11253 default: 11254 goto unknown; 11255 } 11256 } 11257 11258 goto unknown; 11259 11260 case 's': 11261 if (name[1] == 'e' && 11262 name[2] == 't' && 11263 name[3] == 'p' && 11264 name[4] == 'r') 11265 { 11266 switch (name[5]) 11267 { 11268 case 'i': 11269 if (name[6] == 'o' && 11270 name[7] == 'r' && 11271 name[8] == 'i' && 11272 name[9] == 't' && 11273 name[10] == 'y') 11274 { /* setpriority */ 11275 return -KEY_setpriority; 11276 } 11277 11278 goto unknown; 11279 11280 case 'o': 11281 if (name[6] == 't' && 11282 name[7] == 'o' && 11283 name[8] == 'e' && 11284 name[9] == 'n' && 11285 name[10] == 't') 11286 { /* setprotoent */ 11287 return -KEY_setprotoent; 11288 } 11289 11290 goto unknown; 11291 11292 default: 11293 goto unknown; 11294 } 11295 } 11296 11297 goto unknown; 11298 11299 default: 11300 goto unknown; 11301 } 11302 11303 case 12: /* 2 tokens of length 12 */ 11304 if (name[0] == 'g' && 11305 name[1] == 'e' && 11306 name[2] == 't' && 11307 name[3] == 'n' && 11308 name[4] == 'e' && 11309 name[5] == 't' && 11310 name[6] == 'b' && 11311 name[7] == 'y') 11312 { 11313 switch (name[8]) 11314 { 11315 case 'a': 11316 if (name[9] == 'd' && 11317 name[10] == 'd' && 11318 name[11] == 'r') 11319 { /* getnetbyaddr */ 11320 return -KEY_getnetbyaddr; 11321 } 11322 11323 goto unknown; 11324 11325 case 'n': 11326 if (name[9] == 'a' && 11327 name[10] == 'm' && 11328 name[11] == 'e') 11329 { /* getnetbyname */ 11330 return -KEY_getnetbyname; 11331 } 11332 11333 goto unknown; 11334 11335 default: 11336 goto unknown; 11337 } 11338 } 11339 11340 goto unknown; 11341 11342 case 13: /* 4 tokens of length 13 */ 11343 if (name[0] == 'g' && 11344 name[1] == 'e' && 11345 name[2] == 't') 11346 { 11347 switch (name[3]) 11348 { 11349 case 'h': 11350 if (name[4] == 'o' && 11351 name[5] == 's' && 11352 name[6] == 't' && 11353 name[7] == 'b' && 11354 name[8] == 'y') 11355 { 11356 switch (name[9]) 11357 { 11358 case 'a': 11359 if (name[10] == 'd' && 11360 name[11] == 'd' && 11361 name[12] == 'r') 11362 { /* gethostbyaddr */ 11363 return -KEY_gethostbyaddr; 11364 } 11365 11366 goto unknown; 11367 11368 case 'n': 11369 if (name[10] == 'a' && 11370 name[11] == 'm' && 11371 name[12] == 'e') 11372 { /* gethostbyname */ 11373 return -KEY_gethostbyname; 11374 } 11375 11376 goto unknown; 11377 11378 default: 11379 goto unknown; 11380 } 11381 } 11382 11383 goto unknown; 11384 11385 case 's': 11386 if (name[4] == 'e' && 11387 name[5] == 'r' && 11388 name[6] == 'v' && 11389 name[7] == 'b' && 11390 name[8] == 'y') 11391 { 11392 switch (name[9]) 11393 { 11394 case 'n': 11395 if (name[10] == 'a' && 11396 name[11] == 'm' && 11397 name[12] == 'e') 11398 { /* getservbyname */ 11399 return -KEY_getservbyname; 11400 } 11401 11402 goto unknown; 11403 11404 case 'p': 11405 if (name[10] == 'o' && 11406 name[11] == 'r' && 11407 name[12] == 't') 11408 { /* getservbyport */ 11409 return -KEY_getservbyport; 11410 } 11411 11412 goto unknown; 11413 11414 default: 11415 goto unknown; 11416 } 11417 } 11418 11419 goto unknown; 11420 11421 default: 11422 goto unknown; 11423 } 11424 } 11425 11426 goto unknown; 11427 11428 case 14: /* 1 tokens of length 14 */ 11429 if (name[0] == 'g' && 11430 name[1] == 'e' && 11431 name[2] == 't' && 11432 name[3] == 'p' && 11433 name[4] == 'r' && 11434 name[5] == 'o' && 11435 name[6] == 't' && 11436 name[7] == 'o' && 11437 name[8] == 'b' && 11438 name[9] == 'y' && 11439 name[10] == 'n' && 11440 name[11] == 'a' && 11441 name[12] == 'm' && 11442 name[13] == 'e') 11443 { /* getprotobyname */ 11444 return -KEY_getprotobyname; 11445 } 11446 11447 goto unknown; 11448 11449 case 16: /* 1 tokens of length 16 */ 11450 if (name[0] == 'g' && 11451 name[1] == 'e' && 11452 name[2] == 't' && 11453 name[3] == 'p' && 11454 name[4] == 'r' && 11455 name[5] == 'o' && 11456 name[6] == 't' && 11457 name[7] == 'o' && 11458 name[8] == 'b' && 11459 name[9] == 'y' && 11460 name[10] == 'n' && 11461 name[11] == 'u' && 11462 name[12] == 'm' && 11463 name[13] == 'b' && 11464 name[14] == 'e' && 11465 name[15] == 'r') 11466 { /* getprotobynumber */ 11467 return -KEY_getprotobynumber; 11468 } 11469 11470 goto unknown; 11471 11472 default: 11473 goto unknown; 11474 } 11475 11476 unknown: 11477 return 0; 11478 } 11479 11480 STATIC void 11481 S_checkcomma(pTHX_ const char *s, const char *name, const char *what) 11482 { 11483 dVAR; 11484 11485 PERL_ARGS_ASSERT_CHECKCOMMA; 11486 11487 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */ 11488 if (ckWARN(WARN_SYNTAX)) { 11489 int level = 1; 11490 const char *w; 11491 for (w = s+2; *w && level; w++) { 11492 if (*w == '(') 11493 ++level; 11494 else if (*w == ')') 11495 --level; 11496 } 11497 while (isSPACE(*w)) 11498 ++w; 11499 /* the list of chars below is for end of statements or 11500 * block / parens, boolean operators (&&, ||, //) and branch 11501 * constructs (or, and, if, until, unless, while, err, for). 11502 * Not a very solid hack... */ 11503 if (!*w || !strchr(";&/|})]oaiuwef!=", *w)) 11504 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 11505 "%s (...) interpreted as function",name); 11506 } 11507 } 11508 while (s < PL_bufend && isSPACE(*s)) 11509 s++; 11510 if (*s == '(') 11511 s++; 11512 while (s < PL_bufend && isSPACE(*s)) 11513 s++; 11514 if (isIDFIRST_lazy_if(s,UTF)) { 11515 const char * const w = s++; 11516 while (isALNUM_lazy_if(s,UTF)) 11517 s++; 11518 while (s < PL_bufend && isSPACE(*s)) 11519 s++; 11520 if (*s == ',') { 11521 GV* gv; 11522 if (keyword(w, s - w, 0)) 11523 return; 11524 11525 gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV); 11526 if (gv && GvCVu(gv)) 11527 return; 11528 Perl_croak(aTHX_ "No comma allowed after %s", what); 11529 } 11530 } 11531 } 11532 11533 /* Either returns sv, or mortalizes sv and returns a new SV*. 11534 Best used as sv=new_constant(..., sv, ...). 11535 If s, pv are NULL, calls subroutine with one argument, 11536 and type is used with error messages only. */ 11537 11538 STATIC SV * 11539 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen, 11540 SV *sv, SV *pv, const char *type, STRLEN typelen) 11541 { 11542 dVAR; dSP; 11543 HV * const table = GvHV(PL_hintgv); /* ^H */ 11544 SV *res; 11545 SV **cvp; 11546 SV *cv, *typesv; 11547 const char *why1 = "", *why2 = "", *why3 = ""; 11548 11549 PERL_ARGS_ASSERT_NEW_CONSTANT; 11550 11551 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) { 11552 SV *msg; 11553 11554 why2 = (const char *) 11555 (strEQ(key,"charnames") 11556 ? "(possibly a missing \"use charnames ...\")" 11557 : ""); 11558 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s", 11559 (type ? type: "undef"), why2); 11560 11561 /* This is convoluted and evil ("goto considered harmful") 11562 * but I do not understand the intricacies of all the different 11563 * failure modes of %^H in here. The goal here is to make 11564 * the most probable error message user-friendly. --jhi */ 11565 11566 goto msgdone; 11567 11568 report: 11569 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s", 11570 (type ? type: "undef"), why1, why2, why3); 11571 msgdone: 11572 yyerror(SvPVX_const(msg)); 11573 SvREFCNT_dec(msg); 11574 return sv; 11575 } 11576 11577 /* charnames doesn't work well if there have been errors found */ 11578 if (PL_error_count > 0 && strEQ(key,"charnames")) 11579 return &PL_sv_undef; 11580 11581 cvp = hv_fetch(table, key, keylen, FALSE); 11582 if (!cvp || !SvOK(*cvp)) { 11583 why1 = "$^H{"; 11584 why2 = key; 11585 why3 = "} is not defined"; 11586 goto report; 11587 } 11588 sv_2mortal(sv); /* Parent created it permanently */ 11589 cv = *cvp; 11590 if (!pv && s) 11591 pv = newSVpvn_flags(s, len, SVs_TEMP); 11592 if (type && pv) 11593 typesv = newSVpvn_flags(type, typelen, SVs_TEMP); 11594 else 11595 typesv = &PL_sv_undef; 11596 11597 PUSHSTACKi(PERLSI_OVERLOAD); 11598 ENTER ; 11599 SAVETMPS; 11600 11601 PUSHMARK(SP) ; 11602 EXTEND(sp, 3); 11603 if (pv) 11604 PUSHs(pv); 11605 PUSHs(sv); 11606 if (pv) 11607 PUSHs(typesv); 11608 PUTBACK; 11609 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL)); 11610 11611 SPAGAIN ; 11612 11613 /* Check the eval first */ 11614 if (!PL_in_eval && SvTRUE(ERRSV)) { 11615 sv_catpvs(ERRSV, "Propagated"); 11616 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */ 11617 (void)POPs; 11618 res = SvREFCNT_inc_simple(sv); 11619 } 11620 else { 11621 res = POPs; 11622 SvREFCNT_inc_simple_void(res); 11623 } 11624 11625 PUTBACK ; 11626 FREETMPS ; 11627 LEAVE ; 11628 POPSTACK; 11629 11630 if (!SvOK(res)) { 11631 why1 = "Call to &{$^H{"; 11632 why2 = key; 11633 why3 = "}} did not return a defined value"; 11634 sv = res; 11635 goto report; 11636 } 11637 11638 return res; 11639 } 11640 11641 /* Returns a NUL terminated string, with the length of the string written to 11642 *slp 11643 */ 11644 STATIC char * 11645 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp) 11646 { 11647 dVAR; 11648 register char *d = dest; 11649 register char * const e = d + destlen - 3; /* two-character token, ending NUL */ 11650 11651 PERL_ARGS_ASSERT_SCAN_WORD; 11652 11653 for (;;) { 11654 if (d >= e) 11655 Perl_croak(aTHX_ ident_too_long); 11656 if (isALNUM(*s)) /* UTF handled below */ 11657 *d++ = *s++; 11658 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) { 11659 *d++ = ':'; 11660 *d++ = ':'; 11661 s++; 11662 } 11663 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) { 11664 *d++ = *s++; 11665 *d++ = *s++; 11666 } 11667 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) { 11668 char *t = s + UTF8SKIP(s); 11669 size_t len; 11670 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t)) 11671 t += UTF8SKIP(t); 11672 len = t - s; 11673 if (d + len > e) 11674 Perl_croak(aTHX_ ident_too_long); 11675 Copy(s, d, len, char); 11676 d += len; 11677 s = t; 11678 } 11679 else { 11680 *d = '\0'; 11681 *slp = d - dest; 11682 return s; 11683 } 11684 } 11685 } 11686 11687 STATIC char * 11688 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni) 11689 { 11690 dVAR; 11691 char *bracket = NULL; 11692 char funny = *s++; 11693 register char *d = dest; 11694 register char * const e = d + destlen - 3; /* two-character token, ending NUL */ 11695 11696 PERL_ARGS_ASSERT_SCAN_IDENT; 11697 11698 if (isSPACE(*s)) 11699 s = PEEKSPACE(s); 11700 if (isDIGIT(*s)) { 11701 while (isDIGIT(*s)) { 11702 if (d >= e) 11703 Perl_croak(aTHX_ ident_too_long); 11704 *d++ = *s++; 11705 } 11706 } 11707 else { 11708 for (;;) { 11709 if (d >= e) 11710 Perl_croak(aTHX_ ident_too_long); 11711 if (isALNUM(*s)) /* UTF handled below */ 11712 *d++ = *s++; 11713 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) { 11714 *d++ = ':'; 11715 *d++ = ':'; 11716 s++; 11717 } 11718 else if (*s == ':' && s[1] == ':') { 11719 *d++ = *s++; 11720 *d++ = *s++; 11721 } 11722 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) { 11723 char *t = s + UTF8SKIP(s); 11724 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t)) 11725 t += UTF8SKIP(t); 11726 if (d + (t - s) > e) 11727 Perl_croak(aTHX_ ident_too_long); 11728 Copy(s, d, t - s, char); 11729 d += t - s; 11730 s = t; 11731 } 11732 else 11733 break; 11734 } 11735 } 11736 *d = '\0'; 11737 d = dest; 11738 if (*d) { 11739 if (PL_lex_state != LEX_NORMAL) 11740 PL_lex_state = LEX_INTERPENDMAYBE; 11741 return s; 11742 } 11743 if (*s == '$' && s[1] && 11744 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) ) 11745 { 11746 return s; 11747 } 11748 if (*s == '{') { 11749 bracket = s; 11750 s++; 11751 } 11752 else if (ck_uni) 11753 check_uni(); 11754 if (s < send) 11755 *d = *s++; 11756 d[1] = '\0'; 11757 if (*d == '^' && *s && isCONTROLVAR(*s)) { 11758 *d = toCTRL(*s); 11759 s++; 11760 } 11761 if (bracket) { 11762 if (isSPACE(s[-1])) { 11763 while (s < send) { 11764 const char ch = *s++; 11765 if (!SPACE_OR_TAB(ch)) { 11766 *d = ch; 11767 break; 11768 } 11769 } 11770 } 11771 if (isIDFIRST_lazy_if(d,UTF)) { 11772 d++; 11773 if (UTF) { 11774 char *end = s; 11775 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') { 11776 end += UTF8SKIP(end); 11777 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end)) 11778 end += UTF8SKIP(end); 11779 } 11780 Copy(s, d, end - s, char); 11781 d += end - s; 11782 s = end; 11783 } 11784 else { 11785 while ((isALNUM(*s) || *s == ':') && d < e) 11786 *d++ = *s++; 11787 if (d >= e) 11788 Perl_croak(aTHX_ ident_too_long); 11789 } 11790 *d = '\0'; 11791 while (s < send && SPACE_OR_TAB(*s)) 11792 s++; 11793 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) { 11794 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) { 11795 const char * const brack = 11796 (const char *) 11797 ((*s == '[') ? "[...]" : "{...}"); 11798 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 11799 "Ambiguous use of %c{%s%s} resolved to %c%s%s", 11800 funny, dest, brack, funny, dest, brack); 11801 } 11802 bracket++; 11803 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK); 11804 return s; 11805 } 11806 } 11807 /* Handle extended ${^Foo} variables 11808 * 1999-02-27 mjd-perl-patch@plover.com */ 11809 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */ 11810 && isALNUM(*s)) 11811 { 11812 d++; 11813 while (isALNUM(*s) && d < e) { 11814 *d++ = *s++; 11815 } 11816 if (d >= e) 11817 Perl_croak(aTHX_ ident_too_long); 11818 *d = '\0'; 11819 } 11820 if (*s == '}') { 11821 s++; 11822 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) { 11823 PL_lex_state = LEX_INTERPEND; 11824 PL_expect = XREF; 11825 } 11826 if (PL_lex_state == LEX_NORMAL) { 11827 if (ckWARN(WARN_AMBIGUOUS) && 11828 (keyword(dest, d - dest, 0) 11829 || get_cvn_flags(dest, d - dest, 0))) 11830 { 11831 if (funny == '#') 11832 funny = '@'; 11833 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 11834 "Ambiguous use of %c{%s} resolved to %c%s", 11835 funny, dest, funny, dest); 11836 } 11837 } 11838 } 11839 else { 11840 s = bracket; /* let the parser handle it */ 11841 *dest = '\0'; 11842 } 11843 } 11844 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s)) 11845 PL_lex_state = LEX_INTERPEND; 11846 return s; 11847 } 11848 11849 static U32 11850 S_pmflag(U32 pmfl, const char ch) { 11851 switch (ch) { 11852 CASE_STD_PMMOD_FLAGS_PARSE_SET(&pmfl); 11853 case GLOBAL_PAT_MOD: pmfl |= PMf_GLOBAL; break; 11854 case CONTINUE_PAT_MOD: pmfl |= PMf_CONTINUE; break; 11855 case ONCE_PAT_MOD: pmfl |= PMf_KEEP; break; 11856 case KEEPCOPY_PAT_MOD: pmfl |= PMf_KEEPCOPY; break; 11857 } 11858 return pmfl; 11859 } 11860 11861 void 11862 Perl_pmflag(pTHX_ U32* pmfl, int ch) 11863 { 11864 PERL_ARGS_ASSERT_PMFLAG; 11865 11866 Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED), 11867 "Perl_pmflag() is deprecated, and will be removed from the XS API"); 11868 11869 if (ch<256) { 11870 *pmfl = S_pmflag(*pmfl, (char)ch); 11871 } 11872 } 11873 11874 STATIC char * 11875 S_scan_pat(pTHX_ char *start, I32 type) 11876 { 11877 dVAR; 11878 PMOP *pm; 11879 char *s = scan_str(start,!!PL_madskills,FALSE); 11880 const char * const valid_flags = 11881 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS); 11882 #ifdef PERL_MAD 11883 char *modstart; 11884 #endif 11885 11886 PERL_ARGS_ASSERT_SCAN_PAT; 11887 11888 if (!s) { 11889 const char * const delimiter = skipspace(start); 11890 Perl_croak(aTHX_ 11891 (const char *) 11892 (*delimiter == '?' 11893 ? "Search pattern not terminated or ternary operator parsed as search pattern" 11894 : "Search pattern not terminated" )); 11895 } 11896 11897 pm = (PMOP*)newPMOP(type, 0); 11898 if (PL_multi_open == '?') { 11899 /* This is the only point in the code that sets PMf_ONCE: */ 11900 pm->op_pmflags |= PMf_ONCE; 11901 11902 /* Hence it's safe to do this bit of PMOP book-keeping here, which 11903 allows us to restrict the list needed by reset to just the ?? 11904 matches. */ 11905 assert(type != OP_TRANS); 11906 if (PL_curstash) { 11907 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab); 11908 U32 elements; 11909 if (!mg) { 11910 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0, 11911 0); 11912 } 11913 elements = mg->mg_len / sizeof(PMOP**); 11914 Renewc(mg->mg_ptr, elements + 1, PMOP*, char); 11915 ((PMOP**)mg->mg_ptr) [elements++] = pm; 11916 mg->mg_len = elements * sizeof(PMOP**); 11917 PmopSTASH_set(pm,PL_curstash); 11918 } 11919 } 11920 #ifdef PERL_MAD 11921 modstart = s; 11922 #endif 11923 while (*s && strchr(valid_flags, *s)) 11924 pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++); 11925 #ifdef PERL_MAD 11926 if (PL_madskills && modstart != s) { 11927 SV* tmptoken = newSVpvn(modstart, s - modstart); 11928 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0); 11929 } 11930 #endif 11931 /* issue a warning if /c is specified,but /g is not */ 11932 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL)) 11933 { 11934 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), 11935 "Use of /c modifier is meaningless without /g" ); 11936 } 11937 11938 PL_lex_op = (OP*)pm; 11939 pl_yylval.ival = OP_MATCH; 11940 return s; 11941 } 11942 11943 STATIC char * 11944 S_scan_subst(pTHX_ char *start) 11945 { 11946 dVAR; 11947 register char *s; 11948 register PMOP *pm; 11949 I32 first_start; 11950 I32 es = 0; 11951 #ifdef PERL_MAD 11952 char *modstart; 11953 #endif 11954 11955 PERL_ARGS_ASSERT_SCAN_SUBST; 11956 11957 pl_yylval.ival = OP_NULL; 11958 11959 s = scan_str(start,!!PL_madskills,FALSE); 11960 11961 if (!s) 11962 Perl_croak(aTHX_ "Substitution pattern not terminated"); 11963 11964 if (s[-1] == PL_multi_open) 11965 s--; 11966 #ifdef PERL_MAD 11967 if (PL_madskills) { 11968 CURMAD('q', PL_thisopen); 11969 CURMAD('_', PL_thiswhite); 11970 CURMAD('E', PL_thisstuff); 11971 CURMAD('Q', PL_thisclose); 11972 PL_realtokenstart = s - SvPVX(PL_linestr); 11973 } 11974 #endif 11975 11976 first_start = PL_multi_start; 11977 s = scan_str(s,!!PL_madskills,FALSE); 11978 if (!s) { 11979 if (PL_lex_stuff) { 11980 SvREFCNT_dec(PL_lex_stuff); 11981 PL_lex_stuff = NULL; 11982 } 11983 Perl_croak(aTHX_ "Substitution replacement not terminated"); 11984 } 11985 PL_multi_start = first_start; /* so whole substitution is taken together */ 11986 11987 pm = (PMOP*)newPMOP(OP_SUBST, 0); 11988 11989 #ifdef PERL_MAD 11990 if (PL_madskills) { 11991 CURMAD('z', PL_thisopen); 11992 CURMAD('R', PL_thisstuff); 11993 CURMAD('Z', PL_thisclose); 11994 } 11995 modstart = s; 11996 #endif 11997 11998 while (*s) { 11999 if (*s == EXEC_PAT_MOD) { 12000 s++; 12001 es++; 12002 } 12003 else if (strchr(S_PAT_MODS, *s)) 12004 pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++); 12005 else 12006 break; 12007 } 12008 12009 #ifdef PERL_MAD 12010 if (PL_madskills) { 12011 if (modstart != s) 12012 curmad('m', newSVpvn(modstart, s - modstart)); 12013 append_madprops(PL_thismad, (OP*)pm, 0); 12014 PL_thismad = 0; 12015 } 12016 #endif 12017 if ((pm->op_pmflags & PMf_CONTINUE)) { 12018 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" ); 12019 } 12020 12021 if (es) { 12022 SV * const repl = newSVpvs(""); 12023 12024 PL_sublex_info.super_bufptr = s; 12025 PL_sublex_info.super_bufend = PL_bufend; 12026 PL_multi_end = 0; 12027 pm->op_pmflags |= PMf_EVAL; 12028 while (es-- > 0) { 12029 if (es) 12030 sv_catpvs(repl, "eval "); 12031 else 12032 sv_catpvs(repl, "do "); 12033 } 12034 sv_catpvs(repl, "{"); 12035 sv_catsv(repl, PL_lex_repl); 12036 if (strchr(SvPVX(PL_lex_repl), '#')) 12037 sv_catpvs(repl, "\n"); 12038 sv_catpvs(repl, "}"); 12039 SvEVALED_on(repl); 12040 SvREFCNT_dec(PL_lex_repl); 12041 PL_lex_repl = repl; 12042 } 12043 12044 PL_lex_op = (OP*)pm; 12045 pl_yylval.ival = OP_SUBST; 12046 return s; 12047 } 12048 12049 STATIC char * 12050 S_scan_trans(pTHX_ char *start) 12051 { 12052 dVAR; 12053 register char* s; 12054 OP *o; 12055 short *tbl; 12056 U8 squash; 12057 U8 del; 12058 U8 complement; 12059 #ifdef PERL_MAD 12060 char *modstart; 12061 #endif 12062 12063 PERL_ARGS_ASSERT_SCAN_TRANS; 12064 12065 pl_yylval.ival = OP_NULL; 12066 12067 s = scan_str(start,!!PL_madskills,FALSE); 12068 if (!s) 12069 Perl_croak(aTHX_ "Transliteration pattern not terminated"); 12070 12071 if (s[-1] == PL_multi_open) 12072 s--; 12073 #ifdef PERL_MAD 12074 if (PL_madskills) { 12075 CURMAD('q', PL_thisopen); 12076 CURMAD('_', PL_thiswhite); 12077 CURMAD('E', PL_thisstuff); 12078 CURMAD('Q', PL_thisclose); 12079 PL_realtokenstart = s - SvPVX(PL_linestr); 12080 } 12081 #endif 12082 12083 s = scan_str(s,!!PL_madskills,FALSE); 12084 if (!s) { 12085 if (PL_lex_stuff) { 12086 SvREFCNT_dec(PL_lex_stuff); 12087 PL_lex_stuff = NULL; 12088 } 12089 Perl_croak(aTHX_ "Transliteration replacement not terminated"); 12090 } 12091 if (PL_madskills) { 12092 CURMAD('z', PL_thisopen); 12093 CURMAD('R', PL_thisstuff); 12094 CURMAD('Z', PL_thisclose); 12095 } 12096 12097 complement = del = squash = 0; 12098 #ifdef PERL_MAD 12099 modstart = s; 12100 #endif 12101 while (1) { 12102 switch (*s) { 12103 case 'c': 12104 complement = OPpTRANS_COMPLEMENT; 12105 break; 12106 case 'd': 12107 del = OPpTRANS_DELETE; 12108 break; 12109 case 's': 12110 squash = OPpTRANS_SQUASH; 12111 break; 12112 default: 12113 goto no_more; 12114 } 12115 s++; 12116 } 12117 no_more: 12118 12119 tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short)); 12120 o = newPVOP(OP_TRANS, 0, (char*)tbl); 12121 o->op_private &= ~OPpTRANS_ALL; 12122 o->op_private |= del|squash|complement| 12123 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)| 12124 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0); 12125 12126 PL_lex_op = o; 12127 pl_yylval.ival = OP_TRANS; 12128 12129 #ifdef PERL_MAD 12130 if (PL_madskills) { 12131 if (modstart != s) 12132 curmad('m', newSVpvn(modstart, s - modstart)); 12133 append_madprops(PL_thismad, o, 0); 12134 PL_thismad = 0; 12135 } 12136 #endif 12137 12138 return s; 12139 } 12140 12141 STATIC char * 12142 S_scan_heredoc(pTHX_ register char *s) 12143 { 12144 dVAR; 12145 SV *herewas; 12146 I32 op_type = OP_SCALAR; 12147 I32 len; 12148 SV *tmpstr; 12149 char term; 12150 const char *found_newline; 12151 register char *d; 12152 register char *e; 12153 char *peek; 12154 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR)); 12155 #ifdef PERL_MAD 12156 I32 stuffstart = s - SvPVX(PL_linestr); 12157 char *tstart; 12158 12159 PL_realtokenstart = -1; 12160 #endif 12161 12162 PERL_ARGS_ASSERT_SCAN_HEREDOC; 12163 12164 s += 2; 12165 d = PL_tokenbuf; 12166 e = PL_tokenbuf + sizeof PL_tokenbuf - 1; 12167 if (!outer) 12168 *d++ = '\n'; 12169 peek = s; 12170 while (SPACE_OR_TAB(*peek)) 12171 peek++; 12172 if (*peek == '`' || *peek == '\'' || *peek =='"') { 12173 s = peek; 12174 term = *s++; 12175 s = delimcpy(d, e, s, PL_bufend, term, &len); 12176 d += len; 12177 if (s < PL_bufend) 12178 s++; 12179 } 12180 else { 12181 if (*s == '\\') 12182 s++, term = '\''; 12183 else 12184 term = '"'; 12185 if (!isALNUM_lazy_if(s,UTF)) 12186 deprecate("bare << to mean <<\"\""); 12187 for (; isALNUM_lazy_if(s,UTF); s++) { 12188 if (d < e) 12189 *d++ = *s; 12190 } 12191 } 12192 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1) 12193 Perl_croak(aTHX_ "Delimiter for here document is too long"); 12194 *d++ = '\n'; 12195 *d = '\0'; 12196 len = d - PL_tokenbuf; 12197 12198 #ifdef PERL_MAD 12199 if (PL_madskills) { 12200 tstart = PL_tokenbuf + !outer; 12201 PL_thisclose = newSVpvn(tstart, len - !outer); 12202 tstart = SvPVX(PL_linestr) + stuffstart; 12203 PL_thisopen = newSVpvn(tstart, s - tstart); 12204 stuffstart = s - SvPVX(PL_linestr); 12205 } 12206 #endif 12207 #ifndef PERL_STRICT_CR 12208 d = strchr(s, '\r'); 12209 if (d) { 12210 char * const olds = s; 12211 s = d; 12212 while (s < PL_bufend) { 12213 if (*s == '\r') { 12214 *d++ = '\n'; 12215 if (*++s == '\n') 12216 s++; 12217 } 12218 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */ 12219 *d++ = *s++; 12220 s++; 12221 } 12222 else 12223 *d++ = *s++; 12224 } 12225 *d = '\0'; 12226 PL_bufend = d; 12227 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr)); 12228 s = olds; 12229 } 12230 #endif 12231 #ifdef PERL_MAD 12232 found_newline = 0; 12233 #endif 12234 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) { 12235 herewas = newSVpvn(s,PL_bufend-s); 12236 } 12237 else { 12238 #ifdef PERL_MAD 12239 herewas = newSVpvn(s-1,found_newline-s+1); 12240 #else 12241 s--; 12242 herewas = newSVpvn(s,found_newline-s); 12243 #endif 12244 } 12245 #ifdef PERL_MAD 12246 if (PL_madskills) { 12247 tstart = SvPVX(PL_linestr) + stuffstart; 12248 if (PL_thisstuff) 12249 sv_catpvn(PL_thisstuff, tstart, s - tstart); 12250 else 12251 PL_thisstuff = newSVpvn(tstart, s - tstart); 12252 } 12253 #endif 12254 s += SvCUR(herewas); 12255 12256 #ifdef PERL_MAD 12257 stuffstart = s - SvPVX(PL_linestr); 12258 12259 if (found_newline) 12260 s--; 12261 #endif 12262 12263 tmpstr = newSV_type(SVt_PVIV); 12264 SvGROW(tmpstr, 80); 12265 if (term == '\'') { 12266 op_type = OP_CONST; 12267 SvIV_set(tmpstr, -1); 12268 } 12269 else if (term == '`') { 12270 op_type = OP_BACKTICK; 12271 SvIV_set(tmpstr, '\\'); 12272 } 12273 12274 CLINE; 12275 PL_multi_start = CopLINE(PL_curcop); 12276 PL_multi_open = PL_multi_close = '<'; 12277 term = *PL_tokenbuf; 12278 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) { 12279 char * const bufptr = PL_sublex_info.super_bufptr; 12280 char * const bufend = PL_sublex_info.super_bufend; 12281 char * const olds = s - SvCUR(herewas); 12282 s = strchr(bufptr, '\n'); 12283 if (!s) 12284 s = bufend; 12285 d = s; 12286 while (s < bufend && 12287 (*s != term || memNE(s,PL_tokenbuf,len)) ) { 12288 if (*s++ == '\n') 12289 CopLINE_inc(PL_curcop); 12290 } 12291 if (s >= bufend) { 12292 CopLINE_set(PL_curcop, (line_t)PL_multi_start); 12293 missingterm(PL_tokenbuf); 12294 } 12295 sv_setpvn(herewas,bufptr,d-bufptr+1); 12296 sv_setpvn(tmpstr,d+1,s-d); 12297 s += len - 1; 12298 sv_catpvn(herewas,s,bufend-s); 12299 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char); 12300 12301 s = olds; 12302 goto retval; 12303 } 12304 else if (!outer) { 12305 d = s; 12306 while (s < PL_bufend && 12307 (*s != term || memNE(s,PL_tokenbuf,len)) ) { 12308 if (*s++ == '\n') 12309 CopLINE_inc(PL_curcop); 12310 } 12311 if (s >= PL_bufend) { 12312 CopLINE_set(PL_curcop, (line_t)PL_multi_start); 12313 missingterm(PL_tokenbuf); 12314 } 12315 sv_setpvn(tmpstr,d+1,s-d); 12316 #ifdef PERL_MAD 12317 if (PL_madskills) { 12318 if (PL_thisstuff) 12319 sv_catpvn(PL_thisstuff, d + 1, s - d); 12320 else 12321 PL_thisstuff = newSVpvn(d + 1, s - d); 12322 stuffstart = s - SvPVX(PL_linestr); 12323 } 12324 #endif 12325 s += len - 1; 12326 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */ 12327 12328 sv_catpvn(herewas,s,PL_bufend-s); 12329 sv_setsv(PL_linestr,herewas); 12330 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr); 12331 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 12332 PL_last_lop = PL_last_uni = NULL; 12333 } 12334 else 12335 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */ 12336 while (s >= PL_bufend) { /* multiple line string? */ 12337 #ifdef PERL_MAD 12338 if (PL_madskills) { 12339 tstart = SvPVX(PL_linestr) + stuffstart; 12340 if (PL_thisstuff) 12341 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart); 12342 else 12343 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart); 12344 } 12345 #endif 12346 PL_bufptr = s; 12347 CopLINE_inc(PL_curcop); 12348 if (!outer || !lex_next_chunk(0)) { 12349 CopLINE_set(PL_curcop, (line_t)PL_multi_start); 12350 missingterm(PL_tokenbuf); 12351 } 12352 CopLINE_dec(PL_curcop); 12353 s = PL_bufptr; 12354 #ifdef PERL_MAD 12355 stuffstart = s - SvPVX(PL_linestr); 12356 #endif 12357 CopLINE_inc(PL_curcop); 12358 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 12359 PL_last_lop = PL_last_uni = NULL; 12360 #ifndef PERL_STRICT_CR 12361 if (PL_bufend - PL_linestart >= 2) { 12362 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') || 12363 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r')) 12364 { 12365 PL_bufend[-2] = '\n'; 12366 PL_bufend--; 12367 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr)); 12368 } 12369 else if (PL_bufend[-1] == '\r') 12370 PL_bufend[-1] = '\n'; 12371 } 12372 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r') 12373 PL_bufend[-1] = '\n'; 12374 #endif 12375 if (*s == term && memEQ(s,PL_tokenbuf,len)) { 12376 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr); 12377 *(SvPVX(PL_linestr) + off ) = ' '; 12378 sv_catsv(PL_linestr,herewas); 12379 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 12380 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */ 12381 } 12382 else { 12383 s = PL_bufend; 12384 sv_catsv(tmpstr,PL_linestr); 12385 } 12386 } 12387 s++; 12388 retval: 12389 PL_multi_end = CopLINE(PL_curcop); 12390 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) { 12391 SvPV_shrink_to_cur(tmpstr); 12392 } 12393 SvREFCNT_dec(herewas); 12394 if (!IN_BYTES) { 12395 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr))) 12396 SvUTF8_on(tmpstr); 12397 else if (PL_encoding) 12398 sv_recode_to_utf8(tmpstr, PL_encoding); 12399 } 12400 PL_lex_stuff = tmpstr; 12401 pl_yylval.ival = op_type; 12402 return s; 12403 } 12404 12405 /* scan_inputsymbol 12406 takes: current position in input buffer 12407 returns: new position in input buffer 12408 side-effects: pl_yylval and lex_op are set. 12409 12410 This code handles: 12411 12412 <> read from ARGV 12413 <FH> read from filehandle 12414 <pkg::FH> read from package qualified filehandle 12415 <pkg'FH> read from package qualified filehandle 12416 <$fh> read from filehandle in $fh 12417 <*.h> filename glob 12418 12419 */ 12420 12421 STATIC char * 12422 S_scan_inputsymbol(pTHX_ char *start) 12423 { 12424 dVAR; 12425 register char *s = start; /* current position in buffer */ 12426 char *end; 12427 I32 len; 12428 char *d = PL_tokenbuf; /* start of temp holding space */ 12429 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */ 12430 12431 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL; 12432 12433 end = strchr(s, '\n'); 12434 if (!end) 12435 end = PL_bufend; 12436 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */ 12437 12438 /* die if we didn't have space for the contents of the <>, 12439 or if it didn't end, or if we see a newline 12440 */ 12441 12442 if (len >= (I32)sizeof PL_tokenbuf) 12443 Perl_croak(aTHX_ "Excessively long <> operator"); 12444 if (s >= end) 12445 Perl_croak(aTHX_ "Unterminated <> operator"); 12446 12447 s++; 12448 12449 /* check for <$fh> 12450 Remember, only scalar variables are interpreted as filehandles by 12451 this code. Anything more complex (e.g., <$fh{$num}>) will be 12452 treated as a glob() call. 12453 This code makes use of the fact that except for the $ at the front, 12454 a scalar variable and a filehandle look the same. 12455 */ 12456 if (*d == '$' && d[1]) d++; 12457 12458 /* allow <Pkg'VALUE> or <Pkg::VALUE> */ 12459 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':')) 12460 d++; 12461 12462 /* If we've tried to read what we allow filehandles to look like, and 12463 there's still text left, then it must be a glob() and not a getline. 12464 Use scan_str to pull out the stuff between the <> and treat it 12465 as nothing more than a string. 12466 */ 12467 12468 if (d - PL_tokenbuf != len) { 12469 pl_yylval.ival = OP_GLOB; 12470 s = scan_str(start,!!PL_madskills,FALSE); 12471 if (!s) 12472 Perl_croak(aTHX_ "Glob not terminated"); 12473 return s; 12474 } 12475 else { 12476 bool readline_overriden = FALSE; 12477 GV *gv_readline; 12478 GV **gvp; 12479 /* we're in a filehandle read situation */ 12480 d = PL_tokenbuf; 12481 12482 /* turn <> into <ARGV> */ 12483 if (!len) 12484 Copy("ARGV",d,5,char); 12485 12486 /* Check whether readline() is overriden */ 12487 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV); 12488 if ((gv_readline 12489 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)) 12490 || 12491 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE)) 12492 && (gv_readline = *gvp) && isGV_with_GP(gv_readline) 12493 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))) 12494 readline_overriden = TRUE; 12495 12496 /* if <$fh>, create the ops to turn the variable into a 12497 filehandle 12498 */ 12499 if (*d == '$') { 12500 /* try to find it in the pad for this block, otherwise find 12501 add symbol table ops 12502 */ 12503 const PADOFFSET tmp = pad_findmy(d, len, 0); 12504 if (tmp != NOT_IN_PAD) { 12505 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) { 12506 HV * const stash = PAD_COMPNAME_OURSTASH(tmp); 12507 HEK * const stashname = HvNAME_HEK(stash); 12508 SV * const sym = sv_2mortal(newSVhek(stashname)); 12509 sv_catpvs(sym, "::"); 12510 sv_catpv(sym, d+1); 12511 d = SvPVX(sym); 12512 goto intro_sym; 12513 } 12514 else { 12515 OP * const o = newOP(OP_PADSV, 0); 12516 o->op_targ = tmp; 12517 PL_lex_op = readline_overriden 12518 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED, 12519 append_elem(OP_LIST, o, 12520 newCVREF(0, newGVOP(OP_GV,0,gv_readline)))) 12521 : (OP*)newUNOP(OP_READLINE, 0, o); 12522 } 12523 } 12524 else { 12525 GV *gv; 12526 ++d; 12527 intro_sym: 12528 gv = gv_fetchpv(d, 12529 (PL_in_eval 12530 ? (GV_ADDMULTI | GV_ADDINEVAL) 12531 : GV_ADDMULTI), 12532 SVt_PV); 12533 PL_lex_op = readline_overriden 12534 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED, 12535 append_elem(OP_LIST, 12536 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)), 12537 newCVREF(0, newGVOP(OP_GV, 0, gv_readline)))) 12538 : (OP*)newUNOP(OP_READLINE, 0, 12539 newUNOP(OP_RV2SV, 0, 12540 newGVOP(OP_GV, 0, gv))); 12541 } 12542 if (!readline_overriden) 12543 PL_lex_op->op_flags |= OPf_SPECIAL; 12544 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */ 12545 pl_yylval.ival = OP_NULL; 12546 } 12547 12548 /* If it's none of the above, it must be a literal filehandle 12549 (<Foo::BAR> or <FOO>) so build a simple readline OP */ 12550 else { 12551 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO); 12552 PL_lex_op = readline_overriden 12553 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED, 12554 append_elem(OP_LIST, 12555 newGVOP(OP_GV, 0, gv), 12556 newCVREF(0, newGVOP(OP_GV, 0, gv_readline)))) 12557 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv)); 12558 pl_yylval.ival = OP_NULL; 12559 } 12560 } 12561 12562 return s; 12563 } 12564 12565 12566 /* scan_str 12567 takes: start position in buffer 12568 keep_quoted preserve \ on the embedded delimiter(s) 12569 keep_delims preserve the delimiters around the string 12570 returns: position to continue reading from buffer 12571 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and 12572 updates the read buffer. 12573 12574 This subroutine pulls a string out of the input. It is called for: 12575 q single quotes q(literal text) 12576 ' single quotes 'literal text' 12577 qq double quotes qq(interpolate $here please) 12578 " double quotes "interpolate $here please" 12579 qx backticks qx(/bin/ls -l) 12580 ` backticks `/bin/ls -l` 12581 qw quote words @EXPORT_OK = qw( func() $spam ) 12582 m// regexp match m/this/ 12583 s/// regexp substitute s/this/that/ 12584 tr/// string transliterate tr/this/that/ 12585 y/// string transliterate y/this/that/ 12586 ($*@) sub prototypes sub foo ($) 12587 (stuff) sub attr parameters sub foo : attr(stuff) 12588 <> readline or globs <FOO>, <>, <$fh>, or <*.c> 12589 12590 In most of these cases (all but <>, patterns and transliterate) 12591 yylex() calls scan_str(). m// makes yylex() call scan_pat() which 12592 calls scan_str(). s/// makes yylex() call scan_subst() which calls 12593 scan_str(). tr/// and y/// make yylex() call scan_trans() which 12594 calls scan_str(). 12595 12596 It skips whitespace before the string starts, and treats the first 12597 character as the delimiter. If the delimiter is one of ([{< then 12598 the corresponding "close" character )]}> is used as the closing 12599 delimiter. It allows quoting of delimiters, and if the string has 12600 balanced delimiters ([{<>}]) it allows nesting. 12601 12602 On success, the SV with the resulting string is put into lex_stuff or, 12603 if that is already non-NULL, into lex_repl. The second case occurs only 12604 when parsing the RHS of the special constructs s/// and tr/// (y///). 12605 For convenience, the terminating delimiter character is stuffed into 12606 SvIVX of the SV. 12607 */ 12608 12609 STATIC char * 12610 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) 12611 { 12612 dVAR; 12613 SV *sv; /* scalar value: string */ 12614 const char *tmps; /* temp string, used for delimiter matching */ 12615 register char *s = start; /* current position in the buffer */ 12616 register char term; /* terminating character */ 12617 register char *to; /* current position in the sv's data */ 12618 I32 brackets = 1; /* bracket nesting level */ 12619 bool has_utf8 = FALSE; /* is there any utf8 content? */ 12620 I32 termcode; /* terminating char. code */ 12621 U8 termstr[UTF8_MAXBYTES]; /* terminating string */ 12622 STRLEN termlen; /* length of terminating string */ 12623 int last_off = 0; /* last position for nesting bracket */ 12624 #ifdef PERL_MAD 12625 int stuffstart; 12626 char *tstart; 12627 #endif 12628 12629 PERL_ARGS_ASSERT_SCAN_STR; 12630 12631 /* skip space before the delimiter */ 12632 if (isSPACE(*s)) { 12633 s = PEEKSPACE(s); 12634 } 12635 12636 #ifdef PERL_MAD 12637 if (PL_realtokenstart >= 0) { 12638 stuffstart = PL_realtokenstart; 12639 PL_realtokenstart = -1; 12640 } 12641 else 12642 stuffstart = start - SvPVX(PL_linestr); 12643 #endif 12644 /* mark where we are, in case we need to report errors */ 12645 CLINE; 12646 12647 /* after skipping whitespace, the next character is the terminator */ 12648 term = *s; 12649 if (!UTF) { 12650 termcode = termstr[0] = term; 12651 termlen = 1; 12652 } 12653 else { 12654 termcode = utf8_to_uvchr((U8*)s, &termlen); 12655 Copy(s, termstr, termlen, U8); 12656 if (!UTF8_IS_INVARIANT(term)) 12657 has_utf8 = TRUE; 12658 } 12659 12660 /* mark where we are */ 12661 PL_multi_start = CopLINE(PL_curcop); 12662 PL_multi_open = term; 12663 12664 /* find corresponding closing delimiter */ 12665 if (term && (tmps = strchr("([{< )]}> )]}>",term))) 12666 termcode = termstr[0] = term = tmps[5]; 12667 12668 PL_multi_close = term; 12669 12670 /* create a new SV to hold the contents. 79 is the SV's initial length. 12671 What a random number. */ 12672 sv = newSV_type(SVt_PVIV); 12673 SvGROW(sv, 80); 12674 SvIV_set(sv, termcode); 12675 (void)SvPOK_only(sv); /* validate pointer */ 12676 12677 /* move past delimiter and try to read a complete string */ 12678 if (keep_delims) 12679 sv_catpvn(sv, s, termlen); 12680 s += termlen; 12681 #ifdef PERL_MAD 12682 tstart = SvPVX(PL_linestr) + stuffstart; 12683 if (!PL_thisopen && !keep_delims) { 12684 PL_thisopen = newSVpvn(tstart, s - tstart); 12685 stuffstart = s - SvPVX(PL_linestr); 12686 } 12687 #endif 12688 for (;;) { 12689 if (PL_encoding && !UTF) { 12690 bool cont = TRUE; 12691 12692 while (cont) { 12693 int offset = s - SvPVX_const(PL_linestr); 12694 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr, 12695 &offset, (char*)termstr, termlen); 12696 const char * const ns = SvPVX_const(PL_linestr) + offset; 12697 char * const svlast = SvEND(sv) - 1; 12698 12699 for (; s < ns; s++) { 12700 if (*s == '\n' && !PL_rsfp) 12701 CopLINE_inc(PL_curcop); 12702 } 12703 if (!found) 12704 goto read_more_line; 12705 else { 12706 /* handle quoted delimiters */ 12707 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') { 12708 const char *t; 12709 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';) 12710 t--; 12711 if ((svlast-1 - t) % 2) { 12712 if (!keep_quoted) { 12713 *(svlast-1) = term; 12714 *svlast = '\0'; 12715 SvCUR_set(sv, SvCUR(sv) - 1); 12716 } 12717 continue; 12718 } 12719 } 12720 if (PL_multi_open == PL_multi_close) { 12721 cont = FALSE; 12722 } 12723 else { 12724 const char *t; 12725 char *w; 12726 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) { 12727 /* At here, all closes are "was quoted" one, 12728 so we don't check PL_multi_close. */ 12729 if (*t == '\\') { 12730 if (!keep_quoted && *(t+1) == PL_multi_open) 12731 t++; 12732 else 12733 *w++ = *t++; 12734 } 12735 else if (*t == PL_multi_open) 12736 brackets++; 12737 12738 *w = *t; 12739 } 12740 if (w < t) { 12741 *w++ = term; 12742 *w = '\0'; 12743 SvCUR_set(sv, w - SvPVX_const(sv)); 12744 } 12745 last_off = w - SvPVX(sv); 12746 if (--brackets <= 0) 12747 cont = FALSE; 12748 } 12749 } 12750 } 12751 if (!keep_delims) { 12752 SvCUR_set(sv, SvCUR(sv) - 1); 12753 *SvEND(sv) = '\0'; 12754 } 12755 break; 12756 } 12757 12758 /* extend sv if need be */ 12759 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1); 12760 /* set 'to' to the next character in the sv's string */ 12761 to = SvPVX(sv)+SvCUR(sv); 12762 12763 /* if open delimiter is the close delimiter read unbridle */ 12764 if (PL_multi_open == PL_multi_close) { 12765 for (; s < PL_bufend; s++,to++) { 12766 /* embedded newlines increment the current line number */ 12767 if (*s == '\n' && !PL_rsfp) 12768 CopLINE_inc(PL_curcop); 12769 /* handle quoted delimiters */ 12770 if (*s == '\\' && s+1 < PL_bufend && term != '\\') { 12771 if (!keep_quoted && s[1] == term) 12772 s++; 12773 /* any other quotes are simply copied straight through */ 12774 else 12775 *to++ = *s++; 12776 } 12777 /* terminate when run out of buffer (the for() condition), or 12778 have found the terminator */ 12779 else if (*s == term) { 12780 if (termlen == 1) 12781 break; 12782 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen)) 12783 break; 12784 } 12785 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) 12786 has_utf8 = TRUE; 12787 *to = *s; 12788 } 12789 } 12790 12791 /* if the terminator isn't the same as the start character (e.g., 12792 matched brackets), we have to allow more in the quoting, and 12793 be prepared for nested brackets. 12794 */ 12795 else { 12796 /* read until we run out of string, or we find the terminator */ 12797 for (; s < PL_bufend; s++,to++) { 12798 /* embedded newlines increment the line count */ 12799 if (*s == '\n' && !PL_rsfp) 12800 CopLINE_inc(PL_curcop); 12801 /* backslashes can escape the open or closing characters */ 12802 if (*s == '\\' && s+1 < PL_bufend) { 12803 if (!keep_quoted && 12804 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close))) 12805 s++; 12806 else 12807 *to++ = *s++; 12808 } 12809 /* allow nested opens and closes */ 12810 else if (*s == PL_multi_close && --brackets <= 0) 12811 break; 12812 else if (*s == PL_multi_open) 12813 brackets++; 12814 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) 12815 has_utf8 = TRUE; 12816 *to = *s; 12817 } 12818 } 12819 /* terminate the copied string and update the sv's end-of-string */ 12820 *to = '\0'; 12821 SvCUR_set(sv, to - SvPVX_const(sv)); 12822 12823 /* 12824 * this next chunk reads more into the buffer if we're not done yet 12825 */ 12826 12827 if (s < PL_bufend) 12828 break; /* handle case where we are done yet :-) */ 12829 12830 #ifndef PERL_STRICT_CR 12831 if (to - SvPVX_const(sv) >= 2) { 12832 if ((to[-2] == '\r' && to[-1] == '\n') || 12833 (to[-2] == '\n' && to[-1] == '\r')) 12834 { 12835 to[-2] = '\n'; 12836 to--; 12837 SvCUR_set(sv, to - SvPVX_const(sv)); 12838 } 12839 else if (to[-1] == '\r') 12840 to[-1] = '\n'; 12841 } 12842 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r') 12843 to[-1] = '\n'; 12844 #endif 12845 12846 read_more_line: 12847 /* if we're out of file, or a read fails, bail and reset the current 12848 line marker so we can report where the unterminated string began 12849 */ 12850 #ifdef PERL_MAD 12851 if (PL_madskills) { 12852 char * const tstart = SvPVX(PL_linestr) + stuffstart; 12853 if (PL_thisstuff) 12854 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart); 12855 else 12856 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart); 12857 } 12858 #endif 12859 CopLINE_inc(PL_curcop); 12860 PL_bufptr = PL_bufend; 12861 if (!lex_next_chunk(0)) { 12862 sv_free(sv); 12863 CopLINE_set(PL_curcop, (line_t)PL_multi_start); 12864 return NULL; 12865 } 12866 s = PL_bufptr; 12867 #ifdef PERL_MAD 12868 stuffstart = 0; 12869 #endif 12870 } 12871 12872 /* at this point, we have successfully read the delimited string */ 12873 12874 if (!PL_encoding || UTF) { 12875 #ifdef PERL_MAD 12876 if (PL_madskills) { 12877 char * const tstart = SvPVX(PL_linestr) + stuffstart; 12878 const int len = s - tstart; 12879 if (PL_thisstuff) 12880 sv_catpvn(PL_thisstuff, tstart, len); 12881 else 12882 PL_thisstuff = newSVpvn(tstart, len); 12883 if (!PL_thisclose && !keep_delims) 12884 PL_thisclose = newSVpvn(s,termlen); 12885 } 12886 #endif 12887 12888 if (keep_delims) 12889 sv_catpvn(sv, s, termlen); 12890 s += termlen; 12891 } 12892 #ifdef PERL_MAD 12893 else { 12894 if (PL_madskills) { 12895 char * const tstart = SvPVX(PL_linestr) + stuffstart; 12896 const int len = s - tstart - termlen; 12897 if (PL_thisstuff) 12898 sv_catpvn(PL_thisstuff, tstart, len); 12899 else 12900 PL_thisstuff = newSVpvn(tstart, len); 12901 if (!PL_thisclose && !keep_delims) 12902 PL_thisclose = newSVpvn(s - termlen,termlen); 12903 } 12904 } 12905 #endif 12906 if (has_utf8 || PL_encoding) 12907 SvUTF8_on(sv); 12908 12909 PL_multi_end = CopLINE(PL_curcop); 12910 12911 /* if we allocated too much space, give some back */ 12912 if (SvCUR(sv) + 5 < SvLEN(sv)) { 12913 SvLEN_set(sv, SvCUR(sv) + 1); 12914 SvPV_renew(sv, SvLEN(sv)); 12915 } 12916 12917 /* decide whether this is the first or second quoted string we've read 12918 for this op 12919 */ 12920 12921 if (PL_lex_stuff) 12922 PL_lex_repl = sv; 12923 else 12924 PL_lex_stuff = sv; 12925 return s; 12926 } 12927 12928 /* 12929 scan_num 12930 takes: pointer to position in buffer 12931 returns: pointer to new position in buffer 12932 side-effects: builds ops for the constant in pl_yylval.op 12933 12934 Read a number in any of the formats that Perl accepts: 12935 12936 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12. 12937 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34 12938 0b[01](_?[01])* 12939 0[0-7](_?[0-7])* 12940 0x[0-9A-Fa-f](_?[0-9A-Fa-f])* 12941 12942 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the 12943 thing it reads. 12944 12945 If it reads a number without a decimal point or an exponent, it will 12946 try converting the number to an integer and see if it can do so 12947 without loss of precision. 12948 */ 12949 12950 char * 12951 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) 12952 { 12953 dVAR; 12954 register const char *s = start; /* current position in buffer */ 12955 register char *d; /* destination in temp buffer */ 12956 register char *e; /* end of temp buffer */ 12957 NV nv; /* number read, as a double */ 12958 SV *sv = NULL; /* place to put the converted number */ 12959 bool floatit; /* boolean: int or float? */ 12960 const char *lastub = NULL; /* position of last underbar */ 12961 static char const number_too_long[] = "Number too long"; 12962 12963 PERL_ARGS_ASSERT_SCAN_NUM; 12964 12965 /* We use the first character to decide what type of number this is */ 12966 12967 switch (*s) { 12968 default: 12969 Perl_croak(aTHX_ "panic: scan_num"); 12970 12971 /* if it starts with a 0, it could be an octal number, a decimal in 12972 0.13 disguise, or a hexadecimal number, or a binary number. */ 12973 case '0': 12974 { 12975 /* variables: 12976 u holds the "number so far" 12977 shift the power of 2 of the base 12978 (hex == 4, octal == 3, binary == 1) 12979 overflowed was the number more than we can hold? 12980 12981 Shift is used when we add a digit. It also serves as an "are 12982 we in octal/hex/binary?" indicator to disallow hex characters 12983 when in octal mode. 12984 */ 12985 NV n = 0.0; 12986 UV u = 0; 12987 I32 shift; 12988 bool overflowed = FALSE; 12989 bool just_zero = TRUE; /* just plain 0 or binary number? */ 12990 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 }; 12991 static const char* const bases[5] = 12992 { "", "binary", "", "octal", "hexadecimal" }; 12993 static const char* const Bases[5] = 12994 { "", "Binary", "", "Octal", "Hexadecimal" }; 12995 static const char* const maxima[5] = 12996 { "", 12997 "0b11111111111111111111111111111111", 12998 "", 12999 "037777777777", 13000 "0xffffffff" }; 13001 const char *base, *Base, *max; 13002 13003 /* check for hex */ 13004 if (s[1] == 'x') { 13005 shift = 4; 13006 s += 2; 13007 just_zero = FALSE; 13008 } else if (s[1] == 'b') { 13009 shift = 1; 13010 s += 2; 13011 just_zero = FALSE; 13012 } 13013 /* check for a decimal in disguise */ 13014 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E') 13015 goto decimal; 13016 /* so it must be octal */ 13017 else { 13018 shift = 3; 13019 s++; 13020 } 13021 13022 if (*s == '_') { 13023 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 13024 "Misplaced _ in number"); 13025 lastub = s++; 13026 } 13027 13028 base = bases[shift]; 13029 Base = Bases[shift]; 13030 max = maxima[shift]; 13031 13032 /* read the rest of the number */ 13033 for (;;) { 13034 /* x is used in the overflow test, 13035 b is the digit we're adding on. */ 13036 UV x, b; 13037 13038 switch (*s) { 13039 13040 /* if we don't mention it, we're done */ 13041 default: 13042 goto out; 13043 13044 /* _ are ignored -- but warned about if consecutive */ 13045 case '_': 13046 if (lastub && s == lastub + 1) 13047 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 13048 "Misplaced _ in number"); 13049 lastub = s++; 13050 break; 13051 13052 /* 8 and 9 are not octal */ 13053 case '8': case '9': 13054 if (shift == 3) 13055 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s)); 13056 /* FALL THROUGH */ 13057 13058 /* octal digits */ 13059 case '2': case '3': case '4': 13060 case '5': case '6': case '7': 13061 if (shift == 1) 13062 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s)); 13063 /* FALL THROUGH */ 13064 13065 case '0': case '1': 13066 b = *s++ & 15; /* ASCII digit -> value of digit */ 13067 goto digit; 13068 13069 /* hex digits */ 13070 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 13071 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 13072 /* make sure they said 0x */ 13073 if (shift != 4) 13074 goto out; 13075 b = (*s++ & 7) + 9; 13076 13077 /* Prepare to put the digit we have onto the end 13078 of the number so far. We check for overflows. 13079 */ 13080 13081 digit: 13082 just_zero = FALSE; 13083 if (!overflowed) { 13084 x = u << shift; /* make room for the digit */ 13085 13086 if ((x >> shift) != u 13087 && !(PL_hints & HINT_NEW_BINARY)) { 13088 overflowed = TRUE; 13089 n = (NV) u; 13090 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW), 13091 "Integer overflow in %s number", 13092 base); 13093 } else 13094 u = x | b; /* add the digit to the end */ 13095 } 13096 if (overflowed) { 13097 n *= nvshift[shift]; 13098 /* If an NV has not enough bits in its 13099 * mantissa to represent an UV this summing of 13100 * small low-order numbers is a waste of time 13101 * (because the NV cannot preserve the 13102 * low-order bits anyway): we could just 13103 * remember when did we overflow and in the 13104 * end just multiply n by the right 13105 * amount. */ 13106 n += (NV) b; 13107 } 13108 break; 13109 } 13110 } 13111 13112 /* if we get here, we had success: make a scalar value from 13113 the number. 13114 */ 13115 out: 13116 13117 /* final misplaced underbar check */ 13118 if (s[-1] == '_') { 13119 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number"); 13120 } 13121 13122 sv = newSV(0); 13123 if (overflowed) { 13124 if (n > 4294967295.0) 13125 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 13126 "%s number > %s non-portable", 13127 Base, max); 13128 sv_setnv(sv, n); 13129 } 13130 else { 13131 #if UVSIZE > 4 13132 if (u > 0xffffffff) 13133 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 13134 "%s number > %s non-portable", 13135 Base, max); 13136 #endif 13137 sv_setuv(sv, u); 13138 } 13139 if (just_zero && (PL_hints & HINT_NEW_INTEGER)) 13140 sv = new_constant(start, s - start, "integer", 13141 sv, NULL, NULL, 0); 13142 else if (PL_hints & HINT_NEW_BINARY) 13143 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0); 13144 } 13145 break; 13146 13147 /* 13148 handle decimal numbers. 13149 we're also sent here when we read a 0 as the first digit 13150 */ 13151 case '1': case '2': case '3': case '4': case '5': 13152 case '6': case '7': case '8': case '9': case '.': 13153 decimal: 13154 d = PL_tokenbuf; 13155 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */ 13156 floatit = FALSE; 13157 13158 /* read next group of digits and _ and copy into d */ 13159 while (isDIGIT(*s) || *s == '_') { 13160 /* skip underscores, checking for misplaced ones 13161 if -w is on 13162 */ 13163 if (*s == '_') { 13164 if (lastub && s == lastub + 1) 13165 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 13166 "Misplaced _ in number"); 13167 lastub = s++; 13168 } 13169 else { 13170 /* check for end of fixed-length buffer */ 13171 if (d >= e) 13172 Perl_croak(aTHX_ number_too_long); 13173 /* if we're ok, copy the character */ 13174 *d++ = *s++; 13175 } 13176 } 13177 13178 /* final misplaced underbar check */ 13179 if (lastub && s == lastub + 1) { 13180 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number"); 13181 } 13182 13183 /* read a decimal portion if there is one. avoid 13184 3..5 being interpreted as the number 3. followed 13185 by .5 13186 */ 13187 if (*s == '.' && s[1] != '.') { 13188 floatit = TRUE; 13189 *d++ = *s++; 13190 13191 if (*s == '_') { 13192 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 13193 "Misplaced _ in number"); 13194 lastub = s; 13195 } 13196 13197 /* copy, ignoring underbars, until we run out of digits. 13198 */ 13199 for (; isDIGIT(*s) || *s == '_'; s++) { 13200 /* fixed length buffer check */ 13201 if (d >= e) 13202 Perl_croak(aTHX_ number_too_long); 13203 if (*s == '_') { 13204 if (lastub && s == lastub + 1) 13205 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 13206 "Misplaced _ in number"); 13207 lastub = s; 13208 } 13209 else 13210 *d++ = *s; 13211 } 13212 /* fractional part ending in underbar? */ 13213 if (s[-1] == '_') { 13214 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 13215 "Misplaced _ in number"); 13216 } 13217 if (*s == '.' && isDIGIT(s[1])) { 13218 /* oops, it's really a v-string, but without the "v" */ 13219 s = start; 13220 goto vstring; 13221 } 13222 } 13223 13224 /* read exponent part, if present */ 13225 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) { 13226 floatit = TRUE; 13227 s++; 13228 13229 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */ 13230 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */ 13231 13232 /* stray preinitial _ */ 13233 if (*s == '_') { 13234 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 13235 "Misplaced _ in number"); 13236 lastub = s++; 13237 } 13238 13239 /* allow positive or negative exponent */ 13240 if (*s == '+' || *s == '-') 13241 *d++ = *s++; 13242 13243 /* stray initial _ */ 13244 if (*s == '_') { 13245 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 13246 "Misplaced _ in number"); 13247 lastub = s++; 13248 } 13249 13250 /* read digits of exponent */ 13251 while (isDIGIT(*s) || *s == '_') { 13252 if (isDIGIT(*s)) { 13253 if (d >= e) 13254 Perl_croak(aTHX_ number_too_long); 13255 *d++ = *s++; 13256 } 13257 else { 13258 if (((lastub && s == lastub + 1) || 13259 (!isDIGIT(s[1]) && s[1] != '_'))) 13260 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 13261 "Misplaced _ in number"); 13262 lastub = s++; 13263 } 13264 } 13265 } 13266 13267 13268 /* make an sv from the string */ 13269 sv = newSV(0); 13270 13271 /* 13272 We try to do an integer conversion first if no characters 13273 indicating "float" have been found. 13274 */ 13275 13276 if (!floatit) { 13277 UV uv; 13278 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv); 13279 13280 if (flags == IS_NUMBER_IN_UV) { 13281 if (uv <= IV_MAX) 13282 sv_setiv(sv, uv); /* Prefer IVs over UVs. */ 13283 else 13284 sv_setuv(sv, uv); 13285 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) { 13286 if (uv <= (UV) IV_MIN) 13287 sv_setiv(sv, -(IV)uv); 13288 else 13289 floatit = TRUE; 13290 } else 13291 floatit = TRUE; 13292 } 13293 if (floatit) { 13294 /* terminate the string */ 13295 *d = '\0'; 13296 nv = Atof(PL_tokenbuf); 13297 sv_setnv(sv, nv); 13298 } 13299 13300 if ( floatit 13301 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) { 13302 const char *const key = floatit ? "float" : "integer"; 13303 const STRLEN keylen = floatit ? 5 : 7; 13304 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf, 13305 key, keylen, sv, NULL, NULL, 0); 13306 } 13307 break; 13308 13309 /* if it starts with a v, it could be a v-string */ 13310 case 'v': 13311 vstring: 13312 sv = newSV(5); /* preallocate storage space */ 13313 s = scan_vstring(s, PL_bufend, sv); 13314 break; 13315 } 13316 13317 /* make the op for the constant and return */ 13318 13319 if (sv) 13320 lvalp->opval = newSVOP(OP_CONST, 0, sv); 13321 else 13322 lvalp->opval = NULL; 13323 13324 return (char *)s; 13325 } 13326 13327 STATIC char * 13328 S_scan_formline(pTHX_ register char *s) 13329 { 13330 dVAR; 13331 register char *eol; 13332 register char *t; 13333 SV * const stuff = newSVpvs(""); 13334 bool needargs = FALSE; 13335 bool eofmt = FALSE; 13336 #ifdef PERL_MAD 13337 char *tokenstart = s; 13338 SV* savewhite = NULL; 13339 13340 if (PL_madskills) { 13341 savewhite = PL_thiswhite; 13342 PL_thiswhite = 0; 13343 } 13344 #endif 13345 13346 PERL_ARGS_ASSERT_SCAN_FORMLINE; 13347 13348 while (!needargs) { 13349 if (*s == '.') { 13350 t = s+1; 13351 #ifdef PERL_STRICT_CR 13352 while (SPACE_OR_TAB(*t)) 13353 t++; 13354 #else 13355 while (SPACE_OR_TAB(*t) || *t == '\r') 13356 t++; 13357 #endif 13358 if (*t == '\n' || t == PL_bufend) { 13359 eofmt = TRUE; 13360 break; 13361 } 13362 } 13363 if (PL_in_eval && !PL_rsfp) { 13364 eol = (char *) memchr(s,'\n',PL_bufend-s); 13365 if (!eol++) 13366 eol = PL_bufend; 13367 } 13368 else 13369 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 13370 if (*s != '#') { 13371 for (t = s; t < eol; t++) { 13372 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) { 13373 needargs = FALSE; 13374 goto enough; /* ~~ must be first line in formline */ 13375 } 13376 if (*t == '@' || *t == '^') 13377 needargs = TRUE; 13378 } 13379 if (eol > s) { 13380 sv_catpvn(stuff, s, eol-s); 13381 #ifndef PERL_STRICT_CR 13382 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') { 13383 char *end = SvPVX(stuff) + SvCUR(stuff); 13384 end[-2] = '\n'; 13385 end[-1] = '\0'; 13386 SvCUR_set(stuff, SvCUR(stuff) - 1); 13387 } 13388 #endif 13389 } 13390 else 13391 break; 13392 } 13393 s = (char*)eol; 13394 if (PL_rsfp) { 13395 bool got_some; 13396 #ifdef PERL_MAD 13397 if (PL_madskills) { 13398 if (PL_thistoken) 13399 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart); 13400 else 13401 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart); 13402 } 13403 #endif 13404 PL_bufptr = PL_bufend; 13405 CopLINE_inc(PL_curcop); 13406 got_some = lex_next_chunk(0); 13407 CopLINE_dec(PL_curcop); 13408 s = PL_bufptr; 13409 #ifdef PERL_MAD 13410 tokenstart = PL_bufptr; 13411 #endif 13412 if (!got_some) 13413 break; 13414 } 13415 incline(s); 13416 } 13417 enough: 13418 if (SvCUR(stuff)) { 13419 PL_expect = XTERM; 13420 if (needargs) { 13421 PL_lex_state = LEX_NORMAL; 13422 start_force(PL_curforce); 13423 NEXTVAL_NEXTTOKE.ival = 0; 13424 force_next(','); 13425 } 13426 else 13427 PL_lex_state = LEX_FORMLINE; 13428 if (!IN_BYTES) { 13429 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff))) 13430 SvUTF8_on(stuff); 13431 else if (PL_encoding) 13432 sv_recode_to_utf8(stuff, PL_encoding); 13433 } 13434 start_force(PL_curforce); 13435 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff); 13436 force_next(THING); 13437 start_force(PL_curforce); 13438 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE; 13439 force_next(LSTOP); 13440 } 13441 else { 13442 SvREFCNT_dec(stuff); 13443 if (eofmt) 13444 PL_lex_formbrack = 0; 13445 PL_bufptr = s; 13446 } 13447 #ifdef PERL_MAD 13448 if (PL_madskills) { 13449 if (PL_thistoken) 13450 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart); 13451 else 13452 PL_thistoken = newSVpvn(tokenstart, s - tokenstart); 13453 PL_thiswhite = savewhite; 13454 } 13455 #endif 13456 return s; 13457 } 13458 13459 I32 13460 Perl_start_subparse(pTHX_ I32 is_format, U32 flags) 13461 { 13462 dVAR; 13463 const I32 oldsavestack_ix = PL_savestack_ix; 13464 CV* const outsidecv = PL_compcv; 13465 13466 if (PL_compcv) { 13467 assert(SvTYPE(PL_compcv) == SVt_PVCV); 13468 } 13469 SAVEI32(PL_subline); 13470 save_item(PL_subname); 13471 SAVESPTR(PL_compcv); 13472 13473 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV)); 13474 CvFLAGS(PL_compcv) |= flags; 13475 13476 PL_subline = CopLINE(PL_curcop); 13477 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB); 13478 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv)); 13479 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax; 13480 13481 return oldsavestack_ix; 13482 } 13483 13484 #ifdef __SC__ 13485 #pragma segment Perl_yylex 13486 #endif 13487 static int 13488 S_yywarn(pTHX_ const char *const s) 13489 { 13490 dVAR; 13491 13492 PERL_ARGS_ASSERT_YYWARN; 13493 13494 PL_in_eval |= EVAL_WARNONLY; 13495 yyerror(s); 13496 PL_in_eval &= ~EVAL_WARNONLY; 13497 return 0; 13498 } 13499 13500 int 13501 Perl_yyerror(pTHX_ const char *const s) 13502 { 13503 dVAR; 13504 const char *where = NULL; 13505 const char *context = NULL; 13506 int contlen = -1; 13507 SV *msg; 13508 int yychar = PL_parser->yychar; 13509 13510 PERL_ARGS_ASSERT_YYERROR; 13511 13512 if (!yychar || (yychar == ';' && !PL_rsfp)) 13513 where = "at EOF"; 13514 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr && 13515 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr && 13516 PL_oldbufptr != PL_bufptr) { 13517 /* 13518 Only for NetWare: 13519 The code below is removed for NetWare because it abends/crashes on NetWare 13520 when the script has error such as not having the closing quotes like: 13521 if ($var eq "value) 13522 Checking of white spaces is anyway done in NetWare code. 13523 */ 13524 #ifndef NETWARE 13525 while (isSPACE(*PL_oldoldbufptr)) 13526 PL_oldoldbufptr++; 13527 #endif 13528 context = PL_oldoldbufptr; 13529 contlen = PL_bufptr - PL_oldoldbufptr; 13530 } 13531 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr && 13532 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) { 13533 /* 13534 Only for NetWare: 13535 The code below is removed for NetWare because it abends/crashes on NetWare 13536 when the script has error such as not having the closing quotes like: 13537 if ($var eq "value) 13538 Checking of white spaces is anyway done in NetWare code. 13539 */ 13540 #ifndef NETWARE 13541 while (isSPACE(*PL_oldbufptr)) 13542 PL_oldbufptr++; 13543 #endif 13544 context = PL_oldbufptr; 13545 contlen = PL_bufptr - PL_oldbufptr; 13546 } 13547 else if (yychar > 255) 13548 where = "next token ???"; 13549 else if (yychar == -2) { /* YYEMPTY */ 13550 if (PL_lex_state == LEX_NORMAL || 13551 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL)) 13552 where = "at end of line"; 13553 else if (PL_lex_inpat) 13554 where = "within pattern"; 13555 else 13556 where = "within string"; 13557 } 13558 else { 13559 SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP); 13560 if (yychar < 32) 13561 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar)); 13562 else if (isPRINT_LC(yychar)) { 13563 const char string = yychar; 13564 sv_catpvn(where_sv, &string, 1); 13565 } 13566 else 13567 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255); 13568 where = SvPVX_const(where_sv); 13569 } 13570 msg = sv_2mortal(newSVpv(s, 0)); 13571 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ", 13572 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop)); 13573 if (context) 13574 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context); 13575 else 13576 Perl_sv_catpvf(aTHX_ msg, "%s\n", where); 13577 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) { 13578 Perl_sv_catpvf(aTHX_ msg, 13579 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n", 13580 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start); 13581 PL_multi_end = 0; 13582 } 13583 if (PL_in_eval & EVAL_WARNONLY) { 13584 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg)); 13585 } 13586 else 13587 qerror(msg); 13588 if (PL_error_count >= 10) { 13589 if (PL_in_eval && SvCUR(ERRSV)) 13590 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n", 13591 SVfARG(ERRSV), OutCopFILE(PL_curcop)); 13592 else 13593 Perl_croak(aTHX_ "%s has too many errors.\n", 13594 OutCopFILE(PL_curcop)); 13595 } 13596 PL_in_my = 0; 13597 PL_in_my_stash = NULL; 13598 return 0; 13599 } 13600 #ifdef __SC__ 13601 #pragma segment Main 13602 #endif 13603 13604 STATIC char* 13605 S_swallow_bom(pTHX_ U8 *s) 13606 { 13607 dVAR; 13608 const STRLEN slen = SvCUR(PL_linestr); 13609 13610 PERL_ARGS_ASSERT_SWALLOW_BOM; 13611 13612 switch (s[0]) { 13613 case 0xFF: 13614 if (s[1] == 0xFE) { 13615 /* UTF-16 little-endian? (or UTF-32LE?) */ 13616 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */ 13617 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE"); 13618 #ifndef PERL_NO_UTF16_FILTER 13619 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n"); 13620 s += 2; 13621 if (PL_bufend > (char*)s) { 13622 s = add_utf16_textfilter(s, TRUE); 13623 } 13624 #else 13625 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE"); 13626 #endif 13627 } 13628 break; 13629 case 0xFE: 13630 if (s[1] == 0xFF) { /* UTF-16 big-endian? */ 13631 #ifndef PERL_NO_UTF16_FILTER 13632 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n"); 13633 s += 2; 13634 if (PL_bufend > (char *)s) { 13635 s = add_utf16_textfilter(s, FALSE); 13636 } 13637 #else 13638 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE"); 13639 #endif 13640 } 13641 break; 13642 case 0xEF: 13643 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) { 13644 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n"); 13645 s += 3; /* UTF-8 */ 13646 } 13647 break; 13648 case 0: 13649 if (slen > 3) { 13650 if (s[1] == 0) { 13651 if (s[2] == 0xFE && s[3] == 0xFF) { 13652 /* UTF-32 big-endian */ 13653 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE"); 13654 } 13655 } 13656 else if (s[2] == 0 && s[3] != 0) { 13657 /* Leading bytes 13658 * 00 xx 00 xx 13659 * are a good indicator of UTF-16BE. */ 13660 #ifndef PERL_NO_UTF16_FILTER 13661 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n"); 13662 s = add_utf16_textfilter(s, FALSE); 13663 #else 13664 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE"); 13665 #endif 13666 } 13667 } 13668 #ifdef EBCDIC 13669 case 0xDD: 13670 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) { 13671 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n"); 13672 s += 4; /* UTF-8 */ 13673 } 13674 break; 13675 #endif 13676 13677 default: 13678 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) { 13679 /* Leading bytes 13680 * xx 00 xx 00 13681 * are a good indicator of UTF-16LE. */ 13682 #ifndef PERL_NO_UTF16_FILTER 13683 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n"); 13684 s = add_utf16_textfilter(s, TRUE); 13685 #else 13686 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE"); 13687 #endif 13688 } 13689 } 13690 return (char*)s; 13691 } 13692 13693 13694 #ifndef PERL_NO_UTF16_FILTER 13695 static I32 13696 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen) 13697 { 13698 dVAR; 13699 SV *const filter = FILTER_DATA(idx); 13700 /* We re-use this each time round, throwing the contents away before we 13701 return. */ 13702 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter)); 13703 SV *const utf8_buffer = filter; 13704 IV status = IoPAGE(filter); 13705 const bool reverse = (bool) IoLINES(filter); 13706 I32 retval; 13707 13708 /* As we're automatically added, at the lowest level, and hence only called 13709 from this file, we can be sure that we're not called in block mode. Hence 13710 don't bother writing code to deal with block mode. */ 13711 if (maxlen) { 13712 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen); 13713 } 13714 if (status < 0) { 13715 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status); 13716 } 13717 DEBUG_P(PerlIO_printf(Perl_debug_log, 13718 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n", 13719 FPTR2DPTR(void *, S_utf16_textfilter), 13720 reverse ? 'l' : 'b', idx, maxlen, status, 13721 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer))); 13722 13723 while (1) { 13724 STRLEN chars; 13725 STRLEN have; 13726 I32 newlen; 13727 U8 *end; 13728 /* First, look in our buffer of existing UTF-8 data: */ 13729 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer)); 13730 13731 if (nl) { 13732 ++nl; 13733 } else if (status == 0) { 13734 /* EOF */ 13735 IoPAGE(filter) = 0; 13736 nl = SvEND(utf8_buffer); 13737 } 13738 if (nl) { 13739 STRLEN got = nl - SvPVX(utf8_buffer); 13740 /* Did we have anything to append? */ 13741 retval = got != 0; 13742 sv_catpvn(sv, SvPVX(utf8_buffer), got); 13743 /* Everything else in this code works just fine if SVp_POK isn't 13744 set. This, however, needs it, and we need it to work, else 13745 we loop infinitely because the buffer is never consumed. */ 13746 sv_chop(utf8_buffer, nl); 13747 break; 13748 } 13749 13750 /* OK, not a complete line there, so need to read some more UTF-16. 13751 Read an extra octect if the buffer currently has an odd number. */ 13752 while (1) { 13753 if (status <= 0) 13754 break; 13755 if (SvCUR(utf16_buffer) >= 2) { 13756 /* Location of the high octet of the last complete code point. 13757 Gosh, UTF-16 is a pain. All the benefits of variable length, 13758 *coupled* with all the benefits of partial reads and 13759 endianness. */ 13760 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer) 13761 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2)); 13762 13763 if (*last_hi < 0xd8 || *last_hi > 0xdb) { 13764 break; 13765 } 13766 13767 /* We have the first half of a surrogate. Read more. */ 13768 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi)); 13769 } 13770 13771 status = FILTER_READ(idx + 1, utf16_buffer, 13772 160 + (SvCUR(utf16_buffer) & 1)); 13773 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer))); 13774 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);}); 13775 if (status < 0) { 13776 /* Error */ 13777 IoPAGE(filter) = status; 13778 return status; 13779 } 13780 } 13781 13782 chars = SvCUR(utf16_buffer) >> 1; 13783 have = SvCUR(utf8_buffer); 13784 SvGROW(utf8_buffer, have + chars * 3 + 1); 13785 13786 if (reverse) { 13787 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer), 13788 (U8*)SvPVX_const(utf8_buffer) + have, 13789 chars * 2, &newlen); 13790 } else { 13791 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer), 13792 (U8*)SvPVX_const(utf8_buffer) + have, 13793 chars * 2, &newlen); 13794 } 13795 SvCUR_set(utf8_buffer, have + newlen); 13796 *end = '\0'; 13797 13798 /* No need to keep this SV "well-formed" with a '\0' after the end, as 13799 it's private to us, and utf16_to_utf8{,reversed} take a 13800 (pointer,length) pair, rather than a NUL-terminated string. */ 13801 if(SvCUR(utf16_buffer) & 1) { 13802 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1]; 13803 SvCUR_set(utf16_buffer, 1); 13804 } else { 13805 SvCUR_set(utf16_buffer, 0); 13806 } 13807 } 13808 DEBUG_P(PerlIO_printf(Perl_debug_log, 13809 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n", 13810 status, 13811 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer))); 13812 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);}); 13813 return retval; 13814 } 13815 13816 static U8 * 13817 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed) 13818 { 13819 SV *filter = filter_add(S_utf16_textfilter, NULL); 13820 13821 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s)); 13822 sv_setpvs(filter, ""); 13823 IoLINES(filter) = reversed; 13824 IoPAGE(filter) = 1; /* Not EOF */ 13825 13826 /* Sadly, we have to return a valid pointer, come what may, so we have to 13827 ignore any error return from this. */ 13828 SvCUR_set(PL_linestr, 0); 13829 if (FILTER_READ(0, PL_linestr, 0)) { 13830 SvUTF8_on(PL_linestr); 13831 } else { 13832 SvUTF8_on(PL_linestr); 13833 } 13834 PL_bufend = SvEND(PL_linestr); 13835 return (U8*)SvPVX(PL_linestr); 13836 } 13837 #endif 13838 13839 /* 13840 Returns a pointer to the next character after the parsed 13841 vstring, as well as updating the passed in sv. 13842 13843 Function must be called like 13844 13845 sv = newSV(5); 13846 s = scan_vstring(s,e,sv); 13847 13848 where s and e are the start and end of the string. 13849 The sv should already be large enough to store the vstring 13850 passed in, for performance reasons. 13851 13852 */ 13853 13854 char * 13855 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv) 13856 { 13857 dVAR; 13858 const char *pos = s; 13859 const char *start = s; 13860 13861 PERL_ARGS_ASSERT_SCAN_VSTRING; 13862 13863 if (*pos == 'v') pos++; /* get past 'v' */ 13864 while (pos < e && (isDIGIT(*pos) || *pos == '_')) 13865 pos++; 13866 if ( *pos != '.') { 13867 /* this may not be a v-string if followed by => */ 13868 const char *next = pos; 13869 while (next < e && isSPACE(*next)) 13870 ++next; 13871 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) { 13872 /* return string not v-string */ 13873 sv_setpvn(sv,(char *)s,pos-s); 13874 return (char *)pos; 13875 } 13876 } 13877 13878 if (!isALPHA(*pos)) { 13879 U8 tmpbuf[UTF8_MAXBYTES+1]; 13880 13881 if (*s == 'v') 13882 s++; /* get past 'v' */ 13883 13884 sv_setpvs(sv, ""); 13885 13886 for (;;) { 13887 /* this is atoi() that tolerates underscores */ 13888 U8 *tmpend; 13889 UV rev = 0; 13890 const char *end = pos; 13891 UV mult = 1; 13892 while (--end >= s) { 13893 if (*end != '_') { 13894 const UV orev = rev; 13895 rev += (*end - '0') * mult; 13896 mult *= 10; 13897 if (orev > rev) 13898 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW), 13899 "Integer overflow in decimal number"); 13900 } 13901 } 13902 #ifdef EBCDIC 13903 if (rev > 0x7FFFFFFF) 13904 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647"); 13905 #endif 13906 /* Append native character for the rev point */ 13907 tmpend = uvchr_to_utf8(tmpbuf, rev); 13908 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf); 13909 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev))) 13910 SvUTF8_on(sv); 13911 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1])) 13912 s = ++pos; 13913 else { 13914 s = pos; 13915 break; 13916 } 13917 while (pos < e && (isDIGIT(*pos) || *pos == '_')) 13918 pos++; 13919 } 13920 SvPOK_on(sv); 13921 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start); 13922 SvRMAGICAL_on(sv); 13923 } 13924 return (char *)s; 13925 } 13926 13927 int 13928 Perl_keyword_plugin_standard(pTHX_ 13929 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr) 13930 { 13931 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD; 13932 PERL_UNUSED_CONTEXT; 13933 PERL_UNUSED_ARG(keyword_ptr); 13934 PERL_UNUSED_ARG(keyword_len); 13935 PERL_UNUSED_ARG(op_ptr); 13936 return KEYWORD_PLUGIN_DECLINE; 13937 } 13938 13939 /* 13940 * Local variables: 13941 * c-indentation-style: bsd 13942 * c-basic-offset: 4 13943 * indent-tabs-mode: t 13944 * End: 13945 * 13946 * ex: set ts=8 sts=4 sw=4 noet: 13947 */ 13948