1 /* toke.c
2 *
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
11 /*
12 * 'It all comes from here, the stench and the peril.' --Frodo
13 *
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
15 */
16
17 /*
18 * This file is the lexer for Perl. It's closely linked to the
19 * parser, perly.y.
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
24 /*
25 =head1 Lexer interface
26 This is the lower layer of the Perl parser, managing characters and tokens.
27
28 =for apidoc AmnU|yy_parser *|PL_parser
29
30 Pointer to a structure encapsulating the state of the parsing operation
31 currently in progress. The pointer can be locally changed to perform
32 a nested parse without interfering with the state of an outer parse.
33 Individual members of C<PL_parser> have their own documentation.
34
35 =cut
36 */
37
38 #include "EXTERN.h"
39 #define PERL_IN_TOKE_C
40 #include "perl.h"
41 #include "invlist_inline.h"
42
43 #define new_constant(a,b,c,d,e,f,g, h) \
44 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g, h)
45
46 #define pl_yylval (PL_parser->yylval)
47
48 /* XXX temporary backwards compatibility */
49 #define PL_lex_brackets (PL_parser->lex_brackets)
50 #define PL_lex_allbrackets (PL_parser->lex_allbrackets)
51 #define PL_lex_fakeeof (PL_parser->lex_fakeeof)
52 #define PL_lex_brackstack (PL_parser->lex_brackstack)
53 #define PL_lex_casemods (PL_parser->lex_casemods)
54 #define PL_lex_casestack (PL_parser->lex_casestack)
55 #define PL_lex_dojoin (PL_parser->lex_dojoin)
56 #define PL_lex_formbrack (PL_parser->lex_formbrack)
57 #define PL_lex_inpat (PL_parser->lex_inpat)
58 #define PL_lex_inwhat (PL_parser->lex_inwhat)
59 #define PL_lex_op (PL_parser->lex_op)
60 #define PL_lex_repl (PL_parser->lex_repl)
61 #define PL_lex_starts (PL_parser->lex_starts)
62 #define PL_lex_stuff (PL_parser->lex_stuff)
63 #define PL_multi_start (PL_parser->multi_start)
64 #define PL_multi_open (PL_parser->multi_open)
65 #define PL_multi_close (PL_parser->multi_close)
66 #define PL_preambled (PL_parser->preambled)
67 #define PL_linestr (PL_parser->linestr)
68 #define PL_expect (PL_parser->expect)
69 #define PL_copline (PL_parser->copline)
70 #define PL_bufptr (PL_parser->bufptr)
71 #define PL_oldbufptr (PL_parser->oldbufptr)
72 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
73 #define PL_linestart (PL_parser->linestart)
74 #define PL_bufend (PL_parser->bufend)
75 #define PL_last_uni (PL_parser->last_uni)
76 #define PL_last_lop (PL_parser->last_lop)
77 #define PL_last_lop_op (PL_parser->last_lop_op)
78 #define PL_lex_state (PL_parser->lex_state)
79 #define PL_rsfp (PL_parser->rsfp)
80 #define PL_rsfp_filters (PL_parser->rsfp_filters)
81 #define PL_in_my (PL_parser->in_my)
82 #define PL_in_my_stash (PL_parser->in_my_stash)
83 #define PL_tokenbuf (PL_parser->tokenbuf)
84 #define PL_multi_end (PL_parser->multi_end)
85 #define PL_error_count (PL_parser->error_count)
86
87 # define PL_nexttoke (PL_parser->nexttoke)
88 # define PL_nexttype (PL_parser->nexttype)
89 # define PL_nextval (PL_parser->nextval)
90
91
92 #define SvEVALED(sv) \
93 (SvTYPE(sv) >= SVt_PVNV \
94 && ((XPVIV*)SvANY(sv))->xiv_u.xivu_eval_seen)
95
96 static const char* const ident_too_long = "Identifier too long";
97 static const char* const ident_var_zero_multi_digit = "Numeric variables with more than one digit may not start with '0'";
98
99 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
100
101 #define XENUMMASK 0x3f
102 #define XFAKEEOF 0x40
103 #define XFAKEBRACK 0x80
104
105 #ifdef USE_UTF8_SCRIPTS
106 # define UTF cBOOL(!IN_BYTES)
107 #else
108 # define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
109 #endif
110
111 /* The maximum number of characters preceding the unrecognized one to display */
112 #define UNRECOGNIZED_PRECEDE_COUNT 10
113
114 /* In variables named $^X, these are the legal values for X.
115 * 1999-02-27 mjd-perl-patch@plover.com */
116 #define isCONTROLVAR(x) (isUPPER(x) || memCHRs("[\\]^_?", (x)))
117
118 #define SPACE_OR_TAB(c) isBLANK_A(c)
119
120 #define HEXFP_PEEK(s) \
121 (((s[0] == '.') && \
122 (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \
123 isALPHA_FOLD_EQ(s[0], 'p'))
124
125 /* LEX_* are values for PL_lex_state, the state of the lexer.
126 * They are arranged oddly so that the guard on the switch statement
127 * can get by with a single comparison (if the compiler is smart enough).
128 *
129 * These values refer to the various states within a sublex parse,
130 * i.e. within a double quotish string
131 */
132
133 /* #define LEX_NOTPARSING 11 is done in perl.h. */
134
135 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
136 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
137 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
138 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
139 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
140
141 /* at end of code, eg "$x" followed by: */
142 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
143 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
144
145 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
146 string or after \E, $foo, etc */
147 #define LEX_INTERPCONST 2 /* NOT USED */
148 #define LEX_FORMLINE 1 /* expecting a format line */
149
150 /* returned to yyl_try() to request it to retry the parse loop, expected to only
151 be returned directly by yyl_fake_eof(), but functions that call yyl_fake_eof()
152 can also return it.
153
154 yylex (aka Perl_yylex) returns 0 on EOF rather than returning -1,
155 other token values are 258 or higher (see perly.h), so -1 should be
156 a safe value here.
157 */
158 #define YYL_RETRY (-1)
159
160 #ifdef DEBUGGING
161 static const char* const lex_state_names[] = {
162 "KNOWNEXT",
163 "FORMLINE",
164 "INTERPCONST",
165 "INTERPCONCAT",
166 "INTERPENDMAYBE",
167 "INTERPEND",
168 "INTERPSTART",
169 "INTERPPUSH",
170 "INTERPCASEMOD",
171 "INTERPNORMAL",
172 "NORMAL"
173 };
174 #endif
175
176 #include "keywords.h"
177
178 /* CLINE is a macro that ensures PL_copline has a sane value */
179
180 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
181
182 /*
183 * Convenience functions to return different tokens and prime the
184 * lexer for the next token. They all take an argument.
185 *
186 * TOKEN : generic token (used for '(', DOLSHARP, etc)
187 * OPERATOR : generic operator
188 * AOPERATOR : assignment operator
189 * PREBLOCK : beginning the block after an if, while, foreach, ...
190 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
191 * PREREF : *EXPR where EXPR is not a simple identifier
192 * TERM : expression term
193 * POSTDEREF : postfix dereference (->$* ->@[...] etc.)
194 * LOOPX : loop exiting command (goto, last, dump, etc)
195 * FTST : file test operator
196 * FUN0 : zero-argument function
197 * FUN0OP : zero-argument function, with its op created in this file
198 * FUN1 : not used, except for not, which isn't a UNIOP
199 * BOop : bitwise or or xor
200 * BAop : bitwise and
201 * BCop : bitwise complement
202 * SHop : shift operator
203 * PWop : power operator
204 * PMop : pattern-matching operator
205 * Aop : addition-level operator
206 * AopNOASSIGN : addition-level operator that is never part of .=
207 * Mop : multiplication-level operator
208 * ChEop : chaining equality-testing operator
209 * NCEop : non-chaining comparison operator at equality precedence
210 * ChRop : chaining relational operator <= != gt
211 * NCRop : non-chaining relational operator isa
212 *
213 * Also see LOP and lop() below.
214 */
215
216 #ifdef DEBUGGING /* Serve -DT. */
217 # define REPORT(retval) tokereport((I32)retval, &pl_yylval)
218 #else
219 # define REPORT(retval) (retval)
220 #endif
221
222 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
223 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
224 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
225 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
226 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
227 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
228 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
229 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
230 #define LOOPX(f) return (PL_bufptr = force_word(s,BAREWORD,TRUE,FALSE), \
231 pl_yylval.ival=f, \
232 PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
233 REPORT((int)LOOPEX))
234 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
235 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
236 #define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
237 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
238 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
239 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
240 #define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \
241 REPORT('~')
242 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
243 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
244 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
245 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
246 #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
247 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
248 #define ChEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHEQOP))
249 #define NCEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCEQOP))
250 #define ChRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHRELOP))
251 #define NCRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCRELOP))
252
253 /* This bit of chicanery makes a unary function followed by
254 * a parenthesis into a function with one argument, highest precedence.
255 * The UNIDOR macro is for unary functions that can be followed by the //
256 * operator (such as C<shift // 0>).
257 */
258 #define UNI3(f,x,have_x) { \
259 pl_yylval.ival = f; \
260 if (have_x) PL_expect = x; \
261 PL_bufptr = s; \
262 PL_last_uni = PL_oldbufptr; \
263 PL_last_lop_op = (f) < 0 ? -(f) : (f); \
264 if (*s == '(') \
265 return REPORT( (int)FUNC1 ); \
266 s = skipspace(s); \
267 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
268 }
269 #define UNI(f) UNI3(f,XTERM,1)
270 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
271 #define UNIPROTO(f,optional) { \
272 if (optional) PL_last_uni = PL_oldbufptr; \
273 OPERATOR(f); \
274 }
275
276 #define UNIBRACK(f) UNI3(f,0,0)
277
278 /* grandfather return to old style */
279 #define OLDLOP(f) \
280 do { \
281 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
282 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
283 pl_yylval.ival = (f); \
284 PL_expect = XTERM; \
285 PL_bufptr = s; \
286 return (int)LSTOP; \
287 } while(0)
288
289 #define COPLINE_INC_WITH_HERELINES \
290 STMT_START { \
291 CopLINE_inc(PL_curcop); \
292 if (PL_parser->herelines) \
293 CopLINE(PL_curcop) += PL_parser->herelines, \
294 PL_parser->herelines = 0; \
295 } STMT_END
296 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
297 * is no sublex_push to follow. */
298 #define COPLINE_SET_FROM_MULTI_END \
299 STMT_START { \
300 CopLINE_set(PL_curcop, PL_multi_end); \
301 if (PL_multi_end != PL_multi_start) \
302 PL_parser->herelines = 0; \
303 } STMT_END
304
305
306 /* A file-local structure for passing around information about subroutines and
307 * related definable words */
308 struct code {
309 SV *sv;
310 CV *cv;
311 GV *gv, **gvp;
312 OP *rv2cv_op;
313 PADOFFSET off;
314 bool lex;
315 };
316
317 static const struct code no_code = { NULL, NULL, NULL, NULL, NULL, 0, FALSE };
318
319 #ifdef DEBUGGING
320
321 /* how to interpret the pl_yylval associated with the token */
322 enum token_type {
323 TOKENTYPE_NONE,
324 TOKENTYPE_IVAL,
325 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
326 TOKENTYPE_PVAL,
327 TOKENTYPE_OPVAL
328 };
329
330 static struct debug_tokens {
331 const int token;
332 enum token_type type;
333 const char *name;
334 } const debug_tokens[] =
335 {
336 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
337 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
338 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
339 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
340 { ANON_SIGSUB, TOKENTYPE_IVAL, "ANON_SIGSUB" },
341 { ARROW, TOKENTYPE_NONE, "ARROW" },
342 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
343 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
344 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
345 { CHEQOP, TOKENTYPE_OPNUM, "CHEQOP" },
346 { CHRELOP, TOKENTYPE_OPNUM, "CHRELOP" },
347 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
348 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
349 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
350 { DO, TOKENTYPE_NONE, "DO" },
351 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
352 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
353 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
354 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
355 { ELSE, TOKENTYPE_NONE, "ELSE" },
356 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
357 { FOR, TOKENTYPE_IVAL, "FOR" },
358 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
359 { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" },
360 { FORMRBRACK, TOKENTYPE_NONE, "FORMRBRACK" },
361 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
362 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
363 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
364 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
365 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
366 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
367 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
368 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
369 { IF, TOKENTYPE_IVAL, "IF" },
370 { LABEL, TOKENTYPE_OPVAL, "LABEL" },
371 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
372 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
373 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
374 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
375 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
376 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
377 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
378 { MY, TOKENTYPE_IVAL, "MY" },
379 { NCEQOP, TOKENTYPE_OPNUM, "NCEQOP" },
380 { NCRELOP, TOKENTYPE_OPNUM, "NCRELOP" },
381 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
382 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
383 { OROP, TOKENTYPE_IVAL, "OROP" },
384 { OROR, TOKENTYPE_NONE, "OROR" },
385 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
386 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
387 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
388 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
389 { POSTJOIN, TOKENTYPE_NONE, "POSTJOIN" },
390 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
391 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
392 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
393 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
394 { PREINC, TOKENTYPE_NONE, "PREINC" },
395 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
396 { QWLIST, TOKENTYPE_OPVAL, "QWLIST" },
397 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
398 { REQUIRE, TOKENTYPE_NONE, "REQUIRE" },
399 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
400 { SIGSUB, TOKENTYPE_NONE, "SIGSUB" },
401 { SUB, TOKENTYPE_NONE, "SUB" },
402 { SUBLEXEND, TOKENTYPE_NONE, "SUBLEXEND" },
403 { SUBLEXSTART, TOKENTYPE_NONE, "SUBLEXSTART" },
404 { THING, TOKENTYPE_OPVAL, "THING" },
405 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
406 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
407 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
408 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
409 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
410 { USE, TOKENTYPE_IVAL, "USE" },
411 { WHEN, TOKENTYPE_IVAL, "WHEN" },
412 { WHILE, TOKENTYPE_IVAL, "WHILE" },
413 { BAREWORD, TOKENTYPE_OPVAL, "BAREWORD" },
414 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
415 { 0, TOKENTYPE_NONE, NULL }
416 };
417
418 /* dump the returned token in rv, plus any optional arg in pl_yylval */
419
420 STATIC int
S_tokereport(pTHX_ I32 rv,const YYSTYPE * lvalp)421 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
422 {
423 PERL_ARGS_ASSERT_TOKEREPORT;
424
425 if (DEBUG_T_TEST) {
426 const char *name = NULL;
427 enum token_type type = TOKENTYPE_NONE;
428 const struct debug_tokens *p;
429 SV* const report = newSVpvs("<== ");
430
431 for (p = debug_tokens; p->token; p++) {
432 if (p->token == (int)rv) {
433 name = p->name;
434 type = p->type;
435 break;
436 }
437 }
438 if (name)
439 Perl_sv_catpv(aTHX_ report, name);
440 else if (isGRAPH(rv))
441 {
442 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
443 if ((char)rv == 'p')
444 sv_catpvs(report, " (pending identifier)");
445 }
446 else if (!rv)
447 sv_catpvs(report, "EOF");
448 else
449 Perl_sv_catpvf(aTHX_ report, "?? %" IVdf, (IV)rv);
450 switch (type) {
451 case TOKENTYPE_NONE:
452 break;
453 case TOKENTYPE_IVAL:
454 Perl_sv_catpvf(aTHX_ report, "(ival=%" IVdf ")", (IV)lvalp->ival);
455 break;
456 case TOKENTYPE_OPNUM:
457 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
458 PL_op_name[lvalp->ival]);
459 break;
460 case TOKENTYPE_PVAL:
461 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
462 break;
463 case TOKENTYPE_OPVAL:
464 if (lvalp->opval) {
465 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
466 PL_op_name[lvalp->opval->op_type]);
467 if (lvalp->opval->op_type == OP_CONST) {
468 Perl_sv_catpvf(aTHX_ report, " %s",
469 SvPEEK(cSVOPx_sv(lvalp->opval)));
470 }
471
472 }
473 else
474 sv_catpvs(report, "(opval=null)");
475 break;
476 }
477 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
478 };
479 return (int)rv;
480 }
481
482
483 /* print the buffer with suitable escapes */
484
485 STATIC void
S_printbuf(pTHX_ const char * const fmt,const char * const s)486 S_printbuf(pTHX_ const char *const fmt, const char *const s)
487 {
488 SV* const tmp = newSVpvs("");
489
490 PERL_ARGS_ASSERT_PRINTBUF;
491
492 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); /* fmt checked by caller */
493 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
494 GCC_DIAG_RESTORE_STMT;
495 SvREFCNT_dec(tmp);
496 }
497
498 #endif
499
500 /*
501 * S_ao
502 *
503 * This subroutine looks for an '=' next to the operator that has just been
504 * parsed and turns it into an ASSIGNOP if it finds one.
505 */
506
507 STATIC int
S_ao(pTHX_ int toketype)508 S_ao(pTHX_ int toketype)
509 {
510 if (*PL_bufptr == '=') {
511 PL_bufptr++;
512 if (toketype == ANDAND)
513 pl_yylval.ival = OP_ANDASSIGN;
514 else if (toketype == OROR)
515 pl_yylval.ival = OP_ORASSIGN;
516 else if (toketype == DORDOR)
517 pl_yylval.ival = OP_DORASSIGN;
518 toketype = ASSIGNOP;
519 }
520 return REPORT(toketype);
521 }
522
523 /*
524 * S_no_op
525 * When Perl expects an operator and finds something else, no_op
526 * prints the warning. It always prints "<something> found where
527 * operator expected. It prints "Missing semicolon on previous line?"
528 * if the surprise occurs at the start of the line. "do you need to
529 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
530 * where the compiler doesn't know if foo is a method call or a function.
531 * It prints "Missing operator before end of line" if there's nothing
532 * after the missing operator, or "... before <...>" if there is something
533 * after the missing operator.
534 *
535 * PL_bufptr is expected to point to the start of the thing that was found,
536 * and s after the next token or partial token.
537 */
538
539 STATIC void
S_no_op(pTHX_ const char * const what,char * s)540 S_no_op(pTHX_ const char *const what, char *s)
541 {
542 char * const oldbp = PL_bufptr;
543 const bool is_first = (PL_oldbufptr == PL_linestart);
544
545 PERL_ARGS_ASSERT_NO_OP;
546
547 if (!s)
548 s = oldbp;
549 else
550 PL_bufptr = s;
551 yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
552 if (ckWARN_d(WARN_SYNTAX)) {
553 if (is_first)
554 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
555 "\t(Missing semicolon on previous line?)\n");
556 else if (PL_oldoldbufptr && isIDFIRST_lazy_if_safe(PL_oldoldbufptr,
557 PL_bufend,
558 UTF))
559 {
560 const char *t;
561 for (t = PL_oldoldbufptr;
562 (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) || *t == ':');
563 t += UTF ? UTF8SKIP(t) : 1)
564 {
565 NOOP;
566 }
567 if (t < PL_bufptr && isSPACE(*t))
568 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
569 "\t(Do you need to predeclare %" UTF8f "?)\n",
570 UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
571 }
572 else {
573 assert(s >= oldbp);
574 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
575 "\t(Missing operator before %" UTF8f "?)\n",
576 UTF8fARG(UTF, s - oldbp, oldbp));
577 }
578 }
579 PL_bufptr = oldbp;
580 }
581
582 /*
583 * S_missingterm
584 * Complain about missing quote/regexp/heredoc terminator.
585 * If it's called with NULL then it cauterizes the line buffer.
586 * If we're in a delimited string and the delimiter is a control
587 * character, it's reformatted into a two-char sequence like ^C.
588 * This is fatal.
589 */
590
591 STATIC void
S_missingterm(pTHX_ char * s,STRLEN len)592 S_missingterm(pTHX_ char *s, STRLEN len)
593 {
594 char tmpbuf[UTF8_MAXBYTES + 1];
595 char q;
596 bool uni = FALSE;
597 SV *sv;
598 if (s) {
599 char * const nl = (char *) my_memrchr(s, '\n', len);
600 if (nl) {
601 *nl = '\0';
602 len = nl - s;
603 }
604 uni = UTF;
605 }
606 else if (PL_multi_close < 32) {
607 *tmpbuf = '^';
608 tmpbuf[1] = (char)toCTRL(PL_multi_close);
609 tmpbuf[2] = '\0';
610 s = tmpbuf;
611 len = 2;
612 }
613 else {
614 if (LIKELY(PL_multi_close < 256)) {
615 *tmpbuf = (char)PL_multi_close;
616 tmpbuf[1] = '\0';
617 len = 1;
618 }
619 else {
620 char *end = (char *)uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close);
621 *end = '\0';
622 len = end - tmpbuf;
623 uni = TRUE;
624 }
625 s = tmpbuf;
626 }
627 q = memchr(s, '"', len) ? '\'' : '"';
628 sv = sv_2mortal(newSVpvn(s, len));
629 if (uni)
630 SvUTF8_on(sv);
631 Perl_croak(aTHX_ "Can't find string terminator %c%" SVf "%c"
632 " anywhere before EOF", q, SVfARG(sv), q);
633 }
634
635 #include "feature.h"
636
637 /*
638 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
639 * utf16-to-utf8-reversed.
640 */
641
642 #ifdef PERL_CR_FILTER
643 static void
strip_return(SV * sv)644 strip_return(SV *sv)
645 {
646 const char *s = SvPVX_const(sv);
647 const char * const e = s + SvCUR(sv);
648
649 PERL_ARGS_ASSERT_STRIP_RETURN;
650
651 /* outer loop optimized to do nothing if there are no CR-LFs */
652 while (s < e) {
653 if (*s++ == '\r' && *s == '\n') {
654 /* hit a CR-LF, need to copy the rest */
655 char *d = s - 1;
656 *d++ = *s++;
657 while (s < e) {
658 if (*s == '\r' && s[1] == '\n')
659 s++;
660 *d++ = *s++;
661 }
662 SvCUR(sv) -= s - d;
663 return;
664 }
665 }
666 }
667
668 STATIC I32
S_cr_textfilter(pTHX_ int idx,SV * sv,int maxlen)669 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
670 {
671 const I32 count = FILTER_READ(idx+1, sv, maxlen);
672 if (count > 0 && !maxlen)
673 strip_return(sv);
674 return count;
675 }
676 #endif
677
678 /*
679 =for apidoc lex_start
680
681 Creates and initialises a new lexer/parser state object, supplying
682 a context in which to lex and parse from a new source of Perl code.
683 A pointer to the new state object is placed in L</PL_parser>. An entry
684 is made on the save stack so that upon unwinding, the new state object
685 will be destroyed and the former value of L</PL_parser> will be restored.
686 Nothing else need be done to clean up the parsing context.
687
688 The code to be parsed comes from C<line> and C<rsfp>. C<line>, if
689 non-null, provides a string (in SV form) containing code to be parsed.
690 A copy of the string is made, so subsequent modification of C<line>
691 does not affect parsing. C<rsfp>, if non-null, provides an input stream
692 from which code will be read to be parsed. If both are non-null, the
693 code in C<line> comes first and must consist of complete lines of input,
694 and C<rsfp> supplies the remainder of the source.
695
696 The C<flags> parameter is reserved for future use. Currently it is only
697 used by perl internally, so extensions should always pass zero.
698
699 =cut
700 */
701
702 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
703 can share filters with the current parser.
704 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
705 caller, hence isn't owned by the parser, so shouldn't be closed on parser
706 destruction. This is used to handle the case of defaulting to reading the
707 script from the standard input because no filename was given on the command
708 line (without getting confused by situation where STDIN has been closed, so
709 the script handle is opened on fd 0) */
710
711 void
Perl_lex_start(pTHX_ SV * line,PerlIO * rsfp,U32 flags)712 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
713 {
714 const char *s = NULL;
715 yy_parser *parser, *oparser;
716
717 if (flags && flags & ~LEX_START_FLAGS)
718 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
719
720 /* create and initialise a parser */
721
722 Newxz(parser, 1, yy_parser);
723 parser->old_parser = oparser = PL_parser;
724 PL_parser = parser;
725
726 parser->stack = NULL;
727 parser->stack_max1 = NULL;
728 parser->ps = NULL;
729
730 /* on scope exit, free this parser and restore any outer one */
731 SAVEPARSER(parser);
732 parser->saved_curcop = PL_curcop;
733
734 /* initialise lexer state */
735
736 parser->nexttoke = 0;
737 parser->error_count = oparser ? oparser->error_count : 0;
738 parser->copline = parser->preambling = NOLINE;
739 parser->lex_state = LEX_NORMAL;
740 parser->expect = XSTATE;
741 parser->rsfp = rsfp;
742 parser->recheck_utf8_validity = TRUE;
743 parser->rsfp_filters =
744 !(flags & LEX_START_SAME_FILTER) || !oparser
745 ? NULL
746 : MUTABLE_AV(SvREFCNT_inc(
747 oparser->rsfp_filters
748 ? oparser->rsfp_filters
749 : (oparser->rsfp_filters = newAV())
750 ));
751
752 Newx(parser->lex_brackstack, 120, char);
753 Newx(parser->lex_casestack, 12, char);
754 *parser->lex_casestack = '\0';
755 Newxz(parser->lex_shared, 1, LEXSHARED);
756
757 if (line) {
758 STRLEN len;
759 const U8* first_bad_char_loc;
760
761 s = SvPV_const(line, len);
762
763 if ( SvUTF8(line)
764 && UNLIKELY(! is_utf8_string_loc((U8 *) s,
765 SvCUR(line),
766 &first_bad_char_loc)))
767 {
768 _force_out_malformed_utf8_message(first_bad_char_loc,
769 (U8 *) s + SvCUR(line),
770 0,
771 1 /* 1 means die */ );
772 NOT_REACHED; /* NOTREACHED */
773 }
774
775 parser->linestr = flags & LEX_START_COPIED
776 ? SvREFCNT_inc_simple_NN(line)
777 : newSVpvn_flags(s, len, SvUTF8(line));
778 if (!rsfp)
779 sv_catpvs(parser->linestr, "\n;");
780 } else {
781 parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
782 }
783
784 parser->oldoldbufptr =
785 parser->oldbufptr =
786 parser->bufptr =
787 parser->linestart = SvPVX(parser->linestr);
788 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
789 parser->last_lop = parser->last_uni = NULL;
790
791 STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
792 |LEX_DONT_CLOSE_RSFP));
793 parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
794 |LEX_DONT_CLOSE_RSFP));
795
796 parser->in_pod = parser->filtered = 0;
797 }
798
799
800 /* delete a parser object */
801
802 void
Perl_parser_free(pTHX_ const yy_parser * parser)803 Perl_parser_free(pTHX_ const yy_parser *parser)
804 {
805 PERL_ARGS_ASSERT_PARSER_FREE;
806
807 PL_curcop = parser->saved_curcop;
808 SvREFCNT_dec(parser->linestr);
809
810 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
811 PerlIO_clearerr(parser->rsfp);
812 else if (parser->rsfp && (!parser->old_parser
813 || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
814 PerlIO_close(parser->rsfp);
815 SvREFCNT_dec(parser->rsfp_filters);
816 SvREFCNT_dec(parser->lex_stuff);
817 SvREFCNT_dec(parser->lex_sub_repl);
818
819 Safefree(parser->lex_brackstack);
820 Safefree(parser->lex_casestack);
821 Safefree(parser->lex_shared);
822 PL_parser = parser->old_parser;
823 Safefree(parser);
824 }
825
826 void
Perl_parser_free_nexttoke_ops(pTHX_ yy_parser * parser,OPSLAB * slab)827 Perl_parser_free_nexttoke_ops(pTHX_ yy_parser *parser, OPSLAB *slab)
828 {
829 I32 nexttoke = parser->nexttoke;
830 PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
831 while (nexttoke--) {
832 if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
833 && parser->nextval[nexttoke].opval
834 && parser->nextval[nexttoke].opval->op_slabbed
835 && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
836 op_free(parser->nextval[nexttoke].opval);
837 parser->nextval[nexttoke].opval = NULL;
838 }
839 }
840 }
841
842
843 /*
844 =for apidoc AmnxUN|SV *|PL_parser-E<gt>linestr
845
846 Buffer scalar containing the chunk currently under consideration of the
847 text currently being lexed. This is always a plain string scalar (for
848 which C<SvPOK> is true). It is not intended to be used as a scalar by
849 normal scalar means; instead refer to the buffer directly by the pointer
850 variables described below.
851
852 The lexer maintains various C<char*> pointers to things in the
853 C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
854 reallocated, all of these pointers must be updated. Don't attempt to
855 do this manually, but rather use L</lex_grow_linestr> if you need to
856 reallocate the buffer.
857
858 The content of the text chunk in the buffer is commonly exactly one
859 complete line of input, up to and including a newline terminator,
860 but there are situations where it is otherwise. The octets of the
861 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
862 The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
863 flag on this scalar, which may disagree with it.
864
865 For direct examination of the buffer, the variable
866 L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
867 lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
868 of these pointers is usually preferable to examination of the scalar
869 through normal scalar means.
870
871 =for apidoc AmnxUN|char *|PL_parser-E<gt>bufend
872
873 Direct pointer to the end of the chunk of text currently being lexed, the
874 end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
875 + SvCUR(PL_parser-E<gt>linestr)>. A C<NUL> character (zero octet) is
876 always located at the end of the buffer, and does not count as part of
877 the buffer's contents.
878
879 =for apidoc AmnxUN|char *|PL_parser-E<gt>bufptr
880
881 Points to the current position of lexing inside the lexer buffer.
882 Characters around this point may be freely examined, within
883 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
884 L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
885 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
886
887 Lexing code (whether in the Perl core or not) moves this pointer past
888 the characters that it consumes. It is also expected to perform some
889 bookkeeping whenever a newline character is consumed. This movement
890 can be more conveniently performed by the function L</lex_read_to>,
891 which handles newlines appropriately.
892
893 Interpretation of the buffer's octets can be abstracted out by
894 using the slightly higher-level functions L</lex_peek_unichar> and
895 L</lex_read_unichar>.
896
897 =for apidoc AmnxUN|char *|PL_parser-E<gt>linestart
898
899 Points to the start of the current line inside the lexer buffer.
900 This is useful for indicating at which column an error occurred, and
901 not much else. This must be updated by any lexing code that consumes
902 a newline; the function L</lex_read_to> handles this detail.
903
904 =cut
905 */
906
907 /*
908 =for apidoc lex_bufutf8
909
910 Indicates whether the octets in the lexer buffer
911 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
912 of Unicode characters. If not, they should be interpreted as Latin-1
913 characters. This is analogous to the C<SvUTF8> flag for scalars.
914
915 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
916 contains valid UTF-8. Lexing code must be robust in the face of invalid
917 encoding.
918
919 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
920 is significant, but not the whole story regarding the input character
921 encoding. Normally, when a file is being read, the scalar contains octets
922 and its C<SvUTF8> flag is off, but the octets should be interpreted as
923 UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
924 however, the scalar may have the C<SvUTF8> flag on, and in this case its
925 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
926 is in effect. This logic may change in the future; use this function
927 instead of implementing the logic yourself.
928
929 =cut
930 */
931
932 bool
Perl_lex_bufutf8(pTHX)933 Perl_lex_bufutf8(pTHX)
934 {
935 return UTF;
936 }
937
938 /*
939 =for apidoc lex_grow_linestr
940
941 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
942 at least C<len> octets (including terminating C<NUL>). Returns a
943 pointer to the reallocated buffer. This is necessary before making
944 any direct modification of the buffer that would increase its length.
945 L</lex_stuff_pvn> provides a more convenient way to insert text into
946 the buffer.
947
948 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
949 this function updates all of the lexer's variables that point directly
950 into the buffer.
951
952 =cut
953 */
954
955 char *
Perl_lex_grow_linestr(pTHX_ STRLEN len)956 Perl_lex_grow_linestr(pTHX_ STRLEN len)
957 {
958 SV *linestr;
959 char *buf;
960 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
961 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
962 bool current;
963
964 linestr = PL_parser->linestr;
965 buf = SvPVX(linestr);
966 if (len <= SvLEN(linestr))
967 return buf;
968
969 /* Is the lex_shared linestr SV the same as the current linestr SV?
970 * Only in this case does re_eval_start need adjusting, since it
971 * points within lex_shared->ls_linestr's buffer */
972 current = ( !PL_parser->lex_shared->ls_linestr
973 || linestr == PL_parser->lex_shared->ls_linestr);
974
975 bufend_pos = PL_parser->bufend - buf;
976 bufptr_pos = PL_parser->bufptr - buf;
977 oldbufptr_pos = PL_parser->oldbufptr - buf;
978 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
979 linestart_pos = PL_parser->linestart - buf;
980 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
981 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
982 re_eval_start_pos = (current && PL_parser->lex_shared->re_eval_start) ?
983 PL_parser->lex_shared->re_eval_start - buf : 0;
984
985 buf = sv_grow(linestr, len);
986
987 PL_parser->bufend = buf + bufend_pos;
988 PL_parser->bufptr = buf + bufptr_pos;
989 PL_parser->oldbufptr = buf + oldbufptr_pos;
990 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
991 PL_parser->linestart = buf + linestart_pos;
992 if (PL_parser->last_uni)
993 PL_parser->last_uni = buf + last_uni_pos;
994 if (PL_parser->last_lop)
995 PL_parser->last_lop = buf + last_lop_pos;
996 if (current && PL_parser->lex_shared->re_eval_start)
997 PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos;
998 return buf;
999 }
1000
1001 /*
1002 =for apidoc lex_stuff_pvn
1003
1004 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1005 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1006 reallocating the buffer if necessary. This means that lexing code that
1007 runs later will see the characters as if they had appeared in the input.
1008 It is not recommended to do this as part of normal parsing, and most
1009 uses of this facility run the risk of the inserted characters being
1010 interpreted in an unintended manner.
1011
1012 The string to be inserted is represented by C<len> octets starting
1013 at C<pv>. These octets are interpreted as either UTF-8 or Latin-1,
1014 according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>.
1015 The characters are recoded for the lexer buffer, according to how the
1016 buffer is currently being interpreted (L</lex_bufutf8>). If a string
1017 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
1018 function is more convenient.
1019
1020 =for apidoc Amnh||LEX_STUFF_UTF8
1021
1022 =cut
1023 */
1024
1025 void
Perl_lex_stuff_pvn(pTHX_ const char * pv,STRLEN len,U32 flags)1026 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1027 {
1028 dVAR;
1029 char *bufptr;
1030 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1031 if (flags & ~(LEX_STUFF_UTF8))
1032 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1033 if (UTF) {
1034 if (flags & LEX_STUFF_UTF8) {
1035 goto plain_copy;
1036 } else {
1037 STRLEN highhalf = variant_under_utf8_count((U8 *) pv,
1038 (U8 *) pv + len);
1039 const char *p, *e = pv+len;;
1040 if (!highhalf)
1041 goto plain_copy;
1042 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1043 bufptr = PL_parser->bufptr;
1044 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1045 SvCUR_set(PL_parser->linestr,
1046 SvCUR(PL_parser->linestr) + len+highhalf);
1047 PL_parser->bufend += len+highhalf;
1048 for (p = pv; p != e; p++) {
1049 append_utf8_from_native_byte(*p, (U8 **) &bufptr);
1050 }
1051 }
1052 } else {
1053 if (flags & LEX_STUFF_UTF8) {
1054 STRLEN highhalf = 0;
1055 const char *p, *e = pv+len;
1056 for (p = pv; p != e; p++) {
1057 U8 c = (U8)*p;
1058 if (UTF8_IS_ABOVE_LATIN1(c)) {
1059 Perl_croak(aTHX_ "Lexing code attempted to stuff "
1060 "non-Latin-1 character into Latin-1 input");
1061 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1062 p++;
1063 highhalf++;
1064 } else assert(UTF8_IS_INVARIANT(c));
1065 }
1066 if (!highhalf)
1067 goto plain_copy;
1068 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1069 bufptr = PL_parser->bufptr;
1070 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1071 SvCUR_set(PL_parser->linestr,
1072 SvCUR(PL_parser->linestr) + len-highhalf);
1073 PL_parser->bufend += len-highhalf;
1074 p = pv;
1075 while (p < e) {
1076 if (UTF8_IS_INVARIANT(*p)) {
1077 *bufptr++ = *p;
1078 p++;
1079 }
1080 else {
1081 assert(p < e -1 );
1082 *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
1083 p += 2;
1084 }
1085 }
1086 } else {
1087 plain_copy:
1088 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1089 bufptr = PL_parser->bufptr;
1090 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1091 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1092 PL_parser->bufend += len;
1093 Copy(pv, bufptr, len, char);
1094 }
1095 }
1096 }
1097
1098 /*
1099 =for apidoc lex_stuff_pv
1100
1101 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1102 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1103 reallocating the buffer if necessary. This means that lexing code that
1104 runs later will see the characters as if they had appeared in the input.
1105 It is not recommended to do this as part of normal parsing, and most
1106 uses of this facility run the risk of the inserted characters being
1107 interpreted in an unintended manner.
1108
1109 The string to be inserted is represented by octets starting at C<pv>
1110 and continuing to the first nul. These octets are interpreted as either
1111 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1112 in C<flags>. The characters are recoded for the lexer buffer, according
1113 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1114 If it is not convenient to nul-terminate a string to be inserted, the
1115 L</lex_stuff_pvn> function is more appropriate.
1116
1117 =cut
1118 */
1119
1120 void
Perl_lex_stuff_pv(pTHX_ const char * pv,U32 flags)1121 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1122 {
1123 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1124 lex_stuff_pvn(pv, strlen(pv), flags);
1125 }
1126
1127 /*
1128 =for apidoc lex_stuff_sv
1129
1130 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1131 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1132 reallocating the buffer if necessary. This means that lexing code that
1133 runs later will see the characters as if they had appeared in the input.
1134 It is not recommended to do this as part of normal parsing, and most
1135 uses of this facility run the risk of the inserted characters being
1136 interpreted in an unintended manner.
1137
1138 The string to be inserted is the string value of C<sv>. The characters
1139 are recoded for the lexer buffer, according to how the buffer is currently
1140 being interpreted (L</lex_bufutf8>). If a string to be inserted is
1141 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1142 need to construct a scalar.
1143
1144 =cut
1145 */
1146
1147 void
Perl_lex_stuff_sv(pTHX_ SV * sv,U32 flags)1148 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1149 {
1150 char *pv;
1151 STRLEN len;
1152 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1153 if (flags)
1154 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1155 pv = SvPV(sv, len);
1156 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1157 }
1158
1159 /*
1160 =for apidoc lex_unstuff
1161
1162 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1163 C<ptr>. Text following C<ptr> will be moved, and the buffer shortened.
1164 This hides the discarded text from any lexing code that runs later,
1165 as if the text had never appeared.
1166
1167 This is not the normal way to consume lexed text. For that, use
1168 L</lex_read_to>.
1169
1170 =cut
1171 */
1172
1173 void
Perl_lex_unstuff(pTHX_ char * ptr)1174 Perl_lex_unstuff(pTHX_ char *ptr)
1175 {
1176 char *buf, *bufend;
1177 STRLEN unstuff_len;
1178 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1179 buf = PL_parser->bufptr;
1180 if (ptr < buf)
1181 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1182 if (ptr == buf)
1183 return;
1184 bufend = PL_parser->bufend;
1185 if (ptr > bufend)
1186 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1187 unstuff_len = ptr - buf;
1188 Move(ptr, buf, bufend+1-ptr, char);
1189 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1190 PL_parser->bufend = bufend - unstuff_len;
1191 }
1192
1193 /*
1194 =for apidoc lex_read_to
1195
1196 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1197 to C<ptr>. This advances L</PL_parser-E<gt>bufptr> to match C<ptr>,
1198 performing the correct bookkeeping whenever a newline character is passed.
1199 This is the normal way to consume lexed text.
1200
1201 Interpretation of the buffer's octets can be abstracted out by
1202 using the slightly higher-level functions L</lex_peek_unichar> and
1203 L</lex_read_unichar>.
1204
1205 =cut
1206 */
1207
1208 void
Perl_lex_read_to(pTHX_ char * ptr)1209 Perl_lex_read_to(pTHX_ char *ptr)
1210 {
1211 char *s;
1212 PERL_ARGS_ASSERT_LEX_READ_TO;
1213 s = PL_parser->bufptr;
1214 if (ptr < s || ptr > PL_parser->bufend)
1215 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1216 for (; s != ptr; s++)
1217 if (*s == '\n') {
1218 COPLINE_INC_WITH_HERELINES;
1219 PL_parser->linestart = s+1;
1220 }
1221 PL_parser->bufptr = ptr;
1222 }
1223
1224 /*
1225 =for apidoc lex_discard_to
1226
1227 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1228 up to C<ptr>. The remaining content of the buffer will be moved, and
1229 all pointers into the buffer updated appropriately. C<ptr> must not
1230 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1231 it is not permitted to discard text that has yet to be lexed.
1232
1233 Normally it is not necessarily to do this directly, because it suffices to
1234 use the implicit discarding behaviour of L</lex_next_chunk> and things
1235 based on it. However, if a token stretches across multiple lines,
1236 and the lexing code has kept multiple lines of text in the buffer for
1237 that purpose, then after completion of the token it would be wise to
1238 explicitly discard the now-unneeded earlier lines, to avoid future
1239 multi-line tokens growing the buffer without bound.
1240
1241 =cut
1242 */
1243
1244 void
Perl_lex_discard_to(pTHX_ char * ptr)1245 Perl_lex_discard_to(pTHX_ char *ptr)
1246 {
1247 char *buf;
1248 STRLEN discard_len;
1249 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1250 buf = SvPVX(PL_parser->linestr);
1251 if (ptr < buf)
1252 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1253 if (ptr == buf)
1254 return;
1255 if (ptr > PL_parser->bufptr)
1256 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1257 discard_len = ptr - buf;
1258 if (PL_parser->oldbufptr < ptr)
1259 PL_parser->oldbufptr = ptr;
1260 if (PL_parser->oldoldbufptr < ptr)
1261 PL_parser->oldoldbufptr = ptr;
1262 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1263 PL_parser->last_uni = NULL;
1264 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1265 PL_parser->last_lop = NULL;
1266 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1267 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1268 PL_parser->bufend -= discard_len;
1269 PL_parser->bufptr -= discard_len;
1270 PL_parser->oldbufptr -= discard_len;
1271 PL_parser->oldoldbufptr -= discard_len;
1272 if (PL_parser->last_uni)
1273 PL_parser->last_uni -= discard_len;
1274 if (PL_parser->last_lop)
1275 PL_parser->last_lop -= discard_len;
1276 }
1277
1278 void
Perl_notify_parser_that_changed_to_utf8(pTHX)1279 Perl_notify_parser_that_changed_to_utf8(pTHX)
1280 {
1281 /* Called when $^H is changed to indicate that HINT_UTF8 has changed from
1282 * off to on. At compile time, this has the effect of entering a 'use
1283 * utf8' section. This means that any input was not previously checked for
1284 * UTF-8 (because it was off), but now we do need to check it, or our
1285 * assumptions about the input being sane could be wrong, and we could
1286 * segfault. This routine just sets a flag so that the next time we look
1287 * at the input we do the well-formed UTF-8 check. If we aren't in the
1288 * proper phase, there may not be a parser object, but if there is, setting
1289 * the flag is harmless */
1290
1291 if (PL_parser) {
1292 PL_parser->recheck_utf8_validity = TRUE;
1293 }
1294 }
1295
1296 /*
1297 =for apidoc lex_next_chunk
1298
1299 Reads in the next chunk of text to be lexed, appending it to
1300 L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1301 looked to the end of the current chunk and wants to know more. It is
1302 usual, but not necessary, for lexing to have consumed the entirety of
1303 the current chunk at this time.
1304
1305 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1306 chunk (i.e., the current chunk has been entirely consumed), normally the
1307 current chunk will be discarded at the same time that the new chunk is
1308 read in. If C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, the current chunk
1309 will not be discarded. If the current chunk has not been entirely
1310 consumed, then it will not be discarded regardless of the flag.
1311
1312 Returns true if some new text was added to the buffer, or false if the
1313 buffer has reached the end of the input text.
1314
1315 =for apidoc Amnh||LEX_KEEP_PREVIOUS
1316
1317 =cut
1318 */
1319
1320 #define LEX_FAKE_EOF 0x80000000
1321 #define LEX_NO_TERM 0x40000000 /* here-doc */
1322
1323 bool
Perl_lex_next_chunk(pTHX_ U32 flags)1324 Perl_lex_next_chunk(pTHX_ U32 flags)
1325 {
1326 SV *linestr;
1327 char *buf;
1328 STRLEN old_bufend_pos, new_bufend_pos;
1329 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1330 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1331 bool got_some_for_debugger = 0;
1332 bool got_some;
1333
1334 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1335 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1336 if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
1337 return FALSE;
1338 linestr = PL_parser->linestr;
1339 buf = SvPVX(linestr);
1340 if (!(flags & LEX_KEEP_PREVIOUS)
1341 && PL_parser->bufptr == PL_parser->bufend)
1342 {
1343 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1344 linestart_pos = 0;
1345 if (PL_parser->last_uni != PL_parser->bufend)
1346 PL_parser->last_uni = NULL;
1347 if (PL_parser->last_lop != PL_parser->bufend)
1348 PL_parser->last_lop = NULL;
1349 last_uni_pos = last_lop_pos = 0;
1350 *buf = 0;
1351 SvCUR_set(linestr, 0);
1352 } else {
1353 old_bufend_pos = PL_parser->bufend - buf;
1354 bufptr_pos = PL_parser->bufptr - buf;
1355 oldbufptr_pos = PL_parser->oldbufptr - buf;
1356 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1357 linestart_pos = PL_parser->linestart - buf;
1358 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1359 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1360 }
1361 if (flags & LEX_FAKE_EOF) {
1362 goto eof;
1363 } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1364 got_some = 0;
1365 } else if (filter_gets(linestr, old_bufend_pos)) {
1366 got_some = 1;
1367 got_some_for_debugger = 1;
1368 } else if (flags & LEX_NO_TERM) {
1369 got_some = 0;
1370 } else {
1371 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1372 SvPVCLEAR(linestr);
1373 eof:
1374 /* End of real input. Close filehandle (unless it was STDIN),
1375 * then add implicit termination.
1376 */
1377 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1378 PerlIO_clearerr(PL_parser->rsfp);
1379 else if (PL_parser->rsfp)
1380 (void)PerlIO_close(PL_parser->rsfp);
1381 PL_parser->rsfp = NULL;
1382 PL_parser->in_pod = PL_parser->filtered = 0;
1383 if (!PL_in_eval && PL_minus_p) {
1384 sv_catpvs(linestr,
1385 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1386 PL_minus_n = PL_minus_p = 0;
1387 } else if (!PL_in_eval && PL_minus_n) {
1388 sv_catpvs(linestr, /*{*/";}");
1389 PL_minus_n = 0;
1390 } else
1391 sv_catpvs(linestr, ";");
1392 got_some = 1;
1393 }
1394 buf = SvPVX(linestr);
1395 new_bufend_pos = SvCUR(linestr);
1396 PL_parser->bufend = buf + new_bufend_pos;
1397 PL_parser->bufptr = buf + bufptr_pos;
1398
1399 if (UTF) {
1400 const U8* first_bad_char_loc;
1401 if (UNLIKELY(! is_utf8_string_loc(
1402 (U8 *) PL_parser->bufptr,
1403 PL_parser->bufend - PL_parser->bufptr,
1404 &first_bad_char_loc)))
1405 {
1406 _force_out_malformed_utf8_message(first_bad_char_loc,
1407 (U8 *) PL_parser->bufend,
1408 0,
1409 1 /* 1 means die */ );
1410 NOT_REACHED; /* NOTREACHED */
1411 }
1412 }
1413
1414 PL_parser->oldbufptr = buf + oldbufptr_pos;
1415 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1416 PL_parser->linestart = buf + linestart_pos;
1417 if (PL_parser->last_uni)
1418 PL_parser->last_uni = buf + last_uni_pos;
1419 if (PL_parser->last_lop)
1420 PL_parser->last_lop = buf + last_lop_pos;
1421 if (PL_parser->preambling != NOLINE) {
1422 CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1423 PL_parser->preambling = NOLINE;
1424 }
1425 if ( got_some_for_debugger
1426 && PERLDB_LINE_OR_SAVESRC
1427 && PL_curstash != PL_debstash)
1428 {
1429 /* debugger active and we're not compiling the debugger code,
1430 * so store the line into the debugger's array of lines
1431 */
1432 update_debugger_info(NULL, buf+old_bufend_pos,
1433 new_bufend_pos-old_bufend_pos);
1434 }
1435 return got_some;
1436 }
1437
1438 /*
1439 =for apidoc lex_peek_unichar
1440
1441 Looks ahead one (Unicode) character in the text currently being lexed.
1442 Returns the codepoint (unsigned integer value) of the next character,
1443 or -1 if lexing has reached the end of the input text. To consume the
1444 peeked character, use L</lex_read_unichar>.
1445
1446 If the next character is in (or extends into) the next chunk of input
1447 text, the next chunk will be read in. Normally the current chunk will be
1448 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1449 bit set, then the current chunk will not be discarded.
1450
1451 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1452 is encountered, an exception is generated.
1453
1454 =cut
1455 */
1456
1457 I32
Perl_lex_peek_unichar(pTHX_ U32 flags)1458 Perl_lex_peek_unichar(pTHX_ U32 flags)
1459 {
1460 dVAR;
1461 char *s, *bufend;
1462 if (flags & ~(LEX_KEEP_PREVIOUS))
1463 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1464 s = PL_parser->bufptr;
1465 bufend = PL_parser->bufend;
1466 if (UTF) {
1467 U8 head;
1468 I32 unichar;
1469 STRLEN len, retlen;
1470 if (s == bufend) {
1471 if (!lex_next_chunk(flags))
1472 return -1;
1473 s = PL_parser->bufptr;
1474 bufend = PL_parser->bufend;
1475 }
1476 head = (U8)*s;
1477 if (UTF8_IS_INVARIANT(head))
1478 return head;
1479 if (UTF8_IS_START(head)) {
1480 len = UTF8SKIP(&head);
1481 while ((STRLEN)(bufend-s) < len) {
1482 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1483 break;
1484 s = PL_parser->bufptr;
1485 bufend = PL_parser->bufend;
1486 }
1487 }
1488 unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1489 if (retlen == (STRLEN)-1) {
1490 _force_out_malformed_utf8_message((U8 *) s,
1491 (U8 *) bufend,
1492 0,
1493 1 /* 1 means die */ );
1494 NOT_REACHED; /* NOTREACHED */
1495 }
1496 return unichar;
1497 } else {
1498 if (s == bufend) {
1499 if (!lex_next_chunk(flags))
1500 return -1;
1501 s = PL_parser->bufptr;
1502 }
1503 return (U8)*s;
1504 }
1505 }
1506
1507 /*
1508 =for apidoc lex_read_unichar
1509
1510 Reads the next (Unicode) character in the text currently being lexed.
1511 Returns the codepoint (unsigned integer value) of the character read,
1512 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1513 if lexing has reached the end of the input text. To non-destructively
1514 examine the next character, use L</lex_peek_unichar> instead.
1515
1516 If the next character is in (or extends into) the next chunk of input
1517 text, the next chunk will be read in. Normally the current chunk will be
1518 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1519 bit set, then the current chunk will not be discarded.
1520
1521 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1522 is encountered, an exception is generated.
1523
1524 =cut
1525 */
1526
1527 I32
Perl_lex_read_unichar(pTHX_ U32 flags)1528 Perl_lex_read_unichar(pTHX_ U32 flags)
1529 {
1530 I32 c;
1531 if (flags & ~(LEX_KEEP_PREVIOUS))
1532 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1533 c = lex_peek_unichar(flags);
1534 if (c != -1) {
1535 if (c == '\n')
1536 COPLINE_INC_WITH_HERELINES;
1537 if (UTF)
1538 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1539 else
1540 ++(PL_parser->bufptr);
1541 }
1542 return c;
1543 }
1544
1545 /*
1546 =for apidoc lex_read_space
1547
1548 Reads optional spaces, in Perl style, in the text currently being
1549 lexed. The spaces may include ordinary whitespace characters and
1550 Perl-style comments. C<#line> directives are processed if encountered.
1551 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1552 at a non-space character (or the end of the input text).
1553
1554 If spaces extend into the next chunk of input text, the next chunk will
1555 be read in. Normally the current chunk will be discarded at the same
1556 time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, then the current
1557 chunk will not be discarded.
1558
1559 =cut
1560 */
1561
1562 #define LEX_NO_INCLINE 0x40000000
1563 #define LEX_NO_NEXT_CHUNK 0x80000000
1564
1565 void
Perl_lex_read_space(pTHX_ U32 flags)1566 Perl_lex_read_space(pTHX_ U32 flags)
1567 {
1568 char *s, *bufend;
1569 const bool can_incline = !(flags & LEX_NO_INCLINE);
1570 bool need_incline = 0;
1571 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1572 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1573 s = PL_parser->bufptr;
1574 bufend = PL_parser->bufend;
1575 while (1) {
1576 char c = *s;
1577 if (c == '#') {
1578 do {
1579 c = *++s;
1580 } while (!(c == '\n' || (c == 0 && s == bufend)));
1581 } else if (c == '\n') {
1582 s++;
1583 if (can_incline) {
1584 PL_parser->linestart = s;
1585 if (s == bufend)
1586 need_incline = 1;
1587 else
1588 incline(s, bufend);
1589 }
1590 } else if (isSPACE(c)) {
1591 s++;
1592 } else if (c == 0 && s == bufend) {
1593 bool got_more;
1594 line_t l;
1595 if (flags & LEX_NO_NEXT_CHUNK)
1596 break;
1597 PL_parser->bufptr = s;
1598 l = CopLINE(PL_curcop);
1599 CopLINE(PL_curcop) += PL_parser->herelines + 1;
1600 got_more = lex_next_chunk(flags);
1601 CopLINE_set(PL_curcop, l);
1602 s = PL_parser->bufptr;
1603 bufend = PL_parser->bufend;
1604 if (!got_more)
1605 break;
1606 if (can_incline && need_incline && PL_parser->rsfp) {
1607 incline(s, bufend);
1608 need_incline = 0;
1609 }
1610 } else if (!c) {
1611 s++;
1612 } else {
1613 break;
1614 }
1615 }
1616 PL_parser->bufptr = s;
1617 }
1618
1619 /*
1620
1621 =for apidoc validate_proto
1622
1623 This function performs syntax checking on a prototype, C<proto>.
1624 If C<warn> is true, any illegal characters or mismatched brackets
1625 will trigger illegalproto warnings, declaring that they were
1626 detected in the prototype for C<name>.
1627
1628 The return value is C<true> if this is a valid prototype, and
1629 C<false> if it is not, regardless of whether C<warn> was C<true> or
1630 C<false>.
1631
1632 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1633
1634 =cut
1635
1636 */
1637
1638 bool
Perl_validate_proto(pTHX_ SV * name,SV * proto,bool warn,bool curstash)1639 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn, bool curstash)
1640 {
1641 STRLEN len, origlen;
1642 char *p;
1643 bool bad_proto = FALSE;
1644 bool in_brackets = FALSE;
1645 bool after_slash = FALSE;
1646 char greedy_proto = ' ';
1647 bool proto_after_greedy_proto = FALSE;
1648 bool must_be_last = FALSE;
1649 bool underscore = FALSE;
1650 bool bad_proto_after_underscore = FALSE;
1651
1652 PERL_ARGS_ASSERT_VALIDATE_PROTO;
1653
1654 if (!proto)
1655 return TRUE;
1656
1657 p = SvPV(proto, len);
1658 origlen = len;
1659 for (; len--; p++) {
1660 if (!isSPACE(*p)) {
1661 if (must_be_last)
1662 proto_after_greedy_proto = TRUE;
1663 if (underscore) {
1664 if (!memCHRs(";@%", *p))
1665 bad_proto_after_underscore = TRUE;
1666 underscore = FALSE;
1667 }
1668 if (!memCHRs("$@%*;[]&\\_+", *p) || *p == '\0') {
1669 bad_proto = TRUE;
1670 }
1671 else {
1672 if (*p == '[')
1673 in_brackets = TRUE;
1674 else if (*p == ']')
1675 in_brackets = FALSE;
1676 else if ((*p == '@' || *p == '%')
1677 && !after_slash
1678 && !in_brackets )
1679 {
1680 must_be_last = TRUE;
1681 greedy_proto = *p;
1682 }
1683 else if (*p == '_')
1684 underscore = TRUE;
1685 }
1686 if (*p == '\\')
1687 after_slash = TRUE;
1688 else
1689 after_slash = FALSE;
1690 }
1691 }
1692
1693 if (warn) {
1694 SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1695 p -= origlen;
1696 p = SvUTF8(proto)
1697 ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1698 origlen, UNI_DISPLAY_ISPRINT)
1699 : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1700
1701 if (curstash && !memchr(SvPVX(name), ':', SvCUR(name))) {
1702 SV *name2 = sv_2mortal(newSVsv(PL_curstname));
1703 sv_catpvs(name2, "::");
1704 sv_catsv(name2, (SV *)name);
1705 name = name2;
1706 }
1707
1708 if (proto_after_greedy_proto)
1709 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1710 "Prototype after '%c' for %" SVf " : %s",
1711 greedy_proto, SVfARG(name), p);
1712 if (in_brackets)
1713 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1714 "Missing ']' in prototype for %" SVf " : %s",
1715 SVfARG(name), p);
1716 if (bad_proto)
1717 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1718 "Illegal character in prototype for %" SVf " : %s",
1719 SVfARG(name), p);
1720 if (bad_proto_after_underscore)
1721 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1722 "Illegal character after '_' in prototype for %" SVf " : %s",
1723 SVfARG(name), p);
1724 }
1725
1726 return (! (proto_after_greedy_proto || bad_proto) );
1727 }
1728
1729 /*
1730 * S_incline
1731 * This subroutine has nothing to do with tilting, whether at windmills
1732 * or pinball tables. Its name is short for "increment line". It
1733 * increments the current line number in CopLINE(PL_curcop) and checks
1734 * to see whether the line starts with a comment of the form
1735 * # line 500 "foo.pm"
1736 * If so, it sets the current line number and file to the values in the comment.
1737 */
1738
1739 STATIC void
S_incline(pTHX_ const char * s,const char * end)1740 S_incline(pTHX_ const char *s, const char *end)
1741 {
1742 const char *t;
1743 const char *n;
1744 const char *e;
1745 line_t line_num;
1746 UV uv;
1747
1748 PERL_ARGS_ASSERT_INCLINE;
1749
1750 assert(end >= s);
1751
1752 COPLINE_INC_WITH_HERELINES;
1753 if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1754 && s+1 == PL_bufend && *s == ';') {
1755 /* fake newline in string eval */
1756 CopLINE_dec(PL_curcop);
1757 return;
1758 }
1759 if (*s++ != '#')
1760 return;
1761 while (SPACE_OR_TAB(*s))
1762 s++;
1763 if (memBEGINs(s, (STRLEN) (end - s), "line"))
1764 s += sizeof("line") - 1;
1765 else
1766 return;
1767 if (SPACE_OR_TAB(*s))
1768 s++;
1769 else
1770 return;
1771 while (SPACE_OR_TAB(*s))
1772 s++;
1773 if (!isDIGIT(*s))
1774 return;
1775
1776 n = s;
1777 while (isDIGIT(*s))
1778 s++;
1779 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1780 return;
1781 while (SPACE_OR_TAB(*s))
1782 s++;
1783 if (*s == '"' && (t = (char *) memchr(s+1, '"', end - s))) {
1784 s++;
1785 e = t + 1;
1786 }
1787 else {
1788 t = s;
1789 while (*t && !isSPACE(*t))
1790 t++;
1791 e = t;
1792 }
1793 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1794 e++;
1795 if (*e != '\n' && *e != '\0')
1796 return; /* false alarm */
1797
1798 if (!grok_atoUV(n, &uv, &e))
1799 return;
1800 line_num = ((line_t)uv) - 1;
1801
1802 if (t - s > 0) {
1803 const STRLEN len = t - s;
1804
1805 if (!PL_rsfp && !PL_parser->filtered) {
1806 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1807 * to *{"::_<newfilename"} */
1808 /* However, the long form of evals is only turned on by the
1809 debugger - usually they're "(eval %lu)" */
1810 GV * const cfgv = CopFILEGV(PL_curcop);
1811 if (cfgv) {
1812 char smallbuf[128];
1813 STRLEN tmplen2 = len;
1814 char *tmpbuf2;
1815 GV *gv2;
1816
1817 if (tmplen2 + 2 <= sizeof smallbuf)
1818 tmpbuf2 = smallbuf;
1819 else
1820 Newx(tmpbuf2, tmplen2 + 2, char);
1821
1822 tmpbuf2[0] = '_';
1823 tmpbuf2[1] = '<';
1824
1825 memcpy(tmpbuf2 + 2, s, tmplen2);
1826 tmplen2 += 2;
1827
1828 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1829 if (!isGV(gv2)) {
1830 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1831 /* adjust ${"::_<newfilename"} to store the new file name */
1832 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1833 /* The line number may differ. If that is the case,
1834 alias the saved lines that are in the array.
1835 Otherwise alias the whole array. */
1836 if (CopLINE(PL_curcop) == line_num) {
1837 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1838 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1839 }
1840 else if (GvAV(cfgv)) {
1841 AV * const av = GvAV(cfgv);
1842 const line_t start = CopLINE(PL_curcop)+1;
1843 SSize_t items = AvFILLp(av) - start;
1844 if (items > 0) {
1845 AV * const av2 = GvAVn(gv2);
1846 SV **svp = AvARRAY(av) + start;
1847 Size_t l = line_num+1;
1848 while (items-- && l < SSize_t_MAX && l == (line_t)l)
1849 av_store(av2, (SSize_t)l++, SvREFCNT_inc(*svp++));
1850 }
1851 }
1852 }
1853
1854 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1855 }
1856 }
1857 CopFILE_free(PL_curcop);
1858 CopFILE_setn(PL_curcop, s, len);
1859 }
1860 CopLINE_set(PL_curcop, line_num);
1861 }
1862
1863 STATIC void
S_update_debugger_info(pTHX_ SV * orig_sv,const char * const buf,STRLEN len)1864 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1865 {
1866 AV *av = CopFILEAVx(PL_curcop);
1867 if (av) {
1868 SV * sv;
1869 if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1870 else {
1871 sv = *av_fetch(av, 0, 1);
1872 SvUPGRADE(sv, SVt_PVMG);
1873 }
1874 if (!SvPOK(sv)) SvPVCLEAR(sv);
1875 if (orig_sv)
1876 sv_catsv(sv, orig_sv);
1877 else
1878 sv_catpvn(sv, buf, len);
1879 if (!SvIOK(sv)) {
1880 (void)SvIOK_on(sv);
1881 SvIV_set(sv, 0);
1882 }
1883 if (PL_parser->preambling == NOLINE)
1884 av_store(av, CopLINE(PL_curcop), sv);
1885 }
1886 }
1887
1888 /*
1889 * skipspace
1890 * Called to gobble the appropriate amount and type of whitespace.
1891 * Skips comments as well.
1892 * Returns the next character after the whitespace that is skipped.
1893 *
1894 * peekspace
1895 * Same thing, but look ahead without incrementing line numbers or
1896 * adjusting PL_linestart.
1897 */
1898
1899 #define skipspace(s) skipspace_flags(s, 0)
1900 #define peekspace(s) skipspace_flags(s, LEX_NO_INCLINE)
1901
1902 char *
Perl_skipspace_flags(pTHX_ char * s,U32 flags)1903 Perl_skipspace_flags(pTHX_ char *s, U32 flags)
1904 {
1905 PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1906 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1907 while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s))
1908 s++;
1909 } else {
1910 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1911 PL_bufptr = s;
1912 lex_read_space(flags | LEX_KEEP_PREVIOUS |
1913 (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ?
1914 LEX_NO_NEXT_CHUNK : 0));
1915 s = PL_bufptr;
1916 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1917 if (PL_linestart > PL_bufptr)
1918 PL_bufptr = PL_linestart;
1919 return s;
1920 }
1921 return s;
1922 }
1923
1924 /*
1925 * S_check_uni
1926 * Check the unary operators to ensure there's no ambiguity in how they're
1927 * used. An ambiguous piece of code would be:
1928 * rand + 5
1929 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1930 * the +5 is its argument.
1931 */
1932
1933 STATIC void
S_check_uni(pTHX)1934 S_check_uni(pTHX)
1935 {
1936 const char *s;
1937
1938 if (PL_oldoldbufptr != PL_last_uni)
1939 return;
1940 while (isSPACE(*PL_last_uni))
1941 PL_last_uni++;
1942 s = PL_last_uni;
1943 while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-')
1944 s += UTF ? UTF8SKIP(s) : 1;
1945 if (s < PL_bufptr && memchr(s, '(', PL_bufptr - s))
1946 return;
1947
1948 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1949 "Warning: Use of \"%" UTF8f "\" without parentheses is ambiguous",
1950 UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
1951 }
1952
1953 /*
1954 * LOP : macro to build a list operator. Its behaviour has been replaced
1955 * with a subroutine, S_lop() for which LOP is just another name.
1956 */
1957
1958 #define LOP(f,x) return lop(f,x,s)
1959
1960 /*
1961 * S_lop
1962 * Build a list operator (or something that might be one). The rules:
1963 * - if we have a next token, then it's a list operator (no parens) for
1964 * which the next token has already been parsed; e.g.,
1965 * sort foo @args
1966 * sort foo (@args)
1967 * - if the next thing is an opening paren, then it's a function
1968 * - else it's a list operator
1969 */
1970
1971 STATIC I32
S_lop(pTHX_ I32 f,U8 x,char * s)1972 S_lop(pTHX_ I32 f, U8 x, char *s)
1973 {
1974 PERL_ARGS_ASSERT_LOP;
1975
1976 pl_yylval.ival = f;
1977 CLINE;
1978 PL_bufptr = s;
1979 PL_last_lop = PL_oldbufptr;
1980 PL_last_lop_op = (OPCODE)f;
1981 if (PL_nexttoke)
1982 goto lstop;
1983 PL_expect = x;
1984 if (*s == '(')
1985 return REPORT(FUNC);
1986 s = skipspace(s);
1987 if (*s == '(')
1988 return REPORT(FUNC);
1989 else {
1990 lstop:
1991 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1992 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1993 return REPORT(LSTOP);
1994 }
1995 }
1996
1997 /*
1998 * S_force_next
1999 * When the lexer realizes it knows the next token (for instance,
2000 * it is reordering tokens for the parser) then it can call S_force_next
2001 * to know what token to return the next time the lexer is called. Caller
2002 * will need to set PL_nextval[] and possibly PL_expect to ensure
2003 * the lexer handles the token correctly.
2004 */
2005
2006 STATIC void
S_force_next(pTHX_ I32 type)2007 S_force_next(pTHX_ I32 type)
2008 {
2009 #ifdef DEBUGGING
2010 if (DEBUG_T_TEST) {
2011 PerlIO_printf(Perl_debug_log, "### forced token:\n");
2012 tokereport(type, &NEXTVAL_NEXTTOKE);
2013 }
2014 #endif
2015 assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
2016 PL_nexttype[PL_nexttoke] = type;
2017 PL_nexttoke++;
2018 }
2019
2020 /*
2021 * S_postderef
2022 *
2023 * This subroutine handles postfix deref syntax after the arrow has already
2024 * been emitted. @* $* etc. are emitted as two separate tokens right here.
2025 * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
2026 * only the first, leaving yylex to find the next.
2027 */
2028
2029 static int
S_postderef(pTHX_ int const funny,char const next)2030 S_postderef(pTHX_ int const funny, char const next)
2031 {
2032 assert(funny == DOLSHARP || memCHRs("$@%&*", funny));
2033 if (next == '*') {
2034 PL_expect = XOPERATOR;
2035 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
2036 assert('@' == funny || '$' == funny || DOLSHARP == funny);
2037 PL_lex_state = LEX_INTERPEND;
2038 if ('@' == funny)
2039 force_next(POSTJOIN);
2040 }
2041 force_next(next);
2042 PL_bufptr+=2;
2043 }
2044 else {
2045 if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL
2046 && !PL_lex_brackets)
2047 PL_lex_dojoin = 2;
2048 PL_expect = XOPERATOR;
2049 PL_bufptr++;
2050 }
2051 return funny;
2052 }
2053
2054 void
Perl_yyunlex(pTHX)2055 Perl_yyunlex(pTHX)
2056 {
2057 int yyc = PL_parser->yychar;
2058 if (yyc != YYEMPTY) {
2059 if (yyc) {
2060 NEXTVAL_NEXTTOKE = PL_parser->yylval;
2061 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
2062 PL_lex_allbrackets--;
2063 PL_lex_brackets--;
2064 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2065 } else if (yyc == '('/*)*/) {
2066 PL_lex_allbrackets--;
2067 yyc |= (2<<24);
2068 }
2069 force_next(yyc);
2070 }
2071 PL_parser->yychar = YYEMPTY;
2072 }
2073 }
2074
2075 STATIC SV *
S_newSV_maybe_utf8(pTHX_ const char * const start,STRLEN len)2076 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2077 {
2078 SV * const sv = newSVpvn_utf8(start, len,
2079 ! IN_BYTES
2080 && UTF
2081 && len != 0
2082 && is_utf8_non_invariant_string((const U8*)start, len));
2083 return sv;
2084 }
2085
2086 /*
2087 * S_force_word
2088 * When the lexer knows the next thing is a word (for instance, it has
2089 * just seen -> and it knows that the next char is a word char, then
2090 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2091 * lookahead.
2092 *
2093 * Arguments:
2094 * char *start : buffer position (must be within PL_linestr)
2095 * int token : PL_next* will be this type of bare word
2096 * (e.g., METHOD,BAREWORD)
2097 * int check_keyword : if true, Perl checks to make sure the word isn't
2098 * a keyword (do this if the word is a label, e.g. goto FOO)
2099 * int allow_pack : if true, : characters will also be allowed (require,
2100 * use, etc. do this)
2101 */
2102
2103 STATIC char *
S_force_word(pTHX_ char * start,int token,int check_keyword,int allow_pack)2104 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2105 {
2106 char *s;
2107 STRLEN len;
2108
2109 PERL_ARGS_ASSERT_FORCE_WORD;
2110
2111 start = skipspace(start);
2112 s = start;
2113 if ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
2114 || (allow_pack && *s == ':' && s[1] == ':') )
2115 {
2116 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2117 if (check_keyword) {
2118 char *s2 = PL_tokenbuf;
2119 STRLEN len2 = len;
2120 if (allow_pack && memBEGINPs(s2, len, "CORE::")) {
2121 s2 += sizeof("CORE::") - 1;
2122 len2 -= sizeof("CORE::") - 1;
2123 }
2124 if (keyword(s2, len2, 0))
2125 return start;
2126 }
2127 if (token == METHOD) {
2128 s = skipspace(s);
2129 if (*s == '(')
2130 PL_expect = XTERM;
2131 else {
2132 PL_expect = XOPERATOR;
2133 }
2134 }
2135 NEXTVAL_NEXTTOKE.opval
2136 = newSVOP(OP_CONST,0,
2137 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2138 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2139 force_next(token);
2140 }
2141 return s;
2142 }
2143
2144 /*
2145 * S_force_ident
2146 * Called when the lexer wants $foo *foo &foo etc, but the program
2147 * text only contains the "foo" portion. The first argument is a pointer
2148 * to the "foo", and the second argument is the type symbol to prefix.
2149 * Forces the next token to be a "BAREWORD".
2150 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2151 */
2152
2153 STATIC void
S_force_ident(pTHX_ const char * s,int kind)2154 S_force_ident(pTHX_ const char *s, int kind)
2155 {
2156 PERL_ARGS_ASSERT_FORCE_IDENT;
2157
2158 if (s[0]) {
2159 const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2160 OP* const o = newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2161 UTF ? SVf_UTF8 : 0));
2162 NEXTVAL_NEXTTOKE.opval = o;
2163 force_next(BAREWORD);
2164 if (kind) {
2165 o->op_private = OPpCONST_ENTERED;
2166 /* XXX see note in pp_entereval() for why we forgo typo
2167 warnings if the symbol must be introduced in an eval.
2168 GSAR 96-10-12 */
2169 gv_fetchpvn_flags(s, len,
2170 (PL_in_eval ? GV_ADDMULTI
2171 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2172 kind == '$' ? SVt_PV :
2173 kind == '@' ? SVt_PVAV :
2174 kind == '%' ? SVt_PVHV :
2175 SVt_PVGV
2176 );
2177 }
2178 }
2179 }
2180
2181 static void
S_force_ident_maybe_lex(pTHX_ char pit)2182 S_force_ident_maybe_lex(pTHX_ char pit)
2183 {
2184 NEXTVAL_NEXTTOKE.ival = pit;
2185 force_next('p');
2186 }
2187
2188 NV
Perl_str_to_version(pTHX_ SV * sv)2189 Perl_str_to_version(pTHX_ SV *sv)
2190 {
2191 NV retval = 0.0;
2192 NV nshift = 1.0;
2193 STRLEN len;
2194 const char *start = SvPV_const(sv,len);
2195 const char * const end = start + len;
2196 const bool utf = cBOOL(SvUTF8(sv));
2197
2198 PERL_ARGS_ASSERT_STR_TO_VERSION;
2199
2200 while (start < end) {
2201 STRLEN skip;
2202 UV n;
2203 if (utf)
2204 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2205 else {
2206 n = *(U8*)start;
2207 skip = 1;
2208 }
2209 retval += ((NV)n)/nshift;
2210 start += skip;
2211 nshift *= 1000;
2212 }
2213 return retval;
2214 }
2215
2216 /*
2217 * S_force_version
2218 * Forces the next token to be a version number.
2219 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2220 * and if "guessing" is TRUE, then no new token is created (and the caller
2221 * must use an alternative parsing method).
2222 */
2223
2224 STATIC char *
S_force_version(pTHX_ char * s,int guessing)2225 S_force_version(pTHX_ char *s, int guessing)
2226 {
2227 OP *version = NULL;
2228 char *d;
2229
2230 PERL_ARGS_ASSERT_FORCE_VERSION;
2231
2232 s = skipspace(s);
2233
2234 d = s;
2235 if (*d == 'v')
2236 d++;
2237 if (isDIGIT(*d)) {
2238 while (isDIGIT(*d) || *d == '_' || *d == '.')
2239 d++;
2240 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2241 SV *ver;
2242 s = scan_num(s, &pl_yylval);
2243 version = pl_yylval.opval;
2244 ver = cSVOPx(version)->op_sv;
2245 if (SvPOK(ver) && !SvNIOK(ver)) {
2246 SvUPGRADE(ver, SVt_PVNV);
2247 SvNV_set(ver, str_to_version(ver));
2248 SvNOK_on(ver); /* hint that it is a version */
2249 }
2250 }
2251 else if (guessing) {
2252 return s;
2253 }
2254 }
2255
2256 /* NOTE: The parser sees the package name and the VERSION swapped */
2257 NEXTVAL_NEXTTOKE.opval = version;
2258 force_next(BAREWORD);
2259
2260 return s;
2261 }
2262
2263 /*
2264 * S_force_strict_version
2265 * Forces the next token to be a version number using strict syntax rules.
2266 */
2267
2268 STATIC char *
S_force_strict_version(pTHX_ char * s)2269 S_force_strict_version(pTHX_ char *s)
2270 {
2271 OP *version = NULL;
2272 const char *errstr = NULL;
2273
2274 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2275
2276 while (isSPACE(*s)) /* leading whitespace */
2277 s++;
2278
2279 if (is_STRICT_VERSION(s,&errstr)) {
2280 SV *ver = newSV(0);
2281 s = (char *)scan_version(s, ver, 0);
2282 version = newSVOP(OP_CONST, 0, ver);
2283 }
2284 else if ((*s != ';' && *s != '{' && *s != '}' )
2285 && (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' )))
2286 {
2287 PL_bufptr = s;
2288 if (errstr)
2289 yyerror(errstr); /* version required */
2290 return s;
2291 }
2292
2293 /* NOTE: The parser sees the package name and the VERSION swapped */
2294 NEXTVAL_NEXTTOKE.opval = version;
2295 force_next(BAREWORD);
2296
2297 return s;
2298 }
2299
2300 /*
2301 * S_tokeq
2302 * Turns any \\ into \ in a quoted string passed in in 'sv', returning 'sv',
2303 * modified as necessary. However, if HINT_NEW_STRING is on, 'sv' is
2304 * unchanged, and a new SV containing the modified input is returned.
2305 */
2306
2307 STATIC SV *
S_tokeq(pTHX_ SV * sv)2308 S_tokeq(pTHX_ SV *sv)
2309 {
2310 char *s;
2311 char *send;
2312 char *d;
2313 SV *pv = sv;
2314
2315 PERL_ARGS_ASSERT_TOKEQ;
2316
2317 assert (SvPOK(sv));
2318 assert (SvLEN(sv));
2319 assert (!SvIsCOW(sv));
2320 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2321 goto finish;
2322 s = SvPVX(sv);
2323 send = SvEND(sv);
2324 /* This is relying on the SV being "well formed" with a trailing '\0' */
2325 while (s < send && !(*s == '\\' && s[1] == '\\'))
2326 s++;
2327 if (s == send)
2328 goto finish;
2329 d = s;
2330 if ( PL_hints & HINT_NEW_STRING ) {
2331 pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2332 SVs_TEMP | SvUTF8(sv));
2333 }
2334 while (s < send) {
2335 if (*s == '\\') {
2336 if (s + 1 < send && (s[1] == '\\'))
2337 s++; /* all that, just for this */
2338 }
2339 *d++ = *s++;
2340 }
2341 *d = '\0';
2342 SvCUR_set(sv, d - SvPVX_const(sv));
2343 finish:
2344 if ( PL_hints & HINT_NEW_STRING )
2345 return new_constant(NULL, 0, "q", sv, pv, "q", 1, NULL);
2346 return sv;
2347 }
2348
2349 /*
2350 * Now come three functions related to double-quote context,
2351 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2352 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2353 * interact with PL_lex_state, and create fake ( ... ) argument lists
2354 * to handle functions and concatenation.
2355 * For example,
2356 * "foo\lbar"
2357 * is tokenised as
2358 * stringify ( const[foo] concat lcfirst ( const[bar] ) )
2359 */
2360
2361 /*
2362 * S_sublex_start
2363 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2364 *
2365 * Pattern matching will set PL_lex_op to the pattern-matching op to
2366 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2367 *
2368 * OP_CONST is easy--just make the new op and return.
2369 *
2370 * Everything else becomes a FUNC.
2371 *
2372 * Sets PL_lex_state to LEX_INTERPPUSH unless ival was OP_NULL or we
2373 * had an OP_CONST. This just sets us up for a
2374 * call to S_sublex_push().
2375 */
2376
2377 STATIC I32
S_sublex_start(pTHX)2378 S_sublex_start(pTHX)
2379 {
2380 const I32 op_type = pl_yylval.ival;
2381
2382 if (op_type == OP_NULL) {
2383 pl_yylval.opval = PL_lex_op;
2384 PL_lex_op = NULL;
2385 return THING;
2386 }
2387 if (op_type == OP_CONST) {
2388 SV *sv = PL_lex_stuff;
2389 PL_lex_stuff = NULL;
2390 sv = tokeq(sv);
2391
2392 if (SvTYPE(sv) == SVt_PVIV) {
2393 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2394 STRLEN len;
2395 const char * const p = SvPV_const(sv, len);
2396 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2397 SvREFCNT_dec(sv);
2398 sv = nsv;
2399 }
2400 pl_yylval.opval = newSVOP(op_type, 0, sv);
2401 return THING;
2402 }
2403
2404 PL_parser->lex_super_state = PL_lex_state;
2405 PL_parser->lex_sub_inwhat = (U16)op_type;
2406 PL_parser->lex_sub_op = PL_lex_op;
2407 PL_parser->sub_no_recover = FALSE;
2408 PL_parser->sub_error_count = PL_error_count;
2409 PL_lex_state = LEX_INTERPPUSH;
2410
2411 PL_expect = XTERM;
2412 if (PL_lex_op) {
2413 pl_yylval.opval = PL_lex_op;
2414 PL_lex_op = NULL;
2415 return PMFUNC;
2416 }
2417 else
2418 return FUNC;
2419 }
2420
2421 /*
2422 * S_sublex_push
2423 * Create a new scope to save the lexing state. The scope will be
2424 * ended in S_sublex_done. Returns a '(', starting the function arguments
2425 * to the uc, lc, etc. found before.
2426 * Sets PL_lex_state to LEX_INTERPCONCAT.
2427 */
2428
2429 STATIC I32
S_sublex_push(pTHX)2430 S_sublex_push(pTHX)
2431 {
2432 LEXSHARED *shared;
2433 const bool is_heredoc = PL_multi_close == '<';
2434 ENTER;
2435
2436 PL_lex_state = PL_parser->lex_super_state;
2437 SAVEI8(PL_lex_dojoin);
2438 SAVEI32(PL_lex_brackets);
2439 SAVEI32(PL_lex_allbrackets);
2440 SAVEI32(PL_lex_formbrack);
2441 SAVEI8(PL_lex_fakeeof);
2442 SAVEI32(PL_lex_casemods);
2443 SAVEI32(PL_lex_starts);
2444 SAVEI8(PL_lex_state);
2445 SAVESPTR(PL_lex_repl);
2446 SAVEVPTR(PL_lex_inpat);
2447 SAVEI16(PL_lex_inwhat);
2448 if (is_heredoc)
2449 {
2450 SAVECOPLINE(PL_curcop);
2451 SAVEI32(PL_multi_end);
2452 SAVEI32(PL_parser->herelines);
2453 PL_parser->herelines = 0;
2454 }
2455 SAVEIV(PL_multi_close);
2456 SAVEPPTR(PL_bufptr);
2457 SAVEPPTR(PL_bufend);
2458 SAVEPPTR(PL_oldbufptr);
2459 SAVEPPTR(PL_oldoldbufptr);
2460 SAVEPPTR(PL_last_lop);
2461 SAVEPPTR(PL_last_uni);
2462 SAVEPPTR(PL_linestart);
2463 SAVESPTR(PL_linestr);
2464 SAVEGENERICPV(PL_lex_brackstack);
2465 SAVEGENERICPV(PL_lex_casestack);
2466 SAVEGENERICPV(PL_parser->lex_shared);
2467 SAVEBOOL(PL_parser->lex_re_reparsing);
2468 SAVEI32(PL_copline);
2469
2470 /* The here-doc parser needs to be able to peek into outer lexing
2471 scopes to find the body of the here-doc. So we put PL_linestr and
2472 PL_bufptr into lex_shared, to ‘share’ those values.
2473 */
2474 PL_parser->lex_shared->ls_linestr = PL_linestr;
2475 PL_parser->lex_shared->ls_bufptr = PL_bufptr;
2476
2477 PL_linestr = PL_lex_stuff;
2478 PL_lex_repl = PL_parser->lex_sub_repl;
2479 PL_lex_stuff = NULL;
2480 PL_parser->lex_sub_repl = NULL;
2481
2482 /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets
2483 set for an inner quote-like operator and then an error causes scope-
2484 popping. We must not have a PL_lex_stuff value left dangling, as
2485 that breaks assumptions elsewhere. See bug #123617. */
2486 SAVEGENERICSV(PL_lex_stuff);
2487 SAVEGENERICSV(PL_parser->lex_sub_repl);
2488
2489 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2490 = SvPVX(PL_linestr);
2491 PL_bufend += SvCUR(PL_linestr);
2492 PL_last_lop = PL_last_uni = NULL;
2493 SAVEFREESV(PL_linestr);
2494 if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2495
2496 PL_lex_dojoin = FALSE;
2497 PL_lex_brackets = PL_lex_formbrack = 0;
2498 PL_lex_allbrackets = 0;
2499 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2500 Newx(PL_lex_brackstack, 120, char);
2501 Newx(PL_lex_casestack, 12, char);
2502 PL_lex_casemods = 0;
2503 *PL_lex_casestack = '\0';
2504 PL_lex_starts = 0;
2505 PL_lex_state = LEX_INTERPCONCAT;
2506 if (is_heredoc)
2507 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2508 PL_copline = NOLINE;
2509
2510 Newxz(shared, 1, LEXSHARED);
2511 shared->ls_prev = PL_parser->lex_shared;
2512 PL_parser->lex_shared = shared;
2513
2514 PL_lex_inwhat = PL_parser->lex_sub_inwhat;
2515 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2516 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2517 PL_lex_inpat = PL_parser->lex_sub_op;
2518 else
2519 PL_lex_inpat = NULL;
2520
2521 PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2522 PL_in_eval &= ~EVAL_RE_REPARSING;
2523
2524 return SUBLEXSTART;
2525 }
2526
2527 /*
2528 * S_sublex_done
2529 * Restores lexer state after a S_sublex_push.
2530 */
2531
2532 STATIC I32
S_sublex_done(pTHX)2533 S_sublex_done(pTHX)
2534 {
2535 if (!PL_lex_starts++) {
2536 SV * const sv = newSVpvs("");
2537 if (SvUTF8(PL_linestr))
2538 SvUTF8_on(sv);
2539 PL_expect = XOPERATOR;
2540 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
2541 return THING;
2542 }
2543
2544 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2545 PL_lex_state = LEX_INTERPCASEMOD;
2546 return yylex();
2547 }
2548
2549 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2550 assert(PL_lex_inwhat != OP_TRANSR);
2551 if (PL_lex_repl) {
2552 assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2553 PL_linestr = PL_lex_repl;
2554 PL_lex_inpat = 0;
2555 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2556 PL_bufend += SvCUR(PL_linestr);
2557 PL_last_lop = PL_last_uni = NULL;
2558 PL_lex_dojoin = FALSE;
2559 PL_lex_brackets = 0;
2560 PL_lex_allbrackets = 0;
2561 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2562 PL_lex_casemods = 0;
2563 *PL_lex_casestack = '\0';
2564 PL_lex_starts = 0;
2565 if (SvEVALED(PL_lex_repl)) {
2566 PL_lex_state = LEX_INTERPNORMAL;
2567 PL_lex_starts++;
2568 /* we don't clear PL_lex_repl here, so that we can check later
2569 whether this is an evalled subst; that means we rely on the
2570 logic to ensure sublex_done() is called again only via the
2571 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2572 }
2573 else {
2574 PL_lex_state = LEX_INTERPCONCAT;
2575 PL_lex_repl = NULL;
2576 }
2577 if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2578 CopLINE(PL_curcop) +=
2579 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xnv_lines
2580 + PL_parser->herelines;
2581 PL_parser->herelines = 0;
2582 }
2583 return '/';
2584 }
2585 else {
2586 const line_t l = CopLINE(PL_curcop);
2587 LEAVE;
2588 if (PL_parser->sub_error_count != PL_error_count) {
2589 if (PL_parser->sub_no_recover) {
2590 yyquit();
2591 NOT_REACHED;
2592 }
2593 }
2594 if (PL_multi_close == '<')
2595 PL_parser->herelines += l - PL_multi_end;
2596 PL_bufend = SvPVX(PL_linestr);
2597 PL_bufend += SvCUR(PL_linestr);
2598 PL_expect = XOPERATOR;
2599 return SUBLEXEND;
2600 }
2601 }
2602
2603 HV *
Perl_load_charnames(pTHX_ SV * char_name,const char * context,const STRLEN context_len,const char ** error_msg)2604 Perl_load_charnames(pTHX_ SV * char_name, const char * context,
2605 const STRLEN context_len, const char ** error_msg)
2606 {
2607 /* Load the official _charnames module if not already there. The
2608 * parameters are just to give info for any error messages generated:
2609 * char_name a name to look up which is the reason for loading this
2610 * context 'char_name' in the context in the input in which it appears
2611 * context_len how many bytes 'context' occupies
2612 * error_msg *error_msg will be set to any error
2613 *
2614 * Returns the ^H table if success; otherwise NULL */
2615
2616 unsigned int i;
2617 HV * table;
2618 SV **cvp;
2619 SV * res;
2620
2621 PERL_ARGS_ASSERT_LOAD_CHARNAMES;
2622
2623 /* This loop is executed 1 1/2 times. On the first time through, if it
2624 * isn't already loaded, try loading it, and iterate just once to see if it
2625 * worked. */
2626 for (i = 0; i < 2; i++) {
2627 table = GvHV(PL_hintgv); /* ^H */
2628
2629 if ( table
2630 && (PL_hints & HINT_LOCALIZE_HH)
2631 && (cvp = hv_fetchs(table, "charnames", FALSE))
2632 && SvOK(*cvp))
2633 {
2634 return table; /* Quit if already loaded */
2635 }
2636
2637 if (i == 0) {
2638 Perl_load_module(aTHX_
2639 0,
2640 newSVpvs("_charnames"),
2641
2642 /* version parameter; no need to specify it, as if we get too early
2643 * a version, will fail anyway, not being able to find 'charnames'
2644 * */
2645 NULL,
2646 newSVpvs(":full"),
2647 newSVpvs(":short"),
2648 NULL);
2649 }
2650 }
2651
2652 /* Here, it failed; new_constant will give appropriate error messages */
2653 *error_msg = NULL;
2654 res = new_constant( NULL, 0, "charnames", char_name, NULL,
2655 context, context_len, error_msg);
2656 SvREFCNT_dec(res);
2657
2658 return NULL;
2659 }
2660
2661 STATIC SV*
S_get_and_check_backslash_N_name_wrapper(pTHX_ const char * s,const char * const e)2662 S_get_and_check_backslash_N_name_wrapper(pTHX_ const char* s, const char* const e)
2663 {
2664 /* This justs wraps get_and_check_backslash_N_name() to output any error
2665 * message it returns. */
2666
2667 const char * error_msg = NULL;
2668 SV * result;
2669
2670 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME_WRAPPER;
2671
2672 /* charnames doesn't work well if there have been errors found */
2673 if (PL_error_count > 0) {
2674 return NULL;
2675 }
2676
2677 result = get_and_check_backslash_N_name(s, e, cBOOL(UTF), &error_msg);
2678
2679 if (error_msg) {
2680 yyerror_pv(error_msg, UTF ? SVf_UTF8 : 0);
2681 }
2682
2683 return result;
2684 }
2685
2686 SV*
Perl_get_and_check_backslash_N_name(pTHX_ const char * s,const char * const e,const bool is_utf8,const char ** error_msg)2687 Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
2688 const char* const e,
2689 const bool is_utf8,
2690 const char ** error_msg)
2691 {
2692 /* <s> points to first character of interior of \N{}, <e> to one beyond the
2693 * interior, hence to the "}". Finds what the name resolves to, returning
2694 * an SV* containing it; NULL if no valid one found.
2695 *
2696 * 'is_utf8' is TRUE if we know we want the result to be UTF-8 even if it
2697 * doesn't have to be. */
2698
2699 SV* char_name;
2700 SV* res;
2701 HV * table;
2702 SV **cvp;
2703 SV *cv;
2704 SV *rv;
2705 HV *stash;
2706
2707 /* Points to the beginning of the \N{... so that any messages include the
2708 * context of what's failing*/
2709 const char* context = s - 3;
2710 STRLEN context_len = e - context + 1; /* include all of \N{...} */
2711
2712 dVAR;
2713
2714 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2715
2716 assert(e >= s);
2717 assert(s > (char *) 3);
2718
2719 char_name = newSVpvn_flags(s, e - s, (is_utf8) ? SVf_UTF8 : 0);
2720
2721 if (!SvCUR(char_name)) {
2722 SvREFCNT_dec_NN(char_name);
2723 /* diag_listed_as: Unknown charname '%s' */
2724 *error_msg = Perl_form(aTHX_ "Unknown charname ''");
2725 return NULL;
2726 }
2727
2728 /* Autoload the charnames module */
2729
2730 table = load_charnames(char_name, context, context_len, error_msg);
2731 if (table == NULL) {
2732 return NULL;
2733 }
2734
2735 *error_msg = NULL;
2736 res = new_constant( NULL, 0, "charnames", char_name, NULL,
2737 context, context_len, error_msg);
2738 if (*error_msg) {
2739 *error_msg = Perl_form(aTHX_ "Unknown charname '%s'", SvPVX(char_name));
2740
2741 SvREFCNT_dec(res);
2742 return NULL;
2743 }
2744
2745 /* See if the charnames handler is the Perl core's, and if so, we can skip
2746 * the validation needed for a user-supplied one, as Perl's does its own
2747 * validation. */
2748 cvp = hv_fetchs(table, "charnames", FALSE);
2749 if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2750 SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2751 {
2752 const char * const name = HvNAME(stash);
2753 if (memEQs(name, HvNAMELEN(stash), "_charnames")) {
2754 return res;
2755 }
2756 }
2757
2758 /* Here, it isn't Perl's charname handler. We can't rely on a
2759 * user-supplied handler to validate the input name. For non-ut8 input,
2760 * look to see that the first character is legal. Then loop through the
2761 * rest checking that each is a continuation */
2762
2763 /* This code makes the reasonable assumption that the only Latin1-range
2764 * characters that begin a character name alias are alphabetic, otherwise
2765 * would have to create a isCHARNAME_BEGIN macro */
2766
2767 if (! is_utf8) {
2768 if (! isALPHAU(*s)) {
2769 goto bad_charname;
2770 }
2771 s++;
2772 while (s < e) {
2773 if (! isCHARNAME_CONT(*s)) {
2774 goto bad_charname;
2775 }
2776 if (*s == ' ' && *(s-1) == ' ') {
2777 goto multi_spaces;
2778 }
2779 s++;
2780 }
2781 }
2782 else {
2783 /* Similarly for utf8. For invariants can check directly; for other
2784 * Latin1, can calculate their code point and check; otherwise use an
2785 * inversion list */
2786 if (UTF8_IS_INVARIANT(*s)) {
2787 if (! isALPHAU(*s)) {
2788 goto bad_charname;
2789 }
2790 s++;
2791 } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2792 if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) {
2793 goto bad_charname;
2794 }
2795 s += 2;
2796 }
2797 else {
2798 if (! _invlist_contains_cp(PL_utf8_charname_begin,
2799 utf8_to_uvchr_buf((U8 *) s,
2800 (U8 *) e,
2801 NULL)))
2802 {
2803 goto bad_charname;
2804 }
2805 s += UTF8SKIP(s);
2806 }
2807
2808 while (s < e) {
2809 if (UTF8_IS_INVARIANT(*s)) {
2810 if (! isCHARNAME_CONT(*s)) {
2811 goto bad_charname;
2812 }
2813 if (*s == ' ' && *(s-1) == ' ') {
2814 goto multi_spaces;
2815 }
2816 s++;
2817 }
2818 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2819 if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1))))
2820 {
2821 goto bad_charname;
2822 }
2823 s += 2;
2824 }
2825 else {
2826 if (! _invlist_contains_cp(PL_utf8_charname_continue,
2827 utf8_to_uvchr_buf((U8 *) s,
2828 (U8 *) e,
2829 NULL)))
2830 {
2831 goto bad_charname;
2832 }
2833 s += UTF8SKIP(s);
2834 }
2835 }
2836 }
2837 if (*(s-1) == ' ') {
2838 /* diag_listed_as: charnames alias definitions may not contain
2839 trailing white-space; marked by <-- HERE in %s
2840 */
2841 *error_msg = Perl_form(aTHX_
2842 "charnames alias definitions may not contain trailing "
2843 "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
2844 (int)(s - context + 1), context,
2845 (int)(e - s + 1), s + 1);
2846 return NULL;
2847 }
2848
2849 if (SvUTF8(res)) { /* Don't accept malformed charname value */
2850 const U8* first_bad_char_loc;
2851 STRLEN len;
2852 const char* const str = SvPV_const(res, len);
2853 if (UNLIKELY(! is_utf8_string_loc((U8 *) str, len,
2854 &first_bad_char_loc)))
2855 {
2856 _force_out_malformed_utf8_message(first_bad_char_loc,
2857 (U8 *) PL_parser->bufend,
2858 0,
2859 0 /* 0 means don't die */ );
2860 /* diag_listed_as: Malformed UTF-8 returned by \N{%s}
2861 immediately after '%s' */
2862 *error_msg = Perl_form(aTHX_
2863 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2864 (int) context_len, context,
2865 (int) ((char *) first_bad_char_loc - str), str);
2866 return NULL;
2867 }
2868 }
2869
2870 return res;
2871
2872 bad_charname: {
2873
2874 /* The final %.*s makes sure that should the trailing NUL be missing
2875 * that this print won't run off the end of the string */
2876 /* diag_listed_as: Invalid character in \N{...}; marked by <-- HERE
2877 in \N{%s} */
2878 *error_msg = Perl_form(aTHX_
2879 "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2880 (int)(s - context + 1), context,
2881 (int)(e - s + 1), s + 1);
2882 return NULL;
2883 }
2884
2885 multi_spaces:
2886 /* diag_listed_as: charnames alias definitions may not contain a
2887 sequence of multiple spaces; marked by <-- HERE
2888 in %s */
2889 *error_msg = Perl_form(aTHX_
2890 "charnames alias definitions may not contain a sequence of "
2891 "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
2892 (int)(s - context + 1), context,
2893 (int)(e - s + 1), s + 1);
2894 return NULL;
2895 }
2896
2897 /*
2898 scan_const
2899
2900 Extracts the next constant part of a pattern, double-quoted string,
2901 or transliteration. This is terrifying code.
2902
2903 For example, in parsing the double-quoted string "ab\x63$d", it would
2904 stop at the '$' and return an OP_CONST containing 'abc'.
2905
2906 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2907 processing a pattern (PL_lex_inpat is true), a transliteration
2908 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2909
2910 Returns a pointer to the character scanned up to. If this is
2911 advanced from the start pointer supplied (i.e. if anything was
2912 successfully parsed), will leave an OP_CONST for the substring scanned
2913 in pl_yylval. Caller must intuit reason for not parsing further
2914 by looking at the next characters herself.
2915
2916 In patterns:
2917 expand:
2918 \N{FOO} => \N{U+hex_for_character_FOO}
2919 (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
2920
2921 pass through:
2922 all other \-char, including \N and \N{ apart from \N{ABC}
2923
2924 stops on:
2925 @ and $ where it appears to be a var, but not for $ as tail anchor
2926 \l \L \u \U \Q \E
2927 (?{ or (??{
2928
2929 In transliterations:
2930 characters are VERY literal, except for - not at the start or end
2931 of the string, which indicates a range. However some backslash sequences
2932 are recognized: \r, \n, and the like
2933 \007 \o{}, \x{}, \N{}
2934 If all elements in the transliteration are below 256,
2935 scan_const expands the range to the full set of intermediate
2936 characters. If the range is in utf8, the hyphen is replaced with
2937 a certain range mark which will be handled by pmtrans() in op.c.
2938
2939 In double-quoted strings:
2940 backslashes:
2941 all those recognized in transliterations
2942 deprecated backrefs: \1 (in substitution replacements)
2943 case and quoting: \U \Q \E
2944 stops on @ and $
2945
2946 scan_const does *not* construct ops to handle interpolated strings.
2947 It stops processing as soon as it finds an embedded $ or @ variable
2948 and leaves it to the caller to work out what's going on.
2949
2950 embedded arrays (whether in pattern or not) could be:
2951 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2952
2953 $ in double-quoted strings must be the symbol of an embedded scalar.
2954
2955 $ in pattern could be $foo or could be tail anchor. Assumption:
2956 it's a tail anchor if $ is the last thing in the string, or if it's
2957 followed by one of "()| \r\n\t"
2958
2959 \1 (backreferences) are turned into $1 in substitutions
2960
2961 The structure of the code is
2962 while (there's a character to process) {
2963 handle transliteration ranges
2964 skip regexp comments /(?#comment)/ and codes /(?{code})/
2965 skip #-initiated comments in //x patterns
2966 check for embedded arrays
2967 check for embedded scalars
2968 if (backslash) {
2969 deprecate \1 in substitution replacements
2970 handle string-changing backslashes \l \U \Q \E, etc.
2971 switch (what was escaped) {
2972 handle \- in a transliteration (becomes a literal -)
2973 if a pattern and not \N{, go treat as regular character
2974 handle \132 (octal characters)
2975 handle \x15 and \x{1234} (hex characters)
2976 handle \N{name} (named characters, also \N{3,5} in a pattern)
2977 handle \cV (control characters)
2978 handle printf-style backslashes (\f, \r, \n, etc)
2979 } (end switch)
2980 continue
2981 } (end if backslash)
2982 handle regular character
2983 } (end while character to read)
2984
2985 */
2986
2987 STATIC char *
S_scan_const(pTHX_ char * start)2988 S_scan_const(pTHX_ char *start)
2989 {
2990 char *send = PL_bufend; /* end of the constant */
2991 SV *sv = newSV(send - start); /* sv for the constant. See note below
2992 on sizing. */
2993 char *s = start; /* start of the constant */
2994 char *d = SvPVX(sv); /* destination for copies */
2995 bool dorange = FALSE; /* are we in a translit range? */
2996 bool didrange = FALSE; /* did we just finish a range? */
2997 bool in_charclass = FALSE; /* within /[...]/ */
2998 bool s_is_utf8 = cBOOL(UTF); /* Is the source string assumed to be
2999 UTF8? But, this can show as true
3000 when the source isn't utf8, as for
3001 example when it is entirely composed
3002 of hex constants */
3003 bool d_is_utf8 = FALSE; /* Output constant is UTF8 */
3004 STRLEN utf8_variant_count = 0; /* When not in UTF-8, this counts the
3005 number of characters found so far
3006 that will expand (into 2 bytes)
3007 should we have to convert to
3008 UTF-8) */
3009 SV *res; /* result from charnames */
3010 STRLEN offset_to_max = 0; /* The offset in the output to where the range
3011 high-end character is temporarily placed */
3012
3013 /* Does something require special handling in tr/// ? This avoids extra
3014 * work in a less likely case. As such, khw didn't feel it was worth
3015 * adding any branches to the more mainline code to handle this, which
3016 * means that this doesn't get set in some circumstances when things like
3017 * \x{100} get expanded out. As a result there needs to be extra testing
3018 * done in the tr code */
3019 bool has_above_latin1 = FALSE;
3020
3021 /* Note on sizing: The scanned constant is placed into sv, which is
3022 * initialized by newSV() assuming one byte of output for every byte of
3023 * input. This routine expects newSV() to allocate an extra byte for a
3024 * trailing NUL, which this routine will append if it gets to the end of
3025 * the input. There may be more bytes of input than output (eg., \N{LATIN
3026 * CAPITAL LETTER A}), or more output than input if the constant ends up
3027 * recoded to utf8, but each time a construct is found that might increase
3028 * the needed size, SvGROW() is called. Its size parameter each time is
3029 * based on the best guess estimate at the time, namely the length used so
3030 * far, plus the length the current construct will occupy, plus room for
3031 * the trailing NUL, plus one byte for every input byte still unscanned */
3032
3033 UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
3034 before set */
3035 #ifdef EBCDIC
3036 int backslash_N = 0; /* ? was the character from \N{} */
3037 int non_portable_endpoint = 0; /* ? In a range is an endpoint
3038 platform-specific like \x65 */
3039 #endif
3040
3041 PERL_ARGS_ASSERT_SCAN_CONST;
3042
3043 assert(PL_lex_inwhat != OP_TRANSR);
3044
3045 /* Protect sv from errors and fatal warnings. */
3046 ENTER_with_name("scan_const");
3047 SAVEFREESV(sv);
3048
3049 /* A bunch of code in the loop below assumes that if s[n] exists and is not
3050 * NUL, then s[n+1] exists. This assertion makes sure that assumption is
3051 * valid */
3052 assert(*send == '\0');
3053
3054 while (s < send
3055 || dorange /* Handle tr/// range at right edge of input */
3056 ) {
3057
3058 /* get transliterations out of the way (they're most literal) */
3059 if (PL_lex_inwhat == OP_TRANS) {
3060
3061 /* But there isn't any special handling necessary unless there is a
3062 * range, so for most cases we just drop down and handle the value
3063 * as any other. There are two exceptions.
3064 *
3065 * 1. A hyphen indicates that we are actually going to have a
3066 * range. In this case, skip the '-', set a flag, then drop
3067 * down to handle what should be the end range value.
3068 * 2. After we've handled that value, the next time through, that
3069 * flag is set and we fix up the range.
3070 *
3071 * Ranges entirely within Latin1 are expanded out entirely, in
3072 * order to make the transliteration a simple table look-up.
3073 * Ranges that extend above Latin1 have to be done differently, so
3074 * there is no advantage to expanding them here, so they are
3075 * stored here as Min, RANGE_INDICATOR, Max. 'RANGE_INDICATOR' is
3076 * a byte that can't occur in legal UTF-8, and hence can signify a
3077 * hyphen without any possible ambiguity. On EBCDIC machines, if
3078 * the range is expressed as Unicode, the Latin1 portion is
3079 * expanded out even if the range extends above Latin1. This is
3080 * because each code point in it has to be processed here
3081 * individually to get its native translation */
3082
3083 if (! dorange) {
3084
3085 /* Here, we don't think we're in a range. If the new character
3086 * is not a hyphen; or if it is a hyphen, but it's too close to
3087 * either edge to indicate a range, or if we haven't output any
3088 * characters yet then it's a regular character. */
3089 if (*s != '-' || s >= send - 1 || s == start || d == SvPVX(sv))
3090 {
3091
3092 /* A regular character. Process like any other, but first
3093 * clear any flags */
3094 didrange = FALSE;
3095 dorange = FALSE;
3096 #ifdef EBCDIC
3097 non_portable_endpoint = 0;
3098 backslash_N = 0;
3099 #endif
3100 /* The tests here for being above Latin1 and similar ones
3101 * in the following 'else' suffice to find all such
3102 * occurences in the constant, except those added by a
3103 * backslash escape sequence, like \x{100}. Mostly, those
3104 * set 'has_above_latin1' as appropriate */
3105 if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3106 has_above_latin1 = TRUE;
3107 }
3108
3109 /* Drops down to generic code to process current byte */
3110 }
3111 else { /* Is a '-' in the context where it means a range */
3112 if (didrange) { /* Something like y/A-C-Z// */
3113 Perl_croak(aTHX_ "Ambiguous range in transliteration"
3114 " operator");
3115 }
3116
3117 dorange = TRUE;
3118
3119 s++; /* Skip past the hyphen */
3120
3121 /* d now points to where the end-range character will be
3122 * placed. Drop down to get that character. We'll finish
3123 * processing the range the next time through the loop */
3124
3125 if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3126 has_above_latin1 = TRUE;
3127 }
3128
3129 /* Drops down to generic code to process current byte */
3130 }
3131 } /* End of not a range */
3132 else {
3133 /* Here we have parsed a range. Now must handle it. At this
3134 * point:
3135 * 'sv' is a SV* that contains the output string we are
3136 * constructing. The final two characters in that string
3137 * are the range start and range end, in order.
3138 * 'd' points to just beyond the range end in the 'sv' string,
3139 * where we would next place something
3140 */
3141 char * max_ptr;
3142 char * min_ptr;
3143 IV range_min;
3144 IV range_max; /* last character in range */
3145 STRLEN grow;
3146 Size_t offset_to_min = 0;
3147 Size_t extras = 0;
3148 #ifdef EBCDIC
3149 bool convert_unicode;
3150 IV real_range_max = 0;
3151 #endif
3152 /* Get the code point values of the range ends. */
3153 max_ptr = (d_is_utf8) ? (char *) utf8_hop( (U8*) d, -1) : d - 1;
3154 offset_to_max = max_ptr - SvPVX_const(sv);
3155 if (d_is_utf8) {
3156 /* We know the utf8 is valid, because we just constructed
3157 * it ourselves in previous loop iterations */
3158 min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
3159 range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL);
3160 range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL);
3161
3162 /* This compensates for not all code setting
3163 * 'has_above_latin1', so that we don't skip stuff that
3164 * should be executed */
3165 if (range_max > 255) {
3166 has_above_latin1 = TRUE;
3167 }
3168 }
3169 else {
3170 min_ptr = max_ptr - 1;
3171 range_min = * (U8*) min_ptr;
3172 range_max = * (U8*) max_ptr;
3173 }
3174
3175 /* If the range is just a single code point, like tr/a-a/.../,
3176 * that code point is already in the output, twice. We can
3177 * just back up over the second instance and avoid all the rest
3178 * of the work. But if it is a variant character, it's been
3179 * counted twice, so decrement. (This unlikely scenario is
3180 * special cased, like the one for a range of 2 code points
3181 * below, only because the main-line code below needs a range
3182 * of 3 or more to work without special casing. Might as well
3183 * get it out of the way now.) */
3184 if (UNLIKELY(range_max == range_min)) {
3185 d = max_ptr;
3186 if (! d_is_utf8 && ! UVCHR_IS_INVARIANT(range_max)) {
3187 utf8_variant_count--;
3188 }
3189 goto range_done;
3190 }
3191
3192 #ifdef EBCDIC
3193 /* On EBCDIC platforms, we may have to deal with portable
3194 * ranges. These happen if at least one range endpoint is a
3195 * Unicode value (\N{...}), or if the range is a subset of
3196 * [A-Z] or [a-z], and both ends are literal characters,
3197 * like 'A', and not like \x{C1} */
3198 convert_unicode =
3199 cBOOL(backslash_N) /* \N{} forces Unicode,
3200 hence portable range */
3201 || ( ! non_portable_endpoint
3202 && (( isLOWER_A(range_min) && isLOWER_A(range_max))
3203 || (isUPPER_A(range_min) && isUPPER_A(range_max))));
3204 if (convert_unicode) {
3205
3206 /* Special handling is needed for these portable ranges.
3207 * They are defined to be in Unicode terms, which includes
3208 * all the Unicode code points between the end points.
3209 * Convert to Unicode to get the Unicode range. Later we
3210 * will convert each code point in the range back to
3211 * native. */
3212 range_min = NATIVE_TO_UNI(range_min);
3213 range_max = NATIVE_TO_UNI(range_max);
3214 }
3215 #endif
3216
3217 if (range_min > range_max) {
3218 #ifdef EBCDIC
3219 if (convert_unicode) {
3220 /* Need to convert back to native for meaningful
3221 * messages for this platform */
3222 range_min = UNI_TO_NATIVE(range_min);
3223 range_max = UNI_TO_NATIVE(range_max);
3224 }
3225 #endif
3226 /* Use the characters themselves for the error message if
3227 * ASCII printables; otherwise some visible representation
3228 * of them */
3229 if (isPRINT_A(range_min) && isPRINT_A(range_max)) {
3230 Perl_croak(aTHX_
3231 "Invalid range \"%c-%c\" in transliteration operator",
3232 (char)range_min, (char)range_max);
3233 }
3234 #ifdef EBCDIC
3235 else if (convert_unicode) {
3236 /* diag_listed_as: Invalid range "%s" in transliteration operator */
3237 Perl_croak(aTHX_
3238 "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04"
3239 UVXf "}\" in transliteration operator",
3240 range_min, range_max);
3241 }
3242 #endif
3243 else {
3244 /* diag_listed_as: Invalid range "%s" in transliteration operator */
3245 Perl_croak(aTHX_
3246 "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\""
3247 " in transliteration operator",
3248 range_min, range_max);
3249 }
3250 }
3251
3252 /* If the range is exactly two code points long, they are
3253 * already both in the output */
3254 if (UNLIKELY(range_min + 1 == range_max)) {
3255 goto range_done;
3256 }
3257
3258 /* Here the range contains at least 3 code points */
3259
3260 if (d_is_utf8) {
3261
3262 /* If everything in the transliteration is below 256, we
3263 * can avoid special handling later. A translation table
3264 * for each of those bytes is created by op.c. So we
3265 * expand out all ranges to their constituent code points.
3266 * But if we've encountered something above 255, the
3267 * expanding won't help, so skip doing that. But if it's
3268 * EBCDIC, we may have to look at each character below 256
3269 * if we have to convert to/from Unicode values */
3270 if ( has_above_latin1
3271 #ifdef EBCDIC
3272 && (range_min > 255 || ! convert_unicode)
3273 #endif
3274 ) {
3275 const STRLEN off = d - SvPVX(sv);
3276 const STRLEN extra = 1 + (send - s) + 1;
3277 char *e;
3278
3279 /* Move the high character one byte to the right; then
3280 * insert between it and the range begin, an illegal
3281 * byte which serves to indicate this is a range (using
3282 * a '-' would be ambiguous). */
3283
3284 if (off + extra > SvLEN(sv)) {
3285 d = off + SvGROW(sv, off + extra);
3286 max_ptr = d - off + offset_to_max;
3287 }
3288
3289 e = d++;
3290 while (e-- > max_ptr) {
3291 *(e + 1) = *e;
3292 }
3293 *(e + 1) = (char) RANGE_INDICATOR;
3294 goto range_done;
3295 }
3296
3297 /* Here, we're going to expand out the range. For EBCDIC
3298 * the range can extend above 255 (not so in ASCII), so
3299 * for EBCDIC, split it into the parts above and below
3300 * 255/256 */
3301 #ifdef EBCDIC
3302 if (range_max > 255) {
3303 real_range_max = range_max;
3304 range_max = 255;
3305 }
3306 #endif
3307 }
3308
3309 /* Here we need to expand out the string to contain each
3310 * character in the range. Grow the output to handle this.
3311 * For non-UTF8, we need a byte for each code point in the
3312 * range, minus the three that we've already allocated for: the
3313 * hyphen, the min, and the max. For UTF-8, we need this
3314 * plus an extra byte for each code point that occupies two
3315 * bytes (is variant) when in UTF-8 (except we've already
3316 * allocated for the end points, including if they are
3317 * variants). For ASCII platforms and Unicode ranges on EBCDIC
3318 * platforms, it's easy to calculate a precise number. To
3319 * start, we count the variants in the range, which we need
3320 * elsewhere in this function anyway. (For the case where it
3321 * isn't easy to calculate, 'extras' has been initialized to 0,
3322 * and the calculation is done in a loop further down.) */
3323 #ifdef EBCDIC
3324 if (convert_unicode)
3325 #endif
3326 {
3327 /* This is executed unconditionally on ASCII, and for
3328 * Unicode ranges on EBCDIC. Under these conditions, all
3329 * code points above a certain value are variant; and none
3330 * under that value are. We just need to find out how much
3331 * of the range is above that value. We don't count the
3332 * end points here, as they will already have been counted
3333 * as they were parsed. */
3334 if (range_min >= UTF_CONTINUATION_MARK) {
3335
3336 /* The whole range is made up of variants */
3337 extras = (range_max - 1) - (range_min + 1) + 1;
3338 }
3339 else if (range_max >= UTF_CONTINUATION_MARK) {
3340
3341 /* Only the higher portion of the range is variants */
3342 extras = (range_max - 1) - UTF_CONTINUATION_MARK + 1;
3343 }
3344
3345 utf8_variant_count += extras;
3346 }
3347
3348 /* The base growth is the number of code points in the range,
3349 * not including the endpoints, which have already been sized
3350 * for (and output). We don't subtract for the hyphen, as it
3351 * has been parsed but not output, and the SvGROW below is
3352 * based only on what's been output plus what's left to parse.
3353 * */
3354 grow = (range_max - 1) - (range_min + 1) + 1;
3355
3356 if (d_is_utf8) {
3357 #ifdef EBCDIC
3358 /* In some cases in EBCDIC, we haven't yet calculated a
3359 * precise amount needed for the UTF-8 variants. Just
3360 * assume the worst case, that everything will expand by a
3361 * byte */
3362 if (! convert_unicode) {
3363 grow *= 2;
3364 }
3365 else
3366 #endif
3367 {
3368 /* Otherwise we know exactly how many variants there
3369 * are in the range. */
3370 grow += extras;
3371 }
3372 }
3373
3374 /* Grow, but position the output to overwrite the range min end
3375 * point, because in some cases we overwrite that */
3376 SvCUR_set(sv, d - SvPVX_const(sv));
3377 offset_to_min = min_ptr - SvPVX_const(sv);
3378
3379 /* See Note on sizing above. */
3380 d = offset_to_min + SvGROW(sv, SvCUR(sv)
3381 + (send - s)
3382 + grow
3383 + 1 /* Trailing NUL */ );
3384
3385 /* Now, we can expand out the range. */
3386 #ifdef EBCDIC
3387 if (convert_unicode) {
3388 SSize_t i;
3389
3390 /* Recall that the min and max are now in Unicode terms, so
3391 * we have to convert each character to its native
3392 * equivalent */
3393 if (d_is_utf8) {
3394 for (i = range_min; i <= range_max; i++) {
3395 append_utf8_from_native_byte(
3396 LATIN1_TO_NATIVE((U8) i),
3397 (U8 **) &d);
3398 }
3399 }
3400 else {
3401 for (i = range_min; i <= range_max; i++) {
3402 *d++ = (char)LATIN1_TO_NATIVE((U8) i);
3403 }
3404 }
3405 }
3406 else
3407 #endif
3408 /* Always gets run for ASCII, and sometimes for EBCDIC. */
3409 {
3410 /* Here, no conversions are necessary, which means that the
3411 * first character in the range is already in 'd' and
3412 * valid, so we can skip overwriting it */
3413 if (d_is_utf8) {
3414 SSize_t i;
3415 d += UTF8SKIP(d);
3416 for (i = range_min + 1; i <= range_max; i++) {
3417 append_utf8_from_native_byte((U8) i, (U8 **) &d);
3418 }
3419 }
3420 else {
3421 SSize_t i;
3422 d++;
3423 assert(range_min + 1 <= range_max);
3424 for (i = range_min + 1; i < range_max; i++) {
3425 #ifdef EBCDIC
3426 /* In this case on EBCDIC, we haven't calculated
3427 * the variants. Do it here, as we go along */
3428 if (! UVCHR_IS_INVARIANT(i)) {
3429 utf8_variant_count++;
3430 }
3431 #endif
3432 *d++ = (char)i;
3433 }
3434
3435 /* The range_max is done outside the loop so as to
3436 * avoid having to special case not incrementing
3437 * 'utf8_variant_count' on EBCDIC (it's already been
3438 * counted when originally parsed) */
3439 *d++ = (char) range_max;
3440 }
3441 }
3442
3443 #ifdef EBCDIC
3444 /* If the original range extended above 255, add in that
3445 * portion. */
3446 if (real_range_max) {
3447 *d++ = (char) UTF8_TWO_BYTE_HI(0x100);
3448 *d++ = (char) UTF8_TWO_BYTE_LO(0x100);
3449 if (real_range_max > 0x100) {
3450 if (real_range_max > 0x101) {
3451 *d++ = (char) RANGE_INDICATOR;
3452 }
3453 d = (char*)uvchr_to_utf8((U8*)d, real_range_max);
3454 }
3455 }
3456 #endif
3457
3458 range_done:
3459 /* mark the range as done, and continue */
3460 didrange = TRUE;
3461 dorange = FALSE;
3462 #ifdef EBCDIC
3463 non_portable_endpoint = 0;
3464 backslash_N = 0;
3465 #endif
3466 continue;
3467 } /* End of is a range */
3468 } /* End of transliteration. Joins main code after these else's */
3469 else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3470 char *s1 = s-1;
3471 int esc = 0;
3472 while (s1 >= start && *s1-- == '\\')
3473 esc = !esc;
3474 if (!esc)
3475 in_charclass = TRUE;
3476 }
3477 else if (*s == ']' && PL_lex_inpat && in_charclass) {
3478 char *s1 = s-1;
3479 int esc = 0;
3480 while (s1 >= start && *s1-- == '\\')
3481 esc = !esc;
3482 if (!esc)
3483 in_charclass = FALSE;
3484 }
3485 /* skip for regexp comments /(?#comment)/, except for the last
3486 * char, which will be done separately. Stop on (?{..}) and
3487 * friends */
3488 else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3489 if (s[2] == '#') {
3490 if (s_is_utf8) {
3491 PERL_UINT_FAST8_T len = UTF8SKIP(s);
3492
3493 while (s + len < send && *s != ')') {
3494 Copy(s, d, len, U8);
3495 d += len;
3496 s += len;
3497 len = UTF8_SAFE_SKIP(s, send);
3498 }
3499 }
3500 else while (s+1 < send && *s != ')') {
3501 *d++ = *s++;
3502 }
3503 }
3504 else if (!PL_lex_casemods
3505 && ( s[2] == '{' /* This should match regcomp.c */
3506 || (s[2] == '?' && s[3] == '{')))
3507 {
3508 break;
3509 }
3510 }
3511 /* likewise skip #-initiated comments in //x patterns */
3512 else if (*s == '#'
3513 && PL_lex_inpat
3514 && !in_charclass
3515 && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED)
3516 {
3517 while (s < send && *s != '\n')
3518 *d++ = *s++;
3519 }
3520 /* no further processing of single-quoted regex */
3521 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3522 goto default_action;
3523
3524 /* check for embedded arrays
3525 * (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3526 */
3527 else if (*s == '@' && s[1]) {
3528 if (UTF
3529 ? isIDFIRST_utf8_safe(s+1, send)
3530 : isWORDCHAR_A(s[1]))
3531 {
3532 break;
3533 }
3534 if (memCHRs(":'{$", s[1]))
3535 break;
3536 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3537 break; /* in regexp, neither @+ nor @- are interpolated */
3538 }
3539 /* check for embedded scalars. only stop if we're sure it's a
3540 * variable. */
3541 else if (*s == '$') {
3542 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
3543 break;
3544 if (s + 1 < send && !memCHRs("()| \r\n\t", s[1])) {
3545 if (s[1] == '\\') {
3546 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3547 "Possible unintended interpolation of $\\ in regex");
3548 }
3549 break; /* in regexp, $ might be tail anchor */
3550 }
3551 }
3552
3553 /* End of else if chain - OP_TRANS rejoin rest */
3554
3555 if (UNLIKELY(s >= send)) {
3556 assert(s == send);
3557 break;
3558 }
3559
3560 /* backslashes */
3561 if (*s == '\\' && s+1 < send) {
3562 char* e; /* Can be used for ending '}', etc. */
3563
3564 s++;
3565
3566 /* warn on \1 - \9 in substitution replacements, but note that \11
3567 * is an octal; and \19 is \1 followed by '9' */
3568 if (PL_lex_inwhat == OP_SUBST
3569 && !PL_lex_inpat
3570 && isDIGIT(*s)
3571 && *s != '0'
3572 && !isDIGIT(s[1]))
3573 {
3574 /* diag_listed_as: \%d better written as $%d */
3575 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3576 *--s = '$';
3577 break;
3578 }
3579
3580 /* string-change backslash escapes */
3581 if (PL_lex_inwhat != OP_TRANS && *s && memCHRs("lLuUEQF", *s)) {
3582 --s;
3583 break;
3584 }
3585 /* In a pattern, process \N, but skip any other backslash escapes.
3586 * This is because we don't want to translate an escape sequence
3587 * into a meta symbol and have the regex compiler use the meta
3588 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
3589 * in spite of this, we do have to process \N here while the proper
3590 * charnames handler is in scope. See bugs #56444 and #62056.
3591 *
3592 * There is a complication because \N in a pattern may also stand
3593 * for 'match a non-nl', and not mean a charname, in which case its
3594 * processing should be deferred to the regex compiler. To be a
3595 * charname it must be followed immediately by a '{', and not look
3596 * like \N followed by a curly quantifier, i.e., not something like
3597 * \N{3,}. regcurly returns a boolean indicating if it is a legal
3598 * quantifier */
3599 else if (PL_lex_inpat
3600 && (*s != 'N'
3601 || s[1] != '{'
3602 || regcurly(s + 1)))
3603 {
3604 *d++ = '\\';
3605 goto default_action;
3606 }
3607
3608 switch (*s) {
3609 default:
3610 {
3611 if ((isALPHANUMERIC(*s)))
3612 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3613 "Unrecognized escape \\%c passed through",
3614 *s);
3615 /* default action is to copy the quoted character */
3616 goto default_action;
3617 }
3618
3619 /* eg. \132 indicates the octal constant 0132 */
3620 case '0': case '1': case '2': case '3':
3621 case '4': case '5': case '6': case '7':
3622 {
3623 I32 flags = PERL_SCAN_SILENT_ILLDIGIT
3624 | PERL_SCAN_NOTIFY_ILLDIGIT;
3625 STRLEN len = 3;
3626 uv = grok_oct(s, &len, &flags, NULL);
3627 s += len;
3628 if ( (flags & PERL_SCAN_NOTIFY_ILLDIGIT)
3629 && s < send
3630 && isDIGIT(*s) /* like \08, \178 */
3631 && ckWARN(WARN_MISC))
3632 {
3633 Perl_warner(aTHX_ packWARN(WARN_MISC), "%s",
3634 form_alien_digit_msg(8, len, s, send, UTF, FALSE));
3635 }
3636 }
3637 goto NUM_ESCAPE_INSERT;
3638
3639 /* eg. \o{24} indicates the octal constant \024 */
3640 case 'o':
3641 {
3642 const char* error;
3643
3644 if (! grok_bslash_o(&s, send,
3645 &uv, &error,
3646 NULL,
3647 FALSE, /* Not strict */
3648 FALSE, /* No illegal cp's */
3649 UTF))
3650 {
3651 yyerror(error);
3652 uv = 0; /* drop through to ensure range ends are set */
3653 }
3654 goto NUM_ESCAPE_INSERT;
3655 }
3656
3657 /* eg. \x24 indicates the hex constant 0x24 */
3658 case 'x':
3659 {
3660 const char* error;
3661
3662 if (! grok_bslash_x(&s, send,
3663 &uv, &error,
3664 NULL,
3665 FALSE, /* Not strict */
3666 FALSE, /* No illegal cp's */
3667 UTF))
3668 {
3669 yyerror(error);
3670 uv = 0; /* drop through to ensure range ends are set */
3671 }
3672 }
3673
3674 NUM_ESCAPE_INSERT:
3675 /* Insert oct or hex escaped character. */
3676
3677 /* Here uv is the ordinal of the next character being added */
3678 if (UVCHR_IS_INVARIANT(uv)) {
3679 *d++ = (char) uv;
3680 }
3681 else {
3682 if (!d_is_utf8 && uv > 255) {
3683
3684 /* Here, 'uv' won't fit unless we convert to UTF-8.
3685 * If we've only seen invariants so far, all we have to
3686 * do is turn on the flag */
3687 if (utf8_variant_count == 0) {
3688 SvUTF8_on(sv);
3689 }
3690 else {
3691 SvCUR_set(sv, d - SvPVX_const(sv));
3692 SvPOK_on(sv);
3693 *d = '\0';
3694
3695 sv_utf8_upgrade_flags_grow(
3696 sv,
3697 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3698
3699 /* Since we're having to grow here,
3700 * make sure we have enough room for
3701 * this escape and a NUL, so the
3702 * code immediately below won't have
3703 * to actually grow again */
3704 UVCHR_SKIP(uv)
3705 + (STRLEN)(send - s) + 1);
3706 d = SvPVX(sv) + SvCUR(sv);
3707 }
3708
3709 has_above_latin1 = TRUE;
3710 d_is_utf8 = TRUE;
3711 }
3712
3713 if (! d_is_utf8) {
3714 *d++ = (char)uv;
3715 utf8_variant_count++;
3716 }
3717 else {
3718 /* Usually, there will already be enough room in 'sv'
3719 * since such escapes are likely longer than any UTF-8
3720 * sequence they can end up as. This isn't the case on
3721 * EBCDIC where \x{40000000} contains 12 bytes, and the
3722 * UTF-8 for it contains 14. And, we have to allow for
3723 * a trailing NUL. It probably can't happen on ASCII
3724 * platforms, but be safe. See Note on sizing above. */
3725 const STRLEN needed = d - SvPVX(sv)
3726 + UVCHR_SKIP(uv)
3727 + (send - s)
3728 + 1;
3729 if (UNLIKELY(needed > SvLEN(sv))) {
3730 SvCUR_set(sv, d - SvPVX_const(sv));
3731 d = SvCUR(sv) + SvGROW(sv, needed);
3732 }
3733
3734 d = (char*) uvchr_to_utf8_flags((U8*)d, uv,
3735 (ckWARN(WARN_PORTABLE))
3736 ? UNICODE_WARN_PERL_EXTENDED
3737 : 0);
3738 }
3739 }
3740 #ifdef EBCDIC
3741 non_portable_endpoint++;
3742 #endif
3743 continue;
3744
3745 case 'N':
3746 /* In a non-pattern \N must be like \N{U+0041}, or it can be a
3747 * named character, like \N{LATIN SMALL LETTER A}, or a named
3748 * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
3749 * GRAVE} (except y/// can't handle the latter, croaking). For
3750 * convenience all three forms are referred to as "named
3751 * characters" below.
3752 *
3753 * For patterns, \N also can mean to match a non-newline. Code
3754 * before this 'switch' statement should already have handled
3755 * this situation, and hence this code only has to deal with
3756 * the named character cases.
3757 *
3758 * For non-patterns, the named characters are converted to
3759 * their string equivalents. In patterns, named characters are
3760 * not converted to their ultimate forms for the same reasons
3761 * that other escapes aren't (mainly that the ultimate
3762 * character could be considered a meta-symbol by the regex
3763 * compiler). Instead, they are converted to the \N{U+...}
3764 * form to get the value from the charnames that is in effect
3765 * right now, while preserving the fact that it was a named
3766 * character, so that the regex compiler knows this.
3767 *
3768 * The structure of this section of code (besides checking for
3769 * errors and upgrading to utf8) is:
3770 * If the named character is of the form \N{U+...}, pass it
3771 * through if a pattern; otherwise convert the code point
3772 * to utf8
3773 * Otherwise must be some \N{NAME}: convert to
3774 * \N{U+c1.c2...} if a pattern; otherwise convert to utf8
3775 *
3776 * Transliteration is an exception. The conversion to utf8 is
3777 * only done if the code point requires it to be representable.
3778 *
3779 * Here, 's' points to the 'N'; the test below is guaranteed to
3780 * succeed if we are being called on a pattern, as we already
3781 * know from a test above that the next character is a '{'. A
3782 * non-pattern \N must mean 'named character', which requires
3783 * braces */
3784 s++;
3785 if (*s != '{') {
3786 yyerror("Missing braces on \\N{}");
3787 *d++ = '\0';
3788 continue;
3789 }
3790 s++;
3791
3792 /* If there is no matching '}', it is an error. */
3793 if (! (e = (char *) memchr(s, '}', send - s))) {
3794 if (! PL_lex_inpat) {
3795 yyerror("Missing right brace on \\N{}");
3796 } else {
3797 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3798 }
3799 yyquit(); /* Have exhausted the input. */
3800 }
3801
3802 /* Here it looks like a named character */
3803
3804 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3805 s += 2; /* Skip to next char after the 'U+' */
3806 if (PL_lex_inpat) {
3807
3808 /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */
3809 /* Check the syntax. */
3810 const char *orig_s;
3811 orig_s = s - 5;
3812 if (!isXDIGIT(*s)) {
3813 bad_NU:
3814 yyerror(
3815 "Invalid hexadecimal number in \\N{U+...}"
3816 );
3817 s = e + 1;
3818 *d++ = '\0';
3819 continue;
3820 }
3821 while (++s < e) {
3822 if (isXDIGIT(*s))
3823 continue;
3824 else if ((*s == '.' || *s == '_')
3825 && isXDIGIT(s[1]))
3826 continue;
3827 goto bad_NU;
3828 }
3829
3830 /* Pass everything through unchanged.
3831 * +1 is for the '}' */
3832 Copy(orig_s, d, e - orig_s + 1, char);
3833 d += e - orig_s + 1;
3834 }
3835 else { /* Not a pattern: convert the hex to string */
3836 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3837 | PERL_SCAN_SILENT_ILLDIGIT
3838 | PERL_SCAN_SILENT_OVERFLOW
3839 | PERL_SCAN_DISALLOW_PREFIX;
3840 STRLEN len = e - s;
3841
3842 uv = grok_hex(s, &len, &flags, NULL);
3843 if (len == 0 || (len != (STRLEN)(e - s)))
3844 goto bad_NU;
3845
3846 if ( uv > MAX_LEGAL_CP
3847 || (flags & PERL_SCAN_GREATER_THAN_UV_MAX))
3848 {
3849 yyerror(form_cp_too_large_msg(16, s, len, 0));
3850 uv = 0; /* drop through to ensure range ends are
3851 set */
3852 }
3853
3854 /* For non-tr///, if the destination is not in utf8,
3855 * unconditionally recode it to be so. This is
3856 * because \N{} implies Unicode semantics, and scalars
3857 * have to be in utf8 to guarantee those semantics.
3858 * tr/// doesn't care about Unicode rules, so no need
3859 * there to upgrade to UTF-8 for small enough code
3860 * points */
3861 if (! d_is_utf8 && ( uv > 0xFF
3862 || PL_lex_inwhat != OP_TRANS))
3863 {
3864 /* See Note on sizing above. */
3865 const STRLEN extra = OFFUNISKIP(uv) + (send - e) + 1;
3866
3867 SvCUR_set(sv, d - SvPVX_const(sv));
3868 SvPOK_on(sv);
3869 *d = '\0';
3870
3871 if (utf8_variant_count == 0) {
3872 SvUTF8_on(sv);
3873 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
3874 }
3875 else {
3876 sv_utf8_upgrade_flags_grow(
3877 sv,
3878 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3879 extra);
3880 d = SvPVX(sv) + SvCUR(sv);
3881 }
3882
3883 d_is_utf8 = TRUE;
3884 has_above_latin1 = TRUE;
3885 }
3886
3887 /* Add the (Unicode) code point to the output. */
3888 if (! d_is_utf8 || OFFUNI_IS_INVARIANT(uv)) {
3889 *d++ = (char) LATIN1_TO_NATIVE(uv);
3890 }
3891 else {
3892 d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv,
3893 (ckWARN(WARN_PORTABLE))
3894 ? UNICODE_WARN_PERL_EXTENDED
3895 : 0);
3896 }
3897 }
3898 }
3899 else /* Here is \N{NAME} but not \N{U+...}. */
3900 if (! (res = get_and_check_backslash_N_name_wrapper(s, e)))
3901 { /* Failed. We should die eventually, but for now use a NUL
3902 to keep parsing */
3903 *d++ = '\0';
3904 }
3905 else { /* Successfully evaluated the name */
3906 STRLEN len;
3907 const char *str = SvPV_const(res, len);
3908 if (PL_lex_inpat) {
3909
3910 if (! len) { /* The name resolved to an empty string */
3911 const char empty_N[] = "\\N{_}";
3912 Copy(empty_N, d, sizeof(empty_N) - 1, char);
3913 d += sizeof(empty_N) - 1;
3914 }
3915 else {
3916 /* In order to not lose information for the regex
3917 * compiler, pass the result in the specially made
3918 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3919 * the code points in hex of each character
3920 * returned by charnames */
3921
3922 const char *str_end = str + len;
3923 const STRLEN off = d - SvPVX_const(sv);
3924
3925 if (! SvUTF8(res)) {
3926 /* For the non-UTF-8 case, we can determine the
3927 * exact length needed without having to parse
3928 * through the string. Each character takes up
3929 * 2 hex digits plus either a trailing dot or
3930 * the "}" */
3931 const char initial_text[] = "\\N{U+";
3932 const STRLEN initial_len = sizeof(initial_text)
3933 - 1;
3934 d = off + SvGROW(sv, off
3935 + 3 * len
3936
3937 /* +1 for trailing NUL */
3938 + initial_len + 1
3939
3940 + (STRLEN)(send - e));
3941 Copy(initial_text, d, initial_len, char);
3942 d += initial_len;
3943 while (str < str_end) {
3944 char hex_string[4];
3945 int len =
3946 my_snprintf(hex_string,
3947 sizeof(hex_string),
3948 "%02X.",
3949
3950 /* The regex compiler is
3951 * expecting Unicode, not
3952 * native */
3953 NATIVE_TO_LATIN1(*str));
3954 PERL_MY_SNPRINTF_POST_GUARD(len,
3955 sizeof(hex_string));
3956 Copy(hex_string, d, 3, char);
3957 d += 3;
3958 str++;
3959 }
3960 d--; /* Below, we will overwrite the final
3961 dot with a right brace */
3962 }
3963 else {
3964 STRLEN char_length; /* cur char's byte length */
3965
3966 /* and the number of bytes after this is
3967 * translated into hex digits */
3968 STRLEN output_length;
3969
3970 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3971 * for max('U+', '.'); and 1 for NUL */
3972 char hex_string[2 * UTF8_MAXBYTES + 5];
3973
3974 /* Get the first character of the result. */
3975 U32 uv = utf8n_to_uvchr((U8 *) str,
3976 len,
3977 &char_length,
3978 UTF8_ALLOW_ANYUV);
3979 /* Convert first code point to Unicode hex,
3980 * including the boiler plate before it. */
3981 output_length =
3982 my_snprintf(hex_string, sizeof(hex_string),
3983 "\\N{U+%X",
3984 (unsigned int) NATIVE_TO_UNI(uv));
3985
3986 /* Make sure there is enough space to hold it */
3987 d = off + SvGROW(sv, off
3988 + output_length
3989 + (STRLEN)(send - e)
3990 + 2); /* '}' + NUL */
3991 /* And output it */
3992 Copy(hex_string, d, output_length, char);
3993 d += output_length;
3994
3995 /* For each subsequent character, append dot and
3996 * its Unicode code point in hex */
3997 while ((str += char_length) < str_end) {
3998 const STRLEN off = d - SvPVX_const(sv);
3999 U32 uv = utf8n_to_uvchr((U8 *) str,
4000 str_end - str,
4001 &char_length,
4002 UTF8_ALLOW_ANYUV);
4003 output_length =
4004 my_snprintf(hex_string,
4005 sizeof(hex_string),
4006 ".%X",
4007 (unsigned int) NATIVE_TO_UNI(uv));
4008
4009 d = off + SvGROW(sv, off
4010 + output_length
4011 + (STRLEN)(send - e)
4012 + 2); /* '}' + NUL */
4013 Copy(hex_string, d, output_length, char);
4014 d += output_length;
4015 }
4016 }
4017
4018 *d++ = '}'; /* Done. Add the trailing brace */
4019 }
4020 }
4021 else { /* Here, not in a pattern. Convert the name to a
4022 * string. */
4023
4024 if (PL_lex_inwhat == OP_TRANS) {
4025 str = SvPV_const(res, len);
4026 if (len > ((SvUTF8(res))
4027 ? UTF8SKIP(str)
4028 : 1U))
4029 {
4030 yyerror(Perl_form(aTHX_
4031 "%.*s must not be a named sequence"
4032 " in transliteration operator",
4033 /* +1 to include the "}" */
4034 (int) (e + 1 - start), start));
4035 *d++ = '\0';
4036 goto end_backslash_N;
4037 }
4038
4039 if (SvUTF8(res) && UTF8_IS_ABOVE_LATIN1(*str)) {
4040 has_above_latin1 = TRUE;
4041 }
4042
4043 }
4044 else if (! SvUTF8(res)) {
4045 /* Make sure \N{} return is UTF-8. This is because
4046 * \N{} implies Unicode semantics, and scalars have
4047 * to be in utf8 to guarantee those semantics; but
4048 * not needed in tr/// */
4049 sv_utf8_upgrade_flags(res, 0);
4050 str = SvPV_const(res, len);
4051 }
4052
4053 /* Upgrade destination to be utf8 if this new
4054 * component is */
4055 if (! d_is_utf8 && SvUTF8(res)) {
4056 /* See Note on sizing above. */
4057 const STRLEN extra = len + (send - s) + 1;
4058
4059 SvCUR_set(sv, d - SvPVX_const(sv));
4060 SvPOK_on(sv);
4061 *d = '\0';
4062
4063 if (utf8_variant_count == 0) {
4064 SvUTF8_on(sv);
4065 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
4066 }
4067 else {
4068 sv_utf8_upgrade_flags_grow(sv,
4069 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4070 extra);
4071 d = SvPVX(sv) + SvCUR(sv);
4072 }
4073 d_is_utf8 = TRUE;
4074 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
4075
4076 /* See Note on sizing above. (NOTE: SvCUR() is not
4077 * set correctly here). */
4078 const STRLEN extra = len + (send - e) + 1;
4079 const STRLEN off = d - SvPVX_const(sv);
4080 d = off + SvGROW(sv, off + extra);
4081 }
4082 Copy(str, d, len, char);
4083 d += len;
4084 }
4085
4086 SvREFCNT_dec(res);
4087
4088 } /* End \N{NAME} */
4089
4090 end_backslash_N:
4091 #ifdef EBCDIC
4092 backslash_N++; /* \N{} is defined to be Unicode */
4093 #endif
4094 s = e + 1; /* Point to just after the '}' */
4095 continue;
4096
4097 /* \c is a control character */
4098 case 'c':
4099 s++;
4100 if (s < send) {
4101 const char * message;
4102
4103 if (! grok_bslash_c(*s, (U8 *) d, &message, NULL)) {
4104 yyerror(message);
4105 yyquit(); /* Have always immediately croaked on
4106 errors in this */
4107 }
4108 d++;
4109 }
4110 else {
4111 yyerror("Missing control char name in \\c");
4112 yyquit(); /* Are at end of input, no sense continuing */
4113 }
4114 #ifdef EBCDIC
4115 non_portable_endpoint++;
4116 #endif
4117 break;
4118
4119 /* printf-style backslashes, formfeeds, newlines, etc */
4120 case 'b':
4121 *d++ = '\b';
4122 break;
4123 case 'n':
4124 *d++ = '\n';
4125 break;
4126 case 'r':
4127 *d++ = '\r';
4128 break;
4129 case 'f':
4130 *d++ = '\f';
4131 break;
4132 case 't':
4133 *d++ = '\t';
4134 break;
4135 case 'e':
4136 *d++ = ESC_NATIVE;
4137 break;
4138 case 'a':
4139 *d++ = '\a';
4140 break;
4141 } /* end switch */
4142
4143 s++;
4144 continue;
4145 } /* end if (backslash) */
4146
4147 default_action:
4148 /* Just copy the input to the output, though we may have to convert
4149 * to/from UTF-8.
4150 *
4151 * If the input has the same representation in UTF-8 as not, it will be
4152 * a single byte, and we don't care about UTF8ness; just copy the byte */
4153 if (NATIVE_BYTE_IS_INVARIANT((U8)(*s))) {
4154 *d++ = *s++;
4155 }
4156 else if (! s_is_utf8 && ! d_is_utf8) {
4157 /* If neither source nor output is UTF-8, is also a single byte,
4158 * just copy it; but this byte counts should we later have to
4159 * convert to UTF-8 */
4160 *d++ = *s++;
4161 utf8_variant_count++;
4162 }
4163 else if (s_is_utf8 && d_is_utf8) { /* Both UTF-8, can just copy */
4164 const STRLEN len = UTF8SKIP(s);
4165
4166 /* We expect the source to have already been checked for
4167 * malformedness */
4168 assert(isUTF8_CHAR((U8 *) s, (U8 *) send));
4169
4170 Copy(s, d, len, U8);
4171 d += len;
4172 s += len;
4173 }
4174 else if (s_is_utf8) { /* UTF8ness matters: convert output to utf8 */
4175 STRLEN need = send - s + 1; /* See Note on sizing above. */
4176
4177 SvCUR_set(sv, d - SvPVX_const(sv));
4178 SvPOK_on(sv);
4179 *d = '\0';
4180
4181 if (utf8_variant_count == 0) {
4182 SvUTF8_on(sv);
4183 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + need);
4184 }
4185 else {
4186 sv_utf8_upgrade_flags_grow(sv,
4187 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4188 need);
4189 d = SvPVX(sv) + SvCUR(sv);
4190 }
4191 d_is_utf8 = TRUE;
4192 goto default_action; /* Redo, having upgraded so both are UTF-8 */
4193 }
4194 else { /* UTF8ness matters: convert this non-UTF8 source char to
4195 UTF-8 for output. It will occupy 2 bytes, but don't include
4196 the input byte since we haven't incremented 's' yet. See
4197 Note on sizing above. */
4198 const STRLEN off = d - SvPVX(sv);
4199 const STRLEN extra = 2 + (send - s - 1) + 1;
4200 if (off + extra > SvLEN(sv)) {
4201 d = off + SvGROW(sv, off + extra);
4202 }
4203 *d++ = UTF8_EIGHT_BIT_HI(*s);
4204 *d++ = UTF8_EIGHT_BIT_LO(*s);
4205 s++;
4206 }
4207 } /* while loop to process each character */
4208
4209 {
4210 const STRLEN off = d - SvPVX(sv);
4211
4212 /* See if room for the terminating NUL */
4213 if (UNLIKELY(off >= SvLEN(sv))) {
4214
4215 #ifndef DEBUGGING
4216
4217 if (off > SvLEN(sv))
4218 #endif
4219 Perl_croak(aTHX_ "panic: constant overflowed allocated space,"
4220 " %" UVuf " >= %" UVuf, (UV)off, (UV)SvLEN(sv));
4221
4222 /* Whew! Here we don't have room for the terminating NUL, but
4223 * everything else so far has fit. It's not too late to grow
4224 * to fit the NUL and continue on. But it is a bug, as the code
4225 * above was supposed to have made room for this, so under
4226 * DEBUGGING builds, we panic anyway. */
4227 d = off + SvGROW(sv, off + 1);
4228 }
4229 }
4230
4231 /* terminate the string and set up the sv */
4232 *d = '\0';
4233 SvCUR_set(sv, d - SvPVX_const(sv));
4234
4235 SvPOK_on(sv);
4236 if (d_is_utf8) {
4237 SvUTF8_on(sv);
4238 }
4239
4240 /* shrink the sv if we allocated more than we used */
4241 if (SvCUR(sv) + 5 < SvLEN(sv)) {
4242 SvPV_shrink_to_cur(sv);
4243 }
4244
4245 /* return the substring (via pl_yylval) only if we parsed anything */
4246 if (s > start) {
4247 char *s2 = start;
4248 for (; s2 < s; s2++) {
4249 if (*s2 == '\n')
4250 COPLINE_INC_WITH_HERELINES;
4251 }
4252 SvREFCNT_inc_simple_void_NN(sv);
4253 if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
4254 && ! PL_parser->lex_re_reparsing)
4255 {
4256 const char *const key = PL_lex_inpat ? "qr" : "q";
4257 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
4258 const char *type;
4259 STRLEN typelen;
4260
4261 if (PL_lex_inwhat == OP_TRANS) {
4262 type = "tr";
4263 typelen = 2;
4264 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
4265 type = "s";
4266 typelen = 1;
4267 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
4268 type = "q";
4269 typelen = 1;
4270 } else {
4271 type = "qq";
4272 typelen = 2;
4273 }
4274
4275 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
4276 type, typelen, NULL);
4277 }
4278 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
4279 }
4280 LEAVE_with_name("scan_const");
4281 return s;
4282 }
4283
4284 /* S_intuit_more
4285 * Returns TRUE if there's more to the expression (e.g., a subscript),
4286 * FALSE otherwise.
4287 *
4288 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
4289 *
4290 * ->[ and ->{ return TRUE
4291 * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
4292 * { and [ outside a pattern are always subscripts, so return TRUE
4293 * if we're outside a pattern and it's not { or [, then return FALSE
4294 * if we're in a pattern and the first char is a {
4295 * {4,5} (any digits around the comma) returns FALSE
4296 * if we're in a pattern and the first char is a [
4297 * [] returns FALSE
4298 * [SOMETHING] has a funky algorithm to decide whether it's a
4299 * character class or not. It has to deal with things like
4300 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
4301 * anything else returns TRUE
4302 */
4303
4304 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
4305
4306 STATIC int
S_intuit_more(pTHX_ char * s,char * e)4307 S_intuit_more(pTHX_ char *s, char *e)
4308 {
4309 PERL_ARGS_ASSERT_INTUIT_MORE;
4310
4311 if (PL_lex_brackets)
4312 return TRUE;
4313 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
4314 return TRUE;
4315 if (*s == '-' && s[1] == '>'
4316 && FEATURE_POSTDEREF_QQ_IS_ENABLED
4317 && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
4318 ||(s[2] == '@' && memCHRs("*[{",s[3])) ))
4319 return TRUE;
4320 if (*s != '{' && *s != '[')
4321 return FALSE;
4322 PL_parser->sub_no_recover = TRUE;
4323 if (!PL_lex_inpat)
4324 return TRUE;
4325
4326 /* In a pattern, so maybe we have {n,m}. */
4327 if (*s == '{') {
4328 if (regcurly(s)) {
4329 return FALSE;
4330 }
4331 return TRUE;
4332 }
4333
4334 /* On the other hand, maybe we have a character class */
4335
4336 s++;
4337 if (*s == ']' || *s == '^')
4338 return FALSE;
4339 else {
4340 /* this is terrifying, and it works */
4341 int weight;
4342 char seen[256];
4343 const char * const send = (char *) memchr(s, ']', e - s);
4344 unsigned char un_char, last_un_char;
4345 char tmpbuf[sizeof PL_tokenbuf * 4];
4346
4347 if (!send) /* has to be an expression */
4348 return TRUE;
4349 weight = 2; /* let's weigh the evidence */
4350
4351 if (*s == '$')
4352 weight -= 3;
4353 else if (isDIGIT(*s)) {
4354 if (s[1] != ']') {
4355 if (isDIGIT(s[1]) && s[2] == ']')
4356 weight -= 10;
4357 }
4358 else
4359 weight -= 100;
4360 }
4361 Zero(seen,256,char);
4362 un_char = 255;
4363 for (; s < send; s++) {
4364 last_un_char = un_char;
4365 un_char = (unsigned char)*s;
4366 switch (*s) {
4367 case '@':
4368 case '&':
4369 case '$':
4370 weight -= seen[un_char] * 10;
4371 if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) {
4372 int len;
4373 scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
4374 len = (int)strlen(tmpbuf);
4375 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
4376 UTF ? SVf_UTF8 : 0, SVt_PV))
4377 weight -= 100;
4378 else
4379 weight -= 10;
4380 }
4381 else if (*s == '$'
4382 && s[1]
4383 && memCHRs("[#!%*<>()-=",s[1]))
4384 {
4385 if (/*{*/ memCHRs("])} =",s[2]))
4386 weight -= 10;
4387 else
4388 weight -= 1;
4389 }
4390 break;
4391 case '\\':
4392 un_char = 254;
4393 if (s[1]) {
4394 if (memCHRs("wds]",s[1]))
4395 weight += 100;
4396 else if (seen[(U8)'\''] || seen[(U8)'"'])
4397 weight += 1;
4398 else if (memCHRs("rnftbxcav",s[1]))
4399 weight += 40;
4400 else if (isDIGIT(s[1])) {
4401 weight += 40;
4402 while (s[1] && isDIGIT(s[1]))
4403 s++;
4404 }
4405 }
4406 else
4407 weight += 100;
4408 break;
4409 case '-':
4410 if (s[1] == '\\')
4411 weight += 50;
4412 if (memCHRs("aA01! ",last_un_char))
4413 weight += 30;
4414 if (memCHRs("zZ79~",s[1]))
4415 weight += 30;
4416 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4417 weight -= 5; /* cope with negative subscript */
4418 break;
4419 default:
4420 if (!isWORDCHAR(last_un_char)
4421 && !(last_un_char == '$' || last_un_char == '@'
4422 || last_un_char == '&')
4423 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4424 char *d = s;
4425 while (isALPHA(*s))
4426 s++;
4427 if (keyword(d, s - d, 0))
4428 weight -= 150;
4429 }
4430 if (un_char == last_un_char + 1)
4431 weight += 5;
4432 weight -= seen[un_char];
4433 break;
4434 }
4435 seen[un_char]++;
4436 }
4437 if (weight >= 0) /* probably a character class */
4438 return FALSE;
4439 }
4440
4441 return TRUE;
4442 }
4443
4444 /*
4445 * S_intuit_method
4446 *
4447 * Does all the checking to disambiguate
4448 * foo bar
4449 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
4450 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4451 *
4452 * First argument is the stuff after the first token, e.g. "bar".
4453 *
4454 * Not a method if foo is a filehandle.
4455 * Not a method if foo is a subroutine prototyped to take a filehandle.
4456 * Not a method if it's really "Foo $bar"
4457 * Method if it's "foo $bar"
4458 * Not a method if it's really "print foo $bar"
4459 * Method if it's really "foo package::" (interpreted as package->foo)
4460 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4461 * Not a method if bar is a filehandle or package, but is quoted with
4462 * =>
4463 */
4464
4465 STATIC int
S_intuit_method(pTHX_ char * start,SV * ioname,CV * cv)4466 S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
4467 {
4468 char *s = start + (*start == '$');
4469 char tmpbuf[sizeof PL_tokenbuf];
4470 STRLEN len;
4471 GV* indirgv;
4472 /* Mustn't actually add anything to a symbol table.
4473 But also don't want to "initialise" any placeholder
4474 constants that might already be there into full
4475 blown PVGVs with attached PVCV. */
4476 GV * const gv =
4477 ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL;
4478
4479 PERL_ARGS_ASSERT_INTUIT_METHOD;
4480
4481 if (!FEATURE_INDIRECT_IS_ENABLED)
4482 return 0;
4483
4484 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4485 return 0;
4486 if (cv && SvPOK(cv)) {
4487 const char *proto = CvPROTO(cv);
4488 if (proto) {
4489 while (*proto && (isSPACE(*proto) || *proto == ';'))
4490 proto++;
4491 if (*proto == '*')
4492 return 0;
4493 }
4494 }
4495
4496 if (*start == '$') {
4497 SSize_t start_off = start - SvPVX(PL_linestr);
4498 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY
4499 || isUPPER(*PL_tokenbuf))
4500 return 0;
4501 /* this could be $# */
4502 if (isSPACE(*s))
4503 s = skipspace(s);
4504 PL_bufptr = SvPVX(PL_linestr) + start_off;
4505 PL_expect = XREF;
4506 return *s == '(' ? FUNCMETH : METHOD;
4507 }
4508
4509 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4510 /* start is the beginning of the possible filehandle/object,
4511 * and s is the end of it
4512 * tmpbuf is a copy of it (but with single quotes as double colons)
4513 */
4514
4515 if (!keyword(tmpbuf, len, 0)) {
4516 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4517 len -= 2;
4518 tmpbuf[len] = '\0';
4519 goto bare_package;
4520 }
4521 indirgv = gv_fetchpvn_flags(tmpbuf, len,
4522 GV_NOADD_NOINIT|( UTF ? SVf_UTF8 : 0 ),
4523 SVt_PVCV);
4524 if (indirgv && SvTYPE(indirgv) != SVt_NULL
4525 && (!isGV(indirgv) || GvCVu(indirgv)))
4526 return 0;
4527 /* filehandle or package name makes it a method */
4528 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4529 s = skipspace(s);
4530 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4531 return 0; /* no assumptions -- "=>" quotes bareword */
4532 bare_package:
4533 NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0,
4534 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4535 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4536 PL_expect = XTERM;
4537 force_next(BAREWORD);
4538 PL_bufptr = s;
4539 return *s == '(' ? FUNCMETH : METHOD;
4540 }
4541 }
4542 return 0;
4543 }
4544
4545 /* Encoded script support. filter_add() effectively inserts a
4546 * 'pre-processing' function into the current source input stream.
4547 * Note that the filter function only applies to the current source file
4548 * (e.g., it will not affect files 'require'd or 'use'd by this one).
4549 *
4550 * The datasv parameter (which may be NULL) can be used to pass
4551 * private data to this instance of the filter. The filter function
4552 * can recover the SV using the FILTER_DATA macro and use it to
4553 * store private buffers and state information.
4554 *
4555 * The supplied datasv parameter is upgraded to a PVIO type
4556 * and the IoDIRP/IoANY field is used to store the function pointer,
4557 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4558 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4559 * private use must be set using malloc'd pointers.
4560 */
4561
4562 SV *
Perl_filter_add(pTHX_ filter_t funcp,SV * datasv)4563 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4564 {
4565 if (!funcp)
4566 return NULL;
4567
4568 if (!PL_parser)
4569 return NULL;
4570
4571 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4572 Perl_croak(aTHX_ "Source filters apply only to byte streams");
4573
4574 if (!PL_rsfp_filters)
4575 PL_rsfp_filters = newAV();
4576 if (!datasv)
4577 datasv = newSV(0);
4578 SvUPGRADE(datasv, SVt_PVIO);
4579 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4580 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4581 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4582 FPTR2DPTR(void *, IoANY(datasv)),
4583 SvPV_nolen(datasv)));
4584 av_unshift(PL_rsfp_filters, 1);
4585 av_store(PL_rsfp_filters, 0, datasv) ;
4586 if (
4587 !PL_parser->filtered
4588 && PL_parser->lex_flags & LEX_EVALBYTES
4589 && PL_bufptr < PL_bufend
4590 ) {
4591 const char *s = PL_bufptr;
4592 while (s < PL_bufend) {
4593 if (*s == '\n') {
4594 SV *linestr = PL_parser->linestr;
4595 char *buf = SvPVX(linestr);
4596 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4597 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4598 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4599 STRLEN const linestart_pos = PL_parser->linestart - buf;
4600 STRLEN const last_uni_pos =
4601 PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4602 STRLEN const last_lop_pos =
4603 PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4604 av_push(PL_rsfp_filters, linestr);
4605 PL_parser->linestr =
4606 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4607 buf = SvPVX(PL_parser->linestr);
4608 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4609 PL_parser->bufptr = buf + bufptr_pos;
4610 PL_parser->oldbufptr = buf + oldbufptr_pos;
4611 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4612 PL_parser->linestart = buf + linestart_pos;
4613 if (PL_parser->last_uni)
4614 PL_parser->last_uni = buf + last_uni_pos;
4615 if (PL_parser->last_lop)
4616 PL_parser->last_lop = buf + last_lop_pos;
4617 SvLEN_set(linestr, SvCUR(linestr));
4618 SvCUR_set(linestr, s - SvPVX(linestr));
4619 PL_parser->filtered = 1;
4620 break;
4621 }
4622 s++;
4623 }
4624 }
4625 return(datasv);
4626 }
4627
4628
4629 /* Delete most recently added instance of this filter function. */
4630 void
Perl_filter_del(pTHX_ filter_t funcp)4631 Perl_filter_del(pTHX_ filter_t funcp)
4632 {
4633 SV *datasv;
4634
4635 PERL_ARGS_ASSERT_FILTER_DEL;
4636
4637 #ifdef DEBUGGING
4638 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4639 FPTR2DPTR(void*, funcp)));
4640 #endif
4641 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4642 return;
4643 /* if filter is on top of stack (usual case) just pop it off */
4644 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4645 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4646 sv_free(av_pop(PL_rsfp_filters));
4647
4648 return;
4649 }
4650 /* we need to search for the correct entry and clear it */
4651 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4652 }
4653
4654
4655 /* Invoke the idxth filter function for the current rsfp. */
4656 /* maxlen 0 = read one text line */
4657 I32
Perl_filter_read(pTHX_ int idx,SV * buf_sv,int maxlen)4658 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4659 {
4660 filter_t funcp;
4661 I32 ret;
4662 SV *datasv = NULL;
4663 /* This API is bad. It should have been using unsigned int for maxlen.
4664 Not sure if we want to change the API, but if not we should sanity
4665 check the value here. */
4666 unsigned int correct_length = maxlen < 0 ? PERL_INT_MAX : maxlen;
4667
4668 PERL_ARGS_ASSERT_FILTER_READ;
4669
4670 if (!PL_parser || !PL_rsfp_filters)
4671 return -1;
4672 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
4673 /* Provide a default input filter to make life easy. */
4674 /* Note that we append to the line. This is handy. */
4675 DEBUG_P(PerlIO_printf(Perl_debug_log,
4676 "filter_read %d: from rsfp\n", idx));
4677 if (correct_length) {
4678 /* Want a block */
4679 int len ;
4680 const int old_len = SvCUR(buf_sv);
4681
4682 /* ensure buf_sv is large enough */
4683 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4684 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4685 correct_length)) <= 0) {
4686 if (PerlIO_error(PL_rsfp))
4687 return -1; /* error */
4688 else
4689 return 0 ; /* end of file */
4690 }
4691 SvCUR_set(buf_sv, old_len + len) ;
4692 SvPVX(buf_sv)[old_len + len] = '\0';
4693 } else {
4694 /* Want a line */
4695 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4696 if (PerlIO_error(PL_rsfp))
4697 return -1; /* error */
4698 else
4699 return 0 ; /* end of file */
4700 }
4701 }
4702 return SvCUR(buf_sv);
4703 }
4704 /* Skip this filter slot if filter has been deleted */
4705 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4706 DEBUG_P(PerlIO_printf(Perl_debug_log,
4707 "filter_read %d: skipped (filter deleted)\n",
4708 idx));
4709 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4710 }
4711 if (SvTYPE(datasv) != SVt_PVIO) {
4712 if (correct_length) {
4713 /* Want a block */
4714 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4715 if (!remainder) return 0; /* eof */
4716 if (correct_length > remainder) correct_length = remainder;
4717 sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4718 SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4719 } else {
4720 /* Want a line */
4721 const char *s = SvEND(datasv);
4722 const char *send = SvPVX(datasv) + SvLEN(datasv);
4723 while (s < send) {
4724 if (*s == '\n') {
4725 s++;
4726 break;
4727 }
4728 s++;
4729 }
4730 if (s == send) return 0; /* eof */
4731 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4732 SvCUR_set(datasv, s-SvPVX(datasv));
4733 }
4734 return SvCUR(buf_sv);
4735 }
4736 /* Get function pointer hidden within datasv */
4737 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4738 DEBUG_P(PerlIO_printf(Perl_debug_log,
4739 "filter_read %d: via function %p (%s)\n",
4740 idx, (void*)datasv, SvPV_nolen_const(datasv)));
4741 /* Call function. The function is expected to */
4742 /* call "FILTER_READ(idx+1, buf_sv)" first. */
4743 /* Return: <0:error, =0:eof, >0:not eof */
4744 ENTER;
4745 save_scalar(PL_errgv);
4746 ret = (*funcp)(aTHX_ idx, buf_sv, correct_length);
4747 LEAVE;
4748 return ret;
4749 }
4750
4751 STATIC char *
S_filter_gets(pTHX_ SV * sv,STRLEN append)4752 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4753 {
4754 PERL_ARGS_ASSERT_FILTER_GETS;
4755
4756 #ifdef PERL_CR_FILTER
4757 if (!PL_rsfp_filters) {
4758 filter_add(S_cr_textfilter,NULL);
4759 }
4760 #endif
4761 if (PL_rsfp_filters) {
4762 if (!append)
4763 SvCUR_set(sv, 0); /* start with empty line */
4764 if (FILTER_READ(0, sv, 0) > 0)
4765 return ( SvPVX(sv) ) ;
4766 else
4767 return NULL ;
4768 }
4769 else
4770 return (sv_gets(sv, PL_rsfp, append));
4771 }
4772
4773 STATIC HV *
S_find_in_my_stash(pTHX_ const char * pkgname,STRLEN len)4774 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4775 {
4776 GV *gv;
4777
4778 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4779
4780 if (memEQs(pkgname, len, "__PACKAGE__"))
4781 return PL_curstash;
4782
4783 if (len > 2
4784 && (pkgname[len - 2] == ':' && pkgname[len - 1] == ':')
4785 && (gv = gv_fetchpvn_flags(pkgname,
4786 len,
4787 ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4788 {
4789 return GvHV(gv); /* Foo:: */
4790 }
4791
4792 /* use constant CLASS => 'MyClass' */
4793 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4794 if (gv && GvCV(gv)) {
4795 SV * const sv = cv_const_sv(GvCV(gv));
4796 if (sv)
4797 return gv_stashsv(sv, 0);
4798 }
4799
4800 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4801 }
4802
4803
4804 STATIC char *
S_tokenize_use(pTHX_ int is_use,char * s)4805 S_tokenize_use(pTHX_ int is_use, char *s) {
4806 PERL_ARGS_ASSERT_TOKENIZE_USE;
4807
4808 if (PL_expect != XSTATE)
4809 /* diag_listed_as: "use" not allowed in expression */
4810 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4811 is_use ? "use" : "no"));
4812 PL_expect = XTERM;
4813 s = skipspace(s);
4814 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4815 s = force_version(s, TRUE);
4816 if (*s == ';' || *s == '}'
4817 || (s = skipspace(s), (*s == ';' || *s == '}'))) {
4818 NEXTVAL_NEXTTOKE.opval = NULL;
4819 force_next(BAREWORD);
4820 }
4821 else if (*s == 'v') {
4822 s = force_word(s,BAREWORD,FALSE,TRUE);
4823 s = force_version(s, FALSE);
4824 }
4825 }
4826 else {
4827 s = force_word(s,BAREWORD,FALSE,TRUE);
4828 s = force_version(s, FALSE);
4829 }
4830 pl_yylval.ival = is_use;
4831 return s;
4832 }
4833 #ifdef DEBUGGING
4834 static const char* const exp_name[] =
4835 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4836 "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
4837 "SIGVAR", "TERMORDORDOR"
4838 };
4839 #endif
4840
4841 #define word_takes_any_delimiter(p,l) S_word_takes_any_delimiter(p,l)
4842 STATIC bool
S_word_takes_any_delimiter(char * p,STRLEN len)4843 S_word_takes_any_delimiter(char *p, STRLEN len)
4844 {
4845 return (len == 1 && memCHRs("msyq", p[0]))
4846 || (len == 2
4847 && ((p[0] == 't' && p[1] == 'r')
4848 || (p[0] == 'q' && memCHRs("qwxr", p[1]))));
4849 }
4850
4851 static void
S_check_scalar_slice(pTHX_ char * s)4852 S_check_scalar_slice(pTHX_ char *s)
4853 {
4854 s++;
4855 while (SPACE_OR_TAB(*s)) s++;
4856 if (*s == 'q' && s[1] == 'w' && !isWORDCHAR_lazy_if_safe(s+2,
4857 PL_bufend,
4858 UTF))
4859 {
4860 return;
4861 }
4862 while ( isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)
4863 || (*s && memCHRs(" \t$#+-'\"", *s)))
4864 {
4865 s += UTF ? UTF8SKIP(s) : 1;
4866 }
4867 if (*s == '}' || *s == ']')
4868 pl_yylval.ival = OPpSLICEWARNING;
4869 }
4870
4871 #define lex_token_boundary() S_lex_token_boundary(aTHX)
4872 static void
S_lex_token_boundary(pTHX)4873 S_lex_token_boundary(pTHX)
4874 {
4875 PL_oldoldbufptr = PL_oldbufptr;
4876 PL_oldbufptr = PL_bufptr;
4877 }
4878
4879 #define vcs_conflict_marker(s) S_vcs_conflict_marker(aTHX_ s)
4880 static char *
S_vcs_conflict_marker(pTHX_ char * s)4881 S_vcs_conflict_marker(pTHX_ char *s)
4882 {
4883 lex_token_boundary();
4884 PL_bufptr = s;
4885 yyerror("Version control conflict marker");
4886 while (s < PL_bufend && *s != '\n')
4887 s++;
4888 return s;
4889 }
4890
4891 static int
yyl_sigvar(pTHX_ char * s)4892 yyl_sigvar(pTHX_ char *s)
4893 {
4894 /* we expect the sigil and optional var name part of a
4895 * signature element here. Since a '$' is not necessarily
4896 * followed by a var name, handle it specially here; the general
4897 * yylex code would otherwise try to interpret whatever follows
4898 * as a var; e.g. ($, ...) would be seen as the var '$,'
4899 */
4900
4901 U8 sigil;
4902
4903 s = skipspace(s);
4904 sigil = *s++;
4905 PL_bufptr = s; /* for error reporting */
4906 switch (sigil) {
4907 case '$':
4908 case '@':
4909 case '%':
4910 /* spot stuff that looks like an prototype */
4911 if (memCHRs("$:@%&*;\\[]", *s)) {
4912 yyerror("Illegal character following sigil in a subroutine signature");
4913 break;
4914 }
4915 /* '$#' is banned, while '$ # comment' isn't */
4916 if (*s == '#') {
4917 yyerror("'#' not allowed immediately following a sigil in a subroutine signature");
4918 break;
4919 }
4920 s = skipspace(s);
4921 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
4922 char *dest = PL_tokenbuf + 1;
4923 /* read var name, including sigil, into PL_tokenbuf */
4924 PL_tokenbuf[0] = sigil;
4925 parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1,
4926 0, cBOOL(UTF), FALSE, FALSE);
4927 *dest = '\0';
4928 assert(PL_tokenbuf[1]); /* we have a variable name */
4929 }
4930 else {
4931 *PL_tokenbuf = 0;
4932 PL_in_my = 0;
4933 }
4934
4935 s = skipspace(s);
4936 /* parse the = for the default ourselves to avoid '+=' etc being accepted here
4937 * as the ASSIGNOP, and exclude other tokens that start with =
4938 */
4939 if (*s == '=' && (!s[1] || memCHRs("=~>", s[1]) == 0)) {
4940 /* save now to report with the same context as we did when
4941 * all ASSIGNOPS were accepted */
4942 PL_oldbufptr = s;
4943
4944 ++s;
4945 NEXTVAL_NEXTTOKE.ival = 0;
4946 force_next(ASSIGNOP);
4947 PL_expect = XTERM;
4948 }
4949 else if (*s == ',' || *s == ')') {
4950 PL_expect = XOPERATOR;
4951 }
4952 else {
4953 /* make sure the context shows the unexpected character and
4954 * hopefully a bit more */
4955 if (*s) ++s;
4956 while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
4957 s++;
4958 PL_bufptr = s; /* for error reporting */
4959 yyerror("Illegal operator following parameter in a subroutine signature");
4960 PL_in_my = 0;
4961 }
4962 if (*PL_tokenbuf) {
4963 NEXTVAL_NEXTTOKE.ival = sigil;
4964 force_next('p'); /* force a signature pending identifier */
4965 }
4966 break;
4967
4968 case ')':
4969 PL_expect = XBLOCK;
4970 break;
4971 case ',': /* handle ($a,,$b) */
4972 break;
4973
4974 default:
4975 PL_in_my = 0;
4976 yyerror("A signature parameter must start with '$', '@' or '%'");
4977 /* very crude error recovery: skip to likely next signature
4978 * element */
4979 while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
4980 s++;
4981 break;
4982 }
4983
4984 TOKEN(sigil);
4985 }
4986
4987 static int
yyl_dollar(pTHX_ char * s)4988 yyl_dollar(pTHX_ char *s)
4989 {
4990 CLINE;
4991
4992 if (PL_expect == XPOSTDEREF) {
4993 if (s[1] == '#') {
4994 s++;
4995 POSTDEREF(DOLSHARP);
4996 }
4997 POSTDEREF('$');
4998 }
4999
5000 if ( s[1] == '#'
5001 && ( isIDFIRST_lazy_if_safe(s+2, PL_bufend, UTF)
5002 || memCHRs("{$:+-@", s[2])))
5003 {
5004 PL_tokenbuf[0] = '@';
5005 s = scan_ident(s + 1, PL_tokenbuf + 1,
5006 sizeof PL_tokenbuf - 1, FALSE);
5007 if (PL_expect == XOPERATOR) {
5008 char *d = s;
5009 if (PL_bufptr > s) {
5010 d = PL_bufptr-1;
5011 PL_bufptr = PL_oldbufptr;
5012 }
5013 no_op("Array length", d);
5014 }
5015 if (!PL_tokenbuf[1])
5016 PREREF(DOLSHARP);
5017 PL_expect = XOPERATOR;
5018 force_ident_maybe_lex('#');
5019 TOKEN(DOLSHARP);
5020 }
5021
5022 PL_tokenbuf[0] = '$';
5023 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5024 if (PL_expect == XOPERATOR) {
5025 char *d = s;
5026 if (PL_bufptr > s) {
5027 d = PL_bufptr-1;
5028 PL_bufptr = PL_oldbufptr;
5029 }
5030 no_op("Scalar", d);
5031 }
5032 if (!PL_tokenbuf[1]) {
5033 if (s == PL_bufend)
5034 yyerror("Final $ should be \\$ or $name");
5035 PREREF('$');
5036 }
5037
5038 {
5039 const char tmp = *s;
5040 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
5041 s = skipspace(s);
5042
5043 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5044 && intuit_more(s, PL_bufend)) {
5045 if (*s == '[') {
5046 PL_tokenbuf[0] = '@';
5047 if (ckWARN(WARN_SYNTAX)) {
5048 char *t = s+1;
5049
5050 while ( t < PL_bufend ) {
5051 if (isSPACE(*t)) {
5052 do { t += UTF ? UTF8SKIP(t) : 1; } while (t < PL_bufend && isSPACE(*t));
5053 /* consumed one or more space chars */
5054 } else if (*t == '$' || *t == '@') {
5055 /* could be more than one '$' like $$ref or @$ref */
5056 do { t++; } while (t < PL_bufend && *t == '$');
5057
5058 /* could be an abigail style identifier like $ foo */
5059 while (t < PL_bufend && *t == ' ') t++;
5060
5061 /* strip off the name of the var */
5062 while (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
5063 t += UTF ? UTF8SKIP(t) : 1;
5064 /* consumed a varname */
5065 } else if (isDIGIT(*t)) {
5066 /* deal with hex constants like 0x11 */
5067 if (t[0] == '0' && t[1] == 'x') {
5068 t += 2;
5069 while (t < PL_bufend && isXDIGIT(*t)) t++;
5070 } else {
5071 /* deal with decimal/octal constants like 1 and 0123 */
5072 do { t++; } while (isDIGIT(*t));
5073 if (t<PL_bufend && *t == '.') {
5074 do { t++; } while (isDIGIT(*t));
5075 }
5076 }
5077 /* consumed a number */
5078 } else {
5079 /* not a var nor a space nor a number */
5080 break;
5081 }
5082 }
5083 if (t < PL_bufend && *t++ == ',') {
5084 PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
5085 while (t < PL_bufend && *t != ']')
5086 t++;
5087 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5088 "Multidimensional syntax %" UTF8f " not supported",
5089 UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
5090 }
5091 }
5092 }
5093 else if (*s == '{') {
5094 char *t;
5095 PL_tokenbuf[0] = '%';
5096 if ( strEQ(PL_tokenbuf+1, "SIG")
5097 && ckWARN(WARN_SYNTAX)
5098 && (t = (char *) memchr(s, '}', PL_bufend - s))
5099 && (t = (char *) memchr(t, '=', PL_bufend - t)))
5100 {
5101 char tmpbuf[sizeof PL_tokenbuf];
5102 do {
5103 t++;
5104 } while (isSPACE(*t));
5105 if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) {
5106 STRLEN len;
5107 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
5108 &len);
5109 while (isSPACE(*t))
5110 t++;
5111 if ( *t == ';'
5112 && get_cvn_flags(tmpbuf, len, UTF
5113 ? SVf_UTF8
5114 : 0))
5115 {
5116 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5117 "You need to quote \"%" UTF8f "\"",
5118 UTF8fARG(UTF, len, tmpbuf));
5119 }
5120 }
5121 }
5122 }
5123 }
5124
5125 PL_expect = XOPERATOR;
5126 if ((PL_lex_state == LEX_NORMAL || PL_lex_brackets) && isSPACE((char)tmp)) {
5127 const bool islop = (PL_last_lop == PL_oldoldbufptr);
5128 if (!islop || PL_last_lop_op == OP_GREPSTART)
5129 PL_expect = XOPERATOR;
5130 else if (memCHRs("$@\"'`q", *s))
5131 PL_expect = XTERM; /* e.g. print $fh "foo" */
5132 else if ( memCHRs("&*<%", *s)
5133 && isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF))
5134 {
5135 PL_expect = XTERM; /* e.g. print $fh &sub */
5136 }
5137 else if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5138 char tmpbuf[sizeof PL_tokenbuf];
5139 int t2;
5140 STRLEN len;
5141 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5142 if ((t2 = keyword(tmpbuf, len, 0))) {
5143 /* binary operators exclude handle interpretations */
5144 switch (t2) {
5145 case -KEY_x:
5146 case -KEY_eq:
5147 case -KEY_ne:
5148 case -KEY_gt:
5149 case -KEY_lt:
5150 case -KEY_ge:
5151 case -KEY_le:
5152 case -KEY_cmp:
5153 break;
5154 default:
5155 PL_expect = XTERM; /* e.g. print $fh length() */
5156 break;
5157 }
5158 }
5159 else {
5160 PL_expect = XTERM; /* e.g. print $fh subr() */
5161 }
5162 }
5163 else if (isDIGIT(*s))
5164 PL_expect = XTERM; /* e.g. print $fh 3 */
5165 else if (*s == '.' && isDIGIT(s[1]))
5166 PL_expect = XTERM; /* e.g. print $fh .3 */
5167 else if ((*s == '?' || *s == '-' || *s == '+')
5168 && !isSPACE(s[1]) && s[1] != '=')
5169 PL_expect = XTERM; /* e.g. print $fh -1 */
5170 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5171 && s[1] != '/')
5172 PL_expect = XTERM; /* e.g. print $fh /.../
5173 XXX except DORDOR operator
5174 */
5175 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5176 && s[2] != '=')
5177 PL_expect = XTERM; /* print $fh <<"EOF" */
5178 }
5179 }
5180 force_ident_maybe_lex('$');
5181 TOKEN('$');
5182 }
5183
5184 static int
yyl_sub(pTHX_ char * s,const int key)5185 yyl_sub(pTHX_ char *s, const int key)
5186 {
5187 char * const tmpbuf = PL_tokenbuf + 1;
5188 bool have_name, have_proto;
5189 STRLEN len;
5190 SV *format_name = NULL;
5191 bool is_sigsub = FEATURE_SIGNATURES_IS_ENABLED;
5192
5193 SSize_t off = s-SvPVX(PL_linestr);
5194 char *d;
5195
5196 s = skipspace(s); /* can move PL_linestr */
5197
5198 d = SvPVX(PL_linestr)+off;
5199
5200 SAVEBOOL(PL_parser->sig_seen);
5201 PL_parser->sig_seen = FALSE;
5202
5203 if ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
5204 || *s == '\''
5205 || (*s == ':' && s[1] == ':'))
5206 {
5207
5208 PL_expect = XATTRBLOCK;
5209 d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
5210 &len);
5211 if (key == KEY_format)
5212 format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
5213 *PL_tokenbuf = '&';
5214 if (memchr(tmpbuf, ':', len) || key != KEY_sub
5215 || pad_findmy_pvn(
5216 PL_tokenbuf, len + 1, 0
5217 ) != NOT_IN_PAD)
5218 sv_setpvn(PL_subname, tmpbuf, len);
5219 else {
5220 sv_setsv(PL_subname,PL_curstname);
5221 sv_catpvs(PL_subname,"::");
5222 sv_catpvn(PL_subname,tmpbuf,len);
5223 }
5224 if (SvUTF8(PL_linestr))
5225 SvUTF8_on(PL_subname);
5226 have_name = TRUE;
5227
5228 s = skipspace(d);
5229 }
5230 else {
5231 if (key == KEY_my || key == KEY_our || key==KEY_state) {
5232 *d = '\0';
5233 /* diag_listed_as: Missing name in "%s sub" */
5234 Perl_croak(aTHX_
5235 "Missing name in \"%s\"", PL_bufptr);
5236 }
5237 PL_expect = XATTRTERM;
5238 sv_setpvs(PL_subname,"?");
5239 have_name = FALSE;
5240 }
5241
5242 if (key == KEY_format) {
5243 if (format_name) {
5244 NEXTVAL_NEXTTOKE.opval
5245 = newSVOP(OP_CONST,0, format_name);
5246 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
5247 force_next(BAREWORD);
5248 }
5249 PREBLOCK(FORMAT);
5250 }
5251
5252 /* Look for a prototype */
5253 if (*s == '(' && !is_sigsub) {
5254 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
5255 if (!s)
5256 Perl_croak(aTHX_ "Prototype not terminated");
5257 COPLINE_SET_FROM_MULTI_END;
5258 (void)validate_proto(PL_subname, PL_lex_stuff,
5259 ckWARN(WARN_ILLEGALPROTO), 0);
5260 have_proto = TRUE;
5261
5262 s = skipspace(s);
5263 }
5264 else
5265 have_proto = FALSE;
5266
5267 if ( !(*s == ':' && s[1] != ':')
5268 && (*s != '{' && *s != '(') && key != KEY_format)
5269 {
5270 assert(key == KEY_sub || key == KEY_AUTOLOAD ||
5271 key == KEY_DESTROY || key == KEY_BEGIN ||
5272 key == KEY_UNITCHECK || key == KEY_CHECK ||
5273 key == KEY_INIT || key == KEY_END ||
5274 key == KEY_my || key == KEY_state ||
5275 key == KEY_our);
5276 if (!have_name)
5277 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
5278 else if (*s != ';' && *s != '}')
5279 Perl_croak(aTHX_ "Illegal declaration of subroutine %" SVf, SVfARG(PL_subname));
5280 }
5281
5282 if (have_proto) {
5283 NEXTVAL_NEXTTOKE.opval =
5284 newSVOP(OP_CONST, 0, PL_lex_stuff);
5285 PL_lex_stuff = NULL;
5286 force_next(THING);
5287 }
5288 if (!have_name) {
5289 if (PL_curstash)
5290 sv_setpvs(PL_subname, "__ANON__");
5291 else
5292 sv_setpvs(PL_subname, "__ANON__::__ANON__");
5293 if (is_sigsub)
5294 TOKEN(ANON_SIGSUB);
5295 else
5296 TOKEN(ANONSUB);
5297 }
5298 force_ident_maybe_lex('&');
5299 if (is_sigsub)
5300 TOKEN(SIGSUB);
5301 else
5302 TOKEN(SUB);
5303 }
5304
5305 static int
yyl_interpcasemod(pTHX_ char * s)5306 yyl_interpcasemod(pTHX_ char *s)
5307 {
5308 #ifdef DEBUGGING
5309 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
5310 Perl_croak(aTHX_
5311 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
5312 PL_bufptr, PL_bufend, *PL_bufptr);
5313 #endif
5314
5315 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
5316 /* if at a \E */
5317 if (PL_lex_casemods) {
5318 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
5319 PL_lex_casestack[PL_lex_casemods] = '\0';
5320
5321 if (PL_bufptr != PL_bufend
5322 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
5323 || oldmod == 'F')) {
5324 PL_bufptr += 2;
5325 PL_lex_state = LEX_INTERPCONCAT;
5326 }
5327 PL_lex_allbrackets--;
5328 return REPORT(')');
5329 }
5330 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
5331 /* Got an unpaired \E */
5332 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
5333 "Useless use of \\E");
5334 }
5335 if (PL_bufptr != PL_bufend)
5336 PL_bufptr += 2;
5337 PL_lex_state = LEX_INTERPCONCAT;
5338 return yylex();
5339 }
5340 else {
5341 DEBUG_T({
5342 PerlIO_printf(Perl_debug_log, "### Saw case modifier\n");
5343 });
5344 s = PL_bufptr + 1;
5345 if (s[1] == '\\' && s[2] == 'E') {
5346 PL_bufptr = s + 3;
5347 PL_lex_state = LEX_INTERPCONCAT;
5348 return yylex();
5349 }
5350 else {
5351 I32 tmp;
5352 if ( memBEGINs(s, (STRLEN) (PL_bufend - s), "L\\u")
5353 || memBEGINs(s, (STRLEN) (PL_bufend - s), "U\\l"))
5354 {
5355 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
5356 }
5357 if ((*s == 'L' || *s == 'U' || *s == 'F')
5358 && (strpbrk(PL_lex_casestack, "LUF")))
5359 {
5360 PL_lex_casestack[--PL_lex_casemods] = '\0';
5361 PL_lex_allbrackets--;
5362 return REPORT(')');
5363 }
5364 if (PL_lex_casemods > 10)
5365 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
5366 PL_lex_casestack[PL_lex_casemods++] = *s;
5367 PL_lex_casestack[PL_lex_casemods] = '\0';
5368 PL_lex_state = LEX_INTERPCONCAT;
5369 NEXTVAL_NEXTTOKE.ival = 0;
5370 force_next((2<<24)|'(');
5371 if (*s == 'l')
5372 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
5373 else if (*s == 'u')
5374 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
5375 else if (*s == 'L')
5376 NEXTVAL_NEXTTOKE.ival = OP_LC;
5377 else if (*s == 'U')
5378 NEXTVAL_NEXTTOKE.ival = OP_UC;
5379 else if (*s == 'Q')
5380 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
5381 else if (*s == 'F')
5382 NEXTVAL_NEXTTOKE.ival = OP_FC;
5383 else
5384 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
5385 PL_bufptr = s + 1;
5386 }
5387 force_next(FUNC);
5388 if (PL_lex_starts) {
5389 s = PL_bufptr;
5390 PL_lex_starts = 0;
5391 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5392 if (PL_lex_casemods == 1 && PL_lex_inpat)
5393 TOKEN(',');
5394 else
5395 AopNOASSIGN(OP_CONCAT);
5396 }
5397 else
5398 return yylex();
5399 }
5400 }
5401
5402 static int
yyl_secondclass_keyword(pTHX_ char * s,STRLEN len,int key,I32 * orig_keyword,GV ** pgv,GV *** pgvp)5403 yyl_secondclass_keyword(pTHX_ char *s, STRLEN len, int key, I32 *orig_keyword,
5404 GV **pgv, GV ***pgvp)
5405 {
5406 GV *ogv = NULL; /* override (winner) */
5407 GV *hgv = NULL; /* hidden (loser) */
5408 GV *gv = *pgv;
5409
5410 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5411 CV *cv;
5412 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5413 (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
5414 SVt_PVCV))
5415 && (cv = GvCVu(gv)))
5416 {
5417 if (GvIMPORTED_CV(gv))
5418 ogv = gv;
5419 else if (! CvMETHOD(cv))
5420 hgv = gv;
5421 }
5422 if (!ogv
5423 && (*pgvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf, len, FALSE))
5424 && (gv = **pgvp)
5425 && (isGV_with_GP(gv)
5426 ? GvCVu(gv) && GvIMPORTED_CV(gv)
5427 : SvPCS_IMPORTED(gv)
5428 && (gv_init(gv, PL_globalstash, PL_tokenbuf,
5429 len, 0), 1)))
5430 {
5431 ogv = gv;
5432 }
5433 }
5434
5435 *pgv = gv;
5436
5437 if (ogv) {
5438 *orig_keyword = key;
5439 return 0; /* overridden by import or by GLOBAL */
5440 }
5441 else if (gv && !*pgvp
5442 && -key==KEY_lock /* XXX generalizable kludge */
5443 && GvCVu(gv))
5444 {
5445 return 0; /* any sub overrides "weak" keyword */
5446 }
5447 else { /* no override */
5448 key = -key;
5449 if (key == KEY_dump) {
5450 Perl_croak(aTHX_ "dump() must be written as CORE::dump() as of Perl 5.30");
5451 }
5452 *pgv = NULL;
5453 *pgvp = 0;
5454 if (hgv && key != KEY_x) /* never ambiguous */
5455 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5456 "Ambiguous call resolved as CORE::%s(), "
5457 "qualify as such or use &",
5458 GvENAME(hgv));
5459 return key;
5460 }
5461 }
5462
5463 static int
yyl_qw(pTHX_ char * s,STRLEN len)5464 yyl_qw(pTHX_ char *s, STRLEN len)
5465 {
5466 OP *words = NULL;
5467
5468 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
5469 if (!s)
5470 missingterm(NULL, 0);
5471
5472 COPLINE_SET_FROM_MULTI_END;
5473 PL_expect = XOPERATOR;
5474 if (SvCUR(PL_lex_stuff)) {
5475 int warned_comma = !ckWARN(WARN_QW);
5476 int warned_comment = warned_comma;
5477 char *d = SvPV_force(PL_lex_stuff, len);
5478 while (len) {
5479 for (; isSPACE(*d) && len; --len, ++d)
5480 /**/;
5481 if (len) {
5482 SV *sv;
5483 const char *b = d;
5484 if (!warned_comma || !warned_comment) {
5485 for (; !isSPACE(*d) && len; --len, ++d) {
5486 if (!warned_comma && *d == ',') {
5487 Perl_warner(aTHX_ packWARN(WARN_QW),
5488 "Possible attempt to separate words with commas");
5489 ++warned_comma;
5490 }
5491 else if (!warned_comment && *d == '#') {
5492 Perl_warner(aTHX_ packWARN(WARN_QW),
5493 "Possible attempt to put comments in qw() list");
5494 ++warned_comment;
5495 }
5496 }
5497 }
5498 else {
5499 for (; !isSPACE(*d) && len; --len, ++d)
5500 /**/;
5501 }
5502 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
5503 words = op_append_elem(OP_LIST, words,
5504 newSVOP(OP_CONST, 0, tokeq(sv)));
5505 }
5506 }
5507 }
5508 if (!words)
5509 words = newNULLLIST();
5510 SvREFCNT_dec_NN(PL_lex_stuff);
5511 PL_lex_stuff = NULL;
5512 PL_expect = XOPERATOR;
5513 pl_yylval.opval = sawparens(words);
5514 TOKEN(QWLIST);
5515 }
5516
5517 static int
yyl_hyphen(pTHX_ char * s)5518 yyl_hyphen(pTHX_ char *s)
5519 {
5520 if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5521 I32 ftst = 0;
5522 char tmp;
5523
5524 s++;
5525 PL_bufptr = s;
5526 tmp = *s++;
5527
5528 while (s < PL_bufend && SPACE_OR_TAB(*s))
5529 s++;
5530
5531 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=>")) {
5532 s = force_word(PL_bufptr,BAREWORD,FALSE,FALSE);
5533 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5534 OPERATOR('-'); /* unary minus */
5535 }
5536 switch (tmp) {
5537 case 'r': ftst = OP_FTEREAD; break;
5538 case 'w': ftst = OP_FTEWRITE; break;
5539 case 'x': ftst = OP_FTEEXEC; break;
5540 case 'o': ftst = OP_FTEOWNED; break;
5541 case 'R': ftst = OP_FTRREAD; break;
5542 case 'W': ftst = OP_FTRWRITE; break;
5543 case 'X': ftst = OP_FTREXEC; break;
5544 case 'O': ftst = OP_FTROWNED; break;
5545 case 'e': ftst = OP_FTIS; break;
5546 case 'z': ftst = OP_FTZERO; break;
5547 case 's': ftst = OP_FTSIZE; break;
5548 case 'f': ftst = OP_FTFILE; break;
5549 case 'd': ftst = OP_FTDIR; break;
5550 case 'l': ftst = OP_FTLINK; break;
5551 case 'p': ftst = OP_FTPIPE; break;
5552 case 'S': ftst = OP_FTSOCK; break;
5553 case 'u': ftst = OP_FTSUID; break;
5554 case 'g': ftst = OP_FTSGID; break;
5555 case 'k': ftst = OP_FTSVTX; break;
5556 case 'b': ftst = OP_FTBLK; break;
5557 case 'c': ftst = OP_FTCHR; break;
5558 case 't': ftst = OP_FTTTY; break;
5559 case 'T': ftst = OP_FTTEXT; break;
5560 case 'B': ftst = OP_FTBINARY; break;
5561 case 'M': case 'A': case 'C':
5562 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5563 switch (tmp) {
5564 case 'M': ftst = OP_FTMTIME; break;
5565 case 'A': ftst = OP_FTATIME; break;
5566 case 'C': ftst = OP_FTCTIME; break;
5567 default: break;
5568 }
5569 break;
5570 default:
5571 break;
5572 }
5573 if (ftst) {
5574 PL_last_uni = PL_oldbufptr;
5575 PL_last_lop_op = (OPCODE)ftst;
5576 DEBUG_T( {
5577 PerlIO_printf(Perl_debug_log, "### Saw file test %c\n", (int)tmp);
5578 } );
5579 FTST(ftst);
5580 }
5581 else {
5582 /* Assume it was a minus followed by a one-letter named
5583 * subroutine call (or a -bareword), then. */
5584 DEBUG_T( {
5585 PerlIO_printf(Perl_debug_log,
5586 "### '-%c' looked like a file test but was not\n",
5587 (int) tmp);
5588 } );
5589 s = --PL_bufptr;
5590 }
5591 }
5592 {
5593 const char tmp = *s++;
5594 if (*s == tmp) {
5595 s++;
5596 if (PL_expect == XOPERATOR)
5597 TERM(POSTDEC);
5598 else
5599 OPERATOR(PREDEC);
5600 }
5601 else if (*s == '>') {
5602 s++;
5603 s = skipspace(s);
5604 if (((*s == '$' || *s == '&') && s[1] == '*')
5605 ||(*s == '$' && s[1] == '#' && s[2] == '*')
5606 ||((*s == '@' || *s == '%') && memCHRs("*[{", s[1]))
5607 ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5608 )
5609 {
5610 PL_expect = XPOSTDEREF;
5611 TOKEN(ARROW);
5612 }
5613 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5614 s = force_word(s,METHOD,FALSE,TRUE);
5615 TOKEN(ARROW);
5616 }
5617 else if (*s == '$')
5618 OPERATOR(ARROW);
5619 else
5620 TERM(ARROW);
5621 }
5622 if (PL_expect == XOPERATOR) {
5623 if (*s == '='
5624 && !PL_lex_allbrackets
5625 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5626 {
5627 s--;
5628 TOKEN(0);
5629 }
5630 Aop(OP_SUBTRACT);
5631 }
5632 else {
5633 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5634 check_uni();
5635 OPERATOR('-'); /* unary minus */
5636 }
5637 }
5638 }
5639
5640 static int
yyl_plus(pTHX_ char * s)5641 yyl_plus(pTHX_ char *s)
5642 {
5643 const char tmp = *s++;
5644 if (*s == tmp) {
5645 s++;
5646 if (PL_expect == XOPERATOR)
5647 TERM(POSTINC);
5648 else
5649 OPERATOR(PREINC);
5650 }
5651 if (PL_expect == XOPERATOR) {
5652 if (*s == '='
5653 && !PL_lex_allbrackets
5654 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5655 {
5656 s--;
5657 TOKEN(0);
5658 }
5659 Aop(OP_ADD);
5660 }
5661 else {
5662 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5663 check_uni();
5664 OPERATOR('+');
5665 }
5666 }
5667
5668 static int
yyl_star(pTHX_ char * s)5669 yyl_star(pTHX_ char *s)
5670 {
5671 if (PL_expect == XPOSTDEREF)
5672 POSTDEREF('*');
5673
5674 if (PL_expect != XOPERATOR) {
5675 s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5676 PL_expect = XOPERATOR;
5677 force_ident(PL_tokenbuf, '*');
5678 if (!*PL_tokenbuf)
5679 PREREF('*');
5680 TERM('*');
5681 }
5682
5683 s++;
5684 if (*s == '*') {
5685 s++;
5686 if (*s == '=' && !PL_lex_allbrackets
5687 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5688 {
5689 s -= 2;
5690 TOKEN(0);
5691 }
5692 PWop(OP_POW);
5693 }
5694
5695 if (*s == '='
5696 && !PL_lex_allbrackets
5697 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5698 {
5699 s--;
5700 TOKEN(0);
5701 }
5702
5703 Mop(OP_MULTIPLY);
5704 }
5705
5706 static int
yyl_percent(pTHX_ char * s)5707 yyl_percent(pTHX_ char *s)
5708 {
5709 if (PL_expect == XOPERATOR) {
5710 if (s[1] == '='
5711 && !PL_lex_allbrackets
5712 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5713 {
5714 TOKEN(0);
5715 }
5716 ++s;
5717 Mop(OP_MODULO);
5718 }
5719 else if (PL_expect == XPOSTDEREF)
5720 POSTDEREF('%');
5721
5722 PL_tokenbuf[0] = '%';
5723 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5724 pl_yylval.ival = 0;
5725 if (!PL_tokenbuf[1]) {
5726 PREREF('%');
5727 }
5728 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5729 && intuit_more(s, PL_bufend)) {
5730 if (*s == '[')
5731 PL_tokenbuf[0] = '@';
5732 }
5733 PL_expect = XOPERATOR;
5734 force_ident_maybe_lex('%');
5735 TERM('%');
5736 }
5737
5738 static int
yyl_caret(pTHX_ char * s)5739 yyl_caret(pTHX_ char *s)
5740 {
5741 char *d = s;
5742 const bool bof = cBOOL(FEATURE_BITWISE_IS_ENABLED);
5743 if (bof && s[1] == '.')
5744 s++;
5745 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5746 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5747 {
5748 s = d;
5749 TOKEN(0);
5750 }
5751 s++;
5752 BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR);
5753 }
5754
5755 static int
yyl_colon(pTHX_ char * s)5756 yyl_colon(pTHX_ char *s)
5757 {
5758 OP *attrs;
5759
5760 switch (PL_expect) {
5761 case XOPERATOR:
5762 if (!PL_in_my || (PL_lex_state != LEX_NORMAL && !PL_lex_brackets))
5763 break;
5764 PL_bufptr = s; /* update in case we back off */
5765 if (*s == '=') {
5766 Perl_croak(aTHX_
5767 "Use of := for an empty attribute list is not allowed");
5768 }
5769 goto grabattrs;
5770 case XATTRBLOCK:
5771 PL_expect = XBLOCK;
5772 goto grabattrs;
5773 case XATTRTERM:
5774 PL_expect = XTERMBLOCK;
5775 grabattrs:
5776 /* NB: as well as parsing normal attributes, we also end up
5777 * here if there is something looking like attributes
5778 * following a signature (which is illegal, but used to be
5779 * legal in 5.20..5.26). If the latter, we still parse the
5780 * attributes so that error messages(s) are less confusing,
5781 * but ignore them (parser->sig_seen).
5782 */
5783 s = skipspace(s);
5784 attrs = NULL;
5785 while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5786 bool sig = PL_parser->sig_seen;
5787 I32 tmp;
5788 SV *sv;
5789 STRLEN len;
5790 char *d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5791 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5792 if (tmp < 0) tmp = -tmp;
5793 switch (tmp) {
5794 case KEY_or:
5795 case KEY_and:
5796 case KEY_for:
5797 case KEY_foreach:
5798 case KEY_unless:
5799 case KEY_if:
5800 case KEY_while:
5801 case KEY_until:
5802 goto got_attrs;
5803 default:
5804 break;
5805 }
5806 }
5807 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5808 if (*d == '(') {
5809 d = scan_str(d,TRUE,TRUE,FALSE,NULL);
5810 if (!d) {
5811 if (attrs)
5812 op_free(attrs);
5813 sv_free(sv);
5814 Perl_croak(aTHX_ "Unterminated attribute parameter in attribute list");
5815 }
5816 COPLINE_SET_FROM_MULTI_END;
5817 }
5818 if (PL_lex_stuff) {
5819 sv_catsv(sv, PL_lex_stuff);
5820 attrs = op_append_elem(OP_LIST, attrs,
5821 newSVOP(OP_CONST, 0, sv));
5822 SvREFCNT_dec_NN(PL_lex_stuff);
5823 PL_lex_stuff = NULL;
5824 }
5825 else {
5826 /* NOTE: any CV attrs applied here need to be part of
5827 the CVf_BUILTIN_ATTRS define in cv.h! */
5828 if (!PL_in_my && memEQs(SvPVX(sv), len, "lvalue")) {
5829 sv_free(sv);
5830 if (!sig)
5831 CvLVALUE_on(PL_compcv);
5832 }
5833 else if (!PL_in_my && memEQs(SvPVX(sv), len, "method")) {
5834 sv_free(sv);
5835 if (!sig)
5836 CvMETHOD_on(PL_compcv);
5837 }
5838 else if (!PL_in_my && memEQs(SvPVX(sv), len, "const")) {
5839 sv_free(sv);
5840 if (!sig) {
5841 Perl_ck_warner_d(aTHX_
5842 packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
5843 ":const is experimental"
5844 );
5845 CvANONCONST_on(PL_compcv);
5846 if (!CvANON(PL_compcv))
5847 yyerror(":const is not permitted on named "
5848 "subroutines");
5849 }
5850 }
5851 /* After we've set the flags, it could be argued that
5852 we don't need to do the attributes.pm-based setting
5853 process, and shouldn't bother appending recognized
5854 flags. To experiment with that, uncomment the
5855 following "else". (Note that's already been
5856 uncommented. That keeps the above-applied built-in
5857 attributes from being intercepted (and possibly
5858 rejected) by a package's attribute routines, but is
5859 justified by the performance win for the common case
5860 of applying only built-in attributes.) */
5861 else
5862 attrs = op_append_elem(OP_LIST, attrs,
5863 newSVOP(OP_CONST, 0,
5864 sv));
5865 }
5866 s = skipspace(d);
5867 if (*s == ':' && s[1] != ':')
5868 s = skipspace(s+1);
5869 else if (s == d)
5870 break; /* require real whitespace or :'s */
5871 /* XXX losing whitespace on sequential attributes here */
5872 }
5873
5874 if (*s != ';'
5875 && *s != '}'
5876 && !(PL_expect == XOPERATOR
5877 ? (*s == '=' || *s == ')')
5878 : (*s == '{' || *s == '(')))
5879 {
5880 const char q = ((*s == '\'') ? '"' : '\'');
5881 /* If here for an expression, and parsed no attrs, back off. */
5882 if (PL_expect == XOPERATOR && !attrs) {
5883 s = PL_bufptr;
5884 break;
5885 }
5886 /* MUST advance bufptr here to avoid bogus "at end of line"
5887 context messages from yyerror().
5888 */
5889 PL_bufptr = s;
5890 yyerror( (const char *)
5891 (*s
5892 ? Perl_form(aTHX_ "Invalid separator character "
5893 "%c%c%c in attribute list", q, *s, q)
5894 : "Unterminated attribute list" ) );
5895 if (attrs)
5896 op_free(attrs);
5897 OPERATOR(':');
5898 }
5899
5900 got_attrs:
5901 if (PL_parser->sig_seen) {
5902 /* see comment about about sig_seen and parser error
5903 * handling */
5904 if (attrs)
5905 op_free(attrs);
5906 Perl_croak(aTHX_ "Subroutine attributes must come "
5907 "before the signature");
5908 }
5909 if (attrs) {
5910 NEXTVAL_NEXTTOKE.opval = attrs;
5911 force_next(THING);
5912 }
5913 TOKEN(COLONATTR);
5914 }
5915
5916 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5917 s--;
5918 TOKEN(0);
5919 }
5920
5921 PL_lex_allbrackets--;
5922 OPERATOR(':');
5923 }
5924
5925 static int
yyl_subproto(pTHX_ char * s,CV * cv)5926 yyl_subproto(pTHX_ char *s, CV *cv)
5927 {
5928 STRLEN protolen = CvPROTOLEN(cv);
5929 const char *proto = CvPROTO(cv);
5930 bool optional;
5931
5932 proto = S_strip_spaces(aTHX_ proto, &protolen);
5933 if (!protolen)
5934 TERM(FUNC0SUB);
5935 if ((optional = *proto == ';')) {
5936 do {
5937 proto++;
5938 } while (*proto == ';');
5939 }
5940
5941 if (
5942 (
5943 (
5944 *proto == '$' || *proto == '_'
5945 || *proto == '*' || *proto == '+'
5946 )
5947 && proto[1] == '\0'
5948 )
5949 || (
5950 *proto == '\\' && proto[1] && proto[2] == '\0'
5951 )
5952 ) {
5953 UNIPROTO(UNIOPSUB,optional);
5954 }
5955
5956 if (*proto == '\\' && proto[1] == '[') {
5957 const char *p = proto + 2;
5958 while(*p && *p != ']')
5959 ++p;
5960 if(*p == ']' && !p[1])
5961 UNIPROTO(UNIOPSUB,optional);
5962 }
5963
5964 if (*proto == '&' && *s == '{') {
5965 if (PL_curstash)
5966 sv_setpvs(PL_subname, "__ANON__");
5967 else
5968 sv_setpvs(PL_subname, "__ANON__::__ANON__");
5969 if (!PL_lex_allbrackets
5970 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
5971 {
5972 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
5973 }
5974 PREBLOCK(LSTOPSUB);
5975 }
5976
5977 return KEY_NULL;
5978 }
5979
5980 static int
yyl_leftcurly(pTHX_ char * s,const U8 formbrack)5981 yyl_leftcurly(pTHX_ char *s, const U8 formbrack)
5982 {
5983 char *d;
5984 if (PL_lex_brackets > 100) {
5985 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5986 }
5987
5988 switch (PL_expect) {
5989 case XTERM:
5990 case XTERMORDORDOR:
5991 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5992 PL_lex_allbrackets++;
5993 OPERATOR(HASHBRACK);
5994 case XOPERATOR:
5995 while (s < PL_bufend && SPACE_OR_TAB(*s))
5996 s++;
5997 d = s;
5998 PL_tokenbuf[0] = '\0';
5999 if (d < PL_bufend && *d == '-') {
6000 PL_tokenbuf[0] = '-';
6001 d++;
6002 while (d < PL_bufend && SPACE_OR_TAB(*d))
6003 d++;
6004 }
6005 if (d < PL_bufend && isIDFIRST_lazy_if_safe(d, PL_bufend, UTF)) {
6006 STRLEN len;
6007 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6008 FALSE, &len);
6009 while (d < PL_bufend && SPACE_OR_TAB(*d))
6010 d++;
6011 if (*d == '}') {
6012 const char minus = (PL_tokenbuf[0] == '-');
6013 s = force_word(s + minus, BAREWORD, FALSE, TRUE);
6014 if (minus)
6015 force_next('-');
6016 }
6017 }
6018 /* FALLTHROUGH */
6019 case XATTRTERM:
6020 case XTERMBLOCK:
6021 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6022 PL_lex_allbrackets++;
6023 PL_expect = XSTATE;
6024 break;
6025 case XATTRBLOCK:
6026 case XBLOCK:
6027 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
6028 PL_lex_allbrackets++;
6029 PL_expect = XSTATE;
6030 break;
6031 case XBLOCKTERM:
6032 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6033 PL_lex_allbrackets++;
6034 PL_expect = XSTATE;
6035 break;
6036 default: {
6037 const char *t;
6038 if (PL_oldoldbufptr == PL_last_lop)
6039 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6040 else
6041 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6042 PL_lex_allbrackets++;
6043 s = skipspace(s);
6044 if (*s == '}') {
6045 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6046 PL_expect = XTERM;
6047 /* This hack is to get the ${} in the message. */
6048 PL_bufptr = s+1;
6049 yyerror("syntax error");
6050 break;
6051 }
6052 OPERATOR(HASHBRACK);
6053 }
6054 if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
6055 /* ${...} or @{...} etc., but not print {...}
6056 * Skip the disambiguation and treat this as a block.
6057 */
6058 goto block_expectation;
6059 }
6060 /* This hack serves to disambiguate a pair of curlies
6061 * as being a block or an anon hash. Normally, expectation
6062 * determines that, but in cases where we're not in a
6063 * position to expect anything in particular (like inside
6064 * eval"") we have to resolve the ambiguity. This code
6065 * covers the case where the first term in the curlies is a
6066 * quoted string. Most other cases need to be explicitly
6067 * disambiguated by prepending a "+" before the opening
6068 * curly in order to force resolution as an anon hash.
6069 *
6070 * XXX should probably propagate the outer expectation
6071 * into eval"" to rely less on this hack, but that could
6072 * potentially break current behavior of eval"".
6073 * GSAR 97-07-21
6074 */
6075 t = s;
6076 if (*s == '\'' || *s == '"' || *s == '`') {
6077 /* common case: get past first string, handling escapes */
6078 for (t++; t < PL_bufend && *t != *s;)
6079 if (*t++ == '\\')
6080 t++;
6081 t++;
6082 }
6083 else if (*s == 'q') {
6084 if (++t < PL_bufend
6085 && (!isWORDCHAR(*t)
6086 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6087 && !isWORDCHAR(*t))))
6088 {
6089 /* skip q//-like construct */
6090 const char *tmps;
6091 char open, close, term;
6092 I32 brackets = 1;
6093
6094 while (t < PL_bufend && isSPACE(*t))
6095 t++;
6096 /* check for q => */
6097 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6098 OPERATOR(HASHBRACK);
6099 }
6100 term = *t;
6101 open = term;
6102 if (term && (tmps = memCHRs("([{< )]}> )]}>",term)))
6103 term = tmps[5];
6104 close = term;
6105 if (open == close)
6106 for (t++; t < PL_bufend; t++) {
6107 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6108 t++;
6109 else if (*t == open)
6110 break;
6111 }
6112 else {
6113 for (t++; t < PL_bufend; t++) {
6114 if (*t == '\\' && t+1 < PL_bufend)
6115 t++;
6116 else if (*t == close && --brackets <= 0)
6117 break;
6118 else if (*t == open)
6119 brackets++;
6120 }
6121 }
6122 t++;
6123 }
6124 else
6125 /* skip plain q word */
6126 while ( t < PL_bufend
6127 && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6128 {
6129 t += UTF ? UTF8SKIP(t) : 1;
6130 }
6131 }
6132 else if (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) {
6133 t += UTF ? UTF8SKIP(t) : 1;
6134 while ( t < PL_bufend
6135 && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6136 {
6137 t += UTF ? UTF8SKIP(t) : 1;
6138 }
6139 }
6140 while (t < PL_bufend && isSPACE(*t))
6141 t++;
6142 /* if comma follows first term, call it an anon hash */
6143 /* XXX it could be a comma expression with loop modifiers */
6144 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6145 || (*t == '=' && t[1] == '>')))
6146 OPERATOR(HASHBRACK);
6147 if (PL_expect == XREF) {
6148 block_expectation:
6149 /* If there is an opening brace or 'sub:', treat it
6150 as a term to make ${{...}}{k} and &{sub:attr...}
6151 dwim. Otherwise, treat it as a statement, so
6152 map {no strict; ...} works.
6153 */
6154 s = skipspace(s);
6155 if (*s == '{') {
6156 PL_expect = XTERM;
6157 break;
6158 }
6159 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "sub")) {
6160 PL_bufptr = s;
6161 d = s + 3;
6162 d = skipspace(d);
6163 s = PL_bufptr;
6164 if (*d == ':') {
6165 PL_expect = XTERM;
6166 break;
6167 }
6168 }
6169 PL_expect = XSTATE;
6170 }
6171 else {
6172 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6173 PL_expect = XSTATE;
6174 }
6175 }
6176 break;
6177 }
6178
6179 pl_yylval.ival = CopLINE(PL_curcop);
6180 PL_copline = NOLINE; /* invalidate current command line number */
6181 TOKEN(formbrack ? '=' : '{');
6182 }
6183
6184 static int
yyl_rightcurly(pTHX_ char * s,const U8 formbrack)6185 yyl_rightcurly(pTHX_ char *s, const U8 formbrack)
6186 {
6187 assert(s != PL_bufend);
6188 s++;
6189
6190 if (PL_lex_brackets <= 0)
6191 /* diag_listed_as: Unmatched right %s bracket */
6192 yyerror("Unmatched right curly bracket");
6193 else
6194 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6195
6196 PL_lex_allbrackets--;
6197
6198 if (PL_lex_state == LEX_INTERPNORMAL) {
6199 if (PL_lex_brackets == 0) {
6200 if (PL_expect & XFAKEBRACK) {
6201 PL_expect &= XENUMMASK;
6202 PL_lex_state = LEX_INTERPEND;
6203 PL_bufptr = s;
6204 return yylex(); /* ignore fake brackets */
6205 }
6206 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6207 && SvEVALED(PL_lex_repl))
6208 PL_lex_state = LEX_INTERPEND;
6209 else if (*s == '-' && s[1] == '>')
6210 PL_lex_state = LEX_INTERPENDMAYBE;
6211 else if (*s != '[' && *s != '{')
6212 PL_lex_state = LEX_INTERPEND;
6213 }
6214 }
6215
6216 if (PL_expect & XFAKEBRACK) {
6217 PL_expect &= XENUMMASK;
6218 PL_bufptr = s;
6219 return yylex(); /* ignore fake brackets */
6220 }
6221
6222 force_next(formbrack ? '.' : '}');
6223 if (formbrack) LEAVE_with_name("lex_format");
6224 if (formbrack == 2) { /* means . where arguments were expected */
6225 force_next(';');
6226 TOKEN(FORMRBRACK);
6227 }
6228
6229 TOKEN(';');
6230 }
6231
6232 static int
yyl_ampersand(pTHX_ char * s)6233 yyl_ampersand(pTHX_ char *s)
6234 {
6235 if (PL_expect == XPOSTDEREF)
6236 POSTDEREF('&');
6237
6238 s++;
6239 if (*s++ == '&') {
6240 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6241 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6242 s -= 2;
6243 TOKEN(0);
6244 }
6245 AOPERATOR(ANDAND);
6246 }
6247 s--;
6248
6249 if (PL_expect == XOPERATOR) {
6250 char *d;
6251 bool bof;
6252 if ( PL_bufptr == PL_linestart
6253 && ckWARN(WARN_SEMICOLON)
6254 && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
6255 {
6256 CopLINE_dec(PL_curcop);
6257 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6258 CopLINE_inc(PL_curcop);
6259 }
6260 d = s;
6261 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6262 s++;
6263 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6264 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6265 s = d;
6266 s--;
6267 TOKEN(0);
6268 }
6269 if (d == s)
6270 BAop(bof ? OP_NBIT_AND : OP_BIT_AND);
6271 else
6272 BAop(OP_SBIT_AND);
6273 }
6274
6275 PL_tokenbuf[0] = '&';
6276 s = scan_ident(s - 1, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
6277 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6278
6279 if (PL_tokenbuf[1])
6280 force_ident_maybe_lex('&');
6281 else
6282 PREREF('&');
6283
6284 TERM('&');
6285 }
6286
6287 static int
yyl_verticalbar(pTHX_ char * s)6288 yyl_verticalbar(pTHX_ char *s)
6289 {
6290 char *d;
6291 bool bof;
6292
6293 s++;
6294 if (*s++ == '|') {
6295 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6296 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6297 s -= 2;
6298 TOKEN(0);
6299 }
6300 AOPERATOR(OROR);
6301 }
6302
6303 s--;
6304 d = s;
6305 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6306 s++;
6307
6308 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6309 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6310 s = d - 1;
6311 TOKEN(0);
6312 }
6313
6314 BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR);
6315 }
6316
6317 static int
yyl_bang(pTHX_ char * s)6318 yyl_bang(pTHX_ char *s)
6319 {
6320 const char tmp = *s++;
6321 if (tmp == '=') {
6322 /* was this !=~ where !~ was meant?
6323 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6324
6325 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6326 const char *t = s+1;
6327
6328 while (t < PL_bufend && isSPACE(*t))
6329 ++t;
6330
6331 if (*t == '/' || *t == '?'
6332 || ((*t == 'm' || *t == 's' || *t == 'y')
6333 && !isWORDCHAR(t[1]))
6334 || (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6335 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6336 "!=~ should be !~");
6337 }
6338
6339 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6340 s -= 2;
6341 TOKEN(0);
6342 }
6343
6344 ChEop(OP_NE);
6345 }
6346
6347 if (tmp == '~')
6348 PMop(OP_NOT);
6349
6350 s--;
6351 OPERATOR('!');
6352 }
6353
6354 static int
yyl_snail(pTHX_ char * s)6355 yyl_snail(pTHX_ char *s)
6356 {
6357 if (PL_expect == XPOSTDEREF)
6358 POSTDEREF('@');
6359 PL_tokenbuf[0] = '@';
6360 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6361 if (PL_expect == XOPERATOR) {
6362 char *d = s;
6363 if (PL_bufptr > s) {
6364 d = PL_bufptr-1;
6365 PL_bufptr = PL_oldbufptr;
6366 }
6367 no_op("Array", d);
6368 }
6369 pl_yylval.ival = 0;
6370 if (!PL_tokenbuf[1]) {
6371 PREREF('@');
6372 }
6373 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6374 s = skipspace(s);
6375 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6376 && intuit_more(s, PL_bufend))
6377 {
6378 if (*s == '{')
6379 PL_tokenbuf[0] = '%';
6380
6381 /* Warn about @ where they meant $. */
6382 if (*s == '[' || *s == '{') {
6383 if (ckWARN(WARN_SYNTAX)) {
6384 S_check_scalar_slice(aTHX_ s);
6385 }
6386 }
6387 }
6388 PL_expect = XOPERATOR;
6389 force_ident_maybe_lex('@');
6390 TERM('@');
6391 }
6392
6393 static int
yyl_slash(pTHX_ char * s)6394 yyl_slash(pTHX_ char *s)
6395 {
6396 if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') {
6397 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6398 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6399 TOKEN(0);
6400 s += 2;
6401 AOPERATOR(DORDOR);
6402 }
6403 else if (PL_expect == XOPERATOR) {
6404 s++;
6405 if (*s == '=' && !PL_lex_allbrackets
6406 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6407 {
6408 s--;
6409 TOKEN(0);
6410 }
6411 Mop(OP_DIVIDE);
6412 }
6413 else {
6414 /* Disable warning on "study /blah/" */
6415 if ( PL_oldoldbufptr == PL_last_uni
6416 && ( *PL_last_uni != 's' || s - PL_last_uni < 5
6417 || memNE(PL_last_uni, "study", 5)
6418 || isWORDCHAR_lazy_if_safe(PL_last_uni+5, PL_bufend, UTF)
6419 ))
6420 check_uni();
6421 s = scan_pat(s,OP_MATCH);
6422 TERM(sublex_start());
6423 }
6424 }
6425
6426 static int
yyl_leftsquare(pTHX_ char * s)6427 yyl_leftsquare(pTHX_ char *s)
6428 {
6429 char tmp;
6430
6431 if (PL_lex_brackets > 100)
6432 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6433 PL_lex_brackstack[PL_lex_brackets++] = 0;
6434 PL_lex_allbrackets++;
6435 tmp = *s++;
6436 OPERATOR(tmp);
6437 }
6438
6439 static int
yyl_rightsquare(pTHX_ char * s)6440 yyl_rightsquare(pTHX_ char *s)
6441 {
6442 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6443 TOKEN(0);
6444 s++;
6445 if (PL_lex_brackets <= 0)
6446 /* diag_listed_as: Unmatched right %s bracket */
6447 yyerror("Unmatched right square bracket");
6448 else
6449 --PL_lex_brackets;
6450 PL_lex_allbrackets--;
6451 if (PL_lex_state == LEX_INTERPNORMAL) {
6452 if (PL_lex_brackets == 0) {
6453 if (*s == '-' && s[1] == '>')
6454 PL_lex_state = LEX_INTERPENDMAYBE;
6455 else if (*s != '[' && *s != '{')
6456 PL_lex_state = LEX_INTERPEND;
6457 }
6458 }
6459 TERM(']');
6460 }
6461
6462 static int
yyl_tilde(pTHX_ char * s)6463 yyl_tilde(pTHX_ char *s)
6464 {
6465 bool bof;
6466 if (s[1] == '~' && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)) {
6467 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6468 TOKEN(0);
6469 s += 2;
6470 Perl_ck_warner_d(aTHX_
6471 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
6472 "Smartmatch is experimental");
6473 NCEop(OP_SMARTMATCH);
6474 }
6475 s++;
6476 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') {
6477 s++;
6478 BCop(OP_SCOMPLEMENT);
6479 }
6480 BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT);
6481 }
6482
6483 static int
yyl_leftparen(pTHX_ char * s)6484 yyl_leftparen(pTHX_ char *s)
6485 {
6486 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
6487 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
6488 else
6489 PL_expect = XTERM;
6490 s = skipspace(s);
6491 PL_lex_allbrackets++;
6492 TOKEN('(');
6493 }
6494
6495 static int
yyl_rightparen(pTHX_ char * s)6496 yyl_rightparen(pTHX_ char *s)
6497 {
6498 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
6499 TOKEN(0);
6500 s++;
6501 PL_lex_allbrackets--;
6502 s = skipspace(s);
6503 if (*s == '{')
6504 PREBLOCK(')');
6505 TERM(')');
6506 }
6507
6508 static int
yyl_leftpointy(pTHX_ char * s)6509 yyl_leftpointy(pTHX_ char *s)
6510 {
6511 char tmp;
6512
6513 if (PL_expect != XOPERATOR) {
6514 if (s[1] != '<' && !memchr(s,'>', PL_bufend - s))
6515 check_uni();
6516 if (s[1] == '<' && s[2] != '>')
6517 s = scan_heredoc(s);
6518 else
6519 s = scan_inputsymbol(s);
6520 PL_expect = XOPERATOR;
6521 TOKEN(sublex_start());
6522 }
6523
6524 s++;
6525
6526 tmp = *s++;
6527 if (tmp == '<') {
6528 if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6529 s -= 2;
6530 TOKEN(0);
6531 }
6532 SHop(OP_LEFT_SHIFT);
6533 }
6534 if (tmp == '=') {
6535 tmp = *s++;
6536 if (tmp == '>') {
6537 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6538 s -= 3;
6539 TOKEN(0);
6540 }
6541 NCEop(OP_NCMP);
6542 }
6543 s--;
6544 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6545 s -= 2;
6546 TOKEN(0);
6547 }
6548 ChRop(OP_LE);
6549 }
6550
6551 s--;
6552 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6553 s--;
6554 TOKEN(0);
6555 }
6556
6557 ChRop(OP_LT);
6558 }
6559
6560 static int
yyl_rightpointy(pTHX_ char * s)6561 yyl_rightpointy(pTHX_ char *s)
6562 {
6563 const char tmp = *s++;
6564
6565 if (tmp == '>') {
6566 if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6567 s -= 2;
6568 TOKEN(0);
6569 }
6570 SHop(OP_RIGHT_SHIFT);
6571 }
6572 else if (tmp == '=') {
6573 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6574 s -= 2;
6575 TOKEN(0);
6576 }
6577 ChRop(OP_GE);
6578 }
6579
6580 s--;
6581 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6582 s--;
6583 TOKEN(0);
6584 }
6585
6586 ChRop(OP_GT);
6587 }
6588
6589 static int
yyl_sglquote(pTHX_ char * s)6590 yyl_sglquote(pTHX_ char *s)
6591 {
6592 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6593 if (!s)
6594 missingterm(NULL, 0);
6595 COPLINE_SET_FROM_MULTI_END;
6596 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6597 if (PL_expect == XOPERATOR) {
6598 no_op("String",s);
6599 }
6600 pl_yylval.ival = OP_CONST;
6601 TERM(sublex_start());
6602 }
6603
6604 static int
yyl_dblquote(pTHX_ char * s)6605 yyl_dblquote(pTHX_ char *s)
6606 {
6607 char *d;
6608 STRLEN len;
6609 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6610 DEBUG_T( {
6611 if (s)
6612 printbuf("### Saw string before %s\n", s);
6613 else
6614 PerlIO_printf(Perl_debug_log,
6615 "### Saw unterminated string\n");
6616 } );
6617 if (PL_expect == XOPERATOR) {
6618 no_op("String",s);
6619 }
6620 if (!s)
6621 missingterm(NULL, 0);
6622 pl_yylval.ival = OP_CONST;
6623 /* FIXME. I think that this can be const if char *d is replaced by
6624 more localised variables. */
6625 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6626 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6627 pl_yylval.ival = OP_STRINGIFY;
6628 break;
6629 }
6630 }
6631 if (pl_yylval.ival == OP_CONST)
6632 COPLINE_SET_FROM_MULTI_END;
6633 TERM(sublex_start());
6634 }
6635
6636 static int
yyl_backtick(pTHX_ char * s)6637 yyl_backtick(pTHX_ char *s)
6638 {
6639 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6640 DEBUG_T( {
6641 if (s)
6642 printbuf("### Saw backtick string before %s\n", s);
6643 else
6644 PerlIO_printf(Perl_debug_log,
6645 "### Saw unterminated backtick string\n");
6646 } );
6647 if (PL_expect == XOPERATOR)
6648 no_op("Backticks",s);
6649 if (!s)
6650 missingterm(NULL, 0);
6651 pl_yylval.ival = OP_BACKTICK;
6652 TERM(sublex_start());
6653 }
6654
6655 static int
yyl_backslash(pTHX_ char * s)6656 yyl_backslash(pTHX_ char *s)
6657 {
6658 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr && isDIGIT(*s))
6659 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6660 *s, *s);
6661 if (PL_expect == XOPERATOR)
6662 no_op("Backslash",s);
6663 OPERATOR(REFGEN);
6664 }
6665
6666 static void
yyl_data_handle(pTHX)6667 yyl_data_handle(pTHX)
6668 {
6669 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
6670 ? PL_curstash
6671 : PL_defstash;
6672 GV *gv = (GV *)*hv_fetchs(stash, "DATA", 1);
6673
6674 if (!isGV(gv))
6675 gv_init(gv,stash,"DATA",4,0);
6676
6677 GvMULTI_on(gv);
6678 if (!GvIO(gv))
6679 GvIOp(gv) = newIO();
6680 IoIFP(GvIOp(gv)) = PL_rsfp;
6681
6682 /* Mark this internal pseudo-handle as clean */
6683 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6684 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6685 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6686 else
6687 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6688
6689 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6690 /* if the script was opened in binmode, we need to revert
6691 * it to text mode for compatibility; but only iff it has CRs
6692 * XXX this is a questionable hack at best. */
6693 if (PL_bufend-PL_bufptr > 2
6694 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6695 {
6696 Off_t loc = 0;
6697 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6698 loc = PerlIO_tell(PL_rsfp);
6699 (void)PerlIO_seek(PL_rsfp, 0L, 0);
6700 }
6701 if (PerlLIO_setmode(RSFP_FILENO, O_TEXT) != -1) {
6702 if (loc > 0)
6703 PerlIO_seek(PL_rsfp, loc, 0);
6704 }
6705 }
6706 #endif
6707
6708 #ifdef PERLIO_LAYERS
6709 if (!IN_BYTES) {
6710 if (UTF)
6711 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6712 }
6713 #endif
6714
6715 PL_rsfp = NULL;
6716 }
6717
6718 PERL_STATIC_NO_RET void yyl_croak_unrecognised(pTHX_ char*)
6719 __attribute__noreturn__;
6720
6721 PERL_STATIC_NO_RET void
yyl_croak_unrecognised(pTHX_ char * s)6722 yyl_croak_unrecognised(pTHX_ char *s)
6723 {
6724 SV *dsv = newSVpvs_flags("", SVs_TEMP);
6725 const char *c;
6726 char *d;
6727 STRLEN len;
6728
6729 if (UTF) {
6730 STRLEN skiplen = UTF8SKIP(s);
6731 STRLEN stravail = PL_bufend - s;
6732 c = sv_uni_display(dsv, newSVpvn_flags(s,
6733 skiplen > stravail ? stravail : skiplen,
6734 SVs_TEMP | SVf_UTF8),
6735 10, UNI_DISPLAY_ISPRINT);
6736 }
6737 else {
6738 c = Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
6739 }
6740
6741 if (s >= PL_linestart) {
6742 d = PL_linestart;
6743 }
6744 else {
6745 /* somehow (probably due to a parse failure), PL_linestart has advanced
6746 * pass PL_bufptr, get a reasonable beginning of line
6747 */
6748 d = s;
6749 while (d > SvPVX(PL_linestr) && d[-1] && d[-1] != '\n')
6750 --d;
6751 }
6752 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) d, (U8 *) s) : (STRLEN) (s - d);
6753 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
6754 d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)d) : s - UNRECOGNIZED_PRECEDE_COUNT;
6755 }
6756
6757 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %" UTF8f "<-- HERE near column %d", c,
6758 UTF8fARG(UTF, (s - d), d),
6759 (int) len + 1);
6760 }
6761
6762 static int
yyl_require(pTHX_ char * s,I32 orig_keyword)6763 yyl_require(pTHX_ char *s, I32 orig_keyword)
6764 {
6765 s = skipspace(s);
6766 if (isDIGIT(*s)) {
6767 s = force_version(s, FALSE);
6768 }
6769 else if (*s != 'v' || !isDIGIT(s[1])
6770 || (s = force_version(s, TRUE), *s == 'v'))
6771 {
6772 *PL_tokenbuf = '\0';
6773 s = force_word(s,BAREWORD,TRUE,TRUE);
6774 if (isIDFIRST_lazy_if_safe(PL_tokenbuf,
6775 PL_tokenbuf + sizeof(PL_tokenbuf),
6776 UTF))
6777 {
6778 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
6779 GV_ADD | (UTF ? SVf_UTF8 : 0));
6780 }
6781 else if (*s == '<')
6782 yyerror("<> at require-statement should be quotes");
6783 }
6784
6785 if (orig_keyword == KEY_require)
6786 pl_yylval.ival = 1;
6787 else
6788 pl_yylval.ival = 0;
6789
6790 PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
6791 PL_bufptr = s;
6792 PL_last_uni = PL_oldbufptr;
6793 PL_last_lop_op = OP_REQUIRE;
6794 s = skipspace(s);
6795 return REPORT( (int)REQUIRE );
6796 }
6797
6798 static int
yyl_foreach(pTHX_ char * s)6799 yyl_foreach(pTHX_ char *s)
6800 {
6801 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
6802 return REPORT(0);
6803 pl_yylval.ival = CopLINE(PL_curcop);
6804 s = skipspace(s);
6805 if (PL_expect == XSTATE && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
6806 char *p = s;
6807 SSize_t s_off = s - SvPVX(PL_linestr);
6808 STRLEN len;
6809
6810 if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "my") && isSPACE(p[2])) {
6811 p += 2;
6812 }
6813 else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "our") && isSPACE(p[3])) {
6814 p += 3;
6815 }
6816
6817 p = skipspace(p);
6818 /* skip optional package name, as in "for my abc $x (..)" */
6819 if (isIDFIRST_lazy_if_safe(p, PL_bufend, UTF)) {
6820 p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6821 p = skipspace(p);
6822 }
6823 if (*p != '$' && *p != '\\')
6824 Perl_croak(aTHX_ "Missing $ on loop variable");
6825
6826 /* The buffer may have been reallocated, update s */
6827 s = SvPVX(PL_linestr) + s_off;
6828 }
6829 OPERATOR(FOR);
6830 }
6831
6832 static int
yyl_do(pTHX_ char * s,I32 orig_keyword)6833 yyl_do(pTHX_ char *s, I32 orig_keyword)
6834 {
6835 s = skipspace(s);
6836 if (*s == '{')
6837 PRETERMBLOCK(DO);
6838 if (*s != '\'') {
6839 char *d;
6840 STRLEN len;
6841 *PL_tokenbuf = '&';
6842 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6843 1, &len);
6844 if (len && memNEs(PL_tokenbuf+1, len, "CORE")
6845 && !keyword(PL_tokenbuf + 1, len, 0)) {
6846 SSize_t off = s-SvPVX(PL_linestr);
6847 d = skipspace(d);
6848 s = SvPVX(PL_linestr)+off;
6849 if (*d == '(') {
6850 force_ident_maybe_lex('&');
6851 s = d;
6852 }
6853 }
6854 }
6855 if (orig_keyword == KEY_do)
6856 pl_yylval.ival = 1;
6857 else
6858 pl_yylval.ival = 0;
6859 OPERATOR(DO);
6860 }
6861
6862 static int
yyl_my(pTHX_ char * s,I32 my)6863 yyl_my(pTHX_ char *s, I32 my)
6864 {
6865 if (PL_in_my) {
6866 PL_bufptr = s;
6867 yyerror(Perl_form(aTHX_
6868 "Can't redeclare \"%s\" in \"%s\"",
6869 my == KEY_my ? "my" :
6870 my == KEY_state ? "state" : "our",
6871 PL_in_my == KEY_my ? "my" :
6872 PL_in_my == KEY_state ? "state" : "our"));
6873 }
6874 PL_in_my = (U16)my;
6875 s = skipspace(s);
6876 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
6877 STRLEN len;
6878 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6879 if (memEQs(PL_tokenbuf, len, "sub"))
6880 return yyl_sub(aTHX_ s, my);
6881 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
6882 if (!PL_in_my_stash) {
6883 char tmpbuf[1024];
6884 int i;
6885 PL_bufptr = s;
6886 i = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
6887 PERL_MY_SNPRINTF_POST_GUARD(i, sizeof(tmpbuf));
6888 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
6889 }
6890 }
6891 else if (*s == '\\') {
6892 if (!FEATURE_MYREF_IS_ENABLED)
6893 Perl_croak(aTHX_ "The experimental declared_refs "
6894 "feature is not enabled");
6895 Perl_ck_warner_d(aTHX_
6896 packWARN(WARN_EXPERIMENTAL__DECLARED_REFS),
6897 "Declaring references is experimental");
6898 }
6899 OPERATOR(MY);
6900 }
6901
6902 static int yyl_try(pTHX_ char*);
6903
6904 static bool
yyl_eol_needs_semicolon(pTHX_ char ** ps)6905 yyl_eol_needs_semicolon(pTHX_ char **ps)
6906 {
6907 char *s = *ps;
6908 if (PL_lex_state != LEX_NORMAL
6909 || (PL_in_eval && !PL_rsfp && !PL_parser->filtered))
6910 {
6911 const bool in_comment = *s == '#';
6912 char *d;
6913 if (*s == '#' && s == PL_linestart && PL_in_eval
6914 && !PL_rsfp && !PL_parser->filtered) {
6915 /* handle eval qq[#line 1 "foo"\n ...] */
6916 CopLINE_dec(PL_curcop);
6917 incline(s, PL_bufend);
6918 }
6919 d = s;
6920 while (d < PL_bufend && *d != '\n')
6921 d++;
6922 if (d < PL_bufend)
6923 d++;
6924 s = d;
6925 if (in_comment && d == PL_bufend
6926 && PL_lex_state == LEX_INTERPNORMAL
6927 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6928 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
6929 else
6930 incline(s, PL_bufend);
6931 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
6932 PL_lex_state = LEX_FORMLINE;
6933 force_next(FORMRBRACK);
6934 *ps = s;
6935 return TRUE;
6936 }
6937 }
6938 else {
6939 while (s < PL_bufend && *s != '\n')
6940 s++;
6941 if (s < PL_bufend) {
6942 s++;
6943 if (s < PL_bufend)
6944 incline(s, PL_bufend);
6945 }
6946 }
6947 *ps = s;
6948 return FALSE;
6949 }
6950
6951 static int
yyl_fake_eof(pTHX_ U32 fake_eof,bool bof,char * s)6952 yyl_fake_eof(pTHX_ U32 fake_eof, bool bof, char *s)
6953 {
6954 char *d;
6955
6956 goto start;
6957
6958 do {
6959 fake_eof = 0;
6960 bof = cBOOL(PL_rsfp);
6961 start:
6962
6963 PL_bufptr = PL_bufend;
6964 COPLINE_INC_WITH_HERELINES;
6965 if (!lex_next_chunk(fake_eof)) {
6966 CopLINE_dec(PL_curcop);
6967 s = PL_bufptr;
6968 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
6969 }
6970 CopLINE_dec(PL_curcop);
6971 s = PL_bufptr;
6972 /* If it looks like the start of a BOM or raw UTF-16,
6973 * check if it in fact is. */
6974 if (bof && PL_rsfp
6975 && ( *s == 0
6976 || *(U8*)s == BOM_UTF8_FIRST_BYTE
6977 || *(U8*)s >= 0xFE
6978 || s[1] == 0))
6979 {
6980 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
6981 bof = (offset == (Off_t)SvCUR(PL_linestr));
6982 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
6983 /* offset may include swallowed CR */
6984 if (!bof)
6985 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
6986 #endif
6987 if (bof) {
6988 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
6989 s = swallow_bom((U8*)s);
6990 }
6991 }
6992 if (PL_parser->in_pod) {
6993 /* Incest with pod. */
6994 if ( memBEGINPs(s, (STRLEN) (PL_bufend - s), "=cut")
6995 && !isALPHA(s[4]))
6996 {
6997 SvPVCLEAR(PL_linestr);
6998 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
6999 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7000 PL_last_lop = PL_last_uni = NULL;
7001 PL_parser->in_pod = 0;
7002 }
7003 }
7004 if (PL_rsfp || PL_parser->filtered)
7005 incline(s, PL_bufend);
7006 } while (PL_parser->in_pod);
7007
7008 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
7009 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7010 PL_last_lop = PL_last_uni = NULL;
7011 if (CopLINE(PL_curcop) == 1) {
7012 while (s < PL_bufend && isSPACE(*s))
7013 s++;
7014 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
7015 s++;
7016 d = NULL;
7017 if (!PL_in_eval) {
7018 if (*s == '#' && *(s+1) == '!')
7019 d = s + 2;
7020 #ifdef ALTERNATE_SHEBANG
7021 else {
7022 static char const as[] = ALTERNATE_SHEBANG;
7023 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
7024 d = s + (sizeof(as) - 1);
7025 }
7026 #endif /* ALTERNATE_SHEBANG */
7027 }
7028 if (d) {
7029 char *ipath;
7030 char *ipathend;
7031
7032 while (isSPACE(*d))
7033 d++;
7034 ipath = d;
7035 while (*d && !isSPACE(*d))
7036 d++;
7037 ipathend = d;
7038
7039 #ifdef ARG_ZERO_IS_SCRIPT
7040 if (ipathend > ipath) {
7041 /*
7042 * HP-UX (at least) sets argv[0] to the script name,
7043 * which makes $^X incorrect. And Digital UNIX and Linux,
7044 * at least, set argv[0] to the basename of the Perl
7045 * interpreter. So, having found "#!", we'll set it right.
7046 */
7047 SV* copfilesv = CopFILESV(PL_curcop);
7048 if (copfilesv) {
7049 SV * const x =
7050 GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
7051 SVt_PV)); /* $^X */
7052 assert(SvPOK(x) || SvGMAGICAL(x));
7053 if (sv_eq(x, copfilesv)) {
7054 sv_setpvn(x, ipath, ipathend - ipath);
7055 SvSETMAGIC(x);
7056 }
7057 else {
7058 STRLEN blen;
7059 STRLEN llen;
7060 const char *bstart = SvPV_const(copfilesv, blen);
7061 const char * const lstart = SvPV_const(x, llen);
7062 if (llen < blen) {
7063 bstart += blen - llen;
7064 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
7065 sv_setpvn(x, ipath, ipathend - ipath);
7066 SvSETMAGIC(x);
7067 }
7068 }
7069 }
7070 }
7071 else {
7072 /* Anything to do if no copfilesv? */
7073 }
7074 TAINT_NOT; /* $^X is always tainted, but that's OK */
7075 }
7076 #endif /* ARG_ZERO_IS_SCRIPT */
7077
7078 /*
7079 * Look for options.
7080 */
7081 d = instr(s,"perl -");
7082 if (!d) {
7083 d = instr(s,"perl");
7084 #if defined(DOSISH)
7085 /* avoid getting into infinite loops when shebang
7086 * line contains "Perl" rather than "perl" */
7087 if (!d) {
7088 for (d = ipathend-4; d >= ipath; --d) {
7089 if (isALPHA_FOLD_EQ(*d, 'p')
7090 && !ibcmp(d, "perl", 4))
7091 {
7092 break;
7093 }
7094 }
7095 if (d < ipath)
7096 d = NULL;
7097 }
7098 #endif
7099 }
7100 #ifdef ALTERNATE_SHEBANG
7101 /*
7102 * If the ALTERNATE_SHEBANG on this system starts with a
7103 * character that can be part of a Perl expression, then if
7104 * we see it but not "perl", we're probably looking at the
7105 * start of Perl code, not a request to hand off to some
7106 * other interpreter. Similarly, if "perl" is there, but
7107 * not in the first 'word' of the line, we assume the line
7108 * contains the start of the Perl program.
7109 */
7110 if (d && *s != '#') {
7111 const char *c = ipath;
7112 while (*c && !memCHRs("; \t\r\n\f\v#", *c))
7113 c++;
7114 if (c < d)
7115 d = NULL; /* "perl" not in first word; ignore */
7116 else
7117 *s = '#'; /* Don't try to parse shebang line */
7118 }
7119 #endif /* ALTERNATE_SHEBANG */
7120 if (!d
7121 && *s == '#'
7122 && ipathend > ipath
7123 && !PL_minus_c
7124 && !instr(s,"indir")
7125 && instr(PL_origargv[0],"perl"))
7126 {
7127 dVAR;
7128 char **newargv;
7129
7130 *ipathend = '\0';
7131 s = ipathend + 1;
7132 while (s < PL_bufend && isSPACE(*s))
7133 s++;
7134 if (s < PL_bufend) {
7135 Newx(newargv,PL_origargc+3,char*);
7136 newargv[1] = s;
7137 while (s < PL_bufend && !isSPACE(*s))
7138 s++;
7139 *s = '\0';
7140 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
7141 }
7142 else
7143 newargv = PL_origargv;
7144 newargv[0] = ipath;
7145 PERL_FPU_PRE_EXEC
7146 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
7147 PERL_FPU_POST_EXEC
7148 Perl_croak(aTHX_ "Can't exec %s", ipath);
7149 }
7150 if (d) {
7151 while (*d && !isSPACE(*d))
7152 d++;
7153 while (SPACE_OR_TAB(*d))
7154 d++;
7155
7156 if (*d++ == '-') {
7157 const bool switches_done = PL_doswitches;
7158 const U32 oldpdb = PL_perldb;
7159 const bool oldn = PL_minus_n;
7160 const bool oldp = PL_minus_p;
7161 const char *d1 = d;
7162
7163 do {
7164 bool baduni = FALSE;
7165 if (*d1 == 'C') {
7166 const char *d2 = d1 + 1;
7167 if (parse_unicode_opts((const char **)&d2)
7168 != PL_unicode)
7169 baduni = TRUE;
7170 }
7171 if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) {
7172 const char * const m = d1;
7173 while (*d1 && !isSPACE(*d1))
7174 d1++;
7175 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
7176 (int)(d1 - m), m);
7177 }
7178 d1 = moreswitches(d1);
7179 } while (d1);
7180 if (PL_doswitches && !switches_done) {
7181 int argc = PL_origargc;
7182 char **argv = PL_origargv;
7183 do {
7184 argc--,argv++;
7185 } while (argc && argv[0][0] == '-' && argv[0][1]);
7186 init_argv_symbols(argc,argv);
7187 }
7188 if ( (PERLDB_LINE_OR_SAVESRC && !oldpdb)
7189 || ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
7190 /* if we have already added "LINE: while (<>) {",
7191 we must not do it again */
7192 {
7193 SvPVCLEAR(PL_linestr);
7194 PL_bufptr = PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
7195 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7196 PL_last_lop = PL_last_uni = NULL;
7197 PL_preambled = FALSE;
7198 if (PERLDB_LINE_OR_SAVESRC)
7199 (void)gv_fetchfile(PL_origfilename);
7200 return YYL_RETRY;
7201 }
7202 }
7203 }
7204 }
7205 }
7206
7207 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
7208 PL_lex_state = LEX_FORMLINE;
7209 force_next(FORMRBRACK);
7210 TOKEN(';');
7211 }
7212
7213 PL_bufptr = s;
7214 return YYL_RETRY;
7215 }
7216
7217 static int
yyl_fatcomma(pTHX_ char * s,STRLEN len)7218 yyl_fatcomma(pTHX_ char *s, STRLEN len)
7219 {
7220 CLINE;
7221 pl_yylval.opval
7222 = newSVOP(OP_CONST, 0,
7223 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
7224 pl_yylval.opval->op_private = OPpCONST_BARE;
7225 TERM(BAREWORD);
7226 }
7227
7228 static int
yyl_safe_bareword(pTHX_ char * s,const char lastchar)7229 yyl_safe_bareword(pTHX_ char *s, const char lastchar)
7230 {
7231 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7232 && PL_parser->saw_infix_sigil)
7233 {
7234 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7235 "Operator or semicolon missing before %c%" UTF8f,
7236 lastchar,
7237 UTF8fARG(UTF, strlen(PL_tokenbuf),
7238 PL_tokenbuf));
7239 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7240 "Ambiguous use of %c resolved as operator %c",
7241 lastchar, lastchar);
7242 }
7243 TOKEN(BAREWORD);
7244 }
7245
7246 static int
yyl_constant_op(pTHX_ char * s,SV * sv,CV * cv,OP * rv2cv_op,PADOFFSET off)7247 yyl_constant_op(pTHX_ char *s, SV *sv, CV *cv, OP *rv2cv_op, PADOFFSET off)
7248 {
7249 if (sv) {
7250 op_free(rv2cv_op);
7251 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7252 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7253 if (SvTYPE(sv) == SVt_PVAV)
7254 pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7255 pl_yylval.opval);
7256 else {
7257 pl_yylval.opval->op_private = 0;
7258 pl_yylval.opval->op_folded = 1;
7259 pl_yylval.opval->op_flags |= OPf_SPECIAL;
7260 }
7261 TOKEN(BAREWORD);
7262 }
7263
7264 op_free(pl_yylval.opval);
7265 pl_yylval.opval =
7266 off ? newCVREF(0, rv2cv_op) : rv2cv_op;
7267 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7268 PL_last_lop = PL_oldbufptr;
7269 PL_last_lop_op = OP_ENTERSUB;
7270
7271 /* Is there a prototype? */
7272 if (SvPOK(cv)) {
7273 int k = yyl_subproto(aTHX_ s, cv);
7274 if (k != KEY_NULL)
7275 return k;
7276 }
7277
7278 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7279 PL_expect = XTERM;
7280 force_next(off ? PRIVATEREF : BAREWORD);
7281 if (!PL_lex_allbrackets
7282 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7283 {
7284 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7285 }
7286
7287 TOKEN(NOAMP);
7288 }
7289
7290 /* Honour "reserved word" warnings, and enforce strict subs */
7291 static void
yyl_strictwarn_bareword(pTHX_ const char lastchar)7292 yyl_strictwarn_bareword(pTHX_ const char lastchar)
7293 {
7294 /* after "print" and similar functions (corresponding to
7295 * "F? L" in opcode.pl), whatever wasn't already parsed as
7296 * a filehandle should be subject to "strict subs".
7297 * Likewise for the optional indirect-object argument to system
7298 * or exec, which can't be a bareword */
7299 if ((PL_last_lop_op == OP_PRINT
7300 || PL_last_lop_op == OP_PRTF
7301 || PL_last_lop_op == OP_SAY
7302 || PL_last_lop_op == OP_SYSTEM
7303 || PL_last_lop_op == OP_EXEC)
7304 && (PL_hints & HINT_STRICT_SUBS))
7305 {
7306 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7307 }
7308
7309 if (lastchar != '-' && ckWARN(WARN_RESERVED)) {
7310 char *d = PL_tokenbuf;
7311 while (isLOWER(*d))
7312 d++;
7313 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0)) {
7314 /* PL_warn_reserved is constant */
7315 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
7316 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7317 PL_tokenbuf);
7318 GCC_DIAG_RESTORE_STMT;
7319 }
7320 }
7321 }
7322
7323 static int
yyl_just_a_word(pTHX_ char * s,STRLEN len,I32 orig_keyword,struct code c)7324 yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 orig_keyword, struct code c)
7325 {
7326 int pkgname = 0;
7327 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7328 bool safebw;
7329 bool no_op_error = FALSE;
7330 /* Use this var to track whether intuit_method has been
7331 called. intuit_method returns 0 or > 255. */
7332 int key = 1;
7333
7334 if (PL_expect == XOPERATOR) {
7335 if (PL_bufptr == PL_linestart) {
7336 CopLINE_dec(PL_curcop);
7337 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7338 CopLINE_inc(PL_curcop);
7339 }
7340 else
7341 /* We want to call no_op with s pointing after the
7342 bareword, so defer it. But we want it to come
7343 before the Bad name croak. */
7344 no_op_error = TRUE;
7345 }
7346
7347 /* Get the rest if it looks like a package qualifier */
7348
7349 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7350 STRLEN morelen;
7351 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7352 TRUE, &morelen);
7353 if (no_op_error) {
7354 no_op("Bareword",s);
7355 no_op_error = FALSE;
7356 }
7357 if (!morelen)
7358 Perl_croak(aTHX_ "Bad name after %" UTF8f "%s",
7359 UTF8fARG(UTF, len, PL_tokenbuf),
7360 *s == '\'' ? "'" : "::");
7361 len += morelen;
7362 pkgname = 1;
7363 }
7364
7365 if (no_op_error)
7366 no_op("Bareword",s);
7367
7368 /* See if the name is "Foo::",
7369 in which case Foo is a bareword
7370 (and a package name). */
7371
7372 if (len > 2 && PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':') {
7373 if (ckWARN(WARN_BAREWORD)
7374 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7375 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7376 "Bareword \"%" UTF8f
7377 "\" refers to nonexistent package",
7378 UTF8fARG(UTF, len, PL_tokenbuf));
7379 len -= 2;
7380 PL_tokenbuf[len] = '\0';
7381 c.gv = NULL;
7382 c.gvp = 0;
7383 safebw = TRUE;
7384 }
7385 else {
7386 safebw = FALSE;
7387 }
7388
7389 /* if we saw a global override before, get the right name */
7390
7391 if (!c.sv)
7392 c.sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len);
7393 if (c.gvp) {
7394 SV *sv = newSVpvs("CORE::GLOBAL::");
7395 sv_catsv(sv, c.sv);
7396 SvREFCNT_dec(c.sv);
7397 c.sv = sv;
7398 }
7399
7400 /* Presume this is going to be a bareword of some sort. */
7401 CLINE;
7402 pl_yylval.opval = newSVOP(OP_CONST, 0, c.sv);
7403 pl_yylval.opval->op_private = OPpCONST_BARE;
7404
7405 /* And if "Foo::", then that's what it certainly is. */
7406 if (safebw)
7407 return yyl_safe_bareword(aTHX_ s, lastchar);
7408
7409 if (!c.off) {
7410 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(c.sv));
7411 const_op->op_private = OPpCONST_BARE;
7412 c.rv2cv_op = newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op);
7413 c.cv = c.lex
7414 ? isGV(c.gv)
7415 ? GvCV(c.gv)
7416 : SvROK(c.gv) && SvTYPE(SvRV(c.gv)) == SVt_PVCV
7417 ? (CV *)SvRV(c.gv)
7418 : ((CV *)c.gv)
7419 : rv2cv_op_cv(c.rv2cv_op, RV2CVOPCV_RETURN_STUB);
7420 }
7421
7422 /* See if it's the indirect object for a list operator. */
7423
7424 if (PL_oldoldbufptr
7425 && PL_oldoldbufptr < PL_bufptr
7426 && (PL_oldoldbufptr == PL_last_lop
7427 || PL_oldoldbufptr == PL_last_uni)
7428 && /* NO SKIPSPACE BEFORE HERE! */
7429 (PL_expect == XREF
7430 || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7)
7431 == OA_FILEREF))
7432 {
7433 bool immediate_paren = *s == '(';
7434 SSize_t s_off;
7435
7436 /* (Now we can afford to cross potential line boundary.) */
7437 s = skipspace(s);
7438
7439 /* intuit_method() can indirectly call lex_next_chunk(),
7440 * invalidating s
7441 */
7442 s_off = s - SvPVX(PL_linestr);
7443 /* Two barewords in a row may indicate method call. */
7444 if ( ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
7445 || *s == '$')
7446 && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv)))
7447 {
7448 /* the code at method: doesn't use s */
7449 goto method;
7450 }
7451 s = SvPVX(PL_linestr) + s_off;
7452
7453 /* If not a declared subroutine, it's an indirect object. */
7454 /* (But it's an indir obj regardless for sort.) */
7455 /* Also, if "_" follows a filetest operator, it's a bareword */
7456
7457 if (
7458 ( !immediate_paren && (PL_last_lop_op == OP_SORT
7459 || (!c.cv
7460 && (PL_last_lop_op != OP_MAPSTART
7461 && PL_last_lop_op != OP_GREPSTART))))
7462 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7463 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK)
7464 == OA_FILESTATOP))
7465 )
7466 {
7467 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7468 yyl_strictwarn_bareword(aTHX_ lastchar);
7469 op_free(c.rv2cv_op);
7470 return yyl_safe_bareword(aTHX_ s, lastchar);
7471 }
7472 }
7473
7474 PL_expect = XOPERATOR;
7475 s = skipspace(s);
7476
7477 /* Is this a word before a => operator? */
7478 if (*s == '=' && s[1] == '>' && !pkgname) {
7479 op_free(c.rv2cv_op);
7480 CLINE;
7481 if (c.gvp || (c.lex && !c.off)) {
7482 assert (cSVOPx(pl_yylval.opval)->op_sv == c.sv);
7483 /* This is our own scalar, created a few lines
7484 above, so this is safe. */
7485 SvREADONLY_off(c.sv);
7486 sv_setpv(c.sv, PL_tokenbuf);
7487 if (UTF && !IN_BYTES
7488 && is_utf8_string((U8*)PL_tokenbuf, len))
7489 SvUTF8_on(c.sv);
7490 SvREADONLY_on(c.sv);
7491 }
7492 TERM(BAREWORD);
7493 }
7494
7495 /* If followed by a paren, it's certainly a subroutine. */
7496 if (*s == '(') {
7497 CLINE;
7498 if (c.cv) {
7499 char *d = s + 1;
7500 while (SPACE_OR_TAB(*d))
7501 d++;
7502 if (*d == ')' && (c.sv = cv_const_sv_or_av(c.cv)))
7503 return yyl_constant_op(aTHX_ d + 1, c.sv, c.cv, c.rv2cv_op, c.off);
7504 }
7505 NEXTVAL_NEXTTOKE.opval =
7506 c.off ? c.rv2cv_op : pl_yylval.opval;
7507 if (c.off)
7508 op_free(pl_yylval.opval), force_next(PRIVATEREF);
7509 else op_free(c.rv2cv_op), force_next(BAREWORD);
7510 pl_yylval.ival = 0;
7511 TOKEN('&');
7512 }
7513
7514 /* If followed by var or block, call it a method (unless sub) */
7515
7516 if ((*s == '$' || *s == '{') && !c.cv && FEATURE_INDIRECT_IS_ENABLED) {
7517 op_free(c.rv2cv_op);
7518 PL_last_lop = PL_oldbufptr;
7519 PL_last_lop_op = OP_METHOD;
7520 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7521 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7522 PL_expect = XBLOCKTERM;
7523 PL_bufptr = s;
7524 return REPORT(METHOD);
7525 }
7526
7527 /* If followed by a bareword, see if it looks like indir obj. */
7528
7529 if ( key == 1
7530 && !orig_keyword
7531 && (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '$')
7532 && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv)))
7533 {
7534 method:
7535 if (c.lex && !c.off) {
7536 assert(cSVOPx(pl_yylval.opval)->op_sv == c.sv);
7537 SvREADONLY_off(c.sv);
7538 sv_setpvn(c.sv, PL_tokenbuf, len);
7539 if (UTF && !IN_BYTES
7540 && is_utf8_string((U8*)PL_tokenbuf, len))
7541 SvUTF8_on(c.sv);
7542 else SvUTF8_off(c.sv);
7543 }
7544 op_free(c.rv2cv_op);
7545 if (key == METHOD && !PL_lex_allbrackets
7546 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7547 {
7548 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7549 }
7550 return REPORT(key);
7551 }
7552
7553 /* Not a method, so call it a subroutine (if defined) */
7554
7555 if (c.cv) {
7556 /* Check for a constant sub */
7557 c.sv = cv_const_sv_or_av(c.cv);
7558 return yyl_constant_op(aTHX_ s, c.sv, c.cv, c.rv2cv_op, c.off);
7559 }
7560
7561 /* Call it a bare word */
7562
7563 if (PL_hints & HINT_STRICT_SUBS)
7564 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7565 else
7566 yyl_strictwarn_bareword(aTHX_ lastchar);
7567
7568 op_free(c.rv2cv_op);
7569
7570 return yyl_safe_bareword(aTHX_ s, lastchar);
7571 }
7572
7573 static int
yyl_word_or_keyword(pTHX_ char * s,STRLEN len,I32 key,I32 orig_keyword,struct code c)7574 yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct code c)
7575 {
7576 switch (key) {
7577 default: /* not a keyword */
7578 return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
7579
7580 case KEY___FILE__:
7581 FUN0OP( newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0)) );
7582
7583 case KEY___LINE__:
7584 FUN0OP(
7585 newSVOP(OP_CONST, 0,
7586 Perl_newSVpvf(aTHX_ "%" IVdf, (IV)CopLINE(PL_curcop)))
7587 );
7588
7589 case KEY___PACKAGE__:
7590 FUN0OP(
7591 newSVOP(OP_CONST, 0, (PL_curstash
7592 ? newSVhek(HvNAME_HEK(PL_curstash))
7593 : &PL_sv_undef))
7594 );
7595
7596 case KEY___DATA__:
7597 case KEY___END__:
7598 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D'))
7599 yyl_data_handle(aTHX);
7600 return yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s);
7601
7602 case KEY___SUB__:
7603 FUN0OP(CvCLONE(PL_compcv)
7604 ? newOP(OP_RUNCV, 0)
7605 : newPVOP(OP_RUNCV,0,NULL));
7606
7607 case KEY_AUTOLOAD:
7608 case KEY_DESTROY:
7609 case KEY_BEGIN:
7610 case KEY_UNITCHECK:
7611 case KEY_CHECK:
7612 case KEY_INIT:
7613 case KEY_END:
7614 if (PL_expect == XSTATE)
7615 return yyl_sub(aTHX_ PL_bufptr, key);
7616 return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
7617
7618 case KEY_abs:
7619 UNI(OP_ABS);
7620
7621 case KEY_alarm:
7622 UNI(OP_ALARM);
7623
7624 case KEY_accept:
7625 LOP(OP_ACCEPT,XTERM);
7626
7627 case KEY_and:
7628 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7629 return REPORT(0);
7630 OPERATOR(ANDOP);
7631
7632 case KEY_atan2:
7633 LOP(OP_ATAN2,XTERM);
7634
7635 case KEY_bind:
7636 LOP(OP_BIND,XTERM);
7637
7638 case KEY_binmode:
7639 LOP(OP_BINMODE,XTERM);
7640
7641 case KEY_bless:
7642 LOP(OP_BLESS,XTERM);
7643
7644 case KEY_break:
7645 FUN0(OP_BREAK);
7646
7647 case KEY_chop:
7648 UNI(OP_CHOP);
7649
7650 case KEY_continue:
7651 /* We have to disambiguate the two senses of
7652 "continue". If the next token is a '{' then
7653 treat it as the start of a continue block;
7654 otherwise treat it as a control operator.
7655 */
7656 s = skipspace(s);
7657 if (*s == '{')
7658 PREBLOCK(CONTINUE);
7659 else
7660 FUN0(OP_CONTINUE);
7661
7662 case KEY_chdir:
7663 /* may use HOME */
7664 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7665 UNI(OP_CHDIR);
7666
7667 case KEY_close:
7668 UNI(OP_CLOSE);
7669
7670 case KEY_closedir:
7671 UNI(OP_CLOSEDIR);
7672
7673 case KEY_cmp:
7674 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7675 return REPORT(0);
7676 NCEop(OP_SCMP);
7677
7678 case KEY_caller:
7679 UNI(OP_CALLER);
7680
7681 case KEY_crypt:
7682 #ifdef FCRYPT
7683 if (!PL_cryptseen) {
7684 PL_cryptseen = TRUE;
7685 init_des();
7686 }
7687 #endif
7688 LOP(OP_CRYPT,XTERM);
7689
7690 case KEY_chmod:
7691 LOP(OP_CHMOD,XTERM);
7692
7693 case KEY_chown:
7694 LOP(OP_CHOWN,XTERM);
7695
7696 case KEY_connect:
7697 LOP(OP_CONNECT,XTERM);
7698
7699 case KEY_chr:
7700 UNI(OP_CHR);
7701
7702 case KEY_cos:
7703 UNI(OP_COS);
7704
7705 case KEY_chroot:
7706 UNI(OP_CHROOT);
7707
7708 case KEY_default:
7709 PREBLOCK(DEFAULT);
7710
7711 case KEY_do:
7712 return yyl_do(aTHX_ s, orig_keyword);
7713
7714 case KEY_die:
7715 PL_hints |= HINT_BLOCK_SCOPE;
7716 LOP(OP_DIE,XTERM);
7717
7718 case KEY_defined:
7719 UNI(OP_DEFINED);
7720
7721 case KEY_delete:
7722 UNI(OP_DELETE);
7723
7724 case KEY_dbmopen:
7725 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7726 STR_WITH_LEN("NDBM_File::"),
7727 STR_WITH_LEN("DB_File::"),
7728 STR_WITH_LEN("GDBM_File::"),
7729 STR_WITH_LEN("SDBM_File::"),
7730 STR_WITH_LEN("ODBM_File::"),
7731 NULL);
7732 LOP(OP_DBMOPEN,XTERM);
7733
7734 case KEY_dbmclose:
7735 UNI(OP_DBMCLOSE);
7736
7737 case KEY_dump:
7738 LOOPX(OP_DUMP);
7739
7740 case KEY_else:
7741 PREBLOCK(ELSE);
7742
7743 case KEY_elsif:
7744 pl_yylval.ival = CopLINE(PL_curcop);
7745 OPERATOR(ELSIF);
7746
7747 case KEY_eq:
7748 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7749 return REPORT(0);
7750 ChEop(OP_SEQ);
7751
7752 case KEY_exists:
7753 UNI(OP_EXISTS);
7754
7755 case KEY_exit:
7756 UNI(OP_EXIT);
7757
7758 case KEY_eval:
7759 s = skipspace(s);
7760 if (*s == '{') { /* block eval */
7761 PL_expect = XTERMBLOCK;
7762 UNIBRACK(OP_ENTERTRY);
7763 }
7764 else { /* string eval */
7765 PL_expect = XTERM;
7766 UNIBRACK(OP_ENTEREVAL);
7767 }
7768
7769 case KEY_evalbytes:
7770 PL_expect = XTERM;
7771 UNIBRACK(-OP_ENTEREVAL);
7772
7773 case KEY_eof:
7774 UNI(OP_EOF);
7775
7776 case KEY_exp:
7777 UNI(OP_EXP);
7778
7779 case KEY_each:
7780 UNI(OP_EACH);
7781
7782 case KEY_exec:
7783 LOP(OP_EXEC,XREF);
7784
7785 case KEY_endhostent:
7786 FUN0(OP_EHOSTENT);
7787
7788 case KEY_endnetent:
7789 FUN0(OP_ENETENT);
7790
7791 case KEY_endservent:
7792 FUN0(OP_ESERVENT);
7793
7794 case KEY_endprotoent:
7795 FUN0(OP_EPROTOENT);
7796
7797 case KEY_endpwent:
7798 FUN0(OP_EPWENT);
7799
7800 case KEY_endgrent:
7801 FUN0(OP_EGRENT);
7802
7803 case KEY_for:
7804 case KEY_foreach:
7805 return yyl_foreach(aTHX_ s);
7806
7807 case KEY_formline:
7808 LOP(OP_FORMLINE,XTERM);
7809
7810 case KEY_fork:
7811 FUN0(OP_FORK);
7812
7813 case KEY_fc:
7814 UNI(OP_FC);
7815
7816 case KEY_fcntl:
7817 LOP(OP_FCNTL,XTERM);
7818
7819 case KEY_fileno:
7820 UNI(OP_FILENO);
7821
7822 case KEY_flock:
7823 LOP(OP_FLOCK,XTERM);
7824
7825 case KEY_gt:
7826 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7827 return REPORT(0);
7828 ChRop(OP_SGT);
7829
7830 case KEY_ge:
7831 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7832 return REPORT(0);
7833 ChRop(OP_SGE);
7834
7835 case KEY_grep:
7836 LOP(OP_GREPSTART, XREF);
7837
7838 case KEY_goto:
7839 LOOPX(OP_GOTO);
7840
7841 case KEY_gmtime:
7842 UNI(OP_GMTIME);
7843
7844 case KEY_getc:
7845 UNIDOR(OP_GETC);
7846
7847 case KEY_getppid:
7848 FUN0(OP_GETPPID);
7849
7850 case KEY_getpgrp:
7851 UNI(OP_GETPGRP);
7852
7853 case KEY_getpriority:
7854 LOP(OP_GETPRIORITY,XTERM);
7855
7856 case KEY_getprotobyname:
7857 UNI(OP_GPBYNAME);
7858
7859 case KEY_getprotobynumber:
7860 LOP(OP_GPBYNUMBER,XTERM);
7861
7862 case KEY_getprotoent:
7863 FUN0(OP_GPROTOENT);
7864
7865 case KEY_getpwent:
7866 FUN0(OP_GPWENT);
7867
7868 case KEY_getpwnam:
7869 UNI(OP_GPWNAM);
7870
7871 case KEY_getpwuid:
7872 UNI(OP_GPWUID);
7873
7874 case KEY_getpeername:
7875 UNI(OP_GETPEERNAME);
7876
7877 case KEY_gethostbyname:
7878 UNI(OP_GHBYNAME);
7879
7880 case KEY_gethostbyaddr:
7881 LOP(OP_GHBYADDR,XTERM);
7882
7883 case KEY_gethostent:
7884 FUN0(OP_GHOSTENT);
7885
7886 case KEY_getnetbyname:
7887 UNI(OP_GNBYNAME);
7888
7889 case KEY_getnetbyaddr:
7890 LOP(OP_GNBYADDR,XTERM);
7891
7892 case KEY_getnetent:
7893 FUN0(OP_GNETENT);
7894
7895 case KEY_getservbyname:
7896 LOP(OP_GSBYNAME,XTERM);
7897
7898 case KEY_getservbyport:
7899 LOP(OP_GSBYPORT,XTERM);
7900
7901 case KEY_getservent:
7902 FUN0(OP_GSERVENT);
7903
7904 case KEY_getsockname:
7905 UNI(OP_GETSOCKNAME);
7906
7907 case KEY_getsockopt:
7908 LOP(OP_GSOCKOPT,XTERM);
7909
7910 case KEY_getgrent:
7911 FUN0(OP_GGRENT);
7912
7913 case KEY_getgrnam:
7914 UNI(OP_GGRNAM);
7915
7916 case KEY_getgrgid:
7917 UNI(OP_GGRGID);
7918
7919 case KEY_getlogin:
7920 FUN0(OP_GETLOGIN);
7921
7922 case KEY_given:
7923 pl_yylval.ival = CopLINE(PL_curcop);
7924 Perl_ck_warner_d(aTHX_ packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
7925 "given is experimental");
7926 OPERATOR(GIVEN);
7927
7928 case KEY_glob:
7929 LOP( orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB, XTERM );
7930
7931 case KEY_hex:
7932 UNI(OP_HEX);
7933
7934 case KEY_if:
7935 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7936 return REPORT(0);
7937 pl_yylval.ival = CopLINE(PL_curcop);
7938 OPERATOR(IF);
7939
7940 case KEY_index:
7941 LOP(OP_INDEX,XTERM);
7942
7943 case KEY_int:
7944 UNI(OP_INT);
7945
7946 case KEY_ioctl:
7947 LOP(OP_IOCTL,XTERM);
7948
7949 case KEY_isa:
7950 Perl_ck_warner_d(aTHX_
7951 packWARN(WARN_EXPERIMENTAL__ISA), "isa is experimental");
7952 NCRop(OP_ISA);
7953
7954 case KEY_join:
7955 LOP(OP_JOIN,XTERM);
7956
7957 case KEY_keys:
7958 UNI(OP_KEYS);
7959
7960 case KEY_kill:
7961 LOP(OP_KILL,XTERM);
7962
7963 case KEY_last:
7964 LOOPX(OP_LAST);
7965
7966 case KEY_lc:
7967 UNI(OP_LC);
7968
7969 case KEY_lcfirst:
7970 UNI(OP_LCFIRST);
7971
7972 case KEY_local:
7973 OPERATOR(LOCAL);
7974
7975 case KEY_length:
7976 UNI(OP_LENGTH);
7977
7978 case KEY_lt:
7979 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7980 return REPORT(0);
7981 ChRop(OP_SLT);
7982
7983 case KEY_le:
7984 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7985 return REPORT(0);
7986 ChRop(OP_SLE);
7987
7988 case KEY_localtime:
7989 UNI(OP_LOCALTIME);
7990
7991 case KEY_log:
7992 UNI(OP_LOG);
7993
7994 case KEY_link:
7995 LOP(OP_LINK,XTERM);
7996
7997 case KEY_listen:
7998 LOP(OP_LISTEN,XTERM);
7999
8000 case KEY_lock:
8001 UNI(OP_LOCK);
8002
8003 case KEY_lstat:
8004 UNI(OP_LSTAT);
8005
8006 case KEY_m:
8007 s = scan_pat(s,OP_MATCH);
8008 TERM(sublex_start());
8009
8010 case KEY_map:
8011 LOP(OP_MAPSTART, XREF);
8012
8013 case KEY_mkdir:
8014 LOP(OP_MKDIR,XTERM);
8015
8016 case KEY_msgctl:
8017 LOP(OP_MSGCTL,XTERM);
8018
8019 case KEY_msgget:
8020 LOP(OP_MSGGET,XTERM);
8021
8022 case KEY_msgrcv:
8023 LOP(OP_MSGRCV,XTERM);
8024
8025 case KEY_msgsnd:
8026 LOP(OP_MSGSND,XTERM);
8027
8028 case KEY_our:
8029 case KEY_my:
8030 case KEY_state:
8031 return yyl_my(aTHX_ s, key);
8032
8033 case KEY_next:
8034 LOOPX(OP_NEXT);
8035
8036 case KEY_ne:
8037 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8038 return REPORT(0);
8039 ChEop(OP_SNE);
8040
8041 case KEY_no:
8042 s = tokenize_use(0, s);
8043 TOKEN(USE);
8044
8045 case KEY_not:
8046 if (*s == '(' || (s = skipspace(s), *s == '('))
8047 FUN1(OP_NOT);
8048 else {
8049 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8050 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8051 OPERATOR(NOTOP);
8052 }
8053
8054 case KEY_open:
8055 s = skipspace(s);
8056 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
8057 const char *t;
8058 char *d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8059 for (t=d; isSPACE(*t);)
8060 t++;
8061 if ( *t && memCHRs("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8062 /* [perl #16184] */
8063 && !(t[0] == '=' && t[1] == '>')
8064 && !(t[0] == ':' && t[1] == ':')
8065 && !keyword(s, d-s, 0)
8066 ) {
8067 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8068 "Precedence problem: open %" UTF8f " should be open(%" UTF8f ")",
8069 UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8070 }
8071 }
8072 LOP(OP_OPEN,XTERM);
8073
8074 case KEY_or:
8075 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8076 return REPORT(0);
8077 pl_yylval.ival = OP_OR;
8078 OPERATOR(OROP);
8079
8080 case KEY_ord:
8081 UNI(OP_ORD);
8082
8083 case KEY_oct:
8084 UNI(OP_OCT);
8085
8086 case KEY_opendir:
8087 LOP(OP_OPEN_DIR,XTERM);
8088
8089 case KEY_print:
8090 checkcomma(s,PL_tokenbuf,"filehandle");
8091 LOP(OP_PRINT,XREF);
8092
8093 case KEY_printf:
8094 checkcomma(s,PL_tokenbuf,"filehandle");
8095 LOP(OP_PRTF,XREF);
8096
8097 case KEY_prototype:
8098 UNI(OP_PROTOTYPE);
8099
8100 case KEY_push:
8101 LOP(OP_PUSH,XTERM);
8102
8103 case KEY_pop:
8104 UNIDOR(OP_POP);
8105
8106 case KEY_pos:
8107 UNIDOR(OP_POS);
8108
8109 case KEY_pack:
8110 LOP(OP_PACK,XTERM);
8111
8112 case KEY_package:
8113 s = force_word(s,BAREWORD,FALSE,TRUE);
8114 s = skipspace(s);
8115 s = force_strict_version(s);
8116 PREBLOCK(PACKAGE);
8117
8118 case KEY_pipe:
8119 LOP(OP_PIPE_OP,XTERM);
8120
8121 case KEY_q:
8122 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8123 if (!s)
8124 missingterm(NULL, 0);
8125 COPLINE_SET_FROM_MULTI_END;
8126 pl_yylval.ival = OP_CONST;
8127 TERM(sublex_start());
8128
8129 case KEY_quotemeta:
8130 UNI(OP_QUOTEMETA);
8131
8132 case KEY_qw:
8133 return yyl_qw(aTHX_ s, len);
8134
8135 case KEY_qq:
8136 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8137 if (!s)
8138 missingterm(NULL, 0);
8139 pl_yylval.ival = OP_STRINGIFY;
8140 if (SvIVX(PL_lex_stuff) == '\'')
8141 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
8142 TERM(sublex_start());
8143
8144 case KEY_qr:
8145 s = scan_pat(s,OP_QR);
8146 TERM(sublex_start());
8147
8148 case KEY_qx:
8149 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8150 if (!s)
8151 missingterm(NULL, 0);
8152 pl_yylval.ival = OP_BACKTICK;
8153 TERM(sublex_start());
8154
8155 case KEY_return:
8156 OLDLOP(OP_RETURN);
8157
8158 case KEY_require:
8159 return yyl_require(aTHX_ s, orig_keyword);
8160
8161 case KEY_reset:
8162 UNI(OP_RESET);
8163
8164 case KEY_redo:
8165 LOOPX(OP_REDO);
8166
8167 case KEY_rename:
8168 LOP(OP_RENAME,XTERM);
8169
8170 case KEY_rand:
8171 UNI(OP_RAND);
8172
8173 case KEY_rmdir:
8174 UNI(OP_RMDIR);
8175
8176 case KEY_rindex:
8177 LOP(OP_RINDEX,XTERM);
8178
8179 case KEY_read:
8180 LOP(OP_READ,XTERM);
8181
8182 case KEY_readdir:
8183 UNI(OP_READDIR);
8184
8185 case KEY_readline:
8186 UNIDOR(OP_READLINE);
8187
8188 case KEY_readpipe:
8189 UNIDOR(OP_BACKTICK);
8190
8191 case KEY_rewinddir:
8192 UNI(OP_REWINDDIR);
8193
8194 case KEY_recv:
8195 LOP(OP_RECV,XTERM);
8196
8197 case KEY_reverse:
8198 LOP(OP_REVERSE,XTERM);
8199
8200 case KEY_readlink:
8201 UNIDOR(OP_READLINK);
8202
8203 case KEY_ref:
8204 UNI(OP_REF);
8205
8206 case KEY_s:
8207 s = scan_subst(s);
8208 if (pl_yylval.opval)
8209 TERM(sublex_start());
8210 else
8211 TOKEN(1); /* force error */
8212
8213 case KEY_say:
8214 checkcomma(s,PL_tokenbuf,"filehandle");
8215 LOP(OP_SAY,XREF);
8216
8217 case KEY_chomp:
8218 UNI(OP_CHOMP);
8219
8220 case KEY_scalar:
8221 UNI(OP_SCALAR);
8222
8223 case KEY_select:
8224 LOP(OP_SELECT,XTERM);
8225
8226 case KEY_seek:
8227 LOP(OP_SEEK,XTERM);
8228
8229 case KEY_semctl:
8230 LOP(OP_SEMCTL,XTERM);
8231
8232 case KEY_semget:
8233 LOP(OP_SEMGET,XTERM);
8234
8235 case KEY_semop:
8236 LOP(OP_SEMOP,XTERM);
8237
8238 case KEY_send:
8239 LOP(OP_SEND,XTERM);
8240
8241 case KEY_setpgrp:
8242 LOP(OP_SETPGRP,XTERM);
8243
8244 case KEY_setpriority:
8245 LOP(OP_SETPRIORITY,XTERM);
8246
8247 case KEY_sethostent:
8248 UNI(OP_SHOSTENT);
8249
8250 case KEY_setnetent:
8251 UNI(OP_SNETENT);
8252
8253 case KEY_setservent:
8254 UNI(OP_SSERVENT);
8255
8256 case KEY_setprotoent:
8257 UNI(OP_SPROTOENT);
8258
8259 case KEY_setpwent:
8260 FUN0(OP_SPWENT);
8261
8262 case KEY_setgrent:
8263 FUN0(OP_SGRENT);
8264
8265 case KEY_seekdir:
8266 LOP(OP_SEEKDIR,XTERM);
8267
8268 case KEY_setsockopt:
8269 LOP(OP_SSOCKOPT,XTERM);
8270
8271 case KEY_shift:
8272 UNIDOR(OP_SHIFT);
8273
8274 case KEY_shmctl:
8275 LOP(OP_SHMCTL,XTERM);
8276
8277 case KEY_shmget:
8278 LOP(OP_SHMGET,XTERM);
8279
8280 case KEY_shmread:
8281 LOP(OP_SHMREAD,XTERM);
8282
8283 case KEY_shmwrite:
8284 LOP(OP_SHMWRITE,XTERM);
8285
8286 case KEY_shutdown:
8287 LOP(OP_SHUTDOWN,XTERM);
8288
8289 case KEY_sin:
8290 UNI(OP_SIN);
8291
8292 case KEY_sleep:
8293 UNI(OP_SLEEP);
8294
8295 case KEY_socket:
8296 LOP(OP_SOCKET,XTERM);
8297
8298 case KEY_socketpair:
8299 LOP(OP_SOCKPAIR,XTERM);
8300
8301 case KEY_sort:
8302 checkcomma(s,PL_tokenbuf,"subroutine name");
8303 s = skipspace(s);
8304 PL_expect = XTERM;
8305 s = force_word(s,BAREWORD,TRUE,TRUE);
8306 LOP(OP_SORT,XREF);
8307
8308 case KEY_split:
8309 LOP(OP_SPLIT,XTERM);
8310
8311 case KEY_sprintf:
8312 LOP(OP_SPRINTF,XTERM);
8313
8314 case KEY_splice:
8315 LOP(OP_SPLICE,XTERM);
8316
8317 case KEY_sqrt:
8318 UNI(OP_SQRT);
8319
8320 case KEY_srand:
8321 UNI(OP_SRAND);
8322
8323 case KEY_stat:
8324 UNI(OP_STAT);
8325
8326 case KEY_study:
8327 UNI(OP_STUDY);
8328
8329 case KEY_substr:
8330 LOP(OP_SUBSTR,XTERM);
8331
8332 case KEY_format:
8333 case KEY_sub:
8334 return yyl_sub(aTHX_ s, key);
8335
8336 case KEY_system:
8337 LOP(OP_SYSTEM,XREF);
8338
8339 case KEY_symlink:
8340 LOP(OP_SYMLINK,XTERM);
8341
8342 case KEY_syscall:
8343 LOP(OP_SYSCALL,XTERM);
8344
8345 case KEY_sysopen:
8346 LOP(OP_SYSOPEN,XTERM);
8347
8348 case KEY_sysseek:
8349 LOP(OP_SYSSEEK,XTERM);
8350
8351 case KEY_sysread:
8352 LOP(OP_SYSREAD,XTERM);
8353
8354 case KEY_syswrite:
8355 LOP(OP_SYSWRITE,XTERM);
8356
8357 case KEY_tr:
8358 case KEY_y:
8359 s = scan_trans(s);
8360 TERM(sublex_start());
8361
8362 case KEY_tell:
8363 UNI(OP_TELL);
8364
8365 case KEY_telldir:
8366 UNI(OP_TELLDIR);
8367
8368 case KEY_tie:
8369 LOP(OP_TIE,XTERM);
8370
8371 case KEY_tied:
8372 UNI(OP_TIED);
8373
8374 case KEY_time:
8375 FUN0(OP_TIME);
8376
8377 case KEY_times:
8378 FUN0(OP_TMS);
8379
8380 case KEY_truncate:
8381 LOP(OP_TRUNCATE,XTERM);
8382
8383 case KEY_uc:
8384 UNI(OP_UC);
8385
8386 case KEY_ucfirst:
8387 UNI(OP_UCFIRST);
8388
8389 case KEY_untie:
8390 UNI(OP_UNTIE);
8391
8392 case KEY_until:
8393 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8394 return REPORT(0);
8395 pl_yylval.ival = CopLINE(PL_curcop);
8396 OPERATOR(UNTIL);
8397
8398 case KEY_unless:
8399 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8400 return REPORT(0);
8401 pl_yylval.ival = CopLINE(PL_curcop);
8402 OPERATOR(UNLESS);
8403
8404 case KEY_unlink:
8405 LOP(OP_UNLINK,XTERM);
8406
8407 case KEY_undef:
8408 UNIDOR(OP_UNDEF);
8409
8410 case KEY_unpack:
8411 LOP(OP_UNPACK,XTERM);
8412
8413 case KEY_utime:
8414 LOP(OP_UTIME,XTERM);
8415
8416 case KEY_umask:
8417 UNIDOR(OP_UMASK);
8418
8419 case KEY_unshift:
8420 LOP(OP_UNSHIFT,XTERM);
8421
8422 case KEY_use:
8423 s = tokenize_use(1, s);
8424 TOKEN(USE);
8425
8426 case KEY_values:
8427 UNI(OP_VALUES);
8428
8429 case KEY_vec:
8430 LOP(OP_VEC,XTERM);
8431
8432 case KEY_when:
8433 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8434 return REPORT(0);
8435 pl_yylval.ival = CopLINE(PL_curcop);
8436 Perl_ck_warner_d(aTHX_
8437 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8438 "when is experimental");
8439 OPERATOR(WHEN);
8440
8441 case KEY_while:
8442 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8443 return REPORT(0);
8444 pl_yylval.ival = CopLINE(PL_curcop);
8445 OPERATOR(WHILE);
8446
8447 case KEY_warn:
8448 PL_hints |= HINT_BLOCK_SCOPE;
8449 LOP(OP_WARN,XTERM);
8450
8451 case KEY_wait:
8452 FUN0(OP_WAIT);
8453
8454 case KEY_waitpid:
8455 LOP(OP_WAITPID,XTERM);
8456
8457 case KEY_wantarray:
8458 FUN0(OP_WANTARRAY);
8459
8460 case KEY_write:
8461 /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8462 * we use the same number on EBCDIC */
8463 gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8464 UNI(OP_ENTERWRITE);
8465
8466 case KEY_x:
8467 if (PL_expect == XOPERATOR) {
8468 if (*s == '=' && !PL_lex_allbrackets
8469 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8470 {
8471 return REPORT(0);
8472 }
8473 Mop(OP_REPEAT);
8474 }
8475 check_uni();
8476 return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
8477
8478 case KEY_xor:
8479 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8480 return REPORT(0);
8481 pl_yylval.ival = OP_XOR;
8482 OPERATOR(OROP);
8483 }
8484 }
8485
8486 static int
yyl_key_core(pTHX_ char * s,STRLEN len,struct code c)8487 yyl_key_core(pTHX_ char *s, STRLEN len, struct code c)
8488 {
8489 I32 key = 0;
8490 I32 orig_keyword = 0;
8491 STRLEN olen = len;
8492 char *d = s;
8493 s += 2;
8494 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8495 if ((*s == ':' && s[1] == ':')
8496 || (!(key = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
8497 {
8498 Copy(PL_bufptr, PL_tokenbuf, olen, char);
8499 return yyl_just_a_word(aTHX_ d, olen, 0, c);
8500 }
8501 if (!key)
8502 Perl_croak(aTHX_ "CORE::%" UTF8f " is not a keyword",
8503 UTF8fARG(UTF, len, PL_tokenbuf));
8504 if (key < 0)
8505 key = -key;
8506 else if (key == KEY_require || key == KEY_do
8507 || key == KEY_glob)
8508 /* that's a way to remember we saw "CORE::" */
8509 orig_keyword = key;
8510
8511 /* Known to be a reserved word at this point */
8512 return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c);
8513 }
8514
8515 static int
yyl_keylookup(pTHX_ char * s,GV * gv)8516 yyl_keylookup(pTHX_ char *s, GV *gv)
8517 {
8518 dVAR;
8519 STRLEN len;
8520 bool anydelim;
8521 I32 key;
8522 struct code c = no_code;
8523 I32 orig_keyword = 0;
8524 char *d;
8525
8526 c.gv = gv;
8527
8528 PL_bufptr = s;
8529 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8530
8531 /* Some keywords can be followed by any delimiter, including ':' */
8532 anydelim = word_takes_any_delimiter(PL_tokenbuf, len);
8533
8534 /* x::* is just a word, unless x is "CORE" */
8535 if (!anydelim && *s == ':' && s[1] == ':') {
8536 if (memEQs(PL_tokenbuf, len, "CORE"))
8537 return yyl_key_core(aTHX_ s, len, c);
8538 return yyl_just_a_word(aTHX_ s, len, 0, c);
8539 }
8540
8541 d = s;
8542 while (d < PL_bufend && isSPACE(*d))
8543 d++; /* no comments skipped here, or s### is misparsed */
8544
8545 /* Is this a word before a => operator? */
8546 if (*d == '=' && d[1] == '>') {
8547 return yyl_fatcomma(aTHX_ s, len);
8548 }
8549
8550 /* Check for plugged-in keyword */
8551 {
8552 OP *o;
8553 int result;
8554 char *saved_bufptr = PL_bufptr;
8555 PL_bufptr = s;
8556 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
8557 s = PL_bufptr;
8558 if (result == KEYWORD_PLUGIN_DECLINE) {
8559 /* not a plugged-in keyword */
8560 PL_bufptr = saved_bufptr;
8561 } else if (result == KEYWORD_PLUGIN_STMT) {
8562 pl_yylval.opval = o;
8563 CLINE;
8564 if (!PL_nexttoke) PL_expect = XSTATE;
8565 return REPORT(PLUGSTMT);
8566 } else if (result == KEYWORD_PLUGIN_EXPR) {
8567 pl_yylval.opval = o;
8568 CLINE;
8569 if (!PL_nexttoke) PL_expect = XOPERATOR;
8570 return REPORT(PLUGEXPR);
8571 } else {
8572 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'", PL_tokenbuf);
8573 }
8574 }
8575
8576 /* Is this a label? */
8577 if (!anydelim && PL_expect == XSTATE
8578 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
8579 s = d + 1;
8580 pl_yylval.opval =
8581 newSVOP(OP_CONST, 0,
8582 newSVpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0));
8583 CLINE;
8584 TOKEN(LABEL);
8585 }
8586
8587 /* Check for lexical sub */
8588 if (PL_expect != XOPERATOR) {
8589 char tmpbuf[sizeof PL_tokenbuf + 1];
8590 *tmpbuf = '&';
8591 Copy(PL_tokenbuf, tmpbuf+1, len, char);
8592 c.off = pad_findmy_pvn(tmpbuf, len+1, 0);
8593 if (c.off != NOT_IN_PAD) {
8594 assert(c.off); /* we assume this is boolean-true below */
8595 if (PAD_COMPNAME_FLAGS_isOUR(c.off)) {
8596 HV * const stash = PAD_COMPNAME_OURSTASH(c.off);
8597 HEK * const stashname = HvNAME_HEK(stash);
8598 c.sv = newSVhek(stashname);
8599 sv_catpvs(c.sv, "::");
8600 sv_catpvn_flags(c.sv, PL_tokenbuf, len,
8601 (UTF ? SV_CATUTF8 : SV_CATBYTES));
8602 c.gv = gv_fetchsv(c.sv, GV_NOADD_NOINIT | SvUTF8(c.sv),
8603 SVt_PVCV);
8604 c.off = 0;
8605 if (!c.gv) {
8606 sv_free(c.sv);
8607 c.sv = NULL;
8608 return yyl_just_a_word(aTHX_ s, len, 0, c);
8609 }
8610 }
8611 else {
8612 c.rv2cv_op = newOP(OP_PADANY, 0);
8613 c.rv2cv_op->op_targ = c.off;
8614 c.cv = find_lexical_cv(c.off);
8615 }
8616 c.lex = TRUE;
8617 return yyl_just_a_word(aTHX_ s, len, 0, c);
8618 }
8619 c.off = 0;
8620 }
8621
8622 /* Check for built-in keyword */
8623 key = keyword(PL_tokenbuf, len, 0);
8624
8625 if (key < 0)
8626 key = yyl_secondclass_keyword(aTHX_ s, len, key, &orig_keyword, &c.gv, &c.gvp);
8627
8628 if (key && key != KEY___DATA__ && key != KEY___END__
8629 && (!anydelim || *s != '#')) {
8630 /* no override, and not s### either; skipspace is safe here
8631 * check for => on following line */
8632 bool arrow;
8633 STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
8634 STRLEN soff = s - SvPVX(PL_linestr);
8635 s = peekspace(s);
8636 arrow = *s == '=' && s[1] == '>';
8637 PL_bufptr = SvPVX(PL_linestr) + bufoff;
8638 s = SvPVX(PL_linestr) + soff;
8639 if (arrow)
8640 return yyl_fatcomma(aTHX_ s, len);
8641 }
8642
8643 return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c);
8644 }
8645
8646 static int
yyl_try(pTHX_ char * s)8647 yyl_try(pTHX_ char *s)
8648 {
8649 char *d;
8650 GV *gv = NULL;
8651 int tok;
8652
8653 retry:
8654 switch (*s) {
8655 default:
8656 if (UTF ? isIDFIRST_utf8_safe(s, PL_bufend) : isALNUMC(*s)) {
8657 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
8658 return tok;
8659 goto retry_bufptr;
8660 }
8661 yyl_croak_unrecognised(aTHX_ s);
8662
8663 case 4:
8664 case 26:
8665 /* emulate EOF on ^D or ^Z */
8666 if ((tok = yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s)) != YYL_RETRY)
8667 return tok;
8668 retry_bufptr:
8669 s = PL_bufptr;
8670 goto retry;
8671
8672 case 0:
8673 if ((!PL_rsfp || PL_lex_inwhat)
8674 && (!PL_parser->filtered || s+1 < PL_bufend)) {
8675 PL_last_uni = 0;
8676 PL_last_lop = 0;
8677 if (PL_lex_brackets
8678 && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF)
8679 {
8680 yyerror((const char *)
8681 (PL_lex_formbrack
8682 ? "Format not terminated"
8683 : "Missing right curly or square bracket"));
8684 }
8685 DEBUG_T({
8686 PerlIO_printf(Perl_debug_log, "### Tokener got EOF\n");
8687 });
8688 TOKEN(0);
8689 }
8690 if (s++ < PL_bufend)
8691 goto retry; /* ignore stray nulls */
8692 PL_last_uni = 0;
8693 PL_last_lop = 0;
8694 if (!PL_in_eval && !PL_preambled) {
8695 PL_preambled = TRUE;
8696 if (PL_perldb) {
8697 /* Generate a string of Perl code to load the debugger.
8698 * If PERL5DB is set, it will return the contents of that,
8699 * otherwise a compile-time require of perl5db.pl. */
8700
8701 const char * const pdb = PerlEnv_getenv("PERL5DB");
8702
8703 if (pdb) {
8704 sv_setpv(PL_linestr, pdb);
8705 sv_catpvs(PL_linestr,";");
8706 } else {
8707 SETERRNO(0,SS_NORMAL);
8708 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
8709 }
8710 PL_parser->preambling = CopLINE(PL_curcop);
8711 } else
8712 SvPVCLEAR(PL_linestr);
8713 if (PL_preambleav) {
8714 SV **svp = AvARRAY(PL_preambleav);
8715 SV **const end = svp + AvFILLp(PL_preambleav);
8716 while(svp <= end) {
8717 sv_catsv(PL_linestr, *svp);
8718 ++svp;
8719 sv_catpvs(PL_linestr, ";");
8720 }
8721 sv_free(MUTABLE_SV(PL_preambleav));
8722 PL_preambleav = NULL;
8723 }
8724 if (PL_minus_E)
8725 sv_catpvs(PL_linestr,
8726 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
8727 if (PL_minus_n || PL_minus_p) {
8728 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
8729 if (PL_minus_l)
8730 sv_catpvs(PL_linestr,"chomp;");
8731 if (PL_minus_a) {
8732 if (PL_minus_F) {
8733 if ( ( *PL_splitstr == '/'
8734 || *PL_splitstr == '\''
8735 || *PL_splitstr == '"')
8736 && strchr(PL_splitstr + 1, *PL_splitstr))
8737 {
8738 /* strchr is ok, because -F pattern can't contain
8739 * embeddded NULs */
8740 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
8741 }
8742 else {
8743 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
8744 bytes can be used as quoting characters. :-) */
8745 const char *splits = PL_splitstr;
8746 sv_catpvs(PL_linestr, "our @F=split(q\0");
8747 do {
8748 /* Need to \ \s */
8749 if (*splits == '\\')
8750 sv_catpvn(PL_linestr, splits, 1);
8751 sv_catpvn(PL_linestr, splits, 1);
8752 } while (*splits++);
8753 /* This loop will embed the trailing NUL of
8754 PL_linestr as the last thing it does before
8755 terminating. */
8756 sv_catpvs(PL_linestr, ");");
8757 }
8758 }
8759 else
8760 sv_catpvs(PL_linestr,"our @F=split(' ');");
8761 }
8762 }
8763 sv_catpvs(PL_linestr, "\n");
8764 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
8765 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
8766 PL_last_lop = PL_last_uni = NULL;
8767 if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash)
8768 update_debugger_info(PL_linestr, NULL, 0);
8769 goto retry;
8770 }
8771 if ((tok = yyl_fake_eof(aTHX_ 0, cBOOL(PL_rsfp), s)) != YYL_RETRY)
8772 return tok;
8773 goto retry_bufptr;
8774
8775 case '\r':
8776 #ifdef PERL_STRICT_CR
8777 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
8778 Perl_croak(aTHX_
8779 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
8780 #endif
8781 case ' ': case '\t': case '\f': case '\v':
8782 s++;
8783 goto retry;
8784
8785 case '#':
8786 case '\n': {
8787 const bool needs_semicolon = yyl_eol_needs_semicolon(aTHX_ &s);
8788 if (needs_semicolon)
8789 TOKEN(';');
8790 else
8791 goto retry;
8792 }
8793
8794 case '-':
8795 return yyl_hyphen(aTHX_ s);
8796
8797 case '+':
8798 return yyl_plus(aTHX_ s);
8799
8800 case '*':
8801 return yyl_star(aTHX_ s);
8802
8803 case '%':
8804 return yyl_percent(aTHX_ s);
8805
8806 case '^':
8807 return yyl_caret(aTHX_ s);
8808
8809 case '[':
8810 return yyl_leftsquare(aTHX_ s);
8811
8812 case '~':
8813 return yyl_tilde(aTHX_ s);
8814
8815 case ',':
8816 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
8817 TOKEN(0);
8818 s++;
8819 OPERATOR(',');
8820 case ':':
8821 if (s[1] == ':')
8822 return yyl_just_a_word(aTHX_ s, 0, 0, no_code);
8823 return yyl_colon(aTHX_ s + 1);
8824
8825 case '(':
8826 return yyl_leftparen(aTHX_ s + 1);
8827
8828 case ';':
8829 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8830 TOKEN(0);
8831 CLINE;
8832 s++;
8833 PL_expect = XSTATE;
8834 TOKEN(';');
8835
8836 case ')':
8837 return yyl_rightparen(aTHX_ s);
8838
8839 case ']':
8840 return yyl_rightsquare(aTHX_ s);
8841
8842 case '{':
8843 return yyl_leftcurly(aTHX_ s + 1, 0);
8844
8845 case '}':
8846 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
8847 TOKEN(0);
8848 return yyl_rightcurly(aTHX_ s, 0);
8849
8850 case '&':
8851 return yyl_ampersand(aTHX_ s);
8852
8853 case '|':
8854 return yyl_verticalbar(aTHX_ s);
8855
8856 case '=':
8857 if (s[1] == '=' && (s == PL_linestart || s[-1] == '\n')
8858 && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "====="))
8859 {
8860 s = vcs_conflict_marker(s + 7);
8861 goto retry;
8862 }
8863
8864 s++;
8865 {
8866 const char tmp = *s++;
8867 if (tmp == '=') {
8868 if (!PL_lex_allbrackets
8869 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8870 {
8871 s -= 2;
8872 TOKEN(0);
8873 }
8874 ChEop(OP_EQ);
8875 }
8876 if (tmp == '>') {
8877 if (!PL_lex_allbrackets
8878 && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
8879 {
8880 s -= 2;
8881 TOKEN(0);
8882 }
8883 OPERATOR(',');
8884 }
8885 if (tmp == '~')
8886 PMop(OP_MATCH);
8887 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
8888 && memCHRs("+-*/%.^&|<",tmp))
8889 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8890 "Reversed %c= operator",(int)tmp);
8891 s--;
8892 if (PL_expect == XSTATE
8893 && isALPHA(tmp)
8894 && (s == PL_linestart+1 || s[-2] == '\n') )
8895 {
8896 if ( (PL_in_eval && !PL_rsfp && !PL_parser->filtered)
8897 || PL_lex_state != LEX_NORMAL)
8898 {
8899 d = PL_bufend;
8900 while (s < d) {
8901 if (*s++ == '\n') {
8902 incline(s, PL_bufend);
8903 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=cut"))
8904 {
8905 s = (char *) memchr(s,'\n', d - s);
8906 if (s)
8907 s++;
8908 else
8909 s = d;
8910 incline(s, PL_bufend);
8911 goto retry;
8912 }
8913 }
8914 }
8915 goto retry;
8916 }
8917 s = PL_bufend;
8918 PL_parser->in_pod = 1;
8919 goto retry;
8920 }
8921 }
8922 if (PL_expect == XBLOCK) {
8923 const char *t = s;
8924 #ifdef PERL_STRICT_CR
8925 while (SPACE_OR_TAB(*t))
8926 #else
8927 while (SPACE_OR_TAB(*t) || *t == '\r')
8928 #endif
8929 t++;
8930 if (*t == '\n' || *t == '#') {
8931 ENTER_with_name("lex_format");
8932 SAVEI8(PL_parser->form_lex_state);
8933 SAVEI32(PL_lex_formbrack);
8934 PL_parser->form_lex_state = PL_lex_state;
8935 PL_lex_formbrack = PL_lex_brackets + 1;
8936 PL_parser->sub_error_count = PL_error_count;
8937 return yyl_leftcurly(aTHX_ s, 1);
8938 }
8939 }
8940 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
8941 s--;
8942 TOKEN(0);
8943 }
8944 pl_yylval.ival = 0;
8945 OPERATOR(ASSIGNOP);
8946
8947 case '!':
8948 return yyl_bang(aTHX_ s + 1);
8949
8950 case '<':
8951 if (s[1] == '<' && (s == PL_linestart || s[-1] == '\n')
8952 && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "<<<<<"))
8953 {
8954 s = vcs_conflict_marker(s + 7);
8955 goto retry;
8956 }
8957 return yyl_leftpointy(aTHX_ s);
8958
8959 case '>':
8960 if (s[1] == '>' && (s == PL_linestart || s[-1] == '\n')
8961 && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), ">>>>>"))
8962 {
8963 s = vcs_conflict_marker(s + 7);
8964 goto retry;
8965 }
8966 return yyl_rightpointy(aTHX_ s + 1);
8967
8968 case '$':
8969 return yyl_dollar(aTHX_ s);
8970
8971 case '@':
8972 return yyl_snail(aTHX_ s);
8973
8974 case '/': /* may be division, defined-or, or pattern */
8975 return yyl_slash(aTHX_ s);
8976
8977 case '?': /* conditional */
8978 s++;
8979 if (!PL_lex_allbrackets
8980 && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE)
8981 {
8982 s--;
8983 TOKEN(0);
8984 }
8985 PL_lex_allbrackets++;
8986 OPERATOR('?');
8987
8988 case '.':
8989 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
8990 #ifdef PERL_STRICT_CR
8991 && s[1] == '\n'
8992 #else
8993 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
8994 #endif
8995 && (s == PL_linestart || s[-1] == '\n') )
8996 {
8997 PL_expect = XSTATE;
8998 /* formbrack==2 means dot seen where arguments expected */
8999 return yyl_rightcurly(aTHX_ s, 2);
9000 }
9001 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
9002 s += 3;
9003 OPERATOR(YADAYADA);
9004 }
9005 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
9006 char tmp = *s++;
9007 if (*s == tmp) {
9008 if (!PL_lex_allbrackets
9009 && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE)
9010 {
9011 s--;
9012 TOKEN(0);
9013 }
9014 s++;
9015 if (*s == tmp) {
9016 s++;
9017 pl_yylval.ival = OPf_SPECIAL;
9018 }
9019 else
9020 pl_yylval.ival = 0;
9021 OPERATOR(DOTDOT);
9022 }
9023 if (*s == '=' && !PL_lex_allbrackets
9024 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
9025 {
9026 s--;
9027 TOKEN(0);
9028 }
9029 Aop(OP_CONCAT);
9030 }
9031 /* FALLTHROUGH */
9032 case '0': case '1': case '2': case '3': case '4':
9033 case '5': case '6': case '7': case '8': case '9':
9034 s = scan_num(s, &pl_yylval);
9035 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
9036 if (PL_expect == XOPERATOR)
9037 no_op("Number",s);
9038 TERM(THING);
9039
9040 case '\'':
9041 return yyl_sglquote(aTHX_ s);
9042
9043 case '"':
9044 return yyl_dblquote(aTHX_ s);
9045
9046 case '`':
9047 return yyl_backtick(aTHX_ s);
9048
9049 case '\\':
9050 return yyl_backslash(aTHX_ s + 1);
9051
9052 case 'v':
9053 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
9054 char *start = s + 2;
9055 while (isDIGIT(*start) || *start == '_')
9056 start++;
9057 if (*start == '.' && isDIGIT(start[1])) {
9058 s = scan_num(s, &pl_yylval);
9059 TERM(THING);
9060 }
9061 else if ((*start == ':' && start[1] == ':')
9062 || (PL_expect == XSTATE && *start == ':')) {
9063 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9064 return tok;
9065 goto retry_bufptr;
9066 }
9067 else if (PL_expect == XSTATE) {
9068 d = start;
9069 while (d < PL_bufend && isSPACE(*d)) d++;
9070 if (*d == ':') {
9071 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9072 return tok;
9073 goto retry_bufptr;
9074 }
9075 }
9076 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
9077 if (!isALPHA(*start) && (PL_expect == XTERM
9078 || PL_expect == XREF || PL_expect == XSTATE
9079 || PL_expect == XTERMORDORDOR)) {
9080 GV *const gv = gv_fetchpvn_flags(s, start - s,
9081 UTF ? SVf_UTF8 : 0, SVt_PVCV);
9082 if (!gv) {
9083 s = scan_num(s, &pl_yylval);
9084 TERM(THING);
9085 }
9086 }
9087 }
9088 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9089 return tok;
9090 goto retry_bufptr;
9091
9092 case 'x':
9093 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
9094 s++;
9095 Mop(OP_REPEAT);
9096 }
9097 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9098 return tok;
9099 goto retry_bufptr;
9100
9101 case '_':
9102 case 'a': case 'A':
9103 case 'b': case 'B':
9104 case 'c': case 'C':
9105 case 'd': case 'D':
9106 case 'e': case 'E':
9107 case 'f': case 'F':
9108 case 'g': case 'G':
9109 case 'h': case 'H':
9110 case 'i': case 'I':
9111 case 'j': case 'J':
9112 case 'k': case 'K':
9113 case 'l': case 'L':
9114 case 'm': case 'M':
9115 case 'n': case 'N':
9116 case 'o': case 'O':
9117 case 'p': case 'P':
9118 case 'q': case 'Q':
9119 case 'r': case 'R':
9120 case 's': case 'S':
9121 case 't': case 'T':
9122 case 'u': case 'U':
9123 case 'V':
9124 case 'w': case 'W':
9125 case 'X':
9126 case 'y': case 'Y':
9127 case 'z': case 'Z':
9128 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9129 return tok;
9130 goto retry_bufptr;
9131 }
9132 }
9133
9134
9135 /*
9136 yylex
9137
9138 Works out what to call the token just pulled out of the input
9139 stream. The yacc parser takes care of taking the ops we return and
9140 stitching them into a tree.
9141
9142 Returns:
9143 The type of the next token
9144
9145 Structure:
9146 Check if we have already built the token; if so, use it.
9147 Switch based on the current state:
9148 - if we have a case modifier in a string, deal with that
9149 - handle other cases of interpolation inside a string
9150 - scan the next line if we are inside a format
9151 In the normal state, switch on the next character:
9152 - default:
9153 if alphabetic, go to key lookup
9154 unrecognized character - croak
9155 - 0/4/26: handle end-of-line or EOF
9156 - cases for whitespace
9157 - \n and #: handle comments and line numbers
9158 - various operators, brackets and sigils
9159 - numbers
9160 - quotes
9161 - 'v': vstrings (or go to key lookup)
9162 - 'x' repetition operator (or go to key lookup)
9163 - other ASCII alphanumerics (key lookup begins here):
9164 word before => ?
9165 keyword plugin
9166 scan built-in keyword (but do nothing with it yet)
9167 check for statement label
9168 check for lexical subs
9169 return yyl_just_a_word if there is one
9170 see whether built-in keyword is overridden
9171 switch on keyword number:
9172 - default: return yyl_just_a_word:
9173 not a built-in keyword; handle bareword lookup
9174 disambiguate between method and sub call
9175 fall back to bareword
9176 - cases for built-in keywords
9177 */
9178
9179 #ifdef NETWARE
9180 #define RSFP_FILENO (PL_rsfp)
9181 #else
9182 #define RSFP_FILENO (PerlIO_fileno(PL_rsfp))
9183 #endif
9184
9185
9186 int
Perl_yylex(pTHX)9187 Perl_yylex(pTHX)
9188 {
9189 dVAR;
9190 char *s = PL_bufptr;
9191
9192 if (UNLIKELY(PL_parser->recheck_utf8_validity)) {
9193 const U8* first_bad_char_loc;
9194 if (UTF && UNLIKELY(! is_utf8_string_loc((U8 *) PL_bufptr,
9195 PL_bufend - PL_bufptr,
9196 &first_bad_char_loc)))
9197 {
9198 _force_out_malformed_utf8_message(first_bad_char_loc,
9199 (U8 *) PL_bufend,
9200 0,
9201 1 /* 1 means die */ );
9202 NOT_REACHED; /* NOTREACHED */
9203 }
9204 PL_parser->recheck_utf8_validity = FALSE;
9205 }
9206 DEBUG_T( {
9207 SV* tmp = newSVpvs("");
9208 PerlIO_printf(Perl_debug_log, "### %" IVdf ":LEX_%s/X%s %s\n",
9209 (IV)CopLINE(PL_curcop),
9210 lex_state_names[PL_lex_state],
9211 exp_name[PL_expect],
9212 pv_display(tmp, s, strlen(s), 0, 60));
9213 SvREFCNT_dec(tmp);
9214 } );
9215
9216 /* when we've already built the next token, just pull it out of the queue */
9217 if (PL_nexttoke) {
9218 PL_nexttoke--;
9219 pl_yylval = PL_nextval[PL_nexttoke];
9220 {
9221 I32 next_type;
9222 next_type = PL_nexttype[PL_nexttoke];
9223 if (next_type & (7<<24)) {
9224 if (next_type & (1<<24)) {
9225 if (PL_lex_brackets > 100)
9226 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
9227 PL_lex_brackstack[PL_lex_brackets++] =
9228 (char) ((next_type >> 16) & 0xff);
9229 }
9230 if (next_type & (2<<24))
9231 PL_lex_allbrackets++;
9232 if (next_type & (4<<24))
9233 PL_lex_allbrackets--;
9234 next_type &= 0xffff;
9235 }
9236 return REPORT(next_type == 'p' ? pending_ident() : next_type);
9237 }
9238 }
9239
9240 switch (PL_lex_state) {
9241 case LEX_NORMAL:
9242 case LEX_INTERPNORMAL:
9243 break;
9244
9245 /* interpolated case modifiers like \L \U, including \Q and \E.
9246 when we get here, PL_bufptr is at the \
9247 */
9248 case LEX_INTERPCASEMOD:
9249 /* handle \E or end of string */
9250 return yyl_interpcasemod(aTHX_ s);
9251
9252 case LEX_INTERPPUSH:
9253 return REPORT(sublex_push());
9254
9255 case LEX_INTERPSTART:
9256 if (PL_bufptr == PL_bufend)
9257 return REPORT(sublex_done());
9258 DEBUG_T({
9259 if(*PL_bufptr != '(')
9260 PerlIO_printf(Perl_debug_log, "### Interpolated variable\n");
9261 });
9262 PL_expect = XTERM;
9263 /* for /@a/, we leave the joining for the regex engine to do
9264 * (unless we're within \Q etc) */
9265 PL_lex_dojoin = (*PL_bufptr == '@'
9266 && (!PL_lex_inpat || PL_lex_casemods));
9267 PL_lex_state = LEX_INTERPNORMAL;
9268 if (PL_lex_dojoin) {
9269 NEXTVAL_NEXTTOKE.ival = 0;
9270 force_next(',');
9271 force_ident("\"", '$');
9272 NEXTVAL_NEXTTOKE.ival = 0;
9273 force_next('$');
9274 NEXTVAL_NEXTTOKE.ival = 0;
9275 force_next((2<<24)|'(');
9276 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
9277 force_next(FUNC);
9278 }
9279 /* Convert (?{...}) and friends to 'do {...}' */
9280 if (PL_lex_inpat && *PL_bufptr == '(') {
9281 PL_parser->lex_shared->re_eval_start = PL_bufptr;
9282 PL_bufptr += 2;
9283 if (*PL_bufptr != '{')
9284 PL_bufptr++;
9285 PL_expect = XTERMBLOCK;
9286 force_next(DO);
9287 }
9288
9289 if (PL_lex_starts++) {
9290 s = PL_bufptr;
9291 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
9292 if (!PL_lex_casemods && PL_lex_inpat)
9293 TOKEN(',');
9294 else
9295 AopNOASSIGN(OP_CONCAT);
9296 }
9297 return yylex();
9298
9299 case LEX_INTERPENDMAYBE:
9300 if (intuit_more(PL_bufptr, PL_bufend)) {
9301 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
9302 break;
9303 }
9304 /* FALLTHROUGH */
9305
9306 case LEX_INTERPEND:
9307 if (PL_lex_dojoin) {
9308 const U8 dojoin_was = PL_lex_dojoin;
9309 PL_lex_dojoin = FALSE;
9310 PL_lex_state = LEX_INTERPCONCAT;
9311 PL_lex_allbrackets--;
9312 return REPORT(dojoin_was == 1 ? (int)')' : (int)POSTJOIN);
9313 }
9314 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
9315 && SvEVALED(PL_lex_repl))
9316 {
9317 if (PL_bufptr != PL_bufend)
9318 Perl_croak(aTHX_ "Bad evalled substitution pattern");
9319 PL_lex_repl = NULL;
9320 }
9321 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets
9322 re_eval_str. If the here-doc body’s length equals the previous
9323 value of re_eval_start, re_eval_start will now be null. So
9324 check re_eval_str as well. */
9325 if (PL_parser->lex_shared->re_eval_start
9326 || PL_parser->lex_shared->re_eval_str) {
9327 SV *sv;
9328 if (*PL_bufptr != ')')
9329 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
9330 PL_bufptr++;
9331 /* having compiled a (?{..}) expression, return the original
9332 * text too, as a const */
9333 if (PL_parser->lex_shared->re_eval_str) {
9334 sv = PL_parser->lex_shared->re_eval_str;
9335 PL_parser->lex_shared->re_eval_str = NULL;
9336 SvCUR_set(sv,
9337 PL_bufptr - PL_parser->lex_shared->re_eval_start);
9338 SvPV_shrink_to_cur(sv);
9339 }
9340 else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
9341 PL_bufptr - PL_parser->lex_shared->re_eval_start);
9342 NEXTVAL_NEXTTOKE.opval =
9343 newSVOP(OP_CONST, 0,
9344 sv);
9345 force_next(THING);
9346 PL_parser->lex_shared->re_eval_start = NULL;
9347 PL_expect = XTERM;
9348 return REPORT(',');
9349 }
9350
9351 /* FALLTHROUGH */
9352 case LEX_INTERPCONCAT:
9353 #ifdef DEBUGGING
9354 if (PL_lex_brackets)
9355 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
9356 (long) PL_lex_brackets);
9357 #endif
9358 if (PL_bufptr == PL_bufend)
9359 return REPORT(sublex_done());
9360
9361 /* m'foo' still needs to be parsed for possible (?{...}) */
9362 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
9363 SV *sv = newSVsv(PL_linestr);
9364 sv = tokeq(sv);
9365 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
9366 s = PL_bufend;
9367 }
9368 else {
9369 int save_error_count = PL_error_count;
9370
9371 s = scan_const(PL_bufptr);
9372
9373 /* Set flag if this was a pattern and there were errors. op.c will
9374 * refuse to compile a pattern with this flag set. Otherwise, we
9375 * could get segfaults, etc. */
9376 if (PL_lex_inpat && PL_error_count > save_error_count) {
9377 ((PMOP*)PL_lex_inpat)->op_pmflags |= PMf_HAS_ERROR;
9378 }
9379 if (*s == '\\')
9380 PL_lex_state = LEX_INTERPCASEMOD;
9381 else
9382 PL_lex_state = LEX_INTERPSTART;
9383 }
9384
9385 if (s != PL_bufptr) {
9386 NEXTVAL_NEXTTOKE = pl_yylval;
9387 PL_expect = XTERM;
9388 force_next(THING);
9389 if (PL_lex_starts++) {
9390 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
9391 if (!PL_lex_casemods && PL_lex_inpat)
9392 TOKEN(',');
9393 else
9394 AopNOASSIGN(OP_CONCAT);
9395 }
9396 else {
9397 PL_bufptr = s;
9398 return yylex();
9399 }
9400 }
9401
9402 return yylex();
9403 case LEX_FORMLINE:
9404 if (PL_parser->sub_error_count != PL_error_count) {
9405 /* There was an error parsing a formline, which tends to
9406 mess up the parser.
9407 Unlike interpolated sub-parsing, we can't treat any of
9408 these as recoverable, so no need to check sub_no_recover.
9409 */
9410 yyquit();
9411 }
9412 assert(PL_lex_formbrack);
9413 s = scan_formline(PL_bufptr);
9414 if (!PL_lex_formbrack)
9415 return yyl_rightcurly(aTHX_ s, 1);
9416 PL_bufptr = s;
9417 return yylex();
9418 }
9419
9420 /* We really do *not* want PL_linestr ever becoming a COW. */
9421 assert (!SvIsCOW(PL_linestr));
9422 s = PL_bufptr;
9423 PL_oldoldbufptr = PL_oldbufptr;
9424 PL_oldbufptr = s;
9425
9426 if (PL_in_my == KEY_sigvar) {
9427 PL_parser->saw_infix_sigil = 0;
9428 return yyl_sigvar(aTHX_ s);
9429 }
9430
9431 {
9432 /* yyl_try() and its callees might consult PL_parser->saw_infix_sigil.
9433 On its return, we then need to set it to indicate whether the token
9434 we just encountered was an infix operator that (if we hadn't been
9435 expecting an operator) have been a sigil.
9436 */
9437 bool expected_operator = (PL_expect == XOPERATOR);
9438 int ret = yyl_try(aTHX_ s);
9439 switch (pl_yylval.ival) {
9440 case OP_BIT_AND:
9441 case OP_MODULO:
9442 case OP_MULTIPLY:
9443 case OP_NBIT_AND:
9444 if (expected_operator) {
9445 PL_parser->saw_infix_sigil = 1;
9446 break;
9447 }
9448 /* FALLTHROUGH */
9449 default:
9450 PL_parser->saw_infix_sigil = 0;
9451 }
9452 return ret;
9453 }
9454 }
9455
9456
9457 /*
9458 S_pending_ident
9459
9460 Looks up an identifier in the pad or in a package
9461
9462 PL_in_my == KEY_sigvar indicates that this is a subroutine signature variable
9463 rather than a plain pad var.
9464
9465 Returns:
9466 PRIVATEREF if this is a lexical name.
9467 BAREWORD if this belongs to a package.
9468
9469 Structure:
9470 if we're in a my declaration
9471 croak if they tried to say my($foo::bar)
9472 build the ops for a my() declaration
9473 if it's an access to a my() variable
9474 build ops for access to a my() variable
9475 if in a dq string, and they've said @foo and we can't find @foo
9476 warn
9477 build ops for a bareword
9478 */
9479
9480 static int
S_pending_ident(pTHX)9481 S_pending_ident(pTHX)
9482 {
9483 PADOFFSET tmp = 0;
9484 const char pit = (char)pl_yylval.ival;
9485 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
9486 /* All routes through this function want to know if there is a colon. */
9487 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
9488
9489 DEBUG_T({ PerlIO_printf(Perl_debug_log,
9490 "### Pending identifier '%s'\n", PL_tokenbuf); });
9491 assert(tokenbuf_len >= 2);
9492
9493 /* if we're in a my(), we can't allow dynamics here.
9494 $foo'bar has already been turned into $foo::bar, so
9495 just check for colons.
9496
9497 if it's a legal name, the OP is a PADANY.
9498 */
9499 if (PL_in_my) {
9500 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
9501 if (has_colon)
9502 /* diag_listed_as: No package name allowed for variable %s
9503 in "our" */
9504 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
9505 "%s %s in \"our\"",
9506 *PL_tokenbuf=='&' ? "subroutine" : "variable",
9507 PL_tokenbuf), UTF ? SVf_UTF8 : 0);
9508 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
9509 }
9510 else {
9511 OP *o;
9512 if (has_colon) {
9513 /* "my" variable %s can't be in a package */
9514 /* PL_no_myglob is constant */
9515 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
9516 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
9517 PL_in_my == KEY_my ? "my" : "state",
9518 *PL_tokenbuf == '&' ? "subroutine" : "variable",
9519 PL_tokenbuf),
9520 UTF ? SVf_UTF8 : 0);
9521 GCC_DIAG_RESTORE_STMT;
9522 }
9523
9524 if (PL_in_my == KEY_sigvar) {
9525 /* A signature 'padop' needs in addition, an op_first to
9526 * point to a child sigdefelem, and an extra field to hold
9527 * the signature index. We can achieve both by using an
9528 * UNOP_AUX and (ab)using the op_aux field to hold the
9529 * index. If we ever need more fields, use a real malloced
9530 * aux strut instead.
9531 */
9532 o = newUNOP_AUX(OP_ARGELEM, 0, NULL,
9533 INT2PTR(UNOP_AUX_item *,
9534 (PL_parser->sig_elems)));
9535 o->op_private |= ( PL_tokenbuf[0] == '$' ? OPpARGELEM_SV
9536 : PL_tokenbuf[0] == '@' ? OPpARGELEM_AV
9537 : OPpARGELEM_HV);
9538 }
9539 else
9540 o = newOP(OP_PADANY, 0);
9541 o->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
9542 UTF ? SVf_UTF8 : 0);
9543 if (PL_in_my == KEY_sigvar)
9544 PL_in_my = 0;
9545
9546 pl_yylval.opval = o;
9547 return PRIVATEREF;
9548 }
9549 }
9550
9551 /*
9552 build the ops for accesses to a my() variable.
9553 */
9554
9555 if (!has_colon) {
9556 if (!PL_in_my)
9557 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
9558 0);
9559 if (tmp != NOT_IN_PAD) {
9560 /* might be an "our" variable" */
9561 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9562 /* build ops for a bareword */
9563 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9564 HEK * const stashname = HvNAME_HEK(stash);
9565 SV * const sym = newSVhek(stashname);
9566 sv_catpvs(sym, "::");
9567 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
9568 pl_yylval.opval = newSVOP(OP_CONST, 0, sym);
9569 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9570 if (pit != '&')
9571 gv_fetchsv(sym,
9572 GV_ADDMULTI,
9573 ((PL_tokenbuf[0] == '$') ? SVt_PV
9574 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9575 : SVt_PVHV));
9576 return BAREWORD;
9577 }
9578
9579 pl_yylval.opval = newOP(OP_PADANY, 0);
9580 pl_yylval.opval->op_targ = tmp;
9581 return PRIVATEREF;
9582 }
9583 }
9584
9585 /*
9586 Whine if they've said @foo or @foo{key} in a doublequoted string,
9587 and @foo (or %foo) isn't a variable we can find in the symbol
9588 table.
9589 */
9590 if (ckWARN(WARN_AMBIGUOUS)
9591 && pit == '@'
9592 && PL_lex_state != LEX_NORMAL
9593 && !PL_lex_brackets)
9594 {
9595 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9596 ( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG,
9597 SVt_PVAV);
9598 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
9599 )
9600 {
9601 /* Downgraded from fatal to warning 20000522 mjd */
9602 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9603 "Possible unintended interpolation of %" UTF8f
9604 " in string",
9605 UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
9606 }
9607 }
9608
9609 /* build ops for a bareword */
9610 pl_yylval.opval = newSVOP(OP_CONST, 0,
9611 newSVpvn_flags(PL_tokenbuf + 1,
9612 tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9613 UTF ? SVf_UTF8 : 0 ));
9614 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9615 if (pit != '&')
9616 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9617 (PL_in_eval ? GV_ADDMULTI : GV_ADD)
9618 | ( UTF ? SVf_UTF8 : 0 ),
9619 ((PL_tokenbuf[0] == '$') ? SVt_PV
9620 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9621 : SVt_PVHV));
9622 return BAREWORD;
9623 }
9624
9625 STATIC void
S_checkcomma(pTHX_ const char * s,const char * name,const char * what)9626 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
9627 {
9628 PERL_ARGS_ASSERT_CHECKCOMMA;
9629
9630 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
9631 if (ckWARN(WARN_SYNTAX)) {
9632 int level = 1;
9633 const char *w;
9634 for (w = s+2; *w && level; w++) {
9635 if (*w == '(')
9636 ++level;
9637 else if (*w == ')')
9638 --level;
9639 }
9640 while (isSPACE(*w))
9641 ++w;
9642 /* the list of chars below is for end of statements or
9643 * block / parens, boolean operators (&&, ||, //) and branch
9644 * constructs (or, and, if, until, unless, while, err, for).
9645 * Not a very solid hack... */
9646 if (!*w || !memCHRs(";&/|})]oaiuwef!=", *w))
9647 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9648 "%s (...) interpreted as function",name);
9649 }
9650 }
9651 while (s < PL_bufend && isSPACE(*s))
9652 s++;
9653 if (*s == '(')
9654 s++;
9655 while (s < PL_bufend && isSPACE(*s))
9656 s++;
9657 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
9658 const char * const w = s;
9659 s += UTF ? UTF8SKIP(s) : 1;
9660 while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
9661 s += UTF ? UTF8SKIP(s) : 1;
9662 while (s < PL_bufend && isSPACE(*s))
9663 s++;
9664 if (*s == ',') {
9665 GV* gv;
9666 if (keyword(w, s - w, 0))
9667 return;
9668
9669 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
9670 if (gv && GvCVu(gv))
9671 return;
9672 if (s - w <= 254) {
9673 PADOFFSET off;
9674 char tmpbuf[256];
9675 Copy(w, tmpbuf+1, s - w, char);
9676 *tmpbuf = '&';
9677 off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
9678 if (off != NOT_IN_PAD) return;
9679 }
9680 Perl_croak(aTHX_ "No comma allowed after %s", what);
9681 }
9682 }
9683 }
9684
9685 /* S_new_constant(): do any overload::constant lookup.
9686
9687 Either returns sv, or mortalizes/frees sv and returns a new SV*.
9688 Best used as sv=new_constant(..., sv, ...).
9689 If s, pv are NULL, calls subroutine with one argument,
9690 and <type> is used with error messages only.
9691 <type> is assumed to be well formed UTF-8.
9692
9693 If error_msg is not NULL, *error_msg will be set to any error encountered.
9694 Otherwise yyerror() will be used to output it */
9695
9696 STATIC SV *
S_new_constant(pTHX_ const char * s,STRLEN len,const char * key,STRLEN keylen,SV * sv,SV * pv,const char * type,STRLEN typelen,const char ** error_msg)9697 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9698 SV *sv, SV *pv, const char *type, STRLEN typelen,
9699 const char ** error_msg)
9700 {
9701 dSP;
9702 HV * table = GvHV(PL_hintgv); /* ^H */
9703 SV *res;
9704 SV *errsv = NULL;
9705 SV **cvp;
9706 SV *cv, *typesv;
9707 const char *why1 = "", *why2 = "", *why3 = "";
9708 const char * optional_colon = ":"; /* Only some messages have a colon */
9709 char *msg;
9710
9711 PERL_ARGS_ASSERT_NEW_CONSTANT;
9712 /* We assume that this is true: */
9713 assert(type || s);
9714
9715 sv_2mortal(sv); /* Parent created it permanently */
9716
9717 if ( ! table
9718 || ! (PL_hints & HINT_LOCALIZE_HH))
9719 {
9720 why1 = "unknown";
9721 optional_colon = "";
9722 goto report;
9723 }
9724
9725 cvp = hv_fetch(table, key, keylen, FALSE);
9726 if (!cvp || !SvOK(*cvp)) {
9727 why1 = "$^H{";
9728 why2 = key;
9729 why3 = "} is not defined";
9730 goto report;
9731 }
9732
9733 cv = *cvp;
9734 if (!pv && s)
9735 pv = newSVpvn_flags(s, len, SVs_TEMP);
9736 if (type && pv)
9737 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
9738 else
9739 typesv = &PL_sv_undef;
9740
9741 PUSHSTACKi(PERLSI_OVERLOAD);
9742 ENTER ;
9743 SAVETMPS;
9744
9745 PUSHMARK(SP) ;
9746 EXTEND(sp, 3);
9747 if (pv)
9748 PUSHs(pv);
9749 PUSHs(sv);
9750 if (pv)
9751 PUSHs(typesv);
9752 PUTBACK;
9753 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9754
9755 SPAGAIN ;
9756
9757 /* Check the eval first */
9758 if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
9759 STRLEN errlen;
9760 const char * errstr;
9761 sv_catpvs(errsv, "Propagated");
9762 errstr = SvPV_const(errsv, errlen);
9763 yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
9764 (void)POPs;
9765 res = SvREFCNT_inc_simple_NN(sv);
9766 }
9767 else {
9768 res = POPs;
9769 SvREFCNT_inc_simple_void_NN(res);
9770 }
9771
9772 PUTBACK ;
9773 FREETMPS ;
9774 LEAVE ;
9775 POPSTACK;
9776
9777 if (SvOK(res)) {
9778 return res;
9779 }
9780
9781 sv = res;
9782 (void)sv_2mortal(sv);
9783
9784 why1 = "Call to &{$^H{";
9785 why2 = key;
9786 why3 = "}} did not return a defined value";
9787
9788 report:
9789
9790 msg = Perl_form(aTHX_ "Constant(%.*s)%s %s%s%s",
9791 (int)(type ? typelen : len),
9792 (type ? type: s),
9793 optional_colon,
9794 why1, why2, why3);
9795 if (error_msg) {
9796 *error_msg = msg;
9797 }
9798 else {
9799 yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
9800 }
9801 return SvREFCNT_inc_simple_NN(sv);
9802 }
9803
9804 PERL_STATIC_INLINE void
S_parse_ident(pTHX_ char ** s,char ** d,char * const e,int allow_package,bool is_utf8,bool check_dollar,bool tick_warn)9805 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
9806 bool is_utf8, bool check_dollar, bool tick_warn)
9807 {
9808 int saw_tick = 0;
9809 const char *olds = *s;
9810 PERL_ARGS_ASSERT_PARSE_IDENT;
9811
9812 while (*s < PL_bufend) {
9813 if (*d >= e)
9814 Perl_croak(aTHX_ "%s", ident_too_long);
9815 if (is_utf8 && isIDFIRST_utf8_safe(*s, PL_bufend)) {
9816 /* The UTF-8 case must come first, otherwise things
9817 * like c\N{COMBINING TILDE} would start failing, as the
9818 * isWORDCHAR_A case below would gobble the 'c' up.
9819 */
9820
9821 char *t = *s + UTF8SKIP(*s);
9822 while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) {
9823 t += UTF8SKIP(t);
9824 }
9825 if (*d + (t - *s) > e)
9826 Perl_croak(aTHX_ "%s", ident_too_long);
9827 Copy(*s, *d, t - *s, char);
9828 *d += t - *s;
9829 *s = t;
9830 }
9831 else if ( isWORDCHAR_A(**s) ) {
9832 do {
9833 *(*d)++ = *(*s)++;
9834 } while (isWORDCHAR_A(**s) && *d < e);
9835 }
9836 else if ( allow_package
9837 && **s == '\''
9838 && isIDFIRST_lazy_if_safe((*s)+1, PL_bufend, is_utf8))
9839 {
9840 *(*d)++ = ':';
9841 *(*d)++ = ':';
9842 (*s)++;
9843 saw_tick++;
9844 }
9845 else if (allow_package && **s == ':' && (*s)[1] == ':'
9846 /* Disallow things like Foo::$bar. For the curious, this is
9847 * the code path that triggers the "Bad name after" warning
9848 * when looking for barewords.
9849 */
9850 && !(check_dollar && (*s)[2] == '$')) {
9851 *(*d)++ = *(*s)++;
9852 *(*d)++ = *(*s)++;
9853 }
9854 else
9855 break;
9856 }
9857 if (UNLIKELY(tick_warn && saw_tick && PL_lex_state == LEX_INTERPNORMAL
9858 && !PL_lex_brackets && ckWARN(WARN_SYNTAX))) {
9859 char *this_d;
9860 char *d2;
9861 Newx(this_d, *s - olds + saw_tick + 2, char); /* +2 for $# */
9862 d2 = this_d;
9863 SAVEFREEPV(this_d);
9864 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9865 "Old package separator used in string");
9866 if (olds[-1] == '#')
9867 *d2++ = olds[-2];
9868 *d2++ = olds[-1];
9869 while (olds < *s) {
9870 if (*olds == '\'') {
9871 *d2++ = '\\';
9872 *d2++ = *olds++;
9873 }
9874 else
9875 *d2++ = *olds++;
9876 }
9877 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9878 "\t(Did you mean \"%" UTF8f "\" instead?)\n",
9879 UTF8fARG(is_utf8, d2-this_d, this_d));
9880 }
9881 return;
9882 }
9883
9884 /* Returns a NUL terminated string, with the length of the string written to
9885 *slp
9886 */
9887 char *
Perl_scan_word(pTHX_ char * s,char * dest,STRLEN destlen,int allow_package,STRLEN * slp)9888 Perl_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9889 {
9890 char *d = dest;
9891 char * const e = d + destlen - 3; /* two-character token, ending NUL */
9892 bool is_utf8 = cBOOL(UTF);
9893
9894 PERL_ARGS_ASSERT_SCAN_WORD;
9895
9896 parse_ident(&s, &d, e, allow_package, is_utf8, TRUE, FALSE);
9897 *d = '\0';
9898 *slp = d - dest;
9899 return s;
9900 }
9901
9902 /* Is the byte 'd' a legal single character identifier name? 'u' is true
9903 * iff Unicode semantics are to be used. The legal ones are any of:
9904 * a) all ASCII characters except:
9905 * 1) control and space-type ones, like NUL, SOH, \t, and SPACE;
9906 * 2) '{'
9907 * The final case currently doesn't get this far in the program, so we
9908 * don't test for it. If that were to change, it would be ok to allow it.
9909 * b) When not under Unicode rules, any upper Latin1 character
9910 * c) Otherwise, when unicode rules are used, all XIDS characters.
9911 *
9912 * Because all ASCII characters have the same representation whether
9913 * encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
9914 * '{' without knowing if is UTF-8 or not. */
9915 #define VALID_LEN_ONE_IDENT(s, e, is_utf8) \
9916 (isGRAPH_A(*(s)) || ((is_utf8) \
9917 ? isIDFIRST_utf8_safe(s, e) \
9918 : (isGRAPH_L1(*s) \
9919 && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
9920
9921 STATIC char *
S_scan_ident(pTHX_ char * s,char * dest,STRLEN destlen,I32 ck_uni)9922 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
9923 {
9924 I32 herelines = PL_parser->herelines;
9925 SSize_t bracket = -1;
9926 char funny = *s++;
9927 char *d = dest;
9928 char * const e = d + destlen - 3; /* two-character token, ending NUL */
9929 bool is_utf8 = cBOOL(UTF);
9930 I32 orig_copline = 0, tmp_copline = 0;
9931
9932 PERL_ARGS_ASSERT_SCAN_IDENT;
9933
9934 if (isSPACE(*s) || !*s)
9935 s = skipspace(s);
9936 if (isDIGIT(*s)) { /* handle $0 and $1 $2 and $10 and etc */
9937 bool is_zero= *s == '0' ? TRUE : FALSE;
9938 char *digit_start= d;
9939 *d++ = *s++;
9940 while (s < PL_bufend && isDIGIT(*s)) {
9941 if (d >= e)
9942 Perl_croak(aTHX_ "%s", ident_too_long);
9943 *d++ = *s++;
9944 }
9945 if (is_zero && d - digit_start > 1)
9946 Perl_croak(aTHX_ ident_var_zero_multi_digit);
9947 }
9948 else { /* See if it is a "normal" identifier */
9949 parse_ident(&s, &d, e, 1, is_utf8, FALSE, TRUE);
9950 }
9951 *d = '\0';
9952 d = dest;
9953 if (*d) {
9954 /* Either a digit variable, or parse_ident() found an identifier
9955 (anything valid as a bareword), so job done and return. */
9956 if (PL_lex_state != LEX_NORMAL)
9957 PL_lex_state = LEX_INTERPENDMAYBE;
9958 return s;
9959 }
9960
9961 /* Here, it is not a run-of-the-mill identifier name */
9962
9963 if (*s == '$' && s[1]
9964 && ( isIDFIRST_lazy_if_safe(s+1, PL_bufend, is_utf8)
9965 || isDIGIT_A((U8)s[1])
9966 || s[1] == '$'
9967 || s[1] == '{'
9968 || memBEGINs(s+1, (STRLEN) (PL_bufend - (s+1)), "::")) )
9969 {
9970 /* Dereferencing a value in a scalar variable.
9971 The alternatives are different syntaxes for a scalar variable.
9972 Using ' as a leading package separator isn't allowed. :: is. */
9973 return s;
9974 }
9975 /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...} */
9976 if (*s == '{') {
9977 bracket = s - SvPVX(PL_linestr);
9978 s++;
9979 orig_copline = CopLINE(PL_curcop);
9980 if (s < PL_bufend && isSPACE(*s)) {
9981 s = skipspace(s);
9982 }
9983 }
9984 if ((s <= PL_bufend - ((is_utf8)
9985 ? UTF8SKIP(s)
9986 : 1))
9987 && VALID_LEN_ONE_IDENT(s, PL_bufend, is_utf8))
9988 {
9989 if (is_utf8) {
9990 const STRLEN skip = UTF8SKIP(s);
9991 STRLEN i;
9992 d[skip] = '\0';
9993 for ( i = 0; i < skip; i++ )
9994 d[i] = *s++;
9995 }
9996 else {
9997 *d = *s++;
9998 /* special case to handle ${10}, ${11} the same way we handle ${1} etc */
9999 if (isDIGIT(*d)) {
10000 bool is_zero= *d == '0' ? TRUE : FALSE;
10001 char *digit_start= d;
10002 while (s < PL_bufend && isDIGIT(*s)) {
10003 d++;
10004 if (d >= e)
10005 Perl_croak(aTHX_ "%s", ident_too_long);
10006 *d= *s++;
10007 }
10008 if (is_zero && d - digit_start > 1)
10009 Perl_croak(aTHX_ ident_var_zero_multi_digit);
10010 }
10011 d[1] = '\0';
10012 }
10013 }
10014 /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
10015 if (*d == '^' && *s && isCONTROLVAR(*s)) {
10016 *d = toCTRL(*s);
10017 s++;
10018 }
10019 /* Warn about ambiguous code after unary operators if {...} notation isn't
10020 used. There's no difference in ambiguity; it's merely a heuristic
10021 about when not to warn. */
10022 else if (ck_uni && bracket == -1)
10023 check_uni();
10024 if (bracket != -1) {
10025 bool skip;
10026 char *s2;
10027 /* If we were processing {...} notation then... */
10028 if (isIDFIRST_lazy_if_safe(d, e, is_utf8)
10029 || (!isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
10030 && isWORDCHAR(*s))
10031 ) {
10032 /* note we have to check for a normal identifier first,
10033 * as it handles utf8 symbols, and only after that has
10034 * been ruled out can we look at the caret words */
10035 if (isIDFIRST_lazy_if_safe(d, e, is_utf8) ) {
10036 /* if it starts as a valid identifier, assume that it is one.
10037 (the later check for } being at the expected point will trap
10038 cases where this doesn't pan out.) */
10039 d += is_utf8 ? UTF8SKIP(d) : 1;
10040 parse_ident(&s, &d, e, 1, is_utf8, TRUE, TRUE);
10041 *d = '\0';
10042 }
10043 else { /* caret word: ${^Foo} ${^CAPTURE[0]} */
10044 d++;
10045 while (isWORDCHAR(*s) && d < e) {
10046 *d++ = *s++;
10047 }
10048 if (d >= e)
10049 Perl_croak(aTHX_ "%s", ident_too_long);
10050 *d = '\0';
10051 }
10052 tmp_copline = CopLINE(PL_curcop);
10053 if (s < PL_bufend && isSPACE(*s)) {
10054 s = skipspace(s);
10055 }
10056 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
10057 /* ${foo[0]} and ${foo{bar}} and ${^CAPTURE[0]} notation. */
10058 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10059 const char * const brack =
10060 (const char *)
10061 ((*s == '[') ? "[...]" : "{...}");
10062 orig_copline = CopLINE(PL_curcop);
10063 CopLINE_set(PL_curcop, tmp_copline);
10064 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
10065 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10066 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
10067 funny, dest, brack, funny, dest, brack);
10068 CopLINE_set(PL_curcop, orig_copline);
10069 }
10070 bracket++;
10071 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
10072 PL_lex_allbrackets++;
10073 return s;
10074 }
10075 }
10076
10077 if ( !tmp_copline )
10078 tmp_copline = CopLINE(PL_curcop);
10079 if ((skip = s < PL_bufend && isSPACE(*s))) {
10080 /* Avoid incrementing line numbers or resetting PL_linestart,
10081 in case we have to back up. */
10082 STRLEN s_off = s - SvPVX(PL_linestr);
10083 s2 = peekspace(s);
10084 s = SvPVX(PL_linestr) + s_off;
10085 }
10086 else
10087 s2 = s;
10088
10089 /* Expect to find a closing } after consuming any trailing whitespace.
10090 */
10091 if (*s2 == '}') {
10092 /* Now increment line numbers if applicable. */
10093 if (skip)
10094 s = skipspace(s);
10095 s++;
10096 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10097 PL_lex_state = LEX_INTERPEND;
10098 PL_expect = XREF;
10099 }
10100 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) {
10101 if (ckWARN(WARN_AMBIGUOUS)
10102 && (keyword(dest, d - dest, 0)
10103 || get_cvn_flags(dest, d - dest, is_utf8
10104 ? SVf_UTF8
10105 : 0)))
10106 {
10107 SV *tmp = newSVpvn_flags( dest, d - dest,
10108 SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
10109 if (funny == '#')
10110 funny = '@';
10111 orig_copline = CopLINE(PL_curcop);
10112 CopLINE_set(PL_curcop, tmp_copline);
10113 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10114 "Ambiguous use of %c{%" SVf "} resolved to %c%" SVf,
10115 funny, SVfARG(tmp), funny, SVfARG(tmp));
10116 CopLINE_set(PL_curcop, orig_copline);
10117 }
10118 }
10119 }
10120 else {
10121 /* Didn't find the closing } at the point we expected, so restore
10122 state such that the next thing to process is the opening { and */
10123 s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
10124 CopLINE_set(PL_curcop, orig_copline);
10125 PL_parser->herelines = herelines;
10126 *dest = '\0';
10127 PL_parser->sub_no_recover = TRUE;
10128 }
10129 }
10130 else if ( PL_lex_state == LEX_INTERPNORMAL
10131 && !PL_lex_brackets
10132 && !intuit_more(s, PL_bufend))
10133 PL_lex_state = LEX_INTERPEND;
10134 return s;
10135 }
10136
10137 static bool
S_pmflag(pTHX_ const char * const valid_flags,U32 * pmfl,char ** s,char * charset,unsigned int * x_mod_count)10138 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) {
10139
10140 /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag
10141 * found in the parse starting at 's', based on the subset that are valid
10142 * in this context input to this routine in 'valid_flags'. Advances s.
10143 * Returns TRUE if the input should be treated as a valid flag, so the next
10144 * char may be as well; otherwise FALSE. 'charset' should point to a NUL
10145 * upon first call on the current regex. This routine will set it to any
10146 * charset modifier found. The caller shouldn't change it. This way,
10147 * another charset modifier encountered in the parse can be detected as an
10148 * error, as we have decided to allow only one */
10149
10150 const char c = **s;
10151 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
10152
10153 if ( charlen != 1 || ! strchr(valid_flags, c) ) {
10154 if (isWORDCHAR_lazy_if_safe( *s, PL_bufend, UTF)) {
10155 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
10156 UTF ? SVf_UTF8 : 0);
10157 (*s) += charlen;
10158 /* Pretend that it worked, so will continue processing before
10159 * dieing */
10160 return TRUE;
10161 }
10162 return FALSE;
10163 }
10164
10165 switch (c) {
10166
10167 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count);
10168 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
10169 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
10170 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
10171 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
10172 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
10173 case LOCALE_PAT_MOD:
10174 if (*charset) {
10175 goto multiple_charsets;
10176 }
10177 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
10178 *charset = c;
10179 break;
10180 case UNICODE_PAT_MOD:
10181 if (*charset) {
10182 goto multiple_charsets;
10183 }
10184 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
10185 *charset = c;
10186 break;
10187 case ASCII_RESTRICT_PAT_MOD:
10188 if (! *charset) {
10189 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
10190 }
10191 else {
10192
10193 /* Error if previous modifier wasn't an 'a', but if it was, see
10194 * if, and accept, a second occurrence (only) */
10195 if (*charset != 'a'
10196 || get_regex_charset(*pmfl)
10197 != REGEX_ASCII_RESTRICTED_CHARSET)
10198 {
10199 goto multiple_charsets;
10200 }
10201 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
10202 }
10203 *charset = c;
10204 break;
10205 case DEPENDS_PAT_MOD:
10206 if (*charset) {
10207 goto multiple_charsets;
10208 }
10209 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
10210 *charset = c;
10211 break;
10212 }
10213
10214 (*s)++;
10215 return TRUE;
10216
10217 multiple_charsets:
10218 if (*charset != c) {
10219 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
10220 }
10221 else if (c == 'a') {
10222 /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
10223 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
10224 }
10225 else {
10226 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
10227 }
10228
10229 /* Pretend that it worked, so will continue processing before dieing */
10230 (*s)++;
10231 return TRUE;
10232 }
10233
10234 STATIC char *
S_scan_pat(pTHX_ char * start,I32 type)10235 S_scan_pat(pTHX_ char *start, I32 type)
10236 {
10237 PMOP *pm;
10238 char *s;
10239 const char * const valid_flags =
10240 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
10241 char charset = '\0'; /* character set modifier */
10242 unsigned int x_mod_count = 0;
10243
10244 PERL_ARGS_ASSERT_SCAN_PAT;
10245
10246 s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
10247 if (!s)
10248 Perl_croak(aTHX_ "Search pattern not terminated");
10249
10250 pm = (PMOP*)newPMOP(type, 0);
10251 if (PL_multi_open == '?') {
10252 /* This is the only point in the code that sets PMf_ONCE: */
10253 pm->op_pmflags |= PMf_ONCE;
10254
10255 /* Hence it's safe to do this bit of PMOP book-keeping here, which
10256 allows us to restrict the list needed by reset to just the ??
10257 matches. */
10258 assert(type != OP_TRANS);
10259 if (PL_curstash) {
10260 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
10261 U32 elements;
10262 if (!mg) {
10263 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
10264 0);
10265 }
10266 elements = mg->mg_len / sizeof(PMOP**);
10267 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
10268 ((PMOP**)mg->mg_ptr) [elements++] = pm;
10269 mg->mg_len = elements * sizeof(PMOP**);
10270 PmopSTASH_set(pm,PL_curstash);
10271 }
10272 }
10273
10274 /* if qr/...(?{..}).../, then need to parse the pattern within a new
10275 * anon CV. False positives like qr/[(?{]/ are harmless */
10276
10277 if (type == OP_QR) {
10278 STRLEN len;
10279 char *e, *p = SvPV(PL_lex_stuff, len);
10280 e = p + len;
10281 for (; p < e; p++) {
10282 if (p[0] == '(' && p[1] == '?'
10283 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
10284 {
10285 pm->op_pmflags |= PMf_HAS_CV;
10286 break;
10287 }
10288 }
10289 pm->op_pmflags |= PMf_IS_QR;
10290 }
10291
10292 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
10293 &s, &charset, &x_mod_count))
10294 {};
10295 /* issue a warning if /c is specified,but /g is not */
10296 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
10297 {
10298 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
10299 "Use of /c modifier is meaningless without /g" );
10300 }
10301
10302 PL_lex_op = (OP*)pm;
10303 pl_yylval.ival = OP_MATCH;
10304 return s;
10305 }
10306
10307 STATIC char *
S_scan_subst(pTHX_ char * start)10308 S_scan_subst(pTHX_ char *start)
10309 {
10310 char *s;
10311 PMOP *pm;
10312 I32 first_start;
10313 line_t first_line;
10314 line_t linediff = 0;
10315 I32 es = 0;
10316 char charset = '\0'; /* character set modifier */
10317 unsigned int x_mod_count = 0;
10318 char *t;
10319
10320 PERL_ARGS_ASSERT_SCAN_SUBST;
10321
10322 pl_yylval.ival = OP_NULL;
10323
10324 s = scan_str(start, TRUE, FALSE, FALSE, &t);
10325
10326 if (!s)
10327 Perl_croak(aTHX_ "Substitution pattern not terminated");
10328
10329 s = t;
10330
10331 first_start = PL_multi_start;
10332 first_line = CopLINE(PL_curcop);
10333 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
10334 if (!s) {
10335 SvREFCNT_dec_NN(PL_lex_stuff);
10336 PL_lex_stuff = NULL;
10337 Perl_croak(aTHX_ "Substitution replacement not terminated");
10338 }
10339 PL_multi_start = first_start; /* so whole substitution is taken together */
10340
10341 pm = (PMOP*)newPMOP(OP_SUBST, 0);
10342
10343
10344 while (*s) {
10345 if (*s == EXEC_PAT_MOD) {
10346 s++;
10347 es++;
10348 }
10349 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
10350 &s, &charset, &x_mod_count))
10351 {
10352 break;
10353 }
10354 }
10355
10356 if ((pm->op_pmflags & PMf_CONTINUE)) {
10357 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
10358 }
10359
10360 if (es) {
10361 SV * const repl = newSVpvs("");
10362
10363 PL_multi_end = 0;
10364 pm->op_pmflags |= PMf_EVAL;
10365 for (; es > 1; es--) {
10366 sv_catpvs(repl, "eval ");
10367 }
10368 sv_catpvs(repl, "do {");
10369 sv_catsv(repl, PL_parser->lex_sub_repl);
10370 sv_catpvs(repl, "}");
10371 SvREFCNT_dec(PL_parser->lex_sub_repl);
10372 PL_parser->lex_sub_repl = repl;
10373 }
10374
10375
10376 linediff = CopLINE(PL_curcop) - first_line;
10377 if (linediff)
10378 CopLINE_set(PL_curcop, first_line);
10379
10380 if (linediff || es) {
10381 /* the IVX field indicates that the replacement string is a s///e;
10382 * the NVX field indicates how many src code lines the replacement
10383 * spreads over */
10384 sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV);
10385 ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = linediff;
10386 ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen =
10387 cBOOL(es);
10388 }
10389
10390 PL_lex_op = (OP*)pm;
10391 pl_yylval.ival = OP_SUBST;
10392 return s;
10393 }
10394
10395 STATIC char *
S_scan_trans(pTHX_ char * start)10396 S_scan_trans(pTHX_ char *start)
10397 {
10398 char* s;
10399 OP *o;
10400 U8 squash;
10401 U8 del;
10402 U8 complement;
10403 bool nondestruct = 0;
10404 char *t;
10405
10406 PERL_ARGS_ASSERT_SCAN_TRANS;
10407
10408 pl_yylval.ival = OP_NULL;
10409
10410 s = scan_str(start,FALSE,FALSE,FALSE,&t);
10411 if (!s)
10412 Perl_croak(aTHX_ "Transliteration pattern not terminated");
10413
10414 s = t;
10415
10416 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
10417 if (!s) {
10418 SvREFCNT_dec_NN(PL_lex_stuff);
10419 PL_lex_stuff = NULL;
10420 Perl_croak(aTHX_ "Transliteration replacement not terminated");
10421 }
10422
10423 complement = del = squash = 0;
10424 while (1) {
10425 switch (*s) {
10426 case 'c':
10427 complement = OPpTRANS_COMPLEMENT;
10428 break;
10429 case 'd':
10430 del = OPpTRANS_DELETE;
10431 break;
10432 case 's':
10433 squash = OPpTRANS_SQUASH;
10434 break;
10435 case 'r':
10436 nondestruct = 1;
10437 break;
10438 default:
10439 goto no_more;
10440 }
10441 s++;
10442 }
10443 no_more:
10444
10445 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
10446 o->op_private &= ~OPpTRANS_ALL;
10447 o->op_private |= del|squash|complement;
10448
10449 PL_lex_op = o;
10450 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
10451
10452
10453 return s;
10454 }
10455
10456 /* scan_heredoc
10457 Takes a pointer to the first < in <<FOO.
10458 Returns a pointer to the byte following <<FOO.
10459
10460 This function scans a heredoc, which involves different methods
10461 depending on whether we are in a string eval, quoted construct, etc.
10462 This is because PL_linestr could containing a single line of input, or
10463 a whole string being evalled, or the contents of the current quote-
10464 like operator.
10465
10466 The two basic methods are:
10467 - Steal lines from the input stream
10468 - Scan the heredoc in PL_linestr and remove it therefrom
10469
10470 In a file scope or filtered eval, the first method is used; in a
10471 string eval, the second.
10472
10473 In a quote-like operator, we have to choose between the two,
10474 depending on where we can find a newline. We peek into outer lex-
10475 ing scopes until we find one with a newline in it. If we reach the
10476 outermost lexing scope and it is a file, we use the stream method.
10477 Otherwise it is treated as an eval.
10478 */
10479
10480 STATIC char *
S_scan_heredoc(pTHX_ char * s)10481 S_scan_heredoc(pTHX_ char *s)
10482 {
10483 I32 op_type = OP_SCALAR;
10484 I32 len;
10485 SV *tmpstr;
10486 char term;
10487 char *d;
10488 char *e;
10489 char *peek;
10490 char *indent = 0;
10491 I32 indent_len = 0;
10492 bool indented = FALSE;
10493 const bool infile = PL_rsfp || PL_parser->filtered;
10494 const line_t origline = CopLINE(PL_curcop);
10495 LEXSHARED *shared = PL_parser->lex_shared;
10496
10497 PERL_ARGS_ASSERT_SCAN_HEREDOC;
10498
10499 s += 2;
10500 d = PL_tokenbuf + 1;
10501 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
10502 *PL_tokenbuf = '\n';
10503 peek = s;
10504
10505 if (*peek == '~') {
10506 indented = TRUE;
10507 peek++; s++;
10508 }
10509
10510 while (SPACE_OR_TAB(*peek))
10511 peek++;
10512
10513 if (*peek == '`' || *peek == '\'' || *peek =='"') {
10514 s = peek;
10515 term = *s++;
10516 s = delimcpy(d, e, s, PL_bufend, term, &len);
10517 if (s == PL_bufend)
10518 Perl_croak(aTHX_ "Unterminated delimiter for here document");
10519 d += len;
10520 s++;
10521 }
10522 else {
10523 if (*s == '\\')
10524 /* <<\FOO is equivalent to <<'FOO' */
10525 s++, term = '\'';
10526 else
10527 term = '"';
10528
10529 if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
10530 Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden");
10531
10532 peek = s;
10533
10534 while (isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF)) {
10535 peek += UTF ? UTF8SKIP(peek) : 1;
10536 }
10537
10538 len = (peek - s >= e - d) ? (e - d) : (peek - s);
10539 Copy(s, d, len, char);
10540 s += len;
10541 d += len;
10542 }
10543
10544 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
10545 Perl_croak(aTHX_ "Delimiter for here document is too long");
10546
10547 *d++ = '\n';
10548 *d = '\0';
10549 len = d - PL_tokenbuf;
10550
10551 #ifndef PERL_STRICT_CR
10552 d = (char *) memchr(s, '\r', PL_bufend - s);
10553 if (d) {
10554 char * const olds = s;
10555 s = d;
10556 while (s < PL_bufend) {
10557 if (*s == '\r') {
10558 *d++ = '\n';
10559 if (*++s == '\n')
10560 s++;
10561 }
10562 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
10563 *d++ = *s++;
10564 s++;
10565 }
10566 else
10567 *d++ = *s++;
10568 }
10569 *d = '\0';
10570 PL_bufend = d;
10571 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10572 s = olds;
10573 }
10574 #endif
10575
10576 tmpstr = newSV_type(SVt_PVIV);
10577 SvGROW(tmpstr, 80);
10578 if (term == '\'') {
10579 op_type = OP_CONST;
10580 SvIV_set(tmpstr, -1);
10581 }
10582 else if (term == '`') {
10583 op_type = OP_BACKTICK;
10584 SvIV_set(tmpstr, '\\');
10585 }
10586
10587 PL_multi_start = origline + 1 + PL_parser->herelines;
10588 PL_multi_open = PL_multi_close = '<';
10589
10590 /* inside a string eval or quote-like operator */
10591 if (!infile || PL_lex_inwhat) {
10592 SV *linestr;
10593 char *bufend;
10594 char * const olds = s;
10595 PERL_CONTEXT * const cx = CX_CUR();
10596 /* These two fields are not set until an inner lexing scope is
10597 entered. But we need them set here. */
10598 shared->ls_bufptr = s;
10599 shared->ls_linestr = PL_linestr;
10600
10601 if (PL_lex_inwhat) {
10602 /* Look for a newline. If the current buffer does not have one,
10603 peek into the line buffer of the parent lexing scope, going
10604 up as many levels as necessary to find one with a newline
10605 after bufptr.
10606 */
10607 while (!(s = (char *)memchr(
10608 (void *)shared->ls_bufptr, '\n',
10609 SvEND(shared->ls_linestr)-shared->ls_bufptr
10610 )))
10611 {
10612 shared = shared->ls_prev;
10613 /* shared is only null if we have gone beyond the outermost
10614 lexing scope. In a file, we will have broken out of the
10615 loop in the previous iteration. In an eval, the string buf-
10616 fer ends with "\n;", so the while condition above will have
10617 evaluated to false. So shared can never be null. Or so you
10618 might think. Odd syntax errors like s;@{<<; can gobble up
10619 the implicit semicolon at the end of a flie, causing the
10620 file handle to be closed even when we are not in a string
10621 eval. So shared may be null in that case.
10622 (Closing '>>}' here to balance the earlier open brace for
10623 editors that look for matched pairs.) */
10624 if (UNLIKELY(!shared))
10625 goto interminable;
10626 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
10627 most lexing scope. In a file, shared->ls_linestr at that
10628 level is just one line, so there is no body to steal. */
10629 if (infile && !shared->ls_prev) {
10630 s = olds;
10631 goto streaming;
10632 }
10633 }
10634 }
10635 else { /* eval or we've already hit EOF */
10636 s = (char*)memchr((void*)s, '\n', PL_bufend - s);
10637 if (!s)
10638 goto interminable;
10639 }
10640
10641 linestr = shared->ls_linestr;
10642 bufend = SvEND(linestr);
10643 d = s;
10644 if (indented) {
10645 char *myolds = s;
10646
10647 while (s < bufend - len + 1) {
10648 if (*s++ == '\n')
10649 ++PL_parser->herelines;
10650
10651 if (memEQ(s, PL_tokenbuf + 1, len - 1)) {
10652 char *backup = s;
10653 indent_len = 0;
10654
10655 /* Only valid if it's preceded by whitespace only */
10656 while (backup != myolds && --backup >= myolds) {
10657 if (! SPACE_OR_TAB(*backup)) {
10658 break;
10659 }
10660 indent_len++;
10661 }
10662
10663 /* No whitespace or all! */
10664 if (backup == s || *backup == '\n') {
10665 Newx(indent, indent_len + 1, char);
10666 memcpy(indent, backup + 1, indent_len);
10667 indent[indent_len] = 0;
10668 s--; /* before our delimiter */
10669 PL_parser->herelines--; /* this line doesn't count */
10670 break;
10671 }
10672 }
10673 }
10674 }
10675 else {
10676 while (s < bufend - len + 1
10677 && memNE(s,PL_tokenbuf,len) )
10678 {
10679 if (*s++ == '\n')
10680 ++PL_parser->herelines;
10681 }
10682 }
10683
10684 if (s >= bufend - len + 1) {
10685 goto interminable;
10686 }
10687
10688 sv_setpvn(tmpstr,d+1,s-d);
10689 s += len - 1;
10690 /* the preceding stmt passes a newline */
10691 PL_parser->herelines++;
10692
10693 /* s now points to the newline after the heredoc terminator.
10694 d points to the newline before the body of the heredoc.
10695 */
10696
10697 /* We are going to modify linestr in place here, so set
10698 aside copies of the string if necessary for re-evals or
10699 (caller $n)[6]. */
10700 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
10701 check shared->re_eval_str. */
10702 if (shared->re_eval_start || shared->re_eval_str) {
10703 /* Set aside the rest of the regexp */
10704 if (!shared->re_eval_str)
10705 shared->re_eval_str =
10706 newSVpvn(shared->re_eval_start,
10707 bufend - shared->re_eval_start);
10708 shared->re_eval_start -= s-d;
10709 }
10710
10711 if (cxstack_ix >= 0
10712 && CxTYPE(cx) == CXt_EVAL
10713 && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
10714 && cx->blk_eval.cur_text == linestr)
10715 {
10716 cx->blk_eval.cur_text = newSVsv(linestr);
10717 cx->blk_u16 |= 0x40; /* indicate cur_text is ref counted */
10718 }
10719
10720 /* Copy everything from s onwards back to d. */
10721 Move(s,d,bufend-s + 1,char);
10722 SvCUR_set(linestr, SvCUR(linestr) - (s-d));
10723 /* Setting PL_bufend only applies when we have not dug deeper
10724 into other scopes, because sublex_done sets PL_bufend to
10725 SvEND(PL_linestr). */
10726 if (shared == PL_parser->lex_shared)
10727 PL_bufend = SvEND(linestr);
10728 s = olds;
10729 }
10730 else {
10731 SV *linestr_save;
10732 char *oldbufptr_save;
10733 char *oldoldbufptr_save;
10734 streaming:
10735 SvPVCLEAR(tmpstr); /* avoid "uninitialized" warning */
10736 term = PL_tokenbuf[1];
10737 len--;
10738 linestr_save = PL_linestr; /* must restore this afterwards */
10739 d = s; /* and this */
10740 oldbufptr_save = PL_oldbufptr;
10741 oldoldbufptr_save = PL_oldoldbufptr;
10742 PL_linestr = newSVpvs("");
10743 PL_bufend = SvPVX(PL_linestr);
10744
10745 while (1) {
10746 PL_bufptr = PL_bufend;
10747 CopLINE_set(PL_curcop,
10748 origline + 1 + PL_parser->herelines);
10749
10750 if ( !lex_next_chunk(LEX_NO_TERM)
10751 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n'))
10752 {
10753 /* Simply freeing linestr_save might seem simpler here, as it
10754 does not matter what PL_linestr points to, since we are
10755 about to croak; but in a quote-like op, linestr_save
10756 will have been prospectively freed already, via
10757 SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
10758 restore PL_linestr. */
10759 SvREFCNT_dec_NN(PL_linestr);
10760 PL_linestr = linestr_save;
10761 PL_oldbufptr = oldbufptr_save;
10762 PL_oldoldbufptr = oldoldbufptr_save;
10763 goto interminable;
10764 }
10765
10766 CopLINE_set(PL_curcop, origline);
10767
10768 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
10769 s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
10770 /* ^That should be enough to avoid this needing to grow: */
10771 sv_catpvs(PL_linestr, "\n\0");
10772 assert(s == SvPVX(PL_linestr));
10773 PL_bufend = SvEND(PL_linestr);
10774 }
10775
10776 s = PL_bufptr;
10777 PL_parser->herelines++;
10778 PL_last_lop = PL_last_uni = NULL;
10779
10780 #ifndef PERL_STRICT_CR
10781 if (PL_bufend - PL_linestart >= 2) {
10782 if ( (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
10783 || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
10784 {
10785 PL_bufend[-2] = '\n';
10786 PL_bufend--;
10787 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10788 }
10789 else if (PL_bufend[-1] == '\r')
10790 PL_bufend[-1] = '\n';
10791 }
10792 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10793 PL_bufend[-1] = '\n';
10794 #endif
10795
10796 if (indented && (PL_bufend-s) >= len) {
10797 char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len));
10798
10799 if (found) {
10800 char *backup = found;
10801 indent_len = 0;
10802
10803 /* Only valid if it's preceded by whitespace only */
10804 while (backup != s && --backup >= s) {
10805 if (! SPACE_OR_TAB(*backup)) {
10806 break;
10807 }
10808 indent_len++;
10809 }
10810
10811 /* All whitespace or none! */
10812 if (backup == found || SPACE_OR_TAB(*backup)) {
10813 Newx(indent, indent_len + 1, char);
10814 memcpy(indent, backup, indent_len);
10815 indent[indent_len] = 0;
10816 SvREFCNT_dec(PL_linestr);
10817 PL_linestr = linestr_save;
10818 PL_linestart = SvPVX(linestr_save);
10819 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10820 PL_oldbufptr = oldbufptr_save;
10821 PL_oldoldbufptr = oldoldbufptr_save;
10822 s = d;
10823 break;
10824 }
10825 }
10826
10827 /* Didn't find it */
10828 sv_catsv(tmpstr,PL_linestr);
10829 }
10830 else {
10831 if (*s == term && PL_bufend-s >= len
10832 && memEQ(s,PL_tokenbuf + 1,len))
10833 {
10834 SvREFCNT_dec(PL_linestr);
10835 PL_linestr = linestr_save;
10836 PL_linestart = SvPVX(linestr_save);
10837 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10838 PL_oldbufptr = oldbufptr_save;
10839 PL_oldoldbufptr = oldoldbufptr_save;
10840 s = d;
10841 break;
10842 }
10843 else {
10844 sv_catsv(tmpstr,PL_linestr);
10845 }
10846 }
10847 } /* while (1) */
10848 }
10849
10850 PL_multi_end = origline + PL_parser->herelines;
10851
10852 if (indented && indent) {
10853 STRLEN linecount = 1;
10854 STRLEN herelen = SvCUR(tmpstr);
10855 char *ss = SvPVX(tmpstr);
10856 char *se = ss + herelen;
10857 SV *newstr = newSV(herelen+1);
10858 SvPOK_on(newstr);
10859
10860 /* Trim leading whitespace */
10861 while (ss < se) {
10862 /* newline only? Copy and move on */
10863 if (*ss == '\n') {
10864 sv_catpvs(newstr,"\n");
10865 ss++;
10866 linecount++;
10867
10868 /* Found our indentation? Strip it */
10869 }
10870 else if (se - ss >= indent_len
10871 && memEQ(ss, indent, indent_len))
10872 {
10873 STRLEN le = 0;
10874 ss += indent_len;
10875
10876 while ((ss + le) < se && *(ss + le) != '\n')
10877 le++;
10878
10879 sv_catpvn(newstr, ss, le);
10880 ss += le;
10881
10882 /* Line doesn't begin with our indentation? Croak */
10883 }
10884 else {
10885 Safefree(indent);
10886 Perl_croak(aTHX_
10887 "Indentation on line %d of here-doc doesn't match delimiter",
10888 (int)linecount
10889 );
10890 }
10891 } /* while */
10892
10893 /* avoid sv_setsv() as we dont wan't to COW here */
10894 sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr));
10895 Safefree(indent);
10896 SvREFCNT_dec_NN(newstr);
10897 }
10898
10899 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
10900 SvPV_shrink_to_cur(tmpstr);
10901 }
10902
10903 if (!IN_BYTES) {
10904 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
10905 SvUTF8_on(tmpstr);
10906 }
10907
10908 PL_lex_stuff = tmpstr;
10909 pl_yylval.ival = op_type;
10910 return s;
10911
10912 interminable:
10913 if (indent)
10914 Safefree(indent);
10915 SvREFCNT_dec(tmpstr);
10916 CopLINE_set(PL_curcop, origline);
10917 missingterm(PL_tokenbuf + 1, sizeof(PL_tokenbuf) - 1);
10918 }
10919
10920
10921 /* scan_inputsymbol
10922 takes: position of first '<' in input buffer
10923 returns: position of first char following the matching '>' in
10924 input buffer
10925 side-effects: pl_yylval and lex_op are set.
10926
10927 This code handles:
10928
10929 <> read from ARGV
10930 <<>> read from ARGV without magic open
10931 <FH> read from filehandle
10932 <pkg::FH> read from package qualified filehandle
10933 <pkg'FH> read from package qualified filehandle
10934 <$fh> read from filehandle in $fh
10935 <*.h> filename glob
10936
10937 */
10938
10939 STATIC char *
S_scan_inputsymbol(pTHX_ char * start)10940 S_scan_inputsymbol(pTHX_ char *start)
10941 {
10942 char *s = start; /* current position in buffer */
10943 char *end;
10944 I32 len;
10945 bool nomagicopen = FALSE;
10946 char *d = PL_tokenbuf; /* start of temp holding space */
10947 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
10948
10949 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
10950
10951 end = (char *) memchr(s, '\n', PL_bufend - s);
10952 if (!end)
10953 end = PL_bufend;
10954 if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
10955 nomagicopen = TRUE;
10956 *d = '\0';
10957 len = 0;
10958 s += 3;
10959 }
10960 else
10961 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
10962
10963 /* die if we didn't have space for the contents of the <>,
10964 or if it didn't end, or if we see a newline
10965 */
10966
10967 if (len >= (I32)sizeof PL_tokenbuf)
10968 Perl_croak(aTHX_ "Excessively long <> operator");
10969 if (s >= end)
10970 Perl_croak(aTHX_ "Unterminated <> operator");
10971
10972 s++;
10973
10974 /* check for <$fh>
10975 Remember, only scalar variables are interpreted as filehandles by
10976 this code. Anything more complex (e.g., <$fh{$num}>) will be
10977 treated as a glob() call.
10978 This code makes use of the fact that except for the $ at the front,
10979 a scalar variable and a filehandle look the same.
10980 */
10981 if (*d == '$' && d[1]) d++;
10982
10983 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
10984 while (isWORDCHAR_lazy_if_safe(d, e, UTF) || *d == '\'' || *d == ':') {
10985 d += UTF ? UTF8SKIP(d) : 1;
10986 }
10987
10988 /* If we've tried to read what we allow filehandles to look like, and
10989 there's still text left, then it must be a glob() and not a getline.
10990 Use scan_str to pull out the stuff between the <> and treat it
10991 as nothing more than a string.
10992 */
10993
10994 if (d - PL_tokenbuf != len) {
10995 pl_yylval.ival = OP_GLOB;
10996 s = scan_str(start,FALSE,FALSE,FALSE,NULL);
10997 if (!s)
10998 Perl_croak(aTHX_ "Glob not terminated");
10999 return s;
11000 }
11001 else {
11002 bool readline_overriden = FALSE;
11003 GV *gv_readline;
11004 /* we're in a filehandle read situation */
11005 d = PL_tokenbuf;
11006
11007 /* turn <> into <ARGV> */
11008 if (!len)
11009 Copy("ARGV",d,5,char);
11010
11011 /* Check whether readline() is overriden */
11012 if ((gv_readline = gv_override("readline",8)))
11013 readline_overriden = TRUE;
11014
11015 /* if <$fh>, create the ops to turn the variable into a
11016 filehandle
11017 */
11018 if (*d == '$') {
11019 /* try to find it in the pad for this block, otherwise find
11020 add symbol table ops
11021 */
11022 const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
11023 if (tmp != NOT_IN_PAD) {
11024 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
11025 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
11026 HEK * const stashname = HvNAME_HEK(stash);
11027 SV * const sym = sv_2mortal(newSVhek(stashname));
11028 sv_catpvs(sym, "::");
11029 sv_catpv(sym, d+1);
11030 d = SvPVX(sym);
11031 goto intro_sym;
11032 }
11033 else {
11034 OP * const o = newOP(OP_PADSV, 0);
11035 o->op_targ = tmp;
11036 PL_lex_op = readline_overriden
11037 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11038 op_append_elem(OP_LIST, o,
11039 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
11040 : newUNOP(OP_READLINE, 0, o);
11041 }
11042 }
11043 else {
11044 GV *gv;
11045 ++d;
11046 intro_sym:
11047 gv = gv_fetchpv(d,
11048 GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
11049 SVt_PV);
11050 PL_lex_op = readline_overriden
11051 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11052 op_append_elem(OP_LIST,
11053 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
11054 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11055 : newUNOP(OP_READLINE, 0,
11056 newUNOP(OP_RV2SV, 0,
11057 newGVOP(OP_GV, 0, gv)));
11058 }
11059 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
11060 pl_yylval.ival = OP_NULL;
11061 }
11062
11063 /* If it's none of the above, it must be a literal filehandle
11064 (<Foo::BAR> or <FOO>) so build a simple readline OP */
11065 else {
11066 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
11067 PL_lex_op = readline_overriden
11068 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11069 op_append_elem(OP_LIST,
11070 newGVOP(OP_GV, 0, gv),
11071 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11072 : newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
11073 pl_yylval.ival = OP_NULL;
11074 }
11075 }
11076
11077 return s;
11078 }
11079
11080
11081 /* scan_str
11082 takes:
11083 start position in buffer
11084 keep_bracketed_quoted preserve \ quoting of embedded delimiters, but
11085 only if they are of the open/close form
11086 keep_delims preserve the delimiters around the string
11087 re_reparse compiling a run-time /(?{})/:
11088 collapse // to /, and skip encoding src
11089 delimp if non-null, this is set to the position of
11090 the closing delimiter, or just after it if
11091 the closing and opening delimiters differ
11092 (i.e., the opening delimiter of a substitu-
11093 tion replacement)
11094 returns: position to continue reading from buffer
11095 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
11096 updates the read buffer.
11097
11098 This subroutine pulls a string out of the input. It is called for:
11099 q single quotes q(literal text)
11100 ' single quotes 'literal text'
11101 qq double quotes qq(interpolate $here please)
11102 " double quotes "interpolate $here please"
11103 qx backticks qx(/bin/ls -l)
11104 ` backticks `/bin/ls -l`
11105 qw quote words @EXPORT_OK = qw( func() $spam )
11106 m// regexp match m/this/
11107 s/// regexp substitute s/this/that/
11108 tr/// string transliterate tr/this/that/
11109 y/// string transliterate y/this/that/
11110 ($*@) sub prototypes sub foo ($)
11111 (stuff) sub attr parameters sub foo : attr(stuff)
11112 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
11113
11114 In most of these cases (all but <>, patterns and transliterate)
11115 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
11116 calls scan_str(). s/// makes yylex() call scan_subst() which calls
11117 scan_str(). tr/// and y/// make yylex() call scan_trans() which
11118 calls scan_str().
11119
11120 It skips whitespace before the string starts, and treats the first
11121 character as the delimiter. If the delimiter is one of ([{< then
11122 the corresponding "close" character )]}> is used as the closing
11123 delimiter. It allows quoting of delimiters, and if the string has
11124 balanced delimiters ([{<>}]) it allows nesting.
11125
11126 On success, the SV with the resulting string is put into lex_stuff or,
11127 if that is already non-NULL, into lex_repl. The second case occurs only
11128 when parsing the RHS of the special constructs s/// and tr/// (y///).
11129 For convenience, the terminating delimiter character is stuffed into
11130 SvIVX of the SV.
11131 */
11132
11133 char *
Perl_scan_str(pTHX_ char * start,int keep_bracketed_quoted,int keep_delims,int re_reparse,char ** delimp)11134 Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
11135 char **delimp
11136 )
11137 {
11138 SV *sv; /* scalar value: string */
11139 const char *tmps; /* temp string, used for delimiter matching */
11140 char *s = start; /* current position in the buffer */
11141 char term; /* terminating character */
11142 char *to; /* current position in the sv's data */
11143 I32 brackets = 1; /* bracket nesting level */
11144 bool d_is_utf8 = FALSE; /* is there any utf8 content? */
11145 IV termcode; /* terminating char. code */
11146 U8 termstr[UTF8_MAXBYTES+1]; /* terminating string */
11147 STRLEN termlen; /* length of terminating string */
11148 line_t herelines;
11149
11150 /* The delimiters that have a mirror-image closing one */
11151 const char * opening_delims = "([{<";
11152 const char * closing_delims = ")]}>";
11153
11154 /* The only non-UTF character that isn't a stand alone grapheme is
11155 * white-space, hence can't be a delimiter. */
11156 const char * non_grapheme_msg = "Use of unassigned code point or"
11157 " non-standalone grapheme for a delimiter"
11158 " is not allowed";
11159 PERL_ARGS_ASSERT_SCAN_STR;
11160
11161 /* skip space before the delimiter */
11162 if (isSPACE(*s)) {
11163 s = skipspace(s);
11164 }
11165
11166 /* mark where we are, in case we need to report errors */
11167 CLINE;
11168
11169 /* after skipping whitespace, the next character is the terminator */
11170 term = *s;
11171 if (!UTF || UTF8_IS_INVARIANT(term)) {
11172 termcode = termstr[0] = term;
11173 termlen = 1;
11174 }
11175 else {
11176 termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
11177 if (UTF && UNLIKELY(! is_grapheme((U8 *) start,
11178 (U8 *) s,
11179 (U8 *) PL_bufend,
11180 termcode)))
11181 {
11182 yyerror(non_grapheme_msg);
11183 }
11184
11185 Copy(s, termstr, termlen, U8);
11186 }
11187
11188 /* mark where we are */
11189 PL_multi_start = CopLINE(PL_curcop);
11190 PL_multi_open = termcode;
11191 herelines = PL_parser->herelines;
11192
11193 /* If the delimiter has a mirror-image closing one, get it */
11194 if (term && (tmps = strchr(opening_delims, term))) {
11195 termcode = termstr[0] = term = closing_delims[tmps - opening_delims];
11196 }
11197
11198 PL_multi_close = termcode;
11199
11200 if (PL_multi_open == PL_multi_close) {
11201 keep_bracketed_quoted = FALSE;
11202 }
11203
11204 /* create a new SV to hold the contents. 79 is the SV's initial length.
11205 What a random number. */
11206 sv = newSV_type(SVt_PVIV);
11207 SvGROW(sv, 80);
11208 SvIV_set(sv, termcode);
11209 (void)SvPOK_only(sv); /* validate pointer */
11210
11211 /* move past delimiter and try to read a complete string */
11212 if (keep_delims)
11213 sv_catpvn(sv, s, termlen);
11214 s += termlen;
11215 for (;;) {
11216 /* extend sv if need be */
11217 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
11218 /* set 'to' to the next character in the sv's string */
11219 to = SvPVX(sv)+SvCUR(sv);
11220
11221 /* if open delimiter is the close delimiter read unbridle */
11222 if (PL_multi_open == PL_multi_close) {
11223 for (; s < PL_bufend; s++,to++) {
11224 /* embedded newlines increment the current line number */
11225 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
11226 COPLINE_INC_WITH_HERELINES;
11227 /* handle quoted delimiters */
11228 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
11229 if (!keep_bracketed_quoted
11230 && (s[1] == term
11231 || (re_reparse && s[1] == '\\'))
11232 )
11233 s++;
11234 else /* any other quotes are simply copied straight through */
11235 *to++ = *s++;
11236 }
11237 /* terminate when run out of buffer (the for() condition), or
11238 have found the terminator */
11239 else if (*s == term) { /* First byte of terminator matches */
11240 if (termlen == 1) /* If is the only byte, are done */
11241 break;
11242
11243 /* If the remainder of the terminator matches, also are
11244 * done, after checking that is a separate grapheme */
11245 if ( s + termlen <= PL_bufend
11246 && memEQ(s + 1, (char*)termstr + 1, termlen - 1))
11247 {
11248 if ( UTF
11249 && UNLIKELY(! is_grapheme((U8 *) start,
11250 (U8 *) s,
11251 (U8 *) PL_bufend,
11252 termcode)))
11253 {
11254 yyerror(non_grapheme_msg);
11255 }
11256 break;
11257 }
11258 }
11259 else if (!d_is_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) {
11260 d_is_utf8 = TRUE;
11261 }
11262
11263 *to = *s;
11264 }
11265 }
11266
11267 /* if the terminator isn't the same as the start character (e.g.,
11268 matched brackets), we have to allow more in the quoting, and
11269 be prepared for nested brackets.
11270 */
11271 else {
11272 /* read until we run out of string, or we find the terminator */
11273 for (; s < PL_bufend; s++,to++) {
11274 /* embedded newlines increment the line count */
11275 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
11276 COPLINE_INC_WITH_HERELINES;
11277 /* backslashes can escape the open or closing characters */
11278 if (*s == '\\' && s+1 < PL_bufend) {
11279 if (!keep_bracketed_quoted
11280 && ( ((UV)s[1] == PL_multi_open)
11281 || ((UV)s[1] == PL_multi_close) ))
11282 {
11283 s++;
11284 }
11285 else
11286 *to++ = *s++;
11287 }
11288 /* allow nested opens and closes */
11289 else if ((UV)*s == PL_multi_close && --brackets <= 0)
11290 break;
11291 else if ((UV)*s == PL_multi_open)
11292 brackets++;
11293 else if (!d_is_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11294 d_is_utf8 = TRUE;
11295 *to = *s;
11296 }
11297 }
11298 /* terminate the copied string and update the sv's end-of-string */
11299 *to = '\0';
11300 SvCUR_set(sv, to - SvPVX_const(sv));
11301
11302 /*
11303 * this next chunk reads more into the buffer if we're not done yet
11304 */
11305
11306 if (s < PL_bufend)
11307 break; /* handle case where we are done yet :-) */
11308
11309 #ifndef PERL_STRICT_CR
11310 if (to - SvPVX_const(sv) >= 2) {
11311 if ( (to[-2] == '\r' && to[-1] == '\n')
11312 || (to[-2] == '\n' && to[-1] == '\r'))
11313 {
11314 to[-2] = '\n';
11315 to--;
11316 SvCUR_set(sv, to - SvPVX_const(sv));
11317 }
11318 else if (to[-1] == '\r')
11319 to[-1] = '\n';
11320 }
11321 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
11322 to[-1] = '\n';
11323 #endif
11324
11325 /* if we're out of file, or a read fails, bail and reset the current
11326 line marker so we can report where the unterminated string began
11327 */
11328 COPLINE_INC_WITH_HERELINES;
11329 PL_bufptr = PL_bufend;
11330 if (!lex_next_chunk(0)) {
11331 sv_free(sv);
11332 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11333 return NULL;
11334 }
11335 s = start = PL_bufptr;
11336 }
11337
11338 /* at this point, we have successfully read the delimited string */
11339
11340 if (keep_delims)
11341 sv_catpvn(sv, s, termlen);
11342 s += termlen;
11343
11344 if (d_is_utf8)
11345 SvUTF8_on(sv);
11346
11347 PL_multi_end = CopLINE(PL_curcop);
11348 CopLINE_set(PL_curcop, PL_multi_start);
11349 PL_parser->herelines = herelines;
11350
11351 /* if we allocated too much space, give some back */
11352 if (SvCUR(sv) + 5 < SvLEN(sv)) {
11353 SvLEN_set(sv, SvCUR(sv) + 1);
11354 SvPV_renew(sv, SvLEN(sv));
11355 }
11356
11357 /* decide whether this is the first or second quoted string we've read
11358 for this op
11359 */
11360
11361 if (PL_lex_stuff)
11362 PL_parser->lex_sub_repl = sv;
11363 else
11364 PL_lex_stuff = sv;
11365 if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s;
11366 return s;
11367 }
11368
11369 /*
11370 scan_num
11371 takes: pointer to position in buffer
11372 returns: pointer to new position in buffer
11373 side-effects: builds ops for the constant in pl_yylval.op
11374
11375 Read a number in any of the formats that Perl accepts:
11376
11377 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
11378 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
11379 0b[01](_?[01])* binary integers
11380 0[0-7](_?[0-7])* octal integers
11381 0x[0-9A-Fa-f](_?[0-9A-Fa-f])* hexadecimal integers
11382 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+ hexadecimal floats
11383
11384 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
11385 thing it reads.
11386
11387 If it reads a number without a decimal point or an exponent, it will
11388 try converting the number to an integer and see if it can do so
11389 without loss of precision.
11390 */
11391
11392 char *
Perl_scan_num(pTHX_ const char * start,YYSTYPE * lvalp)11393 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
11394 {
11395 const char *s = start; /* current position in buffer */
11396 char *d; /* destination in temp buffer */
11397 char *e; /* end of temp buffer */
11398 NV nv; /* number read, as a double */
11399 SV *sv = NULL; /* place to put the converted number */
11400 bool floatit; /* boolean: int or float? */
11401 const char *lastub = NULL; /* position of last underbar */
11402 static const char* const number_too_long = "Number too long";
11403 bool warned_about_underscore = 0;
11404 I32 shift; /* shift per digit for hex/oct/bin, hoisted here for fp */
11405 #define WARN_ABOUT_UNDERSCORE() \
11406 do { \
11407 if (!warned_about_underscore) { \
11408 warned_about_underscore = 1; \
11409 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), \
11410 "Misplaced _ in number"); \
11411 } \
11412 } while(0)
11413 /* Hexadecimal floating point.
11414 *
11415 * In many places (where we have quads and NV is IEEE 754 double)
11416 * we can fit the mantissa bits of a NV into an unsigned quad.
11417 * (Note that UVs might not be quads even when we have quads.)
11418 * This will not work everywhere, though (either no quads, or
11419 * using long doubles), in which case we have to resort to NV,
11420 * which will probably mean horrible loss of precision due to
11421 * multiple fp operations. */
11422 bool hexfp = FALSE;
11423 int total_bits = 0;
11424 int significant_bits = 0;
11425 #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
11426 # define HEXFP_UQUAD
11427 Uquad_t hexfp_uquad = 0;
11428 int hexfp_frac_bits = 0;
11429 #else
11430 # define HEXFP_NV
11431 NV hexfp_nv = 0.0;
11432 #endif
11433 NV hexfp_mult = 1.0;
11434 UV high_non_zero = 0; /* highest digit */
11435 int non_zero_integer_digits = 0;
11436
11437 PERL_ARGS_ASSERT_SCAN_NUM;
11438
11439 /* We use the first character to decide what type of number this is */
11440
11441 switch (*s) {
11442 default:
11443 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
11444
11445 /* if it starts with a 0, it could be an octal number, a decimal in
11446 0.13 disguise, or a hexadecimal number, or a binary number. */
11447 case '0':
11448 {
11449 /* variables:
11450 u holds the "number so far"
11451 overflowed was the number more than we can hold?
11452
11453 Shift is used when we add a digit. It also serves as an "are
11454 we in octal/hex/binary?" indicator to disallow hex characters
11455 when in octal mode.
11456 */
11457 NV n = 0.0;
11458 UV u = 0;
11459 bool overflowed = FALSE;
11460 bool just_zero = TRUE; /* just plain 0 or binary number? */
11461 bool has_digs = FALSE;
11462 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
11463 static const char* const bases[5] =
11464 { "", "binary", "", "octal", "hexadecimal" };
11465 static const char* const Bases[5] =
11466 { "", "Binary", "", "Octal", "Hexadecimal" };
11467 static const char* const maxima[5] =
11468 { "",
11469 "0b11111111111111111111111111111111",
11470 "",
11471 "037777777777",
11472 "0xffffffff" };
11473 const char *base, *Base, *max;
11474
11475 /* check for hex */
11476 if (isALPHA_FOLD_EQ(s[1], 'x')) {
11477 shift = 4;
11478 s += 2;
11479 just_zero = FALSE;
11480 } else if (isALPHA_FOLD_EQ(s[1], 'b')) {
11481 shift = 1;
11482 s += 2;
11483 just_zero = FALSE;
11484 }
11485 /* check for a decimal in disguise */
11486 else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e'))
11487 goto decimal;
11488 /* so it must be octal */
11489 else {
11490 shift = 3;
11491 s++;
11492 }
11493
11494 if (*s == '_') {
11495 WARN_ABOUT_UNDERSCORE();
11496 lastub = s++;
11497 }
11498
11499 base = bases[shift];
11500 Base = Bases[shift];
11501 max = maxima[shift];
11502
11503 /* read the rest of the number */
11504 for (;;) {
11505 /* x is used in the overflow test,
11506 b is the digit we're adding on. */
11507 UV x, b;
11508
11509 switch (*s) {
11510
11511 /* if we don't mention it, we're done */
11512 default:
11513 goto out;
11514
11515 /* _ are ignored -- but warned about if consecutive */
11516 case '_':
11517 if (lastub && s == lastub + 1)
11518 WARN_ABOUT_UNDERSCORE();
11519 lastub = s++;
11520 break;
11521
11522 /* 8 and 9 are not octal */
11523 case '8': case '9':
11524 if (shift == 3)
11525 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
11526 /* FALLTHROUGH */
11527
11528 /* octal digits */
11529 case '2': case '3': case '4':
11530 case '5': case '6': case '7':
11531 if (shift == 1)
11532 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
11533 /* FALLTHROUGH */
11534
11535 case '0': case '1':
11536 b = *s++ & 15; /* ASCII digit -> value of digit */
11537 goto digit;
11538
11539 /* hex digits */
11540 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
11541 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
11542 /* make sure they said 0x */
11543 if (shift != 4)
11544 goto out;
11545 b = (*s++ & 7) + 9;
11546
11547 /* Prepare to put the digit we have onto the end
11548 of the number so far. We check for overflows.
11549 */
11550
11551 digit:
11552 just_zero = FALSE;
11553 has_digs = TRUE;
11554 if (!overflowed) {
11555 assert(shift >= 0);
11556 x = u << shift; /* make room for the digit */
11557
11558 total_bits += shift;
11559
11560 if ((x >> shift) != u
11561 && !(PL_hints & HINT_NEW_BINARY)) {
11562 overflowed = TRUE;
11563 n = (NV) u;
11564 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11565 "Integer overflow in %s number",
11566 base);
11567 } else
11568 u = x | b; /* add the digit to the end */
11569 }
11570 if (overflowed) {
11571 n *= nvshift[shift];
11572 /* If an NV has not enough bits in its
11573 * mantissa to represent an UV this summing of
11574 * small low-order numbers is a waste of time
11575 * (because the NV cannot preserve the
11576 * low-order bits anyway): we could just
11577 * remember when did we overflow and in the
11578 * end just multiply n by the right
11579 * amount. */
11580 n += (NV) b;
11581 }
11582
11583 if (high_non_zero == 0 && b > 0)
11584 high_non_zero = b;
11585
11586 if (high_non_zero)
11587 non_zero_integer_digits++;
11588
11589 /* this could be hexfp, but peek ahead
11590 * to avoid matching ".." */
11591 if (UNLIKELY(HEXFP_PEEK(s))) {
11592 goto out;
11593 }
11594
11595 break;
11596 }
11597 }
11598
11599 /* if we get here, we had success: make a scalar value from
11600 the number.
11601 */
11602 out:
11603
11604 /* final misplaced underbar check */
11605 if (s[-1] == '_')
11606 WARN_ABOUT_UNDERSCORE();
11607
11608 if (UNLIKELY(HEXFP_PEEK(s))) {
11609 /* Do sloppy (on the underbars) but quick detection
11610 * (and value construction) for hexfp, the decimal
11611 * detection will shortly be more thorough with the
11612 * underbar checks. */
11613 const char* h = s;
11614 significant_bits = non_zero_integer_digits * shift;
11615 #ifdef HEXFP_UQUAD
11616 hexfp_uquad = u;
11617 #else /* HEXFP_NV */
11618 hexfp_nv = u;
11619 #endif
11620 /* Ignore the leading zero bits of
11621 * the high (first) non-zero digit. */
11622 if (high_non_zero) {
11623 if (high_non_zero < 0x8)
11624 significant_bits--;
11625 if (high_non_zero < 0x4)
11626 significant_bits--;
11627 if (high_non_zero < 0x2)
11628 significant_bits--;
11629 }
11630
11631 if (*h == '.') {
11632 #ifdef HEXFP_NV
11633 NV nv_mult = 1.0;
11634 #endif
11635 bool accumulate = TRUE;
11636 U8 b;
11637 int lim = 1 << shift;
11638 for (h++; ((isXDIGIT(*h) && (b = XDIGIT_VALUE(*h)) < lim) ||
11639 *h == '_'); h++) {
11640 if (isXDIGIT(*h)) {
11641 significant_bits += shift;
11642 #ifdef HEXFP_UQUAD
11643 if (accumulate) {
11644 if (significant_bits < NV_MANT_DIG) {
11645 /* We are in the long "run" of xdigits,
11646 * accumulate the full four bits. */
11647 assert(shift >= 0);
11648 hexfp_uquad <<= shift;
11649 hexfp_uquad |= b;
11650 hexfp_frac_bits += shift;
11651 } else if (significant_bits - shift < NV_MANT_DIG) {
11652 /* We are at a hexdigit either at,
11653 * or straddling, the edge of mantissa.
11654 * We will try grabbing as many as
11655 * possible bits. */
11656 int tail =
11657 significant_bits - NV_MANT_DIG;
11658 if (tail <= 0)
11659 tail += shift;
11660 assert(tail >= 0);
11661 hexfp_uquad <<= tail;
11662 assert((shift - tail) >= 0);
11663 hexfp_uquad |= b >> (shift - tail);
11664 hexfp_frac_bits += tail;
11665
11666 /* Ignore the trailing zero bits
11667 * of the last non-zero xdigit.
11668 *
11669 * The assumption here is that if
11670 * one has input of e.g. the xdigit
11671 * eight (0x8), there is only one
11672 * bit being input, not the full
11673 * four bits. Conversely, if one
11674 * specifies a zero xdigit, the
11675 * assumption is that one really
11676 * wants all those bits to be zero. */
11677 if (b) {
11678 if ((b & 0x1) == 0x0) {
11679 significant_bits--;
11680 if ((b & 0x2) == 0x0) {
11681 significant_bits--;
11682 if ((b & 0x4) == 0x0) {
11683 significant_bits--;
11684 }
11685 }
11686 }
11687 }
11688
11689 accumulate = FALSE;
11690 }
11691 } else {
11692 /* Keep skipping the xdigits, and
11693 * accumulating the significant bits,
11694 * but do not shift the uquad
11695 * (which would catastrophically drop
11696 * high-order bits) or accumulate the
11697 * xdigits anymore. */
11698 }
11699 #else /* HEXFP_NV */
11700 if (accumulate) {
11701 nv_mult /= nvshift[shift];
11702 if (nv_mult > 0.0)
11703 hexfp_nv += b * nv_mult;
11704 else
11705 accumulate = FALSE;
11706 }
11707 #endif
11708 }
11709 if (significant_bits >= NV_MANT_DIG)
11710 accumulate = FALSE;
11711 }
11712 }
11713
11714 if ((total_bits > 0 || significant_bits > 0) &&
11715 isALPHA_FOLD_EQ(*h, 'p')) {
11716 bool negexp = FALSE;
11717 h++;
11718 if (*h == '+')
11719 h++;
11720 else if (*h == '-') {
11721 negexp = TRUE;
11722 h++;
11723 }
11724 if (isDIGIT(*h)) {
11725 I32 hexfp_exp = 0;
11726 while (isDIGIT(*h) || *h == '_') {
11727 if (isDIGIT(*h)) {
11728 hexfp_exp *= 10;
11729 hexfp_exp += *h - '0';
11730 #ifdef NV_MIN_EXP
11731 if (negexp
11732 && -hexfp_exp < NV_MIN_EXP - 1) {
11733 /* NOTE: this means that the exponent
11734 * underflow warning happens for
11735 * the IEEE 754 subnormals (denormals),
11736 * because DBL_MIN_EXP etc are the lowest
11737 * possible binary (or, rather, DBL_RADIX-base)
11738 * exponent for normals, not subnormals.
11739 *
11740 * This may or may not be a good thing. */
11741 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11742 "Hexadecimal float: exponent underflow");
11743 break;
11744 }
11745 #endif
11746 #ifdef NV_MAX_EXP
11747 if (!negexp
11748 && hexfp_exp > NV_MAX_EXP - 1) {
11749 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11750 "Hexadecimal float: exponent overflow");
11751 break;
11752 }
11753 #endif
11754 }
11755 h++;
11756 }
11757 if (negexp)
11758 hexfp_exp = -hexfp_exp;
11759 #ifdef HEXFP_UQUAD
11760 hexfp_exp -= hexfp_frac_bits;
11761 #endif
11762 hexfp_mult = Perl_pow(2.0, hexfp_exp);
11763 hexfp = TRUE;
11764 goto decimal;
11765 }
11766 }
11767 }
11768
11769 if (shift != 3 && !has_digs) {
11770 /* 0x or 0b with no digits, treat it as an error.
11771 Originally this backed up the parse before the b or
11772 x, but that has the potential for silent changes in
11773 behaviour, like for: "0x.3" and "0x+$foo".
11774 */
11775 const char *d = s;
11776 char *oldbp = PL_bufptr;
11777 if (*d) ++d; /* so the user sees the bad non-digit */
11778 PL_bufptr = (char *)d; /* so yyerror reports the context */
11779 yyerror(Perl_form(aTHX_ "No digits found for %s literal",
11780 shift == 4 ? "hexadecimal" : "binary"));
11781 PL_bufptr = oldbp;
11782 }
11783
11784 if (overflowed) {
11785 if (n > 4294967295.0)
11786 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11787 "%s number > %s non-portable",
11788 Base, max);
11789 sv = newSVnv(n);
11790 }
11791 else {
11792 #if UVSIZE > 4
11793 if (u > 0xffffffff)
11794 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11795 "%s number > %s non-portable",
11796 Base, max);
11797 #endif
11798 sv = newSVuv(u);
11799 }
11800 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
11801 sv = new_constant(start, s - start, "integer",
11802 sv, NULL, NULL, 0, NULL);
11803 else if (PL_hints & HINT_NEW_BINARY)
11804 sv = new_constant(start, s - start, "binary",
11805 sv, NULL, NULL, 0, NULL);
11806 }
11807 break;
11808
11809 /*
11810 handle decimal numbers.
11811 we're also sent here when we read a 0 as the first digit
11812 */
11813 case '1': case '2': case '3': case '4': case '5':
11814 case '6': case '7': case '8': case '9': case '.':
11815 decimal:
11816 d = PL_tokenbuf;
11817 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
11818 floatit = FALSE;
11819 if (hexfp) {
11820 floatit = TRUE;
11821 *d++ = '0';
11822 switch (shift) {
11823 case 4:
11824 *d++ = 'x';
11825 s = start + 2;
11826 break;
11827 case 3:
11828 s = start + 1;
11829 break;
11830 case 1:
11831 *d++ = 'b';
11832 s = start + 2;
11833 break;
11834 default:
11835 NOT_REACHED; /* NOTREACHED */
11836 }
11837 }
11838
11839 /* read next group of digits and _ and copy into d */
11840 while (isDIGIT(*s)
11841 || *s == '_'
11842 || UNLIKELY(hexfp && isXDIGIT(*s)))
11843 {
11844 /* skip underscores, checking for misplaced ones
11845 if -w is on
11846 */
11847 if (*s == '_') {
11848 if (lastub && s == lastub + 1)
11849 WARN_ABOUT_UNDERSCORE();
11850 lastub = s++;
11851 }
11852 else {
11853 /* check for end of fixed-length buffer */
11854 if (d >= e)
11855 Perl_croak(aTHX_ "%s", number_too_long);
11856 /* if we're ok, copy the character */
11857 *d++ = *s++;
11858 }
11859 }
11860
11861 /* final misplaced underbar check */
11862 if (lastub && s == lastub + 1)
11863 WARN_ABOUT_UNDERSCORE();
11864
11865 /* read a decimal portion if there is one. avoid
11866 3..5 being interpreted as the number 3. followed
11867 by .5
11868 */
11869 if (*s == '.' && s[1] != '.') {
11870 floatit = TRUE;
11871 *d++ = *s++;
11872
11873 if (*s == '_') {
11874 WARN_ABOUT_UNDERSCORE();
11875 lastub = s;
11876 }
11877
11878 /* copy, ignoring underbars, until we run out of digits.
11879 */
11880 for (; isDIGIT(*s)
11881 || *s == '_'
11882 || UNLIKELY(hexfp && isXDIGIT(*s));
11883 s++)
11884 {
11885 /* fixed length buffer check */
11886 if (d >= e)
11887 Perl_croak(aTHX_ "%s", number_too_long);
11888 if (*s == '_') {
11889 if (lastub && s == lastub + 1)
11890 WARN_ABOUT_UNDERSCORE();
11891 lastub = s;
11892 }
11893 else
11894 *d++ = *s;
11895 }
11896 /* fractional part ending in underbar? */
11897 if (s[-1] == '_')
11898 WARN_ABOUT_UNDERSCORE();
11899 if (*s == '.' && isDIGIT(s[1])) {
11900 /* oops, it's really a v-string, but without the "v" */
11901 s = start;
11902 goto vstring;
11903 }
11904 }
11905
11906 /* read exponent part, if present */
11907 if ((isALPHA_FOLD_EQ(*s, 'e')
11908 || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
11909 && memCHRs("+-0123456789_", s[1]))
11910 {
11911 int exp_digits = 0;
11912 const char *save_s = s;
11913 char * save_d = d;
11914
11915 /* regardless of whether user said 3E5 or 3e5, use lower 'e',
11916 ditto for p (hexfloats) */
11917 if ((isALPHA_FOLD_EQ(*s, 'e'))) {
11918 /* At least some Mach atof()s don't grok 'E' */
11919 *d++ = 'e';
11920 }
11921 else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) {
11922 *d++ = 'p';
11923 }
11924
11925 s++;
11926
11927
11928 /* stray preinitial _ */
11929 if (*s == '_') {
11930 WARN_ABOUT_UNDERSCORE();
11931 lastub = s++;
11932 }
11933
11934 /* allow positive or negative exponent */
11935 if (*s == '+' || *s == '-')
11936 *d++ = *s++;
11937
11938 /* stray initial _ */
11939 if (*s == '_') {
11940 WARN_ABOUT_UNDERSCORE();
11941 lastub = s++;
11942 }
11943
11944 /* read digits of exponent */
11945 while (isDIGIT(*s) || *s == '_') {
11946 if (isDIGIT(*s)) {
11947 ++exp_digits;
11948 if (d >= e)
11949 Perl_croak(aTHX_ "%s", number_too_long);
11950 *d++ = *s++;
11951 }
11952 else {
11953 if (((lastub && s == lastub + 1)
11954 || (!isDIGIT(s[1]) && s[1] != '_')))
11955 WARN_ABOUT_UNDERSCORE();
11956 lastub = s++;
11957 }
11958 }
11959
11960 if (!exp_digits) {
11961 /* no exponent digits, the [eEpP] could be for something else,
11962 * though in practice we don't get here for p since that's preparsed
11963 * earlier, and results in only the 0xX being consumed, so behave similarly
11964 * for decimal floats and consume only the D.DD, leaving the [eE] to the
11965 * next token.
11966 */
11967 s = save_s;
11968 d = save_d;
11969 }
11970 else {
11971 floatit = TRUE;
11972 }
11973 }
11974
11975
11976 /*
11977 We try to do an integer conversion first if no characters
11978 indicating "float" have been found.
11979 */
11980
11981 if (!floatit) {
11982 UV uv;
11983 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
11984
11985 if (flags == IS_NUMBER_IN_UV) {
11986 if (uv <= IV_MAX)
11987 sv = newSViv(uv); /* Prefer IVs over UVs. */
11988 else
11989 sv = newSVuv(uv);
11990 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
11991 if (uv <= (UV) IV_MIN)
11992 sv = newSViv(-(IV)uv);
11993 else
11994 floatit = TRUE;
11995 } else
11996 floatit = TRUE;
11997 }
11998 if (floatit) {
11999 /* terminate the string */
12000 *d = '\0';
12001 if (UNLIKELY(hexfp)) {
12002 # ifdef NV_MANT_DIG
12003 if (significant_bits > NV_MANT_DIG)
12004 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
12005 "Hexadecimal float: mantissa overflow");
12006 # endif
12007 #ifdef HEXFP_UQUAD
12008 nv = hexfp_uquad * hexfp_mult;
12009 #else /* HEXFP_NV */
12010 nv = hexfp_nv * hexfp_mult;
12011 #endif
12012 } else {
12013 nv = Atof(PL_tokenbuf);
12014 }
12015 sv = newSVnv(nv);
12016 }
12017
12018 if ( floatit
12019 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
12020 const char *const key = floatit ? "float" : "integer";
12021 const STRLEN keylen = floatit ? 5 : 7;
12022 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
12023 key, keylen, sv, NULL, NULL, 0, NULL);
12024 }
12025 break;
12026
12027 /* if it starts with a v, it could be a v-string */
12028 case 'v':
12029 vstring:
12030 sv = newSV(5); /* preallocate storage space */
12031 ENTER_with_name("scan_vstring");
12032 SAVEFREESV(sv);
12033 s = scan_vstring(s, PL_bufend, sv);
12034 SvREFCNT_inc_simple_void_NN(sv);
12035 LEAVE_with_name("scan_vstring");
12036 break;
12037 }
12038
12039 /* make the op for the constant and return */
12040
12041 if (sv)
12042 lvalp->opval = newSVOP(OP_CONST, 0, sv);
12043 else
12044 lvalp->opval = NULL;
12045
12046 return (char *)s;
12047 }
12048
12049 STATIC char *
S_scan_formline(pTHX_ char * s)12050 S_scan_formline(pTHX_ char *s)
12051 {
12052 SV * const stuff = newSVpvs("");
12053 bool needargs = FALSE;
12054 bool eofmt = FALSE;
12055
12056 PERL_ARGS_ASSERT_SCAN_FORMLINE;
12057
12058 while (!needargs) {
12059 char *eol;
12060 if (*s == '.') {
12061 char *t = s+1;
12062 #ifdef PERL_STRICT_CR
12063 while (SPACE_OR_TAB(*t))
12064 t++;
12065 #else
12066 while (SPACE_OR_TAB(*t) || *t == '\r')
12067 t++;
12068 #endif
12069 if (*t == '\n' || t == PL_bufend) {
12070 eofmt = TRUE;
12071 break;
12072 }
12073 }
12074 eol = (char *) memchr(s,'\n',PL_bufend-s);
12075 if (!eol++)
12076 eol = PL_bufend;
12077 if (*s != '#') {
12078 char *t;
12079 for (t = s; t < eol; t++) {
12080 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
12081 needargs = FALSE;
12082 goto enough; /* ~~ must be first line in formline */
12083 }
12084 if (*t == '@' || *t == '^')
12085 needargs = TRUE;
12086 }
12087 if (eol > s) {
12088 sv_catpvn(stuff, s, eol-s);
12089 #ifndef PERL_STRICT_CR
12090 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
12091 char *end = SvPVX(stuff) + SvCUR(stuff);
12092 end[-2] = '\n';
12093 end[-1] = '\0';
12094 SvCUR_set(stuff, SvCUR(stuff) - 1);
12095 }
12096 #endif
12097 }
12098 else
12099 break;
12100 }
12101 s = (char*)eol;
12102 if ((PL_rsfp || PL_parser->filtered)
12103 && PL_parser->form_lex_state == LEX_NORMAL) {
12104 bool got_some;
12105 PL_bufptr = PL_bufend;
12106 COPLINE_INC_WITH_HERELINES;
12107 got_some = lex_next_chunk(0);
12108 CopLINE_dec(PL_curcop);
12109 s = PL_bufptr;
12110 if (!got_some)
12111 break;
12112 }
12113 incline(s, PL_bufend);
12114 }
12115 enough:
12116 if (!SvCUR(stuff) || needargs)
12117 PL_lex_state = PL_parser->form_lex_state;
12118 if (SvCUR(stuff)) {
12119 PL_expect = XSTATE;
12120 if (needargs) {
12121 const char *s2 = s;
12122 while (isSPACE(*s2) && *s2 != '\n')
12123 s2++;
12124 if (*s2 == '{') {
12125 PL_expect = XTERMBLOCK;
12126 NEXTVAL_NEXTTOKE.ival = 0;
12127 force_next(DO);
12128 }
12129 NEXTVAL_NEXTTOKE.ival = 0;
12130 force_next(FORMLBRACK);
12131 }
12132 if (!IN_BYTES) {
12133 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
12134 SvUTF8_on(stuff);
12135 }
12136 NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0, stuff);
12137 force_next(THING);
12138 }
12139 else {
12140 SvREFCNT_dec(stuff);
12141 if (eofmt)
12142 PL_lex_formbrack = 0;
12143 }
12144 return s;
12145 }
12146
12147 I32
Perl_start_subparse(pTHX_ I32 is_format,U32 flags)12148 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
12149 {
12150 const I32 oldsavestack_ix = PL_savestack_ix;
12151 CV* const outsidecv = PL_compcv;
12152
12153 SAVEI32(PL_subline);
12154 save_item(PL_subname);
12155 SAVESPTR(PL_compcv);
12156
12157 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
12158 CvFLAGS(PL_compcv) |= flags;
12159
12160 PL_subline = CopLINE(PL_curcop);
12161 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
12162 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
12163 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
12164 if (outsidecv && CvPADLIST(outsidecv))
12165 CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
12166
12167 return oldsavestack_ix;
12168 }
12169
12170
12171 /* Do extra initialisation of a CV (typically one just created by
12172 * start_subparse()) if that CV is for a named sub
12173 */
12174
12175 void
Perl_init_named_cv(pTHX_ CV * cv,OP * nameop)12176 Perl_init_named_cv(pTHX_ CV *cv, OP *nameop)
12177 {
12178 PERL_ARGS_ASSERT_INIT_NAMED_CV;
12179
12180 if (nameop->op_type == OP_CONST) {
12181 const char *const name = SvPV_nolen_const(((SVOP*)nameop)->op_sv);
12182 if ( strEQ(name, "BEGIN")
12183 || strEQ(name, "END")
12184 || strEQ(name, "INIT")
12185 || strEQ(name, "CHECK")
12186 || strEQ(name, "UNITCHECK")
12187 )
12188 CvSPECIAL_on(cv);
12189 }
12190 else
12191 /* State subs inside anonymous subs need to be
12192 clonable themselves. */
12193 if ( CvANON(CvOUTSIDE(cv))
12194 || CvCLONE(CvOUTSIDE(cv))
12195 || !PadnameIsSTATE(PadlistNAMESARRAY(CvPADLIST(
12196 CvOUTSIDE(cv)
12197 ))[nameop->op_targ])
12198 )
12199 CvCLONE_on(cv);
12200 }
12201
12202
12203 static int
S_yywarn(pTHX_ const char * const s,U32 flags)12204 S_yywarn(pTHX_ const char *const s, U32 flags)
12205 {
12206 PERL_ARGS_ASSERT_YYWARN;
12207
12208 PL_in_eval |= EVAL_WARNONLY;
12209 yyerror_pv(s, flags);
12210 return 0;
12211 }
12212
12213 void
Perl_abort_execution(pTHX_ const char * const msg,const char * const name)12214 Perl_abort_execution(pTHX_ const char * const msg, const char * const name)
12215 {
12216 PERL_ARGS_ASSERT_ABORT_EXECUTION;
12217
12218 if (PL_minus_c)
12219 Perl_croak(aTHX_ "%s%s had compilation errors.\n", msg, name);
12220 else {
12221 Perl_croak(aTHX_
12222 "%sExecution of %s aborted due to compilation errors.\n", msg, name);
12223 }
12224 NOT_REACHED; /* NOTREACHED */
12225 }
12226
12227 void
Perl_yyquit(pTHX)12228 Perl_yyquit(pTHX)
12229 {
12230 /* Called, after at least one error has been found, to abort the parse now,
12231 * instead of trying to forge ahead */
12232
12233 yyerror_pvn(NULL, 0, 0);
12234 }
12235
12236 int
Perl_yyerror(pTHX_ const char * const s)12237 Perl_yyerror(pTHX_ const char *const s)
12238 {
12239 PERL_ARGS_ASSERT_YYERROR;
12240 return yyerror_pvn(s, strlen(s), 0);
12241 }
12242
12243 int
Perl_yyerror_pv(pTHX_ const char * const s,U32 flags)12244 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
12245 {
12246 PERL_ARGS_ASSERT_YYERROR_PV;
12247 return yyerror_pvn(s, strlen(s), flags);
12248 }
12249
12250 int
Perl_yyerror_pvn(pTHX_ const char * const s,STRLEN len,U32 flags)12251 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
12252 {
12253 const char *context = NULL;
12254 int contlen = -1;
12255 SV *msg;
12256 SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
12257 int yychar = PL_parser->yychar;
12258
12259 /* Output error message 's' with length 'len'. 'flags' are SV flags that
12260 * apply. If the number of errors found is large enough, it abandons
12261 * parsing. If 's' is NULL, there is no message, and it abandons
12262 * processing unconditionally */
12263
12264 if (s != NULL) {
12265 if (!yychar || (yychar == ';' && !PL_rsfp))
12266 sv_catpvs(where_sv, "at EOF");
12267 else if ( PL_oldoldbufptr
12268 && PL_bufptr > PL_oldoldbufptr
12269 && PL_bufptr - PL_oldoldbufptr < 200
12270 && PL_oldoldbufptr != PL_oldbufptr
12271 && PL_oldbufptr != PL_bufptr)
12272 {
12273 /*
12274 Only for NetWare:
12275 The code below is removed for NetWare because it
12276 abends/crashes on NetWare when the script has error such as
12277 not having the closing quotes like:
12278 if ($var eq "value)
12279 Checking of white spaces is anyway done in NetWare code.
12280 */
12281 #ifndef NETWARE
12282 while (isSPACE(*PL_oldoldbufptr))
12283 PL_oldoldbufptr++;
12284 #endif
12285 context = PL_oldoldbufptr;
12286 contlen = PL_bufptr - PL_oldoldbufptr;
12287 }
12288 else if ( PL_oldbufptr
12289 && PL_bufptr > PL_oldbufptr
12290 && PL_bufptr - PL_oldbufptr < 200
12291 && PL_oldbufptr != PL_bufptr) {
12292 /*
12293 Only for NetWare:
12294 The code below is removed for NetWare because it
12295 abends/crashes on NetWare when the script has error such as
12296 not having the closing quotes like:
12297 if ($var eq "value)
12298 Checking of white spaces is anyway done in NetWare code.
12299 */
12300 #ifndef NETWARE
12301 while (isSPACE(*PL_oldbufptr))
12302 PL_oldbufptr++;
12303 #endif
12304 context = PL_oldbufptr;
12305 contlen = PL_bufptr - PL_oldbufptr;
12306 }
12307 else if (yychar > 255)
12308 sv_catpvs(where_sv, "next token ???");
12309 else if (yychar == YYEMPTY) {
12310 if (PL_lex_state == LEX_NORMAL)
12311 sv_catpvs(where_sv, "at end of line");
12312 else if (PL_lex_inpat)
12313 sv_catpvs(where_sv, "within pattern");
12314 else
12315 sv_catpvs(where_sv, "within string");
12316 }
12317 else {
12318 sv_catpvs(where_sv, "next char ");
12319 if (yychar < 32)
12320 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
12321 else if (isPRINT_LC(yychar)) {
12322 const char string = yychar;
12323 sv_catpvn(where_sv, &string, 1);
12324 }
12325 else
12326 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
12327 }
12328 msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
12329 Perl_sv_catpvf(aTHX_ msg, " at %s line %" IVdf ", ",
12330 OutCopFILE(PL_curcop),
12331 (IV)(PL_parser->preambling == NOLINE
12332 ? CopLINE(PL_curcop)
12333 : PL_parser->preambling));
12334 if (context)
12335 Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n",
12336 UTF8fARG(UTF, contlen, context));
12337 else
12338 Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv));
12339 if ( PL_multi_start < PL_multi_end
12340 && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1)
12341 {
12342 Perl_sv_catpvf(aTHX_ msg,
12343 " (Might be a runaway multi-line %c%c string starting on"
12344 " line %" IVdf ")\n",
12345 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
12346 PL_multi_end = 0;
12347 }
12348 if (PL_in_eval & EVAL_WARNONLY) {
12349 PL_in_eval &= ~EVAL_WARNONLY;
12350 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg));
12351 }
12352 else {
12353 qerror(msg);
12354 }
12355 }
12356 if (s == NULL || PL_error_count >= 10) {
12357 const char * msg = "";
12358 const char * const name = OutCopFILE(PL_curcop);
12359
12360 if (PL_in_eval) {
12361 SV * errsv = ERRSV;
12362 if (SvCUR(errsv)) {
12363 msg = Perl_form(aTHX_ "%" SVf, SVfARG(errsv));
12364 }
12365 }
12366
12367 if (s == NULL) {
12368 abort_execution(msg, name);
12369 }
12370 else {
12371 Perl_croak(aTHX_ "%s%s has too many errors.\n", msg, name);
12372 }
12373 }
12374 PL_in_my = 0;
12375 PL_in_my_stash = NULL;
12376 return 0;
12377 }
12378
12379 STATIC char*
S_swallow_bom(pTHX_ U8 * s)12380 S_swallow_bom(pTHX_ U8 *s)
12381 {
12382 const STRLEN slen = SvCUR(PL_linestr);
12383
12384 PERL_ARGS_ASSERT_SWALLOW_BOM;
12385
12386 switch (s[0]) {
12387 case 0xFF:
12388 if (s[1] == 0xFE) {
12389 /* UTF-16 little-endian? (or UTF-32LE?) */
12390 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
12391 /* diag_listed_as: Unsupported script encoding %s */
12392 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
12393 #ifndef PERL_NO_UTF16_FILTER
12394 #ifdef DEBUGGING
12395 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
12396 #endif
12397 s += 2;
12398 if (PL_bufend > (char*)s) {
12399 s = add_utf16_textfilter(s, TRUE);
12400 }
12401 #else
12402 /* diag_listed_as: Unsupported script encoding %s */
12403 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
12404 #endif
12405 }
12406 break;
12407 case 0xFE:
12408 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
12409 #ifndef PERL_NO_UTF16_FILTER
12410 #ifdef DEBUGGING
12411 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
12412 #endif
12413 s += 2;
12414 if (PL_bufend > (char *)s) {
12415 s = add_utf16_textfilter(s, FALSE);
12416 }
12417 #else
12418 /* diag_listed_as: Unsupported script encoding %s */
12419 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
12420 #endif
12421 }
12422 break;
12423 case BOM_UTF8_FIRST_BYTE: {
12424 if (memBEGINs(s+1, slen - 1, BOM_UTF8_TAIL)) {
12425 #ifdef DEBUGGING
12426 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12427 #endif
12428 s += sizeof(BOM_UTF8) - 1; /* UTF-8 */
12429 }
12430 break;
12431 }
12432 case 0:
12433 if (slen > 3) {
12434 if (s[1] == 0) {
12435 if (s[2] == 0xFE && s[3] == 0xFF) {
12436 /* UTF-32 big-endian */
12437 /* diag_listed_as: Unsupported script encoding %s */
12438 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
12439 }
12440 }
12441 else if (s[2] == 0 && s[3] != 0) {
12442 /* Leading bytes
12443 * 00 xx 00 xx
12444 * are a good indicator of UTF-16BE. */
12445 #ifndef PERL_NO_UTF16_FILTER
12446 #ifdef DEBUGGING
12447 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
12448 #endif
12449 s = add_utf16_textfilter(s, FALSE);
12450 #else
12451 /* diag_listed_as: Unsupported script encoding %s */
12452 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
12453 #endif
12454 }
12455 }
12456 break;
12457
12458 default:
12459 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
12460 /* Leading bytes
12461 * xx 00 xx 00
12462 * are a good indicator of UTF-16LE. */
12463 #ifndef PERL_NO_UTF16_FILTER
12464 #ifdef DEBUGGING
12465 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
12466 #endif
12467 s = add_utf16_textfilter(s, TRUE);
12468 #else
12469 /* diag_listed_as: Unsupported script encoding %s */
12470 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
12471 #endif
12472 }
12473 }
12474 return (char*)s;
12475 }
12476
12477
12478 #ifndef PERL_NO_UTF16_FILTER
12479 static I32
S_utf16_textfilter(pTHX_ int idx,SV * sv,int maxlen)12480 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12481 {
12482 SV *const filter = FILTER_DATA(idx);
12483 /* We re-use this each time round, throwing the contents away before we
12484 return. */
12485 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
12486 SV *const utf8_buffer = filter;
12487 IV status = IoPAGE(filter);
12488 const bool reverse = cBOOL(IoLINES(filter));
12489 I32 retval;
12490
12491 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
12492
12493 /* As we're automatically added, at the lowest level, and hence only called
12494 from this file, we can be sure that we're not called in block mode. Hence
12495 don't bother writing code to deal with block mode. */
12496 if (maxlen) {
12497 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
12498 }
12499 if (status < 0) {
12500 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%" IVdf ")", status);
12501 }
12502 DEBUG_P(PerlIO_printf(Perl_debug_log,
12503 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
12504 FPTR2DPTR(void *, S_utf16_textfilter),
12505 reverse ? 'l' : 'b', idx, maxlen, status,
12506 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
12507
12508 while (1) {
12509 STRLEN chars;
12510 STRLEN have;
12511 Size_t newlen;
12512 U8 *end;
12513 /* First, look in our buffer of existing UTF-8 data: */
12514 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
12515
12516 if (nl) {
12517 ++nl;
12518 } else if (status == 0) {
12519 /* EOF */
12520 IoPAGE(filter) = 0;
12521 nl = SvEND(utf8_buffer);
12522 }
12523 if (nl) {
12524 STRLEN got = nl - SvPVX(utf8_buffer);
12525 /* Did we have anything to append? */
12526 retval = got != 0;
12527 sv_catpvn(sv, SvPVX(utf8_buffer), got);
12528 /* Everything else in this code works just fine if SVp_POK isn't
12529 set. This, however, needs it, and we need it to work, else
12530 we loop infinitely because the buffer is never consumed. */
12531 sv_chop(utf8_buffer, nl);
12532 break;
12533 }
12534
12535 /* OK, not a complete line there, so need to read some more UTF-16.
12536 Read an extra octect if the buffer currently has an odd number. */
12537 while (1) {
12538 if (status <= 0)
12539 break;
12540 if (SvCUR(utf16_buffer) >= 2) {
12541 /* Location of the high octet of the last complete code point.
12542 Gosh, UTF-16 is a pain. All the benefits of variable length,
12543 *coupled* with all the benefits of partial reads and
12544 endianness. */
12545 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
12546 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
12547
12548 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
12549 break;
12550 }
12551
12552 /* We have the first half of a surrogate. Read more. */
12553 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
12554 }
12555
12556 status = FILTER_READ(idx + 1, utf16_buffer,
12557 160 + (SvCUR(utf16_buffer) & 1));
12558 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%" IVdf " SvCUR(sv)=%" UVuf "\n", status, (UV)SvCUR(utf16_buffer)));
12559 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
12560 if (status < 0) {
12561 /* Error */
12562 IoPAGE(filter) = status;
12563 return status;
12564 }
12565 }
12566
12567 /* 'chars' isn't quite the right name, as code points above 0xFFFF
12568 * require 4 bytes per char */
12569 chars = SvCUR(utf16_buffer) >> 1;
12570 have = SvCUR(utf8_buffer);
12571
12572 /* Assume the worst case size as noted by the functions: twice the
12573 * number of input bytes */
12574 SvGROW(utf8_buffer, have + chars * 4 + 1);
12575
12576 if (reverse) {
12577 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
12578 (U8*)SvPVX_const(utf8_buffer) + have,
12579 chars * 2, &newlen);
12580 } else {
12581 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
12582 (U8*)SvPVX_const(utf8_buffer) + have,
12583 chars * 2, &newlen);
12584 }
12585 SvCUR_set(utf8_buffer, have + newlen);
12586 *end = '\0';
12587
12588 /* No need to keep this SV "well-formed" with a '\0' after the end, as
12589 it's private to us, and utf16_to_utf8{,reversed} take a
12590 (pointer,length) pair, rather than a NUL-terminated string. */
12591 if(SvCUR(utf16_buffer) & 1) {
12592 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
12593 SvCUR_set(utf16_buffer, 1);
12594 } else {
12595 SvCUR_set(utf16_buffer, 0);
12596 }
12597 }
12598 DEBUG_P(PerlIO_printf(Perl_debug_log,
12599 "utf16_textfilter: returns, status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
12600 status,
12601 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
12602 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
12603 return retval;
12604 }
12605
12606 static U8 *
S_add_utf16_textfilter(pTHX_ U8 * const s,bool reversed)12607 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
12608 {
12609 SV *filter = filter_add(S_utf16_textfilter, NULL);
12610
12611 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
12612
12613 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
12614 SvPVCLEAR(filter);
12615 IoLINES(filter) = reversed;
12616 IoPAGE(filter) = 1; /* Not EOF */
12617
12618 /* Sadly, we have to return a valid pointer, come what may, so we have to
12619 ignore any error return from this. */
12620 SvCUR_set(PL_linestr, 0);
12621 if (FILTER_READ(0, PL_linestr, 0)) {
12622 SvUTF8_on(PL_linestr);
12623 } else {
12624 SvUTF8_on(PL_linestr);
12625 }
12626 PL_bufend = SvEND(PL_linestr);
12627 return (U8*)SvPVX(PL_linestr);
12628 }
12629 #endif
12630
12631 /*
12632 Returns a pointer to the next character after the parsed
12633 vstring, as well as updating the passed in sv.
12634
12635 Function must be called like
12636
12637 sv = sv_2mortal(newSV(5));
12638 s = scan_vstring(s,e,sv);
12639
12640 where s and e are the start and end of the string.
12641 The sv should already be large enough to store the vstring
12642 passed in, for performance reasons.
12643
12644 This function may croak if fatal warnings are enabled in the
12645 calling scope, hence the sv_2mortal in the example (to prevent
12646 a leak). Make sure to do SvREFCNT_inc afterwards if you use
12647 sv_2mortal.
12648
12649 */
12650
12651 char *
Perl_scan_vstring(pTHX_ const char * s,const char * const e,SV * sv)12652 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
12653 {
12654 const char *pos = s;
12655 const char *start = s;
12656
12657 PERL_ARGS_ASSERT_SCAN_VSTRING;
12658
12659 if (*pos == 'v') pos++; /* get past 'v' */
12660 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12661 pos++;
12662 if ( *pos != '.') {
12663 /* this may not be a v-string if followed by => */
12664 const char *next = pos;
12665 while (next < e && isSPACE(*next))
12666 ++next;
12667 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
12668 /* return string not v-string */
12669 sv_setpvn(sv,(char *)s,pos-s);
12670 return (char *)pos;
12671 }
12672 }
12673
12674 if (!isALPHA(*pos)) {
12675 U8 tmpbuf[UTF8_MAXBYTES+1];
12676
12677 if (*s == 'v')
12678 s++; /* get past 'v' */
12679
12680 SvPVCLEAR(sv);
12681
12682 for (;;) {
12683 /* this is atoi() that tolerates underscores */
12684 U8 *tmpend;
12685 UV rev = 0;
12686 const char *end = pos;
12687 UV mult = 1;
12688 while (--end >= s) {
12689 if (*end != '_') {
12690 const UV orev = rev;
12691 rev += (*end - '0') * mult;
12692 mult *= 10;
12693 if (orev > rev)
12694 /* diag_listed_as: Integer overflow in %s number */
12695 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
12696 "Integer overflow in decimal number");
12697 }
12698 }
12699
12700 /* Append native character for the rev point */
12701 tmpend = uvchr_to_utf8(tmpbuf, rev);
12702 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
12703 if (!UVCHR_IS_INVARIANT(rev))
12704 SvUTF8_on(sv);
12705 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
12706 s = ++pos;
12707 else {
12708 s = pos;
12709 break;
12710 }
12711 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12712 pos++;
12713 }
12714 SvPOK_on(sv);
12715 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
12716 SvRMAGICAL_on(sv);
12717 }
12718 return (char *)s;
12719 }
12720
12721 int
Perl_keyword_plugin_standard(pTHX_ char * keyword_ptr,STRLEN keyword_len,OP ** op_ptr)12722 Perl_keyword_plugin_standard(pTHX_
12723 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
12724 {
12725 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
12726 PERL_UNUSED_CONTEXT;
12727 PERL_UNUSED_ARG(keyword_ptr);
12728 PERL_UNUSED_ARG(keyword_len);
12729 PERL_UNUSED_ARG(op_ptr);
12730 return KEYWORD_PLUGIN_DECLINE;
12731 }
12732
12733 /*
12734 =for apidoc wrap_keyword_plugin
12735
12736 Puts a C function into the chain of keyword plugins. This is the
12737 preferred way to manipulate the L</PL_keyword_plugin> variable.
12738 C<new_plugin> is a pointer to the C function that is to be added to the
12739 keyword plugin chain, and C<old_plugin_p> points to the storage location
12740 where a pointer to the next function in the chain will be stored. The
12741 value of C<new_plugin> is written into the L</PL_keyword_plugin> variable,
12742 while the value previously stored there is written to C<*old_plugin_p>.
12743
12744 L</PL_keyword_plugin> is global to an entire process, and a module wishing
12745 to hook keyword parsing may find itself invoked more than once per
12746 process, typically in different threads. To handle that situation, this
12747 function is idempotent. The location C<*old_plugin_p> must initially
12748 (once per process) contain a null pointer. A C variable of static
12749 duration (declared at file scope, typically also marked C<static> to give
12750 it internal linkage) will be implicitly initialised appropriately, if it
12751 does not have an explicit initialiser. This function will only actually
12752 modify the plugin chain if it finds C<*old_plugin_p> to be null. This
12753 function is also thread safe on the small scale. It uses appropriate
12754 locking to avoid race conditions in accessing L</PL_keyword_plugin>.
12755
12756 When this function is called, the function referenced by C<new_plugin>
12757 must be ready to be called, except for C<*old_plugin_p> being unfilled.
12758 In a threading situation, C<new_plugin> may be called immediately, even
12759 before this function has returned. C<*old_plugin_p> will always be
12760 appropriately set before C<new_plugin> is called. If C<new_plugin>
12761 decides not to do anything special with the identifier that it is given
12762 (which is the usual case for most calls to a keyword plugin), it must
12763 chain the plugin function referenced by C<*old_plugin_p>.
12764
12765 Taken all together, XS code to install a keyword plugin should typically
12766 look something like this:
12767
12768 static Perl_keyword_plugin_t next_keyword_plugin;
12769 static OP *my_keyword_plugin(pTHX_
12770 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
12771 {
12772 if (memEQs(keyword_ptr, keyword_len,
12773 "my_new_keyword")) {
12774 ...
12775 } else {
12776 return next_keyword_plugin(aTHX_
12777 keyword_ptr, keyword_len, op_ptr);
12778 }
12779 }
12780 BOOT:
12781 wrap_keyword_plugin(my_keyword_plugin,
12782 &next_keyword_plugin);
12783
12784 Direct access to L</PL_keyword_plugin> should be avoided.
12785
12786 =cut
12787 */
12788
12789 void
Perl_wrap_keyword_plugin(pTHX_ Perl_keyword_plugin_t new_plugin,Perl_keyword_plugin_t * old_plugin_p)12790 Perl_wrap_keyword_plugin(pTHX_
12791 Perl_keyword_plugin_t new_plugin, Perl_keyword_plugin_t *old_plugin_p)
12792 {
12793 dVAR;
12794
12795 PERL_UNUSED_CONTEXT;
12796 PERL_ARGS_ASSERT_WRAP_KEYWORD_PLUGIN;
12797 if (*old_plugin_p) return;
12798 KEYWORD_PLUGIN_MUTEX_LOCK;
12799 if (!*old_plugin_p) {
12800 *old_plugin_p = PL_keyword_plugin;
12801 PL_keyword_plugin = new_plugin;
12802 }
12803 KEYWORD_PLUGIN_MUTEX_UNLOCK;
12804 }
12805
12806 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
12807 static void
S_parse_recdescent(pTHX_ int gramtype,I32 fakeeof)12808 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
12809 {
12810 SAVEI32(PL_lex_brackets);
12811 if (PL_lex_brackets > 100)
12812 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
12813 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
12814 SAVEI32(PL_lex_allbrackets);
12815 PL_lex_allbrackets = 0;
12816 SAVEI8(PL_lex_fakeeof);
12817 PL_lex_fakeeof = (U8)fakeeof;
12818 if(yyparse(gramtype) && !PL_parser->error_count)
12819 qerror(Perl_mess(aTHX_ "Parse error"));
12820 }
12821
12822 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
12823 static OP *
S_parse_recdescent_for_op(pTHX_ int gramtype,I32 fakeeof)12824 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
12825 {
12826 OP *o;
12827 ENTER;
12828 SAVEVPTR(PL_eval_root);
12829 PL_eval_root = NULL;
12830 parse_recdescent(gramtype, fakeeof);
12831 o = PL_eval_root;
12832 LEAVE;
12833 return o;
12834 }
12835
12836 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
12837 static OP *
S_parse_expr(pTHX_ I32 fakeeof,U32 flags)12838 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
12839 {
12840 OP *exprop;
12841 if (flags & ~PARSE_OPTIONAL)
12842 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
12843 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
12844 if (!exprop && !(flags & PARSE_OPTIONAL)) {
12845 if (!PL_parser->error_count)
12846 qerror(Perl_mess(aTHX_ "Parse error"));
12847 exprop = newOP(OP_NULL, 0);
12848 }
12849 return exprop;
12850 }
12851
12852 /*
12853 =for apidoc parse_arithexpr
12854
12855 Parse a Perl arithmetic expression. This may contain operators of precedence
12856 down to the bit shift operators. The expression must be followed (and thus
12857 terminated) either by a comparison or lower-precedence operator or by
12858 something that would normally terminate an expression such as semicolon.
12859 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
12860 otherwise it is mandatory. It is up to the caller to ensure that the
12861 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12862 the source of the code to be parsed and the lexical context for the
12863 expression.
12864
12865 The op tree representing the expression is returned. If an optional
12866 expression is absent, a null pointer is returned, otherwise the pointer
12867 will be non-null.
12868
12869 If an error occurs in parsing or compilation, in most cases a valid op
12870 tree is returned anyway. The error is reflected in the parser state,
12871 normally resulting in a single exception at the top level of parsing
12872 which covers all the compilation errors that occurred. Some compilation
12873 errors, however, will throw an exception immediately.
12874
12875 =for apidoc Amnh||PARSE_OPTIONAL
12876
12877 =cut
12878
12879 */
12880
12881 OP *
Perl_parse_arithexpr(pTHX_ U32 flags)12882 Perl_parse_arithexpr(pTHX_ U32 flags)
12883 {
12884 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
12885 }
12886
12887 /*
12888 =for apidoc parse_termexpr
12889
12890 Parse a Perl term expression. This may contain operators of precedence
12891 down to the assignment operators. The expression must be followed (and thus
12892 terminated) either by a comma or lower-precedence operator or by
12893 something that would normally terminate an expression such as semicolon.
12894 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
12895 otherwise it is mandatory. It is up to the caller to ensure that the
12896 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12897 the source of the code to be parsed and the lexical context for the
12898 expression.
12899
12900 The op tree representing the expression is returned. If an optional
12901 expression is absent, a null pointer is returned, otherwise the pointer
12902 will be non-null.
12903
12904 If an error occurs in parsing or compilation, in most cases a valid op
12905 tree is returned anyway. The error is reflected in the parser state,
12906 normally resulting in a single exception at the top level of parsing
12907 which covers all the compilation errors that occurred. Some compilation
12908 errors, however, will throw an exception immediately.
12909
12910 =cut
12911 */
12912
12913 OP *
Perl_parse_termexpr(pTHX_ U32 flags)12914 Perl_parse_termexpr(pTHX_ U32 flags)
12915 {
12916 return parse_expr(LEX_FAKEEOF_COMMA, flags);
12917 }
12918
12919 /*
12920 =for apidoc parse_listexpr
12921
12922 Parse a Perl list expression. This may contain operators of precedence
12923 down to the comma operator. The expression must be followed (and thus
12924 terminated) either by a low-precedence logic operator such as C<or> or by
12925 something that would normally terminate an expression such as semicolon.
12926 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
12927 otherwise it is mandatory. It is up to the caller to ensure that the
12928 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12929 the source of the code to be parsed and the lexical context for the
12930 expression.
12931
12932 The op tree representing the expression is returned. If an optional
12933 expression is absent, a null pointer is returned, otherwise the pointer
12934 will be non-null.
12935
12936 If an error occurs in parsing or compilation, in most cases a valid op
12937 tree is returned anyway. The error is reflected in the parser state,
12938 normally resulting in a single exception at the top level of parsing
12939 which covers all the compilation errors that occurred. Some compilation
12940 errors, however, will throw an exception immediately.
12941
12942 =cut
12943 */
12944
12945 OP *
Perl_parse_listexpr(pTHX_ U32 flags)12946 Perl_parse_listexpr(pTHX_ U32 flags)
12947 {
12948 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
12949 }
12950
12951 /*
12952 =for apidoc parse_fullexpr
12953
12954 Parse a single complete Perl expression. This allows the full
12955 expression grammar, including the lowest-precedence operators such
12956 as C<or>. The expression must be followed (and thus terminated) by a
12957 token that an expression would normally be terminated by: end-of-file,
12958 closing bracketing punctuation, semicolon, or one of the keywords that
12959 signals a postfix expression-statement modifier. If C<flags> has the
12960 C<PARSE_OPTIONAL> bit set, then the expression is optional, otherwise it is
12961 mandatory. It is up to the caller to ensure that the dynamic parser
12962 state (L</PL_parser> et al) is correctly set to reflect the source of
12963 the code to be parsed and the lexical context for the expression.
12964
12965 The op tree representing the expression is returned. If an optional
12966 expression is absent, a null pointer is returned, otherwise the pointer
12967 will be non-null.
12968
12969 If an error occurs in parsing or compilation, in most cases a valid op
12970 tree is returned anyway. The error is reflected in the parser state,
12971 normally resulting in a single exception at the top level of parsing
12972 which covers all the compilation errors that occurred. Some compilation
12973 errors, however, will throw an exception immediately.
12974
12975 =cut
12976 */
12977
12978 OP *
Perl_parse_fullexpr(pTHX_ U32 flags)12979 Perl_parse_fullexpr(pTHX_ U32 flags)
12980 {
12981 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
12982 }
12983
12984 /*
12985 =for apidoc parse_block
12986
12987 Parse a single complete Perl code block. This consists of an opening
12988 brace, a sequence of statements, and a closing brace. The block
12989 constitutes a lexical scope, so C<my> variables and various compile-time
12990 effects can be contained within it. It is up to the caller to ensure
12991 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12992 reflect the source of the code to be parsed and the lexical context for
12993 the statement.
12994
12995 The op tree representing the code block is returned. This is always a
12996 real op, never a null pointer. It will normally be a C<lineseq> list,
12997 including C<nextstate> or equivalent ops. No ops to construct any kind
12998 of runtime scope are included by virtue of it being a block.
12999
13000 If an error occurs in parsing or compilation, in most cases a valid op
13001 tree (most likely null) is returned anyway. The error is reflected in
13002 the parser state, normally resulting in a single exception at the top
13003 level of parsing which covers all the compilation errors that occurred.
13004 Some compilation errors, however, will throw an exception immediately.
13005
13006 The C<flags> parameter is reserved for future use, and must always
13007 be zero.
13008
13009 =cut
13010 */
13011
13012 OP *
Perl_parse_block(pTHX_ U32 flags)13013 Perl_parse_block(pTHX_ U32 flags)
13014 {
13015 if (flags)
13016 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
13017 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
13018 }
13019
13020 /*
13021 =for apidoc parse_barestmt
13022
13023 Parse a single unadorned Perl statement. This may be a normal imperative
13024 statement or a declaration that has compile-time effect. It does not
13025 include any label or other affixture. It is up to the caller to ensure
13026 that the dynamic parser state (L</PL_parser> et al) is correctly set to
13027 reflect the source of the code to be parsed and the lexical context for
13028 the statement.
13029
13030 The op tree representing the statement is returned. This may be a
13031 null pointer if the statement is null, for example if it was actually
13032 a subroutine definition (which has compile-time side effects). If not
13033 null, it will be ops directly implementing the statement, suitable to
13034 pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
13035 equivalent op (except for those embedded in a scope contained entirely
13036 within the statement).
13037
13038 If an error occurs in parsing or compilation, in most cases a valid op
13039 tree (most likely null) is returned anyway. The error is reflected in
13040 the parser state, normally resulting in a single exception at the top
13041 level of parsing which covers all the compilation errors that occurred.
13042 Some compilation errors, however, will throw an exception immediately.
13043
13044 The C<flags> parameter is reserved for future use, and must always
13045 be zero.
13046
13047 =cut
13048 */
13049
13050 OP *
Perl_parse_barestmt(pTHX_ U32 flags)13051 Perl_parse_barestmt(pTHX_ U32 flags)
13052 {
13053 if (flags)
13054 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
13055 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
13056 }
13057
13058 /*
13059 =for apidoc parse_label
13060
13061 Parse a single label, possibly optional, of the type that may prefix a
13062 Perl statement. It is up to the caller to ensure that the dynamic parser
13063 state (L</PL_parser> et al) is correctly set to reflect the source of
13064 the code to be parsed. If C<flags> has the C<PARSE_OPTIONAL> bit set, then the
13065 label is optional, otherwise it is mandatory.
13066
13067 The name of the label is returned in the form of a fresh scalar. If an
13068 optional label is absent, a null pointer is returned.
13069
13070 If an error occurs in parsing, which can only occur if the label is
13071 mandatory, a valid label is returned anyway. The error is reflected in
13072 the parser state, normally resulting in a single exception at the top
13073 level of parsing which covers all the compilation errors that occurred.
13074
13075 =cut
13076 */
13077
13078 SV *
Perl_parse_label(pTHX_ U32 flags)13079 Perl_parse_label(pTHX_ U32 flags)
13080 {
13081 if (flags & ~PARSE_OPTIONAL)
13082 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
13083 if (PL_nexttoke) {
13084 PL_parser->yychar = yylex();
13085 if (PL_parser->yychar == LABEL) {
13086 SV * const labelsv = cSVOPx(pl_yylval.opval)->op_sv;
13087 PL_parser->yychar = YYEMPTY;
13088 cSVOPx(pl_yylval.opval)->op_sv = NULL;
13089 op_free(pl_yylval.opval);
13090 return labelsv;
13091 } else {
13092 yyunlex();
13093 goto no_label;
13094 }
13095 } else {
13096 char *s, *t;
13097 STRLEN wlen, bufptr_pos;
13098 lex_read_space(0);
13099 t = s = PL_bufptr;
13100 if (!isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
13101 goto no_label;
13102 t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
13103 if (word_takes_any_delimiter(s, wlen))
13104 goto no_label;
13105 bufptr_pos = s - SvPVX(PL_linestr);
13106 PL_bufptr = t;
13107 lex_read_space(LEX_KEEP_PREVIOUS);
13108 t = PL_bufptr;
13109 s = SvPVX(PL_linestr) + bufptr_pos;
13110 if (t[0] == ':' && t[1] != ':') {
13111 PL_oldoldbufptr = PL_oldbufptr;
13112 PL_oldbufptr = s;
13113 PL_bufptr = t+1;
13114 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
13115 } else {
13116 PL_bufptr = s;
13117 no_label:
13118 if (flags & PARSE_OPTIONAL) {
13119 return NULL;
13120 } else {
13121 qerror(Perl_mess(aTHX_ "Parse error"));
13122 return newSVpvs("x");
13123 }
13124 }
13125 }
13126 }
13127
13128 /*
13129 =for apidoc parse_fullstmt
13130
13131 Parse a single complete Perl statement. This may be a normal imperative
13132 statement or a declaration that has compile-time effect, and may include
13133 optional labels. It is up to the caller to ensure that the dynamic
13134 parser state (L</PL_parser> et al) is correctly set to reflect the source
13135 of the code to be parsed and the lexical context for the statement.
13136
13137 The op tree representing the statement is returned. This may be a
13138 null pointer if the statement is null, for example if it was actually
13139 a subroutine definition (which has compile-time side effects). If not
13140 null, it will be the result of a L</newSTATEOP> call, normally including
13141 a C<nextstate> or equivalent op.
13142
13143 If an error occurs in parsing or compilation, in most cases a valid op
13144 tree (most likely null) is returned anyway. The error is reflected in
13145 the parser state, normally resulting in a single exception at the top
13146 level of parsing which covers all the compilation errors that occurred.
13147 Some compilation errors, however, will throw an exception immediately.
13148
13149 The C<flags> parameter is reserved for future use, and must always
13150 be zero.
13151
13152 =cut
13153 */
13154
13155 OP *
Perl_parse_fullstmt(pTHX_ U32 flags)13156 Perl_parse_fullstmt(pTHX_ U32 flags)
13157 {
13158 if (flags)
13159 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
13160 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
13161 }
13162
13163 /*
13164 =for apidoc parse_stmtseq
13165
13166 Parse a sequence of zero or more Perl statements. These may be normal
13167 imperative statements, including optional labels, or declarations
13168 that have compile-time effect, or any mixture thereof. The statement
13169 sequence ends when a closing brace or end-of-file is encountered in a
13170 place where a new statement could have validly started. It is up to
13171 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
13172 is correctly set to reflect the source of the code to be parsed and the
13173 lexical context for the statements.
13174
13175 The op tree representing the statement sequence is returned. This may
13176 be a null pointer if the statements were all null, for example if there
13177 were no statements or if there were only subroutine definitions (which
13178 have compile-time side effects). If not null, it will be a C<lineseq>
13179 list, normally including C<nextstate> or equivalent ops.
13180
13181 If an error occurs in parsing or compilation, in most cases a valid op
13182 tree is returned anyway. The error is reflected in the parser state,
13183 normally resulting in a single exception at the top level of parsing
13184 which covers all the compilation errors that occurred. Some compilation
13185 errors, however, will throw an exception immediately.
13186
13187 The C<flags> parameter is reserved for future use, and must always
13188 be zero.
13189
13190 =cut
13191 */
13192
13193 OP *
Perl_parse_stmtseq(pTHX_ U32 flags)13194 Perl_parse_stmtseq(pTHX_ U32 flags)
13195 {
13196 OP *stmtseqop;
13197 I32 c;
13198 if (flags)
13199 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
13200 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
13201 c = lex_peek_unichar(0);
13202 if (c != -1 && c != /*{*/'}')
13203 qerror(Perl_mess(aTHX_ "Parse error"));
13204 return stmtseqop;
13205 }
13206
13207 /*
13208 =for apidoc parse_subsignature
13209
13210 Parse a subroutine signature declaration. This is the contents of the
13211 parentheses following a named or anonymous subroutine declaration when the
13212 C<signatures> feature is enabled. Note that this function neither expects
13213 nor consumes the opening and closing parentheses around the signature; it
13214 is the caller's job to handle these.
13215
13216 This function must only be called during parsing of a subroutine; after
13217 L</start_subparse> has been called. It might allocate lexical variables on
13218 the pad for the current subroutine.
13219
13220 The op tree to unpack the arguments from the stack at runtime is returned.
13221 This op tree should appear at the beginning of the compiled function. The
13222 caller may wish to use L</op_append_list> to build their function body
13223 after it, or splice it together with the body before calling L</newATTRSUB>.
13224
13225 The C<flags> parameter is reserved for future use, and must always
13226 be zero.
13227
13228 =cut
13229 */
13230
13231 OP *
Perl_parse_subsignature(pTHX_ U32 flags)13232 Perl_parse_subsignature(pTHX_ U32 flags)
13233 {
13234 if (flags)
13235 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_subsignature");
13236 return parse_recdescent_for_op(GRAMSUBSIGNATURE, LEX_FAKEEOF_NONEXPR);
13237 }
13238
13239 /*
13240 * ex: set ts=8 sts=4 sw=4 et:
13241 */
13242