1 /* toke.c
2 *
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
11 /*
12 * 'It all comes from here, the stench and the peril.' --Frodo
13 *
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
15 */
16
17 /*
18 * This file is the lexer for Perl. It's closely linked to the
19 * parser, perly.y.
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
24 /*
25 =head1 Lexer interface
26 This is the lower layer of the Perl parser, managing characters and tokens.
27
28 =for apidoc AmnU|yy_parser *|PL_parser
29
30 Pointer to a structure encapsulating the state of the parsing operation
31 currently in progress. The pointer can be locally changed to perform
32 a nested parse without interfering with the state of an outer parse.
33 Individual members of C<PL_parser> have their own documentation.
34
35 =cut
36 */
37
38 #include "EXTERN.h"
39 #define PERL_IN_TOKE_C
40 #include "perl.h"
41 #include "invlist_inline.h"
42
43 #define new_constant(a,b,c,d,e,f,g, h) \
44 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g, h)
45
46 #define pl_yylval (PL_parser->yylval)
47
48 /* XXX temporary backwards compatibility */
49 #define PL_lex_brackets (PL_parser->lex_brackets)
50 #define PL_lex_allbrackets (PL_parser->lex_allbrackets)
51 #define PL_lex_fakeeof (PL_parser->lex_fakeeof)
52 #define PL_lex_brackstack (PL_parser->lex_brackstack)
53 #define PL_lex_casemods (PL_parser->lex_casemods)
54 #define PL_lex_casestack (PL_parser->lex_casestack)
55 #define PL_lex_dojoin (PL_parser->lex_dojoin)
56 #define PL_lex_formbrack (PL_parser->lex_formbrack)
57 #define PL_lex_inpat (PL_parser->lex_inpat)
58 #define PL_lex_inwhat (PL_parser->lex_inwhat)
59 #define PL_lex_op (PL_parser->lex_op)
60 #define PL_lex_repl (PL_parser->lex_repl)
61 #define PL_lex_starts (PL_parser->lex_starts)
62 #define PL_lex_stuff (PL_parser->lex_stuff)
63 #define PL_multi_start (PL_parser->multi_start)
64 #define PL_multi_open (PL_parser->multi_open)
65 #define PL_multi_close (PL_parser->multi_close)
66 #define PL_preambled (PL_parser->preambled)
67 #define PL_linestr (PL_parser->linestr)
68 #define PL_expect (PL_parser->expect)
69 #define PL_copline (PL_parser->copline)
70 #define PL_bufptr (PL_parser->bufptr)
71 #define PL_oldbufptr (PL_parser->oldbufptr)
72 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
73 #define PL_linestart (PL_parser->linestart)
74 #define PL_bufend (PL_parser->bufend)
75 #define PL_last_uni (PL_parser->last_uni)
76 #define PL_last_lop (PL_parser->last_lop)
77 #define PL_last_lop_op (PL_parser->last_lop_op)
78 #define PL_lex_state (PL_parser->lex_state)
79 #define PL_rsfp (PL_parser->rsfp)
80 #define PL_rsfp_filters (PL_parser->rsfp_filters)
81 #define PL_in_my (PL_parser->in_my)
82 #define PL_in_my_stash (PL_parser->in_my_stash)
83 #define PL_tokenbuf (PL_parser->tokenbuf)
84 #define PL_multi_end (PL_parser->multi_end)
85 #define PL_error_count (PL_parser->error_count)
86
87 # define PL_nexttoke (PL_parser->nexttoke)
88 # define PL_nexttype (PL_parser->nexttype)
89 # define PL_nextval (PL_parser->nextval)
90
91
92 #define SvEVALED(sv) \
93 (SvTYPE(sv) >= SVt_PVNV \
94 && ((XPVIV*)SvANY(sv))->xiv_u.xivu_eval_seen)
95
96 static const char ident_too_long[] = "Identifier too long";
97 static const char ident_var_zero_multi_digit[] = "Numeric variables with more than one digit may not start with '0'";
98
99 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
100
101 #define XENUMMASK 0x3f
102 #define XFAKEEOF 0x40
103 #define XFAKEBRACK 0x80
104
105 #ifdef USE_UTF8_SCRIPTS
106 # define UTF cBOOL(!IN_BYTES)
107 #else
108 # define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
109 #endif
110
111 /* The maximum number of characters preceding the unrecognized one to display */
112 #define UNRECOGNIZED_PRECEDE_COUNT 10
113
114 /* In variables named $^X, these are the legal values for X.
115 * 1999-02-27 mjd-perl-patch@plover.com */
116 #define isCONTROLVAR(x) (isUPPER(x) || memCHRs("[\\]^_?", (x)))
117
118 /* Non-identifier plugin infix operators are allowed any printing character
119 * except spaces, digits, or identifier chars
120 */
121 #define isPLUGINFIX(c) (c && !isSPACE(c) && !isDIGIT(c) && !isALPHA(c))
122 /* Plugin infix operators may not begin with a quote symbol */
123 #define isPLUGINFIX_FIRST(c) (isPLUGINFIX(c) && c != '"' && c != '\'')
124
125 #define PLUGINFIX_IS_ENABLED UNLIKELY(PL_infix_plugin != &Perl_infix_plugin_standard)
126
127 #define SPACE_OR_TAB(c) isBLANK_A(c)
128
129 #define HEXFP_PEEK(s) \
130 (((s[0] == '.') && \
131 (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \
132 isALPHA_FOLD_EQ(s[0], 'p'))
133
134 /* LEX_* are values for PL_lex_state, the state of the lexer.
135 * They are arranged oddly so that the guard on the switch statement
136 * can get by with a single comparison (if the compiler is smart enough).
137 *
138 * These values refer to the various states within a sublex parse,
139 * i.e. within a double quotish string
140 */
141
142 /* #define LEX_NOTPARSING 11 is done in perl.h. */
143
144 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
145 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
146 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
147 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
148 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
149
150 /* at end of code, eg "$x" followed by: */
151 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
152 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
153
154 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
155 string or after \E, $foo, etc */
156 #define LEX_INTERPCONST 2 /* NOT USED */
157 #define LEX_FORMLINE 1 /* expecting a format line */
158
159 /* returned to yyl_try() to request it to retry the parse loop, expected to only
160 be returned directly by yyl_fake_eof(), but functions that call yyl_fake_eof()
161 can also return it.
162
163 yylex (aka Perl_yylex) returns 0 on EOF rather than returning -1,
164 other token values are 258 or higher (see perly.h), so -1 should be
165 a safe value here.
166 */
167 #define YYL_RETRY (-1)
168
169 #ifdef DEBUGGING
170 static const char* const lex_state_names[] = {
171 "KNOWNEXT",
172 "FORMLINE",
173 "INTERPCONST",
174 "INTERPCONCAT",
175 "INTERPENDMAYBE",
176 "INTERPEND",
177 "INTERPSTART",
178 "INTERPPUSH",
179 "INTERPCASEMOD",
180 "INTERPNORMAL",
181 "NORMAL"
182 };
183 #endif
184
185 #include "keywords.h"
186
187 /* CLINE is a macro that ensures PL_copline has a sane value */
188
189 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
190
191 /*
192 * Convenience functions to return different tokens and prime the
193 * lexer for the next token. They all take an argument.
194 *
195 * TOKEN : generic token (used for '(', DOLSHARP, etc)
196 * OPERATOR : generic operator
197 * AOPERATOR : assignment operator
198 * PREBLOCK : beginning the block after an if, while, foreach, ...
199 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
200 * PREREF : *EXPR where EXPR is not a simple identifier
201 * TERM : expression term
202 * POSTDEREF : postfix dereference (->$* ->@[...] etc.)
203 * LOOPX : loop exiting command (goto, last, dump, etc)
204 * FTST : file test operator
205 * FUN0 : zero-argument function
206 * FUN0OP : zero-argument function, with its op created in this file
207 * FUN1 : not used, except for not, which isn't a UNIOP
208 * BOop : bitwise or or xor
209 * BAop : bitwise and
210 * BCop : bitwise complement
211 * SHop : shift operator
212 * PWop : power operator
213 * PMop : pattern-matching operator
214 * Aop : addition-level operator
215 * AopNOASSIGN : addition-level operator that is never part of .=
216 * Mop : multiplication-level operator
217 * ChEop : chaining equality-testing operator
218 * NCEop : non-chaining comparison operator at equality precedence
219 * ChRop : chaining relational operator <= != gt
220 * NCRop : non-chaining relational operator isa
221 *
222 * Also see LOP and lop() below.
223 */
224
225 #ifdef DEBUGGING /* Serve -DT. */
226 # define REPORT(retval) tokereport((I32)retval, &pl_yylval)
227 #else
228 # define REPORT(retval) (retval)
229 #endif
230
231 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
232 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
233 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
234 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
235 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
236 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
237 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
238 #define PHASERBLOCK(f) return (pl_yylval.ival=f, PL_expect = XBLOCK, PL_bufptr = s, REPORT((int)PHASER))
239 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
240 #define LOOPX(f) return (PL_bufptr = force_word(s,BAREWORD,TRUE,FALSE), \
241 pl_yylval.ival=f, \
242 PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
243 REPORT((int)LOOPEX))
244 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
245 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
246 #define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
247 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
248 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
249 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
250 #define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \
251 REPORT(PERLY_TILDE)
252 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
253 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
254 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
255 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
256 #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
257 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
258 #define ChEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHEQOP))
259 #define NCEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCEQOP))
260 #define ChRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHRELOP))
261 #define NCRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCRELOP))
262
263 /* This bit of chicanery makes a unary function followed by
264 * a parenthesis into a function with one argument, highest precedence.
265 * The UNIDOR macro is for unary functions that can be followed by the //
266 * operator (such as C<shift // 0>).
267 */
268 #define UNI3(f,x,have_x) { \
269 pl_yylval.ival = f; \
270 if (have_x) PL_expect = x; \
271 PL_bufptr = s; \
272 PL_last_uni = PL_oldbufptr; \
273 PL_last_lop_op = (f) < 0 ? -(f) : (f); \
274 if (*s == '(') \
275 return REPORT( (int)FUNC1 ); \
276 s = skipspace(s); \
277 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
278 }
279 #define UNI(f) UNI3(f,XTERM,1)
280 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
281 #define UNIPROTO(f,optional) { \
282 if (optional) PL_last_uni = PL_oldbufptr; \
283 OPERATOR(f); \
284 }
285
286 #define UNIBRACK(f) UNI3(f,0,0)
287
288 /* return has special case parsing.
289 *
290 * List operators have low precedence. Functions have high precedence.
291 * Every built in, *except return*, if written with () around its arguments, is
292 * parsed as a function. Hence every other list built in:
293 *
294 * $ perl -lwe 'sub foo { join 2,4,6 * 1.5 } print for foo()' # join 2,4,9
295 * 429
296 * $ perl -lwe 'sub foo { join(2,4,6) * 1.5 } print for foo()' # 426 * 1.5
297 * 639
298 * $ perl -lwe 'sub foo { join+(2,4,6) * 1.5 } print for foo()'
299 * Useless use of a constant (2) in void context at -e line 1.
300 * Useless use of a constant (4) in void context at -e line 1.
301 *
302 * $
303 *
304 * empty line output because C<(2, 4, 6) * 1.5> is the comma operator, not a
305 * list. * forces scalar context, 6 * 1.5 is 9, and join(9) is the empty string.
306 *
307 * Whereas return:
308 *
309 * $ perl -lwe 'sub foo { return 2,4,6 * 1.5 } print for foo()'
310 * 2
311 * 4
312 * 9
313 * $ perl -lwe 'sub foo { return(2,4,6) * 1.5 } print for foo()'
314 * Useless use of a constant (2) in void context at -e line 1.
315 * Useless use of a constant (4) in void context at -e line 1.
316 * 9
317 * $ perl -lwe 'sub foo { return+(2,4,6) * 1.5 } print for foo()'
318 * Useless use of a constant (2) in void context at -e line 1.
319 * Useless use of a constant (4) in void context at -e line 1.
320 * 9
321 * $
322 *
323 * and:
324 * $ perl -lwe 'sub foo { return(2,4,6) } print for foo()'
325 * 2
326 * 4
327 * 6
328 *
329 * This last example is what we expect, but it's clearly inconsistent with how
330 * C<return(2,4,6) * 1.5> *ought* to behave, if the rules were consistently
331 * followed.
332 *
333 *
334 * Perl 3 attempted to be consistent:
335 *
336 * The rules are more consistent about where parens are needed and
337 * where they are not. In particular, unary operators and list operators now
338 * behave like functions if they're called like functions.
339 *
340 * However, the behaviour for return was reverted to the "old" parsing with
341 * patches 9-12:
342 *
343 * The construct
344 * return (1,2,3);
345 * did not do what was expected, since return was swallowing the
346 * parens in order to consider itself a function. The solution,
347 * since return never wants any trailing expression such as
348 * return (1,2,3) + 2;
349 * is to simply make return an exception to the paren-makes-a-function
350 * rule, and treat it the way it always was, so that it doesn't
351 * strip the parens.
352 *
353 * To demonstrate the special-case parsing, replace OLDLOP(OP_RETURN); with
354 * LOP(OP_RETURN, XTERM);
355 *
356 * and constructs such as
357 *
358 * return (Internals::V())[2]
359 *
360 * turn into syntax errors
361 */
362
363 #define OLDLOP(f) \
364 do { \
365 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
366 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
367 pl_yylval.ival = (f); \
368 PL_expect = XTERM; \
369 PL_bufptr = s; \
370 return (int)LSTOP; \
371 } while(0)
372
373 #define COPLINE_INC_WITH_HERELINES \
374 STMT_START { \
375 CopLINE_inc(PL_curcop); \
376 if (PL_parser->herelines) \
377 CopLINE(PL_curcop) += PL_parser->herelines, \
378 PL_parser->herelines = 0; \
379 } STMT_END
380 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
381 * is no sublex_push to follow. */
382 #define COPLINE_SET_FROM_MULTI_END \
383 STMT_START { \
384 CopLINE_set(PL_curcop, PL_multi_end); \
385 if (PL_multi_end != PL_multi_start) \
386 PL_parser->herelines = 0; \
387 } STMT_END
388
389
390 /* A file-local structure for passing around information about subroutines and
391 * related definable words */
392 struct code {
393 SV *sv;
394 CV *cv;
395 GV *gv, **gvp;
396 OP *rv2cv_op;
397 PADOFFSET off;
398 bool lex;
399 };
400
401 static const struct code no_code = { NULL, NULL, NULL, NULL, NULL, 0, FALSE };
402
403 #ifdef DEBUGGING
404
405 /* how to interpret the pl_yylval associated with the token */
406 enum token_type {
407 TOKENTYPE_NONE,
408 TOKENTYPE_IVAL,
409 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
410 TOKENTYPE_PVAL,
411 TOKENTYPE_OPVAL
412 };
413
414 #define DEBUG_TOKEN(Type, Name) \
415 { Name, TOKENTYPE_##Type, #Name }
416
417 static struct debug_tokens {
418 const int token;
419 enum token_type type;
420 const char *name;
421 } const debug_tokens[] =
422 {
423 DEBUG_TOKEN (OPNUM, ADDOP),
424 DEBUG_TOKEN (NONE, ANDAND),
425 DEBUG_TOKEN (NONE, ANDOP),
426 DEBUG_TOKEN (NONE, ARROW),
427 DEBUG_TOKEN (OPNUM, ASSIGNOP),
428 DEBUG_TOKEN (OPNUM, BITANDOP),
429 DEBUG_TOKEN (OPNUM, BITOROP),
430 DEBUG_TOKEN (OPNUM, CHEQOP),
431 DEBUG_TOKEN (OPNUM, CHRELOP),
432 DEBUG_TOKEN (NONE, COLONATTR),
433 DEBUG_TOKEN (NONE, DOLSHARP),
434 DEBUG_TOKEN (NONE, DORDOR),
435 DEBUG_TOKEN (IVAL, DOTDOT),
436 DEBUG_TOKEN (NONE, FORMLBRACK),
437 DEBUG_TOKEN (NONE, FORMRBRACK),
438 DEBUG_TOKEN (OPNUM, FUNC),
439 DEBUG_TOKEN (OPNUM, FUNC0),
440 DEBUG_TOKEN (OPVAL, FUNC0OP),
441 DEBUG_TOKEN (OPVAL, FUNC0SUB),
442 DEBUG_TOKEN (OPNUM, FUNC1),
443 DEBUG_TOKEN (NONE, HASHBRACK),
444 DEBUG_TOKEN (IVAL, KW_CATCH),
445 DEBUG_TOKEN (IVAL, KW_CLASS),
446 DEBUG_TOKEN (IVAL, KW_CONTINUE),
447 DEBUG_TOKEN (IVAL, KW_DEFAULT),
448 DEBUG_TOKEN (IVAL, KW_DO),
449 DEBUG_TOKEN (IVAL, KW_ELSE),
450 DEBUG_TOKEN (IVAL, KW_ELSIF),
451 DEBUG_TOKEN (IVAL, KW_FIELD),
452 DEBUG_TOKEN (IVAL, KW_GIVEN),
453 DEBUG_TOKEN (IVAL, KW_FOR),
454 DEBUG_TOKEN (IVAL, KW_FORMAT),
455 DEBUG_TOKEN (IVAL, KW_IF),
456 DEBUG_TOKEN (IVAL, KW_LOCAL),
457 DEBUG_TOKEN (IVAL, KW_METHOD_anon),
458 DEBUG_TOKEN (IVAL, KW_METHOD_named),
459 DEBUG_TOKEN (IVAL, KW_MY),
460 DEBUG_TOKEN (IVAL, KW_PACKAGE),
461 DEBUG_TOKEN (IVAL, KW_REQUIRE),
462 DEBUG_TOKEN (IVAL, KW_SUB_anon),
463 DEBUG_TOKEN (IVAL, KW_SUB_anon_sig),
464 DEBUG_TOKEN (IVAL, KW_SUB_named),
465 DEBUG_TOKEN (IVAL, KW_SUB_named_sig),
466 DEBUG_TOKEN (IVAL, KW_TRY),
467 DEBUG_TOKEN (IVAL, KW_USE_or_NO),
468 DEBUG_TOKEN (IVAL, KW_UNLESS),
469 DEBUG_TOKEN (IVAL, KW_UNTIL),
470 DEBUG_TOKEN (IVAL, KW_WHEN),
471 DEBUG_TOKEN (IVAL, KW_WHILE),
472 DEBUG_TOKEN (OPVAL, LABEL),
473 DEBUG_TOKEN (OPNUM, LOOPEX),
474 DEBUG_TOKEN (OPNUM, LSTOP),
475 DEBUG_TOKEN (OPVAL, LSTOPSUB),
476 DEBUG_TOKEN (OPNUM, MATCHOP),
477 DEBUG_TOKEN (OPVAL, METHCALL),
478 DEBUG_TOKEN (OPVAL, METHCALL0),
479 DEBUG_TOKEN (OPNUM, MULOP),
480 DEBUG_TOKEN (OPNUM, NCEQOP),
481 DEBUG_TOKEN (OPNUM, NCRELOP),
482 DEBUG_TOKEN (NONE, NOAMP),
483 DEBUG_TOKEN (NONE, NOTOP),
484 DEBUG_TOKEN (IVAL, OROP),
485 DEBUG_TOKEN (IVAL, OROR),
486 DEBUG_TOKEN (IVAL, PERLY_AMPERSAND),
487 DEBUG_TOKEN (IVAL, PERLY_BRACE_CLOSE),
488 DEBUG_TOKEN (IVAL, PERLY_BRACE_OPEN),
489 DEBUG_TOKEN (IVAL, PERLY_BRACKET_CLOSE),
490 DEBUG_TOKEN (IVAL, PERLY_BRACKET_OPEN),
491 DEBUG_TOKEN (IVAL, PERLY_COLON),
492 DEBUG_TOKEN (IVAL, PERLY_COMMA),
493 DEBUG_TOKEN (IVAL, PERLY_DOT),
494 DEBUG_TOKEN (IVAL, PERLY_EQUAL_SIGN),
495 DEBUG_TOKEN (IVAL, PERLY_EXCLAMATION_MARK),
496 DEBUG_TOKEN (IVAL, PERLY_MINUS),
497 DEBUG_TOKEN (IVAL, PERLY_PAREN_OPEN),
498 DEBUG_TOKEN (IVAL, PERLY_PERCENT_SIGN),
499 DEBUG_TOKEN (IVAL, PERLY_PLUS),
500 DEBUG_TOKEN (IVAL, PERLY_QUESTION_MARK),
501 DEBUG_TOKEN (IVAL, PERLY_SEMICOLON),
502 DEBUG_TOKEN (IVAL, PERLY_SLASH),
503 DEBUG_TOKEN (IVAL, PERLY_SNAIL),
504 DEBUG_TOKEN (IVAL, PERLY_STAR),
505 DEBUG_TOKEN (IVAL, PERLY_TILDE),
506 DEBUG_TOKEN (OPVAL, PLUGEXPR),
507 DEBUG_TOKEN (OPVAL, PLUGSTMT),
508 DEBUG_TOKEN (PVAL, PLUGIN_ADD_OP),
509 DEBUG_TOKEN (PVAL, PLUGIN_ASSIGN_OP),
510 DEBUG_TOKEN (PVAL, PLUGIN_HIGH_OP),
511 DEBUG_TOKEN (PVAL, PLUGIN_LOGICAL_AND_OP),
512 DEBUG_TOKEN (PVAL, PLUGIN_LOGICAL_OR_OP),
513 DEBUG_TOKEN (PVAL, PLUGIN_LOGICAL_AND_LOW_OP),
514 DEBUG_TOKEN (PVAL, PLUGIN_LOGICAL_OR_LOW_OP),
515 DEBUG_TOKEN (PVAL, PLUGIN_LOW_OP),
516 DEBUG_TOKEN (PVAL, PLUGIN_MUL_OP),
517 DEBUG_TOKEN (PVAL, PLUGIN_POW_OP),
518 DEBUG_TOKEN (PVAL, PLUGIN_REL_OP),
519 DEBUG_TOKEN (OPVAL, PMFUNC),
520 DEBUG_TOKEN (NONE, POSTJOIN),
521 DEBUG_TOKEN (NONE, POSTDEC),
522 DEBUG_TOKEN (NONE, POSTINC),
523 DEBUG_TOKEN (OPNUM, POWOP),
524 DEBUG_TOKEN (NONE, PREDEC),
525 DEBUG_TOKEN (NONE, PREINC),
526 DEBUG_TOKEN (OPVAL, PRIVATEREF),
527 DEBUG_TOKEN (OPVAL, QWLIST),
528 DEBUG_TOKEN (NONE, REFGEN),
529 DEBUG_TOKEN (OPNUM, SHIFTOP),
530 DEBUG_TOKEN (NONE, SUBLEXEND),
531 DEBUG_TOKEN (NONE, SUBLEXSTART),
532 DEBUG_TOKEN (OPVAL, THING),
533 DEBUG_TOKEN (NONE, UMINUS),
534 DEBUG_TOKEN (OPNUM, UNIOP),
535 DEBUG_TOKEN (OPVAL, UNIOPSUB),
536 DEBUG_TOKEN (OPVAL, BAREWORD),
537 DEBUG_TOKEN (IVAL, YADAYADA),
538 { 0, TOKENTYPE_NONE, NULL }
539 };
540
541 #undef DEBUG_TOKEN
542
543 /* dump the returned token in rv, plus any optional arg in pl_yylval */
544
545 STATIC int
S_tokereport(pTHX_ I32 rv,const YYSTYPE * lvalp)546 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
547 {
548 PERL_ARGS_ASSERT_TOKEREPORT;
549
550 if (DEBUG_T_TEST) {
551 const char *name = NULL;
552 enum token_type type = TOKENTYPE_NONE;
553 const struct debug_tokens *p;
554 SV* const report = newSVpvs("<== ");
555
556 for (p = debug_tokens; p->token; p++) {
557 if (p->token == (int)rv) {
558 name = p->name;
559 type = p->type;
560 break;
561 }
562 }
563 if (name)
564 Perl_sv_catpv(aTHX_ report, name);
565 else if (isGRAPH(rv))
566 {
567 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
568 if ((char)rv == 'p')
569 sv_catpvs(report, " (pending identifier)");
570 }
571 else if (!rv)
572 sv_catpvs(report, "EOF");
573 else
574 Perl_sv_catpvf(aTHX_ report, "?? %" IVdf, (IV)rv);
575 switch (type) {
576 case TOKENTYPE_NONE:
577 break;
578 case TOKENTYPE_IVAL:
579 Perl_sv_catpvf(aTHX_ report, "(ival=%" IVdf ")", (IV)lvalp->ival);
580 break;
581 case TOKENTYPE_OPNUM:
582 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
583 PL_op_name[lvalp->ival]);
584 break;
585 case TOKENTYPE_PVAL:
586 Perl_sv_catpvf(aTHX_ report, "(pval=%p)", lvalp->pval);
587 break;
588 case TOKENTYPE_OPVAL:
589 if (lvalp->opval) {
590 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
591 PL_op_name[lvalp->opval->op_type]);
592 if (lvalp->opval->op_type == OP_CONST) {
593 Perl_sv_catpvf(aTHX_ report, " %s",
594 SvPEEK(cSVOPx_sv(lvalp->opval)));
595 }
596
597 }
598 else
599 sv_catpvs(report, "(opval=null)");
600 break;
601 }
602 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
603 };
604 return (int)rv;
605 }
606
607
608 /* print the buffer with suitable escapes */
609
610 STATIC void
S_printbuf(pTHX_ const char * const fmt,const char * const s)611 S_printbuf(pTHX_ const char *const fmt, const char *const s)
612 {
613 SV* const tmp = newSVpvs("");
614
615 PERL_ARGS_ASSERT_PRINTBUF;
616
617 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); /* fmt checked by caller */
618 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
619 GCC_DIAG_RESTORE_STMT;
620 SvREFCNT_dec(tmp);
621 }
622
623 #endif
624
625 /*
626 * S_ao
627 *
628 * This subroutine looks for an '=' next to the operator that has just been
629 * parsed and turns it into an ASSIGNOP if it finds one.
630 */
631
632 STATIC int
S_ao(pTHX_ int toketype)633 S_ao(pTHX_ int toketype)
634 {
635 if (*PL_bufptr == '=') {
636 PL_bufptr++;
637
638 switch (toketype) {
639 case ANDAND: pl_yylval.ival = OP_ANDASSIGN; break;
640 case OROR: pl_yylval.ival = OP_ORASSIGN; break;
641 case DORDOR: pl_yylval.ival = OP_DORASSIGN; break;
642 }
643
644 toketype = ASSIGNOP;
645 }
646 return REPORT(toketype);
647 }
648
649 /*
650 * S_no_op
651 * When Perl expects an operator and finds something else, no_op
652 * prints the warning. It always prints "<something> found where
653 * operator expected. It prints "Missing semicolon on previous line?"
654 * if the surprise occurs at the start of the line. "do you need to
655 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
656 * where the compiler doesn't know if foo is a method call or a function.
657 * It prints "Missing operator before end of line" if there's nothing
658 * after the missing operator, or "... before <...>" if there is something
659 * after the missing operator.
660 *
661 * PL_bufptr is expected to point to the start of the thing that was found,
662 * and s after the next token or partial token.
663 */
664
665 STATIC void
S_no_op(pTHX_ const char * const what,char * s)666 S_no_op(pTHX_ const char *const what, char *s)
667 {
668 char * const oldbp = PL_bufptr;
669 const bool is_first = (PL_oldbufptr == PL_linestart);
670 SV *message = sv_2mortal( newSVpvf(
671 PERL_DIAG_WARN_SYNTAX("%s found where operator expected"),
672 what
673 ) );
674
675 PERL_ARGS_ASSERT_NO_OP;
676
677 if (!s)
678 s = oldbp;
679 else
680 PL_bufptr = s;
681
682 if (ckWARN_d(WARN_SYNTAX)) {
683 bool has_more = FALSE;
684 if (is_first) {
685 has_more = TRUE;
686 sv_catpvs(message,
687 " (Missing semicolon on previous line?)");
688 }
689 else if (PL_oldoldbufptr) {
690 /* yyerror (via yywarn) would do this itself, so we should too */
691 const char *t;
692 for (t = PL_oldoldbufptr;
693 t < PL_bufptr && isSPACE(*t);
694 t += UTF ? UTF8SKIP(t) : 1)
695 {
696 NOOP;
697 }
698 /* see if we can identify the cause of the warning */
699 if (isIDFIRST_lazy_if_safe(t,PL_bufend,UTF))
700 {
701 const char *t_start= t;
702 for ( ;
703 (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) || *t == ':');
704 t += UTF ? UTF8SKIP(t) : 1)
705 {
706 NOOP;
707 }
708 if (t < PL_bufptr && isSPACE(*t)) {
709 has_more = TRUE;
710 sv_catpvf( message,
711 " (Do you need to predeclare \"%" UTF8f "\"?)",
712 UTF8fARG(UTF, t - t_start, t_start));
713 }
714 }
715 }
716 if (!has_more) {
717 const char *t= oldbp;
718 assert(s >= oldbp);
719 while (t < s && isSPACE(*t)) {
720 t += UTF ? UTF8SKIP(t) : 1;
721 }
722
723 sv_catpvf(message,
724 " (Missing operator before \"%" UTF8f "\"?)",
725 UTF8fARG(UTF, s - t, t));
726 }
727 }
728 yywarn(SvPV_nolen(message), UTF ? SVf_UTF8 : 0);
729 PL_bufptr = oldbp;
730 }
731
732 /*
733 * S_missingterm
734 * Complain about missing quote/regexp/heredoc terminator.
735 * If it's called with NULL then it cauterizes the line buffer.
736 * If we're in a delimited string and the delimiter is a control
737 * character, it's reformatted into a two-char sequence like ^C.
738 * This is fatal.
739 */
740
741 STATIC void
S_missingterm(pTHX_ char * s,STRLEN len)742 S_missingterm(pTHX_ char *s, STRLEN len)
743 {
744 char tmpbuf[UTF8_MAXBYTES + 1];
745 char q;
746 bool uni = FALSE;
747 if (s) {
748 char * const nl = (char *) my_memrchr(s, '\n', len);
749 if (nl) {
750 *nl = '\0';
751 len = nl - s;
752 }
753 uni = UTF;
754 }
755 else if (PL_multi_close < 32) {
756 *tmpbuf = '^';
757 tmpbuf[1] = (char)toCTRL(PL_multi_close);
758 tmpbuf[2] = '\0';
759 s = tmpbuf;
760 len = 2;
761 }
762 else {
763 if (! UTF && LIKELY(PL_multi_close < 256)) {
764 *tmpbuf = (char)PL_multi_close;
765 tmpbuf[1] = '\0';
766 len = 1;
767 }
768 else {
769 char *end = (char *)uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close);
770 *end = '\0';
771 len = end - tmpbuf;
772 uni = TRUE;
773 }
774 s = tmpbuf;
775 }
776 q = memchr(s, '"', len) ? '\'' : '"';
777 Perl_croak(aTHX_ "Can't find string terminator %c%" UTF8f "%c"
778 " anywhere before EOF", q, UTF8fARG(uni, len, s), q);
779 }
780
781 #include "feature.h"
782
783 /*
784 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
785 * utf16-to-utf8-reversed.
786 */
787
788 #ifdef PERL_CR_FILTER
789 static void
strip_return(SV * sv)790 strip_return(SV *sv)
791 {
792 const char *s = SvPVX_const(sv);
793 const char * const e = s + SvCUR(sv);
794
795 PERL_ARGS_ASSERT_STRIP_RETURN;
796
797 /* outer loop optimized to do nothing if there are no CR-LFs */
798 while (s < e) {
799 if (*s++ == '\r' && *s == '\n') {
800 /* hit a CR-LF, need to copy the rest */
801 char *d = s - 1;
802 *d++ = *s++;
803 while (s < e) {
804 if (*s == '\r' && s[1] == '\n')
805 s++;
806 *d++ = *s++;
807 }
808 SvCUR(sv) -= s - d;
809 return;
810 }
811 }
812 }
813
814 STATIC I32
S_cr_textfilter(pTHX_ int idx,SV * sv,int maxlen)815 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
816 {
817 const I32 count = FILTER_READ(idx+1, sv, maxlen);
818 if (count > 0 && !maxlen)
819 strip_return(sv);
820 return count;
821 }
822 #endif
823
824 /*
825 =for apidoc lex_start
826
827 Creates and initialises a new lexer/parser state object, supplying
828 a context in which to lex and parse from a new source of Perl code.
829 A pointer to the new state object is placed in L</PL_parser>. An entry
830 is made on the save stack so that upon unwinding, the new state object
831 will be destroyed and the former value of L</PL_parser> will be restored.
832 Nothing else need be done to clean up the parsing context.
833
834 The code to be parsed comes from C<line> and C<rsfp>. C<line>, if
835 non-null, provides a string (in SV form) containing code to be parsed.
836 A copy of the string is made, so subsequent modification of C<line>
837 does not affect parsing. C<rsfp>, if non-null, provides an input stream
838 from which code will be read to be parsed. If both are non-null, the
839 code in C<line> comes first and must consist of complete lines of input,
840 and C<rsfp> supplies the remainder of the source.
841
842 The C<flags> parameter is reserved for future use. Currently it is only
843 used by perl internally, so extensions should always pass zero.
844
845 =cut
846 */
847
848 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
849 can share filters with the current parser.
850 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
851 caller, hence isn't owned by the parser, so shouldn't be closed on parser
852 destruction. This is used to handle the case of defaulting to reading the
853 script from the standard input because no filename was given on the command
854 line (without getting confused by situation where STDIN has been closed, so
855 the script handle is opened on fd 0) */
856
857 void
Perl_lex_start(pTHX_ SV * line,PerlIO * rsfp,U32 flags)858 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
859 {
860 const char *s = NULL;
861 yy_parser *parser, *oparser;
862
863 if (flags && flags & ~LEX_START_FLAGS)
864 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
865
866 /* create and initialise a parser */
867
868 Newxz(parser, 1, yy_parser);
869 parser->old_parser = oparser = PL_parser;
870 PL_parser = parser;
871
872 parser->stack = NULL;
873 parser->stack_max1 = NULL;
874 parser->ps = NULL;
875
876 /* on scope exit, free this parser and restore any outer one */
877 SAVEPARSER(parser);
878 parser->saved_curcop = PL_curcop;
879
880 /* initialise lexer state */
881
882 parser->nexttoke = 0;
883 parser->error_count = oparser ? oparser->error_count : 0;
884 parser->copline = parser->preambling = NOLINE;
885 parser->lex_state = LEX_NORMAL;
886 parser->expect = XSTATE;
887 parser->rsfp = rsfp;
888 parser->recheck_utf8_validity = TRUE;
889 parser->rsfp_filters =
890 !(flags & LEX_START_SAME_FILTER) || !oparser
891 ? NULL
892 : MUTABLE_AV(SvREFCNT_inc(
893 oparser->rsfp_filters
894 ? oparser->rsfp_filters
895 : (oparser->rsfp_filters = newAV())
896 ));
897
898 Newx(parser->lex_brackstack, 120, char);
899 Newx(parser->lex_casestack, 12, char);
900 *parser->lex_casestack = '\0';
901 Newxz(parser->lex_shared, 1, LEXSHARED);
902
903 if (line) {
904 Size_t len;
905 const U8* first_bad_char_loc;
906
907 s = SvPV_const(line, len);
908
909 if ( SvUTF8(line)
910 && UNLIKELY(! is_utf8_string_loc((U8 *) s,
911 SvCUR(line),
912 &first_bad_char_loc)))
913 {
914 _force_out_malformed_utf8_message(first_bad_char_loc,
915 (U8 *) s + SvCUR(line),
916 0,
917 1 /* 1 means die */ );
918 NOT_REACHED; /* NOTREACHED */
919 }
920
921 parser->linestr = flags & LEX_START_COPIED
922 ? SvREFCNT_inc_simple_NN(line)
923 : newSVpvn_flags(s, len, SvUTF8(line));
924 if (!rsfp)
925 sv_catpvs(parser->linestr, "\n;");
926 } else {
927 parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
928 }
929
930 parser->oldoldbufptr =
931 parser->oldbufptr =
932 parser->bufptr =
933 parser->linestart = SvPVX(parser->linestr);
934 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
935 parser->last_lop = parser->last_uni = NULL;
936
937 STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
938 |LEX_DONT_CLOSE_RSFP));
939 parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
940 |LEX_DONT_CLOSE_RSFP));
941
942 parser->in_pod = parser->filtered = 0;
943 }
944
945
946 /* delete a parser object */
947
948 void
Perl_parser_free(pTHX_ const yy_parser * parser)949 Perl_parser_free(pTHX_ const yy_parser *parser)
950 {
951 PERL_ARGS_ASSERT_PARSER_FREE;
952
953 PL_curcop = parser->saved_curcop;
954 SvREFCNT_dec(parser->linestr);
955
956 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
957 PerlIO_clearerr(parser->rsfp);
958 else if (parser->rsfp && (!parser->old_parser
959 || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
960 PerlIO_close(parser->rsfp);
961 SvREFCNT_dec(parser->rsfp_filters);
962 SvREFCNT_dec(parser->lex_stuff);
963 SvREFCNT_dec(parser->lex_sub_repl);
964
965 Safefree(parser->lex_brackstack);
966 Safefree(parser->lex_casestack);
967 Safefree(parser->lex_shared);
968 PL_parser = parser->old_parser;
969 Safefree(parser);
970 }
971
972 void
Perl_parser_free_nexttoke_ops(pTHX_ yy_parser * parser,OPSLAB * slab)973 Perl_parser_free_nexttoke_ops(pTHX_ yy_parser *parser, OPSLAB *slab)
974 {
975 I32 nexttoke = parser->nexttoke;
976 PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
977 while (nexttoke--) {
978 if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
979 && parser->nextval[nexttoke].opval
980 && parser->nextval[nexttoke].opval->op_slabbed
981 && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
982 op_free(parser->nextval[nexttoke].opval);
983 parser->nextval[nexttoke].opval = NULL;
984 }
985 }
986 }
987
988
989 /*
990 =for apidoc AmnxUN|SV *|PL_parser-E<gt>linestr
991
992 Buffer scalar containing the chunk currently under consideration of the
993 text currently being lexed. This is always a plain string scalar (for
994 which C<SvPOK> is true). It is not intended to be used as a scalar by
995 normal scalar means; instead refer to the buffer directly by the pointer
996 variables described below.
997
998 The lexer maintains various C<char*> pointers to things in the
999 C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
1000 reallocated, all of these pointers must be updated. Don't attempt to
1001 do this manually, but rather use L</lex_grow_linestr> if you need to
1002 reallocate the buffer.
1003
1004 The content of the text chunk in the buffer is commonly exactly one
1005 complete line of input, up to and including a newline terminator,
1006 but there are situations where it is otherwise. The octets of the
1007 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
1008 The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
1009 flag on this scalar, which may disagree with it.
1010
1011 For direct examination of the buffer, the variable
1012 L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
1013 lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
1014 of these pointers is usually preferable to examination of the scalar
1015 through normal scalar means.
1016
1017 =for apidoc AmnxUN|char *|PL_parser-E<gt>bufend
1018
1019 Direct pointer to the end of the chunk of text currently being lexed, the
1020 end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
1021 + SvCUR(PL_parser-E<gt>linestr)>. A C<NUL> character (zero octet) is
1022 always located at the end of the buffer, and does not count as part of
1023 the buffer's contents.
1024
1025 =for apidoc AmnxUN|char *|PL_parser-E<gt>bufptr
1026
1027 Points to the current position of lexing inside the lexer buffer.
1028 Characters around this point may be freely examined, within
1029 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
1030 L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
1031 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
1032
1033 Lexing code (whether in the Perl core or not) moves this pointer past
1034 the characters that it consumes. It is also expected to perform some
1035 bookkeeping whenever a newline character is consumed. This movement
1036 can be more conveniently performed by the function L</lex_read_to>,
1037 which handles newlines appropriately.
1038
1039 Interpretation of the buffer's octets can be abstracted out by
1040 using the slightly higher-level functions L</lex_peek_unichar> and
1041 L</lex_read_unichar>.
1042
1043 =for apidoc AmnxUN|char *|PL_parser-E<gt>linestart
1044
1045 Points to the start of the current line inside the lexer buffer.
1046 This is useful for indicating at which column an error occurred, and
1047 not much else. This must be updated by any lexing code that consumes
1048 a newline; the function L</lex_read_to> handles this detail.
1049
1050 =cut
1051 */
1052
1053 /*
1054 =for apidoc lex_bufutf8
1055
1056 Indicates whether the octets in the lexer buffer
1057 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
1058 of Unicode characters. If not, they should be interpreted as Latin-1
1059 characters. This is analogous to the C<SvUTF8> flag for scalars.
1060
1061 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
1062 contains valid UTF-8. Lexing code must be robust in the face of invalid
1063 encoding.
1064
1065 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
1066 is significant, but not the whole story regarding the input character
1067 encoding. Normally, when a file is being read, the scalar contains octets
1068 and its C<SvUTF8> flag is off, but the octets should be interpreted as
1069 UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
1070 however, the scalar may have the C<SvUTF8> flag on, and in this case its
1071 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
1072 is in effect. This logic may change in the future; use this function
1073 instead of implementing the logic yourself.
1074
1075 =cut
1076 */
1077
1078 bool
Perl_lex_bufutf8(pTHX)1079 Perl_lex_bufutf8(pTHX)
1080 {
1081 return UTF;
1082 }
1083
1084 /*
1085 =for apidoc lex_grow_linestr
1086
1087 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
1088 at least C<len> octets (including terminating C<NUL>). Returns a
1089 pointer to the reallocated buffer. This is necessary before making
1090 any direct modification of the buffer that would increase its length.
1091 L</lex_stuff_pvn> provides a more convenient way to insert text into
1092 the buffer.
1093
1094 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
1095 this function updates all of the lexer's variables that point directly
1096 into the buffer.
1097
1098 =cut
1099 */
1100
1101 char *
Perl_lex_grow_linestr(pTHX_ STRLEN len)1102 Perl_lex_grow_linestr(pTHX_ STRLEN len)
1103 {
1104 SV *linestr;
1105 char *buf;
1106 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1107 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
1108 bool current;
1109
1110 linestr = PL_parser->linestr;
1111 buf = SvPVX(linestr);
1112 if (len <= SvLEN(linestr))
1113 return buf;
1114
1115 /* Is the lex_shared linestr SV the same as the current linestr SV?
1116 * Only in this case does re_eval_start need adjusting, since it
1117 * points within lex_shared->ls_linestr's buffer */
1118 current = ( !PL_parser->lex_shared->ls_linestr
1119 || linestr == PL_parser->lex_shared->ls_linestr);
1120
1121 bufend_pos = PL_parser->bufend - buf;
1122 bufptr_pos = PL_parser->bufptr - buf;
1123 oldbufptr_pos = PL_parser->oldbufptr - buf;
1124 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1125 linestart_pos = PL_parser->linestart - buf;
1126 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1127 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1128 re_eval_start_pos = (current && PL_parser->lex_shared->re_eval_start) ?
1129 PL_parser->lex_shared->re_eval_start - buf : 0;
1130
1131 buf = sv_grow(linestr, len);
1132
1133 PL_parser->bufend = buf + bufend_pos;
1134 PL_parser->bufptr = buf + bufptr_pos;
1135 PL_parser->oldbufptr = buf + oldbufptr_pos;
1136 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1137 PL_parser->linestart = buf + linestart_pos;
1138 if (PL_parser->last_uni)
1139 PL_parser->last_uni = buf + last_uni_pos;
1140 if (PL_parser->last_lop)
1141 PL_parser->last_lop = buf + last_lop_pos;
1142 if (current && PL_parser->lex_shared->re_eval_start)
1143 PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos;
1144 return buf;
1145 }
1146
1147 /*
1148 =for apidoc lex_stuff_pvn
1149
1150 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1151 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1152 reallocating the buffer if necessary. This means that lexing code that
1153 runs later will see the characters as if they had appeared in the input.
1154 It is not recommended to do this as part of normal parsing, and most
1155 uses of this facility run the risk of the inserted characters being
1156 interpreted in an unintended manner.
1157
1158 The string to be inserted is represented by C<len> octets starting
1159 at C<pv>. These octets are interpreted as either UTF-8 or Latin-1,
1160 according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>.
1161 The characters are recoded for the lexer buffer, according to how the
1162 buffer is currently being interpreted (L</lex_bufutf8>). If a string
1163 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
1164 function is more convenient.
1165
1166 =for apidoc Amnh||LEX_STUFF_UTF8
1167
1168 =cut
1169 */
1170
1171 void
Perl_lex_stuff_pvn(pTHX_ const char * pv,STRLEN len,U32 flags)1172 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1173 {
1174 char *bufptr;
1175 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1176 if (flags & ~(LEX_STUFF_UTF8))
1177 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1178 if (UTF) {
1179 if (flags & LEX_STUFF_UTF8) {
1180 goto plain_copy;
1181 } else {
1182 STRLEN highhalf = variant_under_utf8_count((U8 *) pv,
1183 (U8 *) pv + len);
1184 const char *p, *e = pv+len;;
1185 if (!highhalf)
1186 goto plain_copy;
1187 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1188 bufptr = PL_parser->bufptr;
1189 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1190 SvCUR_set(PL_parser->linestr,
1191 SvCUR(PL_parser->linestr) + len+highhalf);
1192 PL_parser->bufend += len+highhalf;
1193 for (p = pv; p != e; p++) {
1194 append_utf8_from_native_byte(*p, (U8 **) &bufptr);
1195 }
1196 }
1197 } else {
1198 if (flags & LEX_STUFF_UTF8) {
1199 STRLEN highhalf = 0;
1200 const char *p, *e = pv+len;
1201 for (p = pv; p != e; p++) {
1202 U8 c = (U8)*p;
1203 if (UTF8_IS_ABOVE_LATIN1(c)) {
1204 Perl_croak(aTHX_ "Lexing code attempted to stuff "
1205 "non-Latin-1 character into Latin-1 input");
1206 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1207 p++;
1208 highhalf++;
1209 } else assert(UTF8_IS_INVARIANT(c));
1210 }
1211 if (!highhalf)
1212 goto plain_copy;
1213 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1214 bufptr = PL_parser->bufptr;
1215 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1216 SvCUR_set(PL_parser->linestr,
1217 SvCUR(PL_parser->linestr) + len-highhalf);
1218 PL_parser->bufend += len-highhalf;
1219 p = pv;
1220 while (p < e) {
1221 if (UTF8_IS_INVARIANT(*p)) {
1222 *bufptr++ = *p;
1223 p++;
1224 }
1225 else {
1226 assert(p < e -1 );
1227 *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
1228 p += 2;
1229 }
1230 }
1231 } else {
1232 plain_copy:
1233 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1234 bufptr = PL_parser->bufptr;
1235 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1236 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1237 PL_parser->bufend += len;
1238 Copy(pv, bufptr, len, char);
1239 }
1240 }
1241 }
1242
1243 /*
1244 =for apidoc lex_stuff_pv
1245
1246 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1247 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1248 reallocating the buffer if necessary. This means that lexing code that
1249 runs later will see the characters as if they had appeared in the input.
1250 It is not recommended to do this as part of normal parsing, and most
1251 uses of this facility run the risk of the inserted characters being
1252 interpreted in an unintended manner.
1253
1254 The string to be inserted is represented by octets starting at C<pv>
1255 and continuing to the first nul. These octets are interpreted as either
1256 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1257 in C<flags>. The characters are recoded for the lexer buffer, according
1258 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1259 If it is not convenient to nul-terminate a string to be inserted, the
1260 L</lex_stuff_pvn> function is more appropriate.
1261
1262 =cut
1263 */
1264
1265 void
Perl_lex_stuff_pv(pTHX_ const char * pv,U32 flags)1266 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1267 {
1268 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1269 lex_stuff_pvn(pv, strlen(pv), flags);
1270 }
1271
1272 /*
1273 =for apidoc lex_stuff_sv
1274
1275 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1276 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1277 reallocating the buffer if necessary. This means that lexing code that
1278 runs later will see the characters as if they had appeared in the input.
1279 It is not recommended to do this as part of normal parsing, and most
1280 uses of this facility run the risk of the inserted characters being
1281 interpreted in an unintended manner.
1282
1283 The string to be inserted is the string value of C<sv>. The characters
1284 are recoded for the lexer buffer, according to how the buffer is currently
1285 being interpreted (L</lex_bufutf8>). If a string to be inserted is
1286 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1287 need to construct a scalar.
1288
1289 =cut
1290 */
1291
1292 void
Perl_lex_stuff_sv(pTHX_ SV * sv,U32 flags)1293 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1294 {
1295 char *pv;
1296 STRLEN len;
1297 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1298 if (flags)
1299 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1300 pv = SvPV(sv, len);
1301 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1302 }
1303
1304 /*
1305 =for apidoc lex_unstuff
1306
1307 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1308 C<ptr>. Text following C<ptr> will be moved, and the buffer shortened.
1309 This hides the discarded text from any lexing code that runs later,
1310 as if the text had never appeared.
1311
1312 This is not the normal way to consume lexed text. For that, use
1313 L</lex_read_to>.
1314
1315 =cut
1316 */
1317
1318 void
Perl_lex_unstuff(pTHX_ char * ptr)1319 Perl_lex_unstuff(pTHX_ char *ptr)
1320 {
1321 char *buf, *bufend;
1322 STRLEN unstuff_len;
1323 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1324 buf = PL_parser->bufptr;
1325 if (ptr < buf)
1326 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1327 if (ptr == buf)
1328 return;
1329 bufend = PL_parser->bufend;
1330 if (ptr > bufend)
1331 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1332 unstuff_len = ptr - buf;
1333 Move(ptr, buf, bufend+1-ptr, char);
1334 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1335 PL_parser->bufend = bufend - unstuff_len;
1336 }
1337
1338 /*
1339 =for apidoc lex_read_to
1340
1341 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1342 to C<ptr>. This advances L</PL_parser-E<gt>bufptr> to match C<ptr>,
1343 performing the correct bookkeeping whenever a newline character is passed.
1344 This is the normal way to consume lexed text.
1345
1346 Interpretation of the buffer's octets can be abstracted out by
1347 using the slightly higher-level functions L</lex_peek_unichar> and
1348 L</lex_read_unichar>.
1349
1350 =cut
1351 */
1352
1353 void
Perl_lex_read_to(pTHX_ char * ptr)1354 Perl_lex_read_to(pTHX_ char *ptr)
1355 {
1356 char *s;
1357 PERL_ARGS_ASSERT_LEX_READ_TO;
1358 s = PL_parser->bufptr;
1359 if (ptr < s || ptr > PL_parser->bufend)
1360 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1361 for (; s != ptr; s++)
1362 if (*s == '\n') {
1363 COPLINE_INC_WITH_HERELINES;
1364 PL_parser->linestart = s+1;
1365 }
1366 PL_parser->bufptr = ptr;
1367 }
1368
1369 /*
1370 =for apidoc lex_discard_to
1371
1372 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1373 up to C<ptr>. The remaining content of the buffer will be moved, and
1374 all pointers into the buffer updated appropriately. C<ptr> must not
1375 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1376 it is not permitted to discard text that has yet to be lexed.
1377
1378 Normally it is not necessarily to do this directly, because it suffices to
1379 use the implicit discarding behaviour of L</lex_next_chunk> and things
1380 based on it. However, if a token stretches across multiple lines,
1381 and the lexing code has kept multiple lines of text in the buffer for
1382 that purpose, then after completion of the token it would be wise to
1383 explicitly discard the now-unneeded earlier lines, to avoid future
1384 multi-line tokens growing the buffer without bound.
1385
1386 =cut
1387 */
1388
1389 void
Perl_lex_discard_to(pTHX_ char * ptr)1390 Perl_lex_discard_to(pTHX_ char *ptr)
1391 {
1392 char *buf;
1393 STRLEN discard_len;
1394 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1395 buf = SvPVX(PL_parser->linestr);
1396 if (ptr < buf)
1397 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1398 if (ptr == buf)
1399 return;
1400 if (ptr > PL_parser->bufptr)
1401 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1402 discard_len = ptr - buf;
1403 if (PL_parser->oldbufptr < ptr)
1404 PL_parser->oldbufptr = ptr;
1405 if (PL_parser->oldoldbufptr < ptr)
1406 PL_parser->oldoldbufptr = ptr;
1407 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1408 PL_parser->last_uni = NULL;
1409 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1410 PL_parser->last_lop = NULL;
1411 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1412 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1413 PL_parser->bufend -= discard_len;
1414 PL_parser->bufptr -= discard_len;
1415 PL_parser->oldbufptr -= discard_len;
1416 PL_parser->oldoldbufptr -= discard_len;
1417 if (PL_parser->last_uni)
1418 PL_parser->last_uni -= discard_len;
1419 if (PL_parser->last_lop)
1420 PL_parser->last_lop -= discard_len;
1421 }
1422
1423 void
Perl_notify_parser_that_changed_to_utf8(pTHX)1424 Perl_notify_parser_that_changed_to_utf8(pTHX)
1425 {
1426 /* Called when $^H is changed to indicate that HINT_UTF8 has changed from
1427 * off to on. At compile time, this has the effect of entering a 'use
1428 * utf8' section. This means that any input was not previously checked for
1429 * UTF-8 (because it was off), but now we do need to check it, or our
1430 * assumptions about the input being sane could be wrong, and we could
1431 * segfault. This routine just sets a flag so that the next time we look
1432 * at the input we do the well-formed UTF-8 check. If we aren't in the
1433 * proper phase, there may not be a parser object, but if there is, setting
1434 * the flag is harmless */
1435
1436 if (PL_parser) {
1437 PL_parser->recheck_utf8_validity = TRUE;
1438 }
1439 }
1440
1441 /*
1442 =for apidoc lex_next_chunk
1443
1444 Reads in the next chunk of text to be lexed, appending it to
1445 L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1446 looked to the end of the current chunk and wants to know more. It is
1447 usual, but not necessary, for lexing to have consumed the entirety of
1448 the current chunk at this time.
1449
1450 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1451 chunk (i.e., the current chunk has been entirely consumed), normally the
1452 current chunk will be discarded at the same time that the new chunk is
1453 read in. If C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, the current chunk
1454 will not be discarded. If the current chunk has not been entirely
1455 consumed, then it will not be discarded regardless of the flag.
1456
1457 Returns true if some new text was added to the buffer, or false if the
1458 buffer has reached the end of the input text.
1459
1460 =for apidoc Amnh||LEX_KEEP_PREVIOUS
1461
1462 =cut
1463 */
1464
1465 #define LEX_FAKE_EOF 0x80000000
1466 #define LEX_NO_TERM 0x40000000 /* here-doc */
1467
1468 bool
Perl_lex_next_chunk(pTHX_ U32 flags)1469 Perl_lex_next_chunk(pTHX_ U32 flags)
1470 {
1471 SV *linestr;
1472 char *buf;
1473 STRLEN old_bufend_pos, new_bufend_pos;
1474 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1475 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1476 bool got_some_for_debugger = 0;
1477 bool got_some;
1478
1479 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1480 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1481 if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
1482 return FALSE;
1483 linestr = PL_parser->linestr;
1484 buf = SvPVX(linestr);
1485 if (!(flags & LEX_KEEP_PREVIOUS)
1486 && PL_parser->bufptr == PL_parser->bufend)
1487 {
1488 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1489 linestart_pos = 0;
1490 if (PL_parser->last_uni != PL_parser->bufend)
1491 PL_parser->last_uni = NULL;
1492 if (PL_parser->last_lop != PL_parser->bufend)
1493 PL_parser->last_lop = NULL;
1494 last_uni_pos = last_lop_pos = 0;
1495 *buf = 0;
1496 SvCUR_set(linestr, 0);
1497 } else {
1498 old_bufend_pos = PL_parser->bufend - buf;
1499 bufptr_pos = PL_parser->bufptr - buf;
1500 oldbufptr_pos = PL_parser->oldbufptr - buf;
1501 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1502 linestart_pos = PL_parser->linestart - buf;
1503 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1504 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1505 }
1506 if (flags & LEX_FAKE_EOF) {
1507 goto eof;
1508 } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1509 got_some = 0;
1510 } else if (filter_gets(linestr, old_bufend_pos)) {
1511 got_some = 1;
1512 got_some_for_debugger = 1;
1513 } else if (flags & LEX_NO_TERM) {
1514 got_some = 0;
1515 } else {
1516 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1517 SvPVCLEAR(linestr);
1518 eof:
1519 /* End of real input. Close filehandle (unless it was STDIN),
1520 * then add implicit termination.
1521 */
1522 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1523 PerlIO_clearerr(PL_parser->rsfp);
1524 else if (PL_parser->rsfp)
1525 (void)PerlIO_close(PL_parser->rsfp);
1526 PL_parser->rsfp = NULL;
1527 PL_parser->in_pod = PL_parser->filtered = 0;
1528 if (!PL_in_eval && PL_minus_p) {
1529 sv_catpvs(linestr,
1530 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1531 PL_minus_n = PL_minus_p = 0;
1532 } else if (!PL_in_eval && PL_minus_n) {
1533 sv_catpvs(linestr, /*{*/";}");
1534 PL_minus_n = 0;
1535 } else
1536 sv_catpvs(linestr, ";");
1537 got_some = 1;
1538 }
1539 buf = SvPVX(linestr);
1540 new_bufend_pos = SvCUR(linestr);
1541 PL_parser->bufend = buf + new_bufend_pos;
1542 PL_parser->bufptr = buf + bufptr_pos;
1543
1544 if (UTF) {
1545 const U8* first_bad_char_loc;
1546 if (UNLIKELY(! is_utf8_string_loc(
1547 (U8 *) PL_parser->bufptr,
1548 PL_parser->bufend - PL_parser->bufptr,
1549 &first_bad_char_loc)))
1550 {
1551 _force_out_malformed_utf8_message(first_bad_char_loc,
1552 (U8 *) PL_parser->bufend,
1553 0,
1554 1 /* 1 means die */ );
1555 NOT_REACHED; /* NOTREACHED */
1556 }
1557 }
1558
1559 PL_parser->oldbufptr = buf + oldbufptr_pos;
1560 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1561 PL_parser->linestart = buf + linestart_pos;
1562 if (PL_parser->last_uni)
1563 PL_parser->last_uni = buf + last_uni_pos;
1564 if (PL_parser->last_lop)
1565 PL_parser->last_lop = buf + last_lop_pos;
1566 if (PL_parser->preambling != NOLINE) {
1567 CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1568 PL_parser->preambling = NOLINE;
1569 }
1570 if ( got_some_for_debugger
1571 && PERLDB_LINE_OR_SAVESRC
1572 && PL_curstash != PL_debstash)
1573 {
1574 /* debugger active and we're not compiling the debugger code,
1575 * so store the line into the debugger's array of lines
1576 */
1577 update_debugger_info(NULL, buf+old_bufend_pos,
1578 new_bufend_pos-old_bufend_pos);
1579 }
1580 return got_some;
1581 }
1582
1583 /*
1584 =for apidoc lex_peek_unichar
1585
1586 Looks ahead one (Unicode) character in the text currently being lexed.
1587 Returns the codepoint (unsigned integer value) of the next character,
1588 or -1 if lexing has reached the end of the input text. To consume the
1589 peeked character, use L</lex_read_unichar>.
1590
1591 If the next character is in (or extends into) the next chunk of input
1592 text, the next chunk will be read in. Normally the current chunk will be
1593 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1594 bit set, then the current chunk will not be discarded.
1595
1596 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1597 is encountered, an exception is generated.
1598
1599 =cut
1600 */
1601
1602 I32
Perl_lex_peek_unichar(pTHX_ U32 flags)1603 Perl_lex_peek_unichar(pTHX_ U32 flags)
1604 {
1605 char *s, *bufend;
1606 if (flags & ~(LEX_KEEP_PREVIOUS))
1607 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1608 s = PL_parser->bufptr;
1609 bufend = PL_parser->bufend;
1610 if (UTF) {
1611 U8 head;
1612 I32 unichar;
1613 STRLEN len, retlen;
1614 if (s == bufend) {
1615 if (!lex_next_chunk(flags))
1616 return -1;
1617 s = PL_parser->bufptr;
1618 bufend = PL_parser->bufend;
1619 }
1620 head = (U8)*s;
1621 if (UTF8_IS_INVARIANT(head))
1622 return head;
1623 if (UTF8_IS_START(head)) {
1624 len = UTF8SKIP(&head);
1625 while ((STRLEN)(bufend-s) < len) {
1626 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1627 break;
1628 s = PL_parser->bufptr;
1629 bufend = PL_parser->bufend;
1630 }
1631 }
1632 unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1633 if (retlen == (STRLEN)-1) {
1634 _force_out_malformed_utf8_message((U8 *) s,
1635 (U8 *) bufend,
1636 0,
1637 1 /* 1 means die */ );
1638 NOT_REACHED; /* NOTREACHED */
1639 }
1640 return unichar;
1641 } else {
1642 if (s == bufend) {
1643 if (!lex_next_chunk(flags))
1644 return -1;
1645 s = PL_parser->bufptr;
1646 }
1647 return (U8)*s;
1648 }
1649 }
1650
1651 /*
1652 =for apidoc lex_read_unichar
1653
1654 Reads the next (Unicode) character in the text currently being lexed.
1655 Returns the codepoint (unsigned integer value) of the character read,
1656 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1657 if lexing has reached the end of the input text. To non-destructively
1658 examine the next character, use L</lex_peek_unichar> instead.
1659
1660 If the next character is in (or extends into) the next chunk of input
1661 text, the next chunk will be read in. Normally the current chunk will be
1662 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1663 bit set, then the current chunk will not be discarded.
1664
1665 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1666 is encountered, an exception is generated.
1667
1668 =cut
1669 */
1670
1671 I32
Perl_lex_read_unichar(pTHX_ U32 flags)1672 Perl_lex_read_unichar(pTHX_ U32 flags)
1673 {
1674 I32 c;
1675 if (flags & ~(LEX_KEEP_PREVIOUS))
1676 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1677 c = lex_peek_unichar(flags);
1678 if (c != -1) {
1679 if (c == '\n')
1680 COPLINE_INC_WITH_HERELINES;
1681 if (UTF)
1682 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1683 else
1684 ++(PL_parser->bufptr);
1685 }
1686 return c;
1687 }
1688
1689 /*
1690 =for apidoc lex_read_space
1691
1692 Reads optional spaces, in Perl style, in the text currently being
1693 lexed. The spaces may include ordinary whitespace characters and
1694 Perl-style comments. C<#line> directives are processed if encountered.
1695 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1696 at a non-space character (or the end of the input text).
1697
1698 If spaces extend into the next chunk of input text, the next chunk will
1699 be read in. Normally the current chunk will be discarded at the same
1700 time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, then the current
1701 chunk will not be discarded.
1702
1703 =cut
1704 */
1705
1706 #define LEX_NO_INCLINE 0x40000000
1707 #define LEX_NO_NEXT_CHUNK 0x80000000
1708
1709 void
Perl_lex_read_space(pTHX_ U32 flags)1710 Perl_lex_read_space(pTHX_ U32 flags)
1711 {
1712 char *s, *bufend;
1713 const bool can_incline = !(flags & LEX_NO_INCLINE);
1714 bool need_incline = 0;
1715 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1716 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1717 s = PL_parser->bufptr;
1718 bufend = PL_parser->bufend;
1719 while (1) {
1720 char c = *s;
1721 if (c == '#') {
1722 do {
1723 c = *++s;
1724 } while (!(c == '\n' || (c == 0 && s == bufend)));
1725 } else if (c == '\n') {
1726 s++;
1727 if (can_incline) {
1728 PL_parser->linestart = s;
1729 if (s == bufend)
1730 need_incline = 1;
1731 else
1732 incline(s, bufend);
1733 }
1734 } else if (isSPACE(c)) {
1735 s++;
1736 } else if (c == 0 && s == bufend) {
1737 bool got_more;
1738 line_t l;
1739 if (flags & LEX_NO_NEXT_CHUNK)
1740 break;
1741 PL_parser->bufptr = s;
1742 l = CopLINE(PL_curcop);
1743 CopLINE(PL_curcop) += PL_parser->herelines + 1;
1744 got_more = lex_next_chunk(flags);
1745 CopLINE_set(PL_curcop, l);
1746 s = PL_parser->bufptr;
1747 bufend = PL_parser->bufend;
1748 if (!got_more)
1749 break;
1750 if (can_incline && need_incline && PL_parser->rsfp) {
1751 incline(s, bufend);
1752 need_incline = 0;
1753 }
1754 } else if (!c) {
1755 s++;
1756 } else {
1757 break;
1758 }
1759 }
1760 PL_parser->bufptr = s;
1761 }
1762
1763 /*
1764
1765 =for apidoc validate_proto
1766
1767 This function performs syntax checking on a prototype, C<proto>.
1768 If C<warn> is true, any illegal characters or mismatched brackets
1769 will trigger illegalproto warnings, declaring that they were
1770 detected in the prototype for C<name>.
1771
1772 The return value is C<true> if this is a valid prototype, and
1773 C<false> if it is not, regardless of whether C<warn> was C<true> or
1774 C<false>.
1775
1776 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1777
1778 =cut
1779
1780 */
1781
1782 bool
Perl_validate_proto(pTHX_ SV * name,SV * proto,bool warn,bool curstash)1783 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn, bool curstash)
1784 {
1785 STRLEN len, origlen;
1786 char *p;
1787 bool bad_proto = FALSE;
1788 bool in_brackets = FALSE;
1789 bool after_slash = FALSE;
1790 char greedy_proto = ' ';
1791 bool proto_after_greedy_proto = FALSE;
1792 bool must_be_last = FALSE;
1793 bool underscore = FALSE;
1794 bool bad_proto_after_underscore = FALSE;
1795
1796 PERL_ARGS_ASSERT_VALIDATE_PROTO;
1797
1798 if (!proto)
1799 return TRUE;
1800
1801 p = SvPV(proto, len);
1802 origlen = len;
1803 for (; len--; p++) {
1804 if (!isSPACE(*p)) {
1805 if (must_be_last)
1806 proto_after_greedy_proto = TRUE;
1807 if (underscore) {
1808 if (!memCHRs(";@%", *p))
1809 bad_proto_after_underscore = TRUE;
1810 underscore = FALSE;
1811 }
1812 if (!memCHRs("$@%*;[]&\\_+", *p) || *p == '\0') {
1813 bad_proto = TRUE;
1814 }
1815 else {
1816 if (*p == '[')
1817 in_brackets = TRUE;
1818 else if (*p == ']')
1819 in_brackets = FALSE;
1820 else if ((*p == '@' || *p == '%')
1821 && !after_slash
1822 && !in_brackets )
1823 {
1824 must_be_last = TRUE;
1825 greedy_proto = *p;
1826 }
1827 else if (*p == '_')
1828 underscore = TRUE;
1829 }
1830 if (*p == '\\')
1831 after_slash = TRUE;
1832 else
1833 after_slash = FALSE;
1834 }
1835 }
1836
1837 if (warn) {
1838 SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1839 p -= origlen;
1840 p = SvUTF8(proto)
1841 ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1842 origlen, UNI_DISPLAY_ISPRINT)
1843 : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1844
1845 if (curstash && !memchr(SvPVX(name), ':', SvCUR(name))) {
1846 SV *name2 = sv_2mortal(newSVsv(PL_curstname));
1847 sv_catpvs(name2, "::");
1848 sv_catsv(name2, (SV *)name);
1849 name = name2;
1850 }
1851
1852 if (proto_after_greedy_proto)
1853 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1854 "Prototype after '%c' for %" SVf " : %s",
1855 greedy_proto, SVfARG(name), p);
1856 if (in_brackets)
1857 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1858 "Missing ']' in prototype for %" SVf " : %s",
1859 SVfARG(name), p);
1860 if (bad_proto)
1861 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1862 "Illegal character in prototype for %" SVf " : %s",
1863 SVfARG(name), p);
1864 if (bad_proto_after_underscore)
1865 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1866 "Illegal character after '_' in prototype for %" SVf " : %s",
1867 SVfARG(name), p);
1868 }
1869
1870 return (! (proto_after_greedy_proto || bad_proto) );
1871 }
1872
1873 /*
1874 * S_incline
1875 * This subroutine has nothing to do with tilting, whether at windmills
1876 * or pinball tables. Its name is short for "increment line". It
1877 * increments the current line number in CopLINE(PL_curcop) and checks
1878 * to see whether the line starts with a comment of the form
1879 * # line 500 "foo.pm"
1880 * If so, it sets the current line number and file to the values in the comment.
1881 */
1882
1883 STATIC void
S_incline(pTHX_ const char * s,const char * end)1884 S_incline(pTHX_ const char *s, const char *end)
1885 {
1886 const char *t;
1887 const char *n;
1888 const char *e;
1889 line_t line_num;
1890 UV uv;
1891
1892 PERL_ARGS_ASSERT_INCLINE;
1893
1894 assert(end >= s);
1895
1896 COPLINE_INC_WITH_HERELINES;
1897 if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1898 && s+1 == PL_bufend && *s == ';') {
1899 /* fake newline in string eval */
1900 CopLINE_dec(PL_curcop);
1901 return;
1902 }
1903 if (*s++ != '#')
1904 return;
1905 while (SPACE_OR_TAB(*s))
1906 s++;
1907 if (memBEGINs(s, (STRLEN) (end - s), "line"))
1908 s += sizeof("line") - 1;
1909 else
1910 return;
1911 if (SPACE_OR_TAB(*s))
1912 s++;
1913 else
1914 return;
1915 while (SPACE_OR_TAB(*s))
1916 s++;
1917 if (!isDIGIT(*s))
1918 return;
1919
1920 n = s;
1921 while (isDIGIT(*s))
1922 s++;
1923 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1924 return;
1925 while (SPACE_OR_TAB(*s))
1926 s++;
1927 if (*s == '"' && (t = (char *) memchr(s+1, '"', end - s))) {
1928 s++;
1929 e = t + 1;
1930 }
1931 else {
1932 t = s;
1933 while (*t && !isSPACE(*t))
1934 t++;
1935 e = t;
1936 }
1937 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1938 e++;
1939 if (*e != '\n' && *e != '\0')
1940 return; /* false alarm */
1941
1942 if (!grok_atoUV(n, &uv, &e))
1943 return;
1944 line_num = ((line_t)uv) - 1;
1945
1946 if (t - s > 0) {
1947 const STRLEN len = t - s;
1948
1949 if (!PL_rsfp && !PL_parser->filtered) {
1950 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1951 * to *{"::_<newfilename"} */
1952 /* However, the long form of evals is only turned on by the
1953 debugger - usually they're "(eval %lu)" */
1954 GV * const cfgv = CopFILEGV(PL_curcop);
1955 if (cfgv) {
1956 char smallbuf[128];
1957 STRLEN tmplen2 = len;
1958 char *tmpbuf2;
1959 GV *gv2;
1960
1961 if (tmplen2 + 2 <= sizeof smallbuf)
1962 tmpbuf2 = smallbuf;
1963 else
1964 Newx(tmpbuf2, tmplen2 + 2, char);
1965
1966 tmpbuf2[0] = '_';
1967 tmpbuf2[1] = '<';
1968
1969 memcpy(tmpbuf2 + 2, s, tmplen2);
1970 tmplen2 += 2;
1971
1972 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1973 if (!isGV(gv2)) {
1974 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1975 /* adjust ${"::_<newfilename"} to store the new file name */
1976 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1977 /* The line number may differ. If that is the case,
1978 alias the saved lines that are in the array.
1979 Otherwise alias the whole array. */
1980 if (CopLINE(PL_curcop) == line_num) {
1981 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1982 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1983 }
1984 else if (GvAV(cfgv)) {
1985 AV * const av = GvAV(cfgv);
1986 const line_t start = CopLINE(PL_curcop)+1;
1987 SSize_t items = AvFILLp(av) - start;
1988 if (items > 0) {
1989 AV * const av2 = GvAVn(gv2);
1990 SV **svp = AvARRAY(av) + start;
1991 Size_t l = line_num+1;
1992 while (items-- && l < SSize_t_MAX && l == (line_t)l)
1993 av_store(av2, (SSize_t)l++, SvREFCNT_inc(*svp++));
1994 }
1995 }
1996 }
1997
1998 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1999 }
2000 }
2001 CopFILE_free(PL_curcop);
2002 CopFILE_setn(PL_curcop, s, len);
2003 }
2004 CopLINE_set(PL_curcop, line_num);
2005 }
2006
2007 STATIC void
S_update_debugger_info(pTHX_ SV * orig_sv,const char * const buf,STRLEN len)2008 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
2009 {
2010 AV *av = CopFILEAVx(PL_curcop);
2011 if (av) {
2012 SV * sv;
2013 if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
2014 else {
2015 sv = *av_fetch(av, 0, 1);
2016 SvUPGRADE(sv, SVt_PVMG);
2017 }
2018 if (!SvPOK(sv)) SvPVCLEAR(sv);
2019 if (orig_sv)
2020 sv_catsv(sv, orig_sv);
2021 else
2022 sv_catpvn(sv, buf, len);
2023 if (!SvIOK(sv)) {
2024 (void)SvIOK_on(sv);
2025 SvIV_set(sv, 0);
2026 }
2027 if (PL_parser->preambling == NOLINE)
2028 av_store(av, CopLINE(PL_curcop), sv);
2029 }
2030 }
2031
2032 /*
2033 * skipspace
2034 * Called to gobble the appropriate amount and type of whitespace.
2035 * Skips comments as well.
2036 * Returns the next character after the whitespace that is skipped.
2037 *
2038 * peekspace
2039 * Same thing, but look ahead without incrementing line numbers or
2040 * adjusting PL_linestart.
2041 */
2042
2043 #define skipspace(s) skipspace_flags(s, 0)
2044 #define peekspace(s) skipspace_flags(s, LEX_NO_INCLINE)
2045
2046 char *
Perl_skipspace_flags(pTHX_ char * s,U32 flags)2047 Perl_skipspace_flags(pTHX_ char *s, U32 flags)
2048 {
2049 PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
2050 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2051 while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s))
2052 s++;
2053 } else {
2054 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
2055 PL_bufptr = s;
2056 lex_read_space(flags | LEX_KEEP_PREVIOUS |
2057 (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ?
2058 LEX_NO_NEXT_CHUNK : 0));
2059 s = PL_bufptr;
2060 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
2061 if (PL_linestart > PL_bufptr)
2062 PL_bufptr = PL_linestart;
2063 return s;
2064 }
2065 return s;
2066 }
2067
2068 /*
2069 * S_check_uni
2070 * Check the unary operators to ensure there's no ambiguity in how they're
2071 * used. An ambiguous piece of code would be:
2072 * rand + 5
2073 * This doesn't mean rand() + 5. Because rand() is a unary operator,
2074 * the +5 is its argument.
2075 */
2076
2077 STATIC void
S_check_uni(pTHX)2078 S_check_uni(pTHX)
2079 {
2080 const char *s;
2081
2082 if (PL_oldoldbufptr != PL_last_uni)
2083 return;
2084 while (isSPACE(*PL_last_uni))
2085 PL_last_uni++;
2086 s = PL_last_uni;
2087 while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-')
2088 s += UTF ? UTF8SKIP(s) : 1;
2089 if (s < PL_bufptr && memchr(s, '(', PL_bufptr - s))
2090 return;
2091
2092 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
2093 "Warning: Use of \"%" UTF8f "\" without parentheses is ambiguous",
2094 UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
2095 }
2096
2097 /*
2098 * LOP : macro to build a list operator. Its behaviour has been replaced
2099 * with a subroutine, S_lop() for which LOP is just another name.
2100 */
2101
2102 #define LOP(f,x) return lop(f,x,s)
2103
2104 /*
2105 * S_lop
2106 * Build a list operator (or something that might be one). The rules:
2107 * - if we have a next token, then it's a list operator (no parens) for
2108 * which the next token has already been parsed; e.g.,
2109 * sort foo @args
2110 * sort foo (@args)
2111 * - if the next thing is an opening paren, then it's a function
2112 * - else it's a list operator
2113 */
2114
2115 STATIC I32
S_lop(pTHX_ I32 f,U8 x,char * s)2116 S_lop(pTHX_ I32 f, U8 x, char *s)
2117 {
2118 PERL_ARGS_ASSERT_LOP;
2119
2120 pl_yylval.ival = f;
2121 CLINE;
2122 PL_bufptr = s;
2123 PL_last_lop = PL_oldbufptr;
2124 PL_last_lop_op = (OPCODE)f;
2125 if (PL_nexttoke)
2126 goto lstop;
2127 PL_expect = x;
2128 if (*s == '(')
2129 return REPORT(FUNC);
2130 s = skipspace(s);
2131 if (*s == '(')
2132 return REPORT(FUNC);
2133 else {
2134 lstop:
2135 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
2136 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
2137 return REPORT(LSTOP);
2138 }
2139 }
2140
2141 /*
2142 * S_force_next
2143 * When the lexer realizes it knows the next token (for instance,
2144 * it is reordering tokens for the parser) then it can call S_force_next
2145 * to know what token to return the next time the lexer is called. Caller
2146 * will need to set PL_nextval[] and possibly PL_expect to ensure
2147 * the lexer handles the token correctly.
2148 */
2149
2150 STATIC void
S_force_next(pTHX_ I32 type)2151 S_force_next(pTHX_ I32 type)
2152 {
2153 #ifdef DEBUGGING
2154 if (DEBUG_T_TEST) {
2155 PerlIO_printf(Perl_debug_log, "### forced token:\n");
2156 tokereport(type, &NEXTVAL_NEXTTOKE);
2157 }
2158 #endif
2159 assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
2160 PL_nexttype[PL_nexttoke] = type;
2161 PL_nexttoke++;
2162 }
2163
2164 /*
2165 * S_postderef
2166 *
2167 * This subroutine handles postfix deref syntax after the arrow has already
2168 * been emitted. @* $* etc. are emitted as two separate tokens right here.
2169 * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
2170 * only the first, leaving yylex to find the next.
2171 */
2172
2173 static int
S_postderef(pTHX_ int const funny,char const next)2174 S_postderef(pTHX_ int const funny, char const next)
2175 {
2176 assert(funny == DOLSHARP
2177 || funny == PERLY_DOLLAR
2178 || funny == PERLY_SNAIL
2179 || funny == PERLY_PERCENT_SIGN
2180 || funny == PERLY_AMPERSAND
2181 || funny == PERLY_STAR
2182 );
2183 if (next == '*') {
2184 PL_expect = XOPERATOR;
2185 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
2186 assert(PERLY_SNAIL == funny || PERLY_DOLLAR == funny || DOLSHARP == funny);
2187 PL_lex_state = LEX_INTERPEND;
2188 if (PERLY_SNAIL == funny)
2189 force_next(POSTJOIN);
2190 }
2191 force_next(PERLY_STAR);
2192 PL_bufptr+=2;
2193 }
2194 else {
2195 if (PERLY_SNAIL == funny && PL_lex_state == LEX_INTERPNORMAL
2196 && !PL_lex_brackets)
2197 PL_lex_dojoin = 2;
2198 PL_expect = XOPERATOR;
2199 PL_bufptr++;
2200 }
2201 return funny;
2202 }
2203
2204 void
Perl_yyunlex(pTHX)2205 Perl_yyunlex(pTHX)
2206 {
2207 int yyc = PL_parser->yychar;
2208 if (yyc != YYEMPTY) {
2209 if (yyc) {
2210 NEXTVAL_NEXTTOKE = PL_parser->yylval;
2211 if (yyc == PERLY_BRACE_OPEN || yyc == HASHBRACK || yyc == PERLY_BRACKET_OPEN) {
2212 PL_lex_allbrackets--;
2213 PL_lex_brackets--;
2214 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2215 } else if (yyc == PERLY_PAREN_OPEN) {
2216 PL_lex_allbrackets--;
2217 yyc |= (2<<24);
2218 }
2219 force_next(yyc);
2220 }
2221 PL_parser->yychar = YYEMPTY;
2222 }
2223 }
2224
2225 STATIC SV *
S_newSV_maybe_utf8(pTHX_ const char * const start,STRLEN len)2226 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2227 {
2228 SV * const sv = newSVpvn_utf8(start, len,
2229 ! IN_BYTES
2230 && UTF
2231 && len != 0
2232 && is_utf8_non_invariant_string((const U8*)start, len));
2233 return sv;
2234 }
2235
2236 /*
2237 * S_force_word
2238 * When the lexer knows the next thing is a word (for instance, it has
2239 * just seen -> and it knows that the next char is a word char, then
2240 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2241 * lookahead.
2242 *
2243 * Arguments:
2244 * char *start : buffer position (must be within PL_linestr)
2245 * int token : PL_next* will be this type of bare word
2246 * (e.g., METHCALL0,BAREWORD)
2247 * int check_keyword : if true, Perl checks to make sure the word isn't
2248 * a keyword (do this if the word is a label, e.g. goto FOO)
2249 * int allow_pack : if true, : characters will also be allowed (require,
2250 * use, etc. do this)
2251 */
2252
2253 STATIC char *
S_force_word(pTHX_ char * start,int token,int check_keyword,int allow_pack)2254 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2255 {
2256 char *s;
2257 STRLEN len;
2258
2259 PERL_ARGS_ASSERT_FORCE_WORD;
2260
2261 start = skipspace(start);
2262 s = start;
2263 if ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
2264 || (allow_pack && *s == ':' && s[1] == ':') )
2265 {
2266 s = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len, allow_pack);
2267 if (check_keyword) {
2268 char *s2 = PL_tokenbuf;
2269 STRLEN len2 = len;
2270 if (allow_pack && memBEGINPs(s2, len, "CORE::")) {
2271 s2 += sizeof("CORE::") - 1;
2272 len2 -= sizeof("CORE::") - 1;
2273 }
2274 if (keyword(s2, len2, 0))
2275 return start;
2276 }
2277 if (token == METHCALL0) {
2278 s = skipspace(s);
2279 if (*s == '(')
2280 PL_expect = XTERM;
2281 else {
2282 PL_expect = XOPERATOR;
2283 }
2284 }
2285 NEXTVAL_NEXTTOKE.opval
2286 = newSVOP(OP_CONST,0,
2287 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2288 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2289 force_next(token);
2290 }
2291 return s;
2292 }
2293
2294 /*
2295 * S_force_ident
2296 * Called when the lexer wants $foo *foo &foo etc, but the program
2297 * text only contains the "foo" portion. The first argument is a pointer
2298 * to the "foo", and the second argument is the type symbol to prefix.
2299 * Forces the next token to be a "BAREWORD".
2300 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2301 */
2302
2303 STATIC void
S_force_ident(pTHX_ const char * s,int kind)2304 S_force_ident(pTHX_ const char *s, int kind)
2305 {
2306 PERL_ARGS_ASSERT_FORCE_IDENT;
2307
2308 if (s[0]) {
2309 const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2310 OP* const o = newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2311 UTF ? SVf_UTF8 : 0));
2312 NEXTVAL_NEXTTOKE.opval = o;
2313 force_next(BAREWORD);
2314 if (kind) {
2315 o->op_private = OPpCONST_ENTERED;
2316 /* XXX see note in pp_entereval() for why we forgo typo
2317 warnings if the symbol must be introduced in an eval.
2318 GSAR 96-10-12 */
2319 gv_fetchpvn_flags(s, len,
2320 (PL_in_eval ? GV_ADDMULTI
2321 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2322 kind == PERLY_DOLLAR ? SVt_PV :
2323 kind == PERLY_SNAIL ? SVt_PVAV :
2324 kind == PERLY_PERCENT_SIGN ? SVt_PVHV :
2325 SVt_PVGV
2326 );
2327 }
2328 }
2329 }
2330
2331 static void
S_force_ident_maybe_lex(pTHX_ char pit)2332 S_force_ident_maybe_lex(pTHX_ char pit)
2333 {
2334 NEXTVAL_NEXTTOKE.ival = pit;
2335 force_next('p');
2336 }
2337
2338 NV
Perl_str_to_version(pTHX_ SV * sv)2339 Perl_str_to_version(pTHX_ SV *sv)
2340 {
2341 NV retval = 0.0;
2342 NV nshift = 1.0;
2343 STRLEN len;
2344 const char *start = SvPV_const(sv,len);
2345 const char * const end = start + len;
2346 const bool utf = cBOOL(SvUTF8(sv));
2347
2348 PERL_ARGS_ASSERT_STR_TO_VERSION;
2349
2350 while (start < end) {
2351 STRLEN skip;
2352 UV n;
2353 if (utf)
2354 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2355 else {
2356 n = *(U8*)start;
2357 skip = 1;
2358 }
2359 retval += ((NV)n)/nshift;
2360 start += skip;
2361 nshift *= 1000;
2362 }
2363 return retval;
2364 }
2365
2366 /*
2367 * S_force_version
2368 * Forces the next token to be a version number.
2369 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2370 * and if "guessing" is TRUE, then no new token is created (and the caller
2371 * must use an alternative parsing method).
2372 */
2373
2374 STATIC char *
S_force_version(pTHX_ char * s,int guessing)2375 S_force_version(pTHX_ char *s, int guessing)
2376 {
2377 OP *version = NULL;
2378 char *d;
2379
2380 PERL_ARGS_ASSERT_FORCE_VERSION;
2381
2382 s = skipspace(s);
2383
2384 d = s;
2385 if (*d == 'v')
2386 d++;
2387 if (isDIGIT(*d)) {
2388 while (isDIGIT(*d) || *d == '_' || *d == '.')
2389 d++;
2390 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2391 SV *ver;
2392 s = scan_num(s, &pl_yylval);
2393 version = pl_yylval.opval;
2394 ver = cSVOPx(version)->op_sv;
2395 if (SvPOK(ver) && !SvNIOK(ver)) {
2396 SvUPGRADE(ver, SVt_PVNV);
2397 SvNV_set(ver, str_to_version(ver));
2398 SvNOK_on(ver); /* hint that it is a version */
2399 }
2400 }
2401 else if (guessing) {
2402 return s;
2403 }
2404 }
2405
2406 /* NOTE: The parser sees the package name and the VERSION swapped */
2407 NEXTVAL_NEXTTOKE.opval = version;
2408 force_next(BAREWORD);
2409
2410 return s;
2411 }
2412
2413 /*
2414 * S_force_strict_version
2415 * Forces the next token to be a version number using strict syntax rules.
2416 */
2417
2418 STATIC char *
S_force_strict_version(pTHX_ char * s)2419 S_force_strict_version(pTHX_ char *s)
2420 {
2421 OP *version = NULL;
2422 const char *errstr = NULL;
2423
2424 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2425
2426 while (isSPACE(*s)) /* leading whitespace */
2427 s++;
2428
2429 if (is_STRICT_VERSION(s,&errstr)) {
2430 SV *ver = newSV_type(SVt_NULL);
2431 s = (char *)scan_version(s, ver, 0);
2432 version = newSVOP(OP_CONST, 0, ver);
2433 }
2434 else if ((*s != ';' && *s != ':' && *s != '{' && *s != '}' )
2435 && (s = skipspace(s), (*s != ';' && *s != ':' && *s != '{' && *s != '}' )))
2436 {
2437 PL_bufptr = s;
2438 if (errstr)
2439 yyerror(errstr); /* version required */
2440 return s;
2441 }
2442
2443 /* NOTE: The parser sees the package name and the VERSION swapped */
2444 NEXTVAL_NEXTTOKE.opval = version;
2445 force_next(BAREWORD);
2446
2447 return s;
2448 }
2449
2450 /*
2451 * S_tokeq
2452 * Turns any \\ into \ in a quoted string passed in in 'sv', returning 'sv',
2453 * modified as necessary. However, if HINT_NEW_STRING is on, 'sv' is
2454 * unchanged, and a new SV containing the modified input is returned.
2455 */
2456
2457 STATIC SV *
S_tokeq(pTHX_ SV * sv)2458 S_tokeq(pTHX_ SV *sv)
2459 {
2460 char *s;
2461 char *send;
2462 char *d;
2463 SV *pv = sv;
2464
2465 PERL_ARGS_ASSERT_TOKEQ;
2466
2467 assert (SvPOK(sv));
2468 assert (SvLEN(sv));
2469 assert (!SvIsCOW(sv));
2470 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2471 goto finish;
2472 s = SvPVX(sv);
2473 send = SvEND(sv);
2474 /* This is relying on the SV being "well formed" with a trailing '\0' */
2475 while (s < send && !(*s == '\\' && s[1] == '\\'))
2476 s++;
2477 if (s == send)
2478 goto finish;
2479 d = s;
2480 if ( PL_hints & HINT_NEW_STRING ) {
2481 pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2482 SVs_TEMP | SvUTF8(sv));
2483 }
2484 while (s < send) {
2485 if (*s == '\\') {
2486 if (s + 1 < send && (s[1] == '\\'))
2487 s++; /* all that, just for this */
2488 }
2489 *d++ = *s++;
2490 }
2491 *d = '\0';
2492 SvCUR_set(sv, d - SvPVX_const(sv));
2493 finish:
2494 if ( PL_hints & HINT_NEW_STRING )
2495 return new_constant(NULL, 0, "q", sv, pv, "q", 1, NULL);
2496 return sv;
2497 }
2498
2499 /*
2500 * Now come three functions related to double-quote context,
2501 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2502 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2503 * interact with PL_lex_state, and create fake ( ... ) argument lists
2504 * to handle functions and concatenation.
2505 * For example,
2506 * "foo\lbar"
2507 * is tokenised as
2508 * stringify ( const[foo] concat lcfirst ( const[bar] ) )
2509 */
2510
2511 /*
2512 * S_sublex_start
2513 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2514 *
2515 * Pattern matching will set PL_lex_op to the pattern-matching op to
2516 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2517 *
2518 * OP_CONST is easy--just make the new op and return.
2519 *
2520 * Everything else becomes a FUNC.
2521 *
2522 * Sets PL_lex_state to LEX_INTERPPUSH unless ival was OP_NULL or we
2523 * had an OP_CONST. This just sets us up for a
2524 * call to S_sublex_push().
2525 */
2526
2527 STATIC I32
S_sublex_start(pTHX)2528 S_sublex_start(pTHX)
2529 {
2530 const I32 op_type = pl_yylval.ival;
2531
2532 if (op_type == OP_NULL) {
2533 pl_yylval.opval = PL_lex_op;
2534 PL_lex_op = NULL;
2535 return THING;
2536 }
2537 if (op_type == OP_CONST) {
2538 SV *sv = PL_lex_stuff;
2539 PL_lex_stuff = NULL;
2540 sv = tokeq(sv);
2541
2542 if (SvTYPE(sv) == SVt_PVIV) {
2543 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2544 STRLEN len;
2545 const char * const p = SvPV_const(sv, len);
2546 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2547 SvREFCNT_dec(sv);
2548 sv = nsv;
2549 }
2550 pl_yylval.opval = newSVOP(op_type, 0, sv);
2551 return THING;
2552 }
2553
2554 PL_parser->lex_super_state = PL_lex_state;
2555 PL_parser->lex_sub_inwhat = (U16)op_type;
2556 PL_parser->lex_sub_op = PL_lex_op;
2557 PL_parser->sub_no_recover = FALSE;
2558 PL_parser->sub_error_count = PL_error_count;
2559 PL_lex_state = LEX_INTERPPUSH;
2560
2561 PL_expect = XTERM;
2562 if (PL_lex_op) {
2563 pl_yylval.opval = PL_lex_op;
2564 PL_lex_op = NULL;
2565 return PMFUNC;
2566 }
2567 else
2568 return FUNC;
2569 }
2570
2571 /*
2572 * S_sublex_push
2573 * Create a new scope to save the lexing state. The scope will be
2574 * ended in S_sublex_done. Returns a '(', starting the function arguments
2575 * to the uc, lc, etc. found before.
2576 * Sets PL_lex_state to LEX_INTERPCONCAT.
2577 */
2578
2579 STATIC I32
S_sublex_push(pTHX)2580 S_sublex_push(pTHX)
2581 {
2582 LEXSHARED *shared;
2583 const bool is_heredoc = PL_multi_close == '<';
2584 ENTER;
2585
2586 PL_lex_state = PL_parser->lex_super_state;
2587 SAVEI8(PL_lex_dojoin);
2588 SAVEI32(PL_lex_brackets);
2589 SAVEI32(PL_lex_allbrackets);
2590 SAVEI32(PL_lex_formbrack);
2591 SAVEI8(PL_lex_fakeeof);
2592 SAVEI32(PL_lex_casemods);
2593 SAVEI32(PL_lex_starts);
2594 SAVEI8(PL_lex_state);
2595 SAVESPTR(PL_lex_repl);
2596 SAVEVPTR(PL_lex_inpat);
2597 SAVEI16(PL_lex_inwhat);
2598 if (is_heredoc)
2599 {
2600 SAVECOPLINE(PL_curcop);
2601 SAVEI32(PL_multi_end);
2602 SAVEI32(PL_parser->herelines);
2603 PL_parser->herelines = 0;
2604 }
2605 SAVEIV(PL_multi_close);
2606 SAVEPPTR(PL_bufptr);
2607 SAVEPPTR(PL_bufend);
2608 SAVEPPTR(PL_oldbufptr);
2609 SAVEPPTR(PL_oldoldbufptr);
2610 SAVEPPTR(PL_last_lop);
2611 SAVEPPTR(PL_last_uni);
2612 SAVEPPTR(PL_linestart);
2613 SAVESPTR(PL_linestr);
2614 SAVEGENERICPV(PL_lex_brackstack);
2615 SAVEGENERICPV(PL_lex_casestack);
2616 SAVEGENERICPV(PL_parser->lex_shared);
2617 SAVEBOOL(PL_parser->lex_re_reparsing);
2618 SAVEI32(PL_copline);
2619
2620 /* The here-doc parser needs to be able to peek into outer lexing
2621 scopes to find the body of the here-doc. So we put PL_linestr and
2622 PL_bufptr into lex_shared, to 'share' those values.
2623 */
2624 PL_parser->lex_shared->ls_linestr = PL_linestr;
2625 PL_parser->lex_shared->ls_bufptr = PL_bufptr;
2626
2627 PL_linestr = PL_lex_stuff;
2628 PL_lex_repl = PL_parser->lex_sub_repl;
2629 PL_lex_stuff = NULL;
2630 PL_parser->lex_sub_repl = NULL;
2631
2632 /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets
2633 set for an inner quote-like operator and then an error causes scope-
2634 popping. We must not have a PL_lex_stuff value left dangling, as
2635 that breaks assumptions elsewhere. See bug #123617. */
2636 SAVEGENERICSV(PL_lex_stuff);
2637 SAVEGENERICSV(PL_parser->lex_sub_repl);
2638
2639 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2640 = SvPVX(PL_linestr);
2641 PL_bufend += SvCUR(PL_linestr);
2642 PL_last_lop = PL_last_uni = NULL;
2643 SAVEFREESV(PL_linestr);
2644 if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2645
2646 PL_lex_dojoin = FALSE;
2647 PL_lex_brackets = PL_lex_formbrack = 0;
2648 PL_lex_allbrackets = 0;
2649 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2650 Newx(PL_lex_brackstack, 120, char);
2651 Newx(PL_lex_casestack, 12, char);
2652 PL_lex_casemods = 0;
2653 *PL_lex_casestack = '\0';
2654 PL_lex_starts = 0;
2655 PL_lex_state = LEX_INTERPCONCAT;
2656 if (is_heredoc)
2657 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2658 PL_copline = NOLINE;
2659
2660 Newxz(shared, 1, LEXSHARED);
2661 shared->ls_prev = PL_parser->lex_shared;
2662 PL_parser->lex_shared = shared;
2663
2664 PL_lex_inwhat = PL_parser->lex_sub_inwhat;
2665 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2666 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2667 PL_lex_inpat = PL_parser->lex_sub_op;
2668 else
2669 PL_lex_inpat = NULL;
2670
2671 PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2672 PL_in_eval &= ~EVAL_RE_REPARSING;
2673
2674 return SUBLEXSTART;
2675 }
2676
2677 /*
2678 * S_sublex_done
2679 * Restores lexer state after a S_sublex_push.
2680 */
2681
2682 STATIC I32
S_sublex_done(pTHX)2683 S_sublex_done(pTHX)
2684 {
2685 if (!PL_lex_starts++) {
2686 SV * const sv = newSVpvs("");
2687 if (SvUTF8(PL_linestr))
2688 SvUTF8_on(sv);
2689 PL_expect = XOPERATOR;
2690 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
2691 return THING;
2692 }
2693
2694 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2695 PL_lex_state = LEX_INTERPCASEMOD;
2696 return yylex();
2697 }
2698
2699 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2700 assert(PL_lex_inwhat != OP_TRANSR);
2701 if (PL_lex_repl) {
2702 assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2703 PL_linestr = PL_lex_repl;
2704 PL_lex_inpat = 0;
2705 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2706 PL_bufend += SvCUR(PL_linestr);
2707 PL_last_lop = PL_last_uni = NULL;
2708 PL_lex_dojoin = FALSE;
2709 PL_lex_brackets = 0;
2710 PL_lex_allbrackets = 0;
2711 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2712 PL_lex_casemods = 0;
2713 *PL_lex_casestack = '\0';
2714 PL_lex_starts = 0;
2715 if (SvEVALED(PL_lex_repl)) {
2716 PL_lex_state = LEX_INTERPNORMAL;
2717 PL_lex_starts++;
2718 /* we don't clear PL_lex_repl here, so that we can check later
2719 whether this is an evalled subst; that means we rely on the
2720 logic to ensure sublex_done() is called again only via the
2721 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2722 }
2723 else {
2724 PL_lex_state = LEX_INTERPCONCAT;
2725 PL_lex_repl = NULL;
2726 }
2727 if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2728 CopLINE(PL_curcop) +=
2729 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xnv_lines
2730 + PL_parser->herelines;
2731 PL_parser->herelines = 0;
2732 }
2733 return PERLY_SLASH;
2734 }
2735 else {
2736 const line_t l = CopLINE(PL_curcop);
2737 LEAVE;
2738 if (PL_parser->sub_error_count != PL_error_count) {
2739 if (PL_parser->sub_no_recover) {
2740 yyquit();
2741 NOT_REACHED;
2742 }
2743 }
2744 if (PL_multi_close == '<')
2745 PL_parser->herelines += l - PL_multi_end;
2746 PL_bufend = SvPVX(PL_linestr);
2747 PL_bufend += SvCUR(PL_linestr);
2748 PL_expect = XOPERATOR;
2749 return SUBLEXEND;
2750 }
2751 }
2752
2753 HV *
Perl_load_charnames(pTHX_ SV * char_name,const char * context,const STRLEN context_len,const char ** error_msg)2754 Perl_load_charnames(pTHX_ SV * char_name, const char * context,
2755 const STRLEN context_len, const char ** error_msg)
2756 {
2757 /* Load the official _charnames module if not already there. The
2758 * parameters are just to give info for any error messages generated:
2759 * char_name a name to look up which is the reason for loading this
2760 * context 'char_name' in the context in the input in which it appears
2761 * context_len how many bytes 'context' occupies
2762 * error_msg *error_msg will be set to any error
2763 *
2764 * Returns the ^H table if success; otherwise NULL */
2765
2766 unsigned int i;
2767 HV * table;
2768 SV **cvp;
2769 SV * res;
2770
2771 PERL_ARGS_ASSERT_LOAD_CHARNAMES;
2772
2773 /* This loop is executed 1 1/2 times. On the first time through, if it
2774 * isn't already loaded, try loading it, and iterate just once to see if it
2775 * worked. */
2776 for (i = 0; i < 2; i++) {
2777 table = GvHV(PL_hintgv); /* ^H */
2778
2779 if ( table
2780 && (PL_hints & HINT_LOCALIZE_HH)
2781 && (cvp = hv_fetchs(table, "charnames", FALSE))
2782 && SvOK(*cvp))
2783 {
2784 return table; /* Quit if already loaded */
2785 }
2786
2787 if (i == 0) {
2788 Perl_load_module(aTHX_
2789 0,
2790 newSVpvs("_charnames"),
2791
2792 /* version parameter; no need to specify it, as if we get too early
2793 * a version, will fail anyway, not being able to find 'charnames'
2794 * */
2795 NULL,
2796 newSVpvs(":full"),
2797 newSVpvs(":short"),
2798 NULL);
2799 }
2800 }
2801
2802 /* Here, it failed; new_constant will give appropriate error messages */
2803 *error_msg = NULL;
2804 res = new_constant( NULL, 0, "charnames", char_name, NULL,
2805 context, context_len, error_msg);
2806 SvREFCNT_dec(res);
2807
2808 return NULL;
2809 }
2810
2811 STATIC SV*
S_get_and_check_backslash_N_name_wrapper(pTHX_ const char * s,const char * const e)2812 S_get_and_check_backslash_N_name_wrapper(pTHX_ const char* s, const char* const e)
2813 {
2814 /* This justs wraps get_and_check_backslash_N_name() to output any error
2815 * message it returns. */
2816
2817 const char * error_msg = NULL;
2818 SV * result;
2819
2820 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME_WRAPPER;
2821
2822 /* charnames doesn't work well if there have been errors found */
2823 if (PL_error_count > 0) {
2824 return NULL;
2825 }
2826
2827 result = get_and_check_backslash_N_name(s, e, cBOOL(UTF), &error_msg);
2828
2829 if (error_msg) {
2830 yyerror_pv(error_msg, UTF ? SVf_UTF8 : 0);
2831 }
2832
2833 return result;
2834 }
2835
2836 SV*
Perl_get_and_check_backslash_N_name(pTHX_ const char * s,const char * e,const bool is_utf8,const char ** error_msg)2837 Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
2838 const char* e,
2839 const bool is_utf8,
2840 const char ** error_msg)
2841 {
2842 /* <s> points to first character of interior of \N{}, <e> to one beyond the
2843 * interior, hence to the "}". Finds what the name resolves to, returning
2844 * an SV* containing it; NULL if no valid one found.
2845 *
2846 * 'is_utf8' is TRUE if we know we want the result to be UTF-8 even if it
2847 * doesn't have to be. */
2848
2849 SV* char_name;
2850 SV* res;
2851 HV * table;
2852 SV **cvp;
2853 SV *cv;
2854 SV *rv;
2855 HV *stash;
2856
2857 /* Points to the beginning of the \N{... so that any messages include the
2858 * context of what's failing*/
2859 const char* context = s - 3;
2860 STRLEN context_len = e - context + 1; /* include all of \N{...} */
2861
2862
2863 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2864
2865 assert(e >= s);
2866 assert(s > (char *) 3);
2867
2868 while (s < e && isBLANK(*s)) {
2869 s++;
2870 }
2871
2872 while (s < e && isBLANK(*(e - 1))) {
2873 e--;
2874 }
2875
2876 char_name = newSVpvn_flags(s, e - s, (is_utf8) ? SVf_UTF8 : 0);
2877
2878 if (!SvCUR(char_name)) {
2879 SvREFCNT_dec_NN(char_name);
2880 /* diag_listed_as: Unknown charname '%s' */
2881 *error_msg = Perl_form(aTHX_ "Unknown charname ''");
2882 return NULL;
2883 }
2884
2885 /* Autoload the charnames module */
2886
2887 table = load_charnames(char_name, context, context_len, error_msg);
2888 if (table == NULL) {
2889 return NULL;
2890 }
2891
2892 *error_msg = NULL;
2893 res = new_constant( NULL, 0, "charnames", char_name, NULL,
2894 context, context_len, error_msg);
2895 if (*error_msg) {
2896 *error_msg = Perl_form(aTHX_ "Unknown charname '%s'", SvPVX(char_name));
2897
2898 SvREFCNT_dec(res);
2899 return NULL;
2900 }
2901
2902 /* See if the charnames handler is the Perl core's, and if so, we can skip
2903 * the validation needed for a user-supplied one, as Perl's does its own
2904 * validation. */
2905 cvp = hv_fetchs(table, "charnames", FALSE);
2906 if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2907 SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2908 {
2909 const char * const name = HvNAME(stash);
2910 if (memEQs(name, HvNAMELEN(stash), "_charnames")) {
2911 return res;
2912 }
2913 }
2914
2915 /* Here, it isn't Perl's charname handler. We can't rely on a
2916 * user-supplied handler to validate the input name. For non-ut8 input,
2917 * look to see that the first character is legal. Then loop through the
2918 * rest checking that each is a continuation */
2919
2920 /* This code makes the reasonable assumption that the only Latin1-range
2921 * characters that begin a character name alias are alphabetic, otherwise
2922 * would have to create a isCHARNAME_BEGIN macro */
2923
2924 if (! is_utf8) {
2925 if (! isALPHAU(*s)) {
2926 goto bad_charname;
2927 }
2928 s++;
2929 while (s < e) {
2930 if (! isCHARNAME_CONT(*s)) {
2931 goto bad_charname;
2932 }
2933 if (*s == ' ' && *(s-1) == ' ') {
2934 goto multi_spaces;
2935 }
2936 s++;
2937 }
2938 }
2939 else {
2940 /* Similarly for utf8. For invariants can check directly; for other
2941 * Latin1, can calculate their code point and check; otherwise use an
2942 * inversion list */
2943 if (UTF8_IS_INVARIANT(*s)) {
2944 if (! isALPHAU(*s)) {
2945 goto bad_charname;
2946 }
2947 s++;
2948 } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2949 if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) {
2950 goto bad_charname;
2951 }
2952 s += 2;
2953 }
2954 else {
2955 if (! _invlist_contains_cp(PL_utf8_charname_begin,
2956 utf8_to_uvchr_buf((U8 *) s,
2957 (U8 *) e,
2958 NULL)))
2959 {
2960 goto bad_charname;
2961 }
2962 s += UTF8SKIP(s);
2963 }
2964
2965 while (s < e) {
2966 if (UTF8_IS_INVARIANT(*s)) {
2967 if (! isCHARNAME_CONT(*s)) {
2968 goto bad_charname;
2969 }
2970 if (*s == ' ' && *(s-1) == ' ') {
2971 goto multi_spaces;
2972 }
2973 s++;
2974 }
2975 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2976 if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1))))
2977 {
2978 goto bad_charname;
2979 }
2980 s += 2;
2981 }
2982 else {
2983 if (! _invlist_contains_cp(PL_utf8_charname_continue,
2984 utf8_to_uvchr_buf((U8 *) s,
2985 (U8 *) e,
2986 NULL)))
2987 {
2988 goto bad_charname;
2989 }
2990 s += UTF8SKIP(s);
2991 }
2992 }
2993 }
2994 if (*(s-1) == ' ') {
2995 /* diag_listed_as: charnames alias definitions may not contain
2996 trailing white-space; marked by <-- HERE in %s
2997 */
2998 *error_msg = Perl_form(aTHX_
2999 "charnames alias definitions may not contain trailing "
3000 "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
3001 (int)(s - context + 1), context,
3002 (int)(e - s + 1), s + 1);
3003 return NULL;
3004 }
3005
3006 if (SvUTF8(res)) { /* Don't accept malformed charname value */
3007 const U8* first_bad_char_loc;
3008 STRLEN len;
3009 const char* const str = SvPV_const(res, len);
3010 if (UNLIKELY(! is_utf8_string_loc((U8 *) str, len,
3011 &first_bad_char_loc)))
3012 {
3013 _force_out_malformed_utf8_message(first_bad_char_loc,
3014 (U8 *) PL_parser->bufend,
3015 0,
3016 0 /* 0 means don't die */ );
3017 /* diag_listed_as: Malformed UTF-8 returned by \N{%s}
3018 immediately after '%s' */
3019 *error_msg = Perl_form(aTHX_
3020 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
3021 (int) context_len, context,
3022 (int) ((char *) first_bad_char_loc - str), str);
3023 return NULL;
3024 }
3025 }
3026
3027 return res;
3028
3029 bad_charname: {
3030
3031 /* The final %.*s makes sure that should the trailing NUL be missing
3032 * that this print won't run off the end of the string */
3033 /* diag_listed_as: Invalid character in \N{...}; marked by <-- HERE
3034 in \N{%s} */
3035 *error_msg = Perl_form(aTHX_
3036 "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
3037 (int)(s - context + 1), context,
3038 (int)(e - s + 1), s + 1);
3039 return NULL;
3040 }
3041
3042 multi_spaces:
3043 /* diag_listed_as: charnames alias definitions may not contain a
3044 sequence of multiple spaces; marked by <-- HERE
3045 in %s */
3046 *error_msg = Perl_form(aTHX_
3047 "charnames alias definitions may not contain a sequence of "
3048 "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
3049 (int)(s - context + 1), context,
3050 (int)(e - s + 1), s + 1);
3051 return NULL;
3052 }
3053
3054 /*
3055 scan_const
3056
3057 Extracts the next constant part of a pattern, double-quoted string,
3058 or transliteration. This is terrifying code.
3059
3060 For example, in parsing the double-quoted string "ab\x63$d", it would
3061 stop at the '$' and return an OP_CONST containing 'abc'.
3062
3063 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3064 processing a pattern (PL_lex_inpat is true), a transliteration
3065 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
3066
3067 Returns a pointer to the character scanned up to. If this is
3068 advanced from the start pointer supplied (i.e. if anything was
3069 successfully parsed), will leave an OP_CONST for the substring scanned
3070 in pl_yylval. Caller must intuit reason for not parsing further
3071 by looking at the next characters herself.
3072
3073 In patterns:
3074 expand:
3075 \N{FOO} => \N{U+hex_for_character_FOO}
3076 (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
3077
3078 pass through:
3079 all other \-char, including \N and \N{ apart from \N{ABC}
3080
3081 stops on:
3082 @ and $ where it appears to be a var, but not for $ as tail anchor
3083 \l \L \u \U \Q \E
3084 (?{ or (??{ or (*{
3085
3086 In transliterations:
3087 characters are VERY literal, except for - not at the start or end
3088 of the string, which indicates a range. However some backslash sequences
3089 are recognized: \r, \n, and the like
3090 \007 \o{}, \x{}, \N{}
3091 If all elements in the transliteration are below 256,
3092 scan_const expands the range to the full set of intermediate
3093 characters. If the range is in utf8, the hyphen is replaced with
3094 a certain range mark which will be handled by pmtrans() in op.c.
3095
3096 In double-quoted strings:
3097 backslashes:
3098 all those recognized in transliterations
3099 deprecated backrefs: \1 (in substitution replacements)
3100 case and quoting: \U \Q \E
3101 stops on @ and $
3102
3103 scan_const does *not* construct ops to handle interpolated strings.
3104 It stops processing as soon as it finds an embedded $ or @ variable
3105 and leaves it to the caller to work out what's going on.
3106
3107 embedded arrays (whether in pattern or not) could be:
3108 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
3109
3110 $ in double-quoted strings must be the symbol of an embedded scalar.
3111
3112 $ in pattern could be $foo or could be tail anchor. Assumption:
3113 it's a tail anchor if $ is the last thing in the string, or if it's
3114 followed by one of "()| \r\n\t"
3115
3116 \1 (backreferences) are turned into $1 in substitutions
3117
3118 The structure of the code is
3119 while (there's a character to process) {
3120 handle transliteration ranges
3121 skip regexp comments /(?#comment)/ and codes /(?{code})/ ((*{code})/
3122 skip #-initiated comments in //x patterns
3123 check for embedded arrays
3124 check for embedded scalars
3125 if (backslash) {
3126 deprecate \1 in substitution replacements
3127 handle string-changing backslashes \l \U \Q \E, etc.
3128 switch (what was escaped) {
3129 handle \- in a transliteration (becomes a literal -)
3130 if a pattern and not \N{, go treat as regular character
3131 handle \132 (octal characters)
3132 handle \x15 and \x{1234} (hex characters)
3133 handle \N{name} (named characters, also \N{3,5} in a pattern)
3134 handle \cV (control characters)
3135 handle printf-style backslashes (\f, \r, \n, etc)
3136 } (end switch)
3137 continue
3138 } (end if backslash)
3139 handle regular character
3140 } (end while character to read)
3141
3142 */
3143
3144 STATIC char *
S_scan_const(pTHX_ char * start)3145 S_scan_const(pTHX_ char *start)
3146 {
3147 const char * const send = PL_bufend;/* end of the constant */
3148 SV *sv = newSV(send - start); /* sv for the constant. See note below
3149 on sizing. */
3150 char *s = start; /* start of the constant */
3151 char *d = SvPVX(sv); /* destination for copies */
3152 bool dorange = FALSE; /* are we in a translit range? */
3153 bool didrange = FALSE; /* did we just finish a range? */
3154 bool in_charclass = FALSE; /* within /[...]/ */
3155 const bool s_is_utf8 = cBOOL(UTF); /* Is the source string assumed to be
3156 UTF8? But, this can show as true
3157 when the source isn't utf8, as for
3158 example when it is entirely composed
3159 of hex constants */
3160 bool d_is_utf8 = FALSE; /* Output constant is UTF8 */
3161 STRLEN utf8_variant_count = 0; /* When not in UTF-8, this counts the
3162 number of characters found so far
3163 that will expand (into 2 bytes)
3164 should we have to convert to
3165 UTF-8) */
3166 SV *res; /* result from charnames */
3167 STRLEN offset_to_max = 0; /* The offset in the output to where the range
3168 high-end character is temporarily placed */
3169
3170 /* Does something require special handling in tr/// ? This avoids extra
3171 * work in a less likely case. As such, khw didn't feel it was worth
3172 * adding any branches to the more mainline code to handle this, which
3173 * means that this doesn't get set in some circumstances when things like
3174 * \x{100} get expanded out. As a result there needs to be extra testing
3175 * done in the tr code */
3176 bool has_above_latin1 = FALSE;
3177
3178 /* Note on sizing: The scanned constant is placed into sv, which is
3179 * initialized by newSV() assuming one byte of output for every byte of
3180 * input. This routine expects newSV() to allocate an extra byte for a
3181 * trailing NUL, which this routine will append if it gets to the end of
3182 * the input. There may be more bytes of input than output (eg., \N{LATIN
3183 * CAPITAL LETTER A}), or more output than input if the constant ends up
3184 * recoded to utf8, but each time a construct is found that might increase
3185 * the needed size, SvGROW() is called. Its size parameter each time is
3186 * based on the best guess estimate at the time, namely the length used so
3187 * far, plus the length the current construct will occupy, plus room for
3188 * the trailing NUL, plus one byte for every input byte still unscanned */
3189
3190 UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
3191 before set */
3192 #ifdef EBCDIC
3193 int backslash_N = 0; /* ? was the character from \N{} */
3194 int non_portable_endpoint = 0; /* ? In a range is an endpoint
3195 platform-specific like \x65 */
3196 #endif
3197
3198 PERL_ARGS_ASSERT_SCAN_CONST;
3199
3200 assert(PL_lex_inwhat != OP_TRANSR);
3201
3202 /* Protect sv from errors and fatal warnings. */
3203 ENTER_with_name("scan_const");
3204 SAVEFREESV(sv);
3205
3206 /* A bunch of code in the loop below assumes that if s[n] exists and is not
3207 * NUL, then s[n+1] exists. This assertion makes sure that assumption is
3208 * valid */
3209 assert(*send == '\0');
3210
3211 while (s < send
3212 || dorange /* Handle tr/// range at right edge of input */
3213 ) {
3214
3215 /* get transliterations out of the way (they're most literal) */
3216 if (PL_lex_inwhat == OP_TRANS) {
3217
3218 /* But there isn't any special handling necessary unless there is a
3219 * range, so for most cases we just drop down and handle the value
3220 * as any other. There are two exceptions.
3221 *
3222 * 1. A hyphen indicates that we are actually going to have a
3223 * range. In this case, skip the '-', set a flag, then drop
3224 * down to handle what should be the end range value.
3225 * 2. After we've handled that value, the next time through, that
3226 * flag is set and we fix up the range.
3227 *
3228 * Ranges entirely within Latin1 are expanded out entirely, in
3229 * order to make the transliteration a simple table look-up.
3230 * Ranges that extend above Latin1 have to be done differently, so
3231 * there is no advantage to expanding them here, so they are
3232 * stored here as Min, RANGE_INDICATOR, Max. 'RANGE_INDICATOR' is
3233 * a byte that can't occur in legal UTF-8, and hence can signify a
3234 * hyphen without any possible ambiguity. On EBCDIC machines, if
3235 * the range is expressed as Unicode, the Latin1 portion is
3236 * expanded out even if the range extends above Latin1. This is
3237 * because each code point in it has to be processed here
3238 * individually to get its native translation */
3239
3240 if (! dorange) {
3241
3242 /* Here, we don't think we're in a range. If the new character
3243 * is not a hyphen; or if it is a hyphen, but it's too close to
3244 * either edge to indicate a range, or if we haven't output any
3245 * characters yet then it's a regular character. */
3246 if (*s != '-' || s >= send - 1 || s == start || d == SvPVX(sv))
3247 {
3248
3249 /* A regular character. Process like any other, but first
3250 * clear any flags */
3251 didrange = FALSE;
3252 dorange = FALSE;
3253 #ifdef EBCDIC
3254 non_portable_endpoint = 0;
3255 backslash_N = 0;
3256 #endif
3257 /* The tests here for being above Latin1 and similar ones
3258 * in the following 'else' suffice to find all such
3259 * occurences in the constant, except those added by a
3260 * backslash escape sequence, like \x{100}. Mostly, those
3261 * set 'has_above_latin1' as appropriate */
3262 if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3263 has_above_latin1 = TRUE;
3264 }
3265
3266 /* Drops down to generic code to process current byte */
3267 }
3268 else { /* Is a '-' in the context where it means a range */
3269 if (didrange) { /* Something like y/A-C-Z// */
3270 Perl_croak(aTHX_ "Ambiguous range in transliteration"
3271 " operator");
3272 }
3273
3274 dorange = TRUE;
3275
3276 s++; /* Skip past the hyphen */
3277
3278 /* d now points to where the end-range character will be
3279 * placed. Drop down to get that character. We'll finish
3280 * processing the range the next time through the loop */
3281
3282 if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3283 has_above_latin1 = TRUE;
3284 }
3285
3286 /* Drops down to generic code to process current byte */
3287 }
3288 } /* End of not a range */
3289 else {
3290 /* Here we have parsed a range. Now must handle it. At this
3291 * point:
3292 * 'sv' is a SV* that contains the output string we are
3293 * constructing. The final two characters in that string
3294 * are the range start and range end, in order.
3295 * 'd' points to just beyond the range end in the 'sv' string,
3296 * where we would next place something
3297 */
3298 char * max_ptr;
3299 char * min_ptr;
3300 IV range_min;
3301 IV range_max; /* last character in range */
3302 STRLEN grow;
3303 Size_t offset_to_min = 0;
3304 Size_t extras = 0;
3305 #ifdef EBCDIC
3306 bool convert_unicode;
3307 IV real_range_max = 0;
3308 #endif
3309 /* Get the code point values of the range ends. */
3310 max_ptr = (d_is_utf8) ? (char *) utf8_hop( (U8*) d, -1) : d - 1;
3311 offset_to_max = max_ptr - SvPVX_const(sv);
3312 if (d_is_utf8) {
3313 /* We know the utf8 is valid, because we just constructed
3314 * it ourselves in previous loop iterations */
3315 min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
3316 range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL);
3317 range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL);
3318
3319 /* This compensates for not all code setting
3320 * 'has_above_latin1', so that we don't skip stuff that
3321 * should be executed */
3322 if (range_max > 255) {
3323 has_above_latin1 = TRUE;
3324 }
3325 }
3326 else {
3327 min_ptr = max_ptr - 1;
3328 range_min = * (U8*) min_ptr;
3329 range_max = * (U8*) max_ptr;
3330 }
3331
3332 /* If the range is just a single code point, like tr/a-a/.../,
3333 * that code point is already in the output, twice. We can
3334 * just back up over the second instance and avoid all the rest
3335 * of the work. But if it is a variant character, it's been
3336 * counted twice, so decrement. (This unlikely scenario is
3337 * special cased, like the one for a range of 2 code points
3338 * below, only because the main-line code below needs a range
3339 * of 3 or more to work without special casing. Might as well
3340 * get it out of the way now.) */
3341 if (UNLIKELY(range_max == range_min)) {
3342 d = max_ptr;
3343 if (! d_is_utf8 && ! UVCHR_IS_INVARIANT(range_max)) {
3344 utf8_variant_count--;
3345 }
3346 goto range_done;
3347 }
3348
3349 #ifdef EBCDIC
3350 /* On EBCDIC platforms, we may have to deal with portable
3351 * ranges. These happen if at least one range endpoint is a
3352 * Unicode value (\N{...}), or if the range is a subset of
3353 * [A-Z] or [a-z], and both ends are literal characters,
3354 * like 'A', and not like \x{C1} */
3355 convert_unicode =
3356 cBOOL(backslash_N) /* \N{} forces Unicode,
3357 hence portable range */
3358 || ( ! non_portable_endpoint
3359 && (( isLOWER_A(range_min) && isLOWER_A(range_max))
3360 || (isUPPER_A(range_min) && isUPPER_A(range_max))));
3361 if (convert_unicode) {
3362
3363 /* Special handling is needed for these portable ranges.
3364 * They are defined to be in Unicode terms, which includes
3365 * all the Unicode code points between the end points.
3366 * Convert to Unicode to get the Unicode range. Later we
3367 * will convert each code point in the range back to
3368 * native. */
3369 range_min = NATIVE_TO_UNI(range_min);
3370 range_max = NATIVE_TO_UNI(range_max);
3371 }
3372 #endif
3373
3374 if (range_min > range_max) {
3375 #ifdef EBCDIC
3376 if (convert_unicode) {
3377 /* Need to convert back to native for meaningful
3378 * messages for this platform */
3379 range_min = UNI_TO_NATIVE(range_min);
3380 range_max = UNI_TO_NATIVE(range_max);
3381 }
3382 #endif
3383 /* Use the characters themselves for the error message if
3384 * ASCII printables; otherwise some visible representation
3385 * of them */
3386 if (isPRINT_A(range_min) && isPRINT_A(range_max)) {
3387 Perl_croak(aTHX_
3388 "Invalid range \"%c-%c\" in transliteration operator",
3389 (char)range_min, (char)range_max);
3390 }
3391 #ifdef EBCDIC
3392 else if (convert_unicode) {
3393 /* diag_listed_as: Invalid range "%s" in transliteration operator */
3394 Perl_croak(aTHX_
3395 "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04"
3396 UVXf "}\" in transliteration operator",
3397 range_min, range_max);
3398 }
3399 #endif
3400 else {
3401 /* diag_listed_as: Invalid range "%s" in transliteration operator */
3402 Perl_croak(aTHX_
3403 "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\""
3404 " in transliteration operator",
3405 range_min, range_max);
3406 }
3407 }
3408
3409 /* If the range is exactly two code points long, they are
3410 * already both in the output */
3411 if (UNLIKELY(range_min + 1 == range_max)) {
3412 goto range_done;
3413 }
3414
3415 /* Here the range contains at least 3 code points */
3416
3417 if (d_is_utf8) {
3418
3419 /* If everything in the transliteration is below 256, we
3420 * can avoid special handling later. A translation table
3421 * for each of those bytes is created by op.c. So we
3422 * expand out all ranges to their constituent code points.
3423 * But if we've encountered something above 255, the
3424 * expanding won't help, so skip doing that. But if it's
3425 * EBCDIC, we may have to look at each character below 256
3426 * if we have to convert to/from Unicode values */
3427 if ( has_above_latin1
3428 #ifdef EBCDIC
3429 && (range_min > 255 || ! convert_unicode)
3430 #endif
3431 ) {
3432 const STRLEN off = d - SvPVX(sv);
3433 const STRLEN extra = 1 + (send - s) + 1;
3434 char *e;
3435
3436 /* Move the high character one byte to the right; then
3437 * insert between it and the range begin, an illegal
3438 * byte which serves to indicate this is a range (using
3439 * a '-' would be ambiguous). */
3440
3441 if (off + extra > SvLEN(sv)) {
3442 d = off + SvGROW(sv, off + extra);
3443 max_ptr = d - off + offset_to_max;
3444 }
3445
3446 e = d++;
3447 while (e-- > max_ptr) {
3448 *(e + 1) = *e;
3449 }
3450 *(e + 1) = (char) RANGE_INDICATOR;
3451 goto range_done;
3452 }
3453
3454 /* Here, we're going to expand out the range. For EBCDIC
3455 * the range can extend above 255 (not so in ASCII), so
3456 * for EBCDIC, split it into the parts above and below
3457 * 255/256 */
3458 #ifdef EBCDIC
3459 if (range_max > 255) {
3460 real_range_max = range_max;
3461 range_max = 255;
3462 }
3463 #endif
3464 }
3465
3466 /* Here we need to expand out the string to contain each
3467 * character in the range. Grow the output to handle this.
3468 * For non-UTF8, we need a byte for each code point in the
3469 * range, minus the three that we've already allocated for: the
3470 * hyphen, the min, and the max. For UTF-8, we need this
3471 * plus an extra byte for each code point that occupies two
3472 * bytes (is variant) when in UTF-8 (except we've already
3473 * allocated for the end points, including if they are
3474 * variants). For ASCII platforms and Unicode ranges on EBCDIC
3475 * platforms, it's easy to calculate a precise number. To
3476 * start, we count the variants in the range, which we need
3477 * elsewhere in this function anyway. (For the case where it
3478 * isn't easy to calculate, 'extras' has been initialized to 0,
3479 * and the calculation is done in a loop further down.) */
3480 #ifdef EBCDIC
3481 if (convert_unicode)
3482 #endif
3483 {
3484 /* This is executed unconditionally on ASCII, and for
3485 * Unicode ranges on EBCDIC. Under these conditions, all
3486 * code points above a certain value are variant; and none
3487 * under that value are. We just need to find out how much
3488 * of the range is above that value. We don't count the
3489 * end points here, as they will already have been counted
3490 * as they were parsed. */
3491 if (range_min >= UTF_CONTINUATION_MARK) {
3492
3493 /* The whole range is made up of variants */
3494 extras = (range_max - 1) - (range_min + 1) + 1;
3495 }
3496 else if (range_max >= UTF_CONTINUATION_MARK) {
3497
3498 /* Only the higher portion of the range is variants */
3499 extras = (range_max - 1) - UTF_CONTINUATION_MARK + 1;
3500 }
3501
3502 utf8_variant_count += extras;
3503 }
3504
3505 /* The base growth is the number of code points in the range,
3506 * not including the endpoints, which have already been sized
3507 * for (and output). We don't subtract for the hyphen, as it
3508 * has been parsed but not output, and the SvGROW below is
3509 * based only on what's been output plus what's left to parse.
3510 * */
3511 grow = (range_max - 1) - (range_min + 1) + 1;
3512
3513 if (d_is_utf8) {
3514 #ifdef EBCDIC
3515 /* In some cases in EBCDIC, we haven't yet calculated a
3516 * precise amount needed for the UTF-8 variants. Just
3517 * assume the worst case, that everything will expand by a
3518 * byte */
3519 if (! convert_unicode) {
3520 grow *= 2;
3521 }
3522 else
3523 #endif
3524 {
3525 /* Otherwise we know exactly how many variants there
3526 * are in the range. */
3527 grow += extras;
3528 }
3529 }
3530
3531 /* Grow, but position the output to overwrite the range min end
3532 * point, because in some cases we overwrite that */
3533 SvCUR_set(sv, d - SvPVX_const(sv));
3534 offset_to_min = min_ptr - SvPVX_const(sv);
3535
3536 /* See Note on sizing above. */
3537 d = offset_to_min + SvGROW(sv, SvCUR(sv)
3538 + (send - s)
3539 + grow
3540 + 1 /* Trailing NUL */ );
3541
3542 /* Now, we can expand out the range. */
3543 #ifdef EBCDIC
3544 if (convert_unicode) {
3545 SSize_t i;
3546
3547 /* Recall that the min and max are now in Unicode terms, so
3548 * we have to convert each character to its native
3549 * equivalent */
3550 if (d_is_utf8) {
3551 for (i = range_min; i <= range_max; i++) {
3552 append_utf8_from_native_byte(
3553 LATIN1_TO_NATIVE((U8) i),
3554 (U8 **) &d);
3555 }
3556 }
3557 else {
3558 for (i = range_min; i <= range_max; i++) {
3559 *d++ = (char)LATIN1_TO_NATIVE((U8) i);
3560 }
3561 }
3562 }
3563 else
3564 #endif
3565 /* Always gets run for ASCII, and sometimes for EBCDIC. */
3566 {
3567 /* Here, no conversions are necessary, which means that the
3568 * first character in the range is already in 'd' and
3569 * valid, so we can skip overwriting it */
3570 if (d_is_utf8) {
3571 SSize_t i;
3572 d += UTF8SKIP(d);
3573 for (i = range_min + 1; i <= range_max; i++) {
3574 append_utf8_from_native_byte((U8) i, (U8 **) &d);
3575 }
3576 }
3577 else {
3578 SSize_t i;
3579 d++;
3580 assert(range_min + 1 <= range_max);
3581 for (i = range_min + 1; i < range_max; i++) {
3582 #ifdef EBCDIC
3583 /* In this case on EBCDIC, we haven't calculated
3584 * the variants. Do it here, as we go along */
3585 if (! UVCHR_IS_INVARIANT(i)) {
3586 utf8_variant_count++;
3587 }
3588 #endif
3589 *d++ = (char)i;
3590 }
3591
3592 /* The range_max is done outside the loop so as to
3593 * avoid having to special case not incrementing
3594 * 'utf8_variant_count' on EBCDIC (it's already been
3595 * counted when originally parsed) */
3596 *d++ = (char) range_max;
3597 }
3598 }
3599
3600 #ifdef EBCDIC
3601 /* If the original range extended above 255, add in that
3602 * portion. */
3603 if (real_range_max) {
3604 *d++ = (char) UTF8_TWO_BYTE_HI(0x100);
3605 *d++ = (char) UTF8_TWO_BYTE_LO(0x100);
3606 if (real_range_max > 0x100) {
3607 if (real_range_max > 0x101) {
3608 *d++ = (char) RANGE_INDICATOR;
3609 }
3610 d = (char*)uvchr_to_utf8((U8*)d, real_range_max);
3611 }
3612 }
3613 #endif
3614
3615 range_done:
3616 /* mark the range as done, and continue */
3617 didrange = TRUE;
3618 dorange = FALSE;
3619 #ifdef EBCDIC
3620 non_portable_endpoint = 0;
3621 backslash_N = 0;
3622 #endif
3623 continue;
3624 } /* End of is a range */
3625 } /* End of transliteration. Joins main code after these else's */
3626 else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3627 char *s1 = s-1;
3628 int esc = 0;
3629 while (s1 >= start && *s1-- == '\\')
3630 esc = !esc;
3631 if (!esc)
3632 in_charclass = TRUE;
3633 }
3634 else if (*s == ']' && PL_lex_inpat && in_charclass) {
3635 char *s1 = s-1;
3636 int esc = 0;
3637 while (s1 >= start && *s1-- == '\\')
3638 esc = !esc;
3639 if (!esc)
3640 in_charclass = FALSE;
3641 }
3642 /* skip for regexp comments /(?#comment)/, except for the last
3643 * char, which will be done separately. Stop on (?{..}) and
3644 * friends (??{ ... }) or (*{ ... }) */
3645 else if (*s == '(' && PL_lex_inpat && (s[1] == '?' || s[1] == '*') && !in_charclass) {
3646 if (s[1] == '?' && s[2] == '#') {
3647 if (s_is_utf8) {
3648 PERL_UINT_FAST8_T len = UTF8SKIP(s);
3649
3650 while (s + len < send && *s != ')') {
3651 Copy(s, d, len, U8);
3652 d += len;
3653 s += len;
3654 len = UTF8_SAFE_SKIP(s, send);
3655 }
3656 }
3657 else while (s+1 < send && *s != ')') {
3658 *d++ = *s++;
3659 }
3660 }
3661 else
3662 if (!PL_lex_casemods &&
3663 /* The following should match regcomp.c */
3664 ((s[1] == '?' && (s[2] == '{' /* (?{ ... }) */
3665 || (s[2] == '?' && s[3] == '{'))) || /* (??{ ... }) */
3666 (s[1] == '*' && (s[2] == '{' ))) /* (*{ ... }) */
3667 ){
3668 break;
3669 }
3670 }
3671 /* likewise skip #-initiated comments in //x patterns */
3672 else if (*s == '#'
3673 && PL_lex_inpat
3674 && !in_charclass
3675 && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED)
3676 {
3677 while (s < send && *s != '\n')
3678 *d++ = *s++;
3679 }
3680 /* no further processing of single-quoted regex */
3681 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3682 goto default_action;
3683
3684 /* check for embedded arrays
3685 * (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3686 */
3687 else if (*s == '@' && s[1]) {
3688 if (UTF
3689 ? isIDFIRST_utf8_safe(s+1, send)
3690 : isWORDCHAR_A(s[1]))
3691 {
3692 break;
3693 }
3694 if (memCHRs(":'{$", s[1]))
3695 break;
3696 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3697 break; /* in regexp, neither @+ nor @- are interpolated */
3698 }
3699 /* check for embedded scalars. only stop if we're sure it's a
3700 * variable. */
3701 else if (*s == '$') {
3702 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
3703 break;
3704 if (s + 1 < send && !memCHRs("()| \r\n\t", s[1])) {
3705 if (s[1] == '\\') {
3706 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3707 "Possible unintended interpolation of $\\ in regex");
3708 }
3709 break; /* in regexp, $ might be tail anchor */
3710 }
3711 }
3712
3713 /* End of else if chain - OP_TRANS rejoin rest */
3714
3715 if (UNLIKELY(s >= send)) {
3716 assert(s == send);
3717 break;
3718 }
3719
3720 /* backslashes */
3721 if (*s == '\\' && s+1 < send) {
3722 char* bslash = s; /* point to beginning \ */
3723 char* rbrace; /* point to ending '}' */
3724 char* e; /* 1 past the meat (non-blanks) before the
3725 brace */
3726 s++;
3727
3728 /* warn on \1 - \9 in substitution replacements, but note that \11
3729 * is an octal; and \19 is \1 followed by '9' */
3730 if (PL_lex_inwhat == OP_SUBST
3731 && !PL_lex_inpat
3732 && isDIGIT(*s)
3733 && *s != '0'
3734 && !isDIGIT(s[1]))
3735 {
3736 /* diag_listed_as: \%d better written as $%d */
3737 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3738 s = bslash;
3739 *s = '$';
3740 break;
3741 }
3742
3743 /* string-change backslash escapes */
3744 if (PL_lex_inwhat != OP_TRANS && *s && memCHRs("lLuUEQF", *s)) {
3745 s = bslash;
3746 break;
3747 }
3748 /* In a pattern, process \N, but skip any other backslash escapes.
3749 * This is because we don't want to translate an escape sequence
3750 * into a meta symbol and have the regex compiler use the meta
3751 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
3752 * in spite of this, we do have to process \N here while the proper
3753 * charnames handler is in scope. See bugs #56444 and #62056.
3754 *
3755 * There is a complication because \N in a pattern may also stand
3756 * for 'match a non-nl', and not mean a charname, in which case its
3757 * processing should be deferred to the regex compiler. To be a
3758 * charname it must be followed immediately by a '{', and not look
3759 * like \N followed by a curly quantifier, i.e., not something like
3760 * \N{3,}. regcurly returns a boolean indicating if it is a legal
3761 * quantifier */
3762 else if (PL_lex_inpat
3763 && (*s != 'N'
3764 || s[1] != '{'
3765 || regcurly(s + 1, send, NULL)))
3766 {
3767 *d++ = '\\';
3768 goto default_action;
3769 }
3770
3771 switch (*s) {
3772 default:
3773 {
3774 if ((isALPHANUMERIC(*s)))
3775 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3776 "Unrecognized escape \\%c passed through",
3777 *s);
3778 /* default action is to copy the quoted character */
3779 goto default_action;
3780 }
3781
3782 /* eg. \132 indicates the octal constant 0132 */
3783 case '0': case '1': case '2': case '3':
3784 case '4': case '5': case '6': case '7':
3785 {
3786 I32 flags = PERL_SCAN_SILENT_ILLDIGIT
3787 | PERL_SCAN_NOTIFY_ILLDIGIT;
3788 STRLEN len = 3;
3789 uv = grok_oct(s, &len, &flags, NULL);
3790 s += len;
3791 if ( (flags & PERL_SCAN_NOTIFY_ILLDIGIT)
3792 && s < send
3793 && isDIGIT(*s) /* like \08, \178 */
3794 && ckWARN(WARN_MISC))
3795 {
3796 Perl_warner(aTHX_ packWARN(WARN_MISC), "%s",
3797 form_alien_digit_msg(8, len, s, send, UTF, FALSE));
3798 }
3799 }
3800 goto NUM_ESCAPE_INSERT;
3801
3802 /* eg. \o{24} indicates the octal constant \024 */
3803 case 'o':
3804 {
3805 const char* error;
3806
3807 if (! grok_bslash_o(&s, send,
3808 &uv, &error,
3809 NULL,
3810 FALSE, /* Not strict */
3811 FALSE, /* No illegal cp's */
3812 UTF))
3813 {
3814 yyerror(error);
3815 uv = 0; /* drop through to ensure range ends are set */
3816 }
3817 goto NUM_ESCAPE_INSERT;
3818 }
3819
3820 /* eg. \x24 indicates the hex constant 0x24 */
3821 case 'x':
3822 {
3823 const char* error;
3824
3825 if (! grok_bslash_x(&s, send,
3826 &uv, &error,
3827 NULL,
3828 FALSE, /* Not strict */
3829 FALSE, /* No illegal cp's */
3830 UTF))
3831 {
3832 yyerror(error);
3833 uv = 0; /* drop through to ensure range ends are set */
3834 }
3835 }
3836
3837 NUM_ESCAPE_INSERT:
3838 /* Insert oct or hex escaped character. */
3839
3840 /* Here uv is the ordinal of the next character being added */
3841 if (UVCHR_IS_INVARIANT(uv)) {
3842 *d++ = (char) uv;
3843 }
3844 else {
3845 if (!d_is_utf8 && uv > 255) {
3846
3847 /* Here, 'uv' won't fit unless we convert to UTF-8.
3848 * If we've only seen invariants so far, all we have to
3849 * do is turn on the flag */
3850 if (utf8_variant_count == 0) {
3851 SvUTF8_on(sv);
3852 }
3853 else {
3854 SvCUR_set(sv, d - SvPVX_const(sv));
3855 SvPOK_on(sv);
3856 *d = '\0';
3857
3858 sv_utf8_upgrade_flags_grow(
3859 sv,
3860 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3861
3862 /* Since we're having to grow here,
3863 * make sure we have enough room for
3864 * this escape and a NUL, so the
3865 * code immediately below won't have
3866 * to actually grow again */
3867 UVCHR_SKIP(uv)
3868 + (STRLEN)(send - s) + 1);
3869 d = SvPVX(sv) + SvCUR(sv);
3870 }
3871
3872 has_above_latin1 = TRUE;
3873 d_is_utf8 = TRUE;
3874 }
3875
3876 if (! d_is_utf8) {
3877 *d++ = (char)uv;
3878 utf8_variant_count++;
3879 }
3880 else {
3881 /* Usually, there will already be enough room in 'sv'
3882 * since such escapes are likely longer than any UTF-8
3883 * sequence they can end up as. This isn't the case on
3884 * EBCDIC where \x{40000000} contains 12 bytes, and the
3885 * UTF-8 for it contains 14. And, we have to allow for
3886 * a trailing NUL. It probably can't happen on ASCII
3887 * platforms, but be safe. See Note on sizing above. */
3888 const STRLEN needed = d - SvPVX(sv)
3889 + UVCHR_SKIP(uv)
3890 + (send - s)
3891 + 1;
3892 if (UNLIKELY(needed > SvLEN(sv))) {
3893 SvCUR_set(sv, d - SvPVX_const(sv));
3894 d = SvCUR(sv) + SvGROW(sv, needed);
3895 }
3896
3897 d = (char*) uvchr_to_utf8_flags((U8*)d, uv,
3898 (ckWARN(WARN_PORTABLE))
3899 ? UNICODE_WARN_PERL_EXTENDED
3900 : 0);
3901 }
3902 }
3903 #ifdef EBCDIC
3904 non_portable_endpoint++;
3905 #endif
3906 continue;
3907
3908 case 'N':
3909 /* In a non-pattern \N must be like \N{U+0041}, or it can be a
3910 * named character, like \N{LATIN SMALL LETTER A}, or a named
3911 * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
3912 * GRAVE} (except y/// can't handle the latter, croaking). For
3913 * convenience all three forms are referred to as "named
3914 * characters" below.
3915 *
3916 * For patterns, \N also can mean to match a non-newline. Code
3917 * before this 'switch' statement should already have handled
3918 * this situation, and hence this code only has to deal with
3919 * the named character cases.
3920 *
3921 * For non-patterns, the named characters are converted to
3922 * their string equivalents. In patterns, named characters are
3923 * not converted to their ultimate forms for the same reasons
3924 * that other escapes aren't (mainly that the ultimate
3925 * character could be considered a meta-symbol by the regex
3926 * compiler). Instead, they are converted to the \N{U+...}
3927 * form to get the value from the charnames that is in effect
3928 * right now, while preserving the fact that it was a named
3929 * character, so that the regex compiler knows this.
3930 *
3931 * The structure of this section of code (besides checking for
3932 * errors and upgrading to utf8) is:
3933 * If the named character is of the form \N{U+...}, pass it
3934 * through if a pattern; otherwise convert the code point
3935 * to utf8
3936 * Otherwise must be some \N{NAME}: convert to
3937 * \N{U+c1.c2...} if a pattern; otherwise convert to utf8
3938 *
3939 * Transliteration is an exception. The conversion to utf8 is
3940 * only done if the code point requires it to be representable.
3941 *
3942 * Here, 's' points to the 'N'; the test below is guaranteed to
3943 * succeed if we are being called on a pattern, as we already
3944 * know from a test above that the next character is a '{'. A
3945 * non-pattern \N must mean 'named character', which requires
3946 * braces */
3947 s++;
3948 if (*s != '{') {
3949 yyerror("Missing braces on \\N{}");
3950 *d++ = '\0';
3951 continue;
3952 }
3953 s++;
3954
3955 /* If there is no matching '}', it is an error. */
3956 if (! (rbrace = (char *) memchr(s, '}', send - s))) {
3957 if (! PL_lex_inpat) {
3958 yyerror("Missing right brace on \\N{}");
3959 } else {
3960 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3961 }
3962 yyquit(); /* Have exhausted the input. */
3963 }
3964
3965 /* Here it looks like a named character */
3966 while (s < rbrace && isBLANK(*s)) {
3967 s++;
3968 }
3969
3970 e = rbrace;
3971 while (s < e && isBLANK(*(e - 1))) {
3972 e--;
3973 }
3974
3975 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3976 s += 2; /* Skip to next char after the 'U+' */
3977 if (PL_lex_inpat) {
3978
3979 /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */
3980 /* Check the syntax. */
3981 if (!isXDIGIT(*s)) {
3982 bad_NU:
3983 yyerror(
3984 "Invalid hexadecimal number in \\N{U+...}"
3985 );
3986 s = rbrace + 1;
3987 *d++ = '\0';
3988 continue;
3989 }
3990 while (++s < e) {
3991 if (isXDIGIT(*s))
3992 continue;
3993 else if ((*s == '.' || *s == '_')
3994 && isXDIGIT(s[1]))
3995 continue;
3996 goto bad_NU;
3997 }
3998
3999 /* Pass everything through unchanged.
4000 * +1 is to include the '}' */
4001 Copy(bslash, d, rbrace - bslash + 1, char);
4002 d += rbrace - bslash + 1;
4003 }
4004 else { /* Not a pattern: convert the hex to string */
4005 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
4006 | PERL_SCAN_SILENT_ILLDIGIT
4007 | PERL_SCAN_SILENT_OVERFLOW
4008 | PERL_SCAN_DISALLOW_PREFIX;
4009 STRLEN len = e - s;
4010
4011 uv = grok_hex(s, &len, &flags, NULL);
4012 if (len == 0 || (len != (STRLEN)(e - s)))
4013 goto bad_NU;
4014
4015 if ( uv > MAX_LEGAL_CP
4016 || (flags & PERL_SCAN_GREATER_THAN_UV_MAX))
4017 {
4018 yyerror(form_cp_too_large_msg(16, s, len, 0));
4019 uv = 0; /* drop through to ensure range ends are
4020 set */
4021 }
4022
4023 /* For non-tr///, if the destination is not in utf8,
4024 * unconditionally recode it to be so. This is
4025 * because \N{} implies Unicode semantics, and scalars
4026 * have to be in utf8 to guarantee those semantics.
4027 * tr/// doesn't care about Unicode rules, so no need
4028 * there to upgrade to UTF-8 for small enough code
4029 * points */
4030 if (! d_is_utf8 && ( uv > 0xFF
4031 || PL_lex_inwhat != OP_TRANS))
4032 {
4033 /* See Note on sizing above. */
4034 const STRLEN extra = OFFUNISKIP(uv) + (send - rbrace) + 1;
4035
4036 SvCUR_set(sv, d - SvPVX_const(sv));
4037 SvPOK_on(sv);
4038 *d = '\0';
4039
4040 if (utf8_variant_count == 0) {
4041 SvUTF8_on(sv);
4042 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
4043 }
4044 else {
4045 sv_utf8_upgrade_flags_grow(
4046 sv,
4047 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4048 extra);
4049 d = SvPVX(sv) + SvCUR(sv);
4050 }
4051
4052 d_is_utf8 = TRUE;
4053 has_above_latin1 = TRUE;
4054 }
4055
4056 /* Add the (Unicode) code point to the output. */
4057 if (OFFUNI_IS_INVARIANT(uv)) {
4058 *d++ = (char) LATIN1_TO_NATIVE(uv);
4059 }
4060 else if (! d_is_utf8) {
4061 *d++ = (char) LATIN1_TO_NATIVE(uv);
4062 utf8_variant_count++;
4063 }
4064 else {
4065 d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv,
4066 (ckWARN(WARN_PORTABLE))
4067 ? UNICODE_WARN_PERL_EXTENDED
4068 : 0);
4069 }
4070 }
4071 }
4072 else /* Here is \N{NAME} but not \N{U+...}. */
4073 if (! (res = get_and_check_backslash_N_name_wrapper(s, e)))
4074 { /* Failed. We should die eventually, but for now use a NUL
4075 to keep parsing */
4076 *d++ = '\0';
4077 }
4078 else { /* Successfully evaluated the name */
4079 STRLEN len;
4080 const char *str = SvPV_const(res, len);
4081 if (PL_lex_inpat) {
4082
4083 if (! len) { /* The name resolved to an empty string */
4084 const char empty_N[] = "\\N{_}";
4085 Copy(empty_N, d, sizeof(empty_N) - 1, char);
4086 d += sizeof(empty_N) - 1;
4087 }
4088 else {
4089 /* In order to not lose information for the regex
4090 * compiler, pass the result in the specially made
4091 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
4092 * the code points in hex of each character
4093 * returned by charnames */
4094
4095 const char *str_end = str + len;
4096 const STRLEN off = d - SvPVX_const(sv);
4097
4098 if (! SvUTF8(res)) {
4099 /* For the non-UTF-8 case, we can determine the
4100 * exact length needed without having to parse
4101 * through the string. Each character takes up
4102 * 2 hex digits plus either a trailing dot or
4103 * the "}" */
4104 const char initial_text[] = "\\N{U+";
4105 const STRLEN initial_len = sizeof(initial_text)
4106 - 1;
4107 d = off + SvGROW(sv, off
4108 + 3 * len
4109
4110 /* +1 for trailing NUL */
4111 + initial_len + 1
4112
4113 + (STRLEN)(send - rbrace));
4114 Copy(initial_text, d, initial_len, char);
4115 d += initial_len;
4116 while (str < str_end) {
4117 char hex_string[4];
4118 int len =
4119 my_snprintf(hex_string,
4120 sizeof(hex_string),
4121 "%02X.",
4122
4123 /* The regex compiler is
4124 * expecting Unicode, not
4125 * native */
4126 NATIVE_TO_LATIN1(*str));
4127 PERL_MY_SNPRINTF_POST_GUARD(len,
4128 sizeof(hex_string));
4129 Copy(hex_string, d, 3, char);
4130 d += 3;
4131 str++;
4132 }
4133 d--; /* Below, we will overwrite the final
4134 dot with a right brace */
4135 }
4136 else {
4137 STRLEN char_length; /* cur char's byte length */
4138
4139 /* and the number of bytes after this is
4140 * translated into hex digits */
4141 STRLEN output_length;
4142
4143 /* 2 hex per byte; 2 chars for '\N'; 2 chars
4144 * for max('U+', '.'); and 1 for NUL */
4145 char hex_string[2 * UTF8_MAXBYTES + 5];
4146
4147 /* Get the first character of the result. */
4148 U32 uv = utf8n_to_uvchr((U8 *) str,
4149 len,
4150 &char_length,
4151 UTF8_ALLOW_ANYUV);
4152 /* Convert first code point to Unicode hex,
4153 * including the boiler plate before it. */
4154 output_length =
4155 my_snprintf(hex_string, sizeof(hex_string),
4156 "\\N{U+%X",
4157 (unsigned int) NATIVE_TO_UNI(uv));
4158
4159 /* Make sure there is enough space to hold it */
4160 d = off + SvGROW(sv, off
4161 + output_length
4162 + (STRLEN)(send - rbrace)
4163 + 2); /* '}' + NUL */
4164 /* And output it */
4165 Copy(hex_string, d, output_length, char);
4166 d += output_length;
4167
4168 /* For each subsequent character, append dot and
4169 * its Unicode code point in hex */
4170 while ((str += char_length) < str_end) {
4171 const STRLEN off = d - SvPVX_const(sv);
4172 U32 uv = utf8n_to_uvchr((U8 *) str,
4173 str_end - str,
4174 &char_length,
4175 UTF8_ALLOW_ANYUV);
4176 output_length =
4177 my_snprintf(hex_string,
4178 sizeof(hex_string),
4179 ".%X",
4180 (unsigned int) NATIVE_TO_UNI(uv));
4181
4182 d = off + SvGROW(sv, off
4183 + output_length
4184 + (STRLEN)(send - rbrace)
4185 + 2); /* '}' + NUL */
4186 Copy(hex_string, d, output_length, char);
4187 d += output_length;
4188 }
4189 }
4190
4191 *d++ = '}'; /* Done. Add the trailing brace */
4192 }
4193 }
4194 else { /* Here, not in a pattern. Convert the name to a
4195 * string. */
4196
4197 if (PL_lex_inwhat == OP_TRANS) {
4198 str = SvPV_const(res, len);
4199 if (len > ((SvUTF8(res))
4200 ? UTF8SKIP(str)
4201 : 1U))
4202 {
4203 yyerror(Perl_form(aTHX_
4204 "%.*s must not be a named sequence"
4205 " in transliteration operator",
4206 /* +1 to include the "}" */
4207 (int) (rbrace + 1 - start), start));
4208 *d++ = '\0';
4209 goto end_backslash_N;
4210 }
4211
4212 if (SvUTF8(res) && UTF8_IS_ABOVE_LATIN1(*str)) {
4213 has_above_latin1 = TRUE;
4214 }
4215
4216 }
4217 else if (! SvUTF8(res)) {
4218 /* Make sure \N{} return is UTF-8. This is because
4219 * \N{} implies Unicode semantics, and scalars have
4220 * to be in utf8 to guarantee those semantics; but
4221 * not needed in tr/// */
4222 sv_utf8_upgrade_flags(res, 0);
4223 str = SvPV_const(res, len);
4224 }
4225
4226 /* Upgrade destination to be utf8 if this new
4227 * component is */
4228 if (! d_is_utf8 && SvUTF8(res)) {
4229 /* See Note on sizing above. */
4230 const STRLEN extra = len + (send - s) + 1;
4231
4232 SvCUR_set(sv, d - SvPVX_const(sv));
4233 SvPOK_on(sv);
4234 *d = '\0';
4235
4236 if (utf8_variant_count == 0) {
4237 SvUTF8_on(sv);
4238 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
4239 }
4240 else {
4241 sv_utf8_upgrade_flags_grow(sv,
4242 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4243 extra);
4244 d = SvPVX(sv) + SvCUR(sv);
4245 }
4246 d_is_utf8 = TRUE;
4247 } else if (len > (STRLEN)(e - s + 4)) { /* +4 is for \N{} */
4248
4249 /* See Note on sizing above. (NOTE: SvCUR() is not
4250 * set correctly here). */
4251 const STRLEN extra = len + (send - rbrace) + 1;
4252 const STRLEN off = d - SvPVX_const(sv);
4253 d = off + SvGROW(sv, off + extra);
4254 }
4255 Copy(str, d, len, char);
4256 d += len;
4257 }
4258
4259 SvREFCNT_dec(res);
4260
4261 } /* End \N{NAME} */
4262
4263 end_backslash_N:
4264 #ifdef EBCDIC
4265 backslash_N++; /* \N{} is defined to be Unicode */
4266 #endif
4267 s = rbrace + 1; /* Point to just after the '}' */
4268 continue;
4269
4270 /* \c is a control character */
4271 case 'c':
4272 s++;
4273 if (s < send) {
4274 const char * message;
4275
4276 if (! grok_bslash_c(*s, (U8 *) d, &message, NULL)) {
4277 yyerror(message);
4278 yyquit(); /* Have always immediately croaked on
4279 errors in this */
4280 }
4281 d++;
4282 }
4283 else {
4284 yyerror("Missing control char name in \\c");
4285 yyquit(); /* Are at end of input, no sense continuing */
4286 }
4287 #ifdef EBCDIC
4288 non_portable_endpoint++;
4289 #endif
4290 break;
4291
4292 /* printf-style backslashes, formfeeds, newlines, etc */
4293 case 'b':
4294 *d++ = '\b';
4295 break;
4296 case 'n':
4297 *d++ = '\n';
4298 break;
4299 case 'r':
4300 *d++ = '\r';
4301 break;
4302 case 'f':
4303 *d++ = '\f';
4304 break;
4305 case 't':
4306 *d++ = '\t';
4307 break;
4308 case 'e':
4309 *d++ = ESC_NATIVE;
4310 break;
4311 case 'a':
4312 *d++ = '\a';
4313 break;
4314 } /* end switch */
4315
4316 s++;
4317 continue;
4318 } /* end if (backslash) */
4319
4320 default_action:
4321 /* Just copy the input to the output, though we may have to convert
4322 * to/from UTF-8.
4323 *
4324 * If the input has the same representation in UTF-8 as not, it will be
4325 * a single byte, and we don't care about UTF8ness; just copy the byte */
4326 if (NATIVE_BYTE_IS_INVARIANT((U8)(*s))) {
4327 *d++ = *s++;
4328 }
4329 else if (! s_is_utf8 && ! d_is_utf8) {
4330 /* If neither source nor output is UTF-8, is also a single byte,
4331 * just copy it; but this byte counts should we later have to
4332 * convert to UTF-8 */
4333 *d++ = *s++;
4334 utf8_variant_count++;
4335 }
4336 else if (s_is_utf8 && d_is_utf8) { /* Both UTF-8, can just copy */
4337 const STRLEN len = UTF8SKIP(s);
4338
4339 /* We expect the source to have already been checked for
4340 * malformedness */
4341 assert(isUTF8_CHAR((U8 *) s, (U8 *) send));
4342
4343 Copy(s, d, len, U8);
4344 d += len;
4345 s += len;
4346 }
4347 else if (s_is_utf8) { /* UTF8ness matters: convert output to utf8 */
4348 STRLEN need = send - s + 1; /* See Note on sizing above. */
4349
4350 SvCUR_set(sv, d - SvPVX_const(sv));
4351 SvPOK_on(sv);
4352 *d = '\0';
4353
4354 if (utf8_variant_count == 0) {
4355 SvUTF8_on(sv);
4356 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + need);
4357 }
4358 else {
4359 sv_utf8_upgrade_flags_grow(sv,
4360 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4361 need);
4362 d = SvPVX(sv) + SvCUR(sv);
4363 }
4364 d_is_utf8 = TRUE;
4365 goto default_action; /* Redo, having upgraded so both are UTF-8 */
4366 }
4367 else { /* UTF8ness matters: convert this non-UTF8 source char to
4368 UTF-8 for output. It will occupy 2 bytes, but don't include
4369 the input byte since we haven't incremented 's' yet. See
4370 Note on sizing above. */
4371 const STRLEN off = d - SvPVX(sv);
4372 const STRLEN extra = 2 + (send - s - 1) + 1;
4373 if (off + extra > SvLEN(sv)) {
4374 d = off + SvGROW(sv, off + extra);
4375 }
4376 *d++ = UTF8_EIGHT_BIT_HI(*s);
4377 *d++ = UTF8_EIGHT_BIT_LO(*s);
4378 s++;
4379 }
4380 } /* while loop to process each character */
4381
4382 {
4383 const STRLEN off = d - SvPVX(sv);
4384
4385 /* See if room for the terminating NUL */
4386 if (UNLIKELY(off >= SvLEN(sv))) {
4387
4388 #ifndef DEBUGGING
4389
4390 if (off > SvLEN(sv))
4391 #endif
4392 Perl_croak(aTHX_ "panic: constant overflowed allocated space,"
4393 " %" UVuf " >= %" UVuf, (UV)off, (UV)SvLEN(sv));
4394
4395 /* Whew! Here we don't have room for the terminating NUL, but
4396 * everything else so far has fit. It's not too late to grow
4397 * to fit the NUL and continue on. But it is a bug, as the code
4398 * above was supposed to have made room for this, so under
4399 * DEBUGGING builds, we panic anyway. */
4400 d = off + SvGROW(sv, off + 1);
4401 }
4402 }
4403
4404 /* terminate the string and set up the sv */
4405 *d = '\0';
4406 SvCUR_set(sv, d - SvPVX_const(sv));
4407
4408 SvPOK_on(sv);
4409 if (d_is_utf8) {
4410 SvUTF8_on(sv);
4411 }
4412
4413 /* shrink the sv if we allocated more than we used */
4414 if (SvCUR(sv) + 5 < SvLEN(sv)) {
4415 SvPV_shrink_to_cur(sv);
4416 }
4417
4418 /* return the substring (via pl_yylval) only if we parsed anything */
4419 if (s > start) {
4420 char *s2 = start;
4421 for (; s2 < s; s2++) {
4422 if (*s2 == '\n')
4423 COPLINE_INC_WITH_HERELINES;
4424 }
4425 SvREFCNT_inc_simple_void_NN(sv);
4426 if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
4427 && ! PL_parser->lex_re_reparsing)
4428 {
4429 const char *const key = PL_lex_inpat ? "qr" : "q";
4430 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
4431 const char *type;
4432 STRLEN typelen;
4433
4434 if (PL_lex_inwhat == OP_TRANS) {
4435 type = "tr";
4436 typelen = 2;
4437 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
4438 type = "s";
4439 typelen = 1;
4440 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
4441 type = "q";
4442 typelen = 1;
4443 } else {
4444 type = "qq";
4445 typelen = 2;
4446 }
4447
4448 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
4449 type, typelen, NULL);
4450 }
4451 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
4452 }
4453 LEAVE_with_name("scan_const");
4454 return s;
4455 }
4456
4457 /* S_intuit_more
4458 * Returns TRUE if there's more to the expression (e.g., a subscript),
4459 * FALSE otherwise.
4460 *
4461 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
4462 *
4463 * ->[ and ->{ return TRUE
4464 * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
4465 * { and [ outside a pattern are always subscripts, so return TRUE
4466 * if we're outside a pattern and it's not { or [, then return FALSE
4467 * if we're in a pattern and the first char is a {
4468 * {4,5} (any digits around the comma) returns FALSE
4469 * if we're in a pattern and the first char is a [
4470 * [] returns FALSE
4471 * [SOMETHING] has a funky heuristic to decide whether it's a
4472 * character class or not. It has to deal with things like
4473 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
4474 * anything else returns TRUE
4475 */
4476
4477 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
4478
4479 STATIC int
S_intuit_more(pTHX_ char * s,char * e)4480 S_intuit_more(pTHX_ char *s, char *e)
4481 {
4482 PERL_ARGS_ASSERT_INTUIT_MORE;
4483
4484 /* This function has been mostly untouched for a long time, due to its,
4485 * 'scariness', and lack of comments. khw has gone through and done some
4486 * cleanup, while finding various instances of problematic behavior.
4487 * Rather than change this base-level function immediately, khw has added
4488 * commentary to those areas. */
4489
4490 /* If recursed within brackets, there is more to the expression */
4491 if (PL_lex_brackets)
4492 return TRUE;
4493
4494 /* If begins with '->' ... */
4495 if (s[0] == '-' && s[1] == '>') {
4496
4497 /* '->[' and '->{' imply more to the expression */
4498 if (s[2] == '[' || s[2] == '{') {
4499 return TRUE;
4500 }
4501
4502 /* Any post deref construct implies more to the expression */
4503 if ( FEATURE_POSTDEREF_QQ_IS_ENABLED
4504 && ( (s[2] == '$' && ( s[3] == '*'
4505 || (s[3] == '#' && s[4] == '*')))
4506 || (s[2] == '@' && memCHRs("*[{", s[3])) ))
4507 {
4508 return TRUE;
4509 }
4510 }
4511
4512 if (s[0] != '{' && s[0] != '[')
4513 return FALSE;
4514
4515 /* quit immediately from any errors from now on */
4516 PL_parser->sub_no_recover = TRUE;
4517
4518 /* Here is '{' or '['. Outside patterns, they're always subscripts */
4519 if (!PL_lex_inpat)
4520 return TRUE;
4521
4522 /* In a pattern, so maybe we have {n,m}, in which case, there isn't more to
4523 * the expression.
4524 *
4525 * khw: This assumes that anything matching regcurly is a character class.
4526 * The syntax of regcurly has been loosened since this function was
4527 * written, and regcurly never required a comma, as in {0}. Probably it is
4528 * ok as-is */
4529 if (s[0] == '{') {
4530 if (regcurly(s, e, NULL)) {
4531 return FALSE;
4532 }
4533 return TRUE;
4534 }
4535
4536 /* Here is '[': maybe we have a character class. Examine the guts */
4537 s++;
4538
4539 /* '^' implies a character class; An empty '[]' isn't legal, but it does
4540 * mean there isn't more to come */
4541 if (s[0] == ']' || s[0] == '^')
4542 return FALSE;
4543
4544 /* Find matching ']'. khw: This means any s[1] below is guaranteed to
4545 * exist */
4546 const char * const send = (char *) memchr(s, ']', e - s);
4547 if (! send) /* has to be an expression */
4548 return TRUE;
4549
4550 /* If the construct consists entirely of one or two digits, call it a
4551 * subscript. */
4552 if (isDIGIT(s[0]) && send - s <= 2 && (send - s == 1 || (isDIGIT(s[1])))) {
4553 return TRUE;
4554 }
4555
4556 /* this is terrifying, and it mostly works. See GH #16478.
4557 *
4558 * khw: That ticket shows that the heuristics here get things wrong. That
4559 * most of the weights are divisible by 5 indicates that not a lot of
4560 * tuning was done, and that the values are fairly arbitrary. Especially
4561 * problematic are when all characters in the construct are numeric. We
4562 * have [89] always resolving to a subscript, though that could well be a
4563 * character class that is related to finding non-octals. And [100] is a
4564 * character class when it could well be a subscript. */
4565
4566 int weight;
4567
4568 if (s[0] == '$') { /* First char is dollar; lean very slightly to it
4569 being a subscript */
4570 weight = -1;
4571 }
4572 else { /* Otherwise, lean a little more towards it being a
4573 character class. */
4574 weight = 2;
4575 }
4576
4577 /* Unsigned version of current character */
4578 unsigned char un_char = 0;
4579
4580 /* Keep track of how many multiple occurrences of the same character there
4581 * are */
4582 char seen[256];
4583 Zero(seen, 256, char);
4584
4585 /* Examine each character in the construct */
4586 bool first_time = true;
4587 for (; s < send; s++, first_time = false) {
4588 unsigned char prev_un_char = un_char;
4589 un_char = (unsigned char) s[0];
4590 switch (s[0]) {
4591 case '@':
4592 case '&':
4593 case '$':
4594
4595 /* Each additional occurrence of one of these three strongly
4596 * indicates it is a subscript */
4597 weight -= seen[un_char] * 10;
4598
4599 /* Following one of these characters, we look to see if there is an
4600 * identifier already found in the program by that name. If so,
4601 * strongly suspect this isn't a character class */
4602 if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) {
4603 int len;
4604 char tmpbuf[sizeof PL_tokenbuf * 4];
4605 scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
4606 len = (int)strlen(tmpbuf);
4607 if ( len > 1
4608 && gv_fetchpvn_flags(tmpbuf,
4609 len,
4610 UTF ? SVf_UTF8 : 0,
4611 SVt_PV))
4612 weight -= 100;
4613 else /* Not a multi-char identifier already known in the
4614 program; is somewhat likely to be a subscript */
4615 weight -= 10;
4616 }
4617 else if ( s[0] == '$'
4618 && s[1]
4619 && memCHRs("[#!%*<>()-=", s[1]))
4620 {
4621 /* Here we have what could be a punctuation variable. If the
4622 * next character after it is a closing bracket, it makes it
4623 * quite likely to be that, and hence a subscript. If it is
4624 * something else, more mildly a subscript */
4625 if (/*{*/ memCHRs("])} =", s[2]))
4626 weight -= 10;
4627 else
4628 weight -= 1;
4629 }
4630 break;
4631
4632 case '\\':
4633 if (s[1]) {
4634 if (memCHRs("wds]", s[1]))
4635 weight += 100; /* \w \d \s => strongly charclass */
4636 /* khw: Why not \W \D \S \h \v, etc as well? */
4637 else if (seen[(U8)'\''] || seen[(U8)'"'])
4638 weight += 1; /* \' => mildly charclass */
4639 else if (memCHRs("abcfnrtvx", s[1]))
4640 weight += 40; /* \n, etc => charclass */
4641 /* khw: Why not \e etc as well? */
4642 else if (isDIGIT(s[1])) {
4643 weight += 40; /* \123 => charclass */
4644 while (s[1] && isDIGIT(s[1]))
4645 s++;
4646 }
4647 }
4648 else /* \ followed by NUL strongly indicates character class */
4649 weight += 100;
4650 break;
4651
4652 case '-':
4653 /* If it is something like '-\', it is more likely to be a
4654 * character class.
4655 *
4656 * khw: The rest of the conditionals in this 'case' really should
4657 * be subject to an 'else' of this condition */
4658 if (s[1] == '\\')
4659 weight += 50;
4660
4661 /* If it is something like 'a-' or '0-', it is more likely to
4662 * be a character class. '!' is the first ASCII graphic, so '!-'
4663 * would be the start of a range of graphics. */
4664 if (! first_time && memCHRs("aA01! ", prev_un_char))
4665 weight += 30;
4666
4667 /* If it is something like '-Z' or '-7' (for octal) or '-9' it
4668 * is more likely to be a character class. '~' is the final ASCII
4669 * graphic, so '-~' would be the end of a range of graphics.
4670 *
4671 * khw: Having [-z] really doesn't imply what the comments above
4672 * indicate, so this should only be tested when '! first_time' */
4673 if (memCHRs("zZ79~", s[1]))
4674 weight += 30;
4675
4676 /* If it is something like -1 or -$foo, it is more likely to be a
4677 * subscript. */
4678 if (first_time && (isDIGIT(s[1]) || s[1] == '$')) {
4679 weight -= 5; /* cope with negative subscript */
4680 }
4681 break;
4682
4683 default:
4684 if ( (first_time || ( ! isWORDCHAR(prev_un_char)
4685 && prev_un_char != '$'
4686 && prev_un_char != '@'
4687 && prev_un_char != '&'))
4688 && isALPHA(s[0])
4689 && isALPHA(s[1]))
4690 {
4691 /* Here it's \W (that isn't [$@&] ) followed immediately by two
4692 * alphas in a row. Accumulate all the consecutive alphas */
4693 char *d = s;
4694 while (isALPHA(s[0]))
4695 s++;
4696
4697 /* If those alphas spell a keyword, it's almost certainly not a
4698 * character class */
4699 if (keyword(d, s - d, 0))
4700 weight -= 150;
4701
4702 /* khw: Should those alphas be marked as seen? */
4703 }
4704
4705 /* Consecutive chars like [...12...] and [...ab...] are presumed
4706 * more likely to be character classes */
4707 if ( ! first_time
4708 && ( NATIVE_TO_LATIN1(un_char)
4709 == NATIVE_TO_LATIN1(prev_un_char) + 1))
4710 {
4711 weight += 5;
4712 }
4713
4714 /* But repeating a character inside a character class does nothing,
4715 * like [aba], so less likely that someone makes such a class, more
4716 * likely that it is a subscript; the more repeats, the less
4717 * likely. */
4718 weight -= seen[un_char];
4719 break;
4720 } /* End of switch */
4721
4722 /* khw: 'seen' is declared as a char. This ++ can cause it to wrap.
4723 * This gives different results with compilers for which a plain 'char'
4724 * is actually unsigned, versus those where it is signed. I believe it
4725 * is undefined behavior to wrap a 'signed'. I think it should be
4726 * instead declared an unsigned int to make the chances of wrapping
4727 * essentially zero.
4728 *
4729 * And I believe that extra backslashes are different from other
4730 * repeated characters. */
4731 seen[un_char]++;
4732 } /* End of loop through each character of the construct */
4733
4734 if (weight >= 0) /* probably a character class */
4735 return FALSE;
4736
4737 return TRUE;
4738 }
4739
4740 /*
4741 * S_intuit_method
4742 *
4743 * Does all the checking to disambiguate
4744 * foo bar
4745 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
4746 * METHCALL (bar->foo(args)) or METHCALL0 (bar->foo args).
4747 *
4748 * First argument is the stuff after the first token, e.g. "bar".
4749 *
4750 * Not a method if foo is a filehandle.
4751 * Not a method if foo is a subroutine prototyped to take a filehandle.
4752 * Not a method if it's really "Foo $bar"
4753 * Method if it's "foo $bar"
4754 * Not a method if it's really "print foo $bar"
4755 * Method if it's really "foo package::" (interpreted as package->foo)
4756 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4757 * Not a method if bar is a filehandle or package, but is quoted with
4758 * =>
4759 */
4760
4761 STATIC int
S_intuit_method(pTHX_ char * start,SV * ioname,CV * cv)4762 S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
4763 {
4764 char *s = start + (*start == '$');
4765 char tmpbuf[sizeof PL_tokenbuf];
4766 STRLEN len;
4767 GV* indirgv;
4768 /* Mustn't actually add anything to a symbol table.
4769 But also don't want to "initialise" any placeholder
4770 constants that might already be there into full
4771 blown PVGVs with attached PVCV. */
4772 GV * const gv =
4773 ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL;
4774
4775 PERL_ARGS_ASSERT_INTUIT_METHOD;
4776
4777 if (!FEATURE_INDIRECT_IS_ENABLED)
4778 return 0;
4779
4780 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4781 return 0;
4782 if (cv && SvPOK(cv)) {
4783 const char *proto = CvPROTO(cv);
4784 if (proto) {
4785 while (*proto && (isSPACE(*proto) || *proto == ';'))
4786 proto++;
4787 if (*proto == '*')
4788 return 0;
4789 }
4790 }
4791
4792 if (*start == '$') {
4793 SSize_t start_off = start - SvPVX(PL_linestr);
4794 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY
4795 || isUPPER(*PL_tokenbuf))
4796 return 0;
4797 /* this could be $# */
4798 if (isSPACE(*s))
4799 s = skipspace(s);
4800 PL_bufptr = SvPVX(PL_linestr) + start_off;
4801 PL_expect = XREF;
4802 return *s == '(' ? METHCALL : METHCALL0;
4803 }
4804
4805 s = scan_word6(s, tmpbuf, sizeof tmpbuf, TRUE, &len, FALSE);
4806 /* start is the beginning of the possible filehandle/object,
4807 * and s is the end of it
4808 * tmpbuf is a copy of it (but with single quotes as double colons)
4809 */
4810
4811 if (!keyword(tmpbuf, len, 0)) {
4812 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4813 len -= 2;
4814 tmpbuf[len] = '\0';
4815 goto bare_package;
4816 }
4817 indirgv = gv_fetchpvn_flags(tmpbuf, len,
4818 GV_NOADD_NOINIT|( UTF ? SVf_UTF8 : 0 ),
4819 SVt_PVCV);
4820 if (indirgv && SvTYPE(indirgv) != SVt_NULL
4821 && (!isGV(indirgv) || GvCVu(indirgv)))
4822 return 0;
4823 /* filehandle or package name makes it a method */
4824 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4825 s = skipspace(s);
4826 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4827 return 0; /* no assumptions -- "=>" quotes bareword */
4828 bare_package:
4829 NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0,
4830 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4831 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4832 PL_expect = XTERM;
4833 force_next(BAREWORD);
4834 PL_bufptr = s;
4835 return *s == '(' ? METHCALL : METHCALL0;
4836 }
4837 }
4838 return 0;
4839 }
4840
4841 /* Encoded script support. filter_add() effectively inserts a
4842 * 'pre-processing' function into the current source input stream.
4843 * Note that the filter function only applies to the current source file
4844 * (e.g., it will not affect files 'require'd or 'use'd by this one).
4845 *
4846 * The datasv parameter (which may be NULL) can be used to pass
4847 * private data to this instance of the filter. The filter function
4848 * can recover the SV using the FILTER_DATA macro and use it to
4849 * store private buffers and state information.
4850 *
4851 * The supplied datasv parameter is upgraded to a PVIO type
4852 * and the IoDIRP/IoANY field is used to store the function pointer,
4853 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4854 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4855 * private use must be set using malloc'd pointers.
4856 */
4857
4858 SV *
Perl_filter_add(pTHX_ filter_t funcp,SV * datasv)4859 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4860 {
4861 if (!funcp)
4862 return NULL;
4863
4864 if (!PL_parser)
4865 return NULL;
4866
4867 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4868 Perl_croak(aTHX_ "Source filters apply only to byte streams");
4869
4870 if (!PL_rsfp_filters)
4871 PL_rsfp_filters = newAV();
4872 if (!datasv)
4873 datasv = newSV(0);
4874 SvUPGRADE(datasv, SVt_PVIO);
4875 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4876 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4877 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4878 FPTR2DPTR(void *, IoANY(datasv)),
4879 SvPV_nolen(datasv)));
4880 av_unshift(PL_rsfp_filters, 1);
4881 av_store(PL_rsfp_filters, 0, datasv) ;
4882 if (
4883 !PL_parser->filtered
4884 && PL_parser->lex_flags & LEX_EVALBYTES
4885 && PL_bufptr < PL_bufend
4886 ) {
4887 const char *s = PL_bufptr;
4888 while (s < PL_bufend) {
4889 if (*s == '\n') {
4890 SV *linestr = PL_parser->linestr;
4891 char *buf = SvPVX(linestr);
4892 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4893 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4894 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4895 STRLEN const linestart_pos = PL_parser->linestart - buf;
4896 STRLEN const last_uni_pos =
4897 PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4898 STRLEN const last_lop_pos =
4899 PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4900 av_push(PL_rsfp_filters, linestr);
4901 PL_parser->linestr =
4902 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4903 buf = SvPVX(PL_parser->linestr);
4904 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4905 PL_parser->bufptr = buf + bufptr_pos;
4906 PL_parser->oldbufptr = buf + oldbufptr_pos;
4907 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4908 PL_parser->linestart = buf + linestart_pos;
4909 if (PL_parser->last_uni)
4910 PL_parser->last_uni = buf + last_uni_pos;
4911 if (PL_parser->last_lop)
4912 PL_parser->last_lop = buf + last_lop_pos;
4913 SvLEN_set(linestr, SvCUR(linestr));
4914 SvCUR_set(linestr, s - SvPVX(linestr));
4915 PL_parser->filtered = 1;
4916 break;
4917 }
4918 s++;
4919 }
4920 }
4921 return(datasv);
4922 }
4923
4924 /*
4925 =for apidoc_section $filters
4926 =for apidoc filter_del
4927
4928 Delete most recently added instance of the filter function argument
4929
4930 =cut
4931 */
4932
4933 void
Perl_filter_del(pTHX_ filter_t funcp)4934 Perl_filter_del(pTHX_ filter_t funcp)
4935 {
4936 SV *datasv;
4937
4938 PERL_ARGS_ASSERT_FILTER_DEL;
4939
4940 #ifdef DEBUGGING
4941 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4942 FPTR2DPTR(void*, funcp)));
4943 #endif
4944 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4945 return;
4946 /* if filter is on top of stack (usual case) just pop it off */
4947 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4948 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4949 SvREFCNT_dec(av_pop(PL_rsfp_filters));
4950
4951 return;
4952 }
4953 /* we need to search for the correct entry and clear it */
4954 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4955 }
4956
4957
4958 /* Invoke the idxth filter function for the current rsfp. */
4959 /* maxlen 0 = read one text line */
4960 I32
Perl_filter_read(pTHX_ int idx,SV * buf_sv,int maxlen)4961 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4962 {
4963 filter_t funcp;
4964 I32 ret;
4965 SV *datasv = NULL;
4966 /* This API is bad. It should have been using unsigned int for maxlen.
4967 Not sure if we want to change the API, but if not we should sanity
4968 check the value here. */
4969 unsigned int correct_length = maxlen < 0 ? PERL_INT_MAX : maxlen;
4970
4971 PERL_ARGS_ASSERT_FILTER_READ;
4972
4973 if (!PL_parser || !PL_rsfp_filters)
4974 return -1;
4975 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
4976 /* Provide a default input filter to make life easy. */
4977 /* Note that we append to the line. This is handy. */
4978 DEBUG_P(PerlIO_printf(Perl_debug_log,
4979 "filter_read %d: from rsfp\n", idx));
4980 if (correct_length) {
4981 /* Want a block */
4982 int len ;
4983 const int old_len = SvCUR(buf_sv);
4984
4985 /* ensure buf_sv is large enough */
4986 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4987 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4988 correct_length)) <= 0) {
4989 if (PerlIO_error(PL_rsfp))
4990 return -1; /* error */
4991 else
4992 return 0 ; /* end of file */
4993 }
4994 SvCUR_set(buf_sv, old_len + len) ;
4995 SvPVX(buf_sv)[old_len + len] = '\0';
4996 } else {
4997 /* Want a line */
4998 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4999 if (PerlIO_error(PL_rsfp))
5000 return -1; /* error */
5001 else
5002 return 0 ; /* end of file */
5003 }
5004 }
5005 return SvCUR(buf_sv);
5006 }
5007 /* Skip this filter slot if filter has been deleted */
5008 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
5009 DEBUG_P(PerlIO_printf(Perl_debug_log,
5010 "filter_read %d: skipped (filter deleted)\n",
5011 idx));
5012 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
5013 }
5014 if (SvTYPE(datasv) != SVt_PVIO) {
5015 if (correct_length) {
5016 /* Want a block */
5017 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
5018 if (!remainder) return 0; /* eof */
5019 if (correct_length > remainder) correct_length = remainder;
5020 sv_catpvn(buf_sv, SvEND(datasv), correct_length);
5021 SvCUR_set(datasv, SvCUR(datasv) + correct_length);
5022 } else {
5023 /* Want a line */
5024 const char *s = SvEND(datasv);
5025 const char *send = SvPVX(datasv) + SvLEN(datasv);
5026 while (s < send) {
5027 if (*s == '\n') {
5028 s++;
5029 break;
5030 }
5031 s++;
5032 }
5033 if (s == send) return 0; /* eof */
5034 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
5035 SvCUR_set(datasv, s-SvPVX(datasv));
5036 }
5037 return SvCUR(buf_sv);
5038 }
5039 /* Get function pointer hidden within datasv */
5040 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
5041 DEBUG_P(PerlIO_printf(Perl_debug_log,
5042 "filter_read %d: via function %p (%s)\n",
5043 idx, (void*)datasv, SvPV_nolen_const(datasv)));
5044 /* Call function. The function is expected to */
5045 /* call "FILTER_READ(idx+1, buf_sv)" first. */
5046 /* Return: <0:error, =0:eof, >0:not eof */
5047 ENTER;
5048 save_scalar(PL_errgv);
5049
5050 /* although this calls out to a random C function, there's a good
5051 * chance that that function will call back into perl (e.g. using
5052 * Filter::Util::Call). So downgrade the stack to
5053 * non-reference-counted for backwards compatibility - i.e. do the
5054 * equivalent of xs_wrap(), but this time we know there are no
5055 * args to be passed or returned on the stack, simplifying it.
5056 */
5057 #ifdef PERL_RC_STACK
5058 assert(AvREAL(PL_curstack));
5059 I32 oldbase = PL_curstackinfo->si_stack_nonrc_base;
5060 I32 oldsp = PL_stack_sp - PL_stack_base;
5061 if (!oldbase)
5062 PL_curstackinfo->si_stack_nonrc_base = oldsp + 1;
5063 #endif
5064
5065 ret = (*funcp)(aTHX_ idx, buf_sv, correct_length);
5066
5067 #ifdef PERL_RC_STACK
5068 assert(oldsp == PL_stack_sp - PL_stack_base);
5069 assert(AvREAL(PL_curstack));
5070 assert(PL_curstackinfo->si_stack_nonrc_base ==
5071 oldbase ? oldbase : oldsp + 1);
5072 PL_curstackinfo->si_stack_nonrc_base = oldbase;
5073 #endif
5074
5075 LEAVE;
5076 return ret;
5077 }
5078
5079 STATIC char *
S_filter_gets(pTHX_ SV * sv,STRLEN append)5080 S_filter_gets(pTHX_ SV *sv, STRLEN append)
5081 {
5082 PERL_ARGS_ASSERT_FILTER_GETS;
5083
5084 #ifdef PERL_CR_FILTER
5085 if (!PL_rsfp_filters) {
5086 filter_add(S_cr_textfilter,NULL);
5087 }
5088 #endif
5089 if (PL_rsfp_filters) {
5090 if (!append)
5091 SvCUR_set(sv, 0); /* start with empty line */
5092 if (FILTER_READ(0, sv, 0) > 0)
5093 return ( SvPVX(sv) ) ;
5094 else
5095 return NULL ;
5096 }
5097 else
5098 return (sv_gets(sv, PL_rsfp, append));
5099 }
5100
5101 STATIC HV *
S_find_in_my_stash(pTHX_ const char * pkgname,STRLEN len)5102 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
5103 {
5104 GV *gv;
5105
5106 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
5107
5108 if (memEQs(pkgname, len, "__PACKAGE__"))
5109 return PL_curstash;
5110
5111 if (len > 2
5112 && (pkgname[len - 2] == ':' && pkgname[len - 1] == ':')
5113 && (gv = gv_fetchpvn_flags(pkgname,
5114 len,
5115 ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
5116 {
5117 return GvHV(gv); /* Foo:: */
5118 }
5119
5120 /* use constant CLASS => 'MyClass' */
5121 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
5122 if (gv && GvCV(gv)) {
5123 SV * const sv = cv_const_sv(GvCV(gv));
5124 if (sv)
5125 return gv_stashsv(sv, 0);
5126 }
5127
5128 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
5129 }
5130
5131
5132 STATIC char *
S_tokenize_use(pTHX_ int is_use,char * s)5133 S_tokenize_use(pTHX_ int is_use, char *s) {
5134 PERL_ARGS_ASSERT_TOKENIZE_USE;
5135
5136 if (PL_expect != XSTATE)
5137 /* diag_listed_as: "use" not allowed in expression */
5138 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
5139 is_use ? "use" : "no"));
5140 PL_expect = XTERM;
5141 s = skipspace(s);
5142 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
5143 s = force_version(s, TRUE);
5144 if (*s == ';' || *s == '}'
5145 || (s = skipspace(s), (*s == ';' || *s == '}'))) {
5146 NEXTVAL_NEXTTOKE.opval = NULL;
5147 force_next(BAREWORD);
5148 }
5149 else if (*s == 'v') {
5150 s = force_word(s,BAREWORD,FALSE,TRUE);
5151 s = force_version(s, FALSE);
5152 }
5153 }
5154 else {
5155 s = force_word(s,BAREWORD,FALSE,TRUE);
5156 s = force_version(s, FALSE);
5157 }
5158 pl_yylval.ival = is_use;
5159 return s;
5160 }
5161 #ifdef DEBUGGING
5162 static const char* const exp_name[] =
5163 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
5164 "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
5165 "SIGVAR", "TERMORDORDOR"
5166 };
5167 #endif
5168
5169 #define word_takes_any_delimiter(p,l) S_word_takes_any_delimiter(p,l)
5170 STATIC bool
S_word_takes_any_delimiter(char * p,STRLEN len)5171 S_word_takes_any_delimiter(char *p, STRLEN len)
5172 {
5173 return (len == 1 && memCHRs("msyq", p[0]))
5174 || (len == 2
5175 && ((p[0] == 't' && p[1] == 'r')
5176 || (p[0] == 'q' && memCHRs("qwxr", p[1]))));
5177 }
5178
5179 static void
S_check_scalar_slice(pTHX_ char * s)5180 S_check_scalar_slice(pTHX_ char *s)
5181 {
5182 s++;
5183 while (SPACE_OR_TAB(*s)) s++;
5184 if (*s == 'q' && s[1] == 'w' && !isWORDCHAR_lazy_if_safe(s+2,
5185 PL_bufend,
5186 UTF))
5187 {
5188 return;
5189 }
5190 while ( isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)
5191 || (*s && memCHRs(" \t$#+-'\"", *s)))
5192 {
5193 s += UTF ? UTF8SKIP(s) : 1;
5194 }
5195 if (*s == '}' || *s == ']')
5196 pl_yylval.ival = OPpSLICEWARNING;
5197 }
5198
5199 #define lex_token_boundary() S_lex_token_boundary(aTHX)
5200 static void
S_lex_token_boundary(pTHX)5201 S_lex_token_boundary(pTHX)
5202 {
5203 PL_oldoldbufptr = PL_oldbufptr;
5204 PL_oldbufptr = PL_bufptr;
5205 }
5206
5207 #define vcs_conflict_marker(s) S_vcs_conflict_marker(aTHX_ s)
5208 static char *
S_vcs_conflict_marker(pTHX_ char * s)5209 S_vcs_conflict_marker(pTHX_ char *s)
5210 {
5211 lex_token_boundary();
5212 PL_bufptr = s;
5213 yyerror("Version control conflict marker");
5214 while (s < PL_bufend && *s != '\n')
5215 s++;
5216 return s;
5217 }
5218
5219 static int
yyl_sigvar(pTHX_ char * s)5220 yyl_sigvar(pTHX_ char *s)
5221 {
5222 /* we expect the sigil and optional var name part of a
5223 * signature element here. Since a '$' is not necessarily
5224 * followed by a var name, handle it specially here; the general
5225 * yylex code would otherwise try to interpret whatever follows
5226 * as a var; e.g. ($, ...) would be seen as the var '$,'
5227 */
5228
5229 U8 sigil;
5230
5231 s = skipspace(s);
5232 sigil = *s++;
5233 PL_bufptr = s; /* for error reporting */
5234 switch (sigil) {
5235 case '$':
5236 case '@':
5237 case '%':
5238 /* spot stuff that looks like an prototype */
5239 if (memCHRs("$:@%&*;\\[]", *s)) {
5240 yyerror("Illegal character following sigil in a subroutine signature");
5241 break;
5242 }
5243 /* '$#' is banned, while '$ # comment' isn't */
5244 if (*s == '#') {
5245 yyerror("'#' not allowed immediately following a sigil in a subroutine signature");
5246 break;
5247 }
5248 s = skipspace(s);
5249 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5250 char *dest = PL_tokenbuf + 1;
5251 /* read var name, including sigil, into PL_tokenbuf */
5252 PL_tokenbuf[0] = sigil;
5253 parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1,
5254 0, cBOOL(UTF), FALSE, FALSE);
5255 *dest = '\0';
5256 assert(PL_tokenbuf[1]); /* we have a variable name */
5257 }
5258 else {
5259 *PL_tokenbuf = 0;
5260 PL_in_my = 0;
5261 }
5262
5263 s = skipspace(s);
5264 /* parse the = for the default ourselves to avoid '+=' etc being accepted here
5265 * as the ASSIGNOP, and exclude other tokens that start with =
5266 */
5267 if (*s == '=' && (!s[1] || memCHRs("=~>", s[1]) == 0)) {
5268 /* save now to report with the same context as we did when
5269 * all ASSIGNOPS were accepted */
5270 PL_oldbufptr = s;
5271
5272 ++s;
5273 NEXTVAL_NEXTTOKE.ival = OP_SASSIGN;
5274 force_next(ASSIGNOP);
5275 PL_expect = XTERM;
5276 }
5277 else if(*s == '/' && s[1] == '/' && s[2] == '=') {
5278 PL_oldbufptr = s;
5279
5280 s += 3;
5281 NEXTVAL_NEXTTOKE.ival = OP_DORASSIGN;
5282 force_next(ASSIGNOP);
5283 PL_expect = XTERM;
5284 }
5285 else if(*s == '|' && s[1] == '|' && s[2] == '=') {
5286 PL_oldbufptr = s;
5287
5288 s += 3;
5289 NEXTVAL_NEXTTOKE.ival = OP_ORASSIGN;
5290 force_next(ASSIGNOP);
5291 PL_expect = XTERM;
5292 }
5293 else if (*s == ',' || *s == ')') {
5294 PL_expect = XOPERATOR;
5295 }
5296 else {
5297 /* make sure the context shows the unexpected character and
5298 * hopefully a bit more */
5299 if (*s) ++s;
5300 while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
5301 s++;
5302 PL_bufptr = s; /* for error reporting */
5303 yyerror("Illegal operator following parameter in a subroutine signature");
5304 PL_in_my = 0;
5305 }
5306 if (*PL_tokenbuf) {
5307 NEXTVAL_NEXTTOKE.ival = sigil;
5308 force_next('p'); /* force a signature pending identifier */
5309 }
5310 break;
5311
5312 case ')':
5313 PL_expect = XBLOCK;
5314 break;
5315 case ',': /* handle ($a,,$b) */
5316 break;
5317
5318 default:
5319 PL_in_my = 0;
5320 yyerror("A signature parameter must start with '$', '@' or '%'");
5321 /* very crude error recovery: skip to likely next signature
5322 * element */
5323 while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
5324 s++;
5325 break;
5326 }
5327
5328 switch (sigil) {
5329 case ',': TOKEN (PERLY_COMMA);
5330 case '$': TOKEN (PERLY_DOLLAR);
5331 case '@': TOKEN (PERLY_SNAIL);
5332 case '%': TOKEN (PERLY_PERCENT_SIGN);
5333 case ')': TOKEN (PERLY_PAREN_CLOSE);
5334 default: TOKEN (sigil);
5335 }
5336 }
5337
5338 static int
yyl_dollar(pTHX_ char * s)5339 yyl_dollar(pTHX_ char *s)
5340 {
5341 CLINE;
5342
5343 if (PL_expect == XPOSTDEREF) {
5344 if (s[1] == '#') {
5345 s++;
5346 POSTDEREF(DOLSHARP);
5347 }
5348 POSTDEREF(PERLY_DOLLAR);
5349 }
5350
5351 if ( s[1] == '#'
5352 && ( isIDFIRST_lazy_if_safe(s+2, PL_bufend, UTF)
5353 || memCHRs("{$:+-@", s[2])))
5354 {
5355 PL_tokenbuf[0] = '@';
5356 s = scan_ident(s + 1, PL_tokenbuf + 1,
5357 sizeof PL_tokenbuf - 1, FALSE);
5358 if (PL_expect == XOPERATOR) {
5359 char *d = s;
5360 if (PL_bufptr > s) {
5361 d = PL_bufptr-1;
5362 PL_bufptr = PL_oldbufptr;
5363 }
5364 no_op("Array length", d);
5365 }
5366 if (!PL_tokenbuf[1])
5367 PREREF(DOLSHARP);
5368 PL_expect = XOPERATOR;
5369 force_ident_maybe_lex('#');
5370 TOKEN(DOLSHARP);
5371 }
5372
5373 PL_tokenbuf[0] = '$';
5374 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5375 if (PL_expect == XOPERATOR) {
5376 char *d = s;
5377 if (PL_bufptr > s) {
5378 d = PL_bufptr-1;
5379 PL_bufptr = PL_oldbufptr;
5380 }
5381 no_op("Scalar", d);
5382 }
5383 if (!PL_tokenbuf[1]) {
5384 if (s == PL_bufend)
5385 yyerror("Final $ should be \\$ or $name");
5386 PREREF(PERLY_DOLLAR);
5387 }
5388
5389 {
5390 const char tmp = *s;
5391 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
5392 s = skipspace(s);
5393
5394 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5395 && intuit_more(s, PL_bufend)) {
5396 if (*s == '[') {
5397 PL_tokenbuf[0] = '@';
5398 if (ckWARN(WARN_SYNTAX)) {
5399 char *t = s+1;
5400
5401 while ( t < PL_bufend ) {
5402 if (isSPACE(*t)) {
5403 do { t += UTF ? UTF8SKIP(t) : 1; } while (t < PL_bufend && isSPACE(*t));
5404 /* consumed one or more space chars */
5405 } else if (*t == '$' || *t == '@') {
5406 /* could be more than one '$' like $$ref or @$ref */
5407 do { t++; } while (t < PL_bufend && *t == '$');
5408
5409 /* could be an abigail style identifier like $ foo */
5410 while (t < PL_bufend && *t == ' ') t++;
5411
5412 /* strip off the name of the var */
5413 while (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
5414 t += UTF ? UTF8SKIP(t) : 1;
5415 /* consumed a varname */
5416 } else if (isDIGIT(*t)) {
5417 /* deal with hex constants like 0x11 */
5418 if (t[0] == '0' && t[1] == 'x') {
5419 t += 2;
5420 while (t < PL_bufend && isXDIGIT(*t)) t++;
5421 } else {
5422 /* deal with decimal/octal constants like 1 and 0123 */
5423 do { t++; } while (isDIGIT(*t));
5424 if (t<PL_bufend && *t == '.') {
5425 do { t++; } while (isDIGIT(*t));
5426 }
5427 }
5428 /* consumed a number */
5429 } else {
5430 /* not a var nor a space nor a number */
5431 break;
5432 }
5433 }
5434 if (t < PL_bufend && *t++ == ',') {
5435 PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
5436 while (t < PL_bufend && *t != ']')
5437 t++;
5438 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5439 "Multidimensional syntax %" UTF8f " not supported",
5440 UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
5441 }
5442 }
5443 }
5444 else if (*s == '{') {
5445 char *t;
5446 PL_tokenbuf[0] = '%';
5447 if ( strEQ(PL_tokenbuf+1, "SIG")
5448 && ckWARN(WARN_SYNTAX)
5449 && (t = (char *) memchr(s, '}', PL_bufend - s))
5450 && (t = (char *) memchr(t, '=', PL_bufend - t)))
5451 {
5452 char tmpbuf[sizeof PL_tokenbuf];
5453 do {
5454 t++;
5455 } while (isSPACE(*t));
5456 if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) {
5457 STRLEN len;
5458 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
5459 while (isSPACE(*t))
5460 t++;
5461 if ( *t == ';'
5462 && get_cvn_flags(tmpbuf, len, UTF
5463 ? SVf_UTF8
5464 : 0))
5465 {
5466 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5467 "You need to quote \"%" UTF8f "\"",
5468 UTF8fARG(UTF, len, tmpbuf));
5469 }
5470 }
5471 }
5472 }
5473 }
5474
5475 PL_expect = XOPERATOR;
5476 if ((PL_lex_state == LEX_NORMAL || PL_lex_brackets) && isSPACE((char)tmp)) {
5477 const bool islop = (PL_last_lop == PL_oldoldbufptr);
5478 if (!islop || PL_last_lop_op == OP_GREPSTART)
5479 PL_expect = XOPERATOR;
5480 else if (memCHRs("$@\"'`q", *s))
5481 PL_expect = XTERM; /* e.g. print $fh "foo" */
5482 else if ( memCHRs("&*<%", *s)
5483 && isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF))
5484 {
5485 PL_expect = XTERM; /* e.g. print $fh &sub */
5486 }
5487 else if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5488 char tmpbuf[sizeof PL_tokenbuf];
5489 int t2;
5490 STRLEN len;
5491 scan_word6(s, tmpbuf, sizeof tmpbuf, TRUE, &len, FALSE);
5492 if ((t2 = keyword(tmpbuf, len, 0))) {
5493 /* binary operators exclude handle interpretations */
5494 switch (t2) {
5495 case -KEY_x:
5496 case -KEY_eq:
5497 case -KEY_ne:
5498 case -KEY_gt:
5499 case -KEY_lt:
5500 case -KEY_ge:
5501 case -KEY_le:
5502 case -KEY_cmp:
5503 break;
5504 default:
5505 PL_expect = XTERM; /* e.g. print $fh length() */
5506 break;
5507 }
5508 }
5509 else {
5510 PL_expect = XTERM; /* e.g. print $fh subr() */
5511 }
5512 }
5513 else if (isDIGIT(*s))
5514 PL_expect = XTERM; /* e.g. print $fh 3 */
5515 else if (*s == '.' && isDIGIT(s[1]))
5516 PL_expect = XTERM; /* e.g. print $fh .3 */
5517 else if ((*s == '?' || *s == '-' || *s == '+')
5518 && !isSPACE(s[1]) && s[1] != '=')
5519 PL_expect = XTERM; /* e.g. print $fh -1 */
5520 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5521 && s[1] != '/')
5522 PL_expect = XTERM; /* e.g. print $fh /.../
5523 XXX except DORDOR operator
5524 */
5525 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5526 && s[2] != '=')
5527 PL_expect = XTERM; /* print $fh <<"EOF" */
5528 }
5529 }
5530 force_ident_maybe_lex('$');
5531 TOKEN(PERLY_DOLLAR);
5532 }
5533
5534 static int
yyl_sub(pTHX_ char * s,const int key)5535 yyl_sub(pTHX_ char *s, const int key)
5536 {
5537 char * const tmpbuf = PL_tokenbuf + 1;
5538 bool have_name, have_proto;
5539 STRLEN len;
5540 SV *format_name = NULL;
5541 bool is_method = (key == KEY_method);
5542
5543 /* method always implies signatures */
5544 bool is_sigsub = is_method || FEATURE_SIGNATURES_IS_ENABLED;
5545
5546 SSize_t off = s-SvPVX(PL_linestr);
5547 char *d;
5548
5549 s = skipspace(s); /* can move PL_linestr */
5550
5551 d = SvPVX(PL_linestr)+off;
5552
5553 SAVEBOOL(PL_parser->sig_seen);
5554 PL_parser->sig_seen = FALSE;
5555
5556 if ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
5557 || *s == '\''
5558 || (*s == ':' && s[1] == ':'))
5559 {
5560
5561 PL_expect = XATTRBLOCK;
5562 d = scan_word6(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
5563 &len, TRUE);
5564 if (key == KEY_format)
5565 format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
5566 *PL_tokenbuf = '&';
5567 if (memchr(tmpbuf, ':', len) || key != KEY_sub
5568 || pad_findmy_pvn(
5569 PL_tokenbuf, len + 1, 0
5570 ) != NOT_IN_PAD)
5571 sv_setpvn(PL_subname, tmpbuf, len);
5572 else {
5573 sv_setsv(PL_subname,PL_curstname);
5574 sv_catpvs(PL_subname,"::");
5575 sv_catpvn(PL_subname,tmpbuf,len);
5576 }
5577 if (SvUTF8(PL_linestr))
5578 SvUTF8_on(PL_subname);
5579 have_name = TRUE;
5580
5581 s = skipspace(d);
5582 }
5583 else {
5584 if (key == KEY_my || key == KEY_our || key==KEY_state) {
5585 *d = '\0';
5586 /* diag_listed_as: Missing name in "%s sub" */
5587 Perl_croak(aTHX_
5588 "Missing name in \"%s\"", PL_bufptr);
5589 }
5590 PL_expect = XATTRTERM;
5591 sv_setpvs(PL_subname,"?");
5592 have_name = FALSE;
5593 }
5594
5595 if (key == KEY_format) {
5596 if (format_name) {
5597 NEXTVAL_NEXTTOKE.opval
5598 = newSVOP(OP_CONST,0, format_name);
5599 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
5600 force_next(BAREWORD);
5601 }
5602 PREBLOCK(KW_FORMAT);
5603 }
5604
5605 /* Look for a prototype */
5606 if (*s == '(' && !is_sigsub) {
5607 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
5608 if (!s)
5609 Perl_croak(aTHX_ "Prototype not terminated");
5610 COPLINE_SET_FROM_MULTI_END;
5611 (void)validate_proto(PL_subname, PL_lex_stuff,
5612 ckWARN(WARN_ILLEGALPROTO), 0);
5613 have_proto = TRUE;
5614
5615 s = skipspace(s);
5616 }
5617 else
5618 have_proto = FALSE;
5619
5620 if ( !(*s == ':' && s[1] != ':')
5621 && (*s != '{' && *s != '(') && key != KEY_format)
5622 {
5623 assert(key == KEY_sub || key == KEY_method ||
5624 key == KEY_AUTOLOAD || key == KEY_DESTROY ||
5625 key == KEY_BEGIN || key == KEY_UNITCHECK || key == KEY_CHECK ||
5626 key == KEY_INIT || key == KEY_END ||
5627 key == KEY_my || key == KEY_state ||
5628 key == KEY_our);
5629 if (!have_name)
5630 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
5631 else if (*s != ';' && *s != '}')
5632 Perl_croak(aTHX_ "Illegal declaration of subroutine %" SVf, SVfARG(PL_subname));
5633 }
5634
5635 if (have_proto) {
5636 NEXTVAL_NEXTTOKE.opval =
5637 newSVOP(OP_CONST, 0, PL_lex_stuff);
5638 PL_lex_stuff = NULL;
5639 force_next(THING);
5640 }
5641
5642 if (!have_name) {
5643 if (PL_curstash)
5644 sv_setpvs(PL_subname, "__ANON__");
5645 else
5646 sv_setpvs(PL_subname, "__ANON__::__ANON__");
5647 if (is_method)
5648 TOKEN(KW_METHOD_anon);
5649 else if (is_sigsub)
5650 TOKEN(KW_SUB_anon_sig);
5651 else
5652 TOKEN(KW_SUB_anon);
5653 }
5654 force_ident_maybe_lex('&');
5655 if (is_method)
5656 TOKEN(KW_METHOD_named);
5657 else if (is_sigsub)
5658 TOKEN(KW_SUB_named_sig);
5659 else
5660 TOKEN(KW_SUB_named);
5661 }
5662
5663 static int
yyl_interpcasemod(pTHX_ char * s)5664 yyl_interpcasemod(pTHX_ char *s)
5665 {
5666 #ifdef DEBUGGING
5667 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
5668 Perl_croak(aTHX_
5669 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
5670 PL_bufptr, PL_bufend, *PL_bufptr);
5671 #endif
5672
5673 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
5674 /* if at a \E */
5675 if (PL_lex_casemods) {
5676 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
5677 PL_lex_casestack[PL_lex_casemods] = '\0';
5678
5679 if (PL_bufptr != PL_bufend
5680 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
5681 || oldmod == 'F')) {
5682 PL_bufptr += 2;
5683 PL_lex_state = LEX_INTERPCONCAT;
5684 }
5685 PL_lex_allbrackets--;
5686 return REPORT(PERLY_PAREN_CLOSE);
5687 }
5688 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
5689 /* Got an unpaired \E */
5690 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
5691 "Useless use of \\E");
5692 }
5693 if (PL_bufptr != PL_bufend)
5694 PL_bufptr += 2;
5695 PL_lex_state = LEX_INTERPCONCAT;
5696 return yylex();
5697 }
5698 else {
5699 DEBUG_T({
5700 PerlIO_printf(Perl_debug_log, "### Saw case modifier\n");
5701 });
5702 s = PL_bufptr + 1;
5703 if (s[1] == '\\' && s[2] == 'E') {
5704 PL_bufptr = s + 3;
5705 PL_lex_state = LEX_INTERPCONCAT;
5706 return yylex();
5707 }
5708 else {
5709 I32 tmp;
5710 if ( memBEGINs(s, (STRLEN) (PL_bufend - s), "L\\u")
5711 || memBEGINs(s, (STRLEN) (PL_bufend - s), "U\\l"))
5712 {
5713 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
5714 }
5715 if ((*s == 'L' || *s == 'U' || *s == 'F')
5716 && (strpbrk(PL_lex_casestack, "LUF")))
5717 {
5718 PL_lex_casestack[--PL_lex_casemods] = '\0';
5719 PL_lex_allbrackets--;
5720 return REPORT(PERLY_PAREN_CLOSE);
5721 }
5722 if (PL_lex_casemods > 10)
5723 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
5724 PL_lex_casestack[PL_lex_casemods++] = *s;
5725 PL_lex_casestack[PL_lex_casemods] = '\0';
5726 PL_lex_state = LEX_INTERPCONCAT;
5727 NEXTVAL_NEXTTOKE.ival = 0;
5728 force_next((2<<24)|PERLY_PAREN_OPEN);
5729 if (*s == 'l')
5730 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
5731 else if (*s == 'u')
5732 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
5733 else if (*s == 'L')
5734 NEXTVAL_NEXTTOKE.ival = OP_LC;
5735 else if (*s == 'U')
5736 NEXTVAL_NEXTTOKE.ival = OP_UC;
5737 else if (*s == 'Q')
5738 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
5739 else if (*s == 'F')
5740 NEXTVAL_NEXTTOKE.ival = OP_FC;
5741 else
5742 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
5743 PL_bufptr = s + 1;
5744 }
5745 force_next(FUNC);
5746 if (PL_lex_starts) {
5747 s = PL_bufptr;
5748 PL_lex_starts = 0;
5749 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5750 if (PL_lex_casemods == 1 && PL_lex_inpat)
5751 TOKEN(PERLY_COMMA);
5752 else
5753 AopNOASSIGN(OP_CONCAT);
5754 }
5755 else
5756 return yylex();
5757 }
5758 }
5759
5760 static int
yyl_secondclass_keyword(pTHX_ char * s,STRLEN len,int key,I32 * orig_keyword,GV ** pgv,GV *** pgvp)5761 yyl_secondclass_keyword(pTHX_ char *s, STRLEN len, int key, I32 *orig_keyword,
5762 GV **pgv, GV ***pgvp)
5763 {
5764 GV *ogv = NULL; /* override (winner) */
5765 GV *hgv = NULL; /* hidden (loser) */
5766 GV *gv = *pgv;
5767
5768 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5769 CV *cv;
5770 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5771 (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
5772 SVt_PVCV))
5773 && (cv = GvCVu(gv)))
5774 {
5775 if (GvIMPORTED_CV(gv))
5776 ogv = gv;
5777 else if (! CvNOWARN_AMBIGUOUS(cv))
5778 hgv = gv;
5779 }
5780 if (!ogv
5781 && (*pgvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf, len, FALSE))
5782 && (gv = **pgvp)
5783 && (isGV_with_GP(gv)
5784 ? GvCVu(gv) && GvIMPORTED_CV(gv)
5785 : SvPCS_IMPORTED(gv)
5786 && (gv_init(gv, PL_globalstash, PL_tokenbuf,
5787 len, 0), 1)))
5788 {
5789 ogv = gv;
5790 }
5791 }
5792
5793 *pgv = gv;
5794
5795 if (ogv) {
5796 *orig_keyword = key;
5797 return 0; /* overridden by import or by GLOBAL */
5798 }
5799 else if (gv && !*pgvp
5800 && -key==KEY_lock /* XXX generalizable kludge */
5801 && GvCVu(gv))
5802 {
5803 return 0; /* any sub overrides "weak" keyword */
5804 }
5805 else { /* no override */
5806 key = -key;
5807 if (key == KEY_dump) {
5808 Perl_croak(aTHX_ "dump() must be written as CORE::dump() as of Perl 5.30");
5809 }
5810 *pgv = NULL;
5811 *pgvp = 0;
5812 if (hgv && key != KEY_x) /* never ambiguous */
5813 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5814 "Ambiguous call resolved as CORE::%s(), "
5815 "qualify as such or use &",
5816 GvENAME(hgv));
5817 return key;
5818 }
5819 }
5820
5821 static int
yyl_qw(pTHX_ char * s,STRLEN len)5822 yyl_qw(pTHX_ char *s, STRLEN len)
5823 {
5824 OP *words = NULL;
5825
5826 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
5827 if (!s)
5828 missingterm(NULL, 0);
5829
5830 COPLINE_SET_FROM_MULTI_END;
5831 PL_expect = XOPERATOR;
5832 if (SvCUR(PL_lex_stuff)) {
5833 int warned_comma = !ckWARN(WARN_QW);
5834 int warned_comment = warned_comma;
5835 char *d = SvPV_force(PL_lex_stuff, len);
5836 while (len) {
5837 for (; isSPACE(*d) && len; --len, ++d)
5838 /**/;
5839 if (len) {
5840 SV *sv;
5841 const char *b = d;
5842 if (!warned_comma || !warned_comment) {
5843 for (; !isSPACE(*d) && len; --len, ++d) {
5844 if (!warned_comma && *d == ',') {
5845 Perl_warner(aTHX_ packWARN(WARN_QW),
5846 "Possible attempt to separate words with commas");
5847 ++warned_comma;
5848 }
5849 else if (!warned_comment && *d == '#') {
5850 Perl_warner(aTHX_ packWARN(WARN_QW),
5851 "Possible attempt to put comments in qw() list");
5852 ++warned_comment;
5853 }
5854 }
5855 }
5856 else {
5857 for (; !isSPACE(*d) && len; --len, ++d)
5858 /**/;
5859 }
5860 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
5861 words = op_append_elem(OP_LIST, words,
5862 newSVOP(OP_CONST, 0, tokeq(sv)));
5863 }
5864 }
5865 }
5866 if (!words)
5867 words = newNULLLIST();
5868 SvREFCNT_dec_NN(PL_lex_stuff);
5869 PL_lex_stuff = NULL;
5870 PL_expect = XOPERATOR;
5871 pl_yylval.opval = sawparens(words);
5872 TOKEN(QWLIST);
5873 }
5874
5875 static int
yyl_hyphen(pTHX_ char * s)5876 yyl_hyphen(pTHX_ char *s)
5877 {
5878 if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5879 I32 ftst = 0;
5880 char tmp;
5881
5882 s++;
5883 PL_bufptr = s;
5884 tmp = *s++;
5885
5886 while (s < PL_bufend && SPACE_OR_TAB(*s))
5887 s++;
5888
5889 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=>")) {
5890 s = force_word(PL_bufptr,BAREWORD,FALSE,FALSE);
5891 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5892 OPERATOR(PERLY_MINUS); /* unary minus */
5893 }
5894 switch (tmp) {
5895 case 'r': ftst = OP_FTEREAD; break;
5896 case 'w': ftst = OP_FTEWRITE; break;
5897 case 'x': ftst = OP_FTEEXEC; break;
5898 case 'o': ftst = OP_FTEOWNED; break;
5899 case 'R': ftst = OP_FTRREAD; break;
5900 case 'W': ftst = OP_FTRWRITE; break;
5901 case 'X': ftst = OP_FTREXEC; break;
5902 case 'O': ftst = OP_FTROWNED; break;
5903 case 'e': ftst = OP_FTIS; break;
5904 case 'z': ftst = OP_FTZERO; break;
5905 case 's': ftst = OP_FTSIZE; break;
5906 case 'f': ftst = OP_FTFILE; break;
5907 case 'd': ftst = OP_FTDIR; break;
5908 case 'l': ftst = OP_FTLINK; break;
5909 case 'p': ftst = OP_FTPIPE; break;
5910 case 'S': ftst = OP_FTSOCK; break;
5911 case 'u': ftst = OP_FTSUID; break;
5912 case 'g': ftst = OP_FTSGID; break;
5913 case 'k': ftst = OP_FTSVTX; break;
5914 case 'b': ftst = OP_FTBLK; break;
5915 case 'c': ftst = OP_FTCHR; break;
5916 case 't': ftst = OP_FTTTY; break;
5917 case 'T': ftst = OP_FTTEXT; break;
5918 case 'B': ftst = OP_FTBINARY; break;
5919 case 'M': case 'A': case 'C':
5920 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5921 switch (tmp) {
5922 case 'M': ftst = OP_FTMTIME; break;
5923 case 'A': ftst = OP_FTATIME; break;
5924 case 'C': ftst = OP_FTCTIME; break;
5925 default: break;
5926 }
5927 break;
5928 default:
5929 break;
5930 }
5931 if (ftst) {
5932 PL_last_uni = PL_oldbufptr;
5933 PL_last_lop_op = (OPCODE)ftst;
5934 DEBUG_T( {
5935 PerlIO_printf(Perl_debug_log, "### Saw file test %c\n", (int)tmp);
5936 } );
5937 FTST(ftst);
5938 }
5939 else {
5940 /* Assume it was a minus followed by a one-letter named
5941 * subroutine call (or a -bareword), then. */
5942 DEBUG_T( {
5943 PerlIO_printf(Perl_debug_log,
5944 "### '-%c' looked like a file test but was not\n",
5945 (int) tmp);
5946 } );
5947 s = --PL_bufptr;
5948 }
5949 }
5950 {
5951 const char tmp = *s++;
5952 if (*s == tmp) {
5953 s++;
5954 if (PL_expect == XOPERATOR)
5955 TERM(POSTDEC);
5956 else
5957 OPERATOR(PREDEC);
5958 }
5959 else if (*s == '>') {
5960 s++;
5961 s = skipspace(s);
5962 if (((*s == '$' || *s == '&') && s[1] == '*')
5963 ||(*s == '$' && s[1] == '#' && s[2] == '*')
5964 ||((*s == '@' || *s == '%') && memCHRs("*[{", s[1]))
5965 ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5966 )
5967 {
5968 PL_expect = XPOSTDEREF;
5969 TOKEN(ARROW);
5970 }
5971 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5972 s = force_word(s,METHCALL0,FALSE,TRUE);
5973 TOKEN(ARROW);
5974 }
5975 else if (*s == '$')
5976 OPERATOR(ARROW);
5977 else
5978 TERM(ARROW);
5979 }
5980 if (PL_expect == XOPERATOR) {
5981 if (*s == '='
5982 && !PL_lex_allbrackets
5983 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5984 {
5985 s--;
5986 TOKEN(0);
5987 }
5988 Aop(OP_SUBTRACT);
5989 }
5990 else {
5991 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5992 check_uni();
5993 OPERATOR(PERLY_MINUS); /* unary minus */
5994 }
5995 }
5996 }
5997
5998 static int
yyl_plus(pTHX_ char * s)5999 yyl_plus(pTHX_ char *s)
6000 {
6001 const char tmp = *s++;
6002 if (*s == tmp) {
6003 s++;
6004 if (PL_expect == XOPERATOR)
6005 TERM(POSTINC);
6006 else
6007 OPERATOR(PREINC);
6008 }
6009 if (PL_expect == XOPERATOR) {
6010 if (*s == '='
6011 && !PL_lex_allbrackets
6012 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6013 {
6014 s--;
6015 TOKEN(0);
6016 }
6017 Aop(OP_ADD);
6018 }
6019 else {
6020 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
6021 check_uni();
6022 OPERATOR(PERLY_PLUS);
6023 }
6024 }
6025
6026 static int
yyl_star(pTHX_ char * s)6027 yyl_star(pTHX_ char *s)
6028 {
6029 if (PL_expect == XPOSTDEREF)
6030 POSTDEREF(PERLY_STAR);
6031
6032 if (PL_expect != XOPERATOR) {
6033 s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
6034 PL_expect = XOPERATOR;
6035 force_ident(PL_tokenbuf, PERLY_STAR);
6036 if (!*PL_tokenbuf)
6037 PREREF(PERLY_STAR);
6038 TERM(PERLY_STAR);
6039 }
6040
6041 s++;
6042 if (*s == '*') {
6043 s++;
6044 if (*s == '=' && !PL_lex_allbrackets
6045 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6046 {
6047 s -= 2;
6048 TOKEN(0);
6049 }
6050 PWop(OP_POW);
6051 }
6052
6053 if (*s == '='
6054 && !PL_lex_allbrackets
6055 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6056 {
6057 s--;
6058 TOKEN(0);
6059 }
6060
6061 Mop(OP_MULTIPLY);
6062 }
6063
6064 static int
yyl_percent(pTHX_ char * s)6065 yyl_percent(pTHX_ char *s)
6066 {
6067 if (PL_expect == XOPERATOR) {
6068 if (s[1] == '='
6069 && !PL_lex_allbrackets
6070 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6071 {
6072 TOKEN(0);
6073 }
6074 ++s;
6075 Mop(OP_MODULO);
6076 }
6077 else if (PL_expect == XPOSTDEREF)
6078 POSTDEREF(PERLY_PERCENT_SIGN);
6079
6080 PL_tokenbuf[0] = '%';
6081 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6082 pl_yylval.ival = 0;
6083 if (!PL_tokenbuf[1]) {
6084 PREREF(PERLY_PERCENT_SIGN);
6085 }
6086 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6087 && intuit_more(s, PL_bufend)) {
6088 if (*s == '[')
6089 PL_tokenbuf[0] = '@';
6090 }
6091 PL_expect = XOPERATOR;
6092 force_ident_maybe_lex('%');
6093 TERM(PERLY_PERCENT_SIGN);
6094 }
6095
6096 static int
yyl_caret(pTHX_ char * s)6097 yyl_caret(pTHX_ char *s)
6098 {
6099 char *d = s;
6100 const bool bof = cBOOL(FEATURE_BITWISE_IS_ENABLED);
6101 if (s[1] == '^') {
6102 s += 2;
6103 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6104 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6105 s -= 2;
6106 TOKEN(0);
6107 }
6108 pl_yylval.ival = OP_XOR;
6109 OPERATOR(OROR);
6110 }
6111 if (bof && s[1] == '.')
6112 s++;
6113 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6114 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
6115 {
6116 s = d;
6117 TOKEN(0);
6118 }
6119 s++;
6120 BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR);
6121 }
6122
6123 static int
yyl_colon(pTHX_ char * s)6124 yyl_colon(pTHX_ char *s)
6125 {
6126 OP *attrs;
6127
6128 switch (PL_expect) {
6129 case XOPERATOR:
6130 if (!PL_in_my || (PL_lex_state != LEX_NORMAL && !PL_lex_brackets))
6131 break;
6132 PL_bufptr = s; /* update in case we back off */
6133 if (*s == '=') {
6134 Perl_croak(aTHX_
6135 "Use of := for an empty attribute list is not allowed");
6136 }
6137 goto grabattrs;
6138 case XATTRBLOCK:
6139 PL_expect = XBLOCK;
6140 goto grabattrs;
6141 case XATTRTERM:
6142 PL_expect = XTERMBLOCK;
6143 grabattrs:
6144 /* NB: as well as parsing normal attributes, we also end up
6145 * here if there is something looking like attributes
6146 * following a signature (which is illegal, but used to be
6147 * legal in 5.20..5.26). If the latter, we still parse the
6148 * attributes so that error messages(s) are less confusing,
6149 * but ignore them (parser->sig_seen).
6150 */
6151 s = skipspace(s);
6152 attrs = NULL;
6153 while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
6154 I32 tmp;
6155 SV *sv;
6156 STRLEN len;
6157 char *d = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len, FALSE);
6158 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
6159 if (tmp < 0) tmp = -tmp;
6160 switch (tmp) {
6161 case KEY_or:
6162 case KEY_and:
6163 case KEY_for:
6164 case KEY_foreach:
6165 case KEY_unless:
6166 case KEY_if:
6167 case KEY_while:
6168 case KEY_until:
6169 goto got_attrs;
6170 default:
6171 break;
6172 }
6173 }
6174 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
6175 if (*d == '(') {
6176 d = scan_str(d,TRUE,TRUE,FALSE,NULL);
6177 if (!d) {
6178 op_free(attrs);
6179 ASSUME(sv && SvREFCNT(sv) == 1);
6180 SvREFCNT_dec(sv);
6181 Perl_croak(aTHX_ "Unterminated attribute parameter in attribute list");
6182 }
6183 COPLINE_SET_FROM_MULTI_END;
6184 }
6185 if (PL_lex_stuff) {
6186 sv_catsv(sv, PL_lex_stuff);
6187 attrs = op_append_elem(OP_LIST, attrs,
6188 newSVOP(OP_CONST, 0, sv));
6189 SvREFCNT_dec_NN(PL_lex_stuff);
6190 PL_lex_stuff = NULL;
6191 }
6192 else {
6193 attrs = op_append_elem(OP_LIST, attrs,
6194 newSVOP(OP_CONST, 0, sv));
6195 }
6196 s = skipspace(d);
6197 if (*s == ':' && s[1] != ':')
6198 s = skipspace(s+1);
6199 else if (s == d)
6200 break; /* require real whitespace or :'s */
6201 /* XXX losing whitespace on sequential attributes here */
6202 }
6203
6204 if (*s != ';'
6205 && *s != '}'
6206 && !(PL_expect == XOPERATOR
6207 /* if an operator is expected, permit =, //= and ||= or ) to end */
6208 ? (*s == '=' || *s == ')' || *s == '/' || *s == '|')
6209 : (*s == '{' || *s == '(')))
6210 {
6211 const char q = ((*s == '\'') ? '"' : '\'');
6212 /* If here for an expression, and parsed no attrs, back off. */
6213 if (PL_expect == XOPERATOR && !attrs) {
6214 s = PL_bufptr;
6215 break;
6216 }
6217 /* MUST advance bufptr here to avoid bogus "at end of line"
6218 context messages from yyerror().
6219 */
6220 PL_bufptr = s;
6221 yyerror( (const char *)
6222 (*s
6223 ? Perl_form(aTHX_ "Invalid separator character "
6224 "%c%c%c in attribute list", q, *s, q)
6225 : "Unterminated attribute list" ) );
6226 op_free(attrs);
6227 OPERATOR(PERLY_COLON);
6228 }
6229
6230 got_attrs:
6231 if (PL_parser->sig_seen) {
6232 /* see comment about about sig_seen and parser error
6233 * handling */
6234 op_free(attrs);
6235 Perl_croak(aTHX_ "Subroutine attributes must come "
6236 "before the signature");
6237 }
6238 if (attrs) {
6239 NEXTVAL_NEXTTOKE.opval = attrs;
6240 force_next(THING);
6241 }
6242 TOKEN(COLONATTR);
6243 }
6244
6245 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
6246 s--;
6247 TOKEN(0);
6248 }
6249
6250 PL_lex_allbrackets--;
6251 OPERATOR(PERLY_COLON);
6252 }
6253
6254 static int
yyl_subproto(pTHX_ char * s,CV * cv)6255 yyl_subproto(pTHX_ char *s, CV *cv)
6256 {
6257 STRLEN protolen = CvPROTOLEN(cv);
6258 const char *proto = CvPROTO(cv);
6259 bool optional;
6260
6261 proto = S_strip_spaces(aTHX_ proto, &protolen);
6262 if (!protolen)
6263 TERM(FUNC0SUB);
6264 if ((optional = *proto == ';')) {
6265 do {
6266 proto++;
6267 } while (*proto == ';');
6268 }
6269
6270 if (
6271 (
6272 (
6273 *proto == '$' || *proto == '_'
6274 || *proto == '*' || *proto == '+'
6275 )
6276 && proto[1] == '\0'
6277 )
6278 || (
6279 *proto == '\\' && proto[1] && proto[2] == '\0'
6280 )
6281 ) {
6282 UNIPROTO(UNIOPSUB,optional);
6283 }
6284
6285 if (*proto == '\\' && proto[1] == '[') {
6286 const char *p = proto + 2;
6287 while(*p && *p != ']')
6288 ++p;
6289 if(*p == ']' && !p[1])
6290 UNIPROTO(UNIOPSUB,optional);
6291 }
6292
6293 if (*proto == '&' && *s == '{') {
6294 if (PL_curstash)
6295 sv_setpvs(PL_subname, "__ANON__");
6296 else
6297 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6298 if (!PL_lex_allbrackets
6299 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6300 {
6301 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6302 }
6303 PREBLOCK(LSTOPSUB);
6304 }
6305
6306 return KEY_NULL;
6307 }
6308
6309 static int
yyl_leftcurly(pTHX_ char * s,const U8 formbrack)6310 yyl_leftcurly(pTHX_ char *s, const U8 formbrack)
6311 {
6312 char *d;
6313 if (PL_lex_brackets > 100) {
6314 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6315 }
6316
6317 switch (PL_expect) {
6318 case XTERM:
6319 case XTERMORDORDOR:
6320 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6321 PL_lex_allbrackets++;
6322 OPERATOR(HASHBRACK);
6323 case XOPERATOR:
6324 while (s < PL_bufend && SPACE_OR_TAB(*s))
6325 s++;
6326 d = s;
6327 PL_tokenbuf[0] = '\0';
6328 if (d < PL_bufend && *d == '-') {
6329 PL_tokenbuf[0] = '-';
6330 d++;
6331 while (d < PL_bufend && SPACE_OR_TAB(*d))
6332 d++;
6333 }
6334 if (d < PL_bufend && isIDFIRST_lazy_if_safe(d, PL_bufend, UTF)) {
6335 STRLEN len;
6336 d = scan_word6(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6337 FALSE, &len, FALSE);
6338 while (d < PL_bufend && SPACE_OR_TAB(*d))
6339 d++;
6340 if (*d == '}') {
6341 const char minus = (PL_tokenbuf[0] == '-');
6342 s = force_word(s + minus, BAREWORD, FALSE, TRUE);
6343 if (minus)
6344 force_next(PERLY_MINUS);
6345 }
6346 }
6347 /* FALLTHROUGH */
6348 case XATTRTERM:
6349 case XTERMBLOCK:
6350 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6351 PL_lex_allbrackets++;
6352 PL_expect = XSTATE;
6353 break;
6354 case XATTRBLOCK:
6355 case XBLOCK:
6356 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
6357 PL_lex_allbrackets++;
6358 PL_expect = XSTATE;
6359 break;
6360 case XBLOCKTERM:
6361 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6362 PL_lex_allbrackets++;
6363 PL_expect = XSTATE;
6364 break;
6365 default: {
6366 const char *t;
6367 if (PL_oldoldbufptr == PL_last_lop)
6368 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6369 else
6370 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6371 PL_lex_allbrackets++;
6372 s = skipspace(s);
6373 if (*s == '}') {
6374 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6375 PL_expect = XTERM;
6376 /* This hack is to get the ${} in the message. */
6377 PL_bufptr = s+1;
6378 yyerror("syntax error");
6379 yyquit();
6380 break;
6381 }
6382 OPERATOR(HASHBRACK);
6383 }
6384 if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
6385 /* ${...} or @{...} etc., but not print {...}
6386 * Skip the disambiguation and treat this as a block.
6387 */
6388 goto block_expectation;
6389 }
6390 /* This hack serves to disambiguate a pair of curlies
6391 * as being a block or an anon hash. Normally, expectation
6392 * determines that, but in cases where we're not in a
6393 * position to expect anything in particular (like inside
6394 * eval"") we have to resolve the ambiguity. This code
6395 * covers the case where the first term in the curlies is a
6396 * quoted string. Most other cases need to be explicitly
6397 * disambiguated by prepending a "+" before the opening
6398 * curly in order to force resolution as an anon hash.
6399 *
6400 * XXX should probably propagate the outer expectation
6401 * into eval"" to rely less on this hack, but that could
6402 * potentially break current behavior of eval"".
6403 * GSAR 97-07-21
6404 */
6405 t = s;
6406 if (*s == '\'' || *s == '"' || *s == '`') {
6407 /* common case: get past first string, handling escapes */
6408 for (t++; t < PL_bufend && *t != *s;)
6409 if (*t++ == '\\')
6410 t++;
6411 t++;
6412 }
6413 else if (*s == 'q') {
6414 if (++t < PL_bufend
6415 && (!isWORDCHAR(*t)
6416 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6417 && !isWORDCHAR(*t))))
6418 {
6419 /* skip q//-like construct */
6420 const char *tmps;
6421 char open, close, term;
6422 I32 brackets = 1;
6423
6424 while (t < PL_bufend && isSPACE(*t))
6425 t++;
6426 /* check for q => */
6427 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6428 OPERATOR(HASHBRACK);
6429 }
6430 term = *t;
6431 open = term;
6432 if (term && (tmps = memCHRs("([{< )]}> )]}>",term)))
6433 term = tmps[5];
6434 close = term;
6435 if (open == close)
6436 for (t++; t < PL_bufend; t++) {
6437 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6438 t++;
6439 else if (*t == open)
6440 break;
6441 }
6442 else {
6443 for (t++; t < PL_bufend; t++) {
6444 if (*t == '\\' && t+1 < PL_bufend)
6445 t++;
6446 else if (*t == close && --brackets <= 0)
6447 break;
6448 else if (*t == open)
6449 brackets++;
6450 }
6451 }
6452 t++;
6453 }
6454 else
6455 /* skip plain q word */
6456 while ( t < PL_bufend
6457 && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6458 {
6459 t += UTF ? UTF8SKIP(t) : 1;
6460 }
6461 }
6462 else if (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) {
6463 t += UTF ? UTF8SKIP(t) : 1;
6464 while ( t < PL_bufend
6465 && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6466 {
6467 t += UTF ? UTF8SKIP(t) : 1;
6468 }
6469 }
6470 while (t < PL_bufend && isSPACE(*t))
6471 t++;
6472 /* if comma follows first term, call it an anon hash */
6473 /* XXX it could be a comma expression with loop modifiers */
6474 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6475 || (*t == '=' && t[1] == '>')))
6476 OPERATOR(HASHBRACK);
6477 if (PL_expect == XREF) {
6478 block_expectation:
6479 /* If there is an opening brace or 'sub:', treat it
6480 as a term to make ${{...}}{k} and &{sub:attr...}
6481 dwim. Otherwise, treat it as a statement, so
6482 map {no strict; ...} works.
6483 */
6484 s = skipspace(s);
6485 if (*s == '{') {
6486 PL_expect = XTERM;
6487 break;
6488 }
6489 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "sub")) {
6490 PL_bufptr = s;
6491 d = s + 3;
6492 d = skipspace(d);
6493 s = PL_bufptr;
6494 if (*d == ':') {
6495 PL_expect = XTERM;
6496 break;
6497 }
6498 }
6499 PL_expect = XSTATE;
6500 }
6501 else {
6502 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6503 PL_expect = XSTATE;
6504 }
6505 }
6506 break;
6507 }
6508
6509 pl_yylval.ival = CopLINE(PL_curcop);
6510 PL_copline = NOLINE; /* invalidate current command line number */
6511 TOKEN(formbrack ? PERLY_EQUAL_SIGN : PERLY_BRACE_OPEN);
6512 }
6513
6514 static int
yyl_rightcurly(pTHX_ char * s,const U8 formbrack)6515 yyl_rightcurly(pTHX_ char *s, const U8 formbrack)
6516 {
6517 assert(s != PL_bufend);
6518 s++;
6519
6520 if (PL_lex_brackets <= 0)
6521 /* diag_listed_as: Unmatched right %s bracket */
6522 yyerror("Unmatched right curly bracket");
6523 else
6524 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6525
6526 PL_lex_allbrackets--;
6527
6528 if (PL_lex_state == LEX_INTERPNORMAL) {
6529 if (PL_lex_brackets == 0) {
6530 if (PL_expect & XFAKEBRACK) {
6531 PL_expect &= XENUMMASK;
6532 PL_lex_state = LEX_INTERPEND;
6533 PL_bufptr = s;
6534 return yylex(); /* ignore fake brackets */
6535 }
6536 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6537 && SvEVALED(PL_lex_repl))
6538 PL_lex_state = LEX_INTERPEND;
6539 else if (*s == '-' && s[1] == '>')
6540 PL_lex_state = LEX_INTERPENDMAYBE;
6541 else if (*s != '[' && *s != '{')
6542 PL_lex_state = LEX_INTERPEND;
6543 }
6544 }
6545
6546 if (PL_expect & XFAKEBRACK) {
6547 PL_expect &= XENUMMASK;
6548 PL_bufptr = s;
6549 return yylex(); /* ignore fake brackets */
6550 }
6551
6552 force_next(formbrack ? PERLY_DOT : PERLY_BRACE_CLOSE);
6553 if (formbrack) LEAVE_with_name("lex_format");
6554 if (formbrack == 2) { /* means . where arguments were expected */
6555 force_next(PERLY_SEMICOLON);
6556 TOKEN(FORMRBRACK);
6557 }
6558
6559 TOKEN(PERLY_SEMICOLON);
6560 }
6561
6562 static int
yyl_ampersand(pTHX_ char * s)6563 yyl_ampersand(pTHX_ char *s)
6564 {
6565 if (PL_expect == XPOSTDEREF)
6566 POSTDEREF(PERLY_AMPERSAND);
6567
6568 s++;
6569 if (*s++ == '&') {
6570 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6571 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6572 s -= 2;
6573 TOKEN(0);
6574 }
6575 AOPERATOR(ANDAND);
6576 }
6577 s--;
6578
6579 if (PL_expect == XOPERATOR) {
6580 char *d;
6581 bool bof;
6582 if ( PL_bufptr == PL_linestart
6583 && ckWARN(WARN_SEMICOLON)
6584 && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
6585 {
6586 CopLINE_dec(PL_curcop);
6587 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6588 CopLINE_inc(PL_curcop);
6589 }
6590 d = s;
6591 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6592 s++;
6593 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6594 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6595 s = d;
6596 s--;
6597 TOKEN(0);
6598 }
6599 if (d == s)
6600 BAop(bof ? OP_NBIT_AND : OP_BIT_AND);
6601 else
6602 BAop(OP_SBIT_AND);
6603 }
6604
6605 PL_tokenbuf[0] = '&';
6606 s = scan_ident(s - 1, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
6607 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6608
6609 if (PL_tokenbuf[1])
6610 force_ident_maybe_lex('&');
6611 else
6612 PREREF(PERLY_AMPERSAND);
6613
6614 TERM(PERLY_AMPERSAND);
6615 }
6616
6617 static int
yyl_verticalbar(pTHX_ char * s)6618 yyl_verticalbar(pTHX_ char *s)
6619 {
6620 char *d;
6621 bool bof;
6622
6623 s++;
6624 if (*s++ == '|') {
6625 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6626 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6627 s -= 2;
6628 TOKEN(0);
6629 }
6630 pl_yylval.ival = OP_OR;
6631 AOPERATOR(OROR);
6632 }
6633
6634 s--;
6635 d = s;
6636 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6637 s++;
6638
6639 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6640 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6641 s = d - 1;
6642 TOKEN(0);
6643 }
6644
6645 BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR);
6646 }
6647
6648 static int
yyl_bang(pTHX_ char * s)6649 yyl_bang(pTHX_ char *s)
6650 {
6651 const char tmp = *s++;
6652 if (tmp == '=') {
6653 /* was this !=~ where !~ was meant?
6654 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6655
6656 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6657 const char *t = s+1;
6658
6659 while (t < PL_bufend && isSPACE(*t))
6660 ++t;
6661
6662 if (*t == '/' || *t == '?'
6663 || ((*t == 'm' || *t == 's' || *t == 'y')
6664 && !isWORDCHAR(t[1]))
6665 || (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6666 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6667 "!=~ should be !~");
6668 }
6669
6670 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6671 s -= 2;
6672 TOKEN(0);
6673 }
6674
6675 ChEop(OP_NE);
6676 }
6677
6678 if (tmp == '~')
6679 PMop(OP_NOT);
6680
6681 s--;
6682 OPERATOR(PERLY_EXCLAMATION_MARK);
6683 }
6684
6685 static int
yyl_snail(pTHX_ char * s)6686 yyl_snail(pTHX_ char *s)
6687 {
6688 if (PL_expect == XPOSTDEREF)
6689 POSTDEREF(PERLY_SNAIL);
6690 PL_tokenbuf[0] = '@';
6691 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6692 if (PL_expect == XOPERATOR) {
6693 char *d = s;
6694 if (PL_bufptr > s) {
6695 d = PL_bufptr-1;
6696 PL_bufptr = PL_oldbufptr;
6697 }
6698 no_op("Array", d);
6699 }
6700 pl_yylval.ival = 0;
6701 if (!PL_tokenbuf[1]) {
6702 PREREF(PERLY_SNAIL);
6703 }
6704 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6705 s = skipspace(s);
6706 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6707 && intuit_more(s, PL_bufend))
6708 {
6709 if (*s == '{')
6710 PL_tokenbuf[0] = '%';
6711
6712 /* Warn about @ where they meant $. */
6713 if (*s == '[' || *s == '{') {
6714 if (ckWARN(WARN_SYNTAX)) {
6715 S_check_scalar_slice(aTHX_ s);
6716 }
6717 }
6718 }
6719 PL_expect = XOPERATOR;
6720 force_ident_maybe_lex('@');
6721 TERM(PERLY_SNAIL);
6722 }
6723
6724 static int
yyl_slash(pTHX_ char * s)6725 yyl_slash(pTHX_ char *s)
6726 {
6727 if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') {
6728 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6729 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6730 TOKEN(0);
6731 s += 2;
6732 AOPERATOR(DORDOR);
6733 }
6734 else if (PL_expect == XOPERATOR) {
6735 s++;
6736 if (*s == '=' && !PL_lex_allbrackets
6737 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6738 {
6739 s--;
6740 TOKEN(0);
6741 }
6742 Mop(OP_DIVIDE);
6743 }
6744 else {
6745 /* Disable warning on "study /blah/" */
6746 if ( PL_oldoldbufptr == PL_last_uni
6747 && ( *PL_last_uni != 's' || s - PL_last_uni < 5
6748 || memNE(PL_last_uni, "study", 5)
6749 || isWORDCHAR_lazy_if_safe(PL_last_uni+5, PL_bufend, UTF)
6750 ))
6751 check_uni();
6752 s = scan_pat(s,OP_MATCH);
6753 TERM(sublex_start());
6754 }
6755 }
6756
6757 static int
yyl_leftsquare(pTHX_ char * s)6758 yyl_leftsquare(pTHX_ char *s)
6759 {
6760 if (PL_lex_brackets > 100)
6761 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6762 PL_lex_brackstack[PL_lex_brackets++] = 0;
6763 PL_lex_allbrackets++;
6764 s++;
6765 OPERATOR(PERLY_BRACKET_OPEN);
6766 }
6767
6768 static int
yyl_rightsquare(pTHX_ char * s)6769 yyl_rightsquare(pTHX_ char *s)
6770 {
6771 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6772 TOKEN(0);
6773 s++;
6774 if (PL_lex_brackets <= 0)
6775 /* diag_listed_as: Unmatched right %s bracket */
6776 yyerror("Unmatched right square bracket");
6777 else
6778 --PL_lex_brackets;
6779 PL_lex_allbrackets--;
6780 if (PL_lex_state == LEX_INTERPNORMAL) {
6781 if (PL_lex_brackets == 0) {
6782 if (*s == '-' && s[1] == '>')
6783 PL_lex_state = LEX_INTERPENDMAYBE;
6784 else if (*s != '[' && *s != '{')
6785 PL_lex_state = LEX_INTERPEND;
6786 }
6787 }
6788 TERM(PERLY_BRACKET_CLOSE);
6789 }
6790
6791 static int
yyl_tilde(pTHX_ char * s)6792 yyl_tilde(pTHX_ char *s)
6793 {
6794 bool bof;
6795 if (s[1] == '~' && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)) {
6796 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6797 TOKEN(0);
6798 s += 2;
6799 Perl_ck_warner_d(aTHX_
6800 packWARN(WARN_DEPRECATED__SMARTMATCH),
6801 "Smartmatch is deprecated");
6802 NCEop(OP_SMARTMATCH);
6803 }
6804 s++;
6805 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') {
6806 s++;
6807 BCop(OP_SCOMPLEMENT);
6808 }
6809 BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT);
6810 }
6811
6812 static int
yyl_leftparen(pTHX_ char * s)6813 yyl_leftparen(pTHX_ char *s)
6814 {
6815 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
6816 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
6817 else
6818 PL_expect = XTERM;
6819 s = skipspace(s);
6820 PL_lex_allbrackets++;
6821 TOKEN(PERLY_PAREN_OPEN);
6822 }
6823
6824 static int
yyl_rightparen(pTHX_ char * s)6825 yyl_rightparen(pTHX_ char *s)
6826 {
6827 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
6828 TOKEN(0);
6829 s++;
6830 PL_lex_allbrackets--;
6831 s = skipspace(s);
6832 if (*s == '{')
6833 PREBLOCK(PERLY_PAREN_CLOSE);
6834 TERM(PERLY_PAREN_CLOSE);
6835 }
6836
6837 static int
yyl_leftpointy(pTHX_ char * s)6838 yyl_leftpointy(pTHX_ char *s)
6839 {
6840 char tmp;
6841
6842 if (PL_expect != XOPERATOR) {
6843 if (s[1] != '<' && !memchr(s,'>', PL_bufend - s))
6844 check_uni();
6845 if (s[1] == '<' && s[2] != '>')
6846 s = scan_heredoc(s);
6847 else
6848 s = scan_inputsymbol(s);
6849 PL_expect = XOPERATOR;
6850 TOKEN(sublex_start());
6851 }
6852
6853 s++;
6854
6855 tmp = *s++;
6856 if (tmp == '<') {
6857 if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6858 s -= 2;
6859 TOKEN(0);
6860 }
6861 SHop(OP_LEFT_SHIFT);
6862 }
6863 if (tmp == '=') {
6864 tmp = *s++;
6865 if (tmp == '>') {
6866 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6867 s -= 3;
6868 TOKEN(0);
6869 }
6870 NCEop(OP_NCMP);
6871 }
6872 s--;
6873 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6874 s -= 2;
6875 TOKEN(0);
6876 }
6877 ChRop(OP_LE);
6878 }
6879
6880 s--;
6881 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6882 s--;
6883 TOKEN(0);
6884 }
6885
6886 ChRop(OP_LT);
6887 }
6888
6889 static int
yyl_rightpointy(pTHX_ char * s)6890 yyl_rightpointy(pTHX_ char *s)
6891 {
6892 const char tmp = *s++;
6893
6894 if (tmp == '>') {
6895 if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6896 s -= 2;
6897 TOKEN(0);
6898 }
6899 SHop(OP_RIGHT_SHIFT);
6900 }
6901 else if (tmp == '=') {
6902 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6903 s -= 2;
6904 TOKEN(0);
6905 }
6906 ChRop(OP_GE);
6907 }
6908
6909 s--;
6910 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6911 s--;
6912 TOKEN(0);
6913 }
6914
6915 ChRop(OP_GT);
6916 }
6917
6918 static int
yyl_sglquote(pTHX_ char * s)6919 yyl_sglquote(pTHX_ char *s)
6920 {
6921 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6922 if (!s)
6923 missingterm(NULL, 0);
6924 COPLINE_SET_FROM_MULTI_END;
6925 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6926 if (PL_expect == XOPERATOR) {
6927 no_op("String",s);
6928 }
6929 pl_yylval.ival = OP_CONST;
6930 TERM(sublex_start());
6931 }
6932
6933 static int
yyl_dblquote(pTHX_ char * s)6934 yyl_dblquote(pTHX_ char *s)
6935 {
6936 char *d;
6937 STRLEN len;
6938 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6939 DEBUG_T( {
6940 if (s)
6941 printbuf("### Saw string before %s\n", s);
6942 else
6943 PerlIO_printf(Perl_debug_log,
6944 "### Saw unterminated string\n");
6945 } );
6946 if (PL_expect == XOPERATOR) {
6947 no_op("String",s);
6948 }
6949 if (!s)
6950 missingterm(NULL, 0);
6951 pl_yylval.ival = OP_CONST;
6952 /* FIXME. I think that this can be const if char *d is replaced by
6953 more localised variables. */
6954 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6955 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6956 pl_yylval.ival = OP_STRINGIFY;
6957 break;
6958 }
6959 }
6960 if (pl_yylval.ival == OP_CONST)
6961 COPLINE_SET_FROM_MULTI_END;
6962 TERM(sublex_start());
6963 }
6964
6965 static int
yyl_backtick(pTHX_ char * s)6966 yyl_backtick(pTHX_ char *s)
6967 {
6968 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6969 DEBUG_T( {
6970 if (s)
6971 printbuf("### Saw backtick string before %s\n", s);
6972 else
6973 PerlIO_printf(Perl_debug_log,
6974 "### Saw unterminated backtick string\n");
6975 } );
6976 if (PL_expect == XOPERATOR)
6977 no_op("Backticks",s);
6978 if (!s)
6979 missingterm(NULL, 0);
6980 pl_yylval.ival = OP_BACKTICK;
6981 TERM(sublex_start());
6982 }
6983
6984 static int
yyl_backslash(pTHX_ char * s)6985 yyl_backslash(pTHX_ char *s)
6986 {
6987 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr && isDIGIT(*s))
6988 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6989 *s, *s);
6990 if (PL_expect == XOPERATOR)
6991 no_op("Backslash",s);
6992 OPERATOR(REFGEN);
6993 }
6994
6995 static void
yyl_data_handle(pTHX)6996 yyl_data_handle(pTHX)
6997 {
6998 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
6999 ? PL_curstash
7000 : PL_defstash;
7001 GV *gv = (GV *)*hv_fetchs(stash, "DATA", 1);
7002
7003 if (!isGV(gv))
7004 gv_init(gv,stash,"DATA",4,0);
7005
7006 GvMULTI_on(gv);
7007 if (!GvIO(gv))
7008 GvIOp(gv) = newIO();
7009 IoIFP(GvIOp(gv)) = PL_rsfp;
7010
7011 /* Mark this internal pseudo-handle as clean */
7012 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7013 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7014 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7015 else
7016 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7017
7018 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7019 /* if the script was opened in binmode, we need to revert
7020 * it to text mode for compatibility; but only iff it has CRs
7021 * XXX this is a questionable hack at best. */
7022 if (PL_bufend-PL_bufptr > 2
7023 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7024 {
7025 Off_t loc = 0;
7026 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7027 loc = PerlIO_tell(PL_rsfp);
7028 (void)PerlIO_seek(PL_rsfp, 0L, 0);
7029 }
7030 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7031 if (loc > 0)
7032 PerlIO_seek(PL_rsfp, loc, 0);
7033 }
7034 }
7035 #endif
7036
7037 #ifdef PERLIO_LAYERS
7038 if (!IN_BYTES) {
7039 if (UTF)
7040 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7041 }
7042 #endif
7043
7044 PL_rsfp = NULL;
7045 }
7046
7047 PERL_STATIC_NO_RET void yyl_croak_unrecognised(pTHX_ char*)
7048 __attribute__noreturn__;
7049
7050 PERL_STATIC_NO_RET void
yyl_croak_unrecognised(pTHX_ char * s)7051 yyl_croak_unrecognised(pTHX_ char *s)
7052 {
7053 SV *dsv = newSVpvs_flags("", SVs_TEMP);
7054 const char *c;
7055 char *d;
7056 STRLEN len;
7057
7058 if (UTF) {
7059 STRLEN skiplen = UTF8SKIP(s);
7060 STRLEN stravail = PL_bufend - s;
7061 c = sv_uni_display(dsv, newSVpvn_flags(s,
7062 skiplen > stravail ? stravail : skiplen,
7063 SVs_TEMP | SVf_UTF8),
7064 10, UNI_DISPLAY_ISPRINT);
7065 }
7066 else {
7067 c = Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
7068 }
7069
7070 if (s >= PL_linestart) {
7071 d = PL_linestart;
7072 }
7073 else {
7074 /* somehow (probably due to a parse failure), PL_linestart has advanced
7075 * pass PL_bufptr, get a reasonable beginning of line
7076 */
7077 d = s;
7078 while (d > SvPVX(PL_linestr) && d[-1] && d[-1] != '\n')
7079 --d;
7080 }
7081 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) d, (U8 *) s) : (STRLEN) (s - d);
7082 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
7083 d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)d) : s - UNRECOGNIZED_PRECEDE_COUNT;
7084 }
7085
7086 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %" UTF8f "<-- HERE near column %d", c,
7087 UTF8fARG(UTF, (s - d), d),
7088 (int) len + 1);
7089 }
7090
7091 static int
yyl_require(pTHX_ char * s,I32 orig_keyword)7092 yyl_require(pTHX_ char *s, I32 orig_keyword)
7093 {
7094 s = skipspace(s);
7095 if (isDIGIT(*s)) {
7096 s = force_version(s, FALSE);
7097 }
7098 else if (*s != 'v' || !isDIGIT(s[1])
7099 || (s = force_version(s, TRUE), *s == 'v'))
7100 {
7101 *PL_tokenbuf = '\0';
7102 s = force_word(s,BAREWORD,TRUE,TRUE);
7103 if (isIDFIRST_lazy_if_safe(PL_tokenbuf,
7104 PL_tokenbuf + sizeof(PL_tokenbuf),
7105 UTF))
7106 {
7107 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
7108 GV_ADD | (UTF ? SVf_UTF8 : 0));
7109 }
7110 else if (*s == '<')
7111 yyerror("<> at require-statement should be quotes");
7112 }
7113
7114 if (orig_keyword == KEY_require)
7115 pl_yylval.ival = 1;
7116 else
7117 pl_yylval.ival = 0;
7118
7119 PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
7120 PL_bufptr = s;
7121 PL_last_uni = PL_oldbufptr;
7122 PL_last_lop_op = OP_REQUIRE;
7123 s = skipspace(s);
7124 return REPORT( (int)KW_REQUIRE );
7125 }
7126
7127 static int
yyl_foreach(pTHX_ char * s)7128 yyl_foreach(pTHX_ char *s)
7129 {
7130 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7131 return REPORT(0);
7132 pl_yylval.ival = CopLINE(PL_curcop);
7133 s = skipspace(s);
7134 if (PL_expect == XSTATE && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
7135 char *p = s;
7136 SSize_t s_off = s - SvPVX(PL_linestr);
7137 bool paren_is_valid = FALSE;
7138 bool maybe_package = FALSE;
7139 bool saw_core = FALSE;
7140 bool core_valid = FALSE;
7141
7142 if (UNLIKELY(memBEGINPs(p, (STRLEN) (PL_bufend - p), "CORE::"))) {
7143 saw_core = TRUE;
7144 p += 6;
7145 }
7146 if (LIKELY(memBEGINPs(p, (STRLEN) (PL_bufend - p), "my"))) {
7147 core_valid = TRUE;
7148 paren_is_valid = TRUE;
7149 if (isSPACE(p[2])) {
7150 p = skipspace(p + 3);
7151 maybe_package = TRUE;
7152 }
7153 else {
7154 p += 2;
7155 }
7156 }
7157 else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "our")) {
7158 core_valid = TRUE;
7159 if (isSPACE(p[3])) {
7160 p = skipspace(p + 4);
7161 maybe_package = TRUE;
7162 }
7163 else {
7164 p += 3;
7165 }
7166 }
7167 else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "state")) {
7168 core_valid = TRUE;
7169 if (isSPACE(p[5])) {
7170 p = skipspace(p + 6);
7171 }
7172 else {
7173 p += 5;
7174 }
7175 }
7176 if (saw_core && !core_valid) {
7177 Perl_croak(aTHX_ "Missing $ on loop variable");
7178 }
7179
7180 if (maybe_package && !saw_core) {
7181 /* skip optional package name, as in "for my abc $x (..)" */
7182 if (UNLIKELY(isIDFIRST_lazy_if_safe(p, PL_bufend, UTF))) {
7183 STRLEN len;
7184 p = scan_word6(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len, TRUE);
7185 p = skipspace(p);
7186 paren_is_valid = FALSE;
7187 }
7188 }
7189
7190 if (UNLIKELY(paren_is_valid && *p == '(')) {
7191 ; /* fine - this is foreach my (list) */
7192 }
7193 else if (UNLIKELY(*p != '$' && *p != '\\')) {
7194 /* "for myfoo (" will end up here, but with p pointing at the 'f' */
7195 Perl_croak(aTHX_ "Missing $ on loop variable");
7196 }
7197 /* The buffer may have been reallocated, update s */
7198 s = SvPVX(PL_linestr) + s_off;
7199 }
7200 OPERATOR(KW_FOR);
7201 }
7202
7203 static int
yyl_do(pTHX_ char * s,I32 orig_keyword)7204 yyl_do(pTHX_ char *s, I32 orig_keyword)
7205 {
7206 s = skipspace(s);
7207 if (*s == '{')
7208 PRETERMBLOCK(KW_DO);
7209 if (*s != '\'') {
7210 char *d;
7211 STRLEN len;
7212 *PL_tokenbuf = '&';
7213 d = scan_word6(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7214 1, &len, TRUE);
7215 if (len && memNEs(PL_tokenbuf+1, len, "CORE")
7216 && !keyword(PL_tokenbuf + 1, len, 0)) {
7217 SSize_t off = s-SvPVX(PL_linestr);
7218 d = skipspace(d);
7219 s = SvPVX(PL_linestr)+off;
7220 if (*d == '(') {
7221 force_ident_maybe_lex('&');
7222 s = d;
7223 }
7224 }
7225 }
7226 if (orig_keyword == KEY_do)
7227 pl_yylval.ival = 1;
7228 else
7229 pl_yylval.ival = 0;
7230 OPERATOR(KW_DO);
7231 }
7232
7233 static int
yyl_my(pTHX_ char * s,I32 my)7234 yyl_my(pTHX_ char *s, I32 my)
7235 {
7236 if (PL_in_my) {
7237 PL_bufptr = s;
7238 yyerror(Perl_form(aTHX_
7239 "Can't redeclare \"%s\" in \"%s\"",
7240 my == KEY_my ? "my" :
7241 my == KEY_state ? "state" : "our",
7242 PL_in_my == KEY_my ? "my" :
7243 PL_in_my == KEY_state ? "state" : "our"));
7244 }
7245 PL_in_my = (U16)my;
7246 s = skipspace(s);
7247 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
7248 STRLEN len;
7249 s = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len, TRUE);
7250 if (memEQs(PL_tokenbuf, len, "sub"))
7251 return yyl_sub(aTHX_ s, my);
7252 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7253 if (!PL_in_my_stash) {
7254 char tmpbuf[1024];
7255 int i;
7256 PL_bufptr = s;
7257 i = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7258 PERL_MY_SNPRINTF_POST_GUARD(i, sizeof(tmpbuf));
7259 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
7260 }
7261 }
7262 else if (*s == '\\') {
7263 if (!FEATURE_MYREF_IS_ENABLED)
7264 Perl_croak(aTHX_ "The experimental declared_refs "
7265 "feature is not enabled");
7266 Perl_ck_warner_d(aTHX_
7267 packWARN(WARN_EXPERIMENTAL__DECLARED_REFS),
7268 "Declaring references is experimental");
7269 }
7270 OPERATOR(KW_MY);
7271 }
7272
7273 static int yyl_try(pTHX_ char*);
7274
7275 static bool
yyl_eol_needs_semicolon(pTHX_ char ** ps)7276 yyl_eol_needs_semicolon(pTHX_ char **ps)
7277 {
7278 char *s = *ps;
7279 if (PL_lex_state != LEX_NORMAL
7280 || (PL_in_eval && !PL_rsfp && !PL_parser->filtered))
7281 {
7282 const bool in_comment = *s == '#';
7283 char *d;
7284 if (*s == '#' && s == PL_linestart && PL_in_eval
7285 && !PL_rsfp && !PL_parser->filtered) {
7286 /* handle eval qq[#line 1 "foo"\n ...] */
7287 CopLINE_dec(PL_curcop);
7288 incline(s, PL_bufend);
7289 }
7290 d = s;
7291 while (d < PL_bufend && *d != '\n')
7292 d++;
7293 if (d < PL_bufend)
7294 d++;
7295 s = d;
7296 if (in_comment && d == PL_bufend
7297 && PL_lex_state == LEX_INTERPNORMAL
7298 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
7299 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
7300 else
7301 incline(s, PL_bufend);
7302 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
7303 PL_lex_state = LEX_FORMLINE;
7304 force_next(FORMRBRACK);
7305 *ps = s;
7306 return TRUE;
7307 }
7308 }
7309 else {
7310 while (s < PL_bufend && *s != '\n')
7311 s++;
7312 if (s < PL_bufend) {
7313 s++;
7314 if (s < PL_bufend)
7315 incline(s, PL_bufend);
7316 }
7317 }
7318 *ps = s;
7319 return FALSE;
7320 }
7321
7322 static int
yyl_fake_eof(pTHX_ U32 fake_eof,bool bof,char * s)7323 yyl_fake_eof(pTHX_ U32 fake_eof, bool bof, char *s)
7324 {
7325 char *d;
7326
7327 goto start;
7328
7329 do {
7330 fake_eof = 0;
7331 bof = cBOOL(PL_rsfp);
7332 start:
7333
7334 PL_bufptr = PL_bufend;
7335 COPLINE_INC_WITH_HERELINES;
7336 if (!lex_next_chunk(fake_eof)) {
7337 CopLINE_dec(PL_curcop);
7338 s = PL_bufptr;
7339 TOKEN(PERLY_SEMICOLON); /* not infinite loop because rsfp is NULL now */
7340 }
7341 CopLINE_dec(PL_curcop);
7342 s = PL_bufptr;
7343 /* If it looks like the start of a BOM or raw UTF-16,
7344 * check if it in fact is. */
7345 if (bof && PL_rsfp
7346 && ( *s == 0
7347 || *(U8*)s == BOM_UTF8_FIRST_BYTE
7348 || *(U8*)s >= 0xFE
7349 || s[1] == 0))
7350 {
7351 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
7352 bof = (offset == (Off_t)SvCUR(PL_linestr));
7353 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
7354 /* offset may include swallowed CR */
7355 if (!bof)
7356 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
7357 #endif
7358 if (bof) {
7359 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7360 s = swallow_bom((U8*)s);
7361 }
7362 }
7363 if (PL_parser->in_pod) {
7364 /* Incest with pod. */
7365 if ( memBEGINPs(s, (STRLEN) (PL_bufend - s), "=cut")
7366 && !isALPHA(s[4]))
7367 {
7368 SvPVCLEAR(PL_linestr);
7369 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
7370 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7371 PL_last_lop = PL_last_uni = NULL;
7372 PL_parser->in_pod = 0;
7373 }
7374 }
7375 if (PL_rsfp || PL_parser->filtered)
7376 incline(s, PL_bufend);
7377 } while (PL_parser->in_pod);
7378
7379 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
7380 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7381 PL_last_lop = PL_last_uni = NULL;
7382 if (CopLINE(PL_curcop) == 1) {
7383 while (s < PL_bufend && isSPACE(*s))
7384 s++;
7385 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
7386 s++;
7387 d = NULL;
7388 if (!PL_in_eval) {
7389 if (*s == '#' && *(s+1) == '!')
7390 d = s + 2;
7391 #ifdef ALTERNATE_SHEBANG
7392 else {
7393 static char const as[] = ALTERNATE_SHEBANG;
7394 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
7395 d = s + (sizeof(as) - 1);
7396 }
7397 #endif /* ALTERNATE_SHEBANG */
7398 }
7399 if (d) {
7400 char *ipath;
7401 char *ipathend;
7402
7403 while (isSPACE(*d))
7404 d++;
7405 ipath = d;
7406 while (*d && !isSPACE(*d))
7407 d++;
7408 ipathend = d;
7409
7410 #ifdef ARG_ZERO_IS_SCRIPT
7411 if (ipathend > ipath) {
7412 /*
7413 * HP-UX (at least) sets argv[0] to the script name,
7414 * which makes $^X incorrect. And Digital UNIX and Linux,
7415 * at least, set argv[0] to the basename of the Perl
7416 * interpreter. So, having found "#!", we'll set it right.
7417 */
7418 SV* copfilesv = CopFILESV(PL_curcop);
7419 if (copfilesv) {
7420 SV * const x =
7421 GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
7422 SVt_PV)); /* $^X */
7423 assert(SvPOK(x) || SvGMAGICAL(x));
7424 if (sv_eq(x, copfilesv)) {
7425 sv_setpvn(x, ipath, ipathend - ipath);
7426 SvSETMAGIC(x);
7427 }
7428 else {
7429 STRLEN blen;
7430 STRLEN llen;
7431 const char *bstart = SvPV_const(copfilesv, blen);
7432 const char * const lstart = SvPV_const(x, llen);
7433 if (llen < blen) {
7434 bstart += blen - llen;
7435 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
7436 sv_setpvn(x, ipath, ipathend - ipath);
7437 SvSETMAGIC(x);
7438 }
7439 }
7440 }
7441 }
7442 else {
7443 /* Anything to do if no copfilesv? */
7444 }
7445 TAINT_NOT; /* $^X is always tainted, but that's OK */
7446 }
7447 #endif /* ARG_ZERO_IS_SCRIPT */
7448
7449 /*
7450 * Look for options.
7451 */
7452 d = instr(s,"perl -");
7453 if (!d) {
7454 d = instr(s,"perl");
7455 #if defined(DOSISH)
7456 /* avoid getting into infinite loops when shebang
7457 * line contains "Perl" rather than "perl" */
7458 if (!d) {
7459 for (d = ipathend-4; d >= ipath; --d) {
7460 if (isALPHA_FOLD_EQ(*d, 'p')
7461 && !ibcmp(d, "perl", 4))
7462 {
7463 break;
7464 }
7465 }
7466 if (d < ipath)
7467 d = NULL;
7468 }
7469 #endif
7470 }
7471 #ifdef ALTERNATE_SHEBANG
7472 /*
7473 * If the ALTERNATE_SHEBANG on this system starts with a
7474 * character that can be part of a Perl expression, then if
7475 * we see it but not "perl", we're probably looking at the
7476 * start of Perl code, not a request to hand off to some
7477 * other interpreter. Similarly, if "perl" is there, but
7478 * not in the first 'word' of the line, we assume the line
7479 * contains the start of the Perl program.
7480 */
7481 if (d && *s != '#') {
7482 const char *c = ipath;
7483 while (*c && !memCHRs("; \t\r\n\f\v#", *c))
7484 c++;
7485 if (c < d)
7486 d = NULL; /* "perl" not in first word; ignore */
7487 else
7488 *s = '#'; /* Don't try to parse shebang line */
7489 }
7490 #endif /* ALTERNATE_SHEBANG */
7491 if (!d
7492 && *s == '#'
7493 && ipathend > ipath
7494 && !PL_minus_c
7495 && !instr(s,"indir")
7496 && instr(PL_origargv[0],"perl"))
7497 {
7498 char **newargv;
7499
7500 *ipathend = '\0';
7501 s = ipathend + 1;
7502 while (s < PL_bufend && isSPACE(*s))
7503 s++;
7504 if (s < PL_bufend) {
7505 Newx(newargv,PL_origargc+3,char*);
7506 newargv[1] = s;
7507 while (s < PL_bufend && !isSPACE(*s))
7508 s++;
7509 *s = '\0';
7510 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
7511 }
7512 else
7513 newargv = PL_origargv;
7514 newargv[0] = ipath;
7515 PERL_FPU_PRE_EXEC
7516 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
7517 PERL_FPU_POST_EXEC
7518 Perl_croak(aTHX_ "Can't exec %s", ipath);
7519 }
7520 if (d) {
7521 while (*d && !isSPACE(*d))
7522 d++;
7523 while (SPACE_OR_TAB(*d))
7524 d++;
7525
7526 if (*d++ == '-') {
7527 const bool switches_done = PL_doswitches;
7528 const U32 oldpdb = PL_perldb;
7529 const bool oldn = PL_minus_n;
7530 const bool oldp = PL_minus_p;
7531 const char *d1 = d;
7532
7533 do {
7534 bool baduni = FALSE;
7535 if (*d1 == 'C') {
7536 const char *d2 = d1 + 1;
7537 if (parse_unicode_opts((const char **)&d2)
7538 != PL_unicode)
7539 baduni = TRUE;
7540 }
7541 if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) {
7542 const char * const m = d1;
7543 while (*d1 && !isSPACE(*d1))
7544 d1++;
7545 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
7546 (int)(d1 - m), m);
7547 }
7548 d1 = moreswitches(d1);
7549 } while (d1);
7550 if (PL_doswitches && !switches_done) {
7551 int argc = PL_origargc;
7552 char **argv = PL_origargv;
7553 do {
7554 argc--,argv++;
7555 } while (argc && argv[0][0] == '-' && argv[0][1]);
7556 init_argv_symbols(argc,argv);
7557 }
7558 if ( (PERLDB_LINE_OR_SAVESRC && !oldpdb)
7559 || ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
7560 /* if we have already added "LINE: while (<>) {",
7561 we must not do it again */
7562 {
7563 SvPVCLEAR(PL_linestr);
7564 PL_bufptr = PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
7565 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7566 PL_last_lop = PL_last_uni = NULL;
7567 PL_preambled = FALSE;
7568 if (PERLDB_LINE_OR_SAVESRC)
7569 (void)gv_fetchfile(PL_origfilename);
7570 return YYL_RETRY;
7571 }
7572 }
7573 }
7574 }
7575 }
7576
7577 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
7578 PL_lex_state = LEX_FORMLINE;
7579 force_next(FORMRBRACK);
7580 TOKEN(PERLY_SEMICOLON);
7581 }
7582
7583 PL_bufptr = s;
7584 return YYL_RETRY;
7585 }
7586
7587 static int
yyl_fatcomma(pTHX_ char * s,STRLEN len)7588 yyl_fatcomma(pTHX_ char *s, STRLEN len)
7589 {
7590 CLINE;
7591 pl_yylval.opval
7592 = newSVOP(OP_CONST, 0,
7593 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
7594 pl_yylval.opval->op_private = OPpCONST_BARE;
7595 TERM(BAREWORD);
7596 }
7597
7598 static int
yyl_safe_bareword(pTHX_ char * s,const char lastchar)7599 yyl_safe_bareword(pTHX_ char *s, const char lastchar)
7600 {
7601 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7602 && PL_parser->saw_infix_sigil)
7603 {
7604 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7605 "Operator or semicolon missing before %c%" UTF8f,
7606 lastchar,
7607 UTF8fARG(UTF, strlen(PL_tokenbuf),
7608 PL_tokenbuf));
7609 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7610 "Ambiguous use of %c resolved as operator %c",
7611 lastchar, lastchar);
7612 }
7613 TOKEN(BAREWORD);
7614 }
7615
7616 static int
yyl_constant_op(pTHX_ char * s,SV * sv,CV * cv,OP * rv2cv_op,PADOFFSET off)7617 yyl_constant_op(pTHX_ char *s, SV *sv, CV *cv, OP *rv2cv_op, PADOFFSET off)
7618 {
7619 if (sv) {
7620 op_free(rv2cv_op);
7621 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7622 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7623 if (SvTYPE(sv) == SVt_PVAV)
7624 pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7625 pl_yylval.opval);
7626 else {
7627 pl_yylval.opval->op_private = 0;
7628 pl_yylval.opval->op_folded = 1;
7629 pl_yylval.opval->op_flags |= OPf_SPECIAL;
7630 }
7631 TOKEN(BAREWORD);
7632 }
7633
7634 op_free(pl_yylval.opval);
7635 pl_yylval.opval =
7636 off ? newCVREF(0, rv2cv_op) : rv2cv_op;
7637 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7638 PL_last_lop = PL_oldbufptr;
7639 PL_last_lop_op = OP_ENTERSUB;
7640
7641 /* Is there a prototype? */
7642 if (SvPOK(cv)) {
7643 int k = yyl_subproto(aTHX_ s, cv);
7644 if (k != KEY_NULL)
7645 return k;
7646 }
7647
7648 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7649 PL_expect = XTERM;
7650 force_next(off ? PRIVATEREF : BAREWORD);
7651 if (!PL_lex_allbrackets
7652 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7653 {
7654 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7655 }
7656
7657 TOKEN(NOAMP);
7658 }
7659
7660 /* Honour "reserved word" warnings, and enforce strict subs */
7661 static void
yyl_strictwarn_bareword(pTHX_ const char lastchar)7662 yyl_strictwarn_bareword(pTHX_ const char lastchar)
7663 {
7664 /* after "print" and similar functions (corresponding to
7665 * "F? L" in opcode.pl), whatever wasn't already parsed as
7666 * a filehandle should be subject to "strict subs".
7667 * Likewise for the optional indirect-object argument to system
7668 * or exec, which can't be a bareword */
7669 if ((PL_last_lop_op == OP_PRINT
7670 || PL_last_lop_op == OP_PRTF
7671 || PL_last_lop_op == OP_SAY
7672 || PL_last_lop_op == OP_SYSTEM
7673 || PL_last_lop_op == OP_EXEC)
7674 && (PL_hints & HINT_STRICT_SUBS))
7675 {
7676 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7677 }
7678
7679 if (lastchar != '-' && ckWARN(WARN_RESERVED)) {
7680 char *d = PL_tokenbuf;
7681 while (isLOWER(*d))
7682 d++;
7683 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0)) {
7684 /* PL_warn_reserved is constant */
7685 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
7686 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7687 PL_tokenbuf);
7688 GCC_DIAG_RESTORE_STMT;
7689 }
7690 }
7691 }
7692
7693 static int
yyl_just_a_word(pTHX_ char * s,STRLEN len,I32 orig_keyword,struct code c)7694 yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 orig_keyword, struct code c)
7695 {
7696 int pkgname = 0;
7697 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7698 bool safebw;
7699 bool no_op_error = FALSE;
7700 /* Use this var to track whether intuit_method has been
7701 called. intuit_method returns 0 or > 255. */
7702 int key = 1;
7703
7704 if (PL_expect == XOPERATOR) {
7705 if (PL_bufptr == PL_linestart) {
7706 CopLINE_dec(PL_curcop);
7707 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7708 CopLINE_inc(PL_curcop);
7709 }
7710 else
7711 /* We want to call no_op with s pointing after the
7712 bareword, so defer it. But we want it to come
7713 before the Bad name croak. */
7714 no_op_error = TRUE;
7715 }
7716
7717 /* Get the rest if it looks like a package qualifier */
7718
7719 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7720 STRLEN morelen;
7721 s = scan_word6(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7722 TRUE, &morelen, TRUE);
7723 if (no_op_error) {
7724 no_op("Bareword",s);
7725 no_op_error = FALSE;
7726 }
7727 if (!morelen)
7728 Perl_croak(aTHX_ "Bad name after %" UTF8f "%s",
7729 UTF8fARG(UTF, len, PL_tokenbuf),
7730 *s == '\'' ? "'" : "::");
7731 len += morelen;
7732 pkgname = 1;
7733 }
7734
7735 if (no_op_error)
7736 no_op("Bareword",s);
7737
7738 /* See if the name is "Foo::",
7739 in which case Foo is a bareword
7740 (and a package name). */
7741
7742 if (len > 2 && PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':') {
7743 if (ckWARN(WARN_BAREWORD)
7744 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7745 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7746 "Bareword \"%" UTF8f
7747 "\" refers to nonexistent package",
7748 UTF8fARG(UTF, len, PL_tokenbuf));
7749 len -= 2;
7750 PL_tokenbuf[len] = '\0';
7751 c.gv = NULL;
7752 c.gvp = 0;
7753 safebw = TRUE;
7754 }
7755 else {
7756 safebw = FALSE;
7757 }
7758
7759 /* if we saw a global override before, get the right name */
7760
7761 if (!c.sv)
7762 c.sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len);
7763 if (c.gvp) {
7764 SV *sv = newSVpvs("CORE::GLOBAL::");
7765 sv_catsv(sv, c.sv);
7766 SvREFCNT_dec(c.sv);
7767 c.sv = sv;
7768 }
7769
7770 /* Presume this is going to be a bareword of some sort. */
7771 CLINE;
7772 pl_yylval.opval = newSVOP(OP_CONST, 0, c.sv);
7773 pl_yylval.opval->op_private = OPpCONST_BARE;
7774
7775 /* And if "Foo::", then that's what it certainly is. */
7776 if (safebw)
7777 return yyl_safe_bareword(aTHX_ s, lastchar);
7778
7779 if (!c.off) {
7780 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(c.sv));
7781 const_op->op_private = OPpCONST_BARE;
7782 c.rv2cv_op = newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op);
7783 c.cv = c.lex
7784 ? isGV(c.gv)
7785 ? GvCV(c.gv)
7786 : SvROK(c.gv) && SvTYPE(SvRV(c.gv)) == SVt_PVCV
7787 ? (CV *)SvRV(c.gv)
7788 : ((CV *)c.gv)
7789 : rv2cv_op_cv(c.rv2cv_op, RV2CVOPCV_RETURN_STUB);
7790 }
7791
7792 /* See if it's the indirect object for a list operator. */
7793
7794 if (PL_oldoldbufptr
7795 && PL_oldoldbufptr < PL_bufptr
7796 && (PL_oldoldbufptr == PL_last_lop
7797 || PL_oldoldbufptr == PL_last_uni)
7798 && /* NO SKIPSPACE BEFORE HERE! */
7799 (PL_expect == XREF
7800 || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7)
7801 == OA_FILEREF))
7802 {
7803 bool immediate_paren = *s == '(';
7804 SSize_t s_off;
7805
7806 /* (Now we can afford to cross potential line boundary.) */
7807 s = skipspace(s);
7808
7809 /* intuit_method() can indirectly call lex_next_chunk(),
7810 * invalidating s
7811 */
7812 s_off = s - SvPVX(PL_linestr);
7813 /* Two barewords in a row may indicate method call. */
7814 if ( ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
7815 || *s == '$')
7816 && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv)))
7817 {
7818 /* the code at method: doesn't use s */
7819 goto method;
7820 }
7821 s = SvPVX(PL_linestr) + s_off;
7822
7823 /* If not a declared subroutine, it's an indirect object. */
7824 /* (But it's an indir obj regardless for sort.) */
7825 /* Also, if "_" follows a filetest operator, it's a bareword */
7826
7827 if (
7828 ( !immediate_paren && (PL_last_lop_op == OP_SORT
7829 || (!c.cv
7830 && (PL_last_lop_op != OP_MAPSTART
7831 && PL_last_lop_op != OP_GREPSTART))))
7832 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7833 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK)
7834 == OA_FILESTATOP))
7835 )
7836 {
7837 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7838 yyl_strictwarn_bareword(aTHX_ lastchar);
7839 op_free(c.rv2cv_op);
7840 return yyl_safe_bareword(aTHX_ s, lastchar);
7841 }
7842 }
7843
7844 PL_expect = XOPERATOR;
7845 s = skipspace(s);
7846
7847 /* Is this a word before a => operator? */
7848 if (*s == '=' && s[1] == '>' && !pkgname) {
7849 op_free(c.rv2cv_op);
7850 CLINE;
7851 if (c.gvp || (c.lex && !c.off)) {
7852 assert (cSVOPx(pl_yylval.opval)->op_sv == c.sv);
7853 /* This is our own scalar, created a few lines
7854 above, so this is safe. */
7855 SvREADONLY_off(c.sv);
7856 sv_setpv(c.sv, PL_tokenbuf);
7857 if (UTF && !IN_BYTES
7858 && is_utf8_string((U8*)PL_tokenbuf, len))
7859 SvUTF8_on(c.sv);
7860 SvREADONLY_on(c.sv);
7861 }
7862 TERM(BAREWORD);
7863 }
7864
7865 /* If followed by a paren, it's certainly a subroutine. */
7866 if (*s == '(') {
7867 CLINE;
7868 if (c.cv) {
7869 char *d = s + 1;
7870 while (SPACE_OR_TAB(*d))
7871 d++;
7872 if (*d == ')' && (c.sv = cv_const_sv_or_av(c.cv)))
7873 return yyl_constant_op(aTHX_ d + 1, c.sv, c.cv, c.rv2cv_op, c.off);
7874 }
7875 NEXTVAL_NEXTTOKE.opval =
7876 c.off ? c.rv2cv_op : pl_yylval.opval;
7877 if (c.off)
7878 op_free(pl_yylval.opval), force_next(PRIVATEREF);
7879 else op_free(c.rv2cv_op), force_next(BAREWORD);
7880 pl_yylval.ival = 0;
7881 TOKEN(PERLY_AMPERSAND);
7882 }
7883
7884 /* If followed by var or block, call it a method (unless sub) */
7885
7886 if ((*s == '$' || *s == '{') && !c.cv && FEATURE_INDIRECT_IS_ENABLED) {
7887 op_free(c.rv2cv_op);
7888 PL_last_lop = PL_oldbufptr;
7889 PL_last_lop_op = OP_METHOD;
7890 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7891 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7892 PL_expect = XBLOCKTERM;
7893 PL_bufptr = s;
7894 return REPORT(METHCALL0);
7895 }
7896
7897 /* If followed by a bareword, see if it looks like indir obj. */
7898
7899 if ( key == 1
7900 && !orig_keyword
7901 && (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '$')
7902 && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv)))
7903 {
7904 method:
7905 if (c.lex && !c.off) {
7906 assert(cSVOPx(pl_yylval.opval)->op_sv == c.sv);
7907 SvREADONLY_off(c.sv);
7908 sv_setpvn(c.sv, PL_tokenbuf, len);
7909 if (UTF && !IN_BYTES
7910 && is_utf8_string((U8*)PL_tokenbuf, len))
7911 SvUTF8_on(c.sv);
7912 else SvUTF8_off(c.sv);
7913 }
7914 op_free(c.rv2cv_op);
7915 if (key == METHCALL0 && !PL_lex_allbrackets
7916 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7917 {
7918 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7919 }
7920 return REPORT(key);
7921 }
7922
7923 /* Not a method, so call it a subroutine (if defined) */
7924
7925 if (c.cv) {
7926 /* Check for a constant sub */
7927 c.sv = cv_const_sv_or_av(c.cv);
7928 return yyl_constant_op(aTHX_ s, c.sv, c.cv, c.rv2cv_op, c.off);
7929 }
7930
7931 /* Call it a bare word */
7932
7933 if (PL_hints & HINT_STRICT_SUBS)
7934 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7935 else
7936 yyl_strictwarn_bareword(aTHX_ lastchar);
7937
7938 op_free(c.rv2cv_op);
7939
7940 return yyl_safe_bareword(aTHX_ s, lastchar);
7941 }
7942
7943 static int
yyl_word_or_keyword(pTHX_ char * s,STRLEN len,I32 key,I32 orig_keyword,struct code c)7944 yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct code c)
7945 {
7946 switch (key) {
7947 default: /* not a keyword */
7948 return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
7949
7950 case KEY___FILE__:
7951 FUN0OP(newSVOP(OP_CONST, OPpCONST_TOKEN_FILE<<8,
7952 newSVpv(CopFILE(PL_curcop),0)) );
7953
7954 case KEY___LINE__:
7955 FUN0OP(newSVOP(OP_CONST, OPpCONST_TOKEN_LINE<<8,
7956 Perl_newSVpvf(aTHX_ "%" LINE_Tf, CopLINE(PL_curcop))));
7957
7958 case KEY___PACKAGE__:
7959 FUN0OP(newSVOP(OP_CONST, OPpCONST_TOKEN_PACKAGE<<8,
7960 (PL_curstash
7961 ? newSVhek(HvNAME_HEK(PL_curstash))
7962 : &PL_sv_undef))
7963 );
7964
7965 case KEY___DATA__:
7966 case KEY___END__:
7967 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D'))
7968 yyl_data_handle(aTHX);
7969 return yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s);
7970
7971 case KEY___SUB__:
7972 /* If !CvCLONE(PL_compcv) then rpeep will probably turn this into an
7973 * OP_CONST. We need to make it big enough to allow room for that if
7974 * so */
7975 FUN0OP(CvCLONE(PL_compcv)
7976 ? newOP(OP_RUNCV, 0)
7977 : newSVOP(OP_RUNCV, 0, &PL_sv_undef));
7978
7979 case KEY___CLASS__:
7980 FUN0(OP_CLASSNAME);
7981
7982 case KEY_AUTOLOAD:
7983 case KEY_DESTROY:
7984 case KEY_BEGIN:
7985 case KEY_UNITCHECK:
7986 case KEY_CHECK:
7987 case KEY_INIT:
7988 case KEY_END:
7989 if (PL_expect == XSTATE)
7990 return yyl_sub(aTHX_ PL_bufptr, key);
7991 return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
7992
7993 case KEY_ADJUST:
7994 Perl_ck_warner_d(aTHX_
7995 packWARN(WARN_EXPERIMENTAL__CLASS), "ADJUST is experimental");
7996
7997 /* The way that KEY_CHECK et.al. are handled currently are nothing
7998 * short of crazy. We won't copy that model for new phasers, but use
7999 * this as an experiment to test if this will work
8000 */
8001 PHASERBLOCK(KEY_ADJUST);
8002
8003 case KEY_abs:
8004 UNI(OP_ABS);
8005
8006 case KEY_alarm:
8007 UNI(OP_ALARM);
8008
8009 case KEY_accept:
8010 LOP(OP_ACCEPT,XTERM);
8011
8012 case KEY_and:
8013 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8014 return REPORT(0);
8015 OPERATOR(ANDOP);
8016
8017 case KEY_atan2:
8018 LOP(OP_ATAN2,XTERM);
8019
8020 case KEY_bind:
8021 LOP(OP_BIND,XTERM);
8022
8023 case KEY_binmode:
8024 LOP(OP_BINMODE,XTERM);
8025
8026 case KEY_bless:
8027 LOP(OP_BLESS,XTERM);
8028
8029 case KEY_break:
8030 FUN0(OP_BREAK);
8031
8032 case KEY_catch:
8033 PREBLOCK(KW_CATCH);
8034
8035 case KEY_chop:
8036 UNI(OP_CHOP);
8037
8038 case KEY_class:
8039 Perl_ck_warner_d(aTHX_
8040 packWARN(WARN_EXPERIMENTAL__CLASS), "class is experimental");
8041
8042 s = force_word(s,BAREWORD,FALSE,TRUE);
8043 s = skipspace(s);
8044 s = force_strict_version(s);
8045 PL_expect = XATTRBLOCK;
8046 TOKEN(KW_CLASS);
8047
8048 case KEY_continue:
8049 /* We have to disambiguate the two senses of
8050 "continue". If the next token is a '{' then
8051 treat it as the start of a continue block;
8052 otherwise treat it as a control operator.
8053 */
8054 s = skipspace(s);
8055 if (*s == '{')
8056 PREBLOCK(KW_CONTINUE);
8057 else
8058 FUN0(OP_CONTINUE);
8059
8060 case KEY_chdir:
8061 /* may use HOME */
8062 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
8063 UNI(OP_CHDIR);
8064
8065 case KEY_close:
8066 UNI(OP_CLOSE);
8067
8068 case KEY_closedir:
8069 UNI(OP_CLOSEDIR);
8070
8071 case KEY_cmp:
8072 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8073 return REPORT(0);
8074 NCEop(OP_SCMP);
8075
8076 case KEY_caller:
8077 UNI(OP_CALLER);
8078
8079 case KEY_crypt:
8080
8081 LOP(OP_CRYPT,XTERM);
8082
8083 case KEY_chmod:
8084 LOP(OP_CHMOD,XTERM);
8085
8086 case KEY_chown:
8087 LOP(OP_CHOWN,XTERM);
8088
8089 case KEY_connect:
8090 LOP(OP_CONNECT,XTERM);
8091
8092 case KEY_chr:
8093 UNI(OP_CHR);
8094
8095 case KEY_cos:
8096 UNI(OP_COS);
8097
8098 case KEY_chroot:
8099 UNI(OP_CHROOT);
8100
8101 case KEY_default:
8102 PREBLOCK(KW_DEFAULT);
8103
8104 case KEY_defer:
8105 Perl_ck_warner_d(aTHX_
8106 packWARN(WARN_EXPERIMENTAL__DEFER), "defer is experimental");
8107 PREBLOCK(KW_DEFER);
8108
8109 case KEY_do:
8110 return yyl_do(aTHX_ s, orig_keyword);
8111
8112 case KEY_die:
8113 PL_hints |= HINT_BLOCK_SCOPE;
8114 LOP(OP_DIE,XTERM);
8115
8116 case KEY_defined:
8117 UNI(OP_DEFINED);
8118
8119 case KEY_delete:
8120 UNI(OP_DELETE);
8121
8122 case KEY_dbmopen:
8123 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
8124 STR_WITH_LEN("NDBM_File::"),
8125 STR_WITH_LEN("DB_File::"),
8126 STR_WITH_LEN("GDBM_File::"),
8127 STR_WITH_LEN("SDBM_File::"),
8128 STR_WITH_LEN("ODBM_File::"),
8129 NULL);
8130 LOP(OP_DBMOPEN,XTERM);
8131
8132 case KEY_dbmclose:
8133 UNI(OP_DBMCLOSE);
8134
8135 case KEY_dump:
8136 LOOPX(OP_DUMP);
8137
8138 case KEY_else:
8139 PREBLOCK(KW_ELSE);
8140
8141 case KEY_elsif:
8142 pl_yylval.ival = CopLINE(PL_curcop);
8143 OPERATOR(KW_ELSIF);
8144
8145 case KEY_eq:
8146 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8147 return REPORT(0);
8148 ChEop(OP_SEQ);
8149
8150 case KEY_exists:
8151 UNI(OP_EXISTS);
8152
8153 case KEY_exit:
8154 UNI(OP_EXIT);
8155
8156 case KEY_eval:
8157 s = skipspace(s);
8158 if (*s == '{') { /* block eval */
8159 PL_expect = XTERMBLOCK;
8160 UNIBRACK(OP_ENTERTRY);
8161 }
8162 else { /* string eval */
8163 PL_expect = XTERM;
8164 UNIBRACK(OP_ENTEREVAL);
8165 }
8166
8167 case KEY_evalbytes:
8168 PL_expect = XTERM;
8169 UNIBRACK(-OP_ENTEREVAL);
8170
8171 case KEY_eof:
8172 UNI(OP_EOF);
8173
8174 case KEY_exp:
8175 UNI(OP_EXP);
8176
8177 case KEY_each:
8178 UNI(OP_EACH);
8179
8180 case KEY_exec:
8181 LOP(OP_EXEC,XREF);
8182
8183 case KEY_endhostent:
8184 FUN0(OP_EHOSTENT);
8185
8186 case KEY_endnetent:
8187 FUN0(OP_ENETENT);
8188
8189 case KEY_endservent:
8190 FUN0(OP_ESERVENT);
8191
8192 case KEY_endprotoent:
8193 FUN0(OP_EPROTOENT);
8194
8195 case KEY_endpwent:
8196 FUN0(OP_EPWENT);
8197
8198 case KEY_endgrent:
8199 FUN0(OP_EGRENT);
8200
8201 case KEY_field:
8202 /* TODO: maybe this should use the same parser/grammar structures as
8203 * `my`, but it's also rather messy because of the `our` conflation
8204 */
8205 Perl_ck_warner_d(aTHX_
8206 packWARN(WARN_EXPERIMENTAL__CLASS), "field is experimental");
8207
8208 croak_kw_unless_class("field");
8209
8210 PL_parser->in_my = KEY_field;
8211 OPERATOR(KW_FIELD);
8212
8213 case KEY_finally:
8214 Perl_ck_warner_d(aTHX_
8215 packWARN(WARN_EXPERIMENTAL__TRY), "try/catch/finally is experimental");
8216 PREBLOCK(KW_FINALLY);
8217
8218 case KEY_for:
8219 case KEY_foreach:
8220 return yyl_foreach(aTHX_ s);
8221
8222 case KEY_formline:
8223 LOP(OP_FORMLINE,XTERM);
8224
8225 case KEY_fork:
8226 FUN0(OP_FORK);
8227
8228 case KEY_fc:
8229 UNI(OP_FC);
8230
8231 case KEY_fcntl:
8232 LOP(OP_FCNTL,XTERM);
8233
8234 case KEY_fileno:
8235 UNI(OP_FILENO);
8236
8237 case KEY_flock:
8238 LOP(OP_FLOCK,XTERM);
8239
8240 case KEY_gt:
8241 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8242 return REPORT(0);
8243 ChRop(OP_SGT);
8244
8245 case KEY_ge:
8246 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8247 return REPORT(0);
8248 ChRop(OP_SGE);
8249
8250 case KEY_grep:
8251 LOP(OP_GREPSTART, XREF);
8252
8253 case KEY_goto:
8254 LOOPX(OP_GOTO);
8255
8256 case KEY_gmtime:
8257 UNI(OP_GMTIME);
8258
8259 case KEY_getc:
8260 UNIDOR(OP_GETC);
8261
8262 case KEY_getppid:
8263 FUN0(OP_GETPPID);
8264
8265 case KEY_getpgrp:
8266 UNI(OP_GETPGRP);
8267
8268 case KEY_getpriority:
8269 LOP(OP_GETPRIORITY,XTERM);
8270
8271 case KEY_getprotobyname:
8272 UNI(OP_GPBYNAME);
8273
8274 case KEY_getprotobynumber:
8275 LOP(OP_GPBYNUMBER,XTERM);
8276
8277 case KEY_getprotoent:
8278 FUN0(OP_GPROTOENT);
8279
8280 case KEY_getpwent:
8281 FUN0(OP_GPWENT);
8282
8283 case KEY_getpwnam:
8284 UNI(OP_GPWNAM);
8285
8286 case KEY_getpwuid:
8287 UNI(OP_GPWUID);
8288
8289 case KEY_getpeername:
8290 UNI(OP_GETPEERNAME);
8291
8292 case KEY_gethostbyname:
8293 UNI(OP_GHBYNAME);
8294
8295 case KEY_gethostbyaddr:
8296 LOP(OP_GHBYADDR,XTERM);
8297
8298 case KEY_gethostent:
8299 FUN0(OP_GHOSTENT);
8300
8301 case KEY_getnetbyname:
8302 UNI(OP_GNBYNAME);
8303
8304 case KEY_getnetbyaddr:
8305 LOP(OP_GNBYADDR,XTERM);
8306
8307 case KEY_getnetent:
8308 FUN0(OP_GNETENT);
8309
8310 case KEY_getservbyname:
8311 LOP(OP_GSBYNAME,XTERM);
8312
8313 case KEY_getservbyport:
8314 LOP(OP_GSBYPORT,XTERM);
8315
8316 case KEY_getservent:
8317 FUN0(OP_GSERVENT);
8318
8319 case KEY_getsockname:
8320 UNI(OP_GETSOCKNAME);
8321
8322 case KEY_getsockopt:
8323 LOP(OP_GSOCKOPT,XTERM);
8324
8325 case KEY_getgrent:
8326 FUN0(OP_GGRENT);
8327
8328 case KEY_getgrnam:
8329 UNI(OP_GGRNAM);
8330
8331 case KEY_getgrgid:
8332 UNI(OP_GGRGID);
8333
8334 case KEY_getlogin:
8335 FUN0(OP_GETLOGIN);
8336
8337 case KEY_given:
8338 pl_yylval.ival = CopLINE(PL_curcop);
8339 Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED__SMARTMATCH),
8340 "given is deprecated");
8341 OPERATOR(KW_GIVEN);
8342
8343 case KEY_glob:
8344 LOP( orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB, XTERM );
8345
8346 case KEY_hex:
8347 UNI(OP_HEX);
8348
8349 case KEY_if:
8350 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8351 return REPORT(0);
8352 pl_yylval.ival = CopLINE(PL_curcop);
8353 OPERATOR(KW_IF);
8354
8355 case KEY_index:
8356 LOP(OP_INDEX,XTERM);
8357
8358 case KEY_int:
8359 UNI(OP_INT);
8360
8361 case KEY_ioctl:
8362 LOP(OP_IOCTL,XTERM);
8363
8364 case KEY_isa:
8365 NCRop(OP_ISA);
8366
8367 case KEY_join:
8368 LOP(OP_JOIN,XTERM);
8369
8370 case KEY_keys:
8371 UNI(OP_KEYS);
8372
8373 case KEY_kill:
8374 LOP(OP_KILL,XTERM);
8375
8376 case KEY_last:
8377 LOOPX(OP_LAST);
8378
8379 case KEY_lc:
8380 UNI(OP_LC);
8381
8382 case KEY_lcfirst:
8383 UNI(OP_LCFIRST);
8384
8385 case KEY_local:
8386 OPERATOR(KW_LOCAL);
8387
8388 case KEY_length:
8389 UNI(OP_LENGTH);
8390
8391 case KEY_lt:
8392 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8393 return REPORT(0);
8394 ChRop(OP_SLT);
8395
8396 case KEY_le:
8397 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8398 return REPORT(0);
8399 ChRop(OP_SLE);
8400
8401 case KEY_localtime:
8402 UNI(OP_LOCALTIME);
8403
8404 case KEY_log:
8405 UNI(OP_LOG);
8406
8407 case KEY_link:
8408 LOP(OP_LINK,XTERM);
8409
8410 case KEY_listen:
8411 LOP(OP_LISTEN,XTERM);
8412
8413 case KEY_lock:
8414 UNI(OP_LOCK);
8415
8416 case KEY_lstat:
8417 UNI(OP_LSTAT);
8418
8419 case KEY_m:
8420 s = scan_pat(s,OP_MATCH);
8421 TERM(sublex_start());
8422
8423 case KEY_map:
8424 LOP(OP_MAPSTART, XREF);
8425
8426 case KEY_mkdir:
8427 LOP(OP_MKDIR,XTERM);
8428
8429 case KEY_msgctl:
8430 LOP(OP_MSGCTL,XTERM);
8431
8432 case KEY_msgget:
8433 LOP(OP_MSGGET,XTERM);
8434
8435 case KEY_msgrcv:
8436 LOP(OP_MSGRCV,XTERM);
8437
8438 case KEY_msgsnd:
8439 LOP(OP_MSGSND,XTERM);
8440
8441 case KEY_our:
8442 case KEY_my:
8443 case KEY_state:
8444 return yyl_my(aTHX_ s, key);
8445
8446 case KEY_next:
8447 LOOPX(OP_NEXT);
8448
8449 case KEY_ne:
8450 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8451 return REPORT(0);
8452 ChEop(OP_SNE);
8453
8454 case KEY_no:
8455 s = tokenize_use(0, s);
8456 TOKEN(KW_USE_or_NO);
8457
8458 case KEY_not:
8459 if (*s == '(' || (s = skipspace(s), *s == '('))
8460 FUN1(OP_NOT);
8461 else {
8462 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8463 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8464 OPERATOR(NOTOP);
8465 }
8466
8467 case KEY_open:
8468 s = skipspace(s);
8469 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
8470 const char *t;
8471 char *d = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len, FALSE);
8472 for (t=d; isSPACE(*t);)
8473 t++;
8474 if ( *t && memCHRs("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8475 /* [perl #16184] */
8476 && !(t[0] == '=' && t[1] == '>')
8477 && !(t[0] == ':' && t[1] == ':')
8478 && !keyword(s, d-s, 0)
8479 ) {
8480 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8481 "Precedence problem: open %" UTF8f " should be open(%" UTF8f ")",
8482 UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8483 }
8484 }
8485 LOP(OP_OPEN,XTERM);
8486
8487 case KEY_or:
8488 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8489 return REPORT(0);
8490 pl_yylval.ival = OP_OR;
8491 OPERATOR(OROP);
8492
8493 case KEY_ord:
8494 UNI(OP_ORD);
8495
8496 case KEY_oct:
8497 UNI(OP_OCT);
8498
8499 case KEY_opendir:
8500 LOP(OP_OPEN_DIR,XTERM);
8501
8502 case KEY_print:
8503 checkcomma(s,PL_tokenbuf,"filehandle");
8504 LOP(OP_PRINT,XREF);
8505
8506 case KEY_printf:
8507 checkcomma(s,PL_tokenbuf,"filehandle");
8508 LOP(OP_PRTF,XREF);
8509
8510 case KEY_prototype:
8511 UNI(OP_PROTOTYPE);
8512
8513 case KEY_push:
8514 LOP(OP_PUSH,XTERM);
8515
8516 case KEY_pop:
8517 UNIDOR(OP_POP);
8518
8519 case KEY_pos:
8520 UNIDOR(OP_POS);
8521
8522 case KEY_pack:
8523 LOP(OP_PACK,XTERM);
8524
8525 case KEY_package:
8526 s = force_word(s,BAREWORD,FALSE,TRUE);
8527 s = skipspace(s);
8528 s = force_strict_version(s);
8529 PREBLOCK(KW_PACKAGE);
8530
8531 case KEY_pipe:
8532 LOP(OP_PIPE_OP,XTERM);
8533
8534 case KEY_q:
8535 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8536 if (!s)
8537 missingterm(NULL, 0);
8538 COPLINE_SET_FROM_MULTI_END;
8539 pl_yylval.ival = OP_CONST;
8540 TERM(sublex_start());
8541
8542 case KEY_quotemeta:
8543 UNI(OP_QUOTEMETA);
8544
8545 case KEY_qw:
8546 return yyl_qw(aTHX_ s, len);
8547
8548 case KEY_qq:
8549 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8550 if (!s)
8551 missingterm(NULL, 0);
8552 pl_yylval.ival = OP_STRINGIFY;
8553 if (SvIVX(PL_lex_stuff) == '\'')
8554 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
8555 TERM(sublex_start());
8556
8557 case KEY_qr:
8558 s = scan_pat(s,OP_QR);
8559 TERM(sublex_start());
8560
8561 case KEY_qx:
8562 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8563 if (!s)
8564 missingterm(NULL, 0);
8565 pl_yylval.ival = OP_BACKTICK;
8566 TERM(sublex_start());
8567
8568 case KEY_return:
8569 OLDLOP(OP_RETURN);
8570
8571 case KEY_require:
8572 return yyl_require(aTHX_ s, orig_keyword);
8573
8574 case KEY_reset:
8575 UNI(OP_RESET);
8576
8577 case KEY_redo:
8578 LOOPX(OP_REDO);
8579
8580 case KEY_rename:
8581 LOP(OP_RENAME,XTERM);
8582
8583 case KEY_rand:
8584 UNI(OP_RAND);
8585
8586 case KEY_rmdir:
8587 UNI(OP_RMDIR);
8588
8589 case KEY_rindex:
8590 LOP(OP_RINDEX,XTERM);
8591
8592 case KEY_read:
8593 LOP(OP_READ,XTERM);
8594
8595 case KEY_readdir:
8596 UNI(OP_READDIR);
8597
8598 case KEY_readline:
8599 UNIDOR(OP_READLINE);
8600
8601 case KEY_readpipe:
8602 UNIDOR(OP_BACKTICK);
8603
8604 case KEY_rewinddir:
8605 UNI(OP_REWINDDIR);
8606
8607 case KEY_recv:
8608 LOP(OP_RECV,XTERM);
8609
8610 case KEY_reverse:
8611 LOP(OP_REVERSE,XTERM);
8612
8613 case KEY_readlink:
8614 UNIDOR(OP_READLINK);
8615
8616 case KEY_ref:
8617 UNI(OP_REF);
8618
8619 case KEY_s:
8620 s = scan_subst(s);
8621 if (pl_yylval.opval)
8622 TERM(sublex_start());
8623 else
8624 TOKEN(1); /* force error */
8625
8626 case KEY_say:
8627 checkcomma(s,PL_tokenbuf,"filehandle");
8628 LOP(OP_SAY,XREF);
8629
8630 case KEY_chomp:
8631 UNI(OP_CHOMP);
8632
8633 case KEY_scalar:
8634 UNI(OP_SCALAR);
8635
8636 case KEY_select:
8637 LOP(OP_SELECT,XTERM);
8638
8639 case KEY_seek:
8640 LOP(OP_SEEK,XTERM);
8641
8642 case KEY_semctl:
8643 LOP(OP_SEMCTL,XTERM);
8644
8645 case KEY_semget:
8646 LOP(OP_SEMGET,XTERM);
8647
8648 case KEY_semop:
8649 LOP(OP_SEMOP,XTERM);
8650
8651 case KEY_send:
8652 LOP(OP_SEND,XTERM);
8653
8654 case KEY_setpgrp:
8655 LOP(OP_SETPGRP,XTERM);
8656
8657 case KEY_setpriority:
8658 LOP(OP_SETPRIORITY,XTERM);
8659
8660 case KEY_sethostent:
8661 UNI(OP_SHOSTENT);
8662
8663 case KEY_setnetent:
8664 UNI(OP_SNETENT);
8665
8666 case KEY_setservent:
8667 UNI(OP_SSERVENT);
8668
8669 case KEY_setprotoent:
8670 UNI(OP_SPROTOENT);
8671
8672 case KEY_setpwent:
8673 FUN0(OP_SPWENT);
8674
8675 case KEY_setgrent:
8676 FUN0(OP_SGRENT);
8677
8678 case KEY_seekdir:
8679 LOP(OP_SEEKDIR,XTERM);
8680
8681 case KEY_setsockopt:
8682 LOP(OP_SSOCKOPT,XTERM);
8683
8684 case KEY_shift:
8685 UNIDOR(OP_SHIFT);
8686
8687 case KEY_shmctl:
8688 LOP(OP_SHMCTL,XTERM);
8689
8690 case KEY_shmget:
8691 LOP(OP_SHMGET,XTERM);
8692
8693 case KEY_shmread:
8694 LOP(OP_SHMREAD,XTERM);
8695
8696 case KEY_shmwrite:
8697 LOP(OP_SHMWRITE,XTERM);
8698
8699 case KEY_shutdown:
8700 LOP(OP_SHUTDOWN,XTERM);
8701
8702 case KEY_sin:
8703 UNI(OP_SIN);
8704
8705 case KEY_sleep:
8706 UNI(OP_SLEEP);
8707
8708 case KEY_socket:
8709 LOP(OP_SOCKET,XTERM);
8710
8711 case KEY_socketpair:
8712 LOP(OP_SOCKPAIR,XTERM);
8713
8714 case KEY_sort:
8715 checkcomma(s,PL_tokenbuf,"subroutine name");
8716 s = skipspace(s);
8717 PL_expect = XTERM;
8718 s = force_word(s,BAREWORD,TRUE,TRUE);
8719 LOP(OP_SORT,XREF);
8720
8721 case KEY_split:
8722 LOP(OP_SPLIT,XTERM);
8723
8724 case KEY_sprintf:
8725 LOP(OP_SPRINTF,XTERM);
8726
8727 case KEY_splice:
8728 LOP(OP_SPLICE,XTERM);
8729
8730 case KEY_sqrt:
8731 UNI(OP_SQRT);
8732
8733 case KEY_srand:
8734 UNI(OP_SRAND);
8735
8736 case KEY_stat:
8737 UNI(OP_STAT);
8738
8739 case KEY_study:
8740 UNI(OP_STUDY);
8741
8742 case KEY_substr:
8743 LOP(OP_SUBSTR,XTERM);
8744
8745 case KEY_method:
8746 /* For now we just treat 'method' identical to 'sub' plus a warning */
8747 Perl_ck_warner_d(aTHX_
8748 packWARN(WARN_EXPERIMENTAL__CLASS), "method is experimental");
8749 return yyl_sub(aTHX_ s, KEY_method);
8750
8751 case KEY_format:
8752 case KEY_sub:
8753 return yyl_sub(aTHX_ s, key);
8754
8755 case KEY_system:
8756 LOP(OP_SYSTEM,XREF);
8757
8758 case KEY_symlink:
8759 LOP(OP_SYMLINK,XTERM);
8760
8761 case KEY_syscall:
8762 LOP(OP_SYSCALL,XTERM);
8763
8764 case KEY_sysopen:
8765 LOP(OP_SYSOPEN,XTERM);
8766
8767 case KEY_sysseek:
8768 LOP(OP_SYSSEEK,XTERM);
8769
8770 case KEY_sysread:
8771 LOP(OP_SYSREAD,XTERM);
8772
8773 case KEY_syswrite:
8774 LOP(OP_SYSWRITE,XTERM);
8775
8776 case KEY_tr:
8777 case KEY_y:
8778 s = scan_trans(s);
8779 TERM(sublex_start());
8780
8781 case KEY_tell:
8782 UNI(OP_TELL);
8783
8784 case KEY_telldir:
8785 UNI(OP_TELLDIR);
8786
8787 case KEY_tie:
8788 LOP(OP_TIE,XTERM);
8789
8790 case KEY_tied:
8791 UNI(OP_TIED);
8792
8793 case KEY_time:
8794 FUN0(OP_TIME);
8795
8796 case KEY_times:
8797 FUN0(OP_TMS);
8798
8799 case KEY_truncate:
8800 LOP(OP_TRUNCATE,XTERM);
8801
8802 case KEY_try:
8803 pl_yylval.ival = CopLINE(PL_curcop);
8804 PREBLOCK(KW_TRY);
8805
8806 case KEY_uc:
8807 UNI(OP_UC);
8808
8809 case KEY_ucfirst:
8810 UNI(OP_UCFIRST);
8811
8812 case KEY_untie:
8813 UNI(OP_UNTIE);
8814
8815 case KEY_until:
8816 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8817 return REPORT(0);
8818 pl_yylval.ival = CopLINE(PL_curcop);
8819 OPERATOR(KW_UNTIL);
8820
8821 case KEY_unless:
8822 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8823 return REPORT(0);
8824 pl_yylval.ival = CopLINE(PL_curcop);
8825 OPERATOR(KW_UNLESS);
8826
8827 case KEY_unlink:
8828 LOP(OP_UNLINK,XTERM);
8829
8830 case KEY_undef:
8831 UNIDOR(OP_UNDEF);
8832
8833 case KEY_unpack:
8834 LOP(OP_UNPACK,XTERM);
8835
8836 case KEY_utime:
8837 LOP(OP_UTIME,XTERM);
8838
8839 case KEY_umask:
8840 UNIDOR(OP_UMASK);
8841
8842 case KEY_unshift:
8843 LOP(OP_UNSHIFT,XTERM);
8844
8845 case KEY_use:
8846 s = tokenize_use(1, s);
8847 TOKEN(KW_USE_or_NO);
8848
8849 case KEY_values:
8850 UNI(OP_VALUES);
8851
8852 case KEY_vec:
8853 LOP(OP_VEC,XTERM);
8854
8855 case KEY_when:
8856 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8857 return REPORT(0);
8858 pl_yylval.ival = CopLINE(PL_curcop);
8859 Perl_ck_warner_d(aTHX_
8860 packWARN(WARN_DEPRECATED__SMARTMATCH),
8861 "when is deprecated");
8862 OPERATOR(KW_WHEN);
8863
8864 case KEY_while:
8865 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8866 return REPORT(0);
8867 pl_yylval.ival = CopLINE(PL_curcop);
8868 OPERATOR(KW_WHILE);
8869
8870 case KEY_warn:
8871 PL_hints |= HINT_BLOCK_SCOPE;
8872 LOP(OP_WARN,XTERM);
8873
8874 case KEY_wait:
8875 FUN0(OP_WAIT);
8876
8877 case KEY_waitpid:
8878 LOP(OP_WAITPID,XTERM);
8879
8880 case KEY_wantarray:
8881 FUN0(OP_WANTARRAY);
8882
8883 case KEY_write:
8884 /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8885 * we use the same number on EBCDIC */
8886 gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8887 UNI(OP_ENTERWRITE);
8888
8889 case KEY_x:
8890 if (PL_expect == XOPERATOR) {
8891 if (*s == '=' && !PL_lex_allbrackets
8892 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8893 {
8894 return REPORT(0);
8895 }
8896 Mop(OP_REPEAT);
8897 }
8898 check_uni();
8899 return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
8900
8901 case KEY_xor:
8902 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8903 return REPORT(0);
8904 pl_yylval.ival = OP_XOR;
8905 OPERATOR(OROP);
8906 }
8907 }
8908
8909 static int
yyl_key_core(pTHX_ char * s,STRLEN len,struct code c)8910 yyl_key_core(pTHX_ char *s, STRLEN len, struct code c)
8911 {
8912 I32 key = 0;
8913 I32 orig_keyword = 0;
8914 STRLEN olen = len;
8915 char *d = s;
8916 s += 2;
8917 s = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len, FALSE);
8918 if ((*s == ':' && s[1] == ':')
8919 || (!(key = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
8920 {
8921 Copy(PL_bufptr, PL_tokenbuf, olen, char);
8922 return yyl_just_a_word(aTHX_ d, olen, 0, c);
8923 }
8924 if (!key)
8925 Perl_croak(aTHX_ "CORE::%" UTF8f " is not a keyword",
8926 UTF8fARG(UTF, len, PL_tokenbuf));
8927 if (key < 0)
8928 key = -key;
8929 else if (key == KEY_require || key == KEY_do
8930 || key == KEY_glob)
8931 /* that's a way to remember we saw "CORE::" */
8932 orig_keyword = key;
8933
8934 /* Known to be a reserved word at this point */
8935 return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c);
8936 }
8937
8938 struct Perl_custom_infix_result {
8939 struct Perl_custom_infix *def;
8940 SV *parsedata;
8941 };
8942
tokentype_for_plugop(struct Perl_custom_infix * def)8943 static enum yytokentype tokentype_for_plugop(struct Perl_custom_infix *def)
8944 {
8945 enum Perl_custom_infix_precedence prec = def->prec;
8946 if(prec <= INFIX_PREC_LOW)
8947 return PLUGIN_LOW_OP;
8948 if(prec <= INFIX_PREC_LOGICAL_OR_LOW)
8949 return PLUGIN_LOGICAL_OR_LOW_OP;
8950 if(prec <= INFIX_PREC_LOGICAL_AND_LOW)
8951 return PLUGIN_LOGICAL_AND_LOW_OP;
8952 if(prec <= INFIX_PREC_ASSIGN)
8953 return PLUGIN_ASSIGN_OP;
8954 if(prec <= INFIX_PREC_LOGICAL_OR)
8955 return PLUGIN_LOGICAL_OR_OP;
8956 if(prec <= INFIX_PREC_LOGICAL_AND)
8957 return PLUGIN_LOGICAL_AND_OP;
8958 if(prec <= INFIX_PREC_REL)
8959 return PLUGIN_REL_OP;
8960 if(prec <= INFIX_PREC_ADD)
8961 return PLUGIN_ADD_OP;
8962 if(prec <= INFIX_PREC_MUL)
8963 return PLUGIN_MUL_OP;
8964 if(prec <= INFIX_PREC_POW)
8965 return PLUGIN_POW_OP;
8966 return PLUGIN_HIGH_OP;
8967 }
8968
8969 OP *
Perl_build_infix_plugin(pTHX_ OP * lhs,OP * rhs,void * tokendata)8970 Perl_build_infix_plugin(pTHX_ OP *lhs, OP *rhs, void *tokendata)
8971 {
8972 PERL_ARGS_ASSERT_BUILD_INFIX_PLUGIN;
8973
8974 struct Perl_custom_infix_result *result = (struct Perl_custom_infix_result *)tokendata;
8975 SAVEFREEPV(result);
8976 if(result->parsedata)
8977 SAVEFREESV(result->parsedata);
8978
8979 return (*result->def->build_op)(aTHX_
8980 &result->parsedata, lhs, rhs, result->def);
8981 }
8982
8983 static int
yyl_keylookup(pTHX_ char * s,GV * gv)8984 yyl_keylookup(pTHX_ char *s, GV *gv)
8985 {
8986 STRLEN len;
8987 bool anydelim;
8988 I32 key;
8989 struct code c = no_code;
8990 I32 orig_keyword = 0;
8991 char *d;
8992
8993 c.gv = gv;
8994
8995 PL_bufptr = s;
8996 s = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len, FALSE);
8997
8998 /* Some keywords can be followed by any delimiter, including ':' */
8999 anydelim = word_takes_any_delimiter(PL_tokenbuf, len);
9000
9001 /* x::* is just a word, unless x is "CORE" */
9002 if (!anydelim && *s == ':' && s[1] == ':') {
9003 if (memEQs(PL_tokenbuf, len, "CORE"))
9004 return yyl_key_core(aTHX_ s, len, c);
9005 return yyl_just_a_word(aTHX_ s, len, 0, c);
9006 }
9007
9008 d = s;
9009 while (d < PL_bufend && isSPACE(*d))
9010 d++; /* no comments skipped here, or s### is misparsed */
9011
9012 /* Is this a word before a => operator? */
9013 if (*d == '=' && d[1] == '>') {
9014 return yyl_fatcomma(aTHX_ s, len);
9015 }
9016
9017 /* Check for plugged-in keyword */
9018 {
9019 OP *o;
9020 int result;
9021 char *saved_bufptr = PL_bufptr;
9022 PL_bufptr = s;
9023 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
9024 s = PL_bufptr;
9025 if (result == KEYWORD_PLUGIN_DECLINE) {
9026 /* not a plugged-in keyword */
9027 PL_bufptr = saved_bufptr;
9028 } else if (result == KEYWORD_PLUGIN_STMT) {
9029 pl_yylval.opval = o;
9030 CLINE;
9031 if (!PL_nexttoke) PL_expect = XSTATE;
9032 return REPORT(PLUGSTMT);
9033 } else if (result == KEYWORD_PLUGIN_EXPR) {
9034 pl_yylval.opval = o;
9035 CLINE;
9036 if (!PL_nexttoke) PL_expect = XOPERATOR;
9037 return REPORT(PLUGEXPR);
9038 } else {
9039 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'", PL_tokenbuf);
9040 }
9041 }
9042
9043 /* Check for plugged-in named operator */
9044 if(PLUGINFIX_IS_ENABLED) {
9045 struct Perl_custom_infix *def;
9046 STRLEN result;
9047 result = PL_infix_plugin(aTHX_ PL_tokenbuf, len, &def);
9048 if(result) {
9049 if(result != len)
9050 Perl_croak(aTHX_ "Bad infix plugin result (%zd) - did not consume entire identifier <%s>\n",
9051 result, PL_tokenbuf);
9052 PL_bufptr = s = d;
9053 struct Perl_custom_infix_result *result;
9054 Newx(result, 1, struct Perl_custom_infix_result);
9055 result->def = def;
9056 result->parsedata = NULL;
9057 if(def->parse) {
9058 (*def->parse)(aTHX_ &result->parsedata, def);
9059 s = PL_bufptr; /* restore local s variable */
9060 }
9061 pl_yylval.pval = result;
9062 CLINE;
9063 OPERATOR(tokentype_for_plugop(def));
9064 }
9065 }
9066
9067 /* Is this a label? */
9068 if (!anydelim && PL_expect == XSTATE
9069 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
9070 s = d + 1;
9071 pl_yylval.opval =
9072 newSVOP(OP_CONST, 0,
9073 newSVpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0));
9074 CLINE;
9075 TOKEN(LABEL);
9076 }
9077
9078 /* Check for lexical sub */
9079 if (PL_expect != XOPERATOR) {
9080 char tmpbuf[sizeof PL_tokenbuf + 1];
9081 *tmpbuf = '&';
9082 Copy(PL_tokenbuf, tmpbuf+1, len, char);
9083 c.off = pad_findmy_pvn(tmpbuf, len+1, 0);
9084 if (c.off != NOT_IN_PAD) {
9085 assert(c.off); /* we assume this is boolean-true below */
9086 if (PAD_COMPNAME_FLAGS_isOUR(c.off)) {
9087 HV * const stash = PAD_COMPNAME_OURSTASH(c.off);
9088 HEK * const stashname = HvNAME_HEK(stash);
9089 c.sv = newSVhek(stashname);
9090 sv_catpvs(c.sv, "::");
9091 sv_catpvn_flags(c.sv, PL_tokenbuf, len,
9092 (UTF ? SV_CATUTF8 : SV_CATBYTES));
9093 c.gv = gv_fetchsv(c.sv, GV_NOADD_NOINIT | SvUTF8(c.sv),
9094 SVt_PVCV);
9095 c.off = 0;
9096 if (!c.gv) {
9097 ASSUME(c.sv && SvREFCNT(c.sv) == 1);
9098 SvREFCNT_dec(c.sv);
9099 c.sv = NULL;
9100 return yyl_just_a_word(aTHX_ s, len, 0, c);
9101 }
9102 }
9103 else {
9104 c.rv2cv_op = newOP(OP_PADANY, 0);
9105 c.rv2cv_op->op_targ = c.off;
9106 c.cv = find_lexical_cv(c.off);
9107 }
9108 c.lex = TRUE;
9109 return yyl_just_a_word(aTHX_ s, len, 0, c);
9110 }
9111 c.off = 0;
9112 }
9113
9114 /* Check for built-in keyword */
9115 key = keyword(PL_tokenbuf, len, 0);
9116
9117 if (key < 0)
9118 key = yyl_secondclass_keyword(aTHX_ s, len, key, &orig_keyword, &c.gv, &c.gvp);
9119
9120 if (key && key != KEY___DATA__ && key != KEY___END__
9121 && (!anydelim || *s != '#')) {
9122 /* no override, and not s### either; skipspace is safe here
9123 * check for => on following line */
9124 bool arrow;
9125 STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
9126 STRLEN soff = s - SvPVX(PL_linestr);
9127 s = peekspace(s);
9128 arrow = *s == '=' && s[1] == '>';
9129 PL_bufptr = SvPVX(PL_linestr) + bufoff;
9130 s = SvPVX(PL_linestr) + soff;
9131 if (arrow)
9132 return yyl_fatcomma(aTHX_ s, len);
9133 }
9134
9135 return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c);
9136 }
9137
9138 static int
yyl_try(pTHX_ char * s)9139 yyl_try(pTHX_ char *s)
9140 {
9141 char *d;
9142 GV *gv = NULL;
9143 int tok;
9144
9145 retry:
9146 /* Check for plugged-in symbolic operator */
9147 if(PLUGINFIX_IS_ENABLED && isPLUGINFIX_FIRST(*s)) {
9148 struct Perl_custom_infix *def;
9149 char *s_end = s, *d = PL_tokenbuf;
9150 STRLEN len;
9151
9152 /* Copy the longest sequence of isPLUGINFIX() chars into PL_tokenbuf */
9153 while(s_end < PL_bufend && d < PL_tokenbuf+sizeof(PL_tokenbuf)-1 && isPLUGINFIX(*s_end))
9154 *d++ = *s_end++;
9155 *d = '\0';
9156
9157 if((len = (*PL_infix_plugin)(aTHX_ PL_tokenbuf, s_end - s, &def))) {
9158 s += len;
9159 struct Perl_custom_infix_result *result;
9160 Newx(result, 1, struct Perl_custom_infix_result);
9161 result->def = def;
9162 result->parsedata = NULL;
9163 if(def->parse) {
9164 PL_bufptr = s;
9165 (*def->parse)(aTHX_ &result->parsedata, def);
9166 s = PL_bufptr; /* restore local s variable */
9167 }
9168 pl_yylval.pval = result;
9169 CLINE;
9170 OPERATOR(tokentype_for_plugop(def));
9171 }
9172 }
9173
9174 switch (*s) {
9175 default:
9176 if (UTF ? isIDFIRST_utf8_safe(s, PL_bufend) : isALNUMC(*s)) {
9177 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9178 return tok;
9179 goto retry_bufptr;
9180 }
9181 yyl_croak_unrecognised(aTHX_ s);
9182
9183 case 4:
9184 case 26:
9185 /* emulate EOF on ^D or ^Z */
9186 if ((tok = yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s)) != YYL_RETRY)
9187 return tok;
9188 retry_bufptr:
9189 s = PL_bufptr;
9190 goto retry;
9191
9192 case 0:
9193 if ((!PL_rsfp || PL_lex_inwhat)
9194 && (!PL_parser->filtered || s+1 < PL_bufend)) {
9195 PL_last_uni = 0;
9196 PL_last_lop = 0;
9197 if (PL_lex_brackets
9198 && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF)
9199 {
9200 yyerror((const char *)
9201 (PL_lex_formbrack
9202 ? "Format not terminated"
9203 : "Missing right curly or square bracket"));
9204 }
9205 DEBUG_T({
9206 PerlIO_printf(Perl_debug_log, "### Tokener got EOF\n");
9207 });
9208 TOKEN(0);
9209 }
9210 if (s++ < PL_bufend)
9211 goto retry; /* ignore stray nulls */
9212 PL_last_uni = 0;
9213 PL_last_lop = 0;
9214 if (!PL_in_eval && !PL_preambled) {
9215 PL_preambled = TRUE;
9216 if (PL_perldb) {
9217 /* Generate a string of Perl code to load the debugger.
9218 * If PERL5DB is set, it will return the contents of that,
9219 * otherwise a compile-time require of perl5db.pl. */
9220
9221 const char * const pdb = PerlEnv_getenv("PERL5DB");
9222
9223 if (pdb) {
9224 sv_setpv(PL_linestr, pdb);
9225 sv_catpvs(PL_linestr,";");
9226 } else {
9227 SETERRNO(0,SS_NORMAL);
9228 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
9229 }
9230 PL_parser->preambling = CopLINE(PL_curcop);
9231 } else
9232 SvPVCLEAR(PL_linestr);
9233 if (PL_preambleav) {
9234 SV **svp = AvARRAY(PL_preambleav);
9235 SV **const end = svp + AvFILLp(PL_preambleav);
9236 while(svp <= end) {
9237 sv_catsv(PL_linestr, *svp);
9238 ++svp;
9239 sv_catpvs(PL_linestr, ";");
9240 }
9241 SvREFCNT_dec(MUTABLE_SV(PL_preambleav));
9242 PL_preambleav = NULL;
9243 }
9244 if (PL_minus_E)
9245 sv_catpvs(PL_linestr,
9246 "use feature ':" STRINGIFY(PERL_REVISION) "." STRINGIFY(PERL_VERSION) "'; "
9247 "use builtin ':" STRINGIFY(PERL_REVISION) "." STRINGIFY(PERL_VERSION) "';");
9248 if (PL_minus_n || PL_minus_p) {
9249 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
9250 if (PL_minus_l)
9251 sv_catpvs(PL_linestr,"chomp;");
9252 if (PL_minus_a) {
9253 if (PL_minus_F) {
9254 if ( ( *PL_splitstr == '/'
9255 || *PL_splitstr == '\''
9256 || *PL_splitstr == '"')
9257 && strchr(PL_splitstr + 1, *PL_splitstr))
9258 {
9259 /* strchr is ok, because -F pattern can't contain
9260 * embedded NULs */
9261 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
9262 }
9263 else {
9264 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
9265 bytes can be used as quoting characters. :-) */
9266 const char *splits = PL_splitstr;
9267 sv_catpvs(PL_linestr, "our @F=split(q\0");
9268 do {
9269 /* Need to \ \s */
9270 if (*splits == '\\')
9271 sv_catpvn(PL_linestr, splits, 1);
9272 sv_catpvn(PL_linestr, splits, 1);
9273 } while (*splits++);
9274 /* This loop will embed the trailing NUL of
9275 PL_linestr as the last thing it does before
9276 terminating. */
9277 sv_catpvs(PL_linestr, ");");
9278 }
9279 }
9280 else
9281 sv_catpvs(PL_linestr,"our @F=split(' ');");
9282 }
9283 }
9284 sv_catpvs(PL_linestr, "\n");
9285 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
9286 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9287 PL_last_lop = PL_last_uni = NULL;
9288 if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash)
9289 update_debugger_info(PL_linestr, NULL, 0);
9290 goto retry;
9291 }
9292 if ((tok = yyl_fake_eof(aTHX_ 0, cBOOL(PL_rsfp), s)) != YYL_RETRY)
9293 return tok;
9294 goto retry_bufptr;
9295
9296 case '\r':
9297 #ifdef PERL_STRICT_CR
9298 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
9299 Perl_croak(aTHX_
9300 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
9301 #endif
9302 case ' ': case '\t': case '\f': case '\v':
9303 s++;
9304 goto retry;
9305
9306 case '#':
9307 case '\n': {
9308 const bool needs_semicolon = yyl_eol_needs_semicolon(aTHX_ &s);
9309 if (needs_semicolon)
9310 TOKEN(PERLY_SEMICOLON);
9311 else
9312 goto retry;
9313 }
9314
9315 case '-':
9316 return yyl_hyphen(aTHX_ s);
9317
9318 case '+':
9319 return yyl_plus(aTHX_ s);
9320
9321 case '*':
9322 return yyl_star(aTHX_ s);
9323
9324 case '%':
9325 return yyl_percent(aTHX_ s);
9326
9327 case '^':
9328 return yyl_caret(aTHX_ s);
9329
9330 case '[':
9331 return yyl_leftsquare(aTHX_ s);
9332
9333 case '~':
9334 return yyl_tilde(aTHX_ s);
9335
9336 case ',':
9337 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
9338 TOKEN(0);
9339 s++;
9340 OPERATOR(PERLY_COMMA);
9341 case ':':
9342 if (s[1] == ':')
9343 return yyl_just_a_word(aTHX_ s, 0, 0, no_code);
9344 return yyl_colon(aTHX_ s + 1);
9345
9346 case '(':
9347 return yyl_leftparen(aTHX_ s + 1);
9348
9349 case ';':
9350 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
9351 TOKEN(0);
9352 CLINE;
9353 s++;
9354 PL_expect = XSTATE;
9355 TOKEN(PERLY_SEMICOLON);
9356
9357 case ')':
9358 return yyl_rightparen(aTHX_ s);
9359
9360 case ']':
9361 return yyl_rightsquare(aTHX_ s);
9362
9363 case '{':
9364 return yyl_leftcurly(aTHX_ s + 1, 0);
9365
9366 case '}':
9367 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
9368 TOKEN(0);
9369 return yyl_rightcurly(aTHX_ s, 0);
9370
9371 case '&':
9372 return yyl_ampersand(aTHX_ s);
9373
9374 case '|':
9375 return yyl_verticalbar(aTHX_ s);
9376
9377 case '=':
9378 if (s[1] == '=' && (s == PL_linestart || s[-1] == '\n')
9379 && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "====="))
9380 {
9381 s = vcs_conflict_marker(s + 7);
9382 goto retry;
9383 }
9384
9385 s++;
9386 {
9387 const char tmp = *s++;
9388 if (tmp == '=') {
9389 if (!PL_lex_allbrackets
9390 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
9391 {
9392 s -= 2;
9393 TOKEN(0);
9394 }
9395 ChEop(OP_EQ);
9396 }
9397 if (tmp == '>') {
9398 if (!PL_lex_allbrackets
9399 && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
9400 {
9401 s -= 2;
9402 TOKEN(0);
9403 }
9404 OPERATOR(PERLY_COMMA);
9405 }
9406 if (tmp == '~')
9407 PMop(OP_MATCH);
9408 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
9409 && memCHRs("+-*/%.^&|<",tmp))
9410 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9411 "Reversed %c= operator",(int)tmp);
9412 s--;
9413 if (PL_expect == XSTATE
9414 && isALPHA(tmp)
9415 && (s == PL_linestart+1 || s[-2] == '\n') )
9416 {
9417 if ( (PL_in_eval && !PL_rsfp && !PL_parser->filtered)
9418 || PL_lex_state != LEX_NORMAL)
9419 {
9420 d = PL_bufend;
9421 while (s < d) {
9422 if (*s++ == '\n') {
9423 incline(s, PL_bufend);
9424 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=cut"))
9425 {
9426 s = (char *) memchr(s,'\n', d - s);
9427 if (s)
9428 s++;
9429 else
9430 s = d;
9431 incline(s, PL_bufend);
9432 goto retry;
9433 }
9434 }
9435 }
9436 goto retry;
9437 }
9438 s = PL_bufend;
9439 PL_parser->in_pod = 1;
9440 goto retry;
9441 }
9442 }
9443 if (PL_expect == XBLOCK) {
9444 const char *t = s;
9445 #ifdef PERL_STRICT_CR
9446 while (SPACE_OR_TAB(*t))
9447 #else
9448 while (SPACE_OR_TAB(*t) || *t == '\r')
9449 #endif
9450 t++;
9451 if (*t == '\n' || *t == '#') {
9452 ENTER_with_name("lex_format");
9453 SAVEI8(PL_parser->form_lex_state);
9454 SAVEI32(PL_lex_formbrack);
9455 PL_parser->form_lex_state = PL_lex_state;
9456 PL_lex_formbrack = PL_lex_brackets + 1;
9457 PL_parser->sub_error_count = PL_error_count;
9458 return yyl_leftcurly(aTHX_ s, 1);
9459 }
9460 }
9461 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
9462 s--;
9463 TOKEN(0);
9464 }
9465 pl_yylval.ival = 0;
9466 OPERATOR(ASSIGNOP);
9467
9468 case '!':
9469 return yyl_bang(aTHX_ s + 1);
9470
9471 case '<':
9472 if (s[1] == '<' && (s == PL_linestart || s[-1] == '\n')
9473 && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "<<<<<"))
9474 {
9475 s = vcs_conflict_marker(s + 7);
9476 goto retry;
9477 }
9478 return yyl_leftpointy(aTHX_ s);
9479
9480 case '>':
9481 if (s[1] == '>' && (s == PL_linestart || s[-1] == '\n')
9482 && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), ">>>>>"))
9483 {
9484 s = vcs_conflict_marker(s + 7);
9485 goto retry;
9486 }
9487 return yyl_rightpointy(aTHX_ s + 1);
9488
9489 case '$':
9490 return yyl_dollar(aTHX_ s);
9491
9492 case '@':
9493 return yyl_snail(aTHX_ s);
9494
9495 case '/': /* may be division, defined-or, or pattern */
9496 return yyl_slash(aTHX_ s);
9497
9498 case '?': /* conditional */
9499 s++;
9500 if (!PL_lex_allbrackets
9501 && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE)
9502 {
9503 s--;
9504 TOKEN(0);
9505 }
9506 PL_lex_allbrackets++;
9507 OPERATOR(PERLY_QUESTION_MARK);
9508
9509 case '.':
9510 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
9511 #ifdef PERL_STRICT_CR
9512 && s[1] == '\n'
9513 #else
9514 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
9515 #endif
9516 && (s == PL_linestart || s[-1] == '\n') )
9517 {
9518 PL_expect = XSTATE;
9519 /* formbrack==2 means dot seen where arguments expected */
9520 return yyl_rightcurly(aTHX_ s, 2);
9521 }
9522 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
9523 s += 3;
9524 OPERATOR(YADAYADA);
9525 }
9526 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
9527 char tmp = *s++;
9528 if (*s == tmp) {
9529 if (!PL_lex_allbrackets
9530 && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE)
9531 {
9532 s--;
9533 TOKEN(0);
9534 }
9535 s++;
9536 if (*s == tmp) {
9537 s++;
9538 pl_yylval.ival = OPf_SPECIAL;
9539 }
9540 else
9541 pl_yylval.ival = 0;
9542 OPERATOR(DOTDOT);
9543 }
9544 if (*s == '=' && !PL_lex_allbrackets
9545 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
9546 {
9547 s--;
9548 TOKEN(0);
9549 }
9550 Aop(OP_CONCAT);
9551 }
9552 /* FALLTHROUGH */
9553 case '0': case '1': case '2': case '3': case '4':
9554 case '5': case '6': case '7': case '8': case '9':
9555 s = scan_num(s, &pl_yylval);
9556 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
9557 if (PL_expect == XOPERATOR)
9558 no_op("Number",s);
9559 TERM(THING);
9560
9561 case '\'':
9562 return yyl_sglquote(aTHX_ s);
9563
9564 case '"':
9565 return yyl_dblquote(aTHX_ s);
9566
9567 case '`':
9568 return yyl_backtick(aTHX_ s);
9569
9570 case '\\':
9571 return yyl_backslash(aTHX_ s + 1);
9572
9573 case 'v':
9574 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
9575 char *start = s + 2;
9576 while (isDIGIT(*start) || *start == '_')
9577 start++;
9578 if (*start == '.' && isDIGIT(start[1])) {
9579 s = scan_num(s, &pl_yylval);
9580 TERM(THING);
9581 }
9582 else if ((*start == ':' && start[1] == ':')
9583 || (PL_expect == XSTATE && *start == ':')) {
9584 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9585 return tok;
9586 goto retry_bufptr;
9587 }
9588 else if (PL_expect == XSTATE) {
9589 d = start;
9590 while (d < PL_bufend && isSPACE(*d)) d++;
9591 if (*d == ':') {
9592 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9593 return tok;
9594 goto retry_bufptr;
9595 }
9596 }
9597 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
9598 if (!isALPHA(*start) && (PL_expect == XTERM
9599 || PL_expect == XREF || PL_expect == XSTATE
9600 || PL_expect == XTERMORDORDOR)) {
9601 GV *const gv = gv_fetchpvn_flags(s, start - s,
9602 UTF ? SVf_UTF8 : 0, SVt_PVCV);
9603 if (!gv) {
9604 s = scan_num(s, &pl_yylval);
9605 TERM(THING);
9606 }
9607 }
9608 }
9609 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9610 return tok;
9611 goto retry_bufptr;
9612
9613 case 'x':
9614 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
9615 s++;
9616 Mop(OP_REPEAT);
9617 }
9618 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9619 return tok;
9620 goto retry_bufptr;
9621
9622 case '_':
9623 case 'a': case 'A':
9624 case 'b': case 'B':
9625 case 'c': case 'C':
9626 case 'd': case 'D':
9627 case 'e': case 'E':
9628 case 'f': case 'F':
9629 case 'g': case 'G':
9630 case 'h': case 'H':
9631 case 'i': case 'I':
9632 case 'j': case 'J':
9633 case 'k': case 'K':
9634 case 'l': case 'L':
9635 case 'm': case 'M':
9636 case 'n': case 'N':
9637 case 'o': case 'O':
9638 case 'p': case 'P':
9639 case 'q': case 'Q':
9640 case 'r': case 'R':
9641 case 's': case 'S':
9642 case 't': case 'T':
9643 case 'u': case 'U':
9644 case 'V':
9645 case 'w': case 'W':
9646 case 'X':
9647 case 'y': case 'Y':
9648 case 'z': case 'Z':
9649 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9650 return tok;
9651 goto retry_bufptr;
9652 }
9653 }
9654
9655
9656 /*
9657 yylex
9658
9659 Works out what to call the token just pulled out of the input
9660 stream. The yacc parser takes care of taking the ops we return and
9661 stitching them into a tree.
9662
9663 Returns:
9664 The type of the next token
9665
9666 Structure:
9667 Check if we have already built the token; if so, use it.
9668 Switch based on the current state:
9669 - if we have a case modifier in a string, deal with that
9670 - handle other cases of interpolation inside a string
9671 - scan the next line if we are inside a format
9672 In the normal state, switch on the next character:
9673 - default:
9674 if alphabetic, go to key lookup
9675 unrecognized character - croak
9676 - 0/4/26: handle end-of-line or EOF
9677 - cases for whitespace
9678 - \n and #: handle comments and line numbers
9679 - various operators, brackets and sigils
9680 - numbers
9681 - quotes
9682 - 'v': vstrings (or go to key lookup)
9683 - 'x' repetition operator (or go to key lookup)
9684 - other ASCII alphanumerics (key lookup begins here):
9685 word before => ?
9686 keyword plugin
9687 scan built-in keyword (but do nothing with it yet)
9688 check for statement label
9689 check for lexical subs
9690 return yyl_just_a_word if there is one
9691 see whether built-in keyword is overridden
9692 switch on keyword number:
9693 - default: return yyl_just_a_word:
9694 not a built-in keyword; handle bareword lookup
9695 disambiguate between method and sub call
9696 fall back to bareword
9697 - cases for built-in keywords
9698 */
9699
9700 int
Perl_yylex(pTHX)9701 Perl_yylex(pTHX)
9702 {
9703 char *s = PL_bufptr;
9704
9705 if (UNLIKELY(PL_parser->recheck_utf8_validity)) {
9706 const U8* first_bad_char_loc;
9707 if (UTF && UNLIKELY(! is_utf8_string_loc((U8 *) PL_bufptr,
9708 PL_bufend - PL_bufptr,
9709 &first_bad_char_loc)))
9710 {
9711 _force_out_malformed_utf8_message(first_bad_char_loc,
9712 (U8 *) PL_bufend,
9713 0,
9714 1 /* 1 means die */ );
9715 NOT_REACHED; /* NOTREACHED */
9716 }
9717 PL_parser->recheck_utf8_validity = FALSE;
9718 }
9719 DEBUG_T( {
9720 SV* tmp = newSVpvs("");
9721 PerlIO_printf(Perl_debug_log, "### %" LINE_Tf ":LEX_%s/X%s %s\n",
9722 CopLINE(PL_curcop),
9723 lex_state_names[PL_lex_state],
9724 exp_name[PL_expect],
9725 pv_display(tmp, s, strlen(s), 0, 60));
9726 SvREFCNT_dec(tmp);
9727 } );
9728
9729 /* when we've already built the next token, just pull it out of the queue */
9730 if (PL_nexttoke) {
9731 PL_nexttoke--;
9732 pl_yylval = PL_nextval[PL_nexttoke];
9733 {
9734 I32 next_type;
9735 next_type = PL_nexttype[PL_nexttoke];
9736 if (next_type & (7<<24)) {
9737 if (next_type & (1<<24)) {
9738 if (PL_lex_brackets > 100)
9739 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
9740 PL_lex_brackstack[PL_lex_brackets++] =
9741 (char) ((U8) (next_type >> 16));
9742 }
9743 if (next_type & (2<<24))
9744 PL_lex_allbrackets++;
9745 if (next_type & (4<<24))
9746 PL_lex_allbrackets--;
9747 next_type &= 0xffff;
9748 }
9749 return REPORT(next_type == 'p' ? pending_ident() : next_type);
9750 }
9751 }
9752
9753 switch (PL_lex_state) {
9754 case LEX_NORMAL:
9755 case LEX_INTERPNORMAL:
9756 break;
9757
9758 /* interpolated case modifiers like \L \U, including \Q and \E.
9759 when we get here, PL_bufptr is at the \
9760 */
9761 case LEX_INTERPCASEMOD:
9762 /* handle \E or end of string */
9763 return yyl_interpcasemod(aTHX_ s);
9764
9765 case LEX_INTERPPUSH:
9766 return REPORT(sublex_push());
9767
9768 case LEX_INTERPSTART:
9769 if (PL_bufptr == PL_bufend)
9770 return REPORT(sublex_done());
9771 DEBUG_T({
9772 if(*PL_bufptr != '(')
9773 PerlIO_printf(Perl_debug_log, "### Interpolated variable\n");
9774 });
9775 PL_expect = XTERM;
9776 /* for /@a/, we leave the joining for the regex engine to do
9777 * (unless we're within \Q etc) */
9778 PL_lex_dojoin = (*PL_bufptr == '@'
9779 && (!PL_lex_inpat || PL_lex_casemods));
9780 PL_lex_state = LEX_INTERPNORMAL;
9781 if (PL_lex_dojoin) {
9782 NEXTVAL_NEXTTOKE.ival = 0;
9783 force_next(PERLY_COMMA);
9784 force_ident("\"", PERLY_DOLLAR);
9785 NEXTVAL_NEXTTOKE.ival = 0;
9786 force_next(PERLY_DOLLAR);
9787 NEXTVAL_NEXTTOKE.ival = 0;
9788 force_next((2<<24)|PERLY_PAREN_OPEN);
9789 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
9790 force_next(FUNC);
9791 }
9792 /* Convert (?{...}) or (*{...}) and friends to 'do {...}' */
9793 if (PL_lex_inpat && *PL_bufptr == '(') {
9794 PL_parser->lex_shared->re_eval_start = PL_bufptr;
9795 PL_bufptr += 2;
9796 if (*PL_bufptr != '{')
9797 PL_bufptr++;
9798 PL_expect = XTERMBLOCK;
9799 force_next(KW_DO);
9800 }
9801
9802 if (PL_lex_starts++) {
9803 s = PL_bufptr;
9804 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
9805 if (!PL_lex_casemods && PL_lex_inpat)
9806 TOKEN(PERLY_COMMA);
9807 else
9808 AopNOASSIGN(OP_CONCAT);
9809 }
9810 return yylex();
9811
9812 case LEX_INTERPENDMAYBE:
9813 if (intuit_more(PL_bufptr, PL_bufend)) {
9814 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
9815 break;
9816 }
9817 /* FALLTHROUGH */
9818
9819 case LEX_INTERPEND:
9820 if (PL_lex_dojoin) {
9821 const U8 dojoin_was = PL_lex_dojoin;
9822 PL_lex_dojoin = FALSE;
9823 PL_lex_state = LEX_INTERPCONCAT;
9824 PL_lex_allbrackets--;
9825 return REPORT(dojoin_was == 1 ? (int)PERLY_PAREN_CLOSE : (int)POSTJOIN);
9826 }
9827 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
9828 && SvEVALED(PL_lex_repl))
9829 {
9830 if (PL_bufptr != PL_bufend)
9831 Perl_croak(aTHX_ "Bad evalled substitution pattern");
9832 PL_lex_repl = NULL;
9833 }
9834 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets
9835 re_eval_str. If the here-doc body's length equals the previous
9836 value of re_eval_start, re_eval_start will now be null. So
9837 check re_eval_str as well. */
9838 if (PL_parser->lex_shared->re_eval_start
9839 || PL_parser->lex_shared->re_eval_str) {
9840 SV *sv;
9841 if (*PL_bufptr != ')')
9842 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
9843 PL_bufptr++;
9844 /* having compiled a (?{..}) expression, return the original
9845 * text too, as a const */
9846 if (PL_parser->lex_shared->re_eval_str) {
9847 sv = PL_parser->lex_shared->re_eval_str;
9848 PL_parser->lex_shared->re_eval_str = NULL;
9849 SvCUR_set(sv,
9850 PL_bufptr - PL_parser->lex_shared->re_eval_start);
9851 SvPV_shrink_to_cur(sv);
9852 }
9853 else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
9854 PL_bufptr - PL_parser->lex_shared->re_eval_start);
9855 NEXTVAL_NEXTTOKE.opval =
9856 newSVOP(OP_CONST, 0,
9857 sv);
9858 force_next(THING);
9859 PL_parser->lex_shared->re_eval_start = NULL;
9860 PL_expect = XTERM;
9861 return REPORT(PERLY_COMMA);
9862 }
9863
9864 /* FALLTHROUGH */
9865 case LEX_INTERPCONCAT:
9866 #ifdef DEBUGGING
9867 if (PL_lex_brackets)
9868 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
9869 (long) PL_lex_brackets);
9870 #endif
9871 if (PL_bufptr == PL_bufend)
9872 return REPORT(sublex_done());
9873
9874 /* m'foo' still needs to be parsed for possible (?{...}) */
9875 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
9876 SV *sv = newSVsv(PL_linestr);
9877 sv = tokeq(sv);
9878 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
9879 s = PL_bufend;
9880 }
9881 else {
9882 int save_error_count = PL_error_count;
9883
9884 s = scan_const(PL_bufptr);
9885
9886 /* Set flag if this was a pattern and there were errors. op.c will
9887 * refuse to compile a pattern with this flag set. Otherwise, we
9888 * could get segfaults, etc. */
9889 if (PL_lex_inpat && PL_error_count > save_error_count) {
9890 ((PMOP*)PL_lex_inpat)->op_pmflags |= PMf_HAS_ERROR;
9891 }
9892 if (*s == '\\')
9893 PL_lex_state = LEX_INTERPCASEMOD;
9894 else
9895 PL_lex_state = LEX_INTERPSTART;
9896 }
9897
9898 if (s != PL_bufptr) {
9899 NEXTVAL_NEXTTOKE = pl_yylval;
9900 PL_expect = XTERM;
9901 force_next(THING);
9902 if (PL_lex_starts++) {
9903 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
9904 if (!PL_lex_casemods && PL_lex_inpat)
9905 TOKEN(PERLY_COMMA);
9906 else
9907 AopNOASSIGN(OP_CONCAT);
9908 }
9909 else {
9910 PL_bufptr = s;
9911 return yylex();
9912 }
9913 }
9914
9915 return yylex();
9916 case LEX_FORMLINE:
9917 if (PL_parser->sub_error_count != PL_error_count) {
9918 /* There was an error parsing a formline, which tends to
9919 mess up the parser.
9920 Unlike interpolated sub-parsing, we can't treat any of
9921 these as recoverable, so no need to check sub_no_recover.
9922 */
9923 yyquit();
9924 }
9925 assert(PL_lex_formbrack);
9926 s = scan_formline(PL_bufptr);
9927 if (!PL_lex_formbrack)
9928 return yyl_rightcurly(aTHX_ s, 1);
9929 PL_bufptr = s;
9930 return yylex();
9931 }
9932
9933 /* We really do *not* want PL_linestr ever becoming a COW. */
9934 assert (!SvIsCOW(PL_linestr));
9935 s = PL_bufptr;
9936 PL_oldoldbufptr = PL_oldbufptr;
9937 PL_oldbufptr = s;
9938
9939 if (PL_in_my == KEY_sigvar) {
9940 PL_parser->saw_infix_sigil = 0;
9941 return yyl_sigvar(aTHX_ s);
9942 }
9943
9944 {
9945 /* yyl_try() and its callees might consult PL_parser->saw_infix_sigil.
9946 On its return, we then need to set it to indicate whether the token
9947 we just encountered was an infix operator that (if we hadn't been
9948 expecting an operator) have been a sigil.
9949 */
9950 bool expected_operator = (PL_expect == XOPERATOR);
9951 int ret = yyl_try(aTHX_ s);
9952 switch (pl_yylval.ival) {
9953 case OP_BIT_AND:
9954 case OP_MODULO:
9955 case OP_MULTIPLY:
9956 case OP_NBIT_AND:
9957 if (expected_operator) {
9958 PL_parser->saw_infix_sigil = 1;
9959 break;
9960 }
9961 /* FALLTHROUGH */
9962 default:
9963 PL_parser->saw_infix_sigil = 0;
9964 }
9965 return ret;
9966 }
9967 }
9968
9969
9970 /*
9971 S_pending_ident
9972
9973 Looks up an identifier in the pad or in a package
9974
9975 PL_in_my == KEY_sigvar indicates that this is a subroutine signature variable
9976 rather than a plain pad var.
9977
9978 Returns:
9979 PRIVATEREF if this is a lexical name.
9980 BAREWORD if this belongs to a package.
9981
9982 Structure:
9983 if we're in a my declaration
9984 croak if they tried to say my($foo::bar)
9985 build the ops for a my() declaration
9986 if it's an access to a my() variable
9987 build ops for access to a my() variable
9988 if in a dq string, and they've said @foo and we can't find @foo
9989 warn
9990 build ops for a bareword
9991 */
9992
9993 static int
S_pending_ident(pTHX)9994 S_pending_ident(pTHX)
9995 {
9996 PADOFFSET tmp = 0;
9997 const char pit = (char)pl_yylval.ival;
9998 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
9999 /* All routes through this function want to know if there is a colon. */
10000 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
10001
10002 DEBUG_T({ PerlIO_printf(Perl_debug_log,
10003 "### Pending identifier '%s'\n", PL_tokenbuf); });
10004 assert(tokenbuf_len >= 2);
10005
10006 /* if we're in a my(), we can't allow dynamics here.
10007 $foo'bar has already been turned into $foo::bar, so
10008 just check for colons.
10009
10010 if it's a legal name, the OP is a PADANY.
10011 */
10012 if (PL_in_my) {
10013 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
10014 if (has_colon)
10015 /* diag_listed_as: No package name allowed for variable %s
10016 in "our" */
10017 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
10018 "%s %s in \"our\"",
10019 *PL_tokenbuf=='&' ? "subroutine" : "variable",
10020 PL_tokenbuf), UTF ? SVf_UTF8 : 0);
10021 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
10022 }
10023 else {
10024 OP *o;
10025 if (has_colon) {
10026 /* "my" variable %s can't be in a package */
10027 /* PL_no_myglob is constant */
10028 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
10029 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
10030 PL_in_my == KEY_my ? "my" :
10031 PL_in_my == KEY_field ? "field" : "state",
10032 *PL_tokenbuf == '&' ? "subroutine" : "variable",
10033 PL_tokenbuf),
10034 UTF ? SVf_UTF8 : 0);
10035 GCC_DIAG_RESTORE_STMT;
10036 }
10037
10038 if (PL_in_my == KEY_sigvar) {
10039 /* A signature 'padop' needs in addition, an op_first to
10040 * point to a child sigdefelem, and an extra field to hold
10041 * the signature index. We can achieve both by using an
10042 * UNOP_AUX and (ab)using the op_aux field to hold the
10043 * index. If we ever need more fields, use a real malloced
10044 * aux strut instead.
10045 */
10046 o = newUNOP_AUX(OP_ARGELEM, 0, NULL,
10047 INT2PTR(UNOP_AUX_item *,
10048 (PL_parser->sig_elems)));
10049 o->op_private |= ( PL_tokenbuf[0] == '$' ? OPpARGELEM_SV
10050 : PL_tokenbuf[0] == '@' ? OPpARGELEM_AV
10051 : OPpARGELEM_HV);
10052 }
10053 else
10054 o = newOP(OP_PADANY, 0);
10055 o->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
10056 UTF ? SVf_UTF8 : 0);
10057 if (PL_in_my == KEY_sigvar)
10058 PL_in_my = 0;
10059
10060 pl_yylval.opval = o;
10061 return PRIVATEREF;
10062 }
10063 }
10064
10065 /*
10066 build the ops for accesses to a my() variable.
10067 */
10068
10069 if (!has_colon) {
10070 if (!PL_in_my)
10071 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
10072 0);
10073 if (tmp != NOT_IN_PAD) {
10074 /* might be an "our" variable" */
10075 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
10076 /* build ops for a bareword */
10077 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
10078 HEK * const stashname = HvNAME_HEK(stash);
10079 SV * const sym = newSVhek(stashname);
10080 sv_catpvs(sym, "::");
10081 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
10082 pl_yylval.opval = newSVOP(OP_CONST, 0, sym);
10083 pl_yylval.opval->op_private = OPpCONST_ENTERED;
10084 if (pit != '&')
10085 gv_fetchsv(sym,
10086 GV_ADDMULTI,
10087 ((PL_tokenbuf[0] == '$') ? SVt_PV
10088 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
10089 : SVt_PVHV));
10090 return BAREWORD;
10091 }
10092
10093 pl_yylval.opval = newOP(OP_PADANY, 0);
10094 pl_yylval.opval->op_targ = tmp;
10095 return PRIVATEREF;
10096 }
10097 }
10098
10099 /*
10100 Whine if they've said @foo or @foo{key} in a doublequoted string,
10101 and @foo (or %foo) isn't a variable we can find in the symbol
10102 table.
10103 */
10104 if (ckWARN(WARN_AMBIGUOUS)
10105 && pit == '@'
10106 && PL_lex_state != LEX_NORMAL
10107 && !PL_lex_brackets)
10108 {
10109 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
10110 ( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG,
10111 SVt_PVAV);
10112 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
10113 )
10114 {
10115 /* Downgraded from fatal to warning 20000522 mjd */
10116 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10117 "Possible unintended interpolation of %" UTF8f
10118 " in string",
10119 UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
10120 }
10121 }
10122
10123 /* build ops for a bareword */
10124 pl_yylval.opval = newSVOP(OP_CONST, 0,
10125 newSVpvn_flags(PL_tokenbuf + 1,
10126 tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
10127 UTF ? SVf_UTF8 : 0 ));
10128 pl_yylval.opval->op_private = OPpCONST_ENTERED;
10129 if (pit != '&')
10130 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
10131 (PL_in_eval ? GV_ADDMULTI : GV_ADD)
10132 | ( UTF ? SVf_UTF8 : 0 ),
10133 ((PL_tokenbuf[0] == '$') ? SVt_PV
10134 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
10135 : SVt_PVHV));
10136 return BAREWORD;
10137 }
10138
10139 STATIC void
S_checkcomma(pTHX_ const char * s,const char * name,const char * what)10140 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
10141 {
10142 PERL_ARGS_ASSERT_CHECKCOMMA;
10143
10144 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
10145 if (ckWARN(WARN_SYNTAX)) {
10146 int level = 1;
10147 const char *w;
10148 for (w = s+2; *w && level; w++) {
10149 if (*w == '(')
10150 ++level;
10151 else if (*w == ')')
10152 --level;
10153 }
10154 while (isSPACE(*w))
10155 ++w;
10156 /* the list of chars below is for end of statements or
10157 * block / parens, boolean operators (&&, ||, //) and branch
10158 * constructs (or, and, if, until, unless, while, err, for).
10159 * Not a very solid hack... */
10160 if (!*w || !memCHRs(";&/|})]oaiuwef!=", *w))
10161 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10162 "%s (...) interpreted as function",name);
10163 }
10164 }
10165 while (s < PL_bufend && isSPACE(*s))
10166 s++;
10167 if (*s == '(')
10168 s++;
10169 while (s < PL_bufend && isSPACE(*s))
10170 s++;
10171 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
10172 const char * const w = s;
10173 s += UTF ? UTF8SKIP(s) : 1;
10174 while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
10175 s += UTF ? UTF8SKIP(s) : 1;
10176 while (s < PL_bufend && isSPACE(*s))
10177 s++;
10178 if (*s == ',') {
10179 GV* gv;
10180 if (keyword(w, s - w, 0))
10181 return;
10182
10183 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
10184 if (gv && GvCVu(gv))
10185 return;
10186 if (s - w <= 254) {
10187 PADOFFSET off;
10188 char tmpbuf[256];
10189 Copy(w, tmpbuf+1, s - w, char);
10190 *tmpbuf = '&';
10191 off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
10192 if (off != NOT_IN_PAD) return;
10193 }
10194 Perl_croak(aTHX_ "No comma allowed after %s", what);
10195 }
10196 }
10197 }
10198
10199 /* S_new_constant(): do any overload::constant lookup.
10200
10201 Either returns sv, or mortalizes/frees sv and returns a new SV*.
10202 Best used as sv=new_constant(..., sv, ...).
10203 If s, pv are NULL, calls subroutine with one argument,
10204 and <type> is used with error messages only.
10205 <type> is assumed to be well formed UTF-8.
10206
10207 If error_msg is not NULL, *error_msg will be set to any error encountered.
10208 Otherwise yyerror() will be used to output it */
10209
10210 STATIC SV *
S_new_constant(pTHX_ const char * s,STRLEN len,const char * key,STRLEN keylen,SV * sv,SV * pv,const char * type,STRLEN typelen,const char ** error_msg)10211 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
10212 SV *sv, SV *pv, const char *type, STRLEN typelen,
10213 const char ** error_msg)
10214 {
10215 dSP;
10216 HV * table = GvHV(PL_hintgv); /* ^H */
10217 SV *res;
10218 SV *errsv = NULL;
10219 SV **cvp;
10220 SV *cv, *typesv;
10221 const char *why1 = "", *why2 = "", *why3 = "";
10222 const char * optional_colon = ":"; /* Only some messages have a colon */
10223 char *msg;
10224
10225 PERL_ARGS_ASSERT_NEW_CONSTANT;
10226 /* We assume that this is true: */
10227 assert(type || s);
10228
10229 sv_2mortal(sv); /* Parent created it permanently */
10230
10231 if ( ! table
10232 || ! (PL_hints & HINT_LOCALIZE_HH))
10233 {
10234 why1 = "unknown";
10235 optional_colon = "";
10236 goto report;
10237 }
10238
10239 cvp = hv_fetch(table, key, keylen, FALSE);
10240 if (!cvp || !SvOK(*cvp)) {
10241 why1 = "$^H{";
10242 why2 = key;
10243 why3 = "} is not defined";
10244 goto report;
10245 }
10246
10247 cv = *cvp;
10248 if (!pv && s)
10249 pv = newSVpvn_flags(s, len, SVs_TEMP);
10250 if (type && pv)
10251 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
10252 else
10253 typesv = &PL_sv_undef;
10254
10255 PUSHSTACKi(PERLSI_OVERLOAD);
10256 ENTER ;
10257 SAVETMPS;
10258
10259 PUSHMARK(SP) ;
10260 EXTEND(sp, 3);
10261 if (pv)
10262 PUSHs(pv);
10263 PUSHs(sv);
10264 if (pv)
10265 PUSHs(typesv);
10266 PUTBACK;
10267 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
10268
10269 SPAGAIN ;
10270
10271 /* Check the eval first */
10272 if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
10273 STRLEN errlen;
10274 const char * errstr;
10275 sv_catpvs(errsv, "Propagated");
10276 errstr = SvPV_const(errsv, errlen);
10277 yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
10278 (void)POPs;
10279 res = SvREFCNT_inc_simple_NN(sv);
10280 }
10281 else {
10282 res = POPs;
10283 SvREFCNT_inc_simple_void_NN(res);
10284 }
10285
10286 PUTBACK ;
10287 FREETMPS ;
10288 LEAVE ;
10289 POPSTACK;
10290
10291 if (SvOK(res)) {
10292 return res;
10293 }
10294
10295 sv = res;
10296 (void)sv_2mortal(sv);
10297
10298 why1 = "Call to &{$^H{";
10299 why2 = key;
10300 why3 = "}} did not return a defined value";
10301
10302 report:
10303
10304 msg = Perl_form(aTHX_ "Constant(%.*s)%s %s%s%s",
10305 (int)(type ? typelen : len),
10306 (type ? type: s),
10307 optional_colon,
10308 why1, why2, why3);
10309 if (error_msg) {
10310 *error_msg = msg;
10311 }
10312 else {
10313 yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
10314 }
10315 return SvREFCNT_inc_simple_NN(sv);
10316 }
10317
10318 PERL_STATIC_INLINE void
S_parse_ident(pTHX_ char ** s,char ** d,char * const e,int allow_package,bool is_utf8,bool check_dollar,bool tick_warn)10319 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
10320 bool is_utf8, bool check_dollar, bool tick_warn)
10321 {
10322 int saw_tick = 0;
10323 const char *olds = *s;
10324 PERL_ARGS_ASSERT_PARSE_IDENT;
10325
10326 while (*s < PL_bufend) {
10327 if (*d >= e)
10328 Perl_croak(aTHX_ "%s", ident_too_long);
10329 if (is_utf8 && isIDFIRST_utf8_safe(*s, PL_bufend)) {
10330 /* The UTF-8 case must come first, otherwise things
10331 * like c\N{COMBINING TILDE} would start failing, as the
10332 * isWORDCHAR_A case below would gobble the 'c' up.
10333 */
10334
10335 char *t = *s + UTF8SKIP(*s);
10336 while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) {
10337 t += UTF8SKIP(t);
10338 }
10339 if (*d + (t - *s) > e)
10340 Perl_croak(aTHX_ "%s", ident_too_long);
10341 Copy(*s, *d, t - *s, char);
10342 *d += t - *s;
10343 *s = t;
10344 }
10345 else if ( isWORDCHAR_A(**s) ) {
10346 do {
10347 *(*d)++ = *(*s)++;
10348 } while (isWORDCHAR_A(**s) && *d < e);
10349 }
10350 else if ( allow_package
10351 && **s == '\''
10352 && isIDFIRST_lazy_if_safe((*s)+1, PL_bufend, is_utf8))
10353 {
10354 *(*d)++ = ':';
10355 *(*d)++ = ':';
10356 (*s)++;
10357 saw_tick++;
10358 }
10359 else if (allow_package && **s == ':' && (*s)[1] == ':'
10360 /* Disallow things like Foo::$bar. For the curious, this is
10361 * the code path that triggers the "Bad name after" warning
10362 * when looking for barewords.
10363 */
10364 && !(check_dollar && (*s)[2] == '$')) {
10365 *(*d)++ = *(*s)++;
10366 *(*d)++ = *(*s)++;
10367 }
10368 else
10369 break;
10370 }
10371 if (UNLIKELY(saw_tick && tick_warn && ckWARN2_d(WARN_SYNTAX, WARN_DEPRECATED__APOSTROPHE_AS_PACKAGE_SEPARATOR))) {
10372 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10373 char *this_d;
10374 char *d2;
10375 Newx(this_d, *s - olds + saw_tick + 2, char); /* +2 for $# */
10376 d2 = this_d;
10377 SAVEFREEPV(this_d);
10378
10379 Perl_warner(aTHX_ packWARN2(WARN_SYNTAX, WARN_DEPRECATED__APOSTROPHE_AS_PACKAGE_SEPARATOR),
10380 "Old package separator used in string");
10381 if (olds[-1] == '#')
10382 *d2++ = olds[-2];
10383 *d2++ = olds[-1];
10384 while (olds < *s) {
10385 if (*olds == '\'') {
10386 *d2++ = '\\';
10387 *d2++ = *olds++;
10388 }
10389 else
10390 *d2++ = *olds++;
10391 }
10392 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10393 "\t(Did you mean \"%" UTF8f "\" instead?)\n",
10394 UTF8fARG(is_utf8, d2-this_d, this_d));
10395 }
10396 else {
10397 Perl_warner(aTHX_ packWARN2(WARN_SYNTAX, WARN_DEPRECATED__APOSTROPHE_AS_PACKAGE_SEPARATOR),
10398 "Old package separator \"'\" deprecated");
10399 }
10400 }
10401 return;
10402 }
10403
10404 /* Returns a NUL terminated string, with the length of the string written to
10405 *slp
10406
10407 scan_word6() may be removed once ' in names is removed.
10408 */
10409 char *
Perl_scan_word6(pTHX_ char * s,char * dest,STRLEN destlen,int allow_package,STRLEN * slp,bool warn_tick)10410 Perl_scan_word6(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp, bool warn_tick)
10411 {
10412 char *d = dest;
10413 char * const e = d + destlen - 3; /* two-character token, ending NUL */
10414 bool is_utf8 = cBOOL(UTF);
10415
10416 PERL_ARGS_ASSERT_SCAN_WORD6;
10417
10418 parse_ident(&s, &d, e, allow_package, is_utf8, TRUE, warn_tick);
10419 *d = '\0';
10420 *slp = d - dest;
10421 return s;
10422 }
10423
10424 char *
Perl_scan_word(pTHX_ char * s,char * dest,STRLEN destlen,int allow_package,STRLEN * slp)10425 Perl_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
10426 {
10427 PERL_ARGS_ASSERT_SCAN_WORD;
10428 return scan_word6(s, dest, destlen, allow_package, slp, FALSE);
10429 }
10430
10431 /* scan s and extract an identifier ($var) from it if possible
10432 * into dest.
10433 * XXX: This function has subtle implications on parsing, and
10434 * changing how it behaves can cause a variable to change from
10435 * being a run time rv2sv call or a compile time binding to a
10436 * specific variable name.
10437 */
10438 STATIC char *
S_scan_ident(pTHX_ char * s,char * dest,STRLEN destlen,I32 ck_uni)10439 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
10440 {
10441 I32 herelines = PL_parser->herelines;
10442 SSize_t bracket = -1;
10443 char funny = *s++;
10444 char *d = dest;
10445 char * const e = d + destlen - 3; /* two-character token, ending NUL */
10446 bool is_utf8 = cBOOL(UTF);
10447 line_t orig_copline = 0, tmp_copline = 0;
10448
10449 PERL_ARGS_ASSERT_SCAN_IDENT;
10450
10451 if (isSPACE(*s) || !*s)
10452 s = skipspace(s);
10453 if (isDIGIT(*s)) { /* handle $0 and $1 $2 and $10 and etc */
10454 bool is_zero= *s == '0' ? TRUE : FALSE;
10455 char *digit_start= d;
10456 *d++ = *s++;
10457 while (s < PL_bufend && isDIGIT(*s)) {
10458 if (d >= e)
10459 Perl_croak(aTHX_ "%s", ident_too_long);
10460 *d++ = *s++;
10461 }
10462 if (is_zero && d - digit_start > 1)
10463 Perl_croak(aTHX_ ident_var_zero_multi_digit);
10464 }
10465 else { /* See if it is a "normal" identifier */
10466 parse_ident(&s, &d, e, 1, is_utf8, FALSE, TRUE);
10467 }
10468 *d = '\0';
10469 d = dest;
10470 if (*d) {
10471 /* Either a digit variable, or parse_ident() found an identifier
10472 (anything valid as a bareword), so job done and return. */
10473 if (PL_lex_state != LEX_NORMAL)
10474 PL_lex_state = LEX_INTERPENDMAYBE;
10475 return s;
10476 }
10477
10478 /* Here, it is not a run-of-the-mill identifier name */
10479
10480 if (*s == '$' && s[1]
10481 && ( isIDFIRST_lazy_if_safe(s+1, PL_bufend, is_utf8)
10482 || isDIGIT_A((U8)s[1])
10483 || s[1] == '$'
10484 || s[1] == '{'
10485 || memBEGINs(s+1, (STRLEN) (PL_bufend - (s+1)), "::")) )
10486 {
10487 /* Dereferencing a value in a scalar variable.
10488 The alternatives are different syntaxes for a scalar variable.
10489 Using ' as a leading package separator isn't allowed. :: is. */
10490 return s;
10491 }
10492 /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...} */
10493 if (*s == '{') {
10494 bracket = s - SvPVX(PL_linestr);
10495 s++;
10496 orig_copline = CopLINE(PL_curcop);
10497 if (s < PL_bufend && isSPACE(*s)) {
10498 s = skipspace(s);
10499 }
10500 }
10501
10502
10503 /* Extract the first character of the variable name from 's' and
10504 * copy it, null terminated into 'd'. Note that this does not
10505 * involve checking for just IDFIRST characters, as it allows the
10506 * '^' for ${^FOO} type variable names, and it allows all the
10507 * characters that are legal in a single character variable name.
10508 *
10509 * The legal ones are any of:
10510 * a) all ASCII characters except:
10511 * 1) control and space-type ones, like NUL, SOH, \t, and SPACE;
10512 * 2) '{'
10513 * The final case currently doesn't get this far in the program, so we
10514 * don't test for it. If that were to change, it would be ok to allow it.
10515 * b) When not under Unicode rules, any upper Latin1 character
10516 * c) Otherwise, when unicode rules are used, all XIDS characters.
10517 *
10518 * Because all ASCII characters have the same representation whether
10519 * encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
10520 * '{' without knowing if is UTF-8 or not. */
10521
10522 if ((s <= PL_bufend - ((is_utf8)
10523 ? UTF8SKIP(s)
10524 : 1))
10525 && (
10526 isGRAPH_A(*s)
10527 ||
10528 ( is_utf8
10529 ? isIDFIRST_utf8_safe(s, PL_bufend)
10530 : (isGRAPH_L1(*s)
10531 && LIKELY((U8) *s != LATIN1_TO_NATIVE(0xAD))
10532 )
10533 )
10534 )
10535 ){
10536 if (is_utf8) {
10537 const STRLEN skip = UTF8SKIP(s);
10538 STRLEN i;
10539 d[skip] = '\0';
10540 for ( i = 0; i < skip; i++ )
10541 d[i] = *s++;
10542 }
10543 else {
10544 *d = *s++;
10545 d[1] = '\0';
10546 }
10547 }
10548
10549 /* special case to handle ${10}, ${11} the same way we handle ${1} etc */
10550 if (isDIGIT(*d)) {
10551 bool is_zero= *d == '0' ? TRUE : FALSE;
10552 char *digit_start= d;
10553 while (s < PL_bufend && isDIGIT(*s)) {
10554 d++;
10555 if (d >= e)
10556 Perl_croak(aTHX_ "%s", ident_too_long);
10557 *d= *s++;
10558 }
10559 if (is_zero && d - digit_start >= 1) /* d points at the last digit */
10560 Perl_croak(aTHX_ ident_var_zero_multi_digit);
10561 d[1] = '\0';
10562 }
10563
10564 /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
10565 else if (*d == '^' && *s && isCONTROLVAR(*s)) {
10566 *d = toCTRL(*s);
10567 s++;
10568 }
10569 /* Warn about ambiguous code after unary operators if {...} notation isn't
10570 used. There's no difference in ambiguity; it's merely a heuristic
10571 about when not to warn. */
10572 else if (ck_uni && bracket == -1)
10573 check_uni();
10574
10575 if (bracket != -1) {
10576 bool skip;
10577 char *s2;
10578 /* If we were processing {...} notation then... */
10579 if (isIDFIRST_lazy_if_safe(d, e, is_utf8)
10580 || (!isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
10581 && isWORDCHAR(*s))
10582 ) {
10583 /* note we have to check for a normal identifier first,
10584 * as it handles utf8 symbols, and only after that has
10585 * been ruled out can we look at the caret words */
10586 if (isIDFIRST_lazy_if_safe(d, e, is_utf8) ) {
10587 /* if it starts as a valid identifier, assume that it is one.
10588 (the later check for } being at the expected point will trap
10589 cases where this doesn't pan out.) */
10590 d += is_utf8 ? UTF8SKIP(d) : 1;
10591 parse_ident(&s, &d, e, 1, is_utf8, TRUE, TRUE);
10592 *d = '\0';
10593 }
10594 else { /* caret word: ${^Foo} ${^CAPTURE[0]} */
10595 d++;
10596 while (isWORDCHAR(*s) && d < e) {
10597 *d++ = *s++;
10598 }
10599 if (d >= e)
10600 Perl_croak(aTHX_ "%s", ident_too_long);
10601 *d = '\0';
10602 }
10603 tmp_copline = CopLINE(PL_curcop);
10604 if (s < PL_bufend && isSPACE(*s)) {
10605 s = skipspace(s);
10606 }
10607 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
10608 /* ${foo[0]} and ${foo{bar}} and ${^CAPTURE[0]} notation. */
10609 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10610 const char * const brack =
10611 (const char *)
10612 ((*s == '[') ? "[...]" : "{...}");
10613 orig_copline = CopLINE(PL_curcop);
10614 CopLINE_set(PL_curcop, tmp_copline);
10615 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
10616 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10617 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
10618 funny, dest, brack, funny, dest, brack);
10619 CopLINE_set(PL_curcop, orig_copline);
10620 }
10621 bracket++;
10622 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
10623 PL_lex_allbrackets++;
10624 return s;
10625 }
10626 }
10627
10628 if ( !tmp_copline )
10629 tmp_copline = CopLINE(PL_curcop);
10630 if ((skip = s < PL_bufend && isSPACE(*s))) {
10631 /* Avoid incrementing line numbers or resetting PL_linestart,
10632 in case we have to back up. */
10633 STRLEN s_off = s - SvPVX(PL_linestr);
10634 s2 = peekspace(s);
10635 s = SvPVX(PL_linestr) + s_off;
10636 }
10637 else
10638 s2 = s;
10639
10640 /* Expect to find a closing } after consuming any trailing whitespace.
10641 */
10642 if (*s2 == '}') {
10643 /* Now increment line numbers if applicable. */
10644 if (skip)
10645 s = skipspace(s);
10646 s++;
10647 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10648 PL_lex_state = LEX_INTERPEND;
10649 PL_expect = XREF;
10650 }
10651 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) {
10652 if (ckWARN(WARN_AMBIGUOUS)
10653 && (keyword(dest, d - dest, 0)
10654 || get_cvn_flags(dest, d - dest, is_utf8
10655 ? SVf_UTF8
10656 : 0)))
10657 {
10658 SV *tmp = newSVpvn_flags( dest, d - dest,
10659 SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
10660 if (funny == '#')
10661 funny = '@';
10662 orig_copline = CopLINE(PL_curcop);
10663 CopLINE_set(PL_curcop, tmp_copline);
10664 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10665 "Ambiguous use of %c{%" SVf "} resolved to %c%" SVf,
10666 funny, SVfARG(tmp), funny, SVfARG(tmp));
10667 CopLINE_set(PL_curcop, orig_copline);
10668 }
10669 }
10670 }
10671 else {
10672 /* Didn't find the closing } at the point we expected, so restore
10673 state such that the next thing to process is the opening { and */
10674 s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
10675 CopLINE_set(PL_curcop, orig_copline);
10676 PL_parser->herelines = herelines;
10677 *dest = '\0';
10678 PL_parser->sub_no_recover = TRUE;
10679 }
10680 }
10681 else if ( PL_lex_state == LEX_INTERPNORMAL
10682 && !PL_lex_brackets
10683 && !intuit_more(s, PL_bufend))
10684 PL_lex_state = LEX_INTERPEND;
10685 return s;
10686 }
10687
10688 static bool
S_pmflag(pTHX_ const char * const valid_flags,U32 * pmfl,char ** s,char * charset,unsigned int * x_mod_count)10689 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) {
10690
10691 /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag
10692 * found in the parse starting at 's', based on the subset that are valid
10693 * in this context input to this routine in 'valid_flags'. Advances s.
10694 * Returns TRUE if the input should be treated as a valid flag, so the next
10695 * char may be as well; otherwise FALSE. 'charset' should point to a NUL
10696 * upon first call on the current regex. This routine will set it to any
10697 * charset modifier found. The caller shouldn't change it. This way,
10698 * another charset modifier encountered in the parse can be detected as an
10699 * error, as we have decided to allow only one */
10700
10701 const char c = **s;
10702 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
10703
10704 if ( charlen != 1 || ! strchr(valid_flags, c) ) {
10705 if (isWORDCHAR_lazy_if_safe( *s, PL_bufend, UTF)) {
10706 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
10707 UTF ? SVf_UTF8 : 0);
10708 (*s) += charlen;
10709 /* Pretend that it worked, so will continue processing before
10710 * dieing */
10711 return TRUE;
10712 }
10713 return FALSE;
10714 }
10715
10716 switch (c) {
10717
10718 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count);
10719 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
10720 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
10721 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
10722 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
10723 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
10724 case LOCALE_PAT_MOD:
10725 if (*charset) {
10726 goto multiple_charsets;
10727 }
10728 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
10729 *charset = c;
10730 break;
10731 case UNICODE_PAT_MOD:
10732 if (*charset) {
10733 goto multiple_charsets;
10734 }
10735 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
10736 *charset = c;
10737 break;
10738 case ASCII_RESTRICT_PAT_MOD:
10739 if (! *charset) {
10740 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
10741 }
10742 else {
10743
10744 /* Error if previous modifier wasn't an 'a', but if it was, see
10745 * if, and accept, a second occurrence (only) */
10746 if (*charset != 'a'
10747 || get_regex_charset(*pmfl)
10748 != REGEX_ASCII_RESTRICTED_CHARSET)
10749 {
10750 goto multiple_charsets;
10751 }
10752 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
10753 }
10754 *charset = c;
10755 break;
10756 case DEPENDS_PAT_MOD:
10757 if (*charset) {
10758 goto multiple_charsets;
10759 }
10760 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
10761 *charset = c;
10762 break;
10763 }
10764
10765 (*s)++;
10766 return TRUE;
10767
10768 multiple_charsets:
10769 if (*charset != c) {
10770 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
10771 }
10772 else if (c == 'a') {
10773 /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
10774 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
10775 }
10776 else {
10777 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
10778 }
10779
10780 /* Pretend that it worked, so will continue processing before dieing */
10781 (*s)++;
10782 return TRUE;
10783 }
10784
10785 STATIC char *
S_scan_pat(pTHX_ char * start,I32 type)10786 S_scan_pat(pTHX_ char *start, I32 type)
10787 {
10788 PMOP *pm;
10789 char *s;
10790 const char * const valid_flags =
10791 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
10792 char charset = '\0'; /* character set modifier */
10793 unsigned int x_mod_count = 0;
10794
10795 PERL_ARGS_ASSERT_SCAN_PAT;
10796
10797 s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
10798 if (!s)
10799 Perl_croak(aTHX_ "Search pattern not terminated");
10800
10801 pm = (PMOP*)newPMOP(type, 0);
10802 if (PL_multi_open == '?') {
10803 /* This is the only point in the code that sets PMf_ONCE: */
10804 pm->op_pmflags |= PMf_ONCE;
10805
10806 /* Hence it's safe to do this bit of PMOP book-keeping here, which
10807 allows us to restrict the list needed by reset to just the ??
10808 matches. */
10809 assert(type != OP_TRANS);
10810 if (PL_curstash) {
10811 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
10812 U32 elements;
10813 if (!mg) {
10814 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
10815 0);
10816 }
10817 elements = mg->mg_len / sizeof(PMOP**);
10818 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
10819 ((PMOP**)mg->mg_ptr) [elements++] = pm;
10820 mg->mg_len = elements * sizeof(PMOP**);
10821 PmopSTASH_set(pm,PL_curstash);
10822 }
10823 }
10824
10825 /* if qr/...(?{..}).../, then need to parse the pattern within a new
10826 * anon CV. False positives like qr/[(?{]/ are harmless */
10827
10828 if (type == OP_QR) {
10829 STRLEN len;
10830 char *e, *p = SvPV(PL_lex_stuff, len);
10831 e = p + len;
10832 for (; p < e; p++) {
10833 if (p[0] == '(' && (
10834 (p[1] == '?' && (p[2] == '{' ||
10835 (p[2] == '?' && p[3] == '{'))) ||
10836 (p[1] == '*' && (p[2] == '{' ||
10837 (p[2] == '*' && p[3] == '{')))
10838 )){
10839 pm->op_pmflags |= PMf_HAS_CV;
10840 break;
10841 }
10842 }
10843 pm->op_pmflags |= PMf_IS_QR;
10844 }
10845
10846 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
10847 &s, &charset, &x_mod_count))
10848 {};
10849 /* issue a warning if /c is specified,but /g is not */
10850 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
10851 {
10852 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
10853 "Use of /c modifier is meaningless without /g" );
10854 }
10855
10856 PL_lex_op = (OP*)pm;
10857 pl_yylval.ival = OP_MATCH;
10858 return s;
10859 }
10860
10861 STATIC char *
S_scan_subst(pTHX_ char * start)10862 S_scan_subst(pTHX_ char *start)
10863 {
10864 char *s;
10865 PMOP *pm;
10866 I32 first_start;
10867 line_t first_line;
10868 line_t linediff = 0;
10869 I32 es = 0;
10870 char charset = '\0'; /* character set modifier */
10871 unsigned int x_mod_count = 0;
10872 char *t;
10873
10874 PERL_ARGS_ASSERT_SCAN_SUBST;
10875
10876 pl_yylval.ival = OP_NULL;
10877
10878 s = scan_str(start, TRUE, FALSE, FALSE, &t);
10879
10880 if (!s)
10881 Perl_croak(aTHX_ "Substitution pattern not terminated");
10882
10883 s = t;
10884
10885 first_start = PL_multi_start;
10886 first_line = CopLINE(PL_curcop);
10887 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
10888 if (!s) {
10889 SvREFCNT_dec_NN(PL_lex_stuff);
10890 PL_lex_stuff = NULL;
10891 Perl_croak(aTHX_ "Substitution replacement not terminated");
10892 }
10893 PL_multi_start = first_start; /* so whole substitution is taken together */
10894
10895 pm = (PMOP*)newPMOP(OP_SUBST, 0);
10896
10897
10898 while (*s) {
10899 if (*s == EXEC_PAT_MOD) {
10900 s++;
10901 es++;
10902 }
10903 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
10904 &s, &charset, &x_mod_count))
10905 {
10906 break;
10907 }
10908 }
10909
10910 if ((pm->op_pmflags & PMf_CONTINUE)) {
10911 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
10912 }
10913
10914 if (es) {
10915 SV * const repl = newSVpvs("");
10916
10917 PL_multi_end = 0;
10918 pm->op_pmflags |= PMf_EVAL;
10919 for (; es > 1; es--) {
10920 sv_catpvs(repl, "eval ");
10921 }
10922 sv_catpvs(repl, "do {");
10923 sv_catsv(repl, PL_parser->lex_sub_repl);
10924 sv_catpvs(repl, "}");
10925 SvREFCNT_dec(PL_parser->lex_sub_repl);
10926 PL_parser->lex_sub_repl = repl;
10927 }
10928
10929
10930 linediff = CopLINE(PL_curcop) - first_line;
10931 if (linediff)
10932 CopLINE_set(PL_curcop, first_line);
10933
10934 if (linediff || es) {
10935 /* the IVX field indicates that the replacement string is a s///e;
10936 * the NVX field indicates how many src code lines the replacement
10937 * spreads over */
10938 sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV);
10939 ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = linediff;
10940 ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen =
10941 cBOOL(es);
10942 }
10943
10944 PL_lex_op = (OP*)pm;
10945 pl_yylval.ival = OP_SUBST;
10946 return s;
10947 }
10948
10949 STATIC char *
S_scan_trans(pTHX_ char * start)10950 S_scan_trans(pTHX_ char *start)
10951 {
10952 char* s;
10953 OP *o;
10954 U8 squash;
10955 U8 del;
10956 U8 complement;
10957 bool nondestruct = 0;
10958 char *t;
10959
10960 PERL_ARGS_ASSERT_SCAN_TRANS;
10961
10962 pl_yylval.ival = OP_NULL;
10963
10964 s = scan_str(start,FALSE,FALSE,FALSE,&t);
10965 if (!s)
10966 Perl_croak(aTHX_ "Transliteration pattern not terminated");
10967
10968 s = t;
10969
10970 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
10971 if (!s) {
10972 SvREFCNT_dec_NN(PL_lex_stuff);
10973 PL_lex_stuff = NULL;
10974 Perl_croak(aTHX_ "Transliteration replacement not terminated");
10975 }
10976
10977 complement = del = squash = 0;
10978 while (1) {
10979 switch (*s) {
10980 case 'c':
10981 complement = OPpTRANS_COMPLEMENT;
10982 break;
10983 case 'd':
10984 del = OPpTRANS_DELETE;
10985 break;
10986 case 's':
10987 squash = OPpTRANS_SQUASH;
10988 break;
10989 case 'r':
10990 nondestruct = 1;
10991 break;
10992 default:
10993 goto no_more;
10994 }
10995 s++;
10996 }
10997 no_more:
10998
10999 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
11000 o->op_private &= ~OPpTRANS_ALL;
11001 o->op_private |= del|squash|complement;
11002
11003 PL_lex_op = o;
11004 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
11005
11006
11007 return s;
11008 }
11009
11010 /* scan_heredoc
11011 Takes a pointer to the first < in <<FOO.
11012 Returns a pointer to the byte following <<FOO.
11013
11014 This function scans a heredoc, which involves different methods
11015 depending on whether we are in a string eval, quoted construct, etc.
11016 This is because PL_linestr could containing a single line of input, or
11017 a whole string being evalled, or the contents of the current quote-
11018 like operator.
11019
11020 The two basic methods are:
11021 - Steal lines from the input stream
11022 - Scan the heredoc in PL_linestr and remove it therefrom
11023
11024 In a file scope or filtered eval, the first method is used; in a
11025 string eval, the second.
11026
11027 In a quote-like operator, we have to choose between the two,
11028 depending on where we can find a newline. We peek into outer lex-
11029 ing scopes until we find one with a newline in it. If we reach the
11030 outermost lexing scope and it is a file, we use the stream method.
11031 Otherwise it is treated as an eval.
11032 */
11033
11034 STATIC char *
S_scan_heredoc(pTHX_ char * s)11035 S_scan_heredoc(pTHX_ char *s)
11036 {
11037 I32 op_type = OP_SCALAR;
11038 I32 len;
11039 SV *tmpstr;
11040 char term;
11041 char *d;
11042 char *e;
11043 char *peek;
11044 char *indent = 0;
11045 I32 indent_len = 0;
11046 bool indented = FALSE;
11047 const bool infile = PL_rsfp || PL_parser->filtered;
11048 const line_t origline = CopLINE(PL_curcop);
11049 LEXSHARED *shared = PL_parser->lex_shared;
11050
11051 PERL_ARGS_ASSERT_SCAN_HEREDOC;
11052
11053 s += 2;
11054 d = PL_tokenbuf + 1;
11055 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
11056 *PL_tokenbuf = '\n';
11057 peek = s;
11058
11059 if (*peek == '~') {
11060 indented = TRUE;
11061 peek++; s++;
11062 }
11063
11064 while (SPACE_OR_TAB(*peek))
11065 peek++;
11066
11067 if (*peek == '`' || *peek == '\'' || *peek =='"') {
11068 s = peek;
11069 term = *s++;
11070 s = delimcpy(d, e, s, PL_bufend, term, &len);
11071 if (s == PL_bufend)
11072 Perl_croak(aTHX_ "Unterminated delimiter for here document");
11073 d += len;
11074 s++;
11075 }
11076 else {
11077 if (*s == '\\')
11078 /* <<\FOO is equivalent to <<'FOO' */
11079 s++, term = '\'';
11080 else
11081 term = '"';
11082
11083 if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
11084 Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden");
11085
11086 peek = s;
11087
11088 while (isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF)) {
11089 peek += UTF ? UTF8SKIP(peek) : 1;
11090 }
11091
11092 len = (peek - s >= e - d) ? (e - d) : (peek - s);
11093 Copy(s, d, len, char);
11094 s += len;
11095 d += len;
11096 }
11097
11098 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
11099 Perl_croak(aTHX_ "Delimiter for here document is too long");
11100
11101 *d++ = '\n';
11102 *d = '\0';
11103 len = d - PL_tokenbuf;
11104
11105 #ifndef PERL_STRICT_CR
11106 d = (char *) memchr(s, '\r', PL_bufend - s);
11107 if (d) {
11108 char * const olds = s;
11109 s = d;
11110 while (s < PL_bufend) {
11111 if (*s == '\r') {
11112 *d++ = '\n';
11113 if (*++s == '\n')
11114 s++;
11115 }
11116 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
11117 *d++ = *s++;
11118 s++;
11119 }
11120 else
11121 *d++ = *s++;
11122 }
11123 *d = '\0';
11124 PL_bufend = d;
11125 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11126 s = olds;
11127 }
11128 #endif
11129
11130 tmpstr = newSV_type(SVt_PVIV);
11131 if (term == '\'') {
11132 op_type = OP_CONST;
11133 SvIV_set(tmpstr, -1);
11134 }
11135 else if (term == '`') {
11136 op_type = OP_BACKTICK;
11137 SvIV_set(tmpstr, '\\');
11138 }
11139
11140 PL_multi_start = origline + 1 + PL_parser->herelines;
11141 PL_multi_open = PL_multi_close = '<';
11142
11143 /* inside a string eval or quote-like operator */
11144 if (!infile || PL_lex_inwhat) {
11145 SV *linestr;
11146 char *bufend;
11147 char * const olds = s;
11148 PERL_CONTEXT * const cx = CX_CUR();
11149 /* These two fields are not set until an inner lexing scope is
11150 entered. But we need them set here. */
11151 shared->ls_bufptr = s;
11152 shared->ls_linestr = PL_linestr;
11153
11154 if (PL_lex_inwhat) {
11155 /* Look for a newline. If the current buffer does not have one,
11156 peek into the line buffer of the parent lexing scope, going
11157 up as many levels as necessary to find one with a newline
11158 after bufptr.
11159 */
11160 while (!(s = (char *)memchr(
11161 (void *)shared->ls_bufptr, '\n',
11162 SvEND(shared->ls_linestr)-shared->ls_bufptr
11163 )))
11164 {
11165 shared = shared->ls_prev;
11166 /* shared is only null if we have gone beyond the outermost
11167 lexing scope. In a file, we will have broken out of the
11168 loop in the previous iteration. In an eval, the string buf-
11169 fer ends with "\n;", so the while condition above will have
11170 evaluated to false. So shared can never be null. Or so you
11171 might think. Odd syntax errors like s;@{<<; can gobble up
11172 the implicit semicolon at the end of a flie, causing the
11173 file handle to be closed even when we are not in a string
11174 eval. So shared may be null in that case.
11175 (Closing '>>}' here to balance the earlier open brace for
11176 editors that look for matched pairs.) */
11177 if (UNLIKELY(!shared))
11178 goto interminable;
11179 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
11180 most lexing scope. In a file, shared->ls_linestr at that
11181 level is just one line, so there is no body to steal. */
11182 if (infile && !shared->ls_prev) {
11183 s = olds;
11184 goto streaming;
11185 }
11186 }
11187 }
11188 else { /* eval or we've already hit EOF */
11189 s = (char*)memchr((void*)s, '\n', PL_bufend - s);
11190 if (!s)
11191 goto interminable;
11192 }
11193
11194 linestr = shared->ls_linestr;
11195 bufend = SvEND(linestr);
11196 d = s;
11197 if (indented) {
11198 char *myolds = s;
11199
11200 while (s < bufend - len + 1) {
11201 if (*s++ == '\n')
11202 ++PL_parser->herelines;
11203
11204 if (memEQ(s, PL_tokenbuf + 1, len - 1)) {
11205 char *backup = s;
11206 indent_len = 0;
11207
11208 /* Only valid if it's preceded by whitespace only */
11209 while (backup != myolds && --backup >= myolds) {
11210 if (! SPACE_OR_TAB(*backup)) {
11211 break;
11212 }
11213 indent_len++;
11214 }
11215
11216 /* No whitespace or all! */
11217 if (backup == s || *backup == '\n') {
11218 Newx(indent, indent_len + 1, char);
11219 memcpy(indent, backup + 1, indent_len);
11220 indent[indent_len] = 0;
11221 s--; /* before our delimiter */
11222 PL_parser->herelines--; /* this line doesn't count */
11223 break;
11224 }
11225 }
11226 }
11227 }
11228 else {
11229 while (s < bufend - len + 1
11230 && memNE(s,PL_tokenbuf,len) )
11231 {
11232 if (*s++ == '\n')
11233 ++PL_parser->herelines;
11234 }
11235 }
11236
11237 if (s >= bufend - len + 1) {
11238 goto interminable;
11239 }
11240
11241 sv_setpvn_fresh(tmpstr,d+1,s-d);
11242 s += len - 1;
11243 /* the preceding stmt passes a newline */
11244 PL_parser->herelines++;
11245
11246 /* s now points to the newline after the heredoc terminator.
11247 d points to the newline before the body of the heredoc.
11248 */
11249
11250 /* We are going to modify linestr in place here, so set
11251 aside copies of the string if necessary for re-evals or
11252 (caller $n)[6]. */
11253 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
11254 check shared->re_eval_str. */
11255 if (shared->re_eval_start || shared->re_eval_str) {
11256 /* Set aside the rest of the regexp */
11257 if (!shared->re_eval_str)
11258 shared->re_eval_str =
11259 newSVpvn(shared->re_eval_start,
11260 bufend - shared->re_eval_start);
11261 shared->re_eval_start -= s-d;
11262 }
11263
11264 if (cxstack_ix >= 0
11265 && CxTYPE(cx) == CXt_EVAL
11266 && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
11267 && cx->blk_eval.cur_text == linestr)
11268 {
11269 cx->blk_eval.cur_text = newSVsv(linestr);
11270 cx->blk_u16 |= 0x40; /* indicate cur_text is ref counted */
11271 }
11272
11273 /* Copy everything from s onwards back to d. */
11274 Move(s,d,bufend-s + 1,char);
11275 SvCUR_set(linestr, SvCUR(linestr) - (s-d));
11276 /* Setting PL_bufend only applies when we have not dug deeper
11277 into other scopes, because sublex_done sets PL_bufend to
11278 SvEND(PL_linestr). */
11279 if (shared == PL_parser->lex_shared)
11280 PL_bufend = SvEND(linestr);
11281 s = olds;
11282 }
11283 else {
11284 SV *linestr_save;
11285 char *oldbufptr_save;
11286 char *oldoldbufptr_save;
11287 streaming:
11288 sv_grow_fresh(tmpstr, 80);
11289 SvPVCLEAR_FRESH(tmpstr); /* avoid "uninitialized" warning */
11290 term = PL_tokenbuf[1];
11291 len--;
11292 linestr_save = PL_linestr; /* must restore this afterwards */
11293 d = s; /* and this */
11294 oldbufptr_save = PL_oldbufptr;
11295 oldoldbufptr_save = PL_oldoldbufptr;
11296 PL_linestr = newSVpvs("");
11297 PL_bufend = SvPVX(PL_linestr);
11298
11299 while (1) {
11300 PL_bufptr = PL_bufend;
11301 CopLINE_set(PL_curcop,
11302 origline + 1 + PL_parser->herelines);
11303
11304 if ( !lex_next_chunk(LEX_NO_TERM)
11305 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n'))
11306 {
11307 /* Simply freeing linestr_save might seem simpler here, as it
11308 does not matter what PL_linestr points to, since we are
11309 about to croak; but in a quote-like op, linestr_save
11310 will have been prospectively freed already, via
11311 SAVEFREESV(PL_linestr) in sublex_push, so it's easier to
11312 restore PL_linestr. */
11313 SvREFCNT_dec_NN(PL_linestr);
11314 PL_linestr = linestr_save;
11315 PL_oldbufptr = oldbufptr_save;
11316 PL_oldoldbufptr = oldoldbufptr_save;
11317 goto interminable;
11318 }
11319
11320 CopLINE_set(PL_curcop, origline);
11321
11322 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
11323 s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
11324 /* ^That should be enough to avoid this needing to grow: */
11325 sv_catpvs(PL_linestr, "\n\0");
11326 assert(s == SvPVX(PL_linestr));
11327 PL_bufend = SvEND(PL_linestr);
11328 }
11329
11330 s = PL_bufptr;
11331 PL_parser->herelines++;
11332 PL_last_lop = PL_last_uni = NULL;
11333
11334 #ifndef PERL_STRICT_CR
11335 if (PL_bufend - PL_linestart >= 2) {
11336 if ( (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
11337 || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
11338 {
11339 PL_bufend[-2] = '\n';
11340 PL_bufend--;
11341 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11342 }
11343 else if (PL_bufend[-1] == '\r')
11344 PL_bufend[-1] = '\n';
11345 }
11346 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
11347 PL_bufend[-1] = '\n';
11348 #endif
11349
11350 if (indented && (PL_bufend-s) >= len) {
11351 char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len));
11352
11353 if (found) {
11354 char *backup = found;
11355 indent_len = 0;
11356
11357 /* Only valid if it's preceded by whitespace only */
11358 while (backup != s && --backup >= s) {
11359 if (! SPACE_OR_TAB(*backup)) {
11360 break;
11361 }
11362 indent_len++;
11363 }
11364
11365 /* All whitespace or none! */
11366 if (backup == found || SPACE_OR_TAB(*backup)) {
11367 Newx(indent, indent_len + 1, char);
11368 memcpy(indent, backup, indent_len);
11369 indent[indent_len] = 0;
11370 SvREFCNT_dec(PL_linestr);
11371 PL_linestr = linestr_save;
11372 PL_linestart = SvPVX(linestr_save);
11373 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11374 PL_oldbufptr = oldbufptr_save;
11375 PL_oldoldbufptr = oldoldbufptr_save;
11376 s = d;
11377 break;
11378 }
11379 }
11380
11381 /* Didn't find it */
11382 sv_catsv(tmpstr,PL_linestr);
11383 }
11384 else {
11385 if (*s == term && PL_bufend-s >= len
11386 && memEQ(s,PL_tokenbuf + 1,len))
11387 {
11388 SvREFCNT_dec(PL_linestr);
11389 PL_linestr = linestr_save;
11390 PL_linestart = SvPVX(linestr_save);
11391 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11392 PL_oldbufptr = oldbufptr_save;
11393 PL_oldoldbufptr = oldoldbufptr_save;
11394 s = d;
11395 break;
11396 }
11397 else {
11398 sv_catsv(tmpstr,PL_linestr);
11399 }
11400 }
11401 } /* while (1) */
11402 }
11403
11404 PL_multi_end = origline + PL_parser->herelines;
11405
11406 if (indented && indent) {
11407 STRLEN linecount = 1;
11408 STRLEN herelen = SvCUR(tmpstr);
11409 char *ss = SvPVX(tmpstr);
11410 char *se = ss + herelen;
11411 SV *newstr = newSV(herelen+1);
11412 SvPOK_on(newstr);
11413
11414 /* Trim leading whitespace */
11415 while (ss < se) {
11416 /* newline only? Copy and move on */
11417 if (*ss == '\n') {
11418 sv_catpvs(newstr,"\n");
11419 ss++;
11420 linecount++;
11421
11422 /* Found our indentation? Strip it */
11423 }
11424 else if (se - ss >= indent_len
11425 && memEQ(ss, indent, indent_len))
11426 {
11427 STRLEN le = 0;
11428 ss += indent_len;
11429
11430 while ((ss + le) < se && *(ss + le) != '\n')
11431 le++;
11432
11433 sv_catpvn(newstr, ss, le);
11434 ss += le;
11435
11436 /* Line doesn't begin with our indentation? Croak */
11437 }
11438 else {
11439 Safefree(indent);
11440 Perl_croak(aTHX_
11441 "Indentation on line %d of here-doc doesn't match delimiter",
11442 (int)linecount
11443 );
11444 }
11445 } /* while */
11446
11447 /* avoid sv_setsv() as we don't want to COW here */
11448 sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr));
11449 Safefree(indent);
11450 SvREFCNT_dec_NN(newstr);
11451 }
11452
11453 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
11454 SvPV_shrink_to_cur(tmpstr);
11455 }
11456
11457 if (!IN_BYTES) {
11458 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
11459 SvUTF8_on(tmpstr);
11460 }
11461
11462 PL_lex_stuff = tmpstr;
11463 pl_yylval.ival = op_type;
11464 return s;
11465
11466 interminable:
11467 if (indent)
11468 Safefree(indent);
11469 SvREFCNT_dec(tmpstr);
11470 CopLINE_set(PL_curcop, origline);
11471 missingterm(PL_tokenbuf + 1, sizeof(PL_tokenbuf) - 1);
11472 }
11473
11474
11475 /* scan_inputsymbol
11476 takes: position of first '<' in input buffer
11477 returns: position of first char following the matching '>' in
11478 input buffer
11479 side-effects: pl_yylval and lex_op are set.
11480
11481 This code handles:
11482
11483 <> read from ARGV
11484 <<>> read from ARGV without magic open
11485 <FH> read from filehandle
11486 <pkg::FH> read from package qualified filehandle
11487 <pkg'FH> read from package qualified filehandle
11488 <$fh> read from filehandle in $fh
11489 <*.h> filename glob
11490
11491 */
11492
11493 STATIC char *
S_scan_inputsymbol(pTHX_ char * start)11494 S_scan_inputsymbol(pTHX_ char *start)
11495 {
11496 char *s = start; /* current position in buffer */
11497 char *end;
11498 I32 len;
11499 bool nomagicopen = FALSE;
11500 char *d = PL_tokenbuf; /* start of temp holding space */
11501 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
11502
11503 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
11504
11505 end = (char *) memchr(s, '\n', PL_bufend - s);
11506 if (!end)
11507 end = PL_bufend;
11508 if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
11509 nomagicopen = TRUE;
11510 *d = '\0';
11511 len = 0;
11512 s += 3;
11513 }
11514 else
11515 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
11516
11517 /* die if we didn't have space for the contents of the <>,
11518 or if it didn't end, or if we see a newline
11519 */
11520
11521 if (len >= (I32)sizeof PL_tokenbuf)
11522 Perl_croak(aTHX_ "Excessively long <> operator");
11523 if (s >= end)
11524 Perl_croak(aTHX_ "Unterminated <> operator");
11525
11526 s++;
11527
11528 /* check for <$fh>
11529 Remember, only scalar variables are interpreted as filehandles by
11530 this code. Anything more complex (e.g., <$fh{$num}>) will be
11531 treated as a glob() call.
11532 This code makes use of the fact that except for the $ at the front,
11533 a scalar variable and a filehandle look the same.
11534 */
11535 if (*d == '$' && d[1]) d++;
11536
11537 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
11538 while (isWORDCHAR_lazy_if_safe(d, e, UTF) || *d == '\'' || *d == ':') {
11539 d += UTF ? UTF8SKIP(d) : 1;
11540 }
11541
11542 /* If we've tried to read what we allow filehandles to look like, and
11543 there's still text left, then it must be a glob() and not a getline.
11544 Use scan_str to pull out the stuff between the <> and treat it
11545 as nothing more than a string.
11546 */
11547
11548 if (d - PL_tokenbuf != len) {
11549 pl_yylval.ival = OP_GLOB;
11550 s = scan_str(start,FALSE,FALSE,FALSE,NULL);
11551 if (!s)
11552 Perl_croak(aTHX_ "Glob not terminated");
11553 return s;
11554 }
11555 else {
11556 bool readline_overridden = FALSE;
11557 GV *gv_readline;
11558 /* we're in a filehandle read situation */
11559 d = PL_tokenbuf;
11560
11561 /* turn <> into <ARGV> */
11562 if (!len)
11563 Copy("ARGV",d,5,char);
11564
11565 /* Check whether readline() is overridden */
11566 if ((gv_readline = gv_override("readline",8)))
11567 readline_overridden = TRUE;
11568
11569 /* if <$fh>, create the ops to turn the variable into a
11570 filehandle
11571 */
11572 if (*d == '$') {
11573 /* try to find it in the pad for this block, otherwise find
11574 add symbol table ops
11575 */
11576 const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
11577 if (tmp != NOT_IN_PAD) {
11578 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
11579 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
11580 HEK * const stashname = HvNAME_HEK(stash);
11581 SV * const sym = newSVhek_mortal(stashname);
11582 sv_catpvs(sym, "::");
11583 sv_catpv(sym, d+1);
11584 d = SvPVX(sym);
11585 goto intro_sym;
11586 }
11587 else {
11588 OP * const o = newPADxVOP(OP_PADSV, 0, tmp);
11589 PL_lex_op = readline_overridden
11590 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11591 op_append_elem(OP_LIST, o,
11592 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
11593 : newUNOP(OP_READLINE, 0, o);
11594 }
11595 }
11596 else {
11597 GV *gv;
11598 ++d;
11599 intro_sym:
11600 gv = gv_fetchpv(d,
11601 GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
11602 SVt_PV);
11603 PL_lex_op = readline_overridden
11604 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11605 op_append_elem(OP_LIST,
11606 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
11607 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11608 : newUNOP(OP_READLINE, 0,
11609 newUNOP(OP_RV2SV, 0,
11610 newGVOP(OP_GV, 0, gv)));
11611 }
11612 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
11613 pl_yylval.ival = OP_NULL;
11614 }
11615
11616 /* If it's none of the above, it must be a literal filehandle
11617 (<Foo::BAR> or <FOO>) so build a simple readline OP */
11618 else {
11619 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
11620 PL_lex_op = readline_overridden
11621 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11622 op_append_elem(OP_LIST,
11623 newGVOP(OP_GV, 0, gv),
11624 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11625 : newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
11626 pl_yylval.ival = OP_NULL;
11627
11628 /* leave the token generation above to avoid confusing the parser */
11629 if (!FEATURE_BAREWORD_FILEHANDLES_IS_ENABLED) {
11630 no_bareword_filehandle(d);
11631 }
11632 }
11633 }
11634
11635 return s;
11636 }
11637
11638
11639 /* scan_str
11640 takes:
11641 start position in buffer
11642 keep_bracketed_quoted preserve \ quoting of embedded delimiters, but
11643 only if they are of the open/close form
11644 keep_delims preserve the delimiters around the string
11645 re_reparse compiling a run-time /(?{})/:
11646 collapse // to /, and skip encoding src
11647 delimp if non-null, this is set to the position of
11648 the closing delimiter, or just after it if
11649 the closing and opening delimiters differ
11650 (i.e., the opening delimiter of a substitu-
11651 tion replacement)
11652 returns: position to continue reading from buffer
11653 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
11654 updates the read buffer.
11655
11656 This subroutine pulls a string out of the input. It is called for:
11657 q single quotes q(literal text)
11658 ' single quotes 'literal text'
11659 qq double quotes qq(interpolate $here please)
11660 " double quotes "interpolate $here please"
11661 qx backticks qx(/bin/ls -l)
11662 ` backticks `/bin/ls -l`
11663 qw quote words @EXPORT_OK = qw( func() $spam )
11664 m// regexp match m/this/
11665 s/// regexp substitute s/this/that/
11666 tr/// string transliterate tr/this/that/
11667 y/// string transliterate y/this/that/
11668 ($*@) sub prototypes sub foo ($)
11669 (stuff) sub attr parameters sub foo : attr(stuff)
11670 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
11671
11672 In most of these cases (all but <>, patterns and transliterate)
11673 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
11674 calls scan_str(). s/// makes yylex() call scan_subst() which calls
11675 scan_str(). tr/// and y/// make yylex() call scan_trans() which
11676 calls scan_str().
11677
11678 It skips whitespace before the string starts, and treats the first
11679 character as the delimiter. If the delimiter is one of ([{< then
11680 the corresponding "close" character )]}> is used as the closing
11681 delimiter. It allows quoting of delimiters, and if the string has
11682 balanced delimiters ([{<>}]) it allows nesting.
11683
11684 On success, the SV with the resulting string is put into lex_stuff or,
11685 if that is already non-NULL, into lex_repl. The second case occurs only
11686 when parsing the RHS of the special constructs s/// and tr/// (y///).
11687 For convenience, the terminating delimiter character is stuffed into
11688 SvIVX of the SV.
11689 */
11690
11691 char *
Perl_scan_str(pTHX_ char * start,int keep_bracketed_quoted,int keep_delims,int re_reparse,char ** delimp)11692 Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
11693 char **delimp
11694 )
11695 {
11696 SV *sv; /* scalar value: string */
11697 char *s = start; /* current position in the buffer */
11698 char *to; /* current position in the sv's data */
11699 int brackets = 1; /* bracket nesting level */
11700 bool d_is_utf8 = FALSE; /* is there any utf8 content? */
11701 UV open_delim_code; /* code point */
11702 char open_delim_str[UTF8_MAXBYTES+1];
11703 STRLEN delim_byte_len; /* each delimiter currently is the same number
11704 of bytes */
11705 line_t herelines;
11706
11707 /* The only non-UTF character that isn't a stand alone grapheme is
11708 * white-space, hence can't be a delimiter. */
11709 const char * non_grapheme_msg = "Use of unassigned code point or"
11710 " non-standalone grapheme for a delimiter"
11711 " is not allowed";
11712 PERL_ARGS_ASSERT_SCAN_STR;
11713
11714 /* skip space before the delimiter */
11715 if (isSPACE(*s)) { /* skipspace can change the buffer 's' is in, so
11716 'start' also has to change */
11717 s = start = skipspace(s);
11718 }
11719
11720 /* mark where we are, in case we need to report errors */
11721 CLINE;
11722
11723 /* after skipping whitespace, the next character is the delimiter */
11724 if (! UTF || UTF8_IS_INVARIANT(*s)) {
11725 open_delim_code = (U8) *s;
11726 open_delim_str[0] = *s;
11727 delim_byte_len = 1;
11728 }
11729 else {
11730 open_delim_code = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend,
11731 &delim_byte_len);
11732 if (UNLIKELY(! is_grapheme((U8 *) start,
11733 (U8 *) s,
11734 (U8 *) PL_bufend,
11735 open_delim_code)))
11736 {
11737 yyerror(non_grapheme_msg);
11738 }
11739
11740 Copy(s, open_delim_str, delim_byte_len, char);
11741 }
11742 open_delim_str[delim_byte_len] = '\0'; /* Only for safety */
11743
11744
11745 /* mark where we are */
11746 PL_multi_start = CopLINE(PL_curcop);
11747 PL_multi_open = open_delim_code;
11748 herelines = PL_parser->herelines;
11749
11750 const char * legal_paired_opening_delims;
11751 const char * legal_paired_closing_delims;
11752 const char * deprecated_opening_delims;
11753 if (FEATURE_MORE_DELIMS_IS_ENABLED) {
11754 if (UTF) {
11755 legal_paired_opening_delims = EXTRA_OPENING_UTF8_BRACKETS;
11756 legal_paired_closing_delims = EXTRA_CLOSING_UTF8_BRACKETS;
11757
11758 /* We are deprecating using a closing delimiter as the opening, in
11759 * case we want in the future to accept them reversed. The string
11760 * may include ones that are legal, but the code below won't look
11761 * at this string unless it didn't find a legal opening one */
11762 deprecated_opening_delims = DEPRECATED_OPENING_UTF8_BRACKETS;
11763 }
11764 else {
11765 legal_paired_opening_delims = EXTRA_OPENING_NON_UTF8_BRACKETS;
11766 legal_paired_closing_delims = EXTRA_CLOSING_NON_UTF8_BRACKETS;
11767 deprecated_opening_delims = DEPRECATED_OPENING_NON_UTF8_BRACKETS;
11768 }
11769 }
11770 else {
11771 legal_paired_opening_delims = "([{<";
11772 legal_paired_closing_delims = ")]}>";
11773 deprecated_opening_delims = (UTF)
11774 ? DEPRECATED_OPENING_UTF8_BRACKETS
11775 : DEPRECATED_OPENING_NON_UTF8_BRACKETS;
11776 }
11777
11778 const char * legal_paired_opening_delims_end = legal_paired_opening_delims
11779 + strlen(legal_paired_opening_delims);
11780 const char * deprecated_delims_end = deprecated_opening_delims
11781 + strlen(deprecated_opening_delims);
11782
11783 const char * close_delim_str = open_delim_str;
11784 UV close_delim_code = open_delim_code;
11785
11786 /* If the delimiter has a mirror-image closing one, get it */
11787 const char *tmps = ninstr(legal_paired_opening_delims,
11788 legal_paired_opening_delims_end,
11789 open_delim_str, open_delim_str + delim_byte_len);
11790 if (tmps) {
11791 /* Here, there is a paired delimiter, and tmps points to its position
11792 in the string of the accepted opening paired delimiters. The
11793 corresponding position in the string of closing ones is the
11794 beginning of the paired mate. Both contain the same number of
11795 bytes. */
11796 close_delim_str = legal_paired_closing_delims
11797 + (tmps - legal_paired_opening_delims);
11798
11799 /* The list of paired delimiters contains all the ASCII ones that have
11800 * always been legal, and no other ASCIIs. Don't raise a message if
11801 * using one of these */
11802 if (! isASCII(open_delim_code)) {
11803 Perl_ck_warner_d(aTHX_
11804 packWARN(WARN_EXPERIMENTAL__EXTRA_PAIRED_DELIMITERS),
11805 "Use of '%" UTF8f "' is experimental as a string delimiter",
11806 UTF8fARG(UTF, delim_byte_len, open_delim_str));
11807 }
11808
11809 close_delim_code = (UTF)
11810 ? valid_utf8_to_uvchr((U8 *) close_delim_str, NULL)
11811 : * (U8 *) close_delim_str;
11812 }
11813 else { /* Here, the delimiter isn't paired, hence the close is the same as
11814 the open; and has already been set up. But make sure it isn't
11815 deprecated to use this particular delimiter, as we plan
11816 eventually to make it paired. */
11817 if (ninstr(deprecated_opening_delims, deprecated_delims_end,
11818 open_delim_str, open_delim_str + delim_byte_len))
11819 {
11820 Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED__DELIMITER_WILL_BE_PAIRED),
11821 "Use of '%" UTF8f "' is deprecated as a string delimiter",
11822 UTF8fARG(UTF, delim_byte_len, open_delim_str));
11823 }
11824
11825 /* Note that a NUL may be used as a delimiter, and this happens when
11826 * delimiting an empty string, and no special handling for it is
11827 * needed, as ninstr() calls are used */
11828 }
11829
11830 PL_multi_close = close_delim_code;
11831
11832 if (PL_multi_open == PL_multi_close) {
11833 keep_bracketed_quoted = FALSE;
11834 }
11835
11836 /* create a new SV to hold the contents. 79 is the SV's initial length.
11837 What a random number. */
11838 sv = newSV_type(SVt_PVIV);
11839 sv_grow_fresh(sv, 79);
11840 SvIV_set(sv, close_delim_code);
11841 (void)SvPOK_only(sv); /* validate pointer */
11842
11843 /* move past delimiter and try to read a complete string */
11844 if (keep_delims)
11845 sv_catpvn(sv, s, delim_byte_len);
11846 s += delim_byte_len;
11847 for (;;) {
11848 /* extend sv if need be */
11849 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
11850 /* set 'to' to the next character in the sv's string */
11851 to = SvPVX(sv)+SvCUR(sv);
11852
11853 /* read until we run out of string, or we find the closing delimiter */
11854 while (s < PL_bufend) {
11855 /* embedded newlines increment the line count */
11856 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
11857 COPLINE_INC_WITH_HERELINES;
11858
11859 /* backslashes can escape the closing delimiter */
11860 if ( *s == '\\' && s < PL_bufend - delim_byte_len
11861
11862 /* ... but not if the delimiter itself is a backslash */
11863 && close_delim_code != '\\')
11864 {
11865 /* Here, we have an escaping backslash. If we're supposed to
11866 * discard those that escape the closing delimiter, just
11867 * discard this one */
11868 if ( ! keep_bracketed_quoted
11869 && ( memEQ(s + 1, open_delim_str, delim_byte_len)
11870 || ( PL_multi_open == PL_multi_close
11871 && re_reparse && s[1] == '\\')
11872 || memEQ(s + 1, close_delim_str, delim_byte_len)))
11873 {
11874 s++;
11875 }
11876 else /* any other escapes are simply copied straight through */
11877 *to++ = *s++;
11878 }
11879 else if ( s < PL_bufend - (delim_byte_len - 1)
11880 && memEQ(s, close_delim_str, delim_byte_len)
11881 && --brackets <= 0)
11882 {
11883 /* Found unescaped closing delimiter, unnested if we care about
11884 * that; so are done.
11885 *
11886 * In the case of the opening and closing delimiters being
11887 * different, we have to deal with nesting; the conditional
11888 * above makes sure we don't get here until the nesting level,
11889 * 'brackets', is back down to zero. In the other case,
11890 * nesting isn't an issue, and 'brackets' never can get
11891 * incremented above 0, so will come here at the first closing
11892 * delimiter.
11893 *
11894 * Only grapheme delimiters are legal. */
11895 if ( UTF /* All Non-UTF-8's are graphemes */
11896 && UNLIKELY(! is_grapheme((U8 *) start,
11897 (U8 *) s,
11898 (U8 *) PL_bufend,
11899 close_delim_code)))
11900 {
11901 yyerror(non_grapheme_msg);
11902 }
11903
11904 break;
11905 }
11906 /* No nesting if open eq close */
11907 else if ( PL_multi_open != PL_multi_close
11908 && s < PL_bufend - (delim_byte_len - 1)
11909 && memEQ(s, open_delim_str, delim_byte_len))
11910 {
11911 brackets++;
11912 }
11913
11914 /* Here, still in the middle of the string; copy this character */
11915 if (! UTF || UTF8_IS_INVARIANT((U8) *s)) {
11916 *to++ = *s++;
11917 }
11918 else {
11919 size_t this_char_len = UTF8SKIP(s);
11920 Copy(s, to, this_char_len, char);
11921 s += this_char_len;
11922 to += this_char_len;
11923
11924 d_is_utf8 = TRUE;
11925 }
11926 } /* End of loop through buffer */
11927
11928 /* Here, found end of the string, OR ran out of buffer: terminate the
11929 * copied string and update the sv's end-of-string */
11930 *to = '\0';
11931 SvCUR_set(sv, to - SvPVX_const(sv));
11932
11933 /*
11934 * this next chunk reads more into the buffer if we're not done yet
11935 */
11936
11937 if (s < PL_bufend)
11938 break; /* handle case where we are done yet :-) */
11939
11940 #ifndef PERL_STRICT_CR
11941 if (to - SvPVX_const(sv) >= 2) {
11942 if ( (to[-2] == '\r' && to[-1] == '\n')
11943 || (to[-2] == '\n' && to[-1] == '\r'))
11944 {
11945 to[-2] = '\n';
11946 to--;
11947 SvCUR_set(sv, to - SvPVX_const(sv));
11948 }
11949 else if (to[-1] == '\r')
11950 to[-1] = '\n';
11951 }
11952 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
11953 to[-1] = '\n';
11954 #endif
11955
11956 /* if we're out of file, or a read fails, bail and reset the current
11957 line marker so we can report where the unterminated string began
11958 */
11959 COPLINE_INC_WITH_HERELINES;
11960 PL_bufptr = PL_bufend;
11961 if (!lex_next_chunk(0)) {
11962 ASSUME(sv);
11963 SvREFCNT_dec(sv);
11964 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11965 return NULL;
11966 }
11967 s = start = PL_bufptr;
11968 } /* End of infinite loop */
11969
11970 /* at this point, we have successfully read the delimited string */
11971
11972 if (keep_delims)
11973 sv_catpvn(sv, s, delim_byte_len);
11974 s += delim_byte_len;
11975
11976 if (d_is_utf8)
11977 SvUTF8_on(sv);
11978
11979 PL_multi_end = CopLINE(PL_curcop);
11980 CopLINE_set(PL_curcop, PL_multi_start);
11981 PL_parser->herelines = herelines;
11982
11983 /* if we allocated too much space, give some back */
11984 if (SvCUR(sv) + 5 < SvLEN(sv)) {
11985 SvLEN_set(sv, SvCUR(sv) + 1);
11986 SvPV_shrink_to_cur(sv);
11987 }
11988
11989 /* decide whether this is the first or second quoted string we've read
11990 for this op
11991 */
11992
11993 if (PL_lex_stuff)
11994 PL_parser->lex_sub_repl = sv;
11995 else
11996 PL_lex_stuff = sv;
11997 if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-delim_byte_len : s;
11998 return s;
11999 }
12000
12001 /*
12002 scan_num
12003 takes: pointer to position in buffer
12004 returns: pointer to new position in buffer
12005 side-effects: builds ops for the constant in pl_yylval.op
12006
12007 Read a number in any of the formats that Perl accepts:
12008
12009 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
12010 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
12011 0b[01](_?[01])* binary integers
12012 0o?[0-7](_?[0-7])* octal integers
12013 0x[0-9A-Fa-f](_?[0-9A-Fa-f])* hexadecimal integers
12014 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+ hexadecimal floats
12015
12016 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
12017 thing it reads.
12018
12019 If it reads a number without a decimal point or an exponent, it will
12020 try converting the number to an integer and see if it can do so
12021 without loss of precision.
12022 */
12023
12024 char *
Perl_scan_num(pTHX_ const char * start,YYSTYPE * lvalp)12025 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
12026 {
12027 const char *s = start; /* current position in buffer */
12028 char *d; /* destination in temp buffer */
12029 char *e; /* end of temp buffer */
12030 NV nv; /* number read, as a double */
12031 SV *sv = NULL; /* place to put the converted number */
12032 bool floatit; /* boolean: int or float? */
12033 const char *lastub = NULL; /* position of last underbar */
12034 static const char* const number_too_long = "Number too long";
12035 bool warned_about_underscore = 0;
12036 I32 shift; /* shift per digit for hex/oct/bin, hoisted here for fp */
12037 #define WARN_ABOUT_UNDERSCORE() \
12038 do { \
12039 if (!warned_about_underscore) { \
12040 warned_about_underscore = 1; \
12041 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), \
12042 "Misplaced _ in number"); \
12043 } \
12044 } while(0)
12045 /* Hexadecimal floating point.
12046 *
12047 * In many places (where we have quads and NV is IEEE 754 double)
12048 * we can fit the mantissa bits of a NV into an unsigned quad.
12049 * (Note that UVs might not be quads even when we have quads.)
12050 * This will not work everywhere, though (either no quads, or
12051 * using long doubles), in which case we have to resort to NV,
12052 * which will probably mean horrible loss of precision due to
12053 * multiple fp operations. */
12054 bool hexfp = FALSE;
12055 int total_bits = 0;
12056 int significant_bits = 0;
12057 #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
12058 # define HEXFP_UQUAD
12059 Uquad_t hexfp_uquad = 0;
12060 int hexfp_frac_bits = 0;
12061 #else
12062 # define HEXFP_NV
12063 NV hexfp_nv = 0.0;
12064 #endif
12065 NV hexfp_mult = 1.0;
12066 UV high_non_zero = 0; /* highest digit */
12067 int non_zero_integer_digits = 0;
12068 bool new_octal = FALSE; /* octal with "0o" prefix */
12069
12070 PERL_ARGS_ASSERT_SCAN_NUM;
12071
12072 /* We use the first character to decide what type of number this is */
12073
12074 switch (*s) {
12075 default:
12076 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
12077
12078 /* if it starts with a 0, it could be an octal number, a decimal in
12079 0.13 disguise, or a hexadecimal number, or a binary number. */
12080 case '0':
12081 {
12082 /* variables:
12083 u holds the "number so far"
12084 overflowed was the number more than we can hold?
12085
12086 Shift is used when we add a digit. It also serves as an "are
12087 we in octal/hex/binary?" indicator to disallow hex characters
12088 when in octal mode.
12089 */
12090 NV n = 0.0;
12091 UV u = 0;
12092 bool overflowed = FALSE;
12093 bool just_zero = TRUE; /* just plain 0 or binary number? */
12094 bool has_digs = FALSE;
12095 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
12096 static const char* const bases[5] =
12097 { "", "binary", "", "octal", "hexadecimal" };
12098 static const char* const Bases[5] =
12099 { "", "Binary", "", "Octal", "Hexadecimal" };
12100 static const char* const maxima[5] =
12101 { "",
12102 "0b11111111111111111111111111111111",
12103 "",
12104 "037777777777",
12105 "0xffffffff" };
12106
12107 /* check for hex */
12108 if (isALPHA_FOLD_EQ(s[1], 'x')) {
12109 shift = 4;
12110 s += 2;
12111 just_zero = FALSE;
12112 } else if (isALPHA_FOLD_EQ(s[1], 'b')) {
12113 shift = 1;
12114 s += 2;
12115 just_zero = FALSE;
12116 }
12117 /* check for a decimal in disguise */
12118 else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e'))
12119 goto decimal;
12120 /* so it must be octal */
12121 else {
12122 shift = 3;
12123 s++;
12124 if (isALPHA_FOLD_EQ(*s, 'o')) {
12125 s++;
12126 just_zero = FALSE;
12127 new_octal = TRUE;
12128 }
12129 }
12130
12131 if (*s == '_') {
12132 WARN_ABOUT_UNDERSCORE();
12133 lastub = s++;
12134 }
12135
12136 /* read the rest of the number */
12137 for (;;) {
12138 /* x is used in the overflow test,
12139 b is the digit we're adding on. */
12140 UV x, b;
12141
12142 switch (*s) {
12143
12144 /* if we don't mention it, we're done */
12145 default:
12146 goto out;
12147
12148 /* _ are ignored -- but warned about if consecutive */
12149 case '_':
12150 if (lastub && s == lastub + 1)
12151 WARN_ABOUT_UNDERSCORE();
12152 lastub = s++;
12153 break;
12154
12155 /* 8 and 9 are not octal */
12156 case '8': case '9':
12157 if (shift == 3)
12158 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
12159 /* FALLTHROUGH */
12160
12161 /* octal digits */
12162 case '2': case '3': case '4':
12163 case '5': case '6': case '7':
12164 if (shift == 1)
12165 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
12166 /* FALLTHROUGH */
12167
12168 case '0': case '1':
12169 b = *s++ & 15; /* ASCII digit -> value of digit */
12170 goto digit;
12171
12172 /* hex digits */
12173 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
12174 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
12175 /* make sure they said 0x */
12176 if (shift != 4)
12177 goto out;
12178 b = (*s++ & 7) + 9;
12179
12180 /* Prepare to put the digit we have onto the end
12181 of the number so far. We check for overflows.
12182 */
12183
12184 digit:
12185 just_zero = FALSE;
12186 has_digs = TRUE;
12187 if (!overflowed) {
12188 assert(shift >= 0);
12189 x = u << shift; /* make room for the digit */
12190
12191 total_bits += shift;
12192
12193 if ((x >> shift) != u
12194 && !(PL_hints & HINT_NEW_BINARY)) {
12195 overflowed = TRUE;
12196 n = (NV) u;
12197 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
12198 "Integer overflow in %s number",
12199 bases[shift]);
12200 } else
12201 u = x | b; /* add the digit to the end */
12202 }
12203 if (overflowed) {
12204 n *= nvshift[shift];
12205 /* If an NV has not enough bits in its
12206 * mantissa to represent an UV this summing of
12207 * small low-order numbers is a waste of time
12208 * (because the NV cannot preserve the
12209 * low-order bits anyway): we could just
12210 * remember when did we overflow and in the
12211 * end just multiply n by the right
12212 * amount. */
12213 n += (NV) b;
12214 }
12215
12216 if (high_non_zero == 0 && b > 0)
12217 high_non_zero = b;
12218
12219 if (high_non_zero)
12220 non_zero_integer_digits++;
12221
12222 /* this could be hexfp, but peek ahead
12223 * to avoid matching ".." */
12224 if (UNLIKELY(HEXFP_PEEK(s))) {
12225 goto out;
12226 }
12227
12228 break;
12229 }
12230 }
12231
12232 /* if we get here, we had success: make a scalar value from
12233 the number.
12234 */
12235 out:
12236
12237 /* final misplaced underbar check */
12238 if (s[-1] == '_')
12239 WARN_ABOUT_UNDERSCORE();
12240
12241 if (UNLIKELY(HEXFP_PEEK(s))) {
12242 /* Do sloppy (on the underbars) but quick detection
12243 * (and value construction) for hexfp, the decimal
12244 * detection will shortly be more thorough with the
12245 * underbar checks. */
12246 const char* h = s;
12247 significant_bits = non_zero_integer_digits * shift;
12248 #ifdef HEXFP_UQUAD
12249 hexfp_uquad = u;
12250 #else /* HEXFP_NV */
12251 hexfp_nv = u;
12252 #endif
12253 /* Ignore the leading zero bits of
12254 * the high (first) non-zero digit. */
12255 if (high_non_zero) {
12256 if (high_non_zero < 0x8)
12257 significant_bits--;
12258 if (high_non_zero < 0x4)
12259 significant_bits--;
12260 if (high_non_zero < 0x2)
12261 significant_bits--;
12262 }
12263
12264 if (*h == '.') {
12265 #ifdef HEXFP_NV
12266 NV nv_mult = 1.0;
12267 #endif
12268 bool accumulate = TRUE;
12269 U8 b = 0; /* silence compiler warning */
12270 int lim = 1 << shift;
12271 for (h++; ((isXDIGIT(*h) && (b = XDIGIT_VALUE(*h)) < lim) ||
12272 *h == '_'); h++) {
12273 if (isXDIGIT(*h)) {
12274 significant_bits += shift;
12275 #ifdef HEXFP_UQUAD
12276 if (accumulate) {
12277 if (significant_bits < NV_MANT_DIG) {
12278 /* We are in the long "run" of xdigits,
12279 * accumulate the full four bits. */
12280 assert(shift >= 0);
12281 hexfp_uquad <<= shift;
12282 hexfp_uquad |= b;
12283 hexfp_frac_bits += shift;
12284 } else if (significant_bits - shift < NV_MANT_DIG) {
12285 /* We are at a hexdigit either at,
12286 * or straddling, the edge of mantissa.
12287 * We will try grabbing as many as
12288 * possible bits. */
12289 int tail =
12290 significant_bits - NV_MANT_DIG;
12291 if (tail <= 0)
12292 tail += shift;
12293 assert(tail >= 0);
12294 hexfp_uquad <<= tail;
12295 assert((shift - tail) >= 0);
12296 hexfp_uquad |= b >> (shift - tail);
12297 hexfp_frac_bits += tail;
12298
12299 /* Ignore the trailing zero bits
12300 * of the last non-zero xdigit.
12301 *
12302 * The assumption here is that if
12303 * one has input of e.g. the xdigit
12304 * eight (0x8), there is only one
12305 * bit being input, not the full
12306 * four bits. Conversely, if one
12307 * specifies a zero xdigit, the
12308 * assumption is that one really
12309 * wants all those bits to be zero. */
12310 if (b) {
12311 if ((b & 0x1) == 0x0) {
12312 significant_bits--;
12313 if ((b & 0x2) == 0x0) {
12314 significant_bits--;
12315 if ((b & 0x4) == 0x0) {
12316 significant_bits--;
12317 }
12318 }
12319 }
12320 }
12321
12322 accumulate = FALSE;
12323 }
12324 } else {
12325 /* Keep skipping the xdigits, and
12326 * accumulating the significant bits,
12327 * but do not shift the uquad
12328 * (which would catastrophically drop
12329 * high-order bits) or accumulate the
12330 * xdigits anymore. */
12331 }
12332 #else /* HEXFP_NV */
12333 if (accumulate) {
12334 nv_mult /= nvshift[shift];
12335 if (nv_mult > 0.0)
12336 hexfp_nv += b * nv_mult;
12337 else
12338 accumulate = FALSE;
12339 }
12340 #endif
12341 }
12342 if (significant_bits >= NV_MANT_DIG)
12343 accumulate = FALSE;
12344 }
12345 }
12346
12347 if ((total_bits > 0 || significant_bits > 0) &&
12348 isALPHA_FOLD_EQ(*h, 'p')) {
12349 bool negexp = FALSE;
12350 h++;
12351 if (*h == '+')
12352 h++;
12353 else if (*h == '-') {
12354 negexp = TRUE;
12355 h++;
12356 }
12357 if (isDIGIT(*h)) {
12358 I32 hexfp_exp = 0;
12359 while (isDIGIT(*h) || *h == '_') {
12360 if (isDIGIT(*h)) {
12361 hexfp_exp *= 10;
12362 hexfp_exp += *h - '0';
12363 #ifdef NV_MIN_EXP
12364 if (negexp
12365 && -hexfp_exp < NV_MIN_EXP - 1) {
12366 /* NOTE: this means that the exponent
12367 * underflow warning happens for
12368 * the IEEE 754 subnormals (denormals),
12369 * because DBL_MIN_EXP etc are the lowest
12370 * possible binary (or, rather, DBL_RADIX-base)
12371 * exponent for normals, not subnormals.
12372 *
12373 * This may or may not be a good thing. */
12374 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
12375 "Hexadecimal float: exponent underflow");
12376 break;
12377 }
12378 #endif
12379 #ifdef NV_MAX_EXP
12380 if (!negexp
12381 && hexfp_exp > NV_MAX_EXP - 1) {
12382 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
12383 "Hexadecimal float: exponent overflow");
12384 break;
12385 }
12386 #endif
12387 }
12388 h++;
12389 }
12390 if (negexp)
12391 hexfp_exp = -hexfp_exp;
12392 #ifdef HEXFP_UQUAD
12393 hexfp_exp -= hexfp_frac_bits;
12394 #endif
12395 hexfp_mult = Perl_pow(2.0, hexfp_exp);
12396 hexfp = TRUE;
12397 goto decimal;
12398 }
12399 }
12400 }
12401
12402 if (!just_zero && !has_digs) {
12403 /* 0x, 0o or 0b with no digits, treat it as an error.
12404 Originally this backed up the parse before the b or
12405 x, but that has the potential for silent changes in
12406 behaviour, like for: "0x.3" and "0x+$foo".
12407 */
12408 const char *d = s;
12409 char *oldbp = PL_bufptr;
12410 if (*d) ++d; /* so the user sees the bad non-digit */
12411 PL_bufptr = (char *)d; /* so yyerror reports the context */
12412 yyerror(Perl_form(aTHX_ "No digits found for %s literal",
12413 bases[shift]));
12414 PL_bufptr = oldbp;
12415 }
12416
12417 if (overflowed) {
12418 if (n > 4294967295.0)
12419 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12420 "%s number > %s non-portable",
12421 Bases[shift],
12422 new_octal ? "0o37777777777" : maxima[shift]);
12423 sv = newSVnv(n);
12424 }
12425 else {
12426 #if UVSIZE > 4
12427 if (u > 0xffffffff)
12428 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12429 "%s number > %s non-portable",
12430 Bases[shift],
12431 new_octal ? "0o37777777777" : maxima[shift]);
12432 #endif
12433 sv = newSVuv(u);
12434 }
12435 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
12436 sv = new_constant(start, s - start, "integer",
12437 sv, NULL, NULL, 0, NULL);
12438 else if (PL_hints & HINT_NEW_BINARY)
12439 sv = new_constant(start, s - start, "binary",
12440 sv, NULL, NULL, 0, NULL);
12441 }
12442 break;
12443
12444 /*
12445 handle decimal numbers.
12446 we're also sent here when we read a 0 as the first digit
12447 */
12448 case '1': case '2': case '3': case '4': case '5':
12449 case '6': case '7': case '8': case '9': case '.':
12450 decimal:
12451 d = PL_tokenbuf;
12452 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
12453 floatit = FALSE;
12454 if (hexfp) {
12455 floatit = TRUE;
12456 *d++ = '0';
12457 switch (shift) {
12458 case 4:
12459 *d++ = 'x';
12460 s = start + 2;
12461 break;
12462 case 3:
12463 if (new_octal) {
12464 *d++ = 'o';
12465 s = start + 2;
12466 break;
12467 }
12468 s = start + 1;
12469 break;
12470 case 1:
12471 *d++ = 'b';
12472 s = start + 2;
12473 break;
12474 default:
12475 NOT_REACHED; /* NOTREACHED */
12476 }
12477 }
12478
12479 /* read next group of digits and _ and copy into d */
12480 while (isDIGIT(*s)
12481 || *s == '_'
12482 || UNLIKELY(hexfp && isXDIGIT(*s)))
12483 {
12484 /* skip underscores, checking for misplaced ones
12485 if -w is on
12486 */
12487 if (*s == '_') {
12488 if (lastub && s == lastub + 1)
12489 WARN_ABOUT_UNDERSCORE();
12490 lastub = s++;
12491 }
12492 else {
12493 /* check for end of fixed-length buffer */
12494 if (d >= e)
12495 Perl_croak(aTHX_ "%s", number_too_long);
12496 /* if we're ok, copy the character */
12497 *d++ = *s++;
12498 }
12499 }
12500
12501 /* final misplaced underbar check */
12502 if (lastub && s == lastub + 1)
12503 WARN_ABOUT_UNDERSCORE();
12504
12505 /* read a decimal portion if there is one. avoid
12506 3..5 being interpreted as the number 3. followed
12507 by .5
12508 */
12509 if (*s == '.' && s[1] != '.') {
12510 floatit = TRUE;
12511 *d++ = *s++;
12512
12513 if (*s == '_') {
12514 WARN_ABOUT_UNDERSCORE();
12515 lastub = s;
12516 }
12517
12518 /* copy, ignoring underbars, until we run out of digits.
12519 */
12520 for (; isDIGIT(*s)
12521 || *s == '_'
12522 || UNLIKELY(hexfp && isXDIGIT(*s));
12523 s++)
12524 {
12525 /* fixed length buffer check */
12526 if (d >= e)
12527 Perl_croak(aTHX_ "%s", number_too_long);
12528 if (*s == '_') {
12529 if (lastub && s == lastub + 1)
12530 WARN_ABOUT_UNDERSCORE();
12531 lastub = s;
12532 }
12533 else
12534 *d++ = *s;
12535 }
12536 /* fractional part ending in underbar? */
12537 if (s[-1] == '_')
12538 WARN_ABOUT_UNDERSCORE();
12539 if (*s == '.' && isDIGIT(s[1])) {
12540 /* oops, it's really a v-string, but without the "v" */
12541 s = start;
12542 goto vstring;
12543 }
12544 }
12545
12546 /* read exponent part, if present */
12547 if ((isALPHA_FOLD_EQ(*s, 'e')
12548 || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
12549 && memCHRs("+-0123456789_", s[1]))
12550 {
12551 int exp_digits = 0;
12552 const char *save_s = s;
12553 char * save_d = d;
12554
12555 /* regardless of whether user said 3E5 or 3e5, use lower 'e',
12556 ditto for p (hexfloats) */
12557 if ((isALPHA_FOLD_EQ(*s, 'e'))) {
12558 /* At least some Mach atof()s don't grok 'E' */
12559 *d++ = 'e';
12560 }
12561 else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) {
12562 *d++ = 'p';
12563 }
12564
12565 s++;
12566
12567
12568 /* stray preinitial _ */
12569 if (*s == '_') {
12570 WARN_ABOUT_UNDERSCORE();
12571 lastub = s++;
12572 }
12573
12574 /* allow positive or negative exponent */
12575 if (*s == '+' || *s == '-')
12576 *d++ = *s++;
12577
12578 /* stray initial _ */
12579 if (*s == '_') {
12580 WARN_ABOUT_UNDERSCORE();
12581 lastub = s++;
12582 }
12583
12584 /* read digits of exponent */
12585 while (isDIGIT(*s) || *s == '_') {
12586 if (isDIGIT(*s)) {
12587 ++exp_digits;
12588 if (d >= e)
12589 Perl_croak(aTHX_ "%s", number_too_long);
12590 *d++ = *s++;
12591 }
12592 else {
12593 if (((lastub && s == lastub + 1)
12594 || (!isDIGIT(s[1]) && s[1] != '_')))
12595 WARN_ABOUT_UNDERSCORE();
12596 lastub = s++;
12597 }
12598 }
12599
12600 if (!exp_digits) {
12601 /* no exponent digits, the [eEpP] could be for something else,
12602 * though in practice we don't get here for p since that's preparsed
12603 * earlier, and results in only the 0xX being consumed, so behave similarly
12604 * for decimal floats and consume only the D.DD, leaving the [eE] to the
12605 * next token.
12606 */
12607 s = save_s;
12608 d = save_d;
12609 }
12610 else {
12611 floatit = TRUE;
12612 }
12613 }
12614
12615
12616 /*
12617 We try to do an integer conversion first if no characters
12618 indicating "float" have been found.
12619 */
12620
12621 if (!floatit) {
12622 UV uv;
12623 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
12624
12625 if (flags == IS_NUMBER_IN_UV) {
12626 if (uv <= IV_MAX)
12627 sv = newSViv(uv); /* Prefer IVs over UVs. */
12628 else
12629 sv = newSVuv(uv);
12630 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
12631 if (uv <= (UV) IV_MIN)
12632 sv = newSViv(-(IV)uv);
12633 else
12634 floatit = TRUE;
12635 } else
12636 floatit = TRUE;
12637 }
12638 if (floatit) {
12639 /* terminate the string */
12640 *d = '\0';
12641 if (UNLIKELY(hexfp)) {
12642 # ifdef NV_MANT_DIG
12643 if (significant_bits > NV_MANT_DIG)
12644 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
12645 "Hexadecimal float: mantissa overflow");
12646 # endif
12647 #ifdef HEXFP_UQUAD
12648 nv = hexfp_uquad * hexfp_mult;
12649 #else /* HEXFP_NV */
12650 nv = hexfp_nv * hexfp_mult;
12651 #endif
12652 } else {
12653 nv = Atof(PL_tokenbuf);
12654 }
12655 sv = newSVnv(nv);
12656 }
12657
12658 if ( floatit
12659 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
12660 const char *const key = floatit ? "float" : "integer";
12661 const STRLEN keylen = floatit ? 5 : 7;
12662 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
12663 key, keylen, sv, NULL, NULL, 0, NULL);
12664 }
12665 break;
12666
12667 /* if it starts with a v, it could be a v-string */
12668 case 'v':
12669 vstring:
12670 sv = newSV(5); /* preallocate storage space */
12671 ENTER_with_name("scan_vstring");
12672 SAVEFREESV(sv);
12673 s = scan_vstring(s, PL_bufend, sv);
12674 SvREFCNT_inc_simple_void_NN(sv);
12675 LEAVE_with_name("scan_vstring");
12676 break;
12677 }
12678
12679 /* make the op for the constant and return */
12680
12681 if (sv)
12682 lvalp->opval = newSVOP(OP_CONST, 0, sv);
12683 else
12684 lvalp->opval = NULL;
12685
12686 return (char *)s;
12687 }
12688
12689 STATIC char *
S_scan_formline(pTHX_ char * s)12690 S_scan_formline(pTHX_ char *s)
12691 {
12692 SV * const stuff = newSVpvs("");
12693 bool needargs = FALSE;
12694 bool eofmt = FALSE;
12695
12696 PERL_ARGS_ASSERT_SCAN_FORMLINE;
12697
12698 while (!needargs) {
12699 char *eol;
12700 if (*s == '.') {
12701 char *t = s+1;
12702 #ifdef PERL_STRICT_CR
12703 while (SPACE_OR_TAB(*t))
12704 t++;
12705 #else
12706 while (SPACE_OR_TAB(*t) || *t == '\r')
12707 t++;
12708 #endif
12709 if (*t == '\n' || t == PL_bufend) {
12710 eofmt = TRUE;
12711 break;
12712 }
12713 }
12714 eol = (char *) memchr(s,'\n',PL_bufend-s);
12715 if (! eol) {
12716 eol = PL_bufend;
12717 }
12718 else {
12719 eol++;
12720 }
12721 if (*s != '#') {
12722 char *t;
12723 for (t = s; t < eol; t++) {
12724 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
12725 needargs = FALSE;
12726 goto enough; /* ~~ must be first line in formline */
12727 }
12728 if (*t == '@' || *t == '^')
12729 needargs = TRUE;
12730 }
12731 if (eol > s) {
12732 sv_catpvn(stuff, s, eol-s);
12733 #ifndef PERL_STRICT_CR
12734 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
12735 char *end = SvPVX(stuff) + SvCUR(stuff);
12736 end[-2] = '\n';
12737 end[-1] = '\0';
12738 SvCUR_set(stuff, SvCUR(stuff) - 1);
12739 }
12740 #endif
12741 }
12742 else
12743 break;
12744 }
12745 s = (char*)eol;
12746 if ((PL_rsfp || PL_parser->filtered)
12747 && PL_parser->form_lex_state == LEX_NORMAL) {
12748 bool got_some;
12749 PL_bufptr = PL_bufend;
12750 COPLINE_INC_WITH_HERELINES;
12751 got_some = lex_next_chunk(0);
12752 CopLINE_dec(PL_curcop);
12753 s = PL_bufptr;
12754 if (!got_some)
12755 break;
12756 }
12757 incline(s, PL_bufend);
12758 }
12759 enough:
12760 if (!SvCUR(stuff) || needargs)
12761 PL_lex_state = PL_parser->form_lex_state;
12762 if (SvCUR(stuff)) {
12763 PL_expect = XSTATE;
12764 if (needargs) {
12765 const char *s2 = s;
12766 while (isSPACE(*s2) && *s2 != '\n')
12767 s2++;
12768 if (*s2 == '{') {
12769 PL_expect = XTERMBLOCK;
12770 NEXTVAL_NEXTTOKE.ival = 0;
12771 force_next(KW_DO);
12772 }
12773 NEXTVAL_NEXTTOKE.ival = 0;
12774 force_next(FORMLBRACK);
12775 }
12776 if (!IN_BYTES) {
12777 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
12778 SvUTF8_on(stuff);
12779 }
12780 NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0, stuff);
12781 force_next(THING);
12782 }
12783 else {
12784 SvREFCNT_dec(stuff);
12785 if (eofmt)
12786 PL_lex_formbrack = 0;
12787 }
12788 return s;
12789 }
12790
12791 /*
12792 =for apidoc start_subparse
12793
12794 Set things up for parsing a subroutine.
12795
12796 If C<is_format> is non-zero, the input is to be considered a format sub
12797 (a specialised sub used to implement perl's C<format> feature); else a
12798 normal C<sub>.
12799
12800 C<flags> are added to the flags for C<PL_compcv>. C<flags> may include the
12801 C<CVf_IsMETHOD> bit, which causes the new subroutine to be a method.
12802
12803 This returns the value of C<PL_savestack_ix> that was in effect upon entry to
12804 the function;
12805
12806 =cut
12807 */
12808
12809 I32
Perl_start_subparse(pTHX_ I32 is_format,U32 flags)12810 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
12811 {
12812 const I32 oldsavestack_ix = PL_savestack_ix;
12813 CV* const outsidecv = PL_compcv;
12814 bool is_method = flags & CVf_IsMETHOD;
12815
12816 if (is_method)
12817 croak_kw_unless_class("method");
12818
12819 SAVEI32(PL_subline);
12820 save_item(PL_subname);
12821 SAVESPTR(PL_compcv);
12822
12823 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
12824 CvFLAGS(PL_compcv) |= flags;
12825
12826 PL_subline = CopLINE(PL_curcop);
12827 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
12828 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
12829 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
12830 if (outsidecv && CvPADLIST(outsidecv))
12831 CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
12832 if (is_method)
12833 class_prepare_method_parse(PL_compcv);
12834
12835 return oldsavestack_ix;
12836 }
12837
12838 /* If o represents a builtin attribute, apply it to cv and returns true.
12839 * Otherwise does nothing and returns false
12840 */
12841
12842 STATIC bool
S_apply_builtin_cv_attribute(pTHX_ CV * cv,OP * o)12843 S_apply_builtin_cv_attribute(pTHX_ CV *cv, OP *o)
12844 {
12845 assert(o->op_type == OP_CONST);
12846 SV *sv = cSVOPo_sv;
12847 STRLEN len = SvCUR(sv);
12848
12849 /* NOTE: any CV attrs applied here need to be part of
12850 the CVf_BUILTIN_ATTRS define in cv.h! */
12851
12852 if(memEQs(SvPVX(sv), len, "lvalue"))
12853 CvLVALUE_on(cv);
12854 else if(memEQs(SvPVX(sv), len, "method"))
12855 CvNOWARN_AMBIGUOUS_on(cv);
12856 else if(memEQs(SvPVX(sv), len, "const")) {
12857 CvANONCONST_on(cv);
12858 if (!CvANON(cv))
12859 yyerror(":const is not permitted on named subroutines");
12860 }
12861 else
12862 return false;
12863
12864 return true;
12865 }
12866
12867 /*
12868 =for apidoc apply_builtin_cv_attributes
12869
12870 Given an OP_LIST containing attribute definitions, filter it for known builtin
12871 attributes to apply to the cv, returning a possibly-smaller list containing
12872 just the remaining ones.
12873
12874 =cut
12875 */
12876
12877 OP *
Perl_apply_builtin_cv_attributes(pTHX_ CV * cv,OP * attrlist)12878 Perl_apply_builtin_cv_attributes(pTHX_ CV *cv, OP *attrlist)
12879 {
12880 PERL_ARGS_ASSERT_APPLY_BUILTIN_CV_ATTRIBUTES;
12881
12882 if(!attrlist)
12883 return attrlist;
12884
12885 if(attrlist->op_type != OP_LIST) {
12886 /* Not in fact a list but just a single attribute */
12887 if(S_apply_builtin_cv_attribute(aTHX_ cv, attrlist)) {
12888 op_free(attrlist);
12889 return NULL;
12890 }
12891
12892 return attrlist;
12893 }
12894
12895 OP *prev = cLISTOPx(attrlist)->op_first;
12896 assert(prev->op_type == OP_PUSHMARK);
12897 OP *o = OpSIBLING(prev);
12898
12899 OP *next;
12900 for(; o; o = next) {
12901 next = OpSIBLING(o);
12902
12903 if(S_apply_builtin_cv_attribute(aTHX_ cv, o)) {
12904 op_sibling_splice(attrlist, prev, 1, NULL);
12905 op_free(o);
12906 }
12907 else {
12908 prev = o;
12909 }
12910 }
12911
12912 if(OpHAS_SIBLING(cLISTOPx(attrlist)->op_first))
12913 return attrlist;
12914
12915 /* The list is now entirely empty, we might as well discard it */
12916 op_free(attrlist);
12917 return NULL;
12918 }
12919
12920
12921 /* Do extra initialisation of a CV (typically one just created by
12922 * start_subparse()) if that CV is for a named sub
12923 */
12924
12925 void
Perl_init_named_cv(pTHX_ CV * cv,OP * nameop)12926 Perl_init_named_cv(pTHX_ CV *cv, OP *nameop)
12927 {
12928 PERL_ARGS_ASSERT_INIT_NAMED_CV;
12929
12930 if (nameop->op_type == OP_CONST) {
12931 const char *const name = SvPV_nolen_const(((SVOP*)nameop)->op_sv);
12932 if ( strEQ(name, "BEGIN")
12933 || strEQ(name, "END")
12934 || strEQ(name, "INIT")
12935 || strEQ(name, "CHECK")
12936 || strEQ(name, "UNITCHECK")
12937 )
12938 CvSPECIAL_on(cv);
12939 }
12940 else
12941 /* State subs inside anonymous subs need to be
12942 clonable themselves. */
12943 if ( CvANON(CvOUTSIDE(cv))
12944 || CvCLONE(CvOUTSIDE(cv))
12945 || !PadnameIsSTATE(PadlistNAMESARRAY(CvPADLIST(
12946 CvOUTSIDE(cv)
12947 ))[nameop->op_targ])
12948 )
12949 CvCLONE_on(cv);
12950 }
12951
12952
12953 static int
S_yywarn(pTHX_ const char * const s,U32 flags)12954 S_yywarn(pTHX_ const char *const s, U32 flags)
12955 {
12956 PERL_ARGS_ASSERT_YYWARN;
12957
12958 PL_in_eval |= EVAL_WARNONLY;
12959 yyerror_pv(s, flags);
12960 return 0;
12961 }
12962
12963 void
Perl_abort_execution(pTHX_ SV * msg_sv,const char * const name)12964 Perl_abort_execution(pTHX_ SV* msg_sv, const char * const name)
12965 {
12966 PERL_ARGS_ASSERT_ABORT_EXECUTION;
12967
12968 if (msg_sv) {
12969 if (PL_minus_c)
12970 Perl_croak(aTHX_ "%" SVf "%s had compilation errors.\n", SVfARG(msg_sv), name);
12971 else {
12972 Perl_croak(aTHX_
12973 "%" SVf "Execution of %s aborted due to compilation errors.\n", SVfARG(msg_sv), name);
12974 }
12975 } else {
12976 if (PL_minus_c)
12977 Perl_croak(aTHX_ "%s had compilation errors.\n", name);
12978 else {
12979 Perl_croak(aTHX_
12980 "Execution of %s aborted due to compilation errors.\n", name);
12981 }
12982 }
12983
12984 NOT_REACHED; /* NOTREACHED */
12985 }
12986
12987 void
Perl_yyquit(pTHX)12988 Perl_yyquit(pTHX)
12989 {
12990 /* Called, after at least one error has been found, to abort the parse now,
12991 * instead of trying to forge ahead */
12992
12993 yyerror_pvn(NULL, 0, 0);
12994 }
12995
12996 int
Perl_yyerror(pTHX_ const char * const s)12997 Perl_yyerror(pTHX_ const char *const s)
12998 {
12999 PERL_ARGS_ASSERT_YYERROR;
13000 int r = yyerror_pvn(s, strlen(s), 0);
13001 return r;
13002 }
13003
13004 int
Perl_yyerror_pv(pTHX_ const char * const s,U32 flags)13005 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
13006 {
13007 PERL_ARGS_ASSERT_YYERROR_PV;
13008 int r = yyerror_pvn(s, strlen(s), flags);
13009 return r;
13010 }
13011
13012 int
Perl_yyerror_pvn(pTHX_ const char * const s,STRLEN len,U32 flags)13013 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
13014 {
13015 const char *context = NULL;
13016 int contlen = -1;
13017 SV *msg;
13018 SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
13019 int yychar = PL_parser->yychar;
13020
13021 /* Output error message 's' with length 'len'. 'flags' are SV flags that
13022 * apply. If the number of errors found is large enough, it abandons
13023 * parsing. If 's' is NULL, there is no message, and it abandons
13024 * processing unconditionally */
13025
13026 if (s != NULL) {
13027 if (!yychar || (yychar == PERLY_SEMICOLON && !PL_rsfp))
13028 sv_catpvs(where_sv, "at EOF");
13029 else if ( PL_oldoldbufptr
13030 && PL_bufptr > PL_oldoldbufptr
13031 && PL_bufptr - PL_oldoldbufptr < 200
13032 && PL_oldoldbufptr != PL_oldbufptr
13033 && PL_oldbufptr != PL_bufptr)
13034 {
13035 while (isSPACE(*PL_oldoldbufptr))
13036 PL_oldoldbufptr++;
13037 context = PL_oldoldbufptr;
13038 contlen = PL_bufptr - PL_oldoldbufptr;
13039 }
13040 else if ( PL_oldbufptr
13041 && PL_bufptr > PL_oldbufptr
13042 && PL_bufptr - PL_oldbufptr < 200
13043 && PL_oldbufptr != PL_bufptr)
13044 {
13045 while (isSPACE(*PL_oldbufptr))
13046 PL_oldbufptr++;
13047 context = PL_oldbufptr;
13048 contlen = PL_bufptr - PL_oldbufptr;
13049 }
13050 else if (yychar > 255)
13051 sv_catpvs(where_sv, "next token ???");
13052 else if (yychar == YYEMPTY) {
13053 if (PL_lex_state == LEX_NORMAL)
13054 sv_catpvs(where_sv, "at end of line");
13055 else if (PL_lex_inpat)
13056 sv_catpvs(where_sv, "within pattern");
13057 else
13058 sv_catpvs(where_sv, "within string");
13059 }
13060 else {
13061 sv_catpvs(where_sv, "next char ");
13062 if (yychar < 32)
13063 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
13064 else if (isPRINT_LC(yychar)) {
13065 const char string = yychar;
13066 sv_catpvn(where_sv, &string, 1);
13067 }
13068 else
13069 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
13070 }
13071 msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
13072 Perl_sv_catpvf(aTHX_ msg, " at %s line %" LINE_Tf ", ",
13073 OutCopFILE(PL_curcop),
13074 (PL_parser->preambling == NOLINE
13075 ? CopLINE(PL_curcop)
13076 : PL_parser->preambling));
13077 if (context)
13078 Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n",
13079 UTF8fARG(UTF, contlen, context));
13080 else
13081 Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv));
13082 if ( PL_multi_start < PL_multi_end
13083 && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1)
13084 {
13085 Perl_sv_catpvf(aTHX_ msg,
13086 " (Might be a runaway multi-line %c%c string starting on"
13087 " line %" LINE_Tf ")\n",
13088 (int)PL_multi_open,(int)PL_multi_close,(line_t)PL_multi_start);
13089 PL_multi_end = 0;
13090 }
13091 if (PL_in_eval & EVAL_WARNONLY) {
13092 PL_in_eval &= ~EVAL_WARNONLY;
13093 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg));
13094 }
13095 else {
13096 qerror(msg);
13097 }
13098 }
13099 /* if there was no message then this is a yyquit(), which is actualy handled
13100 * by qerror() with a NULL argument */
13101 if (s == NULL)
13102 qerror(NULL);
13103
13104 PL_in_my = 0;
13105 PL_in_my_stash = NULL;
13106 return 0;
13107 }
13108
13109 STATIC char*
S_swallow_bom(pTHX_ U8 * s)13110 S_swallow_bom(pTHX_ U8 *s)
13111 {
13112 const STRLEN slen = SvCUR(PL_linestr);
13113
13114 PERL_ARGS_ASSERT_SWALLOW_BOM;
13115
13116 switch (s[0]) {
13117 case 0xFF:
13118 if (s[1] == 0xFE) {
13119 /* UTF-16 little-endian? (or UTF-32LE?) */
13120 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
13121 /* diag_listed_as: Unsupported script encoding %s */
13122 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
13123 #ifndef PERL_NO_UTF16_FILTER
13124 #ifdef DEBUGGING
13125 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
13126 #endif
13127 s += 2;
13128 if (PL_bufend > (char*)s) {
13129 s = add_utf16_textfilter(s, TRUE);
13130 }
13131 #else
13132 /* diag_listed_as: Unsupported script encoding %s */
13133 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13134 #endif
13135 }
13136 break;
13137 case 0xFE:
13138 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
13139 #ifndef PERL_NO_UTF16_FILTER
13140 #ifdef DEBUGGING
13141 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
13142 #endif
13143 s += 2;
13144 if (PL_bufend > (char *)s) {
13145 s = add_utf16_textfilter(s, FALSE);
13146 }
13147 #else
13148 /* diag_listed_as: Unsupported script encoding %s */
13149 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13150 #endif
13151 }
13152 break;
13153 case BOM_UTF8_FIRST_BYTE: {
13154 if (memBEGINs(s+1, slen - 1, BOM_UTF8_TAIL)) {
13155 #ifdef DEBUGGING
13156 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13157 #endif
13158 s += sizeof(BOM_UTF8) - 1; /* UTF-8 */
13159 }
13160 break;
13161 }
13162 case 0:
13163 if (slen > 3) {
13164 if (s[1] == 0) {
13165 if (s[2] == 0xFE && s[3] == 0xFF) {
13166 /* UTF-32 big-endian */
13167 /* diag_listed_as: Unsupported script encoding %s */
13168 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
13169 }
13170 }
13171 else if (s[2] == 0 && s[3] != 0) {
13172 /* Leading bytes
13173 * 00 xx 00 xx
13174 * are a good indicator of UTF-16BE. */
13175 #ifndef PERL_NO_UTF16_FILTER
13176 #ifdef DEBUGGING
13177 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
13178 #endif
13179 s = add_utf16_textfilter(s, FALSE);
13180 #else
13181 /* diag_listed_as: Unsupported script encoding %s */
13182 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13183 #endif
13184 }
13185 }
13186 break;
13187
13188 default:
13189 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
13190 /* Leading bytes
13191 * xx 00 xx 00
13192 * are a good indicator of UTF-16LE. */
13193 #ifndef PERL_NO_UTF16_FILTER
13194 #ifdef DEBUGGING
13195 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
13196 #endif
13197 s = add_utf16_textfilter(s, TRUE);
13198 #else
13199 /* diag_listed_as: Unsupported script encoding %s */
13200 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13201 #endif
13202 }
13203 }
13204 return (char*)s;
13205 }
13206
13207
13208 #ifndef PERL_NO_UTF16_FILTER
13209 static I32
S_utf16_textfilter(pTHX_ int idx,SV * sv,int maxlen)13210 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
13211 {
13212 SV *const filter = FILTER_DATA(idx);
13213 /* We re-use this each time round, throwing the contents away before we
13214 return. */
13215 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
13216 SV *const utf8_buffer = filter;
13217 IV status = IoPAGE(filter);
13218 const bool reverse = cBOOL(IoLINES(filter));
13219 I32 retval;
13220
13221 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
13222
13223 /* As we're automatically added, at the lowest level, and hence only called
13224 from this file, we can be sure that we're not called in block mode. Hence
13225 don't bother writing code to deal with block mode. */
13226 if (maxlen) {
13227 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
13228 }
13229 if (status < 0) {
13230 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%" IVdf ")", status);
13231 }
13232 DEBUG_P(PerlIO_printf(Perl_debug_log,
13233 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
13234 FPTR2DPTR(void *, S_utf16_textfilter),
13235 reverse ? 'l' : 'b', idx, maxlen, status,
13236 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13237
13238 while (1) {
13239 STRLEN chars;
13240 STRLEN have;
13241 Size_t newlen;
13242 U8 *end;
13243 /* First, look in our buffer of existing UTF-8 data: */
13244 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
13245
13246 if (nl) {
13247 ++nl;
13248 } else if (status == 0) {
13249 /* EOF */
13250 IoPAGE(filter) = 0;
13251 nl = SvEND(utf8_buffer);
13252 }
13253 if (nl) {
13254 STRLEN got = nl - SvPVX(utf8_buffer);
13255 /* Did we have anything to append? */
13256 retval = got != 0;
13257 sv_catpvn(sv, SvPVX(utf8_buffer), got);
13258 /* Everything else in this code works just fine if SVp_POK isn't
13259 set. This, however, needs it, and we need it to work, else
13260 we loop infinitely because the buffer is never consumed. */
13261 sv_chop(utf8_buffer, nl);
13262 break;
13263 }
13264
13265 /* OK, not a complete line there, so need to read some more UTF-16.
13266 Read an extra octect if the buffer currently has an odd number. */
13267 while (1) {
13268 if (status <= 0)
13269 break;
13270 if (SvCUR(utf16_buffer) >= 2) {
13271 /* Location of the high octet of the last complete code point.
13272 Gosh, UTF-16 is a pain. All the benefits of variable length,
13273 *coupled* with all the benefits of partial reads and
13274 endianness. */
13275 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
13276 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
13277
13278 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
13279 break;
13280 }
13281
13282 /* We have the first half of a surrogate. Read more. */
13283 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
13284 }
13285
13286 status = FILTER_READ(idx + 1, utf16_buffer,
13287 160 + (SvCUR(utf16_buffer) & 1));
13288 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%" IVdf " SvCUR(sv)=%" UVuf "\n", status, (UV)SvCUR(utf16_buffer)));
13289 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
13290 if (status < 0) {
13291 /* Error */
13292 IoPAGE(filter) = status;
13293 return status;
13294 }
13295 }
13296
13297 /* 'chars' isn't quite the right name, as code points above 0xFFFF
13298 * require 4 bytes per char */
13299 chars = SvCUR(utf16_buffer) >> 1;
13300 have = SvCUR(utf8_buffer);
13301
13302 /* Assume the worst case size as noted by the functions: twice the
13303 * number of input bytes */
13304 SvGROW(utf8_buffer, have + chars * 4 + 1);
13305
13306 if (reverse) {
13307 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
13308 (U8*)SvPVX_const(utf8_buffer) + have,
13309 chars * 2, &newlen);
13310 } else {
13311 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
13312 (U8*)SvPVX_const(utf8_buffer) + have,
13313 chars * 2, &newlen);
13314 }
13315 SvCUR_set(utf8_buffer, have + newlen);
13316 *end = '\0';
13317
13318 /* No need to keep this SV "well-formed" with a '\0' after the end, as
13319 it's private to us, and utf16_to_utf8{,reversed} take a
13320 (pointer,length) pair, rather than a NUL-terminated string. */
13321 if(SvCUR(utf16_buffer) & 1) {
13322 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
13323 SvCUR_set(utf16_buffer, 1);
13324 } else {
13325 SvCUR_set(utf16_buffer, 0);
13326 }
13327 }
13328 DEBUG_P(PerlIO_printf(Perl_debug_log,
13329 "utf16_textfilter: returns, status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
13330 status,
13331 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13332 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
13333 return retval;
13334 }
13335
13336 static U8 *
S_add_utf16_textfilter(pTHX_ U8 * const s,bool reversed)13337 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
13338 {
13339 SV *filter = filter_add(S_utf16_textfilter, NULL);
13340
13341 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
13342
13343 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
13344 SvPVCLEAR(filter);
13345 IoLINES(filter) = reversed;
13346 IoPAGE(filter) = 1; /* Not EOF */
13347
13348 /* Sadly, we have to return a valid pointer, come what may, so we have to
13349 ignore any error return from this. */
13350 SvCUR_set(PL_linestr, 0);
13351 if (FILTER_READ(0, PL_linestr, 0)) {
13352 SvUTF8_on(PL_linestr);
13353 } else {
13354 SvUTF8_on(PL_linestr);
13355 }
13356 PL_bufend = SvEND(PL_linestr);
13357 return (U8*)SvPVX(PL_linestr);
13358 }
13359 #endif
13360
13361 /*
13362 =for apidoc scan_vstring
13363
13364 Returns a pointer to the next character after the parsed
13365 vstring, as well as updating the passed in sv.
13366
13367 Function must be called like
13368
13369 sv = sv_2mortal(newSV(5));
13370 s = scan_vstring(s,e,sv);
13371
13372 where s and e are the start and end of the string.
13373 The sv should already be large enough to store the vstring
13374 passed in, for performance reasons.
13375
13376 This function may croak if fatal warnings are enabled in the
13377 calling scope, hence the sv_2mortal in the example (to prevent
13378 a leak). Make sure to do SvREFCNT_inc afterwards if you use
13379 sv_2mortal.
13380
13381 =cut
13382 */
13383
13384 char *
Perl_scan_vstring(pTHX_ const char * s,const char * const e,SV * sv)13385 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
13386 {
13387 const char *pos = s;
13388 const char *start = s;
13389
13390 PERL_ARGS_ASSERT_SCAN_VSTRING;
13391
13392 if (*pos == 'v') pos++; /* get past 'v' */
13393 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13394 pos++;
13395 if ( *pos != '.') {
13396 /* this may not be a v-string if followed by => */
13397 const char *next = pos;
13398 while (next < e && isSPACE(*next))
13399 ++next;
13400 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
13401 /* return string not v-string */
13402 sv_setpvn(sv,(char *)s,pos-s);
13403 return (char *)pos;
13404 }
13405 }
13406
13407 if (!isALPHA(*pos)) {
13408 U8 tmpbuf[UTF8_MAXBYTES+1];
13409
13410 if (*s == 'v')
13411 s++; /* get past 'v' */
13412
13413 SvPVCLEAR(sv);
13414
13415 for (;;) {
13416 /* this is atoi() that tolerates underscores */
13417 U8 *tmpend;
13418 UV rev = 0;
13419 const char *end = pos;
13420 UV mult = 1;
13421 while (--end >= s) {
13422 if (*end != '_') {
13423 const UV orev = rev;
13424 rev += (*end - '0') * mult;
13425 mult *= 10;
13426 if (orev > rev)
13427 /* diag_listed_as: Integer overflow in %s number */
13428 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13429 "Integer overflow in decimal number");
13430 }
13431 }
13432
13433 /* Append native character for the rev point */
13434 tmpend = uvchr_to_utf8(tmpbuf, rev);
13435 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
13436 if (!UVCHR_IS_INVARIANT(rev))
13437 SvUTF8_on(sv);
13438 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
13439 s = ++pos;
13440 else {
13441 s = pos;
13442 break;
13443 }
13444 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13445 pos++;
13446 }
13447 SvPOK_on(sv);
13448 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
13449 SvRMAGICAL_on(sv);
13450 }
13451 return (char *)s;
13452 }
13453
13454 int
Perl_keyword_plugin_standard(pTHX_ char * keyword_ptr,STRLEN keyword_len,OP ** op_ptr)13455 Perl_keyword_plugin_standard(pTHX_
13456 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13457 {
13458 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
13459 PERL_UNUSED_CONTEXT;
13460 PERL_UNUSED_ARG(keyword_ptr);
13461 PERL_UNUSED_ARG(keyword_len);
13462 PERL_UNUSED_ARG(op_ptr);
13463 return KEYWORD_PLUGIN_DECLINE;
13464 }
13465
13466 STRLEN
Perl_infix_plugin_standard(pTHX_ char * operator_ptr,STRLEN operator_len,struct Perl_custom_infix ** def)13467 Perl_infix_plugin_standard(pTHX_
13468 char *operator_ptr, STRLEN operator_len, struct Perl_custom_infix **def)
13469 {
13470 PERL_ARGS_ASSERT_INFIX_PLUGIN_STANDARD;
13471 PERL_UNUSED_CONTEXT;
13472 PERL_UNUSED_ARG(operator_ptr);
13473 PERL_UNUSED_ARG(operator_len);
13474 PERL_UNUSED_ARG(def);
13475 return 0;
13476 }
13477
13478 /*
13479 =for apidoc_section $lexer
13480 =for apidoc wrap_keyword_plugin
13481
13482 Puts a C function into the chain of keyword plugins. This is the
13483 preferred way to manipulate the L</PL_keyword_plugin> variable.
13484 C<new_plugin> is a pointer to the C function that is to be added to the
13485 keyword plugin chain, and C<old_plugin_p> points to the storage location
13486 where a pointer to the next function in the chain will be stored. The
13487 value of C<new_plugin> is written into the L</PL_keyword_plugin> variable,
13488 while the value previously stored there is written to C<*old_plugin_p>.
13489
13490 L</PL_keyword_plugin> is global to an entire process, and a module wishing
13491 to hook keyword parsing may find itself invoked more than once per
13492 process, typically in different threads. To handle that situation, this
13493 function is idempotent. The location C<*old_plugin_p> must initially
13494 (once per process) contain a null pointer. A C variable of static
13495 duration (declared at file scope, typically also marked C<static> to give
13496 it internal linkage) will be implicitly initialised appropriately, if it
13497 does not have an explicit initialiser. This function will only actually
13498 modify the plugin chain if it finds C<*old_plugin_p> to be null. This
13499 function is also thread safe on the small scale. It uses appropriate
13500 locking to avoid race conditions in accessing L</PL_keyword_plugin>.
13501
13502 When this function is called, the function referenced by C<new_plugin>
13503 must be ready to be called, except for C<*old_plugin_p> being unfilled.
13504 In a threading situation, C<new_plugin> may be called immediately, even
13505 before this function has returned. C<*old_plugin_p> will always be
13506 appropriately set before C<new_plugin> is called. If C<new_plugin>
13507 decides not to do anything special with the identifier that it is given
13508 (which is the usual case for most calls to a keyword plugin), it must
13509 chain the plugin function referenced by C<*old_plugin_p>.
13510
13511 Taken all together, XS code to install a keyword plugin should typically
13512 look something like this:
13513
13514 static Perl_keyword_plugin_t next_keyword_plugin;
13515 static OP *my_keyword_plugin(pTHX_
13516 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13517 {
13518 if (memEQs(keyword_ptr, keyword_len,
13519 "my_new_keyword")) {
13520 ...
13521 } else {
13522 return next_keyword_plugin(aTHX_
13523 keyword_ptr, keyword_len, op_ptr);
13524 }
13525 }
13526 BOOT:
13527 wrap_keyword_plugin(my_keyword_plugin,
13528 &next_keyword_plugin);
13529
13530 Direct access to L</PL_keyword_plugin> should be avoided.
13531
13532 =cut
13533 */
13534
13535 void
Perl_wrap_keyword_plugin(pTHX_ Perl_keyword_plugin_t new_plugin,Perl_keyword_plugin_t * old_plugin_p)13536 Perl_wrap_keyword_plugin(pTHX_
13537 Perl_keyword_plugin_t new_plugin, Perl_keyword_plugin_t *old_plugin_p)
13538 {
13539
13540 PERL_UNUSED_CONTEXT;
13541 PERL_ARGS_ASSERT_WRAP_KEYWORD_PLUGIN;
13542 if (*old_plugin_p) return;
13543 KEYWORD_PLUGIN_MUTEX_LOCK;
13544 if (!*old_plugin_p) {
13545 *old_plugin_p = PL_keyword_plugin;
13546 PL_keyword_plugin = new_plugin;
13547 }
13548 KEYWORD_PLUGIN_MUTEX_UNLOCK;
13549 }
13550
13551 /*
13552 =for apidoc wrap_infix_plugin
13553
13554 B<NOTE:> This API exists entirely for the purpose of making the CPAN module
13555 C<XS::Parse::Infix> work. It is not expected that additional modules will make
13556 use of it; rather, that they should use C<XS::Parse::Infix> to provide parsing
13557 of new infix operators.
13558
13559 Puts a C function into the chain of infix plugins. This is the preferred
13560 way to manipulate the L</PL_infix_plugin> variable. C<new_plugin> is a
13561 pointer to the C function that is to be added to the infix plugin chain, and
13562 C<old_plugin_p> points to a storage location where a pointer to the next
13563 function in the chain will be stored. The value of C<new_plugin> is written
13564 into the L</PL_infix_plugin> variable, while the value previously stored there
13565 is written to C<*old_plugin_p>.
13566
13567 Direct access to L</PL_infix_plugin> should be avoided.
13568
13569 =cut
13570 */
13571
13572 void
Perl_wrap_infix_plugin(pTHX_ Perl_infix_plugin_t new_plugin,Perl_infix_plugin_t * old_plugin_p)13573 Perl_wrap_infix_plugin(pTHX_
13574 Perl_infix_plugin_t new_plugin, Perl_infix_plugin_t *old_plugin_p)
13575 {
13576
13577 PERL_UNUSED_CONTEXT;
13578 PERL_ARGS_ASSERT_WRAP_INFIX_PLUGIN;
13579 if (*old_plugin_p) return;
13580 /* We use the same mutex as for PL_keyword_plugin as it's so rare either
13581 * of them is actually updated; no need for a dedicated one each */
13582 KEYWORD_PLUGIN_MUTEX_LOCK;
13583 if (!*old_plugin_p) {
13584 *old_plugin_p = PL_infix_plugin;
13585 PL_infix_plugin = new_plugin;
13586 }
13587 KEYWORD_PLUGIN_MUTEX_UNLOCK;
13588 }
13589
13590 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
13591 static void
S_parse_recdescent(pTHX_ int gramtype,I32 fakeeof)13592 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
13593 {
13594 SAVEI32(PL_lex_brackets);
13595 if (PL_lex_brackets > 100)
13596 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
13597 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
13598 SAVEI32(PL_lex_allbrackets);
13599 PL_lex_allbrackets = 0;
13600 SAVEI8(PL_lex_fakeeof);
13601 PL_lex_fakeeof = (U8)fakeeof;
13602 if(yyparse(gramtype) && !PL_parser->error_count)
13603 qerror(Perl_mess(aTHX_ "Parse error"));
13604 }
13605
13606 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
13607 static OP *
S_parse_recdescent_for_op(pTHX_ int gramtype,I32 fakeeof)13608 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
13609 {
13610 OP *o;
13611 ENTER;
13612 SAVEVPTR(PL_eval_root);
13613 PL_eval_root = NULL;
13614 parse_recdescent(gramtype, fakeeof);
13615 o = PL_eval_root;
13616 LEAVE;
13617 return o;
13618 }
13619
13620 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
13621 static OP *
S_parse_expr(pTHX_ I32 fakeeof,U32 flags)13622 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
13623 {
13624 OP *exprop;
13625 if (flags & ~PARSE_OPTIONAL)
13626 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
13627 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
13628 if (!exprop && !(flags & PARSE_OPTIONAL)) {
13629 if (!PL_parser->error_count)
13630 qerror(Perl_mess(aTHX_ "Parse error"));
13631 exprop = newOP(OP_NULL, 0);
13632 }
13633 return exprop;
13634 }
13635
13636 /*
13637 =for apidoc parse_arithexpr
13638
13639 Parse a Perl arithmetic expression. This may contain operators of precedence
13640 down to the bit shift operators. The expression must be followed (and thus
13641 terminated) either by a comparison or lower-precedence operator or by
13642 something that would normally terminate an expression such as semicolon.
13643 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
13644 otherwise it is mandatory. It is up to the caller to ensure that the
13645 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
13646 the source of the code to be parsed and the lexical context for the
13647 expression.
13648
13649 The op tree representing the expression is returned. If an optional
13650 expression is absent, a null pointer is returned, otherwise the pointer
13651 will be non-null.
13652
13653 If an error occurs in parsing or compilation, in most cases a valid op
13654 tree is returned anyway. The error is reflected in the parser state,
13655 normally resulting in a single exception at the top level of parsing
13656 which covers all the compilation errors that occurred. Some compilation
13657 errors, however, will throw an exception immediately.
13658
13659 =for apidoc Amnh||PARSE_OPTIONAL
13660
13661 =cut
13662
13663 */
13664
13665 OP *
Perl_parse_arithexpr(pTHX_ U32 flags)13666 Perl_parse_arithexpr(pTHX_ U32 flags)
13667 {
13668 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
13669 }
13670
13671 /*
13672 =for apidoc parse_termexpr
13673
13674 Parse a Perl term expression. This may contain operators of precedence
13675 down to the assignment operators. The expression must be followed (and thus
13676 terminated) either by a comma or lower-precedence operator or by
13677 something that would normally terminate an expression such as semicolon.
13678 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
13679 otherwise it is mandatory. It is up to the caller to ensure that the
13680 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
13681 the source of the code to be parsed and the lexical context for the
13682 expression.
13683
13684 The op tree representing the expression is returned. If an optional
13685 expression is absent, a null pointer is returned, otherwise the pointer
13686 will be non-null.
13687
13688 If an error occurs in parsing or compilation, in most cases a valid op
13689 tree is returned anyway. The error is reflected in the parser state,
13690 normally resulting in a single exception at the top level of parsing
13691 which covers all the compilation errors that occurred. Some compilation
13692 errors, however, will throw an exception immediately.
13693
13694 =cut
13695 */
13696
13697 OP *
Perl_parse_termexpr(pTHX_ U32 flags)13698 Perl_parse_termexpr(pTHX_ U32 flags)
13699 {
13700 return parse_expr(LEX_FAKEEOF_COMMA, flags);
13701 }
13702
13703 /*
13704 =for apidoc parse_listexpr
13705
13706 Parse a Perl list expression. This may contain operators of precedence
13707 down to the comma operator. The expression must be followed (and thus
13708 terminated) either by a low-precedence logic operator such as C<or> or by
13709 something that would normally terminate an expression such as semicolon.
13710 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
13711 otherwise it is mandatory. It is up to the caller to ensure that the
13712 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
13713 the source of the code to be parsed and the lexical context for the
13714 expression.
13715
13716 The op tree representing the expression is returned. If an optional
13717 expression is absent, a null pointer is returned, otherwise the pointer
13718 will be non-null.
13719
13720 If an error occurs in parsing or compilation, in most cases a valid op
13721 tree is returned anyway. The error is reflected in the parser state,
13722 normally resulting in a single exception at the top level of parsing
13723 which covers all the compilation errors that occurred. Some compilation
13724 errors, however, will throw an exception immediately.
13725
13726 =cut
13727 */
13728
13729 OP *
Perl_parse_listexpr(pTHX_ U32 flags)13730 Perl_parse_listexpr(pTHX_ U32 flags)
13731 {
13732 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
13733 }
13734
13735 /*
13736 =for apidoc parse_fullexpr
13737
13738 Parse a single complete Perl expression. This allows the full
13739 expression grammar, including the lowest-precedence operators such
13740 as C<or>. The expression must be followed (and thus terminated) by a
13741 token that an expression would normally be terminated by: end-of-file,
13742 closing bracketing punctuation, semicolon, or one of the keywords that
13743 signals a postfix expression-statement modifier. If C<flags> has the
13744 C<PARSE_OPTIONAL> bit set, then the expression is optional, otherwise it is
13745 mandatory. It is up to the caller to ensure that the dynamic parser
13746 state (L</PL_parser> et al) is correctly set to reflect the source of
13747 the code to be parsed and the lexical context for the expression.
13748
13749 The op tree representing the expression is returned. If an optional
13750 expression is absent, a null pointer is returned, otherwise the pointer
13751 will be non-null.
13752
13753 If an error occurs in parsing or compilation, in most cases a valid op
13754 tree is returned anyway. The error is reflected in the parser state,
13755 normally resulting in a single exception at the top level of parsing
13756 which covers all the compilation errors that occurred. Some compilation
13757 errors, however, will throw an exception immediately.
13758
13759 =cut
13760 */
13761
13762 OP *
Perl_parse_fullexpr(pTHX_ U32 flags)13763 Perl_parse_fullexpr(pTHX_ U32 flags)
13764 {
13765 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
13766 }
13767
13768 /*
13769 =for apidoc parse_block
13770
13771 Parse a single complete Perl code block. This consists of an opening
13772 brace, a sequence of statements, and a closing brace. The block
13773 constitutes a lexical scope, so C<my> variables and various compile-time
13774 effects can be contained within it. It is up to the caller to ensure
13775 that the dynamic parser state (L</PL_parser> et al) is correctly set to
13776 reflect the source of the code to be parsed and the lexical context for
13777 the statement.
13778
13779 The op tree representing the code block is returned. This is always a
13780 real op, never a null pointer. It will normally be a C<lineseq> list,
13781 including C<nextstate> or equivalent ops. No ops to construct any kind
13782 of runtime scope are included by virtue of it being a block.
13783
13784 If an error occurs in parsing or compilation, in most cases a valid op
13785 tree (most likely null) is returned anyway. The error is reflected in
13786 the parser state, normally resulting in a single exception at the top
13787 level of parsing which covers all the compilation errors that occurred.
13788 Some compilation errors, however, will throw an exception immediately.
13789
13790 The C<flags> parameter is reserved for future use, and must always
13791 be zero.
13792
13793 =cut
13794 */
13795
13796 OP *
Perl_parse_block(pTHX_ U32 flags)13797 Perl_parse_block(pTHX_ U32 flags)
13798 {
13799 if (flags)
13800 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
13801 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
13802 }
13803
13804 /*
13805 =for apidoc parse_barestmt
13806
13807 Parse a single unadorned Perl statement. This may be a normal imperative
13808 statement or a declaration that has compile-time effect. It does not
13809 include any label or other affixture. It is up to the caller to ensure
13810 that the dynamic parser state (L</PL_parser> et al) is correctly set to
13811 reflect the source of the code to be parsed and the lexical context for
13812 the statement.
13813
13814 The op tree representing the statement is returned. This may be a
13815 null pointer if the statement is null, for example if it was actually
13816 a subroutine definition (which has compile-time side effects). If not
13817 null, it will be ops directly implementing the statement, suitable to
13818 pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
13819 equivalent op (except for those embedded in a scope contained entirely
13820 within the statement).
13821
13822 If an error occurs in parsing or compilation, in most cases a valid op
13823 tree (most likely null) is returned anyway. The error is reflected in
13824 the parser state, normally resulting in a single exception at the top
13825 level of parsing which covers all the compilation errors that occurred.
13826 Some compilation errors, however, will throw an exception immediately.
13827
13828 The C<flags> parameter is reserved for future use, and must always
13829 be zero.
13830
13831 =cut
13832 */
13833
13834 OP *
Perl_parse_barestmt(pTHX_ U32 flags)13835 Perl_parse_barestmt(pTHX_ U32 flags)
13836 {
13837 if (flags)
13838 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
13839 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
13840 }
13841
13842 /*
13843 =for apidoc parse_label
13844
13845 Parse a single label, possibly optional, of the type that may prefix a
13846 Perl statement. It is up to the caller to ensure that the dynamic parser
13847 state (L</PL_parser> et al) is correctly set to reflect the source of
13848 the code to be parsed. If C<flags> has the C<PARSE_OPTIONAL> bit set, then the
13849 label is optional, otherwise it is mandatory.
13850
13851 The name of the label is returned in the form of a fresh scalar. If an
13852 optional label is absent, a null pointer is returned.
13853
13854 If an error occurs in parsing, which can only occur if the label is
13855 mandatory, a valid label is returned anyway. The error is reflected in
13856 the parser state, normally resulting in a single exception at the top
13857 level of parsing which covers all the compilation errors that occurred.
13858
13859 =cut
13860 */
13861
13862 SV *
Perl_parse_label(pTHX_ U32 flags)13863 Perl_parse_label(pTHX_ U32 flags)
13864 {
13865 if (flags & ~PARSE_OPTIONAL)
13866 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
13867 if (PL_nexttoke) {
13868 PL_parser->yychar = yylex();
13869 if (PL_parser->yychar == LABEL) {
13870 SV * const labelsv = cSVOPx(pl_yylval.opval)->op_sv;
13871 PL_parser->yychar = YYEMPTY;
13872 cSVOPx(pl_yylval.opval)->op_sv = NULL;
13873 op_free(pl_yylval.opval);
13874 return labelsv;
13875 } else {
13876 yyunlex();
13877 goto no_label;
13878 }
13879 } else {
13880 char *s, *t;
13881 STRLEN wlen, bufptr_pos;
13882 lex_read_space(0);
13883 t = s = PL_bufptr;
13884 if (!isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
13885 goto no_label;
13886 t = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen, FALSE);
13887 if (word_takes_any_delimiter(s, wlen))
13888 goto no_label;
13889 bufptr_pos = s - SvPVX(PL_linestr);
13890 PL_bufptr = t;
13891 lex_read_space(LEX_KEEP_PREVIOUS);
13892 t = PL_bufptr;
13893 s = SvPVX(PL_linestr) + bufptr_pos;
13894 if (t[0] == ':' && t[1] != ':') {
13895 PL_oldoldbufptr = PL_oldbufptr;
13896 PL_oldbufptr = s;
13897 PL_bufptr = t+1;
13898 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
13899 } else {
13900 PL_bufptr = s;
13901 no_label:
13902 if (flags & PARSE_OPTIONAL) {
13903 return NULL;
13904 } else {
13905 qerror(Perl_mess(aTHX_ "Parse error"));
13906 return newSVpvs("x");
13907 }
13908 }
13909 }
13910 }
13911
13912 /*
13913 =for apidoc parse_fullstmt
13914
13915 Parse a single complete Perl statement. This may be a normal imperative
13916 statement or a declaration that has compile-time effect, and may include
13917 optional labels. It is up to the caller to ensure that the dynamic
13918 parser state (L</PL_parser> et al) is correctly set to reflect the source
13919 of the code to be parsed and the lexical context for the statement.
13920
13921 The op tree representing the statement is returned. This may be a
13922 null pointer if the statement is null, for example if it was actually
13923 a subroutine definition (which has compile-time side effects). If not
13924 null, it will be the result of a L</newSTATEOP> call, normally including
13925 a C<nextstate> or equivalent op.
13926
13927 If an error occurs in parsing or compilation, in most cases a valid op
13928 tree (most likely null) is returned anyway. The error is reflected in
13929 the parser state, normally resulting in a single exception at the top
13930 level of parsing which covers all the compilation errors that occurred.
13931 Some compilation errors, however, will throw an exception immediately.
13932
13933 The C<flags> parameter is reserved for future use, and must always
13934 be zero.
13935
13936 =cut
13937 */
13938
13939 OP *
Perl_parse_fullstmt(pTHX_ U32 flags)13940 Perl_parse_fullstmt(pTHX_ U32 flags)
13941 {
13942 if (flags)
13943 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
13944 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
13945 }
13946
13947 /*
13948 =for apidoc parse_stmtseq
13949
13950 Parse a sequence of zero or more Perl statements. These may be normal
13951 imperative statements, including optional labels, or declarations
13952 that have compile-time effect, or any mixture thereof. The statement
13953 sequence ends when a closing brace or end-of-file is encountered in a
13954 place where a new statement could have validly started. It is up to
13955 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
13956 is correctly set to reflect the source of the code to be parsed and the
13957 lexical context for the statements.
13958
13959 The op tree representing the statement sequence is returned. This may
13960 be a null pointer if the statements were all null, for example if there
13961 were no statements or if there were only subroutine definitions (which
13962 have compile-time side effects). If not null, it will be a C<lineseq>
13963 list, normally including C<nextstate> or equivalent ops.
13964
13965 If an error occurs in parsing or compilation, in most cases a valid op
13966 tree is returned anyway. The error is reflected in the parser state,
13967 normally resulting in a single exception at the top level of parsing
13968 which covers all the compilation errors that occurred. Some compilation
13969 errors, however, will throw an exception immediately.
13970
13971 The C<flags> parameter is reserved for future use, and must always
13972 be zero.
13973
13974 =cut
13975 */
13976
13977 OP *
Perl_parse_stmtseq(pTHX_ U32 flags)13978 Perl_parse_stmtseq(pTHX_ U32 flags)
13979 {
13980 OP *stmtseqop;
13981 I32 c;
13982 if (flags)
13983 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
13984 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
13985 c = lex_peek_unichar(0);
13986 if (c != -1 && c != /*{*/'}')
13987 qerror(Perl_mess(aTHX_ "Parse error"));
13988 return stmtseqop;
13989 }
13990
13991 /*
13992 =for apidoc parse_subsignature
13993
13994 Parse a subroutine signature declaration. This is the contents of the
13995 parentheses following a named or anonymous subroutine declaration when the
13996 C<signatures> feature is enabled. Note that this function neither expects
13997 nor consumes the opening and closing parentheses around the signature; it
13998 is the caller's job to handle these.
13999
14000 This function must only be called during parsing of a subroutine; after
14001 L</start_subparse> has been called. It might allocate lexical variables on
14002 the pad for the current subroutine.
14003
14004 The op tree to unpack the arguments from the stack at runtime is returned.
14005 This op tree should appear at the beginning of the compiled function. The
14006 caller may wish to use L</op_append_list> to build their function body
14007 after it, or splice it together with the body before calling L</newATTRSUB>.
14008
14009 The C<flags> parameter is reserved for future use, and must always
14010 be zero.
14011
14012 =cut
14013 */
14014
14015 OP *
Perl_parse_subsignature(pTHX_ U32 flags)14016 Perl_parse_subsignature(pTHX_ U32 flags)
14017 {
14018 if (flags)
14019 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_subsignature");
14020 return parse_recdescent_for_op(GRAMSUBSIGNATURE, LEX_FAKEEOF_NONEXPR);
14021 }
14022
14023 /*
14024 * ex: set ts=8 sts=4 sw=4 et:
14025 */
14026