xref: /openbsd/gnu/usr.bin/perl/toke.c (revision e0680481)
1 /*    toke.c
2  *
3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
5  *
6  *    You may distribute under the terms of either the GNU General Public
7  *    License or the Artistic License, as specified in the README file.
8  *
9  */
10 
11 /*
12  *  'It all comes from here, the stench and the peril.'    --Frodo
13  *
14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
15  */
16 
17 /*
18  * This file is the lexer for Perl.  It's closely linked to the
19  * parser, perly.y.
20  *
21  * The main routine is yylex(), which returns the next token.
22  */
23 
24 /*
25 =head1 Lexer interface
26 This is the lower layer of the Perl parser, managing characters and tokens.
27 
28 =for apidoc AmnU|yy_parser *|PL_parser
29 
30 Pointer to a structure encapsulating the state of the parsing operation
31 currently in progress.  The pointer can be locally changed to perform
32 a nested parse without interfering with the state of an outer parse.
33 Individual members of C<PL_parser> have their own documentation.
34 
35 =cut
36 */
37 
38 #include "EXTERN.h"
39 #define PERL_IN_TOKE_C
40 #include "perl.h"
41 #include "invlist_inline.h"
42 
43 #define new_constant(a,b,c,d,e,f,g, h)	\
44         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g, h)
45 
46 #define pl_yylval	(PL_parser->yylval)
47 
48 /* XXX temporary backwards compatibility */
49 #define PL_lex_brackets		(PL_parser->lex_brackets)
50 #define PL_lex_allbrackets	(PL_parser->lex_allbrackets)
51 #define PL_lex_fakeeof		(PL_parser->lex_fakeeof)
52 #define PL_lex_brackstack	(PL_parser->lex_brackstack)
53 #define PL_lex_casemods		(PL_parser->lex_casemods)
54 #define PL_lex_casestack        (PL_parser->lex_casestack)
55 #define PL_lex_dojoin		(PL_parser->lex_dojoin)
56 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
57 #define PL_lex_inpat		(PL_parser->lex_inpat)
58 #define PL_lex_inwhat		(PL_parser->lex_inwhat)
59 #define PL_lex_op		(PL_parser->lex_op)
60 #define PL_lex_repl		(PL_parser->lex_repl)
61 #define PL_lex_starts		(PL_parser->lex_starts)
62 #define PL_lex_stuff		(PL_parser->lex_stuff)
63 #define PL_multi_start		(PL_parser->multi_start)
64 #define PL_multi_open		(PL_parser->multi_open)
65 #define PL_multi_close		(PL_parser->multi_close)
66 #define PL_preambled		(PL_parser->preambled)
67 #define PL_linestr		(PL_parser->linestr)
68 #define PL_expect		(PL_parser->expect)
69 #define PL_copline		(PL_parser->copline)
70 #define PL_bufptr		(PL_parser->bufptr)
71 #define PL_oldbufptr		(PL_parser->oldbufptr)
72 #define PL_oldoldbufptr		(PL_parser->oldoldbufptr)
73 #define PL_linestart		(PL_parser->linestart)
74 #define PL_bufend		(PL_parser->bufend)
75 #define PL_last_uni		(PL_parser->last_uni)
76 #define PL_last_lop		(PL_parser->last_lop)
77 #define PL_last_lop_op		(PL_parser->last_lop_op)
78 #define PL_lex_state		(PL_parser->lex_state)
79 #define PL_rsfp			(PL_parser->rsfp)
80 #define PL_rsfp_filters		(PL_parser->rsfp_filters)
81 #define PL_in_my		(PL_parser->in_my)
82 #define PL_in_my_stash		(PL_parser->in_my_stash)
83 #define PL_tokenbuf		(PL_parser->tokenbuf)
84 #define PL_multi_end		(PL_parser->multi_end)
85 #define PL_error_count		(PL_parser->error_count)
86 
87 #  define PL_nexttoke		(PL_parser->nexttoke)
88 #  define PL_nexttype		(PL_parser->nexttype)
89 #  define PL_nextval		(PL_parser->nextval)
90 
91 
92 #define SvEVALED(sv) \
93     (SvTYPE(sv) >= SVt_PVNV \
94     && ((XPVIV*)SvANY(sv))->xiv_u.xivu_eval_seen)
95 
96 static const char ident_too_long[] = "Identifier too long";
97 static const char ident_var_zero_multi_digit[] = "Numeric variables with more than one digit may not start with '0'";
98 
99 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
100 
101 #define XENUMMASK  0x3f
102 #define XFAKEEOF   0x40
103 #define XFAKEBRACK 0x80
104 
105 #ifdef USE_UTF8_SCRIPTS
106 #   define UTF cBOOL(!IN_BYTES)
107 #else
108 #   define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
109 #endif
110 
111 /* The maximum number of characters preceding the unrecognized one to display */
112 #define UNRECOGNIZED_PRECEDE_COUNT 10
113 
114 /* In variables named $^X, these are the legal values for X.
115  * 1999-02-27 mjd-perl-patch@plover.com */
116 #define isCONTROLVAR(x) (isUPPER(x) || memCHRs("[\\]^_?", (x)))
117 
118 /* Non-identifier plugin infix operators are allowed any printing character
119  * except spaces, digits, or identifier chars
120  */
121 #define isPLUGINFIX(c) (c && !isSPACE(c) && !isDIGIT(c) && !isALPHA(c))
122 /* Plugin infix operators may not begin with a quote symbol */
123 #define isPLUGINFIX_FIRST(c) (isPLUGINFIX(c) && c != '"' && c != '\'')
124 
125 #define PLUGINFIX_IS_ENABLED  UNLIKELY(PL_infix_plugin != &Perl_infix_plugin_standard)
126 
127 #define SPACE_OR_TAB(c) isBLANK_A(c)
128 
129 #define HEXFP_PEEK(s)     \
130     (((s[0] == '.') && \
131       (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \
132      isALPHA_FOLD_EQ(s[0], 'p'))
133 
134 /* LEX_* are values for PL_lex_state, the state of the lexer.
135  * They are arranged oddly so that the guard on the switch statement
136  * can get by with a single comparison (if the compiler is smart enough).
137  *
138  * These values refer to the various states within a sublex parse,
139  * i.e. within a double quotish string
140  */
141 
142 /* #define LEX_NOTPARSING		11 is done in perl.h. */
143 
144 #define LEX_NORMAL		10 /* normal code (ie not within "...")     */
145 #define LEX_INTERPNORMAL	 9 /* code within a string, eg "$foo[$x+1]" */
146 #define LEX_INTERPCASEMOD	 8 /* expecting a \U, \Q or \E etc          */
147 #define LEX_INTERPPUSH		 7 /* starting a new sublex parse level     */
148 #define LEX_INTERPSTART		 6 /* expecting the start of a $var         */
149 
150                                    /* at end of code, eg "$x" followed by:  */
151 #define LEX_INTERPEND		 5 /* ... eg not one of [, { or ->          */
152 #define LEX_INTERPENDMAYBE	 4 /* ... eg one of [, { or ->              */
153 
154 #define LEX_INTERPCONCAT	 3 /* expecting anything, eg at start of
155                                         string or after \E, $foo, etc       */
156 #define LEX_INTERPCONST		 2 /* NOT USED */
157 #define LEX_FORMLINE		 1 /* expecting a format line               */
158 
159 /* returned to yyl_try() to request it to retry the parse loop, expected to only
160    be returned directly by yyl_fake_eof(), but functions that call yyl_fake_eof()
161    can also return it.
162 
163    yylex (aka Perl_yylex) returns 0 on EOF rather than returning -1,
164    other token values are 258 or higher (see perly.h), so -1 should be
165    a safe value here.
166 */
167 #define YYL_RETRY (-1)
168 
169 #ifdef DEBUGGING
170 static const char* const lex_state_names[] = {
171     "KNOWNEXT",
172     "FORMLINE",
173     "INTERPCONST",
174     "INTERPCONCAT",
175     "INTERPENDMAYBE",
176     "INTERPEND",
177     "INTERPSTART",
178     "INTERPPUSH",
179     "INTERPCASEMOD",
180     "INTERPNORMAL",
181     "NORMAL"
182 };
183 #endif
184 
185 #include "keywords.h"
186 
187 /* CLINE is a macro that ensures PL_copline has a sane value */
188 
189 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
190 
191 /*
192  * Convenience functions to return different tokens and prime the
193  * lexer for the next token.  They all take an argument.
194  *
195  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
196  * OPERATOR     : generic operator
197  * AOPERATOR    : assignment operator
198  * PREBLOCK     : beginning the block after an if, while, foreach, ...
199  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
200  * PREREF       : *EXPR where EXPR is not a simple identifier
201  * TERM         : expression term
202  * POSTDEREF    : postfix dereference (->$* ->@[...] etc.)
203  * LOOPX        : loop exiting command (goto, last, dump, etc)
204  * FTST         : file test operator
205  * FUN0         : zero-argument function
206  * FUN0OP       : zero-argument function, with its op created in this file
207  * FUN1         : not used, except for not, which isn't a UNIOP
208  * BOop         : bitwise or or xor
209  * BAop         : bitwise and
210  * BCop         : bitwise complement
211  * SHop         : shift operator
212  * PWop         : power operator
213  * PMop         : pattern-matching operator
214  * Aop          : addition-level operator
215  * AopNOASSIGN  : addition-level operator that is never part of .=
216  * Mop          : multiplication-level operator
217  * ChEop        : chaining equality-testing operator
218  * NCEop        : non-chaining comparison operator at equality precedence
219  * ChRop        : chaining relational operator <= != gt
220  * NCRop        : non-chaining relational operator isa
221  *
222  * Also see LOP and lop() below.
223  */
224 
225 #ifdef DEBUGGING /* Serve -DT. */
226 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
227 #else
228 #   define REPORT(retval) (retval)
229 #endif
230 
231 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
232 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
233 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
234 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
235 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
236 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
237 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
238 #define PHASERBLOCK(f) return (pl_yylval.ival=f, PL_expect = XBLOCK, PL_bufptr = s, REPORT((int)PHASER))
239 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
240 #define LOOPX(f) return (PL_bufptr = force_word(s,BAREWORD,TRUE,FALSE), \
241                          pl_yylval.ival=f, \
242                          PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
243                          REPORT((int)LOOPEX))
244 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
245 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
246 #define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
247 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
248 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
249 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
250 #define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \
251                        REPORT(PERLY_TILDE)
252 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
253 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
254 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
255 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
256 #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
257 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
258 #define ChEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHEQOP))
259 #define NCEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCEQOP))
260 #define ChRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHRELOP))
261 #define NCRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCRELOP))
262 
263 /* This bit of chicanery makes a unary function followed by
264  * a parenthesis into a function with one argument, highest precedence.
265  * The UNIDOR macro is for unary functions that can be followed by the //
266  * operator (such as C<shift // 0>).
267  */
268 #define UNI3(f,x,have_x) { \
269         pl_yylval.ival = f; \
270         if (have_x) PL_expect = x; \
271         PL_bufptr = s; \
272         PL_last_uni = PL_oldbufptr; \
273         PL_last_lop_op = (f) < 0 ? -(f) : (f); \
274         if (*s == '(') \
275             return REPORT( (int)FUNC1 ); \
276         s = skipspace(s); \
277         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
278         }
279 #define UNI(f)    UNI3(f,XTERM,1)
280 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
281 #define UNIPROTO(f,optional) { \
282         if (optional) PL_last_uni = PL_oldbufptr; \
283         OPERATOR(f); \
284         }
285 
286 #define UNIBRACK(f) UNI3(f,0,0)
287 
288 /* return has special case parsing.
289  *
290  * List operators have low precedence. Functions have high precedence.
291  * Every built in, *except return*, if written with () around its arguments, is
292  * parsed as a function. Hence every other list built in:
293  *
294  * $ perl -lwe 'sub foo { join 2,4,6 * 1.5 } print for foo()' # join 2,4,9
295  * 429
296  * $ perl -lwe 'sub foo { join(2,4,6) * 1.5 } print for foo()' # 426 * 1.5
297  * 639
298  * $ perl -lwe 'sub foo { join+(2,4,6) * 1.5 } print for foo()'
299  * Useless use of a constant (2) in void context at -e line 1.
300  * Useless use of a constant (4) in void context at -e line 1.
301  *
302  * $
303  *
304  * empty line output because C<(2, 4, 6) * 1.5> is the comma operator, not a
305  * list. * forces scalar context, 6 * 1.5 is 9, and join(9) is the empty string.
306  *
307  * Whereas return:
308  *
309  * $ perl -lwe 'sub foo { return 2,4,6 * 1.5 } print for foo()'
310  * 2
311  * 4
312  * 9
313  * $ perl -lwe 'sub foo { return(2,4,6) * 1.5 } print for foo()'
314  * Useless use of a constant (2) in void context at -e line 1.
315  * Useless use of a constant (4) in void context at -e line 1.
316  * 9
317  * $ perl -lwe 'sub foo { return+(2,4,6) * 1.5 } print for foo()'
318  * Useless use of a constant (2) in void context at -e line 1.
319  * Useless use of a constant (4) in void context at -e line 1.
320  * 9
321  * $
322  *
323  * and:
324  * $ perl -lwe 'sub foo { return(2,4,6) } print for foo()'
325  * 2
326  * 4
327  * 6
328  *
329  * This last example is what we expect, but it's clearly inconsistent with how
330  * C<return(2,4,6) * 1.5> *ought* to behave, if the rules were consistently
331  * followed.
332  *
333  *
334  * Perl 3 attempted to be consistent:
335  *
336  *   The rules are more consistent about where parens are needed and
337  *   where they are not.  In particular, unary operators and list operators now
338  *   behave like functions if they're called like functions.
339  *
340  * However, the behaviour for return was reverted to the "old" parsing with
341  * patches 9-12:
342  *
343  *   The construct
344  *   return (1,2,3);
345  *   did not do what was expected, since return was swallowing the
346  *   parens in order to consider itself a function.  The solution,
347  *   since return never wants any trailing expression such as
348  *   return (1,2,3) + 2;
349  *   is to simply make return an exception to the paren-makes-a-function
350  *   rule, and treat it the way it always was, so that it doesn't
351  *   strip the parens.
352  *
353  * To demonstrate the special-case parsing, replace OLDLOP(OP_RETURN); with
354  * LOP(OP_RETURN, XTERM);
355  *
356  * and constructs such as
357  *
358  *     return (Internals::V())[2]
359  *
360  * turn into syntax errors
361  */
362 
363 #define OLDLOP(f) \
364         do { \
365             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
366                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
367             pl_yylval.ival = (f); \
368             PL_expect = XTERM; \
369             PL_bufptr = s; \
370             return (int)LSTOP; \
371         } while(0)
372 
373 #define COPLINE_INC_WITH_HERELINES		    \
374     STMT_START {				     \
375         CopLINE_inc(PL_curcop);			      \
376         if (PL_parser->herelines)		       \
377             CopLINE(PL_curcop) += PL_parser->herelines, \
378             PL_parser->herelines = 0;			 \
379     } STMT_END
380 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
381  * is no sublex_push to follow. */
382 #define COPLINE_SET_FROM_MULTI_END	      \
383     STMT_START {			       \
384         CopLINE_set(PL_curcop, PL_multi_end);	\
385         if (PL_multi_end != PL_multi_start)	 \
386             PL_parser->herelines = 0;		  \
387     } STMT_END
388 
389 
390 /* A file-local structure for passing around information about subroutines and
391  * related definable words */
392 struct code {
393     SV *sv;
394     CV *cv;
395     GV *gv, **gvp;
396     OP *rv2cv_op;
397     PADOFFSET off;
398     bool lex;
399 };
400 
401 static const struct code no_code = { NULL, NULL, NULL, NULL, NULL, 0, FALSE };
402 
403 #ifdef DEBUGGING
404 
405 /* how to interpret the pl_yylval associated with the token */
406 enum token_type {
407     TOKENTYPE_NONE,
408     TOKENTYPE_IVAL,
409     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
410     TOKENTYPE_PVAL,
411     TOKENTYPE_OPVAL
412 };
413 
414 #define DEBUG_TOKEN(Type, Name)                                         \
415     { Name, TOKENTYPE_##Type, #Name }
416 
417 static struct debug_tokens {
418     const int token;
419     enum token_type type;
420     const char *name;
421 } const debug_tokens[] =
422 {
423     DEBUG_TOKEN (OPNUM, ADDOP),
424     DEBUG_TOKEN (NONE,  ANDAND),
425     DEBUG_TOKEN (NONE,  ANDOP),
426     DEBUG_TOKEN (NONE,  ARROW),
427     DEBUG_TOKEN (OPNUM, ASSIGNOP),
428     DEBUG_TOKEN (OPNUM, BITANDOP),
429     DEBUG_TOKEN (OPNUM, BITOROP),
430     DEBUG_TOKEN (OPNUM, CHEQOP),
431     DEBUG_TOKEN (OPNUM, CHRELOP),
432     DEBUG_TOKEN (NONE,  COLONATTR),
433     DEBUG_TOKEN (NONE,  DOLSHARP),
434     DEBUG_TOKEN (NONE,  DORDOR),
435     DEBUG_TOKEN (IVAL,  DOTDOT),
436     DEBUG_TOKEN (NONE,  FORMLBRACK),
437     DEBUG_TOKEN (NONE,  FORMRBRACK),
438     DEBUG_TOKEN (OPNUM, FUNC),
439     DEBUG_TOKEN (OPNUM, FUNC0),
440     DEBUG_TOKEN (OPVAL, FUNC0OP),
441     DEBUG_TOKEN (OPVAL, FUNC0SUB),
442     DEBUG_TOKEN (OPNUM, FUNC1),
443     DEBUG_TOKEN (NONE,  HASHBRACK),
444     DEBUG_TOKEN (IVAL,  KW_CATCH),
445     DEBUG_TOKEN (IVAL,  KW_CLASS),
446     DEBUG_TOKEN (IVAL,  KW_CONTINUE),
447     DEBUG_TOKEN (IVAL,  KW_DEFAULT),
448     DEBUG_TOKEN (IVAL,  KW_DO),
449     DEBUG_TOKEN (IVAL,  KW_ELSE),
450     DEBUG_TOKEN (IVAL,  KW_ELSIF),
451     DEBUG_TOKEN (IVAL,  KW_FIELD),
452     DEBUG_TOKEN (IVAL,  KW_GIVEN),
453     DEBUG_TOKEN (IVAL,  KW_FOR),
454     DEBUG_TOKEN (IVAL,  KW_FORMAT),
455     DEBUG_TOKEN (IVAL,  KW_IF),
456     DEBUG_TOKEN (IVAL,  KW_LOCAL),
457     DEBUG_TOKEN (IVAL,  KW_METHOD_anon),
458     DEBUG_TOKEN (IVAL,  KW_METHOD_named),
459     DEBUG_TOKEN (IVAL,  KW_MY),
460     DEBUG_TOKEN (IVAL,  KW_PACKAGE),
461     DEBUG_TOKEN (IVAL,  KW_REQUIRE),
462     DEBUG_TOKEN (IVAL,  KW_SUB_anon),
463     DEBUG_TOKEN (IVAL,  KW_SUB_anon_sig),
464     DEBUG_TOKEN (IVAL,  KW_SUB_named),
465     DEBUG_TOKEN (IVAL,  KW_SUB_named_sig),
466     DEBUG_TOKEN (IVAL,  KW_TRY),
467     DEBUG_TOKEN (IVAL,  KW_USE_or_NO),
468     DEBUG_TOKEN (IVAL,  KW_UNLESS),
469     DEBUG_TOKEN (IVAL,  KW_UNTIL),
470     DEBUG_TOKEN (IVAL,  KW_WHEN),
471     DEBUG_TOKEN (IVAL,  KW_WHILE),
472     DEBUG_TOKEN (OPVAL, LABEL),
473     DEBUG_TOKEN (OPNUM, LOOPEX),
474     DEBUG_TOKEN (OPNUM, LSTOP),
475     DEBUG_TOKEN (OPVAL, LSTOPSUB),
476     DEBUG_TOKEN (OPNUM, MATCHOP),
477     DEBUG_TOKEN (OPVAL, METHCALL),
478     DEBUG_TOKEN (OPVAL, METHCALL0),
479     DEBUG_TOKEN (OPNUM, MULOP),
480     DEBUG_TOKEN (OPNUM, NCEQOP),
481     DEBUG_TOKEN (OPNUM, NCRELOP),
482     DEBUG_TOKEN (NONE,  NOAMP),
483     DEBUG_TOKEN (NONE,  NOTOP),
484     DEBUG_TOKEN (IVAL,  OROP),
485     DEBUG_TOKEN (NONE,  OROR),
486     DEBUG_TOKEN (IVAL,  PERLY_AMPERSAND),
487     DEBUG_TOKEN (IVAL,  PERLY_BRACE_CLOSE),
488     DEBUG_TOKEN (IVAL,  PERLY_BRACE_OPEN),
489     DEBUG_TOKEN (IVAL,  PERLY_BRACKET_CLOSE),
490     DEBUG_TOKEN (IVAL,  PERLY_BRACKET_OPEN),
491     DEBUG_TOKEN (IVAL,  PERLY_COLON),
492     DEBUG_TOKEN (IVAL,  PERLY_COMMA),
493     DEBUG_TOKEN (IVAL,  PERLY_DOT),
494     DEBUG_TOKEN (IVAL,  PERLY_EQUAL_SIGN),
495     DEBUG_TOKEN (IVAL,  PERLY_EXCLAMATION_MARK),
496     DEBUG_TOKEN (IVAL,  PERLY_MINUS),
497     DEBUG_TOKEN (IVAL,  PERLY_PAREN_OPEN),
498     DEBUG_TOKEN (IVAL,  PERLY_PERCENT_SIGN),
499     DEBUG_TOKEN (IVAL,  PERLY_PLUS),
500     DEBUG_TOKEN (IVAL,  PERLY_QUESTION_MARK),
501     DEBUG_TOKEN (IVAL,  PERLY_SEMICOLON),
502     DEBUG_TOKEN (IVAL,  PERLY_SLASH),
503     DEBUG_TOKEN (IVAL,  PERLY_SNAIL),
504     DEBUG_TOKEN (IVAL,  PERLY_STAR),
505     DEBUG_TOKEN (IVAL,  PERLY_TILDE),
506     DEBUG_TOKEN (OPVAL, PLUGEXPR),
507     DEBUG_TOKEN (OPVAL, PLUGSTMT),
508     DEBUG_TOKEN (PVAL,  PLUGIN_ADD_OP),
509     DEBUG_TOKEN (PVAL,  PLUGIN_ASSIGN_OP),
510     DEBUG_TOKEN (PVAL,  PLUGIN_HIGH_OP),
511     DEBUG_TOKEN (PVAL,  PLUGIN_LOGICAL_AND_OP),
512     DEBUG_TOKEN (PVAL,  PLUGIN_LOGICAL_OR_OP),
513     DEBUG_TOKEN (PVAL,  PLUGIN_LOGICAL_AND_LOW_OP),
514     DEBUG_TOKEN (PVAL,  PLUGIN_LOGICAL_OR_LOW_OP),
515     DEBUG_TOKEN (PVAL,  PLUGIN_LOW_OP),
516     DEBUG_TOKEN (PVAL,  PLUGIN_MUL_OP),
517     DEBUG_TOKEN (PVAL,  PLUGIN_POW_OP),
518     DEBUG_TOKEN (PVAL,  PLUGIN_REL_OP),
519     DEBUG_TOKEN (OPVAL, PMFUNC),
520     DEBUG_TOKEN (NONE,  POSTJOIN),
521     DEBUG_TOKEN (NONE,  POSTDEC),
522     DEBUG_TOKEN (NONE,  POSTINC),
523     DEBUG_TOKEN (OPNUM, POWOP),
524     DEBUG_TOKEN (NONE,  PREDEC),
525     DEBUG_TOKEN (NONE,  PREINC),
526     DEBUG_TOKEN (OPVAL, PRIVATEREF),
527     DEBUG_TOKEN (OPVAL, QWLIST),
528     DEBUG_TOKEN (NONE,  REFGEN),
529     DEBUG_TOKEN (OPNUM, SHIFTOP),
530     DEBUG_TOKEN (NONE,  SUBLEXEND),
531     DEBUG_TOKEN (NONE,  SUBLEXSTART),
532     DEBUG_TOKEN (OPVAL, THING),
533     DEBUG_TOKEN (NONE,  UMINUS),
534     DEBUG_TOKEN (OPNUM, UNIOP),
535     DEBUG_TOKEN (OPVAL, UNIOPSUB),
536     DEBUG_TOKEN (OPVAL, BAREWORD),
537     DEBUG_TOKEN (IVAL,  YADAYADA),
538     { 0,		TOKENTYPE_NONE,		NULL }
539 };
540 
541 #undef DEBUG_TOKEN
542 
543 /* dump the returned token in rv, plus any optional arg in pl_yylval */
544 
545 STATIC int
S_tokereport(pTHX_ I32 rv,const YYSTYPE * lvalp)546 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
547 {
548     PERL_ARGS_ASSERT_TOKEREPORT;
549 
550     if (DEBUG_T_TEST) {
551         const char *name = NULL;
552         enum token_type type = TOKENTYPE_NONE;
553         const struct debug_tokens *p;
554         SV* const report = newSVpvs("<== ");
555 
556         for (p = debug_tokens; p->token; p++) {
557             if (p->token == (int)rv) {
558                 name = p->name;
559                 type = p->type;
560                 break;
561             }
562         }
563         if (name)
564             Perl_sv_catpv(aTHX_ report, name);
565         else if (isGRAPH(rv))
566         {
567             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
568             if ((char)rv == 'p')
569                 sv_catpvs(report, " (pending identifier)");
570         }
571         else if (!rv)
572             sv_catpvs(report, "EOF");
573         else
574             Perl_sv_catpvf(aTHX_ report, "?? %" IVdf, (IV)rv);
575         switch (type) {
576         case TOKENTYPE_NONE:
577             break;
578         case TOKENTYPE_IVAL:
579             Perl_sv_catpvf(aTHX_ report, "(ival=%" IVdf ")", (IV)lvalp->ival);
580             break;
581         case TOKENTYPE_OPNUM:
582             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
583                                     PL_op_name[lvalp->ival]);
584             break;
585         case TOKENTYPE_PVAL:
586             Perl_sv_catpvf(aTHX_ report, "(pval=%p)", lvalp->pval);
587             break;
588         case TOKENTYPE_OPVAL:
589             if (lvalp->opval) {
590                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
591                                     PL_op_name[lvalp->opval->op_type]);
592                 if (lvalp->opval->op_type == OP_CONST) {
593                     Perl_sv_catpvf(aTHX_ report, " %s",
594                         SvPEEK(cSVOPx_sv(lvalp->opval)));
595                 }
596 
597             }
598             else
599                 sv_catpvs(report, "(opval=null)");
600             break;
601         }
602         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
603     };
604     return (int)rv;
605 }
606 
607 
608 /* print the buffer with suitable escapes */
609 
610 STATIC void
S_printbuf(pTHX_ const char * const fmt,const char * const s)611 S_printbuf(pTHX_ const char *const fmt, const char *const s)
612 {
613     SV* const tmp = newSVpvs("");
614 
615     PERL_ARGS_ASSERT_PRINTBUF;
616 
617     GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); /* fmt checked by caller */
618     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
619     GCC_DIAG_RESTORE_STMT;
620     SvREFCNT_dec(tmp);
621 }
622 
623 #endif
624 
625 /*
626  * S_ao
627  *
628  * This subroutine looks for an '=' next to the operator that has just been
629  * parsed and turns it into an ASSIGNOP if it finds one.
630  */
631 
632 STATIC int
S_ao(pTHX_ int toketype)633 S_ao(pTHX_ int toketype)
634 {
635     if (*PL_bufptr == '=') {
636         PL_bufptr++;
637 
638         switch (toketype) {
639             case ANDAND: pl_yylval.ival = OP_ANDASSIGN; break;
640             case OROR:   pl_yylval.ival = OP_ORASSIGN;  break;
641             case DORDOR: pl_yylval.ival = OP_DORASSIGN; break;
642         }
643 
644         toketype = ASSIGNOP;
645     }
646     return REPORT(toketype);
647 }
648 
649 /*
650  * S_no_op
651  * When Perl expects an operator and finds something else, no_op
652  * prints the warning.  It always prints "<something> found where
653  * operator expected.  It prints "Missing semicolon on previous line?"
654  * if the surprise occurs at the start of the line.  "do you need to
655  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
656  * where the compiler doesn't know if foo is a method call or a function.
657  * It prints "Missing operator before end of line" if there's nothing
658  * after the missing operator, or "... before <...>" if there is something
659  * after the missing operator.
660  *
661  * PL_bufptr is expected to point to the start of the thing that was found,
662  * and s after the next token or partial token.
663  */
664 
665 STATIC void
S_no_op(pTHX_ const char * const what,char * s)666 S_no_op(pTHX_ const char *const what, char *s)
667 {
668     char * const oldbp = PL_bufptr;
669     const bool is_first = (PL_oldbufptr == PL_linestart);
670     SV *message = sv_2mortal( newSVpvf(
671                    PERL_DIAG_WARN_SYNTAX("%s found where operator expected"),
672                    what
673                   ) );
674 
675     PERL_ARGS_ASSERT_NO_OP;
676 
677     if (!s)
678         s = oldbp;
679     else
680         PL_bufptr = s;
681 
682     if (ckWARN_d(WARN_SYNTAX)) {
683         bool has_more = FALSE;
684         if (is_first) {
685             has_more = TRUE;
686             sv_catpvs(message,
687                     " (Missing semicolon on previous line?)");
688         }
689         else if (PL_oldoldbufptr) {
690             /* yyerror (via yywarn) would do this itself, so we should too */
691             const char *t;
692             for (t = PL_oldoldbufptr;
693                  t < PL_bufptr && isSPACE(*t);
694                  t += UTF ? UTF8SKIP(t) : 1)
695             {
696                 NOOP;
697             }
698             /* see if we can identify the cause of the warning */
699             if (isIDFIRST_lazy_if_safe(t,PL_bufend,UTF))
700             {
701                 const char *t_start= t;
702                 for ( ;
703                      (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) || *t == ':');
704                      t += UTF ? UTF8SKIP(t) : 1)
705                 {
706                     NOOP;
707                 }
708                 if (t < PL_bufptr && isSPACE(*t)) {
709                     has_more = TRUE;
710                     sv_catpvf( message,
711                             " (Do you need to predeclare \"%" UTF8f "\"?)",
712                           UTF8fARG(UTF, t - t_start, t_start));
713                 }
714             }
715         }
716         if (!has_more) {
717             const char *t= oldbp;
718             assert(s >= oldbp);
719             while (t < s && isSPACE(*t)) {
720                 t += UTF ? UTF8SKIP(t) : 1;
721             }
722 
723             sv_catpvf(message,
724                     " (Missing operator before \"%" UTF8f "\"?)",
725                      UTF8fARG(UTF, s - t, t));
726         }
727     }
728     yywarn(SvPV_nolen(message), UTF ? SVf_UTF8 : 0);
729     PL_bufptr = oldbp;
730 }
731 
732 /*
733  * S_missingterm
734  * Complain about missing quote/regexp/heredoc terminator.
735  * If it's called with NULL then it cauterizes the line buffer.
736  * If we're in a delimited string and the delimiter is a control
737  * character, it's reformatted into a two-char sequence like ^C.
738  * This is fatal.
739  */
740 
741 STATIC void
S_missingterm(pTHX_ char * s,STRLEN len)742 S_missingterm(pTHX_ char *s, STRLEN len)
743 {
744     char tmpbuf[UTF8_MAXBYTES + 1];
745     char q;
746     bool uni = FALSE;
747     if (s) {
748         char * const nl = (char *) my_memrchr(s, '\n', len);
749         if (nl) {
750             *nl = '\0';
751             len = nl - s;
752         }
753         uni = UTF;
754     }
755     else if (PL_multi_close < 32) {
756         *tmpbuf = '^';
757         tmpbuf[1] = (char)toCTRL(PL_multi_close);
758         tmpbuf[2] = '\0';
759         s = tmpbuf;
760         len = 2;
761     }
762     else {
763         if (! UTF && LIKELY(PL_multi_close < 256)) {
764             *tmpbuf = (char)PL_multi_close;
765             tmpbuf[1] = '\0';
766             len = 1;
767         }
768         else {
769             char *end = (char *)uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close);
770             *end = '\0';
771             len = end - tmpbuf;
772             uni = TRUE;
773         }
774         s = tmpbuf;
775     }
776     q = memchr(s, '"', len) ? '\'' : '"';
777     Perl_croak(aTHX_ "Can't find string terminator %c%" UTF8f "%c"
778                      " anywhere before EOF", q, UTF8fARG(uni, len, s), q);
779 }
780 
781 #include "feature.h"
782 
783 /*
784  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
785  * utf16-to-utf8-reversed.
786  */
787 
788 #ifdef PERL_CR_FILTER
789 static void
strip_return(SV * sv)790 strip_return(SV *sv)
791 {
792     const char *s = SvPVX_const(sv);
793     const char * const e = s + SvCUR(sv);
794 
795     PERL_ARGS_ASSERT_STRIP_RETURN;
796 
797     /* outer loop optimized to do nothing if there are no CR-LFs */
798     while (s < e) {
799         if (*s++ == '\r' && *s == '\n') {
800             /* hit a CR-LF, need to copy the rest */
801             char *d = s - 1;
802             *d++ = *s++;
803             while (s < e) {
804                 if (*s == '\r' && s[1] == '\n')
805                     s++;
806                 *d++ = *s++;
807             }
808             SvCUR(sv) -= s - d;
809             return;
810         }
811     }
812 }
813 
814 STATIC I32
S_cr_textfilter(pTHX_ int idx,SV * sv,int maxlen)815 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
816 {
817     const I32 count = FILTER_READ(idx+1, sv, maxlen);
818     if (count > 0 && !maxlen)
819         strip_return(sv);
820     return count;
821 }
822 #endif
823 
824 /*
825 =for apidoc lex_start
826 
827 Creates and initialises a new lexer/parser state object, supplying
828 a context in which to lex and parse from a new source of Perl code.
829 A pointer to the new state object is placed in L</PL_parser>.  An entry
830 is made on the save stack so that upon unwinding, the new state object
831 will be destroyed and the former value of L</PL_parser> will be restored.
832 Nothing else need be done to clean up the parsing context.
833 
834 The code to be parsed comes from C<line> and C<rsfp>.  C<line>, if
835 non-null, provides a string (in SV form) containing code to be parsed.
836 A copy of the string is made, so subsequent modification of C<line>
837 does not affect parsing.  C<rsfp>, if non-null, provides an input stream
838 from which code will be read to be parsed.  If both are non-null, the
839 code in C<line> comes first and must consist of complete lines of input,
840 and C<rsfp> supplies the remainder of the source.
841 
842 The C<flags> parameter is reserved for future use.  Currently it is only
843 used by perl internally, so extensions should always pass zero.
844 
845 =cut
846 */
847 
848 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
849    can share filters with the current parser.
850    LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
851    caller, hence isn't owned by the parser, so shouldn't be closed on parser
852    destruction. This is used to handle the case of defaulting to reading the
853    script from the standard input because no filename was given on the command
854    line (without getting confused by situation where STDIN has been closed, so
855    the script handle is opened on fd 0)  */
856 
857 void
Perl_lex_start(pTHX_ SV * line,PerlIO * rsfp,U32 flags)858 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
859 {
860     const char *s = NULL;
861     yy_parser *parser, *oparser;
862 
863     if (flags && flags & ~LEX_START_FLAGS)
864         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
865 
866     /* create and initialise a parser */
867 
868     Newxz(parser, 1, yy_parser);
869     parser->old_parser = oparser = PL_parser;
870     PL_parser = parser;
871 
872     parser->stack = NULL;
873     parser->stack_max1 = NULL;
874     parser->ps = NULL;
875 
876     /* on scope exit, free this parser and restore any outer one */
877     SAVEPARSER(parser);
878     parser->saved_curcop = PL_curcop;
879 
880     /* initialise lexer state */
881 
882     parser->nexttoke = 0;
883     parser->error_count = oparser ? oparser->error_count : 0;
884     parser->copline = parser->preambling = NOLINE;
885     parser->lex_state = LEX_NORMAL;
886     parser->expect = XSTATE;
887     parser->rsfp = rsfp;
888     parser->recheck_utf8_validity = TRUE;
889     parser->rsfp_filters =
890       !(flags & LEX_START_SAME_FILTER) || !oparser
891         ? NULL
892         : MUTABLE_AV(SvREFCNT_inc(
893             oparser->rsfp_filters
894              ? oparser->rsfp_filters
895              : (oparser->rsfp_filters = newAV())
896           ));
897 
898     Newx(parser->lex_brackstack, 120, char);
899     Newx(parser->lex_casestack, 12, char);
900     *parser->lex_casestack = '\0';
901     Newxz(parser->lex_shared, 1, LEXSHARED);
902 
903     if (line) {
904         Size_t len;
905         const U8* first_bad_char_loc;
906 
907         s = SvPV_const(line, len);
908 
909         if (   SvUTF8(line)
910             && UNLIKELY(! is_utf8_string_loc((U8 *) s,
911                                              SvCUR(line),
912                                              &first_bad_char_loc)))
913         {
914             _force_out_malformed_utf8_message(first_bad_char_loc,
915                                               (U8 *) s + SvCUR(line),
916                                               0,
917                                               1 /* 1 means die */ );
918             NOT_REACHED; /* NOTREACHED */
919         }
920 
921         parser->linestr = flags & LEX_START_COPIED
922                             ? SvREFCNT_inc_simple_NN(line)
923                             : newSVpvn_flags(s, len, SvUTF8(line));
924         if (!rsfp)
925             sv_catpvs(parser->linestr, "\n;");
926     } else {
927         parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
928     }
929 
930     parser->oldoldbufptr =
931         parser->oldbufptr =
932         parser->bufptr =
933         parser->linestart = SvPVX(parser->linestr);
934     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
935     parser->last_lop = parser->last_uni = NULL;
936 
937     STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
938                                                         |LEX_DONT_CLOSE_RSFP));
939     parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
940                                                         |LEX_DONT_CLOSE_RSFP));
941 
942     parser->in_pod = parser->filtered = 0;
943 }
944 
945 
946 /* delete a parser object */
947 
948 void
Perl_parser_free(pTHX_ const yy_parser * parser)949 Perl_parser_free(pTHX_  const yy_parser *parser)
950 {
951     PERL_ARGS_ASSERT_PARSER_FREE;
952 
953     PL_curcop = parser->saved_curcop;
954     SvREFCNT_dec(parser->linestr);
955 
956     if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
957         PerlIO_clearerr(parser->rsfp);
958     else if (parser->rsfp && (!parser->old_parser
959           || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
960         PerlIO_close(parser->rsfp);
961     SvREFCNT_dec(parser->rsfp_filters);
962     SvREFCNT_dec(parser->lex_stuff);
963     SvREFCNT_dec(parser->lex_sub_repl);
964 
965     Safefree(parser->lex_brackstack);
966     Safefree(parser->lex_casestack);
967     Safefree(parser->lex_shared);
968     PL_parser = parser->old_parser;
969     Safefree(parser);
970 }
971 
972 void
Perl_parser_free_nexttoke_ops(pTHX_ yy_parser * parser,OPSLAB * slab)973 Perl_parser_free_nexttoke_ops(pTHX_  yy_parser *parser, OPSLAB *slab)
974 {
975     I32 nexttoke = parser->nexttoke;
976     PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
977     while (nexttoke--) {
978         if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
979          && parser->nextval[nexttoke].opval
980          && parser->nextval[nexttoke].opval->op_slabbed
981          && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
982             op_free(parser->nextval[nexttoke].opval);
983             parser->nextval[nexttoke].opval = NULL;
984         }
985     }
986 }
987 
988 
989 /*
990 =for apidoc AmnxUN|SV *|PL_parser-E<gt>linestr
991 
992 Buffer scalar containing the chunk currently under consideration of the
993 text currently being lexed.  This is always a plain string scalar (for
994 which C<SvPOK> is true).  It is not intended to be used as a scalar by
995 normal scalar means; instead refer to the buffer directly by the pointer
996 variables described below.
997 
998 The lexer maintains various C<char*> pointers to things in the
999 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
1000 reallocated, all of these pointers must be updated.  Don't attempt to
1001 do this manually, but rather use L</lex_grow_linestr> if you need to
1002 reallocate the buffer.
1003 
1004 The content of the text chunk in the buffer is commonly exactly one
1005 complete line of input, up to and including a newline terminator,
1006 but there are situations where it is otherwise.  The octets of the
1007 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
1008 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
1009 flag on this scalar, which may disagree with it.
1010 
1011 For direct examination of the buffer, the variable
1012 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
1013 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
1014 of these pointers is usually preferable to examination of the scalar
1015 through normal scalar means.
1016 
1017 =for apidoc AmnxUN|char *|PL_parser-E<gt>bufend
1018 
1019 Direct pointer to the end of the chunk of text currently being lexed, the
1020 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
1021 + SvCUR(PL_parser-E<gt>linestr)>.  A C<NUL> character (zero octet) is
1022 always located at the end of the buffer, and does not count as part of
1023 the buffer's contents.
1024 
1025 =for apidoc AmnxUN|char *|PL_parser-E<gt>bufptr
1026 
1027 Points to the current position of lexing inside the lexer buffer.
1028 Characters around this point may be freely examined, within
1029 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
1030 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
1031 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
1032 
1033 Lexing code (whether in the Perl core or not) moves this pointer past
1034 the characters that it consumes.  It is also expected to perform some
1035 bookkeeping whenever a newline character is consumed.  This movement
1036 can be more conveniently performed by the function L</lex_read_to>,
1037 which handles newlines appropriately.
1038 
1039 Interpretation of the buffer's octets can be abstracted out by
1040 using the slightly higher-level functions L</lex_peek_unichar> and
1041 L</lex_read_unichar>.
1042 
1043 =for apidoc AmnxUN|char *|PL_parser-E<gt>linestart
1044 
1045 Points to the start of the current line inside the lexer buffer.
1046 This is useful for indicating at which column an error occurred, and
1047 not much else.  This must be updated by any lexing code that consumes
1048 a newline; the function L</lex_read_to> handles this detail.
1049 
1050 =cut
1051 */
1052 
1053 /*
1054 =for apidoc lex_bufutf8
1055 
1056 Indicates whether the octets in the lexer buffer
1057 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
1058 of Unicode characters.  If not, they should be interpreted as Latin-1
1059 characters.  This is analogous to the C<SvUTF8> flag for scalars.
1060 
1061 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
1062 contains valid UTF-8.  Lexing code must be robust in the face of invalid
1063 encoding.
1064 
1065 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
1066 is significant, but not the whole story regarding the input character
1067 encoding.  Normally, when a file is being read, the scalar contains octets
1068 and its C<SvUTF8> flag is off, but the octets should be interpreted as
1069 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
1070 however, the scalar may have the C<SvUTF8> flag on, and in this case its
1071 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
1072 is in effect.  This logic may change in the future; use this function
1073 instead of implementing the logic yourself.
1074 
1075 =cut
1076 */
1077 
1078 bool
Perl_lex_bufutf8(pTHX)1079 Perl_lex_bufutf8(pTHX)
1080 {
1081     return UTF;
1082 }
1083 
1084 /*
1085 =for apidoc lex_grow_linestr
1086 
1087 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
1088 at least C<len> octets (including terminating C<NUL>).  Returns a
1089 pointer to the reallocated buffer.  This is necessary before making
1090 any direct modification of the buffer that would increase its length.
1091 L</lex_stuff_pvn> provides a more convenient way to insert text into
1092 the buffer.
1093 
1094 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
1095 this function updates all of the lexer's variables that point directly
1096 into the buffer.
1097 
1098 =cut
1099 */
1100 
1101 char *
Perl_lex_grow_linestr(pTHX_ STRLEN len)1102 Perl_lex_grow_linestr(pTHX_ STRLEN len)
1103 {
1104     SV *linestr;
1105     char *buf;
1106     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1107     STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
1108     bool current;
1109 
1110     linestr = PL_parser->linestr;
1111     buf = SvPVX(linestr);
1112     if (len <= SvLEN(linestr))
1113         return buf;
1114 
1115     /* Is the lex_shared linestr SV the same as the current linestr SV?
1116      * Only in this case does re_eval_start need adjusting, since it
1117      * points within lex_shared->ls_linestr's buffer */
1118     current = (   !PL_parser->lex_shared->ls_linestr
1119                || linestr == PL_parser->lex_shared->ls_linestr);
1120 
1121     bufend_pos = PL_parser->bufend - buf;
1122     bufptr_pos = PL_parser->bufptr - buf;
1123     oldbufptr_pos = PL_parser->oldbufptr - buf;
1124     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1125     linestart_pos = PL_parser->linestart - buf;
1126     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1127     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1128     re_eval_start_pos = (current && PL_parser->lex_shared->re_eval_start) ?
1129                             PL_parser->lex_shared->re_eval_start - buf : 0;
1130 
1131     buf = sv_grow(linestr, len);
1132 
1133     PL_parser->bufend = buf + bufend_pos;
1134     PL_parser->bufptr = buf + bufptr_pos;
1135     PL_parser->oldbufptr = buf + oldbufptr_pos;
1136     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1137     PL_parser->linestart = buf + linestart_pos;
1138     if (PL_parser->last_uni)
1139         PL_parser->last_uni = buf + last_uni_pos;
1140     if (PL_parser->last_lop)
1141         PL_parser->last_lop = buf + last_lop_pos;
1142     if (current && PL_parser->lex_shared->re_eval_start)
1143         PL_parser->lex_shared->re_eval_start  = buf + re_eval_start_pos;
1144     return buf;
1145 }
1146 
1147 /*
1148 =for apidoc lex_stuff_pvn
1149 
1150 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1151 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1152 reallocating the buffer if necessary.  This means that lexing code that
1153 runs later will see the characters as if they had appeared in the input.
1154 It is not recommended to do this as part of normal parsing, and most
1155 uses of this facility run the risk of the inserted characters being
1156 interpreted in an unintended manner.
1157 
1158 The string to be inserted is represented by C<len> octets starting
1159 at C<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
1160 according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>.
1161 The characters are recoded for the lexer buffer, according to how the
1162 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
1163 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
1164 function is more convenient.
1165 
1166 =for apidoc Amnh||LEX_STUFF_UTF8
1167 
1168 =cut
1169 */
1170 
1171 void
Perl_lex_stuff_pvn(pTHX_ const char * pv,STRLEN len,U32 flags)1172 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1173 {
1174     char *bufptr;
1175     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1176     if (flags & ~(LEX_STUFF_UTF8))
1177         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1178     if (UTF) {
1179         if (flags & LEX_STUFF_UTF8) {
1180             goto plain_copy;
1181         } else {
1182             STRLEN highhalf = variant_under_utf8_count((U8 *) pv,
1183                                                        (U8 *) pv + len);
1184             const char *p, *e = pv+len;;
1185             if (!highhalf)
1186                 goto plain_copy;
1187             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1188             bufptr = PL_parser->bufptr;
1189             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1190             SvCUR_set(PL_parser->linestr,
1191                 SvCUR(PL_parser->linestr) + len+highhalf);
1192             PL_parser->bufend += len+highhalf;
1193             for (p = pv; p != e; p++) {
1194                 append_utf8_from_native_byte(*p, (U8 **) &bufptr);
1195             }
1196         }
1197     } else {
1198         if (flags & LEX_STUFF_UTF8) {
1199             STRLEN highhalf = 0;
1200             const char *p, *e = pv+len;
1201             for (p = pv; p != e; p++) {
1202                 U8 c = (U8)*p;
1203                 if (UTF8_IS_ABOVE_LATIN1(c)) {
1204                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
1205                                 "non-Latin-1 character into Latin-1 input");
1206                 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1207                     p++;
1208                     highhalf++;
1209                 } else assert(UTF8_IS_INVARIANT(c));
1210             }
1211             if (!highhalf)
1212                 goto plain_copy;
1213             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1214             bufptr = PL_parser->bufptr;
1215             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1216             SvCUR_set(PL_parser->linestr,
1217                 SvCUR(PL_parser->linestr) + len-highhalf);
1218             PL_parser->bufend += len-highhalf;
1219             p = pv;
1220             while (p < e) {
1221                 if (UTF8_IS_INVARIANT(*p)) {
1222                     *bufptr++ = *p;
1223                     p++;
1224                 }
1225                 else {
1226                     assert(p < e -1 );
1227                     *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
1228                     p += 2;
1229                 }
1230             }
1231         } else {
1232           plain_copy:
1233             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1234             bufptr = PL_parser->bufptr;
1235             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1236             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1237             PL_parser->bufend += len;
1238             Copy(pv, bufptr, len, char);
1239         }
1240     }
1241 }
1242 
1243 /*
1244 =for apidoc lex_stuff_pv
1245 
1246 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1247 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1248 reallocating the buffer if necessary.  This means that lexing code that
1249 runs later will see the characters as if they had appeared in the input.
1250 It is not recommended to do this as part of normal parsing, and most
1251 uses of this facility run the risk of the inserted characters being
1252 interpreted in an unintended manner.
1253 
1254 The string to be inserted is represented by octets starting at C<pv>
1255 and continuing to the first nul.  These octets are interpreted as either
1256 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1257 in C<flags>.  The characters are recoded for the lexer buffer, according
1258 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1259 If it is not convenient to nul-terminate a string to be inserted, the
1260 L</lex_stuff_pvn> function is more appropriate.
1261 
1262 =cut
1263 */
1264 
1265 void
Perl_lex_stuff_pv(pTHX_ const char * pv,U32 flags)1266 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1267 {
1268     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1269     lex_stuff_pvn(pv, strlen(pv), flags);
1270 }
1271 
1272 /*
1273 =for apidoc lex_stuff_sv
1274 
1275 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1276 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1277 reallocating the buffer if necessary.  This means that lexing code that
1278 runs later will see the characters as if they had appeared in the input.
1279 It is not recommended to do this as part of normal parsing, and most
1280 uses of this facility run the risk of the inserted characters being
1281 interpreted in an unintended manner.
1282 
1283 The string to be inserted is the string value of C<sv>.  The characters
1284 are recoded for the lexer buffer, according to how the buffer is currently
1285 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1286 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1287 need to construct a scalar.
1288 
1289 =cut
1290 */
1291 
1292 void
Perl_lex_stuff_sv(pTHX_ SV * sv,U32 flags)1293 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1294 {
1295     char *pv;
1296     STRLEN len;
1297     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1298     if (flags)
1299         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1300     pv = SvPV(sv, len);
1301     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1302 }
1303 
1304 /*
1305 =for apidoc lex_unstuff
1306 
1307 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1308 C<ptr>.  Text following C<ptr> will be moved, and the buffer shortened.
1309 This hides the discarded text from any lexing code that runs later,
1310 as if the text had never appeared.
1311 
1312 This is not the normal way to consume lexed text.  For that, use
1313 L</lex_read_to>.
1314 
1315 =cut
1316 */
1317 
1318 void
Perl_lex_unstuff(pTHX_ char * ptr)1319 Perl_lex_unstuff(pTHX_ char *ptr)
1320 {
1321     char *buf, *bufend;
1322     STRLEN unstuff_len;
1323     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1324     buf = PL_parser->bufptr;
1325     if (ptr < buf)
1326         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1327     if (ptr == buf)
1328         return;
1329     bufend = PL_parser->bufend;
1330     if (ptr > bufend)
1331         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1332     unstuff_len = ptr - buf;
1333     Move(ptr, buf, bufend+1-ptr, char);
1334     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1335     PL_parser->bufend = bufend - unstuff_len;
1336 }
1337 
1338 /*
1339 =for apidoc lex_read_to
1340 
1341 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1342 to C<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match C<ptr>,
1343 performing the correct bookkeeping whenever a newline character is passed.
1344 This is the normal way to consume lexed text.
1345 
1346 Interpretation of the buffer's octets can be abstracted out by
1347 using the slightly higher-level functions L</lex_peek_unichar> and
1348 L</lex_read_unichar>.
1349 
1350 =cut
1351 */
1352 
1353 void
Perl_lex_read_to(pTHX_ char * ptr)1354 Perl_lex_read_to(pTHX_ char *ptr)
1355 {
1356     char *s;
1357     PERL_ARGS_ASSERT_LEX_READ_TO;
1358     s = PL_parser->bufptr;
1359     if (ptr < s || ptr > PL_parser->bufend)
1360         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1361     for (; s != ptr; s++)
1362         if (*s == '\n') {
1363             COPLINE_INC_WITH_HERELINES;
1364             PL_parser->linestart = s+1;
1365         }
1366     PL_parser->bufptr = ptr;
1367 }
1368 
1369 /*
1370 =for apidoc lex_discard_to
1371 
1372 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1373 up to C<ptr>.  The remaining content of the buffer will be moved, and
1374 all pointers into the buffer updated appropriately.  C<ptr> must not
1375 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1376 it is not permitted to discard text that has yet to be lexed.
1377 
1378 Normally it is not necessarily to do this directly, because it suffices to
1379 use the implicit discarding behaviour of L</lex_next_chunk> and things
1380 based on it.  However, if a token stretches across multiple lines,
1381 and the lexing code has kept multiple lines of text in the buffer for
1382 that purpose, then after completion of the token it would be wise to
1383 explicitly discard the now-unneeded earlier lines, to avoid future
1384 multi-line tokens growing the buffer without bound.
1385 
1386 =cut
1387 */
1388 
1389 void
Perl_lex_discard_to(pTHX_ char * ptr)1390 Perl_lex_discard_to(pTHX_ char *ptr)
1391 {
1392     char *buf;
1393     STRLEN discard_len;
1394     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1395     buf = SvPVX(PL_parser->linestr);
1396     if (ptr < buf)
1397         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1398     if (ptr == buf)
1399         return;
1400     if (ptr > PL_parser->bufptr)
1401         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1402     discard_len = ptr - buf;
1403     if (PL_parser->oldbufptr < ptr)
1404         PL_parser->oldbufptr = ptr;
1405     if (PL_parser->oldoldbufptr < ptr)
1406         PL_parser->oldoldbufptr = ptr;
1407     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1408         PL_parser->last_uni = NULL;
1409     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1410         PL_parser->last_lop = NULL;
1411     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1412     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1413     PL_parser->bufend -= discard_len;
1414     PL_parser->bufptr -= discard_len;
1415     PL_parser->oldbufptr -= discard_len;
1416     PL_parser->oldoldbufptr -= discard_len;
1417     if (PL_parser->last_uni)
1418         PL_parser->last_uni -= discard_len;
1419     if (PL_parser->last_lop)
1420         PL_parser->last_lop -= discard_len;
1421 }
1422 
1423 void
Perl_notify_parser_that_changed_to_utf8(pTHX)1424 Perl_notify_parser_that_changed_to_utf8(pTHX)
1425 {
1426     /* Called when $^H is changed to indicate that HINT_UTF8 has changed from
1427      * off to on.  At compile time, this has the effect of entering a 'use
1428      * utf8' section.  This means that any input was not previously checked for
1429      * UTF-8 (because it was off), but now we do need to check it, or our
1430      * assumptions about the input being sane could be wrong, and we could
1431      * segfault.  This routine just sets a flag so that the next time we look
1432      * at the input we do the well-formed UTF-8 check.  If we aren't in the
1433      * proper phase, there may not be a parser object, but if there is, setting
1434      * the flag is harmless */
1435 
1436     if (PL_parser) {
1437         PL_parser->recheck_utf8_validity = TRUE;
1438     }
1439 }
1440 
1441 /*
1442 =for apidoc lex_next_chunk
1443 
1444 Reads in the next chunk of text to be lexed, appending it to
1445 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1446 looked to the end of the current chunk and wants to know more.  It is
1447 usual, but not necessary, for lexing to have consumed the entirety of
1448 the current chunk at this time.
1449 
1450 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1451 chunk (i.e., the current chunk has been entirely consumed), normally the
1452 current chunk will be discarded at the same time that the new chunk is
1453 read in.  If C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, the current chunk
1454 will not be discarded.  If the current chunk has not been entirely
1455 consumed, then it will not be discarded regardless of the flag.
1456 
1457 Returns true if some new text was added to the buffer, or false if the
1458 buffer has reached the end of the input text.
1459 
1460 =for apidoc Amnh||LEX_KEEP_PREVIOUS
1461 
1462 =cut
1463 */
1464 
1465 #define LEX_FAKE_EOF 0x80000000
1466 #define LEX_NO_TERM  0x40000000 /* here-doc */
1467 
1468 bool
Perl_lex_next_chunk(pTHX_ U32 flags)1469 Perl_lex_next_chunk(pTHX_ U32 flags)
1470 {
1471     SV *linestr;
1472     char *buf;
1473     STRLEN old_bufend_pos, new_bufend_pos;
1474     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1475     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1476     bool got_some_for_debugger = 0;
1477     bool got_some;
1478 
1479     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1480         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1481     if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
1482         return FALSE;
1483     linestr = PL_parser->linestr;
1484     buf = SvPVX(linestr);
1485     if (!(flags & LEX_KEEP_PREVIOUS)
1486           && PL_parser->bufptr == PL_parser->bufend)
1487     {
1488         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1489         linestart_pos = 0;
1490         if (PL_parser->last_uni != PL_parser->bufend)
1491             PL_parser->last_uni = NULL;
1492         if (PL_parser->last_lop != PL_parser->bufend)
1493             PL_parser->last_lop = NULL;
1494         last_uni_pos = last_lop_pos = 0;
1495         *buf = 0;
1496         SvCUR_set(linestr, 0);
1497     } else {
1498         old_bufend_pos = PL_parser->bufend - buf;
1499         bufptr_pos = PL_parser->bufptr - buf;
1500         oldbufptr_pos = PL_parser->oldbufptr - buf;
1501         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1502         linestart_pos = PL_parser->linestart - buf;
1503         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1504         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1505     }
1506     if (flags & LEX_FAKE_EOF) {
1507         goto eof;
1508     } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1509         got_some = 0;
1510     } else if (filter_gets(linestr, old_bufend_pos)) {
1511         got_some = 1;
1512         got_some_for_debugger = 1;
1513     } else if (flags & LEX_NO_TERM) {
1514         got_some = 0;
1515     } else {
1516         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1517             SvPVCLEAR(linestr);
1518         eof:
1519         /* End of real input.  Close filehandle (unless it was STDIN),
1520          * then add implicit termination.
1521          */
1522         if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1523             PerlIO_clearerr(PL_parser->rsfp);
1524         else if (PL_parser->rsfp)
1525             (void)PerlIO_close(PL_parser->rsfp);
1526         PL_parser->rsfp = NULL;
1527         PL_parser->in_pod = PL_parser->filtered = 0;
1528         if (!PL_in_eval && PL_minus_p) {
1529             sv_catpvs(linestr,
1530                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1531             PL_minus_n = PL_minus_p = 0;
1532         } else if (!PL_in_eval && PL_minus_n) {
1533             sv_catpvs(linestr, /*{*/";}");
1534             PL_minus_n = 0;
1535         } else
1536             sv_catpvs(linestr, ";");
1537         got_some = 1;
1538     }
1539     buf = SvPVX(linestr);
1540     new_bufend_pos = SvCUR(linestr);
1541     PL_parser->bufend = buf + new_bufend_pos;
1542     PL_parser->bufptr = buf + bufptr_pos;
1543 
1544     if (UTF) {
1545         const U8* first_bad_char_loc;
1546         if (UNLIKELY(! is_utf8_string_loc(
1547                             (U8 *) PL_parser->bufptr,
1548                                    PL_parser->bufend - PL_parser->bufptr,
1549                                    &first_bad_char_loc)))
1550         {
1551             _force_out_malformed_utf8_message(first_bad_char_loc,
1552                                               (U8 *) PL_parser->bufend,
1553                                               0,
1554                                               1 /* 1 means die */ );
1555             NOT_REACHED; /* NOTREACHED */
1556         }
1557     }
1558 
1559     PL_parser->oldbufptr = buf + oldbufptr_pos;
1560     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1561     PL_parser->linestart = buf + linestart_pos;
1562     if (PL_parser->last_uni)
1563         PL_parser->last_uni = buf + last_uni_pos;
1564     if (PL_parser->last_lop)
1565         PL_parser->last_lop = buf + last_lop_pos;
1566     if (PL_parser->preambling != NOLINE) {
1567         CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1568         PL_parser->preambling = NOLINE;
1569     }
1570     if (   got_some_for_debugger
1571         && PERLDB_LINE_OR_SAVESRC
1572         && PL_curstash != PL_debstash)
1573     {
1574         /* debugger active and we're not compiling the debugger code,
1575          * so store the line into the debugger's array of lines
1576          */
1577         update_debugger_info(NULL, buf+old_bufend_pos,
1578             new_bufend_pos-old_bufend_pos);
1579     }
1580     return got_some;
1581 }
1582 
1583 /*
1584 =for apidoc lex_peek_unichar
1585 
1586 Looks ahead one (Unicode) character in the text currently being lexed.
1587 Returns the codepoint (unsigned integer value) of the next character,
1588 or -1 if lexing has reached the end of the input text.  To consume the
1589 peeked character, use L</lex_read_unichar>.
1590 
1591 If the next character is in (or extends into) the next chunk of input
1592 text, the next chunk will be read in.  Normally the current chunk will be
1593 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1594 bit set, then the current chunk will not be discarded.
1595 
1596 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1597 is encountered, an exception is generated.
1598 
1599 =cut
1600 */
1601 
1602 I32
Perl_lex_peek_unichar(pTHX_ U32 flags)1603 Perl_lex_peek_unichar(pTHX_ U32 flags)
1604 {
1605     char *s, *bufend;
1606     if (flags & ~(LEX_KEEP_PREVIOUS))
1607         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1608     s = PL_parser->bufptr;
1609     bufend = PL_parser->bufend;
1610     if (UTF) {
1611         U8 head;
1612         I32 unichar;
1613         STRLEN len, retlen;
1614         if (s == bufend) {
1615             if (!lex_next_chunk(flags))
1616                 return -1;
1617             s = PL_parser->bufptr;
1618             bufend = PL_parser->bufend;
1619         }
1620         head = (U8)*s;
1621         if (UTF8_IS_INVARIANT(head))
1622             return head;
1623         if (UTF8_IS_START(head)) {
1624             len = UTF8SKIP(&head);
1625             while ((STRLEN)(bufend-s) < len) {
1626                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1627                     break;
1628                 s = PL_parser->bufptr;
1629                 bufend = PL_parser->bufend;
1630             }
1631         }
1632         unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1633         if (retlen == (STRLEN)-1) {
1634             _force_out_malformed_utf8_message((U8 *) s,
1635                                               (U8 *) bufend,
1636                                               0,
1637                                               1 /* 1 means die */ );
1638             NOT_REACHED; /* NOTREACHED */
1639         }
1640         return unichar;
1641     } else {
1642         if (s == bufend) {
1643             if (!lex_next_chunk(flags))
1644                 return -1;
1645             s = PL_parser->bufptr;
1646         }
1647         return (U8)*s;
1648     }
1649 }
1650 
1651 /*
1652 =for apidoc lex_read_unichar
1653 
1654 Reads the next (Unicode) character in the text currently being lexed.
1655 Returns the codepoint (unsigned integer value) of the character read,
1656 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1657 if lexing has reached the end of the input text.  To non-destructively
1658 examine the next character, use L</lex_peek_unichar> instead.
1659 
1660 If the next character is in (or extends into) the next chunk of input
1661 text, the next chunk will be read in.  Normally the current chunk will be
1662 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1663 bit set, then the current chunk will not be discarded.
1664 
1665 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1666 is encountered, an exception is generated.
1667 
1668 =cut
1669 */
1670 
1671 I32
Perl_lex_read_unichar(pTHX_ U32 flags)1672 Perl_lex_read_unichar(pTHX_ U32 flags)
1673 {
1674     I32 c;
1675     if (flags & ~(LEX_KEEP_PREVIOUS))
1676         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1677     c = lex_peek_unichar(flags);
1678     if (c != -1) {
1679         if (c == '\n')
1680             COPLINE_INC_WITH_HERELINES;
1681         if (UTF)
1682             PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1683         else
1684             ++(PL_parser->bufptr);
1685     }
1686     return c;
1687 }
1688 
1689 /*
1690 =for apidoc lex_read_space
1691 
1692 Reads optional spaces, in Perl style, in the text currently being
1693 lexed.  The spaces may include ordinary whitespace characters and
1694 Perl-style comments.  C<#line> directives are processed if encountered.
1695 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1696 at a non-space character (or the end of the input text).
1697 
1698 If spaces extend into the next chunk of input text, the next chunk will
1699 be read in.  Normally the current chunk will be discarded at the same
1700 time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, then the current
1701 chunk will not be discarded.
1702 
1703 =cut
1704 */
1705 
1706 #define LEX_NO_INCLINE    0x40000000
1707 #define LEX_NO_NEXT_CHUNK 0x80000000
1708 
1709 void
Perl_lex_read_space(pTHX_ U32 flags)1710 Perl_lex_read_space(pTHX_ U32 flags)
1711 {
1712     char *s, *bufend;
1713     const bool can_incline = !(flags & LEX_NO_INCLINE);
1714     bool need_incline = 0;
1715     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1716         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1717     s = PL_parser->bufptr;
1718     bufend = PL_parser->bufend;
1719     while (1) {
1720         char c = *s;
1721         if (c == '#') {
1722             do {
1723                 c = *++s;
1724             } while (!(c == '\n' || (c == 0 && s == bufend)));
1725         } else if (c == '\n') {
1726             s++;
1727             if (can_incline) {
1728                 PL_parser->linestart = s;
1729                 if (s == bufend)
1730                     need_incline = 1;
1731                 else
1732                     incline(s, bufend);
1733             }
1734         } else if (isSPACE(c)) {
1735             s++;
1736         } else if (c == 0 && s == bufend) {
1737             bool got_more;
1738             line_t l;
1739             if (flags & LEX_NO_NEXT_CHUNK)
1740                 break;
1741             PL_parser->bufptr = s;
1742             l = CopLINE(PL_curcop);
1743             CopLINE(PL_curcop) += PL_parser->herelines + 1;
1744             got_more = lex_next_chunk(flags);
1745             CopLINE_set(PL_curcop, l);
1746             s = PL_parser->bufptr;
1747             bufend = PL_parser->bufend;
1748             if (!got_more)
1749                 break;
1750             if (can_incline && need_incline && PL_parser->rsfp) {
1751                 incline(s, bufend);
1752                 need_incline = 0;
1753             }
1754         } else if (!c) {
1755             s++;
1756         } else {
1757             break;
1758         }
1759     }
1760     PL_parser->bufptr = s;
1761 }
1762 
1763 /*
1764 
1765 =for apidoc validate_proto
1766 
1767 This function performs syntax checking on a prototype, C<proto>.
1768 If C<warn> is true, any illegal characters or mismatched brackets
1769 will trigger illegalproto warnings, declaring that they were
1770 detected in the prototype for C<name>.
1771 
1772 The return value is C<true> if this is a valid prototype, and
1773 C<false> if it is not, regardless of whether C<warn> was C<true> or
1774 C<false>.
1775 
1776 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1777 
1778 =cut
1779 
1780  */
1781 
1782 bool
Perl_validate_proto(pTHX_ SV * name,SV * proto,bool warn,bool curstash)1783 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn, bool curstash)
1784 {
1785     STRLEN len, origlen;
1786     char *p;
1787     bool bad_proto = FALSE;
1788     bool in_brackets = FALSE;
1789     bool after_slash = FALSE;
1790     char greedy_proto = ' ';
1791     bool proto_after_greedy_proto = FALSE;
1792     bool must_be_last = FALSE;
1793     bool underscore = FALSE;
1794     bool bad_proto_after_underscore = FALSE;
1795 
1796     PERL_ARGS_ASSERT_VALIDATE_PROTO;
1797 
1798     if (!proto)
1799         return TRUE;
1800 
1801     p = SvPV(proto, len);
1802     origlen = len;
1803     for (; len--; p++) {
1804         if (!isSPACE(*p)) {
1805             if (must_be_last)
1806                 proto_after_greedy_proto = TRUE;
1807             if (underscore) {
1808                 if (!memCHRs(";@%", *p))
1809                     bad_proto_after_underscore = TRUE;
1810                 underscore = FALSE;
1811             }
1812             if (!memCHRs("$@%*;[]&\\_+", *p) || *p == '\0') {
1813                 bad_proto = TRUE;
1814             }
1815             else {
1816                 if (*p == '[')
1817                     in_brackets = TRUE;
1818                 else if (*p == ']')
1819                     in_brackets = FALSE;
1820                 else if ((*p == '@' || *p == '%')
1821                          && !after_slash
1822                          && !in_brackets )
1823                 {
1824                     must_be_last = TRUE;
1825                     greedy_proto = *p;
1826                 }
1827                 else if (*p == '_')
1828                     underscore = TRUE;
1829             }
1830             if (*p == '\\')
1831                 after_slash = TRUE;
1832             else
1833                 after_slash = FALSE;
1834         }
1835     }
1836 
1837     if (warn) {
1838         SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1839         p -= origlen;
1840         p = SvUTF8(proto)
1841             ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1842                              origlen, UNI_DISPLAY_ISPRINT)
1843             : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1844 
1845         if (curstash && !memchr(SvPVX(name), ':', SvCUR(name))) {
1846             SV *name2 = sv_2mortal(newSVsv(PL_curstname));
1847             sv_catpvs(name2, "::");
1848             sv_catsv(name2, (SV *)name);
1849             name = name2;
1850         }
1851 
1852         if (proto_after_greedy_proto)
1853             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1854                         "Prototype after '%c' for %" SVf " : %s",
1855                         greedy_proto, SVfARG(name), p);
1856         if (in_brackets)
1857             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1858                         "Missing ']' in prototype for %" SVf " : %s",
1859                         SVfARG(name), p);
1860         if (bad_proto)
1861             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1862                         "Illegal character in prototype for %" SVf " : %s",
1863                         SVfARG(name), p);
1864         if (bad_proto_after_underscore)
1865             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1866                         "Illegal character after '_' in prototype for %" SVf " : %s",
1867                         SVfARG(name), p);
1868     }
1869 
1870     return (! (proto_after_greedy_proto || bad_proto) );
1871 }
1872 
1873 /*
1874  * S_incline
1875  * This subroutine has nothing to do with tilting, whether at windmills
1876  * or pinball tables.  Its name is short for "increment line".  It
1877  * increments the current line number in CopLINE(PL_curcop) and checks
1878  * to see whether the line starts with a comment of the form
1879  *    # line 500 "foo.pm"
1880  * If so, it sets the current line number and file to the values in the comment.
1881  */
1882 
1883 STATIC void
S_incline(pTHX_ const char * s,const char * end)1884 S_incline(pTHX_ const char *s, const char *end)
1885 {
1886     const char *t;
1887     const char *n;
1888     const char *e;
1889     line_t line_num;
1890     UV uv;
1891 
1892     PERL_ARGS_ASSERT_INCLINE;
1893 
1894     assert(end >= s);
1895 
1896     COPLINE_INC_WITH_HERELINES;
1897     if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1898      && s+1 == PL_bufend && *s == ';') {
1899         /* fake newline in string eval */
1900         CopLINE_dec(PL_curcop);
1901         return;
1902     }
1903     if (*s++ != '#')
1904         return;
1905     while (SPACE_OR_TAB(*s))
1906         s++;
1907     if (memBEGINs(s, (STRLEN) (end - s), "line"))
1908         s += sizeof("line") - 1;
1909     else
1910         return;
1911     if (SPACE_OR_TAB(*s))
1912         s++;
1913     else
1914         return;
1915     while (SPACE_OR_TAB(*s))
1916         s++;
1917     if (!isDIGIT(*s))
1918         return;
1919 
1920     n = s;
1921     while (isDIGIT(*s))
1922         s++;
1923     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1924         return;
1925     while (SPACE_OR_TAB(*s))
1926         s++;
1927     if (*s == '"' && (t = (char *) memchr(s+1, '"', end - s))) {
1928         s++;
1929         e = t + 1;
1930     }
1931     else {
1932         t = s;
1933         while (*t && !isSPACE(*t))
1934             t++;
1935         e = t;
1936     }
1937     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1938         e++;
1939     if (*e != '\n' && *e != '\0')
1940         return;		/* false alarm */
1941 
1942     if (!grok_atoUV(n, &uv, &e))
1943         return;
1944     line_num = ((line_t)uv) - 1;
1945 
1946     if (t - s > 0) {
1947         const STRLEN len = t - s;
1948 
1949         if (!PL_rsfp && !PL_parser->filtered) {
1950             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1951              * to *{"::_<newfilename"} */
1952             /* However, the long form of evals is only turned on by the
1953                debugger - usually they're "(eval %lu)" */
1954             GV * const cfgv = CopFILEGV(PL_curcop);
1955             if (cfgv) {
1956                 char smallbuf[128];
1957                 STRLEN tmplen2 = len;
1958                 char *tmpbuf2;
1959                 GV *gv2;
1960 
1961                 if (tmplen2 + 2 <= sizeof smallbuf)
1962                     tmpbuf2 = smallbuf;
1963                 else
1964                     Newx(tmpbuf2, tmplen2 + 2, char);
1965 
1966                 tmpbuf2[0] = '_';
1967                 tmpbuf2[1] = '<';
1968 
1969                 memcpy(tmpbuf2 + 2, s, tmplen2);
1970                 tmplen2 += 2;
1971 
1972                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1973                 if (!isGV(gv2)) {
1974                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1975                     /* adjust ${"::_<newfilename"} to store the new file name */
1976                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1977                     /* The line number may differ. If that is the case,
1978                        alias the saved lines that are in the array.
1979                        Otherwise alias the whole array. */
1980                     if (CopLINE(PL_curcop) == line_num) {
1981                         GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1982                         GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1983                     }
1984                     else if (GvAV(cfgv)) {
1985                         AV * const av = GvAV(cfgv);
1986                         const line_t start = CopLINE(PL_curcop)+1;
1987                         SSize_t items = AvFILLp(av) - start;
1988                         if (items > 0) {
1989                             AV * const av2 = GvAVn(gv2);
1990                             SV **svp = AvARRAY(av) + start;
1991                             Size_t l = line_num+1;
1992                             while (items-- && l < SSize_t_MAX && l == (line_t)l)
1993                                 av_store(av2, (SSize_t)l++, SvREFCNT_inc(*svp++));
1994                         }
1995                     }
1996                 }
1997 
1998                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1999             }
2000         }
2001         CopFILE_free(PL_curcop);
2002         CopFILE_setn(PL_curcop, s, len);
2003     }
2004     CopLINE_set(PL_curcop, line_num);
2005 }
2006 
2007 STATIC void
S_update_debugger_info(pTHX_ SV * orig_sv,const char * const buf,STRLEN len)2008 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
2009 {
2010     AV *av = CopFILEAVx(PL_curcop);
2011     if (av) {
2012         SV * sv;
2013         if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
2014         else {
2015             sv = *av_fetch(av, 0, 1);
2016             SvUPGRADE(sv, SVt_PVMG);
2017         }
2018         if (!SvPOK(sv)) SvPVCLEAR(sv);
2019         if (orig_sv)
2020             sv_catsv(sv, orig_sv);
2021         else
2022             sv_catpvn(sv, buf, len);
2023         if (!SvIOK(sv)) {
2024             (void)SvIOK_on(sv);
2025             SvIV_set(sv, 0);
2026         }
2027         if (PL_parser->preambling == NOLINE)
2028             av_store(av, CopLINE(PL_curcop), sv);
2029     }
2030 }
2031 
2032 /*
2033  * skipspace
2034  * Called to gobble the appropriate amount and type of whitespace.
2035  * Skips comments as well.
2036  * Returns the next character after the whitespace that is skipped.
2037  *
2038  * peekspace
2039  * Same thing, but look ahead without incrementing line numbers or
2040  * adjusting PL_linestart.
2041  */
2042 
2043 #define skipspace(s) skipspace_flags(s, 0)
2044 #define peekspace(s) skipspace_flags(s, LEX_NO_INCLINE)
2045 
2046 char *
Perl_skipspace_flags(pTHX_ char * s,U32 flags)2047 Perl_skipspace_flags(pTHX_ char *s, U32 flags)
2048 {
2049     PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
2050     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2051         while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s))
2052             s++;
2053     } else {
2054         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
2055         PL_bufptr = s;
2056         lex_read_space(flags | LEX_KEEP_PREVIOUS |
2057                 (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ?
2058                     LEX_NO_NEXT_CHUNK : 0));
2059         s = PL_bufptr;
2060         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
2061         if (PL_linestart > PL_bufptr)
2062             PL_bufptr = PL_linestart;
2063         return s;
2064     }
2065     return s;
2066 }
2067 
2068 /*
2069  * S_check_uni
2070  * Check the unary operators to ensure there's no ambiguity in how they're
2071  * used.  An ambiguous piece of code would be:
2072  *     rand + 5
2073  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
2074  * the +5 is its argument.
2075  */
2076 
2077 STATIC void
S_check_uni(pTHX)2078 S_check_uni(pTHX)
2079 {
2080     const char *s;
2081 
2082     if (PL_oldoldbufptr != PL_last_uni)
2083         return;
2084     while (isSPACE(*PL_last_uni))
2085         PL_last_uni++;
2086     s = PL_last_uni;
2087     while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-')
2088         s += UTF ? UTF8SKIP(s) : 1;
2089     if (s < PL_bufptr && memchr(s, '(', PL_bufptr - s))
2090         return;
2091 
2092     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
2093                      "Warning: Use of \"%" UTF8f "\" without parentheses is ambiguous",
2094                      UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
2095 }
2096 
2097 /*
2098  * LOP : macro to build a list operator.  Its behaviour has been replaced
2099  * with a subroutine, S_lop() for which LOP is just another name.
2100  */
2101 
2102 #define LOP(f,x) return lop(f,x,s)
2103 
2104 /*
2105  * S_lop
2106  * Build a list operator (or something that might be one).  The rules:
2107  *  - if we have a next token, then it's a list operator (no parens) for
2108  *    which the next token has already been parsed; e.g.,
2109  *       sort foo @args
2110  *       sort foo (@args)
2111  *  - if the next thing is an opening paren, then it's a function
2112  *  - else it's a list operator
2113  */
2114 
2115 STATIC I32
S_lop(pTHX_ I32 f,U8 x,char * s)2116 S_lop(pTHX_ I32 f, U8 x, char *s)
2117 {
2118     PERL_ARGS_ASSERT_LOP;
2119 
2120     pl_yylval.ival = f;
2121     CLINE;
2122     PL_bufptr = s;
2123     PL_last_lop = PL_oldbufptr;
2124     PL_last_lop_op = (OPCODE)f;
2125     if (PL_nexttoke)
2126         goto lstop;
2127     PL_expect = x;
2128     if (*s == '(')
2129         return REPORT(FUNC);
2130     s = skipspace(s);
2131     if (*s == '(')
2132         return REPORT(FUNC);
2133     else {
2134         lstop:
2135         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
2136             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
2137         return REPORT(LSTOP);
2138     }
2139 }
2140 
2141 /*
2142  * S_force_next
2143  * When the lexer realizes it knows the next token (for instance,
2144  * it is reordering tokens for the parser) then it can call S_force_next
2145  * to know what token to return the next time the lexer is called.  Caller
2146  * will need to set PL_nextval[] and possibly PL_expect to ensure
2147  * the lexer handles the token correctly.
2148  */
2149 
2150 STATIC void
S_force_next(pTHX_ I32 type)2151 S_force_next(pTHX_ I32 type)
2152 {
2153 #ifdef DEBUGGING
2154     if (DEBUG_T_TEST) {
2155         PerlIO_printf(Perl_debug_log, "### forced token:\n");
2156         tokereport(type, &NEXTVAL_NEXTTOKE);
2157     }
2158 #endif
2159     assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
2160     PL_nexttype[PL_nexttoke] = type;
2161     PL_nexttoke++;
2162 }
2163 
2164 /*
2165  * S_postderef
2166  *
2167  * This subroutine handles postfix deref syntax after the arrow has already
2168  * been emitted.  @* $* etc. are emitted as two separate tokens right here.
2169  * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
2170  * only the first, leaving yylex to find the next.
2171  */
2172 
2173 static int
S_postderef(pTHX_ int const funny,char const next)2174 S_postderef(pTHX_ int const funny, char const next)
2175 {
2176     assert(funny == DOLSHARP
2177         || funny == PERLY_DOLLAR
2178         || funny == PERLY_SNAIL
2179         || funny == PERLY_PERCENT_SIGN
2180         || funny == PERLY_AMPERSAND
2181         || funny == PERLY_STAR
2182     );
2183     if (next == '*') {
2184         PL_expect = XOPERATOR;
2185         if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
2186             assert(PERLY_SNAIL == funny || PERLY_DOLLAR == funny || DOLSHARP == funny);
2187             PL_lex_state = LEX_INTERPEND;
2188             if (PERLY_SNAIL == funny)
2189                 force_next(POSTJOIN);
2190         }
2191         force_next(PERLY_STAR);
2192         PL_bufptr+=2;
2193     }
2194     else {
2195         if (PERLY_SNAIL == funny && PL_lex_state == LEX_INTERPNORMAL
2196          && !PL_lex_brackets)
2197             PL_lex_dojoin = 2;
2198         PL_expect = XOPERATOR;
2199         PL_bufptr++;
2200     }
2201     return funny;
2202 }
2203 
2204 void
Perl_yyunlex(pTHX)2205 Perl_yyunlex(pTHX)
2206 {
2207     int yyc = PL_parser->yychar;
2208     if (yyc != YYEMPTY) {
2209         if (yyc) {
2210             NEXTVAL_NEXTTOKE = PL_parser->yylval;
2211             if (yyc == PERLY_BRACE_OPEN || yyc == HASHBRACK || yyc == PERLY_BRACKET_OPEN) {
2212                 PL_lex_allbrackets--;
2213                 PL_lex_brackets--;
2214                 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2215             } else if (yyc == PERLY_PAREN_OPEN) {
2216                 PL_lex_allbrackets--;
2217                 yyc |= (2<<24);
2218             }
2219             force_next(yyc);
2220         }
2221         PL_parser->yychar = YYEMPTY;
2222     }
2223 }
2224 
2225 STATIC SV *
S_newSV_maybe_utf8(pTHX_ const char * const start,STRLEN len)2226 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2227 {
2228     SV * const sv = newSVpvn_utf8(start, len,
2229                     ! IN_BYTES
2230                   &&  UTF
2231                   &&  len != 0
2232                   &&  is_utf8_non_invariant_string((const U8*)start, len));
2233     return sv;
2234 }
2235 
2236 /*
2237  * S_force_word
2238  * When the lexer knows the next thing is a word (for instance, it has
2239  * just seen -> and it knows that the next char is a word char, then
2240  * it calls S_force_word to stick the next word into the PL_nexttoke/val
2241  * lookahead.
2242  *
2243  * Arguments:
2244  *   char *start : buffer position (must be within PL_linestr)
2245  *   int token   : PL_next* will be this type of bare word
2246  *                 (e.g., METHCALL0,BAREWORD)
2247  *   int check_keyword : if true, Perl checks to make sure the word isn't
2248  *       a keyword (do this if the word is a label, e.g. goto FOO)
2249  *   int allow_pack : if true, : characters will also be allowed (require,
2250  *       use, etc. do this)
2251  */
2252 
2253 STATIC char *
S_force_word(pTHX_ char * start,int token,int check_keyword,int allow_pack)2254 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2255 {
2256     char *s;
2257     STRLEN len;
2258 
2259     PERL_ARGS_ASSERT_FORCE_WORD;
2260 
2261     start = skipspace(start);
2262     s = start;
2263     if (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
2264         || (allow_pack && *s == ':' && s[1] == ':') )
2265     {
2266         s = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len, allow_pack);
2267         if (check_keyword) {
2268           char *s2 = PL_tokenbuf;
2269           STRLEN len2 = len;
2270           if (allow_pack && memBEGINPs(s2, len, "CORE::")) {
2271             s2 += sizeof("CORE::") - 1;
2272             len2 -= sizeof("CORE::") - 1;
2273           }
2274           if (keyword(s2, len2, 0))
2275             return start;
2276         }
2277         if (token == METHCALL0) {
2278             s = skipspace(s);
2279             if (*s == '(')
2280                 PL_expect = XTERM;
2281             else {
2282                 PL_expect = XOPERATOR;
2283             }
2284         }
2285         NEXTVAL_NEXTTOKE.opval
2286             = newSVOP(OP_CONST,0,
2287                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2288         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2289         force_next(token);
2290     }
2291     return s;
2292 }
2293 
2294 /*
2295  * S_force_ident
2296  * Called when the lexer wants $foo *foo &foo etc, but the program
2297  * text only contains the "foo" portion.  The first argument is a pointer
2298  * to the "foo", and the second argument is the type symbol to prefix.
2299  * Forces the next token to be a "BAREWORD".
2300  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2301  */
2302 
2303 STATIC void
S_force_ident(pTHX_ const char * s,int kind)2304 S_force_ident(pTHX_ const char *s, int kind)
2305 {
2306     PERL_ARGS_ASSERT_FORCE_IDENT;
2307 
2308     if (s[0]) {
2309         const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2310         OP* const o = newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2311                                                                 UTF ? SVf_UTF8 : 0));
2312         NEXTVAL_NEXTTOKE.opval = o;
2313         force_next(BAREWORD);
2314         if (kind) {
2315             o->op_private = OPpCONST_ENTERED;
2316             /* XXX see note in pp_entereval() for why we forgo typo
2317                warnings if the symbol must be introduced in an eval.
2318                GSAR 96-10-12 */
2319             gv_fetchpvn_flags(s, len,
2320                               (PL_in_eval ? GV_ADDMULTI
2321                               : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2322                               kind == PERLY_DOLLAR ? SVt_PV :
2323                               kind == PERLY_SNAIL ? SVt_PVAV :
2324                               kind == PERLY_PERCENT_SIGN ? SVt_PVHV :
2325                               SVt_PVGV
2326                               );
2327         }
2328     }
2329 }
2330 
2331 static void
S_force_ident_maybe_lex(pTHX_ char pit)2332 S_force_ident_maybe_lex(pTHX_ char pit)
2333 {
2334     NEXTVAL_NEXTTOKE.ival = pit;
2335     force_next('p');
2336 }
2337 
2338 NV
Perl_str_to_version(pTHX_ SV * sv)2339 Perl_str_to_version(pTHX_ SV *sv)
2340 {
2341     NV retval = 0.0;
2342     NV nshift = 1.0;
2343     STRLEN len;
2344     const char *start = SvPV_const(sv,len);
2345     const char * const end = start + len;
2346     const bool utf = cBOOL(SvUTF8(sv));
2347 
2348     PERL_ARGS_ASSERT_STR_TO_VERSION;
2349 
2350     while (start < end) {
2351         STRLEN skip;
2352         UV n;
2353         if (utf)
2354             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2355         else {
2356             n = *(U8*)start;
2357             skip = 1;
2358         }
2359         retval += ((NV)n)/nshift;
2360         start += skip;
2361         nshift *= 1000;
2362     }
2363     return retval;
2364 }
2365 
2366 /*
2367  * S_force_version
2368  * Forces the next token to be a version number.
2369  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2370  * and if "guessing" is TRUE, then no new token is created (and the caller
2371  * must use an alternative parsing method).
2372  */
2373 
2374 STATIC char *
S_force_version(pTHX_ char * s,int guessing)2375 S_force_version(pTHX_ char *s, int guessing)
2376 {
2377     OP *version = NULL;
2378     char *d;
2379 
2380     PERL_ARGS_ASSERT_FORCE_VERSION;
2381 
2382     s = skipspace(s);
2383 
2384     d = s;
2385     if (*d == 'v')
2386         d++;
2387     if (isDIGIT(*d)) {
2388         while (isDIGIT(*d) || *d == '_' || *d == '.')
2389             d++;
2390         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2391             SV *ver;
2392             s = scan_num(s, &pl_yylval);
2393             version = pl_yylval.opval;
2394             ver = cSVOPx(version)->op_sv;
2395             if (SvPOK(ver) && !SvNIOK(ver)) {
2396                 SvUPGRADE(ver, SVt_PVNV);
2397                 SvNV_set(ver, str_to_version(ver));
2398                 SvNOK_on(ver);		/* hint that it is a version */
2399             }
2400         }
2401         else if (guessing) {
2402             return s;
2403         }
2404     }
2405 
2406     /* NOTE: The parser sees the package name and the VERSION swapped */
2407     NEXTVAL_NEXTTOKE.opval = version;
2408     force_next(BAREWORD);
2409 
2410     return s;
2411 }
2412 
2413 /*
2414  * S_force_strict_version
2415  * Forces the next token to be a version number using strict syntax rules.
2416  */
2417 
2418 STATIC char *
S_force_strict_version(pTHX_ char * s)2419 S_force_strict_version(pTHX_ char *s)
2420 {
2421     OP *version = NULL;
2422     const char *errstr = NULL;
2423 
2424     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2425 
2426     while (isSPACE(*s)) /* leading whitespace */
2427         s++;
2428 
2429     if (is_STRICT_VERSION(s,&errstr)) {
2430         SV *ver = newSV_type(SVt_NULL);
2431         s = (char *)scan_version(s, ver, 0);
2432         version = newSVOP(OP_CONST, 0, ver);
2433     }
2434     else if ((*s != ';' && *s != ':' && *s != '{' && *s != '}' )
2435              && (s = skipspace(s), (*s != ';' && *s != ':' && *s != '{' && *s != '}' )))
2436     {
2437         PL_bufptr = s;
2438         if (errstr)
2439             yyerror(errstr); /* version required */
2440         return s;
2441     }
2442 
2443     /* NOTE: The parser sees the package name and the VERSION swapped */
2444     NEXTVAL_NEXTTOKE.opval = version;
2445     force_next(BAREWORD);
2446 
2447     return s;
2448 }
2449 
2450 /*
2451  * S_tokeq
2452  * Turns any \\ into \ in a quoted string passed in in 'sv', returning 'sv',
2453  * modified as necessary.  However, if HINT_NEW_STRING is on, 'sv' is
2454  * unchanged, and a new SV containing the modified input is returned.
2455  */
2456 
2457 STATIC SV *
S_tokeq(pTHX_ SV * sv)2458 S_tokeq(pTHX_ SV *sv)
2459 {
2460     char *s;
2461     char *send;
2462     char *d;
2463     SV *pv = sv;
2464 
2465     PERL_ARGS_ASSERT_TOKEQ;
2466 
2467     assert (SvPOK(sv));
2468     assert (SvLEN(sv));
2469     assert (!SvIsCOW(sv));
2470     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2471         goto finish;
2472     s = SvPVX(sv);
2473     send = SvEND(sv);
2474     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2475     while (s < send && !(*s == '\\' && s[1] == '\\'))
2476         s++;
2477     if (s == send)
2478         goto finish;
2479     d = s;
2480     if ( PL_hints & HINT_NEW_STRING ) {
2481         pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2482                             SVs_TEMP | SvUTF8(sv));
2483     }
2484     while (s < send) {
2485         if (*s == '\\') {
2486             if (s + 1 < send && (s[1] == '\\'))
2487                 s++;		/* all that, just for this */
2488         }
2489         *d++ = *s++;
2490     }
2491     *d = '\0';
2492     SvCUR_set(sv, d - SvPVX_const(sv));
2493   finish:
2494     if ( PL_hints & HINT_NEW_STRING )
2495        return new_constant(NULL, 0, "q", sv, pv, "q", 1, NULL);
2496     return sv;
2497 }
2498 
2499 /*
2500  * Now come three functions related to double-quote context,
2501  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2502  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2503  * interact with PL_lex_state, and create fake ( ... ) argument lists
2504  * to handle functions and concatenation.
2505  * For example,
2506  *   "foo\lbar"
2507  * is tokenised as
2508  *    stringify ( const[foo] concat lcfirst ( const[bar] ) )
2509  */
2510 
2511 /*
2512  * S_sublex_start
2513  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2514  *
2515  * Pattern matching will set PL_lex_op to the pattern-matching op to
2516  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2517  *
2518  * OP_CONST is easy--just make the new op and return.
2519  *
2520  * Everything else becomes a FUNC.
2521  *
2522  * Sets PL_lex_state to LEX_INTERPPUSH unless ival was OP_NULL or we
2523  * had an OP_CONST.  This just sets us up for a
2524  * call to S_sublex_push().
2525  */
2526 
2527 STATIC I32
S_sublex_start(pTHX)2528 S_sublex_start(pTHX)
2529 {
2530     const I32 op_type = pl_yylval.ival;
2531 
2532     if (op_type == OP_NULL) {
2533         pl_yylval.opval = PL_lex_op;
2534         PL_lex_op = NULL;
2535         return THING;
2536     }
2537     if (op_type == OP_CONST) {
2538         SV *sv = PL_lex_stuff;
2539         PL_lex_stuff = NULL;
2540         sv = tokeq(sv);
2541 
2542         if (SvTYPE(sv) == SVt_PVIV) {
2543             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2544             STRLEN len;
2545             const char * const p = SvPV_const(sv, len);
2546             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2547             SvREFCNT_dec(sv);
2548             sv = nsv;
2549         }
2550         pl_yylval.opval = newSVOP(op_type, 0, sv);
2551         return THING;
2552     }
2553 
2554     PL_parser->lex_super_state = PL_lex_state;
2555     PL_parser->lex_sub_inwhat = (U16)op_type;
2556     PL_parser->lex_sub_op = PL_lex_op;
2557     PL_parser->sub_no_recover = FALSE;
2558     PL_parser->sub_error_count = PL_error_count;
2559     PL_lex_state = LEX_INTERPPUSH;
2560 
2561     PL_expect = XTERM;
2562     if (PL_lex_op) {
2563         pl_yylval.opval = PL_lex_op;
2564         PL_lex_op = NULL;
2565         return PMFUNC;
2566     }
2567     else
2568         return FUNC;
2569 }
2570 
2571 /*
2572  * S_sublex_push
2573  * Create a new scope to save the lexing state.  The scope will be
2574  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2575  * to the uc, lc, etc. found before.
2576  * Sets PL_lex_state to LEX_INTERPCONCAT.
2577  */
2578 
2579 STATIC I32
S_sublex_push(pTHX)2580 S_sublex_push(pTHX)
2581 {
2582     LEXSHARED *shared;
2583     const bool is_heredoc = PL_multi_close == '<';
2584     ENTER;
2585 
2586     PL_lex_state = PL_parser->lex_super_state;
2587     SAVEI8(PL_lex_dojoin);
2588     SAVEI32(PL_lex_brackets);
2589     SAVEI32(PL_lex_allbrackets);
2590     SAVEI32(PL_lex_formbrack);
2591     SAVEI8(PL_lex_fakeeof);
2592     SAVEI32(PL_lex_casemods);
2593     SAVEI32(PL_lex_starts);
2594     SAVEI8(PL_lex_state);
2595     SAVESPTR(PL_lex_repl);
2596     SAVEVPTR(PL_lex_inpat);
2597     SAVEI16(PL_lex_inwhat);
2598     if (is_heredoc)
2599     {
2600         SAVECOPLINE(PL_curcop);
2601         SAVEI32(PL_multi_end);
2602         SAVEI32(PL_parser->herelines);
2603         PL_parser->herelines = 0;
2604     }
2605     SAVEIV(PL_multi_close);
2606     SAVEPPTR(PL_bufptr);
2607     SAVEPPTR(PL_bufend);
2608     SAVEPPTR(PL_oldbufptr);
2609     SAVEPPTR(PL_oldoldbufptr);
2610     SAVEPPTR(PL_last_lop);
2611     SAVEPPTR(PL_last_uni);
2612     SAVEPPTR(PL_linestart);
2613     SAVESPTR(PL_linestr);
2614     SAVEGENERICPV(PL_lex_brackstack);
2615     SAVEGENERICPV(PL_lex_casestack);
2616     SAVEGENERICPV(PL_parser->lex_shared);
2617     SAVEBOOL(PL_parser->lex_re_reparsing);
2618     SAVEI32(PL_copline);
2619 
2620     /* The here-doc parser needs to be able to peek into outer lexing
2621        scopes to find the body of the here-doc.  So we put PL_linestr and
2622        PL_bufptr into lex_shared, to 'share' those values.
2623      */
2624     PL_parser->lex_shared->ls_linestr = PL_linestr;
2625     PL_parser->lex_shared->ls_bufptr  = PL_bufptr;
2626 
2627     PL_linestr = PL_lex_stuff;
2628     PL_lex_repl = PL_parser->lex_sub_repl;
2629     PL_lex_stuff = NULL;
2630     PL_parser->lex_sub_repl = NULL;
2631 
2632     /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets
2633        set for an inner quote-like operator and then an error causes scope-
2634        popping.  We must not have a PL_lex_stuff value left dangling, as
2635        that breaks assumptions elsewhere.  See bug #123617.  */
2636     SAVEGENERICSV(PL_lex_stuff);
2637     SAVEGENERICSV(PL_parser->lex_sub_repl);
2638 
2639     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2640         = SvPVX(PL_linestr);
2641     PL_bufend += SvCUR(PL_linestr);
2642     PL_last_lop = PL_last_uni = NULL;
2643     SAVEFREESV(PL_linestr);
2644     if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2645 
2646     PL_lex_dojoin = FALSE;
2647     PL_lex_brackets = PL_lex_formbrack = 0;
2648     PL_lex_allbrackets = 0;
2649     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2650     Newx(PL_lex_brackstack, 120, char);
2651     Newx(PL_lex_casestack, 12, char);
2652     PL_lex_casemods = 0;
2653     *PL_lex_casestack = '\0';
2654     PL_lex_starts = 0;
2655     PL_lex_state = LEX_INTERPCONCAT;
2656     if (is_heredoc)
2657         CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2658     PL_copline = NOLINE;
2659 
2660     Newxz(shared, 1, LEXSHARED);
2661     shared->ls_prev = PL_parser->lex_shared;
2662     PL_parser->lex_shared = shared;
2663 
2664     PL_lex_inwhat = PL_parser->lex_sub_inwhat;
2665     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2666     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2667         PL_lex_inpat = PL_parser->lex_sub_op;
2668     else
2669         PL_lex_inpat = NULL;
2670 
2671     PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2672     PL_in_eval &= ~EVAL_RE_REPARSING;
2673 
2674     return SUBLEXSTART;
2675 }
2676 
2677 /*
2678  * S_sublex_done
2679  * Restores lexer state after a S_sublex_push.
2680  */
2681 
2682 STATIC I32
S_sublex_done(pTHX)2683 S_sublex_done(pTHX)
2684 {
2685     if (!PL_lex_starts++) {
2686         SV * const sv = newSVpvs("");
2687         if (SvUTF8(PL_linestr))
2688             SvUTF8_on(sv);
2689         PL_expect = XOPERATOR;
2690         pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
2691         return THING;
2692     }
2693 
2694     if (PL_lex_casemods) {		/* oops, we've got some unbalanced parens */
2695         PL_lex_state = LEX_INTERPCASEMOD;
2696         return yylex();
2697     }
2698 
2699     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2700     assert(PL_lex_inwhat != OP_TRANSR);
2701     if (PL_lex_repl) {
2702         assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2703         PL_linestr = PL_lex_repl;
2704         PL_lex_inpat = 0;
2705         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2706         PL_bufend += SvCUR(PL_linestr);
2707         PL_last_lop = PL_last_uni = NULL;
2708         PL_lex_dojoin = FALSE;
2709         PL_lex_brackets = 0;
2710         PL_lex_allbrackets = 0;
2711         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2712         PL_lex_casemods = 0;
2713         *PL_lex_casestack = '\0';
2714         PL_lex_starts = 0;
2715         if (SvEVALED(PL_lex_repl)) {
2716             PL_lex_state = LEX_INTERPNORMAL;
2717             PL_lex_starts++;
2718             /*	we don't clear PL_lex_repl here, so that we can check later
2719                 whether this is an evalled subst; that means we rely on the
2720                 logic to ensure sublex_done() is called again only via the
2721                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2722         }
2723         else {
2724             PL_lex_state = LEX_INTERPCONCAT;
2725             PL_lex_repl = NULL;
2726         }
2727         if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2728             CopLINE(PL_curcop) +=
2729                 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xnv_lines
2730                  + PL_parser->herelines;
2731             PL_parser->herelines = 0;
2732         }
2733         return PERLY_SLASH;
2734     }
2735     else {
2736         const line_t l = CopLINE(PL_curcop);
2737         LEAVE;
2738         if (PL_parser->sub_error_count != PL_error_count) {
2739             if (PL_parser->sub_no_recover) {
2740                 yyquit();
2741                 NOT_REACHED;
2742             }
2743         }
2744         if (PL_multi_close == '<')
2745             PL_parser->herelines += l - PL_multi_end;
2746         PL_bufend = SvPVX(PL_linestr);
2747         PL_bufend += SvCUR(PL_linestr);
2748         PL_expect = XOPERATOR;
2749         return SUBLEXEND;
2750     }
2751 }
2752 
2753 HV *
Perl_load_charnames(pTHX_ SV * char_name,const char * context,const STRLEN context_len,const char ** error_msg)2754 Perl_load_charnames(pTHX_ SV * char_name, const char * context,
2755                           const STRLEN context_len, const char ** error_msg)
2756 {
2757     /* Load the official _charnames module if not already there.  The
2758      * parameters are just to give info for any error messages generated:
2759      *  char_name   a name to look up which is the reason for loading this
2760      *  context     'char_name' in the context in the input in which it appears
2761      *  context_len how many bytes 'context' occupies
2762      *  error_msg   *error_msg will be set to any error
2763      *
2764      *  Returns the ^H table if success; otherwise NULL */
2765 
2766     unsigned int i;
2767     HV * table;
2768     SV **cvp;
2769     SV * res;
2770 
2771     PERL_ARGS_ASSERT_LOAD_CHARNAMES;
2772 
2773     /* This loop is executed 1 1/2 times.  On the first time through, if it
2774      * isn't already loaded, try loading it, and iterate just once to see if it
2775      * worked.  */
2776     for (i = 0; i < 2; i++) {
2777         table = GvHV(PL_hintgv);		 /* ^H */
2778 
2779         if (    table
2780             && (PL_hints & HINT_LOCALIZE_HH)
2781             && (cvp = hv_fetchs(table, "charnames", FALSE))
2782             &&  SvOK(*cvp))
2783         {
2784             return table;   /* Quit if already loaded */
2785         }
2786 
2787         if (i == 0) {
2788             Perl_load_module(aTHX_
2789                 0,
2790                 newSVpvs("_charnames"),
2791 
2792                 /* version parameter; no need to specify it, as if we get too early
2793                 * a version, will fail anyway, not being able to find 'charnames'
2794                 * */
2795                 NULL,
2796                 newSVpvs(":full"),
2797                 newSVpvs(":short"),
2798                 NULL);
2799         }
2800     }
2801 
2802     /* Here, it failed; new_constant will give appropriate error messages */
2803     *error_msg = NULL;
2804     res = new_constant( NULL, 0, "charnames", char_name, NULL,
2805                         context, context_len, error_msg);
2806     SvREFCNT_dec(res);
2807 
2808     return NULL;
2809 }
2810 
2811 STATIC SV*
S_get_and_check_backslash_N_name_wrapper(pTHX_ const char * s,const char * const e)2812 S_get_and_check_backslash_N_name_wrapper(pTHX_ const char* s, const char* const e)
2813 {
2814     /* This justs wraps get_and_check_backslash_N_name() to output any error
2815      * message it returns. */
2816 
2817     const char * error_msg = NULL;
2818     SV * result;
2819 
2820     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME_WRAPPER;
2821 
2822     /* charnames doesn't work well if there have been errors found */
2823     if (PL_error_count > 0) {
2824         return NULL;
2825     }
2826 
2827     result = get_and_check_backslash_N_name(s, e, cBOOL(UTF), &error_msg);
2828 
2829     if (error_msg) {
2830         yyerror_pv(error_msg, UTF ? SVf_UTF8 : 0);
2831     }
2832 
2833     return result;
2834 }
2835 
2836 SV*
Perl_get_and_check_backslash_N_name(pTHX_ const char * s,const char * e,const bool is_utf8,const char ** error_msg)2837 Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
2838                                           const char* e,
2839                                           const bool is_utf8,
2840                                           const char ** error_msg)
2841 {
2842     /* <s> points to first character of interior of \N{}, <e> to one beyond the
2843      * interior, hence to the "}".  Finds what the name resolves to, returning
2844      * an SV* containing it; NULL if no valid one found.
2845      *
2846      * 'is_utf8' is TRUE if we know we want the result to be UTF-8 even if it
2847      * doesn't have to be. */
2848 
2849     SV* char_name;
2850     SV* res;
2851     HV * table;
2852     SV **cvp;
2853     SV *cv;
2854     SV *rv;
2855     HV *stash;
2856 
2857     /* Points to the beginning of the \N{... so that any messages include the
2858      * context of what's failing*/
2859     const char* context = s - 3;
2860     STRLEN context_len = e - context + 1; /* include all of \N{...} */
2861 
2862 
2863     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2864 
2865     assert(e >= s);
2866     assert(s > (char *) 3);
2867 
2868     while (s < e && isBLANK(*s)) {
2869         s++;
2870     }
2871 
2872     while (s < e && isBLANK(*(e - 1))) {
2873         e--;
2874     }
2875 
2876     char_name = newSVpvn_flags(s, e - s, (is_utf8) ? SVf_UTF8 : 0);
2877 
2878     if (!SvCUR(char_name)) {
2879         SvREFCNT_dec_NN(char_name);
2880         /* diag_listed_as: Unknown charname '%s' */
2881         *error_msg = Perl_form(aTHX_ "Unknown charname ''");
2882         return NULL;
2883     }
2884 
2885     /* Autoload the charnames module */
2886 
2887     table = load_charnames(char_name, context, context_len, error_msg);
2888     if (table == NULL) {
2889         return NULL;
2890     }
2891 
2892     *error_msg = NULL;
2893     res = new_constant( NULL, 0, "charnames", char_name, NULL,
2894                         context, context_len, error_msg);
2895     if (*error_msg) {
2896         *error_msg = Perl_form(aTHX_ "Unknown charname '%s'", SvPVX(char_name));
2897 
2898         SvREFCNT_dec(res);
2899         return NULL;
2900     }
2901 
2902     /* See if the charnames handler is the Perl core's, and if so, we can skip
2903      * the validation needed for a user-supplied one, as Perl's does its own
2904      * validation. */
2905     cvp = hv_fetchs(table, "charnames", FALSE);
2906     if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2907         SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2908     {
2909         const char * const name = HvNAME(stash);
2910          if (memEQs(name, HvNAMELEN(stash), "_charnames")) {
2911            return res;
2912        }
2913     }
2914 
2915     /* Here, it isn't Perl's charname handler.  We can't rely on a
2916      * user-supplied handler to validate the input name.  For non-ut8 input,
2917      * look to see that the first character is legal.  Then loop through the
2918      * rest checking that each is a continuation */
2919 
2920     /* This code makes the reasonable assumption that the only Latin1-range
2921      * characters that begin a character name alias are alphabetic, otherwise
2922      * would have to create a isCHARNAME_BEGIN macro */
2923 
2924     if (! is_utf8) {
2925         if (! isALPHAU(*s)) {
2926             goto bad_charname;
2927         }
2928         s++;
2929         while (s < e) {
2930             if (! isCHARNAME_CONT(*s)) {
2931                 goto bad_charname;
2932             }
2933             if (*s == ' ' && *(s-1) == ' ') {
2934                 goto multi_spaces;
2935             }
2936             s++;
2937         }
2938     }
2939     else {
2940         /* Similarly for utf8.  For invariants can check directly; for other
2941          * Latin1, can calculate their code point and check; otherwise  use an
2942          * inversion list */
2943         if (UTF8_IS_INVARIANT(*s)) {
2944             if (! isALPHAU(*s)) {
2945                 goto bad_charname;
2946             }
2947             s++;
2948         } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2949             if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) {
2950                 goto bad_charname;
2951             }
2952             s += 2;
2953         }
2954         else {
2955             if (! _invlist_contains_cp(PL_utf8_charname_begin,
2956                                        utf8_to_uvchr_buf((U8 *) s,
2957                                                          (U8 *) e,
2958                                                          NULL)))
2959             {
2960                 goto bad_charname;
2961             }
2962             s += UTF8SKIP(s);
2963         }
2964 
2965         while (s < e) {
2966             if (UTF8_IS_INVARIANT(*s)) {
2967                 if (! isCHARNAME_CONT(*s)) {
2968                     goto bad_charname;
2969                 }
2970                 if (*s == ' ' && *(s-1) == ' ') {
2971                     goto multi_spaces;
2972                 }
2973                 s++;
2974             }
2975             else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2976                 if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1))))
2977                 {
2978                     goto bad_charname;
2979                 }
2980                 s += 2;
2981             }
2982             else {
2983                 if (! _invlist_contains_cp(PL_utf8_charname_continue,
2984                                            utf8_to_uvchr_buf((U8 *) s,
2985                                                              (U8 *) e,
2986                                                              NULL)))
2987                 {
2988                     goto bad_charname;
2989                 }
2990                 s += UTF8SKIP(s);
2991             }
2992         }
2993     }
2994     if (*(s-1) == ' ') {
2995         /* diag_listed_as: charnames alias definitions may not contain
2996                            trailing white-space; marked by <-- HERE in %s
2997          */
2998         *error_msg = Perl_form(aTHX_
2999             "charnames alias definitions may not contain trailing "
3000             "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
3001             (int)(s - context + 1), context,
3002             (int)(e - s + 1), s + 1);
3003         return NULL;
3004     }
3005 
3006     if (SvUTF8(res)) { /* Don't accept malformed charname value */
3007         const U8* first_bad_char_loc;
3008         STRLEN len;
3009         const char* const str = SvPV_const(res, len);
3010         if (UNLIKELY(! is_utf8_string_loc((U8 *) str, len,
3011                                           &first_bad_char_loc)))
3012         {
3013             _force_out_malformed_utf8_message(first_bad_char_loc,
3014                                               (U8 *) PL_parser->bufend,
3015                                               0,
3016                                               0 /* 0 means don't die */ );
3017             /* diag_listed_as: Malformed UTF-8 returned by \N{%s}
3018                                immediately after '%s' */
3019             *error_msg = Perl_form(aTHX_
3020                 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
3021                  (int) context_len, context,
3022                  (int) ((char *) first_bad_char_loc - str), str);
3023             return NULL;
3024         }
3025     }
3026 
3027     return res;
3028 
3029   bad_charname: {
3030 
3031         /* The final %.*s makes sure that should the trailing NUL be missing
3032          * that this print won't run off the end of the string */
3033         /* diag_listed_as: Invalid character in \N{...}; marked by <-- HERE
3034                            in \N{%s} */
3035         *error_msg = Perl_form(aTHX_
3036             "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
3037             (int)(s - context + 1), context,
3038             (int)(e - s + 1), s + 1);
3039         return NULL;
3040     }
3041 
3042   multi_spaces:
3043         /* diag_listed_as: charnames alias definitions may not contain a
3044                            sequence of multiple spaces; marked by <-- HERE
3045                            in %s */
3046         *error_msg = Perl_form(aTHX_
3047             "charnames alias definitions may not contain a sequence of "
3048             "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
3049             (int)(s - context + 1), context,
3050             (int)(e - s + 1), s + 1);
3051         return NULL;
3052 }
3053 
3054 /*
3055   scan_const
3056 
3057   Extracts the next constant part of a pattern, double-quoted string,
3058   or transliteration.  This is terrifying code.
3059 
3060   For example, in parsing the double-quoted string "ab\x63$d", it would
3061   stop at the '$' and return an OP_CONST containing 'abc'.
3062 
3063   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3064   processing a pattern (PL_lex_inpat is true), a transliteration
3065   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
3066 
3067   Returns a pointer to the character scanned up to. If this is
3068   advanced from the start pointer supplied (i.e. if anything was
3069   successfully parsed), will leave an OP_CONST for the substring scanned
3070   in pl_yylval. Caller must intuit reason for not parsing further
3071   by looking at the next characters herself.
3072 
3073   In patterns:
3074     expand:
3075       \N{FOO}  => \N{U+hex_for_character_FOO}
3076       (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
3077 
3078     pass through:
3079         all other \-char, including \N and \N{ apart from \N{ABC}
3080 
3081     stops on:
3082         @ and $ where it appears to be a var, but not for $ as tail anchor
3083         \l \L \u \U \Q \E
3084         (?{  or  (??{ or (*{
3085 
3086   In transliterations:
3087     characters are VERY literal, except for - not at the start or end
3088     of the string, which indicates a range.  However some backslash sequences
3089     are recognized: \r, \n, and the like
3090                     \007 \o{}, \x{}, \N{}
3091     If all elements in the transliteration are below 256,
3092     scan_const expands the range to the full set of intermediate
3093     characters. If the range is in utf8, the hyphen is replaced with
3094     a certain range mark which will be handled by pmtrans() in op.c.
3095 
3096   In double-quoted strings:
3097     backslashes:
3098       all those recognized in transliterations
3099       deprecated backrefs: \1 (in substitution replacements)
3100       case and quoting: \U \Q \E
3101     stops on @ and $
3102 
3103   scan_const does *not* construct ops to handle interpolated strings.
3104   It stops processing as soon as it finds an embedded $ or @ variable
3105   and leaves it to the caller to work out what's going on.
3106 
3107   embedded arrays (whether in pattern or not) could be:
3108       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
3109 
3110   $ in double-quoted strings must be the symbol of an embedded scalar.
3111 
3112   $ in pattern could be $foo or could be tail anchor.  Assumption:
3113   it's a tail anchor if $ is the last thing in the string, or if it's
3114   followed by one of "()| \r\n\t"
3115 
3116   \1 (backreferences) are turned into $1 in substitutions
3117 
3118   The structure of the code is
3119       while (there's a character to process) {
3120           handle transliteration ranges
3121           skip regexp comments /(?#comment)/ and codes /(?{code})/ ((*{code})/
3122           skip #-initiated comments in //x patterns
3123           check for embedded arrays
3124           check for embedded scalars
3125           if (backslash) {
3126               deprecate \1 in substitution replacements
3127               handle string-changing backslashes \l \U \Q \E, etc.
3128               switch (what was escaped) {
3129                   handle \- in a transliteration (becomes a literal -)
3130                   if a pattern and not \N{, go treat as regular character
3131                   handle \132 (octal characters)
3132                   handle \x15 and \x{1234} (hex characters)
3133                   handle \N{name} (named characters, also \N{3,5} in a pattern)
3134                   handle \cV (control characters)
3135                   handle printf-style backslashes (\f, \r, \n, etc)
3136               } (end switch)
3137               continue
3138           } (end if backslash)
3139           handle regular character
3140     } (end while character to read)
3141 
3142 */
3143 
3144 STATIC char *
S_scan_const(pTHX_ char * start)3145 S_scan_const(pTHX_ char *start)
3146 {
3147     const char * const send = PL_bufend;/* end of the constant */
3148     SV *sv = newSV(send - start);       /* sv for the constant.  See note below
3149                                            on sizing. */
3150     char *s = start;			/* start of the constant */
3151     char *d = SvPVX(sv);		/* destination for copies */
3152     bool dorange = FALSE;               /* are we in a translit range? */
3153     bool didrange = FALSE;              /* did we just finish a range? */
3154     bool in_charclass = FALSE;          /* within /[...]/ */
3155     const bool s_is_utf8 = cBOOL(UTF);  /* Is the source string assumed to be
3156                                            UTF8?  But, this can show as true
3157                                            when the source isn't utf8, as for
3158                                            example when it is entirely composed
3159                                            of hex constants */
3160     bool d_is_utf8 = FALSE;             /* Output constant is UTF8 */
3161     STRLEN utf8_variant_count = 0;      /* When not in UTF-8, this counts the
3162                                            number of characters found so far
3163                                            that will expand (into 2 bytes)
3164                                            should we have to convert to
3165                                            UTF-8) */
3166     SV *res;		                /* result from charnames */
3167     STRLEN offset_to_max = 0;   /* The offset in the output to where the range
3168                                    high-end character is temporarily placed */
3169 
3170     /* Does something require special handling in tr/// ?  This avoids extra
3171      * work in a less likely case.  As such, khw didn't feel it was worth
3172      * adding any branches to the more mainline code to handle this, which
3173      * means that this doesn't get set in some circumstances when things like
3174      * \x{100} get expanded out.  As a result there needs to be extra testing
3175      * done in the tr code */
3176     bool has_above_latin1 = FALSE;
3177 
3178     /* Note on sizing:  The scanned constant is placed into sv, which is
3179      * initialized by newSV() assuming one byte of output for every byte of
3180      * input.  This routine expects newSV() to allocate an extra byte for a
3181      * trailing NUL, which this routine will append if it gets to the end of
3182      * the input.  There may be more bytes of input than output (eg., \N{LATIN
3183      * CAPITAL LETTER A}), or more output than input if the constant ends up
3184      * recoded to utf8, but each time a construct is found that might increase
3185      * the needed size, SvGROW() is called.  Its size parameter each time is
3186      * based on the best guess estimate at the time, namely the length used so
3187      * far, plus the length the current construct will occupy, plus room for
3188      * the trailing NUL, plus one byte for every input byte still unscanned */
3189 
3190     UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
3191                        before set */
3192 #ifdef EBCDIC
3193     int backslash_N = 0;            /* ? was the character from \N{} */
3194     int non_portable_endpoint = 0;  /* ? In a range is an endpoint
3195                                        platform-specific like \x65 */
3196 #endif
3197 
3198     PERL_ARGS_ASSERT_SCAN_CONST;
3199 
3200     assert(PL_lex_inwhat != OP_TRANSR);
3201 
3202     /* Protect sv from errors and fatal warnings. */
3203     ENTER_with_name("scan_const");
3204     SAVEFREESV(sv);
3205 
3206     /* A bunch of code in the loop below assumes that if s[n] exists and is not
3207      * NUL, then s[n+1] exists.  This assertion makes sure that assumption is
3208      * valid */
3209     assert(*send == '\0');
3210 
3211     while (s < send
3212            || dorange   /* Handle tr/// range at right edge of input */
3213     ) {
3214 
3215         /* get transliterations out of the way (they're most literal) */
3216         if (PL_lex_inwhat == OP_TRANS) {
3217 
3218             /* But there isn't any special handling necessary unless there is a
3219              * range, so for most cases we just drop down and handle the value
3220              * as any other.  There are two exceptions.
3221              *
3222              * 1.  A hyphen indicates that we are actually going to have a
3223              *     range.  In this case, skip the '-', set a flag, then drop
3224              *     down to handle what should be the end range value.
3225              * 2.  After we've handled that value, the next time through, that
3226              *     flag is set and we fix up the range.
3227              *
3228              * Ranges entirely within Latin1 are expanded out entirely, in
3229              * order to make the transliteration a simple table look-up.
3230              * Ranges that extend above Latin1 have to be done differently, so
3231              * there is no advantage to expanding them here, so they are
3232              * stored here as Min, RANGE_INDICATOR, Max.  'RANGE_INDICATOR' is
3233              * a byte that can't occur in legal UTF-8, and hence can signify a
3234              * hyphen without any possible ambiguity.  On EBCDIC machines, if
3235              * the range is expressed as Unicode, the Latin1 portion is
3236              * expanded out even if the range extends above Latin1.  This is
3237              * because each code point in it has to be processed here
3238              * individually to get its native translation */
3239 
3240             if (! dorange) {
3241 
3242                 /* Here, we don't think we're in a range.  If the new character
3243                  * is not a hyphen; or if it is a hyphen, but it's too close to
3244                  * either edge to indicate a range, or if we haven't output any
3245                  * characters yet then it's a regular character. */
3246                 if (*s != '-' || s >= send - 1 || s == start || d == SvPVX(sv))
3247                 {
3248 
3249                     /* A regular character.  Process like any other, but first
3250                      * clear any flags */
3251                     didrange = FALSE;
3252                     dorange = FALSE;
3253 #ifdef EBCDIC
3254                     non_portable_endpoint = 0;
3255                     backslash_N = 0;
3256 #endif
3257                     /* The tests here for being above Latin1 and similar ones
3258                      * in the following 'else' suffice to find all such
3259                      * occurences in the constant, except those added by a
3260                      * backslash escape sequence, like \x{100}.  Mostly, those
3261                      * set 'has_above_latin1' as appropriate */
3262                     if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3263                         has_above_latin1 = TRUE;
3264                     }
3265 
3266                     /* Drops down to generic code to process current byte */
3267                 }
3268                 else {  /* Is a '-' in the context where it means a range */
3269                     if (didrange) { /* Something like y/A-C-Z// */
3270                         Perl_croak(aTHX_ "Ambiguous range in transliteration"
3271                                          " operator");
3272                     }
3273 
3274                     dorange = TRUE;
3275 
3276                     s++;    /* Skip past the hyphen */
3277 
3278                     /* d now points to where the end-range character will be
3279                      * placed.  Drop down to get that character.  We'll finish
3280                      * processing the range the next time through the loop */
3281 
3282                     if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3283                         has_above_latin1 = TRUE;
3284                     }
3285 
3286                     /* Drops down to generic code to process current byte */
3287                 }
3288             }  /* End of not a range */
3289             else {
3290                 /* Here we have parsed a range.  Now must handle it.  At this
3291                  * point:
3292                  * 'sv' is a SV* that contains the output string we are
3293                  *      constructing.  The final two characters in that string
3294                  *      are the range start and range end, in order.
3295                  * 'd'  points to just beyond the range end in the 'sv' string,
3296                  *      where we would next place something
3297                  */
3298                 char * max_ptr;
3299                 char * min_ptr;
3300                 IV range_min;
3301                 IV range_max;	/* last character in range */
3302                 STRLEN grow;
3303                 Size_t offset_to_min = 0;
3304                 Size_t extras = 0;
3305 #ifdef EBCDIC
3306                 bool convert_unicode;
3307                 IV real_range_max = 0;
3308 #endif
3309                 /* Get the code point values of the range ends. */
3310                 max_ptr = (d_is_utf8) ? (char *) utf8_hop( (U8*) d, -1) : d - 1;
3311                 offset_to_max = max_ptr - SvPVX_const(sv);
3312                 if (d_is_utf8) {
3313                     /* We know the utf8 is valid, because we just constructed
3314                      * it ourselves in previous loop iterations */
3315                     min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
3316                     range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL);
3317                     range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL);
3318 
3319                     /* This compensates for not all code setting
3320                      * 'has_above_latin1', so that we don't skip stuff that
3321                      * should be executed */
3322                     if (range_max > 255) {
3323                         has_above_latin1 = TRUE;
3324                     }
3325                 }
3326                 else {
3327                     min_ptr = max_ptr - 1;
3328                     range_min = * (U8*) min_ptr;
3329                     range_max = * (U8*) max_ptr;
3330                 }
3331 
3332                 /* If the range is just a single code point, like tr/a-a/.../,
3333                  * that code point is already in the output, twice.  We can
3334                  * just back up over the second instance and avoid all the rest
3335                  * of the work.  But if it is a variant character, it's been
3336                  * counted twice, so decrement.  (This unlikely scenario is
3337                  * special cased, like the one for a range of 2 code points
3338                  * below, only because the main-line code below needs a range
3339                  * of 3 or more to work without special casing.  Might as well
3340                  * get it out of the way now.) */
3341                 if (UNLIKELY(range_max == range_min)) {
3342                     d = max_ptr;
3343                     if (! d_is_utf8 && ! UVCHR_IS_INVARIANT(range_max)) {
3344                         utf8_variant_count--;
3345                     }
3346                     goto range_done;
3347                 }
3348 
3349 #ifdef EBCDIC
3350                 /* On EBCDIC platforms, we may have to deal with portable
3351                  * ranges.  These happen if at least one range endpoint is a
3352                  * Unicode value (\N{...}), or if the range is a subset of
3353                  * [A-Z] or [a-z], and both ends are literal characters,
3354                  * like 'A', and not like \x{C1} */
3355                 convert_unicode =
3356                                cBOOL(backslash_N)   /* \N{} forces Unicode,
3357                                                        hence portable range */
3358                     || (     ! non_portable_endpoint
3359                         && ((  isLOWER_A(range_min) && isLOWER_A(range_max))
3360                            || (isUPPER_A(range_min) && isUPPER_A(range_max))));
3361                 if (convert_unicode) {
3362 
3363                     /* Special handling is needed for these portable ranges.
3364                      * They are defined to be in Unicode terms, which includes
3365                      * all the Unicode code points between the end points.
3366                      * Convert to Unicode to get the Unicode range.  Later we
3367                      * will convert each code point in the range back to
3368                      * native.  */
3369                     range_min = NATIVE_TO_UNI(range_min);
3370                     range_max = NATIVE_TO_UNI(range_max);
3371                 }
3372 #endif
3373 
3374                 if (range_min > range_max) {
3375 #ifdef EBCDIC
3376                     if (convert_unicode) {
3377                         /* Need to convert back to native for meaningful
3378                          * messages for this platform */
3379                         range_min = UNI_TO_NATIVE(range_min);
3380                         range_max = UNI_TO_NATIVE(range_max);
3381                     }
3382 #endif
3383                     /* Use the characters themselves for the error message if
3384                      * ASCII printables; otherwise some visible representation
3385                      * of them */
3386                     if (isPRINT_A(range_min) && isPRINT_A(range_max)) {
3387                         Perl_croak(aTHX_
3388                          "Invalid range \"%c-%c\" in transliteration operator",
3389                          (char)range_min, (char)range_max);
3390                     }
3391 #ifdef EBCDIC
3392                     else if (convert_unicode) {
3393         /* diag_listed_as: Invalid range "%s" in transliteration operator */
3394                         Perl_croak(aTHX_
3395                            "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04"
3396                            UVXf "}\" in transliteration operator",
3397                            range_min, range_max);
3398                     }
3399 #endif
3400                     else {
3401         /* diag_listed_as: Invalid range "%s" in transliteration operator */
3402                         Perl_croak(aTHX_
3403                            "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\""
3404                            " in transliteration operator",
3405                            range_min, range_max);
3406                     }
3407                 }
3408 
3409                 /* If the range is exactly two code points long, they are
3410                  * already both in the output */
3411                 if (UNLIKELY(range_min + 1 == range_max)) {
3412                     goto range_done;
3413                 }
3414 
3415                 /* Here the range contains at least 3 code points */
3416 
3417                 if (d_is_utf8) {
3418 
3419                     /* If everything in the transliteration is below 256, we
3420                      * can avoid special handling later.  A translation table
3421                      * for each of those bytes is created by op.c.  So we
3422                      * expand out all ranges to their constituent code points.
3423                      * But if we've encountered something above 255, the
3424                      * expanding won't help, so skip doing that.  But if it's
3425                      * EBCDIC, we may have to look at each character below 256
3426                      * if we have to convert to/from Unicode values */
3427                     if (   has_above_latin1
3428 #ifdef EBCDIC
3429                         && (range_min > 255 || ! convert_unicode)
3430 #endif
3431                     ) {
3432                         const STRLEN off = d - SvPVX(sv);
3433                         const STRLEN extra = 1 + (send - s) + 1;
3434                         char *e;
3435 
3436                         /* Move the high character one byte to the right; then
3437                          * insert between it and the range begin, an illegal
3438                          * byte which serves to indicate this is a range (using
3439                          * a '-' would be ambiguous). */
3440 
3441                         if (off + extra > SvLEN(sv)) {
3442                             d = off + SvGROW(sv, off + extra);
3443                             max_ptr = d - off + offset_to_max;
3444                         }
3445 
3446                         e = d++;
3447                         while (e-- > max_ptr) {
3448                             *(e + 1) = *e;
3449                         }
3450                         *(e + 1) = (char) RANGE_INDICATOR;
3451                         goto range_done;
3452                     }
3453 
3454                     /* Here, we're going to expand out the range.  For EBCDIC
3455                      * the range can extend above 255 (not so in ASCII), so
3456                      * for EBCDIC, split it into the parts above and below
3457                      * 255/256 */
3458 #ifdef EBCDIC
3459                     if (range_max > 255) {
3460                         real_range_max = range_max;
3461                         range_max = 255;
3462                     }
3463 #endif
3464                 }
3465 
3466                 /* Here we need to expand out the string to contain each
3467                  * character in the range.  Grow the output to handle this.
3468                  * For non-UTF8, we need a byte for each code point in the
3469                  * range, minus the three that we've already allocated for: the
3470                  * hyphen, the min, and the max.  For UTF-8, we need this
3471                  * plus an extra byte for each code point that occupies two
3472                  * bytes (is variant) when in UTF-8 (except we've already
3473                  * allocated for the end points, including if they are
3474                  * variants).  For ASCII platforms and Unicode ranges on EBCDIC
3475                  * platforms, it's easy to calculate a precise number.  To
3476                  * start, we count the variants in the range, which we need
3477                  * elsewhere in this function anyway.  (For the case where it
3478                  * isn't easy to calculate, 'extras' has been initialized to 0,
3479                  * and the calculation is done in a loop further down.) */
3480 #ifdef EBCDIC
3481                 if (convert_unicode)
3482 #endif
3483                 {
3484                     /* This is executed unconditionally on ASCII, and for
3485                      * Unicode ranges on EBCDIC.  Under these conditions, all
3486                      * code points above a certain value are variant; and none
3487                      * under that value are.  We just need to find out how much
3488                      * of the range is above that value.  We don't count the
3489                      * end points here, as they will already have been counted
3490                      * as they were parsed. */
3491                     if (range_min >= UTF_CONTINUATION_MARK) {
3492 
3493                         /* The whole range is made up of variants */
3494                         extras = (range_max - 1) - (range_min + 1) + 1;
3495                     }
3496                     else if (range_max >= UTF_CONTINUATION_MARK) {
3497 
3498                         /* Only the higher portion of the range is variants */
3499                         extras = (range_max - 1) - UTF_CONTINUATION_MARK + 1;
3500                     }
3501 
3502                     utf8_variant_count += extras;
3503                 }
3504 
3505                 /* The base growth is the number of code points in the range,
3506                  * not including the endpoints, which have already been sized
3507                  * for (and output).  We don't subtract for the hyphen, as it
3508                  * has been parsed but not output, and the SvGROW below is
3509                  * based only on what's been output plus what's left to parse.
3510                  * */
3511                 grow = (range_max - 1) - (range_min + 1) + 1;
3512 
3513                 if (d_is_utf8) {
3514 #ifdef EBCDIC
3515                     /* In some cases in EBCDIC, we haven't yet calculated a
3516                      * precise amount needed for the UTF-8 variants.  Just
3517                      * assume the worst case, that everything will expand by a
3518                      * byte */
3519                     if (! convert_unicode) {
3520                         grow *= 2;
3521                     }
3522                     else
3523 #endif
3524                     {
3525                         /* Otherwise we know exactly how many variants there
3526                          * are in the range. */
3527                         grow += extras;
3528                     }
3529                 }
3530 
3531                 /* Grow, but position the output to overwrite the range min end
3532                  * point, because in some cases we overwrite that */
3533                 SvCUR_set(sv, d - SvPVX_const(sv));
3534                 offset_to_min = min_ptr - SvPVX_const(sv);
3535 
3536                 /* See Note on sizing above. */
3537                 d = offset_to_min + SvGROW(sv, SvCUR(sv)
3538                                              + (send - s)
3539                                              + grow
3540                                              + 1 /* Trailing NUL */ );
3541 
3542                 /* Now, we can expand out the range. */
3543 #ifdef EBCDIC
3544                 if (convert_unicode) {
3545                     SSize_t i;
3546 
3547                     /* Recall that the min and max are now in Unicode terms, so
3548                      * we have to convert each character to its native
3549                      * equivalent */
3550                     if (d_is_utf8) {
3551                         for (i = range_min; i <= range_max; i++) {
3552                             append_utf8_from_native_byte(
3553                                                     LATIN1_TO_NATIVE((U8) i),
3554                                                     (U8 **) &d);
3555                         }
3556                     }
3557                     else {
3558                         for (i = range_min; i <= range_max; i++) {
3559                             *d++ = (char)LATIN1_TO_NATIVE((U8) i);
3560                         }
3561                     }
3562                 }
3563                 else
3564 #endif
3565                 /* Always gets run for ASCII, and sometimes for EBCDIC. */
3566                 {
3567                     /* Here, no conversions are necessary, which means that the
3568                      * first character in the range is already in 'd' and
3569                      * valid, so we can skip overwriting it */
3570                     if (d_is_utf8) {
3571                         SSize_t i;
3572                         d += UTF8SKIP(d);
3573                         for (i = range_min + 1; i <= range_max; i++) {
3574                             append_utf8_from_native_byte((U8) i, (U8 **) &d);
3575                         }
3576                     }
3577                     else {
3578                         SSize_t i;
3579                         d++;
3580                         assert(range_min + 1 <= range_max);
3581                         for (i = range_min + 1; i < range_max; i++) {
3582 #ifdef EBCDIC
3583                             /* In this case on EBCDIC, we haven't calculated
3584                              * the variants.  Do it here, as we go along */
3585                             if (! UVCHR_IS_INVARIANT(i)) {
3586                                 utf8_variant_count++;
3587                             }
3588 #endif
3589                             *d++ = (char)i;
3590                         }
3591 
3592                         /* The range_max is done outside the loop so as to
3593                          * avoid having to special case not incrementing
3594                          * 'utf8_variant_count' on EBCDIC (it's already been
3595                          * counted when originally parsed) */
3596                         *d++ = (char) range_max;
3597                     }
3598                 }
3599 
3600 #ifdef EBCDIC
3601                 /* If the original range extended above 255, add in that
3602                  * portion. */
3603                 if (real_range_max) {
3604                     *d++ = (char) UTF8_TWO_BYTE_HI(0x100);
3605                     *d++ = (char) UTF8_TWO_BYTE_LO(0x100);
3606                     if (real_range_max > 0x100) {
3607                         if (real_range_max > 0x101) {
3608                             *d++ = (char) RANGE_INDICATOR;
3609                         }
3610                         d = (char*)uvchr_to_utf8((U8*)d, real_range_max);
3611                     }
3612                 }
3613 #endif
3614 
3615               range_done:
3616                 /* mark the range as done, and continue */
3617                 didrange = TRUE;
3618                 dorange = FALSE;
3619 #ifdef EBCDIC
3620                 non_portable_endpoint = 0;
3621                 backslash_N = 0;
3622 #endif
3623                 continue;
3624             } /* End of is a range */
3625         } /* End of transliteration.  Joins main code after these else's */
3626         else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3627             char *s1 = s-1;
3628             int esc = 0;
3629             while (s1 >= start && *s1-- == '\\')
3630                 esc = !esc;
3631             if (!esc)
3632                 in_charclass = TRUE;
3633         }
3634         else if (*s == ']' && PL_lex_inpat && in_charclass) {
3635             char *s1 = s-1;
3636             int esc = 0;
3637             while (s1 >= start && *s1-- == '\\')
3638                 esc = !esc;
3639             if (!esc)
3640                 in_charclass = FALSE;
3641         }
3642             /* skip for regexp comments /(?#comment)/, except for the last
3643              * char, which will be done separately.  Stop on (?{..}) and
3644              * friends (??{ ... }) or (*{ ... }) */
3645         else if (*s == '(' && PL_lex_inpat && (s[1] == '?' || s[1] == '*') && !in_charclass) {
3646             if (s[1] == '?' && s[2] == '#') {
3647                 if (s_is_utf8) {
3648                     PERL_UINT_FAST8_T  len = UTF8SKIP(s);
3649 
3650                     while (s + len < send && *s != ')') {
3651                         Copy(s, d, len, U8);
3652                         d += len;
3653                         s += len;
3654                         len = UTF8_SAFE_SKIP(s, send);
3655                     }
3656                 }
3657                 else while (s+1 < send && *s != ')') {
3658                     *d++ = *s++;
3659                 }
3660             }
3661             else
3662             if (!PL_lex_casemods &&
3663                 /* The following should match regcomp.c */
3664                 ((s[1] == '?' && (s[2] == '{'                        /* (?{ ... })  */
3665                               || (s[2] == '?' && s[3] == '{'))) ||   /* (??{ ... }) */
3666                  (s[1] == '*' && (s[2] == '{' )))                    /* (*{ ... })  */
3667             ){
3668                 break;
3669             }
3670         }
3671             /* likewise skip #-initiated comments in //x patterns */
3672         else if (*s == '#'
3673                  && PL_lex_inpat
3674                  && !in_charclass
3675                  && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED)
3676         {
3677             while (s < send && *s != '\n')
3678                 *d++ = *s++;
3679         }
3680             /* no further processing of single-quoted regex */
3681         else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3682             goto default_action;
3683 
3684             /* check for embedded arrays
3685              * (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3686              */
3687         else if (*s == '@' && s[1]) {
3688             if (UTF
3689                ? isIDFIRST_utf8_safe(s+1, send)
3690                : isWORDCHAR_A(s[1]))
3691             {
3692                 break;
3693             }
3694             if (memCHRs(":'{$", s[1]))
3695                 break;
3696             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3697                 break; /* in regexp, neither @+ nor @- are interpolated */
3698         }
3699             /* check for embedded scalars.  only stop if we're sure it's a
3700              * variable.  */
3701         else if (*s == '$') {
3702             if (!PL_lex_inpat)	/* not a regexp, so $ must be var */
3703                 break;
3704             if (s + 1 < send && !memCHRs("()| \r\n\t", s[1])) {
3705                 if (s[1] == '\\') {
3706                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3707                                    "Possible unintended interpolation of $\\ in regex");
3708                 }
3709                 break;		/* in regexp, $ might be tail anchor */
3710             }
3711         }
3712 
3713         /* End of else if chain - OP_TRANS rejoin rest */
3714 
3715         if (UNLIKELY(s >= send)) {
3716             assert(s == send);
3717             break;
3718         }
3719 
3720         /* backslashes */
3721         if (*s == '\\' && s+1 < send) {
3722             char* bslash = s;   /* point to beginning \ */
3723             char* rbrace;	/* point to ending '}' */
3724             char* e;	        /* 1 past the meat (non-blanks) before the
3725                                    brace */
3726             s++;
3727 
3728             /* warn on \1 - \9 in substitution replacements, but note that \11
3729              * is an octal; and \19 is \1 followed by '9' */
3730             if (PL_lex_inwhat == OP_SUBST
3731                 && !PL_lex_inpat
3732                 && isDIGIT(*s)
3733                 && *s != '0'
3734                 && !isDIGIT(s[1]))
3735             {
3736                 /* diag_listed_as: \%d better written as $%d */
3737                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3738                 s = bslash;
3739                 *s = '$';
3740                 break;
3741             }
3742 
3743             /* string-change backslash escapes */
3744             if (PL_lex_inwhat != OP_TRANS && *s && memCHRs("lLuUEQF", *s)) {
3745                 s = bslash;
3746                 break;
3747             }
3748             /* In a pattern, process \N, but skip any other backslash escapes.
3749              * This is because we don't want to translate an escape sequence
3750              * into a meta symbol and have the regex compiler use the meta
3751              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
3752              * in spite of this, we do have to process \N here while the proper
3753              * charnames handler is in scope.  See bugs #56444 and #62056.
3754              *
3755              * There is a complication because \N in a pattern may also stand
3756              * for 'match a non-nl', and not mean a charname, in which case its
3757              * processing should be deferred to the regex compiler.  To be a
3758              * charname it must be followed immediately by a '{', and not look
3759              * like \N followed by a curly quantifier, i.e., not something like
3760              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
3761              * quantifier */
3762             else if (PL_lex_inpat
3763                     && (*s != 'N'
3764                         || s[1] != '{'
3765                         || regcurly(s + 1, send, NULL)))
3766             {
3767                 *d++ = '\\';
3768                 goto default_action;
3769             }
3770 
3771             switch (*s) {
3772             default:
3773                 {
3774                     if ((isALPHANUMERIC(*s)))
3775                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3776                                        "Unrecognized escape \\%c passed through",
3777                                        *s);
3778                     /* default action is to copy the quoted character */
3779                     goto default_action;
3780                 }
3781 
3782             /* eg. \132 indicates the octal constant 0132 */
3783             case '0': case '1': case '2': case '3':
3784             case '4': case '5': case '6': case '7':
3785                 {
3786                     I32 flags = PERL_SCAN_SILENT_ILLDIGIT
3787                               | PERL_SCAN_NOTIFY_ILLDIGIT;
3788                     STRLEN len = 3;
3789                     uv = grok_oct(s, &len, &flags, NULL);
3790                     s += len;
3791                     if (  (flags & PERL_SCAN_NOTIFY_ILLDIGIT)
3792                         && s < send
3793                         && isDIGIT(*s)  /* like \08, \178 */
3794                         && ckWARN(WARN_MISC))
3795                     {
3796                         Perl_warner(aTHX_ packWARN(WARN_MISC), "%s",
3797                             form_alien_digit_msg(8, len, s, send, UTF, FALSE));
3798                     }
3799                 }
3800                 goto NUM_ESCAPE_INSERT;
3801 
3802             /* eg. \o{24} indicates the octal constant \024 */
3803             case 'o':
3804                 {
3805                     const char* error;
3806 
3807                     if (! grok_bslash_o(&s, send,
3808                                                &uv, &error,
3809                                                NULL,
3810                                                FALSE, /* Not strict */
3811                                                FALSE, /* No illegal cp's */
3812                                                UTF))
3813                     {
3814                         yyerror(error);
3815                         uv = 0; /* drop through to ensure range ends are set */
3816                     }
3817                     goto NUM_ESCAPE_INSERT;
3818                 }
3819 
3820             /* eg. \x24 indicates the hex constant 0x24 */
3821             case 'x':
3822                 {
3823                     const char* error;
3824 
3825                     if (! grok_bslash_x(&s, send,
3826                                                &uv, &error,
3827                                                NULL,
3828                                                FALSE, /* Not strict */
3829                                                FALSE, /* No illegal cp's */
3830                                                UTF))
3831                     {
3832                         yyerror(error);
3833                         uv = 0; /* drop through to ensure range ends are set */
3834                     }
3835                 }
3836 
3837               NUM_ESCAPE_INSERT:
3838                 /* Insert oct or hex escaped character. */
3839 
3840                 /* Here uv is the ordinal of the next character being added */
3841                 if (UVCHR_IS_INVARIANT(uv)) {
3842                     *d++ = (char) uv;
3843                 }
3844                 else {
3845                     if (!d_is_utf8 && uv > 255) {
3846 
3847                         /* Here, 'uv' won't fit unless we convert to UTF-8.
3848                          * If we've only seen invariants so far, all we have to
3849                          * do is turn on the flag */
3850                         if (utf8_variant_count == 0) {
3851                             SvUTF8_on(sv);
3852                         }
3853                         else {
3854                             SvCUR_set(sv, d - SvPVX_const(sv));
3855                             SvPOK_on(sv);
3856                             *d = '\0';
3857 
3858                             sv_utf8_upgrade_flags_grow(
3859                                            sv,
3860                                            SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3861 
3862                                            /* Since we're having to grow here,
3863                                             * make sure we have enough room for
3864                                             * this escape and a NUL, so the
3865                                             * code immediately below won't have
3866                                             * to actually grow again */
3867                                           UVCHR_SKIP(uv)
3868                                         + (STRLEN)(send - s) + 1);
3869                             d = SvPVX(sv) + SvCUR(sv);
3870                         }
3871 
3872                         has_above_latin1 = TRUE;
3873                         d_is_utf8 = TRUE;
3874                     }
3875 
3876                     if (! d_is_utf8) {
3877                         *d++ = (char)uv;
3878                         utf8_variant_count++;
3879                     }
3880                     else {
3881                        /* Usually, there will already be enough room in 'sv'
3882                         * since such escapes are likely longer than any UTF-8
3883                         * sequence they can end up as.  This isn't the case on
3884                         * EBCDIC where \x{40000000} contains 12 bytes, and the
3885                         * UTF-8 for it contains 14.  And, we have to allow for
3886                         * a trailing NUL.  It probably can't happen on ASCII
3887                         * platforms, but be safe.  See Note on sizing above. */
3888                         const STRLEN needed = d - SvPVX(sv)
3889                                             + UVCHR_SKIP(uv)
3890                                             + (send - s)
3891                                             + 1;
3892                         if (UNLIKELY(needed > SvLEN(sv))) {
3893                             SvCUR_set(sv, d - SvPVX_const(sv));
3894                             d = SvCUR(sv) + SvGROW(sv, needed);
3895                         }
3896 
3897                         d = (char*) uvchr_to_utf8_flags((U8*)d, uv,
3898                                                    (ckWARN(WARN_PORTABLE))
3899                                                    ? UNICODE_WARN_PERL_EXTENDED
3900                                                    : 0);
3901                     }
3902                 }
3903 #ifdef EBCDIC
3904                 non_portable_endpoint++;
3905 #endif
3906                 continue;
3907 
3908             case 'N':
3909                 /* In a non-pattern \N must be like \N{U+0041}, or it can be a
3910                  * named character, like \N{LATIN SMALL LETTER A}, or a named
3911                  * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
3912                  * GRAVE} (except y/// can't handle the latter, croaking).  For
3913                  * convenience all three forms are referred to as "named
3914                  * characters" below.
3915                  *
3916                  * For patterns, \N also can mean to match a non-newline.  Code
3917                  * before this 'switch' statement should already have handled
3918                  * this situation, and hence this code only has to deal with
3919                  * the named character cases.
3920                  *
3921                  * For non-patterns, the named characters are converted to
3922                  * their string equivalents.  In patterns, named characters are
3923                  * not converted to their ultimate forms for the same reasons
3924                  * that other escapes aren't (mainly that the ultimate
3925                  * character could be considered a meta-symbol by the regex
3926                  * compiler).  Instead, they are converted to the \N{U+...}
3927                  * form to get the value from the charnames that is in effect
3928                  * right now, while preserving the fact that it was a named
3929                  * character, so that the regex compiler knows this.
3930                  *
3931                  * The structure of this section of code (besides checking for
3932                  * errors and upgrading to utf8) is:
3933                  *    If the named character is of the form \N{U+...}, pass it
3934                  *      through if a pattern; otherwise convert the code point
3935                  *      to utf8
3936                  *    Otherwise must be some \N{NAME}: convert to
3937                  *      \N{U+c1.c2...} if a pattern; otherwise convert to utf8
3938                  *
3939                  * Transliteration is an exception.  The conversion to utf8 is
3940                  * only done if the code point requires it to be representable.
3941                  *
3942                  * Here, 's' points to the 'N'; the test below is guaranteed to
3943                  * succeed if we are being called on a pattern, as we already
3944                  * know from a test above that the next character is a '{'.  A
3945                  * non-pattern \N must mean 'named character', which requires
3946                  * braces */
3947                 s++;
3948                 if (*s != '{') {
3949                     yyerror("Missing braces on \\N{}");
3950                     *d++ = '\0';
3951                     continue;
3952                 }
3953                 s++;
3954 
3955                 /* If there is no matching '}', it is an error. */
3956                 if (! (rbrace = (char *) memchr(s, '}', send - s))) {
3957                     if (! PL_lex_inpat) {
3958                         yyerror("Missing right brace on \\N{}");
3959                     } else {
3960                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3961                     }
3962                     yyquit(); /* Have exhausted the input. */
3963                 }
3964 
3965                 /* Here it looks like a named character */
3966                 while (s < rbrace && isBLANK(*s)) {
3967                     s++;
3968                 }
3969 
3970                 e = rbrace;
3971                 while (s < e && isBLANK(*(e - 1))) {
3972                     e--;
3973                 }
3974 
3975                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3976                     s += 2;	    /* Skip to next char after the 'U+' */
3977                     if (PL_lex_inpat) {
3978 
3979                         /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */
3980                         /* Check the syntax.  */
3981                         if (!isXDIGIT(*s)) {
3982                           bad_NU:
3983                             yyerror(
3984                                 "Invalid hexadecimal number in \\N{U+...}"
3985                             );
3986                             s = rbrace + 1;
3987                             *d++ = '\0';
3988                             continue;
3989                         }
3990                         while (++s < e) {
3991                             if (isXDIGIT(*s))
3992                                 continue;
3993                             else if ((*s == '.' || *s == '_')
3994                                   && isXDIGIT(s[1]))
3995                                 continue;
3996                             goto bad_NU;
3997                         }
3998 
3999                         /* Pass everything through unchanged.
4000                          * +1 is to include the '}' */
4001                         Copy(bslash, d, rbrace - bslash + 1, char);
4002                         d += rbrace - bslash + 1;
4003                     }
4004                     else {  /* Not a pattern: convert the hex to string */
4005                         I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
4006                                   | PERL_SCAN_SILENT_ILLDIGIT
4007                                   | PERL_SCAN_SILENT_OVERFLOW
4008                                   | PERL_SCAN_DISALLOW_PREFIX;
4009                         STRLEN len = e - s;
4010 
4011                         uv = grok_hex(s, &len, &flags, NULL);
4012                         if (len == 0 || (len != (STRLEN)(e - s)))
4013                             goto bad_NU;
4014 
4015                         if (    uv > MAX_LEGAL_CP
4016                             || (flags & PERL_SCAN_GREATER_THAN_UV_MAX))
4017                         {
4018                             yyerror(form_cp_too_large_msg(16, s, len, 0));
4019                             uv = 0; /* drop through to ensure range ends are
4020                                        set */
4021                         }
4022 
4023                          /* For non-tr///, if the destination is not in utf8,
4024                           * unconditionally recode it to be so.  This is
4025                           * because \N{} implies Unicode semantics, and scalars
4026                           * have to be in utf8 to guarantee those semantics.
4027                           * tr/// doesn't care about Unicode rules, so no need
4028                           * there to upgrade to UTF-8 for small enough code
4029                           * points */
4030                         if (! d_is_utf8 && (   uv > 0xFF
4031                                            || PL_lex_inwhat != OP_TRANS))
4032                         {
4033                             /* See Note on sizing above.  */
4034                             const STRLEN extra = OFFUNISKIP(uv) + (send - rbrace) + 1;
4035 
4036                             SvCUR_set(sv, d - SvPVX_const(sv));
4037                             SvPOK_on(sv);
4038                             *d = '\0';
4039 
4040                             if (utf8_variant_count == 0) {
4041                                 SvUTF8_on(sv);
4042                                 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
4043                             }
4044                             else {
4045                                 sv_utf8_upgrade_flags_grow(
4046                                                sv,
4047                                                SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4048                                                extra);
4049                                 d = SvPVX(sv) + SvCUR(sv);
4050                             }
4051 
4052                             d_is_utf8 = TRUE;
4053                             has_above_latin1 = TRUE;
4054                         }
4055 
4056                         /* Add the (Unicode) code point to the output. */
4057                         if (! d_is_utf8 || OFFUNI_IS_INVARIANT(uv)) {
4058                             *d++ = (char) LATIN1_TO_NATIVE(uv);
4059                         }
4060                         else {
4061                             d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv,
4062                                                    (ckWARN(WARN_PORTABLE))
4063                                                    ? UNICODE_WARN_PERL_EXTENDED
4064                                                    : 0);
4065                         }
4066                     }
4067                 }
4068                 else     /* Here is \N{NAME} but not \N{U+...}. */
4069                      if (! (res = get_and_check_backslash_N_name_wrapper(s, e)))
4070                 {   /* Failed.  We should die eventually, but for now use a NUL
4071                        to keep parsing */
4072                     *d++ = '\0';
4073                 }
4074                 else {  /* Successfully evaluated the name */
4075                     STRLEN len;
4076                     const char *str = SvPV_const(res, len);
4077                     if (PL_lex_inpat) {
4078 
4079                         if (! len) { /* The name resolved to an empty string */
4080                             const char empty_N[] = "\\N{_}";
4081                             Copy(empty_N, d, sizeof(empty_N) - 1, char);
4082                             d += sizeof(empty_N) - 1;
4083                         }
4084                         else {
4085                             /* In order to not lose information for the regex
4086                             * compiler, pass the result in the specially made
4087                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
4088                             * the code points in hex of each character
4089                             * returned by charnames */
4090 
4091                             const char *str_end = str + len;
4092                             const STRLEN off = d - SvPVX_const(sv);
4093 
4094                             if (! SvUTF8(res)) {
4095                                 /* For the non-UTF-8 case, we can determine the
4096                                  * exact length needed without having to parse
4097                                  * through the string.  Each character takes up
4098                                  * 2 hex digits plus either a trailing dot or
4099                                  * the "}" */
4100                                 const char initial_text[] = "\\N{U+";
4101                                 const STRLEN initial_len = sizeof(initial_text)
4102                                                            - 1;
4103                                 d = off + SvGROW(sv, off
4104                                                     + 3 * len
4105 
4106                                                     /* +1 for trailing NUL */
4107                                                     + initial_len + 1
4108 
4109                                                     + (STRLEN)(send - rbrace));
4110                                 Copy(initial_text, d, initial_len, char);
4111                                 d += initial_len;
4112                                 while (str < str_end) {
4113                                     char hex_string[4];
4114                                     int len =
4115                                         my_snprintf(hex_string,
4116                                                   sizeof(hex_string),
4117                                                   "%02X.",
4118 
4119                                                   /* The regex compiler is
4120                                                    * expecting Unicode, not
4121                                                    * native */
4122                                                   NATIVE_TO_LATIN1(*str));
4123                                     PERL_MY_SNPRINTF_POST_GUARD(len,
4124                                                            sizeof(hex_string));
4125                                     Copy(hex_string, d, 3, char);
4126                                     d += 3;
4127                                     str++;
4128                                 }
4129                                 d--;    /* Below, we will overwrite the final
4130                                            dot with a right brace */
4131                             }
4132                             else {
4133                                 STRLEN char_length; /* cur char's byte length */
4134 
4135                                 /* and the number of bytes after this is
4136                                  * translated into hex digits */
4137                                 STRLEN output_length;
4138 
4139                                 /* 2 hex per byte; 2 chars for '\N'; 2 chars
4140                                  * for max('U+', '.'); and 1 for NUL */
4141                                 char hex_string[2 * UTF8_MAXBYTES + 5];
4142 
4143                                 /* Get the first character of the result. */
4144                                 U32 uv = utf8n_to_uvchr((U8 *) str,
4145                                                         len,
4146                                                         &char_length,
4147                                                         UTF8_ALLOW_ANYUV);
4148                                 /* Convert first code point to Unicode hex,
4149                                  * including the boiler plate before it. */
4150                                 output_length =
4151                                     my_snprintf(hex_string, sizeof(hex_string),
4152                                              "\\N{U+%X",
4153                                              (unsigned int) NATIVE_TO_UNI(uv));
4154 
4155                                 /* Make sure there is enough space to hold it */
4156                                 d = off + SvGROW(sv, off
4157                                                     + output_length
4158                                                     + (STRLEN)(send - rbrace)
4159                                                     + 2);	/* '}' + NUL */
4160                                 /* And output it */
4161                                 Copy(hex_string, d, output_length, char);
4162                                 d += output_length;
4163 
4164                                 /* For each subsequent character, append dot and
4165                                 * its Unicode code point in hex */
4166                                 while ((str += char_length) < str_end) {
4167                                     const STRLEN off = d - SvPVX_const(sv);
4168                                     U32 uv = utf8n_to_uvchr((U8 *) str,
4169                                                             str_end - str,
4170                                                             &char_length,
4171                                                             UTF8_ALLOW_ANYUV);
4172                                     output_length =
4173                                         my_snprintf(hex_string,
4174                                              sizeof(hex_string),
4175                                              ".%X",
4176                                              (unsigned int) NATIVE_TO_UNI(uv));
4177 
4178                                     d = off + SvGROW(sv, off
4179                                                         + output_length
4180                                                         + (STRLEN)(send - rbrace)
4181                                                         + 2);	/* '}' +  NUL */
4182                                     Copy(hex_string, d, output_length, char);
4183                                     d += output_length;
4184                                 }
4185                             }
4186 
4187                             *d++ = '}';	/* Done.  Add the trailing brace */
4188                         }
4189                     }
4190                     else { /* Here, not in a pattern.  Convert the name to a
4191                             * string. */
4192 
4193                         if (PL_lex_inwhat == OP_TRANS) {
4194                             str = SvPV_const(res, len);
4195                             if (len > ((SvUTF8(res))
4196                                        ? UTF8SKIP(str)
4197                                        : 1U))
4198                             {
4199                                 yyerror(Perl_form(aTHX_
4200                                     "%.*s must not be a named sequence"
4201                                     " in transliteration operator",
4202                                         /*  +1 to include the "}" */
4203                                     (int) (rbrace + 1 - start), start));
4204                                 *d++ = '\0';
4205                                 goto end_backslash_N;
4206                             }
4207 
4208                             if (SvUTF8(res) && UTF8_IS_ABOVE_LATIN1(*str)) {
4209                                 has_above_latin1 = TRUE;
4210                             }
4211 
4212                         }
4213                         else if (! SvUTF8(res)) {
4214                             /* Make sure \N{} return is UTF-8.  This is because
4215                              * \N{} implies Unicode semantics, and scalars have
4216                              * to be in utf8 to guarantee those semantics; but
4217                              * not needed in tr/// */
4218                             sv_utf8_upgrade_flags(res, 0);
4219                             str = SvPV_const(res, len);
4220                         }
4221 
4222                          /* Upgrade destination to be utf8 if this new
4223                           * component is */
4224                         if (! d_is_utf8 && SvUTF8(res)) {
4225                             /* See Note on sizing above.  */
4226                             const STRLEN extra = len + (send - s) + 1;
4227 
4228                             SvCUR_set(sv, d - SvPVX_const(sv));
4229                             SvPOK_on(sv);
4230                             *d = '\0';
4231 
4232                             if (utf8_variant_count == 0) {
4233                                 SvUTF8_on(sv);
4234                                 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
4235                             }
4236                             else {
4237                                 sv_utf8_upgrade_flags_grow(sv,
4238                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4239                                                 extra);
4240                                 d = SvPVX(sv) + SvCUR(sv);
4241                             }
4242                             d_is_utf8 = TRUE;
4243                         } else if (len > (STRLEN)(e - s + 4)) { /* +4 is for \N{} */
4244 
4245                             /* See Note on sizing above.  (NOTE: SvCUR() is not
4246                              * set correctly here). */
4247                             const STRLEN extra = len + (send - rbrace) + 1;
4248                             const STRLEN off = d - SvPVX_const(sv);
4249                             d = off + SvGROW(sv, off + extra);
4250                         }
4251                         Copy(str, d, len, char);
4252                         d += len;
4253                     }
4254 
4255                     SvREFCNT_dec(res);
4256 
4257                 } /* End \N{NAME} */
4258 
4259               end_backslash_N:
4260 #ifdef EBCDIC
4261                 backslash_N++; /* \N{} is defined to be Unicode */
4262 #endif
4263                 s = rbrace + 1;  /* Point to just after the '}' */
4264                 continue;
4265 
4266             /* \c is a control character */
4267             case 'c':
4268                 s++;
4269                 if (s < send) {
4270                     const char * message;
4271 
4272                     if (! grok_bslash_c(*s, (U8 *) d, &message, NULL)) {
4273                         yyerror(message);
4274                         yyquit();   /* Have always immediately croaked on
4275                                        errors in this */
4276                     }
4277                     d++;
4278                 }
4279                 else {
4280                     yyerror("Missing control char name in \\c");
4281                     yyquit();   /* Are at end of input, no sense continuing */
4282                 }
4283 #ifdef EBCDIC
4284                 non_portable_endpoint++;
4285 #endif
4286                 break;
4287 
4288             /* printf-style backslashes, formfeeds, newlines, etc */
4289             case 'b':
4290                 *d++ = '\b';
4291                 break;
4292             case 'n':
4293                 *d++ = '\n';
4294                 break;
4295             case 'r':
4296                 *d++ = '\r';
4297                 break;
4298             case 'f':
4299                 *d++ = '\f';
4300                 break;
4301             case 't':
4302                 *d++ = '\t';
4303                 break;
4304             case 'e':
4305                 *d++ = ESC_NATIVE;
4306                 break;
4307             case 'a':
4308                 *d++ = '\a';
4309                 break;
4310             } /* end switch */
4311 
4312             s++;
4313             continue;
4314         } /* end if (backslash) */
4315 
4316     default_action:
4317         /* Just copy the input to the output, though we may have to convert
4318          * to/from UTF-8.
4319          *
4320          * If the input has the same representation in UTF-8 as not, it will be
4321          * a single byte, and we don't care about UTF8ness; just copy the byte */
4322         if (NATIVE_BYTE_IS_INVARIANT((U8)(*s))) {
4323             *d++ = *s++;
4324         }
4325         else if (! s_is_utf8 && ! d_is_utf8) {
4326             /* If neither source nor output is UTF-8, is also a single byte,
4327              * just copy it; but this byte counts should we later have to
4328              * convert to UTF-8 */
4329             *d++ = *s++;
4330             utf8_variant_count++;
4331         }
4332         else if (s_is_utf8 && d_is_utf8) {   /* Both UTF-8, can just copy */
4333             const STRLEN len = UTF8SKIP(s);
4334 
4335             /* We expect the source to have already been checked for
4336              * malformedness */
4337             assert(isUTF8_CHAR((U8 *) s, (U8 *) send));
4338 
4339             Copy(s, d, len, U8);
4340             d += len;
4341             s += len;
4342         }
4343         else if (s_is_utf8) { /* UTF8ness matters: convert output to utf8 */
4344             STRLEN need = send - s + 1; /* See Note on sizing above. */
4345 
4346             SvCUR_set(sv, d - SvPVX_const(sv));
4347             SvPOK_on(sv);
4348             *d = '\0';
4349 
4350             if (utf8_variant_count == 0) {
4351                 SvUTF8_on(sv);
4352                 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + need);
4353             }
4354             else {
4355                 sv_utf8_upgrade_flags_grow(sv,
4356                                            SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4357                                            need);
4358                 d = SvPVX(sv) + SvCUR(sv);
4359             }
4360             d_is_utf8 = TRUE;
4361             goto default_action; /* Redo, having upgraded so both are UTF-8 */
4362         }
4363         else {  /* UTF8ness matters: convert this non-UTF8 source char to
4364                    UTF-8 for output.  It will occupy 2 bytes, but don't include
4365                    the input byte since we haven't incremented 's' yet. See
4366                    Note on sizing above. */
4367             const STRLEN off = d - SvPVX(sv);
4368             const STRLEN extra = 2 + (send - s - 1) + 1;
4369             if (off + extra > SvLEN(sv)) {
4370                 d = off + SvGROW(sv, off + extra);
4371             }
4372             *d++ = UTF8_EIGHT_BIT_HI(*s);
4373             *d++ = UTF8_EIGHT_BIT_LO(*s);
4374             s++;
4375         }
4376     } /* while loop to process each character */
4377 
4378     {
4379         const STRLEN off = d - SvPVX(sv);
4380 
4381         /* See if room for the terminating NUL */
4382         if (UNLIKELY(off >= SvLEN(sv))) {
4383 
4384 #ifndef DEBUGGING
4385 
4386             if (off > SvLEN(sv))
4387 #endif
4388                 Perl_croak(aTHX_ "panic: constant overflowed allocated space,"
4389                         " %" UVuf " >= %" UVuf, (UV)off, (UV)SvLEN(sv));
4390 
4391             /* Whew!  Here we don't have room for the terminating NUL, but
4392              * everything else so far has fit.  It's not too late to grow
4393              * to fit the NUL and continue on.  But it is a bug, as the code
4394              * above was supposed to have made room for this, so under
4395              * DEBUGGING builds, we panic anyway.  */
4396             d = off + SvGROW(sv, off + 1);
4397         }
4398     }
4399 
4400     /* terminate the string and set up the sv */
4401     *d = '\0';
4402     SvCUR_set(sv, d - SvPVX_const(sv));
4403 
4404     SvPOK_on(sv);
4405     if (d_is_utf8) {
4406         SvUTF8_on(sv);
4407     }
4408 
4409     /* shrink the sv if we allocated more than we used */
4410     if (SvCUR(sv) + 5 < SvLEN(sv)) {
4411         SvPV_shrink_to_cur(sv);
4412     }
4413 
4414     /* return the substring (via pl_yylval) only if we parsed anything */
4415     if (s > start) {
4416         char *s2 = start;
4417         for (; s2 < s; s2++) {
4418             if (*s2 == '\n')
4419                 COPLINE_INC_WITH_HERELINES;
4420         }
4421         SvREFCNT_inc_simple_void_NN(sv);
4422         if (   (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
4423             && ! PL_parser->lex_re_reparsing)
4424         {
4425             const char *const key = PL_lex_inpat ? "qr" : "q";
4426             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
4427             const char *type;
4428             STRLEN typelen;
4429 
4430             if (PL_lex_inwhat == OP_TRANS) {
4431                 type = "tr";
4432                 typelen = 2;
4433             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
4434                 type = "s";
4435                 typelen = 1;
4436             } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
4437                 type = "q";
4438                 typelen = 1;
4439             } else {
4440                 type = "qq";
4441                 typelen = 2;
4442             }
4443 
4444             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
4445                                 type, typelen, NULL);
4446         }
4447         pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
4448     }
4449     LEAVE_with_name("scan_const");
4450     return s;
4451 }
4452 
4453 /* S_intuit_more
4454  * Returns TRUE if there's more to the expression (e.g., a subscript),
4455  * FALSE otherwise.
4456  *
4457  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
4458  *
4459  * ->[ and ->{ return TRUE
4460  * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
4461  * { and [ outside a pattern are always subscripts, so return TRUE
4462  * if we're outside a pattern and it's not { or [, then return FALSE
4463  * if we're in a pattern and the first char is a {
4464  *   {4,5} (any digits around the comma) returns FALSE
4465  * if we're in a pattern and the first char is a [
4466  *   [] returns FALSE
4467  *   [SOMETHING] has a funky algorithm to decide whether it's a
4468  *      character class or not.  It has to deal with things like
4469  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
4470  * anything else returns TRUE
4471  */
4472 
4473 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
4474 
4475 STATIC int
S_intuit_more(pTHX_ char * s,char * e)4476 S_intuit_more(pTHX_ char *s, char *e)
4477 {
4478     PERL_ARGS_ASSERT_INTUIT_MORE;
4479 
4480     if (PL_lex_brackets)
4481         return TRUE;
4482     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
4483         return TRUE;
4484     if (*s == '-' && s[1] == '>'
4485      && FEATURE_POSTDEREF_QQ_IS_ENABLED
4486      && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
4487         ||(s[2] == '@' && memCHRs("*[{",s[3])) ))
4488         return TRUE;
4489     if (*s != '{' && *s != '[')
4490         return FALSE;
4491     PL_parser->sub_no_recover = TRUE;
4492     if (!PL_lex_inpat)
4493         return TRUE;
4494 
4495     /* In a pattern, so maybe we have {n,m}. */
4496     if (*s == '{') {
4497         if (regcurly(s, e, NULL)) {
4498             return FALSE;
4499         }
4500         return TRUE;
4501     }
4502 
4503     /* On the other hand, maybe we have a character class */
4504 
4505     s++;
4506     if (*s == ']' || *s == '^')
4507         return FALSE;
4508     else {
4509         /* this is terrifying, and it works */
4510         int weight;
4511         char seen[256];
4512         const char * const send = (char *) memchr(s, ']', e - s);
4513         unsigned char un_char, last_un_char;
4514         char tmpbuf[sizeof PL_tokenbuf * 4];
4515 
4516         if (!send)		/* has to be an expression */
4517             return TRUE;
4518         weight = 2;		/* let's weigh the evidence */
4519 
4520         if (*s == '$')
4521             weight -= 3;
4522         else if (isDIGIT(*s)) {
4523             if (s[1] != ']') {
4524                 if (isDIGIT(s[1]) && s[2] == ']')
4525                     weight -= 10;
4526             }
4527             else
4528                 weight -= 100;
4529         }
4530         Zero(seen,256,char);
4531         un_char = 255;
4532         for (; s < send; s++) {
4533             last_un_char = un_char;
4534             un_char = (unsigned char)*s;
4535             switch (*s) {
4536             case '@':
4537             case '&':
4538             case '$':
4539                 weight -= seen[un_char] * 10;
4540                 if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) {
4541                     int len;
4542                     scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
4543                     len = (int)strlen(tmpbuf);
4544                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
4545                                                     UTF ? SVf_UTF8 : 0, SVt_PV))
4546                         weight -= 100;
4547                     else
4548                         weight -= 10;
4549                 }
4550                 else if (*s == '$'
4551                          && s[1]
4552                          && memCHRs("[#!%*<>()-=",s[1]))
4553                 {
4554                     if (/*{*/ memCHRs("])} =",s[2]))
4555                         weight -= 10;
4556                     else
4557                         weight -= 1;
4558                 }
4559                 break;
4560             case '\\':
4561                 un_char = 254;
4562                 if (s[1]) {
4563                     if (memCHRs("wds]",s[1]))
4564                         weight += 100;
4565                     else if (seen[(U8)'\''] || seen[(U8)'"'])
4566                         weight += 1;
4567                     else if (memCHRs("rnftbxcav",s[1]))
4568                         weight += 40;
4569                     else if (isDIGIT(s[1])) {
4570                         weight += 40;
4571                         while (s[1] && isDIGIT(s[1]))
4572                             s++;
4573                     }
4574                 }
4575                 else
4576                     weight += 100;
4577                 break;
4578             case '-':
4579                 if (s[1] == '\\')
4580                     weight += 50;
4581                 if (memCHRs("aA01! ",last_un_char))
4582                     weight += 30;
4583                 if (memCHRs("zZ79~",s[1]))
4584                     weight += 30;
4585                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4586                     weight -= 5;	/* cope with negative subscript */
4587                 break;
4588             default:
4589                 if (!isWORDCHAR(last_un_char)
4590                     && !(last_un_char == '$' || last_un_char == '@'
4591                          || last_un_char == '&')
4592                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4593                     char *d = s;
4594                     while (isALPHA(*s))
4595                         s++;
4596                     if (keyword(d, s - d, 0))
4597                         weight -= 150;
4598                 }
4599                 if (un_char == last_un_char + 1)
4600                     weight += 5;
4601                 weight -= seen[un_char];
4602                 break;
4603             }
4604             seen[un_char]++;
4605         }
4606         if (weight >= 0)	/* probably a character class */
4607             return FALSE;
4608     }
4609 
4610     return TRUE;
4611 }
4612 
4613 /*
4614  * S_intuit_method
4615  *
4616  * Does all the checking to disambiguate
4617  *   foo bar
4618  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
4619  * METHCALL (bar->foo(args)) or METHCALL0 (bar->foo args).
4620  *
4621  * First argument is the stuff after the first token, e.g. "bar".
4622  *
4623  * Not a method if foo is a filehandle.
4624  * Not a method if foo is a subroutine prototyped to take a filehandle.
4625  * Not a method if it's really "Foo $bar"
4626  * Method if it's "foo $bar"
4627  * Not a method if it's really "print foo $bar"
4628  * Method if it's really "foo package::" (interpreted as package->foo)
4629  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4630  * Not a method if bar is a filehandle or package, but is quoted with
4631  *   =>
4632  */
4633 
4634 STATIC int
S_intuit_method(pTHX_ char * start,SV * ioname,CV * cv)4635 S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
4636 {
4637     char *s = start + (*start == '$');
4638     char tmpbuf[sizeof PL_tokenbuf];
4639     STRLEN len;
4640     GV* indirgv;
4641         /* Mustn't actually add anything to a symbol table.
4642            But also don't want to "initialise" any placeholder
4643            constants that might already be there into full
4644            blown PVGVs with attached PVCV.  */
4645     GV * const gv =
4646         ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL;
4647 
4648     PERL_ARGS_ASSERT_INTUIT_METHOD;
4649 
4650     if (!FEATURE_INDIRECT_IS_ENABLED)
4651         return 0;
4652 
4653     if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4654             return 0;
4655     if (cv && SvPOK(cv)) {
4656         const char *proto = CvPROTO(cv);
4657         if (proto) {
4658             while (*proto && (isSPACE(*proto) || *proto == ';'))
4659                 proto++;
4660             if (*proto == '*')
4661                 return 0;
4662         }
4663     }
4664 
4665     if (*start == '$') {
4666         SSize_t start_off = start - SvPVX(PL_linestr);
4667         if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY
4668             || isUPPER(*PL_tokenbuf))
4669             return 0;
4670         /* this could be $# */
4671         if (isSPACE(*s))
4672             s = skipspace(s);
4673         PL_bufptr = SvPVX(PL_linestr) + start_off;
4674         PL_expect = XREF;
4675         return *s == '(' ? METHCALL : METHCALL0;
4676     }
4677 
4678     s = scan_word6(s, tmpbuf, sizeof tmpbuf, TRUE, &len, FALSE);
4679     /* start is the beginning of the possible filehandle/object,
4680      * and s is the end of it
4681      * tmpbuf is a copy of it (but with single quotes as double colons)
4682      */
4683 
4684     if (!keyword(tmpbuf, len, 0)) {
4685         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4686             len -= 2;
4687             tmpbuf[len] = '\0';
4688             goto bare_package;
4689         }
4690         indirgv = gv_fetchpvn_flags(tmpbuf, len,
4691                                     GV_NOADD_NOINIT|( UTF ? SVf_UTF8 : 0 ),
4692                                     SVt_PVCV);
4693         if (indirgv && SvTYPE(indirgv) != SVt_NULL
4694          && (!isGV(indirgv) || GvCVu(indirgv)))
4695             return 0;
4696         /* filehandle or package name makes it a method */
4697         if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4698             s = skipspace(s);
4699             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4700                 return 0;	/* no assumptions -- "=>" quotes bareword */
4701       bare_package:
4702             NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0,
4703                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4704             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4705             PL_expect = XTERM;
4706             force_next(BAREWORD);
4707             PL_bufptr = s;
4708             return *s == '(' ? METHCALL : METHCALL0;
4709         }
4710     }
4711     return 0;
4712 }
4713 
4714 /* Encoded script support. filter_add() effectively inserts a
4715  * 'pre-processing' function into the current source input stream.
4716  * Note that the filter function only applies to the current source file
4717  * (e.g., it will not affect files 'require'd or 'use'd by this one).
4718  *
4719  * The datasv parameter (which may be NULL) can be used to pass
4720  * private data to this instance of the filter. The filter function
4721  * can recover the SV using the FILTER_DATA macro and use it to
4722  * store private buffers and state information.
4723  *
4724  * The supplied datasv parameter is upgraded to a PVIO type
4725  * and the IoDIRP/IoANY field is used to store the function pointer,
4726  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4727  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4728  * private use must be set using malloc'd pointers.
4729  */
4730 
4731 SV *
Perl_filter_add(pTHX_ filter_t funcp,SV * datasv)4732 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4733 {
4734     if (!funcp)
4735         return NULL;
4736 
4737     if (!PL_parser)
4738         return NULL;
4739 
4740     if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4741         Perl_croak(aTHX_ "Source filters apply only to byte streams");
4742 
4743     if (!PL_rsfp_filters)
4744         PL_rsfp_filters = newAV();
4745     if (!datasv)
4746         datasv = newSV(0);
4747     SvUPGRADE(datasv, SVt_PVIO);
4748     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4749     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4750     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4751                           FPTR2DPTR(void *, IoANY(datasv)),
4752                           SvPV_nolen(datasv)));
4753     av_unshift(PL_rsfp_filters, 1);
4754     av_store(PL_rsfp_filters, 0, datasv) ;
4755     if (
4756         !PL_parser->filtered
4757      && PL_parser->lex_flags & LEX_EVALBYTES
4758      && PL_bufptr < PL_bufend
4759     ) {
4760         const char *s = PL_bufptr;
4761         while (s < PL_bufend) {
4762             if (*s == '\n') {
4763                 SV *linestr = PL_parser->linestr;
4764                 char *buf = SvPVX(linestr);
4765                 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4766                 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4767                 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4768                 STRLEN const linestart_pos = PL_parser->linestart - buf;
4769                 STRLEN const last_uni_pos =
4770                     PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4771                 STRLEN const last_lop_pos =
4772                     PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4773                 av_push(PL_rsfp_filters, linestr);
4774                 PL_parser->linestr =
4775                     newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4776                 buf = SvPVX(PL_parser->linestr);
4777                 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4778                 PL_parser->bufptr = buf + bufptr_pos;
4779                 PL_parser->oldbufptr = buf + oldbufptr_pos;
4780                 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4781                 PL_parser->linestart = buf + linestart_pos;
4782                 if (PL_parser->last_uni)
4783                     PL_parser->last_uni = buf + last_uni_pos;
4784                 if (PL_parser->last_lop)
4785                     PL_parser->last_lop = buf + last_lop_pos;
4786                 SvLEN_set(linestr, SvCUR(linestr));
4787                 SvCUR_set(linestr, s - SvPVX(linestr));
4788                 PL_parser->filtered = 1;
4789                 break;
4790             }
4791             s++;
4792         }
4793     }
4794     return(datasv);
4795 }
4796 
4797 /*
4798 =for apidoc_section $filters
4799 =for apidoc filter_del
4800 
4801 Delete most recently added instance of the filter function argument
4802 
4803 =cut
4804 */
4805 
4806 void
Perl_filter_del(pTHX_ filter_t funcp)4807 Perl_filter_del(pTHX_ filter_t funcp)
4808 {
4809     SV *datasv;
4810 
4811     PERL_ARGS_ASSERT_FILTER_DEL;
4812 
4813 #ifdef DEBUGGING
4814     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4815                           FPTR2DPTR(void*, funcp)));
4816 #endif
4817     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4818         return;
4819     /* if filter is on top of stack (usual case) just pop it off */
4820     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4821     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4822         SvREFCNT_dec(av_pop(PL_rsfp_filters));
4823 
4824         return;
4825     }
4826     /* we need to search for the correct entry and clear it	*/
4827     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4828 }
4829 
4830 
4831 /* Invoke the idxth filter function for the current rsfp.	 */
4832 /* maxlen 0 = read one text line */
4833 I32
Perl_filter_read(pTHX_ int idx,SV * buf_sv,int maxlen)4834 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4835 {
4836     filter_t funcp;
4837     I32 ret;
4838     SV *datasv = NULL;
4839     /* This API is bad. It should have been using unsigned int for maxlen.
4840        Not sure if we want to change the API, but if not we should sanity
4841        check the value here.  */
4842     unsigned int correct_length = maxlen < 0 ?  PERL_INT_MAX : maxlen;
4843 
4844     PERL_ARGS_ASSERT_FILTER_READ;
4845 
4846     if (!PL_parser || !PL_rsfp_filters)
4847         return -1;
4848     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?	*/
4849         /* Provide a default input filter to make life easy.	*/
4850         /* Note that we append to the line. This is handy.	*/
4851         DEBUG_P(PerlIO_printf(Perl_debug_log,
4852                               "filter_read %d: from rsfp\n", idx));
4853         if (correct_length) {
4854             /* Want a block */
4855             int len ;
4856             const int old_len = SvCUR(buf_sv);
4857 
4858             /* ensure buf_sv is large enough */
4859             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4860             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4861                                    correct_length)) <= 0) {
4862                 if (PerlIO_error(PL_rsfp))
4863                     return -1;		/* error */
4864                 else
4865                     return 0 ;		/* end of file */
4866             }
4867             SvCUR_set(buf_sv, old_len + len) ;
4868             SvPVX(buf_sv)[old_len + len] = '\0';
4869         } else {
4870             /* Want a line */
4871             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4872                 if (PerlIO_error(PL_rsfp))
4873                     return -1;		/* error */
4874                 else
4875                     return 0 ;		/* end of file */
4876             }
4877         }
4878         return SvCUR(buf_sv);
4879     }
4880     /* Skip this filter slot if filter has been deleted	*/
4881     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4882         DEBUG_P(PerlIO_printf(Perl_debug_log,
4883                               "filter_read %d: skipped (filter deleted)\n",
4884                               idx));
4885         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4886     }
4887     if (SvTYPE(datasv) != SVt_PVIO) {
4888         if (correct_length) {
4889             /* Want a block */
4890             const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4891             if (!remainder) return 0; /* eof */
4892             if (correct_length > remainder) correct_length = remainder;
4893             sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4894             SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4895         } else {
4896             /* Want a line */
4897             const char *s = SvEND(datasv);
4898             const char *send = SvPVX(datasv) + SvLEN(datasv);
4899             while (s < send) {
4900                 if (*s == '\n') {
4901                     s++;
4902                     break;
4903                 }
4904                 s++;
4905             }
4906             if (s == send) return 0; /* eof */
4907             sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4908             SvCUR_set(datasv, s-SvPVX(datasv));
4909         }
4910         return SvCUR(buf_sv);
4911     }
4912     /* Get function pointer hidden within datasv	*/
4913     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4914     DEBUG_P(PerlIO_printf(Perl_debug_log,
4915                           "filter_read %d: via function %p (%s)\n",
4916                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
4917     /* Call function. The function is expected to 	*/
4918     /* call "FILTER_READ(idx+1, buf_sv)" first.		*/
4919     /* Return: <0:error, =0:eof, >0:not eof 		*/
4920     ENTER;
4921     save_scalar(PL_errgv);
4922     ret = (*funcp)(aTHX_ idx, buf_sv, correct_length);
4923     LEAVE;
4924     return ret;
4925 }
4926 
4927 STATIC char *
S_filter_gets(pTHX_ SV * sv,STRLEN append)4928 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4929 {
4930     PERL_ARGS_ASSERT_FILTER_GETS;
4931 
4932 #ifdef PERL_CR_FILTER
4933     if (!PL_rsfp_filters) {
4934         filter_add(S_cr_textfilter,NULL);
4935     }
4936 #endif
4937     if (PL_rsfp_filters) {
4938         if (!append)
4939             SvCUR_set(sv, 0);	/* start with empty line	*/
4940         if (FILTER_READ(0, sv, 0) > 0)
4941             return ( SvPVX(sv) ) ;
4942         else
4943             return NULL ;
4944     }
4945     else
4946         return (sv_gets(sv, PL_rsfp, append));
4947 }
4948 
4949 STATIC HV *
S_find_in_my_stash(pTHX_ const char * pkgname,STRLEN len)4950 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4951 {
4952     GV *gv;
4953 
4954     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4955 
4956     if (memEQs(pkgname, len, "__PACKAGE__"))
4957         return PL_curstash;
4958 
4959     if (len > 2
4960         && (pkgname[len - 2] == ':' && pkgname[len - 1] == ':')
4961         && (gv = gv_fetchpvn_flags(pkgname,
4962                                    len,
4963                                    ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4964     {
4965         return GvHV(gv);			/* Foo:: */
4966     }
4967 
4968     /* use constant CLASS => 'MyClass' */
4969     gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4970     if (gv && GvCV(gv)) {
4971         SV * const sv = cv_const_sv(GvCV(gv));
4972         if (sv)
4973             return gv_stashsv(sv, 0);
4974     }
4975 
4976     return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4977 }
4978 
4979 
4980 STATIC char *
S_tokenize_use(pTHX_ int is_use,char * s)4981 S_tokenize_use(pTHX_ int is_use, char *s) {
4982     PERL_ARGS_ASSERT_TOKENIZE_USE;
4983 
4984     if (PL_expect != XSTATE)
4985         /* diag_listed_as: "use" not allowed in expression */
4986         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4987                     is_use ? "use" : "no"));
4988     PL_expect = XTERM;
4989     s = skipspace(s);
4990     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4991         s = force_version(s, TRUE);
4992         if (*s == ';' || *s == '}'
4993                 || (s = skipspace(s), (*s == ';' || *s == '}'))) {
4994             NEXTVAL_NEXTTOKE.opval = NULL;
4995             force_next(BAREWORD);
4996         }
4997         else if (*s == 'v') {
4998             s = force_word(s,BAREWORD,FALSE,TRUE);
4999             s = force_version(s, FALSE);
5000         }
5001     }
5002     else {
5003         s = force_word(s,BAREWORD,FALSE,TRUE);
5004         s = force_version(s, FALSE);
5005     }
5006     pl_yylval.ival = is_use;
5007     return s;
5008 }
5009 #ifdef DEBUGGING
5010     static const char* const exp_name[] =
5011         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
5012           "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
5013           "SIGVAR", "TERMORDORDOR"
5014         };
5015 #endif
5016 
5017 #define word_takes_any_delimiter(p,l) S_word_takes_any_delimiter(p,l)
5018 STATIC bool
S_word_takes_any_delimiter(char * p,STRLEN len)5019 S_word_takes_any_delimiter(char *p, STRLEN len)
5020 {
5021     return (len == 1 && memCHRs("msyq", p[0]))
5022             || (len == 2
5023                 && ((p[0] == 't' && p[1] == 'r')
5024                     || (p[0] == 'q' && memCHRs("qwxr", p[1]))));
5025 }
5026 
5027 static void
S_check_scalar_slice(pTHX_ char * s)5028 S_check_scalar_slice(pTHX_ char *s)
5029 {
5030     s++;
5031     while (SPACE_OR_TAB(*s)) s++;
5032     if (*s == 'q' && s[1] == 'w' && !isWORDCHAR_lazy_if_safe(s+2,
5033                                                              PL_bufend,
5034                                                              UTF))
5035     {
5036         return;
5037     }
5038     while (    isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)
5039            || (*s && memCHRs(" \t$#+-'\"", *s)))
5040     {
5041         s += UTF ? UTF8SKIP(s) : 1;
5042     }
5043     if (*s == '}' || *s == ']')
5044         pl_yylval.ival = OPpSLICEWARNING;
5045 }
5046 
5047 #define lex_token_boundary() S_lex_token_boundary(aTHX)
5048 static void
S_lex_token_boundary(pTHX)5049 S_lex_token_boundary(pTHX)
5050 {
5051     PL_oldoldbufptr = PL_oldbufptr;
5052     PL_oldbufptr = PL_bufptr;
5053 }
5054 
5055 #define vcs_conflict_marker(s) S_vcs_conflict_marker(aTHX_ s)
5056 static char *
S_vcs_conflict_marker(pTHX_ char * s)5057 S_vcs_conflict_marker(pTHX_ char *s)
5058 {
5059     lex_token_boundary();
5060     PL_bufptr = s;
5061     yyerror("Version control conflict marker");
5062     while (s < PL_bufend && *s != '\n')
5063         s++;
5064     return s;
5065 }
5066 
5067 static int
yyl_sigvar(pTHX_ char * s)5068 yyl_sigvar(pTHX_ char *s)
5069 {
5070     /* we expect the sigil and optional var name part of a
5071      * signature element here. Since a '$' is not necessarily
5072      * followed by a var name, handle it specially here; the general
5073      * yylex code would otherwise try to interpret whatever follows
5074      * as a var; e.g. ($, ...) would be seen as the var '$,'
5075      */
5076 
5077     U8 sigil;
5078 
5079     s = skipspace(s);
5080     sigil = *s++;
5081     PL_bufptr = s; /* for error reporting */
5082     switch (sigil) {
5083     case '$':
5084     case '@':
5085     case '%':
5086         /* spot stuff that looks like an prototype */
5087         if (memCHRs("$:@%&*;\\[]", *s)) {
5088             yyerror("Illegal character following sigil in a subroutine signature");
5089             break;
5090         }
5091         /* '$#' is banned, while '$ # comment' isn't */
5092         if (*s == '#') {
5093             yyerror("'#' not allowed immediately following a sigil in a subroutine signature");
5094             break;
5095         }
5096         s = skipspace(s);
5097         if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5098             char *dest = PL_tokenbuf + 1;
5099             /* read var name, including sigil, into PL_tokenbuf */
5100             PL_tokenbuf[0] = sigil;
5101             parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1,
5102                 0, cBOOL(UTF), FALSE, FALSE);
5103             *dest = '\0';
5104             assert(PL_tokenbuf[1]); /* we have a variable name */
5105         }
5106         else {
5107             *PL_tokenbuf = 0;
5108             PL_in_my = 0;
5109         }
5110 
5111         s = skipspace(s);
5112         /* parse the = for the default ourselves to avoid '+=' etc being accepted here
5113          * as the ASSIGNOP, and exclude other tokens that start with =
5114          */
5115         if (*s == '=' && (!s[1] || memCHRs("=~>", s[1]) == 0)) {
5116             /* save now to report with the same context as we did when
5117              * all ASSIGNOPS were accepted */
5118             PL_oldbufptr = s;
5119 
5120             ++s;
5121             NEXTVAL_NEXTTOKE.ival = OP_SASSIGN;
5122             force_next(ASSIGNOP);
5123             PL_expect = XTERM;
5124         }
5125         else if(*s == '/' && s[1] == '/' && s[2] == '=') {
5126             PL_oldbufptr = s;
5127 
5128             s += 3;
5129             NEXTVAL_NEXTTOKE.ival = OP_DORASSIGN;
5130             force_next(ASSIGNOP);
5131             PL_expect = XTERM;
5132         }
5133         else if(*s == '|' && s[1] == '|' && s[2] == '=') {
5134             PL_oldbufptr = s;
5135 
5136             s += 3;
5137             NEXTVAL_NEXTTOKE.ival = OP_ORASSIGN;
5138             force_next(ASSIGNOP);
5139             PL_expect = XTERM;
5140         }
5141         else if (*s == ',' || *s == ')') {
5142             PL_expect = XOPERATOR;
5143         }
5144         else {
5145             /* make sure the context shows the unexpected character and
5146              * hopefully a bit more */
5147             if (*s) ++s;
5148             while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
5149                 s++;
5150             PL_bufptr = s; /* for error reporting */
5151             yyerror("Illegal operator following parameter in a subroutine signature");
5152             PL_in_my = 0;
5153         }
5154         if (*PL_tokenbuf) {
5155             NEXTVAL_NEXTTOKE.ival = sigil;
5156             force_next('p'); /* force a signature pending identifier */
5157         }
5158         break;
5159 
5160     case ')':
5161         PL_expect = XBLOCK;
5162         break;
5163     case ',': /* handle ($a,,$b) */
5164         break;
5165 
5166     default:
5167         PL_in_my = 0;
5168         yyerror("A signature parameter must start with '$', '@' or '%'");
5169         /* very crude error recovery: skip to likely next signature
5170          * element */
5171         while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
5172             s++;
5173         break;
5174     }
5175 
5176     switch (sigil) {
5177         case ',': TOKEN (PERLY_COMMA);
5178         case '$': TOKEN (PERLY_DOLLAR);
5179         case '@': TOKEN (PERLY_SNAIL);
5180         case '%': TOKEN (PERLY_PERCENT_SIGN);
5181         case ')': TOKEN (PERLY_PAREN_CLOSE);
5182         default:  TOKEN (sigil);
5183     }
5184 }
5185 
5186 static int
yyl_dollar(pTHX_ char * s)5187 yyl_dollar(pTHX_ char *s)
5188 {
5189     CLINE;
5190 
5191     if (PL_expect == XPOSTDEREF) {
5192         if (s[1] == '#') {
5193             s++;
5194             POSTDEREF(DOLSHARP);
5195         }
5196         POSTDEREF(PERLY_DOLLAR);
5197     }
5198 
5199     if (   s[1] == '#'
5200         && (   isIDFIRST_lazy_if_safe(s+2, PL_bufend, UTF)
5201             || memCHRs("{$:+-@", s[2])))
5202     {
5203         PL_tokenbuf[0] = '@';
5204         s = scan_ident(s + 1, PL_tokenbuf + 1,
5205                        sizeof PL_tokenbuf - 1, FALSE);
5206         if (PL_expect == XOPERATOR) {
5207             char *d = s;
5208             if (PL_bufptr > s) {
5209                 d = PL_bufptr-1;
5210                 PL_bufptr = PL_oldbufptr;
5211             }
5212             no_op("Array length", d);
5213         }
5214         if (!PL_tokenbuf[1])
5215             PREREF(DOLSHARP);
5216         PL_expect = XOPERATOR;
5217         force_ident_maybe_lex('#');
5218         TOKEN(DOLSHARP);
5219     }
5220 
5221     PL_tokenbuf[0] = '$';
5222     s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5223     if (PL_expect == XOPERATOR) {
5224         char *d = s;
5225         if (PL_bufptr > s) {
5226             d = PL_bufptr-1;
5227             PL_bufptr = PL_oldbufptr;
5228         }
5229         no_op("Scalar", d);
5230     }
5231     if (!PL_tokenbuf[1]) {
5232         if (s == PL_bufend)
5233             yyerror("Final $ should be \\$ or $name");
5234         PREREF(PERLY_DOLLAR);
5235     }
5236 
5237     {
5238         const char tmp = *s;
5239         if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
5240             s = skipspace(s);
5241 
5242         if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5243             && intuit_more(s, PL_bufend)) {
5244             if (*s == '[') {
5245                 PL_tokenbuf[0] = '@';
5246                 if (ckWARN(WARN_SYNTAX)) {
5247                     char *t = s+1;
5248 
5249                     while ( t < PL_bufend ) {
5250                         if (isSPACE(*t)) {
5251                             do { t += UTF ? UTF8SKIP(t) : 1; } while (t < PL_bufend && isSPACE(*t));
5252                             /* consumed one or more space chars */
5253                         } else if (*t == '$' || *t == '@') {
5254                             /* could be more than one '$' like $$ref or @$ref */
5255                             do { t++; } while (t < PL_bufend && *t == '$');
5256 
5257                             /* could be an abigail style identifier like $ foo */
5258                             while (t < PL_bufend && *t == ' ') t++;
5259 
5260                             /* strip off the name of the var */
5261                             while (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
5262                                 t += UTF ? UTF8SKIP(t) : 1;
5263                             /* consumed a varname */
5264                         } else if (isDIGIT(*t)) {
5265                             /* deal with hex constants like 0x11 */
5266                             if (t[0] == '0' && t[1] == 'x') {
5267                                 t += 2;
5268                                 while (t < PL_bufend && isXDIGIT(*t)) t++;
5269                             } else {
5270                                 /* deal with decimal/octal constants like 1 and 0123 */
5271                                 do { t++; } while (isDIGIT(*t));
5272                                 if (t<PL_bufend && *t == '.') {
5273                                     do { t++; } while (isDIGIT(*t));
5274                                 }
5275                             }
5276                             /* consumed a number */
5277                         } else {
5278                             /* not a var nor a space nor a number */
5279                             break;
5280                         }
5281                     }
5282                     if (t < PL_bufend && *t++ == ',') {
5283                         PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
5284                         while (t < PL_bufend && *t != ']')
5285                             t++;
5286                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5287                                     "Multidimensional syntax %" UTF8f " not supported",
5288                                     UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
5289                     }
5290                 }
5291             }
5292             else if (*s == '{') {
5293                 char *t;
5294                 PL_tokenbuf[0] = '%';
5295                 if (    strEQ(PL_tokenbuf+1, "SIG")
5296                     && ckWARN(WARN_SYNTAX)
5297                     && (t = (char *) memchr(s, '}', PL_bufend - s))
5298                     && (t = (char *) memchr(t, '=', PL_bufend - t)))
5299                 {
5300                     char tmpbuf[sizeof PL_tokenbuf];
5301                     do {
5302                         t++;
5303                     } while (isSPACE(*t));
5304                     if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) {
5305                         STRLEN len;
5306                         t = scan_word6(t, tmpbuf, sizeof tmpbuf, TRUE,
5307                                       &len, TRUE);
5308                         while (isSPACE(*t))
5309                             t++;
5310                         if (  *t == ';'
5311                             && get_cvn_flags(tmpbuf, len, UTF
5312                                                             ? SVf_UTF8
5313                                                             : 0))
5314                         {
5315                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5316                                 "You need to quote \"%" UTF8f "\"",
5317                                     UTF8fARG(UTF, len, tmpbuf));
5318                         }
5319                     }
5320                 }
5321             }
5322         }
5323 
5324         PL_expect = XOPERATOR;
5325         if ((PL_lex_state == LEX_NORMAL || PL_lex_brackets) && isSPACE((char)tmp)) {
5326             const bool islop = (PL_last_lop == PL_oldoldbufptr);
5327             if (!islop || PL_last_lop_op == OP_GREPSTART)
5328                 PL_expect = XOPERATOR;
5329             else if (memCHRs("$@\"'`q", *s))
5330                 PL_expect = XTERM;		/* e.g. print $fh "foo" */
5331             else if (   memCHRs("&*<%", *s)
5332                      && isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF))
5333             {
5334                 PL_expect = XTERM;		/* e.g. print $fh &sub */
5335             }
5336             else if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5337                 char tmpbuf[sizeof PL_tokenbuf];
5338                 int t2;
5339                 STRLEN len;
5340                 scan_word6(s, tmpbuf, sizeof tmpbuf, TRUE, &len, FALSE);
5341                 if ((t2 = keyword(tmpbuf, len, 0))) {
5342                     /* binary operators exclude handle interpretations */
5343                     switch (t2) {
5344                     case -KEY_x:
5345                     case -KEY_eq:
5346                     case -KEY_ne:
5347                     case -KEY_gt:
5348                     case -KEY_lt:
5349                     case -KEY_ge:
5350                     case -KEY_le:
5351                     case -KEY_cmp:
5352                         break;
5353                     default:
5354                         PL_expect = XTERM;	/* e.g. print $fh length() */
5355                         break;
5356                     }
5357                 }
5358                 else {
5359                     PL_expect = XTERM;	/* e.g. print $fh subr() */
5360                 }
5361             }
5362             else if (isDIGIT(*s))
5363                 PL_expect = XTERM;		/* e.g. print $fh 3 */
5364             else if (*s == '.' && isDIGIT(s[1]))
5365                 PL_expect = XTERM;		/* e.g. print $fh .3 */
5366             else if ((*s == '?' || *s == '-' || *s == '+')
5367                 && !isSPACE(s[1]) && s[1] != '=')
5368                 PL_expect = XTERM;		/* e.g. print $fh -1 */
5369             else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5370                      && s[1] != '/')
5371                 PL_expect = XTERM;		/* e.g. print $fh /.../
5372                                                XXX except DORDOR operator
5373                                             */
5374             else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5375                      && s[2] != '=')
5376                 PL_expect = XTERM;		/* print $fh <<"EOF" */
5377         }
5378     }
5379     force_ident_maybe_lex('$');
5380     TOKEN(PERLY_DOLLAR);
5381 }
5382 
5383 static int
yyl_sub(pTHX_ char * s,const int key)5384 yyl_sub(pTHX_ char *s, const int key)
5385 {
5386     char * const tmpbuf = PL_tokenbuf + 1;
5387     bool have_name, have_proto;
5388     STRLEN len;
5389     SV *format_name = NULL;
5390     bool is_method = (key == KEY_method);
5391 
5392     /* method always implies signatures */
5393     bool is_sigsub = is_method || FEATURE_SIGNATURES_IS_ENABLED;
5394 
5395     SSize_t off = s-SvPVX(PL_linestr);
5396     char *d;
5397 
5398     s = skipspace(s); /* can move PL_linestr */
5399 
5400     d = SvPVX(PL_linestr)+off;
5401 
5402     SAVEBOOL(PL_parser->sig_seen);
5403     PL_parser->sig_seen = FALSE;
5404 
5405     if (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
5406         || *s == '\''
5407         || (*s == ':' && s[1] == ':'))
5408     {
5409 
5410         PL_expect = XATTRBLOCK;
5411         d = scan_word6(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
5412                       &len, TRUE);
5413         if (key == KEY_format)
5414             format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
5415         *PL_tokenbuf = '&';
5416         if (memchr(tmpbuf, ':', len) || key != KEY_sub
5417          || pad_findmy_pvn(
5418                 PL_tokenbuf, len + 1, 0
5419             ) != NOT_IN_PAD)
5420             sv_setpvn(PL_subname, tmpbuf, len);
5421         else {
5422             sv_setsv(PL_subname,PL_curstname);
5423             sv_catpvs(PL_subname,"::");
5424             sv_catpvn(PL_subname,tmpbuf,len);
5425         }
5426         if (SvUTF8(PL_linestr))
5427             SvUTF8_on(PL_subname);
5428         have_name = TRUE;
5429 
5430         s = skipspace(d);
5431     }
5432     else {
5433         if (key == KEY_my || key == KEY_our || key==KEY_state) {
5434             *d = '\0';
5435             /* diag_listed_as: Missing name in "%s sub" */
5436             Perl_croak(aTHX_
5437                       "Missing name in \"%s\"", PL_bufptr);
5438         }
5439         PL_expect = XATTRTERM;
5440         sv_setpvs(PL_subname,"?");
5441         have_name = FALSE;
5442     }
5443 
5444     if (key == KEY_format) {
5445         if (format_name) {
5446             NEXTVAL_NEXTTOKE.opval
5447                 = newSVOP(OP_CONST,0, format_name);
5448             NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
5449             force_next(BAREWORD);
5450         }
5451         PREBLOCK(KW_FORMAT);
5452     }
5453 
5454     /* Look for a prototype */
5455     if (*s == '(' && !is_sigsub) {
5456         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
5457         if (!s)
5458             Perl_croak(aTHX_ "Prototype not terminated");
5459         COPLINE_SET_FROM_MULTI_END;
5460         (void)validate_proto(PL_subname, PL_lex_stuff,
5461                              ckWARN(WARN_ILLEGALPROTO), 0);
5462         have_proto = TRUE;
5463 
5464         s = skipspace(s);
5465     }
5466     else
5467         have_proto = FALSE;
5468 
5469     if (  !(*s == ':' && s[1] != ':')
5470         && (*s != '{' && *s != '(') && key != KEY_format)
5471     {
5472         assert(key == KEY_sub || key == KEY_method ||
5473                key == KEY_AUTOLOAD || key == KEY_DESTROY ||
5474                key == KEY_BEGIN || key == KEY_UNITCHECK || key == KEY_CHECK ||
5475                key == KEY_INIT || key == KEY_END ||
5476                key == KEY_my || key == KEY_state ||
5477                key == KEY_our);
5478         if (!have_name)
5479             Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
5480         else if (*s != ';' && *s != '}')
5481             Perl_croak(aTHX_ "Illegal declaration of subroutine %" SVf, SVfARG(PL_subname));
5482     }
5483 
5484     if (have_proto) {
5485         NEXTVAL_NEXTTOKE.opval =
5486             newSVOP(OP_CONST, 0, PL_lex_stuff);
5487         PL_lex_stuff = NULL;
5488         force_next(THING);
5489     }
5490 
5491     if (!have_name) {
5492         if (PL_curstash)
5493             sv_setpvs(PL_subname, "__ANON__");
5494         else
5495             sv_setpvs(PL_subname, "__ANON__::__ANON__");
5496         if (is_method)
5497             TOKEN(KW_METHOD_anon);
5498         else if (is_sigsub)
5499             TOKEN(KW_SUB_anon_sig);
5500         else
5501             TOKEN(KW_SUB_anon);
5502     }
5503     force_ident_maybe_lex('&');
5504     if (is_method)
5505         TOKEN(KW_METHOD_named);
5506     else if (is_sigsub)
5507         TOKEN(KW_SUB_named_sig);
5508     else
5509         TOKEN(KW_SUB_named);
5510 }
5511 
5512 static int
yyl_interpcasemod(pTHX_ char * s)5513 yyl_interpcasemod(pTHX_ char *s)
5514 {
5515 #ifdef DEBUGGING
5516     if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
5517         Perl_croak(aTHX_
5518                    "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
5519                    PL_bufptr, PL_bufend, *PL_bufptr);
5520 #endif
5521 
5522     if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
5523         /* if at a \E */
5524         if (PL_lex_casemods) {
5525             const char oldmod = PL_lex_casestack[--PL_lex_casemods];
5526             PL_lex_casestack[PL_lex_casemods] = '\0';
5527 
5528             if (PL_bufptr != PL_bufend
5529                 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
5530                     || oldmod == 'F')) {
5531                 PL_bufptr += 2;
5532                 PL_lex_state = LEX_INTERPCONCAT;
5533             }
5534             PL_lex_allbrackets--;
5535             return REPORT(PERLY_PAREN_CLOSE);
5536         }
5537         else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
5538            /* Got an unpaired \E */
5539            Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
5540                     "Useless use of \\E");
5541         }
5542         if (PL_bufptr != PL_bufend)
5543             PL_bufptr += 2;
5544         PL_lex_state = LEX_INTERPCONCAT;
5545         return yylex();
5546     }
5547     else {
5548         DEBUG_T({
5549             PerlIO_printf(Perl_debug_log, "### Saw case modifier\n");
5550         });
5551         s = PL_bufptr + 1;
5552         if (s[1] == '\\' && s[2] == 'E') {
5553             PL_bufptr = s + 3;
5554             PL_lex_state = LEX_INTERPCONCAT;
5555             return yylex();
5556         }
5557         else {
5558             I32 tmp;
5559             if (   memBEGINs(s, (STRLEN) (PL_bufend - s), "L\\u")
5560                 || memBEGINs(s, (STRLEN) (PL_bufend - s), "U\\l"))
5561             {
5562                 tmp = *s, *s = s[2], s[2] = (char)tmp;	/* misordered... */
5563             }
5564             if ((*s == 'L' || *s == 'U' || *s == 'F')
5565                 && (strpbrk(PL_lex_casestack, "LUF")))
5566             {
5567                 PL_lex_casestack[--PL_lex_casemods] = '\0';
5568                 PL_lex_allbrackets--;
5569                 return REPORT(PERLY_PAREN_CLOSE);
5570             }
5571             if (PL_lex_casemods > 10)
5572                 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
5573             PL_lex_casestack[PL_lex_casemods++] = *s;
5574             PL_lex_casestack[PL_lex_casemods] = '\0';
5575             PL_lex_state = LEX_INTERPCONCAT;
5576             NEXTVAL_NEXTTOKE.ival = 0;
5577             force_next((2<<24)|PERLY_PAREN_OPEN);
5578             if (*s == 'l')
5579                 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
5580             else if (*s == 'u')
5581                 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
5582             else if (*s == 'L')
5583                 NEXTVAL_NEXTTOKE.ival = OP_LC;
5584             else if (*s == 'U')
5585                 NEXTVAL_NEXTTOKE.ival = OP_UC;
5586             else if (*s == 'Q')
5587                 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
5588             else if (*s == 'F')
5589                 NEXTVAL_NEXTTOKE.ival = OP_FC;
5590             else
5591                 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
5592             PL_bufptr = s + 1;
5593         }
5594         force_next(FUNC);
5595         if (PL_lex_starts) {
5596             s = PL_bufptr;
5597             PL_lex_starts = 0;
5598             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5599             if (PL_lex_casemods == 1 && PL_lex_inpat)
5600                 TOKEN(PERLY_COMMA);
5601             else
5602                 AopNOASSIGN(OP_CONCAT);
5603         }
5604         else
5605             return yylex();
5606     }
5607 }
5608 
5609 static int
yyl_secondclass_keyword(pTHX_ char * s,STRLEN len,int key,I32 * orig_keyword,GV ** pgv,GV *** pgvp)5610 yyl_secondclass_keyword(pTHX_ char *s, STRLEN len, int key, I32 *orig_keyword,
5611                         GV **pgv, GV ***pgvp)
5612 {
5613     GV *ogv = NULL;	/* override (winner) */
5614     GV *hgv = NULL;	/* hidden (loser) */
5615     GV *gv = *pgv;
5616 
5617     if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5618         CV *cv;
5619         if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5620                                     (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
5621                                     SVt_PVCV))
5622             && (cv = GvCVu(gv)))
5623         {
5624             if (GvIMPORTED_CV(gv))
5625                 ogv = gv;
5626             else if (! CvNOWARN_AMBIGUOUS(cv))
5627                 hgv = gv;
5628         }
5629         if (!ogv
5630             && (*pgvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf, len, FALSE))
5631             && (gv = **pgvp)
5632             && (isGV_with_GP(gv)
5633                 ? GvCVu(gv) && GvIMPORTED_CV(gv)
5634                 :   SvPCS_IMPORTED(gv)
5635                 && (gv_init(gv, PL_globalstash, PL_tokenbuf,
5636                                                          len, 0), 1)))
5637         {
5638             ogv = gv;
5639         }
5640     }
5641 
5642     *pgv = gv;
5643 
5644     if (ogv) {
5645         *orig_keyword = key;
5646         return 0;		/* overridden by import or by GLOBAL */
5647     }
5648     else if (gv && !*pgvp
5649              && -key==KEY_lock	/* XXX generalizable kludge */
5650              && GvCVu(gv))
5651     {
5652         return 0;		/* any sub overrides "weak" keyword */
5653     }
5654     else {			/* no override */
5655         key = -key;
5656         if (key == KEY_dump) {
5657             Perl_croak(aTHX_ "dump() must be written as CORE::dump() as of Perl 5.30");
5658         }
5659         *pgv = NULL;
5660         *pgvp = 0;
5661         if (hgv && key != KEY_x)	/* never ambiguous */
5662             Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5663                            "Ambiguous call resolved as CORE::%s(), "
5664                            "qualify as such or use &",
5665                            GvENAME(hgv));
5666         return key;
5667     }
5668 }
5669 
5670 static int
yyl_qw(pTHX_ char * s,STRLEN len)5671 yyl_qw(pTHX_ char *s, STRLEN len)
5672 {
5673     OP *words = NULL;
5674 
5675     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
5676     if (!s)
5677         missingterm(NULL, 0);
5678 
5679     COPLINE_SET_FROM_MULTI_END;
5680     PL_expect = XOPERATOR;
5681     if (SvCUR(PL_lex_stuff)) {
5682         int warned_comma = !ckWARN(WARN_QW);
5683         int warned_comment = warned_comma;
5684         char *d = SvPV_force(PL_lex_stuff, len);
5685         while (len) {
5686             for (; isSPACE(*d) && len; --len, ++d)
5687                 /**/;
5688             if (len) {
5689                 SV *sv;
5690                 const char *b = d;
5691                 if (!warned_comma || !warned_comment) {
5692                     for (; !isSPACE(*d) && len; --len, ++d) {
5693                         if (!warned_comma && *d == ',') {
5694                             Perl_warner(aTHX_ packWARN(WARN_QW),
5695                                 "Possible attempt to separate words with commas");
5696                             ++warned_comma;
5697                         }
5698                         else if (!warned_comment && *d == '#') {
5699                             Perl_warner(aTHX_ packWARN(WARN_QW),
5700                                 "Possible attempt to put comments in qw() list");
5701                             ++warned_comment;
5702                         }
5703                     }
5704                 }
5705                 else {
5706                     for (; !isSPACE(*d) && len; --len, ++d)
5707                         /**/;
5708                 }
5709                 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
5710                 words = op_append_elem(OP_LIST, words,
5711                                        newSVOP(OP_CONST, 0, tokeq(sv)));
5712             }
5713         }
5714     }
5715     if (!words)
5716         words = newNULLLIST();
5717     SvREFCNT_dec_NN(PL_lex_stuff);
5718     PL_lex_stuff = NULL;
5719     PL_expect = XOPERATOR;
5720     pl_yylval.opval = sawparens(words);
5721     TOKEN(QWLIST);
5722 }
5723 
5724 static int
yyl_hyphen(pTHX_ char * s)5725 yyl_hyphen(pTHX_ char *s)
5726 {
5727     if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5728         I32 ftst = 0;
5729         char tmp;
5730 
5731         s++;
5732         PL_bufptr = s;
5733         tmp = *s++;
5734 
5735         while (s < PL_bufend && SPACE_OR_TAB(*s))
5736             s++;
5737 
5738         if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=>")) {
5739             s = force_word(PL_bufptr,BAREWORD,FALSE,FALSE);
5740             DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5741             OPERATOR(PERLY_MINUS);              /* unary minus */
5742         }
5743         switch (tmp) {
5744         case 'r': ftst = OP_FTEREAD;    break;
5745         case 'w': ftst = OP_FTEWRITE;   break;
5746         case 'x': ftst = OP_FTEEXEC;    break;
5747         case 'o': ftst = OP_FTEOWNED;   break;
5748         case 'R': ftst = OP_FTRREAD;    break;
5749         case 'W': ftst = OP_FTRWRITE;   break;
5750         case 'X': ftst = OP_FTREXEC;    break;
5751         case 'O': ftst = OP_FTROWNED;   break;
5752         case 'e': ftst = OP_FTIS;       break;
5753         case 'z': ftst = OP_FTZERO;     break;
5754         case 's': ftst = OP_FTSIZE;     break;
5755         case 'f': ftst = OP_FTFILE;     break;
5756         case 'd': ftst = OP_FTDIR;      break;
5757         case 'l': ftst = OP_FTLINK;     break;
5758         case 'p': ftst = OP_FTPIPE;     break;
5759         case 'S': ftst = OP_FTSOCK;     break;
5760         case 'u': ftst = OP_FTSUID;     break;
5761         case 'g': ftst = OP_FTSGID;     break;
5762         case 'k': ftst = OP_FTSVTX;     break;
5763         case 'b': ftst = OP_FTBLK;      break;
5764         case 'c': ftst = OP_FTCHR;      break;
5765         case 't': ftst = OP_FTTTY;      break;
5766         case 'T': ftst = OP_FTTEXT;     break;
5767         case 'B': ftst = OP_FTBINARY;   break;
5768         case 'M': case 'A': case 'C':
5769             gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5770             switch (tmp) {
5771             case 'M': ftst = OP_FTMTIME; break;
5772             case 'A': ftst = OP_FTATIME; break;
5773             case 'C': ftst = OP_FTCTIME; break;
5774             default:                     break;
5775             }
5776             break;
5777         default:
5778             break;
5779         }
5780         if (ftst) {
5781             PL_last_uni = PL_oldbufptr;
5782             PL_last_lop_op = (OPCODE)ftst;
5783             DEBUG_T( {
5784                 PerlIO_printf(Perl_debug_log, "### Saw file test %c\n", (int)tmp);
5785             } );
5786             FTST(ftst);
5787         }
5788         else {
5789             /* Assume it was a minus followed by a one-letter named
5790              * subroutine call (or a -bareword), then. */
5791             DEBUG_T( {
5792                 PerlIO_printf(Perl_debug_log,
5793                     "### '-%c' looked like a file test but was not\n",
5794                     (int) tmp);
5795             } );
5796             s = --PL_bufptr;
5797         }
5798     }
5799     {
5800         const char tmp = *s++;
5801         if (*s == tmp) {
5802             s++;
5803             if (PL_expect == XOPERATOR)
5804                 TERM(POSTDEC);
5805             else
5806                 OPERATOR(PREDEC);
5807         }
5808         else if (*s == '>') {
5809             s++;
5810             s = skipspace(s);
5811             if (((*s == '$' || *s == '&') && s[1] == '*')
5812               ||(*s == '$' && s[1] == '#' && s[2] == '*')
5813               ||((*s == '@' || *s == '%') && memCHRs("*[{", s[1]))
5814               ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5815              )
5816             {
5817                 PL_expect = XPOSTDEREF;
5818                 TOKEN(ARROW);
5819             }
5820             if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5821                 s = force_word(s,METHCALL0,FALSE,TRUE);
5822                 TOKEN(ARROW);
5823             }
5824             else if (*s == '$')
5825                 OPERATOR(ARROW);
5826             else
5827                 TERM(ARROW);
5828         }
5829         if (PL_expect == XOPERATOR) {
5830             if (*s == '='
5831                 && !PL_lex_allbrackets
5832                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5833             {
5834                 s--;
5835                 TOKEN(0);
5836             }
5837             Aop(OP_SUBTRACT);
5838         }
5839         else {
5840             if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5841                 check_uni();
5842             OPERATOR(PERLY_MINUS);              /* unary minus */
5843         }
5844     }
5845 }
5846 
5847 static int
yyl_plus(pTHX_ char * s)5848 yyl_plus(pTHX_ char *s)
5849 {
5850     const char tmp = *s++;
5851     if (*s == tmp) {
5852         s++;
5853         if (PL_expect == XOPERATOR)
5854             TERM(POSTINC);
5855         else
5856             OPERATOR(PREINC);
5857     }
5858     if (PL_expect == XOPERATOR) {
5859         if (*s == '='
5860             && !PL_lex_allbrackets
5861             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5862         {
5863             s--;
5864             TOKEN(0);
5865         }
5866         Aop(OP_ADD);
5867     }
5868     else {
5869         if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5870             check_uni();
5871         OPERATOR(PERLY_PLUS);
5872     }
5873 }
5874 
5875 static int
yyl_star(pTHX_ char * s)5876 yyl_star(pTHX_ char *s)
5877 {
5878     if (PL_expect == XPOSTDEREF)
5879         POSTDEREF(PERLY_STAR);
5880 
5881     if (PL_expect != XOPERATOR) {
5882         s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5883         PL_expect = XOPERATOR;
5884         force_ident(PL_tokenbuf, PERLY_STAR);
5885         if (!*PL_tokenbuf)
5886             PREREF(PERLY_STAR);
5887         TERM(PERLY_STAR);
5888     }
5889 
5890     s++;
5891     if (*s == '*') {
5892         s++;
5893         if (*s == '=' && !PL_lex_allbrackets
5894             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5895         {
5896             s -= 2;
5897             TOKEN(0);
5898         }
5899         PWop(OP_POW);
5900     }
5901 
5902     if (*s == '='
5903         && !PL_lex_allbrackets
5904         && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5905     {
5906         s--;
5907         TOKEN(0);
5908     }
5909 
5910     Mop(OP_MULTIPLY);
5911 }
5912 
5913 static int
yyl_percent(pTHX_ char * s)5914 yyl_percent(pTHX_ char *s)
5915 {
5916     if (PL_expect == XOPERATOR) {
5917         if (s[1] == '='
5918             && !PL_lex_allbrackets
5919             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5920         {
5921             TOKEN(0);
5922         }
5923         ++s;
5924         Mop(OP_MODULO);
5925     }
5926     else if (PL_expect == XPOSTDEREF)
5927         POSTDEREF(PERLY_PERCENT_SIGN);
5928 
5929     PL_tokenbuf[0] = '%';
5930     s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5931     pl_yylval.ival = 0;
5932     if (!PL_tokenbuf[1]) {
5933         PREREF(PERLY_PERCENT_SIGN);
5934     }
5935     if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5936         && intuit_more(s, PL_bufend)) {
5937         if (*s == '[')
5938             PL_tokenbuf[0] = '@';
5939     }
5940     PL_expect = XOPERATOR;
5941     force_ident_maybe_lex('%');
5942     TERM(PERLY_PERCENT_SIGN);
5943 }
5944 
5945 static int
yyl_caret(pTHX_ char * s)5946 yyl_caret(pTHX_ char *s)
5947 {
5948     char *d = s;
5949     const bool bof = cBOOL(FEATURE_BITWISE_IS_ENABLED);
5950     if (bof && s[1] == '.')
5951         s++;
5952     if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5953             (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5954     {
5955         s = d;
5956         TOKEN(0);
5957     }
5958     s++;
5959     BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR);
5960 }
5961 
5962 static int
yyl_colon(pTHX_ char * s)5963 yyl_colon(pTHX_ char *s)
5964 {
5965     OP *attrs;
5966 
5967     switch (PL_expect) {
5968     case XOPERATOR:
5969         if (!PL_in_my || (PL_lex_state != LEX_NORMAL && !PL_lex_brackets))
5970             break;
5971         PL_bufptr = s;	/* update in case we back off */
5972         if (*s == '=') {
5973             Perl_croak(aTHX_
5974                        "Use of := for an empty attribute list is not allowed");
5975         }
5976         goto grabattrs;
5977     case XATTRBLOCK:
5978         PL_expect = XBLOCK;
5979         goto grabattrs;
5980     case XATTRTERM:
5981         PL_expect = XTERMBLOCK;
5982      grabattrs:
5983         /* NB: as well as parsing normal attributes, we also end up
5984          * here if there is something looking like attributes
5985          * following a signature (which is illegal, but used to be
5986          * legal in 5.20..5.26). If the latter, we still parse the
5987          * attributes so that error messages(s) are less confusing,
5988          * but ignore them (parser->sig_seen).
5989          */
5990         s = skipspace(s);
5991         attrs = NULL;
5992         while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5993             I32 tmp;
5994             SV *sv;
5995             STRLEN len;
5996             char *d = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len, FALSE);
5997             if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5998                 if (tmp < 0) tmp = -tmp;
5999                 switch (tmp) {
6000                 case KEY_or:
6001                 case KEY_and:
6002                 case KEY_for:
6003                 case KEY_foreach:
6004                 case KEY_unless:
6005                 case KEY_if:
6006                 case KEY_while:
6007                 case KEY_until:
6008                     goto got_attrs;
6009                 default:
6010                     break;
6011                 }
6012             }
6013             sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
6014             if (*d == '(') {
6015                 d = scan_str(d,TRUE,TRUE,FALSE,NULL);
6016                 if (!d) {
6017                     if (attrs)
6018                         op_free(attrs);
6019                     ASSUME(sv && SvREFCNT(sv) == 1);
6020                     SvREFCNT_dec(sv);
6021                     Perl_croak(aTHX_ "Unterminated attribute parameter in attribute list");
6022                 }
6023                 COPLINE_SET_FROM_MULTI_END;
6024             }
6025             if (PL_lex_stuff) {
6026                 sv_catsv(sv, PL_lex_stuff);
6027                 attrs = op_append_elem(OP_LIST, attrs,
6028                                     newSVOP(OP_CONST, 0, sv));
6029                 SvREFCNT_dec_NN(PL_lex_stuff);
6030                 PL_lex_stuff = NULL;
6031             }
6032             else {
6033                 attrs = op_append_elem(OP_LIST, attrs,
6034                                     newSVOP(OP_CONST, 0, sv));
6035             }
6036             s = skipspace(d);
6037             if (*s == ':' && s[1] != ':')
6038                 s = skipspace(s+1);
6039             else if (s == d)
6040                 break;	/* require real whitespace or :'s */
6041             /* XXX losing whitespace on sequential attributes here */
6042         }
6043 
6044         if (*s != ';'
6045             && *s != '}'
6046             && !(PL_expect == XOPERATOR
6047                    /* if an operator is expected, permit =, //= and ||= or ) to end */
6048                  ? (*s == '=' || *s == ')' || *s == '/' || *s == '|')
6049                  : (*s == '{' || *s == '(')))
6050         {
6051             const char q = ((*s == '\'') ? '"' : '\'');
6052             /* If here for an expression, and parsed no attrs, back off. */
6053             if (PL_expect == XOPERATOR && !attrs) {
6054                 s = PL_bufptr;
6055                 break;
6056             }
6057             /* MUST advance bufptr here to avoid bogus "at end of line"
6058                context messages from yyerror().
6059             */
6060             PL_bufptr = s;
6061             yyerror( (const char *)
6062                      (*s
6063                       ? Perl_form(aTHX_ "Invalid separator character "
6064                                   "%c%c%c in attribute list", q, *s, q)
6065                       : "Unterminated attribute list" ) );
6066             if (attrs)
6067                 op_free(attrs);
6068             OPERATOR(PERLY_COLON);
6069         }
6070 
6071     got_attrs:
6072         if (PL_parser->sig_seen) {
6073             /* see comment about about sig_seen and parser error
6074              * handling */
6075             if (attrs)
6076                 op_free(attrs);
6077             Perl_croak(aTHX_ "Subroutine attributes must come "
6078                              "before the signature");
6079         }
6080         if (attrs) {
6081             NEXTVAL_NEXTTOKE.opval = attrs;
6082             force_next(THING);
6083         }
6084         TOKEN(COLONATTR);
6085     }
6086 
6087     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
6088         s--;
6089         TOKEN(0);
6090     }
6091 
6092     PL_lex_allbrackets--;
6093     OPERATOR(PERLY_COLON);
6094 }
6095 
6096 static int
yyl_subproto(pTHX_ char * s,CV * cv)6097 yyl_subproto(pTHX_ char *s, CV *cv)
6098 {
6099     STRLEN protolen = CvPROTOLEN(cv);
6100     const char *proto = CvPROTO(cv);
6101     bool optional;
6102 
6103     proto = S_strip_spaces(aTHX_ proto, &protolen);
6104     if (!protolen)
6105         TERM(FUNC0SUB);
6106     if ((optional = *proto == ';')) {
6107         do {
6108             proto++;
6109         } while (*proto == ';');
6110     }
6111 
6112     if (
6113         (
6114             (
6115                 *proto == '$' || *proto == '_'
6116              || *proto == '*' || *proto == '+'
6117             )
6118          && proto[1] == '\0'
6119         )
6120      || (
6121          *proto == '\\' && proto[1] && proto[2] == '\0'
6122         )
6123     ) {
6124         UNIPROTO(UNIOPSUB,optional);
6125     }
6126 
6127     if (*proto == '\\' && proto[1] == '[') {
6128         const char *p = proto + 2;
6129         while(*p && *p != ']')
6130             ++p;
6131         if(*p == ']' && !p[1])
6132             UNIPROTO(UNIOPSUB,optional);
6133     }
6134 
6135     if (*proto == '&' && *s == '{') {
6136         if (PL_curstash)
6137             sv_setpvs(PL_subname, "__ANON__");
6138         else
6139             sv_setpvs(PL_subname, "__ANON__::__ANON__");
6140         if (!PL_lex_allbrackets
6141             && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6142         {
6143             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6144         }
6145         PREBLOCK(LSTOPSUB);
6146     }
6147 
6148     return KEY_NULL;
6149 }
6150 
6151 static int
yyl_leftcurly(pTHX_ char * s,const U8 formbrack)6152 yyl_leftcurly(pTHX_ char *s, const U8 formbrack)
6153 {
6154     char *d;
6155     if (PL_lex_brackets > 100) {
6156         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6157     }
6158 
6159     switch (PL_expect) {
6160     case XTERM:
6161     case XTERMORDORDOR:
6162         PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6163         PL_lex_allbrackets++;
6164         OPERATOR(HASHBRACK);
6165     case XOPERATOR:
6166         while (s < PL_bufend && SPACE_OR_TAB(*s))
6167             s++;
6168         d = s;
6169         PL_tokenbuf[0] = '\0';
6170         if (d < PL_bufend && *d == '-') {
6171             PL_tokenbuf[0] = '-';
6172             d++;
6173             while (d < PL_bufend && SPACE_OR_TAB(*d))
6174                 d++;
6175         }
6176         if (d < PL_bufend && isIDFIRST_lazy_if_safe(d, PL_bufend, UTF)) {
6177             STRLEN len;
6178             d = scan_word6(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6179                           FALSE, &len, FALSE);
6180             while (d < PL_bufend && SPACE_OR_TAB(*d))
6181                 d++;
6182             if (*d == '}') {
6183                 const char minus = (PL_tokenbuf[0] == '-');
6184                 s = force_word(s + minus, BAREWORD, FALSE, TRUE);
6185                 if (minus)
6186                     force_next(PERLY_MINUS);
6187             }
6188         }
6189         /* FALLTHROUGH */
6190     case XATTRTERM:
6191     case XTERMBLOCK:
6192         PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6193         PL_lex_allbrackets++;
6194         PL_expect = XSTATE;
6195         break;
6196     case XATTRBLOCK:
6197     case XBLOCK:
6198         PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
6199         PL_lex_allbrackets++;
6200         PL_expect = XSTATE;
6201         break;
6202     case XBLOCKTERM:
6203         PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6204         PL_lex_allbrackets++;
6205         PL_expect = XSTATE;
6206         break;
6207     default: {
6208             const char *t;
6209             if (PL_oldoldbufptr == PL_last_lop)
6210                 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6211             else
6212                 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6213             PL_lex_allbrackets++;
6214             s = skipspace(s);
6215             if (*s == '}') {
6216                 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6217                     PL_expect = XTERM;
6218                     /* This hack is to get the ${} in the message. */
6219                     PL_bufptr = s+1;
6220                     yyerror("syntax error");
6221                     yyquit();
6222                     break;
6223                 }
6224                 OPERATOR(HASHBRACK);
6225             }
6226             if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
6227                 /* ${...} or @{...} etc., but not print {...}
6228                  * Skip the disambiguation and treat this as a block.
6229                  */
6230                 goto block_expectation;
6231             }
6232             /* This hack serves to disambiguate a pair of curlies
6233              * as being a block or an anon hash.  Normally, expectation
6234              * determines that, but in cases where we're not in a
6235              * position to expect anything in particular (like inside
6236              * eval"") we have to resolve the ambiguity.  This code
6237              * covers the case where the first term in the curlies is a
6238              * quoted string.  Most other cases need to be explicitly
6239              * disambiguated by prepending a "+" before the opening
6240              * curly in order to force resolution as an anon hash.
6241              *
6242              * XXX should probably propagate the outer expectation
6243              * into eval"" to rely less on this hack, but that could
6244              * potentially break current behavior of eval"".
6245              * GSAR 97-07-21
6246              */
6247             t = s;
6248             if (*s == '\'' || *s == '"' || *s == '`') {
6249                 /* common case: get past first string, handling escapes */
6250                 for (t++; t < PL_bufend && *t != *s;)
6251                     if (*t++ == '\\')
6252                         t++;
6253                 t++;
6254             }
6255             else if (*s == 'q') {
6256                 if (++t < PL_bufend
6257                     && (!isWORDCHAR(*t)
6258                         || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6259                             && !isWORDCHAR(*t))))
6260                 {
6261                     /* skip q//-like construct */
6262                     const char *tmps;
6263                     char open, close, term;
6264                     I32 brackets = 1;
6265 
6266                     while (t < PL_bufend && isSPACE(*t))
6267                         t++;
6268                     /* check for q => */
6269                     if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6270                         OPERATOR(HASHBRACK);
6271                     }
6272                     term = *t;
6273                     open = term;
6274                     if (term && (tmps = memCHRs("([{< )]}> )]}>",term)))
6275                         term = tmps[5];
6276                     close = term;
6277                     if (open == close)
6278                         for (t++; t < PL_bufend; t++) {
6279                             if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6280                                 t++;
6281                             else if (*t == open)
6282                                 break;
6283                         }
6284                     else {
6285                         for (t++; t < PL_bufend; t++) {
6286                             if (*t == '\\' && t+1 < PL_bufend)
6287                                 t++;
6288                             else if (*t == close && --brackets <= 0)
6289                                 break;
6290                             else if (*t == open)
6291                                 brackets++;
6292                         }
6293                     }
6294                     t++;
6295                 }
6296                 else
6297                     /* skip plain q word */
6298                     while (   t < PL_bufend
6299                            && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6300                     {
6301                         t += UTF ? UTF8SKIP(t) : 1;
6302                     }
6303             }
6304             else if (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) {
6305                 t += UTF ? UTF8SKIP(t) : 1;
6306                 while (   t < PL_bufend
6307                        && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6308                 {
6309                     t += UTF ? UTF8SKIP(t) : 1;
6310                 }
6311             }
6312             while (t < PL_bufend && isSPACE(*t))
6313                 t++;
6314             /* if comma follows first term, call it an anon hash */
6315             /* XXX it could be a comma expression with loop modifiers */
6316             if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6317                                || (*t == '=' && t[1] == '>')))
6318                 OPERATOR(HASHBRACK);
6319             if (PL_expect == XREF) {
6320               block_expectation:
6321                 /* If there is an opening brace or 'sub:', treat it
6322                    as a term to make ${{...}}{k} and &{sub:attr...}
6323                    dwim.  Otherwise, treat it as a statement, so
6324                    map {no strict; ...} works.
6325                  */
6326                 s = skipspace(s);
6327                 if (*s == '{') {
6328                     PL_expect = XTERM;
6329                     break;
6330                 }
6331                 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "sub")) {
6332                     PL_bufptr = s;
6333                     d = s + 3;
6334                     d = skipspace(d);
6335                     s = PL_bufptr;
6336                     if (*d == ':') {
6337                         PL_expect = XTERM;
6338                         break;
6339                     }
6340                 }
6341                 PL_expect = XSTATE;
6342             }
6343             else {
6344                 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6345                 PL_expect = XSTATE;
6346             }
6347         }
6348         break;
6349     }
6350 
6351     pl_yylval.ival = CopLINE(PL_curcop);
6352     PL_copline = NOLINE;   /* invalidate current command line number */
6353     TOKEN(formbrack ? PERLY_EQUAL_SIGN : PERLY_BRACE_OPEN);
6354 }
6355 
6356 static int
yyl_rightcurly(pTHX_ char * s,const U8 formbrack)6357 yyl_rightcurly(pTHX_ char *s, const U8 formbrack)
6358 {
6359     assert(s != PL_bufend);
6360     s++;
6361 
6362     if (PL_lex_brackets <= 0)
6363         /* diag_listed_as: Unmatched right %s bracket */
6364         yyerror("Unmatched right curly bracket");
6365     else
6366         PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6367 
6368     PL_lex_allbrackets--;
6369 
6370     if (PL_lex_state == LEX_INTERPNORMAL) {
6371         if (PL_lex_brackets == 0) {
6372             if (PL_expect & XFAKEBRACK) {
6373                 PL_expect &= XENUMMASK;
6374                 PL_lex_state = LEX_INTERPEND;
6375                 PL_bufptr = s;
6376                 return yylex();	/* ignore fake brackets */
6377             }
6378             if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6379              && SvEVALED(PL_lex_repl))
6380                 PL_lex_state = LEX_INTERPEND;
6381             else if (*s == '-' && s[1] == '>')
6382                 PL_lex_state = LEX_INTERPENDMAYBE;
6383             else if (*s != '[' && *s != '{')
6384                 PL_lex_state = LEX_INTERPEND;
6385         }
6386     }
6387 
6388     if (PL_expect & XFAKEBRACK) {
6389         PL_expect &= XENUMMASK;
6390         PL_bufptr = s;
6391         return yylex();		/* ignore fake brackets */
6392     }
6393 
6394     force_next(formbrack ? PERLY_DOT : PERLY_BRACE_CLOSE);
6395     if (formbrack) LEAVE_with_name("lex_format");
6396     if (formbrack == 2) { /* means . where arguments were expected */
6397         force_next(PERLY_SEMICOLON);
6398         TOKEN(FORMRBRACK);
6399     }
6400 
6401     TOKEN(PERLY_SEMICOLON);
6402 }
6403 
6404 static int
yyl_ampersand(pTHX_ char * s)6405 yyl_ampersand(pTHX_ char *s)
6406 {
6407     if (PL_expect == XPOSTDEREF)
6408         POSTDEREF(PERLY_AMPERSAND);
6409 
6410     s++;
6411     if (*s++ == '&') {
6412         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6413                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6414             s -= 2;
6415             TOKEN(0);
6416         }
6417         AOPERATOR(ANDAND);
6418     }
6419     s--;
6420 
6421     if (PL_expect == XOPERATOR) {
6422         char *d;
6423         bool bof;
6424         if (   PL_bufptr == PL_linestart
6425             && ckWARN(WARN_SEMICOLON)
6426             && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
6427         {
6428             CopLINE_dec(PL_curcop);
6429             Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6430             CopLINE_inc(PL_curcop);
6431         }
6432         d = s;
6433         if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6434             s++;
6435         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6436                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6437             s = d;
6438             s--;
6439             TOKEN(0);
6440         }
6441         if (d == s)
6442             BAop(bof ? OP_NBIT_AND : OP_BIT_AND);
6443         else
6444             BAop(OP_SBIT_AND);
6445     }
6446 
6447     PL_tokenbuf[0] = '&';
6448     s = scan_ident(s - 1, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
6449     pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6450 
6451     if (PL_tokenbuf[1])
6452         force_ident_maybe_lex('&');
6453     else
6454         PREREF(PERLY_AMPERSAND);
6455 
6456     TERM(PERLY_AMPERSAND);
6457 }
6458 
6459 static int
yyl_verticalbar(pTHX_ char * s)6460 yyl_verticalbar(pTHX_ char *s)
6461 {
6462     char *d;
6463     bool bof;
6464 
6465     s++;
6466     if (*s++ == '|') {
6467         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6468                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6469             s -= 2;
6470             TOKEN(0);
6471         }
6472         AOPERATOR(OROR);
6473     }
6474 
6475     s--;
6476     d = s;
6477     if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6478         s++;
6479 
6480     if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6481             (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6482         s = d - 1;
6483         TOKEN(0);
6484     }
6485 
6486     BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR);
6487 }
6488 
6489 static int
yyl_bang(pTHX_ char * s)6490 yyl_bang(pTHX_ char *s)
6491 {
6492     const char tmp = *s++;
6493     if (tmp == '=') {
6494         /* was this !=~ where !~ was meant?
6495          * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6496 
6497         if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6498             const char *t = s+1;
6499 
6500             while (t < PL_bufend && isSPACE(*t))
6501                 ++t;
6502 
6503             if (*t == '/' || *t == '?'
6504                 || ((*t == 'm' || *t == 's' || *t == 'y')
6505                     && !isWORDCHAR(t[1]))
6506                 || (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6507                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6508                             "!=~ should be !~");
6509         }
6510 
6511         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6512             s -= 2;
6513             TOKEN(0);
6514         }
6515 
6516         ChEop(OP_NE);
6517     }
6518 
6519     if (tmp == '~')
6520         PMop(OP_NOT);
6521 
6522     s--;
6523     OPERATOR(PERLY_EXCLAMATION_MARK);
6524 }
6525 
6526 static int
yyl_snail(pTHX_ char * s)6527 yyl_snail(pTHX_ char *s)
6528 {
6529     if (PL_expect == XPOSTDEREF)
6530         POSTDEREF(PERLY_SNAIL);
6531     PL_tokenbuf[0] = '@';
6532     s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6533     if (PL_expect == XOPERATOR) {
6534         char *d = s;
6535         if (PL_bufptr > s) {
6536             d = PL_bufptr-1;
6537             PL_bufptr = PL_oldbufptr;
6538         }
6539         no_op("Array", d);
6540     }
6541     pl_yylval.ival = 0;
6542     if (!PL_tokenbuf[1]) {
6543         PREREF(PERLY_SNAIL);
6544     }
6545     if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6546         s = skipspace(s);
6547     if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6548         && intuit_more(s, PL_bufend))
6549     {
6550         if (*s == '{')
6551             PL_tokenbuf[0] = '%';
6552 
6553         /* Warn about @ where they meant $. */
6554         if (*s == '[' || *s == '{') {
6555             if (ckWARN(WARN_SYNTAX)) {
6556                 S_check_scalar_slice(aTHX_ s);
6557             }
6558         }
6559     }
6560     PL_expect = XOPERATOR;
6561     force_ident_maybe_lex('@');
6562     TERM(PERLY_SNAIL);
6563 }
6564 
6565 static int
yyl_slash(pTHX_ char * s)6566 yyl_slash(pTHX_ char *s)
6567 {
6568     if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') {
6569         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6570                 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6571             TOKEN(0);
6572         s += 2;
6573         AOPERATOR(DORDOR);
6574     }
6575     else if (PL_expect == XOPERATOR) {
6576         s++;
6577         if (*s == '=' && !PL_lex_allbrackets
6578             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6579         {
6580             s--;
6581             TOKEN(0);
6582         }
6583         Mop(OP_DIVIDE);
6584     }
6585     else {
6586         /* Disable warning on "study /blah/" */
6587         if (    PL_oldoldbufptr == PL_last_uni
6588             && (   *PL_last_uni != 's' || s - PL_last_uni < 5
6589                 || memNE(PL_last_uni, "study", 5)
6590                 || isWORDCHAR_lazy_if_safe(PL_last_uni+5, PL_bufend, UTF)
6591          ))
6592             check_uni();
6593         s = scan_pat(s,OP_MATCH);
6594         TERM(sublex_start());
6595     }
6596 }
6597 
6598 static int
yyl_leftsquare(pTHX_ char * s)6599 yyl_leftsquare(pTHX_ char *s)
6600 {
6601     if (PL_lex_brackets > 100)
6602         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6603     PL_lex_brackstack[PL_lex_brackets++] = 0;
6604     PL_lex_allbrackets++;
6605     s++;
6606     OPERATOR(PERLY_BRACKET_OPEN);
6607 }
6608 
6609 static int
yyl_rightsquare(pTHX_ char * s)6610 yyl_rightsquare(pTHX_ char *s)
6611 {
6612     if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6613         TOKEN(0);
6614     s++;
6615     if (PL_lex_brackets <= 0)
6616         /* diag_listed_as: Unmatched right %s bracket */
6617         yyerror("Unmatched right square bracket");
6618     else
6619         --PL_lex_brackets;
6620     PL_lex_allbrackets--;
6621     if (PL_lex_state == LEX_INTERPNORMAL) {
6622         if (PL_lex_brackets == 0) {
6623             if (*s == '-' && s[1] == '>')
6624                 PL_lex_state = LEX_INTERPENDMAYBE;
6625             else if (*s != '[' && *s != '{')
6626                 PL_lex_state = LEX_INTERPEND;
6627         }
6628     }
6629     TERM(PERLY_BRACKET_CLOSE);
6630 }
6631 
6632 static int
yyl_tilde(pTHX_ char * s)6633 yyl_tilde(pTHX_ char *s)
6634 {
6635     bool bof;
6636     if (s[1] == '~' && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)) {
6637         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6638             TOKEN(0);
6639         s += 2;
6640         Perl_ck_warner_d(aTHX_
6641             packWARN(WARN_DEPRECATED__SMARTMATCH),
6642             "Smartmatch is deprecated");
6643         NCEop(OP_SMARTMATCH);
6644     }
6645     s++;
6646     if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') {
6647         s++;
6648         BCop(OP_SCOMPLEMENT);
6649     }
6650     BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT);
6651 }
6652 
6653 static int
yyl_leftparen(pTHX_ char * s)6654 yyl_leftparen(pTHX_ char *s)
6655 {
6656     if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
6657         PL_oldbufptr = PL_oldoldbufptr;		/* allow print(STDOUT 123) */
6658     else
6659         PL_expect = XTERM;
6660     s = skipspace(s);
6661     PL_lex_allbrackets++;
6662     TOKEN(PERLY_PAREN_OPEN);
6663 }
6664 
6665 static int
yyl_rightparen(pTHX_ char * s)6666 yyl_rightparen(pTHX_ char *s)
6667 {
6668     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
6669         TOKEN(0);
6670     s++;
6671     PL_lex_allbrackets--;
6672     s = skipspace(s);
6673     if (*s == '{')
6674         PREBLOCK(PERLY_PAREN_CLOSE);
6675     TERM(PERLY_PAREN_CLOSE);
6676 }
6677 
6678 static int
yyl_leftpointy(pTHX_ char * s)6679 yyl_leftpointy(pTHX_ char *s)
6680 {
6681     char tmp;
6682 
6683     if (PL_expect != XOPERATOR) {
6684         if (s[1] != '<' && !memchr(s,'>', PL_bufend - s))
6685             check_uni();
6686         if (s[1] == '<' && s[2] != '>')
6687             s = scan_heredoc(s);
6688         else
6689             s = scan_inputsymbol(s);
6690         PL_expect = XOPERATOR;
6691         TOKEN(sublex_start());
6692     }
6693 
6694     s++;
6695 
6696     tmp = *s++;
6697     if (tmp == '<') {
6698         if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6699             s -= 2;
6700             TOKEN(0);
6701         }
6702         SHop(OP_LEFT_SHIFT);
6703     }
6704     if (tmp == '=') {
6705         tmp = *s++;
6706         if (tmp == '>') {
6707             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6708                 s -= 3;
6709                 TOKEN(0);
6710             }
6711             NCEop(OP_NCMP);
6712         }
6713         s--;
6714         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6715             s -= 2;
6716             TOKEN(0);
6717         }
6718         ChRop(OP_LE);
6719     }
6720 
6721     s--;
6722     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6723         s--;
6724         TOKEN(0);
6725     }
6726 
6727     ChRop(OP_LT);
6728 }
6729 
6730 static int
yyl_rightpointy(pTHX_ char * s)6731 yyl_rightpointy(pTHX_ char *s)
6732 {
6733     const char tmp = *s++;
6734 
6735     if (tmp == '>') {
6736         if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6737             s -= 2;
6738             TOKEN(0);
6739         }
6740         SHop(OP_RIGHT_SHIFT);
6741     }
6742     else if (tmp == '=') {
6743         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6744             s -= 2;
6745             TOKEN(0);
6746         }
6747         ChRop(OP_GE);
6748     }
6749 
6750     s--;
6751     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6752         s--;
6753         TOKEN(0);
6754     }
6755 
6756     ChRop(OP_GT);
6757 }
6758 
6759 static int
yyl_sglquote(pTHX_ char * s)6760 yyl_sglquote(pTHX_ char *s)
6761 {
6762     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6763     if (!s)
6764         missingterm(NULL, 0);
6765     COPLINE_SET_FROM_MULTI_END;
6766     DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6767     if (PL_expect == XOPERATOR) {
6768         no_op("String",s);
6769     }
6770     pl_yylval.ival = OP_CONST;
6771     TERM(sublex_start());
6772 }
6773 
6774 static int
yyl_dblquote(pTHX_ char * s)6775 yyl_dblquote(pTHX_ char *s)
6776 {
6777     char *d;
6778     STRLEN len;
6779     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6780     DEBUG_T( {
6781         if (s)
6782             printbuf("### Saw string before %s\n", s);
6783         else
6784             PerlIO_printf(Perl_debug_log,
6785                          "### Saw unterminated string\n");
6786     } );
6787     if (PL_expect == XOPERATOR) {
6788             no_op("String",s);
6789     }
6790     if (!s)
6791         missingterm(NULL, 0);
6792     pl_yylval.ival = OP_CONST;
6793     /* FIXME. I think that this can be const if char *d is replaced by
6794        more localised variables.  */
6795     for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6796         if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6797             pl_yylval.ival = OP_STRINGIFY;
6798             break;
6799         }
6800     }
6801     if (pl_yylval.ival == OP_CONST)
6802         COPLINE_SET_FROM_MULTI_END;
6803     TERM(sublex_start());
6804 }
6805 
6806 static int
yyl_backtick(pTHX_ char * s)6807 yyl_backtick(pTHX_ char *s)
6808 {
6809     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6810     DEBUG_T( {
6811         if (s)
6812             printbuf("### Saw backtick string before %s\n", s);
6813         else
6814             PerlIO_printf(Perl_debug_log,
6815                          "### Saw unterminated backtick string\n");
6816     } );
6817     if (PL_expect == XOPERATOR)
6818         no_op("Backticks",s);
6819     if (!s)
6820         missingterm(NULL, 0);
6821     pl_yylval.ival = OP_BACKTICK;
6822     TERM(sublex_start());
6823 }
6824 
6825 static int
yyl_backslash(pTHX_ char * s)6826 yyl_backslash(pTHX_ char *s)
6827 {
6828     if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr && isDIGIT(*s))
6829         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6830                        *s, *s);
6831     if (PL_expect == XOPERATOR)
6832         no_op("Backslash",s);
6833     OPERATOR(REFGEN);
6834 }
6835 
6836 static void
yyl_data_handle(pTHX)6837 yyl_data_handle(pTHX)
6838 {
6839     HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
6840                             ? PL_curstash
6841                             : PL_defstash;
6842     GV *gv = (GV *)*hv_fetchs(stash, "DATA", 1);
6843 
6844     if (!isGV(gv))
6845         gv_init(gv,stash,"DATA",4,0);
6846 
6847     GvMULTI_on(gv);
6848     if (!GvIO(gv))
6849         GvIOp(gv) = newIO();
6850     IoIFP(GvIOp(gv)) = PL_rsfp;
6851 
6852     /* Mark this internal pseudo-handle as clean */
6853     IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6854     if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6855         IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6856     else
6857         IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6858 
6859 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6860     /* if the script was opened in binmode, we need to revert
6861      * it to text mode for compatibility; but only iff it has CRs
6862      * XXX this is a questionable hack at best. */
6863     if (PL_bufend-PL_bufptr > 2
6864         && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6865     {
6866         Off_t loc = 0;
6867         if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6868             loc = PerlIO_tell(PL_rsfp);
6869             (void)PerlIO_seek(PL_rsfp, 0L, 0);
6870         }
6871         if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
6872             if (loc > 0)
6873                 PerlIO_seek(PL_rsfp, loc, 0);
6874         }
6875     }
6876 #endif
6877 
6878 #ifdef PERLIO_LAYERS
6879     if (!IN_BYTES) {
6880         if (UTF)
6881             PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6882     }
6883 #endif
6884 
6885     PL_rsfp = NULL;
6886 }
6887 
6888 PERL_STATIC_NO_RET void yyl_croak_unrecognised(pTHX_ char*)
6889     __attribute__noreturn__;
6890 
6891 PERL_STATIC_NO_RET void
yyl_croak_unrecognised(pTHX_ char * s)6892 yyl_croak_unrecognised(pTHX_ char *s)
6893 {
6894     SV *dsv = newSVpvs_flags("", SVs_TEMP);
6895     const char *c;
6896     char *d;
6897     STRLEN len;
6898 
6899     if (UTF) {
6900         STRLEN skiplen = UTF8SKIP(s);
6901         STRLEN stravail = PL_bufend - s;
6902         c = sv_uni_display(dsv, newSVpvn_flags(s,
6903                                                skiplen > stravail ? stravail : skiplen,
6904                                                SVs_TEMP | SVf_UTF8),
6905                            10, UNI_DISPLAY_ISPRINT);
6906     }
6907     else {
6908         c = Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
6909     }
6910 
6911     if (s >= PL_linestart) {
6912         d = PL_linestart;
6913     }
6914     else {
6915         /* somehow (probably due to a parse failure), PL_linestart has advanced
6916          * pass PL_bufptr, get a reasonable beginning of line
6917          */
6918         d = s;
6919         while (d > SvPVX(PL_linestr) && d[-1] && d[-1] != '\n')
6920             --d;
6921     }
6922     len = UTF ? Perl_utf8_length(aTHX_ (U8 *) d, (U8 *) s) : (STRLEN) (s - d);
6923     if (len > UNRECOGNIZED_PRECEDE_COUNT) {
6924         d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)d) : s - UNRECOGNIZED_PRECEDE_COUNT;
6925     }
6926 
6927     Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %" UTF8f "<-- HERE near column %d", c,
6928                       UTF8fARG(UTF, (s - d), d),
6929                      (int) len + 1);
6930 }
6931 
6932 static int
yyl_require(pTHX_ char * s,I32 orig_keyword)6933 yyl_require(pTHX_ char *s, I32 orig_keyword)
6934 {
6935     s = skipspace(s);
6936     if (isDIGIT(*s)) {
6937         s = force_version(s, FALSE);
6938     }
6939     else if (*s != 'v' || !isDIGIT(s[1])
6940             || (s = force_version(s, TRUE), *s == 'v'))
6941     {
6942         *PL_tokenbuf = '\0';
6943         s = force_word(s,BAREWORD,TRUE,TRUE);
6944         if (isIDFIRST_lazy_if_safe(PL_tokenbuf,
6945                                    PL_tokenbuf + sizeof(PL_tokenbuf),
6946                                    UTF))
6947         {
6948             gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
6949                         GV_ADD | (UTF ? SVf_UTF8 : 0));
6950         }
6951         else if (*s == '<')
6952             yyerror("<> at require-statement should be quotes");
6953     }
6954 
6955     if (orig_keyword == KEY_require)
6956         pl_yylval.ival = 1;
6957     else
6958         pl_yylval.ival = 0;
6959 
6960     PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
6961     PL_bufptr = s;
6962     PL_last_uni = PL_oldbufptr;
6963     PL_last_lop_op = OP_REQUIRE;
6964     s = skipspace(s);
6965     return REPORT( (int)KW_REQUIRE );
6966 }
6967 
6968 static int
yyl_foreach(pTHX_ char * s)6969 yyl_foreach(pTHX_ char *s)
6970 {
6971     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
6972         return REPORT(0);
6973     pl_yylval.ival = CopLINE(PL_curcop);
6974     s = skipspace(s);
6975     if (PL_expect == XSTATE && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
6976         char *p = s;
6977         SSize_t s_off = s - SvPVX(PL_linestr);
6978         bool paren_is_valid = FALSE;
6979         bool maybe_package = FALSE;
6980         bool saw_core = FALSE;
6981         bool core_valid = FALSE;
6982 
6983         if (UNLIKELY(memBEGINPs(p, (STRLEN) (PL_bufend - p), "CORE::"))) {
6984             saw_core = TRUE;
6985             p += 6;
6986         }
6987         if (LIKELY(memBEGINPs(p, (STRLEN) (PL_bufend - p), "my"))) {
6988             core_valid = TRUE;
6989             paren_is_valid = TRUE;
6990             if (isSPACE(p[2])) {
6991                 p = skipspace(p + 3);
6992                 maybe_package = TRUE;
6993             }
6994             else {
6995                 p += 2;
6996             }
6997         }
6998         else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "our")) {
6999             core_valid = TRUE;
7000             if (isSPACE(p[3])) {
7001                 p = skipspace(p + 4);
7002                 maybe_package = TRUE;
7003             }
7004             else {
7005                 p += 3;
7006             }
7007         }
7008         else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "state")) {
7009             core_valid = TRUE;
7010             if (isSPACE(p[5])) {
7011                 p = skipspace(p + 6);
7012             }
7013             else {
7014                 p += 5;
7015             }
7016         }
7017         if (saw_core && !core_valid) {
7018             Perl_croak(aTHX_ "Missing $ on loop variable");
7019         }
7020 
7021         if (maybe_package && !saw_core) {
7022             /* skip optional package name, as in "for my abc $x (..)" */
7023             if (UNLIKELY(isIDFIRST_lazy_if_safe(p, PL_bufend, UTF))) {
7024                 STRLEN len;
7025                 p = scan_word6(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len, TRUE);
7026                 p = skipspace(p);
7027                 paren_is_valid = FALSE;
7028             }
7029         }
7030 
7031         if (UNLIKELY(paren_is_valid && *p == '(')) {
7032             Perl_ck_warner_d(aTHX_
7033                              packWARN(WARN_EXPERIMENTAL__FOR_LIST),
7034                              "for my (...) is experimental");
7035         }
7036         else if (UNLIKELY(*p != '$' && *p != '\\')) {
7037             /* "for myfoo (" will end up here, but with p pointing at the 'f' */
7038             Perl_croak(aTHX_ "Missing $ on loop variable");
7039         }
7040         /* The buffer may have been reallocated, update s */
7041         s = SvPVX(PL_linestr) + s_off;
7042     }
7043     OPERATOR(KW_FOR);
7044 }
7045 
7046 static int
yyl_do(pTHX_ char * s,I32 orig_keyword)7047 yyl_do(pTHX_ char *s, I32 orig_keyword)
7048 {
7049     s = skipspace(s);
7050     if (*s == '{')
7051         PRETERMBLOCK(KW_DO);
7052     if (*s != '\'') {
7053         char *d;
7054         STRLEN len;
7055         *PL_tokenbuf = '&';
7056         d = scan_word6(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7057                       1, &len, TRUE);
7058         if (len && memNEs(PL_tokenbuf+1, len, "CORE")
7059          && !keyword(PL_tokenbuf + 1, len, 0)) {
7060             SSize_t off = s-SvPVX(PL_linestr);
7061             d = skipspace(d);
7062             s = SvPVX(PL_linestr)+off;
7063             if (*d == '(') {
7064                 force_ident_maybe_lex('&');
7065                 s = d;
7066             }
7067         }
7068     }
7069     if (orig_keyword == KEY_do)
7070         pl_yylval.ival = 1;
7071     else
7072         pl_yylval.ival = 0;
7073     OPERATOR(KW_DO);
7074 }
7075 
7076 static int
yyl_my(pTHX_ char * s,I32 my)7077 yyl_my(pTHX_ char *s, I32 my)
7078 {
7079     if (PL_in_my) {
7080         PL_bufptr = s;
7081         yyerror(Perl_form(aTHX_
7082                           "Can't redeclare \"%s\" in \"%s\"",
7083                            my       == KEY_my    ? "my" :
7084                            my       == KEY_state ? "state" : "our",
7085                            PL_in_my == KEY_my    ? "my" :
7086                            PL_in_my == KEY_state ? "state" : "our"));
7087     }
7088     PL_in_my = (U16)my;
7089     s = skipspace(s);
7090     if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
7091         STRLEN len;
7092         s = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len, TRUE);
7093         if (memEQs(PL_tokenbuf, len, "sub"))
7094             return yyl_sub(aTHX_ s, my);
7095         PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7096         if (!PL_in_my_stash) {
7097             char tmpbuf[1024];
7098             int i;
7099             PL_bufptr = s;
7100             i = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7101             PERL_MY_SNPRINTF_POST_GUARD(i, sizeof(tmpbuf));
7102             yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
7103         }
7104     }
7105     else if (*s == '\\') {
7106         if (!FEATURE_MYREF_IS_ENABLED)
7107             Perl_croak(aTHX_ "The experimental declared_refs "
7108                              "feature is not enabled");
7109         Perl_ck_warner_d(aTHX_
7110              packWARN(WARN_EXPERIMENTAL__DECLARED_REFS),
7111             "Declaring references is experimental");
7112     }
7113     OPERATOR(KW_MY);
7114 }
7115 
7116 static int yyl_try(pTHX_ char*);
7117 
7118 static bool
yyl_eol_needs_semicolon(pTHX_ char ** ps)7119 yyl_eol_needs_semicolon(pTHX_ char **ps)
7120 {
7121     char *s = *ps;
7122     if (PL_lex_state != LEX_NORMAL
7123         || (PL_in_eval && !PL_rsfp && !PL_parser->filtered))
7124     {
7125         const bool in_comment = *s == '#';
7126         char *d;
7127         if (*s == '#' && s == PL_linestart && PL_in_eval
7128          && !PL_rsfp && !PL_parser->filtered) {
7129             /* handle eval qq[#line 1 "foo"\n ...] */
7130             CopLINE_dec(PL_curcop);
7131             incline(s, PL_bufend);
7132         }
7133         d = s;
7134         while (d < PL_bufend && *d != '\n')
7135             d++;
7136         if (d < PL_bufend)
7137             d++;
7138         s = d;
7139         if (in_comment && d == PL_bufend
7140             && PL_lex_state == LEX_INTERPNORMAL
7141             && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
7142             && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
7143         else
7144             incline(s, PL_bufend);
7145         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
7146             PL_lex_state = LEX_FORMLINE;
7147             force_next(FORMRBRACK);
7148             *ps = s;
7149             return TRUE;
7150         }
7151     }
7152     else {
7153         while (s < PL_bufend && *s != '\n')
7154             s++;
7155         if (s < PL_bufend) {
7156             s++;
7157             if (s < PL_bufend)
7158                 incline(s, PL_bufend);
7159         }
7160     }
7161     *ps = s;
7162     return FALSE;
7163 }
7164 
7165 static int
yyl_fake_eof(pTHX_ U32 fake_eof,bool bof,char * s)7166 yyl_fake_eof(pTHX_ U32 fake_eof, bool bof, char *s)
7167 {
7168     char *d;
7169 
7170     goto start;
7171 
7172     do {
7173         fake_eof = 0;
7174         bof = cBOOL(PL_rsfp);
7175       start:
7176 
7177         PL_bufptr = PL_bufend;
7178         COPLINE_INC_WITH_HERELINES;
7179         if (!lex_next_chunk(fake_eof)) {
7180             CopLINE_dec(PL_curcop);
7181             s = PL_bufptr;
7182             TOKEN(PERLY_SEMICOLON);	/* not infinite loop because rsfp is NULL now */
7183         }
7184         CopLINE_dec(PL_curcop);
7185         s = PL_bufptr;
7186         /* If it looks like the start of a BOM or raw UTF-16,
7187          * check if it in fact is. */
7188         if (bof && PL_rsfp
7189             && (   *s == 0
7190                 || *(U8*)s == BOM_UTF8_FIRST_BYTE
7191                 || *(U8*)s >= 0xFE
7192                 || s[1] == 0))
7193         {
7194             Off_t offset = (IV)PerlIO_tell(PL_rsfp);
7195             bof = (offset == (Off_t)SvCUR(PL_linestr));
7196 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
7197             /* offset may include swallowed CR */
7198             if (!bof)
7199                 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
7200 #endif
7201             if (bof) {
7202                 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7203                 s = swallow_bom((U8*)s);
7204             }
7205         }
7206         if (PL_parser->in_pod) {
7207             /* Incest with pod. */
7208             if (    memBEGINPs(s, (STRLEN) (PL_bufend - s), "=cut")
7209                 && !isALPHA(s[4]))
7210             {
7211                 SvPVCLEAR(PL_linestr);
7212                 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
7213                 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7214                 PL_last_lop = PL_last_uni = NULL;
7215                 PL_parser->in_pod = 0;
7216             }
7217         }
7218         if (PL_rsfp || PL_parser->filtered)
7219             incline(s, PL_bufend);
7220     } while (PL_parser->in_pod);
7221 
7222     PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
7223     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7224     PL_last_lop = PL_last_uni = NULL;
7225     if (CopLINE(PL_curcop) == 1) {
7226         while (s < PL_bufend && isSPACE(*s))
7227             s++;
7228         if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
7229             s++;
7230         d = NULL;
7231         if (!PL_in_eval) {
7232             if (*s == '#' && *(s+1) == '!')
7233                 d = s + 2;
7234 #ifdef ALTERNATE_SHEBANG
7235             else {
7236                 static char const as[] = ALTERNATE_SHEBANG;
7237                 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
7238                     d = s + (sizeof(as) - 1);
7239             }
7240 #endif /* ALTERNATE_SHEBANG */
7241         }
7242         if (d) {
7243             char *ipath;
7244             char *ipathend;
7245 
7246             while (isSPACE(*d))
7247                 d++;
7248             ipath = d;
7249             while (*d && !isSPACE(*d))
7250                 d++;
7251             ipathend = d;
7252 
7253 #ifdef ARG_ZERO_IS_SCRIPT
7254             if (ipathend > ipath) {
7255                 /*
7256                  * HP-UX (at least) sets argv[0] to the script name,
7257                  * which makes $^X incorrect.  And Digital UNIX and Linux,
7258                  * at least, set argv[0] to the basename of the Perl
7259                  * interpreter. So, having found "#!", we'll set it right.
7260                  */
7261                 SV* copfilesv = CopFILESV(PL_curcop);
7262                 if (copfilesv) {
7263                     SV * const x =
7264                         GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
7265                                          SVt_PV)); /* $^X */
7266                     assert(SvPOK(x) || SvGMAGICAL(x));
7267                     if (sv_eq(x, copfilesv)) {
7268                         sv_setpvn(x, ipath, ipathend - ipath);
7269                         SvSETMAGIC(x);
7270                     }
7271                     else {
7272                         STRLEN blen;
7273                         STRLEN llen;
7274                         const char *bstart = SvPV_const(copfilesv, blen);
7275                         const char * const lstart = SvPV_const(x, llen);
7276                         if (llen < blen) {
7277                             bstart += blen - llen;
7278                             if (strnEQ(bstart, lstart, llen) &&	bstart[-1] == '/') {
7279                                 sv_setpvn(x, ipath, ipathend - ipath);
7280                                 SvSETMAGIC(x);
7281                             }
7282                         }
7283                     }
7284                 }
7285                 else {
7286                     /* Anything to do if no copfilesv? */
7287                 }
7288                 TAINT_NOT;	/* $^X is always tainted, but that's OK */
7289             }
7290 #endif /* ARG_ZERO_IS_SCRIPT */
7291 
7292             /*
7293              * Look for options.
7294              */
7295             d = instr(s,"perl -");
7296             if (!d) {
7297                 d = instr(s,"perl");
7298 #if defined(DOSISH)
7299                 /* avoid getting into infinite loops when shebang
7300                  * line contains "Perl" rather than "perl" */
7301                 if (!d) {
7302                     for (d = ipathend-4; d >= ipath; --d) {
7303                         if (isALPHA_FOLD_EQ(*d, 'p')
7304                             && !ibcmp(d, "perl", 4))
7305                         {
7306                             break;
7307                         }
7308                     }
7309                     if (d < ipath)
7310                         d = NULL;
7311                 }
7312 #endif
7313             }
7314 #ifdef ALTERNATE_SHEBANG
7315             /*
7316              * If the ALTERNATE_SHEBANG on this system starts with a
7317              * character that can be part of a Perl expression, then if
7318              * we see it but not "perl", we're probably looking at the
7319              * start of Perl code, not a request to hand off to some
7320              * other interpreter.  Similarly, if "perl" is there, but
7321              * not in the first 'word' of the line, we assume the line
7322              * contains the start of the Perl program.
7323              */
7324             if (d && *s != '#') {
7325                 const char *c = ipath;
7326                 while (*c && !memCHRs("; \t\r\n\f\v#", *c))
7327                     c++;
7328                 if (c < d)
7329                     d = NULL;	/* "perl" not in first word; ignore */
7330                 else
7331                     *s = '#';	/* Don't try to parse shebang line */
7332             }
7333 #endif /* ALTERNATE_SHEBANG */
7334             if (!d
7335                 && *s == '#'
7336                 && ipathend > ipath
7337                 && !PL_minus_c
7338                 && !instr(s,"indir")
7339                 && instr(PL_origargv[0],"perl"))
7340             {
7341                 char **newargv;
7342 
7343                 *ipathend = '\0';
7344                 s = ipathend + 1;
7345                 while (s < PL_bufend && isSPACE(*s))
7346                     s++;
7347                 if (s < PL_bufend) {
7348                     Newx(newargv,PL_origargc+3,char*);
7349                     newargv[1] = s;
7350                     while (s < PL_bufend && !isSPACE(*s))
7351                         s++;
7352                     *s = '\0';
7353                     Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
7354                 }
7355                 else
7356                     newargv = PL_origargv;
7357                 newargv[0] = ipath;
7358                 PERL_FPU_PRE_EXEC
7359                 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
7360                 PERL_FPU_POST_EXEC
7361                 Perl_croak(aTHX_ "Can't exec %s", ipath);
7362             }
7363             if (d) {
7364                 while (*d && !isSPACE(*d))
7365                     d++;
7366                 while (SPACE_OR_TAB(*d))
7367                     d++;
7368 
7369                 if (*d++ == '-') {
7370                     const bool switches_done = PL_doswitches;
7371                     const U32 oldpdb = PL_perldb;
7372                     const bool oldn = PL_minus_n;
7373                     const bool oldp = PL_minus_p;
7374                     const char *d1 = d;
7375 
7376                     do {
7377                         bool baduni = FALSE;
7378                         if (*d1 == 'C') {
7379                             const char *d2 = d1 + 1;
7380                             if (parse_unicode_opts((const char **)&d2)
7381                                 != PL_unicode)
7382                                 baduni = TRUE;
7383                         }
7384                         if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) {
7385                             const char * const m = d1;
7386                             while (*d1 && !isSPACE(*d1))
7387                                 d1++;
7388                             Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
7389                                   (int)(d1 - m), m);
7390                         }
7391                         d1 = moreswitches(d1);
7392                     } while (d1);
7393                     if (PL_doswitches && !switches_done) {
7394                         int argc = PL_origargc;
7395                         char **argv = PL_origargv;
7396                         do {
7397                             argc--,argv++;
7398                         } while (argc && argv[0][0] == '-' && argv[0][1]);
7399                         init_argv_symbols(argc,argv);
7400                     }
7401                     if (   (PERLDB_LINE_OR_SAVESRC && !oldpdb)
7402                         || ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
7403                           /* if we have already added "LINE: while (<>) {",
7404                              we must not do it again */
7405                     {
7406                         SvPVCLEAR(PL_linestr);
7407                         PL_bufptr = PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
7408                         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7409                         PL_last_lop = PL_last_uni = NULL;
7410                         PL_preambled = FALSE;
7411                         if (PERLDB_LINE_OR_SAVESRC)
7412                             (void)gv_fetchfile(PL_origfilename);
7413                         return YYL_RETRY;
7414                     }
7415                 }
7416             }
7417         }
7418     }
7419 
7420     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
7421         PL_lex_state = LEX_FORMLINE;
7422         force_next(FORMRBRACK);
7423         TOKEN(PERLY_SEMICOLON);
7424     }
7425 
7426     PL_bufptr = s;
7427     return YYL_RETRY;
7428 }
7429 
7430 static int
yyl_fatcomma(pTHX_ char * s,STRLEN len)7431 yyl_fatcomma(pTHX_ char *s, STRLEN len)
7432 {
7433     CLINE;
7434     pl_yylval.opval
7435         = newSVOP(OP_CONST, 0,
7436                        S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
7437     pl_yylval.opval->op_private = OPpCONST_BARE;
7438     TERM(BAREWORD);
7439 }
7440 
7441 static int
yyl_safe_bareword(pTHX_ char * s,const char lastchar)7442 yyl_safe_bareword(pTHX_ char *s, const char lastchar)
7443 {
7444     if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7445         && PL_parser->saw_infix_sigil)
7446     {
7447         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7448                          "Operator or semicolon missing before %c%" UTF8f,
7449                          lastchar,
7450                          UTF8fARG(UTF, strlen(PL_tokenbuf),
7451                                   PL_tokenbuf));
7452         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7453                          "Ambiguous use of %c resolved as operator %c",
7454                          lastchar, lastchar);
7455     }
7456     TOKEN(BAREWORD);
7457 }
7458 
7459 static int
yyl_constant_op(pTHX_ char * s,SV * sv,CV * cv,OP * rv2cv_op,PADOFFSET off)7460 yyl_constant_op(pTHX_ char *s, SV *sv, CV *cv, OP *rv2cv_op, PADOFFSET off)
7461 {
7462     if (sv) {
7463         op_free(rv2cv_op);
7464         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7465         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7466         if (SvTYPE(sv) == SVt_PVAV)
7467             pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7468                                       pl_yylval.opval);
7469         else {
7470             pl_yylval.opval->op_private = 0;
7471             pl_yylval.opval->op_folded = 1;
7472             pl_yylval.opval->op_flags |= OPf_SPECIAL;
7473         }
7474         TOKEN(BAREWORD);
7475     }
7476 
7477     op_free(pl_yylval.opval);
7478     pl_yylval.opval =
7479         off ? newCVREF(0, rv2cv_op) : rv2cv_op;
7480     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7481     PL_last_lop = PL_oldbufptr;
7482     PL_last_lop_op = OP_ENTERSUB;
7483 
7484     /* Is there a prototype? */
7485     if (SvPOK(cv)) {
7486         int k = yyl_subproto(aTHX_ s, cv);
7487         if (k != KEY_NULL)
7488             return k;
7489     }
7490 
7491     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7492     PL_expect = XTERM;
7493     force_next(off ? PRIVATEREF : BAREWORD);
7494     if (!PL_lex_allbrackets
7495         && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7496     {
7497         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7498     }
7499 
7500     TOKEN(NOAMP);
7501 }
7502 
7503 /* Honour "reserved word" warnings, and enforce strict subs */
7504 static void
yyl_strictwarn_bareword(pTHX_ const char lastchar)7505 yyl_strictwarn_bareword(pTHX_ const char lastchar)
7506 {
7507     /* after "print" and similar functions (corresponding to
7508      * "F? L" in opcode.pl), whatever wasn't already parsed as
7509      * a filehandle should be subject to "strict subs".
7510      * Likewise for the optional indirect-object argument to system
7511      * or exec, which can't be a bareword */
7512     if ((PL_last_lop_op == OP_PRINT
7513             || PL_last_lop_op == OP_PRTF
7514             || PL_last_lop_op == OP_SAY
7515             || PL_last_lop_op == OP_SYSTEM
7516             || PL_last_lop_op == OP_EXEC)
7517         && (PL_hints & HINT_STRICT_SUBS))
7518     {
7519         pl_yylval.opval->op_private |= OPpCONST_STRICT;
7520     }
7521 
7522     if (lastchar != '-' && ckWARN(WARN_RESERVED)) {
7523         char *d = PL_tokenbuf;
7524         while (isLOWER(*d))
7525             d++;
7526         if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0)) {
7527             /* PL_warn_reserved is constant */
7528             GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
7529             Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7530                         PL_tokenbuf);
7531             GCC_DIAG_RESTORE_STMT;
7532         }
7533     }
7534 }
7535 
7536 static int
yyl_just_a_word(pTHX_ char * s,STRLEN len,I32 orig_keyword,struct code c)7537 yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 orig_keyword, struct code c)
7538 {
7539     int pkgname = 0;
7540     const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7541     bool safebw;
7542     bool no_op_error = FALSE;
7543     /* Use this var to track whether intuit_method has been
7544        called.  intuit_method returns 0 or > 255.  */
7545     int key = 1;
7546 
7547     if (PL_expect == XOPERATOR) {
7548         if (PL_bufptr == PL_linestart) {
7549             CopLINE_dec(PL_curcop);
7550             Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7551             CopLINE_inc(PL_curcop);
7552         }
7553         else
7554             /* We want to call no_op with s pointing after the
7555                bareword, so defer it.  But we want it to come
7556                before the Bad name croak.  */
7557             no_op_error = TRUE;
7558     }
7559 
7560     /* Get the rest if it looks like a package qualifier */
7561 
7562     if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7563         STRLEN morelen;
7564         s = scan_word6(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7565                       TRUE, &morelen, TRUE);
7566         if (no_op_error) {
7567             no_op("Bareword",s);
7568             no_op_error = FALSE;
7569         }
7570         if (!morelen)
7571             Perl_croak(aTHX_ "Bad name after %" UTF8f "%s",
7572                     UTF8fARG(UTF, len, PL_tokenbuf),
7573                     *s == '\'' ? "'" : "::");
7574         len += morelen;
7575         pkgname = 1;
7576     }
7577 
7578     if (no_op_error)
7579         no_op("Bareword",s);
7580 
7581     /* See if the name is "Foo::",
7582        in which case Foo is a bareword
7583        (and a package name). */
7584 
7585     if (len > 2 && PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':') {
7586         if (ckWARN(WARN_BAREWORD)
7587             && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7588             Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7589                         "Bareword \"%" UTF8f
7590                         "\" refers to nonexistent package",
7591                         UTF8fARG(UTF, len, PL_tokenbuf));
7592         len -= 2;
7593         PL_tokenbuf[len] = '\0';
7594         c.gv = NULL;
7595         c.gvp = 0;
7596         safebw = TRUE;
7597     }
7598     else {
7599         safebw = FALSE;
7600     }
7601 
7602     /* if we saw a global override before, get the right name */
7603 
7604     if (!c.sv)
7605         c.sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len);
7606     if (c.gvp) {
7607         SV *sv = newSVpvs("CORE::GLOBAL::");
7608         sv_catsv(sv, c.sv);
7609         SvREFCNT_dec(c.sv);
7610         c.sv = sv;
7611     }
7612 
7613     /* Presume this is going to be a bareword of some sort. */
7614     CLINE;
7615     pl_yylval.opval = newSVOP(OP_CONST, 0, c.sv);
7616     pl_yylval.opval->op_private = OPpCONST_BARE;
7617 
7618     /* And if "Foo::", then that's what it certainly is. */
7619     if (safebw)
7620         return yyl_safe_bareword(aTHX_ s, lastchar);
7621 
7622     if (!c.off) {
7623         OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(c.sv));
7624         const_op->op_private = OPpCONST_BARE;
7625         c.rv2cv_op = newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op);
7626         c.cv = c.lex
7627             ? isGV(c.gv)
7628                 ? GvCV(c.gv)
7629                 : SvROK(c.gv) && SvTYPE(SvRV(c.gv)) == SVt_PVCV
7630                     ? (CV *)SvRV(c.gv)
7631                     : ((CV *)c.gv)
7632             : rv2cv_op_cv(c.rv2cv_op, RV2CVOPCV_RETURN_STUB);
7633     }
7634 
7635     /* See if it's the indirect object for a list operator. */
7636 
7637     if (PL_oldoldbufptr
7638         && PL_oldoldbufptr < PL_bufptr
7639         && (PL_oldoldbufptr == PL_last_lop
7640             || PL_oldoldbufptr == PL_last_uni)
7641         && /* NO SKIPSPACE BEFORE HERE! */
7642            (PL_expect == XREF
7643             || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7)
7644                                                    == OA_FILEREF))
7645     {
7646         bool immediate_paren = *s == '(';
7647         SSize_t s_off;
7648 
7649         /* (Now we can afford to cross potential line boundary.) */
7650         s = skipspace(s);
7651 
7652         /* intuit_method() can indirectly call lex_next_chunk(),
7653          * invalidating s
7654          */
7655         s_off = s - SvPVX(PL_linestr);
7656         /* Two barewords in a row may indicate method call. */
7657         if (   (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
7658                 || *s == '$')
7659             && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv)))
7660         {
7661             /* the code at method: doesn't use s */
7662             goto method;
7663         }
7664         s = SvPVX(PL_linestr) + s_off;
7665 
7666         /* If not a declared subroutine, it's an indirect object. */
7667         /* (But it's an indir obj regardless for sort.) */
7668         /* Also, if "_" follows a filetest operator, it's a bareword */
7669 
7670         if (
7671             ( !immediate_paren && (PL_last_lop_op == OP_SORT
7672              || (!c.cv
7673                  && (PL_last_lop_op != OP_MAPSTART
7674                      && PL_last_lop_op != OP_GREPSTART))))
7675            || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7676                 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK)
7677                                                 == OA_FILESTATOP))
7678            )
7679         {
7680             PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7681             yyl_strictwarn_bareword(aTHX_ lastchar);
7682             op_free(c.rv2cv_op);
7683             return yyl_safe_bareword(aTHX_ s, lastchar);
7684         }
7685     }
7686 
7687     PL_expect = XOPERATOR;
7688     s = skipspace(s);
7689 
7690     /* Is this a word before a => operator? */
7691     if (*s == '=' && s[1] == '>' && !pkgname) {
7692         op_free(c.rv2cv_op);
7693         CLINE;
7694         if (c.gvp || (c.lex && !c.off)) {
7695             assert (cSVOPx(pl_yylval.opval)->op_sv == c.sv);
7696             /* This is our own scalar, created a few lines
7697                above, so this is safe. */
7698             SvREADONLY_off(c.sv);
7699             sv_setpv(c.sv, PL_tokenbuf);
7700             if (UTF && !IN_BYTES
7701              && is_utf8_string((U8*)PL_tokenbuf, len))
7702                   SvUTF8_on(c.sv);
7703             SvREADONLY_on(c.sv);
7704         }
7705         TERM(BAREWORD);
7706     }
7707 
7708     /* If followed by a paren, it's certainly a subroutine. */
7709     if (*s == '(') {
7710         CLINE;
7711         if (c.cv) {
7712             char *d = s + 1;
7713             while (SPACE_OR_TAB(*d))
7714                 d++;
7715             if (*d == ')' && (c.sv = cv_const_sv_or_av(c.cv)))
7716                 return yyl_constant_op(aTHX_ d + 1, c.sv, c.cv, c.rv2cv_op, c.off);
7717         }
7718         NEXTVAL_NEXTTOKE.opval =
7719             c.off ? c.rv2cv_op : pl_yylval.opval;
7720         if (c.off)
7721              op_free(pl_yylval.opval), force_next(PRIVATEREF);
7722         else op_free(c.rv2cv_op),      force_next(BAREWORD);
7723         pl_yylval.ival = 0;
7724         TOKEN(PERLY_AMPERSAND);
7725     }
7726 
7727     /* If followed by var or block, call it a method (unless sub) */
7728 
7729     if ((*s == '$' || *s == '{') && !c.cv && FEATURE_INDIRECT_IS_ENABLED) {
7730         op_free(c.rv2cv_op);
7731         PL_last_lop = PL_oldbufptr;
7732         PL_last_lop_op = OP_METHOD;
7733         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7734             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7735         PL_expect = XBLOCKTERM;
7736         PL_bufptr = s;
7737         return REPORT(METHCALL0);
7738     }
7739 
7740     /* If followed by a bareword, see if it looks like indir obj. */
7741 
7742     if (   key == 1
7743         && !orig_keyword
7744         && (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '$')
7745         && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv)))
7746     {
7747       method:
7748         if (c.lex && !c.off) {
7749             assert(cSVOPx(pl_yylval.opval)->op_sv == c.sv);
7750             SvREADONLY_off(c.sv);
7751             sv_setpvn(c.sv, PL_tokenbuf, len);
7752             if (UTF && !IN_BYTES
7753              && is_utf8_string((U8*)PL_tokenbuf, len))
7754                 SvUTF8_on(c.sv);
7755             else SvUTF8_off(c.sv);
7756         }
7757         op_free(c.rv2cv_op);
7758         if (key == METHCALL0 && !PL_lex_allbrackets
7759             && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7760         {
7761             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7762         }
7763         return REPORT(key);
7764     }
7765 
7766     /* Not a method, so call it a subroutine (if defined) */
7767 
7768     if (c.cv) {
7769         /* Check for a constant sub */
7770         c.sv = cv_const_sv_or_av(c.cv);
7771         return yyl_constant_op(aTHX_ s, c.sv, c.cv, c.rv2cv_op, c.off);
7772     }
7773 
7774     /* Call it a bare word */
7775 
7776     if (PL_hints & HINT_STRICT_SUBS)
7777         pl_yylval.opval->op_private |= OPpCONST_STRICT;
7778     else
7779         yyl_strictwarn_bareword(aTHX_ lastchar);
7780 
7781     op_free(c.rv2cv_op);
7782 
7783     return yyl_safe_bareword(aTHX_ s, lastchar);
7784 }
7785 
7786 static int
yyl_word_or_keyword(pTHX_ char * s,STRLEN len,I32 key,I32 orig_keyword,struct code c)7787 yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct code c)
7788 {
7789     switch (key) {
7790     default:			/* not a keyword */
7791         return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
7792 
7793     case KEY___FILE__:
7794         FUN0OP( newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0)) );
7795 
7796     case KEY___LINE__:
7797         FUN0OP(
7798             newSVOP(OP_CONST, 0,
7799                 Perl_newSVpvf(aTHX_ "%" LINE_Tf, CopLINE(PL_curcop)))
7800         );
7801 
7802     case KEY___PACKAGE__:
7803         FUN0OP(
7804             newSVOP(OP_CONST, 0, (PL_curstash
7805                                      ? newSVhek(HvNAME_HEK(PL_curstash))
7806                                      : &PL_sv_undef))
7807         );
7808 
7809     case KEY___DATA__:
7810     case KEY___END__:
7811         if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D'))
7812             yyl_data_handle(aTHX);
7813         return yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s);
7814 
7815     case KEY___SUB__:
7816         /* If !CvCLONE(PL_compcv) then rpeep will probably turn this into an
7817          * OP_CONST. We need to make it big enough to allow room for that if
7818          * so */
7819         FUN0OP(CvCLONE(PL_compcv)
7820                     ? newOP(OP_RUNCV, 0)
7821                     : newSVOP(OP_RUNCV, 0, &PL_sv_undef));
7822 
7823     case KEY_AUTOLOAD:
7824     case KEY_DESTROY:
7825     case KEY_BEGIN:
7826     case KEY_UNITCHECK:
7827     case KEY_CHECK:
7828     case KEY_INIT:
7829     case KEY_END:
7830         if (PL_expect == XSTATE)
7831             return yyl_sub(aTHX_ PL_bufptr, key);
7832         return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
7833 
7834     case KEY_ADJUST:
7835         Perl_ck_warner_d(aTHX_
7836             packWARN(WARN_EXPERIMENTAL__CLASS), "ADJUST is experimental");
7837 
7838         /* The way that KEY_CHECK et.al. are handled currently are nothing
7839          * short of crazy. We won't copy that model for new phasers, but use
7840          * this as an experiment to test if this will work
7841          */
7842         PHASERBLOCK(KEY_ADJUST);
7843 
7844     case KEY_abs:
7845         UNI(OP_ABS);
7846 
7847     case KEY_alarm:
7848         UNI(OP_ALARM);
7849 
7850     case KEY_accept:
7851         LOP(OP_ACCEPT,XTERM);
7852 
7853     case KEY_and:
7854         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7855             return REPORT(0);
7856         OPERATOR(ANDOP);
7857 
7858     case KEY_atan2:
7859         LOP(OP_ATAN2,XTERM);
7860 
7861     case KEY_bind:
7862         LOP(OP_BIND,XTERM);
7863 
7864     case KEY_binmode:
7865         LOP(OP_BINMODE,XTERM);
7866 
7867     case KEY_bless:
7868         LOP(OP_BLESS,XTERM);
7869 
7870     case KEY_break:
7871         FUN0(OP_BREAK);
7872 
7873     case KEY_catch:
7874         Perl_ck_warner_d(aTHX_
7875             packWARN(WARN_EXPERIMENTAL__TRY), "try/catch is experimental");
7876         PREBLOCK(KW_CATCH);
7877 
7878     case KEY_chop:
7879         UNI(OP_CHOP);
7880 
7881     case KEY_class:
7882         Perl_ck_warner_d(aTHX_
7883             packWARN(WARN_EXPERIMENTAL__CLASS), "class is experimental");
7884 
7885         s = force_word(s,BAREWORD,FALSE,TRUE);
7886         s = skipspace(s);
7887         s = force_strict_version(s);
7888         PL_expect = XATTRBLOCK;
7889         TOKEN(KW_CLASS);
7890 
7891     case KEY_continue:
7892         /* We have to disambiguate the two senses of
7893           "continue". If the next token is a '{' then
7894           treat it as the start of a continue block;
7895           otherwise treat it as a control operator.
7896          */
7897         s = skipspace(s);
7898         if (*s == '{')
7899             PREBLOCK(KW_CONTINUE);
7900         else
7901             FUN0(OP_CONTINUE);
7902 
7903     case KEY_chdir:
7904         /* may use HOME */
7905         (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7906         UNI(OP_CHDIR);
7907 
7908     case KEY_close:
7909         UNI(OP_CLOSE);
7910 
7911     case KEY_closedir:
7912         UNI(OP_CLOSEDIR);
7913 
7914     case KEY_cmp:
7915         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7916             return REPORT(0);
7917         NCEop(OP_SCMP);
7918 
7919     case KEY_caller:
7920         UNI(OP_CALLER);
7921 
7922     case KEY_crypt:
7923 
7924         LOP(OP_CRYPT,XTERM);
7925 
7926     case KEY_chmod:
7927         LOP(OP_CHMOD,XTERM);
7928 
7929     case KEY_chown:
7930         LOP(OP_CHOWN,XTERM);
7931 
7932     case KEY_connect:
7933         LOP(OP_CONNECT,XTERM);
7934 
7935     case KEY_chr:
7936         UNI(OP_CHR);
7937 
7938     case KEY_cos:
7939         UNI(OP_COS);
7940 
7941     case KEY_chroot:
7942         UNI(OP_CHROOT);
7943 
7944     case KEY_default:
7945         PREBLOCK(KW_DEFAULT);
7946 
7947     case KEY_defer:
7948         Perl_ck_warner_d(aTHX_
7949             packWARN(WARN_EXPERIMENTAL__DEFER), "defer is experimental");
7950         PREBLOCK(KW_DEFER);
7951 
7952     case KEY_do:
7953         return yyl_do(aTHX_ s, orig_keyword);
7954 
7955     case KEY_die:
7956         PL_hints |= HINT_BLOCK_SCOPE;
7957         LOP(OP_DIE,XTERM);
7958 
7959     case KEY_defined:
7960         UNI(OP_DEFINED);
7961 
7962     case KEY_delete:
7963         UNI(OP_DELETE);
7964 
7965     case KEY_dbmopen:
7966         Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7967                           STR_WITH_LEN("NDBM_File::"),
7968                           STR_WITH_LEN("DB_File::"),
7969                           STR_WITH_LEN("GDBM_File::"),
7970                           STR_WITH_LEN("SDBM_File::"),
7971                           STR_WITH_LEN("ODBM_File::"),
7972                           NULL);
7973         LOP(OP_DBMOPEN,XTERM);
7974 
7975     case KEY_dbmclose:
7976         UNI(OP_DBMCLOSE);
7977 
7978     case KEY_dump:
7979         LOOPX(OP_DUMP);
7980 
7981     case KEY_else:
7982         PREBLOCK(KW_ELSE);
7983 
7984     case KEY_elsif:
7985         pl_yylval.ival = CopLINE(PL_curcop);
7986         OPERATOR(KW_ELSIF);
7987 
7988     case KEY_eq:
7989         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7990             return REPORT(0);
7991         ChEop(OP_SEQ);
7992 
7993     case KEY_exists:
7994         UNI(OP_EXISTS);
7995 
7996     case KEY_exit:
7997         UNI(OP_EXIT);
7998 
7999     case KEY_eval:
8000         s = skipspace(s);
8001         if (*s == '{') { /* block eval */
8002             PL_expect = XTERMBLOCK;
8003             UNIBRACK(OP_ENTERTRY);
8004         }
8005         else { /* string eval */
8006             PL_expect = XTERM;
8007             UNIBRACK(OP_ENTEREVAL);
8008         }
8009 
8010     case KEY_evalbytes:
8011         PL_expect = XTERM;
8012         UNIBRACK(-OP_ENTEREVAL);
8013 
8014     case KEY_eof:
8015         UNI(OP_EOF);
8016 
8017     case KEY_exp:
8018         UNI(OP_EXP);
8019 
8020     case KEY_each:
8021         UNI(OP_EACH);
8022 
8023     case KEY_exec:
8024         LOP(OP_EXEC,XREF);
8025 
8026     case KEY_endhostent:
8027         FUN0(OP_EHOSTENT);
8028 
8029     case KEY_endnetent:
8030         FUN0(OP_ENETENT);
8031 
8032     case KEY_endservent:
8033         FUN0(OP_ESERVENT);
8034 
8035     case KEY_endprotoent:
8036         FUN0(OP_EPROTOENT);
8037 
8038     case KEY_endpwent:
8039         FUN0(OP_EPWENT);
8040 
8041     case KEY_endgrent:
8042         FUN0(OP_EGRENT);
8043 
8044     case KEY_field:
8045         /* TODO: maybe this should use the same parser/grammar structures as
8046          * `my`, but it's also rather messy because of the `our` conflation
8047          */
8048         Perl_ck_warner_d(aTHX_
8049             packWARN(WARN_EXPERIMENTAL__CLASS), "field is experimental");
8050 
8051         croak_kw_unless_class("field");
8052 
8053         PL_parser->in_my = KEY_field;
8054         OPERATOR(KW_FIELD);
8055 
8056     case KEY_finally:
8057         Perl_ck_warner_d(aTHX_
8058             packWARN(WARN_EXPERIMENTAL__TRY), "try/catch/finally is experimental");
8059         PREBLOCK(KW_FINALLY);
8060 
8061     case KEY_for:
8062     case KEY_foreach:
8063         return yyl_foreach(aTHX_ s);
8064 
8065     case KEY_formline:
8066         LOP(OP_FORMLINE,XTERM);
8067 
8068     case KEY_fork:
8069         FUN0(OP_FORK);
8070 
8071     case KEY_fc:
8072         UNI(OP_FC);
8073 
8074     case KEY_fcntl:
8075         LOP(OP_FCNTL,XTERM);
8076 
8077     case KEY_fileno:
8078         UNI(OP_FILENO);
8079 
8080     case KEY_flock:
8081         LOP(OP_FLOCK,XTERM);
8082 
8083     case KEY_gt:
8084         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8085             return REPORT(0);
8086         ChRop(OP_SGT);
8087 
8088     case KEY_ge:
8089         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8090             return REPORT(0);
8091         ChRop(OP_SGE);
8092 
8093     case KEY_grep:
8094         LOP(OP_GREPSTART, XREF);
8095 
8096     case KEY_goto:
8097         LOOPX(OP_GOTO);
8098 
8099     case KEY_gmtime:
8100         UNI(OP_GMTIME);
8101 
8102     case KEY_getc:
8103         UNIDOR(OP_GETC);
8104 
8105     case KEY_getppid:
8106         FUN0(OP_GETPPID);
8107 
8108     case KEY_getpgrp:
8109         UNI(OP_GETPGRP);
8110 
8111     case KEY_getpriority:
8112         LOP(OP_GETPRIORITY,XTERM);
8113 
8114     case KEY_getprotobyname:
8115         UNI(OP_GPBYNAME);
8116 
8117     case KEY_getprotobynumber:
8118         LOP(OP_GPBYNUMBER,XTERM);
8119 
8120     case KEY_getprotoent:
8121         FUN0(OP_GPROTOENT);
8122 
8123     case KEY_getpwent:
8124         FUN0(OP_GPWENT);
8125 
8126     case KEY_getpwnam:
8127         UNI(OP_GPWNAM);
8128 
8129     case KEY_getpwuid:
8130         UNI(OP_GPWUID);
8131 
8132     case KEY_getpeername:
8133         UNI(OP_GETPEERNAME);
8134 
8135     case KEY_gethostbyname:
8136         UNI(OP_GHBYNAME);
8137 
8138     case KEY_gethostbyaddr:
8139         LOP(OP_GHBYADDR,XTERM);
8140 
8141     case KEY_gethostent:
8142         FUN0(OP_GHOSTENT);
8143 
8144     case KEY_getnetbyname:
8145         UNI(OP_GNBYNAME);
8146 
8147     case KEY_getnetbyaddr:
8148         LOP(OP_GNBYADDR,XTERM);
8149 
8150     case KEY_getnetent:
8151         FUN0(OP_GNETENT);
8152 
8153     case KEY_getservbyname:
8154         LOP(OP_GSBYNAME,XTERM);
8155 
8156     case KEY_getservbyport:
8157         LOP(OP_GSBYPORT,XTERM);
8158 
8159     case KEY_getservent:
8160         FUN0(OP_GSERVENT);
8161 
8162     case KEY_getsockname:
8163         UNI(OP_GETSOCKNAME);
8164 
8165     case KEY_getsockopt:
8166         LOP(OP_GSOCKOPT,XTERM);
8167 
8168     case KEY_getgrent:
8169         FUN0(OP_GGRENT);
8170 
8171     case KEY_getgrnam:
8172         UNI(OP_GGRNAM);
8173 
8174     case KEY_getgrgid:
8175         UNI(OP_GGRGID);
8176 
8177     case KEY_getlogin:
8178         FUN0(OP_GETLOGIN);
8179 
8180     case KEY_given:
8181         pl_yylval.ival = CopLINE(PL_curcop);
8182         Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED__SMARTMATCH),
8183                          "given is deprecated");
8184         OPERATOR(KW_GIVEN);
8185 
8186     case KEY_glob:
8187         LOP( orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB, XTERM );
8188 
8189     case KEY_hex:
8190         UNI(OP_HEX);
8191 
8192     case KEY_if:
8193         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8194             return REPORT(0);
8195         pl_yylval.ival = CopLINE(PL_curcop);
8196         OPERATOR(KW_IF);
8197 
8198     case KEY_index:
8199         LOP(OP_INDEX,XTERM);
8200 
8201     case KEY_int:
8202         UNI(OP_INT);
8203 
8204     case KEY_ioctl:
8205         LOP(OP_IOCTL,XTERM);
8206 
8207     case KEY_isa:
8208         NCRop(OP_ISA);
8209 
8210     case KEY_join:
8211         LOP(OP_JOIN,XTERM);
8212 
8213     case KEY_keys:
8214         UNI(OP_KEYS);
8215 
8216     case KEY_kill:
8217         LOP(OP_KILL,XTERM);
8218 
8219     case KEY_last:
8220         LOOPX(OP_LAST);
8221 
8222     case KEY_lc:
8223         UNI(OP_LC);
8224 
8225     case KEY_lcfirst:
8226         UNI(OP_LCFIRST);
8227 
8228     case KEY_local:
8229         OPERATOR(KW_LOCAL);
8230 
8231     case KEY_length:
8232         UNI(OP_LENGTH);
8233 
8234     case KEY_lt:
8235         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8236             return REPORT(0);
8237         ChRop(OP_SLT);
8238 
8239     case KEY_le:
8240         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8241             return REPORT(0);
8242         ChRop(OP_SLE);
8243 
8244     case KEY_localtime:
8245         UNI(OP_LOCALTIME);
8246 
8247     case KEY_log:
8248         UNI(OP_LOG);
8249 
8250     case KEY_link:
8251         LOP(OP_LINK,XTERM);
8252 
8253     case KEY_listen:
8254         LOP(OP_LISTEN,XTERM);
8255 
8256     case KEY_lock:
8257         UNI(OP_LOCK);
8258 
8259     case KEY_lstat:
8260         UNI(OP_LSTAT);
8261 
8262     case KEY_m:
8263         s = scan_pat(s,OP_MATCH);
8264         TERM(sublex_start());
8265 
8266     case KEY_map:
8267         LOP(OP_MAPSTART, XREF);
8268 
8269     case KEY_mkdir:
8270         LOP(OP_MKDIR,XTERM);
8271 
8272     case KEY_msgctl:
8273         LOP(OP_MSGCTL,XTERM);
8274 
8275     case KEY_msgget:
8276         LOP(OP_MSGGET,XTERM);
8277 
8278     case KEY_msgrcv:
8279         LOP(OP_MSGRCV,XTERM);
8280 
8281     case KEY_msgsnd:
8282         LOP(OP_MSGSND,XTERM);
8283 
8284     case KEY_our:
8285     case KEY_my:
8286     case KEY_state:
8287         return yyl_my(aTHX_ s, key);
8288 
8289     case KEY_next:
8290         LOOPX(OP_NEXT);
8291 
8292     case KEY_ne:
8293         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8294             return REPORT(0);
8295         ChEop(OP_SNE);
8296 
8297     case KEY_no:
8298         s = tokenize_use(0, s);
8299         TOKEN(KW_USE_or_NO);
8300 
8301     case KEY_not:
8302         if (*s == '(' || (s = skipspace(s), *s == '('))
8303             FUN1(OP_NOT);
8304         else {
8305             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8306                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8307             OPERATOR(NOTOP);
8308         }
8309 
8310     case KEY_open:
8311         s = skipspace(s);
8312         if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
8313             const char *t;
8314             char *d = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len, FALSE);
8315             for (t=d; isSPACE(*t);)
8316                 t++;
8317             if ( *t && memCHRs("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8318                 /* [perl #16184] */
8319                 && !(t[0] == '=' && t[1] == '>')
8320                 && !(t[0] == ':' && t[1] == ':')
8321                 && !keyword(s, d-s, 0)
8322             ) {
8323                 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8324                    "Precedence problem: open %" UTF8f " should be open(%" UTF8f ")",
8325                     UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8326             }
8327         }
8328         LOP(OP_OPEN,XTERM);
8329 
8330     case KEY_or:
8331         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8332             return REPORT(0);
8333         pl_yylval.ival = OP_OR;
8334         OPERATOR(OROP);
8335 
8336     case KEY_ord:
8337         UNI(OP_ORD);
8338 
8339     case KEY_oct:
8340         UNI(OP_OCT);
8341 
8342     case KEY_opendir:
8343         LOP(OP_OPEN_DIR,XTERM);
8344 
8345     case KEY_print:
8346         checkcomma(s,PL_tokenbuf,"filehandle");
8347         LOP(OP_PRINT,XREF);
8348 
8349     case KEY_printf:
8350         checkcomma(s,PL_tokenbuf,"filehandle");
8351         LOP(OP_PRTF,XREF);
8352 
8353     case KEY_prototype:
8354         UNI(OP_PROTOTYPE);
8355 
8356     case KEY_push:
8357         LOP(OP_PUSH,XTERM);
8358 
8359     case KEY_pop:
8360         UNIDOR(OP_POP);
8361 
8362     case KEY_pos:
8363         UNIDOR(OP_POS);
8364 
8365     case KEY_pack:
8366         LOP(OP_PACK,XTERM);
8367 
8368     case KEY_package:
8369         s = force_word(s,BAREWORD,FALSE,TRUE);
8370         s = skipspace(s);
8371         s = force_strict_version(s);
8372         PREBLOCK(KW_PACKAGE);
8373 
8374     case KEY_pipe:
8375         LOP(OP_PIPE_OP,XTERM);
8376 
8377     case KEY_q:
8378         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8379         if (!s)
8380             missingterm(NULL, 0);
8381         COPLINE_SET_FROM_MULTI_END;
8382         pl_yylval.ival = OP_CONST;
8383         TERM(sublex_start());
8384 
8385     case KEY_quotemeta:
8386         UNI(OP_QUOTEMETA);
8387 
8388     case KEY_qw:
8389         return yyl_qw(aTHX_ s, len);
8390 
8391     case KEY_qq:
8392         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8393         if (!s)
8394             missingterm(NULL, 0);
8395         pl_yylval.ival = OP_STRINGIFY;
8396         if (SvIVX(PL_lex_stuff) == '\'')
8397             SvIV_set(PL_lex_stuff, 0);	/* qq'$foo' should interpolate */
8398         TERM(sublex_start());
8399 
8400     case KEY_qr:
8401         s = scan_pat(s,OP_QR);
8402         TERM(sublex_start());
8403 
8404     case KEY_qx:
8405         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8406         if (!s)
8407             missingterm(NULL, 0);
8408         pl_yylval.ival = OP_BACKTICK;
8409         TERM(sublex_start());
8410 
8411     case KEY_return:
8412         OLDLOP(OP_RETURN);
8413 
8414     case KEY_require:
8415         return yyl_require(aTHX_ s, orig_keyword);
8416 
8417     case KEY_reset:
8418         UNI(OP_RESET);
8419 
8420     case KEY_redo:
8421         LOOPX(OP_REDO);
8422 
8423     case KEY_rename:
8424         LOP(OP_RENAME,XTERM);
8425 
8426     case KEY_rand:
8427         UNI(OP_RAND);
8428 
8429     case KEY_rmdir:
8430         UNI(OP_RMDIR);
8431 
8432     case KEY_rindex:
8433         LOP(OP_RINDEX,XTERM);
8434 
8435     case KEY_read:
8436         LOP(OP_READ,XTERM);
8437 
8438     case KEY_readdir:
8439         UNI(OP_READDIR);
8440 
8441     case KEY_readline:
8442         UNIDOR(OP_READLINE);
8443 
8444     case KEY_readpipe:
8445         UNIDOR(OP_BACKTICK);
8446 
8447     case KEY_rewinddir:
8448         UNI(OP_REWINDDIR);
8449 
8450     case KEY_recv:
8451         LOP(OP_RECV,XTERM);
8452 
8453     case KEY_reverse:
8454         LOP(OP_REVERSE,XTERM);
8455 
8456     case KEY_readlink:
8457         UNIDOR(OP_READLINK);
8458 
8459     case KEY_ref:
8460         UNI(OP_REF);
8461 
8462     case KEY_s:
8463         s = scan_subst(s);
8464         if (pl_yylval.opval)
8465             TERM(sublex_start());
8466         else
8467             TOKEN(1);	/* force error */
8468 
8469     case KEY_say:
8470         checkcomma(s,PL_tokenbuf,"filehandle");
8471         LOP(OP_SAY,XREF);
8472 
8473     case KEY_chomp:
8474         UNI(OP_CHOMP);
8475 
8476     case KEY_scalar:
8477         UNI(OP_SCALAR);
8478 
8479     case KEY_select:
8480         LOP(OP_SELECT,XTERM);
8481 
8482     case KEY_seek:
8483         LOP(OP_SEEK,XTERM);
8484 
8485     case KEY_semctl:
8486         LOP(OP_SEMCTL,XTERM);
8487 
8488     case KEY_semget:
8489         LOP(OP_SEMGET,XTERM);
8490 
8491     case KEY_semop:
8492         LOP(OP_SEMOP,XTERM);
8493 
8494     case KEY_send:
8495         LOP(OP_SEND,XTERM);
8496 
8497     case KEY_setpgrp:
8498         LOP(OP_SETPGRP,XTERM);
8499 
8500     case KEY_setpriority:
8501         LOP(OP_SETPRIORITY,XTERM);
8502 
8503     case KEY_sethostent:
8504         UNI(OP_SHOSTENT);
8505 
8506     case KEY_setnetent:
8507         UNI(OP_SNETENT);
8508 
8509     case KEY_setservent:
8510         UNI(OP_SSERVENT);
8511 
8512     case KEY_setprotoent:
8513         UNI(OP_SPROTOENT);
8514 
8515     case KEY_setpwent:
8516         FUN0(OP_SPWENT);
8517 
8518     case KEY_setgrent:
8519         FUN0(OP_SGRENT);
8520 
8521     case KEY_seekdir:
8522         LOP(OP_SEEKDIR,XTERM);
8523 
8524     case KEY_setsockopt:
8525         LOP(OP_SSOCKOPT,XTERM);
8526 
8527     case KEY_shift:
8528         UNIDOR(OP_SHIFT);
8529 
8530     case KEY_shmctl:
8531         LOP(OP_SHMCTL,XTERM);
8532 
8533     case KEY_shmget:
8534         LOP(OP_SHMGET,XTERM);
8535 
8536     case KEY_shmread:
8537         LOP(OP_SHMREAD,XTERM);
8538 
8539     case KEY_shmwrite:
8540         LOP(OP_SHMWRITE,XTERM);
8541 
8542     case KEY_shutdown:
8543         LOP(OP_SHUTDOWN,XTERM);
8544 
8545     case KEY_sin:
8546         UNI(OP_SIN);
8547 
8548     case KEY_sleep:
8549         UNI(OP_SLEEP);
8550 
8551     case KEY_socket:
8552         LOP(OP_SOCKET,XTERM);
8553 
8554     case KEY_socketpair:
8555         LOP(OP_SOCKPAIR,XTERM);
8556 
8557     case KEY_sort:
8558         checkcomma(s,PL_tokenbuf,"subroutine name");
8559         s = skipspace(s);
8560         PL_expect = XTERM;
8561         s = force_word(s,BAREWORD,TRUE,TRUE);
8562         LOP(OP_SORT,XREF);
8563 
8564     case KEY_split:
8565         LOP(OP_SPLIT,XTERM);
8566 
8567     case KEY_sprintf:
8568         LOP(OP_SPRINTF,XTERM);
8569 
8570     case KEY_splice:
8571         LOP(OP_SPLICE,XTERM);
8572 
8573     case KEY_sqrt:
8574         UNI(OP_SQRT);
8575 
8576     case KEY_srand:
8577         UNI(OP_SRAND);
8578 
8579     case KEY_stat:
8580         UNI(OP_STAT);
8581 
8582     case KEY_study:
8583         UNI(OP_STUDY);
8584 
8585     case KEY_substr:
8586         LOP(OP_SUBSTR,XTERM);
8587 
8588     case KEY_method:
8589         /* For now we just treat 'method' identical to 'sub' plus a warning */
8590         Perl_ck_warner_d(aTHX_
8591             packWARN(WARN_EXPERIMENTAL__CLASS), "method is experimental");
8592         return yyl_sub(aTHX_ s, KEY_method);
8593 
8594     case KEY_format:
8595     case KEY_sub:
8596         return yyl_sub(aTHX_ s, key);
8597 
8598     case KEY_system:
8599         LOP(OP_SYSTEM,XREF);
8600 
8601     case KEY_symlink:
8602         LOP(OP_SYMLINK,XTERM);
8603 
8604     case KEY_syscall:
8605         LOP(OP_SYSCALL,XTERM);
8606 
8607     case KEY_sysopen:
8608         LOP(OP_SYSOPEN,XTERM);
8609 
8610     case KEY_sysseek:
8611         LOP(OP_SYSSEEK,XTERM);
8612 
8613     case KEY_sysread:
8614         LOP(OP_SYSREAD,XTERM);
8615 
8616     case KEY_syswrite:
8617         LOP(OP_SYSWRITE,XTERM);
8618 
8619     case KEY_tr:
8620     case KEY_y:
8621         s = scan_trans(s);
8622         TERM(sublex_start());
8623 
8624     case KEY_tell:
8625         UNI(OP_TELL);
8626 
8627     case KEY_telldir:
8628         UNI(OP_TELLDIR);
8629 
8630     case KEY_tie:
8631         LOP(OP_TIE,XTERM);
8632 
8633     case KEY_tied:
8634         UNI(OP_TIED);
8635 
8636     case KEY_time:
8637         FUN0(OP_TIME);
8638 
8639     case KEY_times:
8640         FUN0(OP_TMS);
8641 
8642     case KEY_truncate:
8643         LOP(OP_TRUNCATE,XTERM);
8644 
8645     case KEY_try:
8646         pl_yylval.ival = CopLINE(PL_curcop);
8647         Perl_ck_warner_d(aTHX_
8648             packWARN(WARN_EXPERIMENTAL__TRY), "try/catch is experimental");
8649         PREBLOCK(KW_TRY);
8650 
8651     case KEY_uc:
8652         UNI(OP_UC);
8653 
8654     case KEY_ucfirst:
8655         UNI(OP_UCFIRST);
8656 
8657     case KEY_untie:
8658         UNI(OP_UNTIE);
8659 
8660     case KEY_until:
8661         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8662             return REPORT(0);
8663         pl_yylval.ival = CopLINE(PL_curcop);
8664         OPERATOR(KW_UNTIL);
8665 
8666     case KEY_unless:
8667         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8668             return REPORT(0);
8669         pl_yylval.ival = CopLINE(PL_curcop);
8670         OPERATOR(KW_UNLESS);
8671 
8672     case KEY_unlink:
8673         LOP(OP_UNLINK,XTERM);
8674 
8675     case KEY_undef:
8676         UNIDOR(OP_UNDEF);
8677 
8678     case KEY_unpack:
8679         LOP(OP_UNPACK,XTERM);
8680 
8681     case KEY_utime:
8682         LOP(OP_UTIME,XTERM);
8683 
8684     case KEY_umask:
8685         UNIDOR(OP_UMASK);
8686 
8687     case KEY_unshift:
8688         LOP(OP_UNSHIFT,XTERM);
8689 
8690     case KEY_use:
8691         s = tokenize_use(1, s);
8692         TOKEN(KW_USE_or_NO);
8693 
8694     case KEY_values:
8695         UNI(OP_VALUES);
8696 
8697     case KEY_vec:
8698         LOP(OP_VEC,XTERM);
8699 
8700     case KEY_when:
8701         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8702             return REPORT(0);
8703         pl_yylval.ival = CopLINE(PL_curcop);
8704         Perl_ck_warner_d(aTHX_
8705             packWARN(WARN_DEPRECATED__SMARTMATCH),
8706             "when is deprecated");
8707         OPERATOR(KW_WHEN);
8708 
8709     case KEY_while:
8710         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8711             return REPORT(0);
8712         pl_yylval.ival = CopLINE(PL_curcop);
8713         OPERATOR(KW_WHILE);
8714 
8715     case KEY_warn:
8716         PL_hints |= HINT_BLOCK_SCOPE;
8717         LOP(OP_WARN,XTERM);
8718 
8719     case KEY_wait:
8720         FUN0(OP_WAIT);
8721 
8722     case KEY_waitpid:
8723         LOP(OP_WAITPID,XTERM);
8724 
8725     case KEY_wantarray:
8726         FUN0(OP_WANTARRAY);
8727 
8728     case KEY_write:
8729         /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8730          * we use the same number on EBCDIC */
8731         gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8732         UNI(OP_ENTERWRITE);
8733 
8734     case KEY_x:
8735         if (PL_expect == XOPERATOR) {
8736             if (*s == '=' && !PL_lex_allbrackets
8737                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8738             {
8739                 return REPORT(0);
8740             }
8741             Mop(OP_REPEAT);
8742         }
8743         check_uni();
8744         return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
8745 
8746     case KEY_xor:
8747         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8748             return REPORT(0);
8749         pl_yylval.ival = OP_XOR;
8750         OPERATOR(OROP);
8751     }
8752 }
8753 
8754 static int
yyl_key_core(pTHX_ char * s,STRLEN len,struct code c)8755 yyl_key_core(pTHX_ char *s, STRLEN len, struct code c)
8756 {
8757     I32 key = 0;
8758     I32 orig_keyword = 0;
8759     STRLEN olen = len;
8760     char *d = s;
8761     s += 2;
8762     s = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len, FALSE);
8763     if ((*s == ':' && s[1] == ':')
8764         || (!(key = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
8765     {
8766         Copy(PL_bufptr, PL_tokenbuf, olen, char);
8767         return yyl_just_a_word(aTHX_ d, olen, 0, c);
8768     }
8769     if (!key)
8770         Perl_croak(aTHX_ "CORE::%" UTF8f " is not a keyword",
8771                           UTF8fARG(UTF, len, PL_tokenbuf));
8772     if (key < 0)
8773         key = -key;
8774     else if (key == KEY_require || key == KEY_do
8775           || key == KEY_glob)
8776         /* that's a way to remember we saw "CORE::" */
8777         orig_keyword = key;
8778 
8779     /* Known to be a reserved word at this point */
8780     return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c);
8781 }
8782 
8783 struct Perl_custom_infix_result {
8784     struct Perl_custom_infix *def;
8785     SV                       *parsedata;
8786 };
8787 
tokentype_for_plugop(struct Perl_custom_infix * def)8788 static enum yytokentype tokentype_for_plugop(struct Perl_custom_infix *def)
8789 {
8790     enum Perl_custom_infix_precedence prec = def->prec;
8791     if(prec <= INFIX_PREC_LOW)
8792         return PLUGIN_LOW_OP;
8793     if(prec <= INFIX_PREC_LOGICAL_OR_LOW)
8794         return PLUGIN_LOGICAL_OR_LOW_OP;
8795     if(prec <= INFIX_PREC_LOGICAL_AND_LOW)
8796         return PLUGIN_LOGICAL_AND_LOW_OP;
8797     if(prec <= INFIX_PREC_ASSIGN)
8798         return PLUGIN_ASSIGN_OP;
8799     if(prec <= INFIX_PREC_LOGICAL_OR)
8800         return PLUGIN_LOGICAL_OR_OP;
8801     if(prec <= INFIX_PREC_LOGICAL_AND)
8802         return PLUGIN_LOGICAL_AND_OP;
8803     if(prec <= INFIX_PREC_REL)
8804         return PLUGIN_REL_OP;
8805     if(prec <= INFIX_PREC_ADD)
8806         return PLUGIN_ADD_OP;
8807     if(prec <= INFIX_PREC_MUL)
8808         return PLUGIN_MUL_OP;
8809     if(prec <= INFIX_PREC_POW)
8810         return PLUGIN_POW_OP;
8811     return PLUGIN_HIGH_OP;
8812 }
8813 
8814 OP *
Perl_build_infix_plugin(pTHX_ OP * lhs,OP * rhs,void * tokendata)8815 Perl_build_infix_plugin(pTHX_ OP *lhs, OP *rhs, void *tokendata)
8816 {
8817     PERL_ARGS_ASSERT_BUILD_INFIX_PLUGIN;
8818 
8819     struct Perl_custom_infix_result *result = (struct Perl_custom_infix_result *)tokendata;
8820     SAVEFREEPV(result);
8821     if(result->parsedata)
8822         SAVEFREESV(result->parsedata);
8823 
8824     return (*result->def->build_op)(aTHX_
8825         &result->parsedata, lhs, rhs, result->def);
8826 }
8827 
8828 static int
yyl_keylookup(pTHX_ char * s,GV * gv)8829 yyl_keylookup(pTHX_ char *s, GV *gv)
8830 {
8831     STRLEN len;
8832     bool anydelim;
8833     I32 key;
8834     struct code c = no_code;
8835     I32 orig_keyword = 0;
8836     char *d;
8837 
8838     c.gv = gv;
8839 
8840     PL_bufptr = s;
8841     s = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len, FALSE);
8842 
8843     /* Some keywords can be followed by any delimiter, including ':' */
8844     anydelim = word_takes_any_delimiter(PL_tokenbuf, len);
8845 
8846     /* x::* is just a word, unless x is "CORE" */
8847     if (!anydelim && *s == ':' && s[1] == ':') {
8848         if (memEQs(PL_tokenbuf, len, "CORE"))
8849             return yyl_key_core(aTHX_ s, len, c);
8850         return yyl_just_a_word(aTHX_ s, len, 0, c);
8851     }
8852 
8853     d = s;
8854     while (d < PL_bufend && isSPACE(*d))
8855             d++;	/* no comments skipped here, or s### is misparsed */
8856 
8857     /* Is this a word before a => operator? */
8858     if (*d == '=' && d[1] == '>') {
8859         return yyl_fatcomma(aTHX_ s, len);
8860     }
8861 
8862     /* Check for plugged-in keyword */
8863     {
8864         OP *o;
8865         int result;
8866         char *saved_bufptr = PL_bufptr;
8867         PL_bufptr = s;
8868         result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
8869         s = PL_bufptr;
8870         if (result == KEYWORD_PLUGIN_DECLINE) {
8871             /* not a plugged-in keyword */
8872             PL_bufptr = saved_bufptr;
8873         } else if (result == KEYWORD_PLUGIN_STMT) {
8874             pl_yylval.opval = o;
8875             CLINE;
8876             if (!PL_nexttoke) PL_expect = XSTATE;
8877             return REPORT(PLUGSTMT);
8878         } else if (result == KEYWORD_PLUGIN_EXPR) {
8879             pl_yylval.opval = o;
8880             CLINE;
8881             if (!PL_nexttoke) PL_expect = XOPERATOR;
8882             return REPORT(PLUGEXPR);
8883         } else {
8884             Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'", PL_tokenbuf);
8885         }
8886     }
8887 
8888     /* Check for plugged-in named operator */
8889     if(PLUGINFIX_IS_ENABLED) {
8890         struct Perl_custom_infix *def;
8891         STRLEN result;
8892         result = PL_infix_plugin(aTHX_ PL_tokenbuf, len, &def);
8893         if(result) {
8894             if(result != len)
8895                 Perl_croak(aTHX_ "Bad infix plugin result (%zd) - did not consume entire identifier <%s>\n",
8896                     result, PL_tokenbuf);
8897             PL_bufptr = s = d;
8898             struct Perl_custom_infix_result *result;
8899             Newx(result, 1, struct Perl_custom_infix_result);
8900             result->def = def;
8901             result->parsedata = NULL;
8902             if(def->parse) {
8903                 (*def->parse)(aTHX_ &result->parsedata, def);
8904                 s = PL_bufptr; /* restore local s variable */
8905             }
8906             pl_yylval.pval = result;
8907             CLINE;
8908             OPERATOR(tokentype_for_plugop(def));
8909         }
8910     }
8911 
8912     /* Is this a label? */
8913     if (!anydelim && PL_expect == XSTATE
8914           && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
8915         s = d + 1;
8916         pl_yylval.opval =
8917             newSVOP(OP_CONST, 0,
8918                 newSVpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0));
8919         CLINE;
8920         TOKEN(LABEL);
8921     }
8922 
8923     /* Check for lexical sub */
8924     if (PL_expect != XOPERATOR) {
8925         char tmpbuf[sizeof PL_tokenbuf + 1];
8926         *tmpbuf = '&';
8927         Copy(PL_tokenbuf, tmpbuf+1, len, char);
8928         c.off = pad_findmy_pvn(tmpbuf, len+1, 0);
8929         if (c.off != NOT_IN_PAD) {
8930             assert(c.off); /* we assume this is boolean-true below */
8931             if (PAD_COMPNAME_FLAGS_isOUR(c.off)) {
8932                 HV *  const stash = PAD_COMPNAME_OURSTASH(c.off);
8933                 HEK * const stashname = HvNAME_HEK(stash);
8934                 c.sv = newSVhek(stashname);
8935                 sv_catpvs(c.sv, "::");
8936                 sv_catpvn_flags(c.sv, PL_tokenbuf, len,
8937                                 (UTF ? SV_CATUTF8 : SV_CATBYTES));
8938                 c.gv = gv_fetchsv(c.sv, GV_NOADD_NOINIT | SvUTF8(c.sv),
8939                                   SVt_PVCV);
8940                 c.off = 0;
8941                 if (!c.gv) {
8942                     ASSUME(c.sv && SvREFCNT(c.sv) == 1);
8943                     SvREFCNT_dec(c.sv);
8944                     c.sv = NULL;
8945                     return yyl_just_a_word(aTHX_ s, len, 0, c);
8946                 }
8947             }
8948             else {
8949                 c.rv2cv_op = newOP(OP_PADANY, 0);
8950                 c.rv2cv_op->op_targ = c.off;
8951                 c.cv = find_lexical_cv(c.off);
8952             }
8953             c.lex = TRUE;
8954             return yyl_just_a_word(aTHX_ s, len, 0, c);
8955         }
8956         c.off = 0;
8957     }
8958 
8959     /* Check for built-in keyword */
8960     key = keyword(PL_tokenbuf, len, 0);
8961 
8962     if (key < 0)
8963         key = yyl_secondclass_keyword(aTHX_ s, len, key, &orig_keyword, &c.gv, &c.gvp);
8964 
8965     if (key && key != KEY___DATA__ && key != KEY___END__
8966      && (!anydelim || *s != '#')) {
8967         /* no override, and not s### either; skipspace is safe here
8968          * check for => on following line */
8969         bool arrow;
8970         STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
8971         STRLEN   soff = s         - SvPVX(PL_linestr);
8972         s = peekspace(s);
8973         arrow = *s == '=' && s[1] == '>';
8974         PL_bufptr = SvPVX(PL_linestr) + bufoff;
8975         s         = SvPVX(PL_linestr) +   soff;
8976         if (arrow)
8977             return yyl_fatcomma(aTHX_ s, len);
8978     }
8979 
8980     return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c);
8981 }
8982 
8983 static int
yyl_try(pTHX_ char * s)8984 yyl_try(pTHX_ char *s)
8985 {
8986     char *d;
8987     GV *gv = NULL;
8988     int tok;
8989 
8990   retry:
8991     /* Check for plugged-in symbolic operator */
8992     if(PLUGINFIX_IS_ENABLED && isPLUGINFIX_FIRST(*s)) {
8993         struct Perl_custom_infix *def;
8994         char *s_end = s, *d = PL_tokenbuf;
8995         STRLEN len;
8996 
8997         /* Copy the longest sequence of isPLUGINFIX() chars into PL_tokenbuf */
8998         while(s_end < PL_bufend && d < PL_tokenbuf+sizeof(PL_tokenbuf)-1 && isPLUGINFIX(*s_end))
8999             *d++ = *s_end++;
9000         *d = '\0';
9001 
9002         if((len = (*PL_infix_plugin)(aTHX_ PL_tokenbuf, s_end - s, &def))) {
9003             s += len;
9004             struct Perl_custom_infix_result *result;
9005             Newx(result, 1, struct Perl_custom_infix_result);
9006             result->def = def;
9007             result->parsedata = NULL;
9008             if(def->parse) {
9009                 PL_bufptr = s;
9010                 (*def->parse)(aTHX_ &result->parsedata, def);
9011                 s = PL_bufptr; /* restore local s variable */
9012             }
9013             pl_yylval.pval = result;
9014             CLINE;
9015             OPERATOR(tokentype_for_plugop(def));
9016         }
9017     }
9018 
9019     switch (*s) {
9020     default:
9021         if (UTF ? isIDFIRST_utf8_safe(s, PL_bufend) : isALNUMC(*s)) {
9022             if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9023                 return tok;
9024             goto retry_bufptr;
9025         }
9026         yyl_croak_unrecognised(aTHX_ s);
9027 
9028     case 4:
9029     case 26:
9030         /* emulate EOF on ^D or ^Z */
9031         if ((tok = yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s)) != YYL_RETRY)
9032             return tok;
9033     retry_bufptr:
9034         s = PL_bufptr;
9035         goto retry;
9036 
9037     case 0:
9038         if ((!PL_rsfp || PL_lex_inwhat)
9039          && (!PL_parser->filtered || s+1 < PL_bufend)) {
9040             PL_last_uni = 0;
9041             PL_last_lop = 0;
9042             if (PL_lex_brackets
9043                 && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF)
9044             {
9045                 yyerror((const char *)
9046                         (PL_lex_formbrack
9047                          ? "Format not terminated"
9048                          : "Missing right curly or square bracket"));
9049             }
9050             DEBUG_T({
9051                 PerlIO_printf(Perl_debug_log, "### Tokener got EOF\n");
9052             });
9053             TOKEN(0);
9054         }
9055         if (s++ < PL_bufend)
9056             goto retry;  /* ignore stray nulls */
9057         PL_last_uni = 0;
9058         PL_last_lop = 0;
9059         if (!PL_in_eval && !PL_preambled) {
9060             PL_preambled = TRUE;
9061             if (PL_perldb) {
9062                 /* Generate a string of Perl code to load the debugger.
9063                  * If PERL5DB is set, it will return the contents of that,
9064                  * otherwise a compile-time require of perl5db.pl.  */
9065 
9066                 const char * const pdb = PerlEnv_getenv("PERL5DB");
9067 
9068                 if (pdb) {
9069                     sv_setpv(PL_linestr, pdb);
9070                     sv_catpvs(PL_linestr,";");
9071                 } else {
9072                     SETERRNO(0,SS_NORMAL);
9073                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
9074                 }
9075                 PL_parser->preambling = CopLINE(PL_curcop);
9076             } else
9077                 SvPVCLEAR(PL_linestr);
9078             if (PL_preambleav) {
9079                 SV **svp = AvARRAY(PL_preambleav);
9080                 SV **const end = svp + AvFILLp(PL_preambleav);
9081                 while(svp <= end) {
9082                     sv_catsv(PL_linestr, *svp);
9083                     ++svp;
9084                     sv_catpvs(PL_linestr, ";");
9085                 }
9086                 SvREFCNT_dec(MUTABLE_SV(PL_preambleav));
9087                 PL_preambleav = NULL;
9088             }
9089             if (PL_minus_E)
9090                 sv_catpvs(PL_linestr,
9091                           "use feature ':" STRINGIFY(PERL_REVISION) "." STRINGIFY(PERL_VERSION) "';");
9092             if (PL_minus_n || PL_minus_p) {
9093                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
9094                 if (PL_minus_l)
9095                     sv_catpvs(PL_linestr,"chomp;");
9096                 if (PL_minus_a) {
9097                     if (PL_minus_F) {
9098                         if (   (   *PL_splitstr == '/'
9099                                 || *PL_splitstr == '\''
9100                                 || *PL_splitstr == '"')
9101                             && strchr(PL_splitstr + 1, *PL_splitstr))
9102                         {
9103                             /* strchr is ok, because -F pattern can't contain
9104                              * embedded NULs */
9105                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
9106                         }
9107                         else {
9108                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
9109                                bytes can be used as quoting characters.  :-) */
9110                             const char *splits = PL_splitstr;
9111                             sv_catpvs(PL_linestr, "our @F=split(q\0");
9112                             do {
9113                                 /* Need to \ \s  */
9114                                 if (*splits == '\\')
9115                                     sv_catpvn(PL_linestr, splits, 1);
9116                                 sv_catpvn(PL_linestr, splits, 1);
9117                             } while (*splits++);
9118                             /* This loop will embed the trailing NUL of
9119                                PL_linestr as the last thing it does before
9120                                terminating.  */
9121                             sv_catpvs(PL_linestr, ");");
9122                         }
9123                     }
9124                     else
9125                         sv_catpvs(PL_linestr,"our @F=split(' ');");
9126                 }
9127             }
9128             sv_catpvs(PL_linestr, "\n");
9129             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
9130             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9131             PL_last_lop = PL_last_uni = NULL;
9132             if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash)
9133                 update_debugger_info(PL_linestr, NULL, 0);
9134             goto retry;
9135         }
9136         if ((tok = yyl_fake_eof(aTHX_ 0, cBOOL(PL_rsfp), s)) != YYL_RETRY)
9137             return tok;
9138         goto retry_bufptr;
9139 
9140     case '\r':
9141 #ifdef PERL_STRICT_CR
9142         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
9143         Perl_croak(aTHX_
9144       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
9145 #endif
9146     case ' ': case '\t': case '\f': case '\v':
9147         s++;
9148         goto retry;
9149 
9150     case '#':
9151     case '\n': {
9152         const bool needs_semicolon = yyl_eol_needs_semicolon(aTHX_ &s);
9153         if (needs_semicolon)
9154             TOKEN(PERLY_SEMICOLON);
9155         else
9156             goto retry;
9157     }
9158 
9159     case '-':
9160         return yyl_hyphen(aTHX_ s);
9161 
9162     case '+':
9163         return yyl_plus(aTHX_ s);
9164 
9165     case '*':
9166         return yyl_star(aTHX_ s);
9167 
9168     case '%':
9169         return yyl_percent(aTHX_ s);
9170 
9171     case '^':
9172         return yyl_caret(aTHX_ s);
9173 
9174     case '[':
9175         return yyl_leftsquare(aTHX_ s);
9176 
9177     case '~':
9178         return yyl_tilde(aTHX_ s);
9179 
9180     case ',':
9181         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
9182             TOKEN(0);
9183         s++;
9184         OPERATOR(PERLY_COMMA);
9185     case ':':
9186         if (s[1] == ':')
9187             return yyl_just_a_word(aTHX_ s, 0, 0, no_code);
9188         return yyl_colon(aTHX_ s + 1);
9189 
9190     case '(':
9191         return yyl_leftparen(aTHX_ s + 1);
9192 
9193     case ';':
9194         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
9195             TOKEN(0);
9196         CLINE;
9197         s++;
9198         PL_expect = XSTATE;
9199         TOKEN(PERLY_SEMICOLON);
9200 
9201     case ')':
9202         return yyl_rightparen(aTHX_ s);
9203 
9204     case ']':
9205         return yyl_rightsquare(aTHX_ s);
9206 
9207     case '{':
9208         return yyl_leftcurly(aTHX_ s + 1, 0);
9209 
9210     case '}':
9211         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
9212             TOKEN(0);
9213         return yyl_rightcurly(aTHX_ s, 0);
9214 
9215     case '&':
9216         return yyl_ampersand(aTHX_ s);
9217 
9218     case '|':
9219         return yyl_verticalbar(aTHX_ s);
9220 
9221     case '=':
9222         if (s[1] == '=' && (s == PL_linestart || s[-1] == '\n')
9223             && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "====="))
9224         {
9225             s = vcs_conflict_marker(s + 7);
9226             goto retry;
9227         }
9228 
9229         s++;
9230         {
9231             const char tmp = *s++;
9232             if (tmp == '=') {
9233                 if (!PL_lex_allbrackets
9234                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
9235                 {
9236                     s -= 2;
9237                     TOKEN(0);
9238                 }
9239                 ChEop(OP_EQ);
9240             }
9241             if (tmp == '>') {
9242                 if (!PL_lex_allbrackets
9243                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
9244                 {
9245                     s -= 2;
9246                     TOKEN(0);
9247                 }
9248                 OPERATOR(PERLY_COMMA);
9249             }
9250             if (tmp == '~')
9251                 PMop(OP_MATCH);
9252             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
9253                 && memCHRs("+-*/%.^&|<",tmp))
9254                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9255                             "Reversed %c= operator",(int)tmp);
9256             s--;
9257             if (PL_expect == XSTATE
9258                 && isALPHA(tmp)
9259                 && (s == PL_linestart+1 || s[-2] == '\n') )
9260             {
9261                 if (   (PL_in_eval && !PL_rsfp && !PL_parser->filtered)
9262                     || PL_lex_state != LEX_NORMAL)
9263                 {
9264                     d = PL_bufend;
9265                     while (s < d) {
9266                         if (*s++ == '\n') {
9267                             incline(s, PL_bufend);
9268                             if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=cut"))
9269                             {
9270                                 s = (char *) memchr(s,'\n', d - s);
9271                                 if (s)
9272                                     s++;
9273                                 else
9274                                     s = d;
9275                                 incline(s, PL_bufend);
9276                                 goto retry;
9277                             }
9278                         }
9279                     }
9280                     goto retry;
9281                 }
9282                 s = PL_bufend;
9283                 PL_parser->in_pod = 1;
9284                 goto retry;
9285             }
9286         }
9287         if (PL_expect == XBLOCK) {
9288             const char *t = s;
9289 #ifdef PERL_STRICT_CR
9290             while (SPACE_OR_TAB(*t))
9291 #else
9292             while (SPACE_OR_TAB(*t) || *t == '\r')
9293 #endif
9294                 t++;
9295             if (*t == '\n' || *t == '#') {
9296                 ENTER_with_name("lex_format");
9297                 SAVEI8(PL_parser->form_lex_state);
9298                 SAVEI32(PL_lex_formbrack);
9299                 PL_parser->form_lex_state = PL_lex_state;
9300                 PL_lex_formbrack = PL_lex_brackets + 1;
9301                 PL_parser->sub_error_count = PL_error_count;
9302                 return yyl_leftcurly(aTHX_ s, 1);
9303             }
9304         }
9305         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
9306             s--;
9307             TOKEN(0);
9308         }
9309         pl_yylval.ival = 0;
9310         OPERATOR(ASSIGNOP);
9311 
9312         case '!':
9313         return yyl_bang(aTHX_ s + 1);
9314 
9315     case '<':
9316         if (s[1] == '<' && (s == PL_linestart || s[-1] == '\n')
9317             && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "<<<<<"))
9318         {
9319             s = vcs_conflict_marker(s + 7);
9320             goto retry;
9321         }
9322         return yyl_leftpointy(aTHX_ s);
9323 
9324     case '>':
9325         if (s[1] == '>' && (s == PL_linestart || s[-1] == '\n')
9326             && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), ">>>>>"))
9327         {
9328             s = vcs_conflict_marker(s + 7);
9329             goto retry;
9330         }
9331         return yyl_rightpointy(aTHX_ s + 1);
9332 
9333     case '$':
9334         return yyl_dollar(aTHX_ s);
9335 
9336     case '@':
9337         return yyl_snail(aTHX_ s);
9338 
9339     case '/':			/* may be division, defined-or, or pattern */
9340         return yyl_slash(aTHX_ s);
9341 
9342      case '?':			/* conditional */
9343         s++;
9344         if (!PL_lex_allbrackets
9345             && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE)
9346         {
9347             s--;
9348             TOKEN(0);
9349         }
9350         PL_lex_allbrackets++;
9351         OPERATOR(PERLY_QUESTION_MARK);
9352 
9353     case '.':
9354         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
9355 #ifdef PERL_STRICT_CR
9356             && s[1] == '\n'
9357 #else
9358             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
9359 #endif
9360             && (s == PL_linestart || s[-1] == '\n') )
9361         {
9362             PL_expect = XSTATE;
9363             /* formbrack==2 means dot seen where arguments expected */
9364             return yyl_rightcurly(aTHX_ s, 2);
9365         }
9366         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
9367             s += 3;
9368             OPERATOR(YADAYADA);
9369         }
9370         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
9371             char tmp = *s++;
9372             if (*s == tmp) {
9373                 if (!PL_lex_allbrackets
9374                     && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE)
9375                 {
9376                     s--;
9377                     TOKEN(0);
9378                 }
9379                 s++;
9380                 if (*s == tmp) {
9381                     s++;
9382                     pl_yylval.ival = OPf_SPECIAL;
9383                 }
9384                 else
9385                     pl_yylval.ival = 0;
9386                 OPERATOR(DOTDOT);
9387             }
9388             if (*s == '=' && !PL_lex_allbrackets
9389                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
9390             {
9391                 s--;
9392                 TOKEN(0);
9393             }
9394             Aop(OP_CONCAT);
9395         }
9396         /* FALLTHROUGH */
9397     case '0': case '1': case '2': case '3': case '4':
9398     case '5': case '6': case '7': case '8': case '9':
9399         s = scan_num(s, &pl_yylval);
9400         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
9401         if (PL_expect == XOPERATOR)
9402             no_op("Number",s);
9403         TERM(THING);
9404 
9405     case '\'':
9406         return yyl_sglquote(aTHX_ s);
9407 
9408     case '"':
9409         return yyl_dblquote(aTHX_ s);
9410 
9411     case '`':
9412         return yyl_backtick(aTHX_ s);
9413 
9414     case '\\':
9415         return yyl_backslash(aTHX_ s + 1);
9416 
9417     case 'v':
9418         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
9419             char *start = s + 2;
9420             while (isDIGIT(*start) || *start == '_')
9421                 start++;
9422             if (*start == '.' && isDIGIT(start[1])) {
9423                 s = scan_num(s, &pl_yylval);
9424                 TERM(THING);
9425             }
9426             else if ((*start == ':' && start[1] == ':')
9427                      || (PL_expect == XSTATE && *start == ':')) {
9428                 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9429                     return tok;
9430                 goto retry_bufptr;
9431             }
9432             else if (PL_expect == XSTATE) {
9433                 d = start;
9434                 while (d < PL_bufend && isSPACE(*d)) d++;
9435                 if (*d == ':') {
9436                     if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9437                         return tok;
9438                     goto retry_bufptr;
9439                 }
9440             }
9441             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
9442             if (!isALPHA(*start) && (PL_expect == XTERM
9443                         || PL_expect == XREF || PL_expect == XSTATE
9444                         || PL_expect == XTERMORDORDOR)) {
9445                 GV *const gv = gv_fetchpvn_flags(s, start - s,
9446                                                     UTF ? SVf_UTF8 : 0, SVt_PVCV);
9447                 if (!gv) {
9448                     s = scan_num(s, &pl_yylval);
9449                     TERM(THING);
9450                 }
9451             }
9452         }
9453         if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9454             return tok;
9455         goto retry_bufptr;
9456 
9457     case 'x':
9458         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
9459             s++;
9460             Mop(OP_REPEAT);
9461         }
9462         if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9463             return tok;
9464         goto retry_bufptr;
9465 
9466     case '_':
9467     case 'a': case 'A':
9468     case 'b': case 'B':
9469     case 'c': case 'C':
9470     case 'd': case 'D':
9471     case 'e': case 'E':
9472     case 'f': case 'F':
9473     case 'g': case 'G':
9474     case 'h': case 'H':
9475     case 'i': case 'I':
9476     case 'j': case 'J':
9477     case 'k': case 'K':
9478     case 'l': case 'L':
9479     case 'm': case 'M':
9480     case 'n': case 'N':
9481     case 'o': case 'O':
9482     case 'p': case 'P':
9483     case 'q': case 'Q':
9484     case 'r': case 'R':
9485     case 's': case 'S':
9486     case 't': case 'T':
9487     case 'u': case 'U':
9488               case 'V':
9489     case 'w': case 'W':
9490               case 'X':
9491     case 'y': case 'Y':
9492     case 'z': case 'Z':
9493         if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9494             return tok;
9495         goto retry_bufptr;
9496     }
9497 }
9498 
9499 
9500 /*
9501   yylex
9502 
9503   Works out what to call the token just pulled out of the input
9504   stream.  The yacc parser takes care of taking the ops we return and
9505   stitching them into a tree.
9506 
9507   Returns:
9508     The type of the next token
9509 
9510   Structure:
9511       Check if we have already built the token; if so, use it.
9512       Switch based on the current state:
9513           - if we have a case modifier in a string, deal with that
9514           - handle other cases of interpolation inside a string
9515           - scan the next line if we are inside a format
9516       In the normal state, switch on the next character:
9517           - default:
9518             if alphabetic, go to key lookup
9519             unrecognized character - croak
9520           - 0/4/26: handle end-of-line or EOF
9521           - cases for whitespace
9522           - \n and #: handle comments and line numbers
9523           - various operators, brackets and sigils
9524           - numbers
9525           - quotes
9526           - 'v': vstrings (or go to key lookup)
9527           - 'x' repetition operator (or go to key lookup)
9528           - other ASCII alphanumerics (key lookup begins here):
9529               word before => ?
9530               keyword plugin
9531               scan built-in keyword (but do nothing with it yet)
9532               check for statement label
9533               check for lexical subs
9534                   return yyl_just_a_word if there is one
9535               see whether built-in keyword is overridden
9536               switch on keyword number:
9537                   - default: return yyl_just_a_word:
9538                       not a built-in keyword; handle bareword lookup
9539                       disambiguate between method and sub call
9540                       fall back to bareword
9541                   - cases for built-in keywords
9542 */
9543 
9544 int
Perl_yylex(pTHX)9545 Perl_yylex(pTHX)
9546 {
9547     char *s = PL_bufptr;
9548 
9549     if (UNLIKELY(PL_parser->recheck_utf8_validity)) {
9550         const U8* first_bad_char_loc;
9551         if (UTF && UNLIKELY(! is_utf8_string_loc((U8 *) PL_bufptr,
9552                                                         PL_bufend - PL_bufptr,
9553                                                         &first_bad_char_loc)))
9554         {
9555             _force_out_malformed_utf8_message(first_bad_char_loc,
9556                                               (U8 *) PL_bufend,
9557                                               0,
9558                                               1 /* 1 means die */ );
9559             NOT_REACHED; /* NOTREACHED */
9560         }
9561         PL_parser->recheck_utf8_validity = FALSE;
9562     }
9563     DEBUG_T( {
9564         SV* tmp = newSVpvs("");
9565         PerlIO_printf(Perl_debug_log, "### %" LINE_Tf ":LEX_%s/X%s %s\n",
9566             CopLINE(PL_curcop),
9567             lex_state_names[PL_lex_state],
9568             exp_name[PL_expect],
9569             pv_display(tmp, s, strlen(s), 0, 60));
9570         SvREFCNT_dec(tmp);
9571     } );
9572 
9573     /* when we've already built the next token, just pull it out of the queue */
9574     if (PL_nexttoke) {
9575         PL_nexttoke--;
9576         pl_yylval = PL_nextval[PL_nexttoke];
9577         {
9578             I32 next_type;
9579             next_type = PL_nexttype[PL_nexttoke];
9580             if (next_type & (7<<24)) {
9581                 if (next_type & (1<<24)) {
9582                     if (PL_lex_brackets > 100)
9583                         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
9584                     PL_lex_brackstack[PL_lex_brackets++] =
9585                         (char) ((U8) (next_type >> 16));
9586                 }
9587                 if (next_type & (2<<24))
9588                     PL_lex_allbrackets++;
9589                 if (next_type & (4<<24))
9590                     PL_lex_allbrackets--;
9591                 next_type &= 0xffff;
9592             }
9593             return REPORT(next_type == 'p' ? pending_ident() : next_type);
9594         }
9595     }
9596 
9597     switch (PL_lex_state) {
9598     case LEX_NORMAL:
9599     case LEX_INTERPNORMAL:
9600         break;
9601 
9602     /* interpolated case modifiers like \L \U, including \Q and \E.
9603        when we get here, PL_bufptr is at the \
9604     */
9605     case LEX_INTERPCASEMOD:
9606         /* handle \E or end of string */
9607         return yyl_interpcasemod(aTHX_ s);
9608 
9609     case LEX_INTERPPUSH:
9610         return REPORT(sublex_push());
9611 
9612     case LEX_INTERPSTART:
9613         if (PL_bufptr == PL_bufend)
9614             return REPORT(sublex_done());
9615         DEBUG_T({
9616             if(*PL_bufptr != '(')
9617                 PerlIO_printf(Perl_debug_log, "### Interpolated variable\n");
9618         });
9619         PL_expect = XTERM;
9620         /* for /@a/, we leave the joining for the regex engine to do
9621          * (unless we're within \Q etc) */
9622         PL_lex_dojoin = (*PL_bufptr == '@'
9623                             && (!PL_lex_inpat || PL_lex_casemods));
9624         PL_lex_state = LEX_INTERPNORMAL;
9625         if (PL_lex_dojoin) {
9626             NEXTVAL_NEXTTOKE.ival = 0;
9627             force_next(PERLY_COMMA);
9628             force_ident("\"", PERLY_DOLLAR);
9629             NEXTVAL_NEXTTOKE.ival = 0;
9630             force_next(PERLY_DOLLAR);
9631             NEXTVAL_NEXTTOKE.ival = 0;
9632             force_next((2<<24)|PERLY_PAREN_OPEN);
9633             NEXTVAL_NEXTTOKE.ival = OP_JOIN;	/* emulate join($", ...) */
9634             force_next(FUNC);
9635         }
9636         /* Convert (?{...}) or (*{...}) and friends to 'do {...}' */
9637         if (PL_lex_inpat && *PL_bufptr == '(') {
9638             PL_parser->lex_shared->re_eval_start = PL_bufptr;
9639             PL_bufptr += 2;
9640             if (*PL_bufptr != '{')
9641                 PL_bufptr++;
9642             PL_expect = XTERMBLOCK;
9643             force_next(KW_DO);
9644         }
9645 
9646         if (PL_lex_starts++) {
9647             s = PL_bufptr;
9648             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
9649             if (!PL_lex_casemods && PL_lex_inpat)
9650                 TOKEN(PERLY_COMMA);
9651             else
9652                 AopNOASSIGN(OP_CONCAT);
9653         }
9654         return yylex();
9655 
9656     case LEX_INTERPENDMAYBE:
9657         if (intuit_more(PL_bufptr, PL_bufend)) {
9658             PL_lex_state = LEX_INTERPNORMAL;	/* false alarm, more expr */
9659             break;
9660         }
9661         /* FALLTHROUGH */
9662 
9663     case LEX_INTERPEND:
9664         if (PL_lex_dojoin) {
9665             const U8 dojoin_was = PL_lex_dojoin;
9666             PL_lex_dojoin = FALSE;
9667             PL_lex_state = LEX_INTERPCONCAT;
9668             PL_lex_allbrackets--;
9669             return REPORT(dojoin_was == 1 ? (int)PERLY_PAREN_CLOSE : (int)POSTJOIN);
9670         }
9671         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
9672             && SvEVALED(PL_lex_repl))
9673         {
9674             if (PL_bufptr != PL_bufend)
9675                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
9676             PL_lex_repl = NULL;
9677         }
9678         /* Paranoia.  re_eval_start is adjusted when S_scan_heredoc sets
9679            re_eval_str.  If the here-doc body's length equals the previous
9680            value of re_eval_start, re_eval_start will now be null.  So
9681            check re_eval_str as well. */
9682         if (PL_parser->lex_shared->re_eval_start
9683          || PL_parser->lex_shared->re_eval_str) {
9684             SV *sv;
9685             if (*PL_bufptr != ')')
9686                 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
9687             PL_bufptr++;
9688             /* having compiled a (?{..}) expression, return the original
9689              * text too, as a const */
9690             if (PL_parser->lex_shared->re_eval_str) {
9691                 sv = PL_parser->lex_shared->re_eval_str;
9692                 PL_parser->lex_shared->re_eval_str = NULL;
9693                 SvCUR_set(sv,
9694                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
9695                 SvPV_shrink_to_cur(sv);
9696             }
9697             else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
9698                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
9699             NEXTVAL_NEXTTOKE.opval =
9700                     newSVOP(OP_CONST, 0,
9701                                  sv);
9702             force_next(THING);
9703             PL_parser->lex_shared->re_eval_start = NULL;
9704             PL_expect = XTERM;
9705             return REPORT(PERLY_COMMA);
9706         }
9707 
9708         /* FALLTHROUGH */
9709     case LEX_INTERPCONCAT:
9710 #ifdef DEBUGGING
9711         if (PL_lex_brackets)
9712             Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
9713                        (long) PL_lex_brackets);
9714 #endif
9715         if (PL_bufptr == PL_bufend)
9716             return REPORT(sublex_done());
9717 
9718         /* m'foo' still needs to be parsed for possible (?{...}) */
9719         if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
9720             SV *sv = newSVsv(PL_linestr);
9721             sv = tokeq(sv);
9722             pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
9723             s = PL_bufend;
9724         }
9725         else {
9726             int save_error_count = PL_error_count;
9727 
9728             s = scan_const(PL_bufptr);
9729 
9730             /* Set flag if this was a pattern and there were errors.  op.c will
9731              * refuse to compile a pattern with this flag set.  Otherwise, we
9732              * could get segfaults, etc. */
9733             if (PL_lex_inpat && PL_error_count > save_error_count) {
9734                 ((PMOP*)PL_lex_inpat)->op_pmflags |= PMf_HAS_ERROR;
9735             }
9736             if (*s == '\\')
9737                 PL_lex_state = LEX_INTERPCASEMOD;
9738             else
9739                 PL_lex_state = LEX_INTERPSTART;
9740         }
9741 
9742         if (s != PL_bufptr) {
9743             NEXTVAL_NEXTTOKE = pl_yylval;
9744             PL_expect = XTERM;
9745             force_next(THING);
9746             if (PL_lex_starts++) {
9747                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
9748                 if (!PL_lex_casemods && PL_lex_inpat)
9749                     TOKEN(PERLY_COMMA);
9750                 else
9751                     AopNOASSIGN(OP_CONCAT);
9752             }
9753             else {
9754                 PL_bufptr = s;
9755                 return yylex();
9756             }
9757         }
9758 
9759         return yylex();
9760     case LEX_FORMLINE:
9761         if (PL_parser->sub_error_count != PL_error_count) {
9762             /* There was an error parsing a formline, which tends to
9763                mess up the parser.
9764                Unlike interpolated sub-parsing, we can't treat any of
9765                these as recoverable, so no need to check sub_no_recover.
9766             */
9767             yyquit();
9768         }
9769         assert(PL_lex_formbrack);
9770         s = scan_formline(PL_bufptr);
9771         if (!PL_lex_formbrack)
9772             return yyl_rightcurly(aTHX_ s, 1);
9773         PL_bufptr = s;
9774         return yylex();
9775     }
9776 
9777     /* We really do *not* want PL_linestr ever becoming a COW. */
9778     assert (!SvIsCOW(PL_linestr));
9779     s = PL_bufptr;
9780     PL_oldoldbufptr = PL_oldbufptr;
9781     PL_oldbufptr = s;
9782 
9783     if (PL_in_my == KEY_sigvar) {
9784         PL_parser->saw_infix_sigil = 0;
9785         return yyl_sigvar(aTHX_ s);
9786     }
9787 
9788     {
9789         /* yyl_try() and its callees might consult PL_parser->saw_infix_sigil.
9790            On its return, we then need to set it to indicate whether the token
9791            we just encountered was an infix operator that (if we hadn't been
9792            expecting an operator) have been a sigil.
9793         */
9794         bool expected_operator = (PL_expect == XOPERATOR);
9795         int ret = yyl_try(aTHX_ s);
9796         switch (pl_yylval.ival) {
9797         case OP_BIT_AND:
9798         case OP_MODULO:
9799         case OP_MULTIPLY:
9800         case OP_NBIT_AND:
9801             if (expected_operator) {
9802                 PL_parser->saw_infix_sigil = 1;
9803                 break;
9804             }
9805             /* FALLTHROUGH */
9806         default:
9807             PL_parser->saw_infix_sigil = 0;
9808         }
9809         return ret;
9810     }
9811 }
9812 
9813 
9814 /*
9815   S_pending_ident
9816 
9817   Looks up an identifier in the pad or in a package
9818 
9819   PL_in_my == KEY_sigvar indicates that this is a subroutine signature variable
9820   rather than a plain pad var.
9821 
9822   Returns:
9823     PRIVATEREF if this is a lexical name.
9824     BAREWORD   if this belongs to a package.
9825 
9826   Structure:
9827       if we're in a my declaration
9828           croak if they tried to say my($foo::bar)
9829           build the ops for a my() declaration
9830       if it's an access to a my() variable
9831           build ops for access to a my() variable
9832       if in a dq string, and they've said @foo and we can't find @foo
9833           warn
9834       build ops for a bareword
9835 */
9836 
9837 static int
S_pending_ident(pTHX)9838 S_pending_ident(pTHX)
9839 {
9840     PADOFFSET tmp = 0;
9841     const char pit = (char)pl_yylval.ival;
9842     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
9843     /* All routes through this function want to know if there is a colon.  */
9844     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
9845 
9846     DEBUG_T({ PerlIO_printf(Perl_debug_log,
9847           "### Pending identifier '%s'\n", PL_tokenbuf); });
9848     assert(tokenbuf_len >= 2);
9849 
9850     /* if we're in a my(), we can't allow dynamics here.
9851        $foo'bar has already been turned into $foo::bar, so
9852        just check for colons.
9853 
9854        if it's a legal name, the OP is a PADANY.
9855     */
9856     if (PL_in_my) {
9857         if (PL_in_my == KEY_our) {	/* "our" is merely analogous to "my" */
9858             if (has_colon)
9859                 /* diag_listed_as: No package name allowed for variable %s
9860                                    in "our" */
9861                 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
9862                                   "%s %s in \"our\"",
9863                                   *PL_tokenbuf=='&' ? "subroutine" : "variable",
9864                                   PL_tokenbuf), UTF ? SVf_UTF8 : 0);
9865             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
9866         }
9867         else {
9868             OP *o;
9869             if (has_colon) {
9870                 /* "my" variable %s can't be in a package */
9871                 /* PL_no_myglob is constant */
9872                 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
9873                 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
9874                             PL_in_my == KEY_my ? "my" :
9875                             PL_in_my == KEY_field ? "field" : "state",
9876                             *PL_tokenbuf == '&' ? "subroutine" : "variable",
9877                             PL_tokenbuf),
9878                             UTF ? SVf_UTF8 : 0);
9879                 GCC_DIAG_RESTORE_STMT;
9880             }
9881 
9882             if (PL_in_my == KEY_sigvar) {
9883                 /* A signature 'padop' needs in addition, an op_first to
9884                  * point to a child sigdefelem, and an extra field to hold
9885                  * the signature index. We can achieve both by using an
9886                  * UNOP_AUX and (ab)using the op_aux field to hold the
9887                  * index. If we ever need more fields, use a real malloced
9888                  * aux strut instead.
9889                  */
9890                 o = newUNOP_AUX(OP_ARGELEM, 0, NULL,
9891                                     INT2PTR(UNOP_AUX_item *,
9892                                         (PL_parser->sig_elems)));
9893                 o->op_private |= (  PL_tokenbuf[0] == '$' ? OPpARGELEM_SV
9894                                   : PL_tokenbuf[0] == '@' ? OPpARGELEM_AV
9895                                   :                         OPpARGELEM_HV);
9896             }
9897             else
9898                 o = newOP(OP_PADANY, 0);
9899             o->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
9900                                                         UTF ? SVf_UTF8 : 0);
9901             if (PL_in_my == KEY_sigvar)
9902                 PL_in_my = 0;
9903 
9904             pl_yylval.opval = o;
9905             return PRIVATEREF;
9906         }
9907     }
9908 
9909     /*
9910        build the ops for accesses to a my() variable.
9911     */
9912 
9913     if (!has_colon) {
9914         if (!PL_in_my)
9915             tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
9916                                  0);
9917         if (tmp != NOT_IN_PAD) {
9918             /* might be an "our" variable" */
9919             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9920                 /* build ops for a bareword */
9921                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
9922                 HEK * const stashname = HvNAME_HEK(stash);
9923                 SV *  const sym = newSVhek(stashname);
9924                 sv_catpvs(sym, "::");
9925                 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
9926                 pl_yylval.opval = newSVOP(OP_CONST, 0, sym);
9927                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9928                 if (pit != '&')
9929                   gv_fetchsv(sym,
9930                     GV_ADDMULTI,
9931                     ((PL_tokenbuf[0] == '$') ? SVt_PV
9932                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9933                      : SVt_PVHV));
9934                 return BAREWORD;
9935             }
9936 
9937             pl_yylval.opval = newOP(OP_PADANY, 0);
9938             pl_yylval.opval->op_targ = tmp;
9939             return PRIVATEREF;
9940         }
9941     }
9942 
9943     /*
9944        Whine if they've said @foo or @foo{key} in a doublequoted string,
9945        and @foo (or %foo) isn't a variable we can find in the symbol
9946        table.
9947     */
9948     if (ckWARN(WARN_AMBIGUOUS)
9949         && pit == '@'
9950         && PL_lex_state != LEX_NORMAL
9951         && !PL_lex_brackets)
9952     {
9953         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9954                                          ( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG,
9955                                          SVt_PVAV);
9956         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
9957            )
9958         {
9959             /* Downgraded from fatal to warning 20000522 mjd */
9960             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9961                         "Possible unintended interpolation of %" UTF8f
9962                         " in string",
9963                         UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
9964         }
9965     }
9966 
9967     /* build ops for a bareword */
9968     pl_yylval.opval = newSVOP(OP_CONST, 0,
9969                                    newSVpvn_flags(PL_tokenbuf + 1,
9970                                                       tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9971                                                       UTF ? SVf_UTF8 : 0 ));
9972     pl_yylval.opval->op_private = OPpCONST_ENTERED;
9973     if (pit != '&')
9974         gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9975                      (PL_in_eval ? GV_ADDMULTI : GV_ADD)
9976                      | ( UTF ? SVf_UTF8 : 0 ),
9977                      ((PL_tokenbuf[0] == '$') ? SVt_PV
9978                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9979                       : SVt_PVHV));
9980     return BAREWORD;
9981 }
9982 
9983 STATIC void
S_checkcomma(pTHX_ const char * s,const char * name,const char * what)9984 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
9985 {
9986     PERL_ARGS_ASSERT_CHECKCOMMA;
9987 
9988     if (*s == ' ' && s[1] == '(') {	/* XXX gotta be a better way */
9989         if (ckWARN(WARN_SYNTAX)) {
9990             int level = 1;
9991             const char *w;
9992             for (w = s+2; *w && level; w++) {
9993                 if (*w == '(')
9994                     ++level;
9995                 else if (*w == ')')
9996                     --level;
9997             }
9998             while (isSPACE(*w))
9999                 ++w;
10000             /* the list of chars below is for end of statements or
10001              * block / parens, boolean operators (&&, ||, //) and branch
10002              * constructs (or, and, if, until, unless, while, err, for).
10003              * Not a very solid hack... */
10004             if (!*w || !memCHRs(";&/|})]oaiuwef!=", *w))
10005                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10006                             "%s (...) interpreted as function",name);
10007         }
10008     }
10009     while (s < PL_bufend && isSPACE(*s))
10010         s++;
10011     if (*s == '(')
10012         s++;
10013     while (s < PL_bufend && isSPACE(*s))
10014         s++;
10015     if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
10016         const char * const w = s;
10017         s += UTF ? UTF8SKIP(s) : 1;
10018         while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
10019             s += UTF ? UTF8SKIP(s) : 1;
10020         while (s < PL_bufend && isSPACE(*s))
10021             s++;
10022         if (*s == ',') {
10023             GV* gv;
10024             if (keyword(w, s - w, 0))
10025                 return;
10026 
10027             gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
10028             if (gv && GvCVu(gv))
10029                 return;
10030             if (s - w <= 254) {
10031                 PADOFFSET off;
10032                 char tmpbuf[256];
10033                 Copy(w, tmpbuf+1, s - w, char);
10034                 *tmpbuf = '&';
10035                 off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
10036                 if (off != NOT_IN_PAD) return;
10037             }
10038             Perl_croak(aTHX_ "No comma allowed after %s", what);
10039         }
10040     }
10041 }
10042 
10043 /* S_new_constant(): do any overload::constant lookup.
10044 
10045    Either returns sv, or mortalizes/frees sv and returns a new SV*.
10046    Best used as sv=new_constant(..., sv, ...).
10047    If s, pv are NULL, calls subroutine with one argument,
10048    and <type> is used with error messages only.
10049    <type> is assumed to be well formed UTF-8.
10050 
10051    If error_msg is not NULL, *error_msg will be set to any error encountered.
10052    Otherwise yyerror() will be used to output it */
10053 
10054 STATIC SV *
S_new_constant(pTHX_ const char * s,STRLEN len,const char * key,STRLEN keylen,SV * sv,SV * pv,const char * type,STRLEN typelen,const char ** error_msg)10055 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
10056                SV *sv, SV *pv, const char *type, STRLEN typelen,
10057                const char ** error_msg)
10058 {
10059     dSP;
10060     HV * table = GvHV(PL_hintgv);		 /* ^H */
10061     SV *res;
10062     SV *errsv = NULL;
10063     SV **cvp;
10064     SV *cv, *typesv;
10065     const char *why1 = "", *why2 = "", *why3 = "";
10066     const char * optional_colon = ":";  /* Only some messages have a colon */
10067     char *msg;
10068 
10069     PERL_ARGS_ASSERT_NEW_CONSTANT;
10070     /* We assume that this is true: */
10071     assert(type || s);
10072 
10073     sv_2mortal(sv);			/* Parent created it permanently */
10074 
10075     if (   ! table
10076         || ! (PL_hints & HINT_LOCALIZE_HH))
10077     {
10078         why1 = "unknown";
10079         optional_colon = "";
10080         goto report;
10081     }
10082 
10083     cvp = hv_fetch(table, key, keylen, FALSE);
10084     if (!cvp || !SvOK(*cvp)) {
10085         why1 = "$^H{";
10086         why2 = key;
10087         why3 = "} is not defined";
10088         goto report;
10089     }
10090 
10091     cv = *cvp;
10092     if (!pv && s)
10093         pv = newSVpvn_flags(s, len, SVs_TEMP);
10094     if (type && pv)
10095         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
10096     else
10097         typesv = &PL_sv_undef;
10098 
10099     PUSHSTACKi(PERLSI_OVERLOAD);
10100     ENTER ;
10101     SAVETMPS;
10102 
10103     PUSHMARK(SP) ;
10104     EXTEND(sp, 3);
10105     if (pv)
10106         PUSHs(pv);
10107     PUSHs(sv);
10108     if (pv)
10109         PUSHs(typesv);
10110     PUTBACK;
10111     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
10112 
10113     SPAGAIN ;
10114 
10115     /* Check the eval first */
10116     if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
10117         STRLEN errlen;
10118         const char * errstr;
10119         sv_catpvs(errsv, "Propagated");
10120         errstr = SvPV_const(errsv, errlen);
10121         yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
10122         (void)POPs;
10123         res = SvREFCNT_inc_simple_NN(sv);
10124     }
10125     else {
10126         res = POPs;
10127         SvREFCNT_inc_simple_void_NN(res);
10128     }
10129 
10130     PUTBACK ;
10131     FREETMPS ;
10132     LEAVE ;
10133     POPSTACK;
10134 
10135     if (SvOK(res)) {
10136         return res;
10137     }
10138 
10139     sv = res;
10140     (void)sv_2mortal(sv);
10141 
10142     why1 = "Call to &{$^H{";
10143     why2 = key;
10144     why3 = "}} did not return a defined value";
10145 
10146   report:
10147 
10148     msg = Perl_form(aTHX_ "Constant(%.*s)%s %s%s%s",
10149                         (int)(type ? typelen : len),
10150                         (type ? type: s),
10151                         optional_colon,
10152                         why1, why2, why3);
10153     if (error_msg) {
10154         *error_msg = msg;
10155     }
10156     else {
10157         yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
10158     }
10159     return SvREFCNT_inc_simple_NN(sv);
10160 }
10161 
10162 PERL_STATIC_INLINE void
S_parse_ident(pTHX_ char ** s,char ** d,char * const e,int allow_package,bool is_utf8,bool check_dollar,bool tick_warn)10163 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
10164                     bool is_utf8, bool check_dollar, bool tick_warn)
10165 {
10166     int saw_tick = 0;
10167     const char *olds = *s;
10168     PERL_ARGS_ASSERT_PARSE_IDENT;
10169 
10170     while (*s < PL_bufend) {
10171         if (*d >= e)
10172             Perl_croak(aTHX_ "%s", ident_too_long);
10173         if (is_utf8 && isIDFIRST_utf8_safe(*s, PL_bufend)) {
10174              /* The UTF-8 case must come first, otherwise things
10175              * like c\N{COMBINING TILDE} would start failing, as the
10176              * isWORDCHAR_A case below would gobble the 'c' up.
10177              */
10178 
10179             char *t = *s + UTF8SKIP(*s);
10180             while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) {
10181                 t += UTF8SKIP(t);
10182             }
10183             if (*d + (t - *s) > e)
10184                 Perl_croak(aTHX_ "%s", ident_too_long);
10185             Copy(*s, *d, t - *s, char);
10186             *d += t - *s;
10187             *s = t;
10188         }
10189         else if ( isWORDCHAR_A(**s) ) {
10190             do {
10191                 *(*d)++ = *(*s)++;
10192             } while (isWORDCHAR_A(**s) && *d < e);
10193         }
10194         else if (   allow_package
10195                  && **s == '\''
10196                  && isIDFIRST_lazy_if_safe((*s)+1, PL_bufend, is_utf8))
10197         {
10198             *(*d)++ = ':';
10199             *(*d)++ = ':';
10200             (*s)++;
10201             saw_tick++;
10202         }
10203         else if (allow_package && **s == ':' && (*s)[1] == ':'
10204            /* Disallow things like Foo::$bar. For the curious, this is
10205             * the code path that triggers the "Bad name after" warning
10206             * when looking for barewords.
10207             */
10208            && !(check_dollar && (*s)[2] == '$')) {
10209             *(*d)++ = *(*s)++;
10210             *(*d)++ = *(*s)++;
10211         }
10212         else
10213             break;
10214     }
10215     if (UNLIKELY(saw_tick && tick_warn && ckWARN2_d(WARN_SYNTAX, WARN_DEPRECATED__APOSTROPHE_AS_PACKAGE_SEPARATOR))) {
10216         if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10217             char *this_d;
10218             char *d2;
10219             Newx(this_d, *s - olds + saw_tick + 2, char); /* +2 for $# */
10220             d2 = this_d;
10221             SAVEFREEPV(this_d);
10222 
10223             Perl_warner(aTHX_ packWARN2(WARN_SYNTAX, WARN_DEPRECATED__APOSTROPHE_AS_PACKAGE_SEPARATOR),
10224                         "Old package separator used in string");
10225             if (olds[-1] == '#')
10226                 *d2++ = olds[-2];
10227             *d2++ = olds[-1];
10228             while (olds < *s) {
10229                 if (*olds == '\'') {
10230                     *d2++ = '\\';
10231                     *d2++ = *olds++;
10232                 }
10233                 else
10234                     *d2++ = *olds++;
10235             }
10236             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10237                         "\t(Did you mean \"%" UTF8f "\" instead?)\n",
10238                         UTF8fARG(is_utf8, d2-this_d, this_d));
10239         }
10240         else {
10241             Perl_warner(aTHX_ packWARN2(WARN_SYNTAX, WARN_DEPRECATED__APOSTROPHE_AS_PACKAGE_SEPARATOR),
10242                         "Old package separator \"'\" deprecated");
10243         }
10244     }
10245     return;
10246 }
10247 
10248 /* Returns a NUL terminated string, with the length of the string written to
10249    *slp
10250 
10251    scan_word6() may be removed once ' in names is removed.
10252    */
10253 char *
Perl_scan_word6(pTHX_ char * s,char * dest,STRLEN destlen,int allow_package,STRLEN * slp,bool warn_tick)10254 Perl_scan_word6(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp, bool warn_tick)
10255 {
10256     char *d = dest;
10257     char * const e = d + destlen - 3;  /* two-character token, ending NUL */
10258     bool is_utf8 = cBOOL(UTF);
10259 
10260     PERL_ARGS_ASSERT_SCAN_WORD6;
10261 
10262     parse_ident(&s, &d, e, allow_package, is_utf8, TRUE, warn_tick);
10263     *d = '\0';
10264     *slp = d - dest;
10265     return s;
10266 }
10267 
10268 char *
Perl_scan_word(pTHX_ char * s,char * dest,STRLEN destlen,int allow_package,STRLEN * slp)10269 Perl_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
10270 {
10271     PERL_ARGS_ASSERT_SCAN_WORD;
10272     return scan_word6(s, dest, destlen, allow_package, slp, FALSE);
10273 }
10274 
10275 /* scan s and extract an identifier ($var) from it if possible
10276  * into dest.
10277  * XXX: This function has subtle implications on parsing, and
10278  * changing how it behaves can cause a variable to change from
10279  * being a run time rv2sv call or a compile time binding to a
10280  * specific variable name.
10281  */
10282 STATIC char *
S_scan_ident(pTHX_ char * s,char * dest,STRLEN destlen,I32 ck_uni)10283 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
10284 {
10285     I32 herelines = PL_parser->herelines;
10286     SSize_t bracket = -1;
10287     char funny = *s++;
10288     char *d = dest;
10289     char * const e = d + destlen - 3;    /* two-character token, ending NUL */
10290     bool is_utf8 = cBOOL(UTF);
10291     line_t orig_copline = 0, tmp_copline = 0;
10292 
10293     PERL_ARGS_ASSERT_SCAN_IDENT;
10294 
10295     if (isSPACE(*s) || !*s)
10296         s = skipspace(s);
10297     if (isDIGIT(*s)) { /* handle $0 and $1 $2 and $10 and etc */
10298         bool is_zero= *s == '0' ? TRUE : FALSE;
10299         char *digit_start= d;
10300         *d++ = *s++;
10301         while (s < PL_bufend && isDIGIT(*s)) {
10302             if (d >= e)
10303                 Perl_croak(aTHX_ "%s", ident_too_long);
10304             *d++ = *s++;
10305         }
10306         if (is_zero && d - digit_start > 1)
10307             Perl_croak(aTHX_ ident_var_zero_multi_digit);
10308     }
10309     else {  /* See if it is a "normal" identifier */
10310         parse_ident(&s, &d, e, 1, is_utf8, FALSE, TRUE);
10311     }
10312     *d = '\0';
10313     d = dest;
10314     if (*d) {
10315         /* Either a digit variable, or parse_ident() found an identifier
10316            (anything valid as a bareword), so job done and return.  */
10317         if (PL_lex_state != LEX_NORMAL)
10318             PL_lex_state = LEX_INTERPENDMAYBE;
10319         return s;
10320     }
10321 
10322     /* Here, it is not a run-of-the-mill identifier name */
10323 
10324     if (*s == '$' && s[1]
10325         && (   isIDFIRST_lazy_if_safe(s+1, PL_bufend, is_utf8)
10326             || isDIGIT_A((U8)s[1])
10327             || s[1] == '$'
10328             || s[1] == '{'
10329             || memBEGINs(s+1, (STRLEN) (PL_bufend - (s+1)), "::")) )
10330     {
10331         /* Dereferencing a value in a scalar variable.
10332            The alternatives are different syntaxes for a scalar variable.
10333            Using ' as a leading package separator isn't allowed. :: is.   */
10334         return s;
10335     }
10336     /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...}  */
10337     if (*s == '{') {
10338         bracket = s - SvPVX(PL_linestr);
10339         s++;
10340         orig_copline = CopLINE(PL_curcop);
10341         if (s < PL_bufend && isSPACE(*s)) {
10342             s = skipspace(s);
10343         }
10344     }
10345 
10346 
10347     /* Extract the first character of the variable name from 's' and
10348      * copy it, null terminated into 'd'. Note that this does not
10349      * involve checking for just IDFIRST characters, as it allows the
10350      * '^' for ${^FOO} type variable names, and it allows all the
10351      * characters that are legal in a single character variable name.
10352      *
10353      * The legal ones are any of:
10354      *  a) all ASCII characters except:
10355      *          1) control and space-type ones, like NUL, SOH, \t, and SPACE;
10356      *          2) '{'
10357      *     The final case currently doesn't get this far in the program, so we
10358      *     don't test for it.  If that were to change, it would be ok to allow it.
10359      *  b) When not under Unicode rules, any upper Latin1 character
10360      *  c) Otherwise, when unicode rules are used, all XIDS characters.
10361      *
10362      *      Because all ASCII characters have the same representation whether
10363      *      encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
10364      *      '{' without knowing if is UTF-8 or not. */
10365 
10366     if ((s <= PL_bufend - ((is_utf8)
10367                           ? UTF8SKIP(s)
10368                           : 1))
10369         && (
10370             isGRAPH_A(*s)
10371             ||
10372             ( is_utf8
10373               ? isIDFIRST_utf8_safe(s, PL_bufend)
10374               : (isGRAPH_L1(*s)
10375                  && LIKELY((U8) *s != LATIN1_TO_NATIVE(0xAD))
10376                 )
10377             )
10378         )
10379     ){
10380         if (is_utf8) {
10381             const STRLEN skip = UTF8SKIP(s);
10382             STRLEN i;
10383             d[skip] = '\0';
10384             for ( i = 0; i < skip; i++ )
10385                 d[i] = *s++;
10386         }
10387         else {
10388             *d = *s++;
10389             d[1] = '\0';
10390         }
10391     }
10392 
10393     /* special case to handle ${10}, ${11} the same way we handle ${1} etc */
10394     if (isDIGIT(*d)) {
10395         bool is_zero= *d == '0' ? TRUE : FALSE;
10396         char *digit_start= d;
10397         while (s < PL_bufend && isDIGIT(*s)) {
10398             d++;
10399             if (d >= e)
10400                 Perl_croak(aTHX_ "%s", ident_too_long);
10401             *d= *s++;
10402         }
10403         if (is_zero && d - digit_start >= 1) /* d points at the last digit */
10404             Perl_croak(aTHX_ ident_var_zero_multi_digit);
10405         d[1] = '\0';
10406     }
10407 
10408     /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
10409     else if (*d == '^' && *s && isCONTROLVAR(*s)) {
10410         *d = toCTRL(*s);
10411         s++;
10412     }
10413     /* Warn about ambiguous code after unary operators if {...} notation isn't
10414        used.  There's no difference in ambiguity; it's merely a heuristic
10415        about when not to warn.  */
10416     else if (ck_uni && bracket == -1)
10417         check_uni();
10418 
10419     if (bracket != -1) {
10420         bool skip;
10421         char *s2;
10422         /* If we were processing {...} notation then...  */
10423         if (isIDFIRST_lazy_if_safe(d, e, is_utf8)
10424             || (!isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
10425                  && isWORDCHAR(*s))
10426         ) {
10427             /* note we have to check for a normal identifier first,
10428              * as it handles utf8 symbols, and only after that has
10429              * been ruled out can we look at the caret words */
10430             if (isIDFIRST_lazy_if_safe(d, e, is_utf8) ) {
10431                 /* if it starts as a valid identifier, assume that it is one.
10432                    (the later check for } being at the expected point will trap
10433                    cases where this doesn't pan out.)  */
10434                 d += is_utf8 ? UTF8SKIP(d) : 1;
10435                 parse_ident(&s, &d, e, 1, is_utf8, TRUE, TRUE);
10436                 *d = '\0';
10437             }
10438             else { /* caret word: ${^Foo} ${^CAPTURE[0]} */
10439                 d++;
10440                 while (isWORDCHAR(*s) && d < e) {
10441                     *d++ = *s++;
10442                 }
10443                 if (d >= e)
10444                     Perl_croak(aTHX_ "%s", ident_too_long);
10445                 *d = '\0';
10446             }
10447             tmp_copline = CopLINE(PL_curcop);
10448             if (s < PL_bufend && isSPACE(*s)) {
10449                 s = skipspace(s);
10450             }
10451             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
10452                 /* ${foo[0]} and ${foo{bar}} and ${^CAPTURE[0]} notation.  */
10453                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10454                     const char * const brack =
10455                         (const char *)
10456                         ((*s == '[') ? "[...]" : "{...}");
10457                     orig_copline = CopLINE(PL_curcop);
10458                     CopLINE_set(PL_curcop, tmp_copline);
10459    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
10460                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10461                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
10462                         funny, dest, brack, funny, dest, brack);
10463                     CopLINE_set(PL_curcop, orig_copline);
10464                 }
10465                 bracket++;
10466                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
10467                 PL_lex_allbrackets++;
10468                 return s;
10469             }
10470         }
10471 
10472         if ( !tmp_copline )
10473             tmp_copline = CopLINE(PL_curcop);
10474         if ((skip = s < PL_bufend && isSPACE(*s))) {
10475             /* Avoid incrementing line numbers or resetting PL_linestart,
10476                in case we have to back up.  */
10477             STRLEN s_off = s - SvPVX(PL_linestr);
10478             s2 = peekspace(s);
10479             s = SvPVX(PL_linestr) + s_off;
10480         }
10481         else
10482             s2 = s;
10483 
10484         /* Expect to find a closing } after consuming any trailing whitespace.
10485          */
10486         if (*s2 == '}') {
10487             /* Now increment line numbers if applicable.  */
10488             if (skip)
10489                 s = skipspace(s);
10490             s++;
10491             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10492                 PL_lex_state = LEX_INTERPEND;
10493                 PL_expect = XREF;
10494             }
10495             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) {
10496                 if (ckWARN(WARN_AMBIGUOUS)
10497                     && (keyword(dest, d - dest, 0)
10498                         || get_cvn_flags(dest, d - dest, is_utf8
10499                            ? SVf_UTF8
10500                            : 0)))
10501                 {
10502                     SV *tmp = newSVpvn_flags( dest, d - dest,
10503                                         SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
10504                     if (funny == '#')
10505                         funny = '@';
10506                     orig_copline = CopLINE(PL_curcop);
10507                     CopLINE_set(PL_curcop, tmp_copline);
10508                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10509                         "Ambiguous use of %c{%" SVf "} resolved to %c%" SVf,
10510                         funny, SVfARG(tmp), funny, SVfARG(tmp));
10511                     CopLINE_set(PL_curcop, orig_copline);
10512                 }
10513             }
10514         }
10515         else {
10516             /* Didn't find the closing } at the point we expected, so restore
10517                state such that the next thing to process is the opening { and */
10518             s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
10519             CopLINE_set(PL_curcop, orig_copline);
10520             PL_parser->herelines = herelines;
10521             *dest = '\0';
10522             PL_parser->sub_no_recover = TRUE;
10523         }
10524     }
10525     else if (   PL_lex_state == LEX_INTERPNORMAL
10526              && !PL_lex_brackets
10527              && !intuit_more(s, PL_bufend))
10528         PL_lex_state = LEX_INTERPEND;
10529     return s;
10530 }
10531 
10532 static bool
S_pmflag(pTHX_ const char * const valid_flags,U32 * pmfl,char ** s,char * charset,unsigned int * x_mod_count)10533 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) {
10534 
10535     /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag
10536      * found in the parse starting at 's', based on the subset that are valid
10537      * in this context input to this routine in 'valid_flags'. Advances s.
10538      * Returns TRUE if the input should be treated as a valid flag, so the next
10539      * char may be as well; otherwise FALSE. 'charset' should point to a NUL
10540      * upon first call on the current regex.  This routine will set it to any
10541      * charset modifier found.  The caller shouldn't change it.  This way,
10542      * another charset modifier encountered in the parse can be detected as an
10543      * error, as we have decided to allow only one */
10544 
10545     const char c = **s;
10546     STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
10547 
10548     if ( charlen != 1 || ! strchr(valid_flags, c) ) {
10549         if (isWORDCHAR_lazy_if_safe( *s, PL_bufend, UTF)) {
10550             yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
10551                        UTF ? SVf_UTF8 : 0);
10552             (*s) += charlen;
10553             /* Pretend that it worked, so will continue processing before
10554              * dieing */
10555             return TRUE;
10556         }
10557         return FALSE;
10558     }
10559 
10560     switch (c) {
10561 
10562         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count);
10563         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
10564         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
10565         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
10566         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
10567         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
10568         case LOCALE_PAT_MOD:
10569             if (*charset) {
10570                 goto multiple_charsets;
10571             }
10572             set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
10573             *charset = c;
10574             break;
10575         case UNICODE_PAT_MOD:
10576             if (*charset) {
10577                 goto multiple_charsets;
10578             }
10579             set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
10580             *charset = c;
10581             break;
10582         case ASCII_RESTRICT_PAT_MOD:
10583             if (! *charset) {
10584                 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
10585             }
10586             else {
10587 
10588                 /* Error if previous modifier wasn't an 'a', but if it was, see
10589                  * if, and accept, a second occurrence (only) */
10590                 if (*charset != 'a'
10591                     || get_regex_charset(*pmfl)
10592                         != REGEX_ASCII_RESTRICTED_CHARSET)
10593                 {
10594                         goto multiple_charsets;
10595                 }
10596                 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
10597             }
10598             *charset = c;
10599             break;
10600         case DEPENDS_PAT_MOD:
10601             if (*charset) {
10602                 goto multiple_charsets;
10603             }
10604             set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
10605             *charset = c;
10606             break;
10607     }
10608 
10609     (*s)++;
10610     return TRUE;
10611 
10612     multiple_charsets:
10613         if (*charset != c) {
10614             yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
10615         }
10616         else if (c == 'a') {
10617   /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
10618             yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
10619         }
10620         else {
10621             yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
10622         }
10623 
10624         /* Pretend that it worked, so will continue processing before dieing */
10625         (*s)++;
10626         return TRUE;
10627 }
10628 
10629 STATIC char *
S_scan_pat(pTHX_ char * start,I32 type)10630 S_scan_pat(pTHX_ char *start, I32 type)
10631 {
10632     PMOP *pm;
10633     char *s;
10634     const char * const valid_flags =
10635         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
10636     char charset = '\0';    /* character set modifier */
10637     unsigned int x_mod_count = 0;
10638 
10639     PERL_ARGS_ASSERT_SCAN_PAT;
10640 
10641     s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
10642     if (!s)
10643         Perl_croak(aTHX_ "Search pattern not terminated");
10644 
10645     pm = (PMOP*)newPMOP(type, 0);
10646     if (PL_multi_open == '?') {
10647         /* This is the only point in the code that sets PMf_ONCE:  */
10648         pm->op_pmflags |= PMf_ONCE;
10649 
10650         /* Hence it's safe to do this bit of PMOP book-keeping here, which
10651            allows us to restrict the list needed by reset to just the ??
10652            matches.  */
10653         assert(type != OP_TRANS);
10654         if (PL_curstash) {
10655             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
10656             U32 elements;
10657             if (!mg) {
10658                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
10659                                  0);
10660             }
10661             elements = mg->mg_len / sizeof(PMOP**);
10662             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
10663             ((PMOP**)mg->mg_ptr) [elements++] = pm;
10664             mg->mg_len = elements * sizeof(PMOP**);
10665             PmopSTASH_set(pm,PL_curstash);
10666         }
10667     }
10668 
10669     /* if qr/...(?{..}).../, then need to parse the pattern within a new
10670      * anon CV. False positives like qr/[(?{]/ are harmless */
10671 
10672     if (type == OP_QR) {
10673         STRLEN len;
10674         char *e, *p = SvPV(PL_lex_stuff, len);
10675         e = p + len;
10676         for (; p < e; p++) {
10677             if (p[0] == '(' && (
10678                 (p[1] == '?' && (p[2] == '{' ||
10679                                 (p[2] == '?' && p[3] == '{'))) ||
10680                 (p[1] == '*' && (p[2] == '{' ||
10681                                 (p[2] == '*' && p[3] == '{')))
10682             )){
10683                 pm->op_pmflags |= PMf_HAS_CV;
10684                 break;
10685             }
10686         }
10687         pm->op_pmflags |= PMf_IS_QR;
10688     }
10689 
10690     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
10691                                 &s, &charset, &x_mod_count))
10692     {};
10693     /* issue a warning if /c is specified,but /g is not */
10694     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
10695     {
10696         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
10697                        "Use of /c modifier is meaningless without /g" );
10698     }
10699 
10700     PL_lex_op = (OP*)pm;
10701     pl_yylval.ival = OP_MATCH;
10702     return s;
10703 }
10704 
10705 STATIC char *
S_scan_subst(pTHX_ char * start)10706 S_scan_subst(pTHX_ char *start)
10707 {
10708     char *s;
10709     PMOP *pm;
10710     I32 first_start;
10711     line_t first_line;
10712     line_t linediff = 0;
10713     I32 es = 0;
10714     char charset = '\0';    /* character set modifier */
10715     unsigned int x_mod_count = 0;
10716     char *t;
10717 
10718     PERL_ARGS_ASSERT_SCAN_SUBST;
10719 
10720     pl_yylval.ival = OP_NULL;
10721 
10722     s = scan_str(start, TRUE, FALSE, FALSE, &t);
10723 
10724     if (!s)
10725         Perl_croak(aTHX_ "Substitution pattern not terminated");
10726 
10727     s = t;
10728 
10729     first_start = PL_multi_start;
10730     first_line = CopLINE(PL_curcop);
10731     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
10732     if (!s) {
10733         SvREFCNT_dec_NN(PL_lex_stuff);
10734         PL_lex_stuff = NULL;
10735         Perl_croak(aTHX_ "Substitution replacement not terminated");
10736     }
10737     PL_multi_start = first_start;	/* so whole substitution is taken together */
10738 
10739     pm = (PMOP*)newPMOP(OP_SUBST, 0);
10740 
10741 
10742     while (*s) {
10743         if (*s == EXEC_PAT_MOD) {
10744             s++;
10745             es++;
10746         }
10747         else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
10748                                   &s, &charset, &x_mod_count))
10749         {
10750             break;
10751         }
10752     }
10753 
10754     if ((pm->op_pmflags & PMf_CONTINUE)) {
10755         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
10756     }
10757 
10758     if (es) {
10759         SV * const repl = newSVpvs("");
10760 
10761         PL_multi_end = 0;
10762         pm->op_pmflags |= PMf_EVAL;
10763         for (; es > 1; es--) {
10764             sv_catpvs(repl, "eval ");
10765         }
10766         sv_catpvs(repl, "do {");
10767         sv_catsv(repl, PL_parser->lex_sub_repl);
10768         sv_catpvs(repl, "}");
10769         SvREFCNT_dec(PL_parser->lex_sub_repl);
10770         PL_parser->lex_sub_repl = repl;
10771     }
10772 
10773 
10774     linediff = CopLINE(PL_curcop) - first_line;
10775     if (linediff)
10776         CopLINE_set(PL_curcop, first_line);
10777 
10778     if (linediff || es) {
10779         /* the IVX field indicates that the replacement string is a s///e;
10780          * the NVX field indicates how many src code lines the replacement
10781          * spreads over */
10782         sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV);
10783         ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = linediff;
10784         ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen =
10785                                                                     cBOOL(es);
10786     }
10787 
10788     PL_lex_op = (OP*)pm;
10789     pl_yylval.ival = OP_SUBST;
10790     return s;
10791 }
10792 
10793 STATIC char *
S_scan_trans(pTHX_ char * start)10794 S_scan_trans(pTHX_ char *start)
10795 {
10796     char* s;
10797     OP *o;
10798     U8 squash;
10799     U8 del;
10800     U8 complement;
10801     bool nondestruct = 0;
10802     char *t;
10803 
10804     PERL_ARGS_ASSERT_SCAN_TRANS;
10805 
10806     pl_yylval.ival = OP_NULL;
10807 
10808     s = scan_str(start,FALSE,FALSE,FALSE,&t);
10809     if (!s)
10810         Perl_croak(aTHX_ "Transliteration pattern not terminated");
10811 
10812     s = t;
10813 
10814     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
10815     if (!s) {
10816         SvREFCNT_dec_NN(PL_lex_stuff);
10817         PL_lex_stuff = NULL;
10818         Perl_croak(aTHX_ "Transliteration replacement not terminated");
10819     }
10820 
10821     complement = del = squash = 0;
10822     while (1) {
10823         switch (*s) {
10824         case 'c':
10825             complement = OPpTRANS_COMPLEMENT;
10826             break;
10827         case 'd':
10828             del = OPpTRANS_DELETE;
10829             break;
10830         case 's':
10831             squash = OPpTRANS_SQUASH;
10832             break;
10833         case 'r':
10834             nondestruct = 1;
10835             break;
10836         default:
10837             goto no_more;
10838         }
10839         s++;
10840     }
10841   no_more:
10842 
10843     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
10844     o->op_private &= ~OPpTRANS_ALL;
10845     o->op_private |= del|squash|complement;
10846 
10847     PL_lex_op = o;
10848     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
10849 
10850 
10851     return s;
10852 }
10853 
10854 /* scan_heredoc
10855    Takes a pointer to the first < in <<FOO.
10856    Returns a pointer to the byte following <<FOO.
10857 
10858    This function scans a heredoc, which involves different methods
10859    depending on whether we are in a string eval, quoted construct, etc.
10860    This is because PL_linestr could containing a single line of input, or
10861    a whole string being evalled, or the contents of the current quote-
10862    like operator.
10863 
10864    The two basic methods are:
10865     - Steal lines from the input stream
10866     - Scan the heredoc in PL_linestr and remove it therefrom
10867 
10868    In a file scope or filtered eval, the first method is used; in a
10869    string eval, the second.
10870 
10871    In a quote-like operator, we have to choose between the two,
10872    depending on where we can find a newline.  We peek into outer lex-
10873    ing scopes until we find one with a newline in it.  If we reach the
10874    outermost lexing scope and it is a file, we use the stream method.
10875    Otherwise it is treated as an eval.
10876 */
10877 
10878 STATIC char *
S_scan_heredoc(pTHX_ char * s)10879 S_scan_heredoc(pTHX_ char *s)
10880 {
10881     I32 op_type = OP_SCALAR;
10882     I32 len;
10883     SV *tmpstr;
10884     char term;
10885     char *d;
10886     char *e;
10887     char *peek;
10888     char *indent = 0;
10889     I32 indent_len = 0;
10890     bool indented = FALSE;
10891     const bool infile = PL_rsfp || PL_parser->filtered;
10892     const line_t origline = CopLINE(PL_curcop);
10893     LEXSHARED *shared = PL_parser->lex_shared;
10894 
10895     PERL_ARGS_ASSERT_SCAN_HEREDOC;
10896 
10897     s += 2;
10898     d = PL_tokenbuf + 1;
10899     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
10900     *PL_tokenbuf = '\n';
10901     peek = s;
10902 
10903     if (*peek == '~') {
10904         indented = TRUE;
10905         peek++; s++;
10906     }
10907 
10908     while (SPACE_OR_TAB(*peek))
10909         peek++;
10910 
10911     if (*peek == '`' || *peek == '\'' || *peek =='"') {
10912         s = peek;
10913         term = *s++;
10914         s = delimcpy(d, e, s, PL_bufend, term, &len);
10915         if (s == PL_bufend)
10916             Perl_croak(aTHX_ "Unterminated delimiter for here document");
10917         d += len;
10918         s++;
10919     }
10920     else {
10921         if (*s == '\\')
10922             /* <<\FOO is equivalent to <<'FOO' */
10923             s++, term = '\'';
10924         else
10925             term = '"';
10926 
10927         if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
10928             Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden");
10929 
10930         peek = s;
10931 
10932         while (isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF)) {
10933             peek += UTF ? UTF8SKIP(peek) : 1;
10934         }
10935 
10936         len = (peek - s >= e - d) ? (e - d) : (peek - s);
10937         Copy(s, d, len, char);
10938         s += len;
10939         d += len;
10940     }
10941 
10942     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
10943         Perl_croak(aTHX_ "Delimiter for here document is too long");
10944 
10945     *d++ = '\n';
10946     *d = '\0';
10947     len = d - PL_tokenbuf;
10948 
10949 #ifndef PERL_STRICT_CR
10950     d = (char *) memchr(s, '\r', PL_bufend - s);
10951     if (d) {
10952         char * const olds = s;
10953         s = d;
10954         while (s < PL_bufend) {
10955             if (*s == '\r') {
10956                 *d++ = '\n';
10957                 if (*++s == '\n')
10958                     s++;
10959             }
10960             else if (*s == '\n' && s[1] == '\r') {	/* \015\013 on a mac? */
10961                 *d++ = *s++;
10962                 s++;
10963             }
10964             else
10965                 *d++ = *s++;
10966         }
10967         *d = '\0';
10968         PL_bufend = d;
10969         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10970         s = olds;
10971     }
10972 #endif
10973 
10974     tmpstr = newSV_type(SVt_PVIV);
10975     if (term == '\'') {
10976         op_type = OP_CONST;
10977         SvIV_set(tmpstr, -1);
10978     }
10979     else if (term == '`') {
10980         op_type = OP_BACKTICK;
10981         SvIV_set(tmpstr, '\\');
10982     }
10983 
10984     PL_multi_start = origline + 1 + PL_parser->herelines;
10985     PL_multi_open = PL_multi_close = '<';
10986 
10987     /* inside a string eval or quote-like operator */
10988     if (!infile || PL_lex_inwhat) {
10989         SV *linestr;
10990         char *bufend;
10991         char * const olds = s;
10992         PERL_CONTEXT * const cx = CX_CUR();
10993         /* These two fields are not set until an inner lexing scope is
10994            entered.  But we need them set here. */
10995         shared->ls_bufptr  = s;
10996         shared->ls_linestr = PL_linestr;
10997 
10998         if (PL_lex_inwhat) {
10999             /* Look for a newline.  If the current buffer does not have one,
11000              peek into the line buffer of the parent lexing scope, going
11001              up as many levels as necessary to find one with a newline
11002              after bufptr.
11003             */
11004             while (!(s = (char *)memchr(
11005                                 (void *)shared->ls_bufptr, '\n',
11006                                 SvEND(shared->ls_linestr)-shared->ls_bufptr
11007                 )))
11008             {
11009                 shared = shared->ls_prev;
11010                 /* shared is only null if we have gone beyond the outermost
11011                    lexing scope.  In a file, we will have broken out of the
11012                    loop in the previous iteration.  In an eval, the string buf-
11013                    fer ends with "\n;", so the while condition above will have
11014                    evaluated to false.  So shared can never be null.  Or so you
11015                    might think.  Odd syntax errors like s;@{<<; can gobble up
11016                    the implicit semicolon at the end of a flie, causing the
11017                    file handle to be closed even when we are not in a string
11018                    eval.  So shared may be null in that case.
11019                    (Closing '>>}' here to balance the earlier open brace for
11020                    editors that look for matched pairs.) */
11021                 if (UNLIKELY(!shared))
11022                     goto interminable;
11023                 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
11024                    most lexing scope.  In a file, shared->ls_linestr at that
11025                    level is just one line, so there is no body to steal. */
11026                 if (infile && !shared->ls_prev) {
11027                     s = olds;
11028                     goto streaming;
11029                 }
11030             }
11031         }
11032         else {	/* eval or we've already hit EOF */
11033             s = (char*)memchr((void*)s, '\n', PL_bufend - s);
11034             if (!s)
11035                 goto interminable;
11036         }
11037 
11038         linestr = shared->ls_linestr;
11039         bufend = SvEND(linestr);
11040         d = s;
11041         if (indented) {
11042             char *myolds = s;
11043 
11044             while (s < bufend - len + 1) {
11045                 if (*s++ == '\n')
11046                     ++PL_parser->herelines;
11047 
11048                 if (memEQ(s, PL_tokenbuf + 1, len - 1)) {
11049                     char *backup = s;
11050                     indent_len = 0;
11051 
11052                     /* Only valid if it's preceded by whitespace only */
11053                     while (backup != myolds && --backup >= myolds) {
11054                         if (! SPACE_OR_TAB(*backup)) {
11055                             break;
11056                         }
11057                         indent_len++;
11058                     }
11059 
11060                     /* No whitespace or all! */
11061                     if (backup == s || *backup == '\n') {
11062                         Newx(indent, indent_len + 1, char);
11063                         memcpy(indent, backup + 1, indent_len);
11064                         indent[indent_len] = 0;
11065                         s--; /* before our delimiter */
11066                         PL_parser->herelines--; /* this line doesn't count */
11067                         break;
11068                     }
11069                 }
11070             }
11071         }
11072         else {
11073             while (s < bufend - len + 1
11074                    && memNE(s,PL_tokenbuf,len) )
11075             {
11076                 if (*s++ == '\n')
11077                     ++PL_parser->herelines;
11078             }
11079         }
11080 
11081         if (s >= bufend - len + 1) {
11082             goto interminable;
11083         }
11084 
11085         sv_setpvn_fresh(tmpstr,d+1,s-d);
11086         s += len - 1;
11087         /* the preceding stmt passes a newline */
11088         PL_parser->herelines++;
11089 
11090         /* s now points to the newline after the heredoc terminator.
11091            d points to the newline before the body of the heredoc.
11092          */
11093 
11094         /* We are going to modify linestr in place here, so set
11095            aside copies of the string if necessary for re-evals or
11096            (caller $n)[6]. */
11097         /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
11098            check shared->re_eval_str. */
11099         if (shared->re_eval_start || shared->re_eval_str) {
11100             /* Set aside the rest of the regexp */
11101             if (!shared->re_eval_str)
11102                 shared->re_eval_str =
11103                        newSVpvn(shared->re_eval_start,
11104                                 bufend - shared->re_eval_start);
11105             shared->re_eval_start -= s-d;
11106         }
11107 
11108         if (cxstack_ix >= 0
11109             && CxTYPE(cx) == CXt_EVAL
11110             && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
11111             && cx->blk_eval.cur_text == linestr)
11112         {
11113             cx->blk_eval.cur_text = newSVsv(linestr);
11114             cx->blk_u16 |= 0x40; /* indicate cur_text is ref counted */
11115         }
11116 
11117         /* Copy everything from s onwards back to d. */
11118         Move(s,d,bufend-s + 1,char);
11119         SvCUR_set(linestr, SvCUR(linestr) - (s-d));
11120         /* Setting PL_bufend only applies when we have not dug deeper
11121            into other scopes, because sublex_done sets PL_bufend to
11122            SvEND(PL_linestr). */
11123         if (shared == PL_parser->lex_shared)
11124             PL_bufend = SvEND(linestr);
11125         s = olds;
11126     }
11127     else {
11128         SV *linestr_save;
11129         char *oldbufptr_save;
11130         char *oldoldbufptr_save;
11131       streaming:
11132         sv_grow_fresh(tmpstr, 80);
11133         SvPVCLEAR_FRESH(tmpstr);   /* avoid "uninitialized" warning */
11134         term = PL_tokenbuf[1];
11135         len--;
11136         linestr_save = PL_linestr; /* must restore this afterwards */
11137         d = s;			 /* and this */
11138         oldbufptr_save = PL_oldbufptr;
11139         oldoldbufptr_save = PL_oldoldbufptr;
11140         PL_linestr = newSVpvs("");
11141         PL_bufend = SvPVX(PL_linestr);
11142 
11143         while (1) {
11144             PL_bufptr = PL_bufend;
11145             CopLINE_set(PL_curcop,
11146                         origline + 1 + PL_parser->herelines);
11147 
11148             if (   !lex_next_chunk(LEX_NO_TERM)
11149                 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n'))
11150             {
11151                 /* Simply freeing linestr_save might seem simpler here, as it
11152                    does not matter what PL_linestr points to, since we are
11153                    about to croak; but in a quote-like op, linestr_save
11154                    will have been prospectively freed already, via
11155                    SAVEFREESV(PL_linestr) in sublex_push, so it's easier to
11156                    restore PL_linestr. */
11157                 SvREFCNT_dec_NN(PL_linestr);
11158                 PL_linestr = linestr_save;
11159                 PL_oldbufptr = oldbufptr_save;
11160                 PL_oldoldbufptr = oldoldbufptr_save;
11161                 goto interminable;
11162             }
11163 
11164             CopLINE_set(PL_curcop, origline);
11165 
11166             if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
11167                 s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
11168                 /* ^That should be enough to avoid this needing to grow:  */
11169                 sv_catpvs(PL_linestr, "\n\0");
11170                 assert(s == SvPVX(PL_linestr));
11171                 PL_bufend = SvEND(PL_linestr);
11172             }
11173 
11174             s = PL_bufptr;
11175             PL_parser->herelines++;
11176             PL_last_lop = PL_last_uni = NULL;
11177 
11178 #ifndef PERL_STRICT_CR
11179             if (PL_bufend - PL_linestart >= 2) {
11180                 if (   (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
11181                     || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
11182                 {
11183                     PL_bufend[-2] = '\n';
11184                     PL_bufend--;
11185                     SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11186                 }
11187                 else if (PL_bufend[-1] == '\r')
11188                     PL_bufend[-1] = '\n';
11189             }
11190             else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
11191                 PL_bufend[-1] = '\n';
11192 #endif
11193 
11194             if (indented && (PL_bufend-s) >= len) {
11195                 char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len));
11196 
11197                 if (found) {
11198                     char *backup = found;
11199                     indent_len = 0;
11200 
11201                     /* Only valid if it's preceded by whitespace only */
11202                     while (backup != s && --backup >= s) {
11203                         if (! SPACE_OR_TAB(*backup)) {
11204                             break;
11205                         }
11206                         indent_len++;
11207                     }
11208 
11209                     /* All whitespace or none! */
11210                     if (backup == found || SPACE_OR_TAB(*backup)) {
11211                         Newx(indent, indent_len + 1, char);
11212                         memcpy(indent, backup, indent_len);
11213                         indent[indent_len] = 0;
11214                         SvREFCNT_dec(PL_linestr);
11215                         PL_linestr = linestr_save;
11216                         PL_linestart = SvPVX(linestr_save);
11217                         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11218                         PL_oldbufptr = oldbufptr_save;
11219                         PL_oldoldbufptr = oldoldbufptr_save;
11220                         s = d;
11221                         break;
11222                     }
11223                 }
11224 
11225                 /* Didn't find it */
11226                 sv_catsv(tmpstr,PL_linestr);
11227             }
11228             else {
11229                 if (*s == term && PL_bufend-s >= len
11230                     && memEQ(s,PL_tokenbuf + 1,len))
11231                 {
11232                     SvREFCNT_dec(PL_linestr);
11233                     PL_linestr = linestr_save;
11234                     PL_linestart = SvPVX(linestr_save);
11235                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11236                     PL_oldbufptr = oldbufptr_save;
11237                     PL_oldoldbufptr = oldoldbufptr_save;
11238                     s = d;
11239                     break;
11240                 }
11241                 else {
11242                     sv_catsv(tmpstr,PL_linestr);
11243                 }
11244             }
11245         } /* while (1) */
11246     }
11247 
11248     PL_multi_end = origline + PL_parser->herelines;
11249 
11250     if (indented && indent) {
11251         STRLEN linecount = 1;
11252         STRLEN herelen = SvCUR(tmpstr);
11253         char *ss = SvPVX(tmpstr);
11254         char *se = ss + herelen;
11255         SV *newstr = newSV(herelen+1);
11256         SvPOK_on(newstr);
11257 
11258         /* Trim leading whitespace */
11259         while (ss < se) {
11260             /* newline only? Copy and move on */
11261             if (*ss == '\n') {
11262                 sv_catpvs(newstr,"\n");
11263                 ss++;
11264                 linecount++;
11265 
11266             /* Found our indentation? Strip it */
11267             }
11268             else if (se - ss >= indent_len
11269                        && memEQ(ss, indent, indent_len))
11270             {
11271                 STRLEN le = 0;
11272                 ss += indent_len;
11273 
11274                 while ((ss + le) < se && *(ss + le) != '\n')
11275                     le++;
11276 
11277                 sv_catpvn(newstr, ss, le);
11278                 ss += le;
11279 
11280             /* Line doesn't begin with our indentation? Croak */
11281             }
11282             else {
11283                 Safefree(indent);
11284                 Perl_croak(aTHX_
11285                     "Indentation on line %d of here-doc doesn't match delimiter",
11286                     (int)linecount
11287                 );
11288             }
11289         } /* while */
11290 
11291         /* avoid sv_setsv() as we don't want to COW here */
11292         sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr));
11293         Safefree(indent);
11294         SvREFCNT_dec_NN(newstr);
11295     }
11296 
11297     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
11298         SvPV_shrink_to_cur(tmpstr);
11299     }
11300 
11301     if (!IN_BYTES) {
11302         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
11303             SvUTF8_on(tmpstr);
11304     }
11305 
11306     PL_lex_stuff = tmpstr;
11307     pl_yylval.ival = op_type;
11308     return s;
11309 
11310   interminable:
11311     if (indent)
11312         Safefree(indent);
11313     SvREFCNT_dec(tmpstr);
11314     CopLINE_set(PL_curcop, origline);
11315     missingterm(PL_tokenbuf + 1, sizeof(PL_tokenbuf) - 1);
11316 }
11317 
11318 
11319 /* scan_inputsymbol
11320    takes: position of first '<' in input buffer
11321    returns: position of first char following the matching '>' in
11322             input buffer
11323    side-effects: pl_yylval and lex_op are set.
11324 
11325    This code handles:
11326 
11327    <>		read from ARGV
11328    <<>>		read from ARGV without magic open
11329    <FH> 	read from filehandle
11330    <pkg::FH>	read from package qualified filehandle
11331    <pkg'FH>	read from package qualified filehandle
11332    <$fh>	read from filehandle in $fh
11333    <*.h>	filename glob
11334 
11335 */
11336 
11337 STATIC char *
S_scan_inputsymbol(pTHX_ char * start)11338 S_scan_inputsymbol(pTHX_ char *start)
11339 {
11340     char *s = start;		/* current position in buffer */
11341     char *end;
11342     I32 len;
11343     bool nomagicopen = FALSE;
11344     char *d = PL_tokenbuf;					/* start of temp holding space */
11345     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;	/* end of temp holding space */
11346 
11347     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
11348 
11349     end = (char *) memchr(s, '\n', PL_bufend - s);
11350     if (!end)
11351         end = PL_bufend;
11352     if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
11353         nomagicopen = TRUE;
11354         *d = '\0';
11355         len = 0;
11356         s += 3;
11357     }
11358     else
11359         s = delimcpy(d, e, s + 1, end, '>', &len);	/* extract until > */
11360 
11361     /* die if we didn't have space for the contents of the <>,
11362        or if it didn't end, or if we see a newline
11363     */
11364 
11365     if (len >= (I32)sizeof PL_tokenbuf)
11366         Perl_croak(aTHX_ "Excessively long <> operator");
11367     if (s >= end)
11368         Perl_croak(aTHX_ "Unterminated <> operator");
11369 
11370     s++;
11371 
11372     /* check for <$fh>
11373        Remember, only scalar variables are interpreted as filehandles by
11374        this code.  Anything more complex (e.g., <$fh{$num}>) will be
11375        treated as a glob() call.
11376        This code makes use of the fact that except for the $ at the front,
11377        a scalar variable and a filehandle look the same.
11378     */
11379     if (*d == '$' && d[1]) d++;
11380 
11381     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
11382     while (isWORDCHAR_lazy_if_safe(d, e, UTF) || *d == '\'' || *d == ':') {
11383         d += UTF ? UTF8SKIP(d) : 1;
11384     }
11385 
11386     /* If we've tried to read what we allow filehandles to look like, and
11387        there's still text left, then it must be a glob() and not a getline.
11388        Use scan_str to pull out the stuff between the <> and treat it
11389        as nothing more than a string.
11390     */
11391 
11392     if (d - PL_tokenbuf != len) {
11393         pl_yylval.ival = OP_GLOB;
11394         s = scan_str(start,FALSE,FALSE,FALSE,NULL);
11395         if (!s)
11396            Perl_croak(aTHX_ "Glob not terminated");
11397         return s;
11398     }
11399     else {
11400         bool readline_overridden = FALSE;
11401         GV *gv_readline;
11402         /* we're in a filehandle read situation */
11403         d = PL_tokenbuf;
11404 
11405         /* turn <> into <ARGV> */
11406         if (!len)
11407             Copy("ARGV",d,5,char);
11408 
11409         /* Check whether readline() is overridden */
11410         if ((gv_readline = gv_override("readline",8)))
11411             readline_overridden = TRUE;
11412 
11413         /* if <$fh>, create the ops to turn the variable into a
11414            filehandle
11415         */
11416         if (*d == '$') {
11417             /* try to find it in the pad for this block, otherwise find
11418                add symbol table ops
11419             */
11420             const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
11421             if (tmp != NOT_IN_PAD) {
11422                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
11423                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
11424                     HEK * const stashname = HvNAME_HEK(stash);
11425                     SV * const sym = newSVhek_mortal(stashname);
11426                     sv_catpvs(sym, "::");
11427                     sv_catpv(sym, d+1);
11428                     d = SvPVX(sym);
11429                     goto intro_sym;
11430                 }
11431                 else {
11432                     OP * const o = newPADxVOP(OP_PADSV, 0, tmp);
11433                     PL_lex_op = readline_overridden
11434                         ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11435                                 op_append_elem(OP_LIST, o,
11436                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
11437                         : newUNOP(OP_READLINE, 0, o);
11438                 }
11439             }
11440             else {
11441                 GV *gv;
11442                 ++d;
11443               intro_sym:
11444                 gv = gv_fetchpv(d,
11445                                 GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
11446                                 SVt_PV);
11447                 PL_lex_op = readline_overridden
11448                     ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11449                             op_append_elem(OP_LIST,
11450                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
11451                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11452                     : newUNOP(OP_READLINE, 0,
11453                             newUNOP(OP_RV2SV, 0,
11454                                 newGVOP(OP_GV, 0, gv)));
11455             }
11456             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
11457             pl_yylval.ival = OP_NULL;
11458         }
11459 
11460         /* If it's none of the above, it must be a literal filehandle
11461            (<Foo::BAR> or <FOO>) so build a simple readline OP */
11462         else {
11463             GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
11464             PL_lex_op = readline_overridden
11465                 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11466                         op_append_elem(OP_LIST,
11467                             newGVOP(OP_GV, 0, gv),
11468                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11469                 : newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
11470             pl_yylval.ival = OP_NULL;
11471 
11472             /* leave the token generation above to avoid confusing the parser */
11473             if (!FEATURE_BAREWORD_FILEHANDLES_IS_ENABLED) {
11474                 no_bareword_filehandle(d);
11475             }
11476         }
11477     }
11478 
11479     return s;
11480 }
11481 
11482 
11483 /* scan_str
11484    takes:
11485         start			position in buffer
11486         keep_bracketed_quoted   preserve \ quoting of embedded delimiters, but
11487                                 only if they are of the open/close form
11488         keep_delims		preserve the delimiters around the string
11489         re_reparse		compiling a run-time /(?{})/:
11490                                    collapse // to /,  and skip encoding src
11491         delimp			if non-null, this is set to the position of
11492                                 the closing delimiter, or just after it if
11493                                 the closing and opening delimiters differ
11494                                 (i.e., the opening delimiter of a substitu-
11495                                 tion replacement)
11496    returns: position to continue reading from buffer
11497    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
11498         updates the read buffer.
11499 
11500    This subroutine pulls a string out of the input.  It is called for:
11501         q		single quotes		q(literal text)
11502         '		single quotes		'literal text'
11503         qq		double quotes		qq(interpolate $here please)
11504         "		double quotes		"interpolate $here please"
11505         qx		backticks		qx(/bin/ls -l)
11506         `		backticks		`/bin/ls -l`
11507         qw		quote words		@EXPORT_OK = qw( func() $spam )
11508         m//		regexp match		m/this/
11509         s///		regexp substitute	s/this/that/
11510         tr///		string transliterate	tr/this/that/
11511         y///		string transliterate	y/this/that/
11512         ($*@)		sub prototypes		sub foo ($)
11513         (stuff)		sub attr parameters	sub foo : attr(stuff)
11514         <>		readline or globs	<FOO>, <>, <$fh>, or <*.c>
11515 
11516    In most of these cases (all but <>, patterns and transliterate)
11517    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
11518    calls scan_str().  s/// makes yylex() call scan_subst() which calls
11519    scan_str().  tr/// and y/// make yylex() call scan_trans() which
11520    calls scan_str().
11521 
11522    It skips whitespace before the string starts, and treats the first
11523    character as the delimiter.  If the delimiter is one of ([{< then
11524    the corresponding "close" character )]}> is used as the closing
11525    delimiter.  It allows quoting of delimiters, and if the string has
11526    balanced delimiters ([{<>}]) it allows nesting.
11527 
11528    On success, the SV with the resulting string is put into lex_stuff or,
11529    if that is already non-NULL, into lex_repl. The second case occurs only
11530    when parsing the RHS of the special constructs s/// and tr/// (y///).
11531    For convenience, the terminating delimiter character is stuffed into
11532    SvIVX of the SV.
11533 */
11534 
11535 char *
Perl_scan_str(pTHX_ char * start,int keep_bracketed_quoted,int keep_delims,int re_reparse,char ** delimp)11536 Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
11537                  char **delimp
11538     )
11539 {
11540     SV *sv;			/* scalar value: string */
11541     char *s = start;		/* current position in the buffer */
11542     char *to;			/* current position in the sv's data */
11543     int brackets = 1;		/* bracket nesting level */
11544     bool d_is_utf8 = FALSE;	/* is there any utf8 content? */
11545     UV open_delim_code;         /* code point */
11546     char open_delim_str[UTF8_MAXBYTES+1];
11547     STRLEN delim_byte_len;      /* each delimiter currently is the same number
11548                                    of bytes */
11549     line_t herelines;
11550 
11551     /* The only non-UTF character that isn't a stand alone grapheme is
11552      * white-space, hence can't be a delimiter. */
11553     const char * non_grapheme_msg = "Use of unassigned code point or"
11554                                     " non-standalone grapheme for a delimiter"
11555                                     " is not allowed";
11556     PERL_ARGS_ASSERT_SCAN_STR;
11557 
11558     /* skip space before the delimiter */
11559     if (isSPACE(*s)) {  /* skipspace can change the buffer 's' is in, so
11560                            'start' also has to change */
11561         s = start = skipspace(s);
11562     }
11563 
11564     /* mark where we are, in case we need to report errors */
11565     CLINE;
11566 
11567     /* after skipping whitespace, the next character is the delimiter */
11568     if (! UTF || UTF8_IS_INVARIANT(*s)) {
11569         open_delim_code   = (U8) *s;
11570         open_delim_str[0] =      *s;
11571         delim_byte_len = 1;
11572     }
11573     else {
11574         open_delim_code = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend,
11575                                             &delim_byte_len);
11576         if (UNLIKELY(! is_grapheme((U8 *) start,
11577                                    (U8 *) s,
11578                                    (U8 *) PL_bufend,
11579                                    open_delim_code)))
11580         {
11581             yyerror(non_grapheme_msg);
11582         }
11583 
11584         Copy(s, open_delim_str, delim_byte_len, char);
11585     }
11586     open_delim_str[delim_byte_len] = '\0';  /* Only for safety */
11587 
11588 
11589     /* mark where we are */
11590     PL_multi_start = CopLINE(PL_curcop);
11591     PL_multi_open = open_delim_code;
11592     herelines = PL_parser->herelines;
11593 
11594     const char * legal_paired_opening_delims;
11595     const char * legal_paired_closing_delims;
11596     const char * deprecated_opening_delims;
11597     if (FEATURE_MORE_DELIMS_IS_ENABLED) {
11598         if (UTF) {
11599             legal_paired_opening_delims = EXTRA_OPENING_UTF8_BRACKETS;
11600             legal_paired_closing_delims = EXTRA_CLOSING_UTF8_BRACKETS;
11601 
11602             /* We are deprecating using a closing delimiter as the opening, in
11603              * case we want in the future to accept them reversed.  The string
11604              * may include ones that are legal, but the code below won't look
11605              * at this string unless it didn't find a legal opening one */
11606             deprecated_opening_delims = DEPRECATED_OPENING_UTF8_BRACKETS;
11607         }
11608         else {
11609             legal_paired_opening_delims = EXTRA_OPENING_NON_UTF8_BRACKETS;
11610             legal_paired_closing_delims = EXTRA_CLOSING_NON_UTF8_BRACKETS;
11611             deprecated_opening_delims = DEPRECATED_OPENING_NON_UTF8_BRACKETS;
11612         }
11613     }
11614     else {
11615         legal_paired_opening_delims = "([{<";
11616         legal_paired_closing_delims = ")]}>";
11617         deprecated_opening_delims = (UTF)
11618                                     ? DEPRECATED_OPENING_UTF8_BRACKETS
11619                                     : DEPRECATED_OPENING_NON_UTF8_BRACKETS;
11620     }
11621 
11622     const char * legal_paired_opening_delims_end = legal_paired_opening_delims
11623                                           + strlen(legal_paired_opening_delims);
11624     const char * deprecated_delims_end = deprecated_opening_delims
11625                                 + strlen(deprecated_opening_delims);
11626 
11627     const char * close_delim_str = open_delim_str;
11628     UV close_delim_code = open_delim_code;
11629 
11630     /* If the delimiter has a mirror-image closing one, get it */
11631     const char *tmps = ninstr(legal_paired_opening_delims,
11632                               legal_paired_opening_delims_end,
11633                               open_delim_str, open_delim_str + delim_byte_len);
11634     if (tmps) {
11635         /* Here, there is a paired delimiter, and tmps points to its position
11636            in the string of the accepted opening paired delimiters.  The
11637            corresponding position in the string of closing ones is the
11638            beginning of the paired mate.  Both contain the same number of
11639            bytes. */
11640         close_delim_str = legal_paired_closing_delims
11641                         + (tmps - legal_paired_opening_delims);
11642 
11643         /* The list of paired delimiters contains all the ASCII ones that have
11644          * always been legal, and no other ASCIIs.  Don't raise a message if
11645          * using one of these */
11646         if (! isASCII(open_delim_code)) {
11647             Perl_ck_warner_d(aTHX_
11648                              packWARN(WARN_EXPERIMENTAL__EXTRA_PAIRED_DELIMITERS),
11649                              "Use of '%" UTF8f "' is experimental as a string delimiter",
11650                              UTF8fARG(UTF, delim_byte_len, open_delim_str));
11651         }
11652 
11653         close_delim_code = (UTF)
11654                            ? valid_utf8_to_uvchr((U8 *) close_delim_str, NULL)
11655                            : * (U8 *) close_delim_str;
11656     }
11657     else {  /* Here, the delimiter isn't paired, hence the close is the same as
11658                the open; and has already been set up.  But make sure it isn't
11659                deprecated to use this particular delimiter, as we plan
11660                eventually to make it paired. */
11661         if (ninstr(deprecated_opening_delims, deprecated_delims_end,
11662                    open_delim_str, open_delim_str + delim_byte_len))
11663         {
11664             Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED__DELIMITER_WILL_BE_PAIRED),
11665                              "Use of '%" UTF8f "' is deprecated as a string delimiter",
11666                              UTF8fARG(UTF, delim_byte_len, open_delim_str));
11667         }
11668 
11669         /* Note that a NUL may be used as a delimiter, and this happens when
11670          * delimiting an empty string, and no special handling for it is
11671          * needed, as ninstr() calls are used */
11672     }
11673 
11674     PL_multi_close = close_delim_code;
11675 
11676     if (PL_multi_open == PL_multi_close) {
11677         keep_bracketed_quoted = FALSE;
11678     }
11679 
11680     /* create a new SV to hold the contents.  79 is the SV's initial length.
11681        What a random number. */
11682     sv = newSV_type(SVt_PVIV);
11683     sv_grow_fresh(sv, 79);
11684     SvIV_set(sv, close_delim_code);
11685     (void)SvPOK_only(sv);		/* validate pointer */
11686 
11687     /* move past delimiter and try to read a complete string */
11688     if (keep_delims)
11689         sv_catpvn(sv, s, delim_byte_len);
11690     s += delim_byte_len;
11691     for (;;) {
11692         /* extend sv if need be */
11693         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
11694         /* set 'to' to the next character in the sv's string */
11695         to = SvPVX(sv)+SvCUR(sv);
11696 
11697         /* read until we run out of string, or we find the closing delimiter */
11698         while (s < PL_bufend) {
11699             /* embedded newlines increment the line count */
11700             if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
11701                 COPLINE_INC_WITH_HERELINES;
11702 
11703             /* backslashes can escape the closing delimiter */
11704             if (   *s == '\\' && s < PL_bufend - delim_byte_len
11705 
11706                    /* ... but not if the delimiter itself is a backslash */
11707                 && close_delim_code != '\\')
11708             {
11709                 /* Here, we have an escaping backslash.  If we're supposed to
11710                  * discard those that escape the closing delimiter, just
11711                  * discard this one */
11712                 if (   !  keep_bracketed_quoted
11713                     &&   (    memEQ(s + 1,  open_delim_str, delim_byte_len)
11714                           ||  (   PL_multi_open == PL_multi_close
11715                                && re_reparse && s[1] == '\\')
11716                           ||  memEQ(s + 1, close_delim_str, delim_byte_len)))
11717                 {
11718                     s++;
11719                 }
11720                 else /* any other escapes are simply copied straight through */
11721                     *to++ = *s++;
11722             }
11723             else if (   s < PL_bufend - (delim_byte_len - 1)
11724                      && memEQ(s, close_delim_str, delim_byte_len)
11725                      && --brackets <= 0)
11726             {
11727                 /* Found unescaped closing delimiter, unnested if we care about
11728                  * that; so are done.
11729                  *
11730                  * In the case of the opening and closing delimiters being
11731                  * different, we have to deal with nesting; the conditional
11732                  * above makes sure we don't get here until the nesting level,
11733                  * 'brackets', is back down to zero.  In the other case,
11734                  * nesting isn't an issue, and 'brackets' never can get
11735                  * incremented above 0, so will come here at the first closing
11736                  * delimiter.
11737                  *
11738                  * Only grapheme delimiters are legal. */
11739                 if (   UTF  /* All Non-UTF-8's are graphemes */
11740                     && UNLIKELY(! is_grapheme((U8 *) start,
11741                                               (U8 *) s,
11742                                               (U8 *) PL_bufend,
11743                                               close_delim_code)))
11744                 {
11745                     yyerror(non_grapheme_msg);
11746                 }
11747 
11748                 break;
11749             }
11750                         /* No nesting if open eq close */
11751             else if (   PL_multi_open != PL_multi_close
11752                      && s < PL_bufend - (delim_byte_len - 1)
11753                      && memEQ(s, open_delim_str, delim_byte_len))
11754             {
11755                 brackets++;
11756             }
11757 
11758             /* Here, still in the middle of the string; copy this character */
11759             if (! UTF || UTF8_IS_INVARIANT((U8) *s)) {
11760                 *to++ = *s++;
11761             }
11762             else {
11763                 size_t this_char_len = UTF8SKIP(s);
11764                 Copy(s, to, this_char_len, char);
11765                 s  += this_char_len;
11766                 to += this_char_len;
11767 
11768                 d_is_utf8 = TRUE;
11769             }
11770         } /* End of loop through buffer */
11771 
11772         /* Here, found end of the string, OR ran out of buffer: terminate the
11773          * copied string and update the sv's end-of-string */
11774         *to = '\0';
11775         SvCUR_set(sv, to - SvPVX_const(sv));
11776 
11777         /*
11778          * this next chunk reads more into the buffer if we're not done yet
11779          */
11780 
11781         if (s < PL_bufend)
11782             break;		/* handle case where we are done yet :-) */
11783 
11784 #ifndef PERL_STRICT_CR
11785         if (to - SvPVX_const(sv) >= 2) {
11786             if (   (to[-2] == '\r' && to[-1] == '\n')
11787                 || (to[-2] == '\n' && to[-1] == '\r'))
11788             {
11789                 to[-2] = '\n';
11790                 to--;
11791                 SvCUR_set(sv, to - SvPVX_const(sv));
11792             }
11793             else if (to[-1] == '\r')
11794                 to[-1] = '\n';
11795         }
11796         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
11797             to[-1] = '\n';
11798 #endif
11799 
11800         /* if we're out of file, or a read fails, bail and reset the current
11801            line marker so we can report where the unterminated string began
11802         */
11803         COPLINE_INC_WITH_HERELINES;
11804         PL_bufptr = PL_bufend;
11805         if (!lex_next_chunk(0)) {
11806             ASSUME(sv);
11807             SvREFCNT_dec(sv);
11808             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11809             return NULL;
11810         }
11811         s = start = PL_bufptr;
11812     } /* End of infinite loop */
11813 
11814     /* at this point, we have successfully read the delimited string */
11815 
11816     if (keep_delims)
11817             sv_catpvn(sv, s, delim_byte_len);
11818     s += delim_byte_len;
11819 
11820     if (d_is_utf8)
11821         SvUTF8_on(sv);
11822 
11823     PL_multi_end = CopLINE(PL_curcop);
11824     CopLINE_set(PL_curcop, PL_multi_start);
11825     PL_parser->herelines = herelines;
11826 
11827     /* if we allocated too much space, give some back */
11828     if (SvCUR(sv) + 5 < SvLEN(sv)) {
11829         SvLEN_set(sv, SvCUR(sv) + 1);
11830         SvPV_shrink_to_cur(sv);
11831     }
11832 
11833     /* decide whether this is the first or second quoted string we've read
11834        for this op
11835     */
11836 
11837     if (PL_lex_stuff)
11838         PL_parser->lex_sub_repl = sv;
11839     else
11840         PL_lex_stuff = sv;
11841     if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-delim_byte_len : s;
11842     return s;
11843 }
11844 
11845 /*
11846   scan_num
11847   takes: pointer to position in buffer
11848   returns: pointer to new position in buffer
11849   side-effects: builds ops for the constant in pl_yylval.op
11850 
11851   Read a number in any of the formats that Perl accepts:
11852 
11853   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)	12 12.34 12.
11854   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)			.34
11855   0b[01](_?[01])*                                       binary integers
11856   0o?[0-7](_?[0-7])*                                    octal integers
11857   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*                         hexadecimal integers
11858   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+   hexadecimal floats
11859 
11860   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
11861   thing it reads.
11862 
11863   If it reads a number without a decimal point or an exponent, it will
11864   try converting the number to an integer and see if it can do so
11865   without loss of precision.
11866 */
11867 
11868 char *
Perl_scan_num(pTHX_ const char * start,YYSTYPE * lvalp)11869 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
11870 {
11871     const char *s = start;	/* current position in buffer */
11872     char *d;			/* destination in temp buffer */
11873     char *e;			/* end of temp buffer */
11874     NV nv;				/* number read, as a double */
11875     SV *sv = NULL;			/* place to put the converted number */
11876     bool floatit;			/* boolean: int or float? */
11877     const char *lastub = NULL;		/* position of last underbar */
11878     static const char* const number_too_long = "Number too long";
11879     bool warned_about_underscore = 0;
11880     I32 shift; /* shift per digit for hex/oct/bin, hoisted here for fp */
11881 #define WARN_ABOUT_UNDERSCORE() \
11882         do { \
11883             if (!warned_about_underscore) { \
11884                 warned_about_underscore = 1; \
11885                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), \
11886                                "Misplaced _ in number"); \
11887             } \
11888         } while(0)
11889     /* Hexadecimal floating point.
11890      *
11891      * In many places (where we have quads and NV is IEEE 754 double)
11892      * we can fit the mantissa bits of a NV into an unsigned quad.
11893      * (Note that UVs might not be quads even when we have quads.)
11894      * This will not work everywhere, though (either no quads, or
11895      * using long doubles), in which case we have to resort to NV,
11896      * which will probably mean horrible loss of precision due to
11897      * multiple fp operations. */
11898     bool hexfp = FALSE;
11899     int total_bits = 0;
11900     int significant_bits = 0;
11901 #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
11902 #  define HEXFP_UQUAD
11903     Uquad_t hexfp_uquad = 0;
11904     int hexfp_frac_bits = 0;
11905 #else
11906 #  define HEXFP_NV
11907     NV hexfp_nv = 0.0;
11908 #endif
11909     NV hexfp_mult = 1.0;
11910     UV high_non_zero = 0; /* highest digit */
11911     int non_zero_integer_digits = 0;
11912     bool new_octal = FALSE;     /* octal with "0o" prefix */
11913 
11914     PERL_ARGS_ASSERT_SCAN_NUM;
11915 
11916     /* We use the first character to decide what type of number this is */
11917 
11918     switch (*s) {
11919     default:
11920         Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
11921 
11922     /* if it starts with a 0, it could be an octal number, a decimal in
11923        0.13 disguise, or a hexadecimal number, or a binary number. */
11924     case '0':
11925         {
11926           /* variables:
11927              u		holds the "number so far"
11928              overflowed	was the number more than we can hold?
11929 
11930              Shift is used when we add a digit.  It also serves as an "are
11931              we in octal/hex/binary?" indicator to disallow hex characters
11932              when in octal mode.
11933            */
11934             NV n = 0.0;
11935             UV u = 0;
11936             bool overflowed = FALSE;
11937             bool just_zero  = TRUE;	/* just plain 0 or binary number? */
11938             bool has_digs = FALSE;
11939             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
11940             static const char* const bases[5] =
11941               { "", "binary", "", "octal", "hexadecimal" };
11942             static const char* const Bases[5] =
11943               { "", "Binary", "", "Octal", "Hexadecimal" };
11944             static const char* const maxima[5] =
11945               { "",
11946                 "0b11111111111111111111111111111111",
11947                 "",
11948                 "037777777777",
11949                 "0xffffffff" };
11950 
11951             /* check for hex */
11952             if (isALPHA_FOLD_EQ(s[1], 'x')) {
11953                 shift = 4;
11954                 s += 2;
11955                 just_zero = FALSE;
11956             } else if (isALPHA_FOLD_EQ(s[1], 'b')) {
11957                 shift = 1;
11958                 s += 2;
11959                 just_zero = FALSE;
11960             }
11961             /* check for a decimal in disguise */
11962             else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e'))
11963                 goto decimal;
11964             /* so it must be octal */
11965             else {
11966                 shift = 3;
11967                 s++;
11968                 if (isALPHA_FOLD_EQ(*s, 'o')) {
11969                     s++;
11970                     just_zero = FALSE;
11971                     new_octal = TRUE;
11972                 }
11973             }
11974 
11975             if (*s == '_') {
11976                 WARN_ABOUT_UNDERSCORE();
11977                lastub = s++;
11978             }
11979 
11980             /* read the rest of the number */
11981             for (;;) {
11982                 /* x is used in the overflow test,
11983                    b is the digit we're adding on. */
11984                 UV x, b;
11985 
11986                 switch (*s) {
11987 
11988                 /* if we don't mention it, we're done */
11989                 default:
11990                     goto out;
11991 
11992                 /* _ are ignored -- but warned about if consecutive */
11993                 case '_':
11994                     if (lastub && s == lastub + 1)
11995                         WARN_ABOUT_UNDERSCORE();
11996                     lastub = s++;
11997                     break;
11998 
11999                 /* 8 and 9 are not octal */
12000                 case '8': case '9':
12001                     if (shift == 3)
12002                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
12003                     /* FALLTHROUGH */
12004 
12005                 /* octal digits */
12006                 case '2': case '3': case '4':
12007                 case '5': case '6': case '7':
12008                     if (shift == 1)
12009                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
12010                     /* FALLTHROUGH */
12011 
12012                 case '0': case '1':
12013                     b = *s++ & 15;		/* ASCII digit -> value of digit */
12014                     goto digit;
12015 
12016                 /* hex digits */
12017                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
12018                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
12019                     /* make sure they said 0x */
12020                     if (shift != 4)
12021                         goto out;
12022                     b = (*s++ & 7) + 9;
12023 
12024                     /* Prepare to put the digit we have onto the end
12025                        of the number so far.  We check for overflows.
12026                     */
12027 
12028                   digit:
12029                     just_zero = FALSE;
12030                     has_digs = TRUE;
12031                     if (!overflowed) {
12032                         assert(shift >= 0);
12033                         x = u << shift;	/* make room for the digit */
12034 
12035                         total_bits += shift;
12036 
12037                         if ((x >> shift) != u
12038                             && !(PL_hints & HINT_NEW_BINARY)) {
12039                             overflowed = TRUE;
12040                             n = (NV) u;
12041                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
12042                                              "Integer overflow in %s number",
12043                                              bases[shift]);
12044                         } else
12045                             u = x | b;		/* add the digit to the end */
12046                     }
12047                     if (overflowed) {
12048                         n *= nvshift[shift];
12049                         /* If an NV has not enough bits in its
12050                          * mantissa to represent an UV this summing of
12051                          * small low-order numbers is a waste of time
12052                          * (because the NV cannot preserve the
12053                          * low-order bits anyway): we could just
12054                          * remember when did we overflow and in the
12055                          * end just multiply n by the right
12056                          * amount. */
12057                         n += (NV) b;
12058                     }
12059 
12060                     if (high_non_zero == 0 && b > 0)
12061                         high_non_zero = b;
12062 
12063                     if (high_non_zero)
12064                         non_zero_integer_digits++;
12065 
12066                     /* this could be hexfp, but peek ahead
12067                      * to avoid matching ".." */
12068                     if (UNLIKELY(HEXFP_PEEK(s))) {
12069                         goto out;
12070                     }
12071 
12072                     break;
12073                 }
12074             }
12075 
12076           /* if we get here, we had success: make a scalar value from
12077              the number.
12078           */
12079           out:
12080 
12081             /* final misplaced underbar check */
12082             if (s[-1] == '_')
12083                 WARN_ABOUT_UNDERSCORE();
12084 
12085             if (UNLIKELY(HEXFP_PEEK(s))) {
12086                 /* Do sloppy (on the underbars) but quick detection
12087                  * (and value construction) for hexfp, the decimal
12088                  * detection will shortly be more thorough with the
12089                  * underbar checks. */
12090                 const char* h = s;
12091                 significant_bits = non_zero_integer_digits * shift;
12092 #ifdef HEXFP_UQUAD
12093                 hexfp_uquad = u;
12094 #else /* HEXFP_NV */
12095                 hexfp_nv = u;
12096 #endif
12097                 /* Ignore the leading zero bits of
12098                  * the high (first) non-zero digit. */
12099                 if (high_non_zero) {
12100                     if (high_non_zero < 0x8)
12101                         significant_bits--;
12102                     if (high_non_zero < 0x4)
12103                         significant_bits--;
12104                     if (high_non_zero < 0x2)
12105                         significant_bits--;
12106                 }
12107 
12108                 if (*h == '.') {
12109 #ifdef HEXFP_NV
12110                     NV nv_mult = 1.0;
12111 #endif
12112                     bool accumulate = TRUE;
12113                     U8 b = 0; /* silence compiler warning */
12114                     int lim = 1 << shift;
12115                     for (h++; ((isXDIGIT(*h) && (b = XDIGIT_VALUE(*h)) < lim) ||
12116                                *h == '_'); h++) {
12117                         if (isXDIGIT(*h)) {
12118                             significant_bits += shift;
12119 #ifdef HEXFP_UQUAD
12120                             if (accumulate) {
12121                                 if (significant_bits < NV_MANT_DIG) {
12122                                     /* We are in the long "run" of xdigits,
12123                                      * accumulate the full four bits. */
12124                                     assert(shift >= 0);
12125                                     hexfp_uquad <<= shift;
12126                                     hexfp_uquad |= b;
12127                                     hexfp_frac_bits += shift;
12128                                 } else if (significant_bits - shift < NV_MANT_DIG) {
12129                                     /* We are at a hexdigit either at,
12130                                      * or straddling, the edge of mantissa.
12131                                      * We will try grabbing as many as
12132                                      * possible bits. */
12133                                     int tail =
12134                                       significant_bits - NV_MANT_DIG;
12135                                     if (tail <= 0)
12136                                        tail += shift;
12137                                     assert(tail >= 0);
12138                                     hexfp_uquad <<= tail;
12139                                     assert((shift - tail) >= 0);
12140                                     hexfp_uquad |= b >> (shift - tail);
12141                                     hexfp_frac_bits += tail;
12142 
12143                                     /* Ignore the trailing zero bits
12144                                      * of the last non-zero xdigit.
12145                                      *
12146                                      * The assumption here is that if
12147                                      * one has input of e.g. the xdigit
12148                                      * eight (0x8), there is only one
12149                                      * bit being input, not the full
12150                                      * four bits.  Conversely, if one
12151                                      * specifies a zero xdigit, the
12152                                      * assumption is that one really
12153                                      * wants all those bits to be zero. */
12154                                     if (b) {
12155                                         if ((b & 0x1) == 0x0) {
12156                                             significant_bits--;
12157                                             if ((b & 0x2) == 0x0) {
12158                                                 significant_bits--;
12159                                                 if ((b & 0x4) == 0x0) {
12160                                                     significant_bits--;
12161                                                 }
12162                                             }
12163                                         }
12164                                     }
12165 
12166                                     accumulate = FALSE;
12167                                 }
12168                             } else {
12169                                 /* Keep skipping the xdigits, and
12170                                  * accumulating the significant bits,
12171                                  * but do not shift the uquad
12172                                  * (which would catastrophically drop
12173                                  * high-order bits) or accumulate the
12174                                  * xdigits anymore. */
12175                             }
12176 #else /* HEXFP_NV */
12177                             if (accumulate) {
12178                                 nv_mult /= nvshift[shift];
12179                                 if (nv_mult > 0.0)
12180                                     hexfp_nv += b * nv_mult;
12181                                 else
12182                                     accumulate = FALSE;
12183                             }
12184 #endif
12185                         }
12186                         if (significant_bits >= NV_MANT_DIG)
12187                             accumulate = FALSE;
12188                     }
12189                 }
12190 
12191                 if ((total_bits > 0 || significant_bits > 0) &&
12192                     isALPHA_FOLD_EQ(*h, 'p')) {
12193                     bool negexp = FALSE;
12194                     h++;
12195                     if (*h == '+')
12196                         h++;
12197                     else if (*h == '-') {
12198                         negexp = TRUE;
12199                         h++;
12200                     }
12201                     if (isDIGIT(*h)) {
12202                         I32 hexfp_exp = 0;
12203                         while (isDIGIT(*h) || *h == '_') {
12204                             if (isDIGIT(*h)) {
12205                                 hexfp_exp *= 10;
12206                                 hexfp_exp += *h - '0';
12207 #ifdef NV_MIN_EXP
12208                                 if (negexp
12209                                     && -hexfp_exp < NV_MIN_EXP - 1) {
12210                                     /* NOTE: this means that the exponent
12211                                      * underflow warning happens for
12212                                      * the IEEE 754 subnormals (denormals),
12213                                      * because DBL_MIN_EXP etc are the lowest
12214                                      * possible binary (or, rather, DBL_RADIX-base)
12215                                      * exponent for normals, not subnormals.
12216                                      *
12217                                      * This may or may not be a good thing. */
12218                                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
12219                                                    "Hexadecimal float: exponent underflow");
12220                                     break;
12221                                 }
12222 #endif
12223 #ifdef NV_MAX_EXP
12224                                 if (!negexp
12225                                     && hexfp_exp > NV_MAX_EXP - 1) {
12226                                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
12227                                                    "Hexadecimal float: exponent overflow");
12228                                     break;
12229                                 }
12230 #endif
12231                             }
12232                             h++;
12233                         }
12234                         if (negexp)
12235                             hexfp_exp = -hexfp_exp;
12236 #ifdef HEXFP_UQUAD
12237                         hexfp_exp -= hexfp_frac_bits;
12238 #endif
12239                         hexfp_mult = Perl_pow(2.0, hexfp_exp);
12240                         hexfp = TRUE;
12241                         goto decimal;
12242                     }
12243                 }
12244             }
12245 
12246             if (!just_zero && !has_digs) {
12247                 /* 0x, 0o or 0b with no digits, treat it as an error.
12248                    Originally this backed up the parse before the b or
12249                    x, but that has the potential for silent changes in
12250                    behaviour, like for: "0x.3" and "0x+$foo".
12251                 */
12252                 const char *d = s;
12253                 char *oldbp = PL_bufptr;
12254                 if (*d) ++d; /* so the user sees the bad non-digit */
12255                 PL_bufptr = (char *)d; /* so yyerror reports the context */
12256                 yyerror(Perl_form(aTHX_ "No digits found for %s literal",
12257                                   bases[shift]));
12258                 PL_bufptr = oldbp;
12259             }
12260 
12261             if (overflowed) {
12262                 if (n > 4294967295.0)
12263                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12264                                    "%s number > %s non-portable",
12265                                    Bases[shift],
12266                                    new_octal ? "0o37777777777" : maxima[shift]);
12267                 sv = newSVnv(n);
12268             }
12269             else {
12270 #if UVSIZE > 4
12271                 if (u > 0xffffffff)
12272                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12273                                    "%s number > %s non-portable",
12274                                    Bases[shift],
12275                                    new_octal ? "0o37777777777" : maxima[shift]);
12276 #endif
12277                 sv = newSVuv(u);
12278             }
12279             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
12280                 sv = new_constant(start, s - start, "integer",
12281                                   sv, NULL, NULL, 0, NULL);
12282             else if (PL_hints & HINT_NEW_BINARY)
12283                 sv = new_constant(start, s - start, "binary",
12284                                   sv, NULL, NULL, 0, NULL);
12285         }
12286         break;
12287 
12288     /*
12289       handle decimal numbers.
12290       we're also sent here when we read a 0 as the first digit
12291     */
12292     case '1': case '2': case '3': case '4': case '5':
12293     case '6': case '7': case '8': case '9': case '.':
12294       decimal:
12295         d = PL_tokenbuf;
12296         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
12297         floatit = FALSE;
12298         if (hexfp) {
12299             floatit = TRUE;
12300             *d++ = '0';
12301             switch (shift) {
12302             case 4:
12303                 *d++ = 'x';
12304                 s = start + 2;
12305                 break;
12306             case 3:
12307                 if (new_octal) {
12308                     *d++ = 'o';
12309                     s = start + 2;
12310                     break;
12311                 }
12312                 s = start + 1;
12313                 break;
12314             case 1:
12315                 *d++ = 'b';
12316                 s = start + 2;
12317                 break;
12318             default:
12319                 NOT_REACHED; /* NOTREACHED */
12320             }
12321         }
12322 
12323         /* read next group of digits and _ and copy into d */
12324         while (isDIGIT(*s)
12325                || *s == '_'
12326                || UNLIKELY(hexfp && isXDIGIT(*s)))
12327         {
12328             /* skip underscores, checking for misplaced ones
12329                if -w is on
12330             */
12331             if (*s == '_') {
12332                 if (lastub && s == lastub + 1)
12333                     WARN_ABOUT_UNDERSCORE();
12334                 lastub = s++;
12335             }
12336             else {
12337                 /* check for end of fixed-length buffer */
12338                 if (d >= e)
12339                     Perl_croak(aTHX_ "%s", number_too_long);
12340                 /* if we're ok, copy the character */
12341                 *d++ = *s++;
12342             }
12343         }
12344 
12345         /* final misplaced underbar check */
12346         if (lastub && s == lastub + 1)
12347             WARN_ABOUT_UNDERSCORE();
12348 
12349         /* read a decimal portion if there is one.  avoid
12350            3..5 being interpreted as the number 3. followed
12351            by .5
12352         */
12353         if (*s == '.' && s[1] != '.') {
12354             floatit = TRUE;
12355             *d++ = *s++;
12356 
12357             if (*s == '_') {
12358                 WARN_ABOUT_UNDERSCORE();
12359                 lastub = s;
12360             }
12361 
12362             /* copy, ignoring underbars, until we run out of digits.
12363             */
12364             for (; isDIGIT(*s)
12365                    || *s == '_'
12366                    || UNLIKELY(hexfp && isXDIGIT(*s));
12367                  s++)
12368             {
12369                 /* fixed length buffer check */
12370                 if (d >= e)
12371                     Perl_croak(aTHX_ "%s", number_too_long);
12372                 if (*s == '_') {
12373                    if (lastub && s == lastub + 1)
12374                         WARN_ABOUT_UNDERSCORE();
12375                    lastub = s;
12376                 }
12377                 else
12378                     *d++ = *s;
12379             }
12380             /* fractional part ending in underbar? */
12381             if (s[-1] == '_')
12382                 WARN_ABOUT_UNDERSCORE();
12383             if (*s == '.' && isDIGIT(s[1])) {
12384                 /* oops, it's really a v-string, but without the "v" */
12385                 s = start;
12386                 goto vstring;
12387             }
12388         }
12389 
12390         /* read exponent part, if present */
12391         if ((isALPHA_FOLD_EQ(*s, 'e')
12392               || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
12393             && memCHRs("+-0123456789_", s[1]))
12394         {
12395             int exp_digits = 0;
12396             const char *save_s = s;
12397             char * save_d = d;
12398 
12399             /* regardless of whether user said 3E5 or 3e5, use lower 'e',
12400                ditto for p (hexfloats) */
12401             if ((isALPHA_FOLD_EQ(*s, 'e'))) {
12402                 /* At least some Mach atof()s don't grok 'E' */
12403                 *d++ = 'e';
12404             }
12405             else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) {
12406                 *d++ = 'p';
12407             }
12408 
12409             s++;
12410 
12411 
12412             /* stray preinitial _ */
12413             if (*s == '_') {
12414                 WARN_ABOUT_UNDERSCORE();
12415                 lastub = s++;
12416             }
12417 
12418             /* allow positive or negative exponent */
12419             if (*s == '+' || *s == '-')
12420                 *d++ = *s++;
12421 
12422             /* stray initial _ */
12423             if (*s == '_') {
12424                 WARN_ABOUT_UNDERSCORE();
12425                 lastub = s++;
12426             }
12427 
12428             /* read digits of exponent */
12429             while (isDIGIT(*s) || *s == '_') {
12430                 if (isDIGIT(*s)) {
12431                     ++exp_digits;
12432                     if (d >= e)
12433                         Perl_croak(aTHX_ "%s", number_too_long);
12434                     *d++ = *s++;
12435                 }
12436                 else {
12437                    if (((lastub && s == lastub + 1)
12438                         || (!isDIGIT(s[1]) && s[1] != '_')))
12439                         WARN_ABOUT_UNDERSCORE();
12440                    lastub = s++;
12441                 }
12442             }
12443 
12444             if (!exp_digits) {
12445                 /* no exponent digits, the [eEpP] could be for something else,
12446                  * though in practice we don't get here for p since that's preparsed
12447                  * earlier, and results in only the 0xX being consumed, so behave similarly
12448                  * for decimal floats and consume only the D.DD, leaving the [eE] to the
12449                  * next token.
12450                  */
12451                 s = save_s;
12452                 d = save_d;
12453             }
12454             else {
12455                 floatit = TRUE;
12456             }
12457         }
12458 
12459 
12460         /*
12461            We try to do an integer conversion first if no characters
12462            indicating "float" have been found.
12463          */
12464 
12465         if (!floatit) {
12466             UV uv;
12467             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
12468 
12469             if (flags == IS_NUMBER_IN_UV) {
12470               if (uv <= IV_MAX)
12471                 sv = newSViv(uv); /* Prefer IVs over UVs. */
12472               else
12473                 sv = newSVuv(uv);
12474             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
12475               if (uv <= (UV) IV_MIN)
12476                 sv = newSViv(-(IV)uv);
12477               else
12478                 floatit = TRUE;
12479             } else
12480               floatit = TRUE;
12481         }
12482         if (floatit) {
12483             /* terminate the string */
12484             *d = '\0';
12485             if (UNLIKELY(hexfp)) {
12486 #  ifdef NV_MANT_DIG
12487                 if (significant_bits > NV_MANT_DIG)
12488                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
12489                                    "Hexadecimal float: mantissa overflow");
12490 #  endif
12491 #ifdef HEXFP_UQUAD
12492                 nv = hexfp_uquad * hexfp_mult;
12493 #else /* HEXFP_NV */
12494                 nv = hexfp_nv * hexfp_mult;
12495 #endif
12496             } else {
12497                 nv = Atof(PL_tokenbuf);
12498             }
12499             sv = newSVnv(nv);
12500         }
12501 
12502         if ( floatit
12503              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
12504             const char *const key = floatit ? "float" : "integer";
12505             const STRLEN keylen = floatit ? 5 : 7;
12506             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
12507                                 key, keylen, sv, NULL, NULL, 0, NULL);
12508         }
12509         break;
12510 
12511     /* if it starts with a v, it could be a v-string */
12512     case 'v':
12513     vstring:
12514                 sv = newSV(5); /* preallocate storage space */
12515                 ENTER_with_name("scan_vstring");
12516                 SAVEFREESV(sv);
12517                 s = scan_vstring(s, PL_bufend, sv);
12518                 SvREFCNT_inc_simple_void_NN(sv);
12519                 LEAVE_with_name("scan_vstring");
12520         break;
12521     }
12522 
12523     /* make the op for the constant and return */
12524 
12525     if (sv)
12526         lvalp->opval = newSVOP(OP_CONST, 0, sv);
12527     else
12528         lvalp->opval = NULL;
12529 
12530     return (char *)s;
12531 }
12532 
12533 STATIC char *
S_scan_formline(pTHX_ char * s)12534 S_scan_formline(pTHX_ char *s)
12535 {
12536     SV * const stuff = newSVpvs("");
12537     bool needargs = FALSE;
12538     bool eofmt = FALSE;
12539 
12540     PERL_ARGS_ASSERT_SCAN_FORMLINE;
12541 
12542     while (!needargs) {
12543         char *eol;
12544         if (*s == '.') {
12545             char *t = s+1;
12546 #ifdef PERL_STRICT_CR
12547             while (SPACE_OR_TAB(*t))
12548                 t++;
12549 #else
12550             while (SPACE_OR_TAB(*t) || *t == '\r')
12551                 t++;
12552 #endif
12553             if (*t == '\n' || t == PL_bufend) {
12554                 eofmt = TRUE;
12555                 break;
12556             }
12557         }
12558         eol = (char *) memchr(s,'\n',PL_bufend-s);
12559         if (! eol) {
12560             eol = PL_bufend;
12561         }
12562         else {
12563             eol++;
12564         }
12565         if (*s != '#') {
12566             char *t;
12567             for (t = s; t < eol; t++) {
12568                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
12569                     needargs = FALSE;
12570                     goto enough;	/* ~~ must be first line in formline */
12571                 }
12572                 if (*t == '@' || *t == '^')
12573                     needargs = TRUE;
12574             }
12575             if (eol > s) {
12576                 sv_catpvn(stuff, s, eol-s);
12577 #ifndef PERL_STRICT_CR
12578                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
12579                     char *end = SvPVX(stuff) + SvCUR(stuff);
12580                     end[-2] = '\n';
12581                     end[-1] = '\0';
12582                     SvCUR_set(stuff, SvCUR(stuff) - 1);
12583                 }
12584 #endif
12585             }
12586             else
12587               break;
12588         }
12589         s = (char*)eol;
12590         if ((PL_rsfp || PL_parser->filtered)
12591          && PL_parser->form_lex_state == LEX_NORMAL) {
12592             bool got_some;
12593             PL_bufptr = PL_bufend;
12594             COPLINE_INC_WITH_HERELINES;
12595             got_some = lex_next_chunk(0);
12596             CopLINE_dec(PL_curcop);
12597             s = PL_bufptr;
12598             if (!got_some)
12599                 break;
12600         }
12601         incline(s, PL_bufend);
12602     }
12603   enough:
12604     if (!SvCUR(stuff) || needargs)
12605         PL_lex_state = PL_parser->form_lex_state;
12606     if (SvCUR(stuff)) {
12607         PL_expect = XSTATE;
12608         if (needargs) {
12609             const char *s2 = s;
12610             while (isSPACE(*s2) && *s2 != '\n')
12611                 s2++;
12612             if (*s2 == '{') {
12613                 PL_expect = XTERMBLOCK;
12614                 NEXTVAL_NEXTTOKE.ival = 0;
12615                 force_next(KW_DO);
12616             }
12617             NEXTVAL_NEXTTOKE.ival = 0;
12618             force_next(FORMLBRACK);
12619         }
12620         if (!IN_BYTES) {
12621             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
12622                 SvUTF8_on(stuff);
12623         }
12624         NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0, stuff);
12625         force_next(THING);
12626     }
12627     else {
12628         SvREFCNT_dec(stuff);
12629         if (eofmt)
12630             PL_lex_formbrack = 0;
12631     }
12632     return s;
12633 }
12634 
12635 /*
12636 =for apidoc start_subparse
12637 
12638 Set things up for parsing a subroutine.
12639 
12640 If C<is_format> is non-zero, the input is to be considered a format sub
12641 (a specialised sub used to implement perl's C<format> feature); else a
12642 normal C<sub>.
12643 
12644 C<flags> are added to the flags for C<PL_compcv>.  C<flags> may include the
12645 C<CVf_IsMETHOD> bit, which causes the new subroutine to be a method.
12646 
12647 This returns the value of C<PL_savestack_ix> that was in effect upon entry to
12648 the function;
12649 
12650 =cut
12651 */
12652 
12653 I32
Perl_start_subparse(pTHX_ I32 is_format,U32 flags)12654 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
12655 {
12656     const I32 oldsavestack_ix = PL_savestack_ix;
12657     CV* const outsidecv = PL_compcv;
12658     bool is_method = flags & CVf_IsMETHOD;
12659 
12660     if (is_method)
12661         croak_kw_unless_class("method");
12662 
12663     SAVEI32(PL_subline);
12664     save_item(PL_subname);
12665     SAVESPTR(PL_compcv);
12666 
12667     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
12668     CvFLAGS(PL_compcv) |= flags;
12669 
12670     PL_subline = CopLINE(PL_curcop);
12671     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
12672     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
12673     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
12674     if (outsidecv && CvPADLIST(outsidecv))
12675         CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
12676     if (is_method)
12677         class_prepare_method_parse(PL_compcv);
12678 
12679     return oldsavestack_ix;
12680 }
12681 
12682 /* If o represents a builtin attribute, apply it to cv and returns true.
12683  * Otherwise does nothing and returns false
12684  */
12685 
12686 STATIC bool
S_apply_builtin_cv_attribute(pTHX_ CV * cv,OP * o)12687 S_apply_builtin_cv_attribute(pTHX_ CV *cv, OP *o)
12688 {
12689     assert(o->op_type == OP_CONST);
12690     SV *sv = cSVOPo_sv;
12691     STRLEN len = SvCUR(sv);
12692 
12693     /* NOTE: any CV attrs applied here need to be part of
12694        the CVf_BUILTIN_ATTRS define in cv.h! */
12695 
12696     if(memEQs(SvPVX(sv), len, "lvalue"))
12697         CvLVALUE_on(cv);
12698     else if(memEQs(SvPVX(sv), len, "method"))
12699         CvNOWARN_AMBIGUOUS_on(cv);
12700     else if(memEQs(SvPVX(sv), len, "const")) {
12701         Perl_ck_warner_d(aTHX_
12702             packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
12703            ":const is experimental"
12704         );
12705         CvANONCONST_on(cv);
12706         if (!CvANON(cv))
12707             yyerror(":const is not permitted on named subroutines");
12708     }
12709     else
12710         return false;
12711 
12712     return true;
12713 }
12714 
12715 /*
12716 =for apidoc apply_builtin_cv_attributes
12717 
12718 Given an OP_LIST containing attribute definitions, filter it for known builtin
12719 attributes to apply to the cv, returning a possibly-smaller list containing
12720 just the remaining ones.
12721 
12722 =cut
12723 */
12724 
12725 OP *
Perl_apply_builtin_cv_attributes(pTHX_ CV * cv,OP * attrlist)12726 Perl_apply_builtin_cv_attributes(pTHX_ CV *cv, OP *attrlist)
12727 {
12728     PERL_ARGS_ASSERT_APPLY_BUILTIN_CV_ATTRIBUTES;
12729 
12730     if(!attrlist)
12731         return attrlist;
12732 
12733     if(attrlist->op_type != OP_LIST) {
12734         /* Not in fact a list but just a single attribute */
12735         if(S_apply_builtin_cv_attribute(aTHX_ cv, attrlist)) {
12736             op_free(attrlist);
12737             return NULL;
12738         }
12739 
12740         return attrlist;
12741     }
12742 
12743     OP *prev = cLISTOPx(attrlist)->op_first;
12744     assert(prev->op_type == OP_PUSHMARK);
12745     OP *o = OpSIBLING(prev);
12746 
12747     OP *next;
12748     for(; o; o = next) {
12749         next = OpSIBLING(o);
12750 
12751         if(S_apply_builtin_cv_attribute(aTHX_ cv, o)) {
12752             op_sibling_splice(attrlist, prev, 1, NULL);
12753             op_free(o);
12754         }
12755         else {
12756             prev = o;
12757         }
12758     }
12759 
12760     if(OpHAS_SIBLING(cLISTOPx(attrlist)->op_first))
12761         return attrlist;
12762 
12763     /* The list is now entirely empty, we might as well discard it */
12764     op_free(attrlist);
12765     return NULL;
12766 }
12767 
12768 
12769 /* Do extra initialisation of a CV (typically one just created by
12770  * start_subparse()) if that CV is for a named sub
12771  */
12772 
12773 void
Perl_init_named_cv(pTHX_ CV * cv,OP * nameop)12774 Perl_init_named_cv(pTHX_ CV *cv, OP *nameop)
12775 {
12776     PERL_ARGS_ASSERT_INIT_NAMED_CV;
12777 
12778     if (nameop->op_type == OP_CONST) {
12779         const char *const name = SvPV_nolen_const(((SVOP*)nameop)->op_sv);
12780         if (   strEQ(name, "BEGIN")
12781             || strEQ(name, "END")
12782             || strEQ(name, "INIT")
12783             || strEQ(name, "CHECK")
12784             || strEQ(name, "UNITCHECK")
12785         )
12786           CvSPECIAL_on(cv);
12787     }
12788     else
12789     /* State subs inside anonymous subs need to be
12790      clonable themselves. */
12791     if (   CvANON(CvOUTSIDE(cv))
12792         || CvCLONE(CvOUTSIDE(cv))
12793         || !PadnameIsSTATE(PadlistNAMESARRAY(CvPADLIST(
12794                         CvOUTSIDE(cv)
12795                      ))[nameop->op_targ])
12796     )
12797       CvCLONE_on(cv);
12798 }
12799 
12800 
12801 static int
S_yywarn(pTHX_ const char * const s,U32 flags)12802 S_yywarn(pTHX_ const char *const s, U32 flags)
12803 {
12804     PERL_ARGS_ASSERT_YYWARN;
12805 
12806     PL_in_eval |= EVAL_WARNONLY;
12807     yyerror_pv(s, flags);
12808     return 0;
12809 }
12810 
12811 void
Perl_abort_execution(pTHX_ SV * msg_sv,const char * const name)12812 Perl_abort_execution(pTHX_ SV* msg_sv, const char * const name)
12813 {
12814     PERL_ARGS_ASSERT_ABORT_EXECUTION;
12815 
12816     if (msg_sv) {
12817         if (PL_minus_c)
12818             Perl_croak(aTHX_ "%" SVf "%s had compilation errors.\n", SVfARG(msg_sv), name);
12819         else {
12820             Perl_croak(aTHX_
12821                     "%" SVf "Execution of %s aborted due to compilation errors.\n", SVfARG(msg_sv), name);
12822         }
12823     } else {
12824         if (PL_minus_c)
12825             Perl_croak(aTHX_ "%s had compilation errors.\n", name);
12826         else {
12827             Perl_croak(aTHX_
12828                     "Execution of %s aborted due to compilation errors.\n", name);
12829         }
12830     }
12831 
12832     NOT_REACHED; /* NOTREACHED */
12833 }
12834 
12835 void
Perl_yyquit(pTHX)12836 Perl_yyquit(pTHX)
12837 {
12838     /* Called, after at least one error has been found, to abort the parse now,
12839      * instead of trying to forge ahead */
12840 
12841     yyerror_pvn(NULL, 0, 0);
12842 }
12843 
12844 int
Perl_yyerror(pTHX_ const char * const s)12845 Perl_yyerror(pTHX_ const char *const s)
12846 {
12847     PERL_ARGS_ASSERT_YYERROR;
12848     int r = yyerror_pvn(s, strlen(s), 0);
12849     return r;
12850 }
12851 
12852 int
Perl_yyerror_pv(pTHX_ const char * const s,U32 flags)12853 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
12854 {
12855     PERL_ARGS_ASSERT_YYERROR_PV;
12856     int r = yyerror_pvn(s, strlen(s), flags);
12857     return r;
12858 }
12859 
12860 int
Perl_yyerror_pvn(pTHX_ const char * const s,STRLEN len,U32 flags)12861 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
12862 {
12863     const char *context = NULL;
12864     int contlen = -1;
12865     SV *msg;
12866     SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
12867     int yychar  = PL_parser->yychar;
12868 
12869     /* Output error message 's' with length 'len'.  'flags' are SV flags that
12870      * apply.  If the number of errors found is large enough, it abandons
12871      * parsing.  If 's' is NULL, there is no message, and it abandons
12872      * processing unconditionally */
12873 
12874     if (s != NULL) {
12875         if (!yychar || (yychar == PERLY_SEMICOLON && !PL_rsfp))
12876             sv_catpvs(where_sv, "at EOF");
12877         else if (   PL_oldoldbufptr
12878                  && PL_bufptr > PL_oldoldbufptr
12879                  && PL_bufptr - PL_oldoldbufptr < 200
12880                  && PL_oldoldbufptr != PL_oldbufptr
12881                  && PL_oldbufptr != PL_bufptr)
12882         {
12883             while (isSPACE(*PL_oldoldbufptr))
12884                 PL_oldoldbufptr++;
12885             context = PL_oldoldbufptr;
12886             contlen = PL_bufptr - PL_oldoldbufptr;
12887         }
12888         else if (  PL_oldbufptr
12889                 && PL_bufptr > PL_oldbufptr
12890                 && PL_bufptr - PL_oldbufptr < 200
12891                 && PL_oldbufptr != PL_bufptr)
12892         {
12893             while (isSPACE(*PL_oldbufptr))
12894                 PL_oldbufptr++;
12895             context = PL_oldbufptr;
12896             contlen = PL_bufptr - PL_oldbufptr;
12897         }
12898         else if (yychar > 255)
12899             sv_catpvs(where_sv, "next token ???");
12900         else if (yychar == YYEMPTY) {
12901             if (PL_lex_state == LEX_NORMAL)
12902                 sv_catpvs(where_sv, "at end of line");
12903             else if (PL_lex_inpat)
12904                 sv_catpvs(where_sv, "within pattern");
12905             else
12906                 sv_catpvs(where_sv, "within string");
12907         }
12908         else {
12909             sv_catpvs(where_sv, "next char ");
12910             if (yychar < 32)
12911                 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
12912             else if (isPRINT_LC(yychar)) {
12913                 const char string = yychar;
12914                 sv_catpvn(where_sv, &string, 1);
12915             }
12916             else
12917                 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
12918         }
12919         msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
12920         Perl_sv_catpvf(aTHX_ msg, " at %s line %" LINE_Tf ", ",
12921             OutCopFILE(PL_curcop),
12922             (PL_parser->preambling == NOLINE
12923                    ? CopLINE(PL_curcop)
12924                    : PL_parser->preambling));
12925         if (context)
12926             Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n",
12927                                  UTF8fARG(UTF, contlen, context));
12928         else
12929             Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv));
12930         if (   PL_multi_start < PL_multi_end
12931             && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1)
12932         {
12933             Perl_sv_catpvf(aTHX_ msg,
12934             "  (Might be a runaway multi-line %c%c string starting on"
12935             " line %" LINE_Tf ")\n",
12936                     (int)PL_multi_open,(int)PL_multi_close,(line_t)PL_multi_start);
12937             PL_multi_end = 0;
12938         }
12939         if (PL_in_eval & EVAL_WARNONLY) {
12940             PL_in_eval &= ~EVAL_WARNONLY;
12941             Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg));
12942         }
12943         else {
12944             qerror(msg);
12945         }
12946     }
12947     /* if there was no message then this is a yyquit(), which is actualy handled
12948      * by qerror() with a NULL argument */
12949     if (s == NULL)
12950         qerror(NULL);
12951 
12952     PL_in_my = 0;
12953     PL_in_my_stash = NULL;
12954     return 0;
12955 }
12956 
12957 STATIC char*
S_swallow_bom(pTHX_ U8 * s)12958 S_swallow_bom(pTHX_ U8 *s)
12959 {
12960     const STRLEN slen = SvCUR(PL_linestr);
12961 
12962     PERL_ARGS_ASSERT_SWALLOW_BOM;
12963 
12964     switch (s[0]) {
12965     case 0xFF:
12966         if (s[1] == 0xFE) {
12967             /* UTF-16 little-endian? (or UTF-32LE?) */
12968             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
12969                 /* diag_listed_as: Unsupported script encoding %s */
12970                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
12971 #ifndef PERL_NO_UTF16_FILTER
12972 #ifdef DEBUGGING
12973             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
12974 #endif
12975             s += 2;
12976             if (PL_bufend > (char*)s) {
12977                 s = add_utf16_textfilter(s, TRUE);
12978             }
12979 #else
12980             /* diag_listed_as: Unsupported script encoding %s */
12981             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
12982 #endif
12983         }
12984         break;
12985     case 0xFE:
12986         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
12987 #ifndef PERL_NO_UTF16_FILTER
12988 #ifdef DEBUGGING
12989             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
12990 #endif
12991             s += 2;
12992             if (PL_bufend > (char *)s) {
12993                 s = add_utf16_textfilter(s, FALSE);
12994             }
12995 #else
12996             /* diag_listed_as: Unsupported script encoding %s */
12997             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
12998 #endif
12999         }
13000         break;
13001     case BOM_UTF8_FIRST_BYTE: {
13002         if (memBEGINs(s+1, slen - 1, BOM_UTF8_TAIL)) {
13003 #ifdef DEBUGGING
13004             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13005 #endif
13006             s += sizeof(BOM_UTF8) - 1;                     /* UTF-8 */
13007         }
13008         break;
13009     }
13010     case 0:
13011         if (slen > 3) {
13012              if (s[1] == 0) {
13013                   if (s[2] == 0xFE && s[3] == 0xFF) {
13014                        /* UTF-32 big-endian */
13015                        /* diag_listed_as: Unsupported script encoding %s */
13016                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
13017                   }
13018              }
13019              else if (s[2] == 0 && s[3] != 0) {
13020                   /* Leading bytes
13021                    * 00 xx 00 xx
13022                    * are a good indicator of UTF-16BE. */
13023 #ifndef PERL_NO_UTF16_FILTER
13024 #ifdef DEBUGGING
13025                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
13026 #endif
13027                   s = add_utf16_textfilter(s, FALSE);
13028 #else
13029                   /* diag_listed_as: Unsupported script encoding %s */
13030                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13031 #endif
13032              }
13033         }
13034         break;
13035 
13036     default:
13037          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
13038                   /* Leading bytes
13039                    * xx 00 xx 00
13040                    * are a good indicator of UTF-16LE. */
13041 #ifndef PERL_NO_UTF16_FILTER
13042 #ifdef DEBUGGING
13043               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
13044 #endif
13045               s = add_utf16_textfilter(s, TRUE);
13046 #else
13047               /* diag_listed_as: Unsupported script encoding %s */
13048               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13049 #endif
13050          }
13051     }
13052     return (char*)s;
13053 }
13054 
13055 
13056 #ifndef PERL_NO_UTF16_FILTER
13057 static I32
S_utf16_textfilter(pTHX_ int idx,SV * sv,int maxlen)13058 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
13059 {
13060     SV *const filter = FILTER_DATA(idx);
13061     /* We re-use this each time round, throwing the contents away before we
13062        return.  */
13063     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
13064     SV *const utf8_buffer = filter;
13065     IV status = IoPAGE(filter);
13066     const bool reverse = cBOOL(IoLINES(filter));
13067     I32 retval;
13068 
13069     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
13070 
13071     /* As we're automatically added, at the lowest level, and hence only called
13072        from this file, we can be sure that we're not called in block mode. Hence
13073        don't bother writing code to deal with block mode.  */
13074     if (maxlen) {
13075         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
13076     }
13077     if (status < 0) {
13078         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%" IVdf ")", status);
13079     }
13080     DEBUG_P(PerlIO_printf(Perl_debug_log,
13081                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
13082                           FPTR2DPTR(void *, S_utf16_textfilter),
13083                           reverse ? 'l' : 'b', idx, maxlen, status,
13084                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13085 
13086     while (1) {
13087         STRLEN chars;
13088         STRLEN have;
13089         Size_t newlen;
13090         U8 *end;
13091         /* First, look in our buffer of existing UTF-8 data:  */
13092         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
13093 
13094         if (nl) {
13095             ++nl;
13096         } else if (status == 0) {
13097             /* EOF */
13098             IoPAGE(filter) = 0;
13099             nl = SvEND(utf8_buffer);
13100         }
13101         if (nl) {
13102             STRLEN got = nl - SvPVX(utf8_buffer);
13103             /* Did we have anything to append?  */
13104             retval = got != 0;
13105             sv_catpvn(sv, SvPVX(utf8_buffer), got);
13106             /* Everything else in this code works just fine if SVp_POK isn't
13107                set.  This, however, needs it, and we need it to work, else
13108                we loop infinitely because the buffer is never consumed.  */
13109             sv_chop(utf8_buffer, nl);
13110             break;
13111         }
13112 
13113         /* OK, not a complete line there, so need to read some more UTF-16.
13114            Read an extra octect if the buffer currently has an odd number. */
13115         while (1) {
13116             if (status <= 0)
13117                 break;
13118             if (SvCUR(utf16_buffer) >= 2) {
13119                 /* Location of the high octet of the last complete code point.
13120                    Gosh, UTF-16 is a pain. All the benefits of variable length,
13121                    *coupled* with all the benefits of partial reads and
13122                    endianness.  */
13123                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
13124                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
13125 
13126                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
13127                     break;
13128                 }
13129 
13130                 /* We have the first half of a surrogate. Read more.  */
13131                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
13132             }
13133 
13134             status = FILTER_READ(idx + 1, utf16_buffer,
13135                                  160 + (SvCUR(utf16_buffer) & 1));
13136             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%" IVdf " SvCUR(sv)=%" UVuf "\n", status, (UV)SvCUR(utf16_buffer)));
13137             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
13138             if (status < 0) {
13139                 /* Error */
13140                 IoPAGE(filter) = status;
13141                 return status;
13142             }
13143         }
13144 
13145         /* 'chars' isn't quite the right name, as code points above 0xFFFF
13146          * require 4 bytes per char */
13147         chars = SvCUR(utf16_buffer) >> 1;
13148         have = SvCUR(utf8_buffer);
13149 
13150         /* Assume the worst case size as noted by the functions: twice the
13151          * number of input bytes */
13152         SvGROW(utf8_buffer, have + chars * 4 + 1);
13153 
13154         if (reverse) {
13155             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
13156                                          (U8*)SvPVX_const(utf8_buffer) + have,
13157                                          chars * 2, &newlen);
13158         } else {
13159             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
13160                                 (U8*)SvPVX_const(utf8_buffer) + have,
13161                                 chars * 2, &newlen);
13162         }
13163         SvCUR_set(utf8_buffer, have + newlen);
13164         *end = '\0';
13165 
13166         /* No need to keep this SV "well-formed" with a '\0' after the end, as
13167            it's private to us, and utf16_to_utf8{,reversed} take a
13168            (pointer,length) pair, rather than a NUL-terminated string.  */
13169         if(SvCUR(utf16_buffer) & 1) {
13170             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
13171             SvCUR_set(utf16_buffer, 1);
13172         } else {
13173             SvCUR_set(utf16_buffer, 0);
13174         }
13175     }
13176     DEBUG_P(PerlIO_printf(Perl_debug_log,
13177                           "utf16_textfilter: returns, status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
13178                           status,
13179                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13180     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
13181     return retval;
13182 }
13183 
13184 static U8 *
S_add_utf16_textfilter(pTHX_ U8 * const s,bool reversed)13185 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
13186 {
13187     SV *filter = filter_add(S_utf16_textfilter, NULL);
13188 
13189     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
13190 
13191     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
13192     SvPVCLEAR(filter);
13193     IoLINES(filter) = reversed;
13194     IoPAGE(filter) = 1; /* Not EOF */
13195 
13196     /* Sadly, we have to return a valid pointer, come what may, so we have to
13197        ignore any error return from this.  */
13198     SvCUR_set(PL_linestr, 0);
13199     if (FILTER_READ(0, PL_linestr, 0)) {
13200         SvUTF8_on(PL_linestr);
13201     } else {
13202         SvUTF8_on(PL_linestr);
13203     }
13204     PL_bufend = SvEND(PL_linestr);
13205     return (U8*)SvPVX(PL_linestr);
13206 }
13207 #endif
13208 
13209 /*
13210 =for apidoc scan_vstring
13211 
13212 Returns a pointer to the next character after the parsed
13213 vstring, as well as updating the passed in sv.
13214 
13215 Function must be called like
13216 
13217         sv = sv_2mortal(newSV(5));
13218         s = scan_vstring(s,e,sv);
13219 
13220 where s and e are the start and end of the string.
13221 The sv should already be large enough to store the vstring
13222 passed in, for performance reasons.
13223 
13224 This function may croak if fatal warnings are enabled in the
13225 calling scope, hence the sv_2mortal in the example (to prevent
13226 a leak).  Make sure to do SvREFCNT_inc afterwards if you use
13227 sv_2mortal.
13228 
13229 =cut
13230 */
13231 
13232 char *
Perl_scan_vstring(pTHX_ const char * s,const char * const e,SV * sv)13233 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
13234 {
13235     const char *pos = s;
13236     const char *start = s;
13237 
13238     PERL_ARGS_ASSERT_SCAN_VSTRING;
13239 
13240     if (*pos == 'v') pos++;  /* get past 'v' */
13241     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13242         pos++;
13243     if ( *pos != '.') {
13244         /* this may not be a v-string if followed by => */
13245         const char *next = pos;
13246         while (next < e && isSPACE(*next))
13247             ++next;
13248         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
13249             /* return string not v-string */
13250             sv_setpvn(sv,(char *)s,pos-s);
13251             return (char *)pos;
13252         }
13253     }
13254 
13255     if (!isALPHA(*pos)) {
13256         U8 tmpbuf[UTF8_MAXBYTES+1];
13257 
13258         if (*s == 'v')
13259             s++;  /* get past 'v' */
13260 
13261         SvPVCLEAR(sv);
13262 
13263         for (;;) {
13264             /* this is atoi() that tolerates underscores */
13265             U8 *tmpend;
13266             UV rev = 0;
13267             const char *end = pos;
13268             UV mult = 1;
13269             while (--end >= s) {
13270                 if (*end != '_') {
13271                     const UV orev = rev;
13272                     rev += (*end - '0') * mult;
13273                     mult *= 10;
13274                     if (orev > rev)
13275                         /* diag_listed_as: Integer overflow in %s number */
13276                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13277                                          "Integer overflow in decimal number");
13278                 }
13279             }
13280 
13281             /* Append native character for the rev point */
13282             tmpend = uvchr_to_utf8(tmpbuf, rev);
13283             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
13284             if (!UVCHR_IS_INVARIANT(rev))
13285                  SvUTF8_on(sv);
13286             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
13287                  s = ++pos;
13288             else {
13289                  s = pos;
13290                  break;
13291             }
13292             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13293                  pos++;
13294         }
13295         SvPOK_on(sv);
13296         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
13297         SvRMAGICAL_on(sv);
13298     }
13299     return (char *)s;
13300 }
13301 
13302 int
Perl_keyword_plugin_standard(pTHX_ char * keyword_ptr,STRLEN keyword_len,OP ** op_ptr)13303 Perl_keyword_plugin_standard(pTHX_
13304         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13305 {
13306     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
13307     PERL_UNUSED_CONTEXT;
13308     PERL_UNUSED_ARG(keyword_ptr);
13309     PERL_UNUSED_ARG(keyword_len);
13310     PERL_UNUSED_ARG(op_ptr);
13311     return KEYWORD_PLUGIN_DECLINE;
13312 }
13313 
13314 STRLEN
Perl_infix_plugin_standard(pTHX_ char * operator_ptr,STRLEN operator_len,struct Perl_custom_infix ** def)13315 Perl_infix_plugin_standard(pTHX_
13316         char *operator_ptr, STRLEN operator_len, struct Perl_custom_infix **def)
13317 {
13318     PERL_ARGS_ASSERT_INFIX_PLUGIN_STANDARD;
13319     PERL_UNUSED_CONTEXT;
13320     PERL_UNUSED_ARG(operator_ptr);
13321     PERL_UNUSED_ARG(operator_len);
13322     PERL_UNUSED_ARG(def);
13323     return 0;
13324 }
13325 
13326 /*
13327 =for apidoc_section $lexer
13328 =for apidoc wrap_keyword_plugin
13329 
13330 Puts a C function into the chain of keyword plugins.  This is the
13331 preferred way to manipulate the L</PL_keyword_plugin> variable.
13332 C<new_plugin> is a pointer to the C function that is to be added to the
13333 keyword plugin chain, and C<old_plugin_p> points to the storage location
13334 where a pointer to the next function in the chain will be stored.  The
13335 value of C<new_plugin> is written into the L</PL_keyword_plugin> variable,
13336 while the value previously stored there is written to C<*old_plugin_p>.
13337 
13338 L</PL_keyword_plugin> is global to an entire process, and a module wishing
13339 to hook keyword parsing may find itself invoked more than once per
13340 process, typically in different threads.  To handle that situation, this
13341 function is idempotent.  The location C<*old_plugin_p> must initially
13342 (once per process) contain a null pointer.  A C variable of static
13343 duration (declared at file scope, typically also marked C<static> to give
13344 it internal linkage) will be implicitly initialised appropriately, if it
13345 does not have an explicit initialiser.  This function will only actually
13346 modify the plugin chain if it finds C<*old_plugin_p> to be null.  This
13347 function is also thread safe on the small scale.  It uses appropriate
13348 locking to avoid race conditions in accessing L</PL_keyword_plugin>.
13349 
13350 When this function is called, the function referenced by C<new_plugin>
13351 must be ready to be called, except for C<*old_plugin_p> being unfilled.
13352 In a threading situation, C<new_plugin> may be called immediately, even
13353 before this function has returned.  C<*old_plugin_p> will always be
13354 appropriately set before C<new_plugin> is called.  If C<new_plugin>
13355 decides not to do anything special with the identifier that it is given
13356 (which is the usual case for most calls to a keyword plugin), it must
13357 chain the plugin function referenced by C<*old_plugin_p>.
13358 
13359 Taken all together, XS code to install a keyword plugin should typically
13360 look something like this:
13361 
13362     static Perl_keyword_plugin_t next_keyword_plugin;
13363     static OP *my_keyword_plugin(pTHX_
13364         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13365     {
13366         if (memEQs(keyword_ptr, keyword_len,
13367                    "my_new_keyword")) {
13368             ...
13369         } else {
13370             return next_keyword_plugin(aTHX_
13371                 keyword_ptr, keyword_len, op_ptr);
13372         }
13373     }
13374     BOOT:
13375         wrap_keyword_plugin(my_keyword_plugin,
13376                             &next_keyword_plugin);
13377 
13378 Direct access to L</PL_keyword_plugin> should be avoided.
13379 
13380 =cut
13381 */
13382 
13383 void
Perl_wrap_keyword_plugin(pTHX_ Perl_keyword_plugin_t new_plugin,Perl_keyword_plugin_t * old_plugin_p)13384 Perl_wrap_keyword_plugin(pTHX_
13385     Perl_keyword_plugin_t new_plugin, Perl_keyword_plugin_t *old_plugin_p)
13386 {
13387 
13388     PERL_UNUSED_CONTEXT;
13389     PERL_ARGS_ASSERT_WRAP_KEYWORD_PLUGIN;
13390     if (*old_plugin_p) return;
13391     KEYWORD_PLUGIN_MUTEX_LOCK;
13392     if (!*old_plugin_p) {
13393         *old_plugin_p = PL_keyword_plugin;
13394         PL_keyword_plugin = new_plugin;
13395     }
13396     KEYWORD_PLUGIN_MUTEX_UNLOCK;
13397 }
13398 
13399 /*
13400 =for apidoc wrap_infix_plugin
13401 
13402 B<NOTE:> This API exists entirely for the purpose of making the CPAN module
13403 C<XS::Parse::Infix> work. It is not expected that additional modules will make
13404 use of it; rather, that they should use C<XS::Parse::Infix> to provide parsing
13405 of new infix operators.
13406 
13407 Puts a C function into the chain of infix plugins.  This is the preferred
13408 way to manipulate the L</PL_infix_plugin> variable.  C<new_plugin> is a
13409 pointer to the C function that is to be added to the infix plugin chain, and
13410 C<old_plugin_p> points to a storage location where a pointer to the next
13411 function in the chain will be stored.  The value of C<new_plugin> is written
13412 into the L</PL_infix_plugin> variable, while the value previously stored there
13413 is written to C<*old_plugin_p>.
13414 
13415 Direct access to L</PL_infix_plugin> should be avoided.
13416 
13417 =cut
13418 */
13419 
13420 void
Perl_wrap_infix_plugin(pTHX_ Perl_infix_plugin_t new_plugin,Perl_infix_plugin_t * old_plugin_p)13421 Perl_wrap_infix_plugin(pTHX_
13422     Perl_infix_plugin_t new_plugin, Perl_infix_plugin_t *old_plugin_p)
13423 {
13424 
13425     PERL_UNUSED_CONTEXT;
13426     PERL_ARGS_ASSERT_WRAP_INFIX_PLUGIN;
13427     if (*old_plugin_p) return;
13428     /* We use the same mutex as for PL_keyword_plugin as it's so rare either
13429      * of them is actually updated; no need for a dedicated one each */
13430     KEYWORD_PLUGIN_MUTEX_LOCK;
13431     if (!*old_plugin_p) {
13432         *old_plugin_p = PL_infix_plugin;
13433         PL_infix_plugin = new_plugin;
13434     }
13435     KEYWORD_PLUGIN_MUTEX_UNLOCK;
13436 }
13437 
13438 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
13439 static void
S_parse_recdescent(pTHX_ int gramtype,I32 fakeeof)13440 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
13441 {
13442     SAVEI32(PL_lex_brackets);
13443     if (PL_lex_brackets > 100)
13444         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
13445     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
13446     SAVEI32(PL_lex_allbrackets);
13447     PL_lex_allbrackets = 0;
13448     SAVEI8(PL_lex_fakeeof);
13449     PL_lex_fakeeof = (U8)fakeeof;
13450     if(yyparse(gramtype) && !PL_parser->error_count)
13451         qerror(Perl_mess(aTHX_ "Parse error"));
13452 }
13453 
13454 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
13455 static OP *
S_parse_recdescent_for_op(pTHX_ int gramtype,I32 fakeeof)13456 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
13457 {
13458     OP *o;
13459     ENTER;
13460     SAVEVPTR(PL_eval_root);
13461     PL_eval_root = NULL;
13462     parse_recdescent(gramtype, fakeeof);
13463     o = PL_eval_root;
13464     LEAVE;
13465     return o;
13466 }
13467 
13468 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
13469 static OP *
S_parse_expr(pTHX_ I32 fakeeof,U32 flags)13470 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
13471 {
13472     OP *exprop;
13473     if (flags & ~PARSE_OPTIONAL)
13474         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
13475     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
13476     if (!exprop && !(flags & PARSE_OPTIONAL)) {
13477         if (!PL_parser->error_count)
13478             qerror(Perl_mess(aTHX_ "Parse error"));
13479         exprop = newOP(OP_NULL, 0);
13480     }
13481     return exprop;
13482 }
13483 
13484 /*
13485 =for apidoc parse_arithexpr
13486 
13487 Parse a Perl arithmetic expression.  This may contain operators of precedence
13488 down to the bit shift operators.  The expression must be followed (and thus
13489 terminated) either by a comparison or lower-precedence operator or by
13490 something that would normally terminate an expression such as semicolon.
13491 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
13492 otherwise it is mandatory.  It is up to the caller to ensure that the
13493 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
13494 the source of the code to be parsed and the lexical context for the
13495 expression.
13496 
13497 The op tree representing the expression is returned.  If an optional
13498 expression is absent, a null pointer is returned, otherwise the pointer
13499 will be non-null.
13500 
13501 If an error occurs in parsing or compilation, in most cases a valid op
13502 tree is returned anyway.  The error is reflected in the parser state,
13503 normally resulting in a single exception at the top level of parsing
13504 which covers all the compilation errors that occurred.  Some compilation
13505 errors, however, will throw an exception immediately.
13506 
13507 =for apidoc Amnh||PARSE_OPTIONAL
13508 
13509 =cut
13510 
13511 */
13512 
13513 OP *
Perl_parse_arithexpr(pTHX_ U32 flags)13514 Perl_parse_arithexpr(pTHX_ U32 flags)
13515 {
13516     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
13517 }
13518 
13519 /*
13520 =for apidoc parse_termexpr
13521 
13522 Parse a Perl term expression.  This may contain operators of precedence
13523 down to the assignment operators.  The expression must be followed (and thus
13524 terminated) either by a comma or lower-precedence operator or by
13525 something that would normally terminate an expression such as semicolon.
13526 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
13527 otherwise it is mandatory.  It is up to the caller to ensure that the
13528 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
13529 the source of the code to be parsed and the lexical context for the
13530 expression.
13531 
13532 The op tree representing the expression is returned.  If an optional
13533 expression is absent, a null pointer is returned, otherwise the pointer
13534 will be non-null.
13535 
13536 If an error occurs in parsing or compilation, in most cases a valid op
13537 tree is returned anyway.  The error is reflected in the parser state,
13538 normally resulting in a single exception at the top level of parsing
13539 which covers all the compilation errors that occurred.  Some compilation
13540 errors, however, will throw an exception immediately.
13541 
13542 =cut
13543 */
13544 
13545 OP *
Perl_parse_termexpr(pTHX_ U32 flags)13546 Perl_parse_termexpr(pTHX_ U32 flags)
13547 {
13548     return parse_expr(LEX_FAKEEOF_COMMA, flags);
13549 }
13550 
13551 /*
13552 =for apidoc parse_listexpr
13553 
13554 Parse a Perl list expression.  This may contain operators of precedence
13555 down to the comma operator.  The expression must be followed (and thus
13556 terminated) either by a low-precedence logic operator such as C<or> or by
13557 something that would normally terminate an expression such as semicolon.
13558 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
13559 otherwise it is mandatory.  It is up to the caller to ensure that the
13560 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
13561 the source of the code to be parsed and the lexical context for the
13562 expression.
13563 
13564 The op tree representing the expression is returned.  If an optional
13565 expression is absent, a null pointer is returned, otherwise the pointer
13566 will be non-null.
13567 
13568 If an error occurs in parsing or compilation, in most cases a valid op
13569 tree is returned anyway.  The error is reflected in the parser state,
13570 normally resulting in a single exception at the top level of parsing
13571 which covers all the compilation errors that occurred.  Some compilation
13572 errors, however, will throw an exception immediately.
13573 
13574 =cut
13575 */
13576 
13577 OP *
Perl_parse_listexpr(pTHX_ U32 flags)13578 Perl_parse_listexpr(pTHX_ U32 flags)
13579 {
13580     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
13581 }
13582 
13583 /*
13584 =for apidoc parse_fullexpr
13585 
13586 Parse a single complete Perl expression.  This allows the full
13587 expression grammar, including the lowest-precedence operators such
13588 as C<or>.  The expression must be followed (and thus terminated) by a
13589 token that an expression would normally be terminated by: end-of-file,
13590 closing bracketing punctuation, semicolon, or one of the keywords that
13591 signals a postfix expression-statement modifier.  If C<flags> has the
13592 C<PARSE_OPTIONAL> bit set, then the expression is optional, otherwise it is
13593 mandatory.  It is up to the caller to ensure that the dynamic parser
13594 state (L</PL_parser> et al) is correctly set to reflect the source of
13595 the code to be parsed and the lexical context for the expression.
13596 
13597 The op tree representing the expression is returned.  If an optional
13598 expression is absent, a null pointer is returned, otherwise the pointer
13599 will be non-null.
13600 
13601 If an error occurs in parsing or compilation, in most cases a valid op
13602 tree is returned anyway.  The error is reflected in the parser state,
13603 normally resulting in a single exception at the top level of parsing
13604 which covers all the compilation errors that occurred.  Some compilation
13605 errors, however, will throw an exception immediately.
13606 
13607 =cut
13608 */
13609 
13610 OP *
Perl_parse_fullexpr(pTHX_ U32 flags)13611 Perl_parse_fullexpr(pTHX_ U32 flags)
13612 {
13613     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
13614 }
13615 
13616 /*
13617 =for apidoc parse_block
13618 
13619 Parse a single complete Perl code block.  This consists of an opening
13620 brace, a sequence of statements, and a closing brace.  The block
13621 constitutes a lexical scope, so C<my> variables and various compile-time
13622 effects can be contained within it.  It is up to the caller to ensure
13623 that the dynamic parser state (L</PL_parser> et al) is correctly set to
13624 reflect the source of the code to be parsed and the lexical context for
13625 the statement.
13626 
13627 The op tree representing the code block is returned.  This is always a
13628 real op, never a null pointer.  It will normally be a C<lineseq> list,
13629 including C<nextstate> or equivalent ops.  No ops to construct any kind
13630 of runtime scope are included by virtue of it being a block.
13631 
13632 If an error occurs in parsing or compilation, in most cases a valid op
13633 tree (most likely null) is returned anyway.  The error is reflected in
13634 the parser state, normally resulting in a single exception at the top
13635 level of parsing which covers all the compilation errors that occurred.
13636 Some compilation errors, however, will throw an exception immediately.
13637 
13638 The C<flags> parameter is reserved for future use, and must always
13639 be zero.
13640 
13641 =cut
13642 */
13643 
13644 OP *
Perl_parse_block(pTHX_ U32 flags)13645 Perl_parse_block(pTHX_ U32 flags)
13646 {
13647     if (flags)
13648         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
13649     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
13650 }
13651 
13652 /*
13653 =for apidoc parse_barestmt
13654 
13655 Parse a single unadorned Perl statement.  This may be a normal imperative
13656 statement or a declaration that has compile-time effect.  It does not
13657 include any label or other affixture.  It is up to the caller to ensure
13658 that the dynamic parser state (L</PL_parser> et al) is correctly set to
13659 reflect the source of the code to be parsed and the lexical context for
13660 the statement.
13661 
13662 The op tree representing the statement is returned.  This may be a
13663 null pointer if the statement is null, for example if it was actually
13664 a subroutine definition (which has compile-time side effects).  If not
13665 null, it will be ops directly implementing the statement, suitable to
13666 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
13667 equivalent op (except for those embedded in a scope contained entirely
13668 within the statement).
13669 
13670 If an error occurs in parsing or compilation, in most cases a valid op
13671 tree (most likely null) is returned anyway.  The error is reflected in
13672 the parser state, normally resulting in a single exception at the top
13673 level of parsing which covers all the compilation errors that occurred.
13674 Some compilation errors, however, will throw an exception immediately.
13675 
13676 The C<flags> parameter is reserved for future use, and must always
13677 be zero.
13678 
13679 =cut
13680 */
13681 
13682 OP *
Perl_parse_barestmt(pTHX_ U32 flags)13683 Perl_parse_barestmt(pTHX_ U32 flags)
13684 {
13685     if (flags)
13686         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
13687     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
13688 }
13689 
13690 /*
13691 =for apidoc parse_label
13692 
13693 Parse a single label, possibly optional, of the type that may prefix a
13694 Perl statement.  It is up to the caller to ensure that the dynamic parser
13695 state (L</PL_parser> et al) is correctly set to reflect the source of
13696 the code to be parsed.  If C<flags> has the C<PARSE_OPTIONAL> bit set, then the
13697 label is optional, otherwise it is mandatory.
13698 
13699 The name of the label is returned in the form of a fresh scalar.  If an
13700 optional label is absent, a null pointer is returned.
13701 
13702 If an error occurs in parsing, which can only occur if the label is
13703 mandatory, a valid label is returned anyway.  The error is reflected in
13704 the parser state, normally resulting in a single exception at the top
13705 level of parsing which covers all the compilation errors that occurred.
13706 
13707 =cut
13708 */
13709 
13710 SV *
Perl_parse_label(pTHX_ U32 flags)13711 Perl_parse_label(pTHX_ U32 flags)
13712 {
13713     if (flags & ~PARSE_OPTIONAL)
13714         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
13715     if (PL_nexttoke) {
13716         PL_parser->yychar = yylex();
13717         if (PL_parser->yychar == LABEL) {
13718             SV * const labelsv = cSVOPx(pl_yylval.opval)->op_sv;
13719             PL_parser->yychar = YYEMPTY;
13720             cSVOPx(pl_yylval.opval)->op_sv = NULL;
13721             op_free(pl_yylval.opval);
13722             return labelsv;
13723         } else {
13724             yyunlex();
13725             goto no_label;
13726         }
13727     } else {
13728         char *s, *t;
13729         STRLEN wlen, bufptr_pos;
13730         lex_read_space(0);
13731         t = s = PL_bufptr;
13732         if (!isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
13733             goto no_label;
13734         t = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen, FALSE);
13735         if (word_takes_any_delimiter(s, wlen))
13736             goto no_label;
13737         bufptr_pos = s - SvPVX(PL_linestr);
13738         PL_bufptr = t;
13739         lex_read_space(LEX_KEEP_PREVIOUS);
13740         t = PL_bufptr;
13741         s = SvPVX(PL_linestr) + bufptr_pos;
13742         if (t[0] == ':' && t[1] != ':') {
13743             PL_oldoldbufptr = PL_oldbufptr;
13744             PL_oldbufptr = s;
13745             PL_bufptr = t+1;
13746             return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
13747         } else {
13748             PL_bufptr = s;
13749             no_label:
13750             if (flags & PARSE_OPTIONAL) {
13751                 return NULL;
13752             } else {
13753                 qerror(Perl_mess(aTHX_ "Parse error"));
13754                 return newSVpvs("x");
13755             }
13756         }
13757     }
13758 }
13759 
13760 /*
13761 =for apidoc parse_fullstmt
13762 
13763 Parse a single complete Perl statement.  This may be a normal imperative
13764 statement or a declaration that has compile-time effect, and may include
13765 optional labels.  It is up to the caller to ensure that the dynamic
13766 parser state (L</PL_parser> et al) is correctly set to reflect the source
13767 of the code to be parsed and the lexical context for the statement.
13768 
13769 The op tree representing the statement is returned.  This may be a
13770 null pointer if the statement is null, for example if it was actually
13771 a subroutine definition (which has compile-time side effects).  If not
13772 null, it will be the result of a L</newSTATEOP> call, normally including
13773 a C<nextstate> or equivalent op.
13774 
13775 If an error occurs in parsing or compilation, in most cases a valid op
13776 tree (most likely null) is returned anyway.  The error is reflected in
13777 the parser state, normally resulting in a single exception at the top
13778 level of parsing which covers all the compilation errors that occurred.
13779 Some compilation errors, however, will throw an exception immediately.
13780 
13781 The C<flags> parameter is reserved for future use, and must always
13782 be zero.
13783 
13784 =cut
13785 */
13786 
13787 OP *
Perl_parse_fullstmt(pTHX_ U32 flags)13788 Perl_parse_fullstmt(pTHX_ U32 flags)
13789 {
13790     if (flags)
13791         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
13792     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
13793 }
13794 
13795 /*
13796 =for apidoc parse_stmtseq
13797 
13798 Parse a sequence of zero or more Perl statements.  These may be normal
13799 imperative statements, including optional labels, or declarations
13800 that have compile-time effect, or any mixture thereof.  The statement
13801 sequence ends when a closing brace or end-of-file is encountered in a
13802 place where a new statement could have validly started.  It is up to
13803 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
13804 is correctly set to reflect the source of the code to be parsed and the
13805 lexical context for the statements.
13806 
13807 The op tree representing the statement sequence is returned.  This may
13808 be a null pointer if the statements were all null, for example if there
13809 were no statements or if there were only subroutine definitions (which
13810 have compile-time side effects).  If not null, it will be a C<lineseq>
13811 list, normally including C<nextstate> or equivalent ops.
13812 
13813 If an error occurs in parsing or compilation, in most cases a valid op
13814 tree is returned anyway.  The error is reflected in the parser state,
13815 normally resulting in a single exception at the top level of parsing
13816 which covers all the compilation errors that occurred.  Some compilation
13817 errors, however, will throw an exception immediately.
13818 
13819 The C<flags> parameter is reserved for future use, and must always
13820 be zero.
13821 
13822 =cut
13823 */
13824 
13825 OP *
Perl_parse_stmtseq(pTHX_ U32 flags)13826 Perl_parse_stmtseq(pTHX_ U32 flags)
13827 {
13828     OP *stmtseqop;
13829     I32 c;
13830     if (flags)
13831         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
13832     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
13833     c = lex_peek_unichar(0);
13834     if (c != -1 && c != /*{*/'}')
13835         qerror(Perl_mess(aTHX_ "Parse error"));
13836     return stmtseqop;
13837 }
13838 
13839 /*
13840 =for apidoc parse_subsignature
13841 
13842 Parse a subroutine signature declaration. This is the contents of the
13843 parentheses following a named or anonymous subroutine declaration when the
13844 C<signatures> feature is enabled. Note that this function neither expects
13845 nor consumes the opening and closing parentheses around the signature; it
13846 is the caller's job to handle these.
13847 
13848 This function must only be called during parsing of a subroutine; after
13849 L</start_subparse> has been called. It might allocate lexical variables on
13850 the pad for the current subroutine.
13851 
13852 The op tree to unpack the arguments from the stack at runtime is returned.
13853 This op tree should appear at the beginning of the compiled function. The
13854 caller may wish to use L</op_append_list> to build their function body
13855 after it, or splice it together with the body before calling L</newATTRSUB>.
13856 
13857 The C<flags> parameter is reserved for future use, and must always
13858 be zero.
13859 
13860 =cut
13861 */
13862 
13863 OP *
Perl_parse_subsignature(pTHX_ U32 flags)13864 Perl_parse_subsignature(pTHX_ U32 flags)
13865 {
13866     if (flags)
13867         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_subsignature");
13868     return parse_recdescent_for_op(GRAMSUBSIGNATURE, LEX_FAKEEOF_NONEXPR);
13869 }
13870 
13871 /*
13872  * ex: set ts=8 sts=4 sw=4 et:
13873  */
13874