xref: /openbsd/gnu/usr.bin/perl/toke.c (revision 3d61058a)
1 /*    toke.c
2  *
3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
5  *
6  *    You may distribute under the terms of either the GNU General Public
7  *    License or the Artistic License, as specified in the README file.
8  *
9  */
10 
11 /*
12  *  'It all comes from here, the stench and the peril.'    --Frodo
13  *
14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
15  */
16 
17 /*
18  * This file is the lexer for Perl.  It's closely linked to the
19  * parser, perly.y.
20  *
21  * The main routine is yylex(), which returns the next token.
22  */
23 
24 /*
25 =head1 Lexer interface
26 This is the lower layer of the Perl parser, managing characters and tokens.
27 
28 =for apidoc AmnU|yy_parser *|PL_parser
29 
30 Pointer to a structure encapsulating the state of the parsing operation
31 currently in progress.  The pointer can be locally changed to perform
32 a nested parse without interfering with the state of an outer parse.
33 Individual members of C<PL_parser> have their own documentation.
34 
35 =cut
36 */
37 
38 #include "EXTERN.h"
39 #define PERL_IN_TOKE_C
40 #include "perl.h"
41 #include "invlist_inline.h"
42 
43 #define new_constant(a,b,c,d,e,f,g, h)	\
44         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g, h)
45 
46 #define pl_yylval	(PL_parser->yylval)
47 
48 /* XXX temporary backwards compatibility */
49 #define PL_lex_brackets		(PL_parser->lex_brackets)
50 #define PL_lex_allbrackets	(PL_parser->lex_allbrackets)
51 #define PL_lex_fakeeof		(PL_parser->lex_fakeeof)
52 #define PL_lex_brackstack	(PL_parser->lex_brackstack)
53 #define PL_lex_casemods		(PL_parser->lex_casemods)
54 #define PL_lex_casestack        (PL_parser->lex_casestack)
55 #define PL_lex_dojoin		(PL_parser->lex_dojoin)
56 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
57 #define PL_lex_inpat		(PL_parser->lex_inpat)
58 #define PL_lex_inwhat		(PL_parser->lex_inwhat)
59 #define PL_lex_op		(PL_parser->lex_op)
60 #define PL_lex_repl		(PL_parser->lex_repl)
61 #define PL_lex_starts		(PL_parser->lex_starts)
62 #define PL_lex_stuff		(PL_parser->lex_stuff)
63 #define PL_multi_start		(PL_parser->multi_start)
64 #define PL_multi_open		(PL_parser->multi_open)
65 #define PL_multi_close		(PL_parser->multi_close)
66 #define PL_preambled		(PL_parser->preambled)
67 #define PL_linestr		(PL_parser->linestr)
68 #define PL_expect		(PL_parser->expect)
69 #define PL_copline		(PL_parser->copline)
70 #define PL_bufptr		(PL_parser->bufptr)
71 #define PL_oldbufptr		(PL_parser->oldbufptr)
72 #define PL_oldoldbufptr		(PL_parser->oldoldbufptr)
73 #define PL_linestart		(PL_parser->linestart)
74 #define PL_bufend		(PL_parser->bufend)
75 #define PL_last_uni		(PL_parser->last_uni)
76 #define PL_last_lop		(PL_parser->last_lop)
77 #define PL_last_lop_op		(PL_parser->last_lop_op)
78 #define PL_lex_state		(PL_parser->lex_state)
79 #define PL_rsfp			(PL_parser->rsfp)
80 #define PL_rsfp_filters		(PL_parser->rsfp_filters)
81 #define PL_in_my		(PL_parser->in_my)
82 #define PL_in_my_stash		(PL_parser->in_my_stash)
83 #define PL_tokenbuf		(PL_parser->tokenbuf)
84 #define PL_multi_end		(PL_parser->multi_end)
85 #define PL_error_count		(PL_parser->error_count)
86 
87 #  define PL_nexttoke		(PL_parser->nexttoke)
88 #  define PL_nexttype		(PL_parser->nexttype)
89 #  define PL_nextval		(PL_parser->nextval)
90 
91 
92 #define SvEVALED(sv) \
93     (SvTYPE(sv) >= SVt_PVNV \
94     && ((XPVIV*)SvANY(sv))->xiv_u.xivu_eval_seen)
95 
96 static const char ident_too_long[] = "Identifier too long";
97 static const char ident_var_zero_multi_digit[] = "Numeric variables with more than one digit may not start with '0'";
98 
99 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
100 
101 #define XENUMMASK  0x3f
102 #define XFAKEEOF   0x40
103 #define XFAKEBRACK 0x80
104 
105 #ifdef USE_UTF8_SCRIPTS
106 #   define UTF cBOOL(!IN_BYTES)
107 #else
108 #   define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
109 #endif
110 
111 /* The maximum number of characters preceding the unrecognized one to display */
112 #define UNRECOGNIZED_PRECEDE_COUNT 10
113 
114 /* In variables named $^X, these are the legal values for X.
115  * 1999-02-27 mjd-perl-patch@plover.com */
116 #define isCONTROLVAR(x) (isUPPER(x) || memCHRs("[\\]^_?", (x)))
117 
118 /* Non-identifier plugin infix operators are allowed any printing character
119  * except spaces, digits, or identifier chars
120  */
121 #define isPLUGINFIX(c) (c && !isSPACE(c) && !isDIGIT(c) && !isALPHA(c))
122 /* Plugin infix operators may not begin with a quote symbol */
123 #define isPLUGINFIX_FIRST(c) (isPLUGINFIX(c) && c != '"' && c != '\'')
124 
125 #define PLUGINFIX_IS_ENABLED  UNLIKELY(PL_infix_plugin != &Perl_infix_plugin_standard)
126 
127 #define SPACE_OR_TAB(c) isBLANK_A(c)
128 
129 #define HEXFP_PEEK(s)     \
130     (((s[0] == '.') && \
131       (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \
132      isALPHA_FOLD_EQ(s[0], 'p'))
133 
134 /* LEX_* are values for PL_lex_state, the state of the lexer.
135  * They are arranged oddly so that the guard on the switch statement
136  * can get by with a single comparison (if the compiler is smart enough).
137  *
138  * These values refer to the various states within a sublex parse,
139  * i.e. within a double quotish string
140  */
141 
142 /* #define LEX_NOTPARSING		11 is done in perl.h. */
143 
144 #define LEX_NORMAL		10 /* normal code (ie not within "...")     */
145 #define LEX_INTERPNORMAL	 9 /* code within a string, eg "$foo[$x+1]" */
146 #define LEX_INTERPCASEMOD	 8 /* expecting a \U, \Q or \E etc          */
147 #define LEX_INTERPPUSH		 7 /* starting a new sublex parse level     */
148 #define LEX_INTERPSTART		 6 /* expecting the start of a $var         */
149 
150                                    /* at end of code, eg "$x" followed by:  */
151 #define LEX_INTERPEND		 5 /* ... eg not one of [, { or ->          */
152 #define LEX_INTERPENDMAYBE	 4 /* ... eg one of [, { or ->              */
153 
154 #define LEX_INTERPCONCAT	 3 /* expecting anything, eg at start of
155                                         string or after \E, $foo, etc       */
156 #define LEX_INTERPCONST		 2 /* NOT USED */
157 #define LEX_FORMLINE		 1 /* expecting a format line               */
158 
159 /* returned to yyl_try() to request it to retry the parse loop, expected to only
160    be returned directly by yyl_fake_eof(), but functions that call yyl_fake_eof()
161    can also return it.
162 
163    yylex (aka Perl_yylex) returns 0 on EOF rather than returning -1,
164    other token values are 258 or higher (see perly.h), so -1 should be
165    a safe value here.
166 */
167 #define YYL_RETRY (-1)
168 
169 #ifdef DEBUGGING
170 static const char* const lex_state_names[] = {
171     "KNOWNEXT",
172     "FORMLINE",
173     "INTERPCONST",
174     "INTERPCONCAT",
175     "INTERPENDMAYBE",
176     "INTERPEND",
177     "INTERPSTART",
178     "INTERPPUSH",
179     "INTERPCASEMOD",
180     "INTERPNORMAL",
181     "NORMAL"
182 };
183 #endif
184 
185 #include "keywords.h"
186 
187 /* CLINE is a macro that ensures PL_copline has a sane value */
188 
189 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
190 
191 /*
192  * Convenience functions to return different tokens and prime the
193  * lexer for the next token.  They all take an argument.
194  *
195  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
196  * OPERATOR     : generic operator
197  * AOPERATOR    : assignment operator
198  * PREBLOCK     : beginning the block after an if, while, foreach, ...
199  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
200  * PREREF       : *EXPR where EXPR is not a simple identifier
201  * TERM         : expression term
202  * POSTDEREF    : postfix dereference (->$* ->@[...] etc.)
203  * LOOPX        : loop exiting command (goto, last, dump, etc)
204  * FTST         : file test operator
205  * FUN0         : zero-argument function
206  * FUN0OP       : zero-argument function, with its op created in this file
207  * FUN1         : not used, except for not, which isn't a UNIOP
208  * BOop         : bitwise or or xor
209  * BAop         : bitwise and
210  * BCop         : bitwise complement
211  * SHop         : shift operator
212  * PWop         : power operator
213  * PMop         : pattern-matching operator
214  * Aop          : addition-level operator
215  * AopNOASSIGN  : addition-level operator that is never part of .=
216  * Mop          : multiplication-level operator
217  * ChEop        : chaining equality-testing operator
218  * NCEop        : non-chaining comparison operator at equality precedence
219  * ChRop        : chaining relational operator <= != gt
220  * NCRop        : non-chaining relational operator isa
221  *
222  * Also see LOP and lop() below.
223  */
224 
225 #ifdef DEBUGGING /* Serve -DT. */
226 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
227 #else
228 #   define REPORT(retval) (retval)
229 #endif
230 
231 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
232 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
233 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
234 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
235 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
236 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
237 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
238 #define PHASERBLOCK(f) return (pl_yylval.ival=f, PL_expect = XBLOCK, PL_bufptr = s, REPORT((int)PHASER))
239 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
240 #define LOOPX(f) return (PL_bufptr = force_word(s,BAREWORD,TRUE,FALSE), \
241                          pl_yylval.ival=f, \
242                          PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
243                          REPORT((int)LOOPEX))
244 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
245 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
246 #define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
247 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
248 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
249 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
250 #define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \
251                        REPORT(PERLY_TILDE)
252 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
253 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
254 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
255 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
256 #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
257 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
258 #define ChEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHEQOP))
259 #define NCEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCEQOP))
260 #define ChRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHRELOP))
261 #define NCRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCRELOP))
262 
263 /* This bit of chicanery makes a unary function followed by
264  * a parenthesis into a function with one argument, highest precedence.
265  * The UNIDOR macro is for unary functions that can be followed by the //
266  * operator (such as C<shift // 0>).
267  */
268 #define UNI3(f,x,have_x) { \
269         pl_yylval.ival = f; \
270         if (have_x) PL_expect = x; \
271         PL_bufptr = s; \
272         PL_last_uni = PL_oldbufptr; \
273         PL_last_lop_op = (f) < 0 ? -(f) : (f); \
274         if (*s == '(') \
275             return REPORT( (int)FUNC1 ); \
276         s = skipspace(s); \
277         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
278         }
279 #define UNI(f)    UNI3(f,XTERM,1)
280 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
281 #define UNIPROTO(f,optional) { \
282         if (optional) PL_last_uni = PL_oldbufptr; \
283         OPERATOR(f); \
284         }
285 
286 #define UNIBRACK(f) UNI3(f,0,0)
287 
288 /* return has special case parsing.
289  *
290  * List operators have low precedence. Functions have high precedence.
291  * Every built in, *except return*, if written with () around its arguments, is
292  * parsed as a function. Hence every other list built in:
293  *
294  * $ perl -lwe 'sub foo { join 2,4,6 * 1.5 } print for foo()' # join 2,4,9
295  * 429
296  * $ perl -lwe 'sub foo { join(2,4,6) * 1.5 } print for foo()' # 426 * 1.5
297  * 639
298  * $ perl -lwe 'sub foo { join+(2,4,6) * 1.5 } print for foo()'
299  * Useless use of a constant (2) in void context at -e line 1.
300  * Useless use of a constant (4) in void context at -e line 1.
301  *
302  * $
303  *
304  * empty line output because C<(2, 4, 6) * 1.5> is the comma operator, not a
305  * list. * forces scalar context, 6 * 1.5 is 9, and join(9) is the empty string.
306  *
307  * Whereas return:
308  *
309  * $ perl -lwe 'sub foo { return 2,4,6 * 1.5 } print for foo()'
310  * 2
311  * 4
312  * 9
313  * $ perl -lwe 'sub foo { return(2,4,6) * 1.5 } print for foo()'
314  * Useless use of a constant (2) in void context at -e line 1.
315  * Useless use of a constant (4) in void context at -e line 1.
316  * 9
317  * $ perl -lwe 'sub foo { return+(2,4,6) * 1.5 } print for foo()'
318  * Useless use of a constant (2) in void context at -e line 1.
319  * Useless use of a constant (4) in void context at -e line 1.
320  * 9
321  * $
322  *
323  * and:
324  * $ perl -lwe 'sub foo { return(2,4,6) } print for foo()'
325  * 2
326  * 4
327  * 6
328  *
329  * This last example is what we expect, but it's clearly inconsistent with how
330  * C<return(2,4,6) * 1.5> *ought* to behave, if the rules were consistently
331  * followed.
332  *
333  *
334  * Perl 3 attempted to be consistent:
335  *
336  *   The rules are more consistent about where parens are needed and
337  *   where they are not.  In particular, unary operators and list operators now
338  *   behave like functions if they're called like functions.
339  *
340  * However, the behaviour for return was reverted to the "old" parsing with
341  * patches 9-12:
342  *
343  *   The construct
344  *   return (1,2,3);
345  *   did not do what was expected, since return was swallowing the
346  *   parens in order to consider itself a function.  The solution,
347  *   since return never wants any trailing expression such as
348  *   return (1,2,3) + 2;
349  *   is to simply make return an exception to the paren-makes-a-function
350  *   rule, and treat it the way it always was, so that it doesn't
351  *   strip the parens.
352  *
353  * To demonstrate the special-case parsing, replace OLDLOP(OP_RETURN); with
354  * LOP(OP_RETURN, XTERM);
355  *
356  * and constructs such as
357  *
358  *     return (Internals::V())[2]
359  *
360  * turn into syntax errors
361  */
362 
363 #define OLDLOP(f) \
364         do { \
365             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
366                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
367             pl_yylval.ival = (f); \
368             PL_expect = XTERM; \
369             PL_bufptr = s; \
370             return (int)LSTOP; \
371         } while(0)
372 
373 #define COPLINE_INC_WITH_HERELINES		    \
374     STMT_START {				     \
375         CopLINE_inc(PL_curcop);			      \
376         if (PL_parser->herelines)		       \
377             CopLINE(PL_curcop) += PL_parser->herelines, \
378             PL_parser->herelines = 0;			 \
379     } STMT_END
380 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
381  * is no sublex_push to follow. */
382 #define COPLINE_SET_FROM_MULTI_END	      \
383     STMT_START {			       \
384         CopLINE_set(PL_curcop, PL_multi_end);	\
385         if (PL_multi_end != PL_multi_start)	 \
386             PL_parser->herelines = 0;		  \
387     } STMT_END
388 
389 
390 /* A file-local structure for passing around information about subroutines and
391  * related definable words */
392 struct code {
393     SV *sv;
394     CV *cv;
395     GV *gv, **gvp;
396     OP *rv2cv_op;
397     PADOFFSET off;
398     bool lex;
399 };
400 
401 static const struct code no_code = { NULL, NULL, NULL, NULL, NULL, 0, FALSE };
402 
403 #ifdef DEBUGGING
404 
405 /* how to interpret the pl_yylval associated with the token */
406 enum token_type {
407     TOKENTYPE_NONE,
408     TOKENTYPE_IVAL,
409     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
410     TOKENTYPE_PVAL,
411     TOKENTYPE_OPVAL
412 };
413 
414 #define DEBUG_TOKEN(Type, Name)                                         \
415     { Name, TOKENTYPE_##Type, #Name }
416 
417 static struct debug_tokens {
418     const int token;
419     enum token_type type;
420     const char *name;
421 } const debug_tokens[] =
422 {
423     DEBUG_TOKEN (OPNUM, ADDOP),
424     DEBUG_TOKEN (NONE,  ANDAND),
425     DEBUG_TOKEN (NONE,  ANDOP),
426     DEBUG_TOKEN (NONE,  ARROW),
427     DEBUG_TOKEN (OPNUM, ASSIGNOP),
428     DEBUG_TOKEN (OPNUM, BITANDOP),
429     DEBUG_TOKEN (OPNUM, BITOROP),
430     DEBUG_TOKEN (OPNUM, CHEQOP),
431     DEBUG_TOKEN (OPNUM, CHRELOP),
432     DEBUG_TOKEN (NONE,  COLONATTR),
433     DEBUG_TOKEN (NONE,  DOLSHARP),
434     DEBUG_TOKEN (NONE,  DORDOR),
435     DEBUG_TOKEN (IVAL,  DOTDOT),
436     DEBUG_TOKEN (NONE,  FORMLBRACK),
437     DEBUG_TOKEN (NONE,  FORMRBRACK),
438     DEBUG_TOKEN (OPNUM, FUNC),
439     DEBUG_TOKEN (OPNUM, FUNC0),
440     DEBUG_TOKEN (OPVAL, FUNC0OP),
441     DEBUG_TOKEN (OPVAL, FUNC0SUB),
442     DEBUG_TOKEN (OPNUM, FUNC1),
443     DEBUG_TOKEN (NONE,  HASHBRACK),
444     DEBUG_TOKEN (IVAL,  KW_CATCH),
445     DEBUG_TOKEN (IVAL,  KW_CLASS),
446     DEBUG_TOKEN (IVAL,  KW_CONTINUE),
447     DEBUG_TOKEN (IVAL,  KW_DEFAULT),
448     DEBUG_TOKEN (IVAL,  KW_DO),
449     DEBUG_TOKEN (IVAL,  KW_ELSE),
450     DEBUG_TOKEN (IVAL,  KW_ELSIF),
451     DEBUG_TOKEN (IVAL,  KW_FIELD),
452     DEBUG_TOKEN (IVAL,  KW_GIVEN),
453     DEBUG_TOKEN (IVAL,  KW_FOR),
454     DEBUG_TOKEN (IVAL,  KW_FORMAT),
455     DEBUG_TOKEN (IVAL,  KW_IF),
456     DEBUG_TOKEN (IVAL,  KW_LOCAL),
457     DEBUG_TOKEN (IVAL,  KW_METHOD_anon),
458     DEBUG_TOKEN (IVAL,  KW_METHOD_named),
459     DEBUG_TOKEN (IVAL,  KW_MY),
460     DEBUG_TOKEN (IVAL,  KW_PACKAGE),
461     DEBUG_TOKEN (IVAL,  KW_REQUIRE),
462     DEBUG_TOKEN (IVAL,  KW_SUB_anon),
463     DEBUG_TOKEN (IVAL,  KW_SUB_anon_sig),
464     DEBUG_TOKEN (IVAL,  KW_SUB_named),
465     DEBUG_TOKEN (IVAL,  KW_SUB_named_sig),
466     DEBUG_TOKEN (IVAL,  KW_TRY),
467     DEBUG_TOKEN (IVAL,  KW_USE_or_NO),
468     DEBUG_TOKEN (IVAL,  KW_UNLESS),
469     DEBUG_TOKEN (IVAL,  KW_UNTIL),
470     DEBUG_TOKEN (IVAL,  KW_WHEN),
471     DEBUG_TOKEN (IVAL,  KW_WHILE),
472     DEBUG_TOKEN (OPVAL, LABEL),
473     DEBUG_TOKEN (OPNUM, LOOPEX),
474     DEBUG_TOKEN (OPNUM, LSTOP),
475     DEBUG_TOKEN (OPVAL, LSTOPSUB),
476     DEBUG_TOKEN (OPNUM, MATCHOP),
477     DEBUG_TOKEN (OPVAL, METHCALL),
478     DEBUG_TOKEN (OPVAL, METHCALL0),
479     DEBUG_TOKEN (OPNUM, MULOP),
480     DEBUG_TOKEN (OPNUM, NCEQOP),
481     DEBUG_TOKEN (OPNUM, NCRELOP),
482     DEBUG_TOKEN (NONE,  NOAMP),
483     DEBUG_TOKEN (NONE,  NOTOP),
484     DEBUG_TOKEN (IVAL,  OROP),
485     DEBUG_TOKEN (IVAL,  OROR),
486     DEBUG_TOKEN (IVAL,  PERLY_AMPERSAND),
487     DEBUG_TOKEN (IVAL,  PERLY_BRACE_CLOSE),
488     DEBUG_TOKEN (IVAL,  PERLY_BRACE_OPEN),
489     DEBUG_TOKEN (IVAL,  PERLY_BRACKET_CLOSE),
490     DEBUG_TOKEN (IVAL,  PERLY_BRACKET_OPEN),
491     DEBUG_TOKEN (IVAL,  PERLY_COLON),
492     DEBUG_TOKEN (IVAL,  PERLY_COMMA),
493     DEBUG_TOKEN (IVAL,  PERLY_DOT),
494     DEBUG_TOKEN (IVAL,  PERLY_EQUAL_SIGN),
495     DEBUG_TOKEN (IVAL,  PERLY_EXCLAMATION_MARK),
496     DEBUG_TOKEN (IVAL,  PERLY_MINUS),
497     DEBUG_TOKEN (IVAL,  PERLY_PAREN_OPEN),
498     DEBUG_TOKEN (IVAL,  PERLY_PERCENT_SIGN),
499     DEBUG_TOKEN (IVAL,  PERLY_PLUS),
500     DEBUG_TOKEN (IVAL,  PERLY_QUESTION_MARK),
501     DEBUG_TOKEN (IVAL,  PERLY_SEMICOLON),
502     DEBUG_TOKEN (IVAL,  PERLY_SLASH),
503     DEBUG_TOKEN (IVAL,  PERLY_SNAIL),
504     DEBUG_TOKEN (IVAL,  PERLY_STAR),
505     DEBUG_TOKEN (IVAL,  PERLY_TILDE),
506     DEBUG_TOKEN (OPVAL, PLUGEXPR),
507     DEBUG_TOKEN (OPVAL, PLUGSTMT),
508     DEBUG_TOKEN (PVAL,  PLUGIN_ADD_OP),
509     DEBUG_TOKEN (PVAL,  PLUGIN_ASSIGN_OP),
510     DEBUG_TOKEN (PVAL,  PLUGIN_HIGH_OP),
511     DEBUG_TOKEN (PVAL,  PLUGIN_LOGICAL_AND_OP),
512     DEBUG_TOKEN (PVAL,  PLUGIN_LOGICAL_OR_OP),
513     DEBUG_TOKEN (PVAL,  PLUGIN_LOGICAL_AND_LOW_OP),
514     DEBUG_TOKEN (PVAL,  PLUGIN_LOGICAL_OR_LOW_OP),
515     DEBUG_TOKEN (PVAL,  PLUGIN_LOW_OP),
516     DEBUG_TOKEN (PVAL,  PLUGIN_MUL_OP),
517     DEBUG_TOKEN (PVAL,  PLUGIN_POW_OP),
518     DEBUG_TOKEN (PVAL,  PLUGIN_REL_OP),
519     DEBUG_TOKEN (OPVAL, PMFUNC),
520     DEBUG_TOKEN (NONE,  POSTJOIN),
521     DEBUG_TOKEN (NONE,  POSTDEC),
522     DEBUG_TOKEN (NONE,  POSTINC),
523     DEBUG_TOKEN (OPNUM, POWOP),
524     DEBUG_TOKEN (NONE,  PREDEC),
525     DEBUG_TOKEN (NONE,  PREINC),
526     DEBUG_TOKEN (OPVAL, PRIVATEREF),
527     DEBUG_TOKEN (OPVAL, QWLIST),
528     DEBUG_TOKEN (NONE,  REFGEN),
529     DEBUG_TOKEN (OPNUM, SHIFTOP),
530     DEBUG_TOKEN (NONE,  SUBLEXEND),
531     DEBUG_TOKEN (NONE,  SUBLEXSTART),
532     DEBUG_TOKEN (OPVAL, THING),
533     DEBUG_TOKEN (NONE,  UMINUS),
534     DEBUG_TOKEN (OPNUM, UNIOP),
535     DEBUG_TOKEN (OPVAL, UNIOPSUB),
536     DEBUG_TOKEN (OPVAL, BAREWORD),
537     DEBUG_TOKEN (IVAL,  YADAYADA),
538     { 0,		TOKENTYPE_NONE,		NULL }
539 };
540 
541 #undef DEBUG_TOKEN
542 
543 /* dump the returned token in rv, plus any optional arg in pl_yylval */
544 
545 STATIC int
S_tokereport(pTHX_ I32 rv,const YYSTYPE * lvalp)546 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
547 {
548     PERL_ARGS_ASSERT_TOKEREPORT;
549 
550     if (DEBUG_T_TEST) {
551         const char *name = NULL;
552         enum token_type type = TOKENTYPE_NONE;
553         const struct debug_tokens *p;
554         SV* const report = newSVpvs("<== ");
555 
556         for (p = debug_tokens; p->token; p++) {
557             if (p->token == (int)rv) {
558                 name = p->name;
559                 type = p->type;
560                 break;
561             }
562         }
563         if (name)
564             Perl_sv_catpv(aTHX_ report, name);
565         else if (isGRAPH(rv))
566         {
567             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
568             if ((char)rv == 'p')
569                 sv_catpvs(report, " (pending identifier)");
570         }
571         else if (!rv)
572             sv_catpvs(report, "EOF");
573         else
574             Perl_sv_catpvf(aTHX_ report, "?? %" IVdf, (IV)rv);
575         switch (type) {
576         case TOKENTYPE_NONE:
577             break;
578         case TOKENTYPE_IVAL:
579             Perl_sv_catpvf(aTHX_ report, "(ival=%" IVdf ")", (IV)lvalp->ival);
580             break;
581         case TOKENTYPE_OPNUM:
582             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
583                                     PL_op_name[lvalp->ival]);
584             break;
585         case TOKENTYPE_PVAL:
586             Perl_sv_catpvf(aTHX_ report, "(pval=%p)", lvalp->pval);
587             break;
588         case TOKENTYPE_OPVAL:
589             if (lvalp->opval) {
590                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
591                                     PL_op_name[lvalp->opval->op_type]);
592                 if (lvalp->opval->op_type == OP_CONST) {
593                     Perl_sv_catpvf(aTHX_ report, " %s",
594                         SvPEEK(cSVOPx_sv(lvalp->opval)));
595                 }
596 
597             }
598             else
599                 sv_catpvs(report, "(opval=null)");
600             break;
601         }
602         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
603     };
604     return (int)rv;
605 }
606 
607 
608 /* print the buffer with suitable escapes */
609 
610 STATIC void
S_printbuf(pTHX_ const char * const fmt,const char * const s)611 S_printbuf(pTHX_ const char *const fmt, const char *const s)
612 {
613     SV* const tmp = newSVpvs("");
614 
615     PERL_ARGS_ASSERT_PRINTBUF;
616 
617     GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); /* fmt checked by caller */
618     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
619     GCC_DIAG_RESTORE_STMT;
620     SvREFCNT_dec(tmp);
621 }
622 
623 #endif
624 
625 /*
626  * S_ao
627  *
628  * This subroutine looks for an '=' next to the operator that has just been
629  * parsed and turns it into an ASSIGNOP if it finds one.
630  */
631 
632 STATIC int
S_ao(pTHX_ int toketype)633 S_ao(pTHX_ int toketype)
634 {
635     if (*PL_bufptr == '=') {
636         PL_bufptr++;
637 
638         switch (toketype) {
639             case ANDAND: pl_yylval.ival = OP_ANDASSIGN; break;
640             case OROR:   pl_yylval.ival = OP_ORASSIGN;  break;
641             case DORDOR: pl_yylval.ival = OP_DORASSIGN; break;
642         }
643 
644         toketype = ASSIGNOP;
645     }
646     return REPORT(toketype);
647 }
648 
649 /*
650  * S_no_op
651  * When Perl expects an operator and finds something else, no_op
652  * prints the warning.  It always prints "<something> found where
653  * operator expected.  It prints "Missing semicolon on previous line?"
654  * if the surprise occurs at the start of the line.  "do you need to
655  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
656  * where the compiler doesn't know if foo is a method call or a function.
657  * It prints "Missing operator before end of line" if there's nothing
658  * after the missing operator, or "... before <...>" if there is something
659  * after the missing operator.
660  *
661  * PL_bufptr is expected to point to the start of the thing that was found,
662  * and s after the next token or partial token.
663  */
664 
665 STATIC void
S_no_op(pTHX_ const char * const what,char * s)666 S_no_op(pTHX_ const char *const what, char *s)
667 {
668     char * const oldbp = PL_bufptr;
669     const bool is_first = (PL_oldbufptr == PL_linestart);
670     SV *message = sv_2mortal( newSVpvf(
671                    PERL_DIAG_WARN_SYNTAX("%s found where operator expected"),
672                    what
673                   ) );
674 
675     PERL_ARGS_ASSERT_NO_OP;
676 
677     if (!s)
678         s = oldbp;
679     else
680         PL_bufptr = s;
681 
682     if (ckWARN_d(WARN_SYNTAX)) {
683         bool has_more = FALSE;
684         if (is_first) {
685             has_more = TRUE;
686             sv_catpvs(message,
687                     " (Missing semicolon on previous line?)");
688         }
689         else if (PL_oldoldbufptr) {
690             /* yyerror (via yywarn) would do this itself, so we should too */
691             const char *t;
692             for (t = PL_oldoldbufptr;
693                  t < PL_bufptr && isSPACE(*t);
694                  t += UTF ? UTF8SKIP(t) : 1)
695             {
696                 NOOP;
697             }
698             /* see if we can identify the cause of the warning */
699             if (isIDFIRST_lazy_if_safe(t,PL_bufend,UTF))
700             {
701                 const char *t_start= t;
702                 for ( ;
703                      (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) || *t == ':');
704                      t += UTF ? UTF8SKIP(t) : 1)
705                 {
706                     NOOP;
707                 }
708                 if (t < PL_bufptr && isSPACE(*t)) {
709                     has_more = TRUE;
710                     sv_catpvf( message,
711                             " (Do you need to predeclare \"%" UTF8f "\"?)",
712                           UTF8fARG(UTF, t - t_start, t_start));
713                 }
714             }
715         }
716         if (!has_more) {
717             const char *t= oldbp;
718             assert(s >= oldbp);
719             while (t < s && isSPACE(*t)) {
720                 t += UTF ? UTF8SKIP(t) : 1;
721             }
722 
723             sv_catpvf(message,
724                     " (Missing operator before \"%" UTF8f "\"?)",
725                      UTF8fARG(UTF, s - t, t));
726         }
727     }
728     yywarn(SvPV_nolen(message), UTF ? SVf_UTF8 : 0);
729     PL_bufptr = oldbp;
730 }
731 
732 /*
733  * S_missingterm
734  * Complain about missing quote/regexp/heredoc terminator.
735  * If it's called with NULL then it cauterizes the line buffer.
736  * If we're in a delimited string and the delimiter is a control
737  * character, it's reformatted into a two-char sequence like ^C.
738  * This is fatal.
739  */
740 
741 STATIC void
S_missingterm(pTHX_ char * s,STRLEN len)742 S_missingterm(pTHX_ char *s, STRLEN len)
743 {
744     char tmpbuf[UTF8_MAXBYTES + 1];
745     char q;
746     bool uni = FALSE;
747     if (s) {
748         char * const nl = (char *) my_memrchr(s, '\n', len);
749         if (nl) {
750             *nl = '\0';
751             len = nl - s;
752         }
753         uni = UTF;
754     }
755     else if (PL_multi_close < 32) {
756         *tmpbuf = '^';
757         tmpbuf[1] = (char)toCTRL(PL_multi_close);
758         tmpbuf[2] = '\0';
759         s = tmpbuf;
760         len = 2;
761     }
762     else {
763         if (! UTF && LIKELY(PL_multi_close < 256)) {
764             *tmpbuf = (char)PL_multi_close;
765             tmpbuf[1] = '\0';
766             len = 1;
767         }
768         else {
769             char *end = (char *)uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close);
770             *end = '\0';
771             len = end - tmpbuf;
772             uni = TRUE;
773         }
774         s = tmpbuf;
775     }
776     q = memchr(s, '"', len) ? '\'' : '"';
777     Perl_croak(aTHX_ "Can't find string terminator %c%" UTF8f "%c"
778                      " anywhere before EOF", q, UTF8fARG(uni, len, s), q);
779 }
780 
781 #include "feature.h"
782 
783 /*
784  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
785  * utf16-to-utf8-reversed.
786  */
787 
788 #ifdef PERL_CR_FILTER
789 static void
strip_return(SV * sv)790 strip_return(SV *sv)
791 {
792     const char *s = SvPVX_const(sv);
793     const char * const e = s + SvCUR(sv);
794 
795     PERL_ARGS_ASSERT_STRIP_RETURN;
796 
797     /* outer loop optimized to do nothing if there are no CR-LFs */
798     while (s < e) {
799         if (*s++ == '\r' && *s == '\n') {
800             /* hit a CR-LF, need to copy the rest */
801             char *d = s - 1;
802             *d++ = *s++;
803             while (s < e) {
804                 if (*s == '\r' && s[1] == '\n')
805                     s++;
806                 *d++ = *s++;
807             }
808             SvCUR(sv) -= s - d;
809             return;
810         }
811     }
812 }
813 
814 STATIC I32
S_cr_textfilter(pTHX_ int idx,SV * sv,int maxlen)815 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
816 {
817     const I32 count = FILTER_READ(idx+1, sv, maxlen);
818     if (count > 0 && !maxlen)
819         strip_return(sv);
820     return count;
821 }
822 #endif
823 
824 /*
825 =for apidoc lex_start
826 
827 Creates and initialises a new lexer/parser state object, supplying
828 a context in which to lex and parse from a new source of Perl code.
829 A pointer to the new state object is placed in L</PL_parser>.  An entry
830 is made on the save stack so that upon unwinding, the new state object
831 will be destroyed and the former value of L</PL_parser> will be restored.
832 Nothing else need be done to clean up the parsing context.
833 
834 The code to be parsed comes from C<line> and C<rsfp>.  C<line>, if
835 non-null, provides a string (in SV form) containing code to be parsed.
836 A copy of the string is made, so subsequent modification of C<line>
837 does not affect parsing.  C<rsfp>, if non-null, provides an input stream
838 from which code will be read to be parsed.  If both are non-null, the
839 code in C<line> comes first and must consist of complete lines of input,
840 and C<rsfp> supplies the remainder of the source.
841 
842 The C<flags> parameter is reserved for future use.  Currently it is only
843 used by perl internally, so extensions should always pass zero.
844 
845 =cut
846 */
847 
848 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
849    can share filters with the current parser.
850    LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
851    caller, hence isn't owned by the parser, so shouldn't be closed on parser
852    destruction. This is used to handle the case of defaulting to reading the
853    script from the standard input because no filename was given on the command
854    line (without getting confused by situation where STDIN has been closed, so
855    the script handle is opened on fd 0)  */
856 
857 void
Perl_lex_start(pTHX_ SV * line,PerlIO * rsfp,U32 flags)858 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
859 {
860     const char *s = NULL;
861     yy_parser *parser, *oparser;
862 
863     if (flags && flags & ~LEX_START_FLAGS)
864         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
865 
866     /* create and initialise a parser */
867 
868     Newxz(parser, 1, yy_parser);
869     parser->old_parser = oparser = PL_parser;
870     PL_parser = parser;
871 
872     parser->stack = NULL;
873     parser->stack_max1 = NULL;
874     parser->ps = NULL;
875 
876     /* on scope exit, free this parser and restore any outer one */
877     SAVEPARSER(parser);
878     parser->saved_curcop = PL_curcop;
879 
880     /* initialise lexer state */
881 
882     parser->nexttoke = 0;
883     parser->error_count = oparser ? oparser->error_count : 0;
884     parser->copline = parser->preambling = NOLINE;
885     parser->lex_state = LEX_NORMAL;
886     parser->expect = XSTATE;
887     parser->rsfp = rsfp;
888     parser->recheck_utf8_validity = TRUE;
889     parser->rsfp_filters =
890       !(flags & LEX_START_SAME_FILTER) || !oparser
891         ? NULL
892         : MUTABLE_AV(SvREFCNT_inc(
893             oparser->rsfp_filters
894              ? oparser->rsfp_filters
895              : (oparser->rsfp_filters = newAV())
896           ));
897 
898     Newx(parser->lex_brackstack, 120, char);
899     Newx(parser->lex_casestack, 12, char);
900     *parser->lex_casestack = '\0';
901     Newxz(parser->lex_shared, 1, LEXSHARED);
902 
903     if (line) {
904         Size_t len;
905         const U8* first_bad_char_loc;
906 
907         s = SvPV_const(line, len);
908 
909         if (   SvUTF8(line)
910             && UNLIKELY(! is_utf8_string_loc((U8 *) s,
911                                              SvCUR(line),
912                                              &first_bad_char_loc)))
913         {
914             _force_out_malformed_utf8_message(first_bad_char_loc,
915                                               (U8 *) s + SvCUR(line),
916                                               0,
917                                               1 /* 1 means die */ );
918             NOT_REACHED; /* NOTREACHED */
919         }
920 
921         parser->linestr = flags & LEX_START_COPIED
922                             ? SvREFCNT_inc_simple_NN(line)
923                             : newSVpvn_flags(s, len, SvUTF8(line));
924         if (!rsfp)
925             sv_catpvs(parser->linestr, "\n;");
926     } else {
927         parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
928     }
929 
930     parser->oldoldbufptr =
931         parser->oldbufptr =
932         parser->bufptr =
933         parser->linestart = SvPVX(parser->linestr);
934     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
935     parser->last_lop = parser->last_uni = NULL;
936 
937     STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
938                                                         |LEX_DONT_CLOSE_RSFP));
939     parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
940                                                         |LEX_DONT_CLOSE_RSFP));
941 
942     parser->in_pod = parser->filtered = 0;
943 }
944 
945 
946 /* delete a parser object */
947 
948 void
Perl_parser_free(pTHX_ const yy_parser * parser)949 Perl_parser_free(pTHX_  const yy_parser *parser)
950 {
951     PERL_ARGS_ASSERT_PARSER_FREE;
952 
953     PL_curcop = parser->saved_curcop;
954     SvREFCNT_dec(parser->linestr);
955 
956     if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
957         PerlIO_clearerr(parser->rsfp);
958     else if (parser->rsfp && (!parser->old_parser
959           || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
960         PerlIO_close(parser->rsfp);
961     SvREFCNT_dec(parser->rsfp_filters);
962     SvREFCNT_dec(parser->lex_stuff);
963     SvREFCNT_dec(parser->lex_sub_repl);
964 
965     Safefree(parser->lex_brackstack);
966     Safefree(parser->lex_casestack);
967     Safefree(parser->lex_shared);
968     PL_parser = parser->old_parser;
969     Safefree(parser);
970 }
971 
972 void
Perl_parser_free_nexttoke_ops(pTHX_ yy_parser * parser,OPSLAB * slab)973 Perl_parser_free_nexttoke_ops(pTHX_  yy_parser *parser, OPSLAB *slab)
974 {
975     I32 nexttoke = parser->nexttoke;
976     PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
977     while (nexttoke--) {
978         if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
979          && parser->nextval[nexttoke].opval
980          && parser->nextval[nexttoke].opval->op_slabbed
981          && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
982             op_free(parser->nextval[nexttoke].opval);
983             parser->nextval[nexttoke].opval = NULL;
984         }
985     }
986 }
987 
988 
989 /*
990 =for apidoc AmnxUN|SV *|PL_parser-E<gt>linestr
991 
992 Buffer scalar containing the chunk currently under consideration of the
993 text currently being lexed.  This is always a plain string scalar (for
994 which C<SvPOK> is true).  It is not intended to be used as a scalar by
995 normal scalar means; instead refer to the buffer directly by the pointer
996 variables described below.
997 
998 The lexer maintains various C<char*> pointers to things in the
999 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
1000 reallocated, all of these pointers must be updated.  Don't attempt to
1001 do this manually, but rather use L</lex_grow_linestr> if you need to
1002 reallocate the buffer.
1003 
1004 The content of the text chunk in the buffer is commonly exactly one
1005 complete line of input, up to and including a newline terminator,
1006 but there are situations where it is otherwise.  The octets of the
1007 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
1008 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
1009 flag on this scalar, which may disagree with it.
1010 
1011 For direct examination of the buffer, the variable
1012 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
1013 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
1014 of these pointers is usually preferable to examination of the scalar
1015 through normal scalar means.
1016 
1017 =for apidoc AmnxUN|char *|PL_parser-E<gt>bufend
1018 
1019 Direct pointer to the end of the chunk of text currently being lexed, the
1020 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
1021 + SvCUR(PL_parser-E<gt>linestr)>.  A C<NUL> character (zero octet) is
1022 always located at the end of the buffer, and does not count as part of
1023 the buffer's contents.
1024 
1025 =for apidoc AmnxUN|char *|PL_parser-E<gt>bufptr
1026 
1027 Points to the current position of lexing inside the lexer buffer.
1028 Characters around this point may be freely examined, within
1029 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
1030 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
1031 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
1032 
1033 Lexing code (whether in the Perl core or not) moves this pointer past
1034 the characters that it consumes.  It is also expected to perform some
1035 bookkeeping whenever a newline character is consumed.  This movement
1036 can be more conveniently performed by the function L</lex_read_to>,
1037 which handles newlines appropriately.
1038 
1039 Interpretation of the buffer's octets can be abstracted out by
1040 using the slightly higher-level functions L</lex_peek_unichar> and
1041 L</lex_read_unichar>.
1042 
1043 =for apidoc AmnxUN|char *|PL_parser-E<gt>linestart
1044 
1045 Points to the start of the current line inside the lexer buffer.
1046 This is useful for indicating at which column an error occurred, and
1047 not much else.  This must be updated by any lexing code that consumes
1048 a newline; the function L</lex_read_to> handles this detail.
1049 
1050 =cut
1051 */
1052 
1053 /*
1054 =for apidoc lex_bufutf8
1055 
1056 Indicates whether the octets in the lexer buffer
1057 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
1058 of Unicode characters.  If not, they should be interpreted as Latin-1
1059 characters.  This is analogous to the C<SvUTF8> flag for scalars.
1060 
1061 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
1062 contains valid UTF-8.  Lexing code must be robust in the face of invalid
1063 encoding.
1064 
1065 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
1066 is significant, but not the whole story regarding the input character
1067 encoding.  Normally, when a file is being read, the scalar contains octets
1068 and its C<SvUTF8> flag is off, but the octets should be interpreted as
1069 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
1070 however, the scalar may have the C<SvUTF8> flag on, and in this case its
1071 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
1072 is in effect.  This logic may change in the future; use this function
1073 instead of implementing the logic yourself.
1074 
1075 =cut
1076 */
1077 
1078 bool
Perl_lex_bufutf8(pTHX)1079 Perl_lex_bufutf8(pTHX)
1080 {
1081     return UTF;
1082 }
1083 
1084 /*
1085 =for apidoc lex_grow_linestr
1086 
1087 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
1088 at least C<len> octets (including terminating C<NUL>).  Returns a
1089 pointer to the reallocated buffer.  This is necessary before making
1090 any direct modification of the buffer that would increase its length.
1091 L</lex_stuff_pvn> provides a more convenient way to insert text into
1092 the buffer.
1093 
1094 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
1095 this function updates all of the lexer's variables that point directly
1096 into the buffer.
1097 
1098 =cut
1099 */
1100 
1101 char *
Perl_lex_grow_linestr(pTHX_ STRLEN len)1102 Perl_lex_grow_linestr(pTHX_ STRLEN len)
1103 {
1104     SV *linestr;
1105     char *buf;
1106     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1107     STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
1108     bool current;
1109 
1110     linestr = PL_parser->linestr;
1111     buf = SvPVX(linestr);
1112     if (len <= SvLEN(linestr))
1113         return buf;
1114 
1115     /* Is the lex_shared linestr SV the same as the current linestr SV?
1116      * Only in this case does re_eval_start need adjusting, since it
1117      * points within lex_shared->ls_linestr's buffer */
1118     current = (   !PL_parser->lex_shared->ls_linestr
1119                || linestr == PL_parser->lex_shared->ls_linestr);
1120 
1121     bufend_pos = PL_parser->bufend - buf;
1122     bufptr_pos = PL_parser->bufptr - buf;
1123     oldbufptr_pos = PL_parser->oldbufptr - buf;
1124     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1125     linestart_pos = PL_parser->linestart - buf;
1126     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1127     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1128     re_eval_start_pos = (current && PL_parser->lex_shared->re_eval_start) ?
1129                             PL_parser->lex_shared->re_eval_start - buf : 0;
1130 
1131     buf = sv_grow(linestr, len);
1132 
1133     PL_parser->bufend = buf + bufend_pos;
1134     PL_parser->bufptr = buf + bufptr_pos;
1135     PL_parser->oldbufptr = buf + oldbufptr_pos;
1136     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1137     PL_parser->linestart = buf + linestart_pos;
1138     if (PL_parser->last_uni)
1139         PL_parser->last_uni = buf + last_uni_pos;
1140     if (PL_parser->last_lop)
1141         PL_parser->last_lop = buf + last_lop_pos;
1142     if (current && PL_parser->lex_shared->re_eval_start)
1143         PL_parser->lex_shared->re_eval_start  = buf + re_eval_start_pos;
1144     return buf;
1145 }
1146 
1147 /*
1148 =for apidoc lex_stuff_pvn
1149 
1150 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1151 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1152 reallocating the buffer if necessary.  This means that lexing code that
1153 runs later will see the characters as if they had appeared in the input.
1154 It is not recommended to do this as part of normal parsing, and most
1155 uses of this facility run the risk of the inserted characters being
1156 interpreted in an unintended manner.
1157 
1158 The string to be inserted is represented by C<len> octets starting
1159 at C<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
1160 according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>.
1161 The characters are recoded for the lexer buffer, according to how the
1162 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
1163 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
1164 function is more convenient.
1165 
1166 =for apidoc Amnh||LEX_STUFF_UTF8
1167 
1168 =cut
1169 */
1170 
1171 void
Perl_lex_stuff_pvn(pTHX_ const char * pv,STRLEN len,U32 flags)1172 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1173 {
1174     char *bufptr;
1175     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1176     if (flags & ~(LEX_STUFF_UTF8))
1177         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1178     if (UTF) {
1179         if (flags & LEX_STUFF_UTF8) {
1180             goto plain_copy;
1181         } else {
1182             STRLEN highhalf = variant_under_utf8_count((U8 *) pv,
1183                                                        (U8 *) pv + len);
1184             const char *p, *e = pv+len;;
1185             if (!highhalf)
1186                 goto plain_copy;
1187             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1188             bufptr = PL_parser->bufptr;
1189             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1190             SvCUR_set(PL_parser->linestr,
1191                 SvCUR(PL_parser->linestr) + len+highhalf);
1192             PL_parser->bufend += len+highhalf;
1193             for (p = pv; p != e; p++) {
1194                 append_utf8_from_native_byte(*p, (U8 **) &bufptr);
1195             }
1196         }
1197     } else {
1198         if (flags & LEX_STUFF_UTF8) {
1199             STRLEN highhalf = 0;
1200             const char *p, *e = pv+len;
1201             for (p = pv; p != e; p++) {
1202                 U8 c = (U8)*p;
1203                 if (UTF8_IS_ABOVE_LATIN1(c)) {
1204                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
1205                                 "non-Latin-1 character into Latin-1 input");
1206                 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1207                     p++;
1208                     highhalf++;
1209                 } else assert(UTF8_IS_INVARIANT(c));
1210             }
1211             if (!highhalf)
1212                 goto plain_copy;
1213             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1214             bufptr = PL_parser->bufptr;
1215             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1216             SvCUR_set(PL_parser->linestr,
1217                 SvCUR(PL_parser->linestr) + len-highhalf);
1218             PL_parser->bufend += len-highhalf;
1219             p = pv;
1220             while (p < e) {
1221                 if (UTF8_IS_INVARIANT(*p)) {
1222                     *bufptr++ = *p;
1223                     p++;
1224                 }
1225                 else {
1226                     assert(p < e -1 );
1227                     *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
1228                     p += 2;
1229                 }
1230             }
1231         } else {
1232           plain_copy:
1233             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1234             bufptr = PL_parser->bufptr;
1235             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1236             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1237             PL_parser->bufend += len;
1238             Copy(pv, bufptr, len, char);
1239         }
1240     }
1241 }
1242 
1243 /*
1244 =for apidoc lex_stuff_pv
1245 
1246 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1247 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1248 reallocating the buffer if necessary.  This means that lexing code that
1249 runs later will see the characters as if they had appeared in the input.
1250 It is not recommended to do this as part of normal parsing, and most
1251 uses of this facility run the risk of the inserted characters being
1252 interpreted in an unintended manner.
1253 
1254 The string to be inserted is represented by octets starting at C<pv>
1255 and continuing to the first nul.  These octets are interpreted as either
1256 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1257 in C<flags>.  The characters are recoded for the lexer buffer, according
1258 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1259 If it is not convenient to nul-terminate a string to be inserted, the
1260 L</lex_stuff_pvn> function is more appropriate.
1261 
1262 =cut
1263 */
1264 
1265 void
Perl_lex_stuff_pv(pTHX_ const char * pv,U32 flags)1266 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1267 {
1268     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1269     lex_stuff_pvn(pv, strlen(pv), flags);
1270 }
1271 
1272 /*
1273 =for apidoc lex_stuff_sv
1274 
1275 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1276 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1277 reallocating the buffer if necessary.  This means that lexing code that
1278 runs later will see the characters as if they had appeared in the input.
1279 It is not recommended to do this as part of normal parsing, and most
1280 uses of this facility run the risk of the inserted characters being
1281 interpreted in an unintended manner.
1282 
1283 The string to be inserted is the string value of C<sv>.  The characters
1284 are recoded for the lexer buffer, according to how the buffer is currently
1285 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1286 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1287 need to construct a scalar.
1288 
1289 =cut
1290 */
1291 
1292 void
Perl_lex_stuff_sv(pTHX_ SV * sv,U32 flags)1293 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1294 {
1295     char *pv;
1296     STRLEN len;
1297     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1298     if (flags)
1299         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1300     pv = SvPV(sv, len);
1301     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1302 }
1303 
1304 /*
1305 =for apidoc lex_unstuff
1306 
1307 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1308 C<ptr>.  Text following C<ptr> will be moved, and the buffer shortened.
1309 This hides the discarded text from any lexing code that runs later,
1310 as if the text had never appeared.
1311 
1312 This is not the normal way to consume lexed text.  For that, use
1313 L</lex_read_to>.
1314 
1315 =cut
1316 */
1317 
1318 void
Perl_lex_unstuff(pTHX_ char * ptr)1319 Perl_lex_unstuff(pTHX_ char *ptr)
1320 {
1321     char *buf, *bufend;
1322     STRLEN unstuff_len;
1323     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1324     buf = PL_parser->bufptr;
1325     if (ptr < buf)
1326         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1327     if (ptr == buf)
1328         return;
1329     bufend = PL_parser->bufend;
1330     if (ptr > bufend)
1331         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1332     unstuff_len = ptr - buf;
1333     Move(ptr, buf, bufend+1-ptr, char);
1334     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1335     PL_parser->bufend = bufend - unstuff_len;
1336 }
1337 
1338 /*
1339 =for apidoc lex_read_to
1340 
1341 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1342 to C<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match C<ptr>,
1343 performing the correct bookkeeping whenever a newline character is passed.
1344 This is the normal way to consume lexed text.
1345 
1346 Interpretation of the buffer's octets can be abstracted out by
1347 using the slightly higher-level functions L</lex_peek_unichar> and
1348 L</lex_read_unichar>.
1349 
1350 =cut
1351 */
1352 
1353 void
Perl_lex_read_to(pTHX_ char * ptr)1354 Perl_lex_read_to(pTHX_ char *ptr)
1355 {
1356     char *s;
1357     PERL_ARGS_ASSERT_LEX_READ_TO;
1358     s = PL_parser->bufptr;
1359     if (ptr < s || ptr > PL_parser->bufend)
1360         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1361     for (; s != ptr; s++)
1362         if (*s == '\n') {
1363             COPLINE_INC_WITH_HERELINES;
1364             PL_parser->linestart = s+1;
1365         }
1366     PL_parser->bufptr = ptr;
1367 }
1368 
1369 /*
1370 =for apidoc lex_discard_to
1371 
1372 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1373 up to C<ptr>.  The remaining content of the buffer will be moved, and
1374 all pointers into the buffer updated appropriately.  C<ptr> must not
1375 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1376 it is not permitted to discard text that has yet to be lexed.
1377 
1378 Normally it is not necessarily to do this directly, because it suffices to
1379 use the implicit discarding behaviour of L</lex_next_chunk> and things
1380 based on it.  However, if a token stretches across multiple lines,
1381 and the lexing code has kept multiple lines of text in the buffer for
1382 that purpose, then after completion of the token it would be wise to
1383 explicitly discard the now-unneeded earlier lines, to avoid future
1384 multi-line tokens growing the buffer without bound.
1385 
1386 =cut
1387 */
1388 
1389 void
Perl_lex_discard_to(pTHX_ char * ptr)1390 Perl_lex_discard_to(pTHX_ char *ptr)
1391 {
1392     char *buf;
1393     STRLEN discard_len;
1394     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1395     buf = SvPVX(PL_parser->linestr);
1396     if (ptr < buf)
1397         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1398     if (ptr == buf)
1399         return;
1400     if (ptr > PL_parser->bufptr)
1401         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1402     discard_len = ptr - buf;
1403     if (PL_parser->oldbufptr < ptr)
1404         PL_parser->oldbufptr = ptr;
1405     if (PL_parser->oldoldbufptr < ptr)
1406         PL_parser->oldoldbufptr = ptr;
1407     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1408         PL_parser->last_uni = NULL;
1409     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1410         PL_parser->last_lop = NULL;
1411     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1412     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1413     PL_parser->bufend -= discard_len;
1414     PL_parser->bufptr -= discard_len;
1415     PL_parser->oldbufptr -= discard_len;
1416     PL_parser->oldoldbufptr -= discard_len;
1417     if (PL_parser->last_uni)
1418         PL_parser->last_uni -= discard_len;
1419     if (PL_parser->last_lop)
1420         PL_parser->last_lop -= discard_len;
1421 }
1422 
1423 void
Perl_notify_parser_that_changed_to_utf8(pTHX)1424 Perl_notify_parser_that_changed_to_utf8(pTHX)
1425 {
1426     /* Called when $^H is changed to indicate that HINT_UTF8 has changed from
1427      * off to on.  At compile time, this has the effect of entering a 'use
1428      * utf8' section.  This means that any input was not previously checked for
1429      * UTF-8 (because it was off), but now we do need to check it, or our
1430      * assumptions about the input being sane could be wrong, and we could
1431      * segfault.  This routine just sets a flag so that the next time we look
1432      * at the input we do the well-formed UTF-8 check.  If we aren't in the
1433      * proper phase, there may not be a parser object, but if there is, setting
1434      * the flag is harmless */
1435 
1436     if (PL_parser) {
1437         PL_parser->recheck_utf8_validity = TRUE;
1438     }
1439 }
1440 
1441 /*
1442 =for apidoc lex_next_chunk
1443 
1444 Reads in the next chunk of text to be lexed, appending it to
1445 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1446 looked to the end of the current chunk and wants to know more.  It is
1447 usual, but not necessary, for lexing to have consumed the entirety of
1448 the current chunk at this time.
1449 
1450 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1451 chunk (i.e., the current chunk has been entirely consumed), normally the
1452 current chunk will be discarded at the same time that the new chunk is
1453 read in.  If C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, the current chunk
1454 will not be discarded.  If the current chunk has not been entirely
1455 consumed, then it will not be discarded regardless of the flag.
1456 
1457 Returns true if some new text was added to the buffer, or false if the
1458 buffer has reached the end of the input text.
1459 
1460 =for apidoc Amnh||LEX_KEEP_PREVIOUS
1461 
1462 =cut
1463 */
1464 
1465 #define LEX_FAKE_EOF 0x80000000
1466 #define LEX_NO_TERM  0x40000000 /* here-doc */
1467 
1468 bool
Perl_lex_next_chunk(pTHX_ U32 flags)1469 Perl_lex_next_chunk(pTHX_ U32 flags)
1470 {
1471     SV *linestr;
1472     char *buf;
1473     STRLEN old_bufend_pos, new_bufend_pos;
1474     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1475     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1476     bool got_some_for_debugger = 0;
1477     bool got_some;
1478 
1479     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1480         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1481     if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
1482         return FALSE;
1483     linestr = PL_parser->linestr;
1484     buf = SvPVX(linestr);
1485     if (!(flags & LEX_KEEP_PREVIOUS)
1486           && PL_parser->bufptr == PL_parser->bufend)
1487     {
1488         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1489         linestart_pos = 0;
1490         if (PL_parser->last_uni != PL_parser->bufend)
1491             PL_parser->last_uni = NULL;
1492         if (PL_parser->last_lop != PL_parser->bufend)
1493             PL_parser->last_lop = NULL;
1494         last_uni_pos = last_lop_pos = 0;
1495         *buf = 0;
1496         SvCUR_set(linestr, 0);
1497     } else {
1498         old_bufend_pos = PL_parser->bufend - buf;
1499         bufptr_pos = PL_parser->bufptr - buf;
1500         oldbufptr_pos = PL_parser->oldbufptr - buf;
1501         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1502         linestart_pos = PL_parser->linestart - buf;
1503         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1504         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1505     }
1506     if (flags & LEX_FAKE_EOF) {
1507         goto eof;
1508     } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1509         got_some = 0;
1510     } else if (filter_gets(linestr, old_bufend_pos)) {
1511         got_some = 1;
1512         got_some_for_debugger = 1;
1513     } else if (flags & LEX_NO_TERM) {
1514         got_some = 0;
1515     } else {
1516         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1517             SvPVCLEAR(linestr);
1518         eof:
1519         /* End of real input.  Close filehandle (unless it was STDIN),
1520          * then add implicit termination.
1521          */
1522         if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1523             PerlIO_clearerr(PL_parser->rsfp);
1524         else if (PL_parser->rsfp)
1525             (void)PerlIO_close(PL_parser->rsfp);
1526         PL_parser->rsfp = NULL;
1527         PL_parser->in_pod = PL_parser->filtered = 0;
1528         if (!PL_in_eval && PL_minus_p) {
1529             sv_catpvs(linestr,
1530                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1531             PL_minus_n = PL_minus_p = 0;
1532         } else if (!PL_in_eval && PL_minus_n) {
1533             sv_catpvs(linestr, /*{*/";}");
1534             PL_minus_n = 0;
1535         } else
1536             sv_catpvs(linestr, ";");
1537         got_some = 1;
1538     }
1539     buf = SvPVX(linestr);
1540     new_bufend_pos = SvCUR(linestr);
1541     PL_parser->bufend = buf + new_bufend_pos;
1542     PL_parser->bufptr = buf + bufptr_pos;
1543 
1544     if (UTF) {
1545         const U8* first_bad_char_loc;
1546         if (UNLIKELY(! is_utf8_string_loc(
1547                             (U8 *) PL_parser->bufptr,
1548                                    PL_parser->bufend - PL_parser->bufptr,
1549                                    &first_bad_char_loc)))
1550         {
1551             _force_out_malformed_utf8_message(first_bad_char_loc,
1552                                               (U8 *) PL_parser->bufend,
1553                                               0,
1554                                               1 /* 1 means die */ );
1555             NOT_REACHED; /* NOTREACHED */
1556         }
1557     }
1558 
1559     PL_parser->oldbufptr = buf + oldbufptr_pos;
1560     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1561     PL_parser->linestart = buf + linestart_pos;
1562     if (PL_parser->last_uni)
1563         PL_parser->last_uni = buf + last_uni_pos;
1564     if (PL_parser->last_lop)
1565         PL_parser->last_lop = buf + last_lop_pos;
1566     if (PL_parser->preambling != NOLINE) {
1567         CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1568         PL_parser->preambling = NOLINE;
1569     }
1570     if (   got_some_for_debugger
1571         && PERLDB_LINE_OR_SAVESRC
1572         && PL_curstash != PL_debstash)
1573     {
1574         /* debugger active and we're not compiling the debugger code,
1575          * so store the line into the debugger's array of lines
1576          */
1577         update_debugger_info(NULL, buf+old_bufend_pos,
1578             new_bufend_pos-old_bufend_pos);
1579     }
1580     return got_some;
1581 }
1582 
1583 /*
1584 =for apidoc lex_peek_unichar
1585 
1586 Looks ahead one (Unicode) character in the text currently being lexed.
1587 Returns the codepoint (unsigned integer value) of the next character,
1588 or -1 if lexing has reached the end of the input text.  To consume the
1589 peeked character, use L</lex_read_unichar>.
1590 
1591 If the next character is in (or extends into) the next chunk of input
1592 text, the next chunk will be read in.  Normally the current chunk will be
1593 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1594 bit set, then the current chunk will not be discarded.
1595 
1596 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1597 is encountered, an exception is generated.
1598 
1599 =cut
1600 */
1601 
1602 I32
Perl_lex_peek_unichar(pTHX_ U32 flags)1603 Perl_lex_peek_unichar(pTHX_ U32 flags)
1604 {
1605     char *s, *bufend;
1606     if (flags & ~(LEX_KEEP_PREVIOUS))
1607         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1608     s = PL_parser->bufptr;
1609     bufend = PL_parser->bufend;
1610     if (UTF) {
1611         U8 head;
1612         I32 unichar;
1613         STRLEN len, retlen;
1614         if (s == bufend) {
1615             if (!lex_next_chunk(flags))
1616                 return -1;
1617             s = PL_parser->bufptr;
1618             bufend = PL_parser->bufend;
1619         }
1620         head = (U8)*s;
1621         if (UTF8_IS_INVARIANT(head))
1622             return head;
1623         if (UTF8_IS_START(head)) {
1624             len = UTF8SKIP(&head);
1625             while ((STRLEN)(bufend-s) < len) {
1626                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1627                     break;
1628                 s = PL_parser->bufptr;
1629                 bufend = PL_parser->bufend;
1630             }
1631         }
1632         unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1633         if (retlen == (STRLEN)-1) {
1634             _force_out_malformed_utf8_message((U8 *) s,
1635                                               (U8 *) bufend,
1636                                               0,
1637                                               1 /* 1 means die */ );
1638             NOT_REACHED; /* NOTREACHED */
1639         }
1640         return unichar;
1641     } else {
1642         if (s == bufend) {
1643             if (!lex_next_chunk(flags))
1644                 return -1;
1645             s = PL_parser->bufptr;
1646         }
1647         return (U8)*s;
1648     }
1649 }
1650 
1651 /*
1652 =for apidoc lex_read_unichar
1653 
1654 Reads the next (Unicode) character in the text currently being lexed.
1655 Returns the codepoint (unsigned integer value) of the character read,
1656 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1657 if lexing has reached the end of the input text.  To non-destructively
1658 examine the next character, use L</lex_peek_unichar> instead.
1659 
1660 If the next character is in (or extends into) the next chunk of input
1661 text, the next chunk will be read in.  Normally the current chunk will be
1662 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1663 bit set, then the current chunk will not be discarded.
1664 
1665 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1666 is encountered, an exception is generated.
1667 
1668 =cut
1669 */
1670 
1671 I32
Perl_lex_read_unichar(pTHX_ U32 flags)1672 Perl_lex_read_unichar(pTHX_ U32 flags)
1673 {
1674     I32 c;
1675     if (flags & ~(LEX_KEEP_PREVIOUS))
1676         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1677     c = lex_peek_unichar(flags);
1678     if (c != -1) {
1679         if (c == '\n')
1680             COPLINE_INC_WITH_HERELINES;
1681         if (UTF)
1682             PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1683         else
1684             ++(PL_parser->bufptr);
1685     }
1686     return c;
1687 }
1688 
1689 /*
1690 =for apidoc lex_read_space
1691 
1692 Reads optional spaces, in Perl style, in the text currently being
1693 lexed.  The spaces may include ordinary whitespace characters and
1694 Perl-style comments.  C<#line> directives are processed if encountered.
1695 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1696 at a non-space character (or the end of the input text).
1697 
1698 If spaces extend into the next chunk of input text, the next chunk will
1699 be read in.  Normally the current chunk will be discarded at the same
1700 time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, then the current
1701 chunk will not be discarded.
1702 
1703 =cut
1704 */
1705 
1706 #define LEX_NO_INCLINE    0x40000000
1707 #define LEX_NO_NEXT_CHUNK 0x80000000
1708 
1709 void
Perl_lex_read_space(pTHX_ U32 flags)1710 Perl_lex_read_space(pTHX_ U32 flags)
1711 {
1712     char *s, *bufend;
1713     const bool can_incline = !(flags & LEX_NO_INCLINE);
1714     bool need_incline = 0;
1715     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1716         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1717     s = PL_parser->bufptr;
1718     bufend = PL_parser->bufend;
1719     while (1) {
1720         char c = *s;
1721         if (c == '#') {
1722             do {
1723                 c = *++s;
1724             } while (!(c == '\n' || (c == 0 && s == bufend)));
1725         } else if (c == '\n') {
1726             s++;
1727             if (can_incline) {
1728                 PL_parser->linestart = s;
1729                 if (s == bufend)
1730                     need_incline = 1;
1731                 else
1732                     incline(s, bufend);
1733             }
1734         } else if (isSPACE(c)) {
1735             s++;
1736         } else if (c == 0 && s == bufend) {
1737             bool got_more;
1738             line_t l;
1739             if (flags & LEX_NO_NEXT_CHUNK)
1740                 break;
1741             PL_parser->bufptr = s;
1742             l = CopLINE(PL_curcop);
1743             CopLINE(PL_curcop) += PL_parser->herelines + 1;
1744             got_more = lex_next_chunk(flags);
1745             CopLINE_set(PL_curcop, l);
1746             s = PL_parser->bufptr;
1747             bufend = PL_parser->bufend;
1748             if (!got_more)
1749                 break;
1750             if (can_incline && need_incline && PL_parser->rsfp) {
1751                 incline(s, bufend);
1752                 need_incline = 0;
1753             }
1754         } else if (!c) {
1755             s++;
1756         } else {
1757             break;
1758         }
1759     }
1760     PL_parser->bufptr = s;
1761 }
1762 
1763 /*
1764 
1765 =for apidoc validate_proto
1766 
1767 This function performs syntax checking on a prototype, C<proto>.
1768 If C<warn> is true, any illegal characters or mismatched brackets
1769 will trigger illegalproto warnings, declaring that they were
1770 detected in the prototype for C<name>.
1771 
1772 The return value is C<true> if this is a valid prototype, and
1773 C<false> if it is not, regardless of whether C<warn> was C<true> or
1774 C<false>.
1775 
1776 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1777 
1778 =cut
1779 
1780  */
1781 
1782 bool
Perl_validate_proto(pTHX_ SV * name,SV * proto,bool warn,bool curstash)1783 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn, bool curstash)
1784 {
1785     STRLEN len, origlen;
1786     char *p;
1787     bool bad_proto = FALSE;
1788     bool in_brackets = FALSE;
1789     bool after_slash = FALSE;
1790     char greedy_proto = ' ';
1791     bool proto_after_greedy_proto = FALSE;
1792     bool must_be_last = FALSE;
1793     bool underscore = FALSE;
1794     bool bad_proto_after_underscore = FALSE;
1795 
1796     PERL_ARGS_ASSERT_VALIDATE_PROTO;
1797 
1798     if (!proto)
1799         return TRUE;
1800 
1801     p = SvPV(proto, len);
1802     origlen = len;
1803     for (; len--; p++) {
1804         if (!isSPACE(*p)) {
1805             if (must_be_last)
1806                 proto_after_greedy_proto = TRUE;
1807             if (underscore) {
1808                 if (!memCHRs(";@%", *p))
1809                     bad_proto_after_underscore = TRUE;
1810                 underscore = FALSE;
1811             }
1812             if (!memCHRs("$@%*;[]&\\_+", *p) || *p == '\0') {
1813                 bad_proto = TRUE;
1814             }
1815             else {
1816                 if (*p == '[')
1817                     in_brackets = TRUE;
1818                 else if (*p == ']')
1819                     in_brackets = FALSE;
1820                 else if ((*p == '@' || *p == '%')
1821                          && !after_slash
1822                          && !in_brackets )
1823                 {
1824                     must_be_last = TRUE;
1825                     greedy_proto = *p;
1826                 }
1827                 else if (*p == '_')
1828                     underscore = TRUE;
1829             }
1830             if (*p == '\\')
1831                 after_slash = TRUE;
1832             else
1833                 after_slash = FALSE;
1834         }
1835     }
1836 
1837     if (warn) {
1838         SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1839         p -= origlen;
1840         p = SvUTF8(proto)
1841             ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1842                              origlen, UNI_DISPLAY_ISPRINT)
1843             : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1844 
1845         if (curstash && !memchr(SvPVX(name), ':', SvCUR(name))) {
1846             SV *name2 = sv_2mortal(newSVsv(PL_curstname));
1847             sv_catpvs(name2, "::");
1848             sv_catsv(name2, (SV *)name);
1849             name = name2;
1850         }
1851 
1852         if (proto_after_greedy_proto)
1853             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1854                         "Prototype after '%c' for %" SVf " : %s",
1855                         greedy_proto, SVfARG(name), p);
1856         if (in_brackets)
1857             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1858                         "Missing ']' in prototype for %" SVf " : %s",
1859                         SVfARG(name), p);
1860         if (bad_proto)
1861             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1862                         "Illegal character in prototype for %" SVf " : %s",
1863                         SVfARG(name), p);
1864         if (bad_proto_after_underscore)
1865             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1866                         "Illegal character after '_' in prototype for %" SVf " : %s",
1867                         SVfARG(name), p);
1868     }
1869 
1870     return (! (proto_after_greedy_proto || bad_proto) );
1871 }
1872 
1873 /*
1874  * S_incline
1875  * This subroutine has nothing to do with tilting, whether at windmills
1876  * or pinball tables.  Its name is short for "increment line".  It
1877  * increments the current line number in CopLINE(PL_curcop) and checks
1878  * to see whether the line starts with a comment of the form
1879  *    # line 500 "foo.pm"
1880  * If so, it sets the current line number and file to the values in the comment.
1881  */
1882 
1883 STATIC void
S_incline(pTHX_ const char * s,const char * end)1884 S_incline(pTHX_ const char *s, const char *end)
1885 {
1886     const char *t;
1887     const char *n;
1888     const char *e;
1889     line_t line_num;
1890     UV uv;
1891 
1892     PERL_ARGS_ASSERT_INCLINE;
1893 
1894     assert(end >= s);
1895 
1896     COPLINE_INC_WITH_HERELINES;
1897     if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1898      && s+1 == PL_bufend && *s == ';') {
1899         /* fake newline in string eval */
1900         CopLINE_dec(PL_curcop);
1901         return;
1902     }
1903     if (*s++ != '#')
1904         return;
1905     while (SPACE_OR_TAB(*s))
1906         s++;
1907     if (memBEGINs(s, (STRLEN) (end - s), "line"))
1908         s += sizeof("line") - 1;
1909     else
1910         return;
1911     if (SPACE_OR_TAB(*s))
1912         s++;
1913     else
1914         return;
1915     while (SPACE_OR_TAB(*s))
1916         s++;
1917     if (!isDIGIT(*s))
1918         return;
1919 
1920     n = s;
1921     while (isDIGIT(*s))
1922         s++;
1923     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1924         return;
1925     while (SPACE_OR_TAB(*s))
1926         s++;
1927     if (*s == '"' && (t = (char *) memchr(s+1, '"', end - s))) {
1928         s++;
1929         e = t + 1;
1930     }
1931     else {
1932         t = s;
1933         while (*t && !isSPACE(*t))
1934             t++;
1935         e = t;
1936     }
1937     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1938         e++;
1939     if (*e != '\n' && *e != '\0')
1940         return;		/* false alarm */
1941 
1942     if (!grok_atoUV(n, &uv, &e))
1943         return;
1944     line_num = ((line_t)uv) - 1;
1945 
1946     if (t - s > 0) {
1947         const STRLEN len = t - s;
1948 
1949         if (!PL_rsfp && !PL_parser->filtered) {
1950             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1951              * to *{"::_<newfilename"} */
1952             /* However, the long form of evals is only turned on by the
1953                debugger - usually they're "(eval %lu)" */
1954             GV * const cfgv = CopFILEGV(PL_curcop);
1955             if (cfgv) {
1956                 char smallbuf[128];
1957                 STRLEN tmplen2 = len;
1958                 char *tmpbuf2;
1959                 GV *gv2;
1960 
1961                 if (tmplen2 + 2 <= sizeof smallbuf)
1962                     tmpbuf2 = smallbuf;
1963                 else
1964                     Newx(tmpbuf2, tmplen2 + 2, char);
1965 
1966                 tmpbuf2[0] = '_';
1967                 tmpbuf2[1] = '<';
1968 
1969                 memcpy(tmpbuf2 + 2, s, tmplen2);
1970                 tmplen2 += 2;
1971 
1972                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1973                 if (!isGV(gv2)) {
1974                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1975                     /* adjust ${"::_<newfilename"} to store the new file name */
1976                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1977                     /* The line number may differ. If that is the case,
1978                        alias the saved lines that are in the array.
1979                        Otherwise alias the whole array. */
1980                     if (CopLINE(PL_curcop) == line_num) {
1981                         GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1982                         GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1983                     }
1984                     else if (GvAV(cfgv)) {
1985                         AV * const av = GvAV(cfgv);
1986                         const line_t start = CopLINE(PL_curcop)+1;
1987                         SSize_t items = AvFILLp(av) - start;
1988                         if (items > 0) {
1989                             AV * const av2 = GvAVn(gv2);
1990                             SV **svp = AvARRAY(av) + start;
1991                             Size_t l = line_num+1;
1992                             while (items-- && l < SSize_t_MAX && l == (line_t)l)
1993                                 av_store(av2, (SSize_t)l++, SvREFCNT_inc(*svp++));
1994                         }
1995                     }
1996                 }
1997 
1998                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1999             }
2000         }
2001         CopFILE_free(PL_curcop);
2002         CopFILE_setn(PL_curcop, s, len);
2003     }
2004     CopLINE_set(PL_curcop, line_num);
2005 }
2006 
2007 STATIC void
S_update_debugger_info(pTHX_ SV * orig_sv,const char * const buf,STRLEN len)2008 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
2009 {
2010     AV *av = CopFILEAVx(PL_curcop);
2011     if (av) {
2012         SV * sv;
2013         if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
2014         else {
2015             sv = *av_fetch(av, 0, 1);
2016             SvUPGRADE(sv, SVt_PVMG);
2017         }
2018         if (!SvPOK(sv)) SvPVCLEAR(sv);
2019         if (orig_sv)
2020             sv_catsv(sv, orig_sv);
2021         else
2022             sv_catpvn(sv, buf, len);
2023         if (!SvIOK(sv)) {
2024             (void)SvIOK_on(sv);
2025             SvIV_set(sv, 0);
2026         }
2027         if (PL_parser->preambling == NOLINE)
2028             av_store(av, CopLINE(PL_curcop), sv);
2029     }
2030 }
2031 
2032 /*
2033  * skipspace
2034  * Called to gobble the appropriate amount and type of whitespace.
2035  * Skips comments as well.
2036  * Returns the next character after the whitespace that is skipped.
2037  *
2038  * peekspace
2039  * Same thing, but look ahead without incrementing line numbers or
2040  * adjusting PL_linestart.
2041  */
2042 
2043 #define skipspace(s) skipspace_flags(s, 0)
2044 #define peekspace(s) skipspace_flags(s, LEX_NO_INCLINE)
2045 
2046 char *
Perl_skipspace_flags(pTHX_ char * s,U32 flags)2047 Perl_skipspace_flags(pTHX_ char *s, U32 flags)
2048 {
2049     PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
2050     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2051         while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s))
2052             s++;
2053     } else {
2054         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
2055         PL_bufptr = s;
2056         lex_read_space(flags | LEX_KEEP_PREVIOUS |
2057                 (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ?
2058                     LEX_NO_NEXT_CHUNK : 0));
2059         s = PL_bufptr;
2060         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
2061         if (PL_linestart > PL_bufptr)
2062             PL_bufptr = PL_linestart;
2063         return s;
2064     }
2065     return s;
2066 }
2067 
2068 /*
2069  * S_check_uni
2070  * Check the unary operators to ensure there's no ambiguity in how they're
2071  * used.  An ambiguous piece of code would be:
2072  *     rand + 5
2073  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
2074  * the +5 is its argument.
2075  */
2076 
2077 STATIC void
S_check_uni(pTHX)2078 S_check_uni(pTHX)
2079 {
2080     const char *s;
2081 
2082     if (PL_oldoldbufptr != PL_last_uni)
2083         return;
2084     while (isSPACE(*PL_last_uni))
2085         PL_last_uni++;
2086     s = PL_last_uni;
2087     while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-')
2088         s += UTF ? UTF8SKIP(s) : 1;
2089     if (s < PL_bufptr && memchr(s, '(', PL_bufptr - s))
2090         return;
2091 
2092     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
2093                      "Warning: Use of \"%" UTF8f "\" without parentheses is ambiguous",
2094                      UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
2095 }
2096 
2097 /*
2098  * LOP : macro to build a list operator.  Its behaviour has been replaced
2099  * with a subroutine, S_lop() for which LOP is just another name.
2100  */
2101 
2102 #define LOP(f,x) return lop(f,x,s)
2103 
2104 /*
2105  * S_lop
2106  * Build a list operator (or something that might be one).  The rules:
2107  *  - if we have a next token, then it's a list operator (no parens) for
2108  *    which the next token has already been parsed; e.g.,
2109  *       sort foo @args
2110  *       sort foo (@args)
2111  *  - if the next thing is an opening paren, then it's a function
2112  *  - else it's a list operator
2113  */
2114 
2115 STATIC I32
S_lop(pTHX_ I32 f,U8 x,char * s)2116 S_lop(pTHX_ I32 f, U8 x, char *s)
2117 {
2118     PERL_ARGS_ASSERT_LOP;
2119 
2120     pl_yylval.ival = f;
2121     CLINE;
2122     PL_bufptr = s;
2123     PL_last_lop = PL_oldbufptr;
2124     PL_last_lop_op = (OPCODE)f;
2125     if (PL_nexttoke)
2126         goto lstop;
2127     PL_expect = x;
2128     if (*s == '(')
2129         return REPORT(FUNC);
2130     s = skipspace(s);
2131     if (*s == '(')
2132         return REPORT(FUNC);
2133     else {
2134         lstop:
2135         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
2136             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
2137         return REPORT(LSTOP);
2138     }
2139 }
2140 
2141 /*
2142  * S_force_next
2143  * When the lexer realizes it knows the next token (for instance,
2144  * it is reordering tokens for the parser) then it can call S_force_next
2145  * to know what token to return the next time the lexer is called.  Caller
2146  * will need to set PL_nextval[] and possibly PL_expect to ensure
2147  * the lexer handles the token correctly.
2148  */
2149 
2150 STATIC void
S_force_next(pTHX_ I32 type)2151 S_force_next(pTHX_ I32 type)
2152 {
2153 #ifdef DEBUGGING
2154     if (DEBUG_T_TEST) {
2155         PerlIO_printf(Perl_debug_log, "### forced token:\n");
2156         tokereport(type, &NEXTVAL_NEXTTOKE);
2157     }
2158 #endif
2159     assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
2160     PL_nexttype[PL_nexttoke] = type;
2161     PL_nexttoke++;
2162 }
2163 
2164 /*
2165  * S_postderef
2166  *
2167  * This subroutine handles postfix deref syntax after the arrow has already
2168  * been emitted.  @* $* etc. are emitted as two separate tokens right here.
2169  * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
2170  * only the first, leaving yylex to find the next.
2171  */
2172 
2173 static int
S_postderef(pTHX_ int const funny,char const next)2174 S_postderef(pTHX_ int const funny, char const next)
2175 {
2176     assert(funny == DOLSHARP
2177         || funny == PERLY_DOLLAR
2178         || funny == PERLY_SNAIL
2179         || funny == PERLY_PERCENT_SIGN
2180         || funny == PERLY_AMPERSAND
2181         || funny == PERLY_STAR
2182     );
2183     if (next == '*') {
2184         PL_expect = XOPERATOR;
2185         if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
2186             assert(PERLY_SNAIL == funny || PERLY_DOLLAR == funny || DOLSHARP == funny);
2187             PL_lex_state = LEX_INTERPEND;
2188             if (PERLY_SNAIL == funny)
2189                 force_next(POSTJOIN);
2190         }
2191         force_next(PERLY_STAR);
2192         PL_bufptr+=2;
2193     }
2194     else {
2195         if (PERLY_SNAIL == funny && PL_lex_state == LEX_INTERPNORMAL
2196          && !PL_lex_brackets)
2197             PL_lex_dojoin = 2;
2198         PL_expect = XOPERATOR;
2199         PL_bufptr++;
2200     }
2201     return funny;
2202 }
2203 
2204 void
Perl_yyunlex(pTHX)2205 Perl_yyunlex(pTHX)
2206 {
2207     int yyc = PL_parser->yychar;
2208     if (yyc != YYEMPTY) {
2209         if (yyc) {
2210             NEXTVAL_NEXTTOKE = PL_parser->yylval;
2211             if (yyc == PERLY_BRACE_OPEN || yyc == HASHBRACK || yyc == PERLY_BRACKET_OPEN) {
2212                 PL_lex_allbrackets--;
2213                 PL_lex_brackets--;
2214                 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2215             } else if (yyc == PERLY_PAREN_OPEN) {
2216                 PL_lex_allbrackets--;
2217                 yyc |= (2<<24);
2218             }
2219             force_next(yyc);
2220         }
2221         PL_parser->yychar = YYEMPTY;
2222     }
2223 }
2224 
2225 STATIC SV *
S_newSV_maybe_utf8(pTHX_ const char * const start,STRLEN len)2226 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2227 {
2228     SV * const sv = newSVpvn_utf8(start, len,
2229                     ! IN_BYTES
2230                   &&  UTF
2231                   &&  len != 0
2232                   &&  is_utf8_non_invariant_string((const U8*)start, len));
2233     return sv;
2234 }
2235 
2236 /*
2237  * S_force_word
2238  * When the lexer knows the next thing is a word (for instance, it has
2239  * just seen -> and it knows that the next char is a word char, then
2240  * it calls S_force_word to stick the next word into the PL_nexttoke/val
2241  * lookahead.
2242  *
2243  * Arguments:
2244  *   char *start : buffer position (must be within PL_linestr)
2245  *   int token   : PL_next* will be this type of bare word
2246  *                 (e.g., METHCALL0,BAREWORD)
2247  *   int check_keyword : if true, Perl checks to make sure the word isn't
2248  *       a keyword (do this if the word is a label, e.g. goto FOO)
2249  *   int allow_pack : if true, : characters will also be allowed (require,
2250  *       use, etc. do this)
2251  */
2252 
2253 STATIC char *
S_force_word(pTHX_ char * start,int token,int check_keyword,int allow_pack)2254 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2255 {
2256     char *s;
2257     STRLEN len;
2258 
2259     PERL_ARGS_ASSERT_FORCE_WORD;
2260 
2261     start = skipspace(start);
2262     s = start;
2263     if (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
2264         || (allow_pack && *s == ':' && s[1] == ':') )
2265     {
2266         s = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len, allow_pack);
2267         if (check_keyword) {
2268           char *s2 = PL_tokenbuf;
2269           STRLEN len2 = len;
2270           if (allow_pack && memBEGINPs(s2, len, "CORE::")) {
2271             s2 += sizeof("CORE::") - 1;
2272             len2 -= sizeof("CORE::") - 1;
2273           }
2274           if (keyword(s2, len2, 0))
2275             return start;
2276         }
2277         if (token == METHCALL0) {
2278             s = skipspace(s);
2279             if (*s == '(')
2280                 PL_expect = XTERM;
2281             else {
2282                 PL_expect = XOPERATOR;
2283             }
2284         }
2285         NEXTVAL_NEXTTOKE.opval
2286             = newSVOP(OP_CONST,0,
2287                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2288         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2289         force_next(token);
2290     }
2291     return s;
2292 }
2293 
2294 /*
2295  * S_force_ident
2296  * Called when the lexer wants $foo *foo &foo etc, but the program
2297  * text only contains the "foo" portion.  The first argument is a pointer
2298  * to the "foo", and the second argument is the type symbol to prefix.
2299  * Forces the next token to be a "BAREWORD".
2300  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2301  */
2302 
2303 STATIC void
S_force_ident(pTHX_ const char * s,int kind)2304 S_force_ident(pTHX_ const char *s, int kind)
2305 {
2306     PERL_ARGS_ASSERT_FORCE_IDENT;
2307 
2308     if (s[0]) {
2309         const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2310         OP* const o = newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2311                                                                 UTF ? SVf_UTF8 : 0));
2312         NEXTVAL_NEXTTOKE.opval = o;
2313         force_next(BAREWORD);
2314         if (kind) {
2315             o->op_private = OPpCONST_ENTERED;
2316             /* XXX see note in pp_entereval() for why we forgo typo
2317                warnings if the symbol must be introduced in an eval.
2318                GSAR 96-10-12 */
2319             gv_fetchpvn_flags(s, len,
2320                               (PL_in_eval ? GV_ADDMULTI
2321                               : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2322                               kind == PERLY_DOLLAR ? SVt_PV :
2323                               kind == PERLY_SNAIL ? SVt_PVAV :
2324                               kind == PERLY_PERCENT_SIGN ? SVt_PVHV :
2325                               SVt_PVGV
2326                               );
2327         }
2328     }
2329 }
2330 
2331 static void
S_force_ident_maybe_lex(pTHX_ char pit)2332 S_force_ident_maybe_lex(pTHX_ char pit)
2333 {
2334     NEXTVAL_NEXTTOKE.ival = pit;
2335     force_next('p');
2336 }
2337 
2338 NV
Perl_str_to_version(pTHX_ SV * sv)2339 Perl_str_to_version(pTHX_ SV *sv)
2340 {
2341     NV retval = 0.0;
2342     NV nshift = 1.0;
2343     STRLEN len;
2344     const char *start = SvPV_const(sv,len);
2345     const char * const end = start + len;
2346     const bool utf = cBOOL(SvUTF8(sv));
2347 
2348     PERL_ARGS_ASSERT_STR_TO_VERSION;
2349 
2350     while (start < end) {
2351         STRLEN skip;
2352         UV n;
2353         if (utf)
2354             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2355         else {
2356             n = *(U8*)start;
2357             skip = 1;
2358         }
2359         retval += ((NV)n)/nshift;
2360         start += skip;
2361         nshift *= 1000;
2362     }
2363     return retval;
2364 }
2365 
2366 /*
2367  * S_force_version
2368  * Forces the next token to be a version number.
2369  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2370  * and if "guessing" is TRUE, then no new token is created (and the caller
2371  * must use an alternative parsing method).
2372  */
2373 
2374 STATIC char *
S_force_version(pTHX_ char * s,int guessing)2375 S_force_version(pTHX_ char *s, int guessing)
2376 {
2377     OP *version = NULL;
2378     char *d;
2379 
2380     PERL_ARGS_ASSERT_FORCE_VERSION;
2381 
2382     s = skipspace(s);
2383 
2384     d = s;
2385     if (*d == 'v')
2386         d++;
2387     if (isDIGIT(*d)) {
2388         while (isDIGIT(*d) || *d == '_' || *d == '.')
2389             d++;
2390         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2391             SV *ver;
2392             s = scan_num(s, &pl_yylval);
2393             version = pl_yylval.opval;
2394             ver = cSVOPx(version)->op_sv;
2395             if (SvPOK(ver) && !SvNIOK(ver)) {
2396                 SvUPGRADE(ver, SVt_PVNV);
2397                 SvNV_set(ver, str_to_version(ver));
2398                 SvNOK_on(ver);		/* hint that it is a version */
2399             }
2400         }
2401         else if (guessing) {
2402             return s;
2403         }
2404     }
2405 
2406     /* NOTE: The parser sees the package name and the VERSION swapped */
2407     NEXTVAL_NEXTTOKE.opval = version;
2408     force_next(BAREWORD);
2409 
2410     return s;
2411 }
2412 
2413 /*
2414  * S_force_strict_version
2415  * Forces the next token to be a version number using strict syntax rules.
2416  */
2417 
2418 STATIC char *
S_force_strict_version(pTHX_ char * s)2419 S_force_strict_version(pTHX_ char *s)
2420 {
2421     OP *version = NULL;
2422     const char *errstr = NULL;
2423 
2424     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2425 
2426     while (isSPACE(*s)) /* leading whitespace */
2427         s++;
2428 
2429     if (is_STRICT_VERSION(s,&errstr)) {
2430         SV *ver = newSV_type(SVt_NULL);
2431         s = (char *)scan_version(s, ver, 0);
2432         version = newSVOP(OP_CONST, 0, ver);
2433     }
2434     else if ((*s != ';' && *s != ':' && *s != '{' && *s != '}' )
2435              && (s = skipspace(s), (*s != ';' && *s != ':' && *s != '{' && *s != '}' )))
2436     {
2437         PL_bufptr = s;
2438         if (errstr)
2439             yyerror(errstr); /* version required */
2440         return s;
2441     }
2442 
2443     /* NOTE: The parser sees the package name and the VERSION swapped */
2444     NEXTVAL_NEXTTOKE.opval = version;
2445     force_next(BAREWORD);
2446 
2447     return s;
2448 }
2449 
2450 /*
2451  * S_tokeq
2452  * Turns any \\ into \ in a quoted string passed in in 'sv', returning 'sv',
2453  * modified as necessary.  However, if HINT_NEW_STRING is on, 'sv' is
2454  * unchanged, and a new SV containing the modified input is returned.
2455  */
2456 
2457 STATIC SV *
S_tokeq(pTHX_ SV * sv)2458 S_tokeq(pTHX_ SV *sv)
2459 {
2460     char *s;
2461     char *send;
2462     char *d;
2463     SV *pv = sv;
2464 
2465     PERL_ARGS_ASSERT_TOKEQ;
2466 
2467     assert (SvPOK(sv));
2468     assert (SvLEN(sv));
2469     assert (!SvIsCOW(sv));
2470     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2471         goto finish;
2472     s = SvPVX(sv);
2473     send = SvEND(sv);
2474     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2475     while (s < send && !(*s == '\\' && s[1] == '\\'))
2476         s++;
2477     if (s == send)
2478         goto finish;
2479     d = s;
2480     if ( PL_hints & HINT_NEW_STRING ) {
2481         pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2482                             SVs_TEMP | SvUTF8(sv));
2483     }
2484     while (s < send) {
2485         if (*s == '\\') {
2486             if (s + 1 < send && (s[1] == '\\'))
2487                 s++;		/* all that, just for this */
2488         }
2489         *d++ = *s++;
2490     }
2491     *d = '\0';
2492     SvCUR_set(sv, d - SvPVX_const(sv));
2493   finish:
2494     if ( PL_hints & HINT_NEW_STRING )
2495        return new_constant(NULL, 0, "q", sv, pv, "q", 1, NULL);
2496     return sv;
2497 }
2498 
2499 /*
2500  * Now come three functions related to double-quote context,
2501  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2502  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2503  * interact with PL_lex_state, and create fake ( ... ) argument lists
2504  * to handle functions and concatenation.
2505  * For example,
2506  *   "foo\lbar"
2507  * is tokenised as
2508  *    stringify ( const[foo] concat lcfirst ( const[bar] ) )
2509  */
2510 
2511 /*
2512  * S_sublex_start
2513  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2514  *
2515  * Pattern matching will set PL_lex_op to the pattern-matching op to
2516  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2517  *
2518  * OP_CONST is easy--just make the new op and return.
2519  *
2520  * Everything else becomes a FUNC.
2521  *
2522  * Sets PL_lex_state to LEX_INTERPPUSH unless ival was OP_NULL or we
2523  * had an OP_CONST.  This just sets us up for a
2524  * call to S_sublex_push().
2525  */
2526 
2527 STATIC I32
S_sublex_start(pTHX)2528 S_sublex_start(pTHX)
2529 {
2530     const I32 op_type = pl_yylval.ival;
2531 
2532     if (op_type == OP_NULL) {
2533         pl_yylval.opval = PL_lex_op;
2534         PL_lex_op = NULL;
2535         return THING;
2536     }
2537     if (op_type == OP_CONST) {
2538         SV *sv = PL_lex_stuff;
2539         PL_lex_stuff = NULL;
2540         sv = tokeq(sv);
2541 
2542         if (SvTYPE(sv) == SVt_PVIV) {
2543             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2544             STRLEN len;
2545             const char * const p = SvPV_const(sv, len);
2546             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2547             SvREFCNT_dec(sv);
2548             sv = nsv;
2549         }
2550         pl_yylval.opval = newSVOP(op_type, 0, sv);
2551         return THING;
2552     }
2553 
2554     PL_parser->lex_super_state = PL_lex_state;
2555     PL_parser->lex_sub_inwhat = (U16)op_type;
2556     PL_parser->lex_sub_op = PL_lex_op;
2557     PL_parser->sub_no_recover = FALSE;
2558     PL_parser->sub_error_count = PL_error_count;
2559     PL_lex_state = LEX_INTERPPUSH;
2560 
2561     PL_expect = XTERM;
2562     if (PL_lex_op) {
2563         pl_yylval.opval = PL_lex_op;
2564         PL_lex_op = NULL;
2565         return PMFUNC;
2566     }
2567     else
2568         return FUNC;
2569 }
2570 
2571 /*
2572  * S_sublex_push
2573  * Create a new scope to save the lexing state.  The scope will be
2574  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2575  * to the uc, lc, etc. found before.
2576  * Sets PL_lex_state to LEX_INTERPCONCAT.
2577  */
2578 
2579 STATIC I32
S_sublex_push(pTHX)2580 S_sublex_push(pTHX)
2581 {
2582     LEXSHARED *shared;
2583     const bool is_heredoc = PL_multi_close == '<';
2584     ENTER;
2585 
2586     PL_lex_state = PL_parser->lex_super_state;
2587     SAVEI8(PL_lex_dojoin);
2588     SAVEI32(PL_lex_brackets);
2589     SAVEI32(PL_lex_allbrackets);
2590     SAVEI32(PL_lex_formbrack);
2591     SAVEI8(PL_lex_fakeeof);
2592     SAVEI32(PL_lex_casemods);
2593     SAVEI32(PL_lex_starts);
2594     SAVEI8(PL_lex_state);
2595     SAVESPTR(PL_lex_repl);
2596     SAVEVPTR(PL_lex_inpat);
2597     SAVEI16(PL_lex_inwhat);
2598     if (is_heredoc)
2599     {
2600         SAVECOPLINE(PL_curcop);
2601         SAVEI32(PL_multi_end);
2602         SAVEI32(PL_parser->herelines);
2603         PL_parser->herelines = 0;
2604     }
2605     SAVEIV(PL_multi_close);
2606     SAVEPPTR(PL_bufptr);
2607     SAVEPPTR(PL_bufend);
2608     SAVEPPTR(PL_oldbufptr);
2609     SAVEPPTR(PL_oldoldbufptr);
2610     SAVEPPTR(PL_last_lop);
2611     SAVEPPTR(PL_last_uni);
2612     SAVEPPTR(PL_linestart);
2613     SAVESPTR(PL_linestr);
2614     SAVEGENERICPV(PL_lex_brackstack);
2615     SAVEGENERICPV(PL_lex_casestack);
2616     SAVEGENERICPV(PL_parser->lex_shared);
2617     SAVEBOOL(PL_parser->lex_re_reparsing);
2618     SAVEI32(PL_copline);
2619 
2620     /* The here-doc parser needs to be able to peek into outer lexing
2621        scopes to find the body of the here-doc.  So we put PL_linestr and
2622        PL_bufptr into lex_shared, to 'share' those values.
2623      */
2624     PL_parser->lex_shared->ls_linestr = PL_linestr;
2625     PL_parser->lex_shared->ls_bufptr  = PL_bufptr;
2626 
2627     PL_linestr = PL_lex_stuff;
2628     PL_lex_repl = PL_parser->lex_sub_repl;
2629     PL_lex_stuff = NULL;
2630     PL_parser->lex_sub_repl = NULL;
2631 
2632     /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets
2633        set for an inner quote-like operator and then an error causes scope-
2634        popping.  We must not have a PL_lex_stuff value left dangling, as
2635        that breaks assumptions elsewhere.  See bug #123617.  */
2636     SAVEGENERICSV(PL_lex_stuff);
2637     SAVEGENERICSV(PL_parser->lex_sub_repl);
2638 
2639     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2640         = SvPVX(PL_linestr);
2641     PL_bufend += SvCUR(PL_linestr);
2642     PL_last_lop = PL_last_uni = NULL;
2643     SAVEFREESV(PL_linestr);
2644     if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2645 
2646     PL_lex_dojoin = FALSE;
2647     PL_lex_brackets = PL_lex_formbrack = 0;
2648     PL_lex_allbrackets = 0;
2649     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2650     Newx(PL_lex_brackstack, 120, char);
2651     Newx(PL_lex_casestack, 12, char);
2652     PL_lex_casemods = 0;
2653     *PL_lex_casestack = '\0';
2654     PL_lex_starts = 0;
2655     PL_lex_state = LEX_INTERPCONCAT;
2656     if (is_heredoc)
2657         CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2658     PL_copline = NOLINE;
2659 
2660     Newxz(shared, 1, LEXSHARED);
2661     shared->ls_prev = PL_parser->lex_shared;
2662     PL_parser->lex_shared = shared;
2663 
2664     PL_lex_inwhat = PL_parser->lex_sub_inwhat;
2665     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2666     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2667         PL_lex_inpat = PL_parser->lex_sub_op;
2668     else
2669         PL_lex_inpat = NULL;
2670 
2671     PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2672     PL_in_eval &= ~EVAL_RE_REPARSING;
2673 
2674     return SUBLEXSTART;
2675 }
2676 
2677 /*
2678  * S_sublex_done
2679  * Restores lexer state after a S_sublex_push.
2680  */
2681 
2682 STATIC I32
S_sublex_done(pTHX)2683 S_sublex_done(pTHX)
2684 {
2685     if (!PL_lex_starts++) {
2686         SV * const sv = newSVpvs("");
2687         if (SvUTF8(PL_linestr))
2688             SvUTF8_on(sv);
2689         PL_expect = XOPERATOR;
2690         pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
2691         return THING;
2692     }
2693 
2694     if (PL_lex_casemods) {		/* oops, we've got some unbalanced parens */
2695         PL_lex_state = LEX_INTERPCASEMOD;
2696         return yylex();
2697     }
2698 
2699     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2700     assert(PL_lex_inwhat != OP_TRANSR);
2701     if (PL_lex_repl) {
2702         assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2703         PL_linestr = PL_lex_repl;
2704         PL_lex_inpat = 0;
2705         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2706         PL_bufend += SvCUR(PL_linestr);
2707         PL_last_lop = PL_last_uni = NULL;
2708         PL_lex_dojoin = FALSE;
2709         PL_lex_brackets = 0;
2710         PL_lex_allbrackets = 0;
2711         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2712         PL_lex_casemods = 0;
2713         *PL_lex_casestack = '\0';
2714         PL_lex_starts = 0;
2715         if (SvEVALED(PL_lex_repl)) {
2716             PL_lex_state = LEX_INTERPNORMAL;
2717             PL_lex_starts++;
2718             /*	we don't clear PL_lex_repl here, so that we can check later
2719                 whether this is an evalled subst; that means we rely on the
2720                 logic to ensure sublex_done() is called again only via the
2721                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2722         }
2723         else {
2724             PL_lex_state = LEX_INTERPCONCAT;
2725             PL_lex_repl = NULL;
2726         }
2727         if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2728             CopLINE(PL_curcop) +=
2729                 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xnv_lines
2730                  + PL_parser->herelines;
2731             PL_parser->herelines = 0;
2732         }
2733         return PERLY_SLASH;
2734     }
2735     else {
2736         const line_t l = CopLINE(PL_curcop);
2737         LEAVE;
2738         if (PL_parser->sub_error_count != PL_error_count) {
2739             if (PL_parser->sub_no_recover) {
2740                 yyquit();
2741                 NOT_REACHED;
2742             }
2743         }
2744         if (PL_multi_close == '<')
2745             PL_parser->herelines += l - PL_multi_end;
2746         PL_bufend = SvPVX(PL_linestr);
2747         PL_bufend += SvCUR(PL_linestr);
2748         PL_expect = XOPERATOR;
2749         return SUBLEXEND;
2750     }
2751 }
2752 
2753 HV *
Perl_load_charnames(pTHX_ SV * char_name,const char * context,const STRLEN context_len,const char ** error_msg)2754 Perl_load_charnames(pTHX_ SV * char_name, const char * context,
2755                           const STRLEN context_len, const char ** error_msg)
2756 {
2757     /* Load the official _charnames module if not already there.  The
2758      * parameters are just to give info for any error messages generated:
2759      *  char_name   a name to look up which is the reason for loading this
2760      *  context     'char_name' in the context in the input in which it appears
2761      *  context_len how many bytes 'context' occupies
2762      *  error_msg   *error_msg will be set to any error
2763      *
2764      *  Returns the ^H table if success; otherwise NULL */
2765 
2766     unsigned int i;
2767     HV * table;
2768     SV **cvp;
2769     SV * res;
2770 
2771     PERL_ARGS_ASSERT_LOAD_CHARNAMES;
2772 
2773     /* This loop is executed 1 1/2 times.  On the first time through, if it
2774      * isn't already loaded, try loading it, and iterate just once to see if it
2775      * worked.  */
2776     for (i = 0; i < 2; i++) {
2777         table = GvHV(PL_hintgv);		 /* ^H */
2778 
2779         if (    table
2780             && (PL_hints & HINT_LOCALIZE_HH)
2781             && (cvp = hv_fetchs(table, "charnames", FALSE))
2782             &&  SvOK(*cvp))
2783         {
2784             return table;   /* Quit if already loaded */
2785         }
2786 
2787         if (i == 0) {
2788             Perl_load_module(aTHX_
2789                 0,
2790                 newSVpvs("_charnames"),
2791 
2792                 /* version parameter; no need to specify it, as if we get too early
2793                 * a version, will fail anyway, not being able to find 'charnames'
2794                 * */
2795                 NULL,
2796                 newSVpvs(":full"),
2797                 newSVpvs(":short"),
2798                 NULL);
2799         }
2800     }
2801 
2802     /* Here, it failed; new_constant will give appropriate error messages */
2803     *error_msg = NULL;
2804     res = new_constant( NULL, 0, "charnames", char_name, NULL,
2805                         context, context_len, error_msg);
2806     SvREFCNT_dec(res);
2807 
2808     return NULL;
2809 }
2810 
2811 STATIC SV*
S_get_and_check_backslash_N_name_wrapper(pTHX_ const char * s,const char * const e)2812 S_get_and_check_backslash_N_name_wrapper(pTHX_ const char* s, const char* const e)
2813 {
2814     /* This justs wraps get_and_check_backslash_N_name() to output any error
2815      * message it returns. */
2816 
2817     const char * error_msg = NULL;
2818     SV * result;
2819 
2820     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME_WRAPPER;
2821 
2822     /* charnames doesn't work well if there have been errors found */
2823     if (PL_error_count > 0) {
2824         return NULL;
2825     }
2826 
2827     result = get_and_check_backslash_N_name(s, e, cBOOL(UTF), &error_msg);
2828 
2829     if (error_msg) {
2830         yyerror_pv(error_msg, UTF ? SVf_UTF8 : 0);
2831     }
2832 
2833     return result;
2834 }
2835 
2836 SV*
Perl_get_and_check_backslash_N_name(pTHX_ const char * s,const char * e,const bool is_utf8,const char ** error_msg)2837 Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
2838                                           const char* e,
2839                                           const bool is_utf8,
2840                                           const char ** error_msg)
2841 {
2842     /* <s> points to first character of interior of \N{}, <e> to one beyond the
2843      * interior, hence to the "}".  Finds what the name resolves to, returning
2844      * an SV* containing it; NULL if no valid one found.
2845      *
2846      * 'is_utf8' is TRUE if we know we want the result to be UTF-8 even if it
2847      * doesn't have to be. */
2848 
2849     SV* char_name;
2850     SV* res;
2851     HV * table;
2852     SV **cvp;
2853     SV *cv;
2854     SV *rv;
2855     HV *stash;
2856 
2857     /* Points to the beginning of the \N{... so that any messages include the
2858      * context of what's failing*/
2859     const char* context = s - 3;
2860     STRLEN context_len = e - context + 1; /* include all of \N{...} */
2861 
2862 
2863     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2864 
2865     assert(e >= s);
2866     assert(s > (char *) 3);
2867 
2868     while (s < e && isBLANK(*s)) {
2869         s++;
2870     }
2871 
2872     while (s < e && isBLANK(*(e - 1))) {
2873         e--;
2874     }
2875 
2876     char_name = newSVpvn_flags(s, e - s, (is_utf8) ? SVf_UTF8 : 0);
2877 
2878     if (!SvCUR(char_name)) {
2879         SvREFCNT_dec_NN(char_name);
2880         /* diag_listed_as: Unknown charname '%s' */
2881         *error_msg = Perl_form(aTHX_ "Unknown charname ''");
2882         return NULL;
2883     }
2884 
2885     /* Autoload the charnames module */
2886 
2887     table = load_charnames(char_name, context, context_len, error_msg);
2888     if (table == NULL) {
2889         return NULL;
2890     }
2891 
2892     *error_msg = NULL;
2893     res = new_constant( NULL, 0, "charnames", char_name, NULL,
2894                         context, context_len, error_msg);
2895     if (*error_msg) {
2896         *error_msg = Perl_form(aTHX_ "Unknown charname '%s'", SvPVX(char_name));
2897 
2898         SvREFCNT_dec(res);
2899         return NULL;
2900     }
2901 
2902     /* See if the charnames handler is the Perl core's, and if so, we can skip
2903      * the validation needed for a user-supplied one, as Perl's does its own
2904      * validation. */
2905     cvp = hv_fetchs(table, "charnames", FALSE);
2906     if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2907         SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2908     {
2909         const char * const name = HvNAME(stash);
2910          if (memEQs(name, HvNAMELEN(stash), "_charnames")) {
2911            return res;
2912        }
2913     }
2914 
2915     /* Here, it isn't Perl's charname handler.  We can't rely on a
2916      * user-supplied handler to validate the input name.  For non-ut8 input,
2917      * look to see that the first character is legal.  Then loop through the
2918      * rest checking that each is a continuation */
2919 
2920     /* This code makes the reasonable assumption that the only Latin1-range
2921      * characters that begin a character name alias are alphabetic, otherwise
2922      * would have to create a isCHARNAME_BEGIN macro */
2923 
2924     if (! is_utf8) {
2925         if (! isALPHAU(*s)) {
2926             goto bad_charname;
2927         }
2928         s++;
2929         while (s < e) {
2930             if (! isCHARNAME_CONT(*s)) {
2931                 goto bad_charname;
2932             }
2933             if (*s == ' ' && *(s-1) == ' ') {
2934                 goto multi_spaces;
2935             }
2936             s++;
2937         }
2938     }
2939     else {
2940         /* Similarly for utf8.  For invariants can check directly; for other
2941          * Latin1, can calculate their code point and check; otherwise  use an
2942          * inversion list */
2943         if (UTF8_IS_INVARIANT(*s)) {
2944             if (! isALPHAU(*s)) {
2945                 goto bad_charname;
2946             }
2947             s++;
2948         } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2949             if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) {
2950                 goto bad_charname;
2951             }
2952             s += 2;
2953         }
2954         else {
2955             if (! _invlist_contains_cp(PL_utf8_charname_begin,
2956                                        utf8_to_uvchr_buf((U8 *) s,
2957                                                          (U8 *) e,
2958                                                          NULL)))
2959             {
2960                 goto bad_charname;
2961             }
2962             s += UTF8SKIP(s);
2963         }
2964 
2965         while (s < e) {
2966             if (UTF8_IS_INVARIANT(*s)) {
2967                 if (! isCHARNAME_CONT(*s)) {
2968                     goto bad_charname;
2969                 }
2970                 if (*s == ' ' && *(s-1) == ' ') {
2971                     goto multi_spaces;
2972                 }
2973                 s++;
2974             }
2975             else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2976                 if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1))))
2977                 {
2978                     goto bad_charname;
2979                 }
2980                 s += 2;
2981             }
2982             else {
2983                 if (! _invlist_contains_cp(PL_utf8_charname_continue,
2984                                            utf8_to_uvchr_buf((U8 *) s,
2985                                                              (U8 *) e,
2986                                                              NULL)))
2987                 {
2988                     goto bad_charname;
2989                 }
2990                 s += UTF8SKIP(s);
2991             }
2992         }
2993     }
2994     if (*(s-1) == ' ') {
2995         /* diag_listed_as: charnames alias definitions may not contain
2996                            trailing white-space; marked by <-- HERE in %s
2997          */
2998         *error_msg = Perl_form(aTHX_
2999             "charnames alias definitions may not contain trailing "
3000             "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
3001             (int)(s - context + 1), context,
3002             (int)(e - s + 1), s + 1);
3003         return NULL;
3004     }
3005 
3006     if (SvUTF8(res)) { /* Don't accept malformed charname value */
3007         const U8* first_bad_char_loc;
3008         STRLEN len;
3009         const char* const str = SvPV_const(res, len);
3010         if (UNLIKELY(! is_utf8_string_loc((U8 *) str, len,
3011                                           &first_bad_char_loc)))
3012         {
3013             _force_out_malformed_utf8_message(first_bad_char_loc,
3014                                               (U8 *) PL_parser->bufend,
3015                                               0,
3016                                               0 /* 0 means don't die */ );
3017             /* diag_listed_as: Malformed UTF-8 returned by \N{%s}
3018                                immediately after '%s' */
3019             *error_msg = Perl_form(aTHX_
3020                 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
3021                  (int) context_len, context,
3022                  (int) ((char *) first_bad_char_loc - str), str);
3023             return NULL;
3024         }
3025     }
3026 
3027     return res;
3028 
3029   bad_charname: {
3030 
3031         /* The final %.*s makes sure that should the trailing NUL be missing
3032          * that this print won't run off the end of the string */
3033         /* diag_listed_as: Invalid character in \N{...}; marked by <-- HERE
3034                            in \N{%s} */
3035         *error_msg = Perl_form(aTHX_
3036             "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
3037             (int)(s - context + 1), context,
3038             (int)(e - s + 1), s + 1);
3039         return NULL;
3040     }
3041 
3042   multi_spaces:
3043         /* diag_listed_as: charnames alias definitions may not contain a
3044                            sequence of multiple spaces; marked by <-- HERE
3045                            in %s */
3046         *error_msg = Perl_form(aTHX_
3047             "charnames alias definitions may not contain a sequence of "
3048             "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
3049             (int)(s - context + 1), context,
3050             (int)(e - s + 1), s + 1);
3051         return NULL;
3052 }
3053 
3054 /*
3055   scan_const
3056 
3057   Extracts the next constant part of a pattern, double-quoted string,
3058   or transliteration.  This is terrifying code.
3059 
3060   For example, in parsing the double-quoted string "ab\x63$d", it would
3061   stop at the '$' and return an OP_CONST containing 'abc'.
3062 
3063   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3064   processing a pattern (PL_lex_inpat is true), a transliteration
3065   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
3066 
3067   Returns a pointer to the character scanned up to. If this is
3068   advanced from the start pointer supplied (i.e. if anything was
3069   successfully parsed), will leave an OP_CONST for the substring scanned
3070   in pl_yylval. Caller must intuit reason for not parsing further
3071   by looking at the next characters herself.
3072 
3073   In patterns:
3074     expand:
3075       \N{FOO}  => \N{U+hex_for_character_FOO}
3076       (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
3077 
3078     pass through:
3079         all other \-char, including \N and \N{ apart from \N{ABC}
3080 
3081     stops on:
3082         @ and $ where it appears to be a var, but not for $ as tail anchor
3083         \l \L \u \U \Q \E
3084         (?{  or  (??{ or (*{
3085 
3086   In transliterations:
3087     characters are VERY literal, except for - not at the start or end
3088     of the string, which indicates a range.  However some backslash sequences
3089     are recognized: \r, \n, and the like
3090                     \007 \o{}, \x{}, \N{}
3091     If all elements in the transliteration are below 256,
3092     scan_const expands the range to the full set of intermediate
3093     characters. If the range is in utf8, the hyphen is replaced with
3094     a certain range mark which will be handled by pmtrans() in op.c.
3095 
3096   In double-quoted strings:
3097     backslashes:
3098       all those recognized in transliterations
3099       deprecated backrefs: \1 (in substitution replacements)
3100       case and quoting: \U \Q \E
3101     stops on @ and $
3102 
3103   scan_const does *not* construct ops to handle interpolated strings.
3104   It stops processing as soon as it finds an embedded $ or @ variable
3105   and leaves it to the caller to work out what's going on.
3106 
3107   embedded arrays (whether in pattern or not) could be:
3108       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
3109 
3110   $ in double-quoted strings must be the symbol of an embedded scalar.
3111 
3112   $ in pattern could be $foo or could be tail anchor.  Assumption:
3113   it's a tail anchor if $ is the last thing in the string, or if it's
3114   followed by one of "()| \r\n\t"
3115 
3116   \1 (backreferences) are turned into $1 in substitutions
3117 
3118   The structure of the code is
3119       while (there's a character to process) {
3120           handle transliteration ranges
3121           skip regexp comments /(?#comment)/ and codes /(?{code})/ ((*{code})/
3122           skip #-initiated comments in //x patterns
3123           check for embedded arrays
3124           check for embedded scalars
3125           if (backslash) {
3126               deprecate \1 in substitution replacements
3127               handle string-changing backslashes \l \U \Q \E, etc.
3128               switch (what was escaped) {
3129                   handle \- in a transliteration (becomes a literal -)
3130                   if a pattern and not \N{, go treat as regular character
3131                   handle \132 (octal characters)
3132                   handle \x15 and \x{1234} (hex characters)
3133                   handle \N{name} (named characters, also \N{3,5} in a pattern)
3134                   handle \cV (control characters)
3135                   handle printf-style backslashes (\f, \r, \n, etc)
3136               } (end switch)
3137               continue
3138           } (end if backslash)
3139           handle regular character
3140     } (end while character to read)
3141 
3142 */
3143 
3144 STATIC char *
S_scan_const(pTHX_ char * start)3145 S_scan_const(pTHX_ char *start)
3146 {
3147     const char * const send = PL_bufend;/* end of the constant */
3148     SV *sv = newSV(send - start);       /* sv for the constant.  See note below
3149                                            on sizing. */
3150     char *s = start;			/* start of the constant */
3151     char *d = SvPVX(sv);		/* destination for copies */
3152     bool dorange = FALSE;               /* are we in a translit range? */
3153     bool didrange = FALSE;              /* did we just finish a range? */
3154     bool in_charclass = FALSE;          /* within /[...]/ */
3155     const bool s_is_utf8 = cBOOL(UTF);  /* Is the source string assumed to be
3156                                            UTF8?  But, this can show as true
3157                                            when the source isn't utf8, as for
3158                                            example when it is entirely composed
3159                                            of hex constants */
3160     bool d_is_utf8 = FALSE;             /* Output constant is UTF8 */
3161     STRLEN utf8_variant_count = 0;      /* When not in UTF-8, this counts the
3162                                            number of characters found so far
3163                                            that will expand (into 2 bytes)
3164                                            should we have to convert to
3165                                            UTF-8) */
3166     SV *res;		                /* result from charnames */
3167     STRLEN offset_to_max = 0;   /* The offset in the output to where the range
3168                                    high-end character is temporarily placed */
3169 
3170     /* Does something require special handling in tr/// ?  This avoids extra
3171      * work in a less likely case.  As such, khw didn't feel it was worth
3172      * adding any branches to the more mainline code to handle this, which
3173      * means that this doesn't get set in some circumstances when things like
3174      * \x{100} get expanded out.  As a result there needs to be extra testing
3175      * done in the tr code */
3176     bool has_above_latin1 = FALSE;
3177 
3178     /* Note on sizing:  The scanned constant is placed into sv, which is
3179      * initialized by newSV() assuming one byte of output for every byte of
3180      * input.  This routine expects newSV() to allocate an extra byte for a
3181      * trailing NUL, which this routine will append if it gets to the end of
3182      * the input.  There may be more bytes of input than output (eg., \N{LATIN
3183      * CAPITAL LETTER A}), or more output than input if the constant ends up
3184      * recoded to utf8, but each time a construct is found that might increase
3185      * the needed size, SvGROW() is called.  Its size parameter each time is
3186      * based on the best guess estimate at the time, namely the length used so
3187      * far, plus the length the current construct will occupy, plus room for
3188      * the trailing NUL, plus one byte for every input byte still unscanned */
3189 
3190     UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
3191                        before set */
3192 #ifdef EBCDIC
3193     int backslash_N = 0;            /* ? was the character from \N{} */
3194     int non_portable_endpoint = 0;  /* ? In a range is an endpoint
3195                                        platform-specific like \x65 */
3196 #endif
3197 
3198     PERL_ARGS_ASSERT_SCAN_CONST;
3199 
3200     assert(PL_lex_inwhat != OP_TRANSR);
3201 
3202     /* Protect sv from errors and fatal warnings. */
3203     ENTER_with_name("scan_const");
3204     SAVEFREESV(sv);
3205 
3206     /* A bunch of code in the loop below assumes that if s[n] exists and is not
3207      * NUL, then s[n+1] exists.  This assertion makes sure that assumption is
3208      * valid */
3209     assert(*send == '\0');
3210 
3211     while (s < send
3212            || dorange   /* Handle tr/// range at right edge of input */
3213     ) {
3214 
3215         /* get transliterations out of the way (they're most literal) */
3216         if (PL_lex_inwhat == OP_TRANS) {
3217 
3218             /* But there isn't any special handling necessary unless there is a
3219              * range, so for most cases we just drop down and handle the value
3220              * as any other.  There are two exceptions.
3221              *
3222              * 1.  A hyphen indicates that we are actually going to have a
3223              *     range.  In this case, skip the '-', set a flag, then drop
3224              *     down to handle what should be the end range value.
3225              * 2.  After we've handled that value, the next time through, that
3226              *     flag is set and we fix up the range.
3227              *
3228              * Ranges entirely within Latin1 are expanded out entirely, in
3229              * order to make the transliteration a simple table look-up.
3230              * Ranges that extend above Latin1 have to be done differently, so
3231              * there is no advantage to expanding them here, so they are
3232              * stored here as Min, RANGE_INDICATOR, Max.  'RANGE_INDICATOR' is
3233              * a byte that can't occur in legal UTF-8, and hence can signify a
3234              * hyphen without any possible ambiguity.  On EBCDIC machines, if
3235              * the range is expressed as Unicode, the Latin1 portion is
3236              * expanded out even if the range extends above Latin1.  This is
3237              * because each code point in it has to be processed here
3238              * individually to get its native translation */
3239 
3240             if (! dorange) {
3241 
3242                 /* Here, we don't think we're in a range.  If the new character
3243                  * is not a hyphen; or if it is a hyphen, but it's too close to
3244                  * either edge to indicate a range, or if we haven't output any
3245                  * characters yet then it's a regular character. */
3246                 if (*s != '-' || s >= send - 1 || s == start || d == SvPVX(sv))
3247                 {
3248 
3249                     /* A regular character.  Process like any other, but first
3250                      * clear any flags */
3251                     didrange = FALSE;
3252                     dorange = FALSE;
3253 #ifdef EBCDIC
3254                     non_portable_endpoint = 0;
3255                     backslash_N = 0;
3256 #endif
3257                     /* The tests here for being above Latin1 and similar ones
3258                      * in the following 'else' suffice to find all such
3259                      * occurences in the constant, except those added by a
3260                      * backslash escape sequence, like \x{100}.  Mostly, those
3261                      * set 'has_above_latin1' as appropriate */
3262                     if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3263                         has_above_latin1 = TRUE;
3264                     }
3265 
3266                     /* Drops down to generic code to process current byte */
3267                 }
3268                 else {  /* Is a '-' in the context where it means a range */
3269                     if (didrange) { /* Something like y/A-C-Z// */
3270                         Perl_croak(aTHX_ "Ambiguous range in transliteration"
3271                                          " operator");
3272                     }
3273 
3274                     dorange = TRUE;
3275 
3276                     s++;    /* Skip past the hyphen */
3277 
3278                     /* d now points to where the end-range character will be
3279                      * placed.  Drop down to get that character.  We'll finish
3280                      * processing the range the next time through the loop */
3281 
3282                     if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3283                         has_above_latin1 = TRUE;
3284                     }
3285 
3286                     /* Drops down to generic code to process current byte */
3287                 }
3288             }  /* End of not a range */
3289             else {
3290                 /* Here we have parsed a range.  Now must handle it.  At this
3291                  * point:
3292                  * 'sv' is a SV* that contains the output string we are
3293                  *      constructing.  The final two characters in that string
3294                  *      are the range start and range end, in order.
3295                  * 'd'  points to just beyond the range end in the 'sv' string,
3296                  *      where we would next place something
3297                  */
3298                 char * max_ptr;
3299                 char * min_ptr;
3300                 IV range_min;
3301                 IV range_max;	/* last character in range */
3302                 STRLEN grow;
3303                 Size_t offset_to_min = 0;
3304                 Size_t extras = 0;
3305 #ifdef EBCDIC
3306                 bool convert_unicode;
3307                 IV real_range_max = 0;
3308 #endif
3309                 /* Get the code point values of the range ends. */
3310                 max_ptr = (d_is_utf8) ? (char *) utf8_hop( (U8*) d, -1) : d - 1;
3311                 offset_to_max = max_ptr - SvPVX_const(sv);
3312                 if (d_is_utf8) {
3313                     /* We know the utf8 is valid, because we just constructed
3314                      * it ourselves in previous loop iterations */
3315                     min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
3316                     range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL);
3317                     range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL);
3318 
3319                     /* This compensates for not all code setting
3320                      * 'has_above_latin1', so that we don't skip stuff that
3321                      * should be executed */
3322                     if (range_max > 255) {
3323                         has_above_latin1 = TRUE;
3324                     }
3325                 }
3326                 else {
3327                     min_ptr = max_ptr - 1;
3328                     range_min = * (U8*) min_ptr;
3329                     range_max = * (U8*) max_ptr;
3330                 }
3331 
3332                 /* If the range is just a single code point, like tr/a-a/.../,
3333                  * that code point is already in the output, twice.  We can
3334                  * just back up over the second instance and avoid all the rest
3335                  * of the work.  But if it is a variant character, it's been
3336                  * counted twice, so decrement.  (This unlikely scenario is
3337                  * special cased, like the one for a range of 2 code points
3338                  * below, only because the main-line code below needs a range
3339                  * of 3 or more to work without special casing.  Might as well
3340                  * get it out of the way now.) */
3341                 if (UNLIKELY(range_max == range_min)) {
3342                     d = max_ptr;
3343                     if (! d_is_utf8 && ! UVCHR_IS_INVARIANT(range_max)) {
3344                         utf8_variant_count--;
3345                     }
3346                     goto range_done;
3347                 }
3348 
3349 #ifdef EBCDIC
3350                 /* On EBCDIC platforms, we may have to deal with portable
3351                  * ranges.  These happen if at least one range endpoint is a
3352                  * Unicode value (\N{...}), or if the range is a subset of
3353                  * [A-Z] or [a-z], and both ends are literal characters,
3354                  * like 'A', and not like \x{C1} */
3355                 convert_unicode =
3356                                cBOOL(backslash_N)   /* \N{} forces Unicode,
3357                                                        hence portable range */
3358                     || (     ! non_portable_endpoint
3359                         && ((  isLOWER_A(range_min) && isLOWER_A(range_max))
3360                            || (isUPPER_A(range_min) && isUPPER_A(range_max))));
3361                 if (convert_unicode) {
3362 
3363                     /* Special handling is needed for these portable ranges.
3364                      * They are defined to be in Unicode terms, which includes
3365                      * all the Unicode code points between the end points.
3366                      * Convert to Unicode to get the Unicode range.  Later we
3367                      * will convert each code point in the range back to
3368                      * native.  */
3369                     range_min = NATIVE_TO_UNI(range_min);
3370                     range_max = NATIVE_TO_UNI(range_max);
3371                 }
3372 #endif
3373 
3374                 if (range_min > range_max) {
3375 #ifdef EBCDIC
3376                     if (convert_unicode) {
3377                         /* Need to convert back to native for meaningful
3378                          * messages for this platform */
3379                         range_min = UNI_TO_NATIVE(range_min);
3380                         range_max = UNI_TO_NATIVE(range_max);
3381                     }
3382 #endif
3383                     /* Use the characters themselves for the error message if
3384                      * ASCII printables; otherwise some visible representation
3385                      * of them */
3386                     if (isPRINT_A(range_min) && isPRINT_A(range_max)) {
3387                         Perl_croak(aTHX_
3388                          "Invalid range \"%c-%c\" in transliteration operator",
3389                          (char)range_min, (char)range_max);
3390                     }
3391 #ifdef EBCDIC
3392                     else if (convert_unicode) {
3393         /* diag_listed_as: Invalid range "%s" in transliteration operator */
3394                         Perl_croak(aTHX_
3395                            "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04"
3396                            UVXf "}\" in transliteration operator",
3397                            range_min, range_max);
3398                     }
3399 #endif
3400                     else {
3401         /* diag_listed_as: Invalid range "%s" in transliteration operator */
3402                         Perl_croak(aTHX_
3403                            "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\""
3404                            " in transliteration operator",
3405                            range_min, range_max);
3406                     }
3407                 }
3408 
3409                 /* If the range is exactly two code points long, they are
3410                  * already both in the output */
3411                 if (UNLIKELY(range_min + 1 == range_max)) {
3412                     goto range_done;
3413                 }
3414 
3415                 /* Here the range contains at least 3 code points */
3416 
3417                 if (d_is_utf8) {
3418 
3419                     /* If everything in the transliteration is below 256, we
3420                      * can avoid special handling later.  A translation table
3421                      * for each of those bytes is created by op.c.  So we
3422                      * expand out all ranges to their constituent code points.
3423                      * But if we've encountered something above 255, the
3424                      * expanding won't help, so skip doing that.  But if it's
3425                      * EBCDIC, we may have to look at each character below 256
3426                      * if we have to convert to/from Unicode values */
3427                     if (   has_above_latin1
3428 #ifdef EBCDIC
3429                         && (range_min > 255 || ! convert_unicode)
3430 #endif
3431                     ) {
3432                         const STRLEN off = d - SvPVX(sv);
3433                         const STRLEN extra = 1 + (send - s) + 1;
3434                         char *e;
3435 
3436                         /* Move the high character one byte to the right; then
3437                          * insert between it and the range begin, an illegal
3438                          * byte which serves to indicate this is a range (using
3439                          * a '-' would be ambiguous). */
3440 
3441                         if (off + extra > SvLEN(sv)) {
3442                             d = off + SvGROW(sv, off + extra);
3443                             max_ptr = d - off + offset_to_max;
3444                         }
3445 
3446                         e = d++;
3447                         while (e-- > max_ptr) {
3448                             *(e + 1) = *e;
3449                         }
3450                         *(e + 1) = (char) RANGE_INDICATOR;
3451                         goto range_done;
3452                     }
3453 
3454                     /* Here, we're going to expand out the range.  For EBCDIC
3455                      * the range can extend above 255 (not so in ASCII), so
3456                      * for EBCDIC, split it into the parts above and below
3457                      * 255/256 */
3458 #ifdef EBCDIC
3459                     if (range_max > 255) {
3460                         real_range_max = range_max;
3461                         range_max = 255;
3462                     }
3463 #endif
3464                 }
3465 
3466                 /* Here we need to expand out the string to contain each
3467                  * character in the range.  Grow the output to handle this.
3468                  * For non-UTF8, we need a byte for each code point in the
3469                  * range, minus the three that we've already allocated for: the
3470                  * hyphen, the min, and the max.  For UTF-8, we need this
3471                  * plus an extra byte for each code point that occupies two
3472                  * bytes (is variant) when in UTF-8 (except we've already
3473                  * allocated for the end points, including if they are
3474                  * variants).  For ASCII platforms and Unicode ranges on EBCDIC
3475                  * platforms, it's easy to calculate a precise number.  To
3476                  * start, we count the variants in the range, which we need
3477                  * elsewhere in this function anyway.  (For the case where it
3478                  * isn't easy to calculate, 'extras' has been initialized to 0,
3479                  * and the calculation is done in a loop further down.) */
3480 #ifdef EBCDIC
3481                 if (convert_unicode)
3482 #endif
3483                 {
3484                     /* This is executed unconditionally on ASCII, and for
3485                      * Unicode ranges on EBCDIC.  Under these conditions, all
3486                      * code points above a certain value are variant; and none
3487                      * under that value are.  We just need to find out how much
3488                      * of the range is above that value.  We don't count the
3489                      * end points here, as they will already have been counted
3490                      * as they were parsed. */
3491                     if (range_min >= UTF_CONTINUATION_MARK) {
3492 
3493                         /* The whole range is made up of variants */
3494                         extras = (range_max - 1) - (range_min + 1) + 1;
3495                     }
3496                     else if (range_max >= UTF_CONTINUATION_MARK) {
3497 
3498                         /* Only the higher portion of the range is variants */
3499                         extras = (range_max - 1) - UTF_CONTINUATION_MARK + 1;
3500                     }
3501 
3502                     utf8_variant_count += extras;
3503                 }
3504 
3505                 /* The base growth is the number of code points in the range,
3506                  * not including the endpoints, which have already been sized
3507                  * for (and output).  We don't subtract for the hyphen, as it
3508                  * has been parsed but not output, and the SvGROW below is
3509                  * based only on what's been output plus what's left to parse.
3510                  * */
3511                 grow = (range_max - 1) - (range_min + 1) + 1;
3512 
3513                 if (d_is_utf8) {
3514 #ifdef EBCDIC
3515                     /* In some cases in EBCDIC, we haven't yet calculated a
3516                      * precise amount needed for the UTF-8 variants.  Just
3517                      * assume the worst case, that everything will expand by a
3518                      * byte */
3519                     if (! convert_unicode) {
3520                         grow *= 2;
3521                     }
3522                     else
3523 #endif
3524                     {
3525                         /* Otherwise we know exactly how many variants there
3526                          * are in the range. */
3527                         grow += extras;
3528                     }
3529                 }
3530 
3531                 /* Grow, but position the output to overwrite the range min end
3532                  * point, because in some cases we overwrite that */
3533                 SvCUR_set(sv, d - SvPVX_const(sv));
3534                 offset_to_min = min_ptr - SvPVX_const(sv);
3535 
3536                 /* See Note on sizing above. */
3537                 d = offset_to_min + SvGROW(sv, SvCUR(sv)
3538                                              + (send - s)
3539                                              + grow
3540                                              + 1 /* Trailing NUL */ );
3541 
3542                 /* Now, we can expand out the range. */
3543 #ifdef EBCDIC
3544                 if (convert_unicode) {
3545                     SSize_t i;
3546 
3547                     /* Recall that the min and max are now in Unicode terms, so
3548                      * we have to convert each character to its native
3549                      * equivalent */
3550                     if (d_is_utf8) {
3551                         for (i = range_min; i <= range_max; i++) {
3552                             append_utf8_from_native_byte(
3553                                                     LATIN1_TO_NATIVE((U8) i),
3554                                                     (U8 **) &d);
3555                         }
3556                     }
3557                     else {
3558                         for (i = range_min; i <= range_max; i++) {
3559                             *d++ = (char)LATIN1_TO_NATIVE((U8) i);
3560                         }
3561                     }
3562                 }
3563                 else
3564 #endif
3565                 /* Always gets run for ASCII, and sometimes for EBCDIC. */
3566                 {
3567                     /* Here, no conversions are necessary, which means that the
3568                      * first character in the range is already in 'd' and
3569                      * valid, so we can skip overwriting it */
3570                     if (d_is_utf8) {
3571                         SSize_t i;
3572                         d += UTF8SKIP(d);
3573                         for (i = range_min + 1; i <= range_max; i++) {
3574                             append_utf8_from_native_byte((U8) i, (U8 **) &d);
3575                         }
3576                     }
3577                     else {
3578                         SSize_t i;
3579                         d++;
3580                         assert(range_min + 1 <= range_max);
3581                         for (i = range_min + 1; i < range_max; i++) {
3582 #ifdef EBCDIC
3583                             /* In this case on EBCDIC, we haven't calculated
3584                              * the variants.  Do it here, as we go along */
3585                             if (! UVCHR_IS_INVARIANT(i)) {
3586                                 utf8_variant_count++;
3587                             }
3588 #endif
3589                             *d++ = (char)i;
3590                         }
3591 
3592                         /* The range_max is done outside the loop so as to
3593                          * avoid having to special case not incrementing
3594                          * 'utf8_variant_count' on EBCDIC (it's already been
3595                          * counted when originally parsed) */
3596                         *d++ = (char) range_max;
3597                     }
3598                 }
3599 
3600 #ifdef EBCDIC
3601                 /* If the original range extended above 255, add in that
3602                  * portion. */
3603                 if (real_range_max) {
3604                     *d++ = (char) UTF8_TWO_BYTE_HI(0x100);
3605                     *d++ = (char) UTF8_TWO_BYTE_LO(0x100);
3606                     if (real_range_max > 0x100) {
3607                         if (real_range_max > 0x101) {
3608                             *d++ = (char) RANGE_INDICATOR;
3609                         }
3610                         d = (char*)uvchr_to_utf8((U8*)d, real_range_max);
3611                     }
3612                 }
3613 #endif
3614 
3615               range_done:
3616                 /* mark the range as done, and continue */
3617                 didrange = TRUE;
3618                 dorange = FALSE;
3619 #ifdef EBCDIC
3620                 non_portable_endpoint = 0;
3621                 backslash_N = 0;
3622 #endif
3623                 continue;
3624             } /* End of is a range */
3625         } /* End of transliteration.  Joins main code after these else's */
3626         else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3627             char *s1 = s-1;
3628             int esc = 0;
3629             while (s1 >= start && *s1-- == '\\')
3630                 esc = !esc;
3631             if (!esc)
3632                 in_charclass = TRUE;
3633         }
3634         else if (*s == ']' && PL_lex_inpat && in_charclass) {
3635             char *s1 = s-1;
3636             int esc = 0;
3637             while (s1 >= start && *s1-- == '\\')
3638                 esc = !esc;
3639             if (!esc)
3640                 in_charclass = FALSE;
3641         }
3642             /* skip for regexp comments /(?#comment)/, except for the last
3643              * char, which will be done separately.  Stop on (?{..}) and
3644              * friends (??{ ... }) or (*{ ... }) */
3645         else if (*s == '(' && PL_lex_inpat && (s[1] == '?' || s[1] == '*') && !in_charclass) {
3646             if (s[1] == '?' && s[2] == '#') {
3647                 if (s_is_utf8) {
3648                     PERL_UINT_FAST8_T  len = UTF8SKIP(s);
3649 
3650                     while (s + len < send && *s != ')') {
3651                         Copy(s, d, len, U8);
3652                         d += len;
3653                         s += len;
3654                         len = UTF8_SAFE_SKIP(s, send);
3655                     }
3656                 }
3657                 else while (s+1 < send && *s != ')') {
3658                     *d++ = *s++;
3659                 }
3660             }
3661             else
3662             if (!PL_lex_casemods &&
3663                 /* The following should match regcomp.c */
3664                 ((s[1] == '?' && (s[2] == '{'                        /* (?{ ... })  */
3665                               || (s[2] == '?' && s[3] == '{'))) ||   /* (??{ ... }) */
3666                  (s[1] == '*' && (s[2] == '{' )))                    /* (*{ ... })  */
3667             ){
3668                 break;
3669             }
3670         }
3671             /* likewise skip #-initiated comments in //x patterns */
3672         else if (*s == '#'
3673                  && PL_lex_inpat
3674                  && !in_charclass
3675                  && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED)
3676         {
3677             while (s < send && *s != '\n')
3678                 *d++ = *s++;
3679         }
3680             /* no further processing of single-quoted regex */
3681         else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3682             goto default_action;
3683 
3684             /* check for embedded arrays
3685              * (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3686              */
3687         else if (*s == '@' && s[1]) {
3688             if (UTF
3689                ? isIDFIRST_utf8_safe(s+1, send)
3690                : isWORDCHAR_A(s[1]))
3691             {
3692                 break;
3693             }
3694             if (memCHRs(":'{$", s[1]))
3695                 break;
3696             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3697                 break; /* in regexp, neither @+ nor @- are interpolated */
3698         }
3699             /* check for embedded scalars.  only stop if we're sure it's a
3700              * variable.  */
3701         else if (*s == '$') {
3702             if (!PL_lex_inpat)	/* not a regexp, so $ must be var */
3703                 break;
3704             if (s + 1 < send && !memCHRs("()| \r\n\t", s[1])) {
3705                 if (s[1] == '\\') {
3706                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3707                                    "Possible unintended interpolation of $\\ in regex");
3708                 }
3709                 break;		/* in regexp, $ might be tail anchor */
3710             }
3711         }
3712 
3713         /* End of else if chain - OP_TRANS rejoin rest */
3714 
3715         if (UNLIKELY(s >= send)) {
3716             assert(s == send);
3717             break;
3718         }
3719 
3720         /* backslashes */
3721         if (*s == '\\' && s+1 < send) {
3722             char* bslash = s;   /* point to beginning \ */
3723             char* rbrace;	/* point to ending '}' */
3724             char* e;	        /* 1 past the meat (non-blanks) before the
3725                                    brace */
3726             s++;
3727 
3728             /* warn on \1 - \9 in substitution replacements, but note that \11
3729              * is an octal; and \19 is \1 followed by '9' */
3730             if (PL_lex_inwhat == OP_SUBST
3731                 && !PL_lex_inpat
3732                 && isDIGIT(*s)
3733                 && *s != '0'
3734                 && !isDIGIT(s[1]))
3735             {
3736                 /* diag_listed_as: \%d better written as $%d */
3737                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3738                 s = bslash;
3739                 *s = '$';
3740                 break;
3741             }
3742 
3743             /* string-change backslash escapes */
3744             if (PL_lex_inwhat != OP_TRANS && *s && memCHRs("lLuUEQF", *s)) {
3745                 s = bslash;
3746                 break;
3747             }
3748             /* In a pattern, process \N, but skip any other backslash escapes.
3749              * This is because we don't want to translate an escape sequence
3750              * into a meta symbol and have the regex compiler use the meta
3751              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
3752              * in spite of this, we do have to process \N here while the proper
3753              * charnames handler is in scope.  See bugs #56444 and #62056.
3754              *
3755              * There is a complication because \N in a pattern may also stand
3756              * for 'match a non-nl', and not mean a charname, in which case its
3757              * processing should be deferred to the regex compiler.  To be a
3758              * charname it must be followed immediately by a '{', and not look
3759              * like \N followed by a curly quantifier, i.e., not something like
3760              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
3761              * quantifier */
3762             else if (PL_lex_inpat
3763                     && (*s != 'N'
3764                         || s[1] != '{'
3765                         || regcurly(s + 1, send, NULL)))
3766             {
3767                 *d++ = '\\';
3768                 goto default_action;
3769             }
3770 
3771             switch (*s) {
3772             default:
3773                 {
3774                     if ((isALPHANUMERIC(*s)))
3775                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3776                                        "Unrecognized escape \\%c passed through",
3777                                        *s);
3778                     /* default action is to copy the quoted character */
3779                     goto default_action;
3780                 }
3781 
3782             /* eg. \132 indicates the octal constant 0132 */
3783             case '0': case '1': case '2': case '3':
3784             case '4': case '5': case '6': case '7':
3785                 {
3786                     I32 flags = PERL_SCAN_SILENT_ILLDIGIT
3787                               | PERL_SCAN_NOTIFY_ILLDIGIT;
3788                     STRLEN len = 3;
3789                     uv = grok_oct(s, &len, &flags, NULL);
3790                     s += len;
3791                     if (  (flags & PERL_SCAN_NOTIFY_ILLDIGIT)
3792                         && s < send
3793                         && isDIGIT(*s)  /* like \08, \178 */
3794                         && ckWARN(WARN_MISC))
3795                     {
3796                         Perl_warner(aTHX_ packWARN(WARN_MISC), "%s",
3797                             form_alien_digit_msg(8, len, s, send, UTF, FALSE));
3798                     }
3799                 }
3800                 goto NUM_ESCAPE_INSERT;
3801 
3802             /* eg. \o{24} indicates the octal constant \024 */
3803             case 'o':
3804                 {
3805                     const char* error;
3806 
3807                     if (! grok_bslash_o(&s, send,
3808                                                &uv, &error,
3809                                                NULL,
3810                                                FALSE, /* Not strict */
3811                                                FALSE, /* No illegal cp's */
3812                                                UTF))
3813                     {
3814                         yyerror(error);
3815                         uv = 0; /* drop through to ensure range ends are set */
3816                     }
3817                     goto NUM_ESCAPE_INSERT;
3818                 }
3819 
3820             /* eg. \x24 indicates the hex constant 0x24 */
3821             case 'x':
3822                 {
3823                     const char* error;
3824 
3825                     if (! grok_bslash_x(&s, send,
3826                                                &uv, &error,
3827                                                NULL,
3828                                                FALSE, /* Not strict */
3829                                                FALSE, /* No illegal cp's */
3830                                                UTF))
3831                     {
3832                         yyerror(error);
3833                         uv = 0; /* drop through to ensure range ends are set */
3834                     }
3835                 }
3836 
3837               NUM_ESCAPE_INSERT:
3838                 /* Insert oct or hex escaped character. */
3839 
3840                 /* Here uv is the ordinal of the next character being added */
3841                 if (UVCHR_IS_INVARIANT(uv)) {
3842                     *d++ = (char) uv;
3843                 }
3844                 else {
3845                     if (!d_is_utf8 && uv > 255) {
3846 
3847                         /* Here, 'uv' won't fit unless we convert to UTF-8.
3848                          * If we've only seen invariants so far, all we have to
3849                          * do is turn on the flag */
3850                         if (utf8_variant_count == 0) {
3851                             SvUTF8_on(sv);
3852                         }
3853                         else {
3854                             SvCUR_set(sv, d - SvPVX_const(sv));
3855                             SvPOK_on(sv);
3856                             *d = '\0';
3857 
3858                             sv_utf8_upgrade_flags_grow(
3859                                            sv,
3860                                            SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3861 
3862                                            /* Since we're having to grow here,
3863                                             * make sure we have enough room for
3864                                             * this escape and a NUL, so the
3865                                             * code immediately below won't have
3866                                             * to actually grow again */
3867                                           UVCHR_SKIP(uv)
3868                                         + (STRLEN)(send - s) + 1);
3869                             d = SvPVX(sv) + SvCUR(sv);
3870                         }
3871 
3872                         has_above_latin1 = TRUE;
3873                         d_is_utf8 = TRUE;
3874                     }
3875 
3876                     if (! d_is_utf8) {
3877                         *d++ = (char)uv;
3878                         utf8_variant_count++;
3879                     }
3880                     else {
3881                        /* Usually, there will already be enough room in 'sv'
3882                         * since such escapes are likely longer than any UTF-8
3883                         * sequence they can end up as.  This isn't the case on
3884                         * EBCDIC where \x{40000000} contains 12 bytes, and the
3885                         * UTF-8 for it contains 14.  And, we have to allow for
3886                         * a trailing NUL.  It probably can't happen on ASCII
3887                         * platforms, but be safe.  See Note on sizing above. */
3888                         const STRLEN needed = d - SvPVX(sv)
3889                                             + UVCHR_SKIP(uv)
3890                                             + (send - s)
3891                                             + 1;
3892                         if (UNLIKELY(needed > SvLEN(sv))) {
3893                             SvCUR_set(sv, d - SvPVX_const(sv));
3894                             d = SvCUR(sv) + SvGROW(sv, needed);
3895                         }
3896 
3897                         d = (char*) uvchr_to_utf8_flags((U8*)d, uv,
3898                                                    (ckWARN(WARN_PORTABLE))
3899                                                    ? UNICODE_WARN_PERL_EXTENDED
3900                                                    : 0);
3901                     }
3902                 }
3903 #ifdef EBCDIC
3904                 non_portable_endpoint++;
3905 #endif
3906                 continue;
3907 
3908             case 'N':
3909                 /* In a non-pattern \N must be like \N{U+0041}, or it can be a
3910                  * named character, like \N{LATIN SMALL LETTER A}, or a named
3911                  * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
3912                  * GRAVE} (except y/// can't handle the latter, croaking).  For
3913                  * convenience all three forms are referred to as "named
3914                  * characters" below.
3915                  *
3916                  * For patterns, \N also can mean to match a non-newline.  Code
3917                  * before this 'switch' statement should already have handled
3918                  * this situation, and hence this code only has to deal with
3919                  * the named character cases.
3920                  *
3921                  * For non-patterns, the named characters are converted to
3922                  * their string equivalents.  In patterns, named characters are
3923                  * not converted to their ultimate forms for the same reasons
3924                  * that other escapes aren't (mainly that the ultimate
3925                  * character could be considered a meta-symbol by the regex
3926                  * compiler).  Instead, they are converted to the \N{U+...}
3927                  * form to get the value from the charnames that is in effect
3928                  * right now, while preserving the fact that it was a named
3929                  * character, so that the regex compiler knows this.
3930                  *
3931                  * The structure of this section of code (besides checking for
3932                  * errors and upgrading to utf8) is:
3933                  *    If the named character is of the form \N{U+...}, pass it
3934                  *      through if a pattern; otherwise convert the code point
3935                  *      to utf8
3936                  *    Otherwise must be some \N{NAME}: convert to
3937                  *      \N{U+c1.c2...} if a pattern; otherwise convert to utf8
3938                  *
3939                  * Transliteration is an exception.  The conversion to utf8 is
3940                  * only done if the code point requires it to be representable.
3941                  *
3942                  * Here, 's' points to the 'N'; the test below is guaranteed to
3943                  * succeed if we are being called on a pattern, as we already
3944                  * know from a test above that the next character is a '{'.  A
3945                  * non-pattern \N must mean 'named character', which requires
3946                  * braces */
3947                 s++;
3948                 if (*s != '{') {
3949                     yyerror("Missing braces on \\N{}");
3950                     *d++ = '\0';
3951                     continue;
3952                 }
3953                 s++;
3954 
3955                 /* If there is no matching '}', it is an error. */
3956                 if (! (rbrace = (char *) memchr(s, '}', send - s))) {
3957                     if (! PL_lex_inpat) {
3958                         yyerror("Missing right brace on \\N{}");
3959                     } else {
3960                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3961                     }
3962                     yyquit(); /* Have exhausted the input. */
3963                 }
3964 
3965                 /* Here it looks like a named character */
3966                 while (s < rbrace && isBLANK(*s)) {
3967                     s++;
3968                 }
3969 
3970                 e = rbrace;
3971                 while (s < e && isBLANK(*(e - 1))) {
3972                     e--;
3973                 }
3974 
3975                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3976                     s += 2;	    /* Skip to next char after the 'U+' */
3977                     if (PL_lex_inpat) {
3978 
3979                         /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */
3980                         /* Check the syntax.  */
3981                         if (!isXDIGIT(*s)) {
3982                           bad_NU:
3983                             yyerror(
3984                                 "Invalid hexadecimal number in \\N{U+...}"
3985                             );
3986                             s = rbrace + 1;
3987                             *d++ = '\0';
3988                             continue;
3989                         }
3990                         while (++s < e) {
3991                             if (isXDIGIT(*s))
3992                                 continue;
3993                             else if ((*s == '.' || *s == '_')
3994                                   && isXDIGIT(s[1]))
3995                                 continue;
3996                             goto bad_NU;
3997                         }
3998 
3999                         /* Pass everything through unchanged.
4000                          * +1 is to include the '}' */
4001                         Copy(bslash, d, rbrace - bslash + 1, char);
4002                         d += rbrace - bslash + 1;
4003                     }
4004                     else {  /* Not a pattern: convert the hex to string */
4005                         I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
4006                                   | PERL_SCAN_SILENT_ILLDIGIT
4007                                   | PERL_SCAN_SILENT_OVERFLOW
4008                                   | PERL_SCAN_DISALLOW_PREFIX;
4009                         STRLEN len = e - s;
4010 
4011                         uv = grok_hex(s, &len, &flags, NULL);
4012                         if (len == 0 || (len != (STRLEN)(e - s)))
4013                             goto bad_NU;
4014 
4015                         if (    uv > MAX_LEGAL_CP
4016                             || (flags & PERL_SCAN_GREATER_THAN_UV_MAX))
4017                         {
4018                             yyerror(form_cp_too_large_msg(16, s, len, 0));
4019                             uv = 0; /* drop through to ensure range ends are
4020                                        set */
4021                         }
4022 
4023                          /* For non-tr///, if the destination is not in utf8,
4024                           * unconditionally recode it to be so.  This is
4025                           * because \N{} implies Unicode semantics, and scalars
4026                           * have to be in utf8 to guarantee those semantics.
4027                           * tr/// doesn't care about Unicode rules, so no need
4028                           * there to upgrade to UTF-8 for small enough code
4029                           * points */
4030                         if (! d_is_utf8 && (   uv > 0xFF
4031                                            || PL_lex_inwhat != OP_TRANS))
4032                         {
4033                             /* See Note on sizing above.  */
4034                             const STRLEN extra = OFFUNISKIP(uv) + (send - rbrace) + 1;
4035 
4036                             SvCUR_set(sv, d - SvPVX_const(sv));
4037                             SvPOK_on(sv);
4038                             *d = '\0';
4039 
4040                             if (utf8_variant_count == 0) {
4041                                 SvUTF8_on(sv);
4042                                 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
4043                             }
4044                             else {
4045                                 sv_utf8_upgrade_flags_grow(
4046                                                sv,
4047                                                SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4048                                                extra);
4049                                 d = SvPVX(sv) + SvCUR(sv);
4050                             }
4051 
4052                             d_is_utf8 = TRUE;
4053                             has_above_latin1 = TRUE;
4054                         }
4055 
4056                         /* Add the (Unicode) code point to the output. */
4057                         if (OFFUNI_IS_INVARIANT(uv)) {
4058                             *d++ = (char) LATIN1_TO_NATIVE(uv);
4059                         }
4060                         else if (! d_is_utf8) {
4061                             *d++ = (char) LATIN1_TO_NATIVE(uv);
4062                             utf8_variant_count++;
4063                         }
4064                         else {
4065                             d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv,
4066                                                    (ckWARN(WARN_PORTABLE))
4067                                                    ? UNICODE_WARN_PERL_EXTENDED
4068                                                    : 0);
4069                         }
4070                     }
4071                 }
4072                 else     /* Here is \N{NAME} but not \N{U+...}. */
4073                      if (! (res = get_and_check_backslash_N_name_wrapper(s, e)))
4074                 {   /* Failed.  We should die eventually, but for now use a NUL
4075                        to keep parsing */
4076                     *d++ = '\0';
4077                 }
4078                 else {  /* Successfully evaluated the name */
4079                     STRLEN len;
4080                     const char *str = SvPV_const(res, len);
4081                     if (PL_lex_inpat) {
4082 
4083                         if (! len) { /* The name resolved to an empty string */
4084                             const char empty_N[] = "\\N{_}";
4085                             Copy(empty_N, d, sizeof(empty_N) - 1, char);
4086                             d += sizeof(empty_N) - 1;
4087                         }
4088                         else {
4089                             /* In order to not lose information for the regex
4090                             * compiler, pass the result in the specially made
4091                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
4092                             * the code points in hex of each character
4093                             * returned by charnames */
4094 
4095                             const char *str_end = str + len;
4096                             const STRLEN off = d - SvPVX_const(sv);
4097 
4098                             if (! SvUTF8(res)) {
4099                                 /* For the non-UTF-8 case, we can determine the
4100                                  * exact length needed without having to parse
4101                                  * through the string.  Each character takes up
4102                                  * 2 hex digits plus either a trailing dot or
4103                                  * the "}" */
4104                                 const char initial_text[] = "\\N{U+";
4105                                 const STRLEN initial_len = sizeof(initial_text)
4106                                                            - 1;
4107                                 d = off + SvGROW(sv, off
4108                                                     + 3 * len
4109 
4110                                                     /* +1 for trailing NUL */
4111                                                     + initial_len + 1
4112 
4113                                                     + (STRLEN)(send - rbrace));
4114                                 Copy(initial_text, d, initial_len, char);
4115                                 d += initial_len;
4116                                 while (str < str_end) {
4117                                     char hex_string[4];
4118                                     int len =
4119                                         my_snprintf(hex_string,
4120                                                   sizeof(hex_string),
4121                                                   "%02X.",
4122 
4123                                                   /* The regex compiler is
4124                                                    * expecting Unicode, not
4125                                                    * native */
4126                                                   NATIVE_TO_LATIN1(*str));
4127                                     PERL_MY_SNPRINTF_POST_GUARD(len,
4128                                                            sizeof(hex_string));
4129                                     Copy(hex_string, d, 3, char);
4130                                     d += 3;
4131                                     str++;
4132                                 }
4133                                 d--;    /* Below, we will overwrite the final
4134                                            dot with a right brace */
4135                             }
4136                             else {
4137                                 STRLEN char_length; /* cur char's byte length */
4138 
4139                                 /* and the number of bytes after this is
4140                                  * translated into hex digits */
4141                                 STRLEN output_length;
4142 
4143                                 /* 2 hex per byte; 2 chars for '\N'; 2 chars
4144                                  * for max('U+', '.'); and 1 for NUL */
4145                                 char hex_string[2 * UTF8_MAXBYTES + 5];
4146 
4147                                 /* Get the first character of the result. */
4148                                 U32 uv = utf8n_to_uvchr((U8 *) str,
4149                                                         len,
4150                                                         &char_length,
4151                                                         UTF8_ALLOW_ANYUV);
4152                                 /* Convert first code point to Unicode hex,
4153                                  * including the boiler plate before it. */
4154                                 output_length =
4155                                     my_snprintf(hex_string, sizeof(hex_string),
4156                                              "\\N{U+%X",
4157                                              (unsigned int) NATIVE_TO_UNI(uv));
4158 
4159                                 /* Make sure there is enough space to hold it */
4160                                 d = off + SvGROW(sv, off
4161                                                     + output_length
4162                                                     + (STRLEN)(send - rbrace)
4163                                                     + 2);	/* '}' + NUL */
4164                                 /* And output it */
4165                                 Copy(hex_string, d, output_length, char);
4166                                 d += output_length;
4167 
4168                                 /* For each subsequent character, append dot and
4169                                 * its Unicode code point in hex */
4170                                 while ((str += char_length) < str_end) {
4171                                     const STRLEN off = d - SvPVX_const(sv);
4172                                     U32 uv = utf8n_to_uvchr((U8 *) str,
4173                                                             str_end - str,
4174                                                             &char_length,
4175                                                             UTF8_ALLOW_ANYUV);
4176                                     output_length =
4177                                         my_snprintf(hex_string,
4178                                              sizeof(hex_string),
4179                                              ".%X",
4180                                              (unsigned int) NATIVE_TO_UNI(uv));
4181 
4182                                     d = off + SvGROW(sv, off
4183                                                         + output_length
4184                                                         + (STRLEN)(send - rbrace)
4185                                                         + 2);	/* '}' +  NUL */
4186                                     Copy(hex_string, d, output_length, char);
4187                                     d += output_length;
4188                                 }
4189                             }
4190 
4191                             *d++ = '}';	/* Done.  Add the trailing brace */
4192                         }
4193                     }
4194                     else { /* Here, not in a pattern.  Convert the name to a
4195                             * string. */
4196 
4197                         if (PL_lex_inwhat == OP_TRANS) {
4198                             str = SvPV_const(res, len);
4199                             if (len > ((SvUTF8(res))
4200                                        ? UTF8SKIP(str)
4201                                        : 1U))
4202                             {
4203                                 yyerror(Perl_form(aTHX_
4204                                     "%.*s must not be a named sequence"
4205                                     " in transliteration operator",
4206                                         /*  +1 to include the "}" */
4207                                     (int) (rbrace + 1 - start), start));
4208                                 *d++ = '\0';
4209                                 goto end_backslash_N;
4210                             }
4211 
4212                             if (SvUTF8(res) && UTF8_IS_ABOVE_LATIN1(*str)) {
4213                                 has_above_latin1 = TRUE;
4214                             }
4215 
4216                         }
4217                         else if (! SvUTF8(res)) {
4218                             /* Make sure \N{} return is UTF-8.  This is because
4219                              * \N{} implies Unicode semantics, and scalars have
4220                              * to be in utf8 to guarantee those semantics; but
4221                              * not needed in tr/// */
4222                             sv_utf8_upgrade_flags(res, 0);
4223                             str = SvPV_const(res, len);
4224                         }
4225 
4226                          /* Upgrade destination to be utf8 if this new
4227                           * component is */
4228                         if (! d_is_utf8 && SvUTF8(res)) {
4229                             /* See Note on sizing above.  */
4230                             const STRLEN extra = len + (send - s) + 1;
4231 
4232                             SvCUR_set(sv, d - SvPVX_const(sv));
4233                             SvPOK_on(sv);
4234                             *d = '\0';
4235 
4236                             if (utf8_variant_count == 0) {
4237                                 SvUTF8_on(sv);
4238                                 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
4239                             }
4240                             else {
4241                                 sv_utf8_upgrade_flags_grow(sv,
4242                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4243                                                 extra);
4244                                 d = SvPVX(sv) + SvCUR(sv);
4245                             }
4246                             d_is_utf8 = TRUE;
4247                         } else if (len > (STRLEN)(e - s + 4)) { /* +4 is for \N{} */
4248 
4249                             /* See Note on sizing above.  (NOTE: SvCUR() is not
4250                              * set correctly here). */
4251                             const STRLEN extra = len + (send - rbrace) + 1;
4252                             const STRLEN off = d - SvPVX_const(sv);
4253                             d = off + SvGROW(sv, off + extra);
4254                         }
4255                         Copy(str, d, len, char);
4256                         d += len;
4257                     }
4258 
4259                     SvREFCNT_dec(res);
4260 
4261                 } /* End \N{NAME} */
4262 
4263               end_backslash_N:
4264 #ifdef EBCDIC
4265                 backslash_N++; /* \N{} is defined to be Unicode */
4266 #endif
4267                 s = rbrace + 1;  /* Point to just after the '}' */
4268                 continue;
4269 
4270             /* \c is a control character */
4271             case 'c':
4272                 s++;
4273                 if (s < send) {
4274                     const char * message;
4275 
4276                     if (! grok_bslash_c(*s, (U8 *) d, &message, NULL)) {
4277                         yyerror(message);
4278                         yyquit();   /* Have always immediately croaked on
4279                                        errors in this */
4280                     }
4281                     d++;
4282                 }
4283                 else {
4284                     yyerror("Missing control char name in \\c");
4285                     yyquit();   /* Are at end of input, no sense continuing */
4286                 }
4287 #ifdef EBCDIC
4288                 non_portable_endpoint++;
4289 #endif
4290                 break;
4291 
4292             /* printf-style backslashes, formfeeds, newlines, etc */
4293             case 'b':
4294                 *d++ = '\b';
4295                 break;
4296             case 'n':
4297                 *d++ = '\n';
4298                 break;
4299             case 'r':
4300                 *d++ = '\r';
4301                 break;
4302             case 'f':
4303                 *d++ = '\f';
4304                 break;
4305             case 't':
4306                 *d++ = '\t';
4307                 break;
4308             case 'e':
4309                 *d++ = ESC_NATIVE;
4310                 break;
4311             case 'a':
4312                 *d++ = '\a';
4313                 break;
4314             } /* end switch */
4315 
4316             s++;
4317             continue;
4318         } /* end if (backslash) */
4319 
4320     default_action:
4321         /* Just copy the input to the output, though we may have to convert
4322          * to/from UTF-8.
4323          *
4324          * If the input has the same representation in UTF-8 as not, it will be
4325          * a single byte, and we don't care about UTF8ness; just copy the byte */
4326         if (NATIVE_BYTE_IS_INVARIANT((U8)(*s))) {
4327             *d++ = *s++;
4328         }
4329         else if (! s_is_utf8 && ! d_is_utf8) {
4330             /* If neither source nor output is UTF-8, is also a single byte,
4331              * just copy it; but this byte counts should we later have to
4332              * convert to UTF-8 */
4333             *d++ = *s++;
4334             utf8_variant_count++;
4335         }
4336         else if (s_is_utf8 && d_is_utf8) {   /* Both UTF-8, can just copy */
4337             const STRLEN len = UTF8SKIP(s);
4338 
4339             /* We expect the source to have already been checked for
4340              * malformedness */
4341             assert(isUTF8_CHAR((U8 *) s, (U8 *) send));
4342 
4343             Copy(s, d, len, U8);
4344             d += len;
4345             s += len;
4346         }
4347         else if (s_is_utf8) { /* UTF8ness matters: convert output to utf8 */
4348             STRLEN need = send - s + 1; /* See Note on sizing above. */
4349 
4350             SvCUR_set(sv, d - SvPVX_const(sv));
4351             SvPOK_on(sv);
4352             *d = '\0';
4353 
4354             if (utf8_variant_count == 0) {
4355                 SvUTF8_on(sv);
4356                 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + need);
4357             }
4358             else {
4359                 sv_utf8_upgrade_flags_grow(sv,
4360                                            SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4361                                            need);
4362                 d = SvPVX(sv) + SvCUR(sv);
4363             }
4364             d_is_utf8 = TRUE;
4365             goto default_action; /* Redo, having upgraded so both are UTF-8 */
4366         }
4367         else {  /* UTF8ness matters: convert this non-UTF8 source char to
4368                    UTF-8 for output.  It will occupy 2 bytes, but don't include
4369                    the input byte since we haven't incremented 's' yet. See
4370                    Note on sizing above. */
4371             const STRLEN off = d - SvPVX(sv);
4372             const STRLEN extra = 2 + (send - s - 1) + 1;
4373             if (off + extra > SvLEN(sv)) {
4374                 d = off + SvGROW(sv, off + extra);
4375             }
4376             *d++ = UTF8_EIGHT_BIT_HI(*s);
4377             *d++ = UTF8_EIGHT_BIT_LO(*s);
4378             s++;
4379         }
4380     } /* while loop to process each character */
4381 
4382     {
4383         const STRLEN off = d - SvPVX(sv);
4384 
4385         /* See if room for the terminating NUL */
4386         if (UNLIKELY(off >= SvLEN(sv))) {
4387 
4388 #ifndef DEBUGGING
4389 
4390             if (off > SvLEN(sv))
4391 #endif
4392                 Perl_croak(aTHX_ "panic: constant overflowed allocated space,"
4393                         " %" UVuf " >= %" UVuf, (UV)off, (UV)SvLEN(sv));
4394 
4395             /* Whew!  Here we don't have room for the terminating NUL, but
4396              * everything else so far has fit.  It's not too late to grow
4397              * to fit the NUL and continue on.  But it is a bug, as the code
4398              * above was supposed to have made room for this, so under
4399              * DEBUGGING builds, we panic anyway.  */
4400             d = off + SvGROW(sv, off + 1);
4401         }
4402     }
4403 
4404     /* terminate the string and set up the sv */
4405     *d = '\0';
4406     SvCUR_set(sv, d - SvPVX_const(sv));
4407 
4408     SvPOK_on(sv);
4409     if (d_is_utf8) {
4410         SvUTF8_on(sv);
4411     }
4412 
4413     /* shrink the sv if we allocated more than we used */
4414     if (SvCUR(sv) + 5 < SvLEN(sv)) {
4415         SvPV_shrink_to_cur(sv);
4416     }
4417 
4418     /* return the substring (via pl_yylval) only if we parsed anything */
4419     if (s > start) {
4420         char *s2 = start;
4421         for (; s2 < s; s2++) {
4422             if (*s2 == '\n')
4423                 COPLINE_INC_WITH_HERELINES;
4424         }
4425         SvREFCNT_inc_simple_void_NN(sv);
4426         if (   (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
4427             && ! PL_parser->lex_re_reparsing)
4428         {
4429             const char *const key = PL_lex_inpat ? "qr" : "q";
4430             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
4431             const char *type;
4432             STRLEN typelen;
4433 
4434             if (PL_lex_inwhat == OP_TRANS) {
4435                 type = "tr";
4436                 typelen = 2;
4437             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
4438                 type = "s";
4439                 typelen = 1;
4440             } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
4441                 type = "q";
4442                 typelen = 1;
4443             } else {
4444                 type = "qq";
4445                 typelen = 2;
4446             }
4447 
4448             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
4449                                 type, typelen, NULL);
4450         }
4451         pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
4452     }
4453     LEAVE_with_name("scan_const");
4454     return s;
4455 }
4456 
4457 /* S_intuit_more
4458  * Returns TRUE if there's more to the expression (e.g., a subscript),
4459  * FALSE otherwise.
4460  *
4461  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
4462  *
4463  * ->[ and ->{ return TRUE
4464  * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
4465  * { and [ outside a pattern are always subscripts, so return TRUE
4466  * if we're outside a pattern and it's not { or [, then return FALSE
4467  * if we're in a pattern and the first char is a {
4468  *   {4,5} (any digits around the comma) returns FALSE
4469  * if we're in a pattern and the first char is a [
4470  *   [] returns FALSE
4471  *   [SOMETHING] has a funky heuristic to decide whether it's a
4472  *      character class or not.  It has to deal with things like
4473  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
4474  * anything else returns TRUE
4475  */
4476 
4477 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
4478 
4479 STATIC int
S_intuit_more(pTHX_ char * s,char * e)4480 S_intuit_more(pTHX_ char *s, char *e)
4481 {
4482     PERL_ARGS_ASSERT_INTUIT_MORE;
4483 
4484     /* This function has been mostly untouched for a long time, due to its,
4485      * 'scariness', and lack of comments.  khw has gone through and done some
4486      * cleanup, while finding various instances of problematic behavior.
4487      * Rather than change this base-level function immediately, khw has added
4488      * commentary to those areas. */
4489 
4490     /* If recursed within brackets, there is more to the expression */
4491     if (PL_lex_brackets)
4492         return TRUE;
4493 
4494     /* If begins with '->' ... */
4495     if (s[0] == '-' && s[1] == '>') {
4496 
4497         /* '->[' and '->{' imply more to the expression */
4498         if (s[2] == '[' || s[2] == '{') {
4499             return TRUE;
4500         }
4501 
4502         /* Any post deref construct implies more to the expression */
4503         if (   FEATURE_POSTDEREF_QQ_IS_ENABLED
4504             && (   (s[2] == '$' && (    s[3] == '*'
4505                                     || (s[3] == '#' && s[4] == '*')))
4506                 || (s[2] == '@' && memCHRs("*[{", s[3])) ))
4507         {
4508             return TRUE;
4509         }
4510     }
4511 
4512     if (s[0] != '{' && s[0] != '[')
4513         return FALSE;
4514 
4515     /* quit immediately from any errors from now on */
4516     PL_parser->sub_no_recover = TRUE;
4517 
4518     /* Here is '{' or '['.  Outside patterns, they're always subscripts */
4519     if (!PL_lex_inpat)
4520         return TRUE;
4521 
4522     /* In a pattern, so maybe we have {n,m}, in which case, there isn't more to
4523      * the expression.
4524      *
4525      * khw: This assumes that anything matching regcurly is a character class.
4526      * The syntax of regcurly has been loosened since this function was
4527      * written, and regcurly never required a comma, as in {0}.  Probably it is
4528      * ok as-is */
4529     if (s[0] == '{') {
4530         if (regcurly(s, e, NULL)) {
4531             return FALSE;
4532         }
4533         return TRUE;
4534     }
4535 
4536     /* Here is '[': maybe we have a character class.  Examine the guts */
4537     s++;
4538 
4539     /* '^' implies a character class; An empty '[]' isn't legal, but it does
4540      * mean there isn't more to come */
4541     if (s[0] == ']' || s[0] == '^')
4542         return FALSE;
4543 
4544     /* Find matching ']'.  khw: This means any s[1] below is guaranteed to
4545      * exist */
4546     const char * const send = (char *) memchr(s, ']', e - s);
4547     if (! send)		/* has to be an expression */
4548         return TRUE;
4549 
4550     /* If the construct consists entirely of one or two digits, call it a
4551      * subscript. */
4552     if (isDIGIT(s[0]) && send - s <= 2 && (send - s == 1 || (isDIGIT(s[1])))) {
4553         return TRUE;
4554     }
4555 
4556     /* this is terrifying, and it mostly works.  See GH #16478.
4557      *
4558      * khw: That ticket shows that the heuristics here get things wrong.  That
4559      * most of the weights are divisible by 5 indicates that not a lot of
4560      * tuning was done, and that the values are fairly arbitrary.  Especially
4561      * problematic are when all characters in the construct are numeric.  We
4562      * have [89] always resolving to a subscript, though that could well be a
4563      * character class that is related to finding non-octals.  And [100] is a
4564      * character class when it could well be a subscript. */
4565 
4566     int weight;
4567 
4568     if (s[0] == '$') {  /* First char is dollar; lean very slightly to it
4569                            being a subscript */
4570         weight = -1;
4571     }
4572     else {              /* Otherwise, lean a little more towards it being a
4573                            character class. */
4574         weight = 2;
4575     }
4576 
4577     /* Unsigned version of current character */
4578     unsigned char un_char = 0;
4579 
4580     /* Keep track of how many multiple occurrences of the same character there
4581      * are */
4582     char seen[256];
4583     Zero(seen, 256, char);
4584 
4585     /* Examine each character in the construct */
4586     bool first_time = true;
4587     for (; s < send; s++, first_time = false) {
4588         unsigned char prev_un_char = un_char;
4589         un_char = (unsigned char) s[0];
4590         switch (s[0]) {
4591           case '@':
4592           case '&':
4593           case '$':
4594 
4595             /* Each additional occurrence of one of these three strongly
4596              * indicates it is a subscript */
4597             weight -= seen[un_char] * 10;
4598 
4599             /* Following one of these characters, we look to see if there is an
4600              * identifier already found in the program by that name.  If so,
4601              * strongly suspect this isn't a character class */
4602             if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) {
4603                 int len;
4604                 char tmpbuf[sizeof PL_tokenbuf * 4];
4605                 scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
4606                 len = (int)strlen(tmpbuf);
4607                 if (   len > 1
4608                     && gv_fetchpvn_flags(tmpbuf,
4609                                          len,
4610                                          UTF ? SVf_UTF8 : 0,
4611                                          SVt_PV))
4612                     weight -= 100;
4613                 else    /* Not a multi-char identifier already known in the
4614                            program; is somewhat likely to be a subscript */
4615                     weight -= 10;
4616             }
4617             else if (   s[0] == '$'
4618                      && s[1]
4619                      && memCHRs("[#!%*<>()-=", s[1]))
4620             {
4621                 /* Here we have what could be a punctuation variable.  If the
4622                  * next character after it is a closing bracket, it makes it
4623                  * quite likely to be that, and hence a subscript.  If it is
4624                  * something else, more mildly a subscript */
4625                 if (/*{*/ memCHRs("])} =", s[2]))
4626                     weight -= 10;
4627                 else
4628                     weight -= 1;
4629             }
4630             break;
4631 
4632           case '\\':
4633             if (s[1]) {
4634                 if (memCHRs("wds]", s[1]))
4635                     weight += 100;  /* \w \d \s => strongly charclass */
4636                     /* khw: Why not \W \D \S \h \v, etc as well? */
4637                 else if (seen[(U8)'\''] || seen[(U8)'"'])
4638                     weight += 1;    /* \' => mildly charclass */
4639                 else if (memCHRs("abcfnrtvx", s[1]))
4640                     weight += 40;   /* \n, etc => charclass */
4641                     /* khw: Why not \e etc as well? */
4642                 else if (isDIGIT(s[1])) {
4643                     weight += 40;   /* \123 => charclass */
4644                     while (s[1] && isDIGIT(s[1]))
4645                         s++;
4646                 }
4647             }
4648             else /* \ followed by NUL strongly indicates character class */
4649                 weight += 100;
4650             break;
4651 
4652           case '-':
4653             /* If it is something like '-\', it is more likely to be a
4654              * character class.
4655              *
4656              * khw: The rest of the conditionals in this 'case' really should
4657              * be subject to an 'else' of this condition */
4658             if (s[1] == '\\')
4659                 weight += 50;
4660 
4661             /* If it is something like 'a-' or '0-', it is more likely to
4662              * be a character class. '!' is the first ASCII graphic, so '!-'
4663              * would be the start of a range of graphics. */
4664             if (! first_time && memCHRs("aA01! ", prev_un_char))
4665                 weight += 30;
4666 
4667             /* If it is something like '-Z' or '-7' (for octal) or '-9' it
4668              * is more likely to be a character class. '~' is the final ASCII
4669              * graphic, so '-~' would be the end of a range of graphics.
4670              *
4671              * khw: Having [-z] really doesn't imply what the comments above
4672              * indicate, so this should only be tested when '! first_time' */
4673             if (memCHRs("zZ79~", s[1]))
4674                 weight += 30;
4675 
4676             /* If it is something like -1 or -$foo, it is more likely to be a
4677              * subscript.  */
4678             if (first_time && (isDIGIT(s[1]) || s[1] == '$')) {
4679                 weight -= 5;	/* cope with negative subscript */
4680             }
4681             break;
4682 
4683           default:
4684             if (  (first_time || (  ! isWORDCHAR(prev_un_char)
4685                                   &&  prev_un_char != '$'
4686                                   &&  prev_un_char != '@'
4687                                   &&  prev_un_char != '&'))
4688                 && isALPHA(s[0])
4689                 && isALPHA(s[1]))
4690             {
4691                 /* Here it's \W (that isn't [$@&] ) followed immediately by two
4692                  * alphas in a row.  Accumulate all the consecutive alphas */
4693                 char *d = s;
4694                 while (isALPHA(s[0]))
4695                     s++;
4696 
4697                 /* If those alphas spell a keyword, it's almost certainly not a
4698                  * character class */
4699                 if (keyword(d, s - d, 0))
4700                     weight -= 150;
4701 
4702                 /* khw: Should those alphas be marked as seen? */
4703             }
4704 
4705             /* Consecutive chars like [...12...] and [...ab...] are presumed
4706              * more likely to be character classes */
4707             if (    ! first_time
4708                 && (   NATIVE_TO_LATIN1(un_char)
4709                     == NATIVE_TO_LATIN1(prev_un_char) + 1))
4710             {
4711                 weight += 5;
4712             }
4713 
4714             /* But repeating a character inside a character class does nothing,
4715              * like [aba], so less likely that someone makes such a class, more
4716              * likely that it is a subscript; the more repeats, the less
4717              * likely. */
4718             weight -= seen[un_char];
4719             break;
4720         }   /* End of switch */
4721 
4722         /* khw: 'seen' is declared as a char.  This ++ can cause it to wrap.
4723          * This gives different results with compilers for which a plain 'char'
4724          * is actually unsigned, versus those where it is signed.  I believe it
4725          * is undefined behavior to wrap a 'signed'.  I think it should be
4726          * instead declared an unsigned int to make the chances of wrapping
4727          * essentially zero.
4728          *
4729          * And I believe that extra backslashes are different from other
4730          * repeated characters. */
4731         seen[un_char]++;
4732     }   /* End of loop through each character of the construct */
4733 
4734     if (weight >= 0)	/* probably a character class */
4735         return FALSE;
4736 
4737     return TRUE;
4738 }
4739 
4740 /*
4741  * S_intuit_method
4742  *
4743  * Does all the checking to disambiguate
4744  *   foo bar
4745  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
4746  * METHCALL (bar->foo(args)) or METHCALL0 (bar->foo args).
4747  *
4748  * First argument is the stuff after the first token, e.g. "bar".
4749  *
4750  * Not a method if foo is a filehandle.
4751  * Not a method if foo is a subroutine prototyped to take a filehandle.
4752  * Not a method if it's really "Foo $bar"
4753  * Method if it's "foo $bar"
4754  * Not a method if it's really "print foo $bar"
4755  * Method if it's really "foo package::" (interpreted as package->foo)
4756  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4757  * Not a method if bar is a filehandle or package, but is quoted with
4758  *   =>
4759  */
4760 
4761 STATIC int
S_intuit_method(pTHX_ char * start,SV * ioname,CV * cv)4762 S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
4763 {
4764     char *s = start + (*start == '$');
4765     char tmpbuf[sizeof PL_tokenbuf];
4766     STRLEN len;
4767     GV* indirgv;
4768         /* Mustn't actually add anything to a symbol table.
4769            But also don't want to "initialise" any placeholder
4770            constants that might already be there into full
4771            blown PVGVs with attached PVCV.  */
4772     GV * const gv =
4773         ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL;
4774 
4775     PERL_ARGS_ASSERT_INTUIT_METHOD;
4776 
4777     if (!FEATURE_INDIRECT_IS_ENABLED)
4778         return 0;
4779 
4780     if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4781             return 0;
4782     if (cv && SvPOK(cv)) {
4783         const char *proto = CvPROTO(cv);
4784         if (proto) {
4785             while (*proto && (isSPACE(*proto) || *proto == ';'))
4786                 proto++;
4787             if (*proto == '*')
4788                 return 0;
4789         }
4790     }
4791 
4792     if (*start == '$') {
4793         SSize_t start_off = start - SvPVX(PL_linestr);
4794         if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY
4795             || isUPPER(*PL_tokenbuf))
4796             return 0;
4797         /* this could be $# */
4798         if (isSPACE(*s))
4799             s = skipspace(s);
4800         PL_bufptr = SvPVX(PL_linestr) + start_off;
4801         PL_expect = XREF;
4802         return *s == '(' ? METHCALL : METHCALL0;
4803     }
4804 
4805     s = scan_word6(s, tmpbuf, sizeof tmpbuf, TRUE, &len, FALSE);
4806     /* start is the beginning of the possible filehandle/object,
4807      * and s is the end of it
4808      * tmpbuf is a copy of it (but with single quotes as double colons)
4809      */
4810 
4811     if (!keyword(tmpbuf, len, 0)) {
4812         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4813             len -= 2;
4814             tmpbuf[len] = '\0';
4815             goto bare_package;
4816         }
4817         indirgv = gv_fetchpvn_flags(tmpbuf, len,
4818                                     GV_NOADD_NOINIT|( UTF ? SVf_UTF8 : 0 ),
4819                                     SVt_PVCV);
4820         if (indirgv && SvTYPE(indirgv) != SVt_NULL
4821          && (!isGV(indirgv) || GvCVu(indirgv)))
4822             return 0;
4823         /* filehandle or package name makes it a method */
4824         if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4825             s = skipspace(s);
4826             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4827                 return 0;	/* no assumptions -- "=>" quotes bareword */
4828       bare_package:
4829             NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0,
4830                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4831             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4832             PL_expect = XTERM;
4833             force_next(BAREWORD);
4834             PL_bufptr = s;
4835             return *s == '(' ? METHCALL : METHCALL0;
4836         }
4837     }
4838     return 0;
4839 }
4840 
4841 /* Encoded script support. filter_add() effectively inserts a
4842  * 'pre-processing' function into the current source input stream.
4843  * Note that the filter function only applies to the current source file
4844  * (e.g., it will not affect files 'require'd or 'use'd by this one).
4845  *
4846  * The datasv parameter (which may be NULL) can be used to pass
4847  * private data to this instance of the filter. The filter function
4848  * can recover the SV using the FILTER_DATA macro and use it to
4849  * store private buffers and state information.
4850  *
4851  * The supplied datasv parameter is upgraded to a PVIO type
4852  * and the IoDIRP/IoANY field is used to store the function pointer,
4853  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4854  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4855  * private use must be set using malloc'd pointers.
4856  */
4857 
4858 SV *
Perl_filter_add(pTHX_ filter_t funcp,SV * datasv)4859 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4860 {
4861     if (!funcp)
4862         return NULL;
4863 
4864     if (!PL_parser)
4865         return NULL;
4866 
4867     if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4868         Perl_croak(aTHX_ "Source filters apply only to byte streams");
4869 
4870     if (!PL_rsfp_filters)
4871         PL_rsfp_filters = newAV();
4872     if (!datasv)
4873         datasv = newSV(0);
4874     SvUPGRADE(datasv, SVt_PVIO);
4875     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4876     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4877     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4878                           FPTR2DPTR(void *, IoANY(datasv)),
4879                           SvPV_nolen(datasv)));
4880     av_unshift(PL_rsfp_filters, 1);
4881     av_store(PL_rsfp_filters, 0, datasv) ;
4882     if (
4883         !PL_parser->filtered
4884      && PL_parser->lex_flags & LEX_EVALBYTES
4885      && PL_bufptr < PL_bufend
4886     ) {
4887         const char *s = PL_bufptr;
4888         while (s < PL_bufend) {
4889             if (*s == '\n') {
4890                 SV *linestr = PL_parser->linestr;
4891                 char *buf = SvPVX(linestr);
4892                 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4893                 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4894                 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4895                 STRLEN const linestart_pos = PL_parser->linestart - buf;
4896                 STRLEN const last_uni_pos =
4897                     PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4898                 STRLEN const last_lop_pos =
4899                     PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4900                 av_push(PL_rsfp_filters, linestr);
4901                 PL_parser->linestr =
4902                     newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4903                 buf = SvPVX(PL_parser->linestr);
4904                 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4905                 PL_parser->bufptr = buf + bufptr_pos;
4906                 PL_parser->oldbufptr = buf + oldbufptr_pos;
4907                 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4908                 PL_parser->linestart = buf + linestart_pos;
4909                 if (PL_parser->last_uni)
4910                     PL_parser->last_uni = buf + last_uni_pos;
4911                 if (PL_parser->last_lop)
4912                     PL_parser->last_lop = buf + last_lop_pos;
4913                 SvLEN_set(linestr, SvCUR(linestr));
4914                 SvCUR_set(linestr, s - SvPVX(linestr));
4915                 PL_parser->filtered = 1;
4916                 break;
4917             }
4918             s++;
4919         }
4920     }
4921     return(datasv);
4922 }
4923 
4924 /*
4925 =for apidoc_section $filters
4926 =for apidoc filter_del
4927 
4928 Delete most recently added instance of the filter function argument
4929 
4930 =cut
4931 */
4932 
4933 void
Perl_filter_del(pTHX_ filter_t funcp)4934 Perl_filter_del(pTHX_ filter_t funcp)
4935 {
4936     SV *datasv;
4937 
4938     PERL_ARGS_ASSERT_FILTER_DEL;
4939 
4940 #ifdef DEBUGGING
4941     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4942                           FPTR2DPTR(void*, funcp)));
4943 #endif
4944     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4945         return;
4946     /* if filter is on top of stack (usual case) just pop it off */
4947     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4948     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4949         SvREFCNT_dec(av_pop(PL_rsfp_filters));
4950 
4951         return;
4952     }
4953     /* we need to search for the correct entry and clear it	*/
4954     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4955 }
4956 
4957 
4958 /* Invoke the idxth filter function for the current rsfp.	 */
4959 /* maxlen 0 = read one text line */
4960 I32
Perl_filter_read(pTHX_ int idx,SV * buf_sv,int maxlen)4961 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4962 {
4963     filter_t funcp;
4964     I32 ret;
4965     SV *datasv = NULL;
4966     /* This API is bad. It should have been using unsigned int for maxlen.
4967        Not sure if we want to change the API, but if not we should sanity
4968        check the value here.  */
4969     unsigned int correct_length = maxlen < 0 ?  PERL_INT_MAX : maxlen;
4970 
4971     PERL_ARGS_ASSERT_FILTER_READ;
4972 
4973     if (!PL_parser || !PL_rsfp_filters)
4974         return -1;
4975     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?	*/
4976         /* Provide a default input filter to make life easy.	*/
4977         /* Note that we append to the line. This is handy.	*/
4978         DEBUG_P(PerlIO_printf(Perl_debug_log,
4979                               "filter_read %d: from rsfp\n", idx));
4980         if (correct_length) {
4981             /* Want a block */
4982             int len ;
4983             const int old_len = SvCUR(buf_sv);
4984 
4985             /* ensure buf_sv is large enough */
4986             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4987             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4988                                    correct_length)) <= 0) {
4989                 if (PerlIO_error(PL_rsfp))
4990                     return -1;		/* error */
4991                 else
4992                     return 0 ;		/* end of file */
4993             }
4994             SvCUR_set(buf_sv, old_len + len) ;
4995             SvPVX(buf_sv)[old_len + len] = '\0';
4996         } else {
4997             /* Want a line */
4998             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4999                 if (PerlIO_error(PL_rsfp))
5000                     return -1;		/* error */
5001                 else
5002                     return 0 ;		/* end of file */
5003             }
5004         }
5005         return SvCUR(buf_sv);
5006     }
5007     /* Skip this filter slot if filter has been deleted	*/
5008     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
5009         DEBUG_P(PerlIO_printf(Perl_debug_log,
5010                               "filter_read %d: skipped (filter deleted)\n",
5011                               idx));
5012         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
5013     }
5014     if (SvTYPE(datasv) != SVt_PVIO) {
5015         if (correct_length) {
5016             /* Want a block */
5017             const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
5018             if (!remainder) return 0; /* eof */
5019             if (correct_length > remainder) correct_length = remainder;
5020             sv_catpvn(buf_sv, SvEND(datasv), correct_length);
5021             SvCUR_set(datasv, SvCUR(datasv) + correct_length);
5022         } else {
5023             /* Want a line */
5024             const char *s = SvEND(datasv);
5025             const char *send = SvPVX(datasv) + SvLEN(datasv);
5026             while (s < send) {
5027                 if (*s == '\n') {
5028                     s++;
5029                     break;
5030                 }
5031                 s++;
5032             }
5033             if (s == send) return 0; /* eof */
5034             sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
5035             SvCUR_set(datasv, s-SvPVX(datasv));
5036         }
5037         return SvCUR(buf_sv);
5038     }
5039     /* Get function pointer hidden within datasv	*/
5040     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
5041     DEBUG_P(PerlIO_printf(Perl_debug_log,
5042                           "filter_read %d: via function %p (%s)\n",
5043                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
5044     /* Call function. The function is expected to 	*/
5045     /* call "FILTER_READ(idx+1, buf_sv)" first.		*/
5046     /* Return: <0:error, =0:eof, >0:not eof 		*/
5047     ENTER;
5048     save_scalar(PL_errgv);
5049 
5050     /* although this calls out to a random C function, there's a good
5051      * chance that that function will call back into perl (e.g. using
5052      * Filter::Util::Call). So downgrade the stack to
5053      * non-reference-counted for backwards compatibility - i.e. do the
5054      * equivalent of xs_wrap(), but this time we know there are no
5055      * args to be passed or returned on the stack, simplifying it.
5056      */
5057 #ifdef PERL_RC_STACK
5058     assert(AvREAL(PL_curstack));
5059     I32 oldbase = PL_curstackinfo->si_stack_nonrc_base;
5060     I32 oldsp   = PL_stack_sp - PL_stack_base;
5061     if (!oldbase)
5062         PL_curstackinfo->si_stack_nonrc_base = oldsp + 1;
5063 #endif
5064 
5065     ret = (*funcp)(aTHX_ idx, buf_sv, correct_length);
5066 
5067 #ifdef PERL_RC_STACK
5068     assert(oldsp == PL_stack_sp - PL_stack_base);
5069     assert(AvREAL(PL_curstack));
5070     assert(PL_curstackinfo->si_stack_nonrc_base ==
5071                                         oldbase ? oldbase : oldsp + 1);
5072     PL_curstackinfo->si_stack_nonrc_base = oldbase;
5073 #endif
5074 
5075     LEAVE;
5076     return ret;
5077 }
5078 
5079 STATIC char *
S_filter_gets(pTHX_ SV * sv,STRLEN append)5080 S_filter_gets(pTHX_ SV *sv, STRLEN append)
5081 {
5082     PERL_ARGS_ASSERT_FILTER_GETS;
5083 
5084 #ifdef PERL_CR_FILTER
5085     if (!PL_rsfp_filters) {
5086         filter_add(S_cr_textfilter,NULL);
5087     }
5088 #endif
5089     if (PL_rsfp_filters) {
5090         if (!append)
5091             SvCUR_set(sv, 0);	/* start with empty line	*/
5092         if (FILTER_READ(0, sv, 0) > 0)
5093             return ( SvPVX(sv) ) ;
5094         else
5095             return NULL ;
5096     }
5097     else
5098         return (sv_gets(sv, PL_rsfp, append));
5099 }
5100 
5101 STATIC HV *
S_find_in_my_stash(pTHX_ const char * pkgname,STRLEN len)5102 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
5103 {
5104     GV *gv;
5105 
5106     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
5107 
5108     if (memEQs(pkgname, len, "__PACKAGE__"))
5109         return PL_curstash;
5110 
5111     if (len > 2
5112         && (pkgname[len - 2] == ':' && pkgname[len - 1] == ':')
5113         && (gv = gv_fetchpvn_flags(pkgname,
5114                                    len,
5115                                    ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
5116     {
5117         return GvHV(gv);			/* Foo:: */
5118     }
5119 
5120     /* use constant CLASS => 'MyClass' */
5121     gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
5122     if (gv && GvCV(gv)) {
5123         SV * const sv = cv_const_sv(GvCV(gv));
5124         if (sv)
5125             return gv_stashsv(sv, 0);
5126     }
5127 
5128     return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
5129 }
5130 
5131 
5132 STATIC char *
S_tokenize_use(pTHX_ int is_use,char * s)5133 S_tokenize_use(pTHX_ int is_use, char *s) {
5134     PERL_ARGS_ASSERT_TOKENIZE_USE;
5135 
5136     if (PL_expect != XSTATE)
5137         /* diag_listed_as: "use" not allowed in expression */
5138         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
5139                     is_use ? "use" : "no"));
5140     PL_expect = XTERM;
5141     s = skipspace(s);
5142     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
5143         s = force_version(s, TRUE);
5144         if (*s == ';' || *s == '}'
5145                 || (s = skipspace(s), (*s == ';' || *s == '}'))) {
5146             NEXTVAL_NEXTTOKE.opval = NULL;
5147             force_next(BAREWORD);
5148         }
5149         else if (*s == 'v') {
5150             s = force_word(s,BAREWORD,FALSE,TRUE);
5151             s = force_version(s, FALSE);
5152         }
5153     }
5154     else {
5155         s = force_word(s,BAREWORD,FALSE,TRUE);
5156         s = force_version(s, FALSE);
5157     }
5158     pl_yylval.ival = is_use;
5159     return s;
5160 }
5161 #ifdef DEBUGGING
5162     static const char* const exp_name[] =
5163         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
5164           "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
5165           "SIGVAR", "TERMORDORDOR"
5166         };
5167 #endif
5168 
5169 #define word_takes_any_delimiter(p,l) S_word_takes_any_delimiter(p,l)
5170 STATIC bool
S_word_takes_any_delimiter(char * p,STRLEN len)5171 S_word_takes_any_delimiter(char *p, STRLEN len)
5172 {
5173     return (len == 1 && memCHRs("msyq", p[0]))
5174             || (len == 2
5175                 && ((p[0] == 't' && p[1] == 'r')
5176                     || (p[0] == 'q' && memCHRs("qwxr", p[1]))));
5177 }
5178 
5179 static void
S_check_scalar_slice(pTHX_ char * s)5180 S_check_scalar_slice(pTHX_ char *s)
5181 {
5182     s++;
5183     while (SPACE_OR_TAB(*s)) s++;
5184     if (*s == 'q' && s[1] == 'w' && !isWORDCHAR_lazy_if_safe(s+2,
5185                                                              PL_bufend,
5186                                                              UTF))
5187     {
5188         return;
5189     }
5190     while (    isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)
5191            || (*s && memCHRs(" \t$#+-'\"", *s)))
5192     {
5193         s += UTF ? UTF8SKIP(s) : 1;
5194     }
5195     if (*s == '}' || *s == ']')
5196         pl_yylval.ival = OPpSLICEWARNING;
5197 }
5198 
5199 #define lex_token_boundary() S_lex_token_boundary(aTHX)
5200 static void
S_lex_token_boundary(pTHX)5201 S_lex_token_boundary(pTHX)
5202 {
5203     PL_oldoldbufptr = PL_oldbufptr;
5204     PL_oldbufptr = PL_bufptr;
5205 }
5206 
5207 #define vcs_conflict_marker(s) S_vcs_conflict_marker(aTHX_ s)
5208 static char *
S_vcs_conflict_marker(pTHX_ char * s)5209 S_vcs_conflict_marker(pTHX_ char *s)
5210 {
5211     lex_token_boundary();
5212     PL_bufptr = s;
5213     yyerror("Version control conflict marker");
5214     while (s < PL_bufend && *s != '\n')
5215         s++;
5216     return s;
5217 }
5218 
5219 static int
yyl_sigvar(pTHX_ char * s)5220 yyl_sigvar(pTHX_ char *s)
5221 {
5222     /* we expect the sigil and optional var name part of a
5223      * signature element here. Since a '$' is not necessarily
5224      * followed by a var name, handle it specially here; the general
5225      * yylex code would otherwise try to interpret whatever follows
5226      * as a var; e.g. ($, ...) would be seen as the var '$,'
5227      */
5228 
5229     U8 sigil;
5230 
5231     s = skipspace(s);
5232     sigil = *s++;
5233     PL_bufptr = s; /* for error reporting */
5234     switch (sigil) {
5235     case '$':
5236     case '@':
5237     case '%':
5238         /* spot stuff that looks like an prototype */
5239         if (memCHRs("$:@%&*;\\[]", *s)) {
5240             yyerror("Illegal character following sigil in a subroutine signature");
5241             break;
5242         }
5243         /* '$#' is banned, while '$ # comment' isn't */
5244         if (*s == '#') {
5245             yyerror("'#' not allowed immediately following a sigil in a subroutine signature");
5246             break;
5247         }
5248         s = skipspace(s);
5249         if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5250             char *dest = PL_tokenbuf + 1;
5251             /* read var name, including sigil, into PL_tokenbuf */
5252             PL_tokenbuf[0] = sigil;
5253             parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1,
5254                 0, cBOOL(UTF), FALSE, FALSE);
5255             *dest = '\0';
5256             assert(PL_tokenbuf[1]); /* we have a variable name */
5257         }
5258         else {
5259             *PL_tokenbuf = 0;
5260             PL_in_my = 0;
5261         }
5262 
5263         s = skipspace(s);
5264         /* parse the = for the default ourselves to avoid '+=' etc being accepted here
5265          * as the ASSIGNOP, and exclude other tokens that start with =
5266          */
5267         if (*s == '=' && (!s[1] || memCHRs("=~>", s[1]) == 0)) {
5268             /* save now to report with the same context as we did when
5269              * all ASSIGNOPS were accepted */
5270             PL_oldbufptr = s;
5271 
5272             ++s;
5273             NEXTVAL_NEXTTOKE.ival = OP_SASSIGN;
5274             force_next(ASSIGNOP);
5275             PL_expect = XTERM;
5276         }
5277         else if(*s == '/' && s[1] == '/' && s[2] == '=') {
5278             PL_oldbufptr = s;
5279 
5280             s += 3;
5281             NEXTVAL_NEXTTOKE.ival = OP_DORASSIGN;
5282             force_next(ASSIGNOP);
5283             PL_expect = XTERM;
5284         }
5285         else if(*s == '|' && s[1] == '|' && s[2] == '=') {
5286             PL_oldbufptr = s;
5287 
5288             s += 3;
5289             NEXTVAL_NEXTTOKE.ival = OP_ORASSIGN;
5290             force_next(ASSIGNOP);
5291             PL_expect = XTERM;
5292         }
5293         else if (*s == ',' || *s == ')') {
5294             PL_expect = XOPERATOR;
5295         }
5296         else {
5297             /* make sure the context shows the unexpected character and
5298              * hopefully a bit more */
5299             if (*s) ++s;
5300             while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
5301                 s++;
5302             PL_bufptr = s; /* for error reporting */
5303             yyerror("Illegal operator following parameter in a subroutine signature");
5304             PL_in_my = 0;
5305         }
5306         if (*PL_tokenbuf) {
5307             NEXTVAL_NEXTTOKE.ival = sigil;
5308             force_next('p'); /* force a signature pending identifier */
5309         }
5310         break;
5311 
5312     case ')':
5313         PL_expect = XBLOCK;
5314         break;
5315     case ',': /* handle ($a,,$b) */
5316         break;
5317 
5318     default:
5319         PL_in_my = 0;
5320         yyerror("A signature parameter must start with '$', '@' or '%'");
5321         /* very crude error recovery: skip to likely next signature
5322          * element */
5323         while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
5324             s++;
5325         break;
5326     }
5327 
5328     switch (sigil) {
5329         case ',': TOKEN (PERLY_COMMA);
5330         case '$': TOKEN (PERLY_DOLLAR);
5331         case '@': TOKEN (PERLY_SNAIL);
5332         case '%': TOKEN (PERLY_PERCENT_SIGN);
5333         case ')': TOKEN (PERLY_PAREN_CLOSE);
5334         default:  TOKEN (sigil);
5335     }
5336 }
5337 
5338 static int
yyl_dollar(pTHX_ char * s)5339 yyl_dollar(pTHX_ char *s)
5340 {
5341     CLINE;
5342 
5343     if (PL_expect == XPOSTDEREF) {
5344         if (s[1] == '#') {
5345             s++;
5346             POSTDEREF(DOLSHARP);
5347         }
5348         POSTDEREF(PERLY_DOLLAR);
5349     }
5350 
5351     if (   s[1] == '#'
5352         && (   isIDFIRST_lazy_if_safe(s+2, PL_bufend, UTF)
5353             || memCHRs("{$:+-@", s[2])))
5354     {
5355         PL_tokenbuf[0] = '@';
5356         s = scan_ident(s + 1, PL_tokenbuf + 1,
5357                        sizeof PL_tokenbuf - 1, FALSE);
5358         if (PL_expect == XOPERATOR) {
5359             char *d = s;
5360             if (PL_bufptr > s) {
5361                 d = PL_bufptr-1;
5362                 PL_bufptr = PL_oldbufptr;
5363             }
5364             no_op("Array length", d);
5365         }
5366         if (!PL_tokenbuf[1])
5367             PREREF(DOLSHARP);
5368         PL_expect = XOPERATOR;
5369         force_ident_maybe_lex('#');
5370         TOKEN(DOLSHARP);
5371     }
5372 
5373     PL_tokenbuf[0] = '$';
5374     s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5375     if (PL_expect == XOPERATOR) {
5376         char *d = s;
5377         if (PL_bufptr > s) {
5378             d = PL_bufptr-1;
5379             PL_bufptr = PL_oldbufptr;
5380         }
5381         no_op("Scalar", d);
5382     }
5383     if (!PL_tokenbuf[1]) {
5384         if (s == PL_bufend)
5385             yyerror("Final $ should be \\$ or $name");
5386         PREREF(PERLY_DOLLAR);
5387     }
5388 
5389     {
5390         const char tmp = *s;
5391         if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
5392             s = skipspace(s);
5393 
5394         if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5395             && intuit_more(s, PL_bufend)) {
5396             if (*s == '[') {
5397                 PL_tokenbuf[0] = '@';
5398                 if (ckWARN(WARN_SYNTAX)) {
5399                     char *t = s+1;
5400 
5401                     while ( t < PL_bufend ) {
5402                         if (isSPACE(*t)) {
5403                             do { t += UTF ? UTF8SKIP(t) : 1; } while (t < PL_bufend && isSPACE(*t));
5404                             /* consumed one or more space chars */
5405                         } else if (*t == '$' || *t == '@') {
5406                             /* could be more than one '$' like $$ref or @$ref */
5407                             do { t++; } while (t < PL_bufend && *t == '$');
5408 
5409                             /* could be an abigail style identifier like $ foo */
5410                             while (t < PL_bufend && *t == ' ') t++;
5411 
5412                             /* strip off the name of the var */
5413                             while (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
5414                                 t += UTF ? UTF8SKIP(t) : 1;
5415                             /* consumed a varname */
5416                         } else if (isDIGIT(*t)) {
5417                             /* deal with hex constants like 0x11 */
5418                             if (t[0] == '0' && t[1] == 'x') {
5419                                 t += 2;
5420                                 while (t < PL_bufend && isXDIGIT(*t)) t++;
5421                             } else {
5422                                 /* deal with decimal/octal constants like 1 and 0123 */
5423                                 do { t++; } while (isDIGIT(*t));
5424                                 if (t<PL_bufend && *t == '.') {
5425                                     do { t++; } while (isDIGIT(*t));
5426                                 }
5427                             }
5428                             /* consumed a number */
5429                         } else {
5430                             /* not a var nor a space nor a number */
5431                             break;
5432                         }
5433                     }
5434                     if (t < PL_bufend && *t++ == ',') {
5435                         PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
5436                         while (t < PL_bufend && *t != ']')
5437                             t++;
5438                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5439                                     "Multidimensional syntax %" UTF8f " not supported",
5440                                     UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
5441                     }
5442                 }
5443             }
5444             else if (*s == '{') {
5445                 char *t;
5446                 PL_tokenbuf[0] = '%';
5447                 if (    strEQ(PL_tokenbuf+1, "SIG")
5448                     && ckWARN(WARN_SYNTAX)
5449                     && (t = (char *) memchr(s, '}', PL_bufend - s))
5450                     && (t = (char *) memchr(t, '=', PL_bufend - t)))
5451                 {
5452                     char tmpbuf[sizeof PL_tokenbuf];
5453                     do {
5454                         t++;
5455                     } while (isSPACE(*t));
5456                     if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) {
5457                         STRLEN len;
5458                         t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
5459                         while (isSPACE(*t))
5460                             t++;
5461                         if (  *t == ';'
5462                             && get_cvn_flags(tmpbuf, len, UTF
5463                                                             ? SVf_UTF8
5464                                                             : 0))
5465                         {
5466                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5467                                 "You need to quote \"%" UTF8f "\"",
5468                                     UTF8fARG(UTF, len, tmpbuf));
5469                         }
5470                     }
5471                 }
5472             }
5473         }
5474 
5475         PL_expect = XOPERATOR;
5476         if ((PL_lex_state == LEX_NORMAL || PL_lex_brackets) && isSPACE((char)tmp)) {
5477             const bool islop = (PL_last_lop == PL_oldoldbufptr);
5478             if (!islop || PL_last_lop_op == OP_GREPSTART)
5479                 PL_expect = XOPERATOR;
5480             else if (memCHRs("$@\"'`q", *s))
5481                 PL_expect = XTERM;		/* e.g. print $fh "foo" */
5482             else if (   memCHRs("&*<%", *s)
5483                      && isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF))
5484             {
5485                 PL_expect = XTERM;		/* e.g. print $fh &sub */
5486             }
5487             else if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5488                 char tmpbuf[sizeof PL_tokenbuf];
5489                 int t2;
5490                 STRLEN len;
5491                 scan_word6(s, tmpbuf, sizeof tmpbuf, TRUE, &len, FALSE);
5492                 if ((t2 = keyword(tmpbuf, len, 0))) {
5493                     /* binary operators exclude handle interpretations */
5494                     switch (t2) {
5495                     case -KEY_x:
5496                     case -KEY_eq:
5497                     case -KEY_ne:
5498                     case -KEY_gt:
5499                     case -KEY_lt:
5500                     case -KEY_ge:
5501                     case -KEY_le:
5502                     case -KEY_cmp:
5503                         break;
5504                     default:
5505                         PL_expect = XTERM;	/* e.g. print $fh length() */
5506                         break;
5507                     }
5508                 }
5509                 else {
5510                     PL_expect = XTERM;	/* e.g. print $fh subr() */
5511                 }
5512             }
5513             else if (isDIGIT(*s))
5514                 PL_expect = XTERM;		/* e.g. print $fh 3 */
5515             else if (*s == '.' && isDIGIT(s[1]))
5516                 PL_expect = XTERM;		/* e.g. print $fh .3 */
5517             else if ((*s == '?' || *s == '-' || *s == '+')
5518                 && !isSPACE(s[1]) && s[1] != '=')
5519                 PL_expect = XTERM;		/* e.g. print $fh -1 */
5520             else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5521                      && s[1] != '/')
5522                 PL_expect = XTERM;		/* e.g. print $fh /.../
5523                                                XXX except DORDOR operator
5524                                             */
5525             else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5526                      && s[2] != '=')
5527                 PL_expect = XTERM;		/* print $fh <<"EOF" */
5528         }
5529     }
5530     force_ident_maybe_lex('$');
5531     TOKEN(PERLY_DOLLAR);
5532 }
5533 
5534 static int
yyl_sub(pTHX_ char * s,const int key)5535 yyl_sub(pTHX_ char *s, const int key)
5536 {
5537     char * const tmpbuf = PL_tokenbuf + 1;
5538     bool have_name, have_proto;
5539     STRLEN len;
5540     SV *format_name = NULL;
5541     bool is_method = (key == KEY_method);
5542 
5543     /* method always implies signatures */
5544     bool is_sigsub = is_method || FEATURE_SIGNATURES_IS_ENABLED;
5545 
5546     SSize_t off = s-SvPVX(PL_linestr);
5547     char *d;
5548 
5549     s = skipspace(s); /* can move PL_linestr */
5550 
5551     d = SvPVX(PL_linestr)+off;
5552 
5553     SAVEBOOL(PL_parser->sig_seen);
5554     PL_parser->sig_seen = FALSE;
5555 
5556     if (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
5557         || *s == '\''
5558         || (*s == ':' && s[1] == ':'))
5559     {
5560 
5561         PL_expect = XATTRBLOCK;
5562         d = scan_word6(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
5563                       &len, TRUE);
5564         if (key == KEY_format)
5565             format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
5566         *PL_tokenbuf = '&';
5567         if (memchr(tmpbuf, ':', len) || key != KEY_sub
5568          || pad_findmy_pvn(
5569                 PL_tokenbuf, len + 1, 0
5570             ) != NOT_IN_PAD)
5571             sv_setpvn(PL_subname, tmpbuf, len);
5572         else {
5573             sv_setsv(PL_subname,PL_curstname);
5574             sv_catpvs(PL_subname,"::");
5575             sv_catpvn(PL_subname,tmpbuf,len);
5576         }
5577         if (SvUTF8(PL_linestr))
5578             SvUTF8_on(PL_subname);
5579         have_name = TRUE;
5580 
5581         s = skipspace(d);
5582     }
5583     else {
5584         if (key == KEY_my || key == KEY_our || key==KEY_state) {
5585             *d = '\0';
5586             /* diag_listed_as: Missing name in "%s sub" */
5587             Perl_croak(aTHX_
5588                       "Missing name in \"%s\"", PL_bufptr);
5589         }
5590         PL_expect = XATTRTERM;
5591         sv_setpvs(PL_subname,"?");
5592         have_name = FALSE;
5593     }
5594 
5595     if (key == KEY_format) {
5596         if (format_name) {
5597             NEXTVAL_NEXTTOKE.opval
5598                 = newSVOP(OP_CONST,0, format_name);
5599             NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
5600             force_next(BAREWORD);
5601         }
5602         PREBLOCK(KW_FORMAT);
5603     }
5604 
5605     /* Look for a prototype */
5606     if (*s == '(' && !is_sigsub) {
5607         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
5608         if (!s)
5609             Perl_croak(aTHX_ "Prototype not terminated");
5610         COPLINE_SET_FROM_MULTI_END;
5611         (void)validate_proto(PL_subname, PL_lex_stuff,
5612                              ckWARN(WARN_ILLEGALPROTO), 0);
5613         have_proto = TRUE;
5614 
5615         s = skipspace(s);
5616     }
5617     else
5618         have_proto = FALSE;
5619 
5620     if (  !(*s == ':' && s[1] != ':')
5621         && (*s != '{' && *s != '(') && key != KEY_format)
5622     {
5623         assert(key == KEY_sub || key == KEY_method ||
5624                key == KEY_AUTOLOAD || key == KEY_DESTROY ||
5625                key == KEY_BEGIN || key == KEY_UNITCHECK || key == KEY_CHECK ||
5626                key == KEY_INIT || key == KEY_END ||
5627                key == KEY_my || key == KEY_state ||
5628                key == KEY_our);
5629         if (!have_name)
5630             Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
5631         else if (*s != ';' && *s != '}')
5632             Perl_croak(aTHX_ "Illegal declaration of subroutine %" SVf, SVfARG(PL_subname));
5633     }
5634 
5635     if (have_proto) {
5636         NEXTVAL_NEXTTOKE.opval =
5637             newSVOP(OP_CONST, 0, PL_lex_stuff);
5638         PL_lex_stuff = NULL;
5639         force_next(THING);
5640     }
5641 
5642     if (!have_name) {
5643         if (PL_curstash)
5644             sv_setpvs(PL_subname, "__ANON__");
5645         else
5646             sv_setpvs(PL_subname, "__ANON__::__ANON__");
5647         if (is_method)
5648             TOKEN(KW_METHOD_anon);
5649         else if (is_sigsub)
5650             TOKEN(KW_SUB_anon_sig);
5651         else
5652             TOKEN(KW_SUB_anon);
5653     }
5654     force_ident_maybe_lex('&');
5655     if (is_method)
5656         TOKEN(KW_METHOD_named);
5657     else if (is_sigsub)
5658         TOKEN(KW_SUB_named_sig);
5659     else
5660         TOKEN(KW_SUB_named);
5661 }
5662 
5663 static int
yyl_interpcasemod(pTHX_ char * s)5664 yyl_interpcasemod(pTHX_ char *s)
5665 {
5666 #ifdef DEBUGGING
5667     if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
5668         Perl_croak(aTHX_
5669                    "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
5670                    PL_bufptr, PL_bufend, *PL_bufptr);
5671 #endif
5672 
5673     if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
5674         /* if at a \E */
5675         if (PL_lex_casemods) {
5676             const char oldmod = PL_lex_casestack[--PL_lex_casemods];
5677             PL_lex_casestack[PL_lex_casemods] = '\0';
5678 
5679             if (PL_bufptr != PL_bufend
5680                 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
5681                     || oldmod == 'F')) {
5682                 PL_bufptr += 2;
5683                 PL_lex_state = LEX_INTERPCONCAT;
5684             }
5685             PL_lex_allbrackets--;
5686             return REPORT(PERLY_PAREN_CLOSE);
5687         }
5688         else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
5689            /* Got an unpaired \E */
5690            Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
5691                     "Useless use of \\E");
5692         }
5693         if (PL_bufptr != PL_bufend)
5694             PL_bufptr += 2;
5695         PL_lex_state = LEX_INTERPCONCAT;
5696         return yylex();
5697     }
5698     else {
5699         DEBUG_T({
5700             PerlIO_printf(Perl_debug_log, "### Saw case modifier\n");
5701         });
5702         s = PL_bufptr + 1;
5703         if (s[1] == '\\' && s[2] == 'E') {
5704             PL_bufptr = s + 3;
5705             PL_lex_state = LEX_INTERPCONCAT;
5706             return yylex();
5707         }
5708         else {
5709             I32 tmp;
5710             if (   memBEGINs(s, (STRLEN) (PL_bufend - s), "L\\u")
5711                 || memBEGINs(s, (STRLEN) (PL_bufend - s), "U\\l"))
5712             {
5713                 tmp = *s, *s = s[2], s[2] = (char)tmp;	/* misordered... */
5714             }
5715             if ((*s == 'L' || *s == 'U' || *s == 'F')
5716                 && (strpbrk(PL_lex_casestack, "LUF")))
5717             {
5718                 PL_lex_casestack[--PL_lex_casemods] = '\0';
5719                 PL_lex_allbrackets--;
5720                 return REPORT(PERLY_PAREN_CLOSE);
5721             }
5722             if (PL_lex_casemods > 10)
5723                 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
5724             PL_lex_casestack[PL_lex_casemods++] = *s;
5725             PL_lex_casestack[PL_lex_casemods] = '\0';
5726             PL_lex_state = LEX_INTERPCONCAT;
5727             NEXTVAL_NEXTTOKE.ival = 0;
5728             force_next((2<<24)|PERLY_PAREN_OPEN);
5729             if (*s == 'l')
5730                 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
5731             else if (*s == 'u')
5732                 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
5733             else if (*s == 'L')
5734                 NEXTVAL_NEXTTOKE.ival = OP_LC;
5735             else if (*s == 'U')
5736                 NEXTVAL_NEXTTOKE.ival = OP_UC;
5737             else if (*s == 'Q')
5738                 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
5739             else if (*s == 'F')
5740                 NEXTVAL_NEXTTOKE.ival = OP_FC;
5741             else
5742                 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
5743             PL_bufptr = s + 1;
5744         }
5745         force_next(FUNC);
5746         if (PL_lex_starts) {
5747             s = PL_bufptr;
5748             PL_lex_starts = 0;
5749             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5750             if (PL_lex_casemods == 1 && PL_lex_inpat)
5751                 TOKEN(PERLY_COMMA);
5752             else
5753                 AopNOASSIGN(OP_CONCAT);
5754         }
5755         else
5756             return yylex();
5757     }
5758 }
5759 
5760 static int
yyl_secondclass_keyword(pTHX_ char * s,STRLEN len,int key,I32 * orig_keyword,GV ** pgv,GV *** pgvp)5761 yyl_secondclass_keyword(pTHX_ char *s, STRLEN len, int key, I32 *orig_keyword,
5762                         GV **pgv, GV ***pgvp)
5763 {
5764     GV *ogv = NULL;	/* override (winner) */
5765     GV *hgv = NULL;	/* hidden (loser) */
5766     GV *gv = *pgv;
5767 
5768     if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5769         CV *cv;
5770         if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5771                                     (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
5772                                     SVt_PVCV))
5773             && (cv = GvCVu(gv)))
5774         {
5775             if (GvIMPORTED_CV(gv))
5776                 ogv = gv;
5777             else if (! CvNOWARN_AMBIGUOUS(cv))
5778                 hgv = gv;
5779         }
5780         if (!ogv
5781             && (*pgvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf, len, FALSE))
5782             && (gv = **pgvp)
5783             && (isGV_with_GP(gv)
5784                 ? GvCVu(gv) && GvIMPORTED_CV(gv)
5785                 :   SvPCS_IMPORTED(gv)
5786                 && (gv_init(gv, PL_globalstash, PL_tokenbuf,
5787                                                          len, 0), 1)))
5788         {
5789             ogv = gv;
5790         }
5791     }
5792 
5793     *pgv = gv;
5794 
5795     if (ogv) {
5796         *orig_keyword = key;
5797         return 0;		/* overridden by import or by GLOBAL */
5798     }
5799     else if (gv && !*pgvp
5800              && -key==KEY_lock	/* XXX generalizable kludge */
5801              && GvCVu(gv))
5802     {
5803         return 0;		/* any sub overrides "weak" keyword */
5804     }
5805     else {			/* no override */
5806         key = -key;
5807         if (key == KEY_dump) {
5808             Perl_croak(aTHX_ "dump() must be written as CORE::dump() as of Perl 5.30");
5809         }
5810         *pgv = NULL;
5811         *pgvp = 0;
5812         if (hgv && key != KEY_x)	/* never ambiguous */
5813             Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5814                            "Ambiguous call resolved as CORE::%s(), "
5815                            "qualify as such or use &",
5816                            GvENAME(hgv));
5817         return key;
5818     }
5819 }
5820 
5821 static int
yyl_qw(pTHX_ char * s,STRLEN len)5822 yyl_qw(pTHX_ char *s, STRLEN len)
5823 {
5824     OP *words = NULL;
5825 
5826     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
5827     if (!s)
5828         missingterm(NULL, 0);
5829 
5830     COPLINE_SET_FROM_MULTI_END;
5831     PL_expect = XOPERATOR;
5832     if (SvCUR(PL_lex_stuff)) {
5833         int warned_comma = !ckWARN(WARN_QW);
5834         int warned_comment = warned_comma;
5835         char *d = SvPV_force(PL_lex_stuff, len);
5836         while (len) {
5837             for (; isSPACE(*d) && len; --len, ++d)
5838                 /**/;
5839             if (len) {
5840                 SV *sv;
5841                 const char *b = d;
5842                 if (!warned_comma || !warned_comment) {
5843                     for (; !isSPACE(*d) && len; --len, ++d) {
5844                         if (!warned_comma && *d == ',') {
5845                             Perl_warner(aTHX_ packWARN(WARN_QW),
5846                                 "Possible attempt to separate words with commas");
5847                             ++warned_comma;
5848                         }
5849                         else if (!warned_comment && *d == '#') {
5850                             Perl_warner(aTHX_ packWARN(WARN_QW),
5851                                 "Possible attempt to put comments in qw() list");
5852                             ++warned_comment;
5853                         }
5854                     }
5855                 }
5856                 else {
5857                     for (; !isSPACE(*d) && len; --len, ++d)
5858                         /**/;
5859                 }
5860                 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
5861                 words = op_append_elem(OP_LIST, words,
5862                                        newSVOP(OP_CONST, 0, tokeq(sv)));
5863             }
5864         }
5865     }
5866     if (!words)
5867         words = newNULLLIST();
5868     SvREFCNT_dec_NN(PL_lex_stuff);
5869     PL_lex_stuff = NULL;
5870     PL_expect = XOPERATOR;
5871     pl_yylval.opval = sawparens(words);
5872     TOKEN(QWLIST);
5873 }
5874 
5875 static int
yyl_hyphen(pTHX_ char * s)5876 yyl_hyphen(pTHX_ char *s)
5877 {
5878     if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5879         I32 ftst = 0;
5880         char tmp;
5881 
5882         s++;
5883         PL_bufptr = s;
5884         tmp = *s++;
5885 
5886         while (s < PL_bufend && SPACE_OR_TAB(*s))
5887             s++;
5888 
5889         if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=>")) {
5890             s = force_word(PL_bufptr,BAREWORD,FALSE,FALSE);
5891             DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5892             OPERATOR(PERLY_MINUS);              /* unary minus */
5893         }
5894         switch (tmp) {
5895         case 'r': ftst = OP_FTEREAD;    break;
5896         case 'w': ftst = OP_FTEWRITE;   break;
5897         case 'x': ftst = OP_FTEEXEC;    break;
5898         case 'o': ftst = OP_FTEOWNED;   break;
5899         case 'R': ftst = OP_FTRREAD;    break;
5900         case 'W': ftst = OP_FTRWRITE;   break;
5901         case 'X': ftst = OP_FTREXEC;    break;
5902         case 'O': ftst = OP_FTROWNED;   break;
5903         case 'e': ftst = OP_FTIS;       break;
5904         case 'z': ftst = OP_FTZERO;     break;
5905         case 's': ftst = OP_FTSIZE;     break;
5906         case 'f': ftst = OP_FTFILE;     break;
5907         case 'd': ftst = OP_FTDIR;      break;
5908         case 'l': ftst = OP_FTLINK;     break;
5909         case 'p': ftst = OP_FTPIPE;     break;
5910         case 'S': ftst = OP_FTSOCK;     break;
5911         case 'u': ftst = OP_FTSUID;     break;
5912         case 'g': ftst = OP_FTSGID;     break;
5913         case 'k': ftst = OP_FTSVTX;     break;
5914         case 'b': ftst = OP_FTBLK;      break;
5915         case 'c': ftst = OP_FTCHR;      break;
5916         case 't': ftst = OP_FTTTY;      break;
5917         case 'T': ftst = OP_FTTEXT;     break;
5918         case 'B': ftst = OP_FTBINARY;   break;
5919         case 'M': case 'A': case 'C':
5920             gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5921             switch (tmp) {
5922             case 'M': ftst = OP_FTMTIME; break;
5923             case 'A': ftst = OP_FTATIME; break;
5924             case 'C': ftst = OP_FTCTIME; break;
5925             default:                     break;
5926             }
5927             break;
5928         default:
5929             break;
5930         }
5931         if (ftst) {
5932             PL_last_uni = PL_oldbufptr;
5933             PL_last_lop_op = (OPCODE)ftst;
5934             DEBUG_T( {
5935                 PerlIO_printf(Perl_debug_log, "### Saw file test %c\n", (int)tmp);
5936             } );
5937             FTST(ftst);
5938         }
5939         else {
5940             /* Assume it was a minus followed by a one-letter named
5941              * subroutine call (or a -bareword), then. */
5942             DEBUG_T( {
5943                 PerlIO_printf(Perl_debug_log,
5944                     "### '-%c' looked like a file test but was not\n",
5945                     (int) tmp);
5946             } );
5947             s = --PL_bufptr;
5948         }
5949     }
5950     {
5951         const char tmp = *s++;
5952         if (*s == tmp) {
5953             s++;
5954             if (PL_expect == XOPERATOR)
5955                 TERM(POSTDEC);
5956             else
5957                 OPERATOR(PREDEC);
5958         }
5959         else if (*s == '>') {
5960             s++;
5961             s = skipspace(s);
5962             if (((*s == '$' || *s == '&') && s[1] == '*')
5963               ||(*s == '$' && s[1] == '#' && s[2] == '*')
5964               ||((*s == '@' || *s == '%') && memCHRs("*[{", s[1]))
5965               ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5966              )
5967             {
5968                 PL_expect = XPOSTDEREF;
5969                 TOKEN(ARROW);
5970             }
5971             if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5972                 s = force_word(s,METHCALL0,FALSE,TRUE);
5973                 TOKEN(ARROW);
5974             }
5975             else if (*s == '$')
5976                 OPERATOR(ARROW);
5977             else
5978                 TERM(ARROW);
5979         }
5980         if (PL_expect == XOPERATOR) {
5981             if (*s == '='
5982                 && !PL_lex_allbrackets
5983                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5984             {
5985                 s--;
5986                 TOKEN(0);
5987             }
5988             Aop(OP_SUBTRACT);
5989         }
5990         else {
5991             if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5992                 check_uni();
5993             OPERATOR(PERLY_MINUS);              /* unary minus */
5994         }
5995     }
5996 }
5997 
5998 static int
yyl_plus(pTHX_ char * s)5999 yyl_plus(pTHX_ char *s)
6000 {
6001     const char tmp = *s++;
6002     if (*s == tmp) {
6003         s++;
6004         if (PL_expect == XOPERATOR)
6005             TERM(POSTINC);
6006         else
6007             OPERATOR(PREINC);
6008     }
6009     if (PL_expect == XOPERATOR) {
6010         if (*s == '='
6011             && !PL_lex_allbrackets
6012             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6013         {
6014             s--;
6015             TOKEN(0);
6016         }
6017         Aop(OP_ADD);
6018     }
6019     else {
6020         if (isSPACE(*s) || !isSPACE(*PL_bufptr))
6021             check_uni();
6022         OPERATOR(PERLY_PLUS);
6023     }
6024 }
6025 
6026 static int
yyl_star(pTHX_ char * s)6027 yyl_star(pTHX_ char *s)
6028 {
6029     if (PL_expect == XPOSTDEREF)
6030         POSTDEREF(PERLY_STAR);
6031 
6032     if (PL_expect != XOPERATOR) {
6033         s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
6034         PL_expect = XOPERATOR;
6035         force_ident(PL_tokenbuf, PERLY_STAR);
6036         if (!*PL_tokenbuf)
6037             PREREF(PERLY_STAR);
6038         TERM(PERLY_STAR);
6039     }
6040 
6041     s++;
6042     if (*s == '*') {
6043         s++;
6044         if (*s == '=' && !PL_lex_allbrackets
6045             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6046         {
6047             s -= 2;
6048             TOKEN(0);
6049         }
6050         PWop(OP_POW);
6051     }
6052 
6053     if (*s == '='
6054         && !PL_lex_allbrackets
6055         && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6056     {
6057         s--;
6058         TOKEN(0);
6059     }
6060 
6061     Mop(OP_MULTIPLY);
6062 }
6063 
6064 static int
yyl_percent(pTHX_ char * s)6065 yyl_percent(pTHX_ char *s)
6066 {
6067     if (PL_expect == XOPERATOR) {
6068         if (s[1] == '='
6069             && !PL_lex_allbrackets
6070             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6071         {
6072             TOKEN(0);
6073         }
6074         ++s;
6075         Mop(OP_MODULO);
6076     }
6077     else if (PL_expect == XPOSTDEREF)
6078         POSTDEREF(PERLY_PERCENT_SIGN);
6079 
6080     PL_tokenbuf[0] = '%';
6081     s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6082     pl_yylval.ival = 0;
6083     if (!PL_tokenbuf[1]) {
6084         PREREF(PERLY_PERCENT_SIGN);
6085     }
6086     if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6087         && intuit_more(s, PL_bufend)) {
6088         if (*s == '[')
6089             PL_tokenbuf[0] = '@';
6090     }
6091     PL_expect = XOPERATOR;
6092     force_ident_maybe_lex('%');
6093     TERM(PERLY_PERCENT_SIGN);
6094 }
6095 
6096 static int
yyl_caret(pTHX_ char * s)6097 yyl_caret(pTHX_ char *s)
6098 {
6099     char *d = s;
6100     const bool bof = cBOOL(FEATURE_BITWISE_IS_ENABLED);
6101     if (s[1] == '^') {
6102         s += 2;
6103         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6104                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6105             s -= 2;
6106             TOKEN(0);
6107         }
6108         pl_yylval.ival = OP_XOR;
6109         OPERATOR(OROR);
6110     }
6111     if (bof && s[1] == '.')
6112         s++;
6113     if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6114             (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
6115     {
6116         s = d;
6117         TOKEN(0);
6118     }
6119     s++;
6120     BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR);
6121 }
6122 
6123 static int
yyl_colon(pTHX_ char * s)6124 yyl_colon(pTHX_ char *s)
6125 {
6126     OP *attrs;
6127 
6128     switch (PL_expect) {
6129     case XOPERATOR:
6130         if (!PL_in_my || (PL_lex_state != LEX_NORMAL && !PL_lex_brackets))
6131             break;
6132         PL_bufptr = s;	/* update in case we back off */
6133         if (*s == '=') {
6134             Perl_croak(aTHX_
6135                        "Use of := for an empty attribute list is not allowed");
6136         }
6137         goto grabattrs;
6138     case XATTRBLOCK:
6139         PL_expect = XBLOCK;
6140         goto grabattrs;
6141     case XATTRTERM:
6142         PL_expect = XTERMBLOCK;
6143      grabattrs:
6144         /* NB: as well as parsing normal attributes, we also end up
6145          * here if there is something looking like attributes
6146          * following a signature (which is illegal, but used to be
6147          * legal in 5.20..5.26). If the latter, we still parse the
6148          * attributes so that error messages(s) are less confusing,
6149          * but ignore them (parser->sig_seen).
6150          */
6151         s = skipspace(s);
6152         attrs = NULL;
6153         while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
6154             I32 tmp;
6155             SV *sv;
6156             STRLEN len;
6157             char *d = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len, FALSE);
6158             if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
6159                 if (tmp < 0) tmp = -tmp;
6160                 switch (tmp) {
6161                 case KEY_or:
6162                 case KEY_and:
6163                 case KEY_for:
6164                 case KEY_foreach:
6165                 case KEY_unless:
6166                 case KEY_if:
6167                 case KEY_while:
6168                 case KEY_until:
6169                     goto got_attrs;
6170                 default:
6171                     break;
6172                 }
6173             }
6174             sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
6175             if (*d == '(') {
6176                 d = scan_str(d,TRUE,TRUE,FALSE,NULL);
6177                 if (!d) {
6178                     op_free(attrs);
6179                     ASSUME(sv && SvREFCNT(sv) == 1);
6180                     SvREFCNT_dec(sv);
6181                     Perl_croak(aTHX_ "Unterminated attribute parameter in attribute list");
6182                 }
6183                 COPLINE_SET_FROM_MULTI_END;
6184             }
6185             if (PL_lex_stuff) {
6186                 sv_catsv(sv, PL_lex_stuff);
6187                 attrs = op_append_elem(OP_LIST, attrs,
6188                                     newSVOP(OP_CONST, 0, sv));
6189                 SvREFCNT_dec_NN(PL_lex_stuff);
6190                 PL_lex_stuff = NULL;
6191             }
6192             else {
6193                 attrs = op_append_elem(OP_LIST, attrs,
6194                                     newSVOP(OP_CONST, 0, sv));
6195             }
6196             s = skipspace(d);
6197             if (*s == ':' && s[1] != ':')
6198                 s = skipspace(s+1);
6199             else if (s == d)
6200                 break;	/* require real whitespace or :'s */
6201             /* XXX losing whitespace on sequential attributes here */
6202         }
6203 
6204         if (*s != ';'
6205             && *s != '}'
6206             && !(PL_expect == XOPERATOR
6207                    /* if an operator is expected, permit =, //= and ||= or ) to end */
6208                  ? (*s == '=' || *s == ')' || *s == '/' || *s == '|')
6209                  : (*s == '{' || *s == '(')))
6210         {
6211             const char q = ((*s == '\'') ? '"' : '\'');
6212             /* If here for an expression, and parsed no attrs, back off. */
6213             if (PL_expect == XOPERATOR && !attrs) {
6214                 s = PL_bufptr;
6215                 break;
6216             }
6217             /* MUST advance bufptr here to avoid bogus "at end of line"
6218                context messages from yyerror().
6219             */
6220             PL_bufptr = s;
6221             yyerror( (const char *)
6222                      (*s
6223                       ? Perl_form(aTHX_ "Invalid separator character "
6224                                   "%c%c%c in attribute list", q, *s, q)
6225                       : "Unterminated attribute list" ) );
6226             op_free(attrs);
6227             OPERATOR(PERLY_COLON);
6228         }
6229 
6230     got_attrs:
6231         if (PL_parser->sig_seen) {
6232             /* see comment about about sig_seen and parser error
6233              * handling */
6234             op_free(attrs);
6235             Perl_croak(aTHX_ "Subroutine attributes must come "
6236                              "before the signature");
6237         }
6238         if (attrs) {
6239             NEXTVAL_NEXTTOKE.opval = attrs;
6240             force_next(THING);
6241         }
6242         TOKEN(COLONATTR);
6243     }
6244 
6245     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
6246         s--;
6247         TOKEN(0);
6248     }
6249 
6250     PL_lex_allbrackets--;
6251     OPERATOR(PERLY_COLON);
6252 }
6253 
6254 static int
yyl_subproto(pTHX_ char * s,CV * cv)6255 yyl_subproto(pTHX_ char *s, CV *cv)
6256 {
6257     STRLEN protolen = CvPROTOLEN(cv);
6258     const char *proto = CvPROTO(cv);
6259     bool optional;
6260 
6261     proto = S_strip_spaces(aTHX_ proto, &protolen);
6262     if (!protolen)
6263         TERM(FUNC0SUB);
6264     if ((optional = *proto == ';')) {
6265         do {
6266             proto++;
6267         } while (*proto == ';');
6268     }
6269 
6270     if (
6271         (
6272             (
6273                 *proto == '$' || *proto == '_'
6274              || *proto == '*' || *proto == '+'
6275             )
6276          && proto[1] == '\0'
6277         )
6278      || (
6279          *proto == '\\' && proto[1] && proto[2] == '\0'
6280         )
6281     ) {
6282         UNIPROTO(UNIOPSUB,optional);
6283     }
6284 
6285     if (*proto == '\\' && proto[1] == '[') {
6286         const char *p = proto + 2;
6287         while(*p && *p != ']')
6288             ++p;
6289         if(*p == ']' && !p[1])
6290             UNIPROTO(UNIOPSUB,optional);
6291     }
6292 
6293     if (*proto == '&' && *s == '{') {
6294         if (PL_curstash)
6295             sv_setpvs(PL_subname, "__ANON__");
6296         else
6297             sv_setpvs(PL_subname, "__ANON__::__ANON__");
6298         if (!PL_lex_allbrackets
6299             && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6300         {
6301             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6302         }
6303         PREBLOCK(LSTOPSUB);
6304     }
6305 
6306     return KEY_NULL;
6307 }
6308 
6309 static int
yyl_leftcurly(pTHX_ char * s,const U8 formbrack)6310 yyl_leftcurly(pTHX_ char *s, const U8 formbrack)
6311 {
6312     char *d;
6313     if (PL_lex_brackets > 100) {
6314         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6315     }
6316 
6317     switch (PL_expect) {
6318     case XTERM:
6319     case XTERMORDORDOR:
6320         PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6321         PL_lex_allbrackets++;
6322         OPERATOR(HASHBRACK);
6323     case XOPERATOR:
6324         while (s < PL_bufend && SPACE_OR_TAB(*s))
6325             s++;
6326         d = s;
6327         PL_tokenbuf[0] = '\0';
6328         if (d < PL_bufend && *d == '-') {
6329             PL_tokenbuf[0] = '-';
6330             d++;
6331             while (d < PL_bufend && SPACE_OR_TAB(*d))
6332                 d++;
6333         }
6334         if (d < PL_bufend && isIDFIRST_lazy_if_safe(d, PL_bufend, UTF)) {
6335             STRLEN len;
6336             d = scan_word6(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6337                           FALSE, &len, FALSE);
6338             while (d < PL_bufend && SPACE_OR_TAB(*d))
6339                 d++;
6340             if (*d == '}') {
6341                 const char minus = (PL_tokenbuf[0] == '-');
6342                 s = force_word(s + minus, BAREWORD, FALSE, TRUE);
6343                 if (minus)
6344                     force_next(PERLY_MINUS);
6345             }
6346         }
6347         /* FALLTHROUGH */
6348     case XATTRTERM:
6349     case XTERMBLOCK:
6350         PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6351         PL_lex_allbrackets++;
6352         PL_expect = XSTATE;
6353         break;
6354     case XATTRBLOCK:
6355     case XBLOCK:
6356         PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
6357         PL_lex_allbrackets++;
6358         PL_expect = XSTATE;
6359         break;
6360     case XBLOCKTERM:
6361         PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6362         PL_lex_allbrackets++;
6363         PL_expect = XSTATE;
6364         break;
6365     default: {
6366             const char *t;
6367             if (PL_oldoldbufptr == PL_last_lop)
6368                 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6369             else
6370                 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6371             PL_lex_allbrackets++;
6372             s = skipspace(s);
6373             if (*s == '}') {
6374                 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6375                     PL_expect = XTERM;
6376                     /* This hack is to get the ${} in the message. */
6377                     PL_bufptr = s+1;
6378                     yyerror("syntax error");
6379                     yyquit();
6380                     break;
6381                 }
6382                 OPERATOR(HASHBRACK);
6383             }
6384             if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
6385                 /* ${...} or @{...} etc., but not print {...}
6386                  * Skip the disambiguation and treat this as a block.
6387                  */
6388                 goto block_expectation;
6389             }
6390             /* This hack serves to disambiguate a pair of curlies
6391              * as being a block or an anon hash.  Normally, expectation
6392              * determines that, but in cases where we're not in a
6393              * position to expect anything in particular (like inside
6394              * eval"") we have to resolve the ambiguity.  This code
6395              * covers the case where the first term in the curlies is a
6396              * quoted string.  Most other cases need to be explicitly
6397              * disambiguated by prepending a "+" before the opening
6398              * curly in order to force resolution as an anon hash.
6399              *
6400              * XXX should probably propagate the outer expectation
6401              * into eval"" to rely less on this hack, but that could
6402              * potentially break current behavior of eval"".
6403              * GSAR 97-07-21
6404              */
6405             t = s;
6406             if (*s == '\'' || *s == '"' || *s == '`') {
6407                 /* common case: get past first string, handling escapes */
6408                 for (t++; t < PL_bufend && *t != *s;)
6409                     if (*t++ == '\\')
6410                         t++;
6411                 t++;
6412             }
6413             else if (*s == 'q') {
6414                 if (++t < PL_bufend
6415                     && (!isWORDCHAR(*t)
6416                         || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6417                             && !isWORDCHAR(*t))))
6418                 {
6419                     /* skip q//-like construct */
6420                     const char *tmps;
6421                     char open, close, term;
6422                     I32 brackets = 1;
6423 
6424                     while (t < PL_bufend && isSPACE(*t))
6425                         t++;
6426                     /* check for q => */
6427                     if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6428                         OPERATOR(HASHBRACK);
6429                     }
6430                     term = *t;
6431                     open = term;
6432                     if (term && (tmps = memCHRs("([{< )]}> )]}>",term)))
6433                         term = tmps[5];
6434                     close = term;
6435                     if (open == close)
6436                         for (t++; t < PL_bufend; t++) {
6437                             if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6438                                 t++;
6439                             else if (*t == open)
6440                                 break;
6441                         }
6442                     else {
6443                         for (t++; t < PL_bufend; t++) {
6444                             if (*t == '\\' && t+1 < PL_bufend)
6445                                 t++;
6446                             else if (*t == close && --brackets <= 0)
6447                                 break;
6448                             else if (*t == open)
6449                                 brackets++;
6450                         }
6451                     }
6452                     t++;
6453                 }
6454                 else
6455                     /* skip plain q word */
6456                     while (   t < PL_bufend
6457                            && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6458                     {
6459                         t += UTF ? UTF8SKIP(t) : 1;
6460                     }
6461             }
6462             else if (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) {
6463                 t += UTF ? UTF8SKIP(t) : 1;
6464                 while (   t < PL_bufend
6465                        && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6466                 {
6467                     t += UTF ? UTF8SKIP(t) : 1;
6468                 }
6469             }
6470             while (t < PL_bufend && isSPACE(*t))
6471                 t++;
6472             /* if comma follows first term, call it an anon hash */
6473             /* XXX it could be a comma expression with loop modifiers */
6474             if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6475                                || (*t == '=' && t[1] == '>')))
6476                 OPERATOR(HASHBRACK);
6477             if (PL_expect == XREF) {
6478               block_expectation:
6479                 /* If there is an opening brace or 'sub:', treat it
6480                    as a term to make ${{...}}{k} and &{sub:attr...}
6481                    dwim.  Otherwise, treat it as a statement, so
6482                    map {no strict; ...} works.
6483                  */
6484                 s = skipspace(s);
6485                 if (*s == '{') {
6486                     PL_expect = XTERM;
6487                     break;
6488                 }
6489                 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "sub")) {
6490                     PL_bufptr = s;
6491                     d = s + 3;
6492                     d = skipspace(d);
6493                     s = PL_bufptr;
6494                     if (*d == ':') {
6495                         PL_expect = XTERM;
6496                         break;
6497                     }
6498                 }
6499                 PL_expect = XSTATE;
6500             }
6501             else {
6502                 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6503                 PL_expect = XSTATE;
6504             }
6505         }
6506         break;
6507     }
6508 
6509     pl_yylval.ival = CopLINE(PL_curcop);
6510     PL_copline = NOLINE;   /* invalidate current command line number */
6511     TOKEN(formbrack ? PERLY_EQUAL_SIGN : PERLY_BRACE_OPEN);
6512 }
6513 
6514 static int
yyl_rightcurly(pTHX_ char * s,const U8 formbrack)6515 yyl_rightcurly(pTHX_ char *s, const U8 formbrack)
6516 {
6517     assert(s != PL_bufend);
6518     s++;
6519 
6520     if (PL_lex_brackets <= 0)
6521         /* diag_listed_as: Unmatched right %s bracket */
6522         yyerror("Unmatched right curly bracket");
6523     else
6524         PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6525 
6526     PL_lex_allbrackets--;
6527 
6528     if (PL_lex_state == LEX_INTERPNORMAL) {
6529         if (PL_lex_brackets == 0) {
6530             if (PL_expect & XFAKEBRACK) {
6531                 PL_expect &= XENUMMASK;
6532                 PL_lex_state = LEX_INTERPEND;
6533                 PL_bufptr = s;
6534                 return yylex();	/* ignore fake brackets */
6535             }
6536             if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6537              && SvEVALED(PL_lex_repl))
6538                 PL_lex_state = LEX_INTERPEND;
6539             else if (*s == '-' && s[1] == '>')
6540                 PL_lex_state = LEX_INTERPENDMAYBE;
6541             else if (*s != '[' && *s != '{')
6542                 PL_lex_state = LEX_INTERPEND;
6543         }
6544     }
6545 
6546     if (PL_expect & XFAKEBRACK) {
6547         PL_expect &= XENUMMASK;
6548         PL_bufptr = s;
6549         return yylex();		/* ignore fake brackets */
6550     }
6551 
6552     force_next(formbrack ? PERLY_DOT : PERLY_BRACE_CLOSE);
6553     if (formbrack) LEAVE_with_name("lex_format");
6554     if (formbrack == 2) { /* means . where arguments were expected */
6555         force_next(PERLY_SEMICOLON);
6556         TOKEN(FORMRBRACK);
6557     }
6558 
6559     TOKEN(PERLY_SEMICOLON);
6560 }
6561 
6562 static int
yyl_ampersand(pTHX_ char * s)6563 yyl_ampersand(pTHX_ char *s)
6564 {
6565     if (PL_expect == XPOSTDEREF)
6566         POSTDEREF(PERLY_AMPERSAND);
6567 
6568     s++;
6569     if (*s++ == '&') {
6570         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6571                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6572             s -= 2;
6573             TOKEN(0);
6574         }
6575         AOPERATOR(ANDAND);
6576     }
6577     s--;
6578 
6579     if (PL_expect == XOPERATOR) {
6580         char *d;
6581         bool bof;
6582         if (   PL_bufptr == PL_linestart
6583             && ckWARN(WARN_SEMICOLON)
6584             && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
6585         {
6586             CopLINE_dec(PL_curcop);
6587             Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6588             CopLINE_inc(PL_curcop);
6589         }
6590         d = s;
6591         if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6592             s++;
6593         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6594                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6595             s = d;
6596             s--;
6597             TOKEN(0);
6598         }
6599         if (d == s)
6600             BAop(bof ? OP_NBIT_AND : OP_BIT_AND);
6601         else
6602             BAop(OP_SBIT_AND);
6603     }
6604 
6605     PL_tokenbuf[0] = '&';
6606     s = scan_ident(s - 1, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
6607     pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6608 
6609     if (PL_tokenbuf[1])
6610         force_ident_maybe_lex('&');
6611     else
6612         PREREF(PERLY_AMPERSAND);
6613 
6614     TERM(PERLY_AMPERSAND);
6615 }
6616 
6617 static int
yyl_verticalbar(pTHX_ char * s)6618 yyl_verticalbar(pTHX_ char *s)
6619 {
6620     char *d;
6621     bool bof;
6622 
6623     s++;
6624     if (*s++ == '|') {
6625         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6626                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6627             s -= 2;
6628             TOKEN(0);
6629         }
6630         pl_yylval.ival = OP_OR;
6631         AOPERATOR(OROR);
6632     }
6633 
6634     s--;
6635     d = s;
6636     if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6637         s++;
6638 
6639     if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6640             (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6641         s = d - 1;
6642         TOKEN(0);
6643     }
6644 
6645     BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR);
6646 }
6647 
6648 static int
yyl_bang(pTHX_ char * s)6649 yyl_bang(pTHX_ char *s)
6650 {
6651     const char tmp = *s++;
6652     if (tmp == '=') {
6653         /* was this !=~ where !~ was meant?
6654          * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6655 
6656         if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6657             const char *t = s+1;
6658 
6659             while (t < PL_bufend && isSPACE(*t))
6660                 ++t;
6661 
6662             if (*t == '/' || *t == '?'
6663                 || ((*t == 'm' || *t == 's' || *t == 'y')
6664                     && !isWORDCHAR(t[1]))
6665                 || (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6666                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6667                             "!=~ should be !~");
6668         }
6669 
6670         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6671             s -= 2;
6672             TOKEN(0);
6673         }
6674 
6675         ChEop(OP_NE);
6676     }
6677 
6678     if (tmp == '~')
6679         PMop(OP_NOT);
6680 
6681     s--;
6682     OPERATOR(PERLY_EXCLAMATION_MARK);
6683 }
6684 
6685 static int
yyl_snail(pTHX_ char * s)6686 yyl_snail(pTHX_ char *s)
6687 {
6688     if (PL_expect == XPOSTDEREF)
6689         POSTDEREF(PERLY_SNAIL);
6690     PL_tokenbuf[0] = '@';
6691     s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6692     if (PL_expect == XOPERATOR) {
6693         char *d = s;
6694         if (PL_bufptr > s) {
6695             d = PL_bufptr-1;
6696             PL_bufptr = PL_oldbufptr;
6697         }
6698         no_op("Array", d);
6699     }
6700     pl_yylval.ival = 0;
6701     if (!PL_tokenbuf[1]) {
6702         PREREF(PERLY_SNAIL);
6703     }
6704     if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6705         s = skipspace(s);
6706     if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6707         && intuit_more(s, PL_bufend))
6708     {
6709         if (*s == '{')
6710             PL_tokenbuf[0] = '%';
6711 
6712         /* Warn about @ where they meant $. */
6713         if (*s == '[' || *s == '{') {
6714             if (ckWARN(WARN_SYNTAX)) {
6715                 S_check_scalar_slice(aTHX_ s);
6716             }
6717         }
6718     }
6719     PL_expect = XOPERATOR;
6720     force_ident_maybe_lex('@');
6721     TERM(PERLY_SNAIL);
6722 }
6723 
6724 static int
yyl_slash(pTHX_ char * s)6725 yyl_slash(pTHX_ char *s)
6726 {
6727     if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') {
6728         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6729                 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6730             TOKEN(0);
6731         s += 2;
6732         AOPERATOR(DORDOR);
6733     }
6734     else if (PL_expect == XOPERATOR) {
6735         s++;
6736         if (*s == '=' && !PL_lex_allbrackets
6737             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6738         {
6739             s--;
6740             TOKEN(0);
6741         }
6742         Mop(OP_DIVIDE);
6743     }
6744     else {
6745         /* Disable warning on "study /blah/" */
6746         if (    PL_oldoldbufptr == PL_last_uni
6747             && (   *PL_last_uni != 's' || s - PL_last_uni < 5
6748                 || memNE(PL_last_uni, "study", 5)
6749                 || isWORDCHAR_lazy_if_safe(PL_last_uni+5, PL_bufend, UTF)
6750          ))
6751             check_uni();
6752         s = scan_pat(s,OP_MATCH);
6753         TERM(sublex_start());
6754     }
6755 }
6756 
6757 static int
yyl_leftsquare(pTHX_ char * s)6758 yyl_leftsquare(pTHX_ char *s)
6759 {
6760     if (PL_lex_brackets > 100)
6761         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6762     PL_lex_brackstack[PL_lex_brackets++] = 0;
6763     PL_lex_allbrackets++;
6764     s++;
6765     OPERATOR(PERLY_BRACKET_OPEN);
6766 }
6767 
6768 static int
yyl_rightsquare(pTHX_ char * s)6769 yyl_rightsquare(pTHX_ char *s)
6770 {
6771     if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6772         TOKEN(0);
6773     s++;
6774     if (PL_lex_brackets <= 0)
6775         /* diag_listed_as: Unmatched right %s bracket */
6776         yyerror("Unmatched right square bracket");
6777     else
6778         --PL_lex_brackets;
6779     PL_lex_allbrackets--;
6780     if (PL_lex_state == LEX_INTERPNORMAL) {
6781         if (PL_lex_brackets == 0) {
6782             if (*s == '-' && s[1] == '>')
6783                 PL_lex_state = LEX_INTERPENDMAYBE;
6784             else if (*s != '[' && *s != '{')
6785                 PL_lex_state = LEX_INTERPEND;
6786         }
6787     }
6788     TERM(PERLY_BRACKET_CLOSE);
6789 }
6790 
6791 static int
yyl_tilde(pTHX_ char * s)6792 yyl_tilde(pTHX_ char *s)
6793 {
6794     bool bof;
6795     if (s[1] == '~' && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)) {
6796         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6797             TOKEN(0);
6798         s += 2;
6799         Perl_ck_warner_d(aTHX_
6800             packWARN(WARN_DEPRECATED__SMARTMATCH),
6801             "Smartmatch is deprecated");
6802         NCEop(OP_SMARTMATCH);
6803     }
6804     s++;
6805     if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') {
6806         s++;
6807         BCop(OP_SCOMPLEMENT);
6808     }
6809     BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT);
6810 }
6811 
6812 static int
yyl_leftparen(pTHX_ char * s)6813 yyl_leftparen(pTHX_ char *s)
6814 {
6815     if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
6816         PL_oldbufptr = PL_oldoldbufptr;		/* allow print(STDOUT 123) */
6817     else
6818         PL_expect = XTERM;
6819     s = skipspace(s);
6820     PL_lex_allbrackets++;
6821     TOKEN(PERLY_PAREN_OPEN);
6822 }
6823 
6824 static int
yyl_rightparen(pTHX_ char * s)6825 yyl_rightparen(pTHX_ char *s)
6826 {
6827     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
6828         TOKEN(0);
6829     s++;
6830     PL_lex_allbrackets--;
6831     s = skipspace(s);
6832     if (*s == '{')
6833         PREBLOCK(PERLY_PAREN_CLOSE);
6834     TERM(PERLY_PAREN_CLOSE);
6835 }
6836 
6837 static int
yyl_leftpointy(pTHX_ char * s)6838 yyl_leftpointy(pTHX_ char *s)
6839 {
6840     char tmp;
6841 
6842     if (PL_expect != XOPERATOR) {
6843         if (s[1] != '<' && !memchr(s,'>', PL_bufend - s))
6844             check_uni();
6845         if (s[1] == '<' && s[2] != '>')
6846             s = scan_heredoc(s);
6847         else
6848             s = scan_inputsymbol(s);
6849         PL_expect = XOPERATOR;
6850         TOKEN(sublex_start());
6851     }
6852 
6853     s++;
6854 
6855     tmp = *s++;
6856     if (tmp == '<') {
6857         if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6858             s -= 2;
6859             TOKEN(0);
6860         }
6861         SHop(OP_LEFT_SHIFT);
6862     }
6863     if (tmp == '=') {
6864         tmp = *s++;
6865         if (tmp == '>') {
6866             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6867                 s -= 3;
6868                 TOKEN(0);
6869             }
6870             NCEop(OP_NCMP);
6871         }
6872         s--;
6873         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6874             s -= 2;
6875             TOKEN(0);
6876         }
6877         ChRop(OP_LE);
6878     }
6879 
6880     s--;
6881     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6882         s--;
6883         TOKEN(0);
6884     }
6885 
6886     ChRop(OP_LT);
6887 }
6888 
6889 static int
yyl_rightpointy(pTHX_ char * s)6890 yyl_rightpointy(pTHX_ char *s)
6891 {
6892     const char tmp = *s++;
6893 
6894     if (tmp == '>') {
6895         if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6896             s -= 2;
6897             TOKEN(0);
6898         }
6899         SHop(OP_RIGHT_SHIFT);
6900     }
6901     else if (tmp == '=') {
6902         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6903             s -= 2;
6904             TOKEN(0);
6905         }
6906         ChRop(OP_GE);
6907     }
6908 
6909     s--;
6910     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6911         s--;
6912         TOKEN(0);
6913     }
6914 
6915     ChRop(OP_GT);
6916 }
6917 
6918 static int
yyl_sglquote(pTHX_ char * s)6919 yyl_sglquote(pTHX_ char *s)
6920 {
6921     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6922     if (!s)
6923         missingterm(NULL, 0);
6924     COPLINE_SET_FROM_MULTI_END;
6925     DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6926     if (PL_expect == XOPERATOR) {
6927         no_op("String",s);
6928     }
6929     pl_yylval.ival = OP_CONST;
6930     TERM(sublex_start());
6931 }
6932 
6933 static int
yyl_dblquote(pTHX_ char * s)6934 yyl_dblquote(pTHX_ char *s)
6935 {
6936     char *d;
6937     STRLEN len;
6938     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6939     DEBUG_T( {
6940         if (s)
6941             printbuf("### Saw string before %s\n", s);
6942         else
6943             PerlIO_printf(Perl_debug_log,
6944                          "### Saw unterminated string\n");
6945     } );
6946     if (PL_expect == XOPERATOR) {
6947             no_op("String",s);
6948     }
6949     if (!s)
6950         missingterm(NULL, 0);
6951     pl_yylval.ival = OP_CONST;
6952     /* FIXME. I think that this can be const if char *d is replaced by
6953        more localised variables.  */
6954     for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6955         if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6956             pl_yylval.ival = OP_STRINGIFY;
6957             break;
6958         }
6959     }
6960     if (pl_yylval.ival == OP_CONST)
6961         COPLINE_SET_FROM_MULTI_END;
6962     TERM(sublex_start());
6963 }
6964 
6965 static int
yyl_backtick(pTHX_ char * s)6966 yyl_backtick(pTHX_ char *s)
6967 {
6968     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6969     DEBUG_T( {
6970         if (s)
6971             printbuf("### Saw backtick string before %s\n", s);
6972         else
6973             PerlIO_printf(Perl_debug_log,
6974                          "### Saw unterminated backtick string\n");
6975     } );
6976     if (PL_expect == XOPERATOR)
6977         no_op("Backticks",s);
6978     if (!s)
6979         missingterm(NULL, 0);
6980     pl_yylval.ival = OP_BACKTICK;
6981     TERM(sublex_start());
6982 }
6983 
6984 static int
yyl_backslash(pTHX_ char * s)6985 yyl_backslash(pTHX_ char *s)
6986 {
6987     if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr && isDIGIT(*s))
6988         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6989                        *s, *s);
6990     if (PL_expect == XOPERATOR)
6991         no_op("Backslash",s);
6992     OPERATOR(REFGEN);
6993 }
6994 
6995 static void
yyl_data_handle(pTHX)6996 yyl_data_handle(pTHX)
6997 {
6998     HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
6999                             ? PL_curstash
7000                             : PL_defstash;
7001     GV *gv = (GV *)*hv_fetchs(stash, "DATA", 1);
7002 
7003     if (!isGV(gv))
7004         gv_init(gv,stash,"DATA",4,0);
7005 
7006     GvMULTI_on(gv);
7007     if (!GvIO(gv))
7008         GvIOp(gv) = newIO();
7009     IoIFP(GvIOp(gv)) = PL_rsfp;
7010 
7011     /* Mark this internal pseudo-handle as clean */
7012     IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7013     if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7014         IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7015     else
7016         IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7017 
7018 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7019     /* if the script was opened in binmode, we need to revert
7020      * it to text mode for compatibility; but only iff it has CRs
7021      * XXX this is a questionable hack at best. */
7022     if (PL_bufend-PL_bufptr > 2
7023         && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7024     {
7025         Off_t loc = 0;
7026         if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7027             loc = PerlIO_tell(PL_rsfp);
7028             (void)PerlIO_seek(PL_rsfp, 0L, 0);
7029         }
7030         if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7031             if (loc > 0)
7032                 PerlIO_seek(PL_rsfp, loc, 0);
7033         }
7034     }
7035 #endif
7036 
7037 #ifdef PERLIO_LAYERS
7038     if (!IN_BYTES) {
7039         if (UTF)
7040             PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7041     }
7042 #endif
7043 
7044     PL_rsfp = NULL;
7045 }
7046 
7047 PERL_STATIC_NO_RET void yyl_croak_unrecognised(pTHX_ char*)
7048     __attribute__noreturn__;
7049 
7050 PERL_STATIC_NO_RET void
yyl_croak_unrecognised(pTHX_ char * s)7051 yyl_croak_unrecognised(pTHX_ char *s)
7052 {
7053     SV *dsv = newSVpvs_flags("", SVs_TEMP);
7054     const char *c;
7055     char *d;
7056     STRLEN len;
7057 
7058     if (UTF) {
7059         STRLEN skiplen = UTF8SKIP(s);
7060         STRLEN stravail = PL_bufend - s;
7061         c = sv_uni_display(dsv, newSVpvn_flags(s,
7062                                                skiplen > stravail ? stravail : skiplen,
7063                                                SVs_TEMP | SVf_UTF8),
7064                            10, UNI_DISPLAY_ISPRINT);
7065     }
7066     else {
7067         c = Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
7068     }
7069 
7070     if (s >= PL_linestart) {
7071         d = PL_linestart;
7072     }
7073     else {
7074         /* somehow (probably due to a parse failure), PL_linestart has advanced
7075          * pass PL_bufptr, get a reasonable beginning of line
7076          */
7077         d = s;
7078         while (d > SvPVX(PL_linestr) && d[-1] && d[-1] != '\n')
7079             --d;
7080     }
7081     len = UTF ? Perl_utf8_length(aTHX_ (U8 *) d, (U8 *) s) : (STRLEN) (s - d);
7082     if (len > UNRECOGNIZED_PRECEDE_COUNT) {
7083         d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)d) : s - UNRECOGNIZED_PRECEDE_COUNT;
7084     }
7085 
7086     Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %" UTF8f "<-- HERE near column %d", c,
7087                       UTF8fARG(UTF, (s - d), d),
7088                      (int) len + 1);
7089 }
7090 
7091 static int
yyl_require(pTHX_ char * s,I32 orig_keyword)7092 yyl_require(pTHX_ char *s, I32 orig_keyword)
7093 {
7094     s = skipspace(s);
7095     if (isDIGIT(*s)) {
7096         s = force_version(s, FALSE);
7097     }
7098     else if (*s != 'v' || !isDIGIT(s[1])
7099             || (s = force_version(s, TRUE), *s == 'v'))
7100     {
7101         *PL_tokenbuf = '\0';
7102         s = force_word(s,BAREWORD,TRUE,TRUE);
7103         if (isIDFIRST_lazy_if_safe(PL_tokenbuf,
7104                                    PL_tokenbuf + sizeof(PL_tokenbuf),
7105                                    UTF))
7106         {
7107             gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
7108                         GV_ADD | (UTF ? SVf_UTF8 : 0));
7109         }
7110         else if (*s == '<')
7111             yyerror("<> at require-statement should be quotes");
7112     }
7113 
7114     if (orig_keyword == KEY_require)
7115         pl_yylval.ival = 1;
7116     else
7117         pl_yylval.ival = 0;
7118 
7119     PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
7120     PL_bufptr = s;
7121     PL_last_uni = PL_oldbufptr;
7122     PL_last_lop_op = OP_REQUIRE;
7123     s = skipspace(s);
7124     return REPORT( (int)KW_REQUIRE );
7125 }
7126 
7127 static int
yyl_foreach(pTHX_ char * s)7128 yyl_foreach(pTHX_ char *s)
7129 {
7130     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7131         return REPORT(0);
7132     pl_yylval.ival = CopLINE(PL_curcop);
7133     s = skipspace(s);
7134     if (PL_expect == XSTATE && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
7135         char *p = s;
7136         SSize_t s_off = s - SvPVX(PL_linestr);
7137         bool paren_is_valid = FALSE;
7138         bool maybe_package = FALSE;
7139         bool saw_core = FALSE;
7140         bool core_valid = FALSE;
7141 
7142         if (UNLIKELY(memBEGINPs(p, (STRLEN) (PL_bufend - p), "CORE::"))) {
7143             saw_core = TRUE;
7144             p += 6;
7145         }
7146         if (LIKELY(memBEGINPs(p, (STRLEN) (PL_bufend - p), "my"))) {
7147             core_valid = TRUE;
7148             paren_is_valid = TRUE;
7149             if (isSPACE(p[2])) {
7150                 p = skipspace(p + 3);
7151                 maybe_package = TRUE;
7152             }
7153             else {
7154                 p += 2;
7155             }
7156         }
7157         else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "our")) {
7158             core_valid = TRUE;
7159             if (isSPACE(p[3])) {
7160                 p = skipspace(p + 4);
7161                 maybe_package = TRUE;
7162             }
7163             else {
7164                 p += 3;
7165             }
7166         }
7167         else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "state")) {
7168             core_valid = TRUE;
7169             if (isSPACE(p[5])) {
7170                 p = skipspace(p + 6);
7171             }
7172             else {
7173                 p += 5;
7174             }
7175         }
7176         if (saw_core && !core_valid) {
7177             Perl_croak(aTHX_ "Missing $ on loop variable");
7178         }
7179 
7180         if (maybe_package && !saw_core) {
7181             /* skip optional package name, as in "for my abc $x (..)" */
7182             if (UNLIKELY(isIDFIRST_lazy_if_safe(p, PL_bufend, UTF))) {
7183                 STRLEN len;
7184                 p = scan_word6(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len, TRUE);
7185                 p = skipspace(p);
7186                 paren_is_valid = FALSE;
7187             }
7188         }
7189 
7190         if (UNLIKELY(paren_is_valid && *p == '(')) {
7191             ; /* fine - this is foreach my (list) */
7192         }
7193         else if (UNLIKELY(*p != '$' && *p != '\\')) {
7194             /* "for myfoo (" will end up here, but with p pointing at the 'f' */
7195             Perl_croak(aTHX_ "Missing $ on loop variable");
7196         }
7197         /* The buffer may have been reallocated, update s */
7198         s = SvPVX(PL_linestr) + s_off;
7199     }
7200     OPERATOR(KW_FOR);
7201 }
7202 
7203 static int
yyl_do(pTHX_ char * s,I32 orig_keyword)7204 yyl_do(pTHX_ char *s, I32 orig_keyword)
7205 {
7206     s = skipspace(s);
7207     if (*s == '{')
7208         PRETERMBLOCK(KW_DO);
7209     if (*s != '\'') {
7210         char *d;
7211         STRLEN len;
7212         *PL_tokenbuf = '&';
7213         d = scan_word6(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7214                       1, &len, TRUE);
7215         if (len && memNEs(PL_tokenbuf+1, len, "CORE")
7216          && !keyword(PL_tokenbuf + 1, len, 0)) {
7217             SSize_t off = s-SvPVX(PL_linestr);
7218             d = skipspace(d);
7219             s = SvPVX(PL_linestr)+off;
7220             if (*d == '(') {
7221                 force_ident_maybe_lex('&');
7222                 s = d;
7223             }
7224         }
7225     }
7226     if (orig_keyword == KEY_do)
7227         pl_yylval.ival = 1;
7228     else
7229         pl_yylval.ival = 0;
7230     OPERATOR(KW_DO);
7231 }
7232 
7233 static int
yyl_my(pTHX_ char * s,I32 my)7234 yyl_my(pTHX_ char *s, I32 my)
7235 {
7236     if (PL_in_my) {
7237         PL_bufptr = s;
7238         yyerror(Perl_form(aTHX_
7239                           "Can't redeclare \"%s\" in \"%s\"",
7240                            my       == KEY_my    ? "my" :
7241                            my       == KEY_state ? "state" : "our",
7242                            PL_in_my == KEY_my    ? "my" :
7243                            PL_in_my == KEY_state ? "state" : "our"));
7244     }
7245     PL_in_my = (U16)my;
7246     s = skipspace(s);
7247     if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
7248         STRLEN len;
7249         s = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len, TRUE);
7250         if (memEQs(PL_tokenbuf, len, "sub"))
7251             return yyl_sub(aTHX_ s, my);
7252         PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7253         if (!PL_in_my_stash) {
7254             char tmpbuf[1024];
7255             int i;
7256             PL_bufptr = s;
7257             i = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7258             PERL_MY_SNPRINTF_POST_GUARD(i, sizeof(tmpbuf));
7259             yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
7260         }
7261     }
7262     else if (*s == '\\') {
7263         if (!FEATURE_MYREF_IS_ENABLED)
7264             Perl_croak(aTHX_ "The experimental declared_refs "
7265                              "feature is not enabled");
7266         Perl_ck_warner_d(aTHX_
7267              packWARN(WARN_EXPERIMENTAL__DECLARED_REFS),
7268             "Declaring references is experimental");
7269     }
7270     OPERATOR(KW_MY);
7271 }
7272 
7273 static int yyl_try(pTHX_ char*);
7274 
7275 static bool
yyl_eol_needs_semicolon(pTHX_ char ** ps)7276 yyl_eol_needs_semicolon(pTHX_ char **ps)
7277 {
7278     char *s = *ps;
7279     if (PL_lex_state != LEX_NORMAL
7280         || (PL_in_eval && !PL_rsfp && !PL_parser->filtered))
7281     {
7282         const bool in_comment = *s == '#';
7283         char *d;
7284         if (*s == '#' && s == PL_linestart && PL_in_eval
7285          && !PL_rsfp && !PL_parser->filtered) {
7286             /* handle eval qq[#line 1 "foo"\n ...] */
7287             CopLINE_dec(PL_curcop);
7288             incline(s, PL_bufend);
7289         }
7290         d = s;
7291         while (d < PL_bufend && *d != '\n')
7292             d++;
7293         if (d < PL_bufend)
7294             d++;
7295         s = d;
7296         if (in_comment && d == PL_bufend
7297             && PL_lex_state == LEX_INTERPNORMAL
7298             && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
7299             && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
7300         else
7301             incline(s, PL_bufend);
7302         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
7303             PL_lex_state = LEX_FORMLINE;
7304             force_next(FORMRBRACK);
7305             *ps = s;
7306             return TRUE;
7307         }
7308     }
7309     else {
7310         while (s < PL_bufend && *s != '\n')
7311             s++;
7312         if (s < PL_bufend) {
7313             s++;
7314             if (s < PL_bufend)
7315                 incline(s, PL_bufend);
7316         }
7317     }
7318     *ps = s;
7319     return FALSE;
7320 }
7321 
7322 static int
yyl_fake_eof(pTHX_ U32 fake_eof,bool bof,char * s)7323 yyl_fake_eof(pTHX_ U32 fake_eof, bool bof, char *s)
7324 {
7325     char *d;
7326 
7327     goto start;
7328 
7329     do {
7330         fake_eof = 0;
7331         bof = cBOOL(PL_rsfp);
7332       start:
7333 
7334         PL_bufptr = PL_bufend;
7335         COPLINE_INC_WITH_HERELINES;
7336         if (!lex_next_chunk(fake_eof)) {
7337             CopLINE_dec(PL_curcop);
7338             s = PL_bufptr;
7339             TOKEN(PERLY_SEMICOLON);	/* not infinite loop because rsfp is NULL now */
7340         }
7341         CopLINE_dec(PL_curcop);
7342         s = PL_bufptr;
7343         /* If it looks like the start of a BOM or raw UTF-16,
7344          * check if it in fact is. */
7345         if (bof && PL_rsfp
7346             && (   *s == 0
7347                 || *(U8*)s == BOM_UTF8_FIRST_BYTE
7348                 || *(U8*)s >= 0xFE
7349                 || s[1] == 0))
7350         {
7351             Off_t offset = (IV)PerlIO_tell(PL_rsfp);
7352             bof = (offset == (Off_t)SvCUR(PL_linestr));
7353 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
7354             /* offset may include swallowed CR */
7355             if (!bof)
7356                 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
7357 #endif
7358             if (bof) {
7359                 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7360                 s = swallow_bom((U8*)s);
7361             }
7362         }
7363         if (PL_parser->in_pod) {
7364             /* Incest with pod. */
7365             if (    memBEGINPs(s, (STRLEN) (PL_bufend - s), "=cut")
7366                 && !isALPHA(s[4]))
7367             {
7368                 SvPVCLEAR(PL_linestr);
7369                 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
7370                 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7371                 PL_last_lop = PL_last_uni = NULL;
7372                 PL_parser->in_pod = 0;
7373             }
7374         }
7375         if (PL_rsfp || PL_parser->filtered)
7376             incline(s, PL_bufend);
7377     } while (PL_parser->in_pod);
7378 
7379     PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
7380     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7381     PL_last_lop = PL_last_uni = NULL;
7382     if (CopLINE(PL_curcop) == 1) {
7383         while (s < PL_bufend && isSPACE(*s))
7384             s++;
7385         if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
7386             s++;
7387         d = NULL;
7388         if (!PL_in_eval) {
7389             if (*s == '#' && *(s+1) == '!')
7390                 d = s + 2;
7391 #ifdef ALTERNATE_SHEBANG
7392             else {
7393                 static char const as[] = ALTERNATE_SHEBANG;
7394                 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
7395                     d = s + (sizeof(as) - 1);
7396             }
7397 #endif /* ALTERNATE_SHEBANG */
7398         }
7399         if (d) {
7400             char *ipath;
7401             char *ipathend;
7402 
7403             while (isSPACE(*d))
7404                 d++;
7405             ipath = d;
7406             while (*d && !isSPACE(*d))
7407                 d++;
7408             ipathend = d;
7409 
7410 #ifdef ARG_ZERO_IS_SCRIPT
7411             if (ipathend > ipath) {
7412                 /*
7413                  * HP-UX (at least) sets argv[0] to the script name,
7414                  * which makes $^X incorrect.  And Digital UNIX and Linux,
7415                  * at least, set argv[0] to the basename of the Perl
7416                  * interpreter. So, having found "#!", we'll set it right.
7417                  */
7418                 SV* copfilesv = CopFILESV(PL_curcop);
7419                 if (copfilesv) {
7420                     SV * const x =
7421                         GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
7422                                          SVt_PV)); /* $^X */
7423                     assert(SvPOK(x) || SvGMAGICAL(x));
7424                     if (sv_eq(x, copfilesv)) {
7425                         sv_setpvn(x, ipath, ipathend - ipath);
7426                         SvSETMAGIC(x);
7427                     }
7428                     else {
7429                         STRLEN blen;
7430                         STRLEN llen;
7431                         const char *bstart = SvPV_const(copfilesv, blen);
7432                         const char * const lstart = SvPV_const(x, llen);
7433                         if (llen < blen) {
7434                             bstart += blen - llen;
7435                             if (strnEQ(bstart, lstart, llen) &&	bstart[-1] == '/') {
7436                                 sv_setpvn(x, ipath, ipathend - ipath);
7437                                 SvSETMAGIC(x);
7438                             }
7439                         }
7440                     }
7441                 }
7442                 else {
7443                     /* Anything to do if no copfilesv? */
7444                 }
7445                 TAINT_NOT;	/* $^X is always tainted, but that's OK */
7446             }
7447 #endif /* ARG_ZERO_IS_SCRIPT */
7448 
7449             /*
7450              * Look for options.
7451              */
7452             d = instr(s,"perl -");
7453             if (!d) {
7454                 d = instr(s,"perl");
7455 #if defined(DOSISH)
7456                 /* avoid getting into infinite loops when shebang
7457                  * line contains "Perl" rather than "perl" */
7458                 if (!d) {
7459                     for (d = ipathend-4; d >= ipath; --d) {
7460                         if (isALPHA_FOLD_EQ(*d, 'p')
7461                             && !ibcmp(d, "perl", 4))
7462                         {
7463                             break;
7464                         }
7465                     }
7466                     if (d < ipath)
7467                         d = NULL;
7468                 }
7469 #endif
7470             }
7471 #ifdef ALTERNATE_SHEBANG
7472             /*
7473              * If the ALTERNATE_SHEBANG on this system starts with a
7474              * character that can be part of a Perl expression, then if
7475              * we see it but not "perl", we're probably looking at the
7476              * start of Perl code, not a request to hand off to some
7477              * other interpreter.  Similarly, if "perl" is there, but
7478              * not in the first 'word' of the line, we assume the line
7479              * contains the start of the Perl program.
7480              */
7481             if (d && *s != '#') {
7482                 const char *c = ipath;
7483                 while (*c && !memCHRs("; \t\r\n\f\v#", *c))
7484                     c++;
7485                 if (c < d)
7486                     d = NULL;	/* "perl" not in first word; ignore */
7487                 else
7488                     *s = '#';	/* Don't try to parse shebang line */
7489             }
7490 #endif /* ALTERNATE_SHEBANG */
7491             if (!d
7492                 && *s == '#'
7493                 && ipathend > ipath
7494                 && !PL_minus_c
7495                 && !instr(s,"indir")
7496                 && instr(PL_origargv[0],"perl"))
7497             {
7498                 char **newargv;
7499 
7500                 *ipathend = '\0';
7501                 s = ipathend + 1;
7502                 while (s < PL_bufend && isSPACE(*s))
7503                     s++;
7504                 if (s < PL_bufend) {
7505                     Newx(newargv,PL_origargc+3,char*);
7506                     newargv[1] = s;
7507                     while (s < PL_bufend && !isSPACE(*s))
7508                         s++;
7509                     *s = '\0';
7510                     Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
7511                 }
7512                 else
7513                     newargv = PL_origargv;
7514                 newargv[0] = ipath;
7515                 PERL_FPU_PRE_EXEC
7516                 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
7517                 PERL_FPU_POST_EXEC
7518                 Perl_croak(aTHX_ "Can't exec %s", ipath);
7519             }
7520             if (d) {
7521                 while (*d && !isSPACE(*d))
7522                     d++;
7523                 while (SPACE_OR_TAB(*d))
7524                     d++;
7525 
7526                 if (*d++ == '-') {
7527                     const bool switches_done = PL_doswitches;
7528                     const U32 oldpdb = PL_perldb;
7529                     const bool oldn = PL_minus_n;
7530                     const bool oldp = PL_minus_p;
7531                     const char *d1 = d;
7532 
7533                     do {
7534                         bool baduni = FALSE;
7535                         if (*d1 == 'C') {
7536                             const char *d2 = d1 + 1;
7537                             if (parse_unicode_opts((const char **)&d2)
7538                                 != PL_unicode)
7539                                 baduni = TRUE;
7540                         }
7541                         if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) {
7542                             const char * const m = d1;
7543                             while (*d1 && !isSPACE(*d1))
7544                                 d1++;
7545                             Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
7546                                   (int)(d1 - m), m);
7547                         }
7548                         d1 = moreswitches(d1);
7549                     } while (d1);
7550                     if (PL_doswitches && !switches_done) {
7551                         int argc = PL_origargc;
7552                         char **argv = PL_origargv;
7553                         do {
7554                             argc--,argv++;
7555                         } while (argc && argv[0][0] == '-' && argv[0][1]);
7556                         init_argv_symbols(argc,argv);
7557                     }
7558                     if (   (PERLDB_LINE_OR_SAVESRC && !oldpdb)
7559                         || ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
7560                           /* if we have already added "LINE: while (<>) {",
7561                              we must not do it again */
7562                     {
7563                         SvPVCLEAR(PL_linestr);
7564                         PL_bufptr = PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
7565                         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7566                         PL_last_lop = PL_last_uni = NULL;
7567                         PL_preambled = FALSE;
7568                         if (PERLDB_LINE_OR_SAVESRC)
7569                             (void)gv_fetchfile(PL_origfilename);
7570                         return YYL_RETRY;
7571                     }
7572                 }
7573             }
7574         }
7575     }
7576 
7577     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
7578         PL_lex_state = LEX_FORMLINE;
7579         force_next(FORMRBRACK);
7580         TOKEN(PERLY_SEMICOLON);
7581     }
7582 
7583     PL_bufptr = s;
7584     return YYL_RETRY;
7585 }
7586 
7587 static int
yyl_fatcomma(pTHX_ char * s,STRLEN len)7588 yyl_fatcomma(pTHX_ char *s, STRLEN len)
7589 {
7590     CLINE;
7591     pl_yylval.opval
7592         = newSVOP(OP_CONST, 0,
7593                        S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
7594     pl_yylval.opval->op_private = OPpCONST_BARE;
7595     TERM(BAREWORD);
7596 }
7597 
7598 static int
yyl_safe_bareword(pTHX_ char * s,const char lastchar)7599 yyl_safe_bareword(pTHX_ char *s, const char lastchar)
7600 {
7601     if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7602         && PL_parser->saw_infix_sigil)
7603     {
7604         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7605                          "Operator or semicolon missing before %c%" UTF8f,
7606                          lastchar,
7607                          UTF8fARG(UTF, strlen(PL_tokenbuf),
7608                                   PL_tokenbuf));
7609         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7610                          "Ambiguous use of %c resolved as operator %c",
7611                          lastchar, lastchar);
7612     }
7613     TOKEN(BAREWORD);
7614 }
7615 
7616 static int
yyl_constant_op(pTHX_ char * s,SV * sv,CV * cv,OP * rv2cv_op,PADOFFSET off)7617 yyl_constant_op(pTHX_ char *s, SV *sv, CV *cv, OP *rv2cv_op, PADOFFSET off)
7618 {
7619     if (sv) {
7620         op_free(rv2cv_op);
7621         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7622         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7623         if (SvTYPE(sv) == SVt_PVAV)
7624             pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7625                                       pl_yylval.opval);
7626         else {
7627             pl_yylval.opval->op_private = 0;
7628             pl_yylval.opval->op_folded = 1;
7629             pl_yylval.opval->op_flags |= OPf_SPECIAL;
7630         }
7631         TOKEN(BAREWORD);
7632     }
7633 
7634     op_free(pl_yylval.opval);
7635     pl_yylval.opval =
7636         off ? newCVREF(0, rv2cv_op) : rv2cv_op;
7637     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7638     PL_last_lop = PL_oldbufptr;
7639     PL_last_lop_op = OP_ENTERSUB;
7640 
7641     /* Is there a prototype? */
7642     if (SvPOK(cv)) {
7643         int k = yyl_subproto(aTHX_ s, cv);
7644         if (k != KEY_NULL)
7645             return k;
7646     }
7647 
7648     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7649     PL_expect = XTERM;
7650     force_next(off ? PRIVATEREF : BAREWORD);
7651     if (!PL_lex_allbrackets
7652         && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7653     {
7654         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7655     }
7656 
7657     TOKEN(NOAMP);
7658 }
7659 
7660 /* Honour "reserved word" warnings, and enforce strict subs */
7661 static void
yyl_strictwarn_bareword(pTHX_ const char lastchar)7662 yyl_strictwarn_bareword(pTHX_ const char lastchar)
7663 {
7664     /* after "print" and similar functions (corresponding to
7665      * "F? L" in opcode.pl), whatever wasn't already parsed as
7666      * a filehandle should be subject to "strict subs".
7667      * Likewise for the optional indirect-object argument to system
7668      * or exec, which can't be a bareword */
7669     if ((PL_last_lop_op == OP_PRINT
7670             || PL_last_lop_op == OP_PRTF
7671             || PL_last_lop_op == OP_SAY
7672             || PL_last_lop_op == OP_SYSTEM
7673             || PL_last_lop_op == OP_EXEC)
7674         && (PL_hints & HINT_STRICT_SUBS))
7675     {
7676         pl_yylval.opval->op_private |= OPpCONST_STRICT;
7677     }
7678 
7679     if (lastchar != '-' && ckWARN(WARN_RESERVED)) {
7680         char *d = PL_tokenbuf;
7681         while (isLOWER(*d))
7682             d++;
7683         if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0)) {
7684             /* PL_warn_reserved is constant */
7685             GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
7686             Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7687                         PL_tokenbuf);
7688             GCC_DIAG_RESTORE_STMT;
7689         }
7690     }
7691 }
7692 
7693 static int
yyl_just_a_word(pTHX_ char * s,STRLEN len,I32 orig_keyword,struct code c)7694 yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 orig_keyword, struct code c)
7695 {
7696     int pkgname = 0;
7697     const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7698     bool safebw;
7699     bool no_op_error = FALSE;
7700     /* Use this var to track whether intuit_method has been
7701        called.  intuit_method returns 0 or > 255.  */
7702     int key = 1;
7703 
7704     if (PL_expect == XOPERATOR) {
7705         if (PL_bufptr == PL_linestart) {
7706             CopLINE_dec(PL_curcop);
7707             Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7708             CopLINE_inc(PL_curcop);
7709         }
7710         else
7711             /* We want to call no_op with s pointing after the
7712                bareword, so defer it.  But we want it to come
7713                before the Bad name croak.  */
7714             no_op_error = TRUE;
7715     }
7716 
7717     /* Get the rest if it looks like a package qualifier */
7718 
7719     if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7720         STRLEN morelen;
7721         s = scan_word6(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7722                       TRUE, &morelen, TRUE);
7723         if (no_op_error) {
7724             no_op("Bareword",s);
7725             no_op_error = FALSE;
7726         }
7727         if (!morelen)
7728             Perl_croak(aTHX_ "Bad name after %" UTF8f "%s",
7729                     UTF8fARG(UTF, len, PL_tokenbuf),
7730                     *s == '\'' ? "'" : "::");
7731         len += morelen;
7732         pkgname = 1;
7733     }
7734 
7735     if (no_op_error)
7736         no_op("Bareword",s);
7737 
7738     /* See if the name is "Foo::",
7739        in which case Foo is a bareword
7740        (and a package name). */
7741 
7742     if (len > 2 && PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':') {
7743         if (ckWARN(WARN_BAREWORD)
7744             && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7745             Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7746                         "Bareword \"%" UTF8f
7747                         "\" refers to nonexistent package",
7748                         UTF8fARG(UTF, len, PL_tokenbuf));
7749         len -= 2;
7750         PL_tokenbuf[len] = '\0';
7751         c.gv = NULL;
7752         c.gvp = 0;
7753         safebw = TRUE;
7754     }
7755     else {
7756         safebw = FALSE;
7757     }
7758 
7759     /* if we saw a global override before, get the right name */
7760 
7761     if (!c.sv)
7762         c.sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len);
7763     if (c.gvp) {
7764         SV *sv = newSVpvs("CORE::GLOBAL::");
7765         sv_catsv(sv, c.sv);
7766         SvREFCNT_dec(c.sv);
7767         c.sv = sv;
7768     }
7769 
7770     /* Presume this is going to be a bareword of some sort. */
7771     CLINE;
7772     pl_yylval.opval = newSVOP(OP_CONST, 0, c.sv);
7773     pl_yylval.opval->op_private = OPpCONST_BARE;
7774 
7775     /* And if "Foo::", then that's what it certainly is. */
7776     if (safebw)
7777         return yyl_safe_bareword(aTHX_ s, lastchar);
7778 
7779     if (!c.off) {
7780         OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(c.sv));
7781         const_op->op_private = OPpCONST_BARE;
7782         c.rv2cv_op = newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op);
7783         c.cv = c.lex
7784             ? isGV(c.gv)
7785                 ? GvCV(c.gv)
7786                 : SvROK(c.gv) && SvTYPE(SvRV(c.gv)) == SVt_PVCV
7787                     ? (CV *)SvRV(c.gv)
7788                     : ((CV *)c.gv)
7789             : rv2cv_op_cv(c.rv2cv_op, RV2CVOPCV_RETURN_STUB);
7790     }
7791 
7792     /* See if it's the indirect object for a list operator. */
7793 
7794     if (PL_oldoldbufptr
7795         && PL_oldoldbufptr < PL_bufptr
7796         && (PL_oldoldbufptr == PL_last_lop
7797             || PL_oldoldbufptr == PL_last_uni)
7798         && /* NO SKIPSPACE BEFORE HERE! */
7799            (PL_expect == XREF
7800             || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7)
7801                                                    == OA_FILEREF))
7802     {
7803         bool immediate_paren = *s == '(';
7804         SSize_t s_off;
7805 
7806         /* (Now we can afford to cross potential line boundary.) */
7807         s = skipspace(s);
7808 
7809         /* intuit_method() can indirectly call lex_next_chunk(),
7810          * invalidating s
7811          */
7812         s_off = s - SvPVX(PL_linestr);
7813         /* Two barewords in a row may indicate method call. */
7814         if (   (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
7815                 || *s == '$')
7816             && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv)))
7817         {
7818             /* the code at method: doesn't use s */
7819             goto method;
7820         }
7821         s = SvPVX(PL_linestr) + s_off;
7822 
7823         /* If not a declared subroutine, it's an indirect object. */
7824         /* (But it's an indir obj regardless for sort.) */
7825         /* Also, if "_" follows a filetest operator, it's a bareword */
7826 
7827         if (
7828             ( !immediate_paren && (PL_last_lop_op == OP_SORT
7829              || (!c.cv
7830                  && (PL_last_lop_op != OP_MAPSTART
7831                      && PL_last_lop_op != OP_GREPSTART))))
7832            || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7833                 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK)
7834                                                 == OA_FILESTATOP))
7835            )
7836         {
7837             PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7838             yyl_strictwarn_bareword(aTHX_ lastchar);
7839             op_free(c.rv2cv_op);
7840             return yyl_safe_bareword(aTHX_ s, lastchar);
7841         }
7842     }
7843 
7844     PL_expect = XOPERATOR;
7845     s = skipspace(s);
7846 
7847     /* Is this a word before a => operator? */
7848     if (*s == '=' && s[1] == '>' && !pkgname) {
7849         op_free(c.rv2cv_op);
7850         CLINE;
7851         if (c.gvp || (c.lex && !c.off)) {
7852             assert (cSVOPx(pl_yylval.opval)->op_sv == c.sv);
7853             /* This is our own scalar, created a few lines
7854                above, so this is safe. */
7855             SvREADONLY_off(c.sv);
7856             sv_setpv(c.sv, PL_tokenbuf);
7857             if (UTF && !IN_BYTES
7858              && is_utf8_string((U8*)PL_tokenbuf, len))
7859                   SvUTF8_on(c.sv);
7860             SvREADONLY_on(c.sv);
7861         }
7862         TERM(BAREWORD);
7863     }
7864 
7865     /* If followed by a paren, it's certainly a subroutine. */
7866     if (*s == '(') {
7867         CLINE;
7868         if (c.cv) {
7869             char *d = s + 1;
7870             while (SPACE_OR_TAB(*d))
7871                 d++;
7872             if (*d == ')' && (c.sv = cv_const_sv_or_av(c.cv)))
7873                 return yyl_constant_op(aTHX_ d + 1, c.sv, c.cv, c.rv2cv_op, c.off);
7874         }
7875         NEXTVAL_NEXTTOKE.opval =
7876             c.off ? c.rv2cv_op : pl_yylval.opval;
7877         if (c.off)
7878              op_free(pl_yylval.opval), force_next(PRIVATEREF);
7879         else op_free(c.rv2cv_op),      force_next(BAREWORD);
7880         pl_yylval.ival = 0;
7881         TOKEN(PERLY_AMPERSAND);
7882     }
7883 
7884     /* If followed by var or block, call it a method (unless sub) */
7885 
7886     if ((*s == '$' || *s == '{') && !c.cv && FEATURE_INDIRECT_IS_ENABLED) {
7887         op_free(c.rv2cv_op);
7888         PL_last_lop = PL_oldbufptr;
7889         PL_last_lop_op = OP_METHOD;
7890         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7891             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7892         PL_expect = XBLOCKTERM;
7893         PL_bufptr = s;
7894         return REPORT(METHCALL0);
7895     }
7896 
7897     /* If followed by a bareword, see if it looks like indir obj. */
7898 
7899     if (   key == 1
7900         && !orig_keyword
7901         && (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '$')
7902         && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv)))
7903     {
7904       method:
7905         if (c.lex && !c.off) {
7906             assert(cSVOPx(pl_yylval.opval)->op_sv == c.sv);
7907             SvREADONLY_off(c.sv);
7908             sv_setpvn(c.sv, PL_tokenbuf, len);
7909             if (UTF && !IN_BYTES
7910              && is_utf8_string((U8*)PL_tokenbuf, len))
7911                 SvUTF8_on(c.sv);
7912             else SvUTF8_off(c.sv);
7913         }
7914         op_free(c.rv2cv_op);
7915         if (key == METHCALL0 && !PL_lex_allbrackets
7916             && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7917         {
7918             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7919         }
7920         return REPORT(key);
7921     }
7922 
7923     /* Not a method, so call it a subroutine (if defined) */
7924 
7925     if (c.cv) {
7926         /* Check for a constant sub */
7927         c.sv = cv_const_sv_or_av(c.cv);
7928         return yyl_constant_op(aTHX_ s, c.sv, c.cv, c.rv2cv_op, c.off);
7929     }
7930 
7931     /* Call it a bare word */
7932 
7933     if (PL_hints & HINT_STRICT_SUBS)
7934         pl_yylval.opval->op_private |= OPpCONST_STRICT;
7935     else
7936         yyl_strictwarn_bareword(aTHX_ lastchar);
7937 
7938     op_free(c.rv2cv_op);
7939 
7940     return yyl_safe_bareword(aTHX_ s, lastchar);
7941 }
7942 
7943 static int
yyl_word_or_keyword(pTHX_ char * s,STRLEN len,I32 key,I32 orig_keyword,struct code c)7944 yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct code c)
7945 {
7946     switch (key) {
7947     default:			/* not a keyword */
7948         return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
7949 
7950     case KEY___FILE__:
7951         FUN0OP(newSVOP(OP_CONST, OPpCONST_TOKEN_FILE<<8,
7952                 newSVpv(CopFILE(PL_curcop),0)) );
7953 
7954     case KEY___LINE__:
7955         FUN0OP(newSVOP(OP_CONST, OPpCONST_TOKEN_LINE<<8,
7956                 Perl_newSVpvf(aTHX_ "%" LINE_Tf, CopLINE(PL_curcop))));
7957 
7958     case KEY___PACKAGE__:
7959         FUN0OP(newSVOP(OP_CONST, OPpCONST_TOKEN_PACKAGE<<8,
7960                 (PL_curstash
7961                      ? newSVhek(HvNAME_HEK(PL_curstash))
7962                      : &PL_sv_undef))
7963         );
7964 
7965     case KEY___DATA__:
7966     case KEY___END__:
7967         if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D'))
7968             yyl_data_handle(aTHX);
7969         return yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s);
7970 
7971     case KEY___SUB__:
7972         /* If !CvCLONE(PL_compcv) then rpeep will probably turn this into an
7973          * OP_CONST. We need to make it big enough to allow room for that if
7974          * so */
7975         FUN0OP(CvCLONE(PL_compcv)
7976                     ? newOP(OP_RUNCV, 0)
7977                     : newSVOP(OP_RUNCV, 0, &PL_sv_undef));
7978 
7979     case KEY___CLASS__:
7980         FUN0(OP_CLASSNAME);
7981 
7982     case KEY_AUTOLOAD:
7983     case KEY_DESTROY:
7984     case KEY_BEGIN:
7985     case KEY_UNITCHECK:
7986     case KEY_CHECK:
7987     case KEY_INIT:
7988     case KEY_END:
7989         if (PL_expect == XSTATE)
7990             return yyl_sub(aTHX_ PL_bufptr, key);
7991         return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
7992 
7993     case KEY_ADJUST:
7994         Perl_ck_warner_d(aTHX_
7995             packWARN(WARN_EXPERIMENTAL__CLASS), "ADJUST is experimental");
7996 
7997         /* The way that KEY_CHECK et.al. are handled currently are nothing
7998          * short of crazy. We won't copy that model for new phasers, but use
7999          * this as an experiment to test if this will work
8000          */
8001         PHASERBLOCK(KEY_ADJUST);
8002 
8003     case KEY_abs:
8004         UNI(OP_ABS);
8005 
8006     case KEY_alarm:
8007         UNI(OP_ALARM);
8008 
8009     case KEY_accept:
8010         LOP(OP_ACCEPT,XTERM);
8011 
8012     case KEY_and:
8013         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8014             return REPORT(0);
8015         OPERATOR(ANDOP);
8016 
8017     case KEY_atan2:
8018         LOP(OP_ATAN2,XTERM);
8019 
8020     case KEY_bind:
8021         LOP(OP_BIND,XTERM);
8022 
8023     case KEY_binmode:
8024         LOP(OP_BINMODE,XTERM);
8025 
8026     case KEY_bless:
8027         LOP(OP_BLESS,XTERM);
8028 
8029     case KEY_break:
8030         FUN0(OP_BREAK);
8031 
8032     case KEY_catch:
8033         PREBLOCK(KW_CATCH);
8034 
8035     case KEY_chop:
8036         UNI(OP_CHOP);
8037 
8038     case KEY_class:
8039         Perl_ck_warner_d(aTHX_
8040             packWARN(WARN_EXPERIMENTAL__CLASS), "class is experimental");
8041 
8042         s = force_word(s,BAREWORD,FALSE,TRUE);
8043         s = skipspace(s);
8044         s = force_strict_version(s);
8045         PL_expect = XATTRBLOCK;
8046         TOKEN(KW_CLASS);
8047 
8048     case KEY_continue:
8049         /* We have to disambiguate the two senses of
8050           "continue". If the next token is a '{' then
8051           treat it as the start of a continue block;
8052           otherwise treat it as a control operator.
8053          */
8054         s = skipspace(s);
8055         if (*s == '{')
8056             PREBLOCK(KW_CONTINUE);
8057         else
8058             FUN0(OP_CONTINUE);
8059 
8060     case KEY_chdir:
8061         /* may use HOME */
8062         (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
8063         UNI(OP_CHDIR);
8064 
8065     case KEY_close:
8066         UNI(OP_CLOSE);
8067 
8068     case KEY_closedir:
8069         UNI(OP_CLOSEDIR);
8070 
8071     case KEY_cmp:
8072         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8073             return REPORT(0);
8074         NCEop(OP_SCMP);
8075 
8076     case KEY_caller:
8077         UNI(OP_CALLER);
8078 
8079     case KEY_crypt:
8080 
8081         LOP(OP_CRYPT,XTERM);
8082 
8083     case KEY_chmod:
8084         LOP(OP_CHMOD,XTERM);
8085 
8086     case KEY_chown:
8087         LOP(OP_CHOWN,XTERM);
8088 
8089     case KEY_connect:
8090         LOP(OP_CONNECT,XTERM);
8091 
8092     case KEY_chr:
8093         UNI(OP_CHR);
8094 
8095     case KEY_cos:
8096         UNI(OP_COS);
8097 
8098     case KEY_chroot:
8099         UNI(OP_CHROOT);
8100 
8101     case KEY_default:
8102         PREBLOCK(KW_DEFAULT);
8103 
8104     case KEY_defer:
8105         Perl_ck_warner_d(aTHX_
8106             packWARN(WARN_EXPERIMENTAL__DEFER), "defer is experimental");
8107         PREBLOCK(KW_DEFER);
8108 
8109     case KEY_do:
8110         return yyl_do(aTHX_ s, orig_keyword);
8111 
8112     case KEY_die:
8113         PL_hints |= HINT_BLOCK_SCOPE;
8114         LOP(OP_DIE,XTERM);
8115 
8116     case KEY_defined:
8117         UNI(OP_DEFINED);
8118 
8119     case KEY_delete:
8120         UNI(OP_DELETE);
8121 
8122     case KEY_dbmopen:
8123         Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
8124                           STR_WITH_LEN("NDBM_File::"),
8125                           STR_WITH_LEN("DB_File::"),
8126                           STR_WITH_LEN("GDBM_File::"),
8127                           STR_WITH_LEN("SDBM_File::"),
8128                           STR_WITH_LEN("ODBM_File::"),
8129                           NULL);
8130         LOP(OP_DBMOPEN,XTERM);
8131 
8132     case KEY_dbmclose:
8133         UNI(OP_DBMCLOSE);
8134 
8135     case KEY_dump:
8136         LOOPX(OP_DUMP);
8137 
8138     case KEY_else:
8139         PREBLOCK(KW_ELSE);
8140 
8141     case KEY_elsif:
8142         pl_yylval.ival = CopLINE(PL_curcop);
8143         OPERATOR(KW_ELSIF);
8144 
8145     case KEY_eq:
8146         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8147             return REPORT(0);
8148         ChEop(OP_SEQ);
8149 
8150     case KEY_exists:
8151         UNI(OP_EXISTS);
8152 
8153     case KEY_exit:
8154         UNI(OP_EXIT);
8155 
8156     case KEY_eval:
8157         s = skipspace(s);
8158         if (*s == '{') { /* block eval */
8159             PL_expect = XTERMBLOCK;
8160             UNIBRACK(OP_ENTERTRY);
8161         }
8162         else { /* string eval */
8163             PL_expect = XTERM;
8164             UNIBRACK(OP_ENTEREVAL);
8165         }
8166 
8167     case KEY_evalbytes:
8168         PL_expect = XTERM;
8169         UNIBRACK(-OP_ENTEREVAL);
8170 
8171     case KEY_eof:
8172         UNI(OP_EOF);
8173 
8174     case KEY_exp:
8175         UNI(OP_EXP);
8176 
8177     case KEY_each:
8178         UNI(OP_EACH);
8179 
8180     case KEY_exec:
8181         LOP(OP_EXEC,XREF);
8182 
8183     case KEY_endhostent:
8184         FUN0(OP_EHOSTENT);
8185 
8186     case KEY_endnetent:
8187         FUN0(OP_ENETENT);
8188 
8189     case KEY_endservent:
8190         FUN0(OP_ESERVENT);
8191 
8192     case KEY_endprotoent:
8193         FUN0(OP_EPROTOENT);
8194 
8195     case KEY_endpwent:
8196         FUN0(OP_EPWENT);
8197 
8198     case KEY_endgrent:
8199         FUN0(OP_EGRENT);
8200 
8201     case KEY_field:
8202         /* TODO: maybe this should use the same parser/grammar structures as
8203          * `my`, but it's also rather messy because of the `our` conflation
8204          */
8205         Perl_ck_warner_d(aTHX_
8206             packWARN(WARN_EXPERIMENTAL__CLASS), "field is experimental");
8207 
8208         croak_kw_unless_class("field");
8209 
8210         PL_parser->in_my = KEY_field;
8211         OPERATOR(KW_FIELD);
8212 
8213     case KEY_finally:
8214         Perl_ck_warner_d(aTHX_
8215             packWARN(WARN_EXPERIMENTAL__TRY), "try/catch/finally is experimental");
8216         PREBLOCK(KW_FINALLY);
8217 
8218     case KEY_for:
8219     case KEY_foreach:
8220         return yyl_foreach(aTHX_ s);
8221 
8222     case KEY_formline:
8223         LOP(OP_FORMLINE,XTERM);
8224 
8225     case KEY_fork:
8226         FUN0(OP_FORK);
8227 
8228     case KEY_fc:
8229         UNI(OP_FC);
8230 
8231     case KEY_fcntl:
8232         LOP(OP_FCNTL,XTERM);
8233 
8234     case KEY_fileno:
8235         UNI(OP_FILENO);
8236 
8237     case KEY_flock:
8238         LOP(OP_FLOCK,XTERM);
8239 
8240     case KEY_gt:
8241         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8242             return REPORT(0);
8243         ChRop(OP_SGT);
8244 
8245     case KEY_ge:
8246         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8247             return REPORT(0);
8248         ChRop(OP_SGE);
8249 
8250     case KEY_grep:
8251         LOP(OP_GREPSTART, XREF);
8252 
8253     case KEY_goto:
8254         LOOPX(OP_GOTO);
8255 
8256     case KEY_gmtime:
8257         UNI(OP_GMTIME);
8258 
8259     case KEY_getc:
8260         UNIDOR(OP_GETC);
8261 
8262     case KEY_getppid:
8263         FUN0(OP_GETPPID);
8264 
8265     case KEY_getpgrp:
8266         UNI(OP_GETPGRP);
8267 
8268     case KEY_getpriority:
8269         LOP(OP_GETPRIORITY,XTERM);
8270 
8271     case KEY_getprotobyname:
8272         UNI(OP_GPBYNAME);
8273 
8274     case KEY_getprotobynumber:
8275         LOP(OP_GPBYNUMBER,XTERM);
8276 
8277     case KEY_getprotoent:
8278         FUN0(OP_GPROTOENT);
8279 
8280     case KEY_getpwent:
8281         FUN0(OP_GPWENT);
8282 
8283     case KEY_getpwnam:
8284         UNI(OP_GPWNAM);
8285 
8286     case KEY_getpwuid:
8287         UNI(OP_GPWUID);
8288 
8289     case KEY_getpeername:
8290         UNI(OP_GETPEERNAME);
8291 
8292     case KEY_gethostbyname:
8293         UNI(OP_GHBYNAME);
8294 
8295     case KEY_gethostbyaddr:
8296         LOP(OP_GHBYADDR,XTERM);
8297 
8298     case KEY_gethostent:
8299         FUN0(OP_GHOSTENT);
8300 
8301     case KEY_getnetbyname:
8302         UNI(OP_GNBYNAME);
8303 
8304     case KEY_getnetbyaddr:
8305         LOP(OP_GNBYADDR,XTERM);
8306 
8307     case KEY_getnetent:
8308         FUN0(OP_GNETENT);
8309 
8310     case KEY_getservbyname:
8311         LOP(OP_GSBYNAME,XTERM);
8312 
8313     case KEY_getservbyport:
8314         LOP(OP_GSBYPORT,XTERM);
8315 
8316     case KEY_getservent:
8317         FUN0(OP_GSERVENT);
8318 
8319     case KEY_getsockname:
8320         UNI(OP_GETSOCKNAME);
8321 
8322     case KEY_getsockopt:
8323         LOP(OP_GSOCKOPT,XTERM);
8324 
8325     case KEY_getgrent:
8326         FUN0(OP_GGRENT);
8327 
8328     case KEY_getgrnam:
8329         UNI(OP_GGRNAM);
8330 
8331     case KEY_getgrgid:
8332         UNI(OP_GGRGID);
8333 
8334     case KEY_getlogin:
8335         FUN0(OP_GETLOGIN);
8336 
8337     case KEY_given:
8338         pl_yylval.ival = CopLINE(PL_curcop);
8339         Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED__SMARTMATCH),
8340                          "given is deprecated");
8341         OPERATOR(KW_GIVEN);
8342 
8343     case KEY_glob:
8344         LOP( orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB, XTERM );
8345 
8346     case KEY_hex:
8347         UNI(OP_HEX);
8348 
8349     case KEY_if:
8350         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8351             return REPORT(0);
8352         pl_yylval.ival = CopLINE(PL_curcop);
8353         OPERATOR(KW_IF);
8354 
8355     case KEY_index:
8356         LOP(OP_INDEX,XTERM);
8357 
8358     case KEY_int:
8359         UNI(OP_INT);
8360 
8361     case KEY_ioctl:
8362         LOP(OP_IOCTL,XTERM);
8363 
8364     case KEY_isa:
8365         NCRop(OP_ISA);
8366 
8367     case KEY_join:
8368         LOP(OP_JOIN,XTERM);
8369 
8370     case KEY_keys:
8371         UNI(OP_KEYS);
8372 
8373     case KEY_kill:
8374         LOP(OP_KILL,XTERM);
8375 
8376     case KEY_last:
8377         LOOPX(OP_LAST);
8378 
8379     case KEY_lc:
8380         UNI(OP_LC);
8381 
8382     case KEY_lcfirst:
8383         UNI(OP_LCFIRST);
8384 
8385     case KEY_local:
8386         OPERATOR(KW_LOCAL);
8387 
8388     case KEY_length:
8389         UNI(OP_LENGTH);
8390 
8391     case KEY_lt:
8392         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8393             return REPORT(0);
8394         ChRop(OP_SLT);
8395 
8396     case KEY_le:
8397         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8398             return REPORT(0);
8399         ChRop(OP_SLE);
8400 
8401     case KEY_localtime:
8402         UNI(OP_LOCALTIME);
8403 
8404     case KEY_log:
8405         UNI(OP_LOG);
8406 
8407     case KEY_link:
8408         LOP(OP_LINK,XTERM);
8409 
8410     case KEY_listen:
8411         LOP(OP_LISTEN,XTERM);
8412 
8413     case KEY_lock:
8414         UNI(OP_LOCK);
8415 
8416     case KEY_lstat:
8417         UNI(OP_LSTAT);
8418 
8419     case KEY_m:
8420         s = scan_pat(s,OP_MATCH);
8421         TERM(sublex_start());
8422 
8423     case KEY_map:
8424         LOP(OP_MAPSTART, XREF);
8425 
8426     case KEY_mkdir:
8427         LOP(OP_MKDIR,XTERM);
8428 
8429     case KEY_msgctl:
8430         LOP(OP_MSGCTL,XTERM);
8431 
8432     case KEY_msgget:
8433         LOP(OP_MSGGET,XTERM);
8434 
8435     case KEY_msgrcv:
8436         LOP(OP_MSGRCV,XTERM);
8437 
8438     case KEY_msgsnd:
8439         LOP(OP_MSGSND,XTERM);
8440 
8441     case KEY_our:
8442     case KEY_my:
8443     case KEY_state:
8444         return yyl_my(aTHX_ s, key);
8445 
8446     case KEY_next:
8447         LOOPX(OP_NEXT);
8448 
8449     case KEY_ne:
8450         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8451             return REPORT(0);
8452         ChEop(OP_SNE);
8453 
8454     case KEY_no:
8455         s = tokenize_use(0, s);
8456         TOKEN(KW_USE_or_NO);
8457 
8458     case KEY_not:
8459         if (*s == '(' || (s = skipspace(s), *s == '('))
8460             FUN1(OP_NOT);
8461         else {
8462             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8463                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8464             OPERATOR(NOTOP);
8465         }
8466 
8467     case KEY_open:
8468         s = skipspace(s);
8469         if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
8470             const char *t;
8471             char *d = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len, FALSE);
8472             for (t=d; isSPACE(*t);)
8473                 t++;
8474             if ( *t && memCHRs("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8475                 /* [perl #16184] */
8476                 && !(t[0] == '=' && t[1] == '>')
8477                 && !(t[0] == ':' && t[1] == ':')
8478                 && !keyword(s, d-s, 0)
8479             ) {
8480                 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8481                    "Precedence problem: open %" UTF8f " should be open(%" UTF8f ")",
8482                     UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8483             }
8484         }
8485         LOP(OP_OPEN,XTERM);
8486 
8487     case KEY_or:
8488         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8489             return REPORT(0);
8490         pl_yylval.ival = OP_OR;
8491         OPERATOR(OROP);
8492 
8493     case KEY_ord:
8494         UNI(OP_ORD);
8495 
8496     case KEY_oct:
8497         UNI(OP_OCT);
8498 
8499     case KEY_opendir:
8500         LOP(OP_OPEN_DIR,XTERM);
8501 
8502     case KEY_print:
8503         checkcomma(s,PL_tokenbuf,"filehandle");
8504         LOP(OP_PRINT,XREF);
8505 
8506     case KEY_printf:
8507         checkcomma(s,PL_tokenbuf,"filehandle");
8508         LOP(OP_PRTF,XREF);
8509 
8510     case KEY_prototype:
8511         UNI(OP_PROTOTYPE);
8512 
8513     case KEY_push:
8514         LOP(OP_PUSH,XTERM);
8515 
8516     case KEY_pop:
8517         UNIDOR(OP_POP);
8518 
8519     case KEY_pos:
8520         UNIDOR(OP_POS);
8521 
8522     case KEY_pack:
8523         LOP(OP_PACK,XTERM);
8524 
8525     case KEY_package:
8526         s = force_word(s,BAREWORD,FALSE,TRUE);
8527         s = skipspace(s);
8528         s = force_strict_version(s);
8529         PREBLOCK(KW_PACKAGE);
8530 
8531     case KEY_pipe:
8532         LOP(OP_PIPE_OP,XTERM);
8533 
8534     case KEY_q:
8535         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8536         if (!s)
8537             missingterm(NULL, 0);
8538         COPLINE_SET_FROM_MULTI_END;
8539         pl_yylval.ival = OP_CONST;
8540         TERM(sublex_start());
8541 
8542     case KEY_quotemeta:
8543         UNI(OP_QUOTEMETA);
8544 
8545     case KEY_qw:
8546         return yyl_qw(aTHX_ s, len);
8547 
8548     case KEY_qq:
8549         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8550         if (!s)
8551             missingterm(NULL, 0);
8552         pl_yylval.ival = OP_STRINGIFY;
8553         if (SvIVX(PL_lex_stuff) == '\'')
8554             SvIV_set(PL_lex_stuff, 0);	/* qq'$foo' should interpolate */
8555         TERM(sublex_start());
8556 
8557     case KEY_qr:
8558         s = scan_pat(s,OP_QR);
8559         TERM(sublex_start());
8560 
8561     case KEY_qx:
8562         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8563         if (!s)
8564             missingterm(NULL, 0);
8565         pl_yylval.ival = OP_BACKTICK;
8566         TERM(sublex_start());
8567 
8568     case KEY_return:
8569         OLDLOP(OP_RETURN);
8570 
8571     case KEY_require:
8572         return yyl_require(aTHX_ s, orig_keyword);
8573 
8574     case KEY_reset:
8575         UNI(OP_RESET);
8576 
8577     case KEY_redo:
8578         LOOPX(OP_REDO);
8579 
8580     case KEY_rename:
8581         LOP(OP_RENAME,XTERM);
8582 
8583     case KEY_rand:
8584         UNI(OP_RAND);
8585 
8586     case KEY_rmdir:
8587         UNI(OP_RMDIR);
8588 
8589     case KEY_rindex:
8590         LOP(OP_RINDEX,XTERM);
8591 
8592     case KEY_read:
8593         LOP(OP_READ,XTERM);
8594 
8595     case KEY_readdir:
8596         UNI(OP_READDIR);
8597 
8598     case KEY_readline:
8599         UNIDOR(OP_READLINE);
8600 
8601     case KEY_readpipe:
8602         UNIDOR(OP_BACKTICK);
8603 
8604     case KEY_rewinddir:
8605         UNI(OP_REWINDDIR);
8606 
8607     case KEY_recv:
8608         LOP(OP_RECV,XTERM);
8609 
8610     case KEY_reverse:
8611         LOP(OP_REVERSE,XTERM);
8612 
8613     case KEY_readlink:
8614         UNIDOR(OP_READLINK);
8615 
8616     case KEY_ref:
8617         UNI(OP_REF);
8618 
8619     case KEY_s:
8620         s = scan_subst(s);
8621         if (pl_yylval.opval)
8622             TERM(sublex_start());
8623         else
8624             TOKEN(1);	/* force error */
8625 
8626     case KEY_say:
8627         checkcomma(s,PL_tokenbuf,"filehandle");
8628         LOP(OP_SAY,XREF);
8629 
8630     case KEY_chomp:
8631         UNI(OP_CHOMP);
8632 
8633     case KEY_scalar:
8634         UNI(OP_SCALAR);
8635 
8636     case KEY_select:
8637         LOP(OP_SELECT,XTERM);
8638 
8639     case KEY_seek:
8640         LOP(OP_SEEK,XTERM);
8641 
8642     case KEY_semctl:
8643         LOP(OP_SEMCTL,XTERM);
8644 
8645     case KEY_semget:
8646         LOP(OP_SEMGET,XTERM);
8647 
8648     case KEY_semop:
8649         LOP(OP_SEMOP,XTERM);
8650 
8651     case KEY_send:
8652         LOP(OP_SEND,XTERM);
8653 
8654     case KEY_setpgrp:
8655         LOP(OP_SETPGRP,XTERM);
8656 
8657     case KEY_setpriority:
8658         LOP(OP_SETPRIORITY,XTERM);
8659 
8660     case KEY_sethostent:
8661         UNI(OP_SHOSTENT);
8662 
8663     case KEY_setnetent:
8664         UNI(OP_SNETENT);
8665 
8666     case KEY_setservent:
8667         UNI(OP_SSERVENT);
8668 
8669     case KEY_setprotoent:
8670         UNI(OP_SPROTOENT);
8671 
8672     case KEY_setpwent:
8673         FUN0(OP_SPWENT);
8674 
8675     case KEY_setgrent:
8676         FUN0(OP_SGRENT);
8677 
8678     case KEY_seekdir:
8679         LOP(OP_SEEKDIR,XTERM);
8680 
8681     case KEY_setsockopt:
8682         LOP(OP_SSOCKOPT,XTERM);
8683 
8684     case KEY_shift:
8685         UNIDOR(OP_SHIFT);
8686 
8687     case KEY_shmctl:
8688         LOP(OP_SHMCTL,XTERM);
8689 
8690     case KEY_shmget:
8691         LOP(OP_SHMGET,XTERM);
8692 
8693     case KEY_shmread:
8694         LOP(OP_SHMREAD,XTERM);
8695 
8696     case KEY_shmwrite:
8697         LOP(OP_SHMWRITE,XTERM);
8698 
8699     case KEY_shutdown:
8700         LOP(OP_SHUTDOWN,XTERM);
8701 
8702     case KEY_sin:
8703         UNI(OP_SIN);
8704 
8705     case KEY_sleep:
8706         UNI(OP_SLEEP);
8707 
8708     case KEY_socket:
8709         LOP(OP_SOCKET,XTERM);
8710 
8711     case KEY_socketpair:
8712         LOP(OP_SOCKPAIR,XTERM);
8713 
8714     case KEY_sort:
8715         checkcomma(s,PL_tokenbuf,"subroutine name");
8716         s = skipspace(s);
8717         PL_expect = XTERM;
8718         s = force_word(s,BAREWORD,TRUE,TRUE);
8719         LOP(OP_SORT,XREF);
8720 
8721     case KEY_split:
8722         LOP(OP_SPLIT,XTERM);
8723 
8724     case KEY_sprintf:
8725         LOP(OP_SPRINTF,XTERM);
8726 
8727     case KEY_splice:
8728         LOP(OP_SPLICE,XTERM);
8729 
8730     case KEY_sqrt:
8731         UNI(OP_SQRT);
8732 
8733     case KEY_srand:
8734         UNI(OP_SRAND);
8735 
8736     case KEY_stat:
8737         UNI(OP_STAT);
8738 
8739     case KEY_study:
8740         UNI(OP_STUDY);
8741 
8742     case KEY_substr:
8743         LOP(OP_SUBSTR,XTERM);
8744 
8745     case KEY_method:
8746         /* For now we just treat 'method' identical to 'sub' plus a warning */
8747         Perl_ck_warner_d(aTHX_
8748             packWARN(WARN_EXPERIMENTAL__CLASS), "method is experimental");
8749         return yyl_sub(aTHX_ s, KEY_method);
8750 
8751     case KEY_format:
8752     case KEY_sub:
8753         return yyl_sub(aTHX_ s, key);
8754 
8755     case KEY_system:
8756         LOP(OP_SYSTEM,XREF);
8757 
8758     case KEY_symlink:
8759         LOP(OP_SYMLINK,XTERM);
8760 
8761     case KEY_syscall:
8762         LOP(OP_SYSCALL,XTERM);
8763 
8764     case KEY_sysopen:
8765         LOP(OP_SYSOPEN,XTERM);
8766 
8767     case KEY_sysseek:
8768         LOP(OP_SYSSEEK,XTERM);
8769 
8770     case KEY_sysread:
8771         LOP(OP_SYSREAD,XTERM);
8772 
8773     case KEY_syswrite:
8774         LOP(OP_SYSWRITE,XTERM);
8775 
8776     case KEY_tr:
8777     case KEY_y:
8778         s = scan_trans(s);
8779         TERM(sublex_start());
8780 
8781     case KEY_tell:
8782         UNI(OP_TELL);
8783 
8784     case KEY_telldir:
8785         UNI(OP_TELLDIR);
8786 
8787     case KEY_tie:
8788         LOP(OP_TIE,XTERM);
8789 
8790     case KEY_tied:
8791         UNI(OP_TIED);
8792 
8793     case KEY_time:
8794         FUN0(OP_TIME);
8795 
8796     case KEY_times:
8797         FUN0(OP_TMS);
8798 
8799     case KEY_truncate:
8800         LOP(OP_TRUNCATE,XTERM);
8801 
8802     case KEY_try:
8803         pl_yylval.ival = CopLINE(PL_curcop);
8804         PREBLOCK(KW_TRY);
8805 
8806     case KEY_uc:
8807         UNI(OP_UC);
8808 
8809     case KEY_ucfirst:
8810         UNI(OP_UCFIRST);
8811 
8812     case KEY_untie:
8813         UNI(OP_UNTIE);
8814 
8815     case KEY_until:
8816         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8817             return REPORT(0);
8818         pl_yylval.ival = CopLINE(PL_curcop);
8819         OPERATOR(KW_UNTIL);
8820 
8821     case KEY_unless:
8822         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8823             return REPORT(0);
8824         pl_yylval.ival = CopLINE(PL_curcop);
8825         OPERATOR(KW_UNLESS);
8826 
8827     case KEY_unlink:
8828         LOP(OP_UNLINK,XTERM);
8829 
8830     case KEY_undef:
8831         UNIDOR(OP_UNDEF);
8832 
8833     case KEY_unpack:
8834         LOP(OP_UNPACK,XTERM);
8835 
8836     case KEY_utime:
8837         LOP(OP_UTIME,XTERM);
8838 
8839     case KEY_umask:
8840         UNIDOR(OP_UMASK);
8841 
8842     case KEY_unshift:
8843         LOP(OP_UNSHIFT,XTERM);
8844 
8845     case KEY_use:
8846         s = tokenize_use(1, s);
8847         TOKEN(KW_USE_or_NO);
8848 
8849     case KEY_values:
8850         UNI(OP_VALUES);
8851 
8852     case KEY_vec:
8853         LOP(OP_VEC,XTERM);
8854 
8855     case KEY_when:
8856         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8857             return REPORT(0);
8858         pl_yylval.ival = CopLINE(PL_curcop);
8859         Perl_ck_warner_d(aTHX_
8860             packWARN(WARN_DEPRECATED__SMARTMATCH),
8861             "when is deprecated");
8862         OPERATOR(KW_WHEN);
8863 
8864     case KEY_while:
8865         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8866             return REPORT(0);
8867         pl_yylval.ival = CopLINE(PL_curcop);
8868         OPERATOR(KW_WHILE);
8869 
8870     case KEY_warn:
8871         PL_hints |= HINT_BLOCK_SCOPE;
8872         LOP(OP_WARN,XTERM);
8873 
8874     case KEY_wait:
8875         FUN0(OP_WAIT);
8876 
8877     case KEY_waitpid:
8878         LOP(OP_WAITPID,XTERM);
8879 
8880     case KEY_wantarray:
8881         FUN0(OP_WANTARRAY);
8882 
8883     case KEY_write:
8884         /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8885          * we use the same number on EBCDIC */
8886         gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8887         UNI(OP_ENTERWRITE);
8888 
8889     case KEY_x:
8890         if (PL_expect == XOPERATOR) {
8891             if (*s == '=' && !PL_lex_allbrackets
8892                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8893             {
8894                 return REPORT(0);
8895             }
8896             Mop(OP_REPEAT);
8897         }
8898         check_uni();
8899         return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
8900 
8901     case KEY_xor:
8902         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8903             return REPORT(0);
8904         pl_yylval.ival = OP_XOR;
8905         OPERATOR(OROP);
8906     }
8907 }
8908 
8909 static int
yyl_key_core(pTHX_ char * s,STRLEN len,struct code c)8910 yyl_key_core(pTHX_ char *s, STRLEN len, struct code c)
8911 {
8912     I32 key = 0;
8913     I32 orig_keyword = 0;
8914     STRLEN olen = len;
8915     char *d = s;
8916     s += 2;
8917     s = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len, FALSE);
8918     if ((*s == ':' && s[1] == ':')
8919         || (!(key = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
8920     {
8921         Copy(PL_bufptr, PL_tokenbuf, olen, char);
8922         return yyl_just_a_word(aTHX_ d, olen, 0, c);
8923     }
8924     if (!key)
8925         Perl_croak(aTHX_ "CORE::%" UTF8f " is not a keyword",
8926                           UTF8fARG(UTF, len, PL_tokenbuf));
8927     if (key < 0)
8928         key = -key;
8929     else if (key == KEY_require || key == KEY_do
8930           || key == KEY_glob)
8931         /* that's a way to remember we saw "CORE::" */
8932         orig_keyword = key;
8933 
8934     /* Known to be a reserved word at this point */
8935     return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c);
8936 }
8937 
8938 struct Perl_custom_infix_result {
8939     struct Perl_custom_infix *def;
8940     SV                       *parsedata;
8941 };
8942 
tokentype_for_plugop(struct Perl_custom_infix * def)8943 static enum yytokentype tokentype_for_plugop(struct Perl_custom_infix *def)
8944 {
8945     enum Perl_custom_infix_precedence prec = def->prec;
8946     if(prec <= INFIX_PREC_LOW)
8947         return PLUGIN_LOW_OP;
8948     if(prec <= INFIX_PREC_LOGICAL_OR_LOW)
8949         return PLUGIN_LOGICAL_OR_LOW_OP;
8950     if(prec <= INFIX_PREC_LOGICAL_AND_LOW)
8951         return PLUGIN_LOGICAL_AND_LOW_OP;
8952     if(prec <= INFIX_PREC_ASSIGN)
8953         return PLUGIN_ASSIGN_OP;
8954     if(prec <= INFIX_PREC_LOGICAL_OR)
8955         return PLUGIN_LOGICAL_OR_OP;
8956     if(prec <= INFIX_PREC_LOGICAL_AND)
8957         return PLUGIN_LOGICAL_AND_OP;
8958     if(prec <= INFIX_PREC_REL)
8959         return PLUGIN_REL_OP;
8960     if(prec <= INFIX_PREC_ADD)
8961         return PLUGIN_ADD_OP;
8962     if(prec <= INFIX_PREC_MUL)
8963         return PLUGIN_MUL_OP;
8964     if(prec <= INFIX_PREC_POW)
8965         return PLUGIN_POW_OP;
8966     return PLUGIN_HIGH_OP;
8967 }
8968 
8969 OP *
Perl_build_infix_plugin(pTHX_ OP * lhs,OP * rhs,void * tokendata)8970 Perl_build_infix_plugin(pTHX_ OP *lhs, OP *rhs, void *tokendata)
8971 {
8972     PERL_ARGS_ASSERT_BUILD_INFIX_PLUGIN;
8973 
8974     struct Perl_custom_infix_result *result = (struct Perl_custom_infix_result *)tokendata;
8975     SAVEFREEPV(result);
8976     if(result->parsedata)
8977         SAVEFREESV(result->parsedata);
8978 
8979     return (*result->def->build_op)(aTHX_
8980         &result->parsedata, lhs, rhs, result->def);
8981 }
8982 
8983 static int
yyl_keylookup(pTHX_ char * s,GV * gv)8984 yyl_keylookup(pTHX_ char *s, GV *gv)
8985 {
8986     STRLEN len;
8987     bool anydelim;
8988     I32 key;
8989     struct code c = no_code;
8990     I32 orig_keyword = 0;
8991     char *d;
8992 
8993     c.gv = gv;
8994 
8995     PL_bufptr = s;
8996     s = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len, FALSE);
8997 
8998     /* Some keywords can be followed by any delimiter, including ':' */
8999     anydelim = word_takes_any_delimiter(PL_tokenbuf, len);
9000 
9001     /* x::* is just a word, unless x is "CORE" */
9002     if (!anydelim && *s == ':' && s[1] == ':') {
9003         if (memEQs(PL_tokenbuf, len, "CORE"))
9004             return yyl_key_core(aTHX_ s, len, c);
9005         return yyl_just_a_word(aTHX_ s, len, 0, c);
9006     }
9007 
9008     d = s;
9009     while (d < PL_bufend && isSPACE(*d))
9010             d++;	/* no comments skipped here, or s### is misparsed */
9011 
9012     /* Is this a word before a => operator? */
9013     if (*d == '=' && d[1] == '>') {
9014         return yyl_fatcomma(aTHX_ s, len);
9015     }
9016 
9017     /* Check for plugged-in keyword */
9018     {
9019         OP *o;
9020         int result;
9021         char *saved_bufptr = PL_bufptr;
9022         PL_bufptr = s;
9023         result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
9024         s = PL_bufptr;
9025         if (result == KEYWORD_PLUGIN_DECLINE) {
9026             /* not a plugged-in keyword */
9027             PL_bufptr = saved_bufptr;
9028         } else if (result == KEYWORD_PLUGIN_STMT) {
9029             pl_yylval.opval = o;
9030             CLINE;
9031             if (!PL_nexttoke) PL_expect = XSTATE;
9032             return REPORT(PLUGSTMT);
9033         } else if (result == KEYWORD_PLUGIN_EXPR) {
9034             pl_yylval.opval = o;
9035             CLINE;
9036             if (!PL_nexttoke) PL_expect = XOPERATOR;
9037             return REPORT(PLUGEXPR);
9038         } else {
9039             Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'", PL_tokenbuf);
9040         }
9041     }
9042 
9043     /* Check for plugged-in named operator */
9044     if(PLUGINFIX_IS_ENABLED) {
9045         struct Perl_custom_infix *def;
9046         STRLEN result;
9047         result = PL_infix_plugin(aTHX_ PL_tokenbuf, len, &def);
9048         if(result) {
9049             if(result != len)
9050                 Perl_croak(aTHX_ "Bad infix plugin result (%zd) - did not consume entire identifier <%s>\n",
9051                     result, PL_tokenbuf);
9052             PL_bufptr = s = d;
9053             struct Perl_custom_infix_result *result;
9054             Newx(result, 1, struct Perl_custom_infix_result);
9055             result->def = def;
9056             result->parsedata = NULL;
9057             if(def->parse) {
9058                 (*def->parse)(aTHX_ &result->parsedata, def);
9059                 s = PL_bufptr; /* restore local s variable */
9060             }
9061             pl_yylval.pval = result;
9062             CLINE;
9063             OPERATOR(tokentype_for_plugop(def));
9064         }
9065     }
9066 
9067     /* Is this a label? */
9068     if (!anydelim && PL_expect == XSTATE
9069           && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
9070         s = d + 1;
9071         pl_yylval.opval =
9072             newSVOP(OP_CONST, 0,
9073                 newSVpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0));
9074         CLINE;
9075         TOKEN(LABEL);
9076     }
9077 
9078     /* Check for lexical sub */
9079     if (PL_expect != XOPERATOR) {
9080         char tmpbuf[sizeof PL_tokenbuf + 1];
9081         *tmpbuf = '&';
9082         Copy(PL_tokenbuf, tmpbuf+1, len, char);
9083         c.off = pad_findmy_pvn(tmpbuf, len+1, 0);
9084         if (c.off != NOT_IN_PAD) {
9085             assert(c.off); /* we assume this is boolean-true below */
9086             if (PAD_COMPNAME_FLAGS_isOUR(c.off)) {
9087                 HV *  const stash = PAD_COMPNAME_OURSTASH(c.off);
9088                 HEK * const stashname = HvNAME_HEK(stash);
9089                 c.sv = newSVhek(stashname);
9090                 sv_catpvs(c.sv, "::");
9091                 sv_catpvn_flags(c.sv, PL_tokenbuf, len,
9092                                 (UTF ? SV_CATUTF8 : SV_CATBYTES));
9093                 c.gv = gv_fetchsv(c.sv, GV_NOADD_NOINIT | SvUTF8(c.sv),
9094                                   SVt_PVCV);
9095                 c.off = 0;
9096                 if (!c.gv) {
9097                     ASSUME(c.sv && SvREFCNT(c.sv) == 1);
9098                     SvREFCNT_dec(c.sv);
9099                     c.sv = NULL;
9100                     return yyl_just_a_word(aTHX_ s, len, 0, c);
9101                 }
9102             }
9103             else {
9104                 c.rv2cv_op = newOP(OP_PADANY, 0);
9105                 c.rv2cv_op->op_targ = c.off;
9106                 c.cv = find_lexical_cv(c.off);
9107             }
9108             c.lex = TRUE;
9109             return yyl_just_a_word(aTHX_ s, len, 0, c);
9110         }
9111         c.off = 0;
9112     }
9113 
9114     /* Check for built-in keyword */
9115     key = keyword(PL_tokenbuf, len, 0);
9116 
9117     if (key < 0)
9118         key = yyl_secondclass_keyword(aTHX_ s, len, key, &orig_keyword, &c.gv, &c.gvp);
9119 
9120     if (key && key != KEY___DATA__ && key != KEY___END__
9121      && (!anydelim || *s != '#')) {
9122         /* no override, and not s### either; skipspace is safe here
9123          * check for => on following line */
9124         bool arrow;
9125         STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
9126         STRLEN   soff = s         - SvPVX(PL_linestr);
9127         s = peekspace(s);
9128         arrow = *s == '=' && s[1] == '>';
9129         PL_bufptr = SvPVX(PL_linestr) + bufoff;
9130         s         = SvPVX(PL_linestr) +   soff;
9131         if (arrow)
9132             return yyl_fatcomma(aTHX_ s, len);
9133     }
9134 
9135     return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c);
9136 }
9137 
9138 static int
yyl_try(pTHX_ char * s)9139 yyl_try(pTHX_ char *s)
9140 {
9141     char *d;
9142     GV *gv = NULL;
9143     int tok;
9144 
9145   retry:
9146     /* Check for plugged-in symbolic operator */
9147     if(PLUGINFIX_IS_ENABLED && isPLUGINFIX_FIRST(*s)) {
9148         struct Perl_custom_infix *def;
9149         char *s_end = s, *d = PL_tokenbuf;
9150         STRLEN len;
9151 
9152         /* Copy the longest sequence of isPLUGINFIX() chars into PL_tokenbuf */
9153         while(s_end < PL_bufend && d < PL_tokenbuf+sizeof(PL_tokenbuf)-1 && isPLUGINFIX(*s_end))
9154             *d++ = *s_end++;
9155         *d = '\0';
9156 
9157         if((len = (*PL_infix_plugin)(aTHX_ PL_tokenbuf, s_end - s, &def))) {
9158             s += len;
9159             struct Perl_custom_infix_result *result;
9160             Newx(result, 1, struct Perl_custom_infix_result);
9161             result->def = def;
9162             result->parsedata = NULL;
9163             if(def->parse) {
9164                 PL_bufptr = s;
9165                 (*def->parse)(aTHX_ &result->parsedata, def);
9166                 s = PL_bufptr; /* restore local s variable */
9167             }
9168             pl_yylval.pval = result;
9169             CLINE;
9170             OPERATOR(tokentype_for_plugop(def));
9171         }
9172     }
9173 
9174     switch (*s) {
9175     default:
9176         if (UTF ? isIDFIRST_utf8_safe(s, PL_bufend) : isALNUMC(*s)) {
9177             if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9178                 return tok;
9179             goto retry_bufptr;
9180         }
9181         yyl_croak_unrecognised(aTHX_ s);
9182 
9183     case 4:
9184     case 26:
9185         /* emulate EOF on ^D or ^Z */
9186         if ((tok = yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s)) != YYL_RETRY)
9187             return tok;
9188     retry_bufptr:
9189         s = PL_bufptr;
9190         goto retry;
9191 
9192     case 0:
9193         if ((!PL_rsfp || PL_lex_inwhat)
9194          && (!PL_parser->filtered || s+1 < PL_bufend)) {
9195             PL_last_uni = 0;
9196             PL_last_lop = 0;
9197             if (PL_lex_brackets
9198                 && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF)
9199             {
9200                 yyerror((const char *)
9201                         (PL_lex_formbrack
9202                          ? "Format not terminated"
9203                          : "Missing right curly or square bracket"));
9204             }
9205             DEBUG_T({
9206                 PerlIO_printf(Perl_debug_log, "### Tokener got EOF\n");
9207             });
9208             TOKEN(0);
9209         }
9210         if (s++ < PL_bufend)
9211             goto retry;  /* ignore stray nulls */
9212         PL_last_uni = 0;
9213         PL_last_lop = 0;
9214         if (!PL_in_eval && !PL_preambled) {
9215             PL_preambled = TRUE;
9216             if (PL_perldb) {
9217                 /* Generate a string of Perl code to load the debugger.
9218                  * If PERL5DB is set, it will return the contents of that,
9219                  * otherwise a compile-time require of perl5db.pl.  */
9220 
9221                 const char * const pdb = PerlEnv_getenv("PERL5DB");
9222 
9223                 if (pdb) {
9224                     sv_setpv(PL_linestr, pdb);
9225                     sv_catpvs(PL_linestr,";");
9226                 } else {
9227                     SETERRNO(0,SS_NORMAL);
9228                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
9229                 }
9230                 PL_parser->preambling = CopLINE(PL_curcop);
9231             } else
9232                 SvPVCLEAR(PL_linestr);
9233             if (PL_preambleav) {
9234                 SV **svp = AvARRAY(PL_preambleav);
9235                 SV **const end = svp + AvFILLp(PL_preambleav);
9236                 while(svp <= end) {
9237                     sv_catsv(PL_linestr, *svp);
9238                     ++svp;
9239                     sv_catpvs(PL_linestr, ";");
9240                 }
9241                 SvREFCNT_dec(MUTABLE_SV(PL_preambleav));
9242                 PL_preambleav = NULL;
9243             }
9244             if (PL_minus_E)
9245                 sv_catpvs(PL_linestr,
9246                           "use feature ':" STRINGIFY(PERL_REVISION) "." STRINGIFY(PERL_VERSION) "'; "
9247                           "use builtin ':" STRINGIFY(PERL_REVISION) "." STRINGIFY(PERL_VERSION) "';");
9248             if (PL_minus_n || PL_minus_p) {
9249                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
9250                 if (PL_minus_l)
9251                     sv_catpvs(PL_linestr,"chomp;");
9252                 if (PL_minus_a) {
9253                     if (PL_minus_F) {
9254                         if (   (   *PL_splitstr == '/'
9255                                 || *PL_splitstr == '\''
9256                                 || *PL_splitstr == '"')
9257                             && strchr(PL_splitstr + 1, *PL_splitstr))
9258                         {
9259                             /* strchr is ok, because -F pattern can't contain
9260                              * embedded NULs */
9261                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
9262                         }
9263                         else {
9264                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
9265                                bytes can be used as quoting characters.  :-) */
9266                             const char *splits = PL_splitstr;
9267                             sv_catpvs(PL_linestr, "our @F=split(q\0");
9268                             do {
9269                                 /* Need to \ \s  */
9270                                 if (*splits == '\\')
9271                                     sv_catpvn(PL_linestr, splits, 1);
9272                                 sv_catpvn(PL_linestr, splits, 1);
9273                             } while (*splits++);
9274                             /* This loop will embed the trailing NUL of
9275                                PL_linestr as the last thing it does before
9276                                terminating.  */
9277                             sv_catpvs(PL_linestr, ");");
9278                         }
9279                     }
9280                     else
9281                         sv_catpvs(PL_linestr,"our @F=split(' ');");
9282                 }
9283             }
9284             sv_catpvs(PL_linestr, "\n");
9285             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
9286             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9287             PL_last_lop = PL_last_uni = NULL;
9288             if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash)
9289                 update_debugger_info(PL_linestr, NULL, 0);
9290             goto retry;
9291         }
9292         if ((tok = yyl_fake_eof(aTHX_ 0, cBOOL(PL_rsfp), s)) != YYL_RETRY)
9293             return tok;
9294         goto retry_bufptr;
9295 
9296     case '\r':
9297 #ifdef PERL_STRICT_CR
9298         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
9299         Perl_croak(aTHX_
9300       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
9301 #endif
9302     case ' ': case '\t': case '\f': case '\v':
9303         s++;
9304         goto retry;
9305 
9306     case '#':
9307     case '\n': {
9308         const bool needs_semicolon = yyl_eol_needs_semicolon(aTHX_ &s);
9309         if (needs_semicolon)
9310             TOKEN(PERLY_SEMICOLON);
9311         else
9312             goto retry;
9313     }
9314 
9315     case '-':
9316         return yyl_hyphen(aTHX_ s);
9317 
9318     case '+':
9319         return yyl_plus(aTHX_ s);
9320 
9321     case '*':
9322         return yyl_star(aTHX_ s);
9323 
9324     case '%':
9325         return yyl_percent(aTHX_ s);
9326 
9327     case '^':
9328         return yyl_caret(aTHX_ s);
9329 
9330     case '[':
9331         return yyl_leftsquare(aTHX_ s);
9332 
9333     case '~':
9334         return yyl_tilde(aTHX_ s);
9335 
9336     case ',':
9337         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
9338             TOKEN(0);
9339         s++;
9340         OPERATOR(PERLY_COMMA);
9341     case ':':
9342         if (s[1] == ':')
9343             return yyl_just_a_word(aTHX_ s, 0, 0, no_code);
9344         return yyl_colon(aTHX_ s + 1);
9345 
9346     case '(':
9347         return yyl_leftparen(aTHX_ s + 1);
9348 
9349     case ';':
9350         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
9351             TOKEN(0);
9352         CLINE;
9353         s++;
9354         PL_expect = XSTATE;
9355         TOKEN(PERLY_SEMICOLON);
9356 
9357     case ')':
9358         return yyl_rightparen(aTHX_ s);
9359 
9360     case ']':
9361         return yyl_rightsquare(aTHX_ s);
9362 
9363     case '{':
9364         return yyl_leftcurly(aTHX_ s + 1, 0);
9365 
9366     case '}':
9367         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
9368             TOKEN(0);
9369         return yyl_rightcurly(aTHX_ s, 0);
9370 
9371     case '&':
9372         return yyl_ampersand(aTHX_ s);
9373 
9374     case '|':
9375         return yyl_verticalbar(aTHX_ s);
9376 
9377     case '=':
9378         if (s[1] == '=' && (s == PL_linestart || s[-1] == '\n')
9379             && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "====="))
9380         {
9381             s = vcs_conflict_marker(s + 7);
9382             goto retry;
9383         }
9384 
9385         s++;
9386         {
9387             const char tmp = *s++;
9388             if (tmp == '=') {
9389                 if (!PL_lex_allbrackets
9390                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
9391                 {
9392                     s -= 2;
9393                     TOKEN(0);
9394                 }
9395                 ChEop(OP_EQ);
9396             }
9397             if (tmp == '>') {
9398                 if (!PL_lex_allbrackets
9399                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
9400                 {
9401                     s -= 2;
9402                     TOKEN(0);
9403                 }
9404                 OPERATOR(PERLY_COMMA);
9405             }
9406             if (tmp == '~')
9407                 PMop(OP_MATCH);
9408             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
9409                 && memCHRs("+-*/%.^&|<",tmp))
9410                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9411                             "Reversed %c= operator",(int)tmp);
9412             s--;
9413             if (PL_expect == XSTATE
9414                 && isALPHA(tmp)
9415                 && (s == PL_linestart+1 || s[-2] == '\n') )
9416             {
9417                 if (   (PL_in_eval && !PL_rsfp && !PL_parser->filtered)
9418                     || PL_lex_state != LEX_NORMAL)
9419                 {
9420                     d = PL_bufend;
9421                     while (s < d) {
9422                         if (*s++ == '\n') {
9423                             incline(s, PL_bufend);
9424                             if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=cut"))
9425                             {
9426                                 s = (char *) memchr(s,'\n', d - s);
9427                                 if (s)
9428                                     s++;
9429                                 else
9430                                     s = d;
9431                                 incline(s, PL_bufend);
9432                                 goto retry;
9433                             }
9434                         }
9435                     }
9436                     goto retry;
9437                 }
9438                 s = PL_bufend;
9439                 PL_parser->in_pod = 1;
9440                 goto retry;
9441             }
9442         }
9443         if (PL_expect == XBLOCK) {
9444             const char *t = s;
9445 #ifdef PERL_STRICT_CR
9446             while (SPACE_OR_TAB(*t))
9447 #else
9448             while (SPACE_OR_TAB(*t) || *t == '\r')
9449 #endif
9450                 t++;
9451             if (*t == '\n' || *t == '#') {
9452                 ENTER_with_name("lex_format");
9453                 SAVEI8(PL_parser->form_lex_state);
9454                 SAVEI32(PL_lex_formbrack);
9455                 PL_parser->form_lex_state = PL_lex_state;
9456                 PL_lex_formbrack = PL_lex_brackets + 1;
9457                 PL_parser->sub_error_count = PL_error_count;
9458                 return yyl_leftcurly(aTHX_ s, 1);
9459             }
9460         }
9461         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
9462             s--;
9463             TOKEN(0);
9464         }
9465         pl_yylval.ival = 0;
9466         OPERATOR(ASSIGNOP);
9467 
9468         case '!':
9469         return yyl_bang(aTHX_ s + 1);
9470 
9471     case '<':
9472         if (s[1] == '<' && (s == PL_linestart || s[-1] == '\n')
9473             && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "<<<<<"))
9474         {
9475             s = vcs_conflict_marker(s + 7);
9476             goto retry;
9477         }
9478         return yyl_leftpointy(aTHX_ s);
9479 
9480     case '>':
9481         if (s[1] == '>' && (s == PL_linestart || s[-1] == '\n')
9482             && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), ">>>>>"))
9483         {
9484             s = vcs_conflict_marker(s + 7);
9485             goto retry;
9486         }
9487         return yyl_rightpointy(aTHX_ s + 1);
9488 
9489     case '$':
9490         return yyl_dollar(aTHX_ s);
9491 
9492     case '@':
9493         return yyl_snail(aTHX_ s);
9494 
9495     case '/':			/* may be division, defined-or, or pattern */
9496         return yyl_slash(aTHX_ s);
9497 
9498      case '?':			/* conditional */
9499         s++;
9500         if (!PL_lex_allbrackets
9501             && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE)
9502         {
9503             s--;
9504             TOKEN(0);
9505         }
9506         PL_lex_allbrackets++;
9507         OPERATOR(PERLY_QUESTION_MARK);
9508 
9509     case '.':
9510         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
9511 #ifdef PERL_STRICT_CR
9512             && s[1] == '\n'
9513 #else
9514             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
9515 #endif
9516             && (s == PL_linestart || s[-1] == '\n') )
9517         {
9518             PL_expect = XSTATE;
9519             /* formbrack==2 means dot seen where arguments expected */
9520             return yyl_rightcurly(aTHX_ s, 2);
9521         }
9522         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
9523             s += 3;
9524             OPERATOR(YADAYADA);
9525         }
9526         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
9527             char tmp = *s++;
9528             if (*s == tmp) {
9529                 if (!PL_lex_allbrackets
9530                     && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE)
9531                 {
9532                     s--;
9533                     TOKEN(0);
9534                 }
9535                 s++;
9536                 if (*s == tmp) {
9537                     s++;
9538                     pl_yylval.ival = OPf_SPECIAL;
9539                 }
9540                 else
9541                     pl_yylval.ival = 0;
9542                 OPERATOR(DOTDOT);
9543             }
9544             if (*s == '=' && !PL_lex_allbrackets
9545                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
9546             {
9547                 s--;
9548                 TOKEN(0);
9549             }
9550             Aop(OP_CONCAT);
9551         }
9552         /* FALLTHROUGH */
9553     case '0': case '1': case '2': case '3': case '4':
9554     case '5': case '6': case '7': case '8': case '9':
9555         s = scan_num(s, &pl_yylval);
9556         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
9557         if (PL_expect == XOPERATOR)
9558             no_op("Number",s);
9559         TERM(THING);
9560 
9561     case '\'':
9562         return yyl_sglquote(aTHX_ s);
9563 
9564     case '"':
9565         return yyl_dblquote(aTHX_ s);
9566 
9567     case '`':
9568         return yyl_backtick(aTHX_ s);
9569 
9570     case '\\':
9571         return yyl_backslash(aTHX_ s + 1);
9572 
9573     case 'v':
9574         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
9575             char *start = s + 2;
9576             while (isDIGIT(*start) || *start == '_')
9577                 start++;
9578             if (*start == '.' && isDIGIT(start[1])) {
9579                 s = scan_num(s, &pl_yylval);
9580                 TERM(THING);
9581             }
9582             else if ((*start == ':' && start[1] == ':')
9583                      || (PL_expect == XSTATE && *start == ':')) {
9584                 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9585                     return tok;
9586                 goto retry_bufptr;
9587             }
9588             else if (PL_expect == XSTATE) {
9589                 d = start;
9590                 while (d < PL_bufend && isSPACE(*d)) d++;
9591                 if (*d == ':') {
9592                     if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9593                         return tok;
9594                     goto retry_bufptr;
9595                 }
9596             }
9597             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
9598             if (!isALPHA(*start) && (PL_expect == XTERM
9599                         || PL_expect == XREF || PL_expect == XSTATE
9600                         || PL_expect == XTERMORDORDOR)) {
9601                 GV *const gv = gv_fetchpvn_flags(s, start - s,
9602                                                     UTF ? SVf_UTF8 : 0, SVt_PVCV);
9603                 if (!gv) {
9604                     s = scan_num(s, &pl_yylval);
9605                     TERM(THING);
9606                 }
9607             }
9608         }
9609         if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9610             return tok;
9611         goto retry_bufptr;
9612 
9613     case 'x':
9614         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
9615             s++;
9616             Mop(OP_REPEAT);
9617         }
9618         if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9619             return tok;
9620         goto retry_bufptr;
9621 
9622     case '_':
9623     case 'a': case 'A':
9624     case 'b': case 'B':
9625     case 'c': case 'C':
9626     case 'd': case 'D':
9627     case 'e': case 'E':
9628     case 'f': case 'F':
9629     case 'g': case 'G':
9630     case 'h': case 'H':
9631     case 'i': case 'I':
9632     case 'j': case 'J':
9633     case 'k': case 'K':
9634     case 'l': case 'L':
9635     case 'm': case 'M':
9636     case 'n': case 'N':
9637     case 'o': case 'O':
9638     case 'p': case 'P':
9639     case 'q': case 'Q':
9640     case 'r': case 'R':
9641     case 's': case 'S':
9642     case 't': case 'T':
9643     case 'u': case 'U':
9644               case 'V':
9645     case 'w': case 'W':
9646               case 'X':
9647     case 'y': case 'Y':
9648     case 'z': case 'Z':
9649         if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9650             return tok;
9651         goto retry_bufptr;
9652     }
9653 }
9654 
9655 
9656 /*
9657   yylex
9658 
9659   Works out what to call the token just pulled out of the input
9660   stream.  The yacc parser takes care of taking the ops we return and
9661   stitching them into a tree.
9662 
9663   Returns:
9664     The type of the next token
9665 
9666   Structure:
9667       Check if we have already built the token; if so, use it.
9668       Switch based on the current state:
9669           - if we have a case modifier in a string, deal with that
9670           - handle other cases of interpolation inside a string
9671           - scan the next line if we are inside a format
9672       In the normal state, switch on the next character:
9673           - default:
9674             if alphabetic, go to key lookup
9675             unrecognized character - croak
9676           - 0/4/26: handle end-of-line or EOF
9677           - cases for whitespace
9678           - \n and #: handle comments and line numbers
9679           - various operators, brackets and sigils
9680           - numbers
9681           - quotes
9682           - 'v': vstrings (or go to key lookup)
9683           - 'x' repetition operator (or go to key lookup)
9684           - other ASCII alphanumerics (key lookup begins here):
9685               word before => ?
9686               keyword plugin
9687               scan built-in keyword (but do nothing with it yet)
9688               check for statement label
9689               check for lexical subs
9690                   return yyl_just_a_word if there is one
9691               see whether built-in keyword is overridden
9692               switch on keyword number:
9693                   - default: return yyl_just_a_word:
9694                       not a built-in keyword; handle bareword lookup
9695                       disambiguate between method and sub call
9696                       fall back to bareword
9697                   - cases for built-in keywords
9698 */
9699 
9700 int
Perl_yylex(pTHX)9701 Perl_yylex(pTHX)
9702 {
9703     char *s = PL_bufptr;
9704 
9705     if (UNLIKELY(PL_parser->recheck_utf8_validity)) {
9706         const U8* first_bad_char_loc;
9707         if (UTF && UNLIKELY(! is_utf8_string_loc((U8 *) PL_bufptr,
9708                                                         PL_bufend - PL_bufptr,
9709                                                         &first_bad_char_loc)))
9710         {
9711             _force_out_malformed_utf8_message(first_bad_char_loc,
9712                                               (U8 *) PL_bufend,
9713                                               0,
9714                                               1 /* 1 means die */ );
9715             NOT_REACHED; /* NOTREACHED */
9716         }
9717         PL_parser->recheck_utf8_validity = FALSE;
9718     }
9719     DEBUG_T( {
9720         SV* tmp = newSVpvs("");
9721         PerlIO_printf(Perl_debug_log, "### %" LINE_Tf ":LEX_%s/X%s %s\n",
9722             CopLINE(PL_curcop),
9723             lex_state_names[PL_lex_state],
9724             exp_name[PL_expect],
9725             pv_display(tmp, s, strlen(s), 0, 60));
9726         SvREFCNT_dec(tmp);
9727     } );
9728 
9729     /* when we've already built the next token, just pull it out of the queue */
9730     if (PL_nexttoke) {
9731         PL_nexttoke--;
9732         pl_yylval = PL_nextval[PL_nexttoke];
9733         {
9734             I32 next_type;
9735             next_type = PL_nexttype[PL_nexttoke];
9736             if (next_type & (7<<24)) {
9737                 if (next_type & (1<<24)) {
9738                     if (PL_lex_brackets > 100)
9739                         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
9740                     PL_lex_brackstack[PL_lex_brackets++] =
9741                         (char) ((U8) (next_type >> 16));
9742                 }
9743                 if (next_type & (2<<24))
9744                     PL_lex_allbrackets++;
9745                 if (next_type & (4<<24))
9746                     PL_lex_allbrackets--;
9747                 next_type &= 0xffff;
9748             }
9749             return REPORT(next_type == 'p' ? pending_ident() : next_type);
9750         }
9751     }
9752 
9753     switch (PL_lex_state) {
9754     case LEX_NORMAL:
9755     case LEX_INTERPNORMAL:
9756         break;
9757 
9758     /* interpolated case modifiers like \L \U, including \Q and \E.
9759        when we get here, PL_bufptr is at the \
9760     */
9761     case LEX_INTERPCASEMOD:
9762         /* handle \E or end of string */
9763         return yyl_interpcasemod(aTHX_ s);
9764 
9765     case LEX_INTERPPUSH:
9766         return REPORT(sublex_push());
9767 
9768     case LEX_INTERPSTART:
9769         if (PL_bufptr == PL_bufend)
9770             return REPORT(sublex_done());
9771         DEBUG_T({
9772             if(*PL_bufptr != '(')
9773                 PerlIO_printf(Perl_debug_log, "### Interpolated variable\n");
9774         });
9775         PL_expect = XTERM;
9776         /* for /@a/, we leave the joining for the regex engine to do
9777          * (unless we're within \Q etc) */
9778         PL_lex_dojoin = (*PL_bufptr == '@'
9779                             && (!PL_lex_inpat || PL_lex_casemods));
9780         PL_lex_state = LEX_INTERPNORMAL;
9781         if (PL_lex_dojoin) {
9782             NEXTVAL_NEXTTOKE.ival = 0;
9783             force_next(PERLY_COMMA);
9784             force_ident("\"", PERLY_DOLLAR);
9785             NEXTVAL_NEXTTOKE.ival = 0;
9786             force_next(PERLY_DOLLAR);
9787             NEXTVAL_NEXTTOKE.ival = 0;
9788             force_next((2<<24)|PERLY_PAREN_OPEN);
9789             NEXTVAL_NEXTTOKE.ival = OP_JOIN;	/* emulate join($", ...) */
9790             force_next(FUNC);
9791         }
9792         /* Convert (?{...}) or (*{...}) and friends to 'do {...}' */
9793         if (PL_lex_inpat && *PL_bufptr == '(') {
9794             PL_parser->lex_shared->re_eval_start = PL_bufptr;
9795             PL_bufptr += 2;
9796             if (*PL_bufptr != '{')
9797                 PL_bufptr++;
9798             PL_expect = XTERMBLOCK;
9799             force_next(KW_DO);
9800         }
9801 
9802         if (PL_lex_starts++) {
9803             s = PL_bufptr;
9804             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
9805             if (!PL_lex_casemods && PL_lex_inpat)
9806                 TOKEN(PERLY_COMMA);
9807             else
9808                 AopNOASSIGN(OP_CONCAT);
9809         }
9810         return yylex();
9811 
9812     case LEX_INTERPENDMAYBE:
9813         if (intuit_more(PL_bufptr, PL_bufend)) {
9814             PL_lex_state = LEX_INTERPNORMAL;	/* false alarm, more expr */
9815             break;
9816         }
9817         /* FALLTHROUGH */
9818 
9819     case LEX_INTERPEND:
9820         if (PL_lex_dojoin) {
9821             const U8 dojoin_was = PL_lex_dojoin;
9822             PL_lex_dojoin = FALSE;
9823             PL_lex_state = LEX_INTERPCONCAT;
9824             PL_lex_allbrackets--;
9825             return REPORT(dojoin_was == 1 ? (int)PERLY_PAREN_CLOSE : (int)POSTJOIN);
9826         }
9827         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
9828             && SvEVALED(PL_lex_repl))
9829         {
9830             if (PL_bufptr != PL_bufend)
9831                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
9832             PL_lex_repl = NULL;
9833         }
9834         /* Paranoia.  re_eval_start is adjusted when S_scan_heredoc sets
9835            re_eval_str.  If the here-doc body's length equals the previous
9836            value of re_eval_start, re_eval_start will now be null.  So
9837            check re_eval_str as well. */
9838         if (PL_parser->lex_shared->re_eval_start
9839          || PL_parser->lex_shared->re_eval_str) {
9840             SV *sv;
9841             if (*PL_bufptr != ')')
9842                 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
9843             PL_bufptr++;
9844             /* having compiled a (?{..}) expression, return the original
9845              * text too, as a const */
9846             if (PL_parser->lex_shared->re_eval_str) {
9847                 sv = PL_parser->lex_shared->re_eval_str;
9848                 PL_parser->lex_shared->re_eval_str = NULL;
9849                 SvCUR_set(sv,
9850                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
9851                 SvPV_shrink_to_cur(sv);
9852             }
9853             else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
9854                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
9855             NEXTVAL_NEXTTOKE.opval =
9856                     newSVOP(OP_CONST, 0,
9857                                  sv);
9858             force_next(THING);
9859             PL_parser->lex_shared->re_eval_start = NULL;
9860             PL_expect = XTERM;
9861             return REPORT(PERLY_COMMA);
9862         }
9863 
9864         /* FALLTHROUGH */
9865     case LEX_INTERPCONCAT:
9866 #ifdef DEBUGGING
9867         if (PL_lex_brackets)
9868             Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
9869                        (long) PL_lex_brackets);
9870 #endif
9871         if (PL_bufptr == PL_bufend)
9872             return REPORT(sublex_done());
9873 
9874         /* m'foo' still needs to be parsed for possible (?{...}) */
9875         if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
9876             SV *sv = newSVsv(PL_linestr);
9877             sv = tokeq(sv);
9878             pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
9879             s = PL_bufend;
9880         }
9881         else {
9882             int save_error_count = PL_error_count;
9883 
9884             s = scan_const(PL_bufptr);
9885 
9886             /* Set flag if this was a pattern and there were errors.  op.c will
9887              * refuse to compile a pattern with this flag set.  Otherwise, we
9888              * could get segfaults, etc. */
9889             if (PL_lex_inpat && PL_error_count > save_error_count) {
9890                 ((PMOP*)PL_lex_inpat)->op_pmflags |= PMf_HAS_ERROR;
9891             }
9892             if (*s == '\\')
9893                 PL_lex_state = LEX_INTERPCASEMOD;
9894             else
9895                 PL_lex_state = LEX_INTERPSTART;
9896         }
9897 
9898         if (s != PL_bufptr) {
9899             NEXTVAL_NEXTTOKE = pl_yylval;
9900             PL_expect = XTERM;
9901             force_next(THING);
9902             if (PL_lex_starts++) {
9903                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
9904                 if (!PL_lex_casemods && PL_lex_inpat)
9905                     TOKEN(PERLY_COMMA);
9906                 else
9907                     AopNOASSIGN(OP_CONCAT);
9908             }
9909             else {
9910                 PL_bufptr = s;
9911                 return yylex();
9912             }
9913         }
9914 
9915         return yylex();
9916     case LEX_FORMLINE:
9917         if (PL_parser->sub_error_count != PL_error_count) {
9918             /* There was an error parsing a formline, which tends to
9919                mess up the parser.
9920                Unlike interpolated sub-parsing, we can't treat any of
9921                these as recoverable, so no need to check sub_no_recover.
9922             */
9923             yyquit();
9924         }
9925         assert(PL_lex_formbrack);
9926         s = scan_formline(PL_bufptr);
9927         if (!PL_lex_formbrack)
9928             return yyl_rightcurly(aTHX_ s, 1);
9929         PL_bufptr = s;
9930         return yylex();
9931     }
9932 
9933     /* We really do *not* want PL_linestr ever becoming a COW. */
9934     assert (!SvIsCOW(PL_linestr));
9935     s = PL_bufptr;
9936     PL_oldoldbufptr = PL_oldbufptr;
9937     PL_oldbufptr = s;
9938 
9939     if (PL_in_my == KEY_sigvar) {
9940         PL_parser->saw_infix_sigil = 0;
9941         return yyl_sigvar(aTHX_ s);
9942     }
9943 
9944     {
9945         /* yyl_try() and its callees might consult PL_parser->saw_infix_sigil.
9946            On its return, we then need to set it to indicate whether the token
9947            we just encountered was an infix operator that (if we hadn't been
9948            expecting an operator) have been a sigil.
9949         */
9950         bool expected_operator = (PL_expect == XOPERATOR);
9951         int ret = yyl_try(aTHX_ s);
9952         switch (pl_yylval.ival) {
9953         case OP_BIT_AND:
9954         case OP_MODULO:
9955         case OP_MULTIPLY:
9956         case OP_NBIT_AND:
9957             if (expected_operator) {
9958                 PL_parser->saw_infix_sigil = 1;
9959                 break;
9960             }
9961             /* FALLTHROUGH */
9962         default:
9963             PL_parser->saw_infix_sigil = 0;
9964         }
9965         return ret;
9966     }
9967 }
9968 
9969 
9970 /*
9971   S_pending_ident
9972 
9973   Looks up an identifier in the pad or in a package
9974 
9975   PL_in_my == KEY_sigvar indicates that this is a subroutine signature variable
9976   rather than a plain pad var.
9977 
9978   Returns:
9979     PRIVATEREF if this is a lexical name.
9980     BAREWORD   if this belongs to a package.
9981 
9982   Structure:
9983       if we're in a my declaration
9984           croak if they tried to say my($foo::bar)
9985           build the ops for a my() declaration
9986       if it's an access to a my() variable
9987           build ops for access to a my() variable
9988       if in a dq string, and they've said @foo and we can't find @foo
9989           warn
9990       build ops for a bareword
9991 */
9992 
9993 static int
S_pending_ident(pTHX)9994 S_pending_ident(pTHX)
9995 {
9996     PADOFFSET tmp = 0;
9997     const char pit = (char)pl_yylval.ival;
9998     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
9999     /* All routes through this function want to know if there is a colon.  */
10000     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
10001 
10002     DEBUG_T({ PerlIO_printf(Perl_debug_log,
10003           "### Pending identifier '%s'\n", PL_tokenbuf); });
10004     assert(tokenbuf_len >= 2);
10005 
10006     /* if we're in a my(), we can't allow dynamics here.
10007        $foo'bar has already been turned into $foo::bar, so
10008        just check for colons.
10009 
10010        if it's a legal name, the OP is a PADANY.
10011     */
10012     if (PL_in_my) {
10013         if (PL_in_my == KEY_our) {	/* "our" is merely analogous to "my" */
10014             if (has_colon)
10015                 /* diag_listed_as: No package name allowed for variable %s
10016                                    in "our" */
10017                 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
10018                                   "%s %s in \"our\"",
10019                                   *PL_tokenbuf=='&' ? "subroutine" : "variable",
10020                                   PL_tokenbuf), UTF ? SVf_UTF8 : 0);
10021             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
10022         }
10023         else {
10024             OP *o;
10025             if (has_colon) {
10026                 /* "my" variable %s can't be in a package */
10027                 /* PL_no_myglob is constant */
10028                 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
10029                 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
10030                             PL_in_my == KEY_my ? "my" :
10031                             PL_in_my == KEY_field ? "field" : "state",
10032                             *PL_tokenbuf == '&' ? "subroutine" : "variable",
10033                             PL_tokenbuf),
10034                             UTF ? SVf_UTF8 : 0);
10035                 GCC_DIAG_RESTORE_STMT;
10036             }
10037 
10038             if (PL_in_my == KEY_sigvar) {
10039                 /* A signature 'padop' needs in addition, an op_first to
10040                  * point to a child sigdefelem, and an extra field to hold
10041                  * the signature index. We can achieve both by using an
10042                  * UNOP_AUX and (ab)using the op_aux field to hold the
10043                  * index. If we ever need more fields, use a real malloced
10044                  * aux strut instead.
10045                  */
10046                 o = newUNOP_AUX(OP_ARGELEM, 0, NULL,
10047                                     INT2PTR(UNOP_AUX_item *,
10048                                         (PL_parser->sig_elems)));
10049                 o->op_private |= (  PL_tokenbuf[0] == '$' ? OPpARGELEM_SV
10050                                   : PL_tokenbuf[0] == '@' ? OPpARGELEM_AV
10051                                   :                         OPpARGELEM_HV);
10052             }
10053             else
10054                 o = newOP(OP_PADANY, 0);
10055             o->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
10056                                                         UTF ? SVf_UTF8 : 0);
10057             if (PL_in_my == KEY_sigvar)
10058                 PL_in_my = 0;
10059 
10060             pl_yylval.opval = o;
10061             return PRIVATEREF;
10062         }
10063     }
10064 
10065     /*
10066        build the ops for accesses to a my() variable.
10067     */
10068 
10069     if (!has_colon) {
10070         if (!PL_in_my)
10071             tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
10072                                  0);
10073         if (tmp != NOT_IN_PAD) {
10074             /* might be an "our" variable" */
10075             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
10076                 /* build ops for a bareword */
10077                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
10078                 HEK * const stashname = HvNAME_HEK(stash);
10079                 SV *  const sym = newSVhek(stashname);
10080                 sv_catpvs(sym, "::");
10081                 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
10082                 pl_yylval.opval = newSVOP(OP_CONST, 0, sym);
10083                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
10084                 if (pit != '&')
10085                   gv_fetchsv(sym,
10086                     GV_ADDMULTI,
10087                     ((PL_tokenbuf[0] == '$') ? SVt_PV
10088                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
10089                      : SVt_PVHV));
10090                 return BAREWORD;
10091             }
10092 
10093             pl_yylval.opval = newOP(OP_PADANY, 0);
10094             pl_yylval.opval->op_targ = tmp;
10095             return PRIVATEREF;
10096         }
10097     }
10098 
10099     /*
10100        Whine if they've said @foo or @foo{key} in a doublequoted string,
10101        and @foo (or %foo) isn't a variable we can find in the symbol
10102        table.
10103     */
10104     if (ckWARN(WARN_AMBIGUOUS)
10105         && pit == '@'
10106         && PL_lex_state != LEX_NORMAL
10107         && !PL_lex_brackets)
10108     {
10109         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
10110                                          ( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG,
10111                                          SVt_PVAV);
10112         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
10113            )
10114         {
10115             /* Downgraded from fatal to warning 20000522 mjd */
10116             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10117                         "Possible unintended interpolation of %" UTF8f
10118                         " in string",
10119                         UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
10120         }
10121     }
10122 
10123     /* build ops for a bareword */
10124     pl_yylval.opval = newSVOP(OP_CONST, 0,
10125                                    newSVpvn_flags(PL_tokenbuf + 1,
10126                                                       tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
10127                                                       UTF ? SVf_UTF8 : 0 ));
10128     pl_yylval.opval->op_private = OPpCONST_ENTERED;
10129     if (pit != '&')
10130         gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
10131                      (PL_in_eval ? GV_ADDMULTI : GV_ADD)
10132                      | ( UTF ? SVf_UTF8 : 0 ),
10133                      ((PL_tokenbuf[0] == '$') ? SVt_PV
10134                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
10135                       : SVt_PVHV));
10136     return BAREWORD;
10137 }
10138 
10139 STATIC void
S_checkcomma(pTHX_ const char * s,const char * name,const char * what)10140 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
10141 {
10142     PERL_ARGS_ASSERT_CHECKCOMMA;
10143 
10144     if (*s == ' ' && s[1] == '(') {	/* XXX gotta be a better way */
10145         if (ckWARN(WARN_SYNTAX)) {
10146             int level = 1;
10147             const char *w;
10148             for (w = s+2; *w && level; w++) {
10149                 if (*w == '(')
10150                     ++level;
10151                 else if (*w == ')')
10152                     --level;
10153             }
10154             while (isSPACE(*w))
10155                 ++w;
10156             /* the list of chars below is for end of statements or
10157              * block / parens, boolean operators (&&, ||, //) and branch
10158              * constructs (or, and, if, until, unless, while, err, for).
10159              * Not a very solid hack... */
10160             if (!*w || !memCHRs(";&/|})]oaiuwef!=", *w))
10161                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10162                             "%s (...) interpreted as function",name);
10163         }
10164     }
10165     while (s < PL_bufend && isSPACE(*s))
10166         s++;
10167     if (*s == '(')
10168         s++;
10169     while (s < PL_bufend && isSPACE(*s))
10170         s++;
10171     if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
10172         const char * const w = s;
10173         s += UTF ? UTF8SKIP(s) : 1;
10174         while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
10175             s += UTF ? UTF8SKIP(s) : 1;
10176         while (s < PL_bufend && isSPACE(*s))
10177             s++;
10178         if (*s == ',') {
10179             GV* gv;
10180             if (keyword(w, s - w, 0))
10181                 return;
10182 
10183             gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
10184             if (gv && GvCVu(gv))
10185                 return;
10186             if (s - w <= 254) {
10187                 PADOFFSET off;
10188                 char tmpbuf[256];
10189                 Copy(w, tmpbuf+1, s - w, char);
10190                 *tmpbuf = '&';
10191                 off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
10192                 if (off != NOT_IN_PAD) return;
10193             }
10194             Perl_croak(aTHX_ "No comma allowed after %s", what);
10195         }
10196     }
10197 }
10198 
10199 /* S_new_constant(): do any overload::constant lookup.
10200 
10201    Either returns sv, or mortalizes/frees sv and returns a new SV*.
10202    Best used as sv=new_constant(..., sv, ...).
10203    If s, pv are NULL, calls subroutine with one argument,
10204    and <type> is used with error messages only.
10205    <type> is assumed to be well formed UTF-8.
10206 
10207    If error_msg is not NULL, *error_msg will be set to any error encountered.
10208    Otherwise yyerror() will be used to output it */
10209 
10210 STATIC SV *
S_new_constant(pTHX_ const char * s,STRLEN len,const char * key,STRLEN keylen,SV * sv,SV * pv,const char * type,STRLEN typelen,const char ** error_msg)10211 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
10212                SV *sv, SV *pv, const char *type, STRLEN typelen,
10213                const char ** error_msg)
10214 {
10215     dSP;
10216     HV * table = GvHV(PL_hintgv);		 /* ^H */
10217     SV *res;
10218     SV *errsv = NULL;
10219     SV **cvp;
10220     SV *cv, *typesv;
10221     const char *why1 = "", *why2 = "", *why3 = "";
10222     const char * optional_colon = ":";  /* Only some messages have a colon */
10223     char *msg;
10224 
10225     PERL_ARGS_ASSERT_NEW_CONSTANT;
10226     /* We assume that this is true: */
10227     assert(type || s);
10228 
10229     sv_2mortal(sv);			/* Parent created it permanently */
10230 
10231     if (   ! table
10232         || ! (PL_hints & HINT_LOCALIZE_HH))
10233     {
10234         why1 = "unknown";
10235         optional_colon = "";
10236         goto report;
10237     }
10238 
10239     cvp = hv_fetch(table, key, keylen, FALSE);
10240     if (!cvp || !SvOK(*cvp)) {
10241         why1 = "$^H{";
10242         why2 = key;
10243         why3 = "} is not defined";
10244         goto report;
10245     }
10246 
10247     cv = *cvp;
10248     if (!pv && s)
10249         pv = newSVpvn_flags(s, len, SVs_TEMP);
10250     if (type && pv)
10251         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
10252     else
10253         typesv = &PL_sv_undef;
10254 
10255     PUSHSTACKi(PERLSI_OVERLOAD);
10256     ENTER ;
10257     SAVETMPS;
10258 
10259     PUSHMARK(SP) ;
10260     EXTEND(sp, 3);
10261     if (pv)
10262         PUSHs(pv);
10263     PUSHs(sv);
10264     if (pv)
10265         PUSHs(typesv);
10266     PUTBACK;
10267     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
10268 
10269     SPAGAIN ;
10270 
10271     /* Check the eval first */
10272     if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
10273         STRLEN errlen;
10274         const char * errstr;
10275         sv_catpvs(errsv, "Propagated");
10276         errstr = SvPV_const(errsv, errlen);
10277         yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
10278         (void)POPs;
10279         res = SvREFCNT_inc_simple_NN(sv);
10280     }
10281     else {
10282         res = POPs;
10283         SvREFCNT_inc_simple_void_NN(res);
10284     }
10285 
10286     PUTBACK ;
10287     FREETMPS ;
10288     LEAVE ;
10289     POPSTACK;
10290 
10291     if (SvOK(res)) {
10292         return res;
10293     }
10294 
10295     sv = res;
10296     (void)sv_2mortal(sv);
10297 
10298     why1 = "Call to &{$^H{";
10299     why2 = key;
10300     why3 = "}} did not return a defined value";
10301 
10302   report:
10303 
10304     msg = Perl_form(aTHX_ "Constant(%.*s)%s %s%s%s",
10305                         (int)(type ? typelen : len),
10306                         (type ? type: s),
10307                         optional_colon,
10308                         why1, why2, why3);
10309     if (error_msg) {
10310         *error_msg = msg;
10311     }
10312     else {
10313         yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
10314     }
10315     return SvREFCNT_inc_simple_NN(sv);
10316 }
10317 
10318 PERL_STATIC_INLINE void
S_parse_ident(pTHX_ char ** s,char ** d,char * const e,int allow_package,bool is_utf8,bool check_dollar,bool tick_warn)10319 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
10320                     bool is_utf8, bool check_dollar, bool tick_warn)
10321 {
10322     int saw_tick = 0;
10323     const char *olds = *s;
10324     PERL_ARGS_ASSERT_PARSE_IDENT;
10325 
10326     while (*s < PL_bufend) {
10327         if (*d >= e)
10328             Perl_croak(aTHX_ "%s", ident_too_long);
10329         if (is_utf8 && isIDFIRST_utf8_safe(*s, PL_bufend)) {
10330              /* The UTF-8 case must come first, otherwise things
10331              * like c\N{COMBINING TILDE} would start failing, as the
10332              * isWORDCHAR_A case below would gobble the 'c' up.
10333              */
10334 
10335             char *t = *s + UTF8SKIP(*s);
10336             while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) {
10337                 t += UTF8SKIP(t);
10338             }
10339             if (*d + (t - *s) > e)
10340                 Perl_croak(aTHX_ "%s", ident_too_long);
10341             Copy(*s, *d, t - *s, char);
10342             *d += t - *s;
10343             *s = t;
10344         }
10345         else if ( isWORDCHAR_A(**s) ) {
10346             do {
10347                 *(*d)++ = *(*s)++;
10348             } while (isWORDCHAR_A(**s) && *d < e);
10349         }
10350         else if (   allow_package
10351                  && **s == '\''
10352                  && isIDFIRST_lazy_if_safe((*s)+1, PL_bufend, is_utf8))
10353         {
10354             *(*d)++ = ':';
10355             *(*d)++ = ':';
10356             (*s)++;
10357             saw_tick++;
10358         }
10359         else if (allow_package && **s == ':' && (*s)[1] == ':'
10360            /* Disallow things like Foo::$bar. For the curious, this is
10361             * the code path that triggers the "Bad name after" warning
10362             * when looking for barewords.
10363             */
10364            && !(check_dollar && (*s)[2] == '$')) {
10365             *(*d)++ = *(*s)++;
10366             *(*d)++ = *(*s)++;
10367         }
10368         else
10369             break;
10370     }
10371     if (UNLIKELY(saw_tick && tick_warn && ckWARN2_d(WARN_SYNTAX, WARN_DEPRECATED__APOSTROPHE_AS_PACKAGE_SEPARATOR))) {
10372         if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10373             char *this_d;
10374             char *d2;
10375             Newx(this_d, *s - olds + saw_tick + 2, char); /* +2 for $# */
10376             d2 = this_d;
10377             SAVEFREEPV(this_d);
10378 
10379             Perl_warner(aTHX_ packWARN2(WARN_SYNTAX, WARN_DEPRECATED__APOSTROPHE_AS_PACKAGE_SEPARATOR),
10380                         "Old package separator used in string");
10381             if (olds[-1] == '#')
10382                 *d2++ = olds[-2];
10383             *d2++ = olds[-1];
10384             while (olds < *s) {
10385                 if (*olds == '\'') {
10386                     *d2++ = '\\';
10387                     *d2++ = *olds++;
10388                 }
10389                 else
10390                     *d2++ = *olds++;
10391             }
10392             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10393                         "\t(Did you mean \"%" UTF8f "\" instead?)\n",
10394                         UTF8fARG(is_utf8, d2-this_d, this_d));
10395         }
10396         else {
10397             Perl_warner(aTHX_ packWARN2(WARN_SYNTAX, WARN_DEPRECATED__APOSTROPHE_AS_PACKAGE_SEPARATOR),
10398                         "Old package separator \"'\" deprecated");
10399         }
10400     }
10401     return;
10402 }
10403 
10404 /* Returns a NUL terminated string, with the length of the string written to
10405    *slp
10406 
10407    scan_word6() may be removed once ' in names is removed.
10408    */
10409 char *
Perl_scan_word6(pTHX_ char * s,char * dest,STRLEN destlen,int allow_package,STRLEN * slp,bool warn_tick)10410 Perl_scan_word6(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp, bool warn_tick)
10411 {
10412     char *d = dest;
10413     char * const e = d + destlen - 3;  /* two-character token, ending NUL */
10414     bool is_utf8 = cBOOL(UTF);
10415 
10416     PERL_ARGS_ASSERT_SCAN_WORD6;
10417 
10418     parse_ident(&s, &d, e, allow_package, is_utf8, TRUE, warn_tick);
10419     *d = '\0';
10420     *slp = d - dest;
10421     return s;
10422 }
10423 
10424 char *
Perl_scan_word(pTHX_ char * s,char * dest,STRLEN destlen,int allow_package,STRLEN * slp)10425 Perl_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
10426 {
10427     PERL_ARGS_ASSERT_SCAN_WORD;
10428     return scan_word6(s, dest, destlen, allow_package, slp, FALSE);
10429 }
10430 
10431 /* scan s and extract an identifier ($var) from it if possible
10432  * into dest.
10433  * XXX: This function has subtle implications on parsing, and
10434  * changing how it behaves can cause a variable to change from
10435  * being a run time rv2sv call or a compile time binding to a
10436  * specific variable name.
10437  */
10438 STATIC char *
S_scan_ident(pTHX_ char * s,char * dest,STRLEN destlen,I32 ck_uni)10439 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
10440 {
10441     I32 herelines = PL_parser->herelines;
10442     SSize_t bracket = -1;
10443     char funny = *s++;
10444     char *d = dest;
10445     char * const e = d + destlen - 3;    /* two-character token, ending NUL */
10446     bool is_utf8 = cBOOL(UTF);
10447     line_t orig_copline = 0, tmp_copline = 0;
10448 
10449     PERL_ARGS_ASSERT_SCAN_IDENT;
10450 
10451     if (isSPACE(*s) || !*s)
10452         s = skipspace(s);
10453     if (isDIGIT(*s)) { /* handle $0 and $1 $2 and $10 and etc */
10454         bool is_zero= *s == '0' ? TRUE : FALSE;
10455         char *digit_start= d;
10456         *d++ = *s++;
10457         while (s < PL_bufend && isDIGIT(*s)) {
10458             if (d >= e)
10459                 Perl_croak(aTHX_ "%s", ident_too_long);
10460             *d++ = *s++;
10461         }
10462         if (is_zero && d - digit_start > 1)
10463             Perl_croak(aTHX_ ident_var_zero_multi_digit);
10464     }
10465     else {  /* See if it is a "normal" identifier */
10466         parse_ident(&s, &d, e, 1, is_utf8, FALSE, TRUE);
10467     }
10468     *d = '\0';
10469     d = dest;
10470     if (*d) {
10471         /* Either a digit variable, or parse_ident() found an identifier
10472            (anything valid as a bareword), so job done and return.  */
10473         if (PL_lex_state != LEX_NORMAL)
10474             PL_lex_state = LEX_INTERPENDMAYBE;
10475         return s;
10476     }
10477 
10478     /* Here, it is not a run-of-the-mill identifier name */
10479 
10480     if (*s == '$' && s[1]
10481         && (   isIDFIRST_lazy_if_safe(s+1, PL_bufend, is_utf8)
10482             || isDIGIT_A((U8)s[1])
10483             || s[1] == '$'
10484             || s[1] == '{'
10485             || memBEGINs(s+1, (STRLEN) (PL_bufend - (s+1)), "::")) )
10486     {
10487         /* Dereferencing a value in a scalar variable.
10488            The alternatives are different syntaxes for a scalar variable.
10489            Using ' as a leading package separator isn't allowed. :: is.   */
10490         return s;
10491     }
10492     /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...}  */
10493     if (*s == '{') {
10494         bracket = s - SvPVX(PL_linestr);
10495         s++;
10496         orig_copline = CopLINE(PL_curcop);
10497         if (s < PL_bufend && isSPACE(*s)) {
10498             s = skipspace(s);
10499         }
10500     }
10501 
10502 
10503     /* Extract the first character of the variable name from 's' and
10504      * copy it, null terminated into 'd'. Note that this does not
10505      * involve checking for just IDFIRST characters, as it allows the
10506      * '^' for ${^FOO} type variable names, and it allows all the
10507      * characters that are legal in a single character variable name.
10508      *
10509      * The legal ones are any of:
10510      *  a) all ASCII characters except:
10511      *          1) control and space-type ones, like NUL, SOH, \t, and SPACE;
10512      *          2) '{'
10513      *     The final case currently doesn't get this far in the program, so we
10514      *     don't test for it.  If that were to change, it would be ok to allow it.
10515      *  b) When not under Unicode rules, any upper Latin1 character
10516      *  c) Otherwise, when unicode rules are used, all XIDS characters.
10517      *
10518      *      Because all ASCII characters have the same representation whether
10519      *      encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
10520      *      '{' without knowing if is UTF-8 or not. */
10521 
10522     if ((s <= PL_bufend - ((is_utf8)
10523                           ? UTF8SKIP(s)
10524                           : 1))
10525         && (
10526             isGRAPH_A(*s)
10527             ||
10528             ( is_utf8
10529               ? isIDFIRST_utf8_safe(s, PL_bufend)
10530               : (isGRAPH_L1(*s)
10531                  && LIKELY((U8) *s != LATIN1_TO_NATIVE(0xAD))
10532                 )
10533             )
10534         )
10535     ){
10536         if (is_utf8) {
10537             const STRLEN skip = UTF8SKIP(s);
10538             STRLEN i;
10539             d[skip] = '\0';
10540             for ( i = 0; i < skip; i++ )
10541                 d[i] = *s++;
10542         }
10543         else {
10544             *d = *s++;
10545             d[1] = '\0';
10546         }
10547     }
10548 
10549     /* special case to handle ${10}, ${11} the same way we handle ${1} etc */
10550     if (isDIGIT(*d)) {
10551         bool is_zero= *d == '0' ? TRUE : FALSE;
10552         char *digit_start= d;
10553         while (s < PL_bufend && isDIGIT(*s)) {
10554             d++;
10555             if (d >= e)
10556                 Perl_croak(aTHX_ "%s", ident_too_long);
10557             *d= *s++;
10558         }
10559         if (is_zero && d - digit_start >= 1) /* d points at the last digit */
10560             Perl_croak(aTHX_ ident_var_zero_multi_digit);
10561         d[1] = '\0';
10562     }
10563 
10564     /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
10565     else if (*d == '^' && *s && isCONTROLVAR(*s)) {
10566         *d = toCTRL(*s);
10567         s++;
10568     }
10569     /* Warn about ambiguous code after unary operators if {...} notation isn't
10570        used.  There's no difference in ambiguity; it's merely a heuristic
10571        about when not to warn.  */
10572     else if (ck_uni && bracket == -1)
10573         check_uni();
10574 
10575     if (bracket != -1) {
10576         bool skip;
10577         char *s2;
10578         /* If we were processing {...} notation then...  */
10579         if (isIDFIRST_lazy_if_safe(d, e, is_utf8)
10580             || (!isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
10581                  && isWORDCHAR(*s))
10582         ) {
10583             /* note we have to check for a normal identifier first,
10584              * as it handles utf8 symbols, and only after that has
10585              * been ruled out can we look at the caret words */
10586             if (isIDFIRST_lazy_if_safe(d, e, is_utf8) ) {
10587                 /* if it starts as a valid identifier, assume that it is one.
10588                    (the later check for } being at the expected point will trap
10589                    cases where this doesn't pan out.)  */
10590                 d += is_utf8 ? UTF8SKIP(d) : 1;
10591                 parse_ident(&s, &d, e, 1, is_utf8, TRUE, TRUE);
10592                 *d = '\0';
10593             }
10594             else { /* caret word: ${^Foo} ${^CAPTURE[0]} */
10595                 d++;
10596                 while (isWORDCHAR(*s) && d < e) {
10597                     *d++ = *s++;
10598                 }
10599                 if (d >= e)
10600                     Perl_croak(aTHX_ "%s", ident_too_long);
10601                 *d = '\0';
10602             }
10603             tmp_copline = CopLINE(PL_curcop);
10604             if (s < PL_bufend && isSPACE(*s)) {
10605                 s = skipspace(s);
10606             }
10607             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
10608                 /* ${foo[0]} and ${foo{bar}} and ${^CAPTURE[0]} notation.  */
10609                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10610                     const char * const brack =
10611                         (const char *)
10612                         ((*s == '[') ? "[...]" : "{...}");
10613                     orig_copline = CopLINE(PL_curcop);
10614                     CopLINE_set(PL_curcop, tmp_copline);
10615    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
10616                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10617                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
10618                         funny, dest, brack, funny, dest, brack);
10619                     CopLINE_set(PL_curcop, orig_copline);
10620                 }
10621                 bracket++;
10622                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
10623                 PL_lex_allbrackets++;
10624                 return s;
10625             }
10626         }
10627 
10628         if ( !tmp_copline )
10629             tmp_copline = CopLINE(PL_curcop);
10630         if ((skip = s < PL_bufend && isSPACE(*s))) {
10631             /* Avoid incrementing line numbers or resetting PL_linestart,
10632                in case we have to back up.  */
10633             STRLEN s_off = s - SvPVX(PL_linestr);
10634             s2 = peekspace(s);
10635             s = SvPVX(PL_linestr) + s_off;
10636         }
10637         else
10638             s2 = s;
10639 
10640         /* Expect to find a closing } after consuming any trailing whitespace.
10641          */
10642         if (*s2 == '}') {
10643             /* Now increment line numbers if applicable.  */
10644             if (skip)
10645                 s = skipspace(s);
10646             s++;
10647             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10648                 PL_lex_state = LEX_INTERPEND;
10649                 PL_expect = XREF;
10650             }
10651             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) {
10652                 if (ckWARN(WARN_AMBIGUOUS)
10653                     && (keyword(dest, d - dest, 0)
10654                         || get_cvn_flags(dest, d - dest, is_utf8
10655                            ? SVf_UTF8
10656                            : 0)))
10657                 {
10658                     SV *tmp = newSVpvn_flags( dest, d - dest,
10659                                         SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
10660                     if (funny == '#')
10661                         funny = '@';
10662                     orig_copline = CopLINE(PL_curcop);
10663                     CopLINE_set(PL_curcop, tmp_copline);
10664                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10665                         "Ambiguous use of %c{%" SVf "} resolved to %c%" SVf,
10666                         funny, SVfARG(tmp), funny, SVfARG(tmp));
10667                     CopLINE_set(PL_curcop, orig_copline);
10668                 }
10669             }
10670         }
10671         else {
10672             /* Didn't find the closing } at the point we expected, so restore
10673                state such that the next thing to process is the opening { and */
10674             s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
10675             CopLINE_set(PL_curcop, orig_copline);
10676             PL_parser->herelines = herelines;
10677             *dest = '\0';
10678             PL_parser->sub_no_recover = TRUE;
10679         }
10680     }
10681     else if (   PL_lex_state == LEX_INTERPNORMAL
10682              && !PL_lex_brackets
10683              && !intuit_more(s, PL_bufend))
10684         PL_lex_state = LEX_INTERPEND;
10685     return s;
10686 }
10687 
10688 static bool
S_pmflag(pTHX_ const char * const valid_flags,U32 * pmfl,char ** s,char * charset,unsigned int * x_mod_count)10689 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) {
10690 
10691     /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag
10692      * found in the parse starting at 's', based on the subset that are valid
10693      * in this context input to this routine in 'valid_flags'. Advances s.
10694      * Returns TRUE if the input should be treated as a valid flag, so the next
10695      * char may be as well; otherwise FALSE. 'charset' should point to a NUL
10696      * upon first call on the current regex.  This routine will set it to any
10697      * charset modifier found.  The caller shouldn't change it.  This way,
10698      * another charset modifier encountered in the parse can be detected as an
10699      * error, as we have decided to allow only one */
10700 
10701     const char c = **s;
10702     STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
10703 
10704     if ( charlen != 1 || ! strchr(valid_flags, c) ) {
10705         if (isWORDCHAR_lazy_if_safe( *s, PL_bufend, UTF)) {
10706             yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
10707                        UTF ? SVf_UTF8 : 0);
10708             (*s) += charlen;
10709             /* Pretend that it worked, so will continue processing before
10710              * dieing */
10711             return TRUE;
10712         }
10713         return FALSE;
10714     }
10715 
10716     switch (c) {
10717 
10718         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count);
10719         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
10720         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
10721         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
10722         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
10723         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
10724         case LOCALE_PAT_MOD:
10725             if (*charset) {
10726                 goto multiple_charsets;
10727             }
10728             set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
10729             *charset = c;
10730             break;
10731         case UNICODE_PAT_MOD:
10732             if (*charset) {
10733                 goto multiple_charsets;
10734             }
10735             set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
10736             *charset = c;
10737             break;
10738         case ASCII_RESTRICT_PAT_MOD:
10739             if (! *charset) {
10740                 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
10741             }
10742             else {
10743 
10744                 /* Error if previous modifier wasn't an 'a', but if it was, see
10745                  * if, and accept, a second occurrence (only) */
10746                 if (*charset != 'a'
10747                     || get_regex_charset(*pmfl)
10748                         != REGEX_ASCII_RESTRICTED_CHARSET)
10749                 {
10750                         goto multiple_charsets;
10751                 }
10752                 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
10753             }
10754             *charset = c;
10755             break;
10756         case DEPENDS_PAT_MOD:
10757             if (*charset) {
10758                 goto multiple_charsets;
10759             }
10760             set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
10761             *charset = c;
10762             break;
10763     }
10764 
10765     (*s)++;
10766     return TRUE;
10767 
10768     multiple_charsets:
10769         if (*charset != c) {
10770             yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
10771         }
10772         else if (c == 'a') {
10773   /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
10774             yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
10775         }
10776         else {
10777             yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
10778         }
10779 
10780         /* Pretend that it worked, so will continue processing before dieing */
10781         (*s)++;
10782         return TRUE;
10783 }
10784 
10785 STATIC char *
S_scan_pat(pTHX_ char * start,I32 type)10786 S_scan_pat(pTHX_ char *start, I32 type)
10787 {
10788     PMOP *pm;
10789     char *s;
10790     const char * const valid_flags =
10791         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
10792     char charset = '\0';    /* character set modifier */
10793     unsigned int x_mod_count = 0;
10794 
10795     PERL_ARGS_ASSERT_SCAN_PAT;
10796 
10797     s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
10798     if (!s)
10799         Perl_croak(aTHX_ "Search pattern not terminated");
10800 
10801     pm = (PMOP*)newPMOP(type, 0);
10802     if (PL_multi_open == '?') {
10803         /* This is the only point in the code that sets PMf_ONCE:  */
10804         pm->op_pmflags |= PMf_ONCE;
10805 
10806         /* Hence it's safe to do this bit of PMOP book-keeping here, which
10807            allows us to restrict the list needed by reset to just the ??
10808            matches.  */
10809         assert(type != OP_TRANS);
10810         if (PL_curstash) {
10811             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
10812             U32 elements;
10813             if (!mg) {
10814                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
10815                                  0);
10816             }
10817             elements = mg->mg_len / sizeof(PMOP**);
10818             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
10819             ((PMOP**)mg->mg_ptr) [elements++] = pm;
10820             mg->mg_len = elements * sizeof(PMOP**);
10821             PmopSTASH_set(pm,PL_curstash);
10822         }
10823     }
10824 
10825     /* if qr/...(?{..}).../, then need to parse the pattern within a new
10826      * anon CV. False positives like qr/[(?{]/ are harmless */
10827 
10828     if (type == OP_QR) {
10829         STRLEN len;
10830         char *e, *p = SvPV(PL_lex_stuff, len);
10831         e = p + len;
10832         for (; p < e; p++) {
10833             if (p[0] == '(' && (
10834                 (p[1] == '?' && (p[2] == '{' ||
10835                                 (p[2] == '?' && p[3] == '{'))) ||
10836                 (p[1] == '*' && (p[2] == '{' ||
10837                                 (p[2] == '*' && p[3] == '{')))
10838             )){
10839                 pm->op_pmflags |= PMf_HAS_CV;
10840                 break;
10841             }
10842         }
10843         pm->op_pmflags |= PMf_IS_QR;
10844     }
10845 
10846     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
10847                                 &s, &charset, &x_mod_count))
10848     {};
10849     /* issue a warning if /c is specified,but /g is not */
10850     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
10851     {
10852         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
10853                        "Use of /c modifier is meaningless without /g" );
10854     }
10855 
10856     PL_lex_op = (OP*)pm;
10857     pl_yylval.ival = OP_MATCH;
10858     return s;
10859 }
10860 
10861 STATIC char *
S_scan_subst(pTHX_ char * start)10862 S_scan_subst(pTHX_ char *start)
10863 {
10864     char *s;
10865     PMOP *pm;
10866     I32 first_start;
10867     line_t first_line;
10868     line_t linediff = 0;
10869     I32 es = 0;
10870     char charset = '\0';    /* character set modifier */
10871     unsigned int x_mod_count = 0;
10872     char *t;
10873 
10874     PERL_ARGS_ASSERT_SCAN_SUBST;
10875 
10876     pl_yylval.ival = OP_NULL;
10877 
10878     s = scan_str(start, TRUE, FALSE, FALSE, &t);
10879 
10880     if (!s)
10881         Perl_croak(aTHX_ "Substitution pattern not terminated");
10882 
10883     s = t;
10884 
10885     first_start = PL_multi_start;
10886     first_line = CopLINE(PL_curcop);
10887     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
10888     if (!s) {
10889         SvREFCNT_dec_NN(PL_lex_stuff);
10890         PL_lex_stuff = NULL;
10891         Perl_croak(aTHX_ "Substitution replacement not terminated");
10892     }
10893     PL_multi_start = first_start;	/* so whole substitution is taken together */
10894 
10895     pm = (PMOP*)newPMOP(OP_SUBST, 0);
10896 
10897 
10898     while (*s) {
10899         if (*s == EXEC_PAT_MOD) {
10900             s++;
10901             es++;
10902         }
10903         else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
10904                                   &s, &charset, &x_mod_count))
10905         {
10906             break;
10907         }
10908     }
10909 
10910     if ((pm->op_pmflags & PMf_CONTINUE)) {
10911         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
10912     }
10913 
10914     if (es) {
10915         SV * const repl = newSVpvs("");
10916 
10917         PL_multi_end = 0;
10918         pm->op_pmflags |= PMf_EVAL;
10919         for (; es > 1; es--) {
10920             sv_catpvs(repl, "eval ");
10921         }
10922         sv_catpvs(repl, "do {");
10923         sv_catsv(repl, PL_parser->lex_sub_repl);
10924         sv_catpvs(repl, "}");
10925         SvREFCNT_dec(PL_parser->lex_sub_repl);
10926         PL_parser->lex_sub_repl = repl;
10927     }
10928 
10929 
10930     linediff = CopLINE(PL_curcop) - first_line;
10931     if (linediff)
10932         CopLINE_set(PL_curcop, first_line);
10933 
10934     if (linediff || es) {
10935         /* the IVX field indicates that the replacement string is a s///e;
10936          * the NVX field indicates how many src code lines the replacement
10937          * spreads over */
10938         sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV);
10939         ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = linediff;
10940         ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen =
10941                                                                     cBOOL(es);
10942     }
10943 
10944     PL_lex_op = (OP*)pm;
10945     pl_yylval.ival = OP_SUBST;
10946     return s;
10947 }
10948 
10949 STATIC char *
S_scan_trans(pTHX_ char * start)10950 S_scan_trans(pTHX_ char *start)
10951 {
10952     char* s;
10953     OP *o;
10954     U8 squash;
10955     U8 del;
10956     U8 complement;
10957     bool nondestruct = 0;
10958     char *t;
10959 
10960     PERL_ARGS_ASSERT_SCAN_TRANS;
10961 
10962     pl_yylval.ival = OP_NULL;
10963 
10964     s = scan_str(start,FALSE,FALSE,FALSE,&t);
10965     if (!s)
10966         Perl_croak(aTHX_ "Transliteration pattern not terminated");
10967 
10968     s = t;
10969 
10970     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
10971     if (!s) {
10972         SvREFCNT_dec_NN(PL_lex_stuff);
10973         PL_lex_stuff = NULL;
10974         Perl_croak(aTHX_ "Transliteration replacement not terminated");
10975     }
10976 
10977     complement = del = squash = 0;
10978     while (1) {
10979         switch (*s) {
10980         case 'c':
10981             complement = OPpTRANS_COMPLEMENT;
10982             break;
10983         case 'd':
10984             del = OPpTRANS_DELETE;
10985             break;
10986         case 's':
10987             squash = OPpTRANS_SQUASH;
10988             break;
10989         case 'r':
10990             nondestruct = 1;
10991             break;
10992         default:
10993             goto no_more;
10994         }
10995         s++;
10996     }
10997   no_more:
10998 
10999     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
11000     o->op_private &= ~OPpTRANS_ALL;
11001     o->op_private |= del|squash|complement;
11002 
11003     PL_lex_op = o;
11004     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
11005 
11006 
11007     return s;
11008 }
11009 
11010 /* scan_heredoc
11011    Takes a pointer to the first < in <<FOO.
11012    Returns a pointer to the byte following <<FOO.
11013 
11014    This function scans a heredoc, which involves different methods
11015    depending on whether we are in a string eval, quoted construct, etc.
11016    This is because PL_linestr could containing a single line of input, or
11017    a whole string being evalled, or the contents of the current quote-
11018    like operator.
11019 
11020    The two basic methods are:
11021     - Steal lines from the input stream
11022     - Scan the heredoc in PL_linestr and remove it therefrom
11023 
11024    In a file scope or filtered eval, the first method is used; in a
11025    string eval, the second.
11026 
11027    In a quote-like operator, we have to choose between the two,
11028    depending on where we can find a newline.  We peek into outer lex-
11029    ing scopes until we find one with a newline in it.  If we reach the
11030    outermost lexing scope and it is a file, we use the stream method.
11031    Otherwise it is treated as an eval.
11032 */
11033 
11034 STATIC char *
S_scan_heredoc(pTHX_ char * s)11035 S_scan_heredoc(pTHX_ char *s)
11036 {
11037     I32 op_type = OP_SCALAR;
11038     I32 len;
11039     SV *tmpstr;
11040     char term;
11041     char *d;
11042     char *e;
11043     char *peek;
11044     char *indent = 0;
11045     I32 indent_len = 0;
11046     bool indented = FALSE;
11047     const bool infile = PL_rsfp || PL_parser->filtered;
11048     const line_t origline = CopLINE(PL_curcop);
11049     LEXSHARED *shared = PL_parser->lex_shared;
11050 
11051     PERL_ARGS_ASSERT_SCAN_HEREDOC;
11052 
11053     s += 2;
11054     d = PL_tokenbuf + 1;
11055     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
11056     *PL_tokenbuf = '\n';
11057     peek = s;
11058 
11059     if (*peek == '~') {
11060         indented = TRUE;
11061         peek++; s++;
11062     }
11063 
11064     while (SPACE_OR_TAB(*peek))
11065         peek++;
11066 
11067     if (*peek == '`' || *peek == '\'' || *peek =='"') {
11068         s = peek;
11069         term = *s++;
11070         s = delimcpy(d, e, s, PL_bufend, term, &len);
11071         if (s == PL_bufend)
11072             Perl_croak(aTHX_ "Unterminated delimiter for here document");
11073         d += len;
11074         s++;
11075     }
11076     else {
11077         if (*s == '\\')
11078             /* <<\FOO is equivalent to <<'FOO' */
11079             s++, term = '\'';
11080         else
11081             term = '"';
11082 
11083         if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
11084             Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden");
11085 
11086         peek = s;
11087 
11088         while (isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF)) {
11089             peek += UTF ? UTF8SKIP(peek) : 1;
11090         }
11091 
11092         len = (peek - s >= e - d) ? (e - d) : (peek - s);
11093         Copy(s, d, len, char);
11094         s += len;
11095         d += len;
11096     }
11097 
11098     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
11099         Perl_croak(aTHX_ "Delimiter for here document is too long");
11100 
11101     *d++ = '\n';
11102     *d = '\0';
11103     len = d - PL_tokenbuf;
11104 
11105 #ifndef PERL_STRICT_CR
11106     d = (char *) memchr(s, '\r', PL_bufend - s);
11107     if (d) {
11108         char * const olds = s;
11109         s = d;
11110         while (s < PL_bufend) {
11111             if (*s == '\r') {
11112                 *d++ = '\n';
11113                 if (*++s == '\n')
11114                     s++;
11115             }
11116             else if (*s == '\n' && s[1] == '\r') {	/* \015\013 on a mac? */
11117                 *d++ = *s++;
11118                 s++;
11119             }
11120             else
11121                 *d++ = *s++;
11122         }
11123         *d = '\0';
11124         PL_bufend = d;
11125         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11126         s = olds;
11127     }
11128 #endif
11129 
11130     tmpstr = newSV_type(SVt_PVIV);
11131     if (term == '\'') {
11132         op_type = OP_CONST;
11133         SvIV_set(tmpstr, -1);
11134     }
11135     else if (term == '`') {
11136         op_type = OP_BACKTICK;
11137         SvIV_set(tmpstr, '\\');
11138     }
11139 
11140     PL_multi_start = origline + 1 + PL_parser->herelines;
11141     PL_multi_open = PL_multi_close = '<';
11142 
11143     /* inside a string eval or quote-like operator */
11144     if (!infile || PL_lex_inwhat) {
11145         SV *linestr;
11146         char *bufend;
11147         char * const olds = s;
11148         PERL_CONTEXT * const cx = CX_CUR();
11149         /* These two fields are not set until an inner lexing scope is
11150            entered.  But we need them set here. */
11151         shared->ls_bufptr  = s;
11152         shared->ls_linestr = PL_linestr;
11153 
11154         if (PL_lex_inwhat) {
11155             /* Look for a newline.  If the current buffer does not have one,
11156              peek into the line buffer of the parent lexing scope, going
11157              up as many levels as necessary to find one with a newline
11158              after bufptr.
11159             */
11160             while (!(s = (char *)memchr(
11161                                 (void *)shared->ls_bufptr, '\n',
11162                                 SvEND(shared->ls_linestr)-shared->ls_bufptr
11163                 )))
11164             {
11165                 shared = shared->ls_prev;
11166                 /* shared is only null if we have gone beyond the outermost
11167                    lexing scope.  In a file, we will have broken out of the
11168                    loop in the previous iteration.  In an eval, the string buf-
11169                    fer ends with "\n;", so the while condition above will have
11170                    evaluated to false.  So shared can never be null.  Or so you
11171                    might think.  Odd syntax errors like s;@{<<; can gobble up
11172                    the implicit semicolon at the end of a flie, causing the
11173                    file handle to be closed even when we are not in a string
11174                    eval.  So shared may be null in that case.
11175                    (Closing '>>}' here to balance the earlier open brace for
11176                    editors that look for matched pairs.) */
11177                 if (UNLIKELY(!shared))
11178                     goto interminable;
11179                 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
11180                    most lexing scope.  In a file, shared->ls_linestr at that
11181                    level is just one line, so there is no body to steal. */
11182                 if (infile && !shared->ls_prev) {
11183                     s = olds;
11184                     goto streaming;
11185                 }
11186             }
11187         }
11188         else {	/* eval or we've already hit EOF */
11189             s = (char*)memchr((void*)s, '\n', PL_bufend - s);
11190             if (!s)
11191                 goto interminable;
11192         }
11193 
11194         linestr = shared->ls_linestr;
11195         bufend = SvEND(linestr);
11196         d = s;
11197         if (indented) {
11198             char *myolds = s;
11199 
11200             while (s < bufend - len + 1) {
11201                 if (*s++ == '\n')
11202                     ++PL_parser->herelines;
11203 
11204                 if (memEQ(s, PL_tokenbuf + 1, len - 1)) {
11205                     char *backup = s;
11206                     indent_len = 0;
11207 
11208                     /* Only valid if it's preceded by whitespace only */
11209                     while (backup != myolds && --backup >= myolds) {
11210                         if (! SPACE_OR_TAB(*backup)) {
11211                             break;
11212                         }
11213                         indent_len++;
11214                     }
11215 
11216                     /* No whitespace or all! */
11217                     if (backup == s || *backup == '\n') {
11218                         Newx(indent, indent_len + 1, char);
11219                         memcpy(indent, backup + 1, indent_len);
11220                         indent[indent_len] = 0;
11221                         s--; /* before our delimiter */
11222                         PL_parser->herelines--; /* this line doesn't count */
11223                         break;
11224                     }
11225                 }
11226             }
11227         }
11228         else {
11229             while (s < bufend - len + 1
11230                    && memNE(s,PL_tokenbuf,len) )
11231             {
11232                 if (*s++ == '\n')
11233                     ++PL_parser->herelines;
11234             }
11235         }
11236 
11237         if (s >= bufend - len + 1) {
11238             goto interminable;
11239         }
11240 
11241         sv_setpvn_fresh(tmpstr,d+1,s-d);
11242         s += len - 1;
11243         /* the preceding stmt passes a newline */
11244         PL_parser->herelines++;
11245 
11246         /* s now points to the newline after the heredoc terminator.
11247            d points to the newline before the body of the heredoc.
11248          */
11249 
11250         /* We are going to modify linestr in place here, so set
11251            aside copies of the string if necessary for re-evals or
11252            (caller $n)[6]. */
11253         /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
11254            check shared->re_eval_str. */
11255         if (shared->re_eval_start || shared->re_eval_str) {
11256             /* Set aside the rest of the regexp */
11257             if (!shared->re_eval_str)
11258                 shared->re_eval_str =
11259                        newSVpvn(shared->re_eval_start,
11260                                 bufend - shared->re_eval_start);
11261             shared->re_eval_start -= s-d;
11262         }
11263 
11264         if (cxstack_ix >= 0
11265             && CxTYPE(cx) == CXt_EVAL
11266             && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
11267             && cx->blk_eval.cur_text == linestr)
11268         {
11269             cx->blk_eval.cur_text = newSVsv(linestr);
11270             cx->blk_u16 |= 0x40; /* indicate cur_text is ref counted */
11271         }
11272 
11273         /* Copy everything from s onwards back to d. */
11274         Move(s,d,bufend-s + 1,char);
11275         SvCUR_set(linestr, SvCUR(linestr) - (s-d));
11276         /* Setting PL_bufend only applies when we have not dug deeper
11277            into other scopes, because sublex_done sets PL_bufend to
11278            SvEND(PL_linestr). */
11279         if (shared == PL_parser->lex_shared)
11280             PL_bufend = SvEND(linestr);
11281         s = olds;
11282     }
11283     else {
11284         SV *linestr_save;
11285         char *oldbufptr_save;
11286         char *oldoldbufptr_save;
11287       streaming:
11288         sv_grow_fresh(tmpstr, 80);
11289         SvPVCLEAR_FRESH(tmpstr);   /* avoid "uninitialized" warning */
11290         term = PL_tokenbuf[1];
11291         len--;
11292         linestr_save = PL_linestr; /* must restore this afterwards */
11293         d = s;			 /* and this */
11294         oldbufptr_save = PL_oldbufptr;
11295         oldoldbufptr_save = PL_oldoldbufptr;
11296         PL_linestr = newSVpvs("");
11297         PL_bufend = SvPVX(PL_linestr);
11298 
11299         while (1) {
11300             PL_bufptr = PL_bufend;
11301             CopLINE_set(PL_curcop,
11302                         origline + 1 + PL_parser->herelines);
11303 
11304             if (   !lex_next_chunk(LEX_NO_TERM)
11305                 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n'))
11306             {
11307                 /* Simply freeing linestr_save might seem simpler here, as it
11308                    does not matter what PL_linestr points to, since we are
11309                    about to croak; but in a quote-like op, linestr_save
11310                    will have been prospectively freed already, via
11311                    SAVEFREESV(PL_linestr) in sublex_push, so it's easier to
11312                    restore PL_linestr. */
11313                 SvREFCNT_dec_NN(PL_linestr);
11314                 PL_linestr = linestr_save;
11315                 PL_oldbufptr = oldbufptr_save;
11316                 PL_oldoldbufptr = oldoldbufptr_save;
11317                 goto interminable;
11318             }
11319 
11320             CopLINE_set(PL_curcop, origline);
11321 
11322             if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
11323                 s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
11324                 /* ^That should be enough to avoid this needing to grow:  */
11325                 sv_catpvs(PL_linestr, "\n\0");
11326                 assert(s == SvPVX(PL_linestr));
11327                 PL_bufend = SvEND(PL_linestr);
11328             }
11329 
11330             s = PL_bufptr;
11331             PL_parser->herelines++;
11332             PL_last_lop = PL_last_uni = NULL;
11333 
11334 #ifndef PERL_STRICT_CR
11335             if (PL_bufend - PL_linestart >= 2) {
11336                 if (   (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
11337                     || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
11338                 {
11339                     PL_bufend[-2] = '\n';
11340                     PL_bufend--;
11341                     SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11342                 }
11343                 else if (PL_bufend[-1] == '\r')
11344                     PL_bufend[-1] = '\n';
11345             }
11346             else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
11347                 PL_bufend[-1] = '\n';
11348 #endif
11349 
11350             if (indented && (PL_bufend-s) >= len) {
11351                 char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len));
11352 
11353                 if (found) {
11354                     char *backup = found;
11355                     indent_len = 0;
11356 
11357                     /* Only valid if it's preceded by whitespace only */
11358                     while (backup != s && --backup >= s) {
11359                         if (! SPACE_OR_TAB(*backup)) {
11360                             break;
11361                         }
11362                         indent_len++;
11363                     }
11364 
11365                     /* All whitespace or none! */
11366                     if (backup == found || SPACE_OR_TAB(*backup)) {
11367                         Newx(indent, indent_len + 1, char);
11368                         memcpy(indent, backup, indent_len);
11369                         indent[indent_len] = 0;
11370                         SvREFCNT_dec(PL_linestr);
11371                         PL_linestr = linestr_save;
11372                         PL_linestart = SvPVX(linestr_save);
11373                         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11374                         PL_oldbufptr = oldbufptr_save;
11375                         PL_oldoldbufptr = oldoldbufptr_save;
11376                         s = d;
11377                         break;
11378                     }
11379                 }
11380 
11381                 /* Didn't find it */
11382                 sv_catsv(tmpstr,PL_linestr);
11383             }
11384             else {
11385                 if (*s == term && PL_bufend-s >= len
11386                     && memEQ(s,PL_tokenbuf + 1,len))
11387                 {
11388                     SvREFCNT_dec(PL_linestr);
11389                     PL_linestr = linestr_save;
11390                     PL_linestart = SvPVX(linestr_save);
11391                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11392                     PL_oldbufptr = oldbufptr_save;
11393                     PL_oldoldbufptr = oldoldbufptr_save;
11394                     s = d;
11395                     break;
11396                 }
11397                 else {
11398                     sv_catsv(tmpstr,PL_linestr);
11399                 }
11400             }
11401         } /* while (1) */
11402     }
11403 
11404     PL_multi_end = origline + PL_parser->herelines;
11405 
11406     if (indented && indent) {
11407         STRLEN linecount = 1;
11408         STRLEN herelen = SvCUR(tmpstr);
11409         char *ss = SvPVX(tmpstr);
11410         char *se = ss + herelen;
11411         SV *newstr = newSV(herelen+1);
11412         SvPOK_on(newstr);
11413 
11414         /* Trim leading whitespace */
11415         while (ss < se) {
11416             /* newline only? Copy and move on */
11417             if (*ss == '\n') {
11418                 sv_catpvs(newstr,"\n");
11419                 ss++;
11420                 linecount++;
11421 
11422             /* Found our indentation? Strip it */
11423             }
11424             else if (se - ss >= indent_len
11425                        && memEQ(ss, indent, indent_len))
11426             {
11427                 STRLEN le = 0;
11428                 ss += indent_len;
11429 
11430                 while ((ss + le) < se && *(ss + le) != '\n')
11431                     le++;
11432 
11433                 sv_catpvn(newstr, ss, le);
11434                 ss += le;
11435 
11436             /* Line doesn't begin with our indentation? Croak */
11437             }
11438             else {
11439                 Safefree(indent);
11440                 Perl_croak(aTHX_
11441                     "Indentation on line %d of here-doc doesn't match delimiter",
11442                     (int)linecount
11443                 );
11444             }
11445         } /* while */
11446 
11447         /* avoid sv_setsv() as we don't want to COW here */
11448         sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr));
11449         Safefree(indent);
11450         SvREFCNT_dec_NN(newstr);
11451     }
11452 
11453     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
11454         SvPV_shrink_to_cur(tmpstr);
11455     }
11456 
11457     if (!IN_BYTES) {
11458         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
11459             SvUTF8_on(tmpstr);
11460     }
11461 
11462     PL_lex_stuff = tmpstr;
11463     pl_yylval.ival = op_type;
11464     return s;
11465 
11466   interminable:
11467     if (indent)
11468         Safefree(indent);
11469     SvREFCNT_dec(tmpstr);
11470     CopLINE_set(PL_curcop, origline);
11471     missingterm(PL_tokenbuf + 1, sizeof(PL_tokenbuf) - 1);
11472 }
11473 
11474 
11475 /* scan_inputsymbol
11476    takes: position of first '<' in input buffer
11477    returns: position of first char following the matching '>' in
11478             input buffer
11479    side-effects: pl_yylval and lex_op are set.
11480 
11481    This code handles:
11482 
11483    <>		read from ARGV
11484    <<>>		read from ARGV without magic open
11485    <FH> 	read from filehandle
11486    <pkg::FH>	read from package qualified filehandle
11487    <pkg'FH>	read from package qualified filehandle
11488    <$fh>	read from filehandle in $fh
11489    <*.h>	filename glob
11490 
11491 */
11492 
11493 STATIC char *
S_scan_inputsymbol(pTHX_ char * start)11494 S_scan_inputsymbol(pTHX_ char *start)
11495 {
11496     char *s = start;		/* current position in buffer */
11497     char *end;
11498     I32 len;
11499     bool nomagicopen = FALSE;
11500     char *d = PL_tokenbuf;					/* start of temp holding space */
11501     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;	/* end of temp holding space */
11502 
11503     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
11504 
11505     end = (char *) memchr(s, '\n', PL_bufend - s);
11506     if (!end)
11507         end = PL_bufend;
11508     if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
11509         nomagicopen = TRUE;
11510         *d = '\0';
11511         len = 0;
11512         s += 3;
11513     }
11514     else
11515         s = delimcpy(d, e, s + 1, end, '>', &len);	/* extract until > */
11516 
11517     /* die if we didn't have space for the contents of the <>,
11518        or if it didn't end, or if we see a newline
11519     */
11520 
11521     if (len >= (I32)sizeof PL_tokenbuf)
11522         Perl_croak(aTHX_ "Excessively long <> operator");
11523     if (s >= end)
11524         Perl_croak(aTHX_ "Unterminated <> operator");
11525 
11526     s++;
11527 
11528     /* check for <$fh>
11529        Remember, only scalar variables are interpreted as filehandles by
11530        this code.  Anything more complex (e.g., <$fh{$num}>) will be
11531        treated as a glob() call.
11532        This code makes use of the fact that except for the $ at the front,
11533        a scalar variable and a filehandle look the same.
11534     */
11535     if (*d == '$' && d[1]) d++;
11536 
11537     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
11538     while (isWORDCHAR_lazy_if_safe(d, e, UTF) || *d == '\'' || *d == ':') {
11539         d += UTF ? UTF8SKIP(d) : 1;
11540     }
11541 
11542     /* If we've tried to read what we allow filehandles to look like, and
11543        there's still text left, then it must be a glob() and not a getline.
11544        Use scan_str to pull out the stuff between the <> and treat it
11545        as nothing more than a string.
11546     */
11547 
11548     if (d - PL_tokenbuf != len) {
11549         pl_yylval.ival = OP_GLOB;
11550         s = scan_str(start,FALSE,FALSE,FALSE,NULL);
11551         if (!s)
11552            Perl_croak(aTHX_ "Glob not terminated");
11553         return s;
11554     }
11555     else {
11556         bool readline_overridden = FALSE;
11557         GV *gv_readline;
11558         /* we're in a filehandle read situation */
11559         d = PL_tokenbuf;
11560 
11561         /* turn <> into <ARGV> */
11562         if (!len)
11563             Copy("ARGV",d,5,char);
11564 
11565         /* Check whether readline() is overridden */
11566         if ((gv_readline = gv_override("readline",8)))
11567             readline_overridden = TRUE;
11568 
11569         /* if <$fh>, create the ops to turn the variable into a
11570            filehandle
11571         */
11572         if (*d == '$') {
11573             /* try to find it in the pad for this block, otherwise find
11574                add symbol table ops
11575             */
11576             const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
11577             if (tmp != NOT_IN_PAD) {
11578                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
11579                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
11580                     HEK * const stashname = HvNAME_HEK(stash);
11581                     SV * const sym = newSVhek_mortal(stashname);
11582                     sv_catpvs(sym, "::");
11583                     sv_catpv(sym, d+1);
11584                     d = SvPVX(sym);
11585                     goto intro_sym;
11586                 }
11587                 else {
11588                     OP * const o = newPADxVOP(OP_PADSV, 0, tmp);
11589                     PL_lex_op = readline_overridden
11590                         ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11591                                 op_append_elem(OP_LIST, o,
11592                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
11593                         : newUNOP(OP_READLINE, 0, o);
11594                 }
11595             }
11596             else {
11597                 GV *gv;
11598                 ++d;
11599               intro_sym:
11600                 gv = gv_fetchpv(d,
11601                                 GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
11602                                 SVt_PV);
11603                 PL_lex_op = readline_overridden
11604                     ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11605                             op_append_elem(OP_LIST,
11606                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
11607                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11608                     : newUNOP(OP_READLINE, 0,
11609                             newUNOP(OP_RV2SV, 0,
11610                                 newGVOP(OP_GV, 0, gv)));
11611             }
11612             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
11613             pl_yylval.ival = OP_NULL;
11614         }
11615 
11616         /* If it's none of the above, it must be a literal filehandle
11617            (<Foo::BAR> or <FOO>) so build a simple readline OP */
11618         else {
11619             GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
11620             PL_lex_op = readline_overridden
11621                 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11622                         op_append_elem(OP_LIST,
11623                             newGVOP(OP_GV, 0, gv),
11624                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11625                 : newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
11626             pl_yylval.ival = OP_NULL;
11627 
11628             /* leave the token generation above to avoid confusing the parser */
11629             if (!FEATURE_BAREWORD_FILEHANDLES_IS_ENABLED) {
11630                 no_bareword_filehandle(d);
11631             }
11632         }
11633     }
11634 
11635     return s;
11636 }
11637 
11638 
11639 /* scan_str
11640    takes:
11641         start			position in buffer
11642         keep_bracketed_quoted   preserve \ quoting of embedded delimiters, but
11643                                 only if they are of the open/close form
11644         keep_delims		preserve the delimiters around the string
11645         re_reparse		compiling a run-time /(?{})/:
11646                                    collapse // to /,  and skip encoding src
11647         delimp			if non-null, this is set to the position of
11648                                 the closing delimiter, or just after it if
11649                                 the closing and opening delimiters differ
11650                                 (i.e., the opening delimiter of a substitu-
11651                                 tion replacement)
11652    returns: position to continue reading from buffer
11653    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
11654         updates the read buffer.
11655 
11656    This subroutine pulls a string out of the input.  It is called for:
11657         q		single quotes		q(literal text)
11658         '		single quotes		'literal text'
11659         qq		double quotes		qq(interpolate $here please)
11660         "		double quotes		"interpolate $here please"
11661         qx		backticks		qx(/bin/ls -l)
11662         `		backticks		`/bin/ls -l`
11663         qw		quote words		@EXPORT_OK = qw( func() $spam )
11664         m//		regexp match		m/this/
11665         s///		regexp substitute	s/this/that/
11666         tr///		string transliterate	tr/this/that/
11667         y///		string transliterate	y/this/that/
11668         ($*@)		sub prototypes		sub foo ($)
11669         (stuff)		sub attr parameters	sub foo : attr(stuff)
11670         <>		readline or globs	<FOO>, <>, <$fh>, or <*.c>
11671 
11672    In most of these cases (all but <>, patterns and transliterate)
11673    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
11674    calls scan_str().  s/// makes yylex() call scan_subst() which calls
11675    scan_str().  tr/// and y/// make yylex() call scan_trans() which
11676    calls scan_str().
11677 
11678    It skips whitespace before the string starts, and treats the first
11679    character as the delimiter.  If the delimiter is one of ([{< then
11680    the corresponding "close" character )]}> is used as the closing
11681    delimiter.  It allows quoting of delimiters, and if the string has
11682    balanced delimiters ([{<>}]) it allows nesting.
11683 
11684    On success, the SV with the resulting string is put into lex_stuff or,
11685    if that is already non-NULL, into lex_repl. The second case occurs only
11686    when parsing the RHS of the special constructs s/// and tr/// (y///).
11687    For convenience, the terminating delimiter character is stuffed into
11688    SvIVX of the SV.
11689 */
11690 
11691 char *
Perl_scan_str(pTHX_ char * start,int keep_bracketed_quoted,int keep_delims,int re_reparse,char ** delimp)11692 Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
11693                  char **delimp
11694     )
11695 {
11696     SV *sv;			/* scalar value: string */
11697     char *s = start;		/* current position in the buffer */
11698     char *to;			/* current position in the sv's data */
11699     int brackets = 1;		/* bracket nesting level */
11700     bool d_is_utf8 = FALSE;	/* is there any utf8 content? */
11701     UV open_delim_code;         /* code point */
11702     char open_delim_str[UTF8_MAXBYTES+1];
11703     STRLEN delim_byte_len;      /* each delimiter currently is the same number
11704                                    of bytes */
11705     line_t herelines;
11706 
11707     /* The only non-UTF character that isn't a stand alone grapheme is
11708      * white-space, hence can't be a delimiter. */
11709     const char * non_grapheme_msg = "Use of unassigned code point or"
11710                                     " non-standalone grapheme for a delimiter"
11711                                     " is not allowed";
11712     PERL_ARGS_ASSERT_SCAN_STR;
11713 
11714     /* skip space before the delimiter */
11715     if (isSPACE(*s)) {  /* skipspace can change the buffer 's' is in, so
11716                            'start' also has to change */
11717         s = start = skipspace(s);
11718     }
11719 
11720     /* mark where we are, in case we need to report errors */
11721     CLINE;
11722 
11723     /* after skipping whitespace, the next character is the delimiter */
11724     if (! UTF || UTF8_IS_INVARIANT(*s)) {
11725         open_delim_code   = (U8) *s;
11726         open_delim_str[0] =      *s;
11727         delim_byte_len = 1;
11728     }
11729     else {
11730         open_delim_code = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend,
11731                                             &delim_byte_len);
11732         if (UNLIKELY(! is_grapheme((U8 *) start,
11733                                    (U8 *) s,
11734                                    (U8 *) PL_bufend,
11735                                    open_delim_code)))
11736         {
11737             yyerror(non_grapheme_msg);
11738         }
11739 
11740         Copy(s, open_delim_str, delim_byte_len, char);
11741     }
11742     open_delim_str[delim_byte_len] = '\0';  /* Only for safety */
11743 
11744 
11745     /* mark where we are */
11746     PL_multi_start = CopLINE(PL_curcop);
11747     PL_multi_open = open_delim_code;
11748     herelines = PL_parser->herelines;
11749 
11750     const char * legal_paired_opening_delims;
11751     const char * legal_paired_closing_delims;
11752     const char * deprecated_opening_delims;
11753     if (FEATURE_MORE_DELIMS_IS_ENABLED) {
11754         if (UTF) {
11755             legal_paired_opening_delims = EXTRA_OPENING_UTF8_BRACKETS;
11756             legal_paired_closing_delims = EXTRA_CLOSING_UTF8_BRACKETS;
11757 
11758             /* We are deprecating using a closing delimiter as the opening, in
11759              * case we want in the future to accept them reversed.  The string
11760              * may include ones that are legal, but the code below won't look
11761              * at this string unless it didn't find a legal opening one */
11762             deprecated_opening_delims = DEPRECATED_OPENING_UTF8_BRACKETS;
11763         }
11764         else {
11765             legal_paired_opening_delims = EXTRA_OPENING_NON_UTF8_BRACKETS;
11766             legal_paired_closing_delims = EXTRA_CLOSING_NON_UTF8_BRACKETS;
11767             deprecated_opening_delims = DEPRECATED_OPENING_NON_UTF8_BRACKETS;
11768         }
11769     }
11770     else {
11771         legal_paired_opening_delims = "([{<";
11772         legal_paired_closing_delims = ")]}>";
11773         deprecated_opening_delims = (UTF)
11774                                     ? DEPRECATED_OPENING_UTF8_BRACKETS
11775                                     : DEPRECATED_OPENING_NON_UTF8_BRACKETS;
11776     }
11777 
11778     const char * legal_paired_opening_delims_end = legal_paired_opening_delims
11779                                           + strlen(legal_paired_opening_delims);
11780     const char * deprecated_delims_end = deprecated_opening_delims
11781                                 + strlen(deprecated_opening_delims);
11782 
11783     const char * close_delim_str = open_delim_str;
11784     UV close_delim_code = open_delim_code;
11785 
11786     /* If the delimiter has a mirror-image closing one, get it */
11787     const char *tmps = ninstr(legal_paired_opening_delims,
11788                               legal_paired_opening_delims_end,
11789                               open_delim_str, open_delim_str + delim_byte_len);
11790     if (tmps) {
11791         /* Here, there is a paired delimiter, and tmps points to its position
11792            in the string of the accepted opening paired delimiters.  The
11793            corresponding position in the string of closing ones is the
11794            beginning of the paired mate.  Both contain the same number of
11795            bytes. */
11796         close_delim_str = legal_paired_closing_delims
11797                         + (tmps - legal_paired_opening_delims);
11798 
11799         /* The list of paired delimiters contains all the ASCII ones that have
11800          * always been legal, and no other ASCIIs.  Don't raise a message if
11801          * using one of these */
11802         if (! isASCII(open_delim_code)) {
11803             Perl_ck_warner_d(aTHX_
11804                              packWARN(WARN_EXPERIMENTAL__EXTRA_PAIRED_DELIMITERS),
11805                              "Use of '%" UTF8f "' is experimental as a string delimiter",
11806                              UTF8fARG(UTF, delim_byte_len, open_delim_str));
11807         }
11808 
11809         close_delim_code = (UTF)
11810                            ? valid_utf8_to_uvchr((U8 *) close_delim_str, NULL)
11811                            : * (U8 *) close_delim_str;
11812     }
11813     else {  /* Here, the delimiter isn't paired, hence the close is the same as
11814                the open; and has already been set up.  But make sure it isn't
11815                deprecated to use this particular delimiter, as we plan
11816                eventually to make it paired. */
11817         if (ninstr(deprecated_opening_delims, deprecated_delims_end,
11818                    open_delim_str, open_delim_str + delim_byte_len))
11819         {
11820             Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED__DELIMITER_WILL_BE_PAIRED),
11821                              "Use of '%" UTF8f "' is deprecated as a string delimiter",
11822                              UTF8fARG(UTF, delim_byte_len, open_delim_str));
11823         }
11824 
11825         /* Note that a NUL may be used as a delimiter, and this happens when
11826          * delimiting an empty string, and no special handling for it is
11827          * needed, as ninstr() calls are used */
11828     }
11829 
11830     PL_multi_close = close_delim_code;
11831 
11832     if (PL_multi_open == PL_multi_close) {
11833         keep_bracketed_quoted = FALSE;
11834     }
11835 
11836     /* create a new SV to hold the contents.  79 is the SV's initial length.
11837        What a random number. */
11838     sv = newSV_type(SVt_PVIV);
11839     sv_grow_fresh(sv, 79);
11840     SvIV_set(sv, close_delim_code);
11841     (void)SvPOK_only(sv);		/* validate pointer */
11842 
11843     /* move past delimiter and try to read a complete string */
11844     if (keep_delims)
11845         sv_catpvn(sv, s, delim_byte_len);
11846     s += delim_byte_len;
11847     for (;;) {
11848         /* extend sv if need be */
11849         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
11850         /* set 'to' to the next character in the sv's string */
11851         to = SvPVX(sv)+SvCUR(sv);
11852 
11853         /* read until we run out of string, or we find the closing delimiter */
11854         while (s < PL_bufend) {
11855             /* embedded newlines increment the line count */
11856             if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
11857                 COPLINE_INC_WITH_HERELINES;
11858 
11859             /* backslashes can escape the closing delimiter */
11860             if (   *s == '\\' && s < PL_bufend - delim_byte_len
11861 
11862                    /* ... but not if the delimiter itself is a backslash */
11863                 && close_delim_code != '\\')
11864             {
11865                 /* Here, we have an escaping backslash.  If we're supposed to
11866                  * discard those that escape the closing delimiter, just
11867                  * discard this one */
11868                 if (   !  keep_bracketed_quoted
11869                     &&   (    memEQ(s + 1,  open_delim_str, delim_byte_len)
11870                           ||  (   PL_multi_open == PL_multi_close
11871                                && re_reparse && s[1] == '\\')
11872                           ||  memEQ(s + 1, close_delim_str, delim_byte_len)))
11873                 {
11874                     s++;
11875                 }
11876                 else /* any other escapes are simply copied straight through */
11877                     *to++ = *s++;
11878             }
11879             else if (   s < PL_bufend - (delim_byte_len - 1)
11880                      && memEQ(s, close_delim_str, delim_byte_len)
11881                      && --brackets <= 0)
11882             {
11883                 /* Found unescaped closing delimiter, unnested if we care about
11884                  * that; so are done.
11885                  *
11886                  * In the case of the opening and closing delimiters being
11887                  * different, we have to deal with nesting; the conditional
11888                  * above makes sure we don't get here until the nesting level,
11889                  * 'brackets', is back down to zero.  In the other case,
11890                  * nesting isn't an issue, and 'brackets' never can get
11891                  * incremented above 0, so will come here at the first closing
11892                  * delimiter.
11893                  *
11894                  * Only grapheme delimiters are legal. */
11895                 if (   UTF  /* All Non-UTF-8's are graphemes */
11896                     && UNLIKELY(! is_grapheme((U8 *) start,
11897                                               (U8 *) s,
11898                                               (U8 *) PL_bufend,
11899                                               close_delim_code)))
11900                 {
11901                     yyerror(non_grapheme_msg);
11902                 }
11903 
11904                 break;
11905             }
11906                         /* No nesting if open eq close */
11907             else if (   PL_multi_open != PL_multi_close
11908                      && s < PL_bufend - (delim_byte_len - 1)
11909                      && memEQ(s, open_delim_str, delim_byte_len))
11910             {
11911                 brackets++;
11912             }
11913 
11914             /* Here, still in the middle of the string; copy this character */
11915             if (! UTF || UTF8_IS_INVARIANT((U8) *s)) {
11916                 *to++ = *s++;
11917             }
11918             else {
11919                 size_t this_char_len = UTF8SKIP(s);
11920                 Copy(s, to, this_char_len, char);
11921                 s  += this_char_len;
11922                 to += this_char_len;
11923 
11924                 d_is_utf8 = TRUE;
11925             }
11926         } /* End of loop through buffer */
11927 
11928         /* Here, found end of the string, OR ran out of buffer: terminate the
11929          * copied string and update the sv's end-of-string */
11930         *to = '\0';
11931         SvCUR_set(sv, to - SvPVX_const(sv));
11932 
11933         /*
11934          * this next chunk reads more into the buffer if we're not done yet
11935          */
11936 
11937         if (s < PL_bufend)
11938             break;		/* handle case where we are done yet :-) */
11939 
11940 #ifndef PERL_STRICT_CR
11941         if (to - SvPVX_const(sv) >= 2) {
11942             if (   (to[-2] == '\r' && to[-1] == '\n')
11943                 || (to[-2] == '\n' && to[-1] == '\r'))
11944             {
11945                 to[-2] = '\n';
11946                 to--;
11947                 SvCUR_set(sv, to - SvPVX_const(sv));
11948             }
11949             else if (to[-1] == '\r')
11950                 to[-1] = '\n';
11951         }
11952         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
11953             to[-1] = '\n';
11954 #endif
11955 
11956         /* if we're out of file, or a read fails, bail and reset the current
11957            line marker so we can report where the unterminated string began
11958         */
11959         COPLINE_INC_WITH_HERELINES;
11960         PL_bufptr = PL_bufend;
11961         if (!lex_next_chunk(0)) {
11962             ASSUME(sv);
11963             SvREFCNT_dec(sv);
11964             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11965             return NULL;
11966         }
11967         s = start = PL_bufptr;
11968     } /* End of infinite loop */
11969 
11970     /* at this point, we have successfully read the delimited string */
11971 
11972     if (keep_delims)
11973             sv_catpvn(sv, s, delim_byte_len);
11974     s += delim_byte_len;
11975 
11976     if (d_is_utf8)
11977         SvUTF8_on(sv);
11978 
11979     PL_multi_end = CopLINE(PL_curcop);
11980     CopLINE_set(PL_curcop, PL_multi_start);
11981     PL_parser->herelines = herelines;
11982 
11983     /* if we allocated too much space, give some back */
11984     if (SvCUR(sv) + 5 < SvLEN(sv)) {
11985         SvLEN_set(sv, SvCUR(sv) + 1);
11986         SvPV_shrink_to_cur(sv);
11987     }
11988 
11989     /* decide whether this is the first or second quoted string we've read
11990        for this op
11991     */
11992 
11993     if (PL_lex_stuff)
11994         PL_parser->lex_sub_repl = sv;
11995     else
11996         PL_lex_stuff = sv;
11997     if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-delim_byte_len : s;
11998     return s;
11999 }
12000 
12001 /*
12002   scan_num
12003   takes: pointer to position in buffer
12004   returns: pointer to new position in buffer
12005   side-effects: builds ops for the constant in pl_yylval.op
12006 
12007   Read a number in any of the formats that Perl accepts:
12008 
12009   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)	12 12.34 12.
12010   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)			.34
12011   0b[01](_?[01])*                                       binary integers
12012   0o?[0-7](_?[0-7])*                                    octal integers
12013   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*                         hexadecimal integers
12014   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+   hexadecimal floats
12015 
12016   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
12017   thing it reads.
12018 
12019   If it reads a number without a decimal point or an exponent, it will
12020   try converting the number to an integer and see if it can do so
12021   without loss of precision.
12022 */
12023 
12024 char *
Perl_scan_num(pTHX_ const char * start,YYSTYPE * lvalp)12025 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
12026 {
12027     const char *s = start;	/* current position in buffer */
12028     char *d;			/* destination in temp buffer */
12029     char *e;			/* end of temp buffer */
12030     NV nv;				/* number read, as a double */
12031     SV *sv = NULL;			/* place to put the converted number */
12032     bool floatit;			/* boolean: int or float? */
12033     const char *lastub = NULL;		/* position of last underbar */
12034     static const char* const number_too_long = "Number too long";
12035     bool warned_about_underscore = 0;
12036     I32 shift; /* shift per digit for hex/oct/bin, hoisted here for fp */
12037 #define WARN_ABOUT_UNDERSCORE() \
12038         do { \
12039             if (!warned_about_underscore) { \
12040                 warned_about_underscore = 1; \
12041                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), \
12042                                "Misplaced _ in number"); \
12043             } \
12044         } while(0)
12045     /* Hexadecimal floating point.
12046      *
12047      * In many places (where we have quads and NV is IEEE 754 double)
12048      * we can fit the mantissa bits of a NV into an unsigned quad.
12049      * (Note that UVs might not be quads even when we have quads.)
12050      * This will not work everywhere, though (either no quads, or
12051      * using long doubles), in which case we have to resort to NV,
12052      * which will probably mean horrible loss of precision due to
12053      * multiple fp operations. */
12054     bool hexfp = FALSE;
12055     int total_bits = 0;
12056     int significant_bits = 0;
12057 #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
12058 #  define HEXFP_UQUAD
12059     Uquad_t hexfp_uquad = 0;
12060     int hexfp_frac_bits = 0;
12061 #else
12062 #  define HEXFP_NV
12063     NV hexfp_nv = 0.0;
12064 #endif
12065     NV hexfp_mult = 1.0;
12066     UV high_non_zero = 0; /* highest digit */
12067     int non_zero_integer_digits = 0;
12068     bool new_octal = FALSE;     /* octal with "0o" prefix */
12069 
12070     PERL_ARGS_ASSERT_SCAN_NUM;
12071 
12072     /* We use the first character to decide what type of number this is */
12073 
12074     switch (*s) {
12075     default:
12076         Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
12077 
12078     /* if it starts with a 0, it could be an octal number, a decimal in
12079        0.13 disguise, or a hexadecimal number, or a binary number. */
12080     case '0':
12081         {
12082           /* variables:
12083              u		holds the "number so far"
12084              overflowed	was the number more than we can hold?
12085 
12086              Shift is used when we add a digit.  It also serves as an "are
12087              we in octal/hex/binary?" indicator to disallow hex characters
12088              when in octal mode.
12089            */
12090             NV n = 0.0;
12091             UV u = 0;
12092             bool overflowed = FALSE;
12093             bool just_zero  = TRUE;	/* just plain 0 or binary number? */
12094             bool has_digs = FALSE;
12095             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
12096             static const char* const bases[5] =
12097               { "", "binary", "", "octal", "hexadecimal" };
12098             static const char* const Bases[5] =
12099               { "", "Binary", "", "Octal", "Hexadecimal" };
12100             static const char* const maxima[5] =
12101               { "",
12102                 "0b11111111111111111111111111111111",
12103                 "",
12104                 "037777777777",
12105                 "0xffffffff" };
12106 
12107             /* check for hex */
12108             if (isALPHA_FOLD_EQ(s[1], 'x')) {
12109                 shift = 4;
12110                 s += 2;
12111                 just_zero = FALSE;
12112             } else if (isALPHA_FOLD_EQ(s[1], 'b')) {
12113                 shift = 1;
12114                 s += 2;
12115                 just_zero = FALSE;
12116             }
12117             /* check for a decimal in disguise */
12118             else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e'))
12119                 goto decimal;
12120             /* so it must be octal */
12121             else {
12122                 shift = 3;
12123                 s++;
12124                 if (isALPHA_FOLD_EQ(*s, 'o')) {
12125                     s++;
12126                     just_zero = FALSE;
12127                     new_octal = TRUE;
12128                 }
12129             }
12130 
12131             if (*s == '_') {
12132                 WARN_ABOUT_UNDERSCORE();
12133                lastub = s++;
12134             }
12135 
12136             /* read the rest of the number */
12137             for (;;) {
12138                 /* x is used in the overflow test,
12139                    b is the digit we're adding on. */
12140                 UV x, b;
12141 
12142                 switch (*s) {
12143 
12144                 /* if we don't mention it, we're done */
12145                 default:
12146                     goto out;
12147 
12148                 /* _ are ignored -- but warned about if consecutive */
12149                 case '_':
12150                     if (lastub && s == lastub + 1)
12151                         WARN_ABOUT_UNDERSCORE();
12152                     lastub = s++;
12153                     break;
12154 
12155                 /* 8 and 9 are not octal */
12156                 case '8': case '9':
12157                     if (shift == 3)
12158                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
12159                     /* FALLTHROUGH */
12160 
12161                 /* octal digits */
12162                 case '2': case '3': case '4':
12163                 case '5': case '6': case '7':
12164                     if (shift == 1)
12165                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
12166                     /* FALLTHROUGH */
12167 
12168                 case '0': case '1':
12169                     b = *s++ & 15;		/* ASCII digit -> value of digit */
12170                     goto digit;
12171 
12172                 /* hex digits */
12173                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
12174                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
12175                     /* make sure they said 0x */
12176                     if (shift != 4)
12177                         goto out;
12178                     b = (*s++ & 7) + 9;
12179 
12180                     /* Prepare to put the digit we have onto the end
12181                        of the number so far.  We check for overflows.
12182                     */
12183 
12184                   digit:
12185                     just_zero = FALSE;
12186                     has_digs = TRUE;
12187                     if (!overflowed) {
12188                         assert(shift >= 0);
12189                         x = u << shift;	/* make room for the digit */
12190 
12191                         total_bits += shift;
12192 
12193                         if ((x >> shift) != u
12194                             && !(PL_hints & HINT_NEW_BINARY)) {
12195                             overflowed = TRUE;
12196                             n = (NV) u;
12197                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
12198                                              "Integer overflow in %s number",
12199                                              bases[shift]);
12200                         } else
12201                             u = x | b;		/* add the digit to the end */
12202                     }
12203                     if (overflowed) {
12204                         n *= nvshift[shift];
12205                         /* If an NV has not enough bits in its
12206                          * mantissa to represent an UV this summing of
12207                          * small low-order numbers is a waste of time
12208                          * (because the NV cannot preserve the
12209                          * low-order bits anyway): we could just
12210                          * remember when did we overflow and in the
12211                          * end just multiply n by the right
12212                          * amount. */
12213                         n += (NV) b;
12214                     }
12215 
12216                     if (high_non_zero == 0 && b > 0)
12217                         high_non_zero = b;
12218 
12219                     if (high_non_zero)
12220                         non_zero_integer_digits++;
12221 
12222                     /* this could be hexfp, but peek ahead
12223                      * to avoid matching ".." */
12224                     if (UNLIKELY(HEXFP_PEEK(s))) {
12225                         goto out;
12226                     }
12227 
12228                     break;
12229                 }
12230             }
12231 
12232           /* if we get here, we had success: make a scalar value from
12233              the number.
12234           */
12235           out:
12236 
12237             /* final misplaced underbar check */
12238             if (s[-1] == '_')
12239                 WARN_ABOUT_UNDERSCORE();
12240 
12241             if (UNLIKELY(HEXFP_PEEK(s))) {
12242                 /* Do sloppy (on the underbars) but quick detection
12243                  * (and value construction) for hexfp, the decimal
12244                  * detection will shortly be more thorough with the
12245                  * underbar checks. */
12246                 const char* h = s;
12247                 significant_bits = non_zero_integer_digits * shift;
12248 #ifdef HEXFP_UQUAD
12249                 hexfp_uquad = u;
12250 #else /* HEXFP_NV */
12251                 hexfp_nv = u;
12252 #endif
12253                 /* Ignore the leading zero bits of
12254                  * the high (first) non-zero digit. */
12255                 if (high_non_zero) {
12256                     if (high_non_zero < 0x8)
12257                         significant_bits--;
12258                     if (high_non_zero < 0x4)
12259                         significant_bits--;
12260                     if (high_non_zero < 0x2)
12261                         significant_bits--;
12262                 }
12263 
12264                 if (*h == '.') {
12265 #ifdef HEXFP_NV
12266                     NV nv_mult = 1.0;
12267 #endif
12268                     bool accumulate = TRUE;
12269                     U8 b = 0; /* silence compiler warning */
12270                     int lim = 1 << shift;
12271                     for (h++; ((isXDIGIT(*h) && (b = XDIGIT_VALUE(*h)) < lim) ||
12272                                *h == '_'); h++) {
12273                         if (isXDIGIT(*h)) {
12274                             significant_bits += shift;
12275 #ifdef HEXFP_UQUAD
12276                             if (accumulate) {
12277                                 if (significant_bits < NV_MANT_DIG) {
12278                                     /* We are in the long "run" of xdigits,
12279                                      * accumulate the full four bits. */
12280                                     assert(shift >= 0);
12281                                     hexfp_uquad <<= shift;
12282                                     hexfp_uquad |= b;
12283                                     hexfp_frac_bits += shift;
12284                                 } else if (significant_bits - shift < NV_MANT_DIG) {
12285                                     /* We are at a hexdigit either at,
12286                                      * or straddling, the edge of mantissa.
12287                                      * We will try grabbing as many as
12288                                      * possible bits. */
12289                                     int tail =
12290                                       significant_bits - NV_MANT_DIG;
12291                                     if (tail <= 0)
12292                                        tail += shift;
12293                                     assert(tail >= 0);
12294                                     hexfp_uquad <<= tail;
12295                                     assert((shift - tail) >= 0);
12296                                     hexfp_uquad |= b >> (shift - tail);
12297                                     hexfp_frac_bits += tail;
12298 
12299                                     /* Ignore the trailing zero bits
12300                                      * of the last non-zero xdigit.
12301                                      *
12302                                      * The assumption here is that if
12303                                      * one has input of e.g. the xdigit
12304                                      * eight (0x8), there is only one
12305                                      * bit being input, not the full
12306                                      * four bits.  Conversely, if one
12307                                      * specifies a zero xdigit, the
12308                                      * assumption is that one really
12309                                      * wants all those bits to be zero. */
12310                                     if (b) {
12311                                         if ((b & 0x1) == 0x0) {
12312                                             significant_bits--;
12313                                             if ((b & 0x2) == 0x0) {
12314                                                 significant_bits--;
12315                                                 if ((b & 0x4) == 0x0) {
12316                                                     significant_bits--;
12317                                                 }
12318                                             }
12319                                         }
12320                                     }
12321 
12322                                     accumulate = FALSE;
12323                                 }
12324                             } else {
12325                                 /* Keep skipping the xdigits, and
12326                                  * accumulating the significant bits,
12327                                  * but do not shift the uquad
12328                                  * (which would catastrophically drop
12329                                  * high-order bits) or accumulate the
12330                                  * xdigits anymore. */
12331                             }
12332 #else /* HEXFP_NV */
12333                             if (accumulate) {
12334                                 nv_mult /= nvshift[shift];
12335                                 if (nv_mult > 0.0)
12336                                     hexfp_nv += b * nv_mult;
12337                                 else
12338                                     accumulate = FALSE;
12339                             }
12340 #endif
12341                         }
12342                         if (significant_bits >= NV_MANT_DIG)
12343                             accumulate = FALSE;
12344                     }
12345                 }
12346 
12347                 if ((total_bits > 0 || significant_bits > 0) &&
12348                     isALPHA_FOLD_EQ(*h, 'p')) {
12349                     bool negexp = FALSE;
12350                     h++;
12351                     if (*h == '+')
12352                         h++;
12353                     else if (*h == '-') {
12354                         negexp = TRUE;
12355                         h++;
12356                     }
12357                     if (isDIGIT(*h)) {
12358                         I32 hexfp_exp = 0;
12359                         while (isDIGIT(*h) || *h == '_') {
12360                             if (isDIGIT(*h)) {
12361                                 hexfp_exp *= 10;
12362                                 hexfp_exp += *h - '0';
12363 #ifdef NV_MIN_EXP
12364                                 if (negexp
12365                                     && -hexfp_exp < NV_MIN_EXP - 1) {
12366                                     /* NOTE: this means that the exponent
12367                                      * underflow warning happens for
12368                                      * the IEEE 754 subnormals (denormals),
12369                                      * because DBL_MIN_EXP etc are the lowest
12370                                      * possible binary (or, rather, DBL_RADIX-base)
12371                                      * exponent for normals, not subnormals.
12372                                      *
12373                                      * This may or may not be a good thing. */
12374                                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
12375                                                    "Hexadecimal float: exponent underflow");
12376                                     break;
12377                                 }
12378 #endif
12379 #ifdef NV_MAX_EXP
12380                                 if (!negexp
12381                                     && hexfp_exp > NV_MAX_EXP - 1) {
12382                                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
12383                                                    "Hexadecimal float: exponent overflow");
12384                                     break;
12385                                 }
12386 #endif
12387                             }
12388                             h++;
12389                         }
12390                         if (negexp)
12391                             hexfp_exp = -hexfp_exp;
12392 #ifdef HEXFP_UQUAD
12393                         hexfp_exp -= hexfp_frac_bits;
12394 #endif
12395                         hexfp_mult = Perl_pow(2.0, hexfp_exp);
12396                         hexfp = TRUE;
12397                         goto decimal;
12398                     }
12399                 }
12400             }
12401 
12402             if (!just_zero && !has_digs) {
12403                 /* 0x, 0o or 0b with no digits, treat it as an error.
12404                    Originally this backed up the parse before the b or
12405                    x, but that has the potential for silent changes in
12406                    behaviour, like for: "0x.3" and "0x+$foo".
12407                 */
12408                 const char *d = s;
12409                 char *oldbp = PL_bufptr;
12410                 if (*d) ++d; /* so the user sees the bad non-digit */
12411                 PL_bufptr = (char *)d; /* so yyerror reports the context */
12412                 yyerror(Perl_form(aTHX_ "No digits found for %s literal",
12413                                   bases[shift]));
12414                 PL_bufptr = oldbp;
12415             }
12416 
12417             if (overflowed) {
12418                 if (n > 4294967295.0)
12419                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12420                                    "%s number > %s non-portable",
12421                                    Bases[shift],
12422                                    new_octal ? "0o37777777777" : maxima[shift]);
12423                 sv = newSVnv(n);
12424             }
12425             else {
12426 #if UVSIZE > 4
12427                 if (u > 0xffffffff)
12428                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12429                                    "%s number > %s non-portable",
12430                                    Bases[shift],
12431                                    new_octal ? "0o37777777777" : maxima[shift]);
12432 #endif
12433                 sv = newSVuv(u);
12434             }
12435             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
12436                 sv = new_constant(start, s - start, "integer",
12437                                   sv, NULL, NULL, 0, NULL);
12438             else if (PL_hints & HINT_NEW_BINARY)
12439                 sv = new_constant(start, s - start, "binary",
12440                                   sv, NULL, NULL, 0, NULL);
12441         }
12442         break;
12443 
12444     /*
12445       handle decimal numbers.
12446       we're also sent here when we read a 0 as the first digit
12447     */
12448     case '1': case '2': case '3': case '4': case '5':
12449     case '6': case '7': case '8': case '9': case '.':
12450       decimal:
12451         d = PL_tokenbuf;
12452         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
12453         floatit = FALSE;
12454         if (hexfp) {
12455             floatit = TRUE;
12456             *d++ = '0';
12457             switch (shift) {
12458             case 4:
12459                 *d++ = 'x';
12460                 s = start + 2;
12461                 break;
12462             case 3:
12463                 if (new_octal) {
12464                     *d++ = 'o';
12465                     s = start + 2;
12466                     break;
12467                 }
12468                 s = start + 1;
12469                 break;
12470             case 1:
12471                 *d++ = 'b';
12472                 s = start + 2;
12473                 break;
12474             default:
12475                 NOT_REACHED; /* NOTREACHED */
12476             }
12477         }
12478 
12479         /* read next group of digits and _ and copy into d */
12480         while (isDIGIT(*s)
12481                || *s == '_'
12482                || UNLIKELY(hexfp && isXDIGIT(*s)))
12483         {
12484             /* skip underscores, checking for misplaced ones
12485                if -w is on
12486             */
12487             if (*s == '_') {
12488                 if (lastub && s == lastub + 1)
12489                     WARN_ABOUT_UNDERSCORE();
12490                 lastub = s++;
12491             }
12492             else {
12493                 /* check for end of fixed-length buffer */
12494                 if (d >= e)
12495                     Perl_croak(aTHX_ "%s", number_too_long);
12496                 /* if we're ok, copy the character */
12497                 *d++ = *s++;
12498             }
12499         }
12500 
12501         /* final misplaced underbar check */
12502         if (lastub && s == lastub + 1)
12503             WARN_ABOUT_UNDERSCORE();
12504 
12505         /* read a decimal portion if there is one.  avoid
12506            3..5 being interpreted as the number 3. followed
12507            by .5
12508         */
12509         if (*s == '.' && s[1] != '.') {
12510             floatit = TRUE;
12511             *d++ = *s++;
12512 
12513             if (*s == '_') {
12514                 WARN_ABOUT_UNDERSCORE();
12515                 lastub = s;
12516             }
12517 
12518             /* copy, ignoring underbars, until we run out of digits.
12519             */
12520             for (; isDIGIT(*s)
12521                    || *s == '_'
12522                    || UNLIKELY(hexfp && isXDIGIT(*s));
12523                  s++)
12524             {
12525                 /* fixed length buffer check */
12526                 if (d >= e)
12527                     Perl_croak(aTHX_ "%s", number_too_long);
12528                 if (*s == '_') {
12529                    if (lastub && s == lastub + 1)
12530                         WARN_ABOUT_UNDERSCORE();
12531                    lastub = s;
12532                 }
12533                 else
12534                     *d++ = *s;
12535             }
12536             /* fractional part ending in underbar? */
12537             if (s[-1] == '_')
12538                 WARN_ABOUT_UNDERSCORE();
12539             if (*s == '.' && isDIGIT(s[1])) {
12540                 /* oops, it's really a v-string, but without the "v" */
12541                 s = start;
12542                 goto vstring;
12543             }
12544         }
12545 
12546         /* read exponent part, if present */
12547         if ((isALPHA_FOLD_EQ(*s, 'e')
12548               || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
12549             && memCHRs("+-0123456789_", s[1]))
12550         {
12551             int exp_digits = 0;
12552             const char *save_s = s;
12553             char * save_d = d;
12554 
12555             /* regardless of whether user said 3E5 or 3e5, use lower 'e',
12556                ditto for p (hexfloats) */
12557             if ((isALPHA_FOLD_EQ(*s, 'e'))) {
12558                 /* At least some Mach atof()s don't grok 'E' */
12559                 *d++ = 'e';
12560             }
12561             else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) {
12562                 *d++ = 'p';
12563             }
12564 
12565             s++;
12566 
12567 
12568             /* stray preinitial _ */
12569             if (*s == '_') {
12570                 WARN_ABOUT_UNDERSCORE();
12571                 lastub = s++;
12572             }
12573 
12574             /* allow positive or negative exponent */
12575             if (*s == '+' || *s == '-')
12576                 *d++ = *s++;
12577 
12578             /* stray initial _ */
12579             if (*s == '_') {
12580                 WARN_ABOUT_UNDERSCORE();
12581                 lastub = s++;
12582             }
12583 
12584             /* read digits of exponent */
12585             while (isDIGIT(*s) || *s == '_') {
12586                 if (isDIGIT(*s)) {
12587                     ++exp_digits;
12588                     if (d >= e)
12589                         Perl_croak(aTHX_ "%s", number_too_long);
12590                     *d++ = *s++;
12591                 }
12592                 else {
12593                    if (((lastub && s == lastub + 1)
12594                         || (!isDIGIT(s[1]) && s[1] != '_')))
12595                         WARN_ABOUT_UNDERSCORE();
12596                    lastub = s++;
12597                 }
12598             }
12599 
12600             if (!exp_digits) {
12601                 /* no exponent digits, the [eEpP] could be for something else,
12602                  * though in practice we don't get here for p since that's preparsed
12603                  * earlier, and results in only the 0xX being consumed, so behave similarly
12604                  * for decimal floats and consume only the D.DD, leaving the [eE] to the
12605                  * next token.
12606                  */
12607                 s = save_s;
12608                 d = save_d;
12609             }
12610             else {
12611                 floatit = TRUE;
12612             }
12613         }
12614 
12615 
12616         /*
12617            We try to do an integer conversion first if no characters
12618            indicating "float" have been found.
12619          */
12620 
12621         if (!floatit) {
12622             UV uv;
12623             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
12624 
12625             if (flags == IS_NUMBER_IN_UV) {
12626               if (uv <= IV_MAX)
12627                 sv = newSViv(uv); /* Prefer IVs over UVs. */
12628               else
12629                 sv = newSVuv(uv);
12630             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
12631               if (uv <= (UV) IV_MIN)
12632                 sv = newSViv(-(IV)uv);
12633               else
12634                 floatit = TRUE;
12635             } else
12636               floatit = TRUE;
12637         }
12638         if (floatit) {
12639             /* terminate the string */
12640             *d = '\0';
12641             if (UNLIKELY(hexfp)) {
12642 #  ifdef NV_MANT_DIG
12643                 if (significant_bits > NV_MANT_DIG)
12644                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
12645                                    "Hexadecimal float: mantissa overflow");
12646 #  endif
12647 #ifdef HEXFP_UQUAD
12648                 nv = hexfp_uquad * hexfp_mult;
12649 #else /* HEXFP_NV */
12650                 nv = hexfp_nv * hexfp_mult;
12651 #endif
12652             } else {
12653                 nv = Atof(PL_tokenbuf);
12654             }
12655             sv = newSVnv(nv);
12656         }
12657 
12658         if ( floatit
12659              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
12660             const char *const key = floatit ? "float" : "integer";
12661             const STRLEN keylen = floatit ? 5 : 7;
12662             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
12663                                 key, keylen, sv, NULL, NULL, 0, NULL);
12664         }
12665         break;
12666 
12667     /* if it starts with a v, it could be a v-string */
12668     case 'v':
12669     vstring:
12670                 sv = newSV(5); /* preallocate storage space */
12671                 ENTER_with_name("scan_vstring");
12672                 SAVEFREESV(sv);
12673                 s = scan_vstring(s, PL_bufend, sv);
12674                 SvREFCNT_inc_simple_void_NN(sv);
12675                 LEAVE_with_name("scan_vstring");
12676         break;
12677     }
12678 
12679     /* make the op for the constant and return */
12680 
12681     if (sv)
12682         lvalp->opval = newSVOP(OP_CONST, 0, sv);
12683     else
12684         lvalp->opval = NULL;
12685 
12686     return (char *)s;
12687 }
12688 
12689 STATIC char *
S_scan_formline(pTHX_ char * s)12690 S_scan_formline(pTHX_ char *s)
12691 {
12692     SV * const stuff = newSVpvs("");
12693     bool needargs = FALSE;
12694     bool eofmt = FALSE;
12695 
12696     PERL_ARGS_ASSERT_SCAN_FORMLINE;
12697 
12698     while (!needargs) {
12699         char *eol;
12700         if (*s == '.') {
12701             char *t = s+1;
12702 #ifdef PERL_STRICT_CR
12703             while (SPACE_OR_TAB(*t))
12704                 t++;
12705 #else
12706             while (SPACE_OR_TAB(*t) || *t == '\r')
12707                 t++;
12708 #endif
12709             if (*t == '\n' || t == PL_bufend) {
12710                 eofmt = TRUE;
12711                 break;
12712             }
12713         }
12714         eol = (char *) memchr(s,'\n',PL_bufend-s);
12715         if (! eol) {
12716             eol = PL_bufend;
12717         }
12718         else {
12719             eol++;
12720         }
12721         if (*s != '#') {
12722             char *t;
12723             for (t = s; t < eol; t++) {
12724                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
12725                     needargs = FALSE;
12726                     goto enough;	/* ~~ must be first line in formline */
12727                 }
12728                 if (*t == '@' || *t == '^')
12729                     needargs = TRUE;
12730             }
12731             if (eol > s) {
12732                 sv_catpvn(stuff, s, eol-s);
12733 #ifndef PERL_STRICT_CR
12734                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
12735                     char *end = SvPVX(stuff) + SvCUR(stuff);
12736                     end[-2] = '\n';
12737                     end[-1] = '\0';
12738                     SvCUR_set(stuff, SvCUR(stuff) - 1);
12739                 }
12740 #endif
12741             }
12742             else
12743               break;
12744         }
12745         s = (char*)eol;
12746         if ((PL_rsfp || PL_parser->filtered)
12747          && PL_parser->form_lex_state == LEX_NORMAL) {
12748             bool got_some;
12749             PL_bufptr = PL_bufend;
12750             COPLINE_INC_WITH_HERELINES;
12751             got_some = lex_next_chunk(0);
12752             CopLINE_dec(PL_curcop);
12753             s = PL_bufptr;
12754             if (!got_some)
12755                 break;
12756         }
12757         incline(s, PL_bufend);
12758     }
12759   enough:
12760     if (!SvCUR(stuff) || needargs)
12761         PL_lex_state = PL_parser->form_lex_state;
12762     if (SvCUR(stuff)) {
12763         PL_expect = XSTATE;
12764         if (needargs) {
12765             const char *s2 = s;
12766             while (isSPACE(*s2) && *s2 != '\n')
12767                 s2++;
12768             if (*s2 == '{') {
12769                 PL_expect = XTERMBLOCK;
12770                 NEXTVAL_NEXTTOKE.ival = 0;
12771                 force_next(KW_DO);
12772             }
12773             NEXTVAL_NEXTTOKE.ival = 0;
12774             force_next(FORMLBRACK);
12775         }
12776         if (!IN_BYTES) {
12777             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
12778                 SvUTF8_on(stuff);
12779         }
12780         NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0, stuff);
12781         force_next(THING);
12782     }
12783     else {
12784         SvREFCNT_dec(stuff);
12785         if (eofmt)
12786             PL_lex_formbrack = 0;
12787     }
12788     return s;
12789 }
12790 
12791 /*
12792 =for apidoc start_subparse
12793 
12794 Set things up for parsing a subroutine.
12795 
12796 If C<is_format> is non-zero, the input is to be considered a format sub
12797 (a specialised sub used to implement perl's C<format> feature); else a
12798 normal C<sub>.
12799 
12800 C<flags> are added to the flags for C<PL_compcv>.  C<flags> may include the
12801 C<CVf_IsMETHOD> bit, which causes the new subroutine to be a method.
12802 
12803 This returns the value of C<PL_savestack_ix> that was in effect upon entry to
12804 the function;
12805 
12806 =cut
12807 */
12808 
12809 I32
Perl_start_subparse(pTHX_ I32 is_format,U32 flags)12810 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
12811 {
12812     const I32 oldsavestack_ix = PL_savestack_ix;
12813     CV* const outsidecv = PL_compcv;
12814     bool is_method = flags & CVf_IsMETHOD;
12815 
12816     if (is_method)
12817         croak_kw_unless_class("method");
12818 
12819     SAVEI32(PL_subline);
12820     save_item(PL_subname);
12821     SAVESPTR(PL_compcv);
12822 
12823     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
12824     CvFLAGS(PL_compcv) |= flags;
12825 
12826     PL_subline = CopLINE(PL_curcop);
12827     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
12828     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
12829     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
12830     if (outsidecv && CvPADLIST(outsidecv))
12831         CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
12832     if (is_method)
12833         class_prepare_method_parse(PL_compcv);
12834 
12835     return oldsavestack_ix;
12836 }
12837 
12838 /* If o represents a builtin attribute, apply it to cv and returns true.
12839  * Otherwise does nothing and returns false
12840  */
12841 
12842 STATIC bool
S_apply_builtin_cv_attribute(pTHX_ CV * cv,OP * o)12843 S_apply_builtin_cv_attribute(pTHX_ CV *cv, OP *o)
12844 {
12845     assert(o->op_type == OP_CONST);
12846     SV *sv = cSVOPo_sv;
12847     STRLEN len = SvCUR(sv);
12848 
12849     /* NOTE: any CV attrs applied here need to be part of
12850        the CVf_BUILTIN_ATTRS define in cv.h! */
12851 
12852     if(memEQs(SvPVX(sv), len, "lvalue"))
12853         CvLVALUE_on(cv);
12854     else if(memEQs(SvPVX(sv), len, "method"))
12855         CvNOWARN_AMBIGUOUS_on(cv);
12856     else if(memEQs(SvPVX(sv), len, "const")) {
12857         CvANONCONST_on(cv);
12858         if (!CvANON(cv))
12859             yyerror(":const is not permitted on named subroutines");
12860     }
12861     else
12862         return false;
12863 
12864     return true;
12865 }
12866 
12867 /*
12868 =for apidoc apply_builtin_cv_attributes
12869 
12870 Given an OP_LIST containing attribute definitions, filter it for known builtin
12871 attributes to apply to the cv, returning a possibly-smaller list containing
12872 just the remaining ones.
12873 
12874 =cut
12875 */
12876 
12877 OP *
Perl_apply_builtin_cv_attributes(pTHX_ CV * cv,OP * attrlist)12878 Perl_apply_builtin_cv_attributes(pTHX_ CV *cv, OP *attrlist)
12879 {
12880     PERL_ARGS_ASSERT_APPLY_BUILTIN_CV_ATTRIBUTES;
12881 
12882     if(!attrlist)
12883         return attrlist;
12884 
12885     if(attrlist->op_type != OP_LIST) {
12886         /* Not in fact a list but just a single attribute */
12887         if(S_apply_builtin_cv_attribute(aTHX_ cv, attrlist)) {
12888             op_free(attrlist);
12889             return NULL;
12890         }
12891 
12892         return attrlist;
12893     }
12894 
12895     OP *prev = cLISTOPx(attrlist)->op_first;
12896     assert(prev->op_type == OP_PUSHMARK);
12897     OP *o = OpSIBLING(prev);
12898 
12899     OP *next;
12900     for(; o; o = next) {
12901         next = OpSIBLING(o);
12902 
12903         if(S_apply_builtin_cv_attribute(aTHX_ cv, o)) {
12904             op_sibling_splice(attrlist, prev, 1, NULL);
12905             op_free(o);
12906         }
12907         else {
12908             prev = o;
12909         }
12910     }
12911 
12912     if(OpHAS_SIBLING(cLISTOPx(attrlist)->op_first))
12913         return attrlist;
12914 
12915     /* The list is now entirely empty, we might as well discard it */
12916     op_free(attrlist);
12917     return NULL;
12918 }
12919 
12920 
12921 /* Do extra initialisation of a CV (typically one just created by
12922  * start_subparse()) if that CV is for a named sub
12923  */
12924 
12925 void
Perl_init_named_cv(pTHX_ CV * cv,OP * nameop)12926 Perl_init_named_cv(pTHX_ CV *cv, OP *nameop)
12927 {
12928     PERL_ARGS_ASSERT_INIT_NAMED_CV;
12929 
12930     if (nameop->op_type == OP_CONST) {
12931         const char *const name = SvPV_nolen_const(((SVOP*)nameop)->op_sv);
12932         if (   strEQ(name, "BEGIN")
12933             || strEQ(name, "END")
12934             || strEQ(name, "INIT")
12935             || strEQ(name, "CHECK")
12936             || strEQ(name, "UNITCHECK")
12937         )
12938           CvSPECIAL_on(cv);
12939     }
12940     else
12941     /* State subs inside anonymous subs need to be
12942      clonable themselves. */
12943     if (   CvANON(CvOUTSIDE(cv))
12944         || CvCLONE(CvOUTSIDE(cv))
12945         || !PadnameIsSTATE(PadlistNAMESARRAY(CvPADLIST(
12946                         CvOUTSIDE(cv)
12947                      ))[nameop->op_targ])
12948     )
12949       CvCLONE_on(cv);
12950 }
12951 
12952 
12953 static int
S_yywarn(pTHX_ const char * const s,U32 flags)12954 S_yywarn(pTHX_ const char *const s, U32 flags)
12955 {
12956     PERL_ARGS_ASSERT_YYWARN;
12957 
12958     PL_in_eval |= EVAL_WARNONLY;
12959     yyerror_pv(s, flags);
12960     return 0;
12961 }
12962 
12963 void
Perl_abort_execution(pTHX_ SV * msg_sv,const char * const name)12964 Perl_abort_execution(pTHX_ SV* msg_sv, const char * const name)
12965 {
12966     PERL_ARGS_ASSERT_ABORT_EXECUTION;
12967 
12968     if (msg_sv) {
12969         if (PL_minus_c)
12970             Perl_croak(aTHX_ "%" SVf "%s had compilation errors.\n", SVfARG(msg_sv), name);
12971         else {
12972             Perl_croak(aTHX_
12973                     "%" SVf "Execution of %s aborted due to compilation errors.\n", SVfARG(msg_sv), name);
12974         }
12975     } else {
12976         if (PL_minus_c)
12977             Perl_croak(aTHX_ "%s had compilation errors.\n", name);
12978         else {
12979             Perl_croak(aTHX_
12980                     "Execution of %s aborted due to compilation errors.\n", name);
12981         }
12982     }
12983 
12984     NOT_REACHED; /* NOTREACHED */
12985 }
12986 
12987 void
Perl_yyquit(pTHX)12988 Perl_yyquit(pTHX)
12989 {
12990     /* Called, after at least one error has been found, to abort the parse now,
12991      * instead of trying to forge ahead */
12992 
12993     yyerror_pvn(NULL, 0, 0);
12994 }
12995 
12996 int
Perl_yyerror(pTHX_ const char * const s)12997 Perl_yyerror(pTHX_ const char *const s)
12998 {
12999     PERL_ARGS_ASSERT_YYERROR;
13000     int r = yyerror_pvn(s, strlen(s), 0);
13001     return r;
13002 }
13003 
13004 int
Perl_yyerror_pv(pTHX_ const char * const s,U32 flags)13005 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
13006 {
13007     PERL_ARGS_ASSERT_YYERROR_PV;
13008     int r = yyerror_pvn(s, strlen(s), flags);
13009     return r;
13010 }
13011 
13012 int
Perl_yyerror_pvn(pTHX_ const char * const s,STRLEN len,U32 flags)13013 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
13014 {
13015     const char *context = NULL;
13016     int contlen = -1;
13017     SV *msg;
13018     SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
13019     int yychar  = PL_parser->yychar;
13020 
13021     /* Output error message 's' with length 'len'.  'flags' are SV flags that
13022      * apply.  If the number of errors found is large enough, it abandons
13023      * parsing.  If 's' is NULL, there is no message, and it abandons
13024      * processing unconditionally */
13025 
13026     if (s != NULL) {
13027         if (!yychar || (yychar == PERLY_SEMICOLON && !PL_rsfp))
13028             sv_catpvs(where_sv, "at EOF");
13029         else if (   PL_oldoldbufptr
13030                  && PL_bufptr > PL_oldoldbufptr
13031                  && PL_bufptr - PL_oldoldbufptr < 200
13032                  && PL_oldoldbufptr != PL_oldbufptr
13033                  && PL_oldbufptr != PL_bufptr)
13034         {
13035             while (isSPACE(*PL_oldoldbufptr))
13036                 PL_oldoldbufptr++;
13037             context = PL_oldoldbufptr;
13038             contlen = PL_bufptr - PL_oldoldbufptr;
13039         }
13040         else if (  PL_oldbufptr
13041                 && PL_bufptr > PL_oldbufptr
13042                 && PL_bufptr - PL_oldbufptr < 200
13043                 && PL_oldbufptr != PL_bufptr)
13044         {
13045             while (isSPACE(*PL_oldbufptr))
13046                 PL_oldbufptr++;
13047             context = PL_oldbufptr;
13048             contlen = PL_bufptr - PL_oldbufptr;
13049         }
13050         else if (yychar > 255)
13051             sv_catpvs(where_sv, "next token ???");
13052         else if (yychar == YYEMPTY) {
13053             if (PL_lex_state == LEX_NORMAL)
13054                 sv_catpvs(where_sv, "at end of line");
13055             else if (PL_lex_inpat)
13056                 sv_catpvs(where_sv, "within pattern");
13057             else
13058                 sv_catpvs(where_sv, "within string");
13059         }
13060         else {
13061             sv_catpvs(where_sv, "next char ");
13062             if (yychar < 32)
13063                 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
13064             else if (isPRINT_LC(yychar)) {
13065                 const char string = yychar;
13066                 sv_catpvn(where_sv, &string, 1);
13067             }
13068             else
13069                 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
13070         }
13071         msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
13072         Perl_sv_catpvf(aTHX_ msg, " at %s line %" LINE_Tf ", ",
13073             OutCopFILE(PL_curcop),
13074             (PL_parser->preambling == NOLINE
13075                    ? CopLINE(PL_curcop)
13076                    : PL_parser->preambling));
13077         if (context)
13078             Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n",
13079                                  UTF8fARG(UTF, contlen, context));
13080         else
13081             Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv));
13082         if (   PL_multi_start < PL_multi_end
13083             && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1)
13084         {
13085             Perl_sv_catpvf(aTHX_ msg,
13086             "  (Might be a runaway multi-line %c%c string starting on"
13087             " line %" LINE_Tf ")\n",
13088                     (int)PL_multi_open,(int)PL_multi_close,(line_t)PL_multi_start);
13089             PL_multi_end = 0;
13090         }
13091         if (PL_in_eval & EVAL_WARNONLY) {
13092             PL_in_eval &= ~EVAL_WARNONLY;
13093             Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg));
13094         }
13095         else {
13096             qerror(msg);
13097         }
13098     }
13099     /* if there was no message then this is a yyquit(), which is actualy handled
13100      * by qerror() with a NULL argument */
13101     if (s == NULL)
13102         qerror(NULL);
13103 
13104     PL_in_my = 0;
13105     PL_in_my_stash = NULL;
13106     return 0;
13107 }
13108 
13109 STATIC char*
S_swallow_bom(pTHX_ U8 * s)13110 S_swallow_bom(pTHX_ U8 *s)
13111 {
13112     const STRLEN slen = SvCUR(PL_linestr);
13113 
13114     PERL_ARGS_ASSERT_SWALLOW_BOM;
13115 
13116     switch (s[0]) {
13117     case 0xFF:
13118         if (s[1] == 0xFE) {
13119             /* UTF-16 little-endian? (or UTF-32LE?) */
13120             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
13121                 /* diag_listed_as: Unsupported script encoding %s */
13122                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
13123 #ifndef PERL_NO_UTF16_FILTER
13124 #ifdef DEBUGGING
13125             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
13126 #endif
13127             s += 2;
13128             if (PL_bufend > (char*)s) {
13129                 s = add_utf16_textfilter(s, TRUE);
13130             }
13131 #else
13132             /* diag_listed_as: Unsupported script encoding %s */
13133             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13134 #endif
13135         }
13136         break;
13137     case 0xFE:
13138         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
13139 #ifndef PERL_NO_UTF16_FILTER
13140 #ifdef DEBUGGING
13141             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
13142 #endif
13143             s += 2;
13144             if (PL_bufend > (char *)s) {
13145                 s = add_utf16_textfilter(s, FALSE);
13146             }
13147 #else
13148             /* diag_listed_as: Unsupported script encoding %s */
13149             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13150 #endif
13151         }
13152         break;
13153     case BOM_UTF8_FIRST_BYTE: {
13154         if (memBEGINs(s+1, slen - 1, BOM_UTF8_TAIL)) {
13155 #ifdef DEBUGGING
13156             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13157 #endif
13158             s += sizeof(BOM_UTF8) - 1;                     /* UTF-8 */
13159         }
13160         break;
13161     }
13162     case 0:
13163         if (slen > 3) {
13164              if (s[1] == 0) {
13165                   if (s[2] == 0xFE && s[3] == 0xFF) {
13166                        /* UTF-32 big-endian */
13167                        /* diag_listed_as: Unsupported script encoding %s */
13168                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
13169                   }
13170              }
13171              else if (s[2] == 0 && s[3] != 0) {
13172                   /* Leading bytes
13173                    * 00 xx 00 xx
13174                    * are a good indicator of UTF-16BE. */
13175 #ifndef PERL_NO_UTF16_FILTER
13176 #ifdef DEBUGGING
13177                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
13178 #endif
13179                   s = add_utf16_textfilter(s, FALSE);
13180 #else
13181                   /* diag_listed_as: Unsupported script encoding %s */
13182                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13183 #endif
13184              }
13185         }
13186         break;
13187 
13188     default:
13189          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
13190                   /* Leading bytes
13191                    * xx 00 xx 00
13192                    * are a good indicator of UTF-16LE. */
13193 #ifndef PERL_NO_UTF16_FILTER
13194 #ifdef DEBUGGING
13195               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
13196 #endif
13197               s = add_utf16_textfilter(s, TRUE);
13198 #else
13199               /* diag_listed_as: Unsupported script encoding %s */
13200               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13201 #endif
13202          }
13203     }
13204     return (char*)s;
13205 }
13206 
13207 
13208 #ifndef PERL_NO_UTF16_FILTER
13209 static I32
S_utf16_textfilter(pTHX_ int idx,SV * sv,int maxlen)13210 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
13211 {
13212     SV *const filter = FILTER_DATA(idx);
13213     /* We re-use this each time round, throwing the contents away before we
13214        return.  */
13215     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
13216     SV *const utf8_buffer = filter;
13217     IV status = IoPAGE(filter);
13218     const bool reverse = cBOOL(IoLINES(filter));
13219     I32 retval;
13220 
13221     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
13222 
13223     /* As we're automatically added, at the lowest level, and hence only called
13224        from this file, we can be sure that we're not called in block mode. Hence
13225        don't bother writing code to deal with block mode.  */
13226     if (maxlen) {
13227         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
13228     }
13229     if (status < 0) {
13230         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%" IVdf ")", status);
13231     }
13232     DEBUG_P(PerlIO_printf(Perl_debug_log,
13233                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
13234                           FPTR2DPTR(void *, S_utf16_textfilter),
13235                           reverse ? 'l' : 'b', idx, maxlen, status,
13236                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13237 
13238     while (1) {
13239         STRLEN chars;
13240         STRLEN have;
13241         Size_t newlen;
13242         U8 *end;
13243         /* First, look in our buffer of existing UTF-8 data:  */
13244         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
13245 
13246         if (nl) {
13247             ++nl;
13248         } else if (status == 0) {
13249             /* EOF */
13250             IoPAGE(filter) = 0;
13251             nl = SvEND(utf8_buffer);
13252         }
13253         if (nl) {
13254             STRLEN got = nl - SvPVX(utf8_buffer);
13255             /* Did we have anything to append?  */
13256             retval = got != 0;
13257             sv_catpvn(sv, SvPVX(utf8_buffer), got);
13258             /* Everything else in this code works just fine if SVp_POK isn't
13259                set.  This, however, needs it, and we need it to work, else
13260                we loop infinitely because the buffer is never consumed.  */
13261             sv_chop(utf8_buffer, nl);
13262             break;
13263         }
13264 
13265         /* OK, not a complete line there, so need to read some more UTF-16.
13266            Read an extra octect if the buffer currently has an odd number. */
13267         while (1) {
13268             if (status <= 0)
13269                 break;
13270             if (SvCUR(utf16_buffer) >= 2) {
13271                 /* Location of the high octet of the last complete code point.
13272                    Gosh, UTF-16 is a pain. All the benefits of variable length,
13273                    *coupled* with all the benefits of partial reads and
13274                    endianness.  */
13275                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
13276                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
13277 
13278                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
13279                     break;
13280                 }
13281 
13282                 /* We have the first half of a surrogate. Read more.  */
13283                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
13284             }
13285 
13286             status = FILTER_READ(idx + 1, utf16_buffer,
13287                                  160 + (SvCUR(utf16_buffer) & 1));
13288             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%" IVdf " SvCUR(sv)=%" UVuf "\n", status, (UV)SvCUR(utf16_buffer)));
13289             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
13290             if (status < 0) {
13291                 /* Error */
13292                 IoPAGE(filter) = status;
13293                 return status;
13294             }
13295         }
13296 
13297         /* 'chars' isn't quite the right name, as code points above 0xFFFF
13298          * require 4 bytes per char */
13299         chars = SvCUR(utf16_buffer) >> 1;
13300         have = SvCUR(utf8_buffer);
13301 
13302         /* Assume the worst case size as noted by the functions: twice the
13303          * number of input bytes */
13304         SvGROW(utf8_buffer, have + chars * 4 + 1);
13305 
13306         if (reverse) {
13307             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
13308                                          (U8*)SvPVX_const(utf8_buffer) + have,
13309                                          chars * 2, &newlen);
13310         } else {
13311             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
13312                                 (U8*)SvPVX_const(utf8_buffer) + have,
13313                                 chars * 2, &newlen);
13314         }
13315         SvCUR_set(utf8_buffer, have + newlen);
13316         *end = '\0';
13317 
13318         /* No need to keep this SV "well-formed" with a '\0' after the end, as
13319            it's private to us, and utf16_to_utf8{,reversed} take a
13320            (pointer,length) pair, rather than a NUL-terminated string.  */
13321         if(SvCUR(utf16_buffer) & 1) {
13322             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
13323             SvCUR_set(utf16_buffer, 1);
13324         } else {
13325             SvCUR_set(utf16_buffer, 0);
13326         }
13327     }
13328     DEBUG_P(PerlIO_printf(Perl_debug_log,
13329                           "utf16_textfilter: returns, status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
13330                           status,
13331                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13332     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
13333     return retval;
13334 }
13335 
13336 static U8 *
S_add_utf16_textfilter(pTHX_ U8 * const s,bool reversed)13337 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
13338 {
13339     SV *filter = filter_add(S_utf16_textfilter, NULL);
13340 
13341     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
13342 
13343     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
13344     SvPVCLEAR(filter);
13345     IoLINES(filter) = reversed;
13346     IoPAGE(filter) = 1; /* Not EOF */
13347 
13348     /* Sadly, we have to return a valid pointer, come what may, so we have to
13349        ignore any error return from this.  */
13350     SvCUR_set(PL_linestr, 0);
13351     if (FILTER_READ(0, PL_linestr, 0)) {
13352         SvUTF8_on(PL_linestr);
13353     } else {
13354         SvUTF8_on(PL_linestr);
13355     }
13356     PL_bufend = SvEND(PL_linestr);
13357     return (U8*)SvPVX(PL_linestr);
13358 }
13359 #endif
13360 
13361 /*
13362 =for apidoc scan_vstring
13363 
13364 Returns a pointer to the next character after the parsed
13365 vstring, as well as updating the passed in sv.
13366 
13367 Function must be called like
13368 
13369         sv = sv_2mortal(newSV(5));
13370         s = scan_vstring(s,e,sv);
13371 
13372 where s and e are the start and end of the string.
13373 The sv should already be large enough to store the vstring
13374 passed in, for performance reasons.
13375 
13376 This function may croak if fatal warnings are enabled in the
13377 calling scope, hence the sv_2mortal in the example (to prevent
13378 a leak).  Make sure to do SvREFCNT_inc afterwards if you use
13379 sv_2mortal.
13380 
13381 =cut
13382 */
13383 
13384 char *
Perl_scan_vstring(pTHX_ const char * s,const char * const e,SV * sv)13385 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
13386 {
13387     const char *pos = s;
13388     const char *start = s;
13389 
13390     PERL_ARGS_ASSERT_SCAN_VSTRING;
13391 
13392     if (*pos == 'v') pos++;  /* get past 'v' */
13393     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13394         pos++;
13395     if ( *pos != '.') {
13396         /* this may not be a v-string if followed by => */
13397         const char *next = pos;
13398         while (next < e && isSPACE(*next))
13399             ++next;
13400         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
13401             /* return string not v-string */
13402             sv_setpvn(sv,(char *)s,pos-s);
13403             return (char *)pos;
13404         }
13405     }
13406 
13407     if (!isALPHA(*pos)) {
13408         U8 tmpbuf[UTF8_MAXBYTES+1];
13409 
13410         if (*s == 'v')
13411             s++;  /* get past 'v' */
13412 
13413         SvPVCLEAR(sv);
13414 
13415         for (;;) {
13416             /* this is atoi() that tolerates underscores */
13417             U8 *tmpend;
13418             UV rev = 0;
13419             const char *end = pos;
13420             UV mult = 1;
13421             while (--end >= s) {
13422                 if (*end != '_') {
13423                     const UV orev = rev;
13424                     rev += (*end - '0') * mult;
13425                     mult *= 10;
13426                     if (orev > rev)
13427                         /* diag_listed_as: Integer overflow in %s number */
13428                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13429                                          "Integer overflow in decimal number");
13430                 }
13431             }
13432 
13433             /* Append native character for the rev point */
13434             tmpend = uvchr_to_utf8(tmpbuf, rev);
13435             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
13436             if (!UVCHR_IS_INVARIANT(rev))
13437                  SvUTF8_on(sv);
13438             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
13439                  s = ++pos;
13440             else {
13441                  s = pos;
13442                  break;
13443             }
13444             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13445                  pos++;
13446         }
13447         SvPOK_on(sv);
13448         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
13449         SvRMAGICAL_on(sv);
13450     }
13451     return (char *)s;
13452 }
13453 
13454 int
Perl_keyword_plugin_standard(pTHX_ char * keyword_ptr,STRLEN keyword_len,OP ** op_ptr)13455 Perl_keyword_plugin_standard(pTHX_
13456         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13457 {
13458     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
13459     PERL_UNUSED_CONTEXT;
13460     PERL_UNUSED_ARG(keyword_ptr);
13461     PERL_UNUSED_ARG(keyword_len);
13462     PERL_UNUSED_ARG(op_ptr);
13463     return KEYWORD_PLUGIN_DECLINE;
13464 }
13465 
13466 STRLEN
Perl_infix_plugin_standard(pTHX_ char * operator_ptr,STRLEN operator_len,struct Perl_custom_infix ** def)13467 Perl_infix_plugin_standard(pTHX_
13468         char *operator_ptr, STRLEN operator_len, struct Perl_custom_infix **def)
13469 {
13470     PERL_ARGS_ASSERT_INFIX_PLUGIN_STANDARD;
13471     PERL_UNUSED_CONTEXT;
13472     PERL_UNUSED_ARG(operator_ptr);
13473     PERL_UNUSED_ARG(operator_len);
13474     PERL_UNUSED_ARG(def);
13475     return 0;
13476 }
13477 
13478 /*
13479 =for apidoc_section $lexer
13480 =for apidoc wrap_keyword_plugin
13481 
13482 Puts a C function into the chain of keyword plugins.  This is the
13483 preferred way to manipulate the L</PL_keyword_plugin> variable.
13484 C<new_plugin> is a pointer to the C function that is to be added to the
13485 keyword plugin chain, and C<old_plugin_p> points to the storage location
13486 where a pointer to the next function in the chain will be stored.  The
13487 value of C<new_plugin> is written into the L</PL_keyword_plugin> variable,
13488 while the value previously stored there is written to C<*old_plugin_p>.
13489 
13490 L</PL_keyword_plugin> is global to an entire process, and a module wishing
13491 to hook keyword parsing may find itself invoked more than once per
13492 process, typically in different threads.  To handle that situation, this
13493 function is idempotent.  The location C<*old_plugin_p> must initially
13494 (once per process) contain a null pointer.  A C variable of static
13495 duration (declared at file scope, typically also marked C<static> to give
13496 it internal linkage) will be implicitly initialised appropriately, if it
13497 does not have an explicit initialiser.  This function will only actually
13498 modify the plugin chain if it finds C<*old_plugin_p> to be null.  This
13499 function is also thread safe on the small scale.  It uses appropriate
13500 locking to avoid race conditions in accessing L</PL_keyword_plugin>.
13501 
13502 When this function is called, the function referenced by C<new_plugin>
13503 must be ready to be called, except for C<*old_plugin_p> being unfilled.
13504 In a threading situation, C<new_plugin> may be called immediately, even
13505 before this function has returned.  C<*old_plugin_p> will always be
13506 appropriately set before C<new_plugin> is called.  If C<new_plugin>
13507 decides not to do anything special with the identifier that it is given
13508 (which is the usual case for most calls to a keyword plugin), it must
13509 chain the plugin function referenced by C<*old_plugin_p>.
13510 
13511 Taken all together, XS code to install a keyword plugin should typically
13512 look something like this:
13513 
13514     static Perl_keyword_plugin_t next_keyword_plugin;
13515     static OP *my_keyword_plugin(pTHX_
13516         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13517     {
13518         if (memEQs(keyword_ptr, keyword_len,
13519                    "my_new_keyword")) {
13520             ...
13521         } else {
13522             return next_keyword_plugin(aTHX_
13523                 keyword_ptr, keyword_len, op_ptr);
13524         }
13525     }
13526     BOOT:
13527         wrap_keyword_plugin(my_keyword_plugin,
13528                             &next_keyword_plugin);
13529 
13530 Direct access to L</PL_keyword_plugin> should be avoided.
13531 
13532 =cut
13533 */
13534 
13535 void
Perl_wrap_keyword_plugin(pTHX_ Perl_keyword_plugin_t new_plugin,Perl_keyword_plugin_t * old_plugin_p)13536 Perl_wrap_keyword_plugin(pTHX_
13537     Perl_keyword_plugin_t new_plugin, Perl_keyword_plugin_t *old_plugin_p)
13538 {
13539 
13540     PERL_UNUSED_CONTEXT;
13541     PERL_ARGS_ASSERT_WRAP_KEYWORD_PLUGIN;
13542     if (*old_plugin_p) return;
13543     KEYWORD_PLUGIN_MUTEX_LOCK;
13544     if (!*old_plugin_p) {
13545         *old_plugin_p = PL_keyword_plugin;
13546         PL_keyword_plugin = new_plugin;
13547     }
13548     KEYWORD_PLUGIN_MUTEX_UNLOCK;
13549 }
13550 
13551 /*
13552 =for apidoc wrap_infix_plugin
13553 
13554 B<NOTE:> This API exists entirely for the purpose of making the CPAN module
13555 C<XS::Parse::Infix> work. It is not expected that additional modules will make
13556 use of it; rather, that they should use C<XS::Parse::Infix> to provide parsing
13557 of new infix operators.
13558 
13559 Puts a C function into the chain of infix plugins.  This is the preferred
13560 way to manipulate the L</PL_infix_plugin> variable.  C<new_plugin> is a
13561 pointer to the C function that is to be added to the infix plugin chain, and
13562 C<old_plugin_p> points to a storage location where a pointer to the next
13563 function in the chain will be stored.  The value of C<new_plugin> is written
13564 into the L</PL_infix_plugin> variable, while the value previously stored there
13565 is written to C<*old_plugin_p>.
13566 
13567 Direct access to L</PL_infix_plugin> should be avoided.
13568 
13569 =cut
13570 */
13571 
13572 void
Perl_wrap_infix_plugin(pTHX_ Perl_infix_plugin_t new_plugin,Perl_infix_plugin_t * old_plugin_p)13573 Perl_wrap_infix_plugin(pTHX_
13574     Perl_infix_plugin_t new_plugin, Perl_infix_plugin_t *old_plugin_p)
13575 {
13576 
13577     PERL_UNUSED_CONTEXT;
13578     PERL_ARGS_ASSERT_WRAP_INFIX_PLUGIN;
13579     if (*old_plugin_p) return;
13580     /* We use the same mutex as for PL_keyword_plugin as it's so rare either
13581      * of them is actually updated; no need for a dedicated one each */
13582     KEYWORD_PLUGIN_MUTEX_LOCK;
13583     if (!*old_plugin_p) {
13584         *old_plugin_p = PL_infix_plugin;
13585         PL_infix_plugin = new_plugin;
13586     }
13587     KEYWORD_PLUGIN_MUTEX_UNLOCK;
13588 }
13589 
13590 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
13591 static void
S_parse_recdescent(pTHX_ int gramtype,I32 fakeeof)13592 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
13593 {
13594     SAVEI32(PL_lex_brackets);
13595     if (PL_lex_brackets > 100)
13596         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
13597     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
13598     SAVEI32(PL_lex_allbrackets);
13599     PL_lex_allbrackets = 0;
13600     SAVEI8(PL_lex_fakeeof);
13601     PL_lex_fakeeof = (U8)fakeeof;
13602     if(yyparse(gramtype) && !PL_parser->error_count)
13603         qerror(Perl_mess(aTHX_ "Parse error"));
13604 }
13605 
13606 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
13607 static OP *
S_parse_recdescent_for_op(pTHX_ int gramtype,I32 fakeeof)13608 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
13609 {
13610     OP *o;
13611     ENTER;
13612     SAVEVPTR(PL_eval_root);
13613     PL_eval_root = NULL;
13614     parse_recdescent(gramtype, fakeeof);
13615     o = PL_eval_root;
13616     LEAVE;
13617     return o;
13618 }
13619 
13620 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
13621 static OP *
S_parse_expr(pTHX_ I32 fakeeof,U32 flags)13622 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
13623 {
13624     OP *exprop;
13625     if (flags & ~PARSE_OPTIONAL)
13626         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
13627     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
13628     if (!exprop && !(flags & PARSE_OPTIONAL)) {
13629         if (!PL_parser->error_count)
13630             qerror(Perl_mess(aTHX_ "Parse error"));
13631         exprop = newOP(OP_NULL, 0);
13632     }
13633     return exprop;
13634 }
13635 
13636 /*
13637 =for apidoc parse_arithexpr
13638 
13639 Parse a Perl arithmetic expression.  This may contain operators of precedence
13640 down to the bit shift operators.  The expression must be followed (and thus
13641 terminated) either by a comparison or lower-precedence operator or by
13642 something that would normally terminate an expression such as semicolon.
13643 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
13644 otherwise it is mandatory.  It is up to the caller to ensure that the
13645 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
13646 the source of the code to be parsed and the lexical context for the
13647 expression.
13648 
13649 The op tree representing the expression is returned.  If an optional
13650 expression is absent, a null pointer is returned, otherwise the pointer
13651 will be non-null.
13652 
13653 If an error occurs in parsing or compilation, in most cases a valid op
13654 tree is returned anyway.  The error is reflected in the parser state,
13655 normally resulting in a single exception at the top level of parsing
13656 which covers all the compilation errors that occurred.  Some compilation
13657 errors, however, will throw an exception immediately.
13658 
13659 =for apidoc Amnh||PARSE_OPTIONAL
13660 
13661 =cut
13662 
13663 */
13664 
13665 OP *
Perl_parse_arithexpr(pTHX_ U32 flags)13666 Perl_parse_arithexpr(pTHX_ U32 flags)
13667 {
13668     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
13669 }
13670 
13671 /*
13672 =for apidoc parse_termexpr
13673 
13674 Parse a Perl term expression.  This may contain operators of precedence
13675 down to the assignment operators.  The expression must be followed (and thus
13676 terminated) either by a comma or lower-precedence operator or by
13677 something that would normally terminate an expression such as semicolon.
13678 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
13679 otherwise it is mandatory.  It is up to the caller to ensure that the
13680 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
13681 the source of the code to be parsed and the lexical context for the
13682 expression.
13683 
13684 The op tree representing the expression is returned.  If an optional
13685 expression is absent, a null pointer is returned, otherwise the pointer
13686 will be non-null.
13687 
13688 If an error occurs in parsing or compilation, in most cases a valid op
13689 tree is returned anyway.  The error is reflected in the parser state,
13690 normally resulting in a single exception at the top level of parsing
13691 which covers all the compilation errors that occurred.  Some compilation
13692 errors, however, will throw an exception immediately.
13693 
13694 =cut
13695 */
13696 
13697 OP *
Perl_parse_termexpr(pTHX_ U32 flags)13698 Perl_parse_termexpr(pTHX_ U32 flags)
13699 {
13700     return parse_expr(LEX_FAKEEOF_COMMA, flags);
13701 }
13702 
13703 /*
13704 =for apidoc parse_listexpr
13705 
13706 Parse a Perl list expression.  This may contain operators of precedence
13707 down to the comma operator.  The expression must be followed (and thus
13708 terminated) either by a low-precedence logic operator such as C<or> or by
13709 something that would normally terminate an expression such as semicolon.
13710 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
13711 otherwise it is mandatory.  It is up to the caller to ensure that the
13712 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
13713 the source of the code to be parsed and the lexical context for the
13714 expression.
13715 
13716 The op tree representing the expression is returned.  If an optional
13717 expression is absent, a null pointer is returned, otherwise the pointer
13718 will be non-null.
13719 
13720 If an error occurs in parsing or compilation, in most cases a valid op
13721 tree is returned anyway.  The error is reflected in the parser state,
13722 normally resulting in a single exception at the top level of parsing
13723 which covers all the compilation errors that occurred.  Some compilation
13724 errors, however, will throw an exception immediately.
13725 
13726 =cut
13727 */
13728 
13729 OP *
Perl_parse_listexpr(pTHX_ U32 flags)13730 Perl_parse_listexpr(pTHX_ U32 flags)
13731 {
13732     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
13733 }
13734 
13735 /*
13736 =for apidoc parse_fullexpr
13737 
13738 Parse a single complete Perl expression.  This allows the full
13739 expression grammar, including the lowest-precedence operators such
13740 as C<or>.  The expression must be followed (and thus terminated) by a
13741 token that an expression would normally be terminated by: end-of-file,
13742 closing bracketing punctuation, semicolon, or one of the keywords that
13743 signals a postfix expression-statement modifier.  If C<flags> has the
13744 C<PARSE_OPTIONAL> bit set, then the expression is optional, otherwise it is
13745 mandatory.  It is up to the caller to ensure that the dynamic parser
13746 state (L</PL_parser> et al) is correctly set to reflect the source of
13747 the code to be parsed and the lexical context for the expression.
13748 
13749 The op tree representing the expression is returned.  If an optional
13750 expression is absent, a null pointer is returned, otherwise the pointer
13751 will be non-null.
13752 
13753 If an error occurs in parsing or compilation, in most cases a valid op
13754 tree is returned anyway.  The error is reflected in the parser state,
13755 normally resulting in a single exception at the top level of parsing
13756 which covers all the compilation errors that occurred.  Some compilation
13757 errors, however, will throw an exception immediately.
13758 
13759 =cut
13760 */
13761 
13762 OP *
Perl_parse_fullexpr(pTHX_ U32 flags)13763 Perl_parse_fullexpr(pTHX_ U32 flags)
13764 {
13765     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
13766 }
13767 
13768 /*
13769 =for apidoc parse_block
13770 
13771 Parse a single complete Perl code block.  This consists of an opening
13772 brace, a sequence of statements, and a closing brace.  The block
13773 constitutes a lexical scope, so C<my> variables and various compile-time
13774 effects can be contained within it.  It is up to the caller to ensure
13775 that the dynamic parser state (L</PL_parser> et al) is correctly set to
13776 reflect the source of the code to be parsed and the lexical context for
13777 the statement.
13778 
13779 The op tree representing the code block is returned.  This is always a
13780 real op, never a null pointer.  It will normally be a C<lineseq> list,
13781 including C<nextstate> or equivalent ops.  No ops to construct any kind
13782 of runtime scope are included by virtue of it being a block.
13783 
13784 If an error occurs in parsing or compilation, in most cases a valid op
13785 tree (most likely null) is returned anyway.  The error is reflected in
13786 the parser state, normally resulting in a single exception at the top
13787 level of parsing which covers all the compilation errors that occurred.
13788 Some compilation errors, however, will throw an exception immediately.
13789 
13790 The C<flags> parameter is reserved for future use, and must always
13791 be zero.
13792 
13793 =cut
13794 */
13795 
13796 OP *
Perl_parse_block(pTHX_ U32 flags)13797 Perl_parse_block(pTHX_ U32 flags)
13798 {
13799     if (flags)
13800         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
13801     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
13802 }
13803 
13804 /*
13805 =for apidoc parse_barestmt
13806 
13807 Parse a single unadorned Perl statement.  This may be a normal imperative
13808 statement or a declaration that has compile-time effect.  It does not
13809 include any label or other affixture.  It is up to the caller to ensure
13810 that the dynamic parser state (L</PL_parser> et al) is correctly set to
13811 reflect the source of the code to be parsed and the lexical context for
13812 the statement.
13813 
13814 The op tree representing the statement is returned.  This may be a
13815 null pointer if the statement is null, for example if it was actually
13816 a subroutine definition (which has compile-time side effects).  If not
13817 null, it will be ops directly implementing the statement, suitable to
13818 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
13819 equivalent op (except for those embedded in a scope contained entirely
13820 within the statement).
13821 
13822 If an error occurs in parsing or compilation, in most cases a valid op
13823 tree (most likely null) is returned anyway.  The error is reflected in
13824 the parser state, normally resulting in a single exception at the top
13825 level of parsing which covers all the compilation errors that occurred.
13826 Some compilation errors, however, will throw an exception immediately.
13827 
13828 The C<flags> parameter is reserved for future use, and must always
13829 be zero.
13830 
13831 =cut
13832 */
13833 
13834 OP *
Perl_parse_barestmt(pTHX_ U32 flags)13835 Perl_parse_barestmt(pTHX_ U32 flags)
13836 {
13837     if (flags)
13838         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
13839     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
13840 }
13841 
13842 /*
13843 =for apidoc parse_label
13844 
13845 Parse a single label, possibly optional, of the type that may prefix a
13846 Perl statement.  It is up to the caller to ensure that the dynamic parser
13847 state (L</PL_parser> et al) is correctly set to reflect the source of
13848 the code to be parsed.  If C<flags> has the C<PARSE_OPTIONAL> bit set, then the
13849 label is optional, otherwise it is mandatory.
13850 
13851 The name of the label is returned in the form of a fresh scalar.  If an
13852 optional label is absent, a null pointer is returned.
13853 
13854 If an error occurs in parsing, which can only occur if the label is
13855 mandatory, a valid label is returned anyway.  The error is reflected in
13856 the parser state, normally resulting in a single exception at the top
13857 level of parsing which covers all the compilation errors that occurred.
13858 
13859 =cut
13860 */
13861 
13862 SV *
Perl_parse_label(pTHX_ U32 flags)13863 Perl_parse_label(pTHX_ U32 flags)
13864 {
13865     if (flags & ~PARSE_OPTIONAL)
13866         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
13867     if (PL_nexttoke) {
13868         PL_parser->yychar = yylex();
13869         if (PL_parser->yychar == LABEL) {
13870             SV * const labelsv = cSVOPx(pl_yylval.opval)->op_sv;
13871             PL_parser->yychar = YYEMPTY;
13872             cSVOPx(pl_yylval.opval)->op_sv = NULL;
13873             op_free(pl_yylval.opval);
13874             return labelsv;
13875         } else {
13876             yyunlex();
13877             goto no_label;
13878         }
13879     } else {
13880         char *s, *t;
13881         STRLEN wlen, bufptr_pos;
13882         lex_read_space(0);
13883         t = s = PL_bufptr;
13884         if (!isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
13885             goto no_label;
13886         t = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen, FALSE);
13887         if (word_takes_any_delimiter(s, wlen))
13888             goto no_label;
13889         bufptr_pos = s - SvPVX(PL_linestr);
13890         PL_bufptr = t;
13891         lex_read_space(LEX_KEEP_PREVIOUS);
13892         t = PL_bufptr;
13893         s = SvPVX(PL_linestr) + bufptr_pos;
13894         if (t[0] == ':' && t[1] != ':') {
13895             PL_oldoldbufptr = PL_oldbufptr;
13896             PL_oldbufptr = s;
13897             PL_bufptr = t+1;
13898             return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
13899         } else {
13900             PL_bufptr = s;
13901             no_label:
13902             if (flags & PARSE_OPTIONAL) {
13903                 return NULL;
13904             } else {
13905                 qerror(Perl_mess(aTHX_ "Parse error"));
13906                 return newSVpvs("x");
13907             }
13908         }
13909     }
13910 }
13911 
13912 /*
13913 =for apidoc parse_fullstmt
13914 
13915 Parse a single complete Perl statement.  This may be a normal imperative
13916 statement or a declaration that has compile-time effect, and may include
13917 optional labels.  It is up to the caller to ensure that the dynamic
13918 parser state (L</PL_parser> et al) is correctly set to reflect the source
13919 of the code to be parsed and the lexical context for the statement.
13920 
13921 The op tree representing the statement is returned.  This may be a
13922 null pointer if the statement is null, for example if it was actually
13923 a subroutine definition (which has compile-time side effects).  If not
13924 null, it will be the result of a L</newSTATEOP> call, normally including
13925 a C<nextstate> or equivalent op.
13926 
13927 If an error occurs in parsing or compilation, in most cases a valid op
13928 tree (most likely null) is returned anyway.  The error is reflected in
13929 the parser state, normally resulting in a single exception at the top
13930 level of parsing which covers all the compilation errors that occurred.
13931 Some compilation errors, however, will throw an exception immediately.
13932 
13933 The C<flags> parameter is reserved for future use, and must always
13934 be zero.
13935 
13936 =cut
13937 */
13938 
13939 OP *
Perl_parse_fullstmt(pTHX_ U32 flags)13940 Perl_parse_fullstmt(pTHX_ U32 flags)
13941 {
13942     if (flags)
13943         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
13944     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
13945 }
13946 
13947 /*
13948 =for apidoc parse_stmtseq
13949 
13950 Parse a sequence of zero or more Perl statements.  These may be normal
13951 imperative statements, including optional labels, or declarations
13952 that have compile-time effect, or any mixture thereof.  The statement
13953 sequence ends when a closing brace or end-of-file is encountered in a
13954 place where a new statement could have validly started.  It is up to
13955 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
13956 is correctly set to reflect the source of the code to be parsed and the
13957 lexical context for the statements.
13958 
13959 The op tree representing the statement sequence is returned.  This may
13960 be a null pointer if the statements were all null, for example if there
13961 were no statements or if there were only subroutine definitions (which
13962 have compile-time side effects).  If not null, it will be a C<lineseq>
13963 list, normally including C<nextstate> or equivalent ops.
13964 
13965 If an error occurs in parsing or compilation, in most cases a valid op
13966 tree is returned anyway.  The error is reflected in the parser state,
13967 normally resulting in a single exception at the top level of parsing
13968 which covers all the compilation errors that occurred.  Some compilation
13969 errors, however, will throw an exception immediately.
13970 
13971 The C<flags> parameter is reserved for future use, and must always
13972 be zero.
13973 
13974 =cut
13975 */
13976 
13977 OP *
Perl_parse_stmtseq(pTHX_ U32 flags)13978 Perl_parse_stmtseq(pTHX_ U32 flags)
13979 {
13980     OP *stmtseqop;
13981     I32 c;
13982     if (flags)
13983         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
13984     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
13985     c = lex_peek_unichar(0);
13986     if (c != -1 && c != /*{*/'}')
13987         qerror(Perl_mess(aTHX_ "Parse error"));
13988     return stmtseqop;
13989 }
13990 
13991 /*
13992 =for apidoc parse_subsignature
13993 
13994 Parse a subroutine signature declaration. This is the contents of the
13995 parentheses following a named or anonymous subroutine declaration when the
13996 C<signatures> feature is enabled. Note that this function neither expects
13997 nor consumes the opening and closing parentheses around the signature; it
13998 is the caller's job to handle these.
13999 
14000 This function must only be called during parsing of a subroutine; after
14001 L</start_subparse> has been called. It might allocate lexical variables on
14002 the pad for the current subroutine.
14003 
14004 The op tree to unpack the arguments from the stack at runtime is returned.
14005 This op tree should appear at the beginning of the compiled function. The
14006 caller may wish to use L</op_append_list> to build their function body
14007 after it, or splice it together with the body before calling L</newATTRSUB>.
14008 
14009 The C<flags> parameter is reserved for future use, and must always
14010 be zero.
14011 
14012 =cut
14013 */
14014 
14015 OP *
Perl_parse_subsignature(pTHX_ U32 flags)14016 Perl_parse_subsignature(pTHX_ U32 flags)
14017 {
14018     if (flags)
14019         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_subsignature");
14020     return parse_recdescent_for_op(GRAMSUBSIGNATURE, LEX_FAKEEOF_NONEXPR);
14021 }
14022 
14023 /*
14024  * ex: set ts=8 sts=4 sw=4 et:
14025  */
14026