xref: /openbsd/gnu/usr.bin/perl/toke.c (revision d415bd75)
1 /*    toke.c
2  *
3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
5  *
6  *    You may distribute under the terms of either the GNU General Public
7  *    License or the Artistic License, as specified in the README file.
8  *
9  */
10 
11 /*
12  *  'It all comes from here, the stench and the peril.'    --Frodo
13  *
14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
15  */
16 
17 /*
18  * This file is the lexer for Perl.  It's closely linked to the
19  * parser, perly.y.
20  *
21  * The main routine is yylex(), which returns the next token.
22  */
23 
24 /*
25 =head1 Lexer interface
26 This is the lower layer of the Perl parser, managing characters and tokens.
27 
28 =for apidoc AmnU|yy_parser *|PL_parser
29 
30 Pointer to a structure encapsulating the state of the parsing operation
31 currently in progress.  The pointer can be locally changed to perform
32 a nested parse without interfering with the state of an outer parse.
33 Individual members of C<PL_parser> have their own documentation.
34 
35 =cut
36 */
37 
38 #include "EXTERN.h"
39 #define PERL_IN_TOKE_C
40 #include "perl.h"
41 #include "invlist_inline.h"
42 
43 #define new_constant(a,b,c,d,e,f,g, h)	\
44         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g, h)
45 
46 #define pl_yylval	(PL_parser->yylval)
47 
48 /* XXX temporary backwards compatibility */
49 #define PL_lex_brackets		(PL_parser->lex_brackets)
50 #define PL_lex_allbrackets	(PL_parser->lex_allbrackets)
51 #define PL_lex_fakeeof		(PL_parser->lex_fakeeof)
52 #define PL_lex_brackstack	(PL_parser->lex_brackstack)
53 #define PL_lex_casemods		(PL_parser->lex_casemods)
54 #define PL_lex_casestack        (PL_parser->lex_casestack)
55 #define PL_lex_dojoin		(PL_parser->lex_dojoin)
56 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
57 #define PL_lex_inpat		(PL_parser->lex_inpat)
58 #define PL_lex_inwhat		(PL_parser->lex_inwhat)
59 #define PL_lex_op		(PL_parser->lex_op)
60 #define PL_lex_repl		(PL_parser->lex_repl)
61 #define PL_lex_starts		(PL_parser->lex_starts)
62 #define PL_lex_stuff		(PL_parser->lex_stuff)
63 #define PL_multi_start		(PL_parser->multi_start)
64 #define PL_multi_open		(PL_parser->multi_open)
65 #define PL_multi_close		(PL_parser->multi_close)
66 #define PL_preambled		(PL_parser->preambled)
67 #define PL_linestr		(PL_parser->linestr)
68 #define PL_expect		(PL_parser->expect)
69 #define PL_copline		(PL_parser->copline)
70 #define PL_bufptr		(PL_parser->bufptr)
71 #define PL_oldbufptr		(PL_parser->oldbufptr)
72 #define PL_oldoldbufptr		(PL_parser->oldoldbufptr)
73 #define PL_linestart		(PL_parser->linestart)
74 #define PL_bufend		(PL_parser->bufend)
75 #define PL_last_uni		(PL_parser->last_uni)
76 #define PL_last_lop		(PL_parser->last_lop)
77 #define PL_last_lop_op		(PL_parser->last_lop_op)
78 #define PL_lex_state		(PL_parser->lex_state)
79 #define PL_rsfp			(PL_parser->rsfp)
80 #define PL_rsfp_filters		(PL_parser->rsfp_filters)
81 #define PL_in_my		(PL_parser->in_my)
82 #define PL_in_my_stash		(PL_parser->in_my_stash)
83 #define PL_tokenbuf		(PL_parser->tokenbuf)
84 #define PL_multi_end		(PL_parser->multi_end)
85 #define PL_error_count		(PL_parser->error_count)
86 
87 #  define PL_nexttoke		(PL_parser->nexttoke)
88 #  define PL_nexttype		(PL_parser->nexttype)
89 #  define PL_nextval		(PL_parser->nextval)
90 
91 
92 #define SvEVALED(sv) \
93     (SvTYPE(sv) >= SVt_PVNV \
94     && ((XPVIV*)SvANY(sv))->xiv_u.xivu_eval_seen)
95 
96 static const char ident_too_long[] = "Identifier too long";
97 static const char ident_var_zero_multi_digit[] = "Numeric variables with more than one digit may not start with '0'";
98 
99 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
100 
101 #define XENUMMASK  0x3f
102 #define XFAKEEOF   0x40
103 #define XFAKEBRACK 0x80
104 
105 #ifdef USE_UTF8_SCRIPTS
106 #   define UTF cBOOL(!IN_BYTES)
107 #else
108 #   define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
109 #endif
110 
111 /* The maximum number of characters preceding the unrecognized one to display */
112 #define UNRECOGNIZED_PRECEDE_COUNT 10
113 
114 /* In variables named $^X, these are the legal values for X.
115  * 1999-02-27 mjd-perl-patch@plover.com */
116 #define isCONTROLVAR(x) (isUPPER(x) || memCHRs("[\\]^_?", (x)))
117 
118 #define SPACE_OR_TAB(c) isBLANK_A(c)
119 
120 #define HEXFP_PEEK(s)     \
121     (((s[0] == '.') && \
122       (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \
123      isALPHA_FOLD_EQ(s[0], 'p'))
124 
125 /* LEX_* are values for PL_lex_state, the state of the lexer.
126  * They are arranged oddly so that the guard on the switch statement
127  * can get by with a single comparison (if the compiler is smart enough).
128  *
129  * These values refer to the various states within a sublex parse,
130  * i.e. within a double quotish string
131  */
132 
133 /* #define LEX_NOTPARSING		11 is done in perl.h. */
134 
135 #define LEX_NORMAL		10 /* normal code (ie not within "...")     */
136 #define LEX_INTERPNORMAL	 9 /* code within a string, eg "$foo[$x+1]" */
137 #define LEX_INTERPCASEMOD	 8 /* expecting a \U, \Q or \E etc          */
138 #define LEX_INTERPPUSH		 7 /* starting a new sublex parse level     */
139 #define LEX_INTERPSTART		 6 /* expecting the start of a $var         */
140 
141                                    /* at end of code, eg "$x" followed by:  */
142 #define LEX_INTERPEND		 5 /* ... eg not one of [, { or ->          */
143 #define LEX_INTERPENDMAYBE	 4 /* ... eg one of [, { or ->              */
144 
145 #define LEX_INTERPCONCAT	 3 /* expecting anything, eg at start of
146                                         string or after \E, $foo, etc       */
147 #define LEX_INTERPCONST		 2 /* NOT USED */
148 #define LEX_FORMLINE		 1 /* expecting a format line               */
149 
150 /* returned to yyl_try() to request it to retry the parse loop, expected to only
151    be returned directly by yyl_fake_eof(), but functions that call yyl_fake_eof()
152    can also return it.
153 
154    yylex (aka Perl_yylex) returns 0 on EOF rather than returning -1,
155    other token values are 258 or higher (see perly.h), so -1 should be
156    a safe value here.
157 */
158 #define YYL_RETRY (-1)
159 
160 #ifdef DEBUGGING
161 static const char* const lex_state_names[] = {
162     "KNOWNEXT",
163     "FORMLINE",
164     "INTERPCONST",
165     "INTERPCONCAT",
166     "INTERPENDMAYBE",
167     "INTERPEND",
168     "INTERPSTART",
169     "INTERPPUSH",
170     "INTERPCASEMOD",
171     "INTERPNORMAL",
172     "NORMAL"
173 };
174 #endif
175 
176 #include "keywords.h"
177 
178 /* CLINE is a macro that ensures PL_copline has a sane value */
179 
180 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
181 
182 /*
183  * Convenience functions to return different tokens and prime the
184  * lexer for the next token.  They all take an argument.
185  *
186  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
187  * OPERATOR     : generic operator
188  * AOPERATOR    : assignment operator
189  * PREBLOCK     : beginning the block after an if, while, foreach, ...
190  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
191  * PREREF       : *EXPR where EXPR is not a simple identifier
192  * TERM         : expression term
193  * POSTDEREF    : postfix dereference (->$* ->@[...] etc.)
194  * LOOPX        : loop exiting command (goto, last, dump, etc)
195  * FTST         : file test operator
196  * FUN0         : zero-argument function
197  * FUN0OP       : zero-argument function, with its op created in this file
198  * FUN1         : not used, except for not, which isn't a UNIOP
199  * BOop         : bitwise or or xor
200  * BAop         : bitwise and
201  * BCop         : bitwise complement
202  * SHop         : shift operator
203  * PWop         : power operator
204  * PMop         : pattern-matching operator
205  * Aop          : addition-level operator
206  * AopNOASSIGN  : addition-level operator that is never part of .=
207  * Mop          : multiplication-level operator
208  * ChEop        : chaining equality-testing operator
209  * NCEop        : non-chaining comparison operator at equality precedence
210  * ChRop        : chaining relational operator <= != gt
211  * NCRop        : non-chaining relational operator isa
212  *
213  * Also see LOP and lop() below.
214  */
215 
216 #ifdef DEBUGGING /* Serve -DT. */
217 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
218 #else
219 #   define REPORT(retval) (retval)
220 #endif
221 
222 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
223 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
224 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
225 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
226 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
227 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
228 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
229 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
230 #define LOOPX(f) return (PL_bufptr = force_word(s,BAREWORD,TRUE,FALSE), \
231                          pl_yylval.ival=f, \
232                          PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
233                          REPORT((int)LOOPEX))
234 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
235 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
236 #define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
237 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
238 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
239 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
240 #define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \
241                        REPORT(PERLY_TILDE)
242 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
243 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
244 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
245 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
246 #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
247 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
248 #define ChEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHEQOP))
249 #define NCEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCEQOP))
250 #define ChRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHRELOP))
251 #define NCRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCRELOP))
252 
253 /* This bit of chicanery makes a unary function followed by
254  * a parenthesis into a function with one argument, highest precedence.
255  * The UNIDOR macro is for unary functions that can be followed by the //
256  * operator (such as C<shift // 0>).
257  */
258 #define UNI3(f,x,have_x) { \
259         pl_yylval.ival = f; \
260         if (have_x) PL_expect = x; \
261         PL_bufptr = s; \
262         PL_last_uni = PL_oldbufptr; \
263         PL_last_lop_op = (f) < 0 ? -(f) : (f); \
264         if (*s == '(') \
265             return REPORT( (int)FUNC1 ); \
266         s = skipspace(s); \
267         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
268         }
269 #define UNI(f)    UNI3(f,XTERM,1)
270 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
271 #define UNIPROTO(f,optional) { \
272         if (optional) PL_last_uni = PL_oldbufptr; \
273         OPERATOR(f); \
274         }
275 
276 #define UNIBRACK(f) UNI3(f,0,0)
277 
278 /* return has special case parsing.
279  *
280  * List operators have low precedence. Functions have high precedence.
281  * Every built in, *except return*, if written with () around its arguments, is
282  * parsed as a function. Hence every other list built in:
283  *
284  * $ perl -lwe 'sub foo { join 2,4,6 * 1.5 } print for foo()' # join 2,4,9
285  * 429
286  * $ perl -lwe 'sub foo { join(2,4,6) * 1.5 } print for foo()' # 426 * 1.5
287  * 639
288  * $ perl -lwe 'sub foo { join+(2,4,6) * 1.5 } print for foo()'
289  * Useless use of a constant (2) in void context at -e line 1.
290  * Useless use of a constant (4) in void context at -e line 1.
291  *
292  * $
293  *
294  * empty line output because C<(2, 4, 6) * 1.5> is the comma operator, not a
295  * list. * forces scalar context, 6 * 1.5 is 9, and join(9) is the empty string.
296  *
297  * Whereas return:
298  *
299  * $ perl -lwe 'sub foo { return 2,4,6 * 1.5 } print for foo()'
300  * 2
301  * 4
302  * 9
303  * $ perl -lwe 'sub foo { return(2,4,6) * 1.5 } print for foo()'
304  * Useless use of a constant (2) in void context at -e line 1.
305  * Useless use of a constant (4) in void context at -e line 1.
306  * 9
307  * $ perl -lwe 'sub foo { return+(2,4,6) * 1.5 } print for foo()'
308  * Useless use of a constant (2) in void context at -e line 1.
309  * Useless use of a constant (4) in void context at -e line 1.
310  * 9
311  * $
312  *
313  * and:
314  * $ perl -lwe 'sub foo { return(2,4,6) } print for foo()'
315  * 2
316  * 4
317  * 6
318  *
319  * This last example is what we expect, but it's clearly inconsistent with how
320  * C<return(2,4,6) * 1.5> *ought* to behave, if the rules were consistently
321  * followed.
322  *
323  *
324  * Perl 3 attempted to be consistent:
325  *
326  *   The rules are more consistent about where parens are needed and
327  *   where they are not.  In particular, unary operators and list operators now
328  *   behave like functions if they're called like functions.
329  *
330  * However, the behaviour for return was reverted to the "old" parsing with
331  * patches 9-12:
332  *
333  *   The construct
334  *   return (1,2,3);
335  *   did not do what was expected, since return was swallowing the
336  *   parens in order to consider itself a function.  The solution,
337  *   since return never wants any trailing expression such as
338  *   return (1,2,3) + 2;
339  *   is to simply make return an exception to the paren-makes-a-function
340  *   rule, and treat it the way it always was, so that it doesn't
341  *   strip the parens.
342  *
343  * To demonstrate the special-case parsing, replace OLDLOP(OP_RETURN); with
344  * LOP(OP_RETURN, XTERM);
345  *
346  * and constructs such as
347  *
348  *     return (Internals::V())[2]
349  *
350  * turn into syntax errors
351  */
352 
353 #define OLDLOP(f) \
354         do { \
355             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
356                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
357             pl_yylval.ival = (f); \
358             PL_expect = XTERM; \
359             PL_bufptr = s; \
360             return (int)LSTOP; \
361         } while(0)
362 
363 #define COPLINE_INC_WITH_HERELINES		    \
364     STMT_START {				     \
365         CopLINE_inc(PL_curcop);			      \
366         if (PL_parser->herelines)		       \
367             CopLINE(PL_curcop) += PL_parser->herelines, \
368             PL_parser->herelines = 0;			 \
369     } STMT_END
370 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
371  * is no sublex_push to follow. */
372 #define COPLINE_SET_FROM_MULTI_END	      \
373     STMT_START {			       \
374         CopLINE_set(PL_curcop, PL_multi_end);	\
375         if (PL_multi_end != PL_multi_start)	 \
376             PL_parser->herelines = 0;		  \
377     } STMT_END
378 
379 
380 /* A file-local structure for passing around information about subroutines and
381  * related definable words */
382 struct code {
383     SV *sv;
384     CV *cv;
385     GV *gv, **gvp;
386     OP *rv2cv_op;
387     PADOFFSET off;
388     bool lex;
389 };
390 
391 static const struct code no_code = { NULL, NULL, NULL, NULL, NULL, 0, FALSE };
392 
393 #ifdef DEBUGGING
394 
395 /* how to interpret the pl_yylval associated with the token */
396 enum token_type {
397     TOKENTYPE_NONE,
398     TOKENTYPE_IVAL,
399     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
400     TOKENTYPE_PVAL,
401     TOKENTYPE_OPVAL
402 };
403 
404 #define DEBUG_TOKEN(Type, Name)                                         \
405     { Name, TOKENTYPE_##Type, #Name }
406 
407 static struct debug_tokens {
408     const int token;
409     enum token_type type;
410     const char *name;
411 } const debug_tokens[] =
412 {
413     { ADDOP,		TOKENTYPE_OPNUM,	"ADDOP" },
414     { ANDAND,		TOKENTYPE_NONE,		"ANDAND" },
415     { ANDOP,		TOKENTYPE_NONE,		"ANDOP" },
416     { ANONSUB,		TOKENTYPE_IVAL,		"ANONSUB" },
417     { ANON_SIGSUB,	TOKENTYPE_IVAL,		"ANON_SIGSUB" },
418     { ARROW,		TOKENTYPE_NONE,		"ARROW" },
419     { ASSIGNOP,		TOKENTYPE_OPNUM,	"ASSIGNOP" },
420     { BITANDOP,		TOKENTYPE_OPNUM,	"BITANDOP" },
421     { BITOROP,		TOKENTYPE_OPNUM,	"BITOROP" },
422     { CATCH,		TOKENTYPE_IVAL,		"CATCH" },
423     { CHEQOP,		TOKENTYPE_OPNUM,	"CHEQOP" },
424     { CHRELOP,		TOKENTYPE_OPNUM,	"CHRELOP" },
425     { COLONATTR,	TOKENTYPE_NONE,		"COLONATTR" },
426     { CONTINUE,		TOKENTYPE_NONE,		"CONTINUE" },
427     { DEFAULT,		TOKENTYPE_NONE,		"DEFAULT" },
428     { DO,		TOKENTYPE_NONE,		"DO" },
429     { DOLSHARP,		TOKENTYPE_NONE,		"DOLSHARP" },
430     { DORDOR,		TOKENTYPE_NONE,		"DORDOR" },
431     { DOTDOT,		TOKENTYPE_IVAL,		"DOTDOT" },
432     { ELSE,		TOKENTYPE_NONE,		"ELSE" },
433     { ELSIF,		TOKENTYPE_IVAL,		"ELSIF" },
434     { FOR,		TOKENTYPE_IVAL,		"FOR" },
435     { FORMAT,		TOKENTYPE_NONE,		"FORMAT" },
436     { FORMLBRACK,	TOKENTYPE_NONE,		"FORMLBRACK" },
437     { FORMRBRACK,	TOKENTYPE_NONE,		"FORMRBRACK" },
438     { FUNC,		TOKENTYPE_OPNUM,	"FUNC" },
439     { FUNC0,		TOKENTYPE_OPNUM,	"FUNC0" },
440     { FUNC0OP,		TOKENTYPE_OPVAL,	"FUNC0OP" },
441     { FUNC0SUB,		TOKENTYPE_OPVAL,	"FUNC0SUB" },
442     { FUNC1,		TOKENTYPE_OPNUM,	"FUNC1" },
443     { FUNCMETH,		TOKENTYPE_OPVAL,	"FUNCMETH" },
444     { GIVEN,		TOKENTYPE_IVAL,		"GIVEN" },
445     { HASHBRACK,	TOKENTYPE_NONE,		"HASHBRACK" },
446     { IF,		TOKENTYPE_IVAL,		"IF" },
447     { LABEL,		TOKENTYPE_OPVAL,	"LABEL" },
448     { LOCAL,		TOKENTYPE_IVAL,		"LOCAL" },
449     { LOOPEX,		TOKENTYPE_OPNUM,	"LOOPEX" },
450     { LSTOP,		TOKENTYPE_OPNUM,	"LSTOP" },
451     { LSTOPSUB,		TOKENTYPE_OPVAL,	"LSTOPSUB" },
452     { MATCHOP,		TOKENTYPE_OPNUM,	"MATCHOP" },
453     { METHOD,		TOKENTYPE_OPVAL,	"METHOD" },
454     { MULOP,		TOKENTYPE_OPNUM,	"MULOP" },
455     { MY,		TOKENTYPE_IVAL,		"MY" },
456     { NCEQOP,		TOKENTYPE_OPNUM,	"NCEQOP" },
457     { NCRELOP,		TOKENTYPE_OPNUM,	"NCRELOP" },
458     { NOAMP,		TOKENTYPE_NONE,		"NOAMP" },
459     { NOTOP,		TOKENTYPE_NONE,		"NOTOP" },
460     { OROP,		TOKENTYPE_IVAL,		"OROP" },
461     { OROR,		TOKENTYPE_NONE,		"OROR" },
462     { PACKAGE,		TOKENTYPE_NONE,		"PACKAGE" },
463     DEBUG_TOKEN (IVAL, PERLY_AMPERSAND),
464     DEBUG_TOKEN (IVAL, PERLY_BRACE_CLOSE),
465     DEBUG_TOKEN (IVAL, PERLY_BRACE_OPEN),
466     DEBUG_TOKEN (IVAL, PERLY_BRACKET_CLOSE),
467     DEBUG_TOKEN (IVAL, PERLY_BRACKET_OPEN),
468     DEBUG_TOKEN (IVAL, PERLY_COLON),
469     DEBUG_TOKEN (IVAL, PERLY_COMMA),
470     DEBUG_TOKEN (IVAL, PERLY_DOT),
471     DEBUG_TOKEN (IVAL, PERLY_EQUAL_SIGN),
472     DEBUG_TOKEN (IVAL, PERLY_EXCLAMATION_MARK),
473     DEBUG_TOKEN (IVAL, PERLY_MINUS),
474     DEBUG_TOKEN (IVAL, PERLY_PAREN_OPEN),
475     DEBUG_TOKEN (IVAL, PERLY_PERCENT_SIGN),
476     DEBUG_TOKEN (IVAL, PERLY_PLUS),
477     DEBUG_TOKEN (IVAL, PERLY_QUESTION_MARK),
478     DEBUG_TOKEN (IVAL, PERLY_SEMICOLON),
479     DEBUG_TOKEN (IVAL, PERLY_SLASH),
480     DEBUG_TOKEN (IVAL, PERLY_SNAIL),
481     DEBUG_TOKEN (IVAL, PERLY_STAR),
482     DEBUG_TOKEN (IVAL, PERLY_TILDE),
483     { PLUGEXPR,		TOKENTYPE_OPVAL,	"PLUGEXPR" },
484     { PLUGSTMT,		TOKENTYPE_OPVAL,	"PLUGSTMT" },
485     { PMFUNC,		TOKENTYPE_OPVAL,	"PMFUNC" },
486     { POSTJOIN,		TOKENTYPE_NONE,		"POSTJOIN" },
487     { POSTDEC,		TOKENTYPE_NONE,		"POSTDEC" },
488     { POSTINC,		TOKENTYPE_NONE,		"POSTINC" },
489     { POWOP,		TOKENTYPE_OPNUM,	"POWOP" },
490     { PREDEC,		TOKENTYPE_NONE,		"PREDEC" },
491     { PREINC,		TOKENTYPE_NONE,		"PREINC" },
492     { PRIVATEREF,	TOKENTYPE_OPVAL,	"PRIVATEREF" },
493     { QWLIST,		TOKENTYPE_OPVAL,	"QWLIST" },
494     { REFGEN,		TOKENTYPE_NONE,		"REFGEN" },
495     { REQUIRE,		TOKENTYPE_NONE,		"REQUIRE" },
496     { SHIFTOP,		TOKENTYPE_OPNUM,	"SHIFTOP" },
497     { SIGSUB,		TOKENTYPE_NONE,		"SIGSUB" },
498     { SUB,		TOKENTYPE_NONE,		"SUB" },
499     { SUBLEXEND,	TOKENTYPE_NONE,		"SUBLEXEND" },
500     { SUBLEXSTART,	TOKENTYPE_NONE,		"SUBLEXSTART" },
501     { THING,		TOKENTYPE_OPVAL,	"THING" },
502     { TRY,		TOKENTYPE_IVAL,		"TRY" },
503     { UMINUS,		TOKENTYPE_NONE,		"UMINUS" },
504     { UNIOP,		TOKENTYPE_OPNUM,	"UNIOP" },
505     { UNIOPSUB,		TOKENTYPE_OPVAL,	"UNIOPSUB" },
506     { UNLESS,		TOKENTYPE_IVAL,		"UNLESS" },
507     { UNTIL,		TOKENTYPE_IVAL,		"UNTIL" },
508     { USE,		TOKENTYPE_IVAL,		"USE" },
509     { WHEN,		TOKENTYPE_IVAL,		"WHEN" },
510     { WHILE,		TOKENTYPE_IVAL,		"WHILE" },
511     { BAREWORD,		TOKENTYPE_OPVAL,	"BAREWORD" },
512     { YADAYADA,		TOKENTYPE_IVAL,		"YADAYADA" },
513     { 0,		TOKENTYPE_NONE,		NULL }
514 };
515 
516 #undef DEBUG_TOKEN
517 
518 /* dump the returned token in rv, plus any optional arg in pl_yylval */
519 
520 STATIC int
521 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
522 {
523     PERL_ARGS_ASSERT_TOKEREPORT;
524 
525     if (DEBUG_T_TEST) {
526         const char *name = NULL;
527         enum token_type type = TOKENTYPE_NONE;
528         const struct debug_tokens *p;
529         SV* const report = newSVpvs("<== ");
530 
531         for (p = debug_tokens; p->token; p++) {
532             if (p->token == (int)rv) {
533                 name = p->name;
534                 type = p->type;
535                 break;
536             }
537         }
538         if (name)
539             Perl_sv_catpv(aTHX_ report, name);
540         else if (isGRAPH(rv))
541         {
542             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
543             if ((char)rv == 'p')
544                 sv_catpvs(report, " (pending identifier)");
545         }
546         else if (!rv)
547             sv_catpvs(report, "EOF");
548         else
549             Perl_sv_catpvf(aTHX_ report, "?? %" IVdf, (IV)rv);
550         switch (type) {
551         case TOKENTYPE_NONE:
552             break;
553         case TOKENTYPE_IVAL:
554             Perl_sv_catpvf(aTHX_ report, "(ival=%" IVdf ")", (IV)lvalp->ival);
555             break;
556         case TOKENTYPE_OPNUM:
557             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
558                                     PL_op_name[lvalp->ival]);
559             break;
560         case TOKENTYPE_PVAL:
561             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
562             break;
563         case TOKENTYPE_OPVAL:
564             if (lvalp->opval) {
565                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
566                                     PL_op_name[lvalp->opval->op_type]);
567                 if (lvalp->opval->op_type == OP_CONST) {
568                     Perl_sv_catpvf(aTHX_ report, " %s",
569                         SvPEEK(cSVOPx_sv(lvalp->opval)));
570                 }
571 
572             }
573             else
574                 sv_catpvs(report, "(opval=null)");
575             break;
576         }
577         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
578     };
579     return (int)rv;
580 }
581 
582 
583 /* print the buffer with suitable escapes */
584 
585 STATIC void
586 S_printbuf(pTHX_ const char *const fmt, const char *const s)
587 {
588     SV* const tmp = newSVpvs("");
589 
590     PERL_ARGS_ASSERT_PRINTBUF;
591 
592     GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); /* fmt checked by caller */
593     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
594     GCC_DIAG_RESTORE_STMT;
595     SvREFCNT_dec(tmp);
596 }
597 
598 #endif
599 
600 /*
601  * S_ao
602  *
603  * This subroutine looks for an '=' next to the operator that has just been
604  * parsed and turns it into an ASSIGNOP if it finds one.
605  */
606 
607 STATIC int
608 S_ao(pTHX_ int toketype)
609 {
610     if (*PL_bufptr == '=') {
611         PL_bufptr++;
612 
613         switch (toketype) {
614             case ANDAND: pl_yylval.ival = OP_ANDASSIGN; break;
615             case OROR:   pl_yylval.ival = OP_ORASSIGN;  break;
616             case DORDOR: pl_yylval.ival = OP_DORASSIGN; break;
617         }
618 
619         toketype = ASSIGNOP;
620     }
621     return REPORT(toketype);
622 }
623 
624 /*
625  * S_no_op
626  * When Perl expects an operator and finds something else, no_op
627  * prints the warning.  It always prints "<something> found where
628  * operator expected.  It prints "Missing semicolon on previous line?"
629  * if the surprise occurs at the start of the line.  "do you need to
630  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
631  * where the compiler doesn't know if foo is a method call or a function.
632  * It prints "Missing operator before end of line" if there's nothing
633  * after the missing operator, or "... before <...>" if there is something
634  * after the missing operator.
635  *
636  * PL_bufptr is expected to point to the start of the thing that was found,
637  * and s after the next token or partial token.
638  */
639 
640 STATIC void
641 S_no_op(pTHX_ const char *const what, char *s)
642 {
643     char * const oldbp = PL_bufptr;
644     const bool is_first = (PL_oldbufptr == PL_linestart);
645 
646     PERL_ARGS_ASSERT_NO_OP;
647 
648     if (!s)
649         s = oldbp;
650     else
651         PL_bufptr = s;
652     yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
653     if (ckWARN_d(WARN_SYNTAX)) {
654         if (is_first)
655             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
656                     "\t(Missing semicolon on previous line?)\n");
657         else if (PL_oldoldbufptr && isIDFIRST_lazy_if_safe(PL_oldoldbufptr,
658                                                            PL_bufend,
659                                                            UTF))
660         {
661             const char *t;
662             for (t = PL_oldoldbufptr;
663                  (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) || *t == ':');
664                  t += UTF ? UTF8SKIP(t) : 1)
665             {
666                 NOOP;
667             }
668             if (t < PL_bufptr && isSPACE(*t))
669                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
670                         "\t(Do you need to predeclare %" UTF8f "?)\n",
671                       UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
672         }
673         else {
674             assert(s >= oldbp);
675             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
676                     "\t(Missing operator before %" UTF8f "?)\n",
677                      UTF8fARG(UTF, s - oldbp, oldbp));
678         }
679     }
680     PL_bufptr = oldbp;
681 }
682 
683 /*
684  * S_missingterm
685  * Complain about missing quote/regexp/heredoc terminator.
686  * If it's called with NULL then it cauterizes the line buffer.
687  * If we're in a delimited string and the delimiter is a control
688  * character, it's reformatted into a two-char sequence like ^C.
689  * This is fatal.
690  */
691 
692 STATIC void
693 S_missingterm(pTHX_ char *s, STRLEN len)
694 {
695     char tmpbuf[UTF8_MAXBYTES + 1];
696     char q;
697     bool uni = FALSE;
698     if (s) {
699         char * const nl = (char *) my_memrchr(s, '\n', len);
700         if (nl) {
701             *nl = '\0';
702             len = nl - s;
703         }
704         uni = UTF;
705     }
706     else if (PL_multi_close < 32) {
707         *tmpbuf = '^';
708         tmpbuf[1] = (char)toCTRL(PL_multi_close);
709         tmpbuf[2] = '\0';
710         s = tmpbuf;
711         len = 2;
712     }
713     else {
714         if (! UTF && LIKELY(PL_multi_close < 256)) {
715             *tmpbuf = (char)PL_multi_close;
716             tmpbuf[1] = '\0';
717             len = 1;
718         }
719         else {
720             char *end = (char *)uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close);
721             *end = '\0';
722             len = end - tmpbuf;
723             uni = TRUE;
724         }
725         s = tmpbuf;
726     }
727     q = memchr(s, '"', len) ? '\'' : '"';
728     Perl_croak(aTHX_ "Can't find string terminator %c%" UTF8f "%c"
729                      " anywhere before EOF", q, UTF8fARG(uni, len, s), q);
730 }
731 
732 #include "feature.h"
733 
734 /*
735  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
736  * utf16-to-utf8-reversed.
737  */
738 
739 #ifdef PERL_CR_FILTER
740 static void
741 strip_return(SV *sv)
742 {
743     const char *s = SvPVX_const(sv);
744     const char * const e = s + SvCUR(sv);
745 
746     PERL_ARGS_ASSERT_STRIP_RETURN;
747 
748     /* outer loop optimized to do nothing if there are no CR-LFs */
749     while (s < e) {
750         if (*s++ == '\r' && *s == '\n') {
751             /* hit a CR-LF, need to copy the rest */
752             char *d = s - 1;
753             *d++ = *s++;
754             while (s < e) {
755                 if (*s == '\r' && s[1] == '\n')
756                     s++;
757                 *d++ = *s++;
758             }
759             SvCUR(sv) -= s - d;
760             return;
761         }
762     }
763 }
764 
765 STATIC I32
766 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
767 {
768     const I32 count = FILTER_READ(idx+1, sv, maxlen);
769     if (count > 0 && !maxlen)
770         strip_return(sv);
771     return count;
772 }
773 #endif
774 
775 /*
776 =for apidoc lex_start
777 
778 Creates and initialises a new lexer/parser state object, supplying
779 a context in which to lex and parse from a new source of Perl code.
780 A pointer to the new state object is placed in L</PL_parser>.  An entry
781 is made on the save stack so that upon unwinding, the new state object
782 will be destroyed and the former value of L</PL_parser> will be restored.
783 Nothing else need be done to clean up the parsing context.
784 
785 The code to be parsed comes from C<line> and C<rsfp>.  C<line>, if
786 non-null, provides a string (in SV form) containing code to be parsed.
787 A copy of the string is made, so subsequent modification of C<line>
788 does not affect parsing.  C<rsfp>, if non-null, provides an input stream
789 from which code will be read to be parsed.  If both are non-null, the
790 code in C<line> comes first and must consist of complete lines of input,
791 and C<rsfp> supplies the remainder of the source.
792 
793 The C<flags> parameter is reserved for future use.  Currently it is only
794 used by perl internally, so extensions should always pass zero.
795 
796 =cut
797 */
798 
799 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
800    can share filters with the current parser.
801    LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
802    caller, hence isn't owned by the parser, so shouldn't be closed on parser
803    destruction. This is used to handle the case of defaulting to reading the
804    script from the standard input because no filename was given on the command
805    line (without getting confused by situation where STDIN has been closed, so
806    the script handle is opened on fd 0)  */
807 
808 void
809 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
810 {
811     const char *s = NULL;
812     yy_parser *parser, *oparser;
813 
814     if (flags && flags & ~LEX_START_FLAGS)
815         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
816 
817     /* create and initialise a parser */
818 
819     Newxz(parser, 1, yy_parser);
820     parser->old_parser = oparser = PL_parser;
821     PL_parser = parser;
822 
823     parser->stack = NULL;
824     parser->stack_max1 = NULL;
825     parser->ps = NULL;
826 
827     /* on scope exit, free this parser and restore any outer one */
828     SAVEPARSER(parser);
829     parser->saved_curcop = PL_curcop;
830 
831     /* initialise lexer state */
832 
833     parser->nexttoke = 0;
834     parser->error_count = oparser ? oparser->error_count : 0;
835     parser->copline = parser->preambling = NOLINE;
836     parser->lex_state = LEX_NORMAL;
837     parser->expect = XSTATE;
838     parser->rsfp = rsfp;
839     parser->recheck_utf8_validity = TRUE;
840     parser->rsfp_filters =
841       !(flags & LEX_START_SAME_FILTER) || !oparser
842         ? NULL
843         : MUTABLE_AV(SvREFCNT_inc(
844             oparser->rsfp_filters
845              ? oparser->rsfp_filters
846              : (oparser->rsfp_filters = newAV())
847           ));
848 
849     Newx(parser->lex_brackstack, 120, char);
850     Newx(parser->lex_casestack, 12, char);
851     *parser->lex_casestack = '\0';
852     Newxz(parser->lex_shared, 1, LEXSHARED);
853 
854     if (line) {
855         STRLEN len;
856         const U8* first_bad_char_loc;
857 
858         s = SvPV_const(line, len);
859 
860         if (   SvUTF8(line)
861             && UNLIKELY(! is_utf8_string_loc((U8 *) s,
862                                              SvCUR(line),
863                                              &first_bad_char_loc)))
864         {
865             _force_out_malformed_utf8_message(first_bad_char_loc,
866                                               (U8 *) s + SvCUR(line),
867                                               0,
868                                               1 /* 1 means die */ );
869             NOT_REACHED; /* NOTREACHED */
870         }
871 
872         parser->linestr = flags & LEX_START_COPIED
873                             ? SvREFCNT_inc_simple_NN(line)
874                             : newSVpvn_flags(s, len, SvUTF8(line));
875         if (!rsfp)
876             sv_catpvs(parser->linestr, "\n;");
877     } else {
878         parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
879     }
880 
881     parser->oldoldbufptr =
882         parser->oldbufptr =
883         parser->bufptr =
884         parser->linestart = SvPVX(parser->linestr);
885     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
886     parser->last_lop = parser->last_uni = NULL;
887 
888     STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
889                                                         |LEX_DONT_CLOSE_RSFP));
890     parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
891                                                         |LEX_DONT_CLOSE_RSFP));
892 
893     parser->in_pod = parser->filtered = 0;
894 }
895 
896 
897 /* delete a parser object */
898 
899 void
900 Perl_parser_free(pTHX_  const yy_parser *parser)
901 {
902     PERL_ARGS_ASSERT_PARSER_FREE;
903 
904     PL_curcop = parser->saved_curcop;
905     SvREFCNT_dec(parser->linestr);
906 
907     if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
908         PerlIO_clearerr(parser->rsfp);
909     else if (parser->rsfp && (!parser->old_parser
910           || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
911         PerlIO_close(parser->rsfp);
912     SvREFCNT_dec(parser->rsfp_filters);
913     SvREFCNT_dec(parser->lex_stuff);
914     SvREFCNT_dec(parser->lex_sub_repl);
915 
916     Safefree(parser->lex_brackstack);
917     Safefree(parser->lex_casestack);
918     Safefree(parser->lex_shared);
919     PL_parser = parser->old_parser;
920     Safefree(parser);
921 }
922 
923 void
924 Perl_parser_free_nexttoke_ops(pTHX_  yy_parser *parser, OPSLAB *slab)
925 {
926     I32 nexttoke = parser->nexttoke;
927     PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
928     while (nexttoke--) {
929         if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
930          && parser->nextval[nexttoke].opval
931          && parser->nextval[nexttoke].opval->op_slabbed
932          && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
933             op_free(parser->nextval[nexttoke].opval);
934             parser->nextval[nexttoke].opval = NULL;
935         }
936     }
937 }
938 
939 
940 /*
941 =for apidoc AmnxUN|SV *|PL_parser-E<gt>linestr
942 
943 Buffer scalar containing the chunk currently under consideration of the
944 text currently being lexed.  This is always a plain string scalar (for
945 which C<SvPOK> is true).  It is not intended to be used as a scalar by
946 normal scalar means; instead refer to the buffer directly by the pointer
947 variables described below.
948 
949 The lexer maintains various C<char*> pointers to things in the
950 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
951 reallocated, all of these pointers must be updated.  Don't attempt to
952 do this manually, but rather use L</lex_grow_linestr> if you need to
953 reallocate the buffer.
954 
955 The content of the text chunk in the buffer is commonly exactly one
956 complete line of input, up to and including a newline terminator,
957 but there are situations where it is otherwise.  The octets of the
958 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
959 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
960 flag on this scalar, which may disagree with it.
961 
962 For direct examination of the buffer, the variable
963 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
964 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
965 of these pointers is usually preferable to examination of the scalar
966 through normal scalar means.
967 
968 =for apidoc AmnxUN|char *|PL_parser-E<gt>bufend
969 
970 Direct pointer to the end of the chunk of text currently being lexed, the
971 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
972 + SvCUR(PL_parser-E<gt>linestr)>.  A C<NUL> character (zero octet) is
973 always located at the end of the buffer, and does not count as part of
974 the buffer's contents.
975 
976 =for apidoc AmnxUN|char *|PL_parser-E<gt>bufptr
977 
978 Points to the current position of lexing inside the lexer buffer.
979 Characters around this point may be freely examined, within
980 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
981 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
982 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
983 
984 Lexing code (whether in the Perl core or not) moves this pointer past
985 the characters that it consumes.  It is also expected to perform some
986 bookkeeping whenever a newline character is consumed.  This movement
987 can be more conveniently performed by the function L</lex_read_to>,
988 which handles newlines appropriately.
989 
990 Interpretation of the buffer's octets can be abstracted out by
991 using the slightly higher-level functions L</lex_peek_unichar> and
992 L</lex_read_unichar>.
993 
994 =for apidoc AmnxUN|char *|PL_parser-E<gt>linestart
995 
996 Points to the start of the current line inside the lexer buffer.
997 This is useful for indicating at which column an error occurred, and
998 not much else.  This must be updated by any lexing code that consumes
999 a newline; the function L</lex_read_to> handles this detail.
1000 
1001 =cut
1002 */
1003 
1004 /*
1005 =for apidoc lex_bufutf8
1006 
1007 Indicates whether the octets in the lexer buffer
1008 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
1009 of Unicode characters.  If not, they should be interpreted as Latin-1
1010 characters.  This is analogous to the C<SvUTF8> flag for scalars.
1011 
1012 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
1013 contains valid UTF-8.  Lexing code must be robust in the face of invalid
1014 encoding.
1015 
1016 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
1017 is significant, but not the whole story regarding the input character
1018 encoding.  Normally, when a file is being read, the scalar contains octets
1019 and its C<SvUTF8> flag is off, but the octets should be interpreted as
1020 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
1021 however, the scalar may have the C<SvUTF8> flag on, and in this case its
1022 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
1023 is in effect.  This logic may change in the future; use this function
1024 instead of implementing the logic yourself.
1025 
1026 =cut
1027 */
1028 
1029 bool
1030 Perl_lex_bufutf8(pTHX)
1031 {
1032     return UTF;
1033 }
1034 
1035 /*
1036 =for apidoc lex_grow_linestr
1037 
1038 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
1039 at least C<len> octets (including terminating C<NUL>).  Returns a
1040 pointer to the reallocated buffer.  This is necessary before making
1041 any direct modification of the buffer that would increase its length.
1042 L</lex_stuff_pvn> provides a more convenient way to insert text into
1043 the buffer.
1044 
1045 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
1046 this function updates all of the lexer's variables that point directly
1047 into the buffer.
1048 
1049 =cut
1050 */
1051 
1052 char *
1053 Perl_lex_grow_linestr(pTHX_ STRLEN len)
1054 {
1055     SV *linestr;
1056     char *buf;
1057     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1058     STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
1059     bool current;
1060 
1061     linestr = PL_parser->linestr;
1062     buf = SvPVX(linestr);
1063     if (len <= SvLEN(linestr))
1064         return buf;
1065 
1066     /* Is the lex_shared linestr SV the same as the current linestr SV?
1067      * Only in this case does re_eval_start need adjusting, since it
1068      * points within lex_shared->ls_linestr's buffer */
1069     current = (   !PL_parser->lex_shared->ls_linestr
1070                || linestr == PL_parser->lex_shared->ls_linestr);
1071 
1072     bufend_pos = PL_parser->bufend - buf;
1073     bufptr_pos = PL_parser->bufptr - buf;
1074     oldbufptr_pos = PL_parser->oldbufptr - buf;
1075     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1076     linestart_pos = PL_parser->linestart - buf;
1077     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1078     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1079     re_eval_start_pos = (current && PL_parser->lex_shared->re_eval_start) ?
1080                             PL_parser->lex_shared->re_eval_start - buf : 0;
1081 
1082     buf = sv_grow(linestr, len);
1083 
1084     PL_parser->bufend = buf + bufend_pos;
1085     PL_parser->bufptr = buf + bufptr_pos;
1086     PL_parser->oldbufptr = buf + oldbufptr_pos;
1087     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1088     PL_parser->linestart = buf + linestart_pos;
1089     if (PL_parser->last_uni)
1090         PL_parser->last_uni = buf + last_uni_pos;
1091     if (PL_parser->last_lop)
1092         PL_parser->last_lop = buf + last_lop_pos;
1093     if (current && PL_parser->lex_shared->re_eval_start)
1094         PL_parser->lex_shared->re_eval_start  = buf + re_eval_start_pos;
1095     return buf;
1096 }
1097 
1098 /*
1099 =for apidoc lex_stuff_pvn
1100 
1101 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1102 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1103 reallocating the buffer if necessary.  This means that lexing code that
1104 runs later will see the characters as if they had appeared in the input.
1105 It is not recommended to do this as part of normal parsing, and most
1106 uses of this facility run the risk of the inserted characters being
1107 interpreted in an unintended manner.
1108 
1109 The string to be inserted is represented by C<len> octets starting
1110 at C<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
1111 according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>.
1112 The characters are recoded for the lexer buffer, according to how the
1113 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
1114 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
1115 function is more convenient.
1116 
1117 =for apidoc Amnh||LEX_STUFF_UTF8
1118 
1119 =cut
1120 */
1121 
1122 void
1123 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1124 {
1125     char *bufptr;
1126     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1127     if (flags & ~(LEX_STUFF_UTF8))
1128         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1129     if (UTF) {
1130         if (flags & LEX_STUFF_UTF8) {
1131             goto plain_copy;
1132         } else {
1133             STRLEN highhalf = variant_under_utf8_count((U8 *) pv,
1134                                                        (U8 *) pv + len);
1135             const char *p, *e = pv+len;;
1136             if (!highhalf)
1137                 goto plain_copy;
1138             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1139             bufptr = PL_parser->bufptr;
1140             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1141             SvCUR_set(PL_parser->linestr,
1142                 SvCUR(PL_parser->linestr) + len+highhalf);
1143             PL_parser->bufend += len+highhalf;
1144             for (p = pv; p != e; p++) {
1145                 append_utf8_from_native_byte(*p, (U8 **) &bufptr);
1146             }
1147         }
1148     } else {
1149         if (flags & LEX_STUFF_UTF8) {
1150             STRLEN highhalf = 0;
1151             const char *p, *e = pv+len;
1152             for (p = pv; p != e; p++) {
1153                 U8 c = (U8)*p;
1154                 if (UTF8_IS_ABOVE_LATIN1(c)) {
1155                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
1156                                 "non-Latin-1 character into Latin-1 input");
1157                 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1158                     p++;
1159                     highhalf++;
1160                 } else assert(UTF8_IS_INVARIANT(c));
1161             }
1162             if (!highhalf)
1163                 goto plain_copy;
1164             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1165             bufptr = PL_parser->bufptr;
1166             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1167             SvCUR_set(PL_parser->linestr,
1168                 SvCUR(PL_parser->linestr) + len-highhalf);
1169             PL_parser->bufend += len-highhalf;
1170             p = pv;
1171             while (p < e) {
1172                 if (UTF8_IS_INVARIANT(*p)) {
1173                     *bufptr++ = *p;
1174                     p++;
1175                 }
1176                 else {
1177                     assert(p < e -1 );
1178                     *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
1179                     p += 2;
1180                 }
1181             }
1182         } else {
1183           plain_copy:
1184             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1185             bufptr = PL_parser->bufptr;
1186             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1187             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1188             PL_parser->bufend += len;
1189             Copy(pv, bufptr, len, char);
1190         }
1191     }
1192 }
1193 
1194 /*
1195 =for apidoc lex_stuff_pv
1196 
1197 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1198 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1199 reallocating the buffer if necessary.  This means that lexing code that
1200 runs later will see the characters as if they had appeared in the input.
1201 It is not recommended to do this as part of normal parsing, and most
1202 uses of this facility run the risk of the inserted characters being
1203 interpreted in an unintended manner.
1204 
1205 The string to be inserted is represented by octets starting at C<pv>
1206 and continuing to the first nul.  These octets are interpreted as either
1207 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1208 in C<flags>.  The characters are recoded for the lexer buffer, according
1209 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1210 If it is not convenient to nul-terminate a string to be inserted, the
1211 L</lex_stuff_pvn> function is more appropriate.
1212 
1213 =cut
1214 */
1215 
1216 void
1217 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1218 {
1219     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1220     lex_stuff_pvn(pv, strlen(pv), flags);
1221 }
1222 
1223 /*
1224 =for apidoc lex_stuff_sv
1225 
1226 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1227 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1228 reallocating the buffer if necessary.  This means that lexing code that
1229 runs later will see the characters as if they had appeared in the input.
1230 It is not recommended to do this as part of normal parsing, and most
1231 uses of this facility run the risk of the inserted characters being
1232 interpreted in an unintended manner.
1233 
1234 The string to be inserted is the string value of C<sv>.  The characters
1235 are recoded for the lexer buffer, according to how the buffer is currently
1236 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1237 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1238 need to construct a scalar.
1239 
1240 =cut
1241 */
1242 
1243 void
1244 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1245 {
1246     char *pv;
1247     STRLEN len;
1248     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1249     if (flags)
1250         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1251     pv = SvPV(sv, len);
1252     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1253 }
1254 
1255 /*
1256 =for apidoc lex_unstuff
1257 
1258 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1259 C<ptr>.  Text following C<ptr> will be moved, and the buffer shortened.
1260 This hides the discarded text from any lexing code that runs later,
1261 as if the text had never appeared.
1262 
1263 This is not the normal way to consume lexed text.  For that, use
1264 L</lex_read_to>.
1265 
1266 =cut
1267 */
1268 
1269 void
1270 Perl_lex_unstuff(pTHX_ char *ptr)
1271 {
1272     char *buf, *bufend;
1273     STRLEN unstuff_len;
1274     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1275     buf = PL_parser->bufptr;
1276     if (ptr < buf)
1277         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1278     if (ptr == buf)
1279         return;
1280     bufend = PL_parser->bufend;
1281     if (ptr > bufend)
1282         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1283     unstuff_len = ptr - buf;
1284     Move(ptr, buf, bufend+1-ptr, char);
1285     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1286     PL_parser->bufend = bufend - unstuff_len;
1287 }
1288 
1289 /*
1290 =for apidoc lex_read_to
1291 
1292 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1293 to C<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match C<ptr>,
1294 performing the correct bookkeeping whenever a newline character is passed.
1295 This is the normal way to consume lexed text.
1296 
1297 Interpretation of the buffer's octets can be abstracted out by
1298 using the slightly higher-level functions L</lex_peek_unichar> and
1299 L</lex_read_unichar>.
1300 
1301 =cut
1302 */
1303 
1304 void
1305 Perl_lex_read_to(pTHX_ char *ptr)
1306 {
1307     char *s;
1308     PERL_ARGS_ASSERT_LEX_READ_TO;
1309     s = PL_parser->bufptr;
1310     if (ptr < s || ptr > PL_parser->bufend)
1311         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1312     for (; s != ptr; s++)
1313         if (*s == '\n') {
1314             COPLINE_INC_WITH_HERELINES;
1315             PL_parser->linestart = s+1;
1316         }
1317     PL_parser->bufptr = ptr;
1318 }
1319 
1320 /*
1321 =for apidoc lex_discard_to
1322 
1323 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1324 up to C<ptr>.  The remaining content of the buffer will be moved, and
1325 all pointers into the buffer updated appropriately.  C<ptr> must not
1326 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1327 it is not permitted to discard text that has yet to be lexed.
1328 
1329 Normally it is not necessarily to do this directly, because it suffices to
1330 use the implicit discarding behaviour of L</lex_next_chunk> and things
1331 based on it.  However, if a token stretches across multiple lines,
1332 and the lexing code has kept multiple lines of text in the buffer for
1333 that purpose, then after completion of the token it would be wise to
1334 explicitly discard the now-unneeded earlier lines, to avoid future
1335 multi-line tokens growing the buffer without bound.
1336 
1337 =cut
1338 */
1339 
1340 void
1341 Perl_lex_discard_to(pTHX_ char *ptr)
1342 {
1343     char *buf;
1344     STRLEN discard_len;
1345     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1346     buf = SvPVX(PL_parser->linestr);
1347     if (ptr < buf)
1348         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1349     if (ptr == buf)
1350         return;
1351     if (ptr > PL_parser->bufptr)
1352         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1353     discard_len = ptr - buf;
1354     if (PL_parser->oldbufptr < ptr)
1355         PL_parser->oldbufptr = ptr;
1356     if (PL_parser->oldoldbufptr < ptr)
1357         PL_parser->oldoldbufptr = ptr;
1358     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1359         PL_parser->last_uni = NULL;
1360     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1361         PL_parser->last_lop = NULL;
1362     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1363     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1364     PL_parser->bufend -= discard_len;
1365     PL_parser->bufptr -= discard_len;
1366     PL_parser->oldbufptr -= discard_len;
1367     PL_parser->oldoldbufptr -= discard_len;
1368     if (PL_parser->last_uni)
1369         PL_parser->last_uni -= discard_len;
1370     if (PL_parser->last_lop)
1371         PL_parser->last_lop -= discard_len;
1372 }
1373 
1374 void
1375 Perl_notify_parser_that_changed_to_utf8(pTHX)
1376 {
1377     /* Called when $^H is changed to indicate that HINT_UTF8 has changed from
1378      * off to on.  At compile time, this has the effect of entering a 'use
1379      * utf8' section.  This means that any input was not previously checked for
1380      * UTF-8 (because it was off), but now we do need to check it, or our
1381      * assumptions about the input being sane could be wrong, and we could
1382      * segfault.  This routine just sets a flag so that the next time we look
1383      * at the input we do the well-formed UTF-8 check.  If we aren't in the
1384      * proper phase, there may not be a parser object, but if there is, setting
1385      * the flag is harmless */
1386 
1387     if (PL_parser) {
1388         PL_parser->recheck_utf8_validity = TRUE;
1389     }
1390 }
1391 
1392 /*
1393 =for apidoc lex_next_chunk
1394 
1395 Reads in the next chunk of text to be lexed, appending it to
1396 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1397 looked to the end of the current chunk and wants to know more.  It is
1398 usual, but not necessary, for lexing to have consumed the entirety of
1399 the current chunk at this time.
1400 
1401 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1402 chunk (i.e., the current chunk has been entirely consumed), normally the
1403 current chunk will be discarded at the same time that the new chunk is
1404 read in.  If C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, the current chunk
1405 will not be discarded.  If the current chunk has not been entirely
1406 consumed, then it will not be discarded regardless of the flag.
1407 
1408 Returns true if some new text was added to the buffer, or false if the
1409 buffer has reached the end of the input text.
1410 
1411 =for apidoc Amnh||LEX_KEEP_PREVIOUS
1412 
1413 =cut
1414 */
1415 
1416 #define LEX_FAKE_EOF 0x80000000
1417 #define LEX_NO_TERM  0x40000000 /* here-doc */
1418 
1419 bool
1420 Perl_lex_next_chunk(pTHX_ U32 flags)
1421 {
1422     SV *linestr;
1423     char *buf;
1424     STRLEN old_bufend_pos, new_bufend_pos;
1425     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1426     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1427     bool got_some_for_debugger = 0;
1428     bool got_some;
1429 
1430     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1431         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1432     if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
1433         return FALSE;
1434     linestr = PL_parser->linestr;
1435     buf = SvPVX(linestr);
1436     if (!(flags & LEX_KEEP_PREVIOUS)
1437           && PL_parser->bufptr == PL_parser->bufend)
1438     {
1439         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1440         linestart_pos = 0;
1441         if (PL_parser->last_uni != PL_parser->bufend)
1442             PL_parser->last_uni = NULL;
1443         if (PL_parser->last_lop != PL_parser->bufend)
1444             PL_parser->last_lop = NULL;
1445         last_uni_pos = last_lop_pos = 0;
1446         *buf = 0;
1447         SvCUR_set(linestr, 0);
1448     } else {
1449         old_bufend_pos = PL_parser->bufend - buf;
1450         bufptr_pos = PL_parser->bufptr - buf;
1451         oldbufptr_pos = PL_parser->oldbufptr - buf;
1452         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1453         linestart_pos = PL_parser->linestart - buf;
1454         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1455         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1456     }
1457     if (flags & LEX_FAKE_EOF) {
1458         goto eof;
1459     } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1460         got_some = 0;
1461     } else if (filter_gets(linestr, old_bufend_pos)) {
1462         got_some = 1;
1463         got_some_for_debugger = 1;
1464     } else if (flags & LEX_NO_TERM) {
1465         got_some = 0;
1466     } else {
1467         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1468             SvPVCLEAR(linestr);
1469         eof:
1470         /* End of real input.  Close filehandle (unless it was STDIN),
1471          * then add implicit termination.
1472          */
1473         if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1474             PerlIO_clearerr(PL_parser->rsfp);
1475         else if (PL_parser->rsfp)
1476             (void)PerlIO_close(PL_parser->rsfp);
1477         PL_parser->rsfp = NULL;
1478         PL_parser->in_pod = PL_parser->filtered = 0;
1479         if (!PL_in_eval && PL_minus_p) {
1480             sv_catpvs(linestr,
1481                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1482             PL_minus_n = PL_minus_p = 0;
1483         } else if (!PL_in_eval && PL_minus_n) {
1484             sv_catpvs(linestr, /*{*/";}");
1485             PL_minus_n = 0;
1486         } else
1487             sv_catpvs(linestr, ";");
1488         got_some = 1;
1489     }
1490     buf = SvPVX(linestr);
1491     new_bufend_pos = SvCUR(linestr);
1492     PL_parser->bufend = buf + new_bufend_pos;
1493     PL_parser->bufptr = buf + bufptr_pos;
1494 
1495     if (UTF) {
1496         const U8* first_bad_char_loc;
1497         if (UNLIKELY(! is_utf8_string_loc(
1498                             (U8 *) PL_parser->bufptr,
1499                                    PL_parser->bufend - PL_parser->bufptr,
1500                                    &first_bad_char_loc)))
1501         {
1502             _force_out_malformed_utf8_message(first_bad_char_loc,
1503                                               (U8 *) PL_parser->bufend,
1504                                               0,
1505                                               1 /* 1 means die */ );
1506             NOT_REACHED; /* NOTREACHED */
1507         }
1508     }
1509 
1510     PL_parser->oldbufptr = buf + oldbufptr_pos;
1511     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1512     PL_parser->linestart = buf + linestart_pos;
1513     if (PL_parser->last_uni)
1514         PL_parser->last_uni = buf + last_uni_pos;
1515     if (PL_parser->last_lop)
1516         PL_parser->last_lop = buf + last_lop_pos;
1517     if (PL_parser->preambling != NOLINE) {
1518         CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1519         PL_parser->preambling = NOLINE;
1520     }
1521     if (   got_some_for_debugger
1522         && PERLDB_LINE_OR_SAVESRC
1523         && PL_curstash != PL_debstash)
1524     {
1525         /* debugger active and we're not compiling the debugger code,
1526          * so store the line into the debugger's array of lines
1527          */
1528         update_debugger_info(NULL, buf+old_bufend_pos,
1529             new_bufend_pos-old_bufend_pos);
1530     }
1531     return got_some;
1532 }
1533 
1534 /*
1535 =for apidoc lex_peek_unichar
1536 
1537 Looks ahead one (Unicode) character in the text currently being lexed.
1538 Returns the codepoint (unsigned integer value) of the next character,
1539 or -1 if lexing has reached the end of the input text.  To consume the
1540 peeked character, use L</lex_read_unichar>.
1541 
1542 If the next character is in (or extends into) the next chunk of input
1543 text, the next chunk will be read in.  Normally the current chunk will be
1544 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1545 bit set, then the current chunk will not be discarded.
1546 
1547 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1548 is encountered, an exception is generated.
1549 
1550 =cut
1551 */
1552 
1553 I32
1554 Perl_lex_peek_unichar(pTHX_ U32 flags)
1555 {
1556     char *s, *bufend;
1557     if (flags & ~(LEX_KEEP_PREVIOUS))
1558         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1559     s = PL_parser->bufptr;
1560     bufend = PL_parser->bufend;
1561     if (UTF) {
1562         U8 head;
1563         I32 unichar;
1564         STRLEN len, retlen;
1565         if (s == bufend) {
1566             if (!lex_next_chunk(flags))
1567                 return -1;
1568             s = PL_parser->bufptr;
1569             bufend = PL_parser->bufend;
1570         }
1571         head = (U8)*s;
1572         if (UTF8_IS_INVARIANT(head))
1573             return head;
1574         if (UTF8_IS_START(head)) {
1575             len = UTF8SKIP(&head);
1576             while ((STRLEN)(bufend-s) < len) {
1577                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1578                     break;
1579                 s = PL_parser->bufptr;
1580                 bufend = PL_parser->bufend;
1581             }
1582         }
1583         unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1584         if (retlen == (STRLEN)-1) {
1585             _force_out_malformed_utf8_message((U8 *) s,
1586                                               (U8 *) bufend,
1587                                               0,
1588                                               1 /* 1 means die */ );
1589             NOT_REACHED; /* NOTREACHED */
1590         }
1591         return unichar;
1592     } else {
1593         if (s == bufend) {
1594             if (!lex_next_chunk(flags))
1595                 return -1;
1596             s = PL_parser->bufptr;
1597         }
1598         return (U8)*s;
1599     }
1600 }
1601 
1602 /*
1603 =for apidoc lex_read_unichar
1604 
1605 Reads the next (Unicode) character in the text currently being lexed.
1606 Returns the codepoint (unsigned integer value) of the character read,
1607 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1608 if lexing has reached the end of the input text.  To non-destructively
1609 examine the next character, use L</lex_peek_unichar> instead.
1610 
1611 If the next character is in (or extends into) the next chunk of input
1612 text, the next chunk will be read in.  Normally the current chunk will be
1613 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1614 bit set, then the current chunk will not be discarded.
1615 
1616 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1617 is encountered, an exception is generated.
1618 
1619 =cut
1620 */
1621 
1622 I32
1623 Perl_lex_read_unichar(pTHX_ U32 flags)
1624 {
1625     I32 c;
1626     if (flags & ~(LEX_KEEP_PREVIOUS))
1627         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1628     c = lex_peek_unichar(flags);
1629     if (c != -1) {
1630         if (c == '\n')
1631             COPLINE_INC_WITH_HERELINES;
1632         if (UTF)
1633             PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1634         else
1635             ++(PL_parser->bufptr);
1636     }
1637     return c;
1638 }
1639 
1640 /*
1641 =for apidoc lex_read_space
1642 
1643 Reads optional spaces, in Perl style, in the text currently being
1644 lexed.  The spaces may include ordinary whitespace characters and
1645 Perl-style comments.  C<#line> directives are processed if encountered.
1646 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1647 at a non-space character (or the end of the input text).
1648 
1649 If spaces extend into the next chunk of input text, the next chunk will
1650 be read in.  Normally the current chunk will be discarded at the same
1651 time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, then the current
1652 chunk will not be discarded.
1653 
1654 =cut
1655 */
1656 
1657 #define LEX_NO_INCLINE    0x40000000
1658 #define LEX_NO_NEXT_CHUNK 0x80000000
1659 
1660 void
1661 Perl_lex_read_space(pTHX_ U32 flags)
1662 {
1663     char *s, *bufend;
1664     const bool can_incline = !(flags & LEX_NO_INCLINE);
1665     bool need_incline = 0;
1666     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1667         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1668     s = PL_parser->bufptr;
1669     bufend = PL_parser->bufend;
1670     while (1) {
1671         char c = *s;
1672         if (c == '#') {
1673             do {
1674                 c = *++s;
1675             } while (!(c == '\n' || (c == 0 && s == bufend)));
1676         } else if (c == '\n') {
1677             s++;
1678             if (can_incline) {
1679                 PL_parser->linestart = s;
1680                 if (s == bufend)
1681                     need_incline = 1;
1682                 else
1683                     incline(s, bufend);
1684             }
1685         } else if (isSPACE(c)) {
1686             s++;
1687         } else if (c == 0 && s == bufend) {
1688             bool got_more;
1689             line_t l;
1690             if (flags & LEX_NO_NEXT_CHUNK)
1691                 break;
1692             PL_parser->bufptr = s;
1693             l = CopLINE(PL_curcop);
1694             CopLINE(PL_curcop) += PL_parser->herelines + 1;
1695             got_more = lex_next_chunk(flags);
1696             CopLINE_set(PL_curcop, l);
1697             s = PL_parser->bufptr;
1698             bufend = PL_parser->bufend;
1699             if (!got_more)
1700                 break;
1701             if (can_incline && need_incline && PL_parser->rsfp) {
1702                 incline(s, bufend);
1703                 need_incline = 0;
1704             }
1705         } else if (!c) {
1706             s++;
1707         } else {
1708             break;
1709         }
1710     }
1711     PL_parser->bufptr = s;
1712 }
1713 
1714 /*
1715 
1716 =for apidoc validate_proto
1717 
1718 This function performs syntax checking on a prototype, C<proto>.
1719 If C<warn> is true, any illegal characters or mismatched brackets
1720 will trigger illegalproto warnings, declaring that they were
1721 detected in the prototype for C<name>.
1722 
1723 The return value is C<true> if this is a valid prototype, and
1724 C<false> if it is not, regardless of whether C<warn> was C<true> or
1725 C<false>.
1726 
1727 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1728 
1729 =cut
1730 
1731  */
1732 
1733 bool
1734 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn, bool curstash)
1735 {
1736     STRLEN len, origlen;
1737     char *p;
1738     bool bad_proto = FALSE;
1739     bool in_brackets = FALSE;
1740     bool after_slash = FALSE;
1741     char greedy_proto = ' ';
1742     bool proto_after_greedy_proto = FALSE;
1743     bool must_be_last = FALSE;
1744     bool underscore = FALSE;
1745     bool bad_proto_after_underscore = FALSE;
1746 
1747     PERL_ARGS_ASSERT_VALIDATE_PROTO;
1748 
1749     if (!proto)
1750         return TRUE;
1751 
1752     p = SvPV(proto, len);
1753     origlen = len;
1754     for (; len--; p++) {
1755         if (!isSPACE(*p)) {
1756             if (must_be_last)
1757                 proto_after_greedy_proto = TRUE;
1758             if (underscore) {
1759                 if (!memCHRs(";@%", *p))
1760                     bad_proto_after_underscore = TRUE;
1761                 underscore = FALSE;
1762             }
1763             if (!memCHRs("$@%*;[]&\\_+", *p) || *p == '\0') {
1764                 bad_proto = TRUE;
1765             }
1766             else {
1767                 if (*p == '[')
1768                     in_brackets = TRUE;
1769                 else if (*p == ']')
1770                     in_brackets = FALSE;
1771                 else if ((*p == '@' || *p == '%')
1772                          && !after_slash
1773                          && !in_brackets )
1774                 {
1775                     must_be_last = TRUE;
1776                     greedy_proto = *p;
1777                 }
1778                 else if (*p == '_')
1779                     underscore = TRUE;
1780             }
1781             if (*p == '\\')
1782                 after_slash = TRUE;
1783             else
1784                 after_slash = FALSE;
1785         }
1786     }
1787 
1788     if (warn) {
1789         SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1790         p -= origlen;
1791         p = SvUTF8(proto)
1792             ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1793                              origlen, UNI_DISPLAY_ISPRINT)
1794             : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1795 
1796         if (curstash && !memchr(SvPVX(name), ':', SvCUR(name))) {
1797             SV *name2 = sv_2mortal(newSVsv(PL_curstname));
1798             sv_catpvs(name2, "::");
1799             sv_catsv(name2, (SV *)name);
1800             name = name2;
1801         }
1802 
1803         if (proto_after_greedy_proto)
1804             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1805                         "Prototype after '%c' for %" SVf " : %s",
1806                         greedy_proto, SVfARG(name), p);
1807         if (in_brackets)
1808             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1809                         "Missing ']' in prototype for %" SVf " : %s",
1810                         SVfARG(name), p);
1811         if (bad_proto)
1812             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1813                         "Illegal character in prototype for %" SVf " : %s",
1814                         SVfARG(name), p);
1815         if (bad_proto_after_underscore)
1816             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1817                         "Illegal character after '_' in prototype for %" SVf " : %s",
1818                         SVfARG(name), p);
1819     }
1820 
1821     return (! (proto_after_greedy_proto || bad_proto) );
1822 }
1823 
1824 /*
1825  * S_incline
1826  * This subroutine has nothing to do with tilting, whether at windmills
1827  * or pinball tables.  Its name is short for "increment line".  It
1828  * increments the current line number in CopLINE(PL_curcop) and checks
1829  * to see whether the line starts with a comment of the form
1830  *    # line 500 "foo.pm"
1831  * If so, it sets the current line number and file to the values in the comment.
1832  */
1833 
1834 STATIC void
1835 S_incline(pTHX_ const char *s, const char *end)
1836 {
1837     const char *t;
1838     const char *n;
1839     const char *e;
1840     line_t line_num;
1841     UV uv;
1842 
1843     PERL_ARGS_ASSERT_INCLINE;
1844 
1845     assert(end >= s);
1846 
1847     COPLINE_INC_WITH_HERELINES;
1848     if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1849      && s+1 == PL_bufend && *s == ';') {
1850         /* fake newline in string eval */
1851         CopLINE_dec(PL_curcop);
1852         return;
1853     }
1854     if (*s++ != '#')
1855         return;
1856     while (SPACE_OR_TAB(*s))
1857         s++;
1858     if (memBEGINs(s, (STRLEN) (end - s), "line"))
1859         s += sizeof("line") - 1;
1860     else
1861         return;
1862     if (SPACE_OR_TAB(*s))
1863         s++;
1864     else
1865         return;
1866     while (SPACE_OR_TAB(*s))
1867         s++;
1868     if (!isDIGIT(*s))
1869         return;
1870 
1871     n = s;
1872     while (isDIGIT(*s))
1873         s++;
1874     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1875         return;
1876     while (SPACE_OR_TAB(*s))
1877         s++;
1878     if (*s == '"' && (t = (char *) memchr(s+1, '"', end - s))) {
1879         s++;
1880         e = t + 1;
1881     }
1882     else {
1883         t = s;
1884         while (*t && !isSPACE(*t))
1885             t++;
1886         e = t;
1887     }
1888     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1889         e++;
1890     if (*e != '\n' && *e != '\0')
1891         return;		/* false alarm */
1892 
1893     if (!grok_atoUV(n, &uv, &e))
1894         return;
1895     line_num = ((line_t)uv) - 1;
1896 
1897     if (t - s > 0) {
1898         const STRLEN len = t - s;
1899 
1900         if (!PL_rsfp && !PL_parser->filtered) {
1901             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1902              * to *{"::_<newfilename"} */
1903             /* However, the long form of evals is only turned on by the
1904                debugger - usually they're "(eval %lu)" */
1905             GV * const cfgv = CopFILEGV(PL_curcop);
1906             if (cfgv) {
1907                 char smallbuf[128];
1908                 STRLEN tmplen2 = len;
1909                 char *tmpbuf2;
1910                 GV *gv2;
1911 
1912                 if (tmplen2 + 2 <= sizeof smallbuf)
1913                     tmpbuf2 = smallbuf;
1914                 else
1915                     Newx(tmpbuf2, tmplen2 + 2, char);
1916 
1917                 tmpbuf2[0] = '_';
1918                 tmpbuf2[1] = '<';
1919 
1920                 memcpy(tmpbuf2 + 2, s, tmplen2);
1921                 tmplen2 += 2;
1922 
1923                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1924                 if (!isGV(gv2)) {
1925                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1926                     /* adjust ${"::_<newfilename"} to store the new file name */
1927                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1928                     /* The line number may differ. If that is the case,
1929                        alias the saved lines that are in the array.
1930                        Otherwise alias the whole array. */
1931                     if (CopLINE(PL_curcop) == line_num) {
1932                         GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1933                         GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1934                     }
1935                     else if (GvAV(cfgv)) {
1936                         AV * const av = GvAV(cfgv);
1937                         const line_t start = CopLINE(PL_curcop)+1;
1938                         SSize_t items = AvFILLp(av) - start;
1939                         if (items > 0) {
1940                             AV * const av2 = GvAVn(gv2);
1941                             SV **svp = AvARRAY(av) + start;
1942                             Size_t l = line_num+1;
1943                             while (items-- && l < SSize_t_MAX && l == (line_t)l)
1944                                 av_store(av2, (SSize_t)l++, SvREFCNT_inc(*svp++));
1945                         }
1946                     }
1947                 }
1948 
1949                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1950             }
1951         }
1952         CopFILE_free(PL_curcop);
1953         CopFILE_setn(PL_curcop, s, len);
1954     }
1955     CopLINE_set(PL_curcop, line_num);
1956 }
1957 
1958 STATIC void
1959 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1960 {
1961     AV *av = CopFILEAVx(PL_curcop);
1962     if (av) {
1963         SV * sv;
1964         if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1965         else {
1966             sv = *av_fetch(av, 0, 1);
1967             SvUPGRADE(sv, SVt_PVMG);
1968         }
1969         if (!SvPOK(sv)) SvPVCLEAR(sv);
1970         if (orig_sv)
1971             sv_catsv(sv, orig_sv);
1972         else
1973             sv_catpvn(sv, buf, len);
1974         if (!SvIOK(sv)) {
1975             (void)SvIOK_on(sv);
1976             SvIV_set(sv, 0);
1977         }
1978         if (PL_parser->preambling == NOLINE)
1979             av_store(av, CopLINE(PL_curcop), sv);
1980     }
1981 }
1982 
1983 /*
1984  * skipspace
1985  * Called to gobble the appropriate amount and type of whitespace.
1986  * Skips comments as well.
1987  * Returns the next character after the whitespace that is skipped.
1988  *
1989  * peekspace
1990  * Same thing, but look ahead without incrementing line numbers or
1991  * adjusting PL_linestart.
1992  */
1993 
1994 #define skipspace(s) skipspace_flags(s, 0)
1995 #define peekspace(s) skipspace_flags(s, LEX_NO_INCLINE)
1996 
1997 char *
1998 Perl_skipspace_flags(pTHX_ char *s, U32 flags)
1999 {
2000     PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
2001     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2002         while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s))
2003             s++;
2004     } else {
2005         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
2006         PL_bufptr = s;
2007         lex_read_space(flags | LEX_KEEP_PREVIOUS |
2008                 (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ?
2009                     LEX_NO_NEXT_CHUNK : 0));
2010         s = PL_bufptr;
2011         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
2012         if (PL_linestart > PL_bufptr)
2013             PL_bufptr = PL_linestart;
2014         return s;
2015     }
2016     return s;
2017 }
2018 
2019 /*
2020  * S_check_uni
2021  * Check the unary operators to ensure there's no ambiguity in how they're
2022  * used.  An ambiguous piece of code would be:
2023  *     rand + 5
2024  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
2025  * the +5 is its argument.
2026  */
2027 
2028 STATIC void
2029 S_check_uni(pTHX)
2030 {
2031     const char *s;
2032 
2033     if (PL_oldoldbufptr != PL_last_uni)
2034         return;
2035     while (isSPACE(*PL_last_uni))
2036         PL_last_uni++;
2037     s = PL_last_uni;
2038     while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-')
2039         s += UTF ? UTF8SKIP(s) : 1;
2040     if (s < PL_bufptr && memchr(s, '(', PL_bufptr - s))
2041         return;
2042 
2043     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
2044                      "Warning: Use of \"%" UTF8f "\" without parentheses is ambiguous",
2045                      UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
2046 }
2047 
2048 /*
2049  * LOP : macro to build a list operator.  Its behaviour has been replaced
2050  * with a subroutine, S_lop() for which LOP is just another name.
2051  */
2052 
2053 #define LOP(f,x) return lop(f,x,s)
2054 
2055 /*
2056  * S_lop
2057  * Build a list operator (or something that might be one).  The rules:
2058  *  - if we have a next token, then it's a list operator (no parens) for
2059  *    which the next token has already been parsed; e.g.,
2060  *       sort foo @args
2061  *       sort foo (@args)
2062  *  - if the next thing is an opening paren, then it's a function
2063  *  - else it's a list operator
2064  */
2065 
2066 STATIC I32
2067 S_lop(pTHX_ I32 f, U8 x, char *s)
2068 {
2069     PERL_ARGS_ASSERT_LOP;
2070 
2071     pl_yylval.ival = f;
2072     CLINE;
2073     PL_bufptr = s;
2074     PL_last_lop = PL_oldbufptr;
2075     PL_last_lop_op = (OPCODE)f;
2076     if (PL_nexttoke)
2077         goto lstop;
2078     PL_expect = x;
2079     if (*s == '(')
2080         return REPORT(FUNC);
2081     s = skipspace(s);
2082     if (*s == '(')
2083         return REPORT(FUNC);
2084     else {
2085         lstop:
2086         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
2087             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
2088         return REPORT(LSTOP);
2089     }
2090 }
2091 
2092 /*
2093  * S_force_next
2094  * When the lexer realizes it knows the next token (for instance,
2095  * it is reordering tokens for the parser) then it can call S_force_next
2096  * to know what token to return the next time the lexer is called.  Caller
2097  * will need to set PL_nextval[] and possibly PL_expect to ensure
2098  * the lexer handles the token correctly.
2099  */
2100 
2101 STATIC void
2102 S_force_next(pTHX_ I32 type)
2103 {
2104 #ifdef DEBUGGING
2105     if (DEBUG_T_TEST) {
2106         PerlIO_printf(Perl_debug_log, "### forced token:\n");
2107         tokereport(type, &NEXTVAL_NEXTTOKE);
2108     }
2109 #endif
2110     assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
2111     PL_nexttype[PL_nexttoke] = type;
2112     PL_nexttoke++;
2113 }
2114 
2115 /*
2116  * S_postderef
2117  *
2118  * This subroutine handles postfix deref syntax after the arrow has already
2119  * been emitted.  @* $* etc. are emitted as two separate tokens right here.
2120  * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
2121  * only the first, leaving yylex to find the next.
2122  */
2123 
2124 static int
2125 S_postderef(pTHX_ int const funny, char const next)
2126 {
2127     assert(funny == DOLSHARP
2128         || funny == PERLY_DOLLAR
2129         || funny == PERLY_SNAIL
2130         || funny == PERLY_PERCENT_SIGN
2131         || funny == PERLY_AMPERSAND
2132         || funny == PERLY_STAR
2133     );
2134     if (next == '*') {
2135         PL_expect = XOPERATOR;
2136         if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
2137             assert(PERLY_SNAIL == funny || PERLY_DOLLAR == funny || DOLSHARP == funny);
2138             PL_lex_state = LEX_INTERPEND;
2139             if (PERLY_SNAIL == funny)
2140                 force_next(POSTJOIN);
2141         }
2142         force_next(PERLY_STAR);
2143         PL_bufptr+=2;
2144     }
2145     else {
2146         if (PERLY_SNAIL == funny && PL_lex_state == LEX_INTERPNORMAL
2147          && !PL_lex_brackets)
2148             PL_lex_dojoin = 2;
2149         PL_expect = XOPERATOR;
2150         PL_bufptr++;
2151     }
2152     return funny;
2153 }
2154 
2155 void
2156 Perl_yyunlex(pTHX)
2157 {
2158     int yyc = PL_parser->yychar;
2159     if (yyc != YYEMPTY) {
2160         if (yyc) {
2161             NEXTVAL_NEXTTOKE = PL_parser->yylval;
2162             if (yyc == PERLY_BRACE_OPEN || yyc == HASHBRACK || yyc == PERLY_BRACKET_OPEN) {
2163                 PL_lex_allbrackets--;
2164                 PL_lex_brackets--;
2165                 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2166             } else if (yyc == PERLY_PAREN_OPEN) {
2167                 PL_lex_allbrackets--;
2168                 yyc |= (2<<24);
2169             }
2170             force_next(yyc);
2171         }
2172         PL_parser->yychar = YYEMPTY;
2173     }
2174 }
2175 
2176 STATIC SV *
2177 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2178 {
2179     SV * const sv = newSVpvn_utf8(start, len,
2180                     ! IN_BYTES
2181                   &&  UTF
2182                   &&  len != 0
2183                   &&  is_utf8_non_invariant_string((const U8*)start, len));
2184     return sv;
2185 }
2186 
2187 /*
2188  * S_force_word
2189  * When the lexer knows the next thing is a word (for instance, it has
2190  * just seen -> and it knows that the next char is a word char, then
2191  * it calls S_force_word to stick the next word into the PL_nexttoke/val
2192  * lookahead.
2193  *
2194  * Arguments:
2195  *   char *start : buffer position (must be within PL_linestr)
2196  *   int token   : PL_next* will be this type of bare word
2197  *                 (e.g., METHOD,BAREWORD)
2198  *   int check_keyword : if true, Perl checks to make sure the word isn't
2199  *       a keyword (do this if the word is a label, e.g. goto FOO)
2200  *   int allow_pack : if true, : characters will also be allowed (require,
2201  *       use, etc. do this)
2202  */
2203 
2204 STATIC char *
2205 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2206 {
2207     char *s;
2208     STRLEN len;
2209 
2210     PERL_ARGS_ASSERT_FORCE_WORD;
2211 
2212     start = skipspace(start);
2213     s = start;
2214     if (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
2215         || (allow_pack && *s == ':' && s[1] == ':') )
2216     {
2217         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2218         if (check_keyword) {
2219           char *s2 = PL_tokenbuf;
2220           STRLEN len2 = len;
2221           if (allow_pack && memBEGINPs(s2, len, "CORE::")) {
2222             s2 += sizeof("CORE::") - 1;
2223             len2 -= sizeof("CORE::") - 1;
2224           }
2225           if (keyword(s2, len2, 0))
2226             return start;
2227         }
2228         if (token == METHOD) {
2229             s = skipspace(s);
2230             if (*s == '(')
2231                 PL_expect = XTERM;
2232             else {
2233                 PL_expect = XOPERATOR;
2234             }
2235         }
2236         NEXTVAL_NEXTTOKE.opval
2237             = newSVOP(OP_CONST,0,
2238                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2239         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2240         force_next(token);
2241     }
2242     return s;
2243 }
2244 
2245 /*
2246  * S_force_ident
2247  * Called when the lexer wants $foo *foo &foo etc, but the program
2248  * text only contains the "foo" portion.  The first argument is a pointer
2249  * to the "foo", and the second argument is the type symbol to prefix.
2250  * Forces the next token to be a "BAREWORD".
2251  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2252  */
2253 
2254 STATIC void
2255 S_force_ident(pTHX_ const char *s, int kind)
2256 {
2257     PERL_ARGS_ASSERT_FORCE_IDENT;
2258 
2259     if (s[0]) {
2260         const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2261         OP* const o = newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2262                                                                 UTF ? SVf_UTF8 : 0));
2263         NEXTVAL_NEXTTOKE.opval = o;
2264         force_next(BAREWORD);
2265         if (kind) {
2266             o->op_private = OPpCONST_ENTERED;
2267             /* XXX see note in pp_entereval() for why we forgo typo
2268                warnings if the symbol must be introduced in an eval.
2269                GSAR 96-10-12 */
2270             gv_fetchpvn_flags(s, len,
2271                               (PL_in_eval ? GV_ADDMULTI
2272                               : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2273                               kind == PERLY_DOLLAR ? SVt_PV :
2274                               kind == PERLY_SNAIL ? SVt_PVAV :
2275                               kind == PERLY_PERCENT_SIGN ? SVt_PVHV :
2276                               SVt_PVGV
2277                               );
2278         }
2279     }
2280 }
2281 
2282 static void
2283 S_force_ident_maybe_lex(pTHX_ char pit)
2284 {
2285     NEXTVAL_NEXTTOKE.ival = pit;
2286     force_next('p');
2287 }
2288 
2289 NV
2290 Perl_str_to_version(pTHX_ SV *sv)
2291 {
2292     NV retval = 0.0;
2293     NV nshift = 1.0;
2294     STRLEN len;
2295     const char *start = SvPV_const(sv,len);
2296     const char * const end = start + len;
2297     const bool utf = cBOOL(SvUTF8(sv));
2298 
2299     PERL_ARGS_ASSERT_STR_TO_VERSION;
2300 
2301     while (start < end) {
2302         STRLEN skip;
2303         UV n;
2304         if (utf)
2305             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2306         else {
2307             n = *(U8*)start;
2308             skip = 1;
2309         }
2310         retval += ((NV)n)/nshift;
2311         start += skip;
2312         nshift *= 1000;
2313     }
2314     return retval;
2315 }
2316 
2317 /*
2318  * S_force_version
2319  * Forces the next token to be a version number.
2320  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2321  * and if "guessing" is TRUE, then no new token is created (and the caller
2322  * must use an alternative parsing method).
2323  */
2324 
2325 STATIC char *
2326 S_force_version(pTHX_ char *s, int guessing)
2327 {
2328     OP *version = NULL;
2329     char *d;
2330 
2331     PERL_ARGS_ASSERT_FORCE_VERSION;
2332 
2333     s = skipspace(s);
2334 
2335     d = s;
2336     if (*d == 'v')
2337         d++;
2338     if (isDIGIT(*d)) {
2339         while (isDIGIT(*d) || *d == '_' || *d == '.')
2340             d++;
2341         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2342             SV *ver;
2343             s = scan_num(s, &pl_yylval);
2344             version = pl_yylval.opval;
2345             ver = cSVOPx(version)->op_sv;
2346             if (SvPOK(ver) && !SvNIOK(ver)) {
2347                 SvUPGRADE(ver, SVt_PVNV);
2348                 SvNV_set(ver, str_to_version(ver));
2349                 SvNOK_on(ver);		/* hint that it is a version */
2350             }
2351         }
2352         else if (guessing) {
2353             return s;
2354         }
2355     }
2356 
2357     /* NOTE: The parser sees the package name and the VERSION swapped */
2358     NEXTVAL_NEXTTOKE.opval = version;
2359     force_next(BAREWORD);
2360 
2361     return s;
2362 }
2363 
2364 /*
2365  * S_force_strict_version
2366  * Forces the next token to be a version number using strict syntax rules.
2367  */
2368 
2369 STATIC char *
2370 S_force_strict_version(pTHX_ char *s)
2371 {
2372     OP *version = NULL;
2373     const char *errstr = NULL;
2374 
2375     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2376 
2377     while (isSPACE(*s)) /* leading whitespace */
2378         s++;
2379 
2380     if (is_STRICT_VERSION(s,&errstr)) {
2381         SV *ver = newSV_type(SVt_NULL);
2382         s = (char *)scan_version(s, ver, 0);
2383         version = newSVOP(OP_CONST, 0, ver);
2384     }
2385     else if ((*s != ';' && *s != '{' && *s != '}' )
2386              && (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' )))
2387     {
2388         PL_bufptr = s;
2389         if (errstr)
2390             yyerror(errstr); /* version required */
2391         return s;
2392     }
2393 
2394     /* NOTE: The parser sees the package name and the VERSION swapped */
2395     NEXTVAL_NEXTTOKE.opval = version;
2396     force_next(BAREWORD);
2397 
2398     return s;
2399 }
2400 
2401 /*
2402  * S_tokeq
2403  * Turns any \\ into \ in a quoted string passed in in 'sv', returning 'sv',
2404  * modified as necessary.  However, if HINT_NEW_STRING is on, 'sv' is
2405  * unchanged, and a new SV containing the modified input is returned.
2406  */
2407 
2408 STATIC SV *
2409 S_tokeq(pTHX_ SV *sv)
2410 {
2411     char *s;
2412     char *send;
2413     char *d;
2414     SV *pv = sv;
2415 
2416     PERL_ARGS_ASSERT_TOKEQ;
2417 
2418     assert (SvPOK(sv));
2419     assert (SvLEN(sv));
2420     assert (!SvIsCOW(sv));
2421     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2422         goto finish;
2423     s = SvPVX(sv);
2424     send = SvEND(sv);
2425     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2426     while (s < send && !(*s == '\\' && s[1] == '\\'))
2427         s++;
2428     if (s == send)
2429         goto finish;
2430     d = s;
2431     if ( PL_hints & HINT_NEW_STRING ) {
2432         pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2433                             SVs_TEMP | SvUTF8(sv));
2434     }
2435     while (s < send) {
2436         if (*s == '\\') {
2437             if (s + 1 < send && (s[1] == '\\'))
2438                 s++;		/* all that, just for this */
2439         }
2440         *d++ = *s++;
2441     }
2442     *d = '\0';
2443     SvCUR_set(sv, d - SvPVX_const(sv));
2444   finish:
2445     if ( PL_hints & HINT_NEW_STRING )
2446        return new_constant(NULL, 0, "q", sv, pv, "q", 1, NULL);
2447     return sv;
2448 }
2449 
2450 /*
2451  * Now come three functions related to double-quote context,
2452  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2453  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2454  * interact with PL_lex_state, and create fake ( ... ) argument lists
2455  * to handle functions and concatenation.
2456  * For example,
2457  *   "foo\lbar"
2458  * is tokenised as
2459  *    stringify ( const[foo] concat lcfirst ( const[bar] ) )
2460  */
2461 
2462 /*
2463  * S_sublex_start
2464  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2465  *
2466  * Pattern matching will set PL_lex_op to the pattern-matching op to
2467  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2468  *
2469  * OP_CONST is easy--just make the new op and return.
2470  *
2471  * Everything else becomes a FUNC.
2472  *
2473  * Sets PL_lex_state to LEX_INTERPPUSH unless ival was OP_NULL or we
2474  * had an OP_CONST.  This just sets us up for a
2475  * call to S_sublex_push().
2476  */
2477 
2478 STATIC I32
2479 S_sublex_start(pTHX)
2480 {
2481     const I32 op_type = pl_yylval.ival;
2482 
2483     if (op_type == OP_NULL) {
2484         pl_yylval.opval = PL_lex_op;
2485         PL_lex_op = NULL;
2486         return THING;
2487     }
2488     if (op_type == OP_CONST) {
2489         SV *sv = PL_lex_stuff;
2490         PL_lex_stuff = NULL;
2491         sv = tokeq(sv);
2492 
2493         if (SvTYPE(sv) == SVt_PVIV) {
2494             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2495             STRLEN len;
2496             const char * const p = SvPV_const(sv, len);
2497             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2498             SvREFCNT_dec(sv);
2499             sv = nsv;
2500         }
2501         pl_yylval.opval = newSVOP(op_type, 0, sv);
2502         return THING;
2503     }
2504 
2505     PL_parser->lex_super_state = PL_lex_state;
2506     PL_parser->lex_sub_inwhat = (U16)op_type;
2507     PL_parser->lex_sub_op = PL_lex_op;
2508     PL_parser->sub_no_recover = FALSE;
2509     PL_parser->sub_error_count = PL_error_count;
2510     PL_lex_state = LEX_INTERPPUSH;
2511 
2512     PL_expect = XTERM;
2513     if (PL_lex_op) {
2514         pl_yylval.opval = PL_lex_op;
2515         PL_lex_op = NULL;
2516         return PMFUNC;
2517     }
2518     else
2519         return FUNC;
2520 }
2521 
2522 /*
2523  * S_sublex_push
2524  * Create a new scope to save the lexing state.  The scope will be
2525  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2526  * to the uc, lc, etc. found before.
2527  * Sets PL_lex_state to LEX_INTERPCONCAT.
2528  */
2529 
2530 STATIC I32
2531 S_sublex_push(pTHX)
2532 {
2533     LEXSHARED *shared;
2534     const bool is_heredoc = PL_multi_close == '<';
2535     ENTER;
2536 
2537     PL_lex_state = PL_parser->lex_super_state;
2538     SAVEI8(PL_lex_dojoin);
2539     SAVEI32(PL_lex_brackets);
2540     SAVEI32(PL_lex_allbrackets);
2541     SAVEI32(PL_lex_formbrack);
2542     SAVEI8(PL_lex_fakeeof);
2543     SAVEI32(PL_lex_casemods);
2544     SAVEI32(PL_lex_starts);
2545     SAVEI8(PL_lex_state);
2546     SAVESPTR(PL_lex_repl);
2547     SAVEVPTR(PL_lex_inpat);
2548     SAVEI16(PL_lex_inwhat);
2549     if (is_heredoc)
2550     {
2551         SAVECOPLINE(PL_curcop);
2552         SAVEI32(PL_multi_end);
2553         SAVEI32(PL_parser->herelines);
2554         PL_parser->herelines = 0;
2555     }
2556     SAVEIV(PL_multi_close);
2557     SAVEPPTR(PL_bufptr);
2558     SAVEPPTR(PL_bufend);
2559     SAVEPPTR(PL_oldbufptr);
2560     SAVEPPTR(PL_oldoldbufptr);
2561     SAVEPPTR(PL_last_lop);
2562     SAVEPPTR(PL_last_uni);
2563     SAVEPPTR(PL_linestart);
2564     SAVESPTR(PL_linestr);
2565     SAVEGENERICPV(PL_lex_brackstack);
2566     SAVEGENERICPV(PL_lex_casestack);
2567     SAVEGENERICPV(PL_parser->lex_shared);
2568     SAVEBOOL(PL_parser->lex_re_reparsing);
2569     SAVEI32(PL_copline);
2570 
2571     /* The here-doc parser needs to be able to peek into outer lexing
2572        scopes to find the body of the here-doc.  So we put PL_linestr and
2573        PL_bufptr into lex_shared, to 'share' those values.
2574      */
2575     PL_parser->lex_shared->ls_linestr = PL_linestr;
2576     PL_parser->lex_shared->ls_bufptr  = PL_bufptr;
2577 
2578     PL_linestr = PL_lex_stuff;
2579     PL_lex_repl = PL_parser->lex_sub_repl;
2580     PL_lex_stuff = NULL;
2581     PL_parser->lex_sub_repl = NULL;
2582 
2583     /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets
2584        set for an inner quote-like operator and then an error causes scope-
2585        popping.  We must not have a PL_lex_stuff value left dangling, as
2586        that breaks assumptions elsewhere.  See bug #123617.  */
2587     SAVEGENERICSV(PL_lex_stuff);
2588     SAVEGENERICSV(PL_parser->lex_sub_repl);
2589 
2590     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2591         = SvPVX(PL_linestr);
2592     PL_bufend += SvCUR(PL_linestr);
2593     PL_last_lop = PL_last_uni = NULL;
2594     SAVEFREESV(PL_linestr);
2595     if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2596 
2597     PL_lex_dojoin = FALSE;
2598     PL_lex_brackets = PL_lex_formbrack = 0;
2599     PL_lex_allbrackets = 0;
2600     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2601     Newx(PL_lex_brackstack, 120, char);
2602     Newx(PL_lex_casestack, 12, char);
2603     PL_lex_casemods = 0;
2604     *PL_lex_casestack = '\0';
2605     PL_lex_starts = 0;
2606     PL_lex_state = LEX_INTERPCONCAT;
2607     if (is_heredoc)
2608         CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2609     PL_copline = NOLINE;
2610 
2611     Newxz(shared, 1, LEXSHARED);
2612     shared->ls_prev = PL_parser->lex_shared;
2613     PL_parser->lex_shared = shared;
2614 
2615     PL_lex_inwhat = PL_parser->lex_sub_inwhat;
2616     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2617     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2618         PL_lex_inpat = PL_parser->lex_sub_op;
2619     else
2620         PL_lex_inpat = NULL;
2621 
2622     PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2623     PL_in_eval &= ~EVAL_RE_REPARSING;
2624 
2625     return SUBLEXSTART;
2626 }
2627 
2628 /*
2629  * S_sublex_done
2630  * Restores lexer state after a S_sublex_push.
2631  */
2632 
2633 STATIC I32
2634 S_sublex_done(pTHX)
2635 {
2636     if (!PL_lex_starts++) {
2637         SV * const sv = newSVpvs("");
2638         if (SvUTF8(PL_linestr))
2639             SvUTF8_on(sv);
2640         PL_expect = XOPERATOR;
2641         pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
2642         return THING;
2643     }
2644 
2645     if (PL_lex_casemods) {		/* oops, we've got some unbalanced parens */
2646         PL_lex_state = LEX_INTERPCASEMOD;
2647         return yylex();
2648     }
2649 
2650     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2651     assert(PL_lex_inwhat != OP_TRANSR);
2652     if (PL_lex_repl) {
2653         assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2654         PL_linestr = PL_lex_repl;
2655         PL_lex_inpat = 0;
2656         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2657         PL_bufend += SvCUR(PL_linestr);
2658         PL_last_lop = PL_last_uni = NULL;
2659         PL_lex_dojoin = FALSE;
2660         PL_lex_brackets = 0;
2661         PL_lex_allbrackets = 0;
2662         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2663         PL_lex_casemods = 0;
2664         *PL_lex_casestack = '\0';
2665         PL_lex_starts = 0;
2666         if (SvEVALED(PL_lex_repl)) {
2667             PL_lex_state = LEX_INTERPNORMAL;
2668             PL_lex_starts++;
2669             /*	we don't clear PL_lex_repl here, so that we can check later
2670                 whether this is an evalled subst; that means we rely on the
2671                 logic to ensure sublex_done() is called again only via the
2672                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2673         }
2674         else {
2675             PL_lex_state = LEX_INTERPCONCAT;
2676             PL_lex_repl = NULL;
2677         }
2678         if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2679             CopLINE(PL_curcop) +=
2680                 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xnv_lines
2681                  + PL_parser->herelines;
2682             PL_parser->herelines = 0;
2683         }
2684         return PERLY_SLASH;
2685     }
2686     else {
2687         const line_t l = CopLINE(PL_curcop);
2688         LEAVE;
2689         if (PL_parser->sub_error_count != PL_error_count) {
2690             if (PL_parser->sub_no_recover) {
2691                 yyquit();
2692                 NOT_REACHED;
2693             }
2694         }
2695         if (PL_multi_close == '<')
2696             PL_parser->herelines += l - PL_multi_end;
2697         PL_bufend = SvPVX(PL_linestr);
2698         PL_bufend += SvCUR(PL_linestr);
2699         PL_expect = XOPERATOR;
2700         return SUBLEXEND;
2701     }
2702 }
2703 
2704 HV *
2705 Perl_load_charnames(pTHX_ SV * char_name, const char * context,
2706                           const STRLEN context_len, const char ** error_msg)
2707 {
2708     /* Load the official _charnames module if not already there.  The
2709      * parameters are just to give info for any error messages generated:
2710      *  char_name   a name to look up which is the reason for loading this
2711      *  context     'char_name' in the context in the input in which it appears
2712      *  context_len how many bytes 'context' occupies
2713      *  error_msg   *error_msg will be set to any error
2714      *
2715      *  Returns the ^H table if success; otherwise NULL */
2716 
2717     unsigned int i;
2718     HV * table;
2719     SV **cvp;
2720     SV * res;
2721 
2722     PERL_ARGS_ASSERT_LOAD_CHARNAMES;
2723 
2724     /* This loop is executed 1 1/2 times.  On the first time through, if it
2725      * isn't already loaded, try loading it, and iterate just once to see if it
2726      * worked.  */
2727     for (i = 0; i < 2; i++) {
2728         table = GvHV(PL_hintgv);		 /* ^H */
2729 
2730         if (    table
2731             && (PL_hints & HINT_LOCALIZE_HH)
2732             && (cvp = hv_fetchs(table, "charnames", FALSE))
2733             &&  SvOK(*cvp))
2734         {
2735             return table;   /* Quit if already loaded */
2736         }
2737 
2738         if (i == 0) {
2739             Perl_load_module(aTHX_
2740                 0,
2741                 newSVpvs("_charnames"),
2742 
2743                 /* version parameter; no need to specify it, as if we get too early
2744                 * a version, will fail anyway, not being able to find 'charnames'
2745                 * */
2746                 NULL,
2747                 newSVpvs(":full"),
2748                 newSVpvs(":short"),
2749                 NULL);
2750         }
2751     }
2752 
2753     /* Here, it failed; new_constant will give appropriate error messages */
2754     *error_msg = NULL;
2755     res = new_constant( NULL, 0, "charnames", char_name, NULL,
2756                         context, context_len, error_msg);
2757     SvREFCNT_dec(res);
2758 
2759     return NULL;
2760 }
2761 
2762 STATIC SV*
2763 S_get_and_check_backslash_N_name_wrapper(pTHX_ const char* s, const char* const e)
2764 {
2765     /* This justs wraps get_and_check_backslash_N_name() to output any error
2766      * message it returns. */
2767 
2768     const char * error_msg = NULL;
2769     SV * result;
2770 
2771     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME_WRAPPER;
2772 
2773     /* charnames doesn't work well if there have been errors found */
2774     if (PL_error_count > 0) {
2775         return NULL;
2776     }
2777 
2778     result = get_and_check_backslash_N_name(s, e, cBOOL(UTF), &error_msg);
2779 
2780     if (error_msg) {
2781         yyerror_pv(error_msg, UTF ? SVf_UTF8 : 0);
2782     }
2783 
2784     return result;
2785 }
2786 
2787 SV*
2788 Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
2789                                           const char* e,
2790                                           const bool is_utf8,
2791                                           const char ** error_msg)
2792 {
2793     /* <s> points to first character of interior of \N{}, <e> to one beyond the
2794      * interior, hence to the "}".  Finds what the name resolves to, returning
2795      * an SV* containing it; NULL if no valid one found.
2796      *
2797      * 'is_utf8' is TRUE if we know we want the result to be UTF-8 even if it
2798      * doesn't have to be. */
2799 
2800     SV* char_name;
2801     SV* res;
2802     HV * table;
2803     SV **cvp;
2804     SV *cv;
2805     SV *rv;
2806     HV *stash;
2807 
2808     /* Points to the beginning of the \N{... so that any messages include the
2809      * context of what's failing*/
2810     const char* context = s - 3;
2811     STRLEN context_len = e - context + 1; /* include all of \N{...} */
2812 
2813 
2814     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2815 
2816     assert(e >= s);
2817     assert(s > (char *) 3);
2818 
2819     while (s < e && isBLANK(*s)) {
2820         s++;
2821     }
2822 
2823     while (s < e && isBLANK(*(e - 1))) {
2824         e--;
2825     }
2826 
2827     char_name = newSVpvn_flags(s, e - s, (is_utf8) ? SVf_UTF8 : 0);
2828 
2829     if (!SvCUR(char_name)) {
2830         SvREFCNT_dec_NN(char_name);
2831         /* diag_listed_as: Unknown charname '%s' */
2832         *error_msg = Perl_form(aTHX_ "Unknown charname ''");
2833         return NULL;
2834     }
2835 
2836     /* Autoload the charnames module */
2837 
2838     table = load_charnames(char_name, context, context_len, error_msg);
2839     if (table == NULL) {
2840         return NULL;
2841     }
2842 
2843     *error_msg = NULL;
2844     res = new_constant( NULL, 0, "charnames", char_name, NULL,
2845                         context, context_len, error_msg);
2846     if (*error_msg) {
2847         *error_msg = Perl_form(aTHX_ "Unknown charname '%s'", SvPVX(char_name));
2848 
2849         SvREFCNT_dec(res);
2850         return NULL;
2851     }
2852 
2853     /* See if the charnames handler is the Perl core's, and if so, we can skip
2854      * the validation needed for a user-supplied one, as Perl's does its own
2855      * validation. */
2856     cvp = hv_fetchs(table, "charnames", FALSE);
2857     if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2858         SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2859     {
2860         const char * const name = HvNAME(stash);
2861          if (memEQs(name, HvNAMELEN(stash), "_charnames")) {
2862            return res;
2863        }
2864     }
2865 
2866     /* Here, it isn't Perl's charname handler.  We can't rely on a
2867      * user-supplied handler to validate the input name.  For non-ut8 input,
2868      * look to see that the first character is legal.  Then loop through the
2869      * rest checking that each is a continuation */
2870 
2871     /* This code makes the reasonable assumption that the only Latin1-range
2872      * characters that begin a character name alias are alphabetic, otherwise
2873      * would have to create a isCHARNAME_BEGIN macro */
2874 
2875     if (! is_utf8) {
2876         if (! isALPHAU(*s)) {
2877             goto bad_charname;
2878         }
2879         s++;
2880         while (s < e) {
2881             if (! isCHARNAME_CONT(*s)) {
2882                 goto bad_charname;
2883             }
2884             if (*s == ' ' && *(s-1) == ' ') {
2885                 goto multi_spaces;
2886             }
2887             s++;
2888         }
2889     }
2890     else {
2891         /* Similarly for utf8.  For invariants can check directly; for other
2892          * Latin1, can calculate their code point and check; otherwise  use an
2893          * inversion list */
2894         if (UTF8_IS_INVARIANT(*s)) {
2895             if (! isALPHAU(*s)) {
2896                 goto bad_charname;
2897             }
2898             s++;
2899         } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2900             if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) {
2901                 goto bad_charname;
2902             }
2903             s += 2;
2904         }
2905         else {
2906             if (! _invlist_contains_cp(PL_utf8_charname_begin,
2907                                        utf8_to_uvchr_buf((U8 *) s,
2908                                                          (U8 *) e,
2909                                                          NULL)))
2910             {
2911                 goto bad_charname;
2912             }
2913             s += UTF8SKIP(s);
2914         }
2915 
2916         while (s < e) {
2917             if (UTF8_IS_INVARIANT(*s)) {
2918                 if (! isCHARNAME_CONT(*s)) {
2919                     goto bad_charname;
2920                 }
2921                 if (*s == ' ' && *(s-1) == ' ') {
2922                     goto multi_spaces;
2923                 }
2924                 s++;
2925             }
2926             else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2927                 if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1))))
2928                 {
2929                     goto bad_charname;
2930                 }
2931                 s += 2;
2932             }
2933             else {
2934                 if (! _invlist_contains_cp(PL_utf8_charname_continue,
2935                                            utf8_to_uvchr_buf((U8 *) s,
2936                                                              (U8 *) e,
2937                                                              NULL)))
2938                 {
2939                     goto bad_charname;
2940                 }
2941                 s += UTF8SKIP(s);
2942             }
2943         }
2944     }
2945     if (*(s-1) == ' ') {
2946         /* diag_listed_as: charnames alias definitions may not contain
2947                            trailing white-space; marked by <-- HERE in %s
2948          */
2949         *error_msg = Perl_form(aTHX_
2950             "charnames alias definitions may not contain trailing "
2951             "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
2952             (int)(s - context + 1), context,
2953             (int)(e - s + 1), s + 1);
2954         return NULL;
2955     }
2956 
2957     if (SvUTF8(res)) { /* Don't accept malformed charname value */
2958         const U8* first_bad_char_loc;
2959         STRLEN len;
2960         const char* const str = SvPV_const(res, len);
2961         if (UNLIKELY(! is_utf8_string_loc((U8 *) str, len,
2962                                           &first_bad_char_loc)))
2963         {
2964             _force_out_malformed_utf8_message(first_bad_char_loc,
2965                                               (U8 *) PL_parser->bufend,
2966                                               0,
2967                                               0 /* 0 means don't die */ );
2968             /* diag_listed_as: Malformed UTF-8 returned by \N{%s}
2969                                immediately after '%s' */
2970             *error_msg = Perl_form(aTHX_
2971                 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2972                  (int) context_len, context,
2973                  (int) ((char *) first_bad_char_loc - str), str);
2974             return NULL;
2975         }
2976     }
2977 
2978     return res;
2979 
2980   bad_charname: {
2981 
2982         /* The final %.*s makes sure that should the trailing NUL be missing
2983          * that this print won't run off the end of the string */
2984         /* diag_listed_as: Invalid character in \N{...}; marked by <-- HERE
2985                            in \N{%s} */
2986         *error_msg = Perl_form(aTHX_
2987             "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2988             (int)(s - context + 1), context,
2989             (int)(e - s + 1), s + 1);
2990         return NULL;
2991     }
2992 
2993   multi_spaces:
2994         /* diag_listed_as: charnames alias definitions may not contain a
2995                            sequence of multiple spaces; marked by <-- HERE
2996                            in %s */
2997         *error_msg = Perl_form(aTHX_
2998             "charnames alias definitions may not contain a sequence of "
2999             "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
3000             (int)(s - context + 1), context,
3001             (int)(e - s + 1), s + 1);
3002         return NULL;
3003 }
3004 
3005 /*
3006   scan_const
3007 
3008   Extracts the next constant part of a pattern, double-quoted string,
3009   or transliteration.  This is terrifying code.
3010 
3011   For example, in parsing the double-quoted string "ab\x63$d", it would
3012   stop at the '$' and return an OP_CONST containing 'abc'.
3013 
3014   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3015   processing a pattern (PL_lex_inpat is true), a transliteration
3016   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
3017 
3018   Returns a pointer to the character scanned up to. If this is
3019   advanced from the start pointer supplied (i.e. if anything was
3020   successfully parsed), will leave an OP_CONST for the substring scanned
3021   in pl_yylval. Caller must intuit reason for not parsing further
3022   by looking at the next characters herself.
3023 
3024   In patterns:
3025     expand:
3026       \N{FOO}  => \N{U+hex_for_character_FOO}
3027       (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
3028 
3029     pass through:
3030         all other \-char, including \N and \N{ apart from \N{ABC}
3031 
3032     stops on:
3033         @ and $ where it appears to be a var, but not for $ as tail anchor
3034         \l \L \u \U \Q \E
3035         (?{  or  (??{
3036 
3037   In transliterations:
3038     characters are VERY literal, except for - not at the start or end
3039     of the string, which indicates a range.  However some backslash sequences
3040     are recognized: \r, \n, and the like
3041                     \007 \o{}, \x{}, \N{}
3042     If all elements in the transliteration are below 256,
3043     scan_const expands the range to the full set of intermediate
3044     characters. If the range is in utf8, the hyphen is replaced with
3045     a certain range mark which will be handled by pmtrans() in op.c.
3046 
3047   In double-quoted strings:
3048     backslashes:
3049       all those recognized in transliterations
3050       deprecated backrefs: \1 (in substitution replacements)
3051       case and quoting: \U \Q \E
3052     stops on @ and $
3053 
3054   scan_const does *not* construct ops to handle interpolated strings.
3055   It stops processing as soon as it finds an embedded $ or @ variable
3056   and leaves it to the caller to work out what's going on.
3057 
3058   embedded arrays (whether in pattern or not) could be:
3059       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
3060 
3061   $ in double-quoted strings must be the symbol of an embedded scalar.
3062 
3063   $ in pattern could be $foo or could be tail anchor.  Assumption:
3064   it's a tail anchor if $ is the last thing in the string, or if it's
3065   followed by one of "()| \r\n\t"
3066 
3067   \1 (backreferences) are turned into $1 in substitutions
3068 
3069   The structure of the code is
3070       while (there's a character to process) {
3071           handle transliteration ranges
3072           skip regexp comments /(?#comment)/ and codes /(?{code})/
3073           skip #-initiated comments in //x patterns
3074           check for embedded arrays
3075           check for embedded scalars
3076           if (backslash) {
3077               deprecate \1 in substitution replacements
3078               handle string-changing backslashes \l \U \Q \E, etc.
3079               switch (what was escaped) {
3080                   handle \- in a transliteration (becomes a literal -)
3081                   if a pattern and not \N{, go treat as regular character
3082                   handle \132 (octal characters)
3083                   handle \x15 and \x{1234} (hex characters)
3084                   handle \N{name} (named characters, also \N{3,5} in a pattern)
3085                   handle \cV (control characters)
3086                   handle printf-style backslashes (\f, \r, \n, etc)
3087               } (end switch)
3088               continue
3089           } (end if backslash)
3090           handle regular character
3091     } (end while character to read)
3092 
3093 */
3094 
3095 STATIC char *
3096 S_scan_const(pTHX_ char *start)
3097 {
3098     const char * const send = PL_bufend;/* end of the constant */
3099     SV *sv = newSV(send - start);       /* sv for the constant.  See note below
3100                                            on sizing. */
3101     char *s = start;			/* start of the constant */
3102     char *d = SvPVX(sv);		/* destination for copies */
3103     bool dorange = FALSE;               /* are we in a translit range? */
3104     bool didrange = FALSE;              /* did we just finish a range? */
3105     bool in_charclass = FALSE;          /* within /[...]/ */
3106     const bool s_is_utf8 = cBOOL(UTF);  /* Is the source string assumed to be
3107                                            UTF8?  But, this can show as true
3108                                            when the source isn't utf8, as for
3109                                            example when it is entirely composed
3110                                            of hex constants */
3111     bool d_is_utf8 = FALSE;             /* Output constant is UTF8 */
3112     STRLEN utf8_variant_count = 0;      /* When not in UTF-8, this counts the
3113                                            number of characters found so far
3114                                            that will expand (into 2 bytes)
3115                                            should we have to convert to
3116                                            UTF-8) */
3117     SV *res;		                /* result from charnames */
3118     STRLEN offset_to_max = 0;   /* The offset in the output to where the range
3119                                    high-end character is temporarily placed */
3120 
3121     /* Does something require special handling in tr/// ?  This avoids extra
3122      * work in a less likely case.  As such, khw didn't feel it was worth
3123      * adding any branches to the more mainline code to handle this, which
3124      * means that this doesn't get set in some circumstances when things like
3125      * \x{100} get expanded out.  As a result there needs to be extra testing
3126      * done in the tr code */
3127     bool has_above_latin1 = FALSE;
3128 
3129     /* Note on sizing:  The scanned constant is placed into sv, which is
3130      * initialized by newSV() assuming one byte of output for every byte of
3131      * input.  This routine expects newSV() to allocate an extra byte for a
3132      * trailing NUL, which this routine will append if it gets to the end of
3133      * the input.  There may be more bytes of input than output (eg., \N{LATIN
3134      * CAPITAL LETTER A}), or more output than input if the constant ends up
3135      * recoded to utf8, but each time a construct is found that might increase
3136      * the needed size, SvGROW() is called.  Its size parameter each time is
3137      * based on the best guess estimate at the time, namely the length used so
3138      * far, plus the length the current construct will occupy, plus room for
3139      * the trailing NUL, plus one byte for every input byte still unscanned */
3140 
3141     UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
3142                        before set */
3143 #ifdef EBCDIC
3144     int backslash_N = 0;            /* ? was the character from \N{} */
3145     int non_portable_endpoint = 0;  /* ? In a range is an endpoint
3146                                        platform-specific like \x65 */
3147 #endif
3148 
3149     PERL_ARGS_ASSERT_SCAN_CONST;
3150 
3151     assert(PL_lex_inwhat != OP_TRANSR);
3152 
3153     /* Protect sv from errors and fatal warnings. */
3154     ENTER_with_name("scan_const");
3155     SAVEFREESV(sv);
3156 
3157     /* A bunch of code in the loop below assumes that if s[n] exists and is not
3158      * NUL, then s[n+1] exists.  This assertion makes sure that assumption is
3159      * valid */
3160     assert(*send == '\0');
3161 
3162     while (s < send
3163            || dorange   /* Handle tr/// range at right edge of input */
3164     ) {
3165 
3166         /* get transliterations out of the way (they're most literal) */
3167         if (PL_lex_inwhat == OP_TRANS) {
3168 
3169             /* But there isn't any special handling necessary unless there is a
3170              * range, so for most cases we just drop down and handle the value
3171              * as any other.  There are two exceptions.
3172              *
3173              * 1.  A hyphen indicates that we are actually going to have a
3174              *     range.  In this case, skip the '-', set a flag, then drop
3175              *     down to handle what should be the end range value.
3176              * 2.  After we've handled that value, the next time through, that
3177              *     flag is set and we fix up the range.
3178              *
3179              * Ranges entirely within Latin1 are expanded out entirely, in
3180              * order to make the transliteration a simple table look-up.
3181              * Ranges that extend above Latin1 have to be done differently, so
3182              * there is no advantage to expanding them here, so they are
3183              * stored here as Min, RANGE_INDICATOR, Max.  'RANGE_INDICATOR' is
3184              * a byte that can't occur in legal UTF-8, and hence can signify a
3185              * hyphen without any possible ambiguity.  On EBCDIC machines, if
3186              * the range is expressed as Unicode, the Latin1 portion is
3187              * expanded out even if the range extends above Latin1.  This is
3188              * because each code point in it has to be processed here
3189              * individually to get its native translation */
3190 
3191             if (! dorange) {
3192 
3193                 /* Here, we don't think we're in a range.  If the new character
3194                  * is not a hyphen; or if it is a hyphen, but it's too close to
3195                  * either edge to indicate a range, or if we haven't output any
3196                  * characters yet then it's a regular character. */
3197                 if (*s != '-' || s >= send - 1 || s == start || d == SvPVX(sv))
3198                 {
3199 
3200                     /* A regular character.  Process like any other, but first
3201                      * clear any flags */
3202                     didrange = FALSE;
3203                     dorange = FALSE;
3204 #ifdef EBCDIC
3205                     non_portable_endpoint = 0;
3206                     backslash_N = 0;
3207 #endif
3208                     /* The tests here for being above Latin1 and similar ones
3209                      * in the following 'else' suffice to find all such
3210                      * occurences in the constant, except those added by a
3211                      * backslash escape sequence, like \x{100}.  Mostly, those
3212                      * set 'has_above_latin1' as appropriate */
3213                     if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3214                         has_above_latin1 = TRUE;
3215                     }
3216 
3217                     /* Drops down to generic code to process current byte */
3218                 }
3219                 else {  /* Is a '-' in the context where it means a range */
3220                     if (didrange) { /* Something like y/A-C-Z// */
3221                         Perl_croak(aTHX_ "Ambiguous range in transliteration"
3222                                          " operator");
3223                     }
3224 
3225                     dorange = TRUE;
3226 
3227                     s++;    /* Skip past the hyphen */
3228 
3229                     /* d now points to where the end-range character will be
3230                      * placed.  Drop down to get that character.  We'll finish
3231                      * processing the range the next time through the loop */
3232 
3233                     if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3234                         has_above_latin1 = TRUE;
3235                     }
3236 
3237                     /* Drops down to generic code to process current byte */
3238                 }
3239             }  /* End of not a range */
3240             else {
3241                 /* Here we have parsed a range.  Now must handle it.  At this
3242                  * point:
3243                  * 'sv' is a SV* that contains the output string we are
3244                  *      constructing.  The final two characters in that string
3245                  *      are the range start and range end, in order.
3246                  * 'd'  points to just beyond the range end in the 'sv' string,
3247                  *      where we would next place something
3248                  */
3249                 char * max_ptr;
3250                 char * min_ptr;
3251                 IV range_min;
3252                 IV range_max;	/* last character in range */
3253                 STRLEN grow;
3254                 Size_t offset_to_min = 0;
3255                 Size_t extras = 0;
3256 #ifdef EBCDIC
3257                 bool convert_unicode;
3258                 IV real_range_max = 0;
3259 #endif
3260                 /* Get the code point values of the range ends. */
3261                 max_ptr = (d_is_utf8) ? (char *) utf8_hop( (U8*) d, -1) : d - 1;
3262                 offset_to_max = max_ptr - SvPVX_const(sv);
3263                 if (d_is_utf8) {
3264                     /* We know the utf8 is valid, because we just constructed
3265                      * it ourselves in previous loop iterations */
3266                     min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
3267                     range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL);
3268                     range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL);
3269 
3270                     /* This compensates for not all code setting
3271                      * 'has_above_latin1', so that we don't skip stuff that
3272                      * should be executed */
3273                     if (range_max > 255) {
3274                         has_above_latin1 = TRUE;
3275                     }
3276                 }
3277                 else {
3278                     min_ptr = max_ptr - 1;
3279                     range_min = * (U8*) min_ptr;
3280                     range_max = * (U8*) max_ptr;
3281                 }
3282 
3283                 /* If the range is just a single code point, like tr/a-a/.../,
3284                  * that code point is already in the output, twice.  We can
3285                  * just back up over the second instance and avoid all the rest
3286                  * of the work.  But if it is a variant character, it's been
3287                  * counted twice, so decrement.  (This unlikely scenario is
3288                  * special cased, like the one for a range of 2 code points
3289                  * below, only because the main-line code below needs a range
3290                  * of 3 or more to work without special casing.  Might as well
3291                  * get it out of the way now.) */
3292                 if (UNLIKELY(range_max == range_min)) {
3293                     d = max_ptr;
3294                     if (! d_is_utf8 && ! UVCHR_IS_INVARIANT(range_max)) {
3295                         utf8_variant_count--;
3296                     }
3297                     goto range_done;
3298                 }
3299 
3300 #ifdef EBCDIC
3301                 /* On EBCDIC platforms, we may have to deal with portable
3302                  * ranges.  These happen if at least one range endpoint is a
3303                  * Unicode value (\N{...}), or if the range is a subset of
3304                  * [A-Z] or [a-z], and both ends are literal characters,
3305                  * like 'A', and not like \x{C1} */
3306                 convert_unicode =
3307                                cBOOL(backslash_N)   /* \N{} forces Unicode,
3308                                                        hence portable range */
3309                     || (     ! non_portable_endpoint
3310                         && ((  isLOWER_A(range_min) && isLOWER_A(range_max))
3311                            || (isUPPER_A(range_min) && isUPPER_A(range_max))));
3312                 if (convert_unicode) {
3313 
3314                     /* Special handling is needed for these portable ranges.
3315                      * They are defined to be in Unicode terms, which includes
3316                      * all the Unicode code points between the end points.
3317                      * Convert to Unicode to get the Unicode range.  Later we
3318                      * will convert each code point in the range back to
3319                      * native.  */
3320                     range_min = NATIVE_TO_UNI(range_min);
3321                     range_max = NATIVE_TO_UNI(range_max);
3322                 }
3323 #endif
3324 
3325                 if (range_min > range_max) {
3326 #ifdef EBCDIC
3327                     if (convert_unicode) {
3328                         /* Need to convert back to native for meaningful
3329                          * messages for this platform */
3330                         range_min = UNI_TO_NATIVE(range_min);
3331                         range_max = UNI_TO_NATIVE(range_max);
3332                     }
3333 #endif
3334                     /* Use the characters themselves for the error message if
3335                      * ASCII printables; otherwise some visible representation
3336                      * of them */
3337                     if (isPRINT_A(range_min) && isPRINT_A(range_max)) {
3338                         Perl_croak(aTHX_
3339                          "Invalid range \"%c-%c\" in transliteration operator",
3340                          (char)range_min, (char)range_max);
3341                     }
3342 #ifdef EBCDIC
3343                     else if (convert_unicode) {
3344         /* diag_listed_as: Invalid range "%s" in transliteration operator */
3345                         Perl_croak(aTHX_
3346                            "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04"
3347                            UVXf "}\" in transliteration operator",
3348                            range_min, range_max);
3349                     }
3350 #endif
3351                     else {
3352         /* diag_listed_as: Invalid range "%s" in transliteration operator */
3353                         Perl_croak(aTHX_
3354                            "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\""
3355                            " in transliteration operator",
3356                            range_min, range_max);
3357                     }
3358                 }
3359 
3360                 /* If the range is exactly two code points long, they are
3361                  * already both in the output */
3362                 if (UNLIKELY(range_min + 1 == range_max)) {
3363                     goto range_done;
3364                 }
3365 
3366                 /* Here the range contains at least 3 code points */
3367 
3368                 if (d_is_utf8) {
3369 
3370                     /* If everything in the transliteration is below 256, we
3371                      * can avoid special handling later.  A translation table
3372                      * for each of those bytes is created by op.c.  So we
3373                      * expand out all ranges to their constituent code points.
3374                      * But if we've encountered something above 255, the
3375                      * expanding won't help, so skip doing that.  But if it's
3376                      * EBCDIC, we may have to look at each character below 256
3377                      * if we have to convert to/from Unicode values */
3378                     if (   has_above_latin1
3379 #ifdef EBCDIC
3380                         && (range_min > 255 || ! convert_unicode)
3381 #endif
3382                     ) {
3383                         const STRLEN off = d - SvPVX(sv);
3384                         const STRLEN extra = 1 + (send - s) + 1;
3385                         char *e;
3386 
3387                         /* Move the high character one byte to the right; then
3388                          * insert between it and the range begin, an illegal
3389                          * byte which serves to indicate this is a range (using
3390                          * a '-' would be ambiguous). */
3391 
3392                         if (off + extra > SvLEN(sv)) {
3393                             d = off + SvGROW(sv, off + extra);
3394                             max_ptr = d - off + offset_to_max;
3395                         }
3396 
3397                         e = d++;
3398                         while (e-- > max_ptr) {
3399                             *(e + 1) = *e;
3400                         }
3401                         *(e + 1) = (char) RANGE_INDICATOR;
3402                         goto range_done;
3403                     }
3404 
3405                     /* Here, we're going to expand out the range.  For EBCDIC
3406                      * the range can extend above 255 (not so in ASCII), so
3407                      * for EBCDIC, split it into the parts above and below
3408                      * 255/256 */
3409 #ifdef EBCDIC
3410                     if (range_max > 255) {
3411                         real_range_max = range_max;
3412                         range_max = 255;
3413                     }
3414 #endif
3415                 }
3416 
3417                 /* Here we need to expand out the string to contain each
3418                  * character in the range.  Grow the output to handle this.
3419                  * For non-UTF8, we need a byte for each code point in the
3420                  * range, minus the three that we've already allocated for: the
3421                  * hyphen, the min, and the max.  For UTF-8, we need this
3422                  * plus an extra byte for each code point that occupies two
3423                  * bytes (is variant) when in UTF-8 (except we've already
3424                  * allocated for the end points, including if they are
3425                  * variants).  For ASCII platforms and Unicode ranges on EBCDIC
3426                  * platforms, it's easy to calculate a precise number.  To
3427                  * start, we count the variants in the range, which we need
3428                  * elsewhere in this function anyway.  (For the case where it
3429                  * isn't easy to calculate, 'extras' has been initialized to 0,
3430                  * and the calculation is done in a loop further down.) */
3431 #ifdef EBCDIC
3432                 if (convert_unicode)
3433 #endif
3434                 {
3435                     /* This is executed unconditionally on ASCII, and for
3436                      * Unicode ranges on EBCDIC.  Under these conditions, all
3437                      * code points above a certain value are variant; and none
3438                      * under that value are.  We just need to find out how much
3439                      * of the range is above that value.  We don't count the
3440                      * end points here, as they will already have been counted
3441                      * as they were parsed. */
3442                     if (range_min >= UTF_CONTINUATION_MARK) {
3443 
3444                         /* The whole range is made up of variants */
3445                         extras = (range_max - 1) - (range_min + 1) + 1;
3446                     }
3447                     else if (range_max >= UTF_CONTINUATION_MARK) {
3448 
3449                         /* Only the higher portion of the range is variants */
3450                         extras = (range_max - 1) - UTF_CONTINUATION_MARK + 1;
3451                     }
3452 
3453                     utf8_variant_count += extras;
3454                 }
3455 
3456                 /* The base growth is the number of code points in the range,
3457                  * not including the endpoints, which have already been sized
3458                  * for (and output).  We don't subtract for the hyphen, as it
3459                  * has been parsed but not output, and the SvGROW below is
3460                  * based only on what's been output plus what's left to parse.
3461                  * */
3462                 grow = (range_max - 1) - (range_min + 1) + 1;
3463 
3464                 if (d_is_utf8) {
3465 #ifdef EBCDIC
3466                     /* In some cases in EBCDIC, we haven't yet calculated a
3467                      * precise amount needed for the UTF-8 variants.  Just
3468                      * assume the worst case, that everything will expand by a
3469                      * byte */
3470                     if (! convert_unicode) {
3471                         grow *= 2;
3472                     }
3473                     else
3474 #endif
3475                     {
3476                         /* Otherwise we know exactly how many variants there
3477                          * are in the range. */
3478                         grow += extras;
3479                     }
3480                 }
3481 
3482                 /* Grow, but position the output to overwrite the range min end
3483                  * point, because in some cases we overwrite that */
3484                 SvCUR_set(sv, d - SvPVX_const(sv));
3485                 offset_to_min = min_ptr - SvPVX_const(sv);
3486 
3487                 /* See Note on sizing above. */
3488                 d = offset_to_min + SvGROW(sv, SvCUR(sv)
3489                                              + (send - s)
3490                                              + grow
3491                                              + 1 /* Trailing NUL */ );
3492 
3493                 /* Now, we can expand out the range. */
3494 #ifdef EBCDIC
3495                 if (convert_unicode) {
3496                     SSize_t i;
3497 
3498                     /* Recall that the min and max are now in Unicode terms, so
3499                      * we have to convert each character to its native
3500                      * equivalent */
3501                     if (d_is_utf8) {
3502                         for (i = range_min; i <= range_max; i++) {
3503                             append_utf8_from_native_byte(
3504                                                     LATIN1_TO_NATIVE((U8) i),
3505                                                     (U8 **) &d);
3506                         }
3507                     }
3508                     else {
3509                         for (i = range_min; i <= range_max; i++) {
3510                             *d++ = (char)LATIN1_TO_NATIVE((U8) i);
3511                         }
3512                     }
3513                 }
3514                 else
3515 #endif
3516                 /* Always gets run for ASCII, and sometimes for EBCDIC. */
3517                 {
3518                     /* Here, no conversions are necessary, which means that the
3519                      * first character in the range is already in 'd' and
3520                      * valid, so we can skip overwriting it */
3521                     if (d_is_utf8) {
3522                         SSize_t i;
3523                         d += UTF8SKIP(d);
3524                         for (i = range_min + 1; i <= range_max; i++) {
3525                             append_utf8_from_native_byte((U8) i, (U8 **) &d);
3526                         }
3527                     }
3528                     else {
3529                         SSize_t i;
3530                         d++;
3531                         assert(range_min + 1 <= range_max);
3532                         for (i = range_min + 1; i < range_max; i++) {
3533 #ifdef EBCDIC
3534                             /* In this case on EBCDIC, we haven't calculated
3535                              * the variants.  Do it here, as we go along */
3536                             if (! UVCHR_IS_INVARIANT(i)) {
3537                                 utf8_variant_count++;
3538                             }
3539 #endif
3540                             *d++ = (char)i;
3541                         }
3542 
3543                         /* The range_max is done outside the loop so as to
3544                          * avoid having to special case not incrementing
3545                          * 'utf8_variant_count' on EBCDIC (it's already been
3546                          * counted when originally parsed) */
3547                         *d++ = (char) range_max;
3548                     }
3549                 }
3550 
3551 #ifdef EBCDIC
3552                 /* If the original range extended above 255, add in that
3553                  * portion. */
3554                 if (real_range_max) {
3555                     *d++ = (char) UTF8_TWO_BYTE_HI(0x100);
3556                     *d++ = (char) UTF8_TWO_BYTE_LO(0x100);
3557                     if (real_range_max > 0x100) {
3558                         if (real_range_max > 0x101) {
3559                             *d++ = (char) RANGE_INDICATOR;
3560                         }
3561                         d = (char*)uvchr_to_utf8((U8*)d, real_range_max);
3562                     }
3563                 }
3564 #endif
3565 
3566               range_done:
3567                 /* mark the range as done, and continue */
3568                 didrange = TRUE;
3569                 dorange = FALSE;
3570 #ifdef EBCDIC
3571                 non_portable_endpoint = 0;
3572                 backslash_N = 0;
3573 #endif
3574                 continue;
3575             } /* End of is a range */
3576         } /* End of transliteration.  Joins main code after these else's */
3577         else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3578             char *s1 = s-1;
3579             int esc = 0;
3580             while (s1 >= start && *s1-- == '\\')
3581                 esc = !esc;
3582             if (!esc)
3583                 in_charclass = TRUE;
3584         }
3585         else if (*s == ']' && PL_lex_inpat && in_charclass) {
3586             char *s1 = s-1;
3587             int esc = 0;
3588             while (s1 >= start && *s1-- == '\\')
3589                 esc = !esc;
3590             if (!esc)
3591                 in_charclass = FALSE;
3592         }
3593             /* skip for regexp comments /(?#comment)/, except for the last
3594              * char, which will be done separately.  Stop on (?{..}) and
3595              * friends */
3596         else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3597             if (s[2] == '#') {
3598                 if (s_is_utf8) {
3599                     PERL_UINT_FAST8_T  len = UTF8SKIP(s);
3600 
3601                     while (s + len < send && *s != ')') {
3602                         Copy(s, d, len, U8);
3603                         d += len;
3604                         s += len;
3605                         len = UTF8_SAFE_SKIP(s, send);
3606                     }
3607                 }
3608                 else while (s+1 < send && *s != ')') {
3609                     *d++ = *s++;
3610                 }
3611             }
3612             else if (!PL_lex_casemods
3613                      && (    s[2] == '{' /* This should match regcomp.c */
3614                          || (s[2] == '?' && s[3] == '{')))
3615             {
3616                 break;
3617             }
3618         }
3619             /* likewise skip #-initiated comments in //x patterns */
3620         else if (*s == '#'
3621                  && PL_lex_inpat
3622                  && !in_charclass
3623                  && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED)
3624         {
3625             while (s < send && *s != '\n')
3626                 *d++ = *s++;
3627         }
3628             /* no further processing of single-quoted regex */
3629         else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3630             goto default_action;
3631 
3632             /* check for embedded arrays
3633              * (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3634              */
3635         else if (*s == '@' && s[1]) {
3636             if (UTF
3637                ? isIDFIRST_utf8_safe(s+1, send)
3638                : isWORDCHAR_A(s[1]))
3639             {
3640                 break;
3641             }
3642             if (memCHRs(":'{$", s[1]))
3643                 break;
3644             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3645                 break; /* in regexp, neither @+ nor @- are interpolated */
3646         }
3647             /* check for embedded scalars.  only stop if we're sure it's a
3648              * variable.  */
3649         else if (*s == '$') {
3650             if (!PL_lex_inpat)	/* not a regexp, so $ must be var */
3651                 break;
3652             if (s + 1 < send && !memCHRs("()| \r\n\t", s[1])) {
3653                 if (s[1] == '\\') {
3654                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3655                                    "Possible unintended interpolation of $\\ in regex");
3656                 }
3657                 break;		/* in regexp, $ might be tail anchor */
3658             }
3659         }
3660 
3661         /* End of else if chain - OP_TRANS rejoin rest */
3662 
3663         if (UNLIKELY(s >= send)) {
3664             assert(s == send);
3665             break;
3666         }
3667 
3668         /* backslashes */
3669         if (*s == '\\' && s+1 < send) {
3670             char* bslash = s;   /* point to beginning \ */
3671             char* rbrace;	/* point to ending '}' */
3672             char* e;	        /* 1 past the meat (non-blanks) before the
3673                                    brace */
3674             s++;
3675 
3676             /* warn on \1 - \9 in substitution replacements, but note that \11
3677              * is an octal; and \19 is \1 followed by '9' */
3678             if (PL_lex_inwhat == OP_SUBST
3679                 && !PL_lex_inpat
3680                 && isDIGIT(*s)
3681                 && *s != '0'
3682                 && !isDIGIT(s[1]))
3683             {
3684                 /* diag_listed_as: \%d better written as $%d */
3685                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3686                 s = bslash;
3687                 *s = '$';
3688                 break;
3689             }
3690 
3691             /* string-change backslash escapes */
3692             if (PL_lex_inwhat != OP_TRANS && *s && memCHRs("lLuUEQF", *s)) {
3693                 s = bslash;
3694                 break;
3695             }
3696             /* In a pattern, process \N, but skip any other backslash escapes.
3697              * This is because we don't want to translate an escape sequence
3698              * into a meta symbol and have the regex compiler use the meta
3699              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
3700              * in spite of this, we do have to process \N here while the proper
3701              * charnames handler is in scope.  See bugs #56444 and #62056.
3702              *
3703              * There is a complication because \N in a pattern may also stand
3704              * for 'match a non-nl', and not mean a charname, in which case its
3705              * processing should be deferred to the regex compiler.  To be a
3706              * charname it must be followed immediately by a '{', and not look
3707              * like \N followed by a curly quantifier, i.e., not something like
3708              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
3709              * quantifier */
3710             else if (PL_lex_inpat
3711                     && (*s != 'N'
3712                         || s[1] != '{'
3713                         || regcurly(s + 1, send, NULL)))
3714             {
3715                 *d++ = '\\';
3716                 goto default_action;
3717             }
3718 
3719             switch (*s) {
3720             default:
3721                 {
3722                     if ((isALPHANUMERIC(*s)))
3723                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3724                                        "Unrecognized escape \\%c passed through",
3725                                        *s);
3726                     /* default action is to copy the quoted character */
3727                     goto default_action;
3728                 }
3729 
3730             /* eg. \132 indicates the octal constant 0132 */
3731             case '0': case '1': case '2': case '3':
3732             case '4': case '5': case '6': case '7':
3733                 {
3734                     I32 flags = PERL_SCAN_SILENT_ILLDIGIT
3735                               | PERL_SCAN_NOTIFY_ILLDIGIT;
3736                     STRLEN len = 3;
3737                     uv = grok_oct(s, &len, &flags, NULL);
3738                     s += len;
3739                     if (  (flags & PERL_SCAN_NOTIFY_ILLDIGIT)
3740                         && s < send
3741                         && isDIGIT(*s)  /* like \08, \178 */
3742                         && ckWARN(WARN_MISC))
3743                     {
3744                         Perl_warner(aTHX_ packWARN(WARN_MISC), "%s",
3745                             form_alien_digit_msg(8, len, s, send, UTF, FALSE));
3746                     }
3747                 }
3748                 goto NUM_ESCAPE_INSERT;
3749 
3750             /* eg. \o{24} indicates the octal constant \024 */
3751             case 'o':
3752                 {
3753                     const char* error;
3754 
3755                     if (! grok_bslash_o(&s, send,
3756                                                &uv, &error,
3757                                                NULL,
3758                                                FALSE, /* Not strict */
3759                                                FALSE, /* No illegal cp's */
3760                                                UTF))
3761                     {
3762                         yyerror(error);
3763                         uv = 0; /* drop through to ensure range ends are set */
3764                     }
3765                     goto NUM_ESCAPE_INSERT;
3766                 }
3767 
3768             /* eg. \x24 indicates the hex constant 0x24 */
3769             case 'x':
3770                 {
3771                     const char* error;
3772 
3773                     if (! grok_bslash_x(&s, send,
3774                                                &uv, &error,
3775                                                NULL,
3776                                                FALSE, /* Not strict */
3777                                                FALSE, /* No illegal cp's */
3778                                                UTF))
3779                     {
3780                         yyerror(error);
3781                         uv = 0; /* drop through to ensure range ends are set */
3782                     }
3783                 }
3784 
3785               NUM_ESCAPE_INSERT:
3786                 /* Insert oct or hex escaped character. */
3787 
3788                 /* Here uv is the ordinal of the next character being added */
3789                 if (UVCHR_IS_INVARIANT(uv)) {
3790                     *d++ = (char) uv;
3791                 }
3792                 else {
3793                     if (!d_is_utf8 && uv > 255) {
3794 
3795                         /* Here, 'uv' won't fit unless we convert to UTF-8.
3796                          * If we've only seen invariants so far, all we have to
3797                          * do is turn on the flag */
3798                         if (utf8_variant_count == 0) {
3799                             SvUTF8_on(sv);
3800                         }
3801                         else {
3802                             SvCUR_set(sv, d - SvPVX_const(sv));
3803                             SvPOK_on(sv);
3804                             *d = '\0';
3805 
3806                             sv_utf8_upgrade_flags_grow(
3807                                            sv,
3808                                            SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3809 
3810                                            /* Since we're having to grow here,
3811                                             * make sure we have enough room for
3812                                             * this escape and a NUL, so the
3813                                             * code immediately below won't have
3814                                             * to actually grow again */
3815                                           UVCHR_SKIP(uv)
3816                                         + (STRLEN)(send - s) + 1);
3817                             d = SvPVX(sv) + SvCUR(sv);
3818                         }
3819 
3820                         has_above_latin1 = TRUE;
3821                         d_is_utf8 = TRUE;
3822                     }
3823 
3824                     if (! d_is_utf8) {
3825                         *d++ = (char)uv;
3826                         utf8_variant_count++;
3827                     }
3828                     else {
3829                        /* Usually, there will already be enough room in 'sv'
3830                         * since such escapes are likely longer than any UTF-8
3831                         * sequence they can end up as.  This isn't the case on
3832                         * EBCDIC where \x{40000000} contains 12 bytes, and the
3833                         * UTF-8 for it contains 14.  And, we have to allow for
3834                         * a trailing NUL.  It probably can't happen on ASCII
3835                         * platforms, but be safe.  See Note on sizing above. */
3836                         const STRLEN needed = d - SvPVX(sv)
3837                                             + UVCHR_SKIP(uv)
3838                                             + (send - s)
3839                                             + 1;
3840                         if (UNLIKELY(needed > SvLEN(sv))) {
3841                             SvCUR_set(sv, d - SvPVX_const(sv));
3842                             d = SvCUR(sv) + SvGROW(sv, needed);
3843                         }
3844 
3845                         d = (char*) uvchr_to_utf8_flags((U8*)d, uv,
3846                                                    (ckWARN(WARN_PORTABLE))
3847                                                    ? UNICODE_WARN_PERL_EXTENDED
3848                                                    : 0);
3849                     }
3850                 }
3851 #ifdef EBCDIC
3852                 non_portable_endpoint++;
3853 #endif
3854                 continue;
3855 
3856             case 'N':
3857                 /* In a non-pattern \N must be like \N{U+0041}, or it can be a
3858                  * named character, like \N{LATIN SMALL LETTER A}, or a named
3859                  * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
3860                  * GRAVE} (except y/// can't handle the latter, croaking).  For
3861                  * convenience all three forms are referred to as "named
3862                  * characters" below.
3863                  *
3864                  * For patterns, \N also can mean to match a non-newline.  Code
3865                  * before this 'switch' statement should already have handled
3866                  * this situation, and hence this code only has to deal with
3867                  * the named character cases.
3868                  *
3869                  * For non-patterns, the named characters are converted to
3870                  * their string equivalents.  In patterns, named characters are
3871                  * not converted to their ultimate forms for the same reasons
3872                  * that other escapes aren't (mainly that the ultimate
3873                  * character could be considered a meta-symbol by the regex
3874                  * compiler).  Instead, they are converted to the \N{U+...}
3875                  * form to get the value from the charnames that is in effect
3876                  * right now, while preserving the fact that it was a named
3877                  * character, so that the regex compiler knows this.
3878                  *
3879                  * The structure of this section of code (besides checking for
3880                  * errors and upgrading to utf8) is:
3881                  *    If the named character is of the form \N{U+...}, pass it
3882                  *      through if a pattern; otherwise convert the code point
3883                  *      to utf8
3884                  *    Otherwise must be some \N{NAME}: convert to
3885                  *      \N{U+c1.c2...} if a pattern; otherwise convert to utf8
3886                  *
3887                  * Transliteration is an exception.  The conversion to utf8 is
3888                  * only done if the code point requires it to be representable.
3889                  *
3890                  * Here, 's' points to the 'N'; the test below is guaranteed to
3891                  * succeed if we are being called on a pattern, as we already
3892                  * know from a test above that the next character is a '{'.  A
3893                  * non-pattern \N must mean 'named character', which requires
3894                  * braces */
3895                 s++;
3896                 if (*s != '{') {
3897                     yyerror("Missing braces on \\N{}");
3898                     *d++ = '\0';
3899                     continue;
3900                 }
3901                 s++;
3902 
3903                 /* If there is no matching '}', it is an error. */
3904                 if (! (rbrace = (char *) memchr(s, '}', send - s))) {
3905                     if (! PL_lex_inpat) {
3906                         yyerror("Missing right brace on \\N{}");
3907                     } else {
3908                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3909                     }
3910                     yyquit(); /* Have exhausted the input. */
3911                 }
3912 
3913                 /* Here it looks like a named character */
3914                 while (s < rbrace && isBLANK(*s)) {
3915                     s++;
3916                 }
3917 
3918                 e = rbrace;
3919                 while (s < e && isBLANK(*(e - 1))) {
3920                     e--;
3921                 }
3922 
3923                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3924                     s += 2;	    /* Skip to next char after the 'U+' */
3925                     if (PL_lex_inpat) {
3926 
3927                         /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */
3928                         /* Check the syntax.  */
3929                         if (!isXDIGIT(*s)) {
3930                           bad_NU:
3931                             yyerror(
3932                                 "Invalid hexadecimal number in \\N{U+...}"
3933                             );
3934                             s = rbrace + 1;
3935                             *d++ = '\0';
3936                             continue;
3937                         }
3938                         while (++s < e) {
3939                             if (isXDIGIT(*s))
3940                                 continue;
3941                             else if ((*s == '.' || *s == '_')
3942                                   && isXDIGIT(s[1]))
3943                                 continue;
3944                             goto bad_NU;
3945                         }
3946 
3947                         /* Pass everything through unchanged.
3948                          * +1 is to include the '}' */
3949                         Copy(bslash, d, rbrace - bslash + 1, char);
3950                         d += rbrace - bslash + 1;
3951                     }
3952                     else {  /* Not a pattern: convert the hex to string */
3953                         I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3954                                   | PERL_SCAN_SILENT_ILLDIGIT
3955                                   | PERL_SCAN_SILENT_OVERFLOW
3956                                   | PERL_SCAN_DISALLOW_PREFIX;
3957                         STRLEN len = e - s;
3958 
3959                         uv = grok_hex(s, &len, &flags, NULL);
3960                         if (len == 0 || (len != (STRLEN)(e - s)))
3961                             goto bad_NU;
3962 
3963                         if (    uv > MAX_LEGAL_CP
3964                             || (flags & PERL_SCAN_GREATER_THAN_UV_MAX))
3965                         {
3966                             yyerror(form_cp_too_large_msg(16, s, len, 0));
3967                             uv = 0; /* drop through to ensure range ends are
3968                                        set */
3969                         }
3970 
3971                          /* For non-tr///, if the destination is not in utf8,
3972                           * unconditionally recode it to be so.  This is
3973                           * because \N{} implies Unicode semantics, and scalars
3974                           * have to be in utf8 to guarantee those semantics.
3975                           * tr/// doesn't care about Unicode rules, so no need
3976                           * there to upgrade to UTF-8 for small enough code
3977                           * points */
3978                         if (! d_is_utf8 && (   uv > 0xFF
3979                                            || PL_lex_inwhat != OP_TRANS))
3980                         {
3981                             /* See Note on sizing above.  */
3982                             const STRLEN extra = OFFUNISKIP(uv) + (send - rbrace) + 1;
3983 
3984                             SvCUR_set(sv, d - SvPVX_const(sv));
3985                             SvPOK_on(sv);
3986                             *d = '\0';
3987 
3988                             if (utf8_variant_count == 0) {
3989                                 SvUTF8_on(sv);
3990                                 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
3991                             }
3992                             else {
3993                                 sv_utf8_upgrade_flags_grow(
3994                                                sv,
3995                                                SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3996                                                extra);
3997                                 d = SvPVX(sv) + SvCUR(sv);
3998                             }
3999 
4000                             d_is_utf8 = TRUE;
4001                             has_above_latin1 = TRUE;
4002                         }
4003 
4004                         /* Add the (Unicode) code point to the output. */
4005                         if (! d_is_utf8 || OFFUNI_IS_INVARIANT(uv)) {
4006                             *d++ = (char) LATIN1_TO_NATIVE(uv);
4007                         }
4008                         else {
4009                             d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv,
4010                                                    (ckWARN(WARN_PORTABLE))
4011                                                    ? UNICODE_WARN_PERL_EXTENDED
4012                                                    : 0);
4013                         }
4014                     }
4015                 }
4016                 else     /* Here is \N{NAME} but not \N{U+...}. */
4017                      if (! (res = get_and_check_backslash_N_name_wrapper(s, e)))
4018                 {   /* Failed.  We should die eventually, but for now use a NUL
4019                        to keep parsing */
4020                     *d++ = '\0';
4021                 }
4022                 else {  /* Successfully evaluated the name */
4023                     STRLEN len;
4024                     const char *str = SvPV_const(res, len);
4025                     if (PL_lex_inpat) {
4026 
4027                         if (! len) { /* The name resolved to an empty string */
4028                             const char empty_N[] = "\\N{_}";
4029                             Copy(empty_N, d, sizeof(empty_N) - 1, char);
4030                             d += sizeof(empty_N) - 1;
4031                         }
4032                         else {
4033                             /* In order to not lose information for the regex
4034                             * compiler, pass the result in the specially made
4035                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
4036                             * the code points in hex of each character
4037                             * returned by charnames */
4038 
4039                             const char *str_end = str + len;
4040                             const STRLEN off = d - SvPVX_const(sv);
4041 
4042                             if (! SvUTF8(res)) {
4043                                 /* For the non-UTF-8 case, we can determine the
4044                                  * exact length needed without having to parse
4045                                  * through the string.  Each character takes up
4046                                  * 2 hex digits plus either a trailing dot or
4047                                  * the "}" */
4048                                 const char initial_text[] = "\\N{U+";
4049                                 const STRLEN initial_len = sizeof(initial_text)
4050                                                            - 1;
4051                                 d = off + SvGROW(sv, off
4052                                                     + 3 * len
4053 
4054                                                     /* +1 for trailing NUL */
4055                                                     + initial_len + 1
4056 
4057                                                     + (STRLEN)(send - rbrace));
4058                                 Copy(initial_text, d, initial_len, char);
4059                                 d += initial_len;
4060                                 while (str < str_end) {
4061                                     char hex_string[4];
4062                                     int len =
4063                                         my_snprintf(hex_string,
4064                                                   sizeof(hex_string),
4065                                                   "%02X.",
4066 
4067                                                   /* The regex compiler is
4068                                                    * expecting Unicode, not
4069                                                    * native */
4070                                                   NATIVE_TO_LATIN1(*str));
4071                                     PERL_MY_SNPRINTF_POST_GUARD(len,
4072                                                            sizeof(hex_string));
4073                                     Copy(hex_string, d, 3, char);
4074                                     d += 3;
4075                                     str++;
4076                                 }
4077                                 d--;    /* Below, we will overwrite the final
4078                                            dot with a right brace */
4079                             }
4080                             else {
4081                                 STRLEN char_length; /* cur char's byte length */
4082 
4083                                 /* and the number of bytes after this is
4084                                  * translated into hex digits */
4085                                 STRLEN output_length;
4086 
4087                                 /* 2 hex per byte; 2 chars for '\N'; 2 chars
4088                                  * for max('U+', '.'); and 1 for NUL */
4089                                 char hex_string[2 * UTF8_MAXBYTES + 5];
4090 
4091                                 /* Get the first character of the result. */
4092                                 U32 uv = utf8n_to_uvchr((U8 *) str,
4093                                                         len,
4094                                                         &char_length,
4095                                                         UTF8_ALLOW_ANYUV);
4096                                 /* Convert first code point to Unicode hex,
4097                                  * including the boiler plate before it. */
4098                                 output_length =
4099                                     my_snprintf(hex_string, sizeof(hex_string),
4100                                              "\\N{U+%X",
4101                                              (unsigned int) NATIVE_TO_UNI(uv));
4102 
4103                                 /* Make sure there is enough space to hold it */
4104                                 d = off + SvGROW(sv, off
4105                                                     + output_length
4106                                                     + (STRLEN)(send - rbrace)
4107                                                     + 2);	/* '}' + NUL */
4108                                 /* And output it */
4109                                 Copy(hex_string, d, output_length, char);
4110                                 d += output_length;
4111 
4112                                 /* For each subsequent character, append dot and
4113                                 * its Unicode code point in hex */
4114                                 while ((str += char_length) < str_end) {
4115                                     const STRLEN off = d - SvPVX_const(sv);
4116                                     U32 uv = utf8n_to_uvchr((U8 *) str,
4117                                                             str_end - str,
4118                                                             &char_length,
4119                                                             UTF8_ALLOW_ANYUV);
4120                                     output_length =
4121                                         my_snprintf(hex_string,
4122                                              sizeof(hex_string),
4123                                              ".%X",
4124                                              (unsigned int) NATIVE_TO_UNI(uv));
4125 
4126                                     d = off + SvGROW(sv, off
4127                                                         + output_length
4128                                                         + (STRLEN)(send - rbrace)
4129                                                         + 2);	/* '}' +  NUL */
4130                                     Copy(hex_string, d, output_length, char);
4131                                     d += output_length;
4132                                 }
4133                             }
4134 
4135                             *d++ = '}';	/* Done.  Add the trailing brace */
4136                         }
4137                     }
4138                     else { /* Here, not in a pattern.  Convert the name to a
4139                             * string. */
4140 
4141                         if (PL_lex_inwhat == OP_TRANS) {
4142                             str = SvPV_const(res, len);
4143                             if (len > ((SvUTF8(res))
4144                                        ? UTF8SKIP(str)
4145                                        : 1U))
4146                             {
4147                                 yyerror(Perl_form(aTHX_
4148                                     "%.*s must not be a named sequence"
4149                                     " in transliteration operator",
4150                                         /*  +1 to include the "}" */
4151                                     (int) (rbrace + 1 - start), start));
4152                                 *d++ = '\0';
4153                                 goto end_backslash_N;
4154                             }
4155 
4156                             if (SvUTF8(res) && UTF8_IS_ABOVE_LATIN1(*str)) {
4157                                 has_above_latin1 = TRUE;
4158                             }
4159 
4160                         }
4161                         else if (! SvUTF8(res)) {
4162                             /* Make sure \N{} return is UTF-8.  This is because
4163                              * \N{} implies Unicode semantics, and scalars have
4164                              * to be in utf8 to guarantee those semantics; but
4165                              * not needed in tr/// */
4166                             sv_utf8_upgrade_flags(res, 0);
4167                             str = SvPV_const(res, len);
4168                         }
4169 
4170                          /* Upgrade destination to be utf8 if this new
4171                           * component is */
4172                         if (! d_is_utf8 && SvUTF8(res)) {
4173                             /* See Note on sizing above.  */
4174                             const STRLEN extra = len + (send - s) + 1;
4175 
4176                             SvCUR_set(sv, d - SvPVX_const(sv));
4177                             SvPOK_on(sv);
4178                             *d = '\0';
4179 
4180                             if (utf8_variant_count == 0) {
4181                                 SvUTF8_on(sv);
4182                                 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
4183                             }
4184                             else {
4185                                 sv_utf8_upgrade_flags_grow(sv,
4186                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4187                                                 extra);
4188                                 d = SvPVX(sv) + SvCUR(sv);
4189                             }
4190                             d_is_utf8 = TRUE;
4191                         } else if (len > (STRLEN)(e - s + 4)) { /* +4 is for \N{} */
4192 
4193                             /* See Note on sizing above.  (NOTE: SvCUR() is not
4194                              * set correctly here). */
4195                             const STRLEN extra = len + (send - rbrace) + 1;
4196                             const STRLEN off = d - SvPVX_const(sv);
4197                             d = off + SvGROW(sv, off + extra);
4198                         }
4199                         Copy(str, d, len, char);
4200                         d += len;
4201                     }
4202 
4203                     SvREFCNT_dec(res);
4204 
4205                 } /* End \N{NAME} */
4206 
4207               end_backslash_N:
4208 #ifdef EBCDIC
4209                 backslash_N++; /* \N{} is defined to be Unicode */
4210 #endif
4211                 s = rbrace + 1;  /* Point to just after the '}' */
4212                 continue;
4213 
4214             /* \c is a control character */
4215             case 'c':
4216                 s++;
4217                 if (s < send) {
4218                     const char * message;
4219 
4220                     if (! grok_bslash_c(*s, (U8 *) d, &message, NULL)) {
4221                         yyerror(message);
4222                         yyquit();   /* Have always immediately croaked on
4223                                        errors in this */
4224                     }
4225                     d++;
4226                 }
4227                 else {
4228                     yyerror("Missing control char name in \\c");
4229                     yyquit();   /* Are at end of input, no sense continuing */
4230                 }
4231 #ifdef EBCDIC
4232                 non_portable_endpoint++;
4233 #endif
4234                 break;
4235 
4236             /* printf-style backslashes, formfeeds, newlines, etc */
4237             case 'b':
4238                 *d++ = '\b';
4239                 break;
4240             case 'n':
4241                 *d++ = '\n';
4242                 break;
4243             case 'r':
4244                 *d++ = '\r';
4245                 break;
4246             case 'f':
4247                 *d++ = '\f';
4248                 break;
4249             case 't':
4250                 *d++ = '\t';
4251                 break;
4252             case 'e':
4253                 *d++ = ESC_NATIVE;
4254                 break;
4255             case 'a':
4256                 *d++ = '\a';
4257                 break;
4258             } /* end switch */
4259 
4260             s++;
4261             continue;
4262         } /* end if (backslash) */
4263 
4264     default_action:
4265         /* Just copy the input to the output, though we may have to convert
4266          * to/from UTF-8.
4267          *
4268          * If the input has the same representation in UTF-8 as not, it will be
4269          * a single byte, and we don't care about UTF8ness; just copy the byte */
4270         if (NATIVE_BYTE_IS_INVARIANT((U8)(*s))) {
4271             *d++ = *s++;
4272         }
4273         else if (! s_is_utf8 && ! d_is_utf8) {
4274             /* If neither source nor output is UTF-8, is also a single byte,
4275              * just copy it; but this byte counts should we later have to
4276              * convert to UTF-8 */
4277             *d++ = *s++;
4278             utf8_variant_count++;
4279         }
4280         else if (s_is_utf8 && d_is_utf8) {   /* Both UTF-8, can just copy */
4281             const STRLEN len = UTF8SKIP(s);
4282 
4283             /* We expect the source to have already been checked for
4284              * malformedness */
4285             assert(isUTF8_CHAR((U8 *) s, (U8 *) send));
4286 
4287             Copy(s, d, len, U8);
4288             d += len;
4289             s += len;
4290         }
4291         else if (s_is_utf8) { /* UTF8ness matters: convert output to utf8 */
4292             STRLEN need = send - s + 1; /* See Note on sizing above. */
4293 
4294             SvCUR_set(sv, d - SvPVX_const(sv));
4295             SvPOK_on(sv);
4296             *d = '\0';
4297 
4298             if (utf8_variant_count == 0) {
4299                 SvUTF8_on(sv);
4300                 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + need);
4301             }
4302             else {
4303                 sv_utf8_upgrade_flags_grow(sv,
4304                                            SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4305                                            need);
4306                 d = SvPVX(sv) + SvCUR(sv);
4307             }
4308             d_is_utf8 = TRUE;
4309             goto default_action; /* Redo, having upgraded so both are UTF-8 */
4310         }
4311         else {  /* UTF8ness matters: convert this non-UTF8 source char to
4312                    UTF-8 for output.  It will occupy 2 bytes, but don't include
4313                    the input byte since we haven't incremented 's' yet. See
4314                    Note on sizing above. */
4315             const STRLEN off = d - SvPVX(sv);
4316             const STRLEN extra = 2 + (send - s - 1) + 1;
4317             if (off + extra > SvLEN(sv)) {
4318                 d = off + SvGROW(sv, off + extra);
4319             }
4320             *d++ = UTF8_EIGHT_BIT_HI(*s);
4321             *d++ = UTF8_EIGHT_BIT_LO(*s);
4322             s++;
4323         }
4324     } /* while loop to process each character */
4325 
4326     {
4327         const STRLEN off = d - SvPVX(sv);
4328 
4329         /* See if room for the terminating NUL */
4330         if (UNLIKELY(off >= SvLEN(sv))) {
4331 
4332 #ifndef DEBUGGING
4333 
4334             if (off > SvLEN(sv))
4335 #endif
4336                 Perl_croak(aTHX_ "panic: constant overflowed allocated space,"
4337                         " %" UVuf " >= %" UVuf, (UV)off, (UV)SvLEN(sv));
4338 
4339             /* Whew!  Here we don't have room for the terminating NUL, but
4340              * everything else so far has fit.  It's not too late to grow
4341              * to fit the NUL and continue on.  But it is a bug, as the code
4342              * above was supposed to have made room for this, so under
4343              * DEBUGGING builds, we panic anyway.  */
4344             d = off + SvGROW(sv, off + 1);
4345         }
4346     }
4347 
4348     /* terminate the string and set up the sv */
4349     *d = '\0';
4350     SvCUR_set(sv, d - SvPVX_const(sv));
4351 
4352     SvPOK_on(sv);
4353     if (d_is_utf8) {
4354         SvUTF8_on(sv);
4355     }
4356 
4357     /* shrink the sv if we allocated more than we used */
4358     if (SvCUR(sv) + 5 < SvLEN(sv)) {
4359         SvPV_shrink_to_cur(sv);
4360     }
4361 
4362     /* return the substring (via pl_yylval) only if we parsed anything */
4363     if (s > start) {
4364         char *s2 = start;
4365         for (; s2 < s; s2++) {
4366             if (*s2 == '\n')
4367                 COPLINE_INC_WITH_HERELINES;
4368         }
4369         SvREFCNT_inc_simple_void_NN(sv);
4370         if (   (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
4371             && ! PL_parser->lex_re_reparsing)
4372         {
4373             const char *const key = PL_lex_inpat ? "qr" : "q";
4374             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
4375             const char *type;
4376             STRLEN typelen;
4377 
4378             if (PL_lex_inwhat == OP_TRANS) {
4379                 type = "tr";
4380                 typelen = 2;
4381             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
4382                 type = "s";
4383                 typelen = 1;
4384             } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
4385                 type = "q";
4386                 typelen = 1;
4387             } else {
4388                 type = "qq";
4389                 typelen = 2;
4390             }
4391 
4392             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
4393                                 type, typelen, NULL);
4394         }
4395         pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
4396     }
4397     LEAVE_with_name("scan_const");
4398     return s;
4399 }
4400 
4401 /* S_intuit_more
4402  * Returns TRUE if there's more to the expression (e.g., a subscript),
4403  * FALSE otherwise.
4404  *
4405  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
4406  *
4407  * ->[ and ->{ return TRUE
4408  * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
4409  * { and [ outside a pattern are always subscripts, so return TRUE
4410  * if we're outside a pattern and it's not { or [, then return FALSE
4411  * if we're in a pattern and the first char is a {
4412  *   {4,5} (any digits around the comma) returns FALSE
4413  * if we're in a pattern and the first char is a [
4414  *   [] returns FALSE
4415  *   [SOMETHING] has a funky algorithm to decide whether it's a
4416  *      character class or not.  It has to deal with things like
4417  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
4418  * anything else returns TRUE
4419  */
4420 
4421 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
4422 
4423 STATIC int
4424 S_intuit_more(pTHX_ char *s, char *e)
4425 {
4426     PERL_ARGS_ASSERT_INTUIT_MORE;
4427 
4428     if (PL_lex_brackets)
4429         return TRUE;
4430     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
4431         return TRUE;
4432     if (*s == '-' && s[1] == '>'
4433      && FEATURE_POSTDEREF_QQ_IS_ENABLED
4434      && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
4435         ||(s[2] == '@' && memCHRs("*[{",s[3])) ))
4436         return TRUE;
4437     if (*s != '{' && *s != '[')
4438         return FALSE;
4439     PL_parser->sub_no_recover = TRUE;
4440     if (!PL_lex_inpat)
4441         return TRUE;
4442 
4443     /* In a pattern, so maybe we have {n,m}. */
4444     if (*s == '{') {
4445         if (regcurly(s, e, NULL)) {
4446             return FALSE;
4447         }
4448         return TRUE;
4449     }
4450 
4451     /* On the other hand, maybe we have a character class */
4452 
4453     s++;
4454     if (*s == ']' || *s == '^')
4455         return FALSE;
4456     else {
4457         /* this is terrifying, and it works */
4458         int weight;
4459         char seen[256];
4460         const char * const send = (char *) memchr(s, ']', e - s);
4461         unsigned char un_char, last_un_char;
4462         char tmpbuf[sizeof PL_tokenbuf * 4];
4463 
4464         if (!send)		/* has to be an expression */
4465             return TRUE;
4466         weight = 2;		/* let's weigh the evidence */
4467 
4468         if (*s == '$')
4469             weight -= 3;
4470         else if (isDIGIT(*s)) {
4471             if (s[1] != ']') {
4472                 if (isDIGIT(s[1]) && s[2] == ']')
4473                     weight -= 10;
4474             }
4475             else
4476                 weight -= 100;
4477         }
4478         Zero(seen,256,char);
4479         un_char = 255;
4480         for (; s < send; s++) {
4481             last_un_char = un_char;
4482             un_char = (unsigned char)*s;
4483             switch (*s) {
4484             case '@':
4485             case '&':
4486             case '$':
4487                 weight -= seen[un_char] * 10;
4488                 if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) {
4489                     int len;
4490                     scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
4491                     len = (int)strlen(tmpbuf);
4492                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
4493                                                     UTF ? SVf_UTF8 : 0, SVt_PV))
4494                         weight -= 100;
4495                     else
4496                         weight -= 10;
4497                 }
4498                 else if (*s == '$'
4499                          && s[1]
4500                          && memCHRs("[#!%*<>()-=",s[1]))
4501                 {
4502                     if (/*{*/ memCHRs("])} =",s[2]))
4503                         weight -= 10;
4504                     else
4505                         weight -= 1;
4506                 }
4507                 break;
4508             case '\\':
4509                 un_char = 254;
4510                 if (s[1]) {
4511                     if (memCHRs("wds]",s[1]))
4512                         weight += 100;
4513                     else if (seen[(U8)'\''] || seen[(U8)'"'])
4514                         weight += 1;
4515                     else if (memCHRs("rnftbxcav",s[1]))
4516                         weight += 40;
4517                     else if (isDIGIT(s[1])) {
4518                         weight += 40;
4519                         while (s[1] && isDIGIT(s[1]))
4520                             s++;
4521                     }
4522                 }
4523                 else
4524                     weight += 100;
4525                 break;
4526             case '-':
4527                 if (s[1] == '\\')
4528                     weight += 50;
4529                 if (memCHRs("aA01! ",last_un_char))
4530                     weight += 30;
4531                 if (memCHRs("zZ79~",s[1]))
4532                     weight += 30;
4533                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4534                     weight -= 5;	/* cope with negative subscript */
4535                 break;
4536             default:
4537                 if (!isWORDCHAR(last_un_char)
4538                     && !(last_un_char == '$' || last_un_char == '@'
4539                          || last_un_char == '&')
4540                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4541                     char *d = s;
4542                     while (isALPHA(*s))
4543                         s++;
4544                     if (keyword(d, s - d, 0))
4545                         weight -= 150;
4546                 }
4547                 if (un_char == last_un_char + 1)
4548                     weight += 5;
4549                 weight -= seen[un_char];
4550                 break;
4551             }
4552             seen[un_char]++;
4553         }
4554         if (weight >= 0)	/* probably a character class */
4555             return FALSE;
4556     }
4557 
4558     return TRUE;
4559 }
4560 
4561 /*
4562  * S_intuit_method
4563  *
4564  * Does all the checking to disambiguate
4565  *   foo bar
4566  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
4567  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4568  *
4569  * First argument is the stuff after the first token, e.g. "bar".
4570  *
4571  * Not a method if foo is a filehandle.
4572  * Not a method if foo is a subroutine prototyped to take a filehandle.
4573  * Not a method if it's really "Foo $bar"
4574  * Method if it's "foo $bar"
4575  * Not a method if it's really "print foo $bar"
4576  * Method if it's really "foo package::" (interpreted as package->foo)
4577  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4578  * Not a method if bar is a filehandle or package, but is quoted with
4579  *   =>
4580  */
4581 
4582 STATIC int
4583 S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
4584 {
4585     char *s = start + (*start == '$');
4586     char tmpbuf[sizeof PL_tokenbuf];
4587     STRLEN len;
4588     GV* indirgv;
4589         /* Mustn't actually add anything to a symbol table.
4590            But also don't want to "initialise" any placeholder
4591            constants that might already be there into full
4592            blown PVGVs with attached PVCV.  */
4593     GV * const gv =
4594         ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL;
4595 
4596     PERL_ARGS_ASSERT_INTUIT_METHOD;
4597 
4598     if (!FEATURE_INDIRECT_IS_ENABLED)
4599         return 0;
4600 
4601     if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4602             return 0;
4603     if (cv && SvPOK(cv)) {
4604         const char *proto = CvPROTO(cv);
4605         if (proto) {
4606             while (*proto && (isSPACE(*proto) || *proto == ';'))
4607                 proto++;
4608             if (*proto == '*')
4609                 return 0;
4610         }
4611     }
4612 
4613     if (*start == '$') {
4614         SSize_t start_off = start - SvPVX(PL_linestr);
4615         if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY
4616             || isUPPER(*PL_tokenbuf))
4617             return 0;
4618         /* this could be $# */
4619         if (isSPACE(*s))
4620             s = skipspace(s);
4621         PL_bufptr = SvPVX(PL_linestr) + start_off;
4622         PL_expect = XREF;
4623         return *s == '(' ? FUNCMETH : METHOD;
4624     }
4625 
4626     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4627     /* start is the beginning of the possible filehandle/object,
4628      * and s is the end of it
4629      * tmpbuf is a copy of it (but with single quotes as double colons)
4630      */
4631 
4632     if (!keyword(tmpbuf, len, 0)) {
4633         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4634             len -= 2;
4635             tmpbuf[len] = '\0';
4636             goto bare_package;
4637         }
4638         indirgv = gv_fetchpvn_flags(tmpbuf, len,
4639                                     GV_NOADD_NOINIT|( UTF ? SVf_UTF8 : 0 ),
4640                                     SVt_PVCV);
4641         if (indirgv && SvTYPE(indirgv) != SVt_NULL
4642          && (!isGV(indirgv) || GvCVu(indirgv)))
4643             return 0;
4644         /* filehandle or package name makes it a method */
4645         if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4646             s = skipspace(s);
4647             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4648                 return 0;	/* no assumptions -- "=>" quotes bareword */
4649       bare_package:
4650             NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0,
4651                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4652             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4653             PL_expect = XTERM;
4654             force_next(BAREWORD);
4655             PL_bufptr = s;
4656             return *s == '(' ? FUNCMETH : METHOD;
4657         }
4658     }
4659     return 0;
4660 }
4661 
4662 /* Encoded script support. filter_add() effectively inserts a
4663  * 'pre-processing' function into the current source input stream.
4664  * Note that the filter function only applies to the current source file
4665  * (e.g., it will not affect files 'require'd or 'use'd by this one).
4666  *
4667  * The datasv parameter (which may be NULL) can be used to pass
4668  * private data to this instance of the filter. The filter function
4669  * can recover the SV using the FILTER_DATA macro and use it to
4670  * store private buffers and state information.
4671  *
4672  * The supplied datasv parameter is upgraded to a PVIO type
4673  * and the IoDIRP/IoANY field is used to store the function pointer,
4674  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4675  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4676  * private use must be set using malloc'd pointers.
4677  */
4678 
4679 SV *
4680 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4681 {
4682     if (!funcp)
4683         return NULL;
4684 
4685     if (!PL_parser)
4686         return NULL;
4687 
4688     if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4689         Perl_croak(aTHX_ "Source filters apply only to byte streams");
4690 
4691     if (!PL_rsfp_filters)
4692         PL_rsfp_filters = newAV();
4693     if (!datasv)
4694         datasv = newSV(0);
4695     SvUPGRADE(datasv, SVt_PVIO);
4696     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4697     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4698     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4699                           FPTR2DPTR(void *, IoANY(datasv)),
4700                           SvPV_nolen(datasv)));
4701     av_unshift(PL_rsfp_filters, 1);
4702     av_store(PL_rsfp_filters, 0, datasv) ;
4703     if (
4704         !PL_parser->filtered
4705      && PL_parser->lex_flags & LEX_EVALBYTES
4706      && PL_bufptr < PL_bufend
4707     ) {
4708         const char *s = PL_bufptr;
4709         while (s < PL_bufend) {
4710             if (*s == '\n') {
4711                 SV *linestr = PL_parser->linestr;
4712                 char *buf = SvPVX(linestr);
4713                 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4714                 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4715                 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4716                 STRLEN const linestart_pos = PL_parser->linestart - buf;
4717                 STRLEN const last_uni_pos =
4718                     PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4719                 STRLEN const last_lop_pos =
4720                     PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4721                 av_push(PL_rsfp_filters, linestr);
4722                 PL_parser->linestr =
4723                     newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4724                 buf = SvPVX(PL_parser->linestr);
4725                 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4726                 PL_parser->bufptr = buf + bufptr_pos;
4727                 PL_parser->oldbufptr = buf + oldbufptr_pos;
4728                 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4729                 PL_parser->linestart = buf + linestart_pos;
4730                 if (PL_parser->last_uni)
4731                     PL_parser->last_uni = buf + last_uni_pos;
4732                 if (PL_parser->last_lop)
4733                     PL_parser->last_lop = buf + last_lop_pos;
4734                 SvLEN_set(linestr, SvCUR(linestr));
4735                 SvCUR_set(linestr, s - SvPVX(linestr));
4736                 PL_parser->filtered = 1;
4737                 break;
4738             }
4739             s++;
4740         }
4741     }
4742     return(datasv);
4743 }
4744 
4745 /*
4746 =for apidoc_section $filters
4747 =for apidoc filter_del
4748 
4749 Delete most recently added instance of the filter function argument
4750 
4751 =cut
4752 */
4753 
4754 void
4755 Perl_filter_del(pTHX_ filter_t funcp)
4756 {
4757     SV *datasv;
4758 
4759     PERL_ARGS_ASSERT_FILTER_DEL;
4760 
4761 #ifdef DEBUGGING
4762     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4763                           FPTR2DPTR(void*, funcp)));
4764 #endif
4765     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4766         return;
4767     /* if filter is on top of stack (usual case) just pop it off */
4768     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4769     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4770         sv_free(av_pop(PL_rsfp_filters));
4771 
4772         return;
4773     }
4774     /* we need to search for the correct entry and clear it	*/
4775     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4776 }
4777 
4778 
4779 /* Invoke the idxth filter function for the current rsfp.	 */
4780 /* maxlen 0 = read one text line */
4781 I32
4782 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4783 {
4784     filter_t funcp;
4785     I32 ret;
4786     SV *datasv = NULL;
4787     /* This API is bad. It should have been using unsigned int for maxlen.
4788        Not sure if we want to change the API, but if not we should sanity
4789        check the value here.  */
4790     unsigned int correct_length = maxlen < 0 ?  PERL_INT_MAX : maxlen;
4791 
4792     PERL_ARGS_ASSERT_FILTER_READ;
4793 
4794     if (!PL_parser || !PL_rsfp_filters)
4795         return -1;
4796     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?	*/
4797         /* Provide a default input filter to make life easy.	*/
4798         /* Note that we append to the line. This is handy.	*/
4799         DEBUG_P(PerlIO_printf(Perl_debug_log,
4800                               "filter_read %d: from rsfp\n", idx));
4801         if (correct_length) {
4802             /* Want a block */
4803             int len ;
4804             const int old_len = SvCUR(buf_sv);
4805 
4806             /* ensure buf_sv is large enough */
4807             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4808             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4809                                    correct_length)) <= 0) {
4810                 if (PerlIO_error(PL_rsfp))
4811                     return -1;		/* error */
4812                 else
4813                     return 0 ;		/* end of file */
4814             }
4815             SvCUR_set(buf_sv, old_len + len) ;
4816             SvPVX(buf_sv)[old_len + len] = '\0';
4817         } else {
4818             /* Want a line */
4819             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4820                 if (PerlIO_error(PL_rsfp))
4821                     return -1;		/* error */
4822                 else
4823                     return 0 ;		/* end of file */
4824             }
4825         }
4826         return SvCUR(buf_sv);
4827     }
4828     /* Skip this filter slot if filter has been deleted	*/
4829     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4830         DEBUG_P(PerlIO_printf(Perl_debug_log,
4831                               "filter_read %d: skipped (filter deleted)\n",
4832                               idx));
4833         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4834     }
4835     if (SvTYPE(datasv) != SVt_PVIO) {
4836         if (correct_length) {
4837             /* Want a block */
4838             const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4839             if (!remainder) return 0; /* eof */
4840             if (correct_length > remainder) correct_length = remainder;
4841             sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4842             SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4843         } else {
4844             /* Want a line */
4845             const char *s = SvEND(datasv);
4846             const char *send = SvPVX(datasv) + SvLEN(datasv);
4847             while (s < send) {
4848                 if (*s == '\n') {
4849                     s++;
4850                     break;
4851                 }
4852                 s++;
4853             }
4854             if (s == send) return 0; /* eof */
4855             sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4856             SvCUR_set(datasv, s-SvPVX(datasv));
4857         }
4858         return SvCUR(buf_sv);
4859     }
4860     /* Get function pointer hidden within datasv	*/
4861     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4862     DEBUG_P(PerlIO_printf(Perl_debug_log,
4863                           "filter_read %d: via function %p (%s)\n",
4864                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
4865     /* Call function. The function is expected to 	*/
4866     /* call "FILTER_READ(idx+1, buf_sv)" first.		*/
4867     /* Return: <0:error, =0:eof, >0:not eof 		*/
4868     ENTER;
4869     save_scalar(PL_errgv);
4870     ret = (*funcp)(aTHX_ idx, buf_sv, correct_length);
4871     LEAVE;
4872     return ret;
4873 }
4874 
4875 STATIC char *
4876 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4877 {
4878     PERL_ARGS_ASSERT_FILTER_GETS;
4879 
4880 #ifdef PERL_CR_FILTER
4881     if (!PL_rsfp_filters) {
4882         filter_add(S_cr_textfilter,NULL);
4883     }
4884 #endif
4885     if (PL_rsfp_filters) {
4886         if (!append)
4887             SvCUR_set(sv, 0);	/* start with empty line	*/
4888         if (FILTER_READ(0, sv, 0) > 0)
4889             return ( SvPVX(sv) ) ;
4890         else
4891             return NULL ;
4892     }
4893     else
4894         return (sv_gets(sv, PL_rsfp, append));
4895 }
4896 
4897 STATIC HV *
4898 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4899 {
4900     GV *gv;
4901 
4902     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4903 
4904     if (memEQs(pkgname, len, "__PACKAGE__"))
4905         return PL_curstash;
4906 
4907     if (len > 2
4908         && (pkgname[len - 2] == ':' && pkgname[len - 1] == ':')
4909         && (gv = gv_fetchpvn_flags(pkgname,
4910                                    len,
4911                                    ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4912     {
4913         return GvHV(gv);			/* Foo:: */
4914     }
4915 
4916     /* use constant CLASS => 'MyClass' */
4917     gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4918     if (gv && GvCV(gv)) {
4919         SV * const sv = cv_const_sv(GvCV(gv));
4920         if (sv)
4921             return gv_stashsv(sv, 0);
4922     }
4923 
4924     return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4925 }
4926 
4927 
4928 STATIC char *
4929 S_tokenize_use(pTHX_ int is_use, char *s) {
4930     PERL_ARGS_ASSERT_TOKENIZE_USE;
4931 
4932     if (PL_expect != XSTATE)
4933         /* diag_listed_as: "use" not allowed in expression */
4934         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4935                     is_use ? "use" : "no"));
4936     PL_expect = XTERM;
4937     s = skipspace(s);
4938     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4939         s = force_version(s, TRUE);
4940         if (*s == ';' || *s == '}'
4941                 || (s = skipspace(s), (*s == ';' || *s == '}'))) {
4942             NEXTVAL_NEXTTOKE.opval = NULL;
4943             force_next(BAREWORD);
4944         }
4945         else if (*s == 'v') {
4946             s = force_word(s,BAREWORD,FALSE,TRUE);
4947             s = force_version(s, FALSE);
4948         }
4949     }
4950     else {
4951         s = force_word(s,BAREWORD,FALSE,TRUE);
4952         s = force_version(s, FALSE);
4953     }
4954     pl_yylval.ival = is_use;
4955     return s;
4956 }
4957 #ifdef DEBUGGING
4958     static const char* const exp_name[] =
4959         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4960           "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
4961           "SIGVAR", "TERMORDORDOR"
4962         };
4963 #endif
4964 
4965 #define word_takes_any_delimiter(p,l) S_word_takes_any_delimiter(p,l)
4966 STATIC bool
4967 S_word_takes_any_delimiter(char *p, STRLEN len)
4968 {
4969     return (len == 1 && memCHRs("msyq", p[0]))
4970             || (len == 2
4971                 && ((p[0] == 't' && p[1] == 'r')
4972                     || (p[0] == 'q' && memCHRs("qwxr", p[1]))));
4973 }
4974 
4975 static void
4976 S_check_scalar_slice(pTHX_ char *s)
4977 {
4978     s++;
4979     while (SPACE_OR_TAB(*s)) s++;
4980     if (*s == 'q' && s[1] == 'w' && !isWORDCHAR_lazy_if_safe(s+2,
4981                                                              PL_bufend,
4982                                                              UTF))
4983     {
4984         return;
4985     }
4986     while (    isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)
4987            || (*s && memCHRs(" \t$#+-'\"", *s)))
4988     {
4989         s += UTF ? UTF8SKIP(s) : 1;
4990     }
4991     if (*s == '}' || *s == ']')
4992         pl_yylval.ival = OPpSLICEWARNING;
4993 }
4994 
4995 #define lex_token_boundary() S_lex_token_boundary(aTHX)
4996 static void
4997 S_lex_token_boundary(pTHX)
4998 {
4999     PL_oldoldbufptr = PL_oldbufptr;
5000     PL_oldbufptr = PL_bufptr;
5001 }
5002 
5003 #define vcs_conflict_marker(s) S_vcs_conflict_marker(aTHX_ s)
5004 static char *
5005 S_vcs_conflict_marker(pTHX_ char *s)
5006 {
5007     lex_token_boundary();
5008     PL_bufptr = s;
5009     yyerror("Version control conflict marker");
5010     while (s < PL_bufend && *s != '\n')
5011         s++;
5012     return s;
5013 }
5014 
5015 static int
5016 yyl_sigvar(pTHX_ char *s)
5017 {
5018     /* we expect the sigil and optional var name part of a
5019      * signature element here. Since a '$' is not necessarily
5020      * followed by a var name, handle it specially here; the general
5021      * yylex code would otherwise try to interpret whatever follows
5022      * as a var; e.g. ($, ...) would be seen as the var '$,'
5023      */
5024 
5025     U8 sigil;
5026 
5027     s = skipspace(s);
5028     sigil = *s++;
5029     PL_bufptr = s; /* for error reporting */
5030     switch (sigil) {
5031     case '$':
5032     case '@':
5033     case '%':
5034         /* spot stuff that looks like an prototype */
5035         if (memCHRs("$:@%&*;\\[]", *s)) {
5036             yyerror("Illegal character following sigil in a subroutine signature");
5037             break;
5038         }
5039         /* '$#' is banned, while '$ # comment' isn't */
5040         if (*s == '#') {
5041             yyerror("'#' not allowed immediately following a sigil in a subroutine signature");
5042             break;
5043         }
5044         s = skipspace(s);
5045         if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5046             char *dest = PL_tokenbuf + 1;
5047             /* read var name, including sigil, into PL_tokenbuf */
5048             PL_tokenbuf[0] = sigil;
5049             parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1,
5050                 0, cBOOL(UTF), FALSE, FALSE);
5051             *dest = '\0';
5052             assert(PL_tokenbuf[1]); /* we have a variable name */
5053         }
5054         else {
5055             *PL_tokenbuf = 0;
5056             PL_in_my = 0;
5057         }
5058 
5059         s = skipspace(s);
5060         /* parse the = for the default ourselves to avoid '+=' etc being accepted here
5061          * as the ASSIGNOP, and exclude other tokens that start with =
5062          */
5063         if (*s == '=' && (!s[1] || memCHRs("=~>", s[1]) == 0)) {
5064             /* save now to report with the same context as we did when
5065              * all ASSIGNOPS were accepted */
5066             PL_oldbufptr = s;
5067 
5068             ++s;
5069             NEXTVAL_NEXTTOKE.ival = 0;
5070             force_next(ASSIGNOP);
5071             PL_expect = XTERM;
5072         }
5073         else if (*s == ',' || *s == ')') {
5074             PL_expect = XOPERATOR;
5075         }
5076         else {
5077             /* make sure the context shows the unexpected character and
5078              * hopefully a bit more */
5079             if (*s) ++s;
5080             while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
5081                 s++;
5082             PL_bufptr = s; /* for error reporting */
5083             yyerror("Illegal operator following parameter in a subroutine signature");
5084             PL_in_my = 0;
5085         }
5086         if (*PL_tokenbuf) {
5087             NEXTVAL_NEXTTOKE.ival = sigil;
5088             force_next('p'); /* force a signature pending identifier */
5089         }
5090         break;
5091 
5092     case ')':
5093         PL_expect = XBLOCK;
5094         break;
5095     case ',': /* handle ($a,,$b) */
5096         break;
5097 
5098     default:
5099         PL_in_my = 0;
5100         yyerror("A signature parameter must start with '$', '@' or '%'");
5101         /* very crude error recovery: skip to likely next signature
5102          * element */
5103         while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
5104             s++;
5105         break;
5106     }
5107 
5108     switch (sigil) {
5109         case ',': TOKEN (PERLY_COMMA);
5110         case '$': TOKEN (PERLY_DOLLAR);
5111         case '@': TOKEN (PERLY_SNAIL);
5112         case '%': TOKEN (PERLY_PERCENT_SIGN);
5113         case ')': TOKEN (PERLY_PAREN_CLOSE);
5114         default:  TOKEN (sigil);
5115     }
5116 }
5117 
5118 static int
5119 yyl_dollar(pTHX_ char *s)
5120 {
5121     CLINE;
5122 
5123     if (PL_expect == XPOSTDEREF) {
5124         if (s[1] == '#') {
5125             s++;
5126             POSTDEREF(DOLSHARP);
5127         }
5128         POSTDEREF(PERLY_DOLLAR);
5129     }
5130 
5131     if (   s[1] == '#'
5132         && (   isIDFIRST_lazy_if_safe(s+2, PL_bufend, UTF)
5133             || memCHRs("{$:+-@", s[2])))
5134     {
5135         PL_tokenbuf[0] = '@';
5136         s = scan_ident(s + 1, PL_tokenbuf + 1,
5137                        sizeof PL_tokenbuf - 1, FALSE);
5138         if (PL_expect == XOPERATOR) {
5139             char *d = s;
5140             if (PL_bufptr > s) {
5141                 d = PL_bufptr-1;
5142                 PL_bufptr = PL_oldbufptr;
5143             }
5144             no_op("Array length", d);
5145         }
5146         if (!PL_tokenbuf[1])
5147             PREREF(DOLSHARP);
5148         PL_expect = XOPERATOR;
5149         force_ident_maybe_lex('#');
5150         TOKEN(DOLSHARP);
5151     }
5152 
5153     PL_tokenbuf[0] = '$';
5154     s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5155     if (PL_expect == XOPERATOR) {
5156         char *d = s;
5157         if (PL_bufptr > s) {
5158             d = PL_bufptr-1;
5159             PL_bufptr = PL_oldbufptr;
5160         }
5161         no_op("Scalar", d);
5162     }
5163     if (!PL_tokenbuf[1]) {
5164         if (s == PL_bufend)
5165             yyerror("Final $ should be \\$ or $name");
5166         PREREF(PERLY_DOLLAR);
5167     }
5168 
5169     {
5170         const char tmp = *s;
5171         if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
5172             s = skipspace(s);
5173 
5174         if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5175             && intuit_more(s, PL_bufend)) {
5176             if (*s == '[') {
5177                 PL_tokenbuf[0] = '@';
5178                 if (ckWARN(WARN_SYNTAX)) {
5179                     char *t = s+1;
5180 
5181                     while ( t < PL_bufend ) {
5182                         if (isSPACE(*t)) {
5183                             do { t += UTF ? UTF8SKIP(t) : 1; } while (t < PL_bufend && isSPACE(*t));
5184                             /* consumed one or more space chars */
5185                         } else if (*t == '$' || *t == '@') {
5186                             /* could be more than one '$' like $$ref or @$ref */
5187                             do { t++; } while (t < PL_bufend && *t == '$');
5188 
5189                             /* could be an abigail style identifier like $ foo */
5190                             while (t < PL_bufend && *t == ' ') t++;
5191 
5192                             /* strip off the name of the var */
5193                             while (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
5194                                 t += UTF ? UTF8SKIP(t) : 1;
5195                             /* consumed a varname */
5196                         } else if (isDIGIT(*t)) {
5197                             /* deal with hex constants like 0x11 */
5198                             if (t[0] == '0' && t[1] == 'x') {
5199                                 t += 2;
5200                                 while (t < PL_bufend && isXDIGIT(*t)) t++;
5201                             } else {
5202                                 /* deal with decimal/octal constants like 1 and 0123 */
5203                                 do { t++; } while (isDIGIT(*t));
5204                                 if (t<PL_bufend && *t == '.') {
5205                                     do { t++; } while (isDIGIT(*t));
5206                                 }
5207                             }
5208                             /* consumed a number */
5209                         } else {
5210                             /* not a var nor a space nor a number */
5211                             break;
5212                         }
5213                     }
5214                     if (t < PL_bufend && *t++ == ',') {
5215                         PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
5216                         while (t < PL_bufend && *t != ']')
5217                             t++;
5218                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5219                                     "Multidimensional syntax %" UTF8f " not supported",
5220                                     UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
5221                     }
5222                 }
5223             }
5224             else if (*s == '{') {
5225                 char *t;
5226                 PL_tokenbuf[0] = '%';
5227                 if (    strEQ(PL_tokenbuf+1, "SIG")
5228                     && ckWARN(WARN_SYNTAX)
5229                     && (t = (char *) memchr(s, '}', PL_bufend - s))
5230                     && (t = (char *) memchr(t, '=', PL_bufend - t)))
5231                 {
5232                     char tmpbuf[sizeof PL_tokenbuf];
5233                     do {
5234                         t++;
5235                     } while (isSPACE(*t));
5236                     if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) {
5237                         STRLEN len;
5238                         t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
5239                                         &len);
5240                         while (isSPACE(*t))
5241                             t++;
5242                         if (  *t == ';'
5243                             && get_cvn_flags(tmpbuf, len, UTF
5244                                                             ? SVf_UTF8
5245                                                             : 0))
5246                         {
5247                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5248                                 "You need to quote \"%" UTF8f "\"",
5249                                     UTF8fARG(UTF, len, tmpbuf));
5250                         }
5251                     }
5252                 }
5253             }
5254         }
5255 
5256         PL_expect = XOPERATOR;
5257         if ((PL_lex_state == LEX_NORMAL || PL_lex_brackets) && isSPACE((char)tmp)) {
5258             const bool islop = (PL_last_lop == PL_oldoldbufptr);
5259             if (!islop || PL_last_lop_op == OP_GREPSTART)
5260                 PL_expect = XOPERATOR;
5261             else if (memCHRs("$@\"'`q", *s))
5262                 PL_expect = XTERM;		/* e.g. print $fh "foo" */
5263             else if (   memCHRs("&*<%", *s)
5264                      && isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF))
5265             {
5266                 PL_expect = XTERM;		/* e.g. print $fh &sub */
5267             }
5268             else if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5269                 char tmpbuf[sizeof PL_tokenbuf];
5270                 int t2;
5271                 STRLEN len;
5272                 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5273                 if ((t2 = keyword(tmpbuf, len, 0))) {
5274                     /* binary operators exclude handle interpretations */
5275                     switch (t2) {
5276                     case -KEY_x:
5277                     case -KEY_eq:
5278                     case -KEY_ne:
5279                     case -KEY_gt:
5280                     case -KEY_lt:
5281                     case -KEY_ge:
5282                     case -KEY_le:
5283                     case -KEY_cmp:
5284                         break;
5285                     default:
5286                         PL_expect = XTERM;	/* e.g. print $fh length() */
5287                         break;
5288                     }
5289                 }
5290                 else {
5291                     PL_expect = XTERM;	/* e.g. print $fh subr() */
5292                 }
5293             }
5294             else if (isDIGIT(*s))
5295                 PL_expect = XTERM;		/* e.g. print $fh 3 */
5296             else if (*s == '.' && isDIGIT(s[1]))
5297                 PL_expect = XTERM;		/* e.g. print $fh .3 */
5298             else if ((*s == '?' || *s == '-' || *s == '+')
5299                 && !isSPACE(s[1]) && s[1] != '=')
5300                 PL_expect = XTERM;		/* e.g. print $fh -1 */
5301             else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5302                      && s[1] != '/')
5303                 PL_expect = XTERM;		/* e.g. print $fh /.../
5304                                                XXX except DORDOR operator
5305                                             */
5306             else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5307                      && s[2] != '=')
5308                 PL_expect = XTERM;		/* print $fh <<"EOF" */
5309         }
5310     }
5311     force_ident_maybe_lex('$');
5312     TOKEN(PERLY_DOLLAR);
5313 }
5314 
5315 static int
5316 yyl_sub(pTHX_ char *s, const int key)
5317 {
5318     char * const tmpbuf = PL_tokenbuf + 1;
5319     bool have_name, have_proto;
5320     STRLEN len;
5321     SV *format_name = NULL;
5322     bool is_sigsub = FEATURE_SIGNATURES_IS_ENABLED;
5323 
5324     SSize_t off = s-SvPVX(PL_linestr);
5325     char *d;
5326 
5327     s = skipspace(s); /* can move PL_linestr */
5328 
5329     d = SvPVX(PL_linestr)+off;
5330 
5331     SAVEBOOL(PL_parser->sig_seen);
5332     PL_parser->sig_seen = FALSE;
5333 
5334     if (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
5335         || *s == '\''
5336         || (*s == ':' && s[1] == ':'))
5337     {
5338 
5339         PL_expect = XATTRBLOCK;
5340         d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
5341                       &len);
5342         if (key == KEY_format)
5343             format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
5344         *PL_tokenbuf = '&';
5345         if (memchr(tmpbuf, ':', len) || key != KEY_sub
5346          || pad_findmy_pvn(
5347                 PL_tokenbuf, len + 1, 0
5348             ) != NOT_IN_PAD)
5349             sv_setpvn(PL_subname, tmpbuf, len);
5350         else {
5351             sv_setsv(PL_subname,PL_curstname);
5352             sv_catpvs(PL_subname,"::");
5353             sv_catpvn(PL_subname,tmpbuf,len);
5354         }
5355         if (SvUTF8(PL_linestr))
5356             SvUTF8_on(PL_subname);
5357         have_name = TRUE;
5358 
5359         s = skipspace(d);
5360     }
5361     else {
5362         if (key == KEY_my || key == KEY_our || key==KEY_state) {
5363             *d = '\0';
5364             /* diag_listed_as: Missing name in "%s sub" */
5365             Perl_croak(aTHX_
5366                       "Missing name in \"%s\"", PL_bufptr);
5367         }
5368         PL_expect = XATTRTERM;
5369         sv_setpvs(PL_subname,"?");
5370         have_name = FALSE;
5371     }
5372 
5373     if (key == KEY_format) {
5374         if (format_name) {
5375             NEXTVAL_NEXTTOKE.opval
5376                 = newSVOP(OP_CONST,0, format_name);
5377             NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
5378             force_next(BAREWORD);
5379         }
5380         PREBLOCK(FORMAT);
5381     }
5382 
5383     /* Look for a prototype */
5384     if (*s == '(' && !is_sigsub) {
5385         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
5386         if (!s)
5387             Perl_croak(aTHX_ "Prototype not terminated");
5388         COPLINE_SET_FROM_MULTI_END;
5389         (void)validate_proto(PL_subname, PL_lex_stuff,
5390                              ckWARN(WARN_ILLEGALPROTO), 0);
5391         have_proto = TRUE;
5392 
5393         s = skipspace(s);
5394     }
5395     else
5396         have_proto = FALSE;
5397 
5398     if (  !(*s == ':' && s[1] != ':')
5399         && (*s != '{' && *s != '(') && key != KEY_format)
5400     {
5401         assert(key == KEY_sub || key == KEY_AUTOLOAD ||
5402                key == KEY_DESTROY || key == KEY_BEGIN ||
5403                key == KEY_UNITCHECK || key == KEY_CHECK ||
5404                key == KEY_INIT || key == KEY_END ||
5405                key == KEY_my || key == KEY_state ||
5406                key == KEY_our);
5407         if (!have_name)
5408             Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
5409         else if (*s != ';' && *s != '}')
5410             Perl_croak(aTHX_ "Illegal declaration of subroutine %" SVf, SVfARG(PL_subname));
5411     }
5412 
5413     if (have_proto) {
5414         NEXTVAL_NEXTTOKE.opval =
5415             newSVOP(OP_CONST, 0, PL_lex_stuff);
5416         PL_lex_stuff = NULL;
5417         force_next(THING);
5418     }
5419     if (!have_name) {
5420         if (PL_curstash)
5421             sv_setpvs(PL_subname, "__ANON__");
5422         else
5423             sv_setpvs(PL_subname, "__ANON__::__ANON__");
5424         if (is_sigsub)
5425             TOKEN(ANON_SIGSUB);
5426         else
5427             TOKEN(ANONSUB);
5428     }
5429     force_ident_maybe_lex('&');
5430     if (is_sigsub)
5431         TOKEN(SIGSUB);
5432     else
5433         TOKEN(SUB);
5434 }
5435 
5436 static int
5437 yyl_interpcasemod(pTHX_ char *s)
5438 {
5439 #ifdef DEBUGGING
5440     if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
5441         Perl_croak(aTHX_
5442                    "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
5443                    PL_bufptr, PL_bufend, *PL_bufptr);
5444 #endif
5445 
5446     if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
5447         /* if at a \E */
5448         if (PL_lex_casemods) {
5449             const char oldmod = PL_lex_casestack[--PL_lex_casemods];
5450             PL_lex_casestack[PL_lex_casemods] = '\0';
5451 
5452             if (PL_bufptr != PL_bufend
5453                 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
5454                     || oldmod == 'F')) {
5455                 PL_bufptr += 2;
5456                 PL_lex_state = LEX_INTERPCONCAT;
5457             }
5458             PL_lex_allbrackets--;
5459             return REPORT(PERLY_PAREN_CLOSE);
5460         }
5461         else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
5462            /* Got an unpaired \E */
5463            Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
5464                     "Useless use of \\E");
5465         }
5466         if (PL_bufptr != PL_bufend)
5467             PL_bufptr += 2;
5468         PL_lex_state = LEX_INTERPCONCAT;
5469         return yylex();
5470     }
5471     else {
5472         DEBUG_T({
5473             PerlIO_printf(Perl_debug_log, "### Saw case modifier\n");
5474         });
5475         s = PL_bufptr + 1;
5476         if (s[1] == '\\' && s[2] == 'E') {
5477             PL_bufptr = s + 3;
5478             PL_lex_state = LEX_INTERPCONCAT;
5479             return yylex();
5480         }
5481         else {
5482             I32 tmp;
5483             if (   memBEGINs(s, (STRLEN) (PL_bufend - s), "L\\u")
5484                 || memBEGINs(s, (STRLEN) (PL_bufend - s), "U\\l"))
5485             {
5486                 tmp = *s, *s = s[2], s[2] = (char)tmp;	/* misordered... */
5487             }
5488             if ((*s == 'L' || *s == 'U' || *s == 'F')
5489                 && (strpbrk(PL_lex_casestack, "LUF")))
5490             {
5491                 PL_lex_casestack[--PL_lex_casemods] = '\0';
5492                 PL_lex_allbrackets--;
5493                 return REPORT(PERLY_PAREN_CLOSE);
5494             }
5495             if (PL_lex_casemods > 10)
5496                 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
5497             PL_lex_casestack[PL_lex_casemods++] = *s;
5498             PL_lex_casestack[PL_lex_casemods] = '\0';
5499             PL_lex_state = LEX_INTERPCONCAT;
5500             NEXTVAL_NEXTTOKE.ival = 0;
5501             force_next((2<<24)|PERLY_PAREN_OPEN);
5502             if (*s == 'l')
5503                 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
5504             else if (*s == 'u')
5505                 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
5506             else if (*s == 'L')
5507                 NEXTVAL_NEXTTOKE.ival = OP_LC;
5508             else if (*s == 'U')
5509                 NEXTVAL_NEXTTOKE.ival = OP_UC;
5510             else if (*s == 'Q')
5511                 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
5512             else if (*s == 'F')
5513                 NEXTVAL_NEXTTOKE.ival = OP_FC;
5514             else
5515                 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
5516             PL_bufptr = s + 1;
5517         }
5518         force_next(FUNC);
5519         if (PL_lex_starts) {
5520             s = PL_bufptr;
5521             PL_lex_starts = 0;
5522             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5523             if (PL_lex_casemods == 1 && PL_lex_inpat)
5524                 TOKEN(PERLY_COMMA);
5525             else
5526                 AopNOASSIGN(OP_CONCAT);
5527         }
5528         else
5529             return yylex();
5530     }
5531 }
5532 
5533 static int
5534 yyl_secondclass_keyword(pTHX_ char *s, STRLEN len, int key, I32 *orig_keyword,
5535                         GV **pgv, GV ***pgvp)
5536 {
5537     GV *ogv = NULL;	/* override (winner) */
5538     GV *hgv = NULL;	/* hidden (loser) */
5539     GV *gv = *pgv;
5540 
5541     if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5542         CV *cv;
5543         if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5544                                     (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
5545                                     SVt_PVCV))
5546             && (cv = GvCVu(gv)))
5547         {
5548             if (GvIMPORTED_CV(gv))
5549                 ogv = gv;
5550             else if (! CvMETHOD(cv))
5551                 hgv = gv;
5552         }
5553         if (!ogv
5554             && (*pgvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf, len, FALSE))
5555             && (gv = **pgvp)
5556             && (isGV_with_GP(gv)
5557                 ? GvCVu(gv) && GvIMPORTED_CV(gv)
5558                 :   SvPCS_IMPORTED(gv)
5559                 && (gv_init(gv, PL_globalstash, PL_tokenbuf,
5560                                                          len, 0), 1)))
5561         {
5562             ogv = gv;
5563         }
5564     }
5565 
5566     *pgv = gv;
5567 
5568     if (ogv) {
5569         *orig_keyword = key;
5570         return 0;		/* overridden by import or by GLOBAL */
5571     }
5572     else if (gv && !*pgvp
5573              && -key==KEY_lock	/* XXX generalizable kludge */
5574              && GvCVu(gv))
5575     {
5576         return 0;		/* any sub overrides "weak" keyword */
5577     }
5578     else {			/* no override */
5579         key = -key;
5580         if (key == KEY_dump) {
5581             Perl_croak(aTHX_ "dump() must be written as CORE::dump() as of Perl 5.30");
5582         }
5583         *pgv = NULL;
5584         *pgvp = 0;
5585         if (hgv && key != KEY_x)	/* never ambiguous */
5586             Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5587                            "Ambiguous call resolved as CORE::%s(), "
5588                            "qualify as such or use &",
5589                            GvENAME(hgv));
5590         return key;
5591     }
5592 }
5593 
5594 static int
5595 yyl_qw(pTHX_ char *s, STRLEN len)
5596 {
5597     OP *words = NULL;
5598 
5599     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
5600     if (!s)
5601         missingterm(NULL, 0);
5602 
5603     COPLINE_SET_FROM_MULTI_END;
5604     PL_expect = XOPERATOR;
5605     if (SvCUR(PL_lex_stuff)) {
5606         int warned_comma = !ckWARN(WARN_QW);
5607         int warned_comment = warned_comma;
5608         char *d = SvPV_force(PL_lex_stuff, len);
5609         while (len) {
5610             for (; isSPACE(*d) && len; --len, ++d)
5611                 /**/;
5612             if (len) {
5613                 SV *sv;
5614                 const char *b = d;
5615                 if (!warned_comma || !warned_comment) {
5616                     for (; !isSPACE(*d) && len; --len, ++d) {
5617                         if (!warned_comma && *d == ',') {
5618                             Perl_warner(aTHX_ packWARN(WARN_QW),
5619                                 "Possible attempt to separate words with commas");
5620                             ++warned_comma;
5621                         }
5622                         else if (!warned_comment && *d == '#') {
5623                             Perl_warner(aTHX_ packWARN(WARN_QW),
5624                                 "Possible attempt to put comments in qw() list");
5625                             ++warned_comment;
5626                         }
5627                     }
5628                 }
5629                 else {
5630                     for (; !isSPACE(*d) && len; --len, ++d)
5631                         /**/;
5632                 }
5633                 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
5634                 words = op_append_elem(OP_LIST, words,
5635                                        newSVOP(OP_CONST, 0, tokeq(sv)));
5636             }
5637         }
5638     }
5639     if (!words)
5640         words = newNULLLIST();
5641     SvREFCNT_dec_NN(PL_lex_stuff);
5642     PL_lex_stuff = NULL;
5643     PL_expect = XOPERATOR;
5644     pl_yylval.opval = sawparens(words);
5645     TOKEN(QWLIST);
5646 }
5647 
5648 static int
5649 yyl_hyphen(pTHX_ char *s)
5650 {
5651     if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5652         I32 ftst = 0;
5653         char tmp;
5654 
5655         s++;
5656         PL_bufptr = s;
5657         tmp = *s++;
5658 
5659         while (s < PL_bufend && SPACE_OR_TAB(*s))
5660             s++;
5661 
5662         if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=>")) {
5663             s = force_word(PL_bufptr,BAREWORD,FALSE,FALSE);
5664             DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5665             OPERATOR(PERLY_MINUS);              /* unary minus */
5666         }
5667         switch (tmp) {
5668         case 'r': ftst = OP_FTEREAD;    break;
5669         case 'w': ftst = OP_FTEWRITE;   break;
5670         case 'x': ftst = OP_FTEEXEC;    break;
5671         case 'o': ftst = OP_FTEOWNED;   break;
5672         case 'R': ftst = OP_FTRREAD;    break;
5673         case 'W': ftst = OP_FTRWRITE;   break;
5674         case 'X': ftst = OP_FTREXEC;    break;
5675         case 'O': ftst = OP_FTROWNED;   break;
5676         case 'e': ftst = OP_FTIS;       break;
5677         case 'z': ftst = OP_FTZERO;     break;
5678         case 's': ftst = OP_FTSIZE;     break;
5679         case 'f': ftst = OP_FTFILE;     break;
5680         case 'd': ftst = OP_FTDIR;      break;
5681         case 'l': ftst = OP_FTLINK;     break;
5682         case 'p': ftst = OP_FTPIPE;     break;
5683         case 'S': ftst = OP_FTSOCK;     break;
5684         case 'u': ftst = OP_FTSUID;     break;
5685         case 'g': ftst = OP_FTSGID;     break;
5686         case 'k': ftst = OP_FTSVTX;     break;
5687         case 'b': ftst = OP_FTBLK;      break;
5688         case 'c': ftst = OP_FTCHR;      break;
5689         case 't': ftst = OP_FTTTY;      break;
5690         case 'T': ftst = OP_FTTEXT;     break;
5691         case 'B': ftst = OP_FTBINARY;   break;
5692         case 'M': case 'A': case 'C':
5693             gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5694             switch (tmp) {
5695             case 'M': ftst = OP_FTMTIME; break;
5696             case 'A': ftst = OP_FTATIME; break;
5697             case 'C': ftst = OP_FTCTIME; break;
5698             default:                     break;
5699             }
5700             break;
5701         default:
5702             break;
5703         }
5704         if (ftst) {
5705             PL_last_uni = PL_oldbufptr;
5706             PL_last_lop_op = (OPCODE)ftst;
5707             DEBUG_T( {
5708                 PerlIO_printf(Perl_debug_log, "### Saw file test %c\n", (int)tmp);
5709             } );
5710             FTST(ftst);
5711         }
5712         else {
5713             /* Assume it was a minus followed by a one-letter named
5714              * subroutine call (or a -bareword), then. */
5715             DEBUG_T( {
5716                 PerlIO_printf(Perl_debug_log,
5717                     "### '-%c' looked like a file test but was not\n",
5718                     (int) tmp);
5719             } );
5720             s = --PL_bufptr;
5721         }
5722     }
5723     {
5724         const char tmp = *s++;
5725         if (*s == tmp) {
5726             s++;
5727             if (PL_expect == XOPERATOR)
5728                 TERM(POSTDEC);
5729             else
5730                 OPERATOR(PREDEC);
5731         }
5732         else if (*s == '>') {
5733             s++;
5734             s = skipspace(s);
5735             if (((*s == '$' || *s == '&') && s[1] == '*')
5736               ||(*s == '$' && s[1] == '#' && s[2] == '*')
5737               ||((*s == '@' || *s == '%') && memCHRs("*[{", s[1]))
5738               ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5739              )
5740             {
5741                 PL_expect = XPOSTDEREF;
5742                 TOKEN(ARROW);
5743             }
5744             if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5745                 s = force_word(s,METHOD,FALSE,TRUE);
5746                 TOKEN(ARROW);
5747             }
5748             else if (*s == '$')
5749                 OPERATOR(ARROW);
5750             else
5751                 TERM(ARROW);
5752         }
5753         if (PL_expect == XOPERATOR) {
5754             if (*s == '='
5755                 && !PL_lex_allbrackets
5756                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5757             {
5758                 s--;
5759                 TOKEN(0);
5760             }
5761             Aop(OP_SUBTRACT);
5762         }
5763         else {
5764             if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5765                 check_uni();
5766             OPERATOR(PERLY_MINUS);              /* unary minus */
5767         }
5768     }
5769 }
5770 
5771 static int
5772 yyl_plus(pTHX_ char *s)
5773 {
5774     const char tmp = *s++;
5775     if (*s == tmp) {
5776         s++;
5777         if (PL_expect == XOPERATOR)
5778             TERM(POSTINC);
5779         else
5780             OPERATOR(PREINC);
5781     }
5782     if (PL_expect == XOPERATOR) {
5783         if (*s == '='
5784             && !PL_lex_allbrackets
5785             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5786         {
5787             s--;
5788             TOKEN(0);
5789         }
5790         Aop(OP_ADD);
5791     }
5792     else {
5793         if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5794             check_uni();
5795         OPERATOR(PERLY_PLUS);
5796     }
5797 }
5798 
5799 static int
5800 yyl_star(pTHX_ char *s)
5801 {
5802     if (PL_expect == XPOSTDEREF)
5803         POSTDEREF(PERLY_STAR);
5804 
5805     if (PL_expect != XOPERATOR) {
5806         s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5807         PL_expect = XOPERATOR;
5808         force_ident(PL_tokenbuf, PERLY_STAR);
5809         if (!*PL_tokenbuf)
5810             PREREF(PERLY_STAR);
5811         TERM(PERLY_STAR);
5812     }
5813 
5814     s++;
5815     if (*s == '*') {
5816         s++;
5817         if (*s == '=' && !PL_lex_allbrackets
5818             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5819         {
5820             s -= 2;
5821             TOKEN(0);
5822         }
5823         PWop(OP_POW);
5824     }
5825 
5826     if (*s == '='
5827         && !PL_lex_allbrackets
5828         && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5829     {
5830         s--;
5831         TOKEN(0);
5832     }
5833 
5834     Mop(OP_MULTIPLY);
5835 }
5836 
5837 static int
5838 yyl_percent(pTHX_ char *s)
5839 {
5840     if (PL_expect == XOPERATOR) {
5841         if (s[1] == '='
5842             && !PL_lex_allbrackets
5843             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5844         {
5845             TOKEN(0);
5846         }
5847         ++s;
5848         Mop(OP_MODULO);
5849     }
5850     else if (PL_expect == XPOSTDEREF)
5851         POSTDEREF(PERLY_PERCENT_SIGN);
5852 
5853     PL_tokenbuf[0] = '%';
5854     s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5855     pl_yylval.ival = 0;
5856     if (!PL_tokenbuf[1]) {
5857         PREREF(PERLY_PERCENT_SIGN);
5858     }
5859     if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5860         && intuit_more(s, PL_bufend)) {
5861         if (*s == '[')
5862             PL_tokenbuf[0] = '@';
5863     }
5864     PL_expect = XOPERATOR;
5865     force_ident_maybe_lex('%');
5866     TERM(PERLY_PERCENT_SIGN);
5867 }
5868 
5869 static int
5870 yyl_caret(pTHX_ char *s)
5871 {
5872     char *d = s;
5873     const bool bof = cBOOL(FEATURE_BITWISE_IS_ENABLED);
5874     if (bof && s[1] == '.')
5875         s++;
5876     if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5877             (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5878     {
5879         s = d;
5880         TOKEN(0);
5881     }
5882     s++;
5883     BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR);
5884 }
5885 
5886 static int
5887 yyl_colon(pTHX_ char *s)
5888 {
5889     OP *attrs;
5890 
5891     switch (PL_expect) {
5892     case XOPERATOR:
5893         if (!PL_in_my || (PL_lex_state != LEX_NORMAL && !PL_lex_brackets))
5894             break;
5895         PL_bufptr = s;	/* update in case we back off */
5896         if (*s == '=') {
5897             Perl_croak(aTHX_
5898                        "Use of := for an empty attribute list is not allowed");
5899         }
5900         goto grabattrs;
5901     case XATTRBLOCK:
5902         PL_expect = XBLOCK;
5903         goto grabattrs;
5904     case XATTRTERM:
5905         PL_expect = XTERMBLOCK;
5906      grabattrs:
5907         /* NB: as well as parsing normal attributes, we also end up
5908          * here if there is something looking like attributes
5909          * following a signature (which is illegal, but used to be
5910          * legal in 5.20..5.26). If the latter, we still parse the
5911          * attributes so that error messages(s) are less confusing,
5912          * but ignore them (parser->sig_seen).
5913          */
5914         s = skipspace(s);
5915         attrs = NULL;
5916         while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5917             bool sig = PL_parser->sig_seen;
5918             I32 tmp;
5919             SV *sv;
5920             STRLEN len;
5921             char *d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5922             if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5923                 if (tmp < 0) tmp = -tmp;
5924                 switch (tmp) {
5925                 case KEY_or:
5926                 case KEY_and:
5927                 case KEY_for:
5928                 case KEY_foreach:
5929                 case KEY_unless:
5930                 case KEY_if:
5931                 case KEY_while:
5932                 case KEY_until:
5933                     goto got_attrs;
5934                 default:
5935                     break;
5936                 }
5937             }
5938             sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5939             if (*d == '(') {
5940                 d = scan_str(d,TRUE,TRUE,FALSE,NULL);
5941                 if (!d) {
5942                     if (attrs)
5943                         op_free(attrs);
5944                     sv_free(sv);
5945                     Perl_croak(aTHX_ "Unterminated attribute parameter in attribute list");
5946                 }
5947                 COPLINE_SET_FROM_MULTI_END;
5948             }
5949             if (PL_lex_stuff) {
5950                 sv_catsv(sv, PL_lex_stuff);
5951                 attrs = op_append_elem(OP_LIST, attrs,
5952                                     newSVOP(OP_CONST, 0, sv));
5953                 SvREFCNT_dec_NN(PL_lex_stuff);
5954                 PL_lex_stuff = NULL;
5955             }
5956             else {
5957                 /* NOTE: any CV attrs applied here need to be part of
5958                    the CVf_BUILTIN_ATTRS define in cv.h! */
5959                 if (!PL_in_my && memEQs(SvPVX(sv), len, "lvalue")) {
5960                     sv_free(sv);
5961                     if (!sig)
5962                         CvLVALUE_on(PL_compcv);
5963                 }
5964                 else if (!PL_in_my && memEQs(SvPVX(sv), len, "method")) {
5965                     sv_free(sv);
5966                     if (!sig)
5967                         CvMETHOD_on(PL_compcv);
5968                 }
5969                 else if (!PL_in_my && memEQs(SvPVX(sv), len, "const")) {
5970                     sv_free(sv);
5971                     if (!sig) {
5972                         Perl_ck_warner_d(aTHX_
5973                             packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
5974                            ":const is experimental"
5975                         );
5976                         CvANONCONST_on(PL_compcv);
5977                         if (!CvANON(PL_compcv))
5978                             yyerror(":const is not permitted on named "
5979                                     "subroutines");
5980                     }
5981                 }
5982                 /* After we've set the flags, it could be argued that
5983                    we don't need to do the attributes.pm-based setting
5984                    process, and shouldn't bother appending recognized
5985                    flags.  To experiment with that, uncomment the
5986                    following "else".  (Note that's already been
5987                    uncommented.  That keeps the above-applied built-in
5988                    attributes from being intercepted (and possibly
5989                    rejected) by a package's attribute routines, but is
5990                    justified by the performance win for the common case
5991                    of applying only built-in attributes.) */
5992                 else
5993                     attrs = op_append_elem(OP_LIST, attrs,
5994                                         newSVOP(OP_CONST, 0,
5995                                                 sv));
5996             }
5997             s = skipspace(d);
5998             if (*s == ':' && s[1] != ':')
5999                 s = skipspace(s+1);
6000             else if (s == d)
6001                 break;	/* require real whitespace or :'s */
6002             /* XXX losing whitespace on sequential attributes here */
6003         }
6004 
6005         if (*s != ';'
6006             && *s != '}'
6007             && !(PL_expect == XOPERATOR
6008                  ? (*s == '=' ||  *s == ')')
6009                  : (*s == '{' ||  *s == '(')))
6010         {
6011             const char q = ((*s == '\'') ? '"' : '\'');
6012             /* If here for an expression, and parsed no attrs, back off. */
6013             if (PL_expect == XOPERATOR && !attrs) {
6014                 s = PL_bufptr;
6015                 break;
6016             }
6017             /* MUST advance bufptr here to avoid bogus "at end of line"
6018                context messages from yyerror().
6019             */
6020             PL_bufptr = s;
6021             yyerror( (const char *)
6022                      (*s
6023                       ? Perl_form(aTHX_ "Invalid separator character "
6024                                   "%c%c%c in attribute list", q, *s, q)
6025                       : "Unterminated attribute list" ) );
6026             if (attrs)
6027                 op_free(attrs);
6028             OPERATOR(PERLY_COLON);
6029         }
6030 
6031     got_attrs:
6032         if (PL_parser->sig_seen) {
6033             /* see comment about about sig_seen and parser error
6034              * handling */
6035             if (attrs)
6036                 op_free(attrs);
6037             Perl_croak(aTHX_ "Subroutine attributes must come "
6038                              "before the signature");
6039         }
6040         if (attrs) {
6041             NEXTVAL_NEXTTOKE.opval = attrs;
6042             force_next(THING);
6043         }
6044         TOKEN(COLONATTR);
6045     }
6046 
6047     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
6048         s--;
6049         TOKEN(0);
6050     }
6051 
6052     PL_lex_allbrackets--;
6053     OPERATOR(PERLY_COLON);
6054 }
6055 
6056 static int
6057 yyl_subproto(pTHX_ char *s, CV *cv)
6058 {
6059     STRLEN protolen = CvPROTOLEN(cv);
6060     const char *proto = CvPROTO(cv);
6061     bool optional;
6062 
6063     proto = S_strip_spaces(aTHX_ proto, &protolen);
6064     if (!protolen)
6065         TERM(FUNC0SUB);
6066     if ((optional = *proto == ';')) {
6067         do {
6068             proto++;
6069         } while (*proto == ';');
6070     }
6071 
6072     if (
6073         (
6074             (
6075                 *proto == '$' || *proto == '_'
6076              || *proto == '*' || *proto == '+'
6077             )
6078          && proto[1] == '\0'
6079         )
6080      || (
6081          *proto == '\\' && proto[1] && proto[2] == '\0'
6082         )
6083     ) {
6084         UNIPROTO(UNIOPSUB,optional);
6085     }
6086 
6087     if (*proto == '\\' && proto[1] == '[') {
6088         const char *p = proto + 2;
6089         while(*p && *p != ']')
6090             ++p;
6091         if(*p == ']' && !p[1])
6092             UNIPROTO(UNIOPSUB,optional);
6093     }
6094 
6095     if (*proto == '&' && *s == '{') {
6096         if (PL_curstash)
6097             sv_setpvs(PL_subname, "__ANON__");
6098         else
6099             sv_setpvs(PL_subname, "__ANON__::__ANON__");
6100         if (!PL_lex_allbrackets
6101             && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6102         {
6103             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6104         }
6105         PREBLOCK(LSTOPSUB);
6106     }
6107 
6108     return KEY_NULL;
6109 }
6110 
6111 static int
6112 yyl_leftcurly(pTHX_ char *s, const U8 formbrack)
6113 {
6114     char *d;
6115     if (PL_lex_brackets > 100) {
6116         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6117     }
6118 
6119     switch (PL_expect) {
6120     case XTERM:
6121     case XTERMORDORDOR:
6122         PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6123         PL_lex_allbrackets++;
6124         OPERATOR(HASHBRACK);
6125     case XOPERATOR:
6126         while (s < PL_bufend && SPACE_OR_TAB(*s))
6127             s++;
6128         d = s;
6129         PL_tokenbuf[0] = '\0';
6130         if (d < PL_bufend && *d == '-') {
6131             PL_tokenbuf[0] = '-';
6132             d++;
6133             while (d < PL_bufend && SPACE_OR_TAB(*d))
6134                 d++;
6135         }
6136         if (d < PL_bufend && isIDFIRST_lazy_if_safe(d, PL_bufend, UTF)) {
6137             STRLEN len;
6138             d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6139                           FALSE, &len);
6140             while (d < PL_bufend && SPACE_OR_TAB(*d))
6141                 d++;
6142             if (*d == '}') {
6143                 const char minus = (PL_tokenbuf[0] == '-');
6144                 s = force_word(s + minus, BAREWORD, FALSE, TRUE);
6145                 if (minus)
6146                     force_next(PERLY_MINUS);
6147             }
6148         }
6149         /* FALLTHROUGH */
6150     case XATTRTERM:
6151     case XTERMBLOCK:
6152         PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6153         PL_lex_allbrackets++;
6154         PL_expect = XSTATE;
6155         break;
6156     case XATTRBLOCK:
6157     case XBLOCK:
6158         PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
6159         PL_lex_allbrackets++;
6160         PL_expect = XSTATE;
6161         break;
6162     case XBLOCKTERM:
6163         PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6164         PL_lex_allbrackets++;
6165         PL_expect = XSTATE;
6166         break;
6167     default: {
6168             const char *t;
6169             if (PL_oldoldbufptr == PL_last_lop)
6170                 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6171             else
6172                 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6173             PL_lex_allbrackets++;
6174             s = skipspace(s);
6175             if (*s == '}') {
6176                 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6177                     PL_expect = XTERM;
6178                     /* This hack is to get the ${} in the message. */
6179                     PL_bufptr = s+1;
6180                     yyerror("syntax error");
6181                     break;
6182                 }
6183                 OPERATOR(HASHBRACK);
6184             }
6185             if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
6186                 /* ${...} or @{...} etc., but not print {...}
6187                  * Skip the disambiguation and treat this as a block.
6188                  */
6189                 goto block_expectation;
6190             }
6191             /* This hack serves to disambiguate a pair of curlies
6192              * as being a block or an anon hash.  Normally, expectation
6193              * determines that, but in cases where we're not in a
6194              * position to expect anything in particular (like inside
6195              * eval"") we have to resolve the ambiguity.  This code
6196              * covers the case where the first term in the curlies is a
6197              * quoted string.  Most other cases need to be explicitly
6198              * disambiguated by prepending a "+" before the opening
6199              * curly in order to force resolution as an anon hash.
6200              *
6201              * XXX should probably propagate the outer expectation
6202              * into eval"" to rely less on this hack, but that could
6203              * potentially break current behavior of eval"".
6204              * GSAR 97-07-21
6205              */
6206             t = s;
6207             if (*s == '\'' || *s == '"' || *s == '`') {
6208                 /* common case: get past first string, handling escapes */
6209                 for (t++; t < PL_bufend && *t != *s;)
6210                     if (*t++ == '\\')
6211                         t++;
6212                 t++;
6213             }
6214             else if (*s == 'q') {
6215                 if (++t < PL_bufend
6216                     && (!isWORDCHAR(*t)
6217                         || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6218                             && !isWORDCHAR(*t))))
6219                 {
6220                     /* skip q//-like construct */
6221                     const char *tmps;
6222                     char open, close, term;
6223                     I32 brackets = 1;
6224 
6225                     while (t < PL_bufend && isSPACE(*t))
6226                         t++;
6227                     /* check for q => */
6228                     if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6229                         OPERATOR(HASHBRACK);
6230                     }
6231                     term = *t;
6232                     open = term;
6233                     if (term && (tmps = memCHRs("([{< )]}> )]}>",term)))
6234                         term = tmps[5];
6235                     close = term;
6236                     if (open == close)
6237                         for (t++; t < PL_bufend; t++) {
6238                             if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6239                                 t++;
6240                             else if (*t == open)
6241                                 break;
6242                         }
6243                     else {
6244                         for (t++; t < PL_bufend; t++) {
6245                             if (*t == '\\' && t+1 < PL_bufend)
6246                                 t++;
6247                             else if (*t == close && --brackets <= 0)
6248                                 break;
6249                             else if (*t == open)
6250                                 brackets++;
6251                         }
6252                     }
6253                     t++;
6254                 }
6255                 else
6256                     /* skip plain q word */
6257                     while (   t < PL_bufend
6258                            && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6259                     {
6260                         t += UTF ? UTF8SKIP(t) : 1;
6261                     }
6262             }
6263             else if (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) {
6264                 t += UTF ? UTF8SKIP(t) : 1;
6265                 while (   t < PL_bufend
6266                        && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6267                 {
6268                     t += UTF ? UTF8SKIP(t) : 1;
6269                 }
6270             }
6271             while (t < PL_bufend && isSPACE(*t))
6272                 t++;
6273             /* if comma follows first term, call it an anon hash */
6274             /* XXX it could be a comma expression with loop modifiers */
6275             if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6276                                || (*t == '=' && t[1] == '>')))
6277                 OPERATOR(HASHBRACK);
6278             if (PL_expect == XREF) {
6279               block_expectation:
6280                 /* If there is an opening brace or 'sub:', treat it
6281                    as a term to make ${{...}}{k} and &{sub:attr...}
6282                    dwim.  Otherwise, treat it as a statement, so
6283                    map {no strict; ...} works.
6284                  */
6285                 s = skipspace(s);
6286                 if (*s == '{') {
6287                     PL_expect = XTERM;
6288                     break;
6289                 }
6290                 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "sub")) {
6291                     PL_bufptr = s;
6292                     d = s + 3;
6293                     d = skipspace(d);
6294                     s = PL_bufptr;
6295                     if (*d == ':') {
6296                         PL_expect = XTERM;
6297                         break;
6298                     }
6299                 }
6300                 PL_expect = XSTATE;
6301             }
6302             else {
6303                 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6304                 PL_expect = XSTATE;
6305             }
6306         }
6307         break;
6308     }
6309 
6310     pl_yylval.ival = CopLINE(PL_curcop);
6311     PL_copline = NOLINE;   /* invalidate current command line number */
6312     TOKEN(formbrack ? PERLY_EQUAL_SIGN : PERLY_BRACE_OPEN);
6313 }
6314 
6315 static int
6316 yyl_rightcurly(pTHX_ char *s, const U8 formbrack)
6317 {
6318     assert(s != PL_bufend);
6319     s++;
6320 
6321     if (PL_lex_brackets <= 0)
6322         /* diag_listed_as: Unmatched right %s bracket */
6323         yyerror("Unmatched right curly bracket");
6324     else
6325         PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6326 
6327     PL_lex_allbrackets--;
6328 
6329     if (PL_lex_state == LEX_INTERPNORMAL) {
6330         if (PL_lex_brackets == 0) {
6331             if (PL_expect & XFAKEBRACK) {
6332                 PL_expect &= XENUMMASK;
6333                 PL_lex_state = LEX_INTERPEND;
6334                 PL_bufptr = s;
6335                 return yylex();	/* ignore fake brackets */
6336             }
6337             if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6338              && SvEVALED(PL_lex_repl))
6339                 PL_lex_state = LEX_INTERPEND;
6340             else if (*s == '-' && s[1] == '>')
6341                 PL_lex_state = LEX_INTERPENDMAYBE;
6342             else if (*s != '[' && *s != '{')
6343                 PL_lex_state = LEX_INTERPEND;
6344         }
6345     }
6346 
6347     if (PL_expect & XFAKEBRACK) {
6348         PL_expect &= XENUMMASK;
6349         PL_bufptr = s;
6350         return yylex();		/* ignore fake brackets */
6351     }
6352 
6353     force_next(formbrack ? PERLY_DOT : PERLY_BRACE_CLOSE);
6354     if (formbrack) LEAVE_with_name("lex_format");
6355     if (formbrack == 2) { /* means . where arguments were expected */
6356         force_next(PERLY_SEMICOLON);
6357         TOKEN(FORMRBRACK);
6358     }
6359 
6360     TOKEN(PERLY_SEMICOLON);
6361 }
6362 
6363 static int
6364 yyl_ampersand(pTHX_ char *s)
6365 {
6366     if (PL_expect == XPOSTDEREF)
6367         POSTDEREF(PERLY_AMPERSAND);
6368 
6369     s++;
6370     if (*s++ == '&') {
6371         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6372                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6373             s -= 2;
6374             TOKEN(0);
6375         }
6376         AOPERATOR(ANDAND);
6377     }
6378     s--;
6379 
6380     if (PL_expect == XOPERATOR) {
6381         char *d;
6382         bool bof;
6383         if (   PL_bufptr == PL_linestart
6384             && ckWARN(WARN_SEMICOLON)
6385             && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
6386         {
6387             CopLINE_dec(PL_curcop);
6388             Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6389             CopLINE_inc(PL_curcop);
6390         }
6391         d = s;
6392         if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6393             s++;
6394         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6395                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6396             s = d;
6397             s--;
6398             TOKEN(0);
6399         }
6400         if (d == s)
6401             BAop(bof ? OP_NBIT_AND : OP_BIT_AND);
6402         else
6403             BAop(OP_SBIT_AND);
6404     }
6405 
6406     PL_tokenbuf[0] = '&';
6407     s = scan_ident(s - 1, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
6408     pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6409 
6410     if (PL_tokenbuf[1])
6411         force_ident_maybe_lex('&');
6412     else
6413         PREREF(PERLY_AMPERSAND);
6414 
6415     TERM(PERLY_AMPERSAND);
6416 }
6417 
6418 static int
6419 yyl_verticalbar(pTHX_ char *s)
6420 {
6421     char *d;
6422     bool bof;
6423 
6424     s++;
6425     if (*s++ == '|') {
6426         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6427                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6428             s -= 2;
6429             TOKEN(0);
6430         }
6431         AOPERATOR(OROR);
6432     }
6433 
6434     s--;
6435     d = s;
6436     if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6437         s++;
6438 
6439     if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6440             (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6441         s = d - 1;
6442         TOKEN(0);
6443     }
6444 
6445     BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR);
6446 }
6447 
6448 static int
6449 yyl_bang(pTHX_ char *s)
6450 {
6451     const char tmp = *s++;
6452     if (tmp == '=') {
6453         /* was this !=~ where !~ was meant?
6454          * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6455 
6456         if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6457             const char *t = s+1;
6458 
6459             while (t < PL_bufend && isSPACE(*t))
6460                 ++t;
6461 
6462             if (*t == '/' || *t == '?'
6463                 || ((*t == 'm' || *t == 's' || *t == 'y')
6464                     && !isWORDCHAR(t[1]))
6465                 || (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6466                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6467                             "!=~ should be !~");
6468         }
6469 
6470         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6471             s -= 2;
6472             TOKEN(0);
6473         }
6474 
6475         ChEop(OP_NE);
6476     }
6477 
6478     if (tmp == '~')
6479         PMop(OP_NOT);
6480 
6481     s--;
6482     OPERATOR(PERLY_EXCLAMATION_MARK);
6483 }
6484 
6485 static int
6486 yyl_snail(pTHX_ char *s)
6487 {
6488     if (PL_expect == XPOSTDEREF)
6489         POSTDEREF(PERLY_SNAIL);
6490     PL_tokenbuf[0] = '@';
6491     s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6492     if (PL_expect == XOPERATOR) {
6493         char *d = s;
6494         if (PL_bufptr > s) {
6495             d = PL_bufptr-1;
6496             PL_bufptr = PL_oldbufptr;
6497         }
6498         no_op("Array", d);
6499     }
6500     pl_yylval.ival = 0;
6501     if (!PL_tokenbuf[1]) {
6502         PREREF(PERLY_SNAIL);
6503     }
6504     if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6505         s = skipspace(s);
6506     if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6507         && intuit_more(s, PL_bufend))
6508     {
6509         if (*s == '{')
6510             PL_tokenbuf[0] = '%';
6511 
6512         /* Warn about @ where they meant $. */
6513         if (*s == '[' || *s == '{') {
6514             if (ckWARN(WARN_SYNTAX)) {
6515                 S_check_scalar_slice(aTHX_ s);
6516             }
6517         }
6518     }
6519     PL_expect = XOPERATOR;
6520     force_ident_maybe_lex('@');
6521     TERM(PERLY_SNAIL);
6522 }
6523 
6524 static int
6525 yyl_slash(pTHX_ char *s)
6526 {
6527     if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') {
6528         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6529                 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6530             TOKEN(0);
6531         s += 2;
6532         AOPERATOR(DORDOR);
6533     }
6534     else if (PL_expect == XOPERATOR) {
6535         s++;
6536         if (*s == '=' && !PL_lex_allbrackets
6537             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6538         {
6539             s--;
6540             TOKEN(0);
6541         }
6542         Mop(OP_DIVIDE);
6543     }
6544     else {
6545         /* Disable warning on "study /blah/" */
6546         if (    PL_oldoldbufptr == PL_last_uni
6547             && (   *PL_last_uni != 's' || s - PL_last_uni < 5
6548                 || memNE(PL_last_uni, "study", 5)
6549                 || isWORDCHAR_lazy_if_safe(PL_last_uni+5, PL_bufend, UTF)
6550          ))
6551             check_uni();
6552         s = scan_pat(s,OP_MATCH);
6553         TERM(sublex_start());
6554     }
6555 }
6556 
6557 static int
6558 yyl_leftsquare(pTHX_ char *s)
6559 {
6560     if (PL_lex_brackets > 100)
6561         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6562     PL_lex_brackstack[PL_lex_brackets++] = 0;
6563     PL_lex_allbrackets++;
6564     s++;
6565     OPERATOR(PERLY_BRACKET_OPEN);
6566 }
6567 
6568 static int
6569 yyl_rightsquare(pTHX_ char *s)
6570 {
6571     if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6572         TOKEN(0);
6573     s++;
6574     if (PL_lex_brackets <= 0)
6575         /* diag_listed_as: Unmatched right %s bracket */
6576         yyerror("Unmatched right square bracket");
6577     else
6578         --PL_lex_brackets;
6579     PL_lex_allbrackets--;
6580     if (PL_lex_state == LEX_INTERPNORMAL) {
6581         if (PL_lex_brackets == 0) {
6582             if (*s == '-' && s[1] == '>')
6583                 PL_lex_state = LEX_INTERPENDMAYBE;
6584             else if (*s != '[' && *s != '{')
6585                 PL_lex_state = LEX_INTERPEND;
6586         }
6587     }
6588     TERM(PERLY_BRACKET_CLOSE);
6589 }
6590 
6591 static int
6592 yyl_tilde(pTHX_ char *s)
6593 {
6594     bool bof;
6595     if (s[1] == '~' && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)) {
6596         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6597             TOKEN(0);
6598         s += 2;
6599         Perl_ck_warner_d(aTHX_
6600             packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
6601             "Smartmatch is experimental");
6602         NCEop(OP_SMARTMATCH);
6603     }
6604     s++;
6605     if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') {
6606         s++;
6607         BCop(OP_SCOMPLEMENT);
6608     }
6609     BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT);
6610 }
6611 
6612 static int
6613 yyl_leftparen(pTHX_ char *s)
6614 {
6615     if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
6616         PL_oldbufptr = PL_oldoldbufptr;		/* allow print(STDOUT 123) */
6617     else
6618         PL_expect = XTERM;
6619     s = skipspace(s);
6620     PL_lex_allbrackets++;
6621     TOKEN(PERLY_PAREN_OPEN);
6622 }
6623 
6624 static int
6625 yyl_rightparen(pTHX_ char *s)
6626 {
6627     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
6628         TOKEN(0);
6629     s++;
6630     PL_lex_allbrackets--;
6631     s = skipspace(s);
6632     if (*s == '{')
6633         PREBLOCK(PERLY_PAREN_CLOSE);
6634     TERM(PERLY_PAREN_CLOSE);
6635 }
6636 
6637 static int
6638 yyl_leftpointy(pTHX_ char *s)
6639 {
6640     char tmp;
6641 
6642     if (PL_expect != XOPERATOR) {
6643         if (s[1] != '<' && !memchr(s,'>', PL_bufend - s))
6644             check_uni();
6645         if (s[1] == '<' && s[2] != '>')
6646             s = scan_heredoc(s);
6647         else
6648             s = scan_inputsymbol(s);
6649         PL_expect = XOPERATOR;
6650         TOKEN(sublex_start());
6651     }
6652 
6653     s++;
6654 
6655     tmp = *s++;
6656     if (tmp == '<') {
6657         if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6658             s -= 2;
6659             TOKEN(0);
6660         }
6661         SHop(OP_LEFT_SHIFT);
6662     }
6663     if (tmp == '=') {
6664         tmp = *s++;
6665         if (tmp == '>') {
6666             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6667                 s -= 3;
6668                 TOKEN(0);
6669             }
6670             NCEop(OP_NCMP);
6671         }
6672         s--;
6673         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6674             s -= 2;
6675             TOKEN(0);
6676         }
6677         ChRop(OP_LE);
6678     }
6679 
6680     s--;
6681     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6682         s--;
6683         TOKEN(0);
6684     }
6685 
6686     ChRop(OP_LT);
6687 }
6688 
6689 static int
6690 yyl_rightpointy(pTHX_ char *s)
6691 {
6692     const char tmp = *s++;
6693 
6694     if (tmp == '>') {
6695         if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6696             s -= 2;
6697             TOKEN(0);
6698         }
6699         SHop(OP_RIGHT_SHIFT);
6700     }
6701     else if (tmp == '=') {
6702         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6703             s -= 2;
6704             TOKEN(0);
6705         }
6706         ChRop(OP_GE);
6707     }
6708 
6709     s--;
6710     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6711         s--;
6712         TOKEN(0);
6713     }
6714 
6715     ChRop(OP_GT);
6716 }
6717 
6718 static int
6719 yyl_sglquote(pTHX_ char *s)
6720 {
6721     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6722     if (!s)
6723         missingterm(NULL, 0);
6724     COPLINE_SET_FROM_MULTI_END;
6725     DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6726     if (PL_expect == XOPERATOR) {
6727         no_op("String",s);
6728     }
6729     pl_yylval.ival = OP_CONST;
6730     TERM(sublex_start());
6731 }
6732 
6733 static int
6734 yyl_dblquote(pTHX_ char *s)
6735 {
6736     char *d;
6737     STRLEN len;
6738     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6739     DEBUG_T( {
6740         if (s)
6741             printbuf("### Saw string before %s\n", s);
6742         else
6743             PerlIO_printf(Perl_debug_log,
6744                          "### Saw unterminated string\n");
6745     } );
6746     if (PL_expect == XOPERATOR) {
6747             no_op("String",s);
6748     }
6749     if (!s)
6750         missingterm(NULL, 0);
6751     pl_yylval.ival = OP_CONST;
6752     /* FIXME. I think that this can be const if char *d is replaced by
6753        more localised variables.  */
6754     for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6755         if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6756             pl_yylval.ival = OP_STRINGIFY;
6757             break;
6758         }
6759     }
6760     if (pl_yylval.ival == OP_CONST)
6761         COPLINE_SET_FROM_MULTI_END;
6762     TERM(sublex_start());
6763 }
6764 
6765 static int
6766 yyl_backtick(pTHX_ char *s)
6767 {
6768     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6769     DEBUG_T( {
6770         if (s)
6771             printbuf("### Saw backtick string before %s\n", s);
6772         else
6773             PerlIO_printf(Perl_debug_log,
6774                          "### Saw unterminated backtick string\n");
6775     } );
6776     if (PL_expect == XOPERATOR)
6777         no_op("Backticks",s);
6778     if (!s)
6779         missingterm(NULL, 0);
6780     pl_yylval.ival = OP_BACKTICK;
6781     TERM(sublex_start());
6782 }
6783 
6784 static int
6785 yyl_backslash(pTHX_ char *s)
6786 {
6787     if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr && isDIGIT(*s))
6788         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6789                        *s, *s);
6790     if (PL_expect == XOPERATOR)
6791         no_op("Backslash",s);
6792     OPERATOR(REFGEN);
6793 }
6794 
6795 static void
6796 yyl_data_handle(pTHX)
6797 {
6798     HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
6799                             ? PL_curstash
6800                             : PL_defstash;
6801     GV *gv = (GV *)*hv_fetchs(stash, "DATA", 1);
6802 
6803     if (!isGV(gv))
6804         gv_init(gv,stash,"DATA",4,0);
6805 
6806     GvMULTI_on(gv);
6807     if (!GvIO(gv))
6808         GvIOp(gv) = newIO();
6809     IoIFP(GvIOp(gv)) = PL_rsfp;
6810 
6811     /* Mark this internal pseudo-handle as clean */
6812     IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6813     if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6814         IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6815     else
6816         IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6817 
6818 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6819     /* if the script was opened in binmode, we need to revert
6820      * it to text mode for compatibility; but only iff it has CRs
6821      * XXX this is a questionable hack at best. */
6822     if (PL_bufend-PL_bufptr > 2
6823         && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6824     {
6825         Off_t loc = 0;
6826         if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6827             loc = PerlIO_tell(PL_rsfp);
6828             (void)PerlIO_seek(PL_rsfp, 0L, 0);
6829         }
6830         if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
6831             if (loc > 0)
6832                 PerlIO_seek(PL_rsfp, loc, 0);
6833         }
6834     }
6835 #endif
6836 
6837 #ifdef PERLIO_LAYERS
6838     if (!IN_BYTES) {
6839         if (UTF)
6840             PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6841     }
6842 #endif
6843 
6844     PL_rsfp = NULL;
6845 }
6846 
6847 PERL_STATIC_NO_RET void yyl_croak_unrecognised(pTHX_ char*)
6848     __attribute__noreturn__;
6849 
6850 PERL_STATIC_NO_RET void
6851 yyl_croak_unrecognised(pTHX_ char *s)
6852 {
6853     SV *dsv = newSVpvs_flags("", SVs_TEMP);
6854     const char *c;
6855     char *d;
6856     STRLEN len;
6857 
6858     if (UTF) {
6859         STRLEN skiplen = UTF8SKIP(s);
6860         STRLEN stravail = PL_bufend - s;
6861         c = sv_uni_display(dsv, newSVpvn_flags(s,
6862                                                skiplen > stravail ? stravail : skiplen,
6863                                                SVs_TEMP | SVf_UTF8),
6864                            10, UNI_DISPLAY_ISPRINT);
6865     }
6866     else {
6867         c = Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
6868     }
6869 
6870     if (s >= PL_linestart) {
6871         d = PL_linestart;
6872     }
6873     else {
6874         /* somehow (probably due to a parse failure), PL_linestart has advanced
6875          * pass PL_bufptr, get a reasonable beginning of line
6876          */
6877         d = s;
6878         while (d > SvPVX(PL_linestr) && d[-1] && d[-1] != '\n')
6879             --d;
6880     }
6881     len = UTF ? Perl_utf8_length(aTHX_ (U8 *) d, (U8 *) s) : (STRLEN) (s - d);
6882     if (len > UNRECOGNIZED_PRECEDE_COUNT) {
6883         d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)d) : s - UNRECOGNIZED_PRECEDE_COUNT;
6884     }
6885 
6886     Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %" UTF8f "<-- HERE near column %d", c,
6887                       UTF8fARG(UTF, (s - d), d),
6888                      (int) len + 1);
6889 }
6890 
6891 static int
6892 yyl_require(pTHX_ char *s, I32 orig_keyword)
6893 {
6894     s = skipspace(s);
6895     if (isDIGIT(*s)) {
6896         s = force_version(s, FALSE);
6897     }
6898     else if (*s != 'v' || !isDIGIT(s[1])
6899             || (s = force_version(s, TRUE), *s == 'v'))
6900     {
6901         *PL_tokenbuf = '\0';
6902         s = force_word(s,BAREWORD,TRUE,TRUE);
6903         if (isIDFIRST_lazy_if_safe(PL_tokenbuf,
6904                                    PL_tokenbuf + sizeof(PL_tokenbuf),
6905                                    UTF))
6906         {
6907             gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
6908                         GV_ADD | (UTF ? SVf_UTF8 : 0));
6909         }
6910         else if (*s == '<')
6911             yyerror("<> at require-statement should be quotes");
6912     }
6913 
6914     if (orig_keyword == KEY_require)
6915         pl_yylval.ival = 1;
6916     else
6917         pl_yylval.ival = 0;
6918 
6919     PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
6920     PL_bufptr = s;
6921     PL_last_uni = PL_oldbufptr;
6922     PL_last_lop_op = OP_REQUIRE;
6923     s = skipspace(s);
6924     return REPORT( (int)REQUIRE );
6925 }
6926 
6927 static int
6928 yyl_foreach(pTHX_ char *s)
6929 {
6930     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
6931         return REPORT(0);
6932     pl_yylval.ival = CopLINE(PL_curcop);
6933     s = skipspace(s);
6934     if (PL_expect == XSTATE && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
6935         char *p = s;
6936         SSize_t s_off = s - SvPVX(PL_linestr);
6937         bool paren_is_valid = FALSE;
6938         bool maybe_package = FALSE;
6939         bool saw_core = FALSE;
6940         bool core_valid = FALSE;
6941 
6942         if (UNLIKELY(memBEGINPs(p, (STRLEN) (PL_bufend - p), "CORE::"))) {
6943             saw_core = TRUE;
6944             p += 6;
6945         }
6946         if (LIKELY(memBEGINPs(p, (STRLEN) (PL_bufend - p), "my"))) {
6947             core_valid = TRUE;
6948             paren_is_valid = TRUE;
6949             if (isSPACE(p[2])) {
6950                 p = skipspace(p + 3);
6951                 maybe_package = TRUE;
6952             }
6953             else {
6954                 p += 2;
6955             }
6956         }
6957         else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "our")) {
6958             core_valid = TRUE;
6959             if (isSPACE(p[3])) {
6960                 p = skipspace(p + 4);
6961                 maybe_package = TRUE;
6962             }
6963             else {
6964                 p += 3;
6965             }
6966         }
6967         else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "state")) {
6968             core_valid = TRUE;
6969             if (isSPACE(p[5])) {
6970                 p = skipspace(p + 6);
6971             }
6972             else {
6973                 p += 5;
6974             }
6975         }
6976         if (saw_core && !core_valid) {
6977             Perl_croak(aTHX_ "Missing $ on loop variable");
6978         }
6979 
6980         if (maybe_package && !saw_core) {
6981             /* skip optional package name, as in "for my abc $x (..)" */
6982             if (UNLIKELY(isIDFIRST_lazy_if_safe(p, PL_bufend, UTF))) {
6983                 STRLEN len;
6984                 p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6985                 p = skipspace(p);
6986                 paren_is_valid = FALSE;
6987             }
6988         }
6989 
6990         if (UNLIKELY(paren_is_valid && *p == '(')) {
6991             Perl_ck_warner_d(aTHX_
6992                              packWARN(WARN_EXPERIMENTAL__FOR_LIST),
6993                              "for my (...) is experimental");
6994         }
6995         else if (UNLIKELY(*p != '$' && *p != '\\')) {
6996             /* "for myfoo (" will end up here, but with p pointing at the 'f' */
6997             Perl_croak(aTHX_ "Missing $ on loop variable");
6998         }
6999         /* The buffer may have been reallocated, update s */
7000         s = SvPVX(PL_linestr) + s_off;
7001     }
7002     OPERATOR(FOR);
7003 }
7004 
7005 static int
7006 yyl_do(pTHX_ char *s, I32 orig_keyword)
7007 {
7008     s = skipspace(s);
7009     if (*s == '{')
7010         PRETERMBLOCK(DO);
7011     if (*s != '\'') {
7012         char *d;
7013         STRLEN len;
7014         *PL_tokenbuf = '&';
7015         d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7016                       1, &len);
7017         if (len && memNEs(PL_tokenbuf+1, len, "CORE")
7018          && !keyword(PL_tokenbuf + 1, len, 0)) {
7019             SSize_t off = s-SvPVX(PL_linestr);
7020             d = skipspace(d);
7021             s = SvPVX(PL_linestr)+off;
7022             if (*d == '(') {
7023                 force_ident_maybe_lex('&');
7024                 s = d;
7025             }
7026         }
7027     }
7028     if (orig_keyword == KEY_do)
7029         pl_yylval.ival = 1;
7030     else
7031         pl_yylval.ival = 0;
7032     OPERATOR(DO);
7033 }
7034 
7035 static int
7036 yyl_my(pTHX_ char *s, I32 my)
7037 {
7038     if (PL_in_my) {
7039         PL_bufptr = s;
7040         yyerror(Perl_form(aTHX_
7041                           "Can't redeclare \"%s\" in \"%s\"",
7042                            my       == KEY_my    ? "my" :
7043                            my       == KEY_state ? "state" : "our",
7044                            PL_in_my == KEY_my    ? "my" :
7045                            PL_in_my == KEY_state ? "state" : "our"));
7046     }
7047     PL_in_my = (U16)my;
7048     s = skipspace(s);
7049     if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
7050         STRLEN len;
7051         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7052         if (memEQs(PL_tokenbuf, len, "sub"))
7053             return yyl_sub(aTHX_ s, my);
7054         PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7055         if (!PL_in_my_stash) {
7056             char tmpbuf[1024];
7057             int i;
7058             PL_bufptr = s;
7059             i = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7060             PERL_MY_SNPRINTF_POST_GUARD(i, sizeof(tmpbuf));
7061             yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
7062         }
7063     }
7064     else if (*s == '\\') {
7065         if (!FEATURE_MYREF_IS_ENABLED)
7066             Perl_croak(aTHX_ "The experimental declared_refs "
7067                              "feature is not enabled");
7068         Perl_ck_warner_d(aTHX_
7069              packWARN(WARN_EXPERIMENTAL__DECLARED_REFS),
7070             "Declaring references is experimental");
7071     }
7072     OPERATOR(MY);
7073 }
7074 
7075 static int yyl_try(pTHX_ char*);
7076 
7077 static bool
7078 yyl_eol_needs_semicolon(pTHX_ char **ps)
7079 {
7080     char *s = *ps;
7081     if (PL_lex_state != LEX_NORMAL
7082         || (PL_in_eval && !PL_rsfp && !PL_parser->filtered))
7083     {
7084         const bool in_comment = *s == '#';
7085         char *d;
7086         if (*s == '#' && s == PL_linestart && PL_in_eval
7087          && !PL_rsfp && !PL_parser->filtered) {
7088             /* handle eval qq[#line 1 "foo"\n ...] */
7089             CopLINE_dec(PL_curcop);
7090             incline(s, PL_bufend);
7091         }
7092         d = s;
7093         while (d < PL_bufend && *d != '\n')
7094             d++;
7095         if (d < PL_bufend)
7096             d++;
7097         s = d;
7098         if (in_comment && d == PL_bufend
7099             && PL_lex_state == LEX_INTERPNORMAL
7100             && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
7101             && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
7102         else
7103             incline(s, PL_bufend);
7104         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
7105             PL_lex_state = LEX_FORMLINE;
7106             force_next(FORMRBRACK);
7107             *ps = s;
7108             return TRUE;
7109         }
7110     }
7111     else {
7112         while (s < PL_bufend && *s != '\n')
7113             s++;
7114         if (s < PL_bufend) {
7115             s++;
7116             if (s < PL_bufend)
7117                 incline(s, PL_bufend);
7118         }
7119     }
7120     *ps = s;
7121     return FALSE;
7122 }
7123 
7124 static int
7125 yyl_fake_eof(pTHX_ U32 fake_eof, bool bof, char *s)
7126 {
7127     char *d;
7128 
7129     goto start;
7130 
7131     do {
7132         fake_eof = 0;
7133         bof = cBOOL(PL_rsfp);
7134       start:
7135 
7136         PL_bufptr = PL_bufend;
7137         COPLINE_INC_WITH_HERELINES;
7138         if (!lex_next_chunk(fake_eof)) {
7139             CopLINE_dec(PL_curcop);
7140             s = PL_bufptr;
7141             TOKEN(PERLY_SEMICOLON);	/* not infinite loop because rsfp is NULL now */
7142         }
7143         CopLINE_dec(PL_curcop);
7144         s = PL_bufptr;
7145         /* If it looks like the start of a BOM or raw UTF-16,
7146          * check if it in fact is. */
7147         if (bof && PL_rsfp
7148             && (   *s == 0
7149                 || *(U8*)s == BOM_UTF8_FIRST_BYTE
7150                 || *(U8*)s >= 0xFE
7151                 || s[1] == 0))
7152         {
7153             Off_t offset = (IV)PerlIO_tell(PL_rsfp);
7154             bof = (offset == (Off_t)SvCUR(PL_linestr));
7155 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
7156             /* offset may include swallowed CR */
7157             if (!bof)
7158                 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
7159 #endif
7160             if (bof) {
7161                 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7162                 s = swallow_bom((U8*)s);
7163             }
7164         }
7165         if (PL_parser->in_pod) {
7166             /* Incest with pod. */
7167             if (    memBEGINPs(s, (STRLEN) (PL_bufend - s), "=cut")
7168                 && !isALPHA(s[4]))
7169             {
7170                 SvPVCLEAR(PL_linestr);
7171                 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
7172                 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7173                 PL_last_lop = PL_last_uni = NULL;
7174                 PL_parser->in_pod = 0;
7175             }
7176         }
7177         if (PL_rsfp || PL_parser->filtered)
7178             incline(s, PL_bufend);
7179     } while (PL_parser->in_pod);
7180 
7181     PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
7182     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7183     PL_last_lop = PL_last_uni = NULL;
7184     if (CopLINE(PL_curcop) == 1) {
7185         while (s < PL_bufend && isSPACE(*s))
7186             s++;
7187         if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
7188             s++;
7189         d = NULL;
7190         if (!PL_in_eval) {
7191             if (*s == '#' && *(s+1) == '!')
7192                 d = s + 2;
7193 #ifdef ALTERNATE_SHEBANG
7194             else {
7195                 static char const as[] = ALTERNATE_SHEBANG;
7196                 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
7197                     d = s + (sizeof(as) - 1);
7198             }
7199 #endif /* ALTERNATE_SHEBANG */
7200         }
7201         if (d) {
7202             char *ipath;
7203             char *ipathend;
7204 
7205             while (isSPACE(*d))
7206                 d++;
7207             ipath = d;
7208             while (*d && !isSPACE(*d))
7209                 d++;
7210             ipathend = d;
7211 
7212 #ifdef ARG_ZERO_IS_SCRIPT
7213             if (ipathend > ipath) {
7214                 /*
7215                  * HP-UX (at least) sets argv[0] to the script name,
7216                  * which makes $^X incorrect.  And Digital UNIX and Linux,
7217                  * at least, set argv[0] to the basename of the Perl
7218                  * interpreter. So, having found "#!", we'll set it right.
7219                  */
7220                 SV* copfilesv = CopFILESV(PL_curcop);
7221                 if (copfilesv) {
7222                     SV * const x =
7223                         GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
7224                                          SVt_PV)); /* $^X */
7225                     assert(SvPOK(x) || SvGMAGICAL(x));
7226                     if (sv_eq(x, copfilesv)) {
7227                         sv_setpvn(x, ipath, ipathend - ipath);
7228                         SvSETMAGIC(x);
7229                     }
7230                     else {
7231                         STRLEN blen;
7232                         STRLEN llen;
7233                         const char *bstart = SvPV_const(copfilesv, blen);
7234                         const char * const lstart = SvPV_const(x, llen);
7235                         if (llen < blen) {
7236                             bstart += blen - llen;
7237                             if (strnEQ(bstart, lstart, llen) &&	bstart[-1] == '/') {
7238                                 sv_setpvn(x, ipath, ipathend - ipath);
7239                                 SvSETMAGIC(x);
7240                             }
7241                         }
7242                     }
7243                 }
7244                 else {
7245                     /* Anything to do if no copfilesv? */
7246                 }
7247                 TAINT_NOT;	/* $^X is always tainted, but that's OK */
7248             }
7249 #endif /* ARG_ZERO_IS_SCRIPT */
7250 
7251             /*
7252              * Look for options.
7253              */
7254             d = instr(s,"perl -");
7255             if (!d) {
7256                 d = instr(s,"perl");
7257 #if defined(DOSISH)
7258                 /* avoid getting into infinite loops when shebang
7259                  * line contains "Perl" rather than "perl" */
7260                 if (!d) {
7261                     for (d = ipathend-4; d >= ipath; --d) {
7262                         if (isALPHA_FOLD_EQ(*d, 'p')
7263                             && !ibcmp(d, "perl", 4))
7264                         {
7265                             break;
7266                         }
7267                     }
7268                     if (d < ipath)
7269                         d = NULL;
7270                 }
7271 #endif
7272             }
7273 #ifdef ALTERNATE_SHEBANG
7274             /*
7275              * If the ALTERNATE_SHEBANG on this system starts with a
7276              * character that can be part of a Perl expression, then if
7277              * we see it but not "perl", we're probably looking at the
7278              * start of Perl code, not a request to hand off to some
7279              * other interpreter.  Similarly, if "perl" is there, but
7280              * not in the first 'word' of the line, we assume the line
7281              * contains the start of the Perl program.
7282              */
7283             if (d && *s != '#') {
7284                 const char *c = ipath;
7285                 while (*c && !memCHRs("; \t\r\n\f\v#", *c))
7286                     c++;
7287                 if (c < d)
7288                     d = NULL;	/* "perl" not in first word; ignore */
7289                 else
7290                     *s = '#';	/* Don't try to parse shebang line */
7291             }
7292 #endif /* ALTERNATE_SHEBANG */
7293             if (!d
7294                 && *s == '#'
7295                 && ipathend > ipath
7296                 && !PL_minus_c
7297                 && !instr(s,"indir")
7298                 && instr(PL_origargv[0],"perl"))
7299             {
7300                 char **newargv;
7301 
7302                 *ipathend = '\0';
7303                 s = ipathend + 1;
7304                 while (s < PL_bufend && isSPACE(*s))
7305                     s++;
7306                 if (s < PL_bufend) {
7307                     Newx(newargv,PL_origargc+3,char*);
7308                     newargv[1] = s;
7309                     while (s < PL_bufend && !isSPACE(*s))
7310                         s++;
7311                     *s = '\0';
7312                     Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
7313                 }
7314                 else
7315                     newargv = PL_origargv;
7316                 newargv[0] = ipath;
7317                 PERL_FPU_PRE_EXEC
7318                 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
7319                 PERL_FPU_POST_EXEC
7320                 Perl_croak(aTHX_ "Can't exec %s", ipath);
7321             }
7322             if (d) {
7323                 while (*d && !isSPACE(*d))
7324                     d++;
7325                 while (SPACE_OR_TAB(*d))
7326                     d++;
7327 
7328                 if (*d++ == '-') {
7329                     const bool switches_done = PL_doswitches;
7330                     const U32 oldpdb = PL_perldb;
7331                     const bool oldn = PL_minus_n;
7332                     const bool oldp = PL_minus_p;
7333                     const char *d1 = d;
7334 
7335                     do {
7336                         bool baduni = FALSE;
7337                         if (*d1 == 'C') {
7338                             const char *d2 = d1 + 1;
7339                             if (parse_unicode_opts((const char **)&d2)
7340                                 != PL_unicode)
7341                                 baduni = TRUE;
7342                         }
7343                         if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) {
7344                             const char * const m = d1;
7345                             while (*d1 && !isSPACE(*d1))
7346                                 d1++;
7347                             Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
7348                                   (int)(d1 - m), m);
7349                         }
7350                         d1 = moreswitches(d1);
7351                     } while (d1);
7352                     if (PL_doswitches && !switches_done) {
7353                         int argc = PL_origargc;
7354                         char **argv = PL_origargv;
7355                         do {
7356                             argc--,argv++;
7357                         } while (argc && argv[0][0] == '-' && argv[0][1]);
7358                         init_argv_symbols(argc,argv);
7359                     }
7360                     if (   (PERLDB_LINE_OR_SAVESRC && !oldpdb)
7361                         || ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
7362                           /* if we have already added "LINE: while (<>) {",
7363                              we must not do it again */
7364                     {
7365                         SvPVCLEAR(PL_linestr);
7366                         PL_bufptr = PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
7367                         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7368                         PL_last_lop = PL_last_uni = NULL;
7369                         PL_preambled = FALSE;
7370                         if (PERLDB_LINE_OR_SAVESRC)
7371                             (void)gv_fetchfile(PL_origfilename);
7372                         return YYL_RETRY;
7373                     }
7374                 }
7375             }
7376         }
7377     }
7378 
7379     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
7380         PL_lex_state = LEX_FORMLINE;
7381         force_next(FORMRBRACK);
7382         TOKEN(PERLY_SEMICOLON);
7383     }
7384 
7385     PL_bufptr = s;
7386     return YYL_RETRY;
7387 }
7388 
7389 static int
7390 yyl_fatcomma(pTHX_ char *s, STRLEN len)
7391 {
7392     CLINE;
7393     pl_yylval.opval
7394         = newSVOP(OP_CONST, 0,
7395                        S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
7396     pl_yylval.opval->op_private = OPpCONST_BARE;
7397     TERM(BAREWORD);
7398 }
7399 
7400 static int
7401 yyl_safe_bareword(pTHX_ char *s, const char lastchar)
7402 {
7403     if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7404         && PL_parser->saw_infix_sigil)
7405     {
7406         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7407                          "Operator or semicolon missing before %c%" UTF8f,
7408                          lastchar,
7409                          UTF8fARG(UTF, strlen(PL_tokenbuf),
7410                                   PL_tokenbuf));
7411         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7412                          "Ambiguous use of %c resolved as operator %c",
7413                          lastchar, lastchar);
7414     }
7415     TOKEN(BAREWORD);
7416 }
7417 
7418 static int
7419 yyl_constant_op(pTHX_ char *s, SV *sv, CV *cv, OP *rv2cv_op, PADOFFSET off)
7420 {
7421     if (sv) {
7422         op_free(rv2cv_op);
7423         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7424         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7425         if (SvTYPE(sv) == SVt_PVAV)
7426             pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7427                                       pl_yylval.opval);
7428         else {
7429             pl_yylval.opval->op_private = 0;
7430             pl_yylval.opval->op_folded = 1;
7431             pl_yylval.opval->op_flags |= OPf_SPECIAL;
7432         }
7433         TOKEN(BAREWORD);
7434     }
7435 
7436     op_free(pl_yylval.opval);
7437     pl_yylval.opval =
7438         off ? newCVREF(0, rv2cv_op) : rv2cv_op;
7439     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7440     PL_last_lop = PL_oldbufptr;
7441     PL_last_lop_op = OP_ENTERSUB;
7442 
7443     /* Is there a prototype? */
7444     if (SvPOK(cv)) {
7445         int k = yyl_subproto(aTHX_ s, cv);
7446         if (k != KEY_NULL)
7447             return k;
7448     }
7449 
7450     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7451     PL_expect = XTERM;
7452     force_next(off ? PRIVATEREF : BAREWORD);
7453     if (!PL_lex_allbrackets
7454         && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7455     {
7456         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7457     }
7458 
7459     TOKEN(NOAMP);
7460 }
7461 
7462 /* Honour "reserved word" warnings, and enforce strict subs */
7463 static void
7464 yyl_strictwarn_bareword(pTHX_ const char lastchar)
7465 {
7466     /* after "print" and similar functions (corresponding to
7467      * "F? L" in opcode.pl), whatever wasn't already parsed as
7468      * a filehandle should be subject to "strict subs".
7469      * Likewise for the optional indirect-object argument to system
7470      * or exec, which can't be a bareword */
7471     if ((PL_last_lop_op == OP_PRINT
7472             || PL_last_lop_op == OP_PRTF
7473             || PL_last_lop_op == OP_SAY
7474             || PL_last_lop_op == OP_SYSTEM
7475             || PL_last_lop_op == OP_EXEC)
7476         && (PL_hints & HINT_STRICT_SUBS))
7477     {
7478         pl_yylval.opval->op_private |= OPpCONST_STRICT;
7479     }
7480 
7481     if (lastchar != '-' && ckWARN(WARN_RESERVED)) {
7482         char *d = PL_tokenbuf;
7483         while (isLOWER(*d))
7484             d++;
7485         if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0)) {
7486             /* PL_warn_reserved is constant */
7487             GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
7488             Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7489                         PL_tokenbuf);
7490             GCC_DIAG_RESTORE_STMT;
7491         }
7492     }
7493 }
7494 
7495 static int
7496 yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 orig_keyword, struct code c)
7497 {
7498     int pkgname = 0;
7499     const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7500     bool safebw;
7501     bool no_op_error = FALSE;
7502     /* Use this var to track whether intuit_method has been
7503        called.  intuit_method returns 0 or > 255.  */
7504     int key = 1;
7505 
7506     if (PL_expect == XOPERATOR) {
7507         if (PL_bufptr == PL_linestart) {
7508             CopLINE_dec(PL_curcop);
7509             Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7510             CopLINE_inc(PL_curcop);
7511         }
7512         else
7513             /* We want to call no_op with s pointing after the
7514                bareword, so defer it.  But we want it to come
7515                before the Bad name croak.  */
7516             no_op_error = TRUE;
7517     }
7518 
7519     /* Get the rest if it looks like a package qualifier */
7520 
7521     if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7522         STRLEN morelen;
7523         s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7524                       TRUE, &morelen);
7525         if (no_op_error) {
7526             no_op("Bareword",s);
7527             no_op_error = FALSE;
7528         }
7529         if (!morelen)
7530             Perl_croak(aTHX_ "Bad name after %" UTF8f "%s",
7531                     UTF8fARG(UTF, len, PL_tokenbuf),
7532                     *s == '\'' ? "'" : "::");
7533         len += morelen;
7534         pkgname = 1;
7535     }
7536 
7537     if (no_op_error)
7538         no_op("Bareword",s);
7539 
7540     /* See if the name is "Foo::",
7541        in which case Foo is a bareword
7542        (and a package name). */
7543 
7544     if (len > 2 && PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':') {
7545         if (ckWARN(WARN_BAREWORD)
7546             && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7547             Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7548                         "Bareword \"%" UTF8f
7549                         "\" refers to nonexistent package",
7550                         UTF8fARG(UTF, len, PL_tokenbuf));
7551         len -= 2;
7552         PL_tokenbuf[len] = '\0';
7553         c.gv = NULL;
7554         c.gvp = 0;
7555         safebw = TRUE;
7556     }
7557     else {
7558         safebw = FALSE;
7559     }
7560 
7561     /* if we saw a global override before, get the right name */
7562 
7563     if (!c.sv)
7564         c.sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len);
7565     if (c.gvp) {
7566         SV *sv = newSVpvs("CORE::GLOBAL::");
7567         sv_catsv(sv, c.sv);
7568         SvREFCNT_dec(c.sv);
7569         c.sv = sv;
7570     }
7571 
7572     /* Presume this is going to be a bareword of some sort. */
7573     CLINE;
7574     pl_yylval.opval = newSVOP(OP_CONST, 0, c.sv);
7575     pl_yylval.opval->op_private = OPpCONST_BARE;
7576 
7577     /* And if "Foo::", then that's what it certainly is. */
7578     if (safebw)
7579         return yyl_safe_bareword(aTHX_ s, lastchar);
7580 
7581     if (!c.off) {
7582         OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(c.sv));
7583         const_op->op_private = OPpCONST_BARE;
7584         c.rv2cv_op = newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op);
7585         c.cv = c.lex
7586             ? isGV(c.gv)
7587                 ? GvCV(c.gv)
7588                 : SvROK(c.gv) && SvTYPE(SvRV(c.gv)) == SVt_PVCV
7589                     ? (CV *)SvRV(c.gv)
7590                     : ((CV *)c.gv)
7591             : rv2cv_op_cv(c.rv2cv_op, RV2CVOPCV_RETURN_STUB);
7592     }
7593 
7594     /* See if it's the indirect object for a list operator. */
7595 
7596     if (PL_oldoldbufptr
7597         && PL_oldoldbufptr < PL_bufptr
7598         && (PL_oldoldbufptr == PL_last_lop
7599             || PL_oldoldbufptr == PL_last_uni)
7600         && /* NO SKIPSPACE BEFORE HERE! */
7601            (PL_expect == XREF
7602             || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7)
7603                                                    == OA_FILEREF))
7604     {
7605         bool immediate_paren = *s == '(';
7606         SSize_t s_off;
7607 
7608         /* (Now we can afford to cross potential line boundary.) */
7609         s = skipspace(s);
7610 
7611         /* intuit_method() can indirectly call lex_next_chunk(),
7612          * invalidating s
7613          */
7614         s_off = s - SvPVX(PL_linestr);
7615         /* Two barewords in a row may indicate method call. */
7616         if (   (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
7617                 || *s == '$')
7618             && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv)))
7619         {
7620             /* the code at method: doesn't use s */
7621             goto method;
7622         }
7623         s = SvPVX(PL_linestr) + s_off;
7624 
7625         if (((PL_opargs[PL_last_lop_op] >> OASHIFT) & 7) == OA_FILEREF
7626             && !immediate_paren && !c.cv
7627             && !FEATURE_BAREWORD_FILEHANDLES_IS_ENABLED) {
7628             no_bareword_filehandle(PL_tokenbuf);
7629         }
7630 
7631         /* If not a declared subroutine, it's an indirect object. */
7632         /* (But it's an indir obj regardless for sort.) */
7633         /* Also, if "_" follows a filetest operator, it's a bareword */
7634 
7635         if (
7636             ( !immediate_paren && (PL_last_lop_op == OP_SORT
7637              || (!c.cv
7638                  && (PL_last_lop_op != OP_MAPSTART
7639                      && PL_last_lop_op != OP_GREPSTART))))
7640            || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7641                 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK)
7642                                                 == OA_FILESTATOP))
7643            )
7644         {
7645             PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7646             yyl_strictwarn_bareword(aTHX_ lastchar);
7647             op_free(c.rv2cv_op);
7648             return yyl_safe_bareword(aTHX_ s, lastchar);
7649         }
7650     }
7651 
7652     PL_expect = XOPERATOR;
7653     s = skipspace(s);
7654 
7655     /* Is this a word before a => operator? */
7656     if (*s == '=' && s[1] == '>' && !pkgname) {
7657         op_free(c.rv2cv_op);
7658         CLINE;
7659         if (c.gvp || (c.lex && !c.off)) {
7660             assert (cSVOPx(pl_yylval.opval)->op_sv == c.sv);
7661             /* This is our own scalar, created a few lines
7662                above, so this is safe. */
7663             SvREADONLY_off(c.sv);
7664             sv_setpv(c.sv, PL_tokenbuf);
7665             if (UTF && !IN_BYTES
7666              && is_utf8_string((U8*)PL_tokenbuf, len))
7667                   SvUTF8_on(c.sv);
7668             SvREADONLY_on(c.sv);
7669         }
7670         TERM(BAREWORD);
7671     }
7672 
7673     /* If followed by a paren, it's certainly a subroutine. */
7674     if (*s == '(') {
7675         CLINE;
7676         if (c.cv) {
7677             char *d = s + 1;
7678             while (SPACE_OR_TAB(*d))
7679                 d++;
7680             if (*d == ')' && (c.sv = cv_const_sv_or_av(c.cv)))
7681                 return yyl_constant_op(aTHX_ d + 1, c.sv, c.cv, c.rv2cv_op, c.off);
7682         }
7683         NEXTVAL_NEXTTOKE.opval =
7684             c.off ? c.rv2cv_op : pl_yylval.opval;
7685         if (c.off)
7686              op_free(pl_yylval.opval), force_next(PRIVATEREF);
7687         else op_free(c.rv2cv_op),      force_next(BAREWORD);
7688         pl_yylval.ival = 0;
7689         TOKEN(PERLY_AMPERSAND);
7690     }
7691 
7692     /* If followed by var or block, call it a method (unless sub) */
7693 
7694     if ((*s == '$' || *s == '{') && !c.cv && FEATURE_INDIRECT_IS_ENABLED) {
7695         op_free(c.rv2cv_op);
7696         PL_last_lop = PL_oldbufptr;
7697         PL_last_lop_op = OP_METHOD;
7698         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7699             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7700         PL_expect = XBLOCKTERM;
7701         PL_bufptr = s;
7702         return REPORT(METHOD);
7703     }
7704 
7705     /* If followed by a bareword, see if it looks like indir obj. */
7706 
7707     if (   key == 1
7708         && !orig_keyword
7709         && (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '$')
7710         && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv)))
7711     {
7712       method:
7713         if (c.lex && !c.off) {
7714             assert(cSVOPx(pl_yylval.opval)->op_sv == c.sv);
7715             SvREADONLY_off(c.sv);
7716             sv_setpvn(c.sv, PL_tokenbuf, len);
7717             if (UTF && !IN_BYTES
7718              && is_utf8_string((U8*)PL_tokenbuf, len))
7719                 SvUTF8_on(c.sv);
7720             else SvUTF8_off(c.sv);
7721         }
7722         op_free(c.rv2cv_op);
7723         if (key == METHOD && !PL_lex_allbrackets
7724             && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7725         {
7726             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7727         }
7728         return REPORT(key);
7729     }
7730 
7731     /* Not a method, so call it a subroutine (if defined) */
7732 
7733     if (c.cv) {
7734         /* Check for a constant sub */
7735         c.sv = cv_const_sv_or_av(c.cv);
7736         return yyl_constant_op(aTHX_ s, c.sv, c.cv, c.rv2cv_op, c.off);
7737     }
7738 
7739     /* Call it a bare word */
7740 
7741     if (PL_hints & HINT_STRICT_SUBS)
7742         pl_yylval.opval->op_private |= OPpCONST_STRICT;
7743     else
7744         yyl_strictwarn_bareword(aTHX_ lastchar);
7745 
7746     op_free(c.rv2cv_op);
7747 
7748     return yyl_safe_bareword(aTHX_ s, lastchar);
7749 }
7750 
7751 static int
7752 yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct code c)
7753 {
7754     switch (key) {
7755     default:			/* not a keyword */
7756         return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
7757 
7758     case KEY___FILE__:
7759         FUN0OP( newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0)) );
7760 
7761     case KEY___LINE__:
7762         FUN0OP(
7763             newSVOP(OP_CONST, 0,
7764                 Perl_newSVpvf(aTHX_ "%" IVdf, (IV)CopLINE(PL_curcop)))
7765         );
7766 
7767     case KEY___PACKAGE__:
7768         FUN0OP(
7769             newSVOP(OP_CONST, 0, (PL_curstash
7770                                      ? newSVhek(HvNAME_HEK(PL_curstash))
7771                                      : &PL_sv_undef))
7772         );
7773 
7774     case KEY___DATA__:
7775     case KEY___END__:
7776         if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D'))
7777             yyl_data_handle(aTHX);
7778         return yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s);
7779 
7780     case KEY___SUB__:
7781         FUN0OP(CvCLONE(PL_compcv)
7782                     ? newOP(OP_RUNCV, 0)
7783                     : newPVOP(OP_RUNCV,0,NULL));
7784 
7785     case KEY_AUTOLOAD:
7786     case KEY_DESTROY:
7787     case KEY_BEGIN:
7788     case KEY_UNITCHECK:
7789     case KEY_CHECK:
7790     case KEY_INIT:
7791     case KEY_END:
7792         if (PL_expect == XSTATE)
7793             return yyl_sub(aTHX_ PL_bufptr, key);
7794         return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
7795 
7796     case KEY_abs:
7797         UNI(OP_ABS);
7798 
7799     case KEY_alarm:
7800         UNI(OP_ALARM);
7801 
7802     case KEY_accept:
7803         LOP(OP_ACCEPT,XTERM);
7804 
7805     case KEY_and:
7806         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7807             return REPORT(0);
7808         OPERATOR(ANDOP);
7809 
7810     case KEY_atan2:
7811         LOP(OP_ATAN2,XTERM);
7812 
7813     case KEY_bind:
7814         LOP(OP_BIND,XTERM);
7815 
7816     case KEY_binmode:
7817         LOP(OP_BINMODE,XTERM);
7818 
7819     case KEY_bless:
7820         LOP(OP_BLESS,XTERM);
7821 
7822     case KEY_break:
7823         FUN0(OP_BREAK);
7824 
7825     case KEY_catch:
7826         Perl_ck_warner_d(aTHX_
7827             packWARN(WARN_EXPERIMENTAL__TRY), "try/catch is experimental");
7828         PREBLOCK(CATCH);
7829 
7830     case KEY_chop:
7831         UNI(OP_CHOP);
7832 
7833     case KEY_continue:
7834         /* We have to disambiguate the two senses of
7835           "continue". If the next token is a '{' then
7836           treat it as the start of a continue block;
7837           otherwise treat it as a control operator.
7838          */
7839         s = skipspace(s);
7840         if (*s == '{')
7841             PREBLOCK(CONTINUE);
7842         else
7843             FUN0(OP_CONTINUE);
7844 
7845     case KEY_chdir:
7846         /* may use HOME */
7847         (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7848         UNI(OP_CHDIR);
7849 
7850     case KEY_close:
7851         UNI(OP_CLOSE);
7852 
7853     case KEY_closedir:
7854         UNI(OP_CLOSEDIR);
7855 
7856     case KEY_cmp:
7857         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7858             return REPORT(0);
7859         NCEop(OP_SCMP);
7860 
7861     case KEY_caller:
7862         UNI(OP_CALLER);
7863 
7864     case KEY_crypt:
7865 
7866         LOP(OP_CRYPT,XTERM);
7867 
7868     case KEY_chmod:
7869         LOP(OP_CHMOD,XTERM);
7870 
7871     case KEY_chown:
7872         LOP(OP_CHOWN,XTERM);
7873 
7874     case KEY_connect:
7875         LOP(OP_CONNECT,XTERM);
7876 
7877     case KEY_chr:
7878         UNI(OP_CHR);
7879 
7880     case KEY_cos:
7881         UNI(OP_COS);
7882 
7883     case KEY_chroot:
7884         UNI(OP_CHROOT);
7885 
7886     case KEY_default:
7887         PREBLOCK(DEFAULT);
7888 
7889     case KEY_defer:
7890         Perl_ck_warner_d(aTHX_
7891             packWARN(WARN_EXPERIMENTAL__DEFER), "defer is experimental");
7892         PREBLOCK(DEFER);
7893 
7894     case KEY_do:
7895         return yyl_do(aTHX_ s, orig_keyword);
7896 
7897     case KEY_die:
7898         PL_hints |= HINT_BLOCK_SCOPE;
7899         LOP(OP_DIE,XTERM);
7900 
7901     case KEY_defined:
7902         UNI(OP_DEFINED);
7903 
7904     case KEY_delete:
7905         UNI(OP_DELETE);
7906 
7907     case KEY_dbmopen:
7908         Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7909                           STR_WITH_LEN("NDBM_File::"),
7910                           STR_WITH_LEN("DB_File::"),
7911                           STR_WITH_LEN("GDBM_File::"),
7912                           STR_WITH_LEN("SDBM_File::"),
7913                           STR_WITH_LEN("ODBM_File::"),
7914                           NULL);
7915         LOP(OP_DBMOPEN,XTERM);
7916 
7917     case KEY_dbmclose:
7918         UNI(OP_DBMCLOSE);
7919 
7920     case KEY_dump:
7921         LOOPX(OP_DUMP);
7922 
7923     case KEY_else:
7924         PREBLOCK(ELSE);
7925 
7926     case KEY_elsif:
7927         pl_yylval.ival = CopLINE(PL_curcop);
7928         OPERATOR(ELSIF);
7929 
7930     case KEY_eq:
7931         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7932             return REPORT(0);
7933         ChEop(OP_SEQ);
7934 
7935     case KEY_exists:
7936         UNI(OP_EXISTS);
7937 
7938     case KEY_exit:
7939         UNI(OP_EXIT);
7940 
7941     case KEY_eval:
7942         s = skipspace(s);
7943         if (*s == '{') { /* block eval */
7944             PL_expect = XTERMBLOCK;
7945             UNIBRACK(OP_ENTERTRY);
7946         }
7947         else { /* string eval */
7948             PL_expect = XTERM;
7949             UNIBRACK(OP_ENTEREVAL);
7950         }
7951 
7952     case KEY_evalbytes:
7953         PL_expect = XTERM;
7954         UNIBRACK(-OP_ENTEREVAL);
7955 
7956     case KEY_eof:
7957         UNI(OP_EOF);
7958 
7959     case KEY_exp:
7960         UNI(OP_EXP);
7961 
7962     case KEY_each:
7963         UNI(OP_EACH);
7964 
7965     case KEY_exec:
7966         LOP(OP_EXEC,XREF);
7967 
7968     case KEY_endhostent:
7969         FUN0(OP_EHOSTENT);
7970 
7971     case KEY_endnetent:
7972         FUN0(OP_ENETENT);
7973 
7974     case KEY_endservent:
7975         FUN0(OP_ESERVENT);
7976 
7977     case KEY_endprotoent:
7978         FUN0(OP_EPROTOENT);
7979 
7980     case KEY_endpwent:
7981         FUN0(OP_EPWENT);
7982 
7983     case KEY_endgrent:
7984         FUN0(OP_EGRENT);
7985 
7986     case KEY_finally:
7987         Perl_ck_warner_d(aTHX_
7988             packWARN(WARN_EXPERIMENTAL__TRY), "try/catch/finally is experimental");
7989         PREBLOCK(FINALLY);
7990 
7991     case KEY_for:
7992     case KEY_foreach:
7993         return yyl_foreach(aTHX_ s);
7994 
7995     case KEY_formline:
7996         LOP(OP_FORMLINE,XTERM);
7997 
7998     case KEY_fork:
7999         FUN0(OP_FORK);
8000 
8001     case KEY_fc:
8002         UNI(OP_FC);
8003 
8004     case KEY_fcntl:
8005         LOP(OP_FCNTL,XTERM);
8006 
8007     case KEY_fileno:
8008         UNI(OP_FILENO);
8009 
8010     case KEY_flock:
8011         LOP(OP_FLOCK,XTERM);
8012 
8013     case KEY_gt:
8014         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8015             return REPORT(0);
8016         ChRop(OP_SGT);
8017 
8018     case KEY_ge:
8019         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8020             return REPORT(0);
8021         ChRop(OP_SGE);
8022 
8023     case KEY_grep:
8024         LOP(OP_GREPSTART, XREF);
8025 
8026     case KEY_goto:
8027         LOOPX(OP_GOTO);
8028 
8029     case KEY_gmtime:
8030         UNI(OP_GMTIME);
8031 
8032     case KEY_getc:
8033         UNIDOR(OP_GETC);
8034 
8035     case KEY_getppid:
8036         FUN0(OP_GETPPID);
8037 
8038     case KEY_getpgrp:
8039         UNI(OP_GETPGRP);
8040 
8041     case KEY_getpriority:
8042         LOP(OP_GETPRIORITY,XTERM);
8043 
8044     case KEY_getprotobyname:
8045         UNI(OP_GPBYNAME);
8046 
8047     case KEY_getprotobynumber:
8048         LOP(OP_GPBYNUMBER,XTERM);
8049 
8050     case KEY_getprotoent:
8051         FUN0(OP_GPROTOENT);
8052 
8053     case KEY_getpwent:
8054         FUN0(OP_GPWENT);
8055 
8056     case KEY_getpwnam:
8057         UNI(OP_GPWNAM);
8058 
8059     case KEY_getpwuid:
8060         UNI(OP_GPWUID);
8061 
8062     case KEY_getpeername:
8063         UNI(OP_GETPEERNAME);
8064 
8065     case KEY_gethostbyname:
8066         UNI(OP_GHBYNAME);
8067 
8068     case KEY_gethostbyaddr:
8069         LOP(OP_GHBYADDR,XTERM);
8070 
8071     case KEY_gethostent:
8072         FUN0(OP_GHOSTENT);
8073 
8074     case KEY_getnetbyname:
8075         UNI(OP_GNBYNAME);
8076 
8077     case KEY_getnetbyaddr:
8078         LOP(OP_GNBYADDR,XTERM);
8079 
8080     case KEY_getnetent:
8081         FUN0(OP_GNETENT);
8082 
8083     case KEY_getservbyname:
8084         LOP(OP_GSBYNAME,XTERM);
8085 
8086     case KEY_getservbyport:
8087         LOP(OP_GSBYPORT,XTERM);
8088 
8089     case KEY_getservent:
8090         FUN0(OP_GSERVENT);
8091 
8092     case KEY_getsockname:
8093         UNI(OP_GETSOCKNAME);
8094 
8095     case KEY_getsockopt:
8096         LOP(OP_GSOCKOPT,XTERM);
8097 
8098     case KEY_getgrent:
8099         FUN0(OP_GGRENT);
8100 
8101     case KEY_getgrnam:
8102         UNI(OP_GGRNAM);
8103 
8104     case KEY_getgrgid:
8105         UNI(OP_GGRGID);
8106 
8107     case KEY_getlogin:
8108         FUN0(OP_GETLOGIN);
8109 
8110     case KEY_given:
8111         pl_yylval.ival = CopLINE(PL_curcop);
8112         Perl_ck_warner_d(aTHX_ packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8113                          "given is experimental");
8114         OPERATOR(GIVEN);
8115 
8116     case KEY_glob:
8117         LOP( orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB, XTERM );
8118 
8119     case KEY_hex:
8120         UNI(OP_HEX);
8121 
8122     case KEY_if:
8123         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8124             return REPORT(0);
8125         pl_yylval.ival = CopLINE(PL_curcop);
8126         OPERATOR(IF);
8127 
8128     case KEY_index:
8129         LOP(OP_INDEX,XTERM);
8130 
8131     case KEY_int:
8132         UNI(OP_INT);
8133 
8134     case KEY_ioctl:
8135         LOP(OP_IOCTL,XTERM);
8136 
8137     case KEY_isa:
8138         NCRop(OP_ISA);
8139 
8140     case KEY_join:
8141         LOP(OP_JOIN,XTERM);
8142 
8143     case KEY_keys:
8144         UNI(OP_KEYS);
8145 
8146     case KEY_kill:
8147         LOP(OP_KILL,XTERM);
8148 
8149     case KEY_last:
8150         LOOPX(OP_LAST);
8151 
8152     case KEY_lc:
8153         UNI(OP_LC);
8154 
8155     case KEY_lcfirst:
8156         UNI(OP_LCFIRST);
8157 
8158     case KEY_local:
8159         OPERATOR(LOCAL);
8160 
8161     case KEY_length:
8162         UNI(OP_LENGTH);
8163 
8164     case KEY_lt:
8165         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8166             return REPORT(0);
8167         ChRop(OP_SLT);
8168 
8169     case KEY_le:
8170         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8171             return REPORT(0);
8172         ChRop(OP_SLE);
8173 
8174     case KEY_localtime:
8175         UNI(OP_LOCALTIME);
8176 
8177     case KEY_log:
8178         UNI(OP_LOG);
8179 
8180     case KEY_link:
8181         LOP(OP_LINK,XTERM);
8182 
8183     case KEY_listen:
8184         LOP(OP_LISTEN,XTERM);
8185 
8186     case KEY_lock:
8187         UNI(OP_LOCK);
8188 
8189     case KEY_lstat:
8190         UNI(OP_LSTAT);
8191 
8192     case KEY_m:
8193         s = scan_pat(s,OP_MATCH);
8194         TERM(sublex_start());
8195 
8196     case KEY_map:
8197         LOP(OP_MAPSTART, XREF);
8198 
8199     case KEY_mkdir:
8200         LOP(OP_MKDIR,XTERM);
8201 
8202     case KEY_msgctl:
8203         LOP(OP_MSGCTL,XTERM);
8204 
8205     case KEY_msgget:
8206         LOP(OP_MSGGET,XTERM);
8207 
8208     case KEY_msgrcv:
8209         LOP(OP_MSGRCV,XTERM);
8210 
8211     case KEY_msgsnd:
8212         LOP(OP_MSGSND,XTERM);
8213 
8214     case KEY_our:
8215     case KEY_my:
8216     case KEY_state:
8217         return yyl_my(aTHX_ s, key);
8218 
8219     case KEY_next:
8220         LOOPX(OP_NEXT);
8221 
8222     case KEY_ne:
8223         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8224             return REPORT(0);
8225         ChEop(OP_SNE);
8226 
8227     case KEY_no:
8228         s = tokenize_use(0, s);
8229         TOKEN(USE);
8230 
8231     case KEY_not:
8232         if (*s == '(' || (s = skipspace(s), *s == '('))
8233             FUN1(OP_NOT);
8234         else {
8235             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8236                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8237             OPERATOR(NOTOP);
8238         }
8239 
8240     case KEY_open:
8241         s = skipspace(s);
8242         if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
8243             const char *t;
8244             char *d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8245             for (t=d; isSPACE(*t);)
8246                 t++;
8247             if ( *t && memCHRs("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8248                 /* [perl #16184] */
8249                 && !(t[0] == '=' && t[1] == '>')
8250                 && !(t[0] == ':' && t[1] == ':')
8251                 && !keyword(s, d-s, 0)
8252             ) {
8253                 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8254                    "Precedence problem: open %" UTF8f " should be open(%" UTF8f ")",
8255                     UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8256             }
8257         }
8258         LOP(OP_OPEN,XTERM);
8259 
8260     case KEY_or:
8261         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8262             return REPORT(0);
8263         pl_yylval.ival = OP_OR;
8264         OPERATOR(OROP);
8265 
8266     case KEY_ord:
8267         UNI(OP_ORD);
8268 
8269     case KEY_oct:
8270         UNI(OP_OCT);
8271 
8272     case KEY_opendir:
8273         LOP(OP_OPEN_DIR,XTERM);
8274 
8275     case KEY_print:
8276         checkcomma(s,PL_tokenbuf,"filehandle");
8277         LOP(OP_PRINT,XREF);
8278 
8279     case KEY_printf:
8280         checkcomma(s,PL_tokenbuf,"filehandle");
8281         LOP(OP_PRTF,XREF);
8282 
8283     case KEY_prototype:
8284         UNI(OP_PROTOTYPE);
8285 
8286     case KEY_push:
8287         LOP(OP_PUSH,XTERM);
8288 
8289     case KEY_pop:
8290         UNIDOR(OP_POP);
8291 
8292     case KEY_pos:
8293         UNIDOR(OP_POS);
8294 
8295     case KEY_pack:
8296         LOP(OP_PACK,XTERM);
8297 
8298     case KEY_package:
8299         s = force_word(s,BAREWORD,FALSE,TRUE);
8300         s = skipspace(s);
8301         s = force_strict_version(s);
8302         PREBLOCK(PACKAGE);
8303 
8304     case KEY_pipe:
8305         LOP(OP_PIPE_OP,XTERM);
8306 
8307     case KEY_q:
8308         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8309         if (!s)
8310             missingterm(NULL, 0);
8311         COPLINE_SET_FROM_MULTI_END;
8312         pl_yylval.ival = OP_CONST;
8313         TERM(sublex_start());
8314 
8315     case KEY_quotemeta:
8316         UNI(OP_QUOTEMETA);
8317 
8318     case KEY_qw:
8319         return yyl_qw(aTHX_ s, len);
8320 
8321     case KEY_qq:
8322         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8323         if (!s)
8324             missingterm(NULL, 0);
8325         pl_yylval.ival = OP_STRINGIFY;
8326         if (SvIVX(PL_lex_stuff) == '\'')
8327             SvIV_set(PL_lex_stuff, 0);	/* qq'$foo' should interpolate */
8328         TERM(sublex_start());
8329 
8330     case KEY_qr:
8331         s = scan_pat(s,OP_QR);
8332         TERM(sublex_start());
8333 
8334     case KEY_qx:
8335         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8336         if (!s)
8337             missingterm(NULL, 0);
8338         pl_yylval.ival = OP_BACKTICK;
8339         TERM(sublex_start());
8340 
8341     case KEY_return:
8342         OLDLOP(OP_RETURN);
8343 
8344     case KEY_require:
8345         return yyl_require(aTHX_ s, orig_keyword);
8346 
8347     case KEY_reset:
8348         UNI(OP_RESET);
8349 
8350     case KEY_redo:
8351         LOOPX(OP_REDO);
8352 
8353     case KEY_rename:
8354         LOP(OP_RENAME,XTERM);
8355 
8356     case KEY_rand:
8357         UNI(OP_RAND);
8358 
8359     case KEY_rmdir:
8360         UNI(OP_RMDIR);
8361 
8362     case KEY_rindex:
8363         LOP(OP_RINDEX,XTERM);
8364 
8365     case KEY_read:
8366         LOP(OP_READ,XTERM);
8367 
8368     case KEY_readdir:
8369         UNI(OP_READDIR);
8370 
8371     case KEY_readline:
8372         UNIDOR(OP_READLINE);
8373 
8374     case KEY_readpipe:
8375         UNIDOR(OP_BACKTICK);
8376 
8377     case KEY_rewinddir:
8378         UNI(OP_REWINDDIR);
8379 
8380     case KEY_recv:
8381         LOP(OP_RECV,XTERM);
8382 
8383     case KEY_reverse:
8384         LOP(OP_REVERSE,XTERM);
8385 
8386     case KEY_readlink:
8387         UNIDOR(OP_READLINK);
8388 
8389     case KEY_ref:
8390         UNI(OP_REF);
8391 
8392     case KEY_s:
8393         s = scan_subst(s);
8394         if (pl_yylval.opval)
8395             TERM(sublex_start());
8396         else
8397             TOKEN(1);	/* force error */
8398 
8399     case KEY_say:
8400         checkcomma(s,PL_tokenbuf,"filehandle");
8401         LOP(OP_SAY,XREF);
8402 
8403     case KEY_chomp:
8404         UNI(OP_CHOMP);
8405 
8406     case KEY_scalar:
8407         UNI(OP_SCALAR);
8408 
8409     case KEY_select:
8410         LOP(OP_SELECT,XTERM);
8411 
8412     case KEY_seek:
8413         LOP(OP_SEEK,XTERM);
8414 
8415     case KEY_semctl:
8416         LOP(OP_SEMCTL,XTERM);
8417 
8418     case KEY_semget:
8419         LOP(OP_SEMGET,XTERM);
8420 
8421     case KEY_semop:
8422         LOP(OP_SEMOP,XTERM);
8423 
8424     case KEY_send:
8425         LOP(OP_SEND,XTERM);
8426 
8427     case KEY_setpgrp:
8428         LOP(OP_SETPGRP,XTERM);
8429 
8430     case KEY_setpriority:
8431         LOP(OP_SETPRIORITY,XTERM);
8432 
8433     case KEY_sethostent:
8434         UNI(OP_SHOSTENT);
8435 
8436     case KEY_setnetent:
8437         UNI(OP_SNETENT);
8438 
8439     case KEY_setservent:
8440         UNI(OP_SSERVENT);
8441 
8442     case KEY_setprotoent:
8443         UNI(OP_SPROTOENT);
8444 
8445     case KEY_setpwent:
8446         FUN0(OP_SPWENT);
8447 
8448     case KEY_setgrent:
8449         FUN0(OP_SGRENT);
8450 
8451     case KEY_seekdir:
8452         LOP(OP_SEEKDIR,XTERM);
8453 
8454     case KEY_setsockopt:
8455         LOP(OP_SSOCKOPT,XTERM);
8456 
8457     case KEY_shift:
8458         UNIDOR(OP_SHIFT);
8459 
8460     case KEY_shmctl:
8461         LOP(OP_SHMCTL,XTERM);
8462 
8463     case KEY_shmget:
8464         LOP(OP_SHMGET,XTERM);
8465 
8466     case KEY_shmread:
8467         LOP(OP_SHMREAD,XTERM);
8468 
8469     case KEY_shmwrite:
8470         LOP(OP_SHMWRITE,XTERM);
8471 
8472     case KEY_shutdown:
8473         LOP(OP_SHUTDOWN,XTERM);
8474 
8475     case KEY_sin:
8476         UNI(OP_SIN);
8477 
8478     case KEY_sleep:
8479         UNI(OP_SLEEP);
8480 
8481     case KEY_socket:
8482         LOP(OP_SOCKET,XTERM);
8483 
8484     case KEY_socketpair:
8485         LOP(OP_SOCKPAIR,XTERM);
8486 
8487     case KEY_sort:
8488         checkcomma(s,PL_tokenbuf,"subroutine name");
8489         s = skipspace(s);
8490         PL_expect = XTERM;
8491         s = force_word(s,BAREWORD,TRUE,TRUE);
8492         LOP(OP_SORT,XREF);
8493 
8494     case KEY_split:
8495         LOP(OP_SPLIT,XTERM);
8496 
8497     case KEY_sprintf:
8498         LOP(OP_SPRINTF,XTERM);
8499 
8500     case KEY_splice:
8501         LOP(OP_SPLICE,XTERM);
8502 
8503     case KEY_sqrt:
8504         UNI(OP_SQRT);
8505 
8506     case KEY_srand:
8507         UNI(OP_SRAND);
8508 
8509     case KEY_stat:
8510         UNI(OP_STAT);
8511 
8512     case KEY_study:
8513         UNI(OP_STUDY);
8514 
8515     case KEY_substr:
8516         LOP(OP_SUBSTR,XTERM);
8517 
8518     case KEY_format:
8519     case KEY_sub:
8520         return yyl_sub(aTHX_ s, key);
8521 
8522     case KEY_system:
8523         LOP(OP_SYSTEM,XREF);
8524 
8525     case KEY_symlink:
8526         LOP(OP_SYMLINK,XTERM);
8527 
8528     case KEY_syscall:
8529         LOP(OP_SYSCALL,XTERM);
8530 
8531     case KEY_sysopen:
8532         LOP(OP_SYSOPEN,XTERM);
8533 
8534     case KEY_sysseek:
8535         LOP(OP_SYSSEEK,XTERM);
8536 
8537     case KEY_sysread:
8538         LOP(OP_SYSREAD,XTERM);
8539 
8540     case KEY_syswrite:
8541         LOP(OP_SYSWRITE,XTERM);
8542 
8543     case KEY_tr:
8544     case KEY_y:
8545         s = scan_trans(s);
8546         TERM(sublex_start());
8547 
8548     case KEY_tell:
8549         UNI(OP_TELL);
8550 
8551     case KEY_telldir:
8552         UNI(OP_TELLDIR);
8553 
8554     case KEY_tie:
8555         LOP(OP_TIE,XTERM);
8556 
8557     case KEY_tied:
8558         UNI(OP_TIED);
8559 
8560     case KEY_time:
8561         FUN0(OP_TIME);
8562 
8563     case KEY_times:
8564         FUN0(OP_TMS);
8565 
8566     case KEY_truncate:
8567         LOP(OP_TRUNCATE,XTERM);
8568 
8569     case KEY_try:
8570         pl_yylval.ival = CopLINE(PL_curcop);
8571         Perl_ck_warner_d(aTHX_
8572             packWARN(WARN_EXPERIMENTAL__TRY), "try/catch is experimental");
8573         PREBLOCK(TRY);
8574 
8575     case KEY_uc:
8576         UNI(OP_UC);
8577 
8578     case KEY_ucfirst:
8579         UNI(OP_UCFIRST);
8580 
8581     case KEY_untie:
8582         UNI(OP_UNTIE);
8583 
8584     case KEY_until:
8585         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8586             return REPORT(0);
8587         pl_yylval.ival = CopLINE(PL_curcop);
8588         OPERATOR(UNTIL);
8589 
8590     case KEY_unless:
8591         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8592             return REPORT(0);
8593         pl_yylval.ival = CopLINE(PL_curcop);
8594         OPERATOR(UNLESS);
8595 
8596     case KEY_unlink:
8597         LOP(OP_UNLINK,XTERM);
8598 
8599     case KEY_undef:
8600         UNIDOR(OP_UNDEF);
8601 
8602     case KEY_unpack:
8603         LOP(OP_UNPACK,XTERM);
8604 
8605     case KEY_utime:
8606         LOP(OP_UTIME,XTERM);
8607 
8608     case KEY_umask:
8609         UNIDOR(OP_UMASK);
8610 
8611     case KEY_unshift:
8612         LOP(OP_UNSHIFT,XTERM);
8613 
8614     case KEY_use:
8615         s = tokenize_use(1, s);
8616         TOKEN(USE);
8617 
8618     case KEY_values:
8619         UNI(OP_VALUES);
8620 
8621     case KEY_vec:
8622         LOP(OP_VEC,XTERM);
8623 
8624     case KEY_when:
8625         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8626             return REPORT(0);
8627         pl_yylval.ival = CopLINE(PL_curcop);
8628         Perl_ck_warner_d(aTHX_
8629             packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8630             "when is experimental");
8631         OPERATOR(WHEN);
8632 
8633     case KEY_while:
8634         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8635             return REPORT(0);
8636         pl_yylval.ival = CopLINE(PL_curcop);
8637         OPERATOR(WHILE);
8638 
8639     case KEY_warn:
8640         PL_hints |= HINT_BLOCK_SCOPE;
8641         LOP(OP_WARN,XTERM);
8642 
8643     case KEY_wait:
8644         FUN0(OP_WAIT);
8645 
8646     case KEY_waitpid:
8647         LOP(OP_WAITPID,XTERM);
8648 
8649     case KEY_wantarray:
8650         FUN0(OP_WANTARRAY);
8651 
8652     case KEY_write:
8653         /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8654          * we use the same number on EBCDIC */
8655         gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8656         UNI(OP_ENTERWRITE);
8657 
8658     case KEY_x:
8659         if (PL_expect == XOPERATOR) {
8660             if (*s == '=' && !PL_lex_allbrackets
8661                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8662             {
8663                 return REPORT(0);
8664             }
8665             Mop(OP_REPEAT);
8666         }
8667         check_uni();
8668         return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
8669 
8670     case KEY_xor:
8671         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8672             return REPORT(0);
8673         pl_yylval.ival = OP_XOR;
8674         OPERATOR(OROP);
8675     }
8676 }
8677 
8678 static int
8679 yyl_key_core(pTHX_ char *s, STRLEN len, struct code c)
8680 {
8681     I32 key = 0;
8682     I32 orig_keyword = 0;
8683     STRLEN olen = len;
8684     char *d = s;
8685     s += 2;
8686     s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8687     if ((*s == ':' && s[1] == ':')
8688         || (!(key = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
8689     {
8690         Copy(PL_bufptr, PL_tokenbuf, olen, char);
8691         return yyl_just_a_word(aTHX_ d, olen, 0, c);
8692     }
8693     if (!key)
8694         Perl_croak(aTHX_ "CORE::%" UTF8f " is not a keyword",
8695                           UTF8fARG(UTF, len, PL_tokenbuf));
8696     if (key < 0)
8697         key = -key;
8698     else if (key == KEY_require || key == KEY_do
8699           || key == KEY_glob)
8700         /* that's a way to remember we saw "CORE::" */
8701         orig_keyword = key;
8702 
8703     /* Known to be a reserved word at this point */
8704     return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c);
8705 }
8706 
8707 static int
8708 yyl_keylookup(pTHX_ char *s, GV *gv)
8709 {
8710     STRLEN len;
8711     bool anydelim;
8712     I32 key;
8713     struct code c = no_code;
8714     I32 orig_keyword = 0;
8715     char *d;
8716 
8717     c.gv = gv;
8718 
8719     PL_bufptr = s;
8720     s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8721 
8722     /* Some keywords can be followed by any delimiter, including ':' */
8723     anydelim = word_takes_any_delimiter(PL_tokenbuf, len);
8724 
8725     /* x::* is just a word, unless x is "CORE" */
8726     if (!anydelim && *s == ':' && s[1] == ':') {
8727         if (memEQs(PL_tokenbuf, len, "CORE"))
8728             return yyl_key_core(aTHX_ s, len, c);
8729         return yyl_just_a_word(aTHX_ s, len, 0, c);
8730     }
8731 
8732     d = s;
8733     while (d < PL_bufend && isSPACE(*d))
8734             d++;	/* no comments skipped here, or s### is misparsed */
8735 
8736     /* Is this a word before a => operator? */
8737     if (*d == '=' && d[1] == '>') {
8738         return yyl_fatcomma(aTHX_ s, len);
8739     }
8740 
8741     /* Check for plugged-in keyword */
8742     {
8743         OP *o;
8744         int result;
8745         char *saved_bufptr = PL_bufptr;
8746         PL_bufptr = s;
8747         result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
8748         s = PL_bufptr;
8749         if (result == KEYWORD_PLUGIN_DECLINE) {
8750             /* not a plugged-in keyword */
8751             PL_bufptr = saved_bufptr;
8752         } else if (result == KEYWORD_PLUGIN_STMT) {
8753             pl_yylval.opval = o;
8754             CLINE;
8755             if (!PL_nexttoke) PL_expect = XSTATE;
8756             return REPORT(PLUGSTMT);
8757         } else if (result == KEYWORD_PLUGIN_EXPR) {
8758             pl_yylval.opval = o;
8759             CLINE;
8760             if (!PL_nexttoke) PL_expect = XOPERATOR;
8761             return REPORT(PLUGEXPR);
8762         } else {
8763             Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'", PL_tokenbuf);
8764         }
8765     }
8766 
8767     /* Is this a label? */
8768     if (!anydelim && PL_expect == XSTATE
8769           && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
8770         s = d + 1;
8771         pl_yylval.opval =
8772             newSVOP(OP_CONST, 0,
8773                 newSVpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0));
8774         CLINE;
8775         TOKEN(LABEL);
8776     }
8777 
8778     /* Check for lexical sub */
8779     if (PL_expect != XOPERATOR) {
8780         char tmpbuf[sizeof PL_tokenbuf + 1];
8781         *tmpbuf = '&';
8782         Copy(PL_tokenbuf, tmpbuf+1, len, char);
8783         c.off = pad_findmy_pvn(tmpbuf, len+1, 0);
8784         if (c.off != NOT_IN_PAD) {
8785             assert(c.off); /* we assume this is boolean-true below */
8786             if (PAD_COMPNAME_FLAGS_isOUR(c.off)) {
8787                 HV *  const stash = PAD_COMPNAME_OURSTASH(c.off);
8788                 HEK * const stashname = HvNAME_HEK(stash);
8789                 c.sv = newSVhek(stashname);
8790                 sv_catpvs(c.sv, "::");
8791                 sv_catpvn_flags(c.sv, PL_tokenbuf, len,
8792                                 (UTF ? SV_CATUTF8 : SV_CATBYTES));
8793                 c.gv = gv_fetchsv(c.sv, GV_NOADD_NOINIT | SvUTF8(c.sv),
8794                                   SVt_PVCV);
8795                 c.off = 0;
8796                 if (!c.gv) {
8797                     sv_free(c.sv);
8798                     c.sv = NULL;
8799                     return yyl_just_a_word(aTHX_ s, len, 0, c);
8800                 }
8801             }
8802             else {
8803                 c.rv2cv_op = newOP(OP_PADANY, 0);
8804                 c.rv2cv_op->op_targ = c.off;
8805                 c.cv = find_lexical_cv(c.off);
8806             }
8807             c.lex = TRUE;
8808             return yyl_just_a_word(aTHX_ s, len, 0, c);
8809         }
8810         c.off = 0;
8811     }
8812 
8813     /* Check for built-in keyword */
8814     key = keyword(PL_tokenbuf, len, 0);
8815 
8816     if (key < 0)
8817         key = yyl_secondclass_keyword(aTHX_ s, len, key, &orig_keyword, &c.gv, &c.gvp);
8818 
8819     if (key && key != KEY___DATA__ && key != KEY___END__
8820      && (!anydelim || *s != '#')) {
8821         /* no override, and not s### either; skipspace is safe here
8822          * check for => on following line */
8823         bool arrow;
8824         STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
8825         STRLEN   soff = s         - SvPVX(PL_linestr);
8826         s = peekspace(s);
8827         arrow = *s == '=' && s[1] == '>';
8828         PL_bufptr = SvPVX(PL_linestr) + bufoff;
8829         s         = SvPVX(PL_linestr) +   soff;
8830         if (arrow)
8831             return yyl_fatcomma(aTHX_ s, len);
8832     }
8833 
8834     return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c);
8835 }
8836 
8837 static int
8838 yyl_try(pTHX_ char *s)
8839 {
8840     char *d;
8841     GV *gv = NULL;
8842     int tok;
8843 
8844   retry:
8845     switch (*s) {
8846     default:
8847         if (UTF ? isIDFIRST_utf8_safe(s, PL_bufend) : isALNUMC(*s)) {
8848             if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
8849                 return tok;
8850             goto retry_bufptr;
8851         }
8852         yyl_croak_unrecognised(aTHX_ s);
8853 
8854     case 4:
8855     case 26:
8856         /* emulate EOF on ^D or ^Z */
8857         if ((tok = yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s)) != YYL_RETRY)
8858             return tok;
8859     retry_bufptr:
8860         s = PL_bufptr;
8861         goto retry;
8862 
8863     case 0:
8864         if ((!PL_rsfp || PL_lex_inwhat)
8865          && (!PL_parser->filtered || s+1 < PL_bufend)) {
8866             PL_last_uni = 0;
8867             PL_last_lop = 0;
8868             if (PL_lex_brackets
8869                 && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF)
8870             {
8871                 yyerror((const char *)
8872                         (PL_lex_formbrack
8873                          ? "Format not terminated"
8874                          : "Missing right curly or square bracket"));
8875             }
8876             DEBUG_T({
8877                 PerlIO_printf(Perl_debug_log, "### Tokener got EOF\n");
8878             });
8879             TOKEN(0);
8880         }
8881         if (s++ < PL_bufend)
8882             goto retry;  /* ignore stray nulls */
8883         PL_last_uni = 0;
8884         PL_last_lop = 0;
8885         if (!PL_in_eval && !PL_preambled) {
8886             PL_preambled = TRUE;
8887             if (PL_perldb) {
8888                 /* Generate a string of Perl code to load the debugger.
8889                  * If PERL5DB is set, it will return the contents of that,
8890                  * otherwise a compile-time require of perl5db.pl.  */
8891 
8892                 const char * const pdb = PerlEnv_getenv("PERL5DB");
8893 
8894                 if (pdb) {
8895                     sv_setpv(PL_linestr, pdb);
8896                     sv_catpvs(PL_linestr,";");
8897                 } else {
8898                     SETERRNO(0,SS_NORMAL);
8899                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
8900                 }
8901                 PL_parser->preambling = CopLINE(PL_curcop);
8902             } else
8903                 SvPVCLEAR(PL_linestr);
8904             if (PL_preambleav) {
8905                 SV **svp = AvARRAY(PL_preambleav);
8906                 SV **const end = svp + AvFILLp(PL_preambleav);
8907                 while(svp <= end) {
8908                     sv_catsv(PL_linestr, *svp);
8909                     ++svp;
8910                     sv_catpvs(PL_linestr, ";");
8911                 }
8912                 sv_free(MUTABLE_SV(PL_preambleav));
8913                 PL_preambleav = NULL;
8914             }
8915             if (PL_minus_E)
8916                 sv_catpvs(PL_linestr,
8917                           "use feature ':" STRINGIFY(PERL_REVISION) "." STRINGIFY(PERL_VERSION) "';");
8918             if (PL_minus_n || PL_minus_p) {
8919                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
8920                 if (PL_minus_l)
8921                     sv_catpvs(PL_linestr,"chomp;");
8922                 if (PL_minus_a) {
8923                     if (PL_minus_F) {
8924                         if (   (   *PL_splitstr == '/'
8925                                 || *PL_splitstr == '\''
8926                                 || *PL_splitstr == '"')
8927                             && strchr(PL_splitstr + 1, *PL_splitstr))
8928                         {
8929                             /* strchr is ok, because -F pattern can't contain
8930                              * embeddded NULs */
8931                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
8932                         }
8933                         else {
8934                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
8935                                bytes can be used as quoting characters.  :-) */
8936                             const char *splits = PL_splitstr;
8937                             sv_catpvs(PL_linestr, "our @F=split(q\0");
8938                             do {
8939                                 /* Need to \ \s  */
8940                                 if (*splits == '\\')
8941                                     sv_catpvn(PL_linestr, splits, 1);
8942                                 sv_catpvn(PL_linestr, splits, 1);
8943                             } while (*splits++);
8944                             /* This loop will embed the trailing NUL of
8945                                PL_linestr as the last thing it does before
8946                                terminating.  */
8947                             sv_catpvs(PL_linestr, ");");
8948                         }
8949                     }
8950                     else
8951                         sv_catpvs(PL_linestr,"our @F=split(' ');");
8952                 }
8953             }
8954             sv_catpvs(PL_linestr, "\n");
8955             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
8956             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
8957             PL_last_lop = PL_last_uni = NULL;
8958             if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash)
8959                 update_debugger_info(PL_linestr, NULL, 0);
8960             goto retry;
8961         }
8962         if ((tok = yyl_fake_eof(aTHX_ 0, cBOOL(PL_rsfp), s)) != YYL_RETRY)
8963             return tok;
8964         goto retry_bufptr;
8965 
8966     case '\r':
8967 #ifdef PERL_STRICT_CR
8968         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
8969         Perl_croak(aTHX_
8970       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
8971 #endif
8972     case ' ': case '\t': case '\f': case '\v':
8973         s++;
8974         goto retry;
8975 
8976     case '#':
8977     case '\n': {
8978         const bool needs_semicolon = yyl_eol_needs_semicolon(aTHX_ &s);
8979         if (needs_semicolon)
8980             TOKEN(PERLY_SEMICOLON);
8981         else
8982             goto retry;
8983     }
8984 
8985     case '-':
8986         return yyl_hyphen(aTHX_ s);
8987 
8988     case '+':
8989         return yyl_plus(aTHX_ s);
8990 
8991     case '*':
8992         return yyl_star(aTHX_ s);
8993 
8994     case '%':
8995         return yyl_percent(aTHX_ s);
8996 
8997     case '^':
8998         return yyl_caret(aTHX_ s);
8999 
9000     case '[':
9001         return yyl_leftsquare(aTHX_ s);
9002 
9003     case '~':
9004         return yyl_tilde(aTHX_ s);
9005 
9006     case ',':
9007         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
9008             TOKEN(0);
9009         s++;
9010         OPERATOR(PERLY_COMMA);
9011     case ':':
9012         if (s[1] == ':')
9013             return yyl_just_a_word(aTHX_ s, 0, 0, no_code);
9014         return yyl_colon(aTHX_ s + 1);
9015 
9016     case '(':
9017         return yyl_leftparen(aTHX_ s + 1);
9018 
9019     case ';':
9020         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
9021             TOKEN(0);
9022         CLINE;
9023         s++;
9024         PL_expect = XSTATE;
9025         TOKEN(PERLY_SEMICOLON);
9026 
9027     case ')':
9028         return yyl_rightparen(aTHX_ s);
9029 
9030     case ']':
9031         return yyl_rightsquare(aTHX_ s);
9032 
9033     case '{':
9034         return yyl_leftcurly(aTHX_ s + 1, 0);
9035 
9036     case '}':
9037         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
9038             TOKEN(0);
9039         return yyl_rightcurly(aTHX_ s, 0);
9040 
9041     case '&':
9042         return yyl_ampersand(aTHX_ s);
9043 
9044     case '|':
9045         return yyl_verticalbar(aTHX_ s);
9046 
9047     case '=':
9048         if (s[1] == '=' && (s == PL_linestart || s[-1] == '\n')
9049             && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "====="))
9050         {
9051             s = vcs_conflict_marker(s + 7);
9052             goto retry;
9053         }
9054 
9055         s++;
9056         {
9057             const char tmp = *s++;
9058             if (tmp == '=') {
9059                 if (!PL_lex_allbrackets
9060                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
9061                 {
9062                     s -= 2;
9063                     TOKEN(0);
9064                 }
9065                 ChEop(OP_EQ);
9066             }
9067             if (tmp == '>') {
9068                 if (!PL_lex_allbrackets
9069                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
9070                 {
9071                     s -= 2;
9072                     TOKEN(0);
9073                 }
9074                 OPERATOR(PERLY_COMMA);
9075             }
9076             if (tmp == '~')
9077                 PMop(OP_MATCH);
9078             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
9079                 && memCHRs("+-*/%.^&|<",tmp))
9080                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9081                             "Reversed %c= operator",(int)tmp);
9082             s--;
9083             if (PL_expect == XSTATE
9084                 && isALPHA(tmp)
9085                 && (s == PL_linestart+1 || s[-2] == '\n') )
9086             {
9087                 if (   (PL_in_eval && !PL_rsfp && !PL_parser->filtered)
9088                     || PL_lex_state != LEX_NORMAL)
9089                 {
9090                     d = PL_bufend;
9091                     while (s < d) {
9092                         if (*s++ == '\n') {
9093                             incline(s, PL_bufend);
9094                             if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=cut"))
9095                             {
9096                                 s = (char *) memchr(s,'\n', d - s);
9097                                 if (s)
9098                                     s++;
9099                                 else
9100                                     s = d;
9101                                 incline(s, PL_bufend);
9102                                 goto retry;
9103                             }
9104                         }
9105                     }
9106                     goto retry;
9107                 }
9108                 s = PL_bufend;
9109                 PL_parser->in_pod = 1;
9110                 goto retry;
9111             }
9112         }
9113         if (PL_expect == XBLOCK) {
9114             const char *t = s;
9115 #ifdef PERL_STRICT_CR
9116             while (SPACE_OR_TAB(*t))
9117 #else
9118             while (SPACE_OR_TAB(*t) || *t == '\r')
9119 #endif
9120                 t++;
9121             if (*t == '\n' || *t == '#') {
9122                 ENTER_with_name("lex_format");
9123                 SAVEI8(PL_parser->form_lex_state);
9124                 SAVEI32(PL_lex_formbrack);
9125                 PL_parser->form_lex_state = PL_lex_state;
9126                 PL_lex_formbrack = PL_lex_brackets + 1;
9127                 PL_parser->sub_error_count = PL_error_count;
9128                 return yyl_leftcurly(aTHX_ s, 1);
9129             }
9130         }
9131         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
9132             s--;
9133             TOKEN(0);
9134         }
9135         pl_yylval.ival = 0;
9136         OPERATOR(ASSIGNOP);
9137 
9138         case '!':
9139         return yyl_bang(aTHX_ s + 1);
9140 
9141     case '<':
9142         if (s[1] == '<' && (s == PL_linestart || s[-1] == '\n')
9143             && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "<<<<<"))
9144         {
9145             s = vcs_conflict_marker(s + 7);
9146             goto retry;
9147         }
9148         return yyl_leftpointy(aTHX_ s);
9149 
9150     case '>':
9151         if (s[1] == '>' && (s == PL_linestart || s[-1] == '\n')
9152             && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), ">>>>>"))
9153         {
9154             s = vcs_conflict_marker(s + 7);
9155             goto retry;
9156         }
9157         return yyl_rightpointy(aTHX_ s + 1);
9158 
9159     case '$':
9160         return yyl_dollar(aTHX_ s);
9161 
9162     case '@':
9163         return yyl_snail(aTHX_ s);
9164 
9165     case '/':			/* may be division, defined-or, or pattern */
9166         return yyl_slash(aTHX_ s);
9167 
9168      case '?':			/* conditional */
9169         s++;
9170         if (!PL_lex_allbrackets
9171             && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE)
9172         {
9173             s--;
9174             TOKEN(0);
9175         }
9176         PL_lex_allbrackets++;
9177         OPERATOR(PERLY_QUESTION_MARK);
9178 
9179     case '.':
9180         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
9181 #ifdef PERL_STRICT_CR
9182             && s[1] == '\n'
9183 #else
9184             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
9185 #endif
9186             && (s == PL_linestart || s[-1] == '\n') )
9187         {
9188             PL_expect = XSTATE;
9189             /* formbrack==2 means dot seen where arguments expected */
9190             return yyl_rightcurly(aTHX_ s, 2);
9191         }
9192         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
9193             s += 3;
9194             OPERATOR(YADAYADA);
9195         }
9196         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
9197             char tmp = *s++;
9198             if (*s == tmp) {
9199                 if (!PL_lex_allbrackets
9200                     && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE)
9201                 {
9202                     s--;
9203                     TOKEN(0);
9204                 }
9205                 s++;
9206                 if (*s == tmp) {
9207                     s++;
9208                     pl_yylval.ival = OPf_SPECIAL;
9209                 }
9210                 else
9211                     pl_yylval.ival = 0;
9212                 OPERATOR(DOTDOT);
9213             }
9214             if (*s == '=' && !PL_lex_allbrackets
9215                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
9216             {
9217                 s--;
9218                 TOKEN(0);
9219             }
9220             Aop(OP_CONCAT);
9221         }
9222         /* FALLTHROUGH */
9223     case '0': case '1': case '2': case '3': case '4':
9224     case '5': case '6': case '7': case '8': case '9':
9225         s = scan_num(s, &pl_yylval);
9226         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
9227         if (PL_expect == XOPERATOR)
9228             no_op("Number",s);
9229         TERM(THING);
9230 
9231     case '\'':
9232         return yyl_sglquote(aTHX_ s);
9233 
9234     case '"':
9235         return yyl_dblquote(aTHX_ s);
9236 
9237     case '`':
9238         return yyl_backtick(aTHX_ s);
9239 
9240     case '\\':
9241         return yyl_backslash(aTHX_ s + 1);
9242 
9243     case 'v':
9244         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
9245             char *start = s + 2;
9246             while (isDIGIT(*start) || *start == '_')
9247                 start++;
9248             if (*start == '.' && isDIGIT(start[1])) {
9249                 s = scan_num(s, &pl_yylval);
9250                 TERM(THING);
9251             }
9252             else if ((*start == ':' && start[1] == ':')
9253                      || (PL_expect == XSTATE && *start == ':')) {
9254                 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9255                     return tok;
9256                 goto retry_bufptr;
9257             }
9258             else if (PL_expect == XSTATE) {
9259                 d = start;
9260                 while (d < PL_bufend && isSPACE(*d)) d++;
9261                 if (*d == ':') {
9262                     if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9263                         return tok;
9264                     goto retry_bufptr;
9265                 }
9266             }
9267             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
9268             if (!isALPHA(*start) && (PL_expect == XTERM
9269                         || PL_expect == XREF || PL_expect == XSTATE
9270                         || PL_expect == XTERMORDORDOR)) {
9271                 GV *const gv = gv_fetchpvn_flags(s, start - s,
9272                                                     UTF ? SVf_UTF8 : 0, SVt_PVCV);
9273                 if (!gv) {
9274                     s = scan_num(s, &pl_yylval);
9275                     TERM(THING);
9276                 }
9277             }
9278         }
9279         if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9280             return tok;
9281         goto retry_bufptr;
9282 
9283     case 'x':
9284         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
9285             s++;
9286             Mop(OP_REPEAT);
9287         }
9288         if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9289             return tok;
9290         goto retry_bufptr;
9291 
9292     case '_':
9293     case 'a': case 'A':
9294     case 'b': case 'B':
9295     case 'c': case 'C':
9296     case 'd': case 'D':
9297     case 'e': case 'E':
9298     case 'f': case 'F':
9299     case 'g': case 'G':
9300     case 'h': case 'H':
9301     case 'i': case 'I':
9302     case 'j': case 'J':
9303     case 'k': case 'K':
9304     case 'l': case 'L':
9305     case 'm': case 'M':
9306     case 'n': case 'N':
9307     case 'o': case 'O':
9308     case 'p': case 'P':
9309     case 'q': case 'Q':
9310     case 'r': case 'R':
9311     case 's': case 'S':
9312     case 't': case 'T':
9313     case 'u': case 'U':
9314               case 'V':
9315     case 'w': case 'W':
9316               case 'X':
9317     case 'y': case 'Y':
9318     case 'z': case 'Z':
9319         if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9320             return tok;
9321         goto retry_bufptr;
9322     }
9323 }
9324 
9325 
9326 /*
9327   yylex
9328 
9329   Works out what to call the token just pulled out of the input
9330   stream.  The yacc parser takes care of taking the ops we return and
9331   stitching them into a tree.
9332 
9333   Returns:
9334     The type of the next token
9335 
9336   Structure:
9337       Check if we have already built the token; if so, use it.
9338       Switch based on the current state:
9339           - if we have a case modifier in a string, deal with that
9340           - handle other cases of interpolation inside a string
9341           - scan the next line if we are inside a format
9342       In the normal state, switch on the next character:
9343           - default:
9344             if alphabetic, go to key lookup
9345             unrecognized character - croak
9346           - 0/4/26: handle end-of-line or EOF
9347           - cases for whitespace
9348           - \n and #: handle comments and line numbers
9349           - various operators, brackets and sigils
9350           - numbers
9351           - quotes
9352           - 'v': vstrings (or go to key lookup)
9353           - 'x' repetition operator (or go to key lookup)
9354           - other ASCII alphanumerics (key lookup begins here):
9355               word before => ?
9356               keyword plugin
9357               scan built-in keyword (but do nothing with it yet)
9358               check for statement label
9359               check for lexical subs
9360                   return yyl_just_a_word if there is one
9361               see whether built-in keyword is overridden
9362               switch on keyword number:
9363                   - default: return yyl_just_a_word:
9364                       not a built-in keyword; handle bareword lookup
9365                       disambiguate between method and sub call
9366                       fall back to bareword
9367                   - cases for built-in keywords
9368 */
9369 
9370 int
9371 Perl_yylex(pTHX)
9372 {
9373     char *s = PL_bufptr;
9374 
9375     if (UNLIKELY(PL_parser->recheck_utf8_validity)) {
9376         const U8* first_bad_char_loc;
9377         if (UTF && UNLIKELY(! is_utf8_string_loc((U8 *) PL_bufptr,
9378                                                         PL_bufend - PL_bufptr,
9379                                                         &first_bad_char_loc)))
9380         {
9381             _force_out_malformed_utf8_message(first_bad_char_loc,
9382                                               (U8 *) PL_bufend,
9383                                               0,
9384                                               1 /* 1 means die */ );
9385             NOT_REACHED; /* NOTREACHED */
9386         }
9387         PL_parser->recheck_utf8_validity = FALSE;
9388     }
9389     DEBUG_T( {
9390         SV* tmp = newSVpvs("");
9391         PerlIO_printf(Perl_debug_log, "### %" IVdf ":LEX_%s/X%s %s\n",
9392             (IV)CopLINE(PL_curcop),
9393             lex_state_names[PL_lex_state],
9394             exp_name[PL_expect],
9395             pv_display(tmp, s, strlen(s), 0, 60));
9396         SvREFCNT_dec(tmp);
9397     } );
9398 
9399     /* when we've already built the next token, just pull it out of the queue */
9400     if (PL_nexttoke) {
9401         PL_nexttoke--;
9402         pl_yylval = PL_nextval[PL_nexttoke];
9403         {
9404             I32 next_type;
9405             next_type = PL_nexttype[PL_nexttoke];
9406             if (next_type & (7<<24)) {
9407                 if (next_type & (1<<24)) {
9408                     if (PL_lex_brackets > 100)
9409                         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
9410                     PL_lex_brackstack[PL_lex_brackets++] =
9411                         (char) ((U8) (next_type >> 16));
9412                 }
9413                 if (next_type & (2<<24))
9414                     PL_lex_allbrackets++;
9415                 if (next_type & (4<<24))
9416                     PL_lex_allbrackets--;
9417                 next_type &= 0xffff;
9418             }
9419             return REPORT(next_type == 'p' ? pending_ident() : next_type);
9420         }
9421     }
9422 
9423     switch (PL_lex_state) {
9424     case LEX_NORMAL:
9425     case LEX_INTERPNORMAL:
9426         break;
9427 
9428     /* interpolated case modifiers like \L \U, including \Q and \E.
9429        when we get here, PL_bufptr is at the \
9430     */
9431     case LEX_INTERPCASEMOD:
9432         /* handle \E or end of string */
9433         return yyl_interpcasemod(aTHX_ s);
9434 
9435     case LEX_INTERPPUSH:
9436         return REPORT(sublex_push());
9437 
9438     case LEX_INTERPSTART:
9439         if (PL_bufptr == PL_bufend)
9440             return REPORT(sublex_done());
9441         DEBUG_T({
9442             if(*PL_bufptr != '(')
9443                 PerlIO_printf(Perl_debug_log, "### Interpolated variable\n");
9444         });
9445         PL_expect = XTERM;
9446         /* for /@a/, we leave the joining for the regex engine to do
9447          * (unless we're within \Q etc) */
9448         PL_lex_dojoin = (*PL_bufptr == '@'
9449                             && (!PL_lex_inpat || PL_lex_casemods));
9450         PL_lex_state = LEX_INTERPNORMAL;
9451         if (PL_lex_dojoin) {
9452             NEXTVAL_NEXTTOKE.ival = 0;
9453             force_next(PERLY_COMMA);
9454             force_ident("\"", PERLY_DOLLAR);
9455             NEXTVAL_NEXTTOKE.ival = 0;
9456             force_next(PERLY_DOLLAR);
9457             NEXTVAL_NEXTTOKE.ival = 0;
9458             force_next((2<<24)|PERLY_PAREN_OPEN);
9459             NEXTVAL_NEXTTOKE.ival = OP_JOIN;	/* emulate join($", ...) */
9460             force_next(FUNC);
9461         }
9462         /* Convert (?{...}) and friends to 'do {...}' */
9463         if (PL_lex_inpat && *PL_bufptr == '(') {
9464             PL_parser->lex_shared->re_eval_start = PL_bufptr;
9465             PL_bufptr += 2;
9466             if (*PL_bufptr != '{')
9467                 PL_bufptr++;
9468             PL_expect = XTERMBLOCK;
9469             force_next(DO);
9470         }
9471 
9472         if (PL_lex_starts++) {
9473             s = PL_bufptr;
9474             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
9475             if (!PL_lex_casemods && PL_lex_inpat)
9476                 TOKEN(PERLY_COMMA);
9477             else
9478                 AopNOASSIGN(OP_CONCAT);
9479         }
9480         return yylex();
9481 
9482     case LEX_INTERPENDMAYBE:
9483         if (intuit_more(PL_bufptr, PL_bufend)) {
9484             PL_lex_state = LEX_INTERPNORMAL;	/* false alarm, more expr */
9485             break;
9486         }
9487         /* FALLTHROUGH */
9488 
9489     case LEX_INTERPEND:
9490         if (PL_lex_dojoin) {
9491             const U8 dojoin_was = PL_lex_dojoin;
9492             PL_lex_dojoin = FALSE;
9493             PL_lex_state = LEX_INTERPCONCAT;
9494             PL_lex_allbrackets--;
9495             return REPORT(dojoin_was == 1 ? (int)PERLY_PAREN_CLOSE : (int)POSTJOIN);
9496         }
9497         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
9498             && SvEVALED(PL_lex_repl))
9499         {
9500             if (PL_bufptr != PL_bufend)
9501                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
9502             PL_lex_repl = NULL;
9503         }
9504         /* Paranoia.  re_eval_start is adjusted when S_scan_heredoc sets
9505            re_eval_str.  If the here-doc body's length equals the previous
9506            value of re_eval_start, re_eval_start will now be null.  So
9507            check re_eval_str as well. */
9508         if (PL_parser->lex_shared->re_eval_start
9509          || PL_parser->lex_shared->re_eval_str) {
9510             SV *sv;
9511             if (*PL_bufptr != ')')
9512                 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
9513             PL_bufptr++;
9514             /* having compiled a (?{..}) expression, return the original
9515              * text too, as a const */
9516             if (PL_parser->lex_shared->re_eval_str) {
9517                 sv = PL_parser->lex_shared->re_eval_str;
9518                 PL_parser->lex_shared->re_eval_str = NULL;
9519                 SvCUR_set(sv,
9520                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
9521                 SvPV_shrink_to_cur(sv);
9522             }
9523             else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
9524                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
9525             NEXTVAL_NEXTTOKE.opval =
9526                     newSVOP(OP_CONST, 0,
9527                                  sv);
9528             force_next(THING);
9529             PL_parser->lex_shared->re_eval_start = NULL;
9530             PL_expect = XTERM;
9531             return REPORT(PERLY_COMMA);
9532         }
9533 
9534         /* FALLTHROUGH */
9535     case LEX_INTERPCONCAT:
9536 #ifdef DEBUGGING
9537         if (PL_lex_brackets)
9538             Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
9539                        (long) PL_lex_brackets);
9540 #endif
9541         if (PL_bufptr == PL_bufend)
9542             return REPORT(sublex_done());
9543 
9544         /* m'foo' still needs to be parsed for possible (?{...}) */
9545         if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
9546             SV *sv = newSVsv(PL_linestr);
9547             sv = tokeq(sv);
9548             pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
9549             s = PL_bufend;
9550         }
9551         else {
9552             int save_error_count = PL_error_count;
9553 
9554             s = scan_const(PL_bufptr);
9555 
9556             /* Set flag if this was a pattern and there were errors.  op.c will
9557              * refuse to compile a pattern with this flag set.  Otherwise, we
9558              * could get segfaults, etc. */
9559             if (PL_lex_inpat && PL_error_count > save_error_count) {
9560                 ((PMOP*)PL_lex_inpat)->op_pmflags |= PMf_HAS_ERROR;
9561             }
9562             if (*s == '\\')
9563                 PL_lex_state = LEX_INTERPCASEMOD;
9564             else
9565                 PL_lex_state = LEX_INTERPSTART;
9566         }
9567 
9568         if (s != PL_bufptr) {
9569             NEXTVAL_NEXTTOKE = pl_yylval;
9570             PL_expect = XTERM;
9571             force_next(THING);
9572             if (PL_lex_starts++) {
9573                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
9574                 if (!PL_lex_casemods && PL_lex_inpat)
9575                     TOKEN(PERLY_COMMA);
9576                 else
9577                     AopNOASSIGN(OP_CONCAT);
9578             }
9579             else {
9580                 PL_bufptr = s;
9581                 return yylex();
9582             }
9583         }
9584 
9585         return yylex();
9586     case LEX_FORMLINE:
9587         if (PL_parser->sub_error_count != PL_error_count) {
9588             /* There was an error parsing a formline, which tends to
9589                mess up the parser.
9590                Unlike interpolated sub-parsing, we can't treat any of
9591                these as recoverable, so no need to check sub_no_recover.
9592             */
9593             yyquit();
9594         }
9595         assert(PL_lex_formbrack);
9596         s = scan_formline(PL_bufptr);
9597         if (!PL_lex_formbrack)
9598             return yyl_rightcurly(aTHX_ s, 1);
9599         PL_bufptr = s;
9600         return yylex();
9601     }
9602 
9603     /* We really do *not* want PL_linestr ever becoming a COW. */
9604     assert (!SvIsCOW(PL_linestr));
9605     s = PL_bufptr;
9606     PL_oldoldbufptr = PL_oldbufptr;
9607     PL_oldbufptr = s;
9608 
9609     if (PL_in_my == KEY_sigvar) {
9610         PL_parser->saw_infix_sigil = 0;
9611         return yyl_sigvar(aTHX_ s);
9612     }
9613 
9614     {
9615         /* yyl_try() and its callees might consult PL_parser->saw_infix_sigil.
9616            On its return, we then need to set it to indicate whether the token
9617            we just encountered was an infix operator that (if we hadn't been
9618            expecting an operator) have been a sigil.
9619         */
9620         bool expected_operator = (PL_expect == XOPERATOR);
9621         int ret = yyl_try(aTHX_ s);
9622         switch (pl_yylval.ival) {
9623         case OP_BIT_AND:
9624         case OP_MODULO:
9625         case OP_MULTIPLY:
9626         case OP_NBIT_AND:
9627             if (expected_operator) {
9628                 PL_parser->saw_infix_sigil = 1;
9629                 break;
9630             }
9631             /* FALLTHROUGH */
9632         default:
9633             PL_parser->saw_infix_sigil = 0;
9634         }
9635         return ret;
9636     }
9637 }
9638 
9639 
9640 /*
9641   S_pending_ident
9642 
9643   Looks up an identifier in the pad or in a package
9644 
9645   PL_in_my == KEY_sigvar indicates that this is a subroutine signature variable
9646   rather than a plain pad var.
9647 
9648   Returns:
9649     PRIVATEREF if this is a lexical name.
9650     BAREWORD   if this belongs to a package.
9651 
9652   Structure:
9653       if we're in a my declaration
9654           croak if they tried to say my($foo::bar)
9655           build the ops for a my() declaration
9656       if it's an access to a my() variable
9657           build ops for access to a my() variable
9658       if in a dq string, and they've said @foo and we can't find @foo
9659           warn
9660       build ops for a bareword
9661 */
9662 
9663 static int
9664 S_pending_ident(pTHX)
9665 {
9666     PADOFFSET tmp = 0;
9667     const char pit = (char)pl_yylval.ival;
9668     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
9669     /* All routes through this function want to know if there is a colon.  */
9670     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
9671 
9672     DEBUG_T({ PerlIO_printf(Perl_debug_log,
9673           "### Pending identifier '%s'\n", PL_tokenbuf); });
9674     assert(tokenbuf_len >= 2);
9675 
9676     /* if we're in a my(), we can't allow dynamics here.
9677        $foo'bar has already been turned into $foo::bar, so
9678        just check for colons.
9679 
9680        if it's a legal name, the OP is a PADANY.
9681     */
9682     if (PL_in_my) {
9683         if (PL_in_my == KEY_our) {	/* "our" is merely analogous to "my" */
9684             if (has_colon)
9685                 /* diag_listed_as: No package name allowed for variable %s
9686                                    in "our" */
9687                 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
9688                                   "%s %s in \"our\"",
9689                                   *PL_tokenbuf=='&' ? "subroutine" : "variable",
9690                                   PL_tokenbuf), UTF ? SVf_UTF8 : 0);
9691             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
9692         }
9693         else {
9694             OP *o;
9695             if (has_colon) {
9696                 /* "my" variable %s can't be in a package */
9697                 /* PL_no_myglob is constant */
9698                 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
9699                 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
9700                             PL_in_my == KEY_my ? "my" : "state",
9701                             *PL_tokenbuf == '&' ? "subroutine" : "variable",
9702                             PL_tokenbuf),
9703                             UTF ? SVf_UTF8 : 0);
9704                 GCC_DIAG_RESTORE_STMT;
9705             }
9706 
9707             if (PL_in_my == KEY_sigvar) {
9708                 /* A signature 'padop' needs in addition, an op_first to
9709                  * point to a child sigdefelem, and an extra field to hold
9710                  * the signature index. We can achieve both by using an
9711                  * UNOP_AUX and (ab)using the op_aux field to hold the
9712                  * index. If we ever need more fields, use a real malloced
9713                  * aux strut instead.
9714                  */
9715                 o = newUNOP_AUX(OP_ARGELEM, 0, NULL,
9716                                     INT2PTR(UNOP_AUX_item *,
9717                                         (PL_parser->sig_elems)));
9718                 o->op_private |= (  PL_tokenbuf[0] == '$' ? OPpARGELEM_SV
9719                                   : PL_tokenbuf[0] == '@' ? OPpARGELEM_AV
9720                                   :                         OPpARGELEM_HV);
9721             }
9722             else
9723                 o = newOP(OP_PADANY, 0);
9724             o->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
9725                                                         UTF ? SVf_UTF8 : 0);
9726             if (PL_in_my == KEY_sigvar)
9727                 PL_in_my = 0;
9728 
9729             pl_yylval.opval = o;
9730             return PRIVATEREF;
9731         }
9732     }
9733 
9734     /*
9735        build the ops for accesses to a my() variable.
9736     */
9737 
9738     if (!has_colon) {
9739         if (!PL_in_my)
9740             tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
9741                                  0);
9742         if (tmp != NOT_IN_PAD) {
9743             /* might be an "our" variable" */
9744             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9745                 /* build ops for a bareword */
9746                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
9747                 HEK * const stashname = HvNAME_HEK(stash);
9748                 SV *  const sym = newSVhek(stashname);
9749                 sv_catpvs(sym, "::");
9750                 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
9751                 pl_yylval.opval = newSVOP(OP_CONST, 0, sym);
9752                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9753                 if (pit != '&')
9754                   gv_fetchsv(sym,
9755                     GV_ADDMULTI,
9756                     ((PL_tokenbuf[0] == '$') ? SVt_PV
9757                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9758                      : SVt_PVHV));
9759                 return BAREWORD;
9760             }
9761 
9762             pl_yylval.opval = newOP(OP_PADANY, 0);
9763             pl_yylval.opval->op_targ = tmp;
9764             return PRIVATEREF;
9765         }
9766     }
9767 
9768     /*
9769        Whine if they've said @foo or @foo{key} in a doublequoted string,
9770        and @foo (or %foo) isn't a variable we can find in the symbol
9771        table.
9772     */
9773     if (ckWARN(WARN_AMBIGUOUS)
9774         && pit == '@'
9775         && PL_lex_state != LEX_NORMAL
9776         && !PL_lex_brackets)
9777     {
9778         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9779                                          ( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG,
9780                                          SVt_PVAV);
9781         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
9782            )
9783         {
9784             /* Downgraded from fatal to warning 20000522 mjd */
9785             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9786                         "Possible unintended interpolation of %" UTF8f
9787                         " in string",
9788                         UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
9789         }
9790     }
9791 
9792     /* build ops for a bareword */
9793     pl_yylval.opval = newSVOP(OP_CONST, 0,
9794                                    newSVpvn_flags(PL_tokenbuf + 1,
9795                                                       tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9796                                                       UTF ? SVf_UTF8 : 0 ));
9797     pl_yylval.opval->op_private = OPpCONST_ENTERED;
9798     if (pit != '&')
9799         gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9800                      (PL_in_eval ? GV_ADDMULTI : GV_ADD)
9801                      | ( UTF ? SVf_UTF8 : 0 ),
9802                      ((PL_tokenbuf[0] == '$') ? SVt_PV
9803                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9804                       : SVt_PVHV));
9805     return BAREWORD;
9806 }
9807 
9808 STATIC void
9809 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
9810 {
9811     PERL_ARGS_ASSERT_CHECKCOMMA;
9812 
9813     if (*s == ' ' && s[1] == '(') {	/* XXX gotta be a better way */
9814         if (ckWARN(WARN_SYNTAX)) {
9815             int level = 1;
9816             const char *w;
9817             for (w = s+2; *w && level; w++) {
9818                 if (*w == '(')
9819                     ++level;
9820                 else if (*w == ')')
9821                     --level;
9822             }
9823             while (isSPACE(*w))
9824                 ++w;
9825             /* the list of chars below is for end of statements or
9826              * block / parens, boolean operators (&&, ||, //) and branch
9827              * constructs (or, and, if, until, unless, while, err, for).
9828              * Not a very solid hack... */
9829             if (!*w || !memCHRs(";&/|})]oaiuwef!=", *w))
9830                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9831                             "%s (...) interpreted as function",name);
9832         }
9833     }
9834     while (s < PL_bufend && isSPACE(*s))
9835         s++;
9836     if (*s == '(')
9837         s++;
9838     while (s < PL_bufend && isSPACE(*s))
9839         s++;
9840     if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
9841         const char * const w = s;
9842         s += UTF ? UTF8SKIP(s) : 1;
9843         while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
9844             s += UTF ? UTF8SKIP(s) : 1;
9845         while (s < PL_bufend && isSPACE(*s))
9846             s++;
9847         if (*s == ',') {
9848             GV* gv;
9849             if (keyword(w, s - w, 0))
9850                 return;
9851 
9852             gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
9853             if (gv && GvCVu(gv))
9854                 return;
9855             if (s - w <= 254) {
9856                 PADOFFSET off;
9857                 char tmpbuf[256];
9858                 Copy(w, tmpbuf+1, s - w, char);
9859                 *tmpbuf = '&';
9860                 off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
9861                 if (off != NOT_IN_PAD) return;
9862             }
9863             Perl_croak(aTHX_ "No comma allowed after %s", what);
9864         }
9865     }
9866 }
9867 
9868 /* S_new_constant(): do any overload::constant lookup.
9869 
9870    Either returns sv, or mortalizes/frees sv and returns a new SV*.
9871    Best used as sv=new_constant(..., sv, ...).
9872    If s, pv are NULL, calls subroutine with one argument,
9873    and <type> is used with error messages only.
9874    <type> is assumed to be well formed UTF-8.
9875 
9876    If error_msg is not NULL, *error_msg will be set to any error encountered.
9877    Otherwise yyerror() will be used to output it */
9878 
9879 STATIC SV *
9880 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9881                SV *sv, SV *pv, const char *type, STRLEN typelen,
9882                const char ** error_msg)
9883 {
9884     dSP;
9885     HV * table = GvHV(PL_hintgv);		 /* ^H */
9886     SV *res;
9887     SV *errsv = NULL;
9888     SV **cvp;
9889     SV *cv, *typesv;
9890     const char *why1 = "", *why2 = "", *why3 = "";
9891     const char * optional_colon = ":";  /* Only some messages have a colon */
9892     char *msg;
9893 
9894     PERL_ARGS_ASSERT_NEW_CONSTANT;
9895     /* We assume that this is true: */
9896     assert(type || s);
9897 
9898     sv_2mortal(sv);			/* Parent created it permanently */
9899 
9900     if (   ! table
9901         || ! (PL_hints & HINT_LOCALIZE_HH))
9902     {
9903         why1 = "unknown";
9904         optional_colon = "";
9905         goto report;
9906     }
9907 
9908     cvp = hv_fetch(table, key, keylen, FALSE);
9909     if (!cvp || !SvOK(*cvp)) {
9910         why1 = "$^H{";
9911         why2 = key;
9912         why3 = "} is not defined";
9913         goto report;
9914     }
9915 
9916     cv = *cvp;
9917     if (!pv && s)
9918         pv = newSVpvn_flags(s, len, SVs_TEMP);
9919     if (type && pv)
9920         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
9921     else
9922         typesv = &PL_sv_undef;
9923 
9924     PUSHSTACKi(PERLSI_OVERLOAD);
9925     ENTER ;
9926     SAVETMPS;
9927 
9928     PUSHMARK(SP) ;
9929     EXTEND(sp, 3);
9930     if (pv)
9931         PUSHs(pv);
9932     PUSHs(sv);
9933     if (pv)
9934         PUSHs(typesv);
9935     PUTBACK;
9936     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9937 
9938     SPAGAIN ;
9939 
9940     /* Check the eval first */
9941     if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
9942         STRLEN errlen;
9943         const char * errstr;
9944         sv_catpvs(errsv, "Propagated");
9945         errstr = SvPV_const(errsv, errlen);
9946         yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
9947         (void)POPs;
9948         res = SvREFCNT_inc_simple_NN(sv);
9949     }
9950     else {
9951         res = POPs;
9952         SvREFCNT_inc_simple_void_NN(res);
9953     }
9954 
9955     PUTBACK ;
9956     FREETMPS ;
9957     LEAVE ;
9958     POPSTACK;
9959 
9960     if (SvOK(res)) {
9961         return res;
9962     }
9963 
9964     sv = res;
9965     (void)sv_2mortal(sv);
9966 
9967     why1 = "Call to &{$^H{";
9968     why2 = key;
9969     why3 = "}} did not return a defined value";
9970 
9971   report:
9972 
9973     msg = Perl_form(aTHX_ "Constant(%.*s)%s %s%s%s",
9974                         (int)(type ? typelen : len),
9975                         (type ? type: s),
9976                         optional_colon,
9977                         why1, why2, why3);
9978     if (error_msg) {
9979         *error_msg = msg;
9980     }
9981     else {
9982         yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
9983     }
9984     return SvREFCNT_inc_simple_NN(sv);
9985 }
9986 
9987 PERL_STATIC_INLINE void
9988 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
9989                     bool is_utf8, bool check_dollar, bool tick_warn)
9990 {
9991     int saw_tick = 0;
9992     const char *olds = *s;
9993     PERL_ARGS_ASSERT_PARSE_IDENT;
9994 
9995     while (*s < PL_bufend) {
9996         if (*d >= e)
9997             Perl_croak(aTHX_ "%s", ident_too_long);
9998         if (is_utf8 && isIDFIRST_utf8_safe(*s, PL_bufend)) {
9999              /* The UTF-8 case must come first, otherwise things
10000              * like c\N{COMBINING TILDE} would start failing, as the
10001              * isWORDCHAR_A case below would gobble the 'c' up.
10002              */
10003 
10004             char *t = *s + UTF8SKIP(*s);
10005             while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) {
10006                 t += UTF8SKIP(t);
10007             }
10008             if (*d + (t - *s) > e)
10009                 Perl_croak(aTHX_ "%s", ident_too_long);
10010             Copy(*s, *d, t - *s, char);
10011             *d += t - *s;
10012             *s = t;
10013         }
10014         else if ( isWORDCHAR_A(**s) ) {
10015             do {
10016                 *(*d)++ = *(*s)++;
10017             } while (isWORDCHAR_A(**s) && *d < e);
10018         }
10019         else if (   allow_package
10020                  && **s == '\''
10021                  && isIDFIRST_lazy_if_safe((*s)+1, PL_bufend, is_utf8))
10022         {
10023             *(*d)++ = ':';
10024             *(*d)++ = ':';
10025             (*s)++;
10026             saw_tick++;
10027         }
10028         else if (allow_package && **s == ':' && (*s)[1] == ':'
10029            /* Disallow things like Foo::$bar. For the curious, this is
10030             * the code path that triggers the "Bad name after" warning
10031             * when looking for barewords.
10032             */
10033            && !(check_dollar && (*s)[2] == '$')) {
10034             *(*d)++ = *(*s)++;
10035             *(*d)++ = *(*s)++;
10036         }
10037         else
10038             break;
10039     }
10040     if (UNLIKELY(tick_warn && saw_tick && PL_lex_state == LEX_INTERPNORMAL
10041               && !PL_lex_brackets && ckWARN(WARN_SYNTAX))) {
10042         char *this_d;
10043         char *d2;
10044         Newx(this_d, *s - olds + saw_tick + 2, char); /* +2 for $# */
10045         d2 = this_d;
10046         SAVEFREEPV(this_d);
10047         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10048                          "Old package separator used in string");
10049         if (olds[-1] == '#')
10050             *d2++ = olds[-2];
10051         *d2++ = olds[-1];
10052         while (olds < *s) {
10053             if (*olds == '\'') {
10054                 *d2++ = '\\';
10055                 *d2++ = *olds++;
10056             }
10057             else
10058                 *d2++ = *olds++;
10059         }
10060         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10061                          "\t(Did you mean \"%" UTF8f "\" instead?)\n",
10062                           UTF8fARG(is_utf8, d2-this_d, this_d));
10063     }
10064     return;
10065 }
10066 
10067 /* Returns a NUL terminated string, with the length of the string written to
10068    *slp
10069    */
10070 char *
10071 Perl_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
10072 {
10073     char *d = dest;
10074     char * const e = d + destlen - 3;  /* two-character token, ending NUL */
10075     bool is_utf8 = cBOOL(UTF);
10076 
10077     PERL_ARGS_ASSERT_SCAN_WORD;
10078 
10079     parse_ident(&s, &d, e, allow_package, is_utf8, TRUE, FALSE);
10080     *d = '\0';
10081     *slp = d - dest;
10082     return s;
10083 }
10084 
10085 /* Is the byte 'd' a legal single character identifier name?  'u' is true
10086  * iff Unicode semantics are to be used.  The legal ones are any of:
10087  *  a) all ASCII characters except:
10088  *          1) control and space-type ones, like NUL, SOH, \t, and SPACE;
10089  *          2) '{'
10090  *     The final case currently doesn't get this far in the program, so we
10091  *     don't test for it.  If that were to change, it would be ok to allow it.
10092  *  b) When not under Unicode rules, any upper Latin1 character
10093  *  c) Otherwise, when unicode rules are used, all XIDS characters.
10094  *
10095  *      Because all ASCII characters have the same representation whether
10096  *      encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
10097  *      '{' without knowing if is UTF-8 or not. */
10098 #define VALID_LEN_ONE_IDENT(s, e, is_utf8)                                  \
10099     (isGRAPH_A(*(s)) || ((is_utf8)                                          \
10100                          ? isIDFIRST_utf8_safe(s, e)                        \
10101                          : (isGRAPH_L1(*s)                                  \
10102                             && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
10103 
10104 STATIC char *
10105 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
10106 {
10107     I32 herelines = PL_parser->herelines;
10108     SSize_t bracket = -1;
10109     char funny = *s++;
10110     char *d = dest;
10111     char * const e = d + destlen - 3;    /* two-character token, ending NUL */
10112     bool is_utf8 = cBOOL(UTF);
10113     I32 orig_copline = 0, tmp_copline = 0;
10114 
10115     PERL_ARGS_ASSERT_SCAN_IDENT;
10116 
10117     if (isSPACE(*s) || !*s)
10118         s = skipspace(s);
10119     if (isDIGIT(*s)) { /* handle $0 and $1 $2 and $10 and etc */
10120         bool is_zero= *s == '0' ? TRUE : FALSE;
10121         char *digit_start= d;
10122         *d++ = *s++;
10123         while (s < PL_bufend && isDIGIT(*s)) {
10124             if (d >= e)
10125                 Perl_croak(aTHX_ "%s", ident_too_long);
10126             *d++ = *s++;
10127         }
10128         if (is_zero && d - digit_start > 1)
10129             Perl_croak(aTHX_ ident_var_zero_multi_digit);
10130     }
10131     else {  /* See if it is a "normal" identifier */
10132         parse_ident(&s, &d, e, 1, is_utf8, FALSE, TRUE);
10133     }
10134     *d = '\0';
10135     d = dest;
10136     if (*d) {
10137         /* Either a digit variable, or parse_ident() found an identifier
10138            (anything valid as a bareword), so job done and return.  */
10139         if (PL_lex_state != LEX_NORMAL)
10140             PL_lex_state = LEX_INTERPENDMAYBE;
10141         return s;
10142     }
10143 
10144     /* Here, it is not a run-of-the-mill identifier name */
10145 
10146     if (*s == '$' && s[1]
10147         && (   isIDFIRST_lazy_if_safe(s+1, PL_bufend, is_utf8)
10148             || isDIGIT_A((U8)s[1])
10149             || s[1] == '$'
10150             || s[1] == '{'
10151             || memBEGINs(s+1, (STRLEN) (PL_bufend - (s+1)), "::")) )
10152     {
10153         /* Dereferencing a value in a scalar variable.
10154            The alternatives are different syntaxes for a scalar variable.
10155            Using ' as a leading package separator isn't allowed. :: is.   */
10156         return s;
10157     }
10158     /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...}  */
10159     if (*s == '{') {
10160         bracket = s - SvPVX(PL_linestr);
10161         s++;
10162         orig_copline = CopLINE(PL_curcop);
10163         if (s < PL_bufend && isSPACE(*s)) {
10164             s = skipspace(s);
10165         }
10166     }
10167     if ((s <= PL_bufend - ((is_utf8)
10168                           ? UTF8SKIP(s)
10169                           : 1))
10170         && VALID_LEN_ONE_IDENT(s, PL_bufend, is_utf8))
10171     {
10172         if (is_utf8) {
10173             const STRLEN skip = UTF8SKIP(s);
10174             STRLEN i;
10175             d[skip] = '\0';
10176             for ( i = 0; i < skip; i++ )
10177                 d[i] = *s++;
10178         }
10179         else {
10180             *d = *s++;
10181             /* special case to handle ${10}, ${11} the same way we handle ${1} etc */
10182             if (isDIGIT(*d)) {
10183                 bool is_zero= *d == '0' ? TRUE : FALSE;
10184                 char *digit_start= d;
10185                 while (s < PL_bufend && isDIGIT(*s)) {
10186                     d++;
10187                     if (d >= e)
10188                         Perl_croak(aTHX_ "%s", ident_too_long);
10189                     *d= *s++;
10190                 }
10191                 if (is_zero && d - digit_start > 1)
10192                     Perl_croak(aTHX_ ident_var_zero_multi_digit);
10193             }
10194             d[1] = '\0';
10195         }
10196     }
10197     /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
10198     if (*d == '^' && *s && isCONTROLVAR(*s)) {
10199         *d = toCTRL(*s);
10200         s++;
10201     }
10202     /* Warn about ambiguous code after unary operators if {...} notation isn't
10203        used.  There's no difference in ambiguity; it's merely a heuristic
10204        about when not to warn.  */
10205     else if (ck_uni && bracket == -1)
10206         check_uni();
10207     if (bracket != -1) {
10208         bool skip;
10209         char *s2;
10210         /* If we were processing {...} notation then...  */
10211         if (isIDFIRST_lazy_if_safe(d, e, is_utf8)
10212             || (!isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
10213                  && isWORDCHAR(*s))
10214         ) {
10215             /* note we have to check for a normal identifier first,
10216              * as it handles utf8 symbols, and only after that has
10217              * been ruled out can we look at the caret words */
10218             if (isIDFIRST_lazy_if_safe(d, e, is_utf8) ) {
10219                 /* if it starts as a valid identifier, assume that it is one.
10220                    (the later check for } being at the expected point will trap
10221                    cases where this doesn't pan out.)  */
10222                 d += is_utf8 ? UTF8SKIP(d) : 1;
10223                 parse_ident(&s, &d, e, 1, is_utf8, TRUE, TRUE);
10224                 *d = '\0';
10225             }
10226             else { /* caret word: ${^Foo} ${^CAPTURE[0]} */
10227                 d++;
10228                 while (isWORDCHAR(*s) && d < e) {
10229                     *d++ = *s++;
10230                 }
10231                 if (d >= e)
10232                     Perl_croak(aTHX_ "%s", ident_too_long);
10233                 *d = '\0';
10234             }
10235             tmp_copline = CopLINE(PL_curcop);
10236             if (s < PL_bufend && isSPACE(*s)) {
10237                 s = skipspace(s);
10238             }
10239             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
10240                 /* ${foo[0]} and ${foo{bar}} and ${^CAPTURE[0]} notation.  */
10241                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10242                     const char * const brack =
10243                         (const char *)
10244                         ((*s == '[') ? "[...]" : "{...}");
10245                     orig_copline = CopLINE(PL_curcop);
10246                     CopLINE_set(PL_curcop, tmp_copline);
10247    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
10248                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10249                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
10250                         funny, dest, brack, funny, dest, brack);
10251                     CopLINE_set(PL_curcop, orig_copline);
10252                 }
10253                 bracket++;
10254                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
10255                 PL_lex_allbrackets++;
10256                 return s;
10257             }
10258         }
10259 
10260         if ( !tmp_copline )
10261             tmp_copline = CopLINE(PL_curcop);
10262         if ((skip = s < PL_bufend && isSPACE(*s))) {
10263             /* Avoid incrementing line numbers or resetting PL_linestart,
10264                in case we have to back up.  */
10265             STRLEN s_off = s - SvPVX(PL_linestr);
10266             s2 = peekspace(s);
10267             s = SvPVX(PL_linestr) + s_off;
10268         }
10269         else
10270             s2 = s;
10271 
10272         /* Expect to find a closing } after consuming any trailing whitespace.
10273          */
10274         if (*s2 == '}') {
10275             /* Now increment line numbers if applicable.  */
10276             if (skip)
10277                 s = skipspace(s);
10278             s++;
10279             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10280                 PL_lex_state = LEX_INTERPEND;
10281                 PL_expect = XREF;
10282             }
10283             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) {
10284                 if (ckWARN(WARN_AMBIGUOUS)
10285                     && (keyword(dest, d - dest, 0)
10286                         || get_cvn_flags(dest, d - dest, is_utf8
10287                            ? SVf_UTF8
10288                            : 0)))
10289                 {
10290                     SV *tmp = newSVpvn_flags( dest, d - dest,
10291                                         SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
10292                     if (funny == '#')
10293                         funny = '@';
10294                     orig_copline = CopLINE(PL_curcop);
10295                     CopLINE_set(PL_curcop, tmp_copline);
10296                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10297                         "Ambiguous use of %c{%" SVf "} resolved to %c%" SVf,
10298                         funny, SVfARG(tmp), funny, SVfARG(tmp));
10299                     CopLINE_set(PL_curcop, orig_copline);
10300                 }
10301             }
10302         }
10303         else {
10304             /* Didn't find the closing } at the point we expected, so restore
10305                state such that the next thing to process is the opening { and */
10306             s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
10307             CopLINE_set(PL_curcop, orig_copline);
10308             PL_parser->herelines = herelines;
10309             *dest = '\0';
10310             PL_parser->sub_no_recover = TRUE;
10311         }
10312     }
10313     else if (   PL_lex_state == LEX_INTERPNORMAL
10314              && !PL_lex_brackets
10315              && !intuit_more(s, PL_bufend))
10316         PL_lex_state = LEX_INTERPEND;
10317     return s;
10318 }
10319 
10320 static bool
10321 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) {
10322 
10323     /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag
10324      * found in the parse starting at 's', based on the subset that are valid
10325      * in this context input to this routine in 'valid_flags'. Advances s.
10326      * Returns TRUE if the input should be treated as a valid flag, so the next
10327      * char may be as well; otherwise FALSE. 'charset' should point to a NUL
10328      * upon first call on the current regex.  This routine will set it to any
10329      * charset modifier found.  The caller shouldn't change it.  This way,
10330      * another charset modifier encountered in the parse can be detected as an
10331      * error, as we have decided to allow only one */
10332 
10333     const char c = **s;
10334     STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
10335 
10336     if ( charlen != 1 || ! strchr(valid_flags, c) ) {
10337         if (isWORDCHAR_lazy_if_safe( *s, PL_bufend, UTF)) {
10338             yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
10339                        UTF ? SVf_UTF8 : 0);
10340             (*s) += charlen;
10341             /* Pretend that it worked, so will continue processing before
10342              * dieing */
10343             return TRUE;
10344         }
10345         return FALSE;
10346     }
10347 
10348     switch (c) {
10349 
10350         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count);
10351         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
10352         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
10353         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
10354         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
10355         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
10356         case LOCALE_PAT_MOD:
10357             if (*charset) {
10358                 goto multiple_charsets;
10359             }
10360             set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
10361             *charset = c;
10362             break;
10363         case UNICODE_PAT_MOD:
10364             if (*charset) {
10365                 goto multiple_charsets;
10366             }
10367             set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
10368             *charset = c;
10369             break;
10370         case ASCII_RESTRICT_PAT_MOD:
10371             if (! *charset) {
10372                 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
10373             }
10374             else {
10375 
10376                 /* Error if previous modifier wasn't an 'a', but if it was, see
10377                  * if, and accept, a second occurrence (only) */
10378                 if (*charset != 'a'
10379                     || get_regex_charset(*pmfl)
10380                         != REGEX_ASCII_RESTRICTED_CHARSET)
10381                 {
10382                         goto multiple_charsets;
10383                 }
10384                 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
10385             }
10386             *charset = c;
10387             break;
10388         case DEPENDS_PAT_MOD:
10389             if (*charset) {
10390                 goto multiple_charsets;
10391             }
10392             set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
10393             *charset = c;
10394             break;
10395     }
10396 
10397     (*s)++;
10398     return TRUE;
10399 
10400     multiple_charsets:
10401         if (*charset != c) {
10402             yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
10403         }
10404         else if (c == 'a') {
10405   /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
10406             yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
10407         }
10408         else {
10409             yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
10410         }
10411 
10412         /* Pretend that it worked, so will continue processing before dieing */
10413         (*s)++;
10414         return TRUE;
10415 }
10416 
10417 STATIC char *
10418 S_scan_pat(pTHX_ char *start, I32 type)
10419 {
10420     PMOP *pm;
10421     char *s;
10422     const char * const valid_flags =
10423         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
10424     char charset = '\0';    /* character set modifier */
10425     unsigned int x_mod_count = 0;
10426 
10427     PERL_ARGS_ASSERT_SCAN_PAT;
10428 
10429     s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
10430     if (!s)
10431         Perl_croak(aTHX_ "Search pattern not terminated");
10432 
10433     pm = (PMOP*)newPMOP(type, 0);
10434     if (PL_multi_open == '?') {
10435         /* This is the only point in the code that sets PMf_ONCE:  */
10436         pm->op_pmflags |= PMf_ONCE;
10437 
10438         /* Hence it's safe to do this bit of PMOP book-keeping here, which
10439            allows us to restrict the list needed by reset to just the ??
10440            matches.  */
10441         assert(type != OP_TRANS);
10442         if (PL_curstash) {
10443             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
10444             U32 elements;
10445             if (!mg) {
10446                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
10447                                  0);
10448             }
10449             elements = mg->mg_len / sizeof(PMOP**);
10450             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
10451             ((PMOP**)mg->mg_ptr) [elements++] = pm;
10452             mg->mg_len = elements * sizeof(PMOP**);
10453             PmopSTASH_set(pm,PL_curstash);
10454         }
10455     }
10456 
10457     /* if qr/...(?{..}).../, then need to parse the pattern within a new
10458      * anon CV. False positives like qr/[(?{]/ are harmless */
10459 
10460     if (type == OP_QR) {
10461         STRLEN len;
10462         char *e, *p = SvPV(PL_lex_stuff, len);
10463         e = p + len;
10464         for (; p < e; p++) {
10465             if (p[0] == '(' && p[1] == '?'
10466                 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
10467             {
10468                 pm->op_pmflags |= PMf_HAS_CV;
10469                 break;
10470             }
10471         }
10472         pm->op_pmflags |= PMf_IS_QR;
10473     }
10474 
10475     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
10476                                 &s, &charset, &x_mod_count))
10477     {};
10478     /* issue a warning if /c is specified,but /g is not */
10479     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
10480     {
10481         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
10482                        "Use of /c modifier is meaningless without /g" );
10483     }
10484 
10485     PL_lex_op = (OP*)pm;
10486     pl_yylval.ival = OP_MATCH;
10487     return s;
10488 }
10489 
10490 STATIC char *
10491 S_scan_subst(pTHX_ char *start)
10492 {
10493     char *s;
10494     PMOP *pm;
10495     I32 first_start;
10496     line_t first_line;
10497     line_t linediff = 0;
10498     I32 es = 0;
10499     char charset = '\0';    /* character set modifier */
10500     unsigned int x_mod_count = 0;
10501     char *t;
10502 
10503     PERL_ARGS_ASSERT_SCAN_SUBST;
10504 
10505     pl_yylval.ival = OP_NULL;
10506 
10507     s = scan_str(start, TRUE, FALSE, FALSE, &t);
10508 
10509     if (!s)
10510         Perl_croak(aTHX_ "Substitution pattern not terminated");
10511 
10512     s = t;
10513 
10514     first_start = PL_multi_start;
10515     first_line = CopLINE(PL_curcop);
10516     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
10517     if (!s) {
10518         SvREFCNT_dec_NN(PL_lex_stuff);
10519         PL_lex_stuff = NULL;
10520         Perl_croak(aTHX_ "Substitution replacement not terminated");
10521     }
10522     PL_multi_start = first_start;	/* so whole substitution is taken together */
10523 
10524     pm = (PMOP*)newPMOP(OP_SUBST, 0);
10525 
10526 
10527     while (*s) {
10528         if (*s == EXEC_PAT_MOD) {
10529             s++;
10530             es++;
10531         }
10532         else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
10533                                   &s, &charset, &x_mod_count))
10534         {
10535             break;
10536         }
10537     }
10538 
10539     if ((pm->op_pmflags & PMf_CONTINUE)) {
10540         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
10541     }
10542 
10543     if (es) {
10544         SV * const repl = newSVpvs("");
10545 
10546         PL_multi_end = 0;
10547         pm->op_pmflags |= PMf_EVAL;
10548         for (; es > 1; es--) {
10549             sv_catpvs(repl, "eval ");
10550         }
10551         sv_catpvs(repl, "do {");
10552         sv_catsv(repl, PL_parser->lex_sub_repl);
10553         sv_catpvs(repl, "}");
10554         SvREFCNT_dec(PL_parser->lex_sub_repl);
10555         PL_parser->lex_sub_repl = repl;
10556     }
10557 
10558 
10559     linediff = CopLINE(PL_curcop) - first_line;
10560     if (linediff)
10561         CopLINE_set(PL_curcop, first_line);
10562 
10563     if (linediff || es) {
10564         /* the IVX field indicates that the replacement string is a s///e;
10565          * the NVX field indicates how many src code lines the replacement
10566          * spreads over */
10567         sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV);
10568         ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = linediff;
10569         ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen =
10570                                                                     cBOOL(es);
10571     }
10572 
10573     PL_lex_op = (OP*)pm;
10574     pl_yylval.ival = OP_SUBST;
10575     return s;
10576 }
10577 
10578 STATIC char *
10579 S_scan_trans(pTHX_ char *start)
10580 {
10581     char* s;
10582     OP *o;
10583     U8 squash;
10584     U8 del;
10585     U8 complement;
10586     bool nondestruct = 0;
10587     char *t;
10588 
10589     PERL_ARGS_ASSERT_SCAN_TRANS;
10590 
10591     pl_yylval.ival = OP_NULL;
10592 
10593     s = scan_str(start,FALSE,FALSE,FALSE,&t);
10594     if (!s)
10595         Perl_croak(aTHX_ "Transliteration pattern not terminated");
10596 
10597     s = t;
10598 
10599     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
10600     if (!s) {
10601         SvREFCNT_dec_NN(PL_lex_stuff);
10602         PL_lex_stuff = NULL;
10603         Perl_croak(aTHX_ "Transliteration replacement not terminated");
10604     }
10605 
10606     complement = del = squash = 0;
10607     while (1) {
10608         switch (*s) {
10609         case 'c':
10610             complement = OPpTRANS_COMPLEMENT;
10611             break;
10612         case 'd':
10613             del = OPpTRANS_DELETE;
10614             break;
10615         case 's':
10616             squash = OPpTRANS_SQUASH;
10617             break;
10618         case 'r':
10619             nondestruct = 1;
10620             break;
10621         default:
10622             goto no_more;
10623         }
10624         s++;
10625     }
10626   no_more:
10627 
10628     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
10629     o->op_private &= ~OPpTRANS_ALL;
10630     o->op_private |= del|squash|complement;
10631 
10632     PL_lex_op = o;
10633     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
10634 
10635 
10636     return s;
10637 }
10638 
10639 /* scan_heredoc
10640    Takes a pointer to the first < in <<FOO.
10641    Returns a pointer to the byte following <<FOO.
10642 
10643    This function scans a heredoc, which involves different methods
10644    depending on whether we are in a string eval, quoted construct, etc.
10645    This is because PL_linestr could containing a single line of input, or
10646    a whole string being evalled, or the contents of the current quote-
10647    like operator.
10648 
10649    The two basic methods are:
10650     - Steal lines from the input stream
10651     - Scan the heredoc in PL_linestr and remove it therefrom
10652 
10653    In a file scope or filtered eval, the first method is used; in a
10654    string eval, the second.
10655 
10656    In a quote-like operator, we have to choose between the two,
10657    depending on where we can find a newline.  We peek into outer lex-
10658    ing scopes until we find one with a newline in it.  If we reach the
10659    outermost lexing scope and it is a file, we use the stream method.
10660    Otherwise it is treated as an eval.
10661 */
10662 
10663 STATIC char *
10664 S_scan_heredoc(pTHX_ char *s)
10665 {
10666     I32 op_type = OP_SCALAR;
10667     I32 len;
10668     SV *tmpstr;
10669     char term;
10670     char *d;
10671     char *e;
10672     char *peek;
10673     char *indent = 0;
10674     I32 indent_len = 0;
10675     bool indented = FALSE;
10676     const bool infile = PL_rsfp || PL_parser->filtered;
10677     const line_t origline = CopLINE(PL_curcop);
10678     LEXSHARED *shared = PL_parser->lex_shared;
10679 
10680     PERL_ARGS_ASSERT_SCAN_HEREDOC;
10681 
10682     s += 2;
10683     d = PL_tokenbuf + 1;
10684     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
10685     *PL_tokenbuf = '\n';
10686     peek = s;
10687 
10688     if (*peek == '~') {
10689         indented = TRUE;
10690         peek++; s++;
10691     }
10692 
10693     while (SPACE_OR_TAB(*peek))
10694         peek++;
10695 
10696     if (*peek == '`' || *peek == '\'' || *peek =='"') {
10697         s = peek;
10698         term = *s++;
10699         s = delimcpy(d, e, s, PL_bufend, term, &len);
10700         if (s == PL_bufend)
10701             Perl_croak(aTHX_ "Unterminated delimiter for here document");
10702         d += len;
10703         s++;
10704     }
10705     else {
10706         if (*s == '\\')
10707             /* <<\FOO is equivalent to <<'FOO' */
10708             s++, term = '\'';
10709         else
10710             term = '"';
10711 
10712         if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
10713             Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden");
10714 
10715         peek = s;
10716 
10717         while (isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF)) {
10718             peek += UTF ? UTF8SKIP(peek) : 1;
10719         }
10720 
10721         len = (peek - s >= e - d) ? (e - d) : (peek - s);
10722         Copy(s, d, len, char);
10723         s += len;
10724         d += len;
10725     }
10726 
10727     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
10728         Perl_croak(aTHX_ "Delimiter for here document is too long");
10729 
10730     *d++ = '\n';
10731     *d = '\0';
10732     len = d - PL_tokenbuf;
10733 
10734 #ifndef PERL_STRICT_CR
10735     d = (char *) memchr(s, '\r', PL_bufend - s);
10736     if (d) {
10737         char * const olds = s;
10738         s = d;
10739         while (s < PL_bufend) {
10740             if (*s == '\r') {
10741                 *d++ = '\n';
10742                 if (*++s == '\n')
10743                     s++;
10744             }
10745             else if (*s == '\n' && s[1] == '\r') {	/* \015\013 on a mac? */
10746                 *d++ = *s++;
10747                 s++;
10748             }
10749             else
10750                 *d++ = *s++;
10751         }
10752         *d = '\0';
10753         PL_bufend = d;
10754         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10755         s = olds;
10756     }
10757 #endif
10758 
10759     tmpstr = newSV_type(SVt_PVIV);
10760     SvGROW(tmpstr, 80);
10761     if (term == '\'') {
10762         op_type = OP_CONST;
10763         SvIV_set(tmpstr, -1);
10764     }
10765     else if (term == '`') {
10766         op_type = OP_BACKTICK;
10767         SvIV_set(tmpstr, '\\');
10768     }
10769 
10770     PL_multi_start = origline + 1 + PL_parser->herelines;
10771     PL_multi_open = PL_multi_close = '<';
10772 
10773     /* inside a string eval or quote-like operator */
10774     if (!infile || PL_lex_inwhat) {
10775         SV *linestr;
10776         char *bufend;
10777         char * const olds = s;
10778         PERL_CONTEXT * const cx = CX_CUR();
10779         /* These two fields are not set until an inner lexing scope is
10780            entered.  But we need them set here. */
10781         shared->ls_bufptr  = s;
10782         shared->ls_linestr = PL_linestr;
10783 
10784         if (PL_lex_inwhat) {
10785             /* Look for a newline.  If the current buffer does not have one,
10786              peek into the line buffer of the parent lexing scope, going
10787              up as many levels as necessary to find one with a newline
10788              after bufptr.
10789             */
10790             while (!(s = (char *)memchr(
10791                                 (void *)shared->ls_bufptr, '\n',
10792                                 SvEND(shared->ls_linestr)-shared->ls_bufptr
10793                 )))
10794             {
10795                 shared = shared->ls_prev;
10796                 /* shared is only null if we have gone beyond the outermost
10797                    lexing scope.  In a file, we will have broken out of the
10798                    loop in the previous iteration.  In an eval, the string buf-
10799                    fer ends with "\n;", so the while condition above will have
10800                    evaluated to false.  So shared can never be null.  Or so you
10801                    might think.  Odd syntax errors like s;@{<<; can gobble up
10802                    the implicit semicolon at the end of a flie, causing the
10803                    file handle to be closed even when we are not in a string
10804                    eval.  So shared may be null in that case.
10805                    (Closing '>>}' here to balance the earlier open brace for
10806                    editors that look for matched pairs.) */
10807                 if (UNLIKELY(!shared))
10808                     goto interminable;
10809                 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
10810                    most lexing scope.  In a file, shared->ls_linestr at that
10811                    level is just one line, so there is no body to steal. */
10812                 if (infile && !shared->ls_prev) {
10813                     s = olds;
10814                     goto streaming;
10815                 }
10816             }
10817         }
10818         else {	/* eval or we've already hit EOF */
10819             s = (char*)memchr((void*)s, '\n', PL_bufend - s);
10820             if (!s)
10821                 goto interminable;
10822         }
10823 
10824         linestr = shared->ls_linestr;
10825         bufend = SvEND(linestr);
10826         d = s;
10827         if (indented) {
10828             char *myolds = s;
10829 
10830             while (s < bufend - len + 1) {
10831                 if (*s++ == '\n')
10832                     ++PL_parser->herelines;
10833 
10834                 if (memEQ(s, PL_tokenbuf + 1, len - 1)) {
10835                     char *backup = s;
10836                     indent_len = 0;
10837 
10838                     /* Only valid if it's preceded by whitespace only */
10839                     while (backup != myolds && --backup >= myolds) {
10840                         if (! SPACE_OR_TAB(*backup)) {
10841                             break;
10842                         }
10843                         indent_len++;
10844                     }
10845 
10846                     /* No whitespace or all! */
10847                     if (backup == s || *backup == '\n') {
10848                         Newx(indent, indent_len + 1, char);
10849                         memcpy(indent, backup + 1, indent_len);
10850                         indent[indent_len] = 0;
10851                         s--; /* before our delimiter */
10852                         PL_parser->herelines--; /* this line doesn't count */
10853                         break;
10854                     }
10855                 }
10856             }
10857         }
10858         else {
10859             while (s < bufend - len + 1
10860                    && memNE(s,PL_tokenbuf,len) )
10861             {
10862                 if (*s++ == '\n')
10863                     ++PL_parser->herelines;
10864             }
10865         }
10866 
10867         if (s >= bufend - len + 1) {
10868             goto interminable;
10869         }
10870 
10871         sv_setpvn(tmpstr,d+1,s-d);
10872         s += len - 1;
10873         /* the preceding stmt passes a newline */
10874         PL_parser->herelines++;
10875 
10876         /* s now points to the newline after the heredoc terminator.
10877            d points to the newline before the body of the heredoc.
10878          */
10879 
10880         /* We are going to modify linestr in place here, so set
10881            aside copies of the string if necessary for re-evals or
10882            (caller $n)[6]. */
10883         /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
10884            check shared->re_eval_str. */
10885         if (shared->re_eval_start || shared->re_eval_str) {
10886             /* Set aside the rest of the regexp */
10887             if (!shared->re_eval_str)
10888                 shared->re_eval_str =
10889                        newSVpvn(shared->re_eval_start,
10890                                 bufend - shared->re_eval_start);
10891             shared->re_eval_start -= s-d;
10892         }
10893 
10894         if (cxstack_ix >= 0
10895             && CxTYPE(cx) == CXt_EVAL
10896             && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
10897             && cx->blk_eval.cur_text == linestr)
10898         {
10899             cx->blk_eval.cur_text = newSVsv(linestr);
10900             cx->blk_u16 |= 0x40; /* indicate cur_text is ref counted */
10901         }
10902 
10903         /* Copy everything from s onwards back to d. */
10904         Move(s,d,bufend-s + 1,char);
10905         SvCUR_set(linestr, SvCUR(linestr) - (s-d));
10906         /* Setting PL_bufend only applies when we have not dug deeper
10907            into other scopes, because sublex_done sets PL_bufend to
10908            SvEND(PL_linestr). */
10909         if (shared == PL_parser->lex_shared)
10910             PL_bufend = SvEND(linestr);
10911         s = olds;
10912     }
10913     else {
10914         SV *linestr_save;
10915         char *oldbufptr_save;
10916         char *oldoldbufptr_save;
10917       streaming:
10918         SvPVCLEAR(tmpstr);   /* avoid "uninitialized" warning */
10919         term = PL_tokenbuf[1];
10920         len--;
10921         linestr_save = PL_linestr; /* must restore this afterwards */
10922         d = s;			 /* and this */
10923         oldbufptr_save = PL_oldbufptr;
10924         oldoldbufptr_save = PL_oldoldbufptr;
10925         PL_linestr = newSVpvs("");
10926         PL_bufend = SvPVX(PL_linestr);
10927 
10928         while (1) {
10929             PL_bufptr = PL_bufend;
10930             CopLINE_set(PL_curcop,
10931                         origline + 1 + PL_parser->herelines);
10932 
10933             if (   !lex_next_chunk(LEX_NO_TERM)
10934                 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n'))
10935             {
10936                 /* Simply freeing linestr_save might seem simpler here, as it
10937                    does not matter what PL_linestr points to, since we are
10938                    about to croak; but in a quote-like op, linestr_save
10939                    will have been prospectively freed already, via
10940                    SAVEFREESV(PL_linestr) in sublex_push, so it's easier to
10941                    restore PL_linestr. */
10942                 SvREFCNT_dec_NN(PL_linestr);
10943                 PL_linestr = linestr_save;
10944                 PL_oldbufptr = oldbufptr_save;
10945                 PL_oldoldbufptr = oldoldbufptr_save;
10946                 goto interminable;
10947             }
10948 
10949             CopLINE_set(PL_curcop, origline);
10950 
10951             if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
10952                 s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
10953                 /* ^That should be enough to avoid this needing to grow:  */
10954                 sv_catpvs(PL_linestr, "\n\0");
10955                 assert(s == SvPVX(PL_linestr));
10956                 PL_bufend = SvEND(PL_linestr);
10957             }
10958 
10959             s = PL_bufptr;
10960             PL_parser->herelines++;
10961             PL_last_lop = PL_last_uni = NULL;
10962 
10963 #ifndef PERL_STRICT_CR
10964             if (PL_bufend - PL_linestart >= 2) {
10965                 if (   (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
10966                     || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
10967                 {
10968                     PL_bufend[-2] = '\n';
10969                     PL_bufend--;
10970                     SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10971                 }
10972                 else if (PL_bufend[-1] == '\r')
10973                     PL_bufend[-1] = '\n';
10974             }
10975             else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10976                 PL_bufend[-1] = '\n';
10977 #endif
10978 
10979             if (indented && (PL_bufend-s) >= len) {
10980                 char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len));
10981 
10982                 if (found) {
10983                     char *backup = found;
10984                     indent_len = 0;
10985 
10986                     /* Only valid if it's preceded by whitespace only */
10987                     while (backup != s && --backup >= s) {
10988                         if (! SPACE_OR_TAB(*backup)) {
10989                             break;
10990                         }
10991                         indent_len++;
10992                     }
10993 
10994                     /* All whitespace or none! */
10995                     if (backup == found || SPACE_OR_TAB(*backup)) {
10996                         Newx(indent, indent_len + 1, char);
10997                         memcpy(indent, backup, indent_len);
10998                         indent[indent_len] = 0;
10999                         SvREFCNT_dec(PL_linestr);
11000                         PL_linestr = linestr_save;
11001                         PL_linestart = SvPVX(linestr_save);
11002                         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11003                         PL_oldbufptr = oldbufptr_save;
11004                         PL_oldoldbufptr = oldoldbufptr_save;
11005                         s = d;
11006                         break;
11007                     }
11008                 }
11009 
11010                 /* Didn't find it */
11011                 sv_catsv(tmpstr,PL_linestr);
11012             }
11013             else {
11014                 if (*s == term && PL_bufend-s >= len
11015                     && memEQ(s,PL_tokenbuf + 1,len))
11016                 {
11017                     SvREFCNT_dec(PL_linestr);
11018                     PL_linestr = linestr_save;
11019                     PL_linestart = SvPVX(linestr_save);
11020                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11021                     PL_oldbufptr = oldbufptr_save;
11022                     PL_oldoldbufptr = oldoldbufptr_save;
11023                     s = d;
11024                     break;
11025                 }
11026                 else {
11027                     sv_catsv(tmpstr,PL_linestr);
11028                 }
11029             }
11030         } /* while (1) */
11031     }
11032 
11033     PL_multi_end = origline + PL_parser->herelines;
11034 
11035     if (indented && indent) {
11036         STRLEN linecount = 1;
11037         STRLEN herelen = SvCUR(tmpstr);
11038         char *ss = SvPVX(tmpstr);
11039         char *se = ss + herelen;
11040         SV *newstr = newSV(herelen+1);
11041         SvPOK_on(newstr);
11042 
11043         /* Trim leading whitespace */
11044         while (ss < se) {
11045             /* newline only? Copy and move on */
11046             if (*ss == '\n') {
11047                 sv_catpvs(newstr,"\n");
11048                 ss++;
11049                 linecount++;
11050 
11051             /* Found our indentation? Strip it */
11052             }
11053             else if (se - ss >= indent_len
11054                        && memEQ(ss, indent, indent_len))
11055             {
11056                 STRLEN le = 0;
11057                 ss += indent_len;
11058 
11059                 while ((ss + le) < se && *(ss + le) != '\n')
11060                     le++;
11061 
11062                 sv_catpvn(newstr, ss, le);
11063                 ss += le;
11064 
11065             /* Line doesn't begin with our indentation? Croak */
11066             }
11067             else {
11068                 Safefree(indent);
11069                 Perl_croak(aTHX_
11070                     "Indentation on line %d of here-doc doesn't match delimiter",
11071                     (int)linecount
11072                 );
11073             }
11074         } /* while */
11075 
11076         /* avoid sv_setsv() as we dont wan't to COW here */
11077         sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr));
11078         Safefree(indent);
11079         SvREFCNT_dec_NN(newstr);
11080     }
11081 
11082     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
11083         SvPV_shrink_to_cur(tmpstr);
11084     }
11085 
11086     if (!IN_BYTES) {
11087         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
11088             SvUTF8_on(tmpstr);
11089     }
11090 
11091     PL_lex_stuff = tmpstr;
11092     pl_yylval.ival = op_type;
11093     return s;
11094 
11095   interminable:
11096     if (indent)
11097         Safefree(indent);
11098     SvREFCNT_dec(tmpstr);
11099     CopLINE_set(PL_curcop, origline);
11100     missingterm(PL_tokenbuf + 1, sizeof(PL_tokenbuf) - 1);
11101 }
11102 
11103 
11104 /* scan_inputsymbol
11105    takes: position of first '<' in input buffer
11106    returns: position of first char following the matching '>' in
11107             input buffer
11108    side-effects: pl_yylval and lex_op are set.
11109 
11110    This code handles:
11111 
11112    <>		read from ARGV
11113    <<>>		read from ARGV without magic open
11114    <FH> 	read from filehandle
11115    <pkg::FH>	read from package qualified filehandle
11116    <pkg'FH>	read from package qualified filehandle
11117    <$fh>	read from filehandle in $fh
11118    <*.h>	filename glob
11119 
11120 */
11121 
11122 STATIC char *
11123 S_scan_inputsymbol(pTHX_ char *start)
11124 {
11125     char *s = start;		/* current position in buffer */
11126     char *end;
11127     I32 len;
11128     bool nomagicopen = FALSE;
11129     char *d = PL_tokenbuf;					/* start of temp holding space */
11130     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;	/* end of temp holding space */
11131 
11132     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
11133 
11134     end = (char *) memchr(s, '\n', PL_bufend - s);
11135     if (!end)
11136         end = PL_bufend;
11137     if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
11138         nomagicopen = TRUE;
11139         *d = '\0';
11140         len = 0;
11141         s += 3;
11142     }
11143     else
11144         s = delimcpy(d, e, s + 1, end, '>', &len);	/* extract until > */
11145 
11146     /* die if we didn't have space for the contents of the <>,
11147        or if it didn't end, or if we see a newline
11148     */
11149 
11150     if (len >= (I32)sizeof PL_tokenbuf)
11151         Perl_croak(aTHX_ "Excessively long <> operator");
11152     if (s >= end)
11153         Perl_croak(aTHX_ "Unterminated <> operator");
11154 
11155     s++;
11156 
11157     /* check for <$fh>
11158        Remember, only scalar variables are interpreted as filehandles by
11159        this code.  Anything more complex (e.g., <$fh{$num}>) will be
11160        treated as a glob() call.
11161        This code makes use of the fact that except for the $ at the front,
11162        a scalar variable and a filehandle look the same.
11163     */
11164     if (*d == '$' && d[1]) d++;
11165 
11166     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
11167     while (isWORDCHAR_lazy_if_safe(d, e, UTF) || *d == '\'' || *d == ':') {
11168         d += UTF ? UTF8SKIP(d) : 1;
11169     }
11170 
11171     /* If we've tried to read what we allow filehandles to look like, and
11172        there's still text left, then it must be a glob() and not a getline.
11173        Use scan_str to pull out the stuff between the <> and treat it
11174        as nothing more than a string.
11175     */
11176 
11177     if (d - PL_tokenbuf != len) {
11178         pl_yylval.ival = OP_GLOB;
11179         s = scan_str(start,FALSE,FALSE,FALSE,NULL);
11180         if (!s)
11181            Perl_croak(aTHX_ "Glob not terminated");
11182         return s;
11183     }
11184     else {
11185         bool readline_overriden = FALSE;
11186         GV *gv_readline;
11187         /* we're in a filehandle read situation */
11188         d = PL_tokenbuf;
11189 
11190         /* turn <> into <ARGV> */
11191         if (!len)
11192             Copy("ARGV",d,5,char);
11193 
11194         /* Check whether readline() is overriden */
11195         if ((gv_readline = gv_override("readline",8)))
11196             readline_overriden = TRUE;
11197 
11198         /* if <$fh>, create the ops to turn the variable into a
11199            filehandle
11200         */
11201         if (*d == '$') {
11202             /* try to find it in the pad for this block, otherwise find
11203                add symbol table ops
11204             */
11205             const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
11206             if (tmp != NOT_IN_PAD) {
11207                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
11208                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
11209                     HEK * const stashname = HvNAME_HEK(stash);
11210                     SV * const sym = sv_2mortal(newSVhek(stashname));
11211                     sv_catpvs(sym, "::");
11212                     sv_catpv(sym, d+1);
11213                     d = SvPVX(sym);
11214                     goto intro_sym;
11215                 }
11216                 else {
11217                     OP * const o = newOP(OP_PADSV, 0);
11218                     o->op_targ = tmp;
11219                     PL_lex_op = readline_overriden
11220                         ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11221                                 op_append_elem(OP_LIST, o,
11222                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
11223                         : newUNOP(OP_READLINE, 0, o);
11224                 }
11225             }
11226             else {
11227                 GV *gv;
11228                 ++d;
11229               intro_sym:
11230                 gv = gv_fetchpv(d,
11231                                 GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
11232                                 SVt_PV);
11233                 PL_lex_op = readline_overriden
11234                     ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11235                             op_append_elem(OP_LIST,
11236                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
11237                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11238                     : newUNOP(OP_READLINE, 0,
11239                             newUNOP(OP_RV2SV, 0,
11240                                 newGVOP(OP_GV, 0, gv)));
11241             }
11242             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
11243             pl_yylval.ival = OP_NULL;
11244         }
11245 
11246         /* If it's none of the above, it must be a literal filehandle
11247            (<Foo::BAR> or <FOO>) so build a simple readline OP */
11248         else {
11249             GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
11250             PL_lex_op = readline_overriden
11251                 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11252                         op_append_elem(OP_LIST,
11253                             newGVOP(OP_GV, 0, gv),
11254                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11255                 : newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
11256             pl_yylval.ival = OP_NULL;
11257 
11258             /* leave the token generation above to avoid confusing the parser */
11259             if (!FEATURE_BAREWORD_FILEHANDLES_IS_ENABLED) {
11260                 no_bareword_filehandle(d);
11261             }
11262         }
11263     }
11264 
11265     return s;
11266 }
11267 
11268 
11269 /* scan_str
11270    takes:
11271         start			position in buffer
11272         keep_bracketed_quoted   preserve \ quoting of embedded delimiters, but
11273                                 only if they are of the open/close form
11274         keep_delims		preserve the delimiters around the string
11275         re_reparse		compiling a run-time /(?{})/:
11276                                    collapse // to /,  and skip encoding src
11277         delimp			if non-null, this is set to the position of
11278                                 the closing delimiter, or just after it if
11279                                 the closing and opening delimiters differ
11280                                 (i.e., the opening delimiter of a substitu-
11281                                 tion replacement)
11282    returns: position to continue reading from buffer
11283    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
11284         updates the read buffer.
11285 
11286    This subroutine pulls a string out of the input.  It is called for:
11287         q		single quotes		q(literal text)
11288         '		single quotes		'literal text'
11289         qq		double quotes		qq(interpolate $here please)
11290         "		double quotes		"interpolate $here please"
11291         qx		backticks		qx(/bin/ls -l)
11292         `		backticks		`/bin/ls -l`
11293         qw		quote words		@EXPORT_OK = qw( func() $spam )
11294         m//		regexp match		m/this/
11295         s///		regexp substitute	s/this/that/
11296         tr///		string transliterate	tr/this/that/
11297         y///		string transliterate	y/this/that/
11298         ($*@)		sub prototypes		sub foo ($)
11299         (stuff)		sub attr parameters	sub foo : attr(stuff)
11300         <>		readline or globs	<FOO>, <>, <$fh>, or <*.c>
11301 
11302    In most of these cases (all but <>, patterns and transliterate)
11303    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
11304    calls scan_str().  s/// makes yylex() call scan_subst() which calls
11305    scan_str().  tr/// and y/// make yylex() call scan_trans() which
11306    calls scan_str().
11307 
11308    It skips whitespace before the string starts, and treats the first
11309    character as the delimiter.  If the delimiter is one of ([{< then
11310    the corresponding "close" character )]}> is used as the closing
11311    delimiter.  It allows quoting of delimiters, and if the string has
11312    balanced delimiters ([{<>}]) it allows nesting.
11313 
11314    On success, the SV with the resulting string is put into lex_stuff or,
11315    if that is already non-NULL, into lex_repl. The second case occurs only
11316    when parsing the RHS of the special constructs s/// and tr/// (y///).
11317    For convenience, the terminating delimiter character is stuffed into
11318    SvIVX of the SV.
11319 */
11320 
11321 char *
11322 Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
11323                  char **delimp
11324     )
11325 {
11326     SV *sv;			/* scalar value: string */
11327     char *s = start;		/* current position in the buffer */
11328     char *to;			/* current position in the sv's data */
11329     int brackets = 1;		/* bracket nesting level */
11330     bool d_is_utf8 = FALSE;	/* is there any utf8 content? */
11331     UV open_delim_code;         /* code point */
11332     char open_delim_str[UTF8_MAXBYTES+1];
11333     STRLEN delim_byte_len;      /* each delimiter currently is the same number
11334                                    of bytes */
11335     line_t herelines;
11336 
11337     /* The only non-UTF character that isn't a stand alone grapheme is
11338      * white-space, hence can't be a delimiter. */
11339     const char * non_grapheme_msg = "Use of unassigned code point or"
11340                                     " non-standalone grapheme for a delimiter"
11341                                     " is not allowed";
11342     PERL_ARGS_ASSERT_SCAN_STR;
11343 
11344     /* skip space before the delimiter */
11345     if (isSPACE(*s)) {  /* skipspace can change the buffer 's' is in, so
11346                            'start' also has to change */
11347         s = start = skipspace(s);
11348     }
11349 
11350     /* mark where we are, in case we need to report errors */
11351     CLINE;
11352 
11353     /* after skipping whitespace, the next character is the delimiter */
11354     if (! UTF || UTF8_IS_INVARIANT(*s)) {
11355         open_delim_code   = (U8) *s;
11356         open_delim_str[0] =      *s;
11357         delim_byte_len = 1;
11358     }
11359     else {
11360         open_delim_code = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend,
11361                                             &delim_byte_len);
11362         if (UNLIKELY(! is_grapheme((U8 *) start,
11363                                    (U8 *) s,
11364                                    (U8 *) PL_bufend,
11365                                    open_delim_code)))
11366         {
11367             yyerror(non_grapheme_msg);
11368         }
11369 
11370         Copy(s, open_delim_str, delim_byte_len, char);
11371     }
11372     open_delim_str[delim_byte_len] = '\0';  /* Only for safety */
11373 
11374 
11375     /* mark where we are */
11376     PL_multi_start = CopLINE(PL_curcop);
11377     PL_multi_open = open_delim_code;
11378     herelines = PL_parser->herelines;
11379 
11380     const char * legal_paired_opening_delims;
11381     const char * legal_paired_closing_delims;
11382     const char * deprecated_opening_delims;
11383     if (FEATURE_MORE_DELIMS_IS_ENABLED) {
11384         if (UTF) {
11385             legal_paired_opening_delims = EXTRA_OPENING_UTF8_BRACKETS;
11386             legal_paired_closing_delims = EXTRA_CLOSING_UTF8_BRACKETS;
11387 
11388             /* We are deprecating using a closing delimiter as the opening, in
11389              * case we want in the future to accept them reversed.  The string
11390              * may include ones that are legal, but the code below won't look
11391              * at this string unless it didn't find a legal opening one */
11392             deprecated_opening_delims = DEPRECATED_OPENING_UTF8_BRACKETS;
11393         }
11394         else {
11395             legal_paired_opening_delims = EXTRA_OPENING_NON_UTF8_BRACKETS;
11396             legal_paired_closing_delims = EXTRA_CLOSING_NON_UTF8_BRACKETS;
11397             deprecated_opening_delims = DEPRECATED_OPENING_NON_UTF8_BRACKETS;
11398         }
11399     }
11400     else {
11401         legal_paired_opening_delims = "([{<";
11402         legal_paired_closing_delims = ")]}>";
11403         deprecated_opening_delims = (UTF)
11404                                     ? DEPRECATED_OPENING_UTF8_BRACKETS
11405                                     : DEPRECATED_OPENING_NON_UTF8_BRACKETS;
11406     }
11407 
11408     const char * legal_paired_opening_delims_end = legal_paired_opening_delims
11409                                           + strlen(legal_paired_opening_delims);
11410     const char * deprecated_delims_end = deprecated_opening_delims
11411                                 + strlen(deprecated_opening_delims);
11412 
11413     const char * close_delim_str = open_delim_str;
11414     UV close_delim_code = open_delim_code;
11415 
11416     /* If the delimiter has a mirror-image closing one, get it */
11417     const char *tmps = ninstr(legal_paired_opening_delims,
11418                               legal_paired_opening_delims_end,
11419                               open_delim_str, open_delim_str + delim_byte_len);
11420     if (tmps) {
11421         /* Here, there is a paired delimiter, and tmps points to its position
11422            in the string of the accepted opening paired delimiters.  The
11423            corresponding position in the string of closing ones is the
11424            beginning of the paired mate.  Both contain the same number of
11425            bytes. */
11426         close_delim_str = legal_paired_closing_delims
11427                         + (tmps - legal_paired_opening_delims);
11428 
11429         /* The list of paired delimiters contains all the ASCII ones that have
11430          * always been legal, and no other ASCIIs.  Don't raise a message if
11431          * using one of these */
11432         if (! isASCII(open_delim_code)) {
11433             Perl_ck_warner_d(aTHX_
11434                              packWARN(WARN_EXPERIMENTAL__EXTRA_PAIRED_DELIMITERS),
11435                              "Use of '%" UTF8f "' is experimental as a string delimiter",
11436                              UTF8fARG(UTF, delim_byte_len, open_delim_str));
11437         }
11438 
11439         close_delim_code = (UTF)
11440                            ? valid_utf8_to_uvchr((U8 *) close_delim_str, NULL)
11441                            : * (U8 *) close_delim_str;
11442     }
11443     else {  /* Here, the delimiter isn't paired, hence the close is the same as
11444                the open; and has aready been set up.  But make sure it isn't
11445                deprecated to use this particular delimiter, as we plan
11446                eventually to make it paired. */
11447         if (ninstr(deprecated_opening_delims, deprecated_delims_end,
11448                    open_delim_str, open_delim_str + delim_byte_len))
11449         {
11450             Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
11451                              "Use of '%" UTF8f "' is deprecated as a string delimiter",
11452                              UTF8fARG(UTF, delim_byte_len, open_delim_str));
11453         }
11454 
11455         /* Note that a NUL may be used as a delimiter, and this happens when
11456          * delimitting an empty string, and no special handling for it is
11457          * needed, as ninstr() calls are used */
11458     }
11459 
11460     PL_multi_close = close_delim_code;
11461 
11462     if (PL_multi_open == PL_multi_close) {
11463         keep_bracketed_quoted = FALSE;
11464     }
11465 
11466     /* create a new SV to hold the contents.  79 is the SV's initial length.
11467        What a random number. */
11468     sv = newSV_type(SVt_PVIV);
11469     SvGROW(sv, 79);
11470     SvIV_set(sv, close_delim_code);
11471     (void)SvPOK_only(sv);		/* validate pointer */
11472 
11473     /* move past delimiter and try to read a complete string */
11474     if (keep_delims)
11475         sv_catpvn(sv, s, delim_byte_len);
11476     s += delim_byte_len;
11477     for (;;) {
11478         /* extend sv if need be */
11479         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
11480         /* set 'to' to the next character in the sv's string */
11481         to = SvPVX(sv)+SvCUR(sv);
11482 
11483         /* read until we run out of string, or we find the closing delimiter */
11484         while (s < PL_bufend) {
11485             /* embedded newlines increment the line count */
11486             if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
11487                 COPLINE_INC_WITH_HERELINES;
11488 
11489             /* backslashes can escape the closing delimiter */
11490             if (   *s == '\\' && s < PL_bufend - delim_byte_len
11491 
11492                    /* ... but not if the delimiter itself is a backslash */
11493                 && close_delim_code != '\\')
11494             {
11495                 /* Here, we have an escaping backslash.  If we're supposed to
11496                  * discard those that escape the closing delimiter, just
11497                  * discard this one */
11498                 if (   !  keep_bracketed_quoted
11499                     &&   (    memEQ(s + 1,  open_delim_str, delim_byte_len)
11500                           ||  (   PL_multi_open == PL_multi_close
11501                                && re_reparse && s[1] == '\\')
11502                           ||  memEQ(s + 1, close_delim_str, delim_byte_len)))
11503                 {
11504                     s++;
11505                 }
11506                 else /* any other escapes are simply copied straight through */
11507                     *to++ = *s++;
11508             }
11509             else if (   s < PL_bufend - (delim_byte_len - 1)
11510                      && memEQ(s, close_delim_str, delim_byte_len)
11511                      && --brackets <= 0)
11512             {
11513                 /* Found unescaped closing delimiter, unnested if we care about
11514                  * that; so are done.
11515                  *
11516                  * In the case of the opening and closing delimiters being
11517                  * different, we have to deal with nesting; the conditional
11518                  * above makes sure we don't get here until the nesting level,
11519                  * 'brackets', is back down to zero.  In the other case,
11520                  * nesting isn't an issue, and 'brackets' never can get
11521                  * incremented above 0, so will come here at the first closing
11522                  * delimiter.
11523                  *
11524                  * Only grapheme delimiters are legal. */
11525                 if (   UTF  /* All Non-UTF-8's are graphemes */
11526                     && UNLIKELY(! is_grapheme((U8 *) start,
11527                                               (U8 *) s,
11528                                               (U8 *) PL_bufend,
11529                                               close_delim_code)))
11530                 {
11531                     yyerror(non_grapheme_msg);
11532                 }
11533 
11534                 break;
11535             }
11536                         /* No nesting if open eq close */
11537             else if (   PL_multi_open != PL_multi_close
11538                      && s < PL_bufend - (delim_byte_len - 1)
11539                      && memEQ(s, open_delim_str, delim_byte_len))
11540             {
11541                 brackets++;
11542             }
11543 
11544             /* Here, still in the middle of the string; copy this character */
11545             if (! UTF || UTF8_IS_INVARIANT((U8) *s)) {
11546                 *to++ = *s++;
11547             }
11548             else {
11549                 size_t this_char_len = UTF8SKIP(s);
11550                 Copy(s, to, this_char_len, char);
11551                 s  += this_char_len;
11552                 to += this_char_len;
11553 
11554                 d_is_utf8 = TRUE;
11555             }
11556         } /* End of loop through buffer */
11557 
11558         /* Here, found end of the string, OR ran out of buffer: terminate the
11559          * copied string and update the sv's end-of-string */
11560         *to = '\0';
11561         SvCUR_set(sv, to - SvPVX_const(sv));
11562 
11563         /*
11564          * this next chunk reads more into the buffer if we're not done yet
11565          */
11566 
11567         if (s < PL_bufend)
11568             break;		/* handle case where we are done yet :-) */
11569 
11570 #ifndef PERL_STRICT_CR
11571         if (to - SvPVX_const(sv) >= 2) {
11572             if (   (to[-2] == '\r' && to[-1] == '\n')
11573                 || (to[-2] == '\n' && to[-1] == '\r'))
11574             {
11575                 to[-2] = '\n';
11576                 to--;
11577                 SvCUR_set(sv, to - SvPVX_const(sv));
11578             }
11579             else if (to[-1] == '\r')
11580                 to[-1] = '\n';
11581         }
11582         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
11583             to[-1] = '\n';
11584 #endif
11585 
11586         /* if we're out of file, or a read fails, bail and reset the current
11587            line marker so we can report where the unterminated string began
11588         */
11589         COPLINE_INC_WITH_HERELINES;
11590         PL_bufptr = PL_bufend;
11591         if (!lex_next_chunk(0)) {
11592             sv_free(sv);
11593             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11594             return NULL;
11595         }
11596         s = start = PL_bufptr;
11597     } /* End of infinite loop */
11598 
11599     /* at this point, we have successfully read the delimited string */
11600 
11601     if (keep_delims)
11602             sv_catpvn(sv, s, delim_byte_len);
11603     s += delim_byte_len;
11604 
11605     if (d_is_utf8)
11606         SvUTF8_on(sv);
11607 
11608     PL_multi_end = CopLINE(PL_curcop);
11609     CopLINE_set(PL_curcop, PL_multi_start);
11610     PL_parser->herelines = herelines;
11611 
11612     /* if we allocated too much space, give some back */
11613     if (SvCUR(sv) + 5 < SvLEN(sv)) {
11614         SvLEN_set(sv, SvCUR(sv) + 1);
11615         SvPV_shrink_to_cur(sv);
11616     }
11617 
11618     /* decide whether this is the first or second quoted string we've read
11619        for this op
11620     */
11621 
11622     if (PL_lex_stuff)
11623         PL_parser->lex_sub_repl = sv;
11624     else
11625         PL_lex_stuff = sv;
11626     if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-delim_byte_len : s;
11627     return s;
11628 }
11629 
11630 /*
11631   scan_num
11632   takes: pointer to position in buffer
11633   returns: pointer to new position in buffer
11634   side-effects: builds ops for the constant in pl_yylval.op
11635 
11636   Read a number in any of the formats that Perl accepts:
11637 
11638   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)	12 12.34 12.
11639   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)			.34
11640   0b[01](_?[01])*                                       binary integers
11641   0o?[0-7](_?[0-7])*                                    octal integers
11642   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*                         hexadecimal integers
11643   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+   hexadecimal floats
11644 
11645   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
11646   thing it reads.
11647 
11648   If it reads a number without a decimal point or an exponent, it will
11649   try converting the number to an integer and see if it can do so
11650   without loss of precision.
11651 */
11652 
11653 char *
11654 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
11655 {
11656     const char *s = start;	/* current position in buffer */
11657     char *d;			/* destination in temp buffer */
11658     char *e;			/* end of temp buffer */
11659     NV nv;				/* number read, as a double */
11660     SV *sv = NULL;			/* place to put the converted number */
11661     bool floatit;			/* boolean: int or float? */
11662     const char *lastub = NULL;		/* position of last underbar */
11663     static const char* const number_too_long = "Number too long";
11664     bool warned_about_underscore = 0;
11665     I32 shift; /* shift per digit for hex/oct/bin, hoisted here for fp */
11666 #define WARN_ABOUT_UNDERSCORE() \
11667         do { \
11668             if (!warned_about_underscore) { \
11669                 warned_about_underscore = 1; \
11670                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), \
11671                                "Misplaced _ in number"); \
11672             } \
11673         } while(0)
11674     /* Hexadecimal floating point.
11675      *
11676      * In many places (where we have quads and NV is IEEE 754 double)
11677      * we can fit the mantissa bits of a NV into an unsigned quad.
11678      * (Note that UVs might not be quads even when we have quads.)
11679      * This will not work everywhere, though (either no quads, or
11680      * using long doubles), in which case we have to resort to NV,
11681      * which will probably mean horrible loss of precision due to
11682      * multiple fp operations. */
11683     bool hexfp = FALSE;
11684     int total_bits = 0;
11685     int significant_bits = 0;
11686 #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
11687 #  define HEXFP_UQUAD
11688     Uquad_t hexfp_uquad = 0;
11689     int hexfp_frac_bits = 0;
11690 #else
11691 #  define HEXFP_NV
11692     NV hexfp_nv = 0.0;
11693 #endif
11694     NV hexfp_mult = 1.0;
11695     UV high_non_zero = 0; /* highest digit */
11696     int non_zero_integer_digits = 0;
11697     bool new_octal = FALSE;     /* octal with "0o" prefix */
11698 
11699     PERL_ARGS_ASSERT_SCAN_NUM;
11700 
11701     /* We use the first character to decide what type of number this is */
11702 
11703     switch (*s) {
11704     default:
11705         Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
11706 
11707     /* if it starts with a 0, it could be an octal number, a decimal in
11708        0.13 disguise, or a hexadecimal number, or a binary number. */
11709     case '0':
11710         {
11711           /* variables:
11712              u		holds the "number so far"
11713              overflowed	was the number more than we can hold?
11714 
11715              Shift is used when we add a digit.  It also serves as an "are
11716              we in octal/hex/binary?" indicator to disallow hex characters
11717              when in octal mode.
11718            */
11719             NV n = 0.0;
11720             UV u = 0;
11721             bool overflowed = FALSE;
11722             bool just_zero  = TRUE;	/* just plain 0 or binary number? */
11723             bool has_digs = FALSE;
11724             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
11725             static const char* const bases[5] =
11726               { "", "binary", "", "octal", "hexadecimal" };
11727             static const char* const Bases[5] =
11728               { "", "Binary", "", "Octal", "Hexadecimal" };
11729             static const char* const maxima[5] =
11730               { "",
11731                 "0b11111111111111111111111111111111",
11732                 "",
11733                 "037777777777",
11734                 "0xffffffff" };
11735 
11736             /* check for hex */
11737             if (isALPHA_FOLD_EQ(s[1], 'x')) {
11738                 shift = 4;
11739                 s += 2;
11740                 just_zero = FALSE;
11741             } else if (isALPHA_FOLD_EQ(s[1], 'b')) {
11742                 shift = 1;
11743                 s += 2;
11744                 just_zero = FALSE;
11745             }
11746             /* check for a decimal in disguise */
11747             else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e'))
11748                 goto decimal;
11749             /* so it must be octal */
11750             else {
11751                 shift = 3;
11752                 s++;
11753                 if (isALPHA_FOLD_EQ(*s, 'o')) {
11754                     s++;
11755                     just_zero = FALSE;
11756                     new_octal = TRUE;
11757                 }
11758             }
11759 
11760             if (*s == '_') {
11761                 WARN_ABOUT_UNDERSCORE();
11762                lastub = s++;
11763             }
11764 
11765             /* read the rest of the number */
11766             for (;;) {
11767                 /* x is used in the overflow test,
11768                    b is the digit we're adding on. */
11769                 UV x, b;
11770 
11771                 switch (*s) {
11772 
11773                 /* if we don't mention it, we're done */
11774                 default:
11775                     goto out;
11776 
11777                 /* _ are ignored -- but warned about if consecutive */
11778                 case '_':
11779                     if (lastub && s == lastub + 1)
11780                         WARN_ABOUT_UNDERSCORE();
11781                     lastub = s++;
11782                     break;
11783 
11784                 /* 8 and 9 are not octal */
11785                 case '8': case '9':
11786                     if (shift == 3)
11787                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
11788                     /* FALLTHROUGH */
11789 
11790                 /* octal digits */
11791                 case '2': case '3': case '4':
11792                 case '5': case '6': case '7':
11793                     if (shift == 1)
11794                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
11795                     /* FALLTHROUGH */
11796 
11797                 case '0': case '1':
11798                     b = *s++ & 15;		/* ASCII digit -> value of digit */
11799                     goto digit;
11800 
11801                 /* hex digits */
11802                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
11803                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
11804                     /* make sure they said 0x */
11805                     if (shift != 4)
11806                         goto out;
11807                     b = (*s++ & 7) + 9;
11808 
11809                     /* Prepare to put the digit we have onto the end
11810                        of the number so far.  We check for overflows.
11811                     */
11812 
11813                   digit:
11814                     just_zero = FALSE;
11815                     has_digs = TRUE;
11816                     if (!overflowed) {
11817                         assert(shift >= 0);
11818                         x = u << shift;	/* make room for the digit */
11819 
11820                         total_bits += shift;
11821 
11822                         if ((x >> shift) != u
11823                             && !(PL_hints & HINT_NEW_BINARY)) {
11824                             overflowed = TRUE;
11825                             n = (NV) u;
11826                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11827                                              "Integer overflow in %s number",
11828                                              bases[shift]);
11829                         } else
11830                             u = x | b;		/* add the digit to the end */
11831                     }
11832                     if (overflowed) {
11833                         n *= nvshift[shift];
11834                         /* If an NV has not enough bits in its
11835                          * mantissa to represent an UV this summing of
11836                          * small low-order numbers is a waste of time
11837                          * (because the NV cannot preserve the
11838                          * low-order bits anyway): we could just
11839                          * remember when did we overflow and in the
11840                          * end just multiply n by the right
11841                          * amount. */
11842                         n += (NV) b;
11843                     }
11844 
11845                     if (high_non_zero == 0 && b > 0)
11846                         high_non_zero = b;
11847 
11848                     if (high_non_zero)
11849                         non_zero_integer_digits++;
11850 
11851                     /* this could be hexfp, but peek ahead
11852                      * to avoid matching ".." */
11853                     if (UNLIKELY(HEXFP_PEEK(s))) {
11854                         goto out;
11855                     }
11856 
11857                     break;
11858                 }
11859             }
11860 
11861           /* if we get here, we had success: make a scalar value from
11862              the number.
11863           */
11864           out:
11865 
11866             /* final misplaced underbar check */
11867             if (s[-1] == '_')
11868                 WARN_ABOUT_UNDERSCORE();
11869 
11870             if (UNLIKELY(HEXFP_PEEK(s))) {
11871                 /* Do sloppy (on the underbars) but quick detection
11872                  * (and value construction) for hexfp, the decimal
11873                  * detection will shortly be more thorough with the
11874                  * underbar checks. */
11875                 const char* h = s;
11876                 significant_bits = non_zero_integer_digits * shift;
11877 #ifdef HEXFP_UQUAD
11878                 hexfp_uquad = u;
11879 #else /* HEXFP_NV */
11880                 hexfp_nv = u;
11881 #endif
11882                 /* Ignore the leading zero bits of
11883                  * the high (first) non-zero digit. */
11884                 if (high_non_zero) {
11885                     if (high_non_zero < 0x8)
11886                         significant_bits--;
11887                     if (high_non_zero < 0x4)
11888                         significant_bits--;
11889                     if (high_non_zero < 0x2)
11890                         significant_bits--;
11891                 }
11892 
11893                 if (*h == '.') {
11894 #ifdef HEXFP_NV
11895                     NV nv_mult = 1.0;
11896 #endif
11897                     bool accumulate = TRUE;
11898                     U8 b;
11899                     int lim = 1 << shift;
11900                     for (h++; ((isXDIGIT(*h) && (b = XDIGIT_VALUE(*h)) < lim) ||
11901                                *h == '_'); h++) {
11902                         if (isXDIGIT(*h)) {
11903                             significant_bits += shift;
11904 #ifdef HEXFP_UQUAD
11905                             if (accumulate) {
11906                                 if (significant_bits < NV_MANT_DIG) {
11907                                     /* We are in the long "run" of xdigits,
11908                                      * accumulate the full four bits. */
11909                                     assert(shift >= 0);
11910                                     hexfp_uquad <<= shift;
11911                                     hexfp_uquad |= b;
11912                                     hexfp_frac_bits += shift;
11913                                 } else if (significant_bits - shift < NV_MANT_DIG) {
11914                                     /* We are at a hexdigit either at,
11915                                      * or straddling, the edge of mantissa.
11916                                      * We will try grabbing as many as
11917                                      * possible bits. */
11918                                     int tail =
11919                                       significant_bits - NV_MANT_DIG;
11920                                     if (tail <= 0)
11921                                        tail += shift;
11922                                     assert(tail >= 0);
11923                                     hexfp_uquad <<= tail;
11924                                     assert((shift - tail) >= 0);
11925                                     hexfp_uquad |= b >> (shift - tail);
11926                                     hexfp_frac_bits += tail;
11927 
11928                                     /* Ignore the trailing zero bits
11929                                      * of the last non-zero xdigit.
11930                                      *
11931                                      * The assumption here is that if
11932                                      * one has input of e.g. the xdigit
11933                                      * eight (0x8), there is only one
11934                                      * bit being input, not the full
11935                                      * four bits.  Conversely, if one
11936                                      * specifies a zero xdigit, the
11937                                      * assumption is that one really
11938                                      * wants all those bits to be zero. */
11939                                     if (b) {
11940                                         if ((b & 0x1) == 0x0) {
11941                                             significant_bits--;
11942                                             if ((b & 0x2) == 0x0) {
11943                                                 significant_bits--;
11944                                                 if ((b & 0x4) == 0x0) {
11945                                                     significant_bits--;
11946                                                 }
11947                                             }
11948                                         }
11949                                     }
11950 
11951                                     accumulate = FALSE;
11952                                 }
11953                             } else {
11954                                 /* Keep skipping the xdigits, and
11955                                  * accumulating the significant bits,
11956                                  * but do not shift the uquad
11957                                  * (which would catastrophically drop
11958                                  * high-order bits) or accumulate the
11959                                  * xdigits anymore. */
11960                             }
11961 #else /* HEXFP_NV */
11962                             if (accumulate) {
11963                                 nv_mult /= nvshift[shift];
11964                                 if (nv_mult > 0.0)
11965                                     hexfp_nv += b * nv_mult;
11966                                 else
11967                                     accumulate = FALSE;
11968                             }
11969 #endif
11970                         }
11971                         if (significant_bits >= NV_MANT_DIG)
11972                             accumulate = FALSE;
11973                     }
11974                 }
11975 
11976                 if ((total_bits > 0 || significant_bits > 0) &&
11977                     isALPHA_FOLD_EQ(*h, 'p')) {
11978                     bool negexp = FALSE;
11979                     h++;
11980                     if (*h == '+')
11981                         h++;
11982                     else if (*h == '-') {
11983                         negexp = TRUE;
11984                         h++;
11985                     }
11986                     if (isDIGIT(*h)) {
11987                         I32 hexfp_exp = 0;
11988                         while (isDIGIT(*h) || *h == '_') {
11989                             if (isDIGIT(*h)) {
11990                                 hexfp_exp *= 10;
11991                                 hexfp_exp += *h - '0';
11992 #ifdef NV_MIN_EXP
11993                                 if (negexp
11994                                     && -hexfp_exp < NV_MIN_EXP - 1) {
11995                                     /* NOTE: this means that the exponent
11996                                      * underflow warning happens for
11997                                      * the IEEE 754 subnormals (denormals),
11998                                      * because DBL_MIN_EXP etc are the lowest
11999                                      * possible binary (or, rather, DBL_RADIX-base)
12000                                      * exponent for normals, not subnormals.
12001                                      *
12002                                      * This may or may not be a good thing. */
12003                                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
12004                                                    "Hexadecimal float: exponent underflow");
12005                                     break;
12006                                 }
12007 #endif
12008 #ifdef NV_MAX_EXP
12009                                 if (!negexp
12010                                     && hexfp_exp > NV_MAX_EXP - 1) {
12011                                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
12012                                                    "Hexadecimal float: exponent overflow");
12013                                     break;
12014                                 }
12015 #endif
12016                             }
12017                             h++;
12018                         }
12019                         if (negexp)
12020                             hexfp_exp = -hexfp_exp;
12021 #ifdef HEXFP_UQUAD
12022                         hexfp_exp -= hexfp_frac_bits;
12023 #endif
12024                         hexfp_mult = Perl_pow(2.0, hexfp_exp);
12025                         hexfp = TRUE;
12026                         goto decimal;
12027                     }
12028                 }
12029             }
12030 
12031             if (!just_zero && !has_digs) {
12032                 /* 0x, 0o or 0b with no digits, treat it as an error.
12033                    Originally this backed up the parse before the b or
12034                    x, but that has the potential for silent changes in
12035                    behaviour, like for: "0x.3" and "0x+$foo".
12036                 */
12037                 const char *d = s;
12038                 char *oldbp = PL_bufptr;
12039                 if (*d) ++d; /* so the user sees the bad non-digit */
12040                 PL_bufptr = (char *)d; /* so yyerror reports the context */
12041                 yyerror(Perl_form(aTHX_ "No digits found for %s literal",
12042                                   bases[shift]));
12043                 PL_bufptr = oldbp;
12044             }
12045 
12046             if (overflowed) {
12047                 if (n > 4294967295.0)
12048                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12049                                    "%s number > %s non-portable",
12050                                    Bases[shift],
12051                                    new_octal ? "0o37777777777" : maxima[shift]);
12052                 sv = newSVnv(n);
12053             }
12054             else {
12055 #if UVSIZE > 4
12056                 if (u > 0xffffffff)
12057                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12058                                    "%s number > %s non-portable",
12059                                    Bases[shift],
12060                                    new_octal ? "0o37777777777" : maxima[shift]);
12061 #endif
12062                 sv = newSVuv(u);
12063             }
12064             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
12065                 sv = new_constant(start, s - start, "integer",
12066                                   sv, NULL, NULL, 0, NULL);
12067             else if (PL_hints & HINT_NEW_BINARY)
12068                 sv = new_constant(start, s - start, "binary",
12069                                   sv, NULL, NULL, 0, NULL);
12070         }
12071         break;
12072 
12073     /*
12074       handle decimal numbers.
12075       we're also sent here when we read a 0 as the first digit
12076     */
12077     case '1': case '2': case '3': case '4': case '5':
12078     case '6': case '7': case '8': case '9': case '.':
12079       decimal:
12080         d = PL_tokenbuf;
12081         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
12082         floatit = FALSE;
12083         if (hexfp) {
12084             floatit = TRUE;
12085             *d++ = '0';
12086             switch (shift) {
12087             case 4:
12088                 *d++ = 'x';
12089                 s = start + 2;
12090                 break;
12091             case 3:
12092                 if (new_octal) {
12093                     *d++ = 'o';
12094                     s = start + 2;
12095                     break;
12096                 }
12097                 s = start + 1;
12098                 break;
12099             case 1:
12100                 *d++ = 'b';
12101                 s = start + 2;
12102                 break;
12103             default:
12104                 NOT_REACHED; /* NOTREACHED */
12105             }
12106         }
12107 
12108         /* read next group of digits and _ and copy into d */
12109         while (isDIGIT(*s)
12110                || *s == '_'
12111                || UNLIKELY(hexfp && isXDIGIT(*s)))
12112         {
12113             /* skip underscores, checking for misplaced ones
12114                if -w is on
12115             */
12116             if (*s == '_') {
12117                 if (lastub && s == lastub + 1)
12118                     WARN_ABOUT_UNDERSCORE();
12119                 lastub = s++;
12120             }
12121             else {
12122                 /* check for end of fixed-length buffer */
12123                 if (d >= e)
12124                     Perl_croak(aTHX_ "%s", number_too_long);
12125                 /* if we're ok, copy the character */
12126                 *d++ = *s++;
12127             }
12128         }
12129 
12130         /* final misplaced underbar check */
12131         if (lastub && s == lastub + 1)
12132             WARN_ABOUT_UNDERSCORE();
12133 
12134         /* read a decimal portion if there is one.  avoid
12135            3..5 being interpreted as the number 3. followed
12136            by .5
12137         */
12138         if (*s == '.' && s[1] != '.') {
12139             floatit = TRUE;
12140             *d++ = *s++;
12141 
12142             if (*s == '_') {
12143                 WARN_ABOUT_UNDERSCORE();
12144                 lastub = s;
12145             }
12146 
12147             /* copy, ignoring underbars, until we run out of digits.
12148             */
12149             for (; isDIGIT(*s)
12150                    || *s == '_'
12151                    || UNLIKELY(hexfp && isXDIGIT(*s));
12152                  s++)
12153             {
12154                 /* fixed length buffer check */
12155                 if (d >= e)
12156                     Perl_croak(aTHX_ "%s", number_too_long);
12157                 if (*s == '_') {
12158                    if (lastub && s == lastub + 1)
12159                         WARN_ABOUT_UNDERSCORE();
12160                    lastub = s;
12161                 }
12162                 else
12163                     *d++ = *s;
12164             }
12165             /* fractional part ending in underbar? */
12166             if (s[-1] == '_')
12167                 WARN_ABOUT_UNDERSCORE();
12168             if (*s == '.' && isDIGIT(s[1])) {
12169                 /* oops, it's really a v-string, but without the "v" */
12170                 s = start;
12171                 goto vstring;
12172             }
12173         }
12174 
12175         /* read exponent part, if present */
12176         if ((isALPHA_FOLD_EQ(*s, 'e')
12177               || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
12178             && memCHRs("+-0123456789_", s[1]))
12179         {
12180             int exp_digits = 0;
12181             const char *save_s = s;
12182             char * save_d = d;
12183 
12184             /* regardless of whether user said 3E5 or 3e5, use lower 'e',
12185                ditto for p (hexfloats) */
12186             if ((isALPHA_FOLD_EQ(*s, 'e'))) {
12187                 /* At least some Mach atof()s don't grok 'E' */
12188                 *d++ = 'e';
12189             }
12190             else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) {
12191                 *d++ = 'p';
12192             }
12193 
12194             s++;
12195 
12196 
12197             /* stray preinitial _ */
12198             if (*s == '_') {
12199                 WARN_ABOUT_UNDERSCORE();
12200                 lastub = s++;
12201             }
12202 
12203             /* allow positive or negative exponent */
12204             if (*s == '+' || *s == '-')
12205                 *d++ = *s++;
12206 
12207             /* stray initial _ */
12208             if (*s == '_') {
12209                 WARN_ABOUT_UNDERSCORE();
12210                 lastub = s++;
12211             }
12212 
12213             /* read digits of exponent */
12214             while (isDIGIT(*s) || *s == '_') {
12215                 if (isDIGIT(*s)) {
12216                     ++exp_digits;
12217                     if (d >= e)
12218                         Perl_croak(aTHX_ "%s", number_too_long);
12219                     *d++ = *s++;
12220                 }
12221                 else {
12222                    if (((lastub && s == lastub + 1)
12223                         || (!isDIGIT(s[1]) && s[1] != '_')))
12224                         WARN_ABOUT_UNDERSCORE();
12225                    lastub = s++;
12226                 }
12227             }
12228 
12229             if (!exp_digits) {
12230                 /* no exponent digits, the [eEpP] could be for something else,
12231                  * though in practice we don't get here for p since that's preparsed
12232                  * earlier, and results in only the 0xX being consumed, so behave similarly
12233                  * for decimal floats and consume only the D.DD, leaving the [eE] to the
12234                  * next token.
12235                  */
12236                 s = save_s;
12237                 d = save_d;
12238             }
12239             else {
12240                 floatit = TRUE;
12241             }
12242         }
12243 
12244 
12245         /*
12246            We try to do an integer conversion first if no characters
12247            indicating "float" have been found.
12248          */
12249 
12250         if (!floatit) {
12251             UV uv;
12252             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
12253 
12254             if (flags == IS_NUMBER_IN_UV) {
12255               if (uv <= IV_MAX)
12256                 sv = newSViv(uv); /* Prefer IVs over UVs. */
12257               else
12258                 sv = newSVuv(uv);
12259             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
12260               if (uv <= (UV) IV_MIN)
12261                 sv = newSViv(-(IV)uv);
12262               else
12263                 floatit = TRUE;
12264             } else
12265               floatit = TRUE;
12266         }
12267         if (floatit) {
12268             /* terminate the string */
12269             *d = '\0';
12270             if (UNLIKELY(hexfp)) {
12271 #  ifdef NV_MANT_DIG
12272                 if (significant_bits > NV_MANT_DIG)
12273                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
12274                                    "Hexadecimal float: mantissa overflow");
12275 #  endif
12276 #ifdef HEXFP_UQUAD
12277                 nv = hexfp_uquad * hexfp_mult;
12278 #else /* HEXFP_NV */
12279                 nv = hexfp_nv * hexfp_mult;
12280 #endif
12281             } else {
12282                 nv = Atof(PL_tokenbuf);
12283             }
12284             sv = newSVnv(nv);
12285         }
12286 
12287         if ( floatit
12288              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
12289             const char *const key = floatit ? "float" : "integer";
12290             const STRLEN keylen = floatit ? 5 : 7;
12291             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
12292                                 key, keylen, sv, NULL, NULL, 0, NULL);
12293         }
12294         break;
12295 
12296     /* if it starts with a v, it could be a v-string */
12297     case 'v':
12298     vstring:
12299                 sv = newSV(5); /* preallocate storage space */
12300                 ENTER_with_name("scan_vstring");
12301                 SAVEFREESV(sv);
12302                 s = scan_vstring(s, PL_bufend, sv);
12303                 SvREFCNT_inc_simple_void_NN(sv);
12304                 LEAVE_with_name("scan_vstring");
12305         break;
12306     }
12307 
12308     /* make the op for the constant and return */
12309 
12310     if (sv)
12311         lvalp->opval = newSVOP(OP_CONST, 0, sv);
12312     else
12313         lvalp->opval = NULL;
12314 
12315     return (char *)s;
12316 }
12317 
12318 STATIC char *
12319 S_scan_formline(pTHX_ char *s)
12320 {
12321     SV * const stuff = newSVpvs("");
12322     bool needargs = FALSE;
12323     bool eofmt = FALSE;
12324 
12325     PERL_ARGS_ASSERT_SCAN_FORMLINE;
12326 
12327     while (!needargs) {
12328         char *eol;
12329         if (*s == '.') {
12330             char *t = s+1;
12331 #ifdef PERL_STRICT_CR
12332             while (SPACE_OR_TAB(*t))
12333                 t++;
12334 #else
12335             while (SPACE_OR_TAB(*t) || *t == '\r')
12336                 t++;
12337 #endif
12338             if (*t == '\n' || t == PL_bufend) {
12339                 eofmt = TRUE;
12340                 break;
12341             }
12342         }
12343         eol = (char *) memchr(s,'\n',PL_bufend-s);
12344         if (!eol++)
12345                 eol = PL_bufend;
12346         if (*s != '#') {
12347             char *t;
12348             for (t = s; t < eol; t++) {
12349                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
12350                     needargs = FALSE;
12351                     goto enough;	/* ~~ must be first line in formline */
12352                 }
12353                 if (*t == '@' || *t == '^')
12354                     needargs = TRUE;
12355             }
12356             if (eol > s) {
12357                 sv_catpvn(stuff, s, eol-s);
12358 #ifndef PERL_STRICT_CR
12359                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
12360                     char *end = SvPVX(stuff) + SvCUR(stuff);
12361                     end[-2] = '\n';
12362                     end[-1] = '\0';
12363                     SvCUR_set(stuff, SvCUR(stuff) - 1);
12364                 }
12365 #endif
12366             }
12367             else
12368               break;
12369         }
12370         s = (char*)eol;
12371         if ((PL_rsfp || PL_parser->filtered)
12372          && PL_parser->form_lex_state == LEX_NORMAL) {
12373             bool got_some;
12374             PL_bufptr = PL_bufend;
12375             COPLINE_INC_WITH_HERELINES;
12376             got_some = lex_next_chunk(0);
12377             CopLINE_dec(PL_curcop);
12378             s = PL_bufptr;
12379             if (!got_some)
12380                 break;
12381         }
12382         incline(s, PL_bufend);
12383     }
12384   enough:
12385     if (!SvCUR(stuff) || needargs)
12386         PL_lex_state = PL_parser->form_lex_state;
12387     if (SvCUR(stuff)) {
12388         PL_expect = XSTATE;
12389         if (needargs) {
12390             const char *s2 = s;
12391             while (isSPACE(*s2) && *s2 != '\n')
12392                 s2++;
12393             if (*s2 == '{') {
12394                 PL_expect = XTERMBLOCK;
12395                 NEXTVAL_NEXTTOKE.ival = 0;
12396                 force_next(DO);
12397             }
12398             NEXTVAL_NEXTTOKE.ival = 0;
12399             force_next(FORMLBRACK);
12400         }
12401         if (!IN_BYTES) {
12402             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
12403                 SvUTF8_on(stuff);
12404         }
12405         NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0, stuff);
12406         force_next(THING);
12407     }
12408     else {
12409         SvREFCNT_dec(stuff);
12410         if (eofmt)
12411             PL_lex_formbrack = 0;
12412     }
12413     return s;
12414 }
12415 
12416 I32
12417 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
12418 {
12419     const I32 oldsavestack_ix = PL_savestack_ix;
12420     CV* const outsidecv = PL_compcv;
12421 
12422     SAVEI32(PL_subline);
12423     save_item(PL_subname);
12424     SAVESPTR(PL_compcv);
12425 
12426     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
12427     CvFLAGS(PL_compcv) |= flags;
12428 
12429     PL_subline = CopLINE(PL_curcop);
12430     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
12431     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
12432     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
12433     if (outsidecv && CvPADLIST(outsidecv))
12434         CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
12435 
12436     return oldsavestack_ix;
12437 }
12438 
12439 
12440 /* Do extra initialisation of a CV (typically one just created by
12441  * start_subparse()) if that CV is for a named sub
12442  */
12443 
12444 void
12445 Perl_init_named_cv(pTHX_ CV *cv, OP *nameop)
12446 {
12447     PERL_ARGS_ASSERT_INIT_NAMED_CV;
12448 
12449     if (nameop->op_type == OP_CONST) {
12450         const char *const name = SvPV_nolen_const(((SVOP*)nameop)->op_sv);
12451         if (   strEQ(name, "BEGIN")
12452             || strEQ(name, "END")
12453             || strEQ(name, "INIT")
12454             || strEQ(name, "CHECK")
12455             || strEQ(name, "UNITCHECK")
12456         )
12457           CvSPECIAL_on(cv);
12458     }
12459     else
12460     /* State subs inside anonymous subs need to be
12461      clonable themselves. */
12462     if (   CvANON(CvOUTSIDE(cv))
12463         || CvCLONE(CvOUTSIDE(cv))
12464         || !PadnameIsSTATE(PadlistNAMESARRAY(CvPADLIST(
12465                         CvOUTSIDE(cv)
12466                      ))[nameop->op_targ])
12467     )
12468       CvCLONE_on(cv);
12469 }
12470 
12471 
12472 static int
12473 S_yywarn(pTHX_ const char *const s, U32 flags)
12474 {
12475     PERL_ARGS_ASSERT_YYWARN;
12476 
12477     PL_in_eval |= EVAL_WARNONLY;
12478     yyerror_pv(s, flags);
12479     return 0;
12480 }
12481 
12482 void
12483 Perl_abort_execution(pTHX_ const char * const msg, const char * const name)
12484 {
12485     PERL_ARGS_ASSERT_ABORT_EXECUTION;
12486 
12487     if (PL_minus_c)
12488         Perl_croak(aTHX_ "%s%s had compilation errors.\n", msg, name);
12489     else {
12490         Perl_croak(aTHX_
12491                 "%sExecution of %s aborted due to compilation errors.\n", msg, name);
12492     }
12493     NOT_REACHED; /* NOTREACHED */
12494 }
12495 
12496 void
12497 Perl_yyquit(pTHX)
12498 {
12499     /* Called, after at least one error has been found, to abort the parse now,
12500      * instead of trying to forge ahead */
12501 
12502     yyerror_pvn(NULL, 0, 0);
12503 }
12504 
12505 int
12506 Perl_yyerror(pTHX_ const char *const s)
12507 {
12508     PERL_ARGS_ASSERT_YYERROR;
12509     return yyerror_pvn(s, strlen(s), 0);
12510 }
12511 
12512 int
12513 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
12514 {
12515     PERL_ARGS_ASSERT_YYERROR_PV;
12516     return yyerror_pvn(s, strlen(s), flags);
12517 }
12518 
12519 int
12520 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
12521 {
12522     const char *context = NULL;
12523     int contlen = -1;
12524     SV *msg;
12525     SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
12526     int yychar  = PL_parser->yychar;
12527 
12528     /* Output error message 's' with length 'len'.  'flags' are SV flags that
12529      * apply.  If the number of errors found is large enough, it abandons
12530      * parsing.  If 's' is NULL, there is no message, and it abandons
12531      * processing unconditionally */
12532 
12533     if (s != NULL) {
12534         if (!yychar || (yychar == PERLY_SEMICOLON && !PL_rsfp))
12535             sv_catpvs(where_sv, "at EOF");
12536         else if (   PL_oldoldbufptr
12537                  && PL_bufptr > PL_oldoldbufptr
12538                  && PL_bufptr - PL_oldoldbufptr < 200
12539                  && PL_oldoldbufptr != PL_oldbufptr
12540                  && PL_oldbufptr != PL_bufptr)
12541         {
12542             while (isSPACE(*PL_oldoldbufptr))
12543                 PL_oldoldbufptr++;
12544             context = PL_oldoldbufptr;
12545             contlen = PL_bufptr - PL_oldoldbufptr;
12546         }
12547         else if (  PL_oldbufptr
12548                 && PL_bufptr > PL_oldbufptr
12549                 && PL_bufptr - PL_oldbufptr < 200
12550                 && PL_oldbufptr != PL_bufptr)
12551         {
12552             while (isSPACE(*PL_oldbufptr))
12553                 PL_oldbufptr++;
12554             context = PL_oldbufptr;
12555             contlen = PL_bufptr - PL_oldbufptr;
12556         }
12557         else if (yychar > 255)
12558             sv_catpvs(where_sv, "next token ???");
12559         else if (yychar == YYEMPTY) {
12560             if (PL_lex_state == LEX_NORMAL)
12561                 sv_catpvs(where_sv, "at end of line");
12562             else if (PL_lex_inpat)
12563                 sv_catpvs(where_sv, "within pattern");
12564             else
12565                 sv_catpvs(where_sv, "within string");
12566         }
12567         else {
12568             sv_catpvs(where_sv, "next char ");
12569             if (yychar < 32)
12570                 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
12571             else if (isPRINT_LC(yychar)) {
12572                 const char string = yychar;
12573                 sv_catpvn(where_sv, &string, 1);
12574             }
12575             else
12576                 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
12577         }
12578         msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
12579         Perl_sv_catpvf(aTHX_ msg, " at %s line %" IVdf ", ",
12580             OutCopFILE(PL_curcop),
12581             (IV)(PL_parser->preambling == NOLINE
12582                    ? CopLINE(PL_curcop)
12583                    : PL_parser->preambling));
12584         if (context)
12585             Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n",
12586                                  UTF8fARG(UTF, contlen, context));
12587         else
12588             Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv));
12589         if (   PL_multi_start < PL_multi_end
12590             && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1)
12591         {
12592             Perl_sv_catpvf(aTHX_ msg,
12593             "  (Might be a runaway multi-line %c%c string starting on"
12594             " line %" IVdf ")\n",
12595                     (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
12596             PL_multi_end = 0;
12597         }
12598         if (PL_in_eval & EVAL_WARNONLY) {
12599             PL_in_eval &= ~EVAL_WARNONLY;
12600             Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg));
12601         }
12602         else {
12603             qerror(msg);
12604         }
12605     }
12606     if (s == NULL || PL_error_count >= 10) {
12607         const char * msg = "";
12608         const char * const name = OutCopFILE(PL_curcop);
12609 
12610         if (PL_in_eval) {
12611             SV * errsv = ERRSV;
12612             if (SvCUR(errsv)) {
12613                 msg = Perl_form(aTHX_ "%" SVf, SVfARG(errsv));
12614             }
12615         }
12616 
12617         if (s == NULL) {
12618             abort_execution(msg, name);
12619         }
12620         else {
12621             Perl_croak(aTHX_ "%s%s has too many errors.\n", msg, name);
12622         }
12623     }
12624     PL_in_my = 0;
12625     PL_in_my_stash = NULL;
12626     return 0;
12627 }
12628 
12629 STATIC char*
12630 S_swallow_bom(pTHX_ U8 *s)
12631 {
12632     const STRLEN slen = SvCUR(PL_linestr);
12633 
12634     PERL_ARGS_ASSERT_SWALLOW_BOM;
12635 
12636     switch (s[0]) {
12637     case 0xFF:
12638         if (s[1] == 0xFE) {
12639             /* UTF-16 little-endian? (or UTF-32LE?) */
12640             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
12641                 /* diag_listed_as: Unsupported script encoding %s */
12642                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
12643 #ifndef PERL_NO_UTF16_FILTER
12644 #ifdef DEBUGGING
12645             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
12646 #endif
12647             s += 2;
12648             if (PL_bufend > (char*)s) {
12649                 s = add_utf16_textfilter(s, TRUE);
12650             }
12651 #else
12652             /* diag_listed_as: Unsupported script encoding %s */
12653             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
12654 #endif
12655         }
12656         break;
12657     case 0xFE:
12658         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
12659 #ifndef PERL_NO_UTF16_FILTER
12660 #ifdef DEBUGGING
12661             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
12662 #endif
12663             s += 2;
12664             if (PL_bufend > (char *)s) {
12665                 s = add_utf16_textfilter(s, FALSE);
12666             }
12667 #else
12668             /* diag_listed_as: Unsupported script encoding %s */
12669             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
12670 #endif
12671         }
12672         break;
12673     case BOM_UTF8_FIRST_BYTE: {
12674         if (memBEGINs(s+1, slen - 1, BOM_UTF8_TAIL)) {
12675 #ifdef DEBUGGING
12676             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12677 #endif
12678             s += sizeof(BOM_UTF8) - 1;                     /* UTF-8 */
12679         }
12680         break;
12681     }
12682     case 0:
12683         if (slen > 3) {
12684              if (s[1] == 0) {
12685                   if (s[2] == 0xFE && s[3] == 0xFF) {
12686                        /* UTF-32 big-endian */
12687                        /* diag_listed_as: Unsupported script encoding %s */
12688                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
12689                   }
12690              }
12691              else if (s[2] == 0 && s[3] != 0) {
12692                   /* Leading bytes
12693                    * 00 xx 00 xx
12694                    * are a good indicator of UTF-16BE. */
12695 #ifndef PERL_NO_UTF16_FILTER
12696 #ifdef DEBUGGING
12697                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
12698 #endif
12699                   s = add_utf16_textfilter(s, FALSE);
12700 #else
12701                   /* diag_listed_as: Unsupported script encoding %s */
12702                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
12703 #endif
12704              }
12705         }
12706         break;
12707 
12708     default:
12709          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
12710                   /* Leading bytes
12711                    * xx 00 xx 00
12712                    * are a good indicator of UTF-16LE. */
12713 #ifndef PERL_NO_UTF16_FILTER
12714 #ifdef DEBUGGING
12715               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
12716 #endif
12717               s = add_utf16_textfilter(s, TRUE);
12718 #else
12719               /* diag_listed_as: Unsupported script encoding %s */
12720               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
12721 #endif
12722          }
12723     }
12724     return (char*)s;
12725 }
12726 
12727 
12728 #ifndef PERL_NO_UTF16_FILTER
12729 static I32
12730 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12731 {
12732     SV *const filter = FILTER_DATA(idx);
12733     /* We re-use this each time round, throwing the contents away before we
12734        return.  */
12735     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
12736     SV *const utf8_buffer = filter;
12737     IV status = IoPAGE(filter);
12738     const bool reverse = cBOOL(IoLINES(filter));
12739     I32 retval;
12740 
12741     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
12742 
12743     /* As we're automatically added, at the lowest level, and hence only called
12744        from this file, we can be sure that we're not called in block mode. Hence
12745        don't bother writing code to deal with block mode.  */
12746     if (maxlen) {
12747         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
12748     }
12749     if (status < 0) {
12750         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%" IVdf ")", status);
12751     }
12752     DEBUG_P(PerlIO_printf(Perl_debug_log,
12753                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
12754                           FPTR2DPTR(void *, S_utf16_textfilter),
12755                           reverse ? 'l' : 'b', idx, maxlen, status,
12756                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
12757 
12758     while (1) {
12759         STRLEN chars;
12760         STRLEN have;
12761         Size_t newlen;
12762         U8 *end;
12763         /* First, look in our buffer of existing UTF-8 data:  */
12764         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
12765 
12766         if (nl) {
12767             ++nl;
12768         } else if (status == 0) {
12769             /* EOF */
12770             IoPAGE(filter) = 0;
12771             nl = SvEND(utf8_buffer);
12772         }
12773         if (nl) {
12774             STRLEN got = nl - SvPVX(utf8_buffer);
12775             /* Did we have anything to append?  */
12776             retval = got != 0;
12777             sv_catpvn(sv, SvPVX(utf8_buffer), got);
12778             /* Everything else in this code works just fine if SVp_POK isn't
12779                set.  This, however, needs it, and we need it to work, else
12780                we loop infinitely because the buffer is never consumed.  */
12781             sv_chop(utf8_buffer, nl);
12782             break;
12783         }
12784 
12785         /* OK, not a complete line there, so need to read some more UTF-16.
12786            Read an extra octect if the buffer currently has an odd number. */
12787         while (1) {
12788             if (status <= 0)
12789                 break;
12790             if (SvCUR(utf16_buffer) >= 2) {
12791                 /* Location of the high octet of the last complete code point.
12792                    Gosh, UTF-16 is a pain. All the benefits of variable length,
12793                    *coupled* with all the benefits of partial reads and
12794                    endianness.  */
12795                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
12796                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
12797 
12798                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
12799                     break;
12800                 }
12801 
12802                 /* We have the first half of a surrogate. Read more.  */
12803                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
12804             }
12805 
12806             status = FILTER_READ(idx + 1, utf16_buffer,
12807                                  160 + (SvCUR(utf16_buffer) & 1));
12808             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%" IVdf " SvCUR(sv)=%" UVuf "\n", status, (UV)SvCUR(utf16_buffer)));
12809             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
12810             if (status < 0) {
12811                 /* Error */
12812                 IoPAGE(filter) = status;
12813                 return status;
12814             }
12815         }
12816 
12817         /* 'chars' isn't quite the right name, as code points above 0xFFFF
12818          * require 4 bytes per char */
12819         chars = SvCUR(utf16_buffer) >> 1;
12820         have = SvCUR(utf8_buffer);
12821 
12822         /* Assume the worst case size as noted by the functions: twice the
12823          * number of input bytes */
12824         SvGROW(utf8_buffer, have + chars * 4 + 1);
12825 
12826         if (reverse) {
12827             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
12828                                          (U8*)SvPVX_const(utf8_buffer) + have,
12829                                          chars * 2, &newlen);
12830         } else {
12831             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
12832                                 (U8*)SvPVX_const(utf8_buffer) + have,
12833                                 chars * 2, &newlen);
12834         }
12835         SvCUR_set(utf8_buffer, have + newlen);
12836         *end = '\0';
12837 
12838         /* No need to keep this SV "well-formed" with a '\0' after the end, as
12839            it's private to us, and utf16_to_utf8{,reversed} take a
12840            (pointer,length) pair, rather than a NUL-terminated string.  */
12841         if(SvCUR(utf16_buffer) & 1) {
12842             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
12843             SvCUR_set(utf16_buffer, 1);
12844         } else {
12845             SvCUR_set(utf16_buffer, 0);
12846         }
12847     }
12848     DEBUG_P(PerlIO_printf(Perl_debug_log,
12849                           "utf16_textfilter: returns, status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
12850                           status,
12851                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
12852     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
12853     return retval;
12854 }
12855 
12856 static U8 *
12857 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
12858 {
12859     SV *filter = filter_add(S_utf16_textfilter, NULL);
12860 
12861     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
12862 
12863     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
12864     SvPVCLEAR(filter);
12865     IoLINES(filter) = reversed;
12866     IoPAGE(filter) = 1; /* Not EOF */
12867 
12868     /* Sadly, we have to return a valid pointer, come what may, so we have to
12869        ignore any error return from this.  */
12870     SvCUR_set(PL_linestr, 0);
12871     if (FILTER_READ(0, PL_linestr, 0)) {
12872         SvUTF8_on(PL_linestr);
12873     } else {
12874         SvUTF8_on(PL_linestr);
12875     }
12876     PL_bufend = SvEND(PL_linestr);
12877     return (U8*)SvPVX(PL_linestr);
12878 }
12879 #endif
12880 
12881 /*
12882 =for apidoc scan_vstring
12883 
12884 Returns a pointer to the next character after the parsed
12885 vstring, as well as updating the passed in sv.
12886 
12887 Function must be called like
12888 
12889         sv = sv_2mortal(newSV(5));
12890         s = scan_vstring(s,e,sv);
12891 
12892 where s and e are the start and end of the string.
12893 The sv should already be large enough to store the vstring
12894 passed in, for performance reasons.
12895 
12896 This function may croak if fatal warnings are enabled in the
12897 calling scope, hence the sv_2mortal in the example (to prevent
12898 a leak).  Make sure to do SvREFCNT_inc afterwards if you use
12899 sv_2mortal.
12900 
12901 =cut
12902 */
12903 
12904 char *
12905 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
12906 {
12907     const char *pos = s;
12908     const char *start = s;
12909 
12910     PERL_ARGS_ASSERT_SCAN_VSTRING;
12911 
12912     if (*pos == 'v') pos++;  /* get past 'v' */
12913     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12914         pos++;
12915     if ( *pos != '.') {
12916         /* this may not be a v-string if followed by => */
12917         const char *next = pos;
12918         while (next < e && isSPACE(*next))
12919             ++next;
12920         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
12921             /* return string not v-string */
12922             sv_setpvn(sv,(char *)s,pos-s);
12923             return (char *)pos;
12924         }
12925     }
12926 
12927     if (!isALPHA(*pos)) {
12928         U8 tmpbuf[UTF8_MAXBYTES+1];
12929 
12930         if (*s == 'v')
12931             s++;  /* get past 'v' */
12932 
12933         SvPVCLEAR(sv);
12934 
12935         for (;;) {
12936             /* this is atoi() that tolerates underscores */
12937             U8 *tmpend;
12938             UV rev = 0;
12939             const char *end = pos;
12940             UV mult = 1;
12941             while (--end >= s) {
12942                 if (*end != '_') {
12943                     const UV orev = rev;
12944                     rev += (*end - '0') * mult;
12945                     mult *= 10;
12946                     if (orev > rev)
12947                         /* diag_listed_as: Integer overflow in %s number */
12948                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
12949                                          "Integer overflow in decimal number");
12950                 }
12951             }
12952 
12953             /* Append native character for the rev point */
12954             tmpend = uvchr_to_utf8(tmpbuf, rev);
12955             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
12956             if (!UVCHR_IS_INVARIANT(rev))
12957                  SvUTF8_on(sv);
12958             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
12959                  s = ++pos;
12960             else {
12961                  s = pos;
12962                  break;
12963             }
12964             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12965                  pos++;
12966         }
12967         SvPOK_on(sv);
12968         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
12969         SvRMAGICAL_on(sv);
12970     }
12971     return (char *)s;
12972 }
12973 
12974 int
12975 Perl_keyword_plugin_standard(pTHX_
12976         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
12977 {
12978     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
12979     PERL_UNUSED_CONTEXT;
12980     PERL_UNUSED_ARG(keyword_ptr);
12981     PERL_UNUSED_ARG(keyword_len);
12982     PERL_UNUSED_ARG(op_ptr);
12983     return KEYWORD_PLUGIN_DECLINE;
12984 }
12985 
12986 /*
12987 =for apidoc_section $lexer
12988 =for apidoc wrap_keyword_plugin
12989 
12990 Puts a C function into the chain of keyword plugins.  This is the
12991 preferred way to manipulate the L</PL_keyword_plugin> variable.
12992 C<new_plugin> is a pointer to the C function that is to be added to the
12993 keyword plugin chain, and C<old_plugin_p> points to the storage location
12994 where a pointer to the next function in the chain will be stored.  The
12995 value of C<new_plugin> is written into the L</PL_keyword_plugin> variable,
12996 while the value previously stored there is written to C<*old_plugin_p>.
12997 
12998 L</PL_keyword_plugin> is global to an entire process, and a module wishing
12999 to hook keyword parsing may find itself invoked more than once per
13000 process, typically in different threads.  To handle that situation, this
13001 function is idempotent.  The location C<*old_plugin_p> must initially
13002 (once per process) contain a null pointer.  A C variable of static
13003 duration (declared at file scope, typically also marked C<static> to give
13004 it internal linkage) will be implicitly initialised appropriately, if it
13005 does not have an explicit initialiser.  This function will only actually
13006 modify the plugin chain if it finds C<*old_plugin_p> to be null.  This
13007 function is also thread safe on the small scale.  It uses appropriate
13008 locking to avoid race conditions in accessing L</PL_keyword_plugin>.
13009 
13010 When this function is called, the function referenced by C<new_plugin>
13011 must be ready to be called, except for C<*old_plugin_p> being unfilled.
13012 In a threading situation, C<new_plugin> may be called immediately, even
13013 before this function has returned.  C<*old_plugin_p> will always be
13014 appropriately set before C<new_plugin> is called.  If C<new_plugin>
13015 decides not to do anything special with the identifier that it is given
13016 (which is the usual case for most calls to a keyword plugin), it must
13017 chain the plugin function referenced by C<*old_plugin_p>.
13018 
13019 Taken all together, XS code to install a keyword plugin should typically
13020 look something like this:
13021 
13022     static Perl_keyword_plugin_t next_keyword_plugin;
13023     static OP *my_keyword_plugin(pTHX_
13024         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13025     {
13026         if (memEQs(keyword_ptr, keyword_len,
13027                    "my_new_keyword")) {
13028             ...
13029         } else {
13030             return next_keyword_plugin(aTHX_
13031                 keyword_ptr, keyword_len, op_ptr);
13032         }
13033     }
13034     BOOT:
13035         wrap_keyword_plugin(my_keyword_plugin,
13036                             &next_keyword_plugin);
13037 
13038 Direct access to L</PL_keyword_plugin> should be avoided.
13039 
13040 =cut
13041 */
13042 
13043 void
13044 Perl_wrap_keyword_plugin(pTHX_
13045     Perl_keyword_plugin_t new_plugin, Perl_keyword_plugin_t *old_plugin_p)
13046 {
13047 
13048     PERL_UNUSED_CONTEXT;
13049     PERL_ARGS_ASSERT_WRAP_KEYWORD_PLUGIN;
13050     if (*old_plugin_p) return;
13051     KEYWORD_PLUGIN_MUTEX_LOCK;
13052     if (!*old_plugin_p) {
13053         *old_plugin_p = PL_keyword_plugin;
13054         PL_keyword_plugin = new_plugin;
13055     }
13056     KEYWORD_PLUGIN_MUTEX_UNLOCK;
13057 }
13058 
13059 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
13060 static void
13061 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
13062 {
13063     SAVEI32(PL_lex_brackets);
13064     if (PL_lex_brackets > 100)
13065         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
13066     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
13067     SAVEI32(PL_lex_allbrackets);
13068     PL_lex_allbrackets = 0;
13069     SAVEI8(PL_lex_fakeeof);
13070     PL_lex_fakeeof = (U8)fakeeof;
13071     if(yyparse(gramtype) && !PL_parser->error_count)
13072         qerror(Perl_mess(aTHX_ "Parse error"));
13073 }
13074 
13075 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
13076 static OP *
13077 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
13078 {
13079     OP *o;
13080     ENTER;
13081     SAVEVPTR(PL_eval_root);
13082     PL_eval_root = NULL;
13083     parse_recdescent(gramtype, fakeeof);
13084     o = PL_eval_root;
13085     LEAVE;
13086     return o;
13087 }
13088 
13089 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
13090 static OP *
13091 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
13092 {
13093     OP *exprop;
13094     if (flags & ~PARSE_OPTIONAL)
13095         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
13096     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
13097     if (!exprop && !(flags & PARSE_OPTIONAL)) {
13098         if (!PL_parser->error_count)
13099             qerror(Perl_mess(aTHX_ "Parse error"));
13100         exprop = newOP(OP_NULL, 0);
13101     }
13102     return exprop;
13103 }
13104 
13105 /*
13106 =for apidoc parse_arithexpr
13107 
13108 Parse a Perl arithmetic expression.  This may contain operators of precedence
13109 down to the bit shift operators.  The expression must be followed (and thus
13110 terminated) either by a comparison or lower-precedence operator or by
13111 something that would normally terminate an expression such as semicolon.
13112 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
13113 otherwise it is mandatory.  It is up to the caller to ensure that the
13114 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
13115 the source of the code to be parsed and the lexical context for the
13116 expression.
13117 
13118 The op tree representing the expression is returned.  If an optional
13119 expression is absent, a null pointer is returned, otherwise the pointer
13120 will be non-null.
13121 
13122 If an error occurs in parsing or compilation, in most cases a valid op
13123 tree is returned anyway.  The error is reflected in the parser state,
13124 normally resulting in a single exception at the top level of parsing
13125 which covers all the compilation errors that occurred.  Some compilation
13126 errors, however, will throw an exception immediately.
13127 
13128 =for apidoc Amnh||PARSE_OPTIONAL
13129 
13130 =cut
13131 
13132 */
13133 
13134 OP *
13135 Perl_parse_arithexpr(pTHX_ U32 flags)
13136 {
13137     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
13138 }
13139 
13140 /*
13141 =for apidoc parse_termexpr
13142 
13143 Parse a Perl term expression.  This may contain operators of precedence
13144 down to the assignment operators.  The expression must be followed (and thus
13145 terminated) either by a comma or lower-precedence operator or by
13146 something that would normally terminate an expression such as semicolon.
13147 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
13148 otherwise it is mandatory.  It is up to the caller to ensure that the
13149 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
13150 the source of the code to be parsed and the lexical context for the
13151 expression.
13152 
13153 The op tree representing the expression is returned.  If an optional
13154 expression is absent, a null pointer is returned, otherwise the pointer
13155 will be non-null.
13156 
13157 If an error occurs in parsing or compilation, in most cases a valid op
13158 tree is returned anyway.  The error is reflected in the parser state,
13159 normally resulting in a single exception at the top level of parsing
13160 which covers all the compilation errors that occurred.  Some compilation
13161 errors, however, will throw an exception immediately.
13162 
13163 =cut
13164 */
13165 
13166 OP *
13167 Perl_parse_termexpr(pTHX_ U32 flags)
13168 {
13169     return parse_expr(LEX_FAKEEOF_COMMA, flags);
13170 }
13171 
13172 /*
13173 =for apidoc parse_listexpr
13174 
13175 Parse a Perl list expression.  This may contain operators of precedence
13176 down to the comma operator.  The expression must be followed (and thus
13177 terminated) either by a low-precedence logic operator such as C<or> or by
13178 something that would normally terminate an expression such as semicolon.
13179 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
13180 otherwise it is mandatory.  It is up to the caller to ensure that the
13181 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
13182 the source of the code to be parsed and the lexical context for the
13183 expression.
13184 
13185 The op tree representing the expression is returned.  If an optional
13186 expression is absent, a null pointer is returned, otherwise the pointer
13187 will be non-null.
13188 
13189 If an error occurs in parsing or compilation, in most cases a valid op
13190 tree is returned anyway.  The error is reflected in the parser state,
13191 normally resulting in a single exception at the top level of parsing
13192 which covers all the compilation errors that occurred.  Some compilation
13193 errors, however, will throw an exception immediately.
13194 
13195 =cut
13196 */
13197 
13198 OP *
13199 Perl_parse_listexpr(pTHX_ U32 flags)
13200 {
13201     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
13202 }
13203 
13204 /*
13205 =for apidoc parse_fullexpr
13206 
13207 Parse a single complete Perl expression.  This allows the full
13208 expression grammar, including the lowest-precedence operators such
13209 as C<or>.  The expression must be followed (and thus terminated) by a
13210 token that an expression would normally be terminated by: end-of-file,
13211 closing bracketing punctuation, semicolon, or one of the keywords that
13212 signals a postfix expression-statement modifier.  If C<flags> has the
13213 C<PARSE_OPTIONAL> bit set, then the expression is optional, otherwise it is
13214 mandatory.  It is up to the caller to ensure that the dynamic parser
13215 state (L</PL_parser> et al) is correctly set to reflect the source of
13216 the code to be parsed and the lexical context for the expression.
13217 
13218 The op tree representing the expression is returned.  If an optional
13219 expression is absent, a null pointer is returned, otherwise the pointer
13220 will be non-null.
13221 
13222 If an error occurs in parsing or compilation, in most cases a valid op
13223 tree is returned anyway.  The error is reflected in the parser state,
13224 normally resulting in a single exception at the top level of parsing
13225 which covers all the compilation errors that occurred.  Some compilation
13226 errors, however, will throw an exception immediately.
13227 
13228 =cut
13229 */
13230 
13231 OP *
13232 Perl_parse_fullexpr(pTHX_ U32 flags)
13233 {
13234     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
13235 }
13236 
13237 /*
13238 =for apidoc parse_block
13239 
13240 Parse a single complete Perl code block.  This consists of an opening
13241 brace, a sequence of statements, and a closing brace.  The block
13242 constitutes a lexical scope, so C<my> variables and various compile-time
13243 effects can be contained within it.  It is up to the caller to ensure
13244 that the dynamic parser state (L</PL_parser> et al) is correctly set to
13245 reflect the source of the code to be parsed and the lexical context for
13246 the statement.
13247 
13248 The op tree representing the code block is returned.  This is always a
13249 real op, never a null pointer.  It will normally be a C<lineseq> list,
13250 including C<nextstate> or equivalent ops.  No ops to construct any kind
13251 of runtime scope are included by virtue of it being a block.
13252 
13253 If an error occurs in parsing or compilation, in most cases a valid op
13254 tree (most likely null) is returned anyway.  The error is reflected in
13255 the parser state, normally resulting in a single exception at the top
13256 level of parsing which covers all the compilation errors that occurred.
13257 Some compilation errors, however, will throw an exception immediately.
13258 
13259 The C<flags> parameter is reserved for future use, and must always
13260 be zero.
13261 
13262 =cut
13263 */
13264 
13265 OP *
13266 Perl_parse_block(pTHX_ U32 flags)
13267 {
13268     if (flags)
13269         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
13270     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
13271 }
13272 
13273 /*
13274 =for apidoc parse_barestmt
13275 
13276 Parse a single unadorned Perl statement.  This may be a normal imperative
13277 statement or a declaration that has compile-time effect.  It does not
13278 include any label or other affixture.  It is up to the caller to ensure
13279 that the dynamic parser state (L</PL_parser> et al) is correctly set to
13280 reflect the source of the code to be parsed and the lexical context for
13281 the statement.
13282 
13283 The op tree representing the statement is returned.  This may be a
13284 null pointer if the statement is null, for example if it was actually
13285 a subroutine definition (which has compile-time side effects).  If not
13286 null, it will be ops directly implementing the statement, suitable to
13287 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
13288 equivalent op (except for those embedded in a scope contained entirely
13289 within the statement).
13290 
13291 If an error occurs in parsing or compilation, in most cases a valid op
13292 tree (most likely null) is returned anyway.  The error is reflected in
13293 the parser state, normally resulting in a single exception at the top
13294 level of parsing which covers all the compilation errors that occurred.
13295 Some compilation errors, however, will throw an exception immediately.
13296 
13297 The C<flags> parameter is reserved for future use, and must always
13298 be zero.
13299 
13300 =cut
13301 */
13302 
13303 OP *
13304 Perl_parse_barestmt(pTHX_ U32 flags)
13305 {
13306     if (flags)
13307         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
13308     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
13309 }
13310 
13311 /*
13312 =for apidoc parse_label
13313 
13314 Parse a single label, possibly optional, of the type that may prefix a
13315 Perl statement.  It is up to the caller to ensure that the dynamic parser
13316 state (L</PL_parser> et al) is correctly set to reflect the source of
13317 the code to be parsed.  If C<flags> has the C<PARSE_OPTIONAL> bit set, then the
13318 label is optional, otherwise it is mandatory.
13319 
13320 The name of the label is returned in the form of a fresh scalar.  If an
13321 optional label is absent, a null pointer is returned.
13322 
13323 If an error occurs in parsing, which can only occur if the label is
13324 mandatory, a valid label is returned anyway.  The error is reflected in
13325 the parser state, normally resulting in a single exception at the top
13326 level of parsing which covers all the compilation errors that occurred.
13327 
13328 =cut
13329 */
13330 
13331 SV *
13332 Perl_parse_label(pTHX_ U32 flags)
13333 {
13334     if (flags & ~PARSE_OPTIONAL)
13335         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
13336     if (PL_nexttoke) {
13337         PL_parser->yychar = yylex();
13338         if (PL_parser->yychar == LABEL) {
13339             SV * const labelsv = cSVOPx(pl_yylval.opval)->op_sv;
13340             PL_parser->yychar = YYEMPTY;
13341             cSVOPx(pl_yylval.opval)->op_sv = NULL;
13342             op_free(pl_yylval.opval);
13343             return labelsv;
13344         } else {
13345             yyunlex();
13346             goto no_label;
13347         }
13348     } else {
13349         char *s, *t;
13350         STRLEN wlen, bufptr_pos;
13351         lex_read_space(0);
13352         t = s = PL_bufptr;
13353         if (!isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
13354             goto no_label;
13355         t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
13356         if (word_takes_any_delimiter(s, wlen))
13357             goto no_label;
13358         bufptr_pos = s - SvPVX(PL_linestr);
13359         PL_bufptr = t;
13360         lex_read_space(LEX_KEEP_PREVIOUS);
13361         t = PL_bufptr;
13362         s = SvPVX(PL_linestr) + bufptr_pos;
13363         if (t[0] == ':' && t[1] != ':') {
13364             PL_oldoldbufptr = PL_oldbufptr;
13365             PL_oldbufptr = s;
13366             PL_bufptr = t+1;
13367             return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
13368         } else {
13369             PL_bufptr = s;
13370             no_label:
13371             if (flags & PARSE_OPTIONAL) {
13372                 return NULL;
13373             } else {
13374                 qerror(Perl_mess(aTHX_ "Parse error"));
13375                 return newSVpvs("x");
13376             }
13377         }
13378     }
13379 }
13380 
13381 /*
13382 =for apidoc parse_fullstmt
13383 
13384 Parse a single complete Perl statement.  This may be a normal imperative
13385 statement or a declaration that has compile-time effect, and may include
13386 optional labels.  It is up to the caller to ensure that the dynamic
13387 parser state (L</PL_parser> et al) is correctly set to reflect the source
13388 of the code to be parsed and the lexical context for the statement.
13389 
13390 The op tree representing the statement is returned.  This may be a
13391 null pointer if the statement is null, for example if it was actually
13392 a subroutine definition (which has compile-time side effects).  If not
13393 null, it will be the result of a L</newSTATEOP> call, normally including
13394 a C<nextstate> or equivalent op.
13395 
13396 If an error occurs in parsing or compilation, in most cases a valid op
13397 tree (most likely null) is returned anyway.  The error is reflected in
13398 the parser state, normally resulting in a single exception at the top
13399 level of parsing which covers all the compilation errors that occurred.
13400 Some compilation errors, however, will throw an exception immediately.
13401 
13402 The C<flags> parameter is reserved for future use, and must always
13403 be zero.
13404 
13405 =cut
13406 */
13407 
13408 OP *
13409 Perl_parse_fullstmt(pTHX_ U32 flags)
13410 {
13411     if (flags)
13412         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
13413     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
13414 }
13415 
13416 /*
13417 =for apidoc parse_stmtseq
13418 
13419 Parse a sequence of zero or more Perl statements.  These may be normal
13420 imperative statements, including optional labels, or declarations
13421 that have compile-time effect, or any mixture thereof.  The statement
13422 sequence ends when a closing brace or end-of-file is encountered in a
13423 place where a new statement could have validly started.  It is up to
13424 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
13425 is correctly set to reflect the source of the code to be parsed and the
13426 lexical context for the statements.
13427 
13428 The op tree representing the statement sequence is returned.  This may
13429 be a null pointer if the statements were all null, for example if there
13430 were no statements or if there were only subroutine definitions (which
13431 have compile-time side effects).  If not null, it will be a C<lineseq>
13432 list, normally including C<nextstate> or equivalent ops.
13433 
13434 If an error occurs in parsing or compilation, in most cases a valid op
13435 tree is returned anyway.  The error is reflected in the parser state,
13436 normally resulting in a single exception at the top level of parsing
13437 which covers all the compilation errors that occurred.  Some compilation
13438 errors, however, will throw an exception immediately.
13439 
13440 The C<flags> parameter is reserved for future use, and must always
13441 be zero.
13442 
13443 =cut
13444 */
13445 
13446 OP *
13447 Perl_parse_stmtseq(pTHX_ U32 flags)
13448 {
13449     OP *stmtseqop;
13450     I32 c;
13451     if (flags)
13452         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
13453     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
13454     c = lex_peek_unichar(0);
13455     if (c != -1 && c != /*{*/'}')
13456         qerror(Perl_mess(aTHX_ "Parse error"));
13457     return stmtseqop;
13458 }
13459 
13460 /*
13461 =for apidoc parse_subsignature
13462 
13463 Parse a subroutine signature declaration. This is the contents of the
13464 parentheses following a named or anonymous subroutine declaration when the
13465 C<signatures> feature is enabled. Note that this function neither expects
13466 nor consumes the opening and closing parentheses around the signature; it
13467 is the caller's job to handle these.
13468 
13469 This function must only be called during parsing of a subroutine; after
13470 L</start_subparse> has been called. It might allocate lexical variables on
13471 the pad for the current subroutine.
13472 
13473 The op tree to unpack the arguments from the stack at runtime is returned.
13474 This op tree should appear at the beginning of the compiled function. The
13475 caller may wish to use L</op_append_list> to build their function body
13476 after it, or splice it together with the body before calling L</newATTRSUB>.
13477 
13478 The C<flags> parameter is reserved for future use, and must always
13479 be zero.
13480 
13481 =cut
13482 */
13483 
13484 OP *
13485 Perl_parse_subsignature(pTHX_ U32 flags)
13486 {
13487     if (flags)
13488         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_subsignature");
13489     return parse_recdescent_for_op(GRAMSUBSIGNATURE, LEX_FAKEEOF_NONEXPR);
13490 }
13491 
13492 /*
13493  * ex: set ts=8 sts=4 sw=4 et:
13494  */
13495