1 /*    toke.c
2  *
3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
5  *
6  *    You may distribute under the terms of either the GNU General Public
7  *    License or the Artistic License, as specified in the README file.
8  *
9  */
10 
11 /*
12  *  'It all comes from here, the stench and the peril.'    --Frodo
13  *
14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
15  */
16 
17 /*
18  * This file is the lexer for Perl.  It's closely linked to the
19  * parser, perly.y.
20  *
21  * The main routine is yylex(), which returns the next token.
22  */
23 
24 /*
25 =head1 Lexer interface
26 This is the lower layer of the Perl parser, managing characters and tokens.
27 
28 =for apidoc AmnU|yy_parser *|PL_parser
29 
30 Pointer to a structure encapsulating the state of the parsing operation
31 currently in progress.  The pointer can be locally changed to perform
32 a nested parse without interfering with the state of an outer parse.
33 Individual members of C<PL_parser> have their own documentation.
34 
35 =cut
36 */
37 
38 #include "EXTERN.h"
39 #define PERL_IN_TOKE_C
40 #include "perl.h"
41 #include "invlist_inline.h"
42 
43 #define new_constant(a,b,c,d,e,f,g, h)	\
44 	S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g, h)
45 
46 #define pl_yylval	(PL_parser->yylval)
47 
48 /* XXX temporary backwards compatibility */
49 #define PL_lex_brackets		(PL_parser->lex_brackets)
50 #define PL_lex_allbrackets	(PL_parser->lex_allbrackets)
51 #define PL_lex_fakeeof		(PL_parser->lex_fakeeof)
52 #define PL_lex_brackstack	(PL_parser->lex_brackstack)
53 #define PL_lex_casemods		(PL_parser->lex_casemods)
54 #define PL_lex_casestack        (PL_parser->lex_casestack)
55 #define PL_lex_dojoin		(PL_parser->lex_dojoin)
56 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
57 #define PL_lex_inpat		(PL_parser->lex_inpat)
58 #define PL_lex_inwhat		(PL_parser->lex_inwhat)
59 #define PL_lex_op		(PL_parser->lex_op)
60 #define PL_lex_repl		(PL_parser->lex_repl)
61 #define PL_lex_starts		(PL_parser->lex_starts)
62 #define PL_lex_stuff		(PL_parser->lex_stuff)
63 #define PL_multi_start		(PL_parser->multi_start)
64 #define PL_multi_open		(PL_parser->multi_open)
65 #define PL_multi_close		(PL_parser->multi_close)
66 #define PL_preambled		(PL_parser->preambled)
67 #define PL_linestr		(PL_parser->linestr)
68 #define PL_expect		(PL_parser->expect)
69 #define PL_copline		(PL_parser->copline)
70 #define PL_bufptr		(PL_parser->bufptr)
71 #define PL_oldbufptr		(PL_parser->oldbufptr)
72 #define PL_oldoldbufptr		(PL_parser->oldoldbufptr)
73 #define PL_linestart		(PL_parser->linestart)
74 #define PL_bufend		(PL_parser->bufend)
75 #define PL_last_uni		(PL_parser->last_uni)
76 #define PL_last_lop		(PL_parser->last_lop)
77 #define PL_last_lop_op		(PL_parser->last_lop_op)
78 #define PL_lex_state		(PL_parser->lex_state)
79 #define PL_rsfp			(PL_parser->rsfp)
80 #define PL_rsfp_filters		(PL_parser->rsfp_filters)
81 #define PL_in_my		(PL_parser->in_my)
82 #define PL_in_my_stash		(PL_parser->in_my_stash)
83 #define PL_tokenbuf		(PL_parser->tokenbuf)
84 #define PL_multi_end		(PL_parser->multi_end)
85 #define PL_error_count		(PL_parser->error_count)
86 
87 #  define PL_nexttoke		(PL_parser->nexttoke)
88 #  define PL_nexttype		(PL_parser->nexttype)
89 #  define PL_nextval		(PL_parser->nextval)
90 
91 
92 #define SvEVALED(sv) \
93     (SvTYPE(sv) >= SVt_PVNV \
94     && ((XPVIV*)SvANY(sv))->xiv_u.xivu_eval_seen)
95 
96 static const char* const ident_too_long = "Identifier too long";
97 static const char* const ident_var_zero_multi_digit = "Numeric variables with more than one digit may not start with '0'";
98 
99 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
100 
101 #define XENUMMASK  0x3f
102 #define XFAKEEOF   0x40
103 #define XFAKEBRACK 0x80
104 
105 #ifdef USE_UTF8_SCRIPTS
106 #   define UTF cBOOL(!IN_BYTES)
107 #else
108 #   define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
109 #endif
110 
111 /* The maximum number of characters preceding the unrecognized one to display */
112 #define UNRECOGNIZED_PRECEDE_COUNT 10
113 
114 /* In variables named $^X, these are the legal values for X.
115  * 1999-02-27 mjd-perl-patch@plover.com */
116 #define isCONTROLVAR(x) (isUPPER(x) || memCHRs("[\\]^_?", (x)))
117 
118 #define SPACE_OR_TAB(c) isBLANK_A(c)
119 
120 #define HEXFP_PEEK(s)     \
121     (((s[0] == '.') && \
122       (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \
123      isALPHA_FOLD_EQ(s[0], 'p'))
124 
125 /* LEX_* are values for PL_lex_state, the state of the lexer.
126  * They are arranged oddly so that the guard on the switch statement
127  * can get by with a single comparison (if the compiler is smart enough).
128  *
129  * These values refer to the various states within a sublex parse,
130  * i.e. within a double quotish string
131  */
132 
133 /* #define LEX_NOTPARSING		11 is done in perl.h. */
134 
135 #define LEX_NORMAL		10 /* normal code (ie not within "...")     */
136 #define LEX_INTERPNORMAL	 9 /* code within a string, eg "$foo[$x+1]" */
137 #define LEX_INTERPCASEMOD	 8 /* expecting a \U, \Q or \E etc          */
138 #define LEX_INTERPPUSH		 7 /* starting a new sublex parse level     */
139 #define LEX_INTERPSTART		 6 /* expecting the start of a $var         */
140 
141 				   /* at end of code, eg "$x" followed by:  */
142 #define LEX_INTERPEND		 5 /* ... eg not one of [, { or ->          */
143 #define LEX_INTERPENDMAYBE	 4 /* ... eg one of [, { or ->              */
144 
145 #define LEX_INTERPCONCAT	 3 /* expecting anything, eg at start of
146 				        string or after \E, $foo, etc       */
147 #define LEX_INTERPCONST		 2 /* NOT USED */
148 #define LEX_FORMLINE		 1 /* expecting a format line               */
149 
150 /* returned to yyl_try() to request it to retry the parse loop, expected to only
151    be returned directly by yyl_fake_eof(), but functions that call yyl_fake_eof()
152    can also return it.
153 
154    yylex (aka Perl_yylex) returns 0 on EOF rather than returning -1,
155    other token values are 258 or higher (see perly.h), so -1 should be
156    a safe value here.
157 */
158 #define YYL_RETRY (-1)
159 
160 #ifdef DEBUGGING
161 static const char* const lex_state_names[] = {
162     "KNOWNEXT",
163     "FORMLINE",
164     "INTERPCONST",
165     "INTERPCONCAT",
166     "INTERPENDMAYBE",
167     "INTERPEND",
168     "INTERPSTART",
169     "INTERPPUSH",
170     "INTERPCASEMOD",
171     "INTERPNORMAL",
172     "NORMAL"
173 };
174 #endif
175 
176 #include "keywords.h"
177 
178 /* CLINE is a macro that ensures PL_copline has a sane value */
179 
180 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
181 
182 /*
183  * Convenience functions to return different tokens and prime the
184  * lexer for the next token.  They all take an argument.
185  *
186  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
187  * OPERATOR     : generic operator
188  * AOPERATOR    : assignment operator
189  * PREBLOCK     : beginning the block after an if, while, foreach, ...
190  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
191  * PREREF       : *EXPR where EXPR is not a simple identifier
192  * TERM         : expression term
193  * POSTDEREF    : postfix dereference (->$* ->@[...] etc.)
194  * LOOPX        : loop exiting command (goto, last, dump, etc)
195  * FTST         : file test operator
196  * FUN0         : zero-argument function
197  * FUN0OP       : zero-argument function, with its op created in this file
198  * FUN1         : not used, except for not, which isn't a UNIOP
199  * BOop         : bitwise or or xor
200  * BAop         : bitwise and
201  * BCop         : bitwise complement
202  * SHop         : shift operator
203  * PWop         : power operator
204  * PMop         : pattern-matching operator
205  * Aop          : addition-level operator
206  * AopNOASSIGN  : addition-level operator that is never part of .=
207  * Mop          : multiplication-level operator
208  * ChEop        : chaining equality-testing operator
209  * NCEop        : non-chaining comparison operator at equality precedence
210  * ChRop        : chaining relational operator <= != gt
211  * NCRop        : non-chaining relational operator isa
212  *
213  * Also see LOP and lop() below.
214  */
215 
216 #ifdef DEBUGGING /* Serve -DT. */
217 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
218 #else
219 #   define REPORT(retval) (retval)
220 #endif
221 
222 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
223 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
224 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
225 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
226 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
227 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
228 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
229 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
230 #define LOOPX(f) return (PL_bufptr = force_word(s,BAREWORD,TRUE,FALSE), \
231 			 pl_yylval.ival=f, \
232 			 PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
233 			 REPORT((int)LOOPEX))
234 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
235 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
236 #define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
237 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
238 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
239 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
240 #define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \
241 		       REPORT('~')
242 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
243 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
244 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
245 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
246 #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
247 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
248 #define ChEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHEQOP))
249 #define NCEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCEQOP))
250 #define ChRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHRELOP))
251 #define NCRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCRELOP))
252 
253 /* This bit of chicanery makes a unary function followed by
254  * a parenthesis into a function with one argument, highest precedence.
255  * The UNIDOR macro is for unary functions that can be followed by the //
256  * operator (such as C<shift // 0>).
257  */
258 #define UNI3(f,x,have_x) { \
259 	pl_yylval.ival = f; \
260 	if (have_x) PL_expect = x; \
261 	PL_bufptr = s; \
262 	PL_last_uni = PL_oldbufptr; \
263 	PL_last_lop_op = (f) < 0 ? -(f) : (f); \
264 	if (*s == '(') \
265 	    return REPORT( (int)FUNC1 ); \
266 	s = skipspace(s); \
267 	return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
268 	}
269 #define UNI(f)    UNI3(f,XTERM,1)
270 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
271 #define UNIPROTO(f,optional) { \
272 	if (optional) PL_last_uni = PL_oldbufptr; \
273 	OPERATOR(f); \
274 	}
275 
276 #define UNIBRACK(f) UNI3(f,0,0)
277 
278 /* grandfather return to old style */
279 #define OLDLOP(f) \
280 	do { \
281 	    if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
282 		PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
283 	    pl_yylval.ival = (f); \
284 	    PL_expect = XTERM; \
285 	    PL_bufptr = s; \
286 	    return (int)LSTOP; \
287 	} while(0)
288 
289 #define COPLINE_INC_WITH_HERELINES		    \
290     STMT_START {				     \
291 	CopLINE_inc(PL_curcop);			      \
292 	if (PL_parser->herelines)		       \
293 	    CopLINE(PL_curcop) += PL_parser->herelines, \
294 	    PL_parser->herelines = 0;			 \
295     } STMT_END
296 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
297  * is no sublex_push to follow. */
298 #define COPLINE_SET_FROM_MULTI_END	      \
299     STMT_START {			       \
300 	CopLINE_set(PL_curcop, PL_multi_end);	\
301 	if (PL_multi_end != PL_multi_start)	 \
302 	    PL_parser->herelines = 0;		  \
303     } STMT_END
304 
305 
306 /* A file-local structure for passing around information about subroutines and
307  * related definable words */
308 struct code {
309     SV *sv;
310     CV *cv;
311     GV *gv, **gvp;
312     OP *rv2cv_op;
313     PADOFFSET off;
314     bool lex;
315 };
316 
317 static const struct code no_code = { NULL, NULL, NULL, NULL, NULL, 0, FALSE };
318 
319 #ifdef DEBUGGING
320 
321 /* how to interpret the pl_yylval associated with the token */
322 enum token_type {
323     TOKENTYPE_NONE,
324     TOKENTYPE_IVAL,
325     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
326     TOKENTYPE_PVAL,
327     TOKENTYPE_OPVAL
328 };
329 
330 static struct debug_tokens {
331     const int token;
332     enum token_type type;
333     const char *name;
334 } const debug_tokens[] =
335 {
336     { ADDOP,		TOKENTYPE_OPNUM,	"ADDOP" },
337     { ANDAND,		TOKENTYPE_NONE,		"ANDAND" },
338     { ANDOP,		TOKENTYPE_NONE,		"ANDOP" },
339     { ANONSUB,		TOKENTYPE_IVAL,		"ANONSUB" },
340     { ANON_SIGSUB,	TOKENTYPE_IVAL,		"ANON_SIGSUB" },
341     { ARROW,		TOKENTYPE_NONE,		"ARROW" },
342     { ASSIGNOP,		TOKENTYPE_OPNUM,	"ASSIGNOP" },
343     { BITANDOP,		TOKENTYPE_OPNUM,	"BITANDOP" },
344     { BITOROP,		TOKENTYPE_OPNUM,	"BITOROP" },
345     { CHEQOP,		TOKENTYPE_OPNUM,	"CHEQOP" },
346     { CHRELOP,		TOKENTYPE_OPNUM,	"CHRELOP" },
347     { COLONATTR,	TOKENTYPE_NONE,		"COLONATTR" },
348     { CONTINUE,		TOKENTYPE_NONE,		"CONTINUE" },
349     { DEFAULT,		TOKENTYPE_NONE,		"DEFAULT" },
350     { DO,		TOKENTYPE_NONE,		"DO" },
351     { DOLSHARP,		TOKENTYPE_NONE,		"DOLSHARP" },
352     { DORDOR,		TOKENTYPE_NONE,		"DORDOR" },
353     { DOROP,		TOKENTYPE_OPNUM,	"DOROP" },
354     { DOTDOT,		TOKENTYPE_IVAL,		"DOTDOT" },
355     { ELSE,		TOKENTYPE_NONE,		"ELSE" },
356     { ELSIF,		TOKENTYPE_IVAL,		"ELSIF" },
357     { FOR,		TOKENTYPE_IVAL,		"FOR" },
358     { FORMAT,		TOKENTYPE_NONE,		"FORMAT" },
359     { FORMLBRACK,	TOKENTYPE_NONE,		"FORMLBRACK" },
360     { FORMRBRACK,	TOKENTYPE_NONE,		"FORMRBRACK" },
361     { FUNC,		TOKENTYPE_OPNUM,	"FUNC" },
362     { FUNC0,		TOKENTYPE_OPNUM,	"FUNC0" },
363     { FUNC0OP,		TOKENTYPE_OPVAL,	"FUNC0OP" },
364     { FUNC0SUB,		TOKENTYPE_OPVAL,	"FUNC0SUB" },
365     { FUNC1,		TOKENTYPE_OPNUM,	"FUNC1" },
366     { FUNCMETH,		TOKENTYPE_OPVAL,	"FUNCMETH" },
367     { GIVEN,		TOKENTYPE_IVAL,		"GIVEN" },
368     { HASHBRACK,	TOKENTYPE_NONE,		"HASHBRACK" },
369     { IF,		TOKENTYPE_IVAL,		"IF" },
370     { LABEL,		TOKENTYPE_OPVAL,	"LABEL" },
371     { LOCAL,		TOKENTYPE_IVAL,		"LOCAL" },
372     { LOOPEX,		TOKENTYPE_OPNUM,	"LOOPEX" },
373     { LSTOP,		TOKENTYPE_OPNUM,	"LSTOP" },
374     { LSTOPSUB,		TOKENTYPE_OPVAL,	"LSTOPSUB" },
375     { MATCHOP,		TOKENTYPE_OPNUM,	"MATCHOP" },
376     { METHOD,		TOKENTYPE_OPVAL,	"METHOD" },
377     { MULOP,		TOKENTYPE_OPNUM,	"MULOP" },
378     { MY,		TOKENTYPE_IVAL,		"MY" },
379     { NCEQOP,		TOKENTYPE_OPNUM,	"NCEQOP" },
380     { NCRELOP,		TOKENTYPE_OPNUM,	"NCRELOP" },
381     { NOAMP,		TOKENTYPE_NONE,		"NOAMP" },
382     { NOTOP,		TOKENTYPE_NONE,		"NOTOP" },
383     { OROP,		TOKENTYPE_IVAL,		"OROP" },
384     { OROR,		TOKENTYPE_NONE,		"OROR" },
385     { PACKAGE,		TOKENTYPE_NONE,		"PACKAGE" },
386     { PLUGEXPR,		TOKENTYPE_OPVAL,	"PLUGEXPR" },
387     { PLUGSTMT,		TOKENTYPE_OPVAL,	"PLUGSTMT" },
388     { PMFUNC,		TOKENTYPE_OPVAL,	"PMFUNC" },
389     { POSTJOIN,		TOKENTYPE_NONE,		"POSTJOIN" },
390     { POSTDEC,		TOKENTYPE_NONE,		"POSTDEC" },
391     { POSTINC,		TOKENTYPE_NONE,		"POSTINC" },
392     { POWOP,		TOKENTYPE_OPNUM,	"POWOP" },
393     { PREDEC,		TOKENTYPE_NONE,		"PREDEC" },
394     { PREINC,		TOKENTYPE_NONE,		"PREINC" },
395     { PRIVATEREF,	TOKENTYPE_OPVAL,	"PRIVATEREF" },
396     { QWLIST,		TOKENTYPE_OPVAL,	"QWLIST" },
397     { REFGEN,		TOKENTYPE_NONE,		"REFGEN" },
398     { REQUIRE,		TOKENTYPE_NONE,		"REQUIRE" },
399     { SHIFTOP,		TOKENTYPE_OPNUM,	"SHIFTOP" },
400     { SIGSUB,		TOKENTYPE_NONE,		"SIGSUB" },
401     { SUB,		TOKENTYPE_NONE,		"SUB" },
402     { SUBLEXEND,	TOKENTYPE_NONE,		"SUBLEXEND" },
403     { SUBLEXSTART,	TOKENTYPE_NONE,		"SUBLEXSTART" },
404     { THING,		TOKENTYPE_OPVAL,	"THING" },
405     { UMINUS,		TOKENTYPE_NONE,		"UMINUS" },
406     { UNIOP,		TOKENTYPE_OPNUM,	"UNIOP" },
407     { UNIOPSUB,		TOKENTYPE_OPVAL,	"UNIOPSUB" },
408     { UNLESS,		TOKENTYPE_IVAL,		"UNLESS" },
409     { UNTIL,		TOKENTYPE_IVAL,		"UNTIL" },
410     { USE,		TOKENTYPE_IVAL,		"USE" },
411     { WHEN,		TOKENTYPE_IVAL,		"WHEN" },
412     { WHILE,		TOKENTYPE_IVAL,		"WHILE" },
413     { BAREWORD,		TOKENTYPE_OPVAL,	"BAREWORD" },
414     { YADAYADA,		TOKENTYPE_IVAL,		"YADAYADA" },
415     { 0,		TOKENTYPE_NONE,		NULL }
416 };
417 
418 /* dump the returned token in rv, plus any optional arg in pl_yylval */
419 
420 STATIC int
S_tokereport(pTHX_ I32 rv,const YYSTYPE * lvalp)421 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
422 {
423     PERL_ARGS_ASSERT_TOKEREPORT;
424 
425     if (DEBUG_T_TEST) {
426 	const char *name = NULL;
427 	enum token_type type = TOKENTYPE_NONE;
428 	const struct debug_tokens *p;
429 	SV* const report = newSVpvs("<== ");
430 
431 	for (p = debug_tokens; p->token; p++) {
432 	    if (p->token == (int)rv) {
433 		name = p->name;
434 		type = p->type;
435 		break;
436 	    }
437 	}
438 	if (name)
439 	    Perl_sv_catpv(aTHX_ report, name);
440 	else if (isGRAPH(rv))
441 	{
442 	    Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
443 	    if ((char)rv == 'p')
444 		sv_catpvs(report, " (pending identifier)");
445 	}
446 	else if (!rv)
447 	    sv_catpvs(report, "EOF");
448 	else
449 	    Perl_sv_catpvf(aTHX_ report, "?? %" IVdf, (IV)rv);
450 	switch (type) {
451 	case TOKENTYPE_NONE:
452 	    break;
453 	case TOKENTYPE_IVAL:
454 	    Perl_sv_catpvf(aTHX_ report, "(ival=%" IVdf ")", (IV)lvalp->ival);
455 	    break;
456 	case TOKENTYPE_OPNUM:
457 	    Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
458 				    PL_op_name[lvalp->ival]);
459 	    break;
460 	case TOKENTYPE_PVAL:
461 	    Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
462 	    break;
463 	case TOKENTYPE_OPVAL:
464 	    if (lvalp->opval) {
465 		Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
466 				    PL_op_name[lvalp->opval->op_type]);
467 		if (lvalp->opval->op_type == OP_CONST) {
468 		    Perl_sv_catpvf(aTHX_ report, " %s",
469 			SvPEEK(cSVOPx_sv(lvalp->opval)));
470 		}
471 
472 	    }
473 	    else
474 		sv_catpvs(report, "(opval=null)");
475 	    break;
476 	}
477         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
478     };
479     return (int)rv;
480 }
481 
482 
483 /* print the buffer with suitable escapes */
484 
485 STATIC void
S_printbuf(pTHX_ const char * const fmt,const char * const s)486 S_printbuf(pTHX_ const char *const fmt, const char *const s)
487 {
488     SV* const tmp = newSVpvs("");
489 
490     PERL_ARGS_ASSERT_PRINTBUF;
491 
492     GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); /* fmt checked by caller */
493     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
494     GCC_DIAG_RESTORE_STMT;
495     SvREFCNT_dec(tmp);
496 }
497 
498 #endif
499 
500 /*
501  * S_ao
502  *
503  * This subroutine looks for an '=' next to the operator that has just been
504  * parsed and turns it into an ASSIGNOP if it finds one.
505  */
506 
507 STATIC int
S_ao(pTHX_ int toketype)508 S_ao(pTHX_ int toketype)
509 {
510     if (*PL_bufptr == '=') {
511 	PL_bufptr++;
512 	if (toketype == ANDAND)
513 	    pl_yylval.ival = OP_ANDASSIGN;
514 	else if (toketype == OROR)
515 	    pl_yylval.ival = OP_ORASSIGN;
516 	else if (toketype == DORDOR)
517 	    pl_yylval.ival = OP_DORASSIGN;
518 	toketype = ASSIGNOP;
519     }
520     return REPORT(toketype);
521 }
522 
523 /*
524  * S_no_op
525  * When Perl expects an operator and finds something else, no_op
526  * prints the warning.  It always prints "<something> found where
527  * operator expected.  It prints "Missing semicolon on previous line?"
528  * if the surprise occurs at the start of the line.  "do you need to
529  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
530  * where the compiler doesn't know if foo is a method call or a function.
531  * It prints "Missing operator before end of line" if there's nothing
532  * after the missing operator, or "... before <...>" if there is something
533  * after the missing operator.
534  *
535  * PL_bufptr is expected to point to the start of the thing that was found,
536  * and s after the next token or partial token.
537  */
538 
539 STATIC void
S_no_op(pTHX_ const char * const what,char * s)540 S_no_op(pTHX_ const char *const what, char *s)
541 {
542     char * const oldbp = PL_bufptr;
543     const bool is_first = (PL_oldbufptr == PL_linestart);
544 
545     PERL_ARGS_ASSERT_NO_OP;
546 
547     if (!s)
548 	s = oldbp;
549     else
550 	PL_bufptr = s;
551     yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
552     if (ckWARN_d(WARN_SYNTAX)) {
553 	if (is_first)
554 	    Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
555 		    "\t(Missing semicolon on previous line?)\n");
556         else if (PL_oldoldbufptr && isIDFIRST_lazy_if_safe(PL_oldoldbufptr,
557                                                            PL_bufend,
558                                                            UTF))
559         {
560 	    const char *t;
561 	    for (t = PL_oldoldbufptr;
562                  (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) || *t == ':');
563                  t += UTF ? UTF8SKIP(t) : 1)
564             {
565 		NOOP;
566             }
567 	    if (t < PL_bufptr && isSPACE(*t))
568 		Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
569 			"\t(Do you need to predeclare %" UTF8f "?)\n",
570 		      UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
571 	}
572 	else {
573 	    assert(s >= oldbp);
574 	    Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
575 		    "\t(Missing operator before %" UTF8f "?)\n",
576 		     UTF8fARG(UTF, s - oldbp, oldbp));
577 	}
578     }
579     PL_bufptr = oldbp;
580 }
581 
582 /*
583  * S_missingterm
584  * Complain about missing quote/regexp/heredoc terminator.
585  * If it's called with NULL then it cauterizes the line buffer.
586  * If we're in a delimited string and the delimiter is a control
587  * character, it's reformatted into a two-char sequence like ^C.
588  * This is fatal.
589  */
590 
591 STATIC void
S_missingterm(pTHX_ char * s,STRLEN len)592 S_missingterm(pTHX_ char *s, STRLEN len)
593 {
594     char tmpbuf[UTF8_MAXBYTES + 1];
595     char q;
596     bool uni = FALSE;
597     SV *sv;
598     if (s) {
599 	char * const nl = (char *) my_memrchr(s, '\n', len);
600         if (nl) {
601             *nl = '\0';
602             len = nl - s;
603         }
604 	uni = UTF;
605     }
606     else if (PL_multi_close < 32) {
607 	*tmpbuf = '^';
608 	tmpbuf[1] = (char)toCTRL(PL_multi_close);
609 	tmpbuf[2] = '\0';
610 	s = tmpbuf;
611         len = 2;
612     }
613     else {
614 	if (LIKELY(PL_multi_close < 256)) {
615 	    *tmpbuf = (char)PL_multi_close;
616 	    tmpbuf[1] = '\0';
617             len = 1;
618 	}
619 	else {
620             char *end = (char *)uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close);
621             *end = '\0';
622             len = end - tmpbuf;
623 	    uni = TRUE;
624 	}
625 	s = tmpbuf;
626     }
627     q = memchr(s, '"', len) ? '\'' : '"';
628     sv = sv_2mortal(newSVpvn(s, len));
629     if (uni)
630 	SvUTF8_on(sv);
631     Perl_croak(aTHX_ "Can't find string terminator %c%" SVf "%c"
632                      " anywhere before EOF", q, SVfARG(sv), q);
633 }
634 
635 #include "feature.h"
636 
637 /*
638  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
639  * utf16-to-utf8-reversed.
640  */
641 
642 #ifdef PERL_CR_FILTER
643 static void
strip_return(SV * sv)644 strip_return(SV *sv)
645 {
646     const char *s = SvPVX_const(sv);
647     const char * const e = s + SvCUR(sv);
648 
649     PERL_ARGS_ASSERT_STRIP_RETURN;
650 
651     /* outer loop optimized to do nothing if there are no CR-LFs */
652     while (s < e) {
653 	if (*s++ == '\r' && *s == '\n') {
654 	    /* hit a CR-LF, need to copy the rest */
655 	    char *d = s - 1;
656 	    *d++ = *s++;
657 	    while (s < e) {
658 		if (*s == '\r' && s[1] == '\n')
659 		    s++;
660 		*d++ = *s++;
661 	    }
662 	    SvCUR(sv) -= s - d;
663 	    return;
664 	}
665     }
666 }
667 
668 STATIC I32
S_cr_textfilter(pTHX_ int idx,SV * sv,int maxlen)669 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
670 {
671     const I32 count = FILTER_READ(idx+1, sv, maxlen);
672     if (count > 0 && !maxlen)
673 	strip_return(sv);
674     return count;
675 }
676 #endif
677 
678 /*
679 =for apidoc lex_start
680 
681 Creates and initialises a new lexer/parser state object, supplying
682 a context in which to lex and parse from a new source of Perl code.
683 A pointer to the new state object is placed in L</PL_parser>.  An entry
684 is made on the save stack so that upon unwinding, the new state object
685 will be destroyed and the former value of L</PL_parser> will be restored.
686 Nothing else need be done to clean up the parsing context.
687 
688 The code to be parsed comes from C<line> and C<rsfp>.  C<line>, if
689 non-null, provides a string (in SV form) containing code to be parsed.
690 A copy of the string is made, so subsequent modification of C<line>
691 does not affect parsing.  C<rsfp>, if non-null, provides an input stream
692 from which code will be read to be parsed.  If both are non-null, the
693 code in C<line> comes first and must consist of complete lines of input,
694 and C<rsfp> supplies the remainder of the source.
695 
696 The C<flags> parameter is reserved for future use.  Currently it is only
697 used by perl internally, so extensions should always pass zero.
698 
699 =cut
700 */
701 
702 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
703    can share filters with the current parser.
704    LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
705    caller, hence isn't owned by the parser, so shouldn't be closed on parser
706    destruction. This is used to handle the case of defaulting to reading the
707    script from the standard input because no filename was given on the command
708    line (without getting confused by situation where STDIN has been closed, so
709    the script handle is opened on fd 0)  */
710 
711 void
Perl_lex_start(pTHX_ SV * line,PerlIO * rsfp,U32 flags)712 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
713 {
714     const char *s = NULL;
715     yy_parser *parser, *oparser;
716 
717     if (flags && flags & ~LEX_START_FLAGS)
718 	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
719 
720     /* create and initialise a parser */
721 
722     Newxz(parser, 1, yy_parser);
723     parser->old_parser = oparser = PL_parser;
724     PL_parser = parser;
725 
726     parser->stack = NULL;
727     parser->stack_max1 = NULL;
728     parser->ps = NULL;
729 
730     /* on scope exit, free this parser and restore any outer one */
731     SAVEPARSER(parser);
732     parser->saved_curcop = PL_curcop;
733 
734     /* initialise lexer state */
735 
736     parser->nexttoke = 0;
737     parser->error_count = oparser ? oparser->error_count : 0;
738     parser->copline = parser->preambling = NOLINE;
739     parser->lex_state = LEX_NORMAL;
740     parser->expect = XSTATE;
741     parser->rsfp = rsfp;
742     parser->recheck_utf8_validity = TRUE;
743     parser->rsfp_filters =
744       !(flags & LEX_START_SAME_FILTER) || !oparser
745         ? NULL
746         : MUTABLE_AV(SvREFCNT_inc(
747             oparser->rsfp_filters
748              ? oparser->rsfp_filters
749              : (oparser->rsfp_filters = newAV())
750           ));
751 
752     Newx(parser->lex_brackstack, 120, char);
753     Newx(parser->lex_casestack, 12, char);
754     *parser->lex_casestack = '\0';
755     Newxz(parser->lex_shared, 1, LEXSHARED);
756 
757     if (line) {
758 	STRLEN len;
759         const U8* first_bad_char_loc;
760 
761 	s = SvPV_const(line, len);
762 
763         if (   SvUTF8(line)
764             && UNLIKELY(! is_utf8_string_loc((U8 *) s,
765                                              SvCUR(line),
766                                              &first_bad_char_loc)))
767         {
768             _force_out_malformed_utf8_message(first_bad_char_loc,
769                                               (U8 *) s + SvCUR(line),
770                                               0,
771                                               1 /* 1 means die */ );
772             NOT_REACHED; /* NOTREACHED */
773         }
774 
775 	parser->linestr = flags & LEX_START_COPIED
776 			    ? SvREFCNT_inc_simple_NN(line)
777 			    : newSVpvn_flags(s, len, SvUTF8(line));
778 	if (!rsfp)
779 	    sv_catpvs(parser->linestr, "\n;");
780     } else {
781 	parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
782     }
783 
784     parser->oldoldbufptr =
785 	parser->oldbufptr =
786 	parser->bufptr =
787 	parser->linestart = SvPVX(parser->linestr);
788     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
789     parser->last_lop = parser->last_uni = NULL;
790 
791     STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
792                                                         |LEX_DONT_CLOSE_RSFP));
793     parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
794                                                         |LEX_DONT_CLOSE_RSFP));
795 
796     parser->in_pod = parser->filtered = 0;
797 }
798 
799 
800 /* delete a parser object */
801 
802 void
Perl_parser_free(pTHX_ const yy_parser * parser)803 Perl_parser_free(pTHX_  const yy_parser *parser)
804 {
805     PERL_ARGS_ASSERT_PARSER_FREE;
806 
807     PL_curcop = parser->saved_curcop;
808     SvREFCNT_dec(parser->linestr);
809 
810     if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
811 	PerlIO_clearerr(parser->rsfp);
812     else if (parser->rsfp && (!parser->old_parser
813           || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
814 	PerlIO_close(parser->rsfp);
815     SvREFCNT_dec(parser->rsfp_filters);
816     SvREFCNT_dec(parser->lex_stuff);
817     SvREFCNT_dec(parser->lex_sub_repl);
818 
819     Safefree(parser->lex_brackstack);
820     Safefree(parser->lex_casestack);
821     Safefree(parser->lex_shared);
822     PL_parser = parser->old_parser;
823     Safefree(parser);
824 }
825 
826 void
Perl_parser_free_nexttoke_ops(pTHX_ yy_parser * parser,OPSLAB * slab)827 Perl_parser_free_nexttoke_ops(pTHX_  yy_parser *parser, OPSLAB *slab)
828 {
829     I32 nexttoke = parser->nexttoke;
830     PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
831     while (nexttoke--) {
832 	if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
833 	 && parser->nextval[nexttoke].opval
834 	 && parser->nextval[nexttoke].opval->op_slabbed
835 	 && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
836 	    op_free(parser->nextval[nexttoke].opval);
837 	    parser->nextval[nexttoke].opval = NULL;
838 	}
839     }
840 }
841 
842 
843 /*
844 =for apidoc AmnxUN|SV *|PL_parser-E<gt>linestr
845 
846 Buffer scalar containing the chunk currently under consideration of the
847 text currently being lexed.  This is always a plain string scalar (for
848 which C<SvPOK> is true).  It is not intended to be used as a scalar by
849 normal scalar means; instead refer to the buffer directly by the pointer
850 variables described below.
851 
852 The lexer maintains various C<char*> pointers to things in the
853 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
854 reallocated, all of these pointers must be updated.  Don't attempt to
855 do this manually, but rather use L</lex_grow_linestr> if you need to
856 reallocate the buffer.
857 
858 The content of the text chunk in the buffer is commonly exactly one
859 complete line of input, up to and including a newline terminator,
860 but there are situations where it is otherwise.  The octets of the
861 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
862 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
863 flag on this scalar, which may disagree with it.
864 
865 For direct examination of the buffer, the variable
866 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
867 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
868 of these pointers is usually preferable to examination of the scalar
869 through normal scalar means.
870 
871 =for apidoc AmnxUN|char *|PL_parser-E<gt>bufend
872 
873 Direct pointer to the end of the chunk of text currently being lexed, the
874 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
875 + SvCUR(PL_parser-E<gt>linestr)>.  A C<NUL> character (zero octet) is
876 always located at the end of the buffer, and does not count as part of
877 the buffer's contents.
878 
879 =for apidoc AmnxUN|char *|PL_parser-E<gt>bufptr
880 
881 Points to the current position of lexing inside the lexer buffer.
882 Characters around this point may be freely examined, within
883 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
884 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
885 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
886 
887 Lexing code (whether in the Perl core or not) moves this pointer past
888 the characters that it consumes.  It is also expected to perform some
889 bookkeeping whenever a newline character is consumed.  This movement
890 can be more conveniently performed by the function L</lex_read_to>,
891 which handles newlines appropriately.
892 
893 Interpretation of the buffer's octets can be abstracted out by
894 using the slightly higher-level functions L</lex_peek_unichar> and
895 L</lex_read_unichar>.
896 
897 =for apidoc AmnxUN|char *|PL_parser-E<gt>linestart
898 
899 Points to the start of the current line inside the lexer buffer.
900 This is useful for indicating at which column an error occurred, and
901 not much else.  This must be updated by any lexing code that consumes
902 a newline; the function L</lex_read_to> handles this detail.
903 
904 =cut
905 */
906 
907 /*
908 =for apidoc lex_bufutf8
909 
910 Indicates whether the octets in the lexer buffer
911 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
912 of Unicode characters.  If not, they should be interpreted as Latin-1
913 characters.  This is analogous to the C<SvUTF8> flag for scalars.
914 
915 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
916 contains valid UTF-8.  Lexing code must be robust in the face of invalid
917 encoding.
918 
919 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
920 is significant, but not the whole story regarding the input character
921 encoding.  Normally, when a file is being read, the scalar contains octets
922 and its C<SvUTF8> flag is off, but the octets should be interpreted as
923 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
924 however, the scalar may have the C<SvUTF8> flag on, and in this case its
925 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
926 is in effect.  This logic may change in the future; use this function
927 instead of implementing the logic yourself.
928 
929 =cut
930 */
931 
932 bool
Perl_lex_bufutf8(pTHX)933 Perl_lex_bufutf8(pTHX)
934 {
935     return UTF;
936 }
937 
938 /*
939 =for apidoc lex_grow_linestr
940 
941 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
942 at least C<len> octets (including terminating C<NUL>).  Returns a
943 pointer to the reallocated buffer.  This is necessary before making
944 any direct modification of the buffer that would increase its length.
945 L</lex_stuff_pvn> provides a more convenient way to insert text into
946 the buffer.
947 
948 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
949 this function updates all of the lexer's variables that point directly
950 into the buffer.
951 
952 =cut
953 */
954 
955 char *
Perl_lex_grow_linestr(pTHX_ STRLEN len)956 Perl_lex_grow_linestr(pTHX_ STRLEN len)
957 {
958     SV *linestr;
959     char *buf;
960     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
961     STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
962     bool current;
963 
964     linestr = PL_parser->linestr;
965     buf = SvPVX(linestr);
966     if (len <= SvLEN(linestr))
967 	return buf;
968 
969     /* Is the lex_shared linestr SV the same as the current linestr SV?
970      * Only in this case does re_eval_start need adjusting, since it
971      * points within lex_shared->ls_linestr's buffer */
972     current = (   !PL_parser->lex_shared->ls_linestr
973                || linestr == PL_parser->lex_shared->ls_linestr);
974 
975     bufend_pos = PL_parser->bufend - buf;
976     bufptr_pos = PL_parser->bufptr - buf;
977     oldbufptr_pos = PL_parser->oldbufptr - buf;
978     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
979     linestart_pos = PL_parser->linestart - buf;
980     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
981     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
982     re_eval_start_pos = (current && PL_parser->lex_shared->re_eval_start) ?
983                             PL_parser->lex_shared->re_eval_start - buf : 0;
984 
985     buf = sv_grow(linestr, len);
986 
987     PL_parser->bufend = buf + bufend_pos;
988     PL_parser->bufptr = buf + bufptr_pos;
989     PL_parser->oldbufptr = buf + oldbufptr_pos;
990     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
991     PL_parser->linestart = buf + linestart_pos;
992     if (PL_parser->last_uni)
993 	PL_parser->last_uni = buf + last_uni_pos;
994     if (PL_parser->last_lop)
995 	PL_parser->last_lop = buf + last_lop_pos;
996     if (current && PL_parser->lex_shared->re_eval_start)
997         PL_parser->lex_shared->re_eval_start  = buf + re_eval_start_pos;
998     return buf;
999 }
1000 
1001 /*
1002 =for apidoc lex_stuff_pvn
1003 
1004 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1005 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1006 reallocating the buffer if necessary.  This means that lexing code that
1007 runs later will see the characters as if they had appeared in the input.
1008 It is not recommended to do this as part of normal parsing, and most
1009 uses of this facility run the risk of the inserted characters being
1010 interpreted in an unintended manner.
1011 
1012 The string to be inserted is represented by C<len> octets starting
1013 at C<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
1014 according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>.
1015 The characters are recoded for the lexer buffer, according to how the
1016 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
1017 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
1018 function is more convenient.
1019 
1020 =for apidoc Amnh||LEX_STUFF_UTF8
1021 
1022 =cut
1023 */
1024 
1025 void
Perl_lex_stuff_pvn(pTHX_ const char * pv,STRLEN len,U32 flags)1026 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1027 {
1028     dVAR;
1029     char *bufptr;
1030     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1031     if (flags & ~(LEX_STUFF_UTF8))
1032 	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1033     if (UTF) {
1034 	if (flags & LEX_STUFF_UTF8) {
1035 	    goto plain_copy;
1036 	} else {
1037 	    STRLEN highhalf = variant_under_utf8_count((U8 *) pv,
1038                                                        (U8 *) pv + len);
1039             const char *p, *e = pv+len;;
1040 	    if (!highhalf)
1041 		goto plain_copy;
1042 	    lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1043 	    bufptr = PL_parser->bufptr;
1044 	    Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1045 	    SvCUR_set(PL_parser->linestr,
1046 	    	SvCUR(PL_parser->linestr) + len+highhalf);
1047 	    PL_parser->bufend += len+highhalf;
1048 	    for (p = pv; p != e; p++) {
1049                 append_utf8_from_native_byte(*p, (U8 **) &bufptr);
1050 	    }
1051 	}
1052     } else {
1053 	if (flags & LEX_STUFF_UTF8) {
1054 	    STRLEN highhalf = 0;
1055 	    const char *p, *e = pv+len;
1056 	    for (p = pv; p != e; p++) {
1057 		U8 c = (U8)*p;
1058 		if (UTF8_IS_ABOVE_LATIN1(c)) {
1059 		    Perl_croak(aTHX_ "Lexing code attempted to stuff "
1060 				"non-Latin-1 character into Latin-1 input");
1061 		} else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1062 		    p++;
1063 		    highhalf++;
1064                 } else assert(UTF8_IS_INVARIANT(c));
1065 	    }
1066 	    if (!highhalf)
1067 		goto plain_copy;
1068 	    lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1069 	    bufptr = PL_parser->bufptr;
1070 	    Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1071 	    SvCUR_set(PL_parser->linestr,
1072 	    	SvCUR(PL_parser->linestr) + len-highhalf);
1073 	    PL_parser->bufend += len-highhalf;
1074 	    p = pv;
1075 	    while (p < e) {
1076 		if (UTF8_IS_INVARIANT(*p)) {
1077 		    *bufptr++ = *p;
1078                     p++;
1079 		}
1080 		else {
1081                     assert(p < e -1 );
1082 		    *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
1083 		    p += 2;
1084                 }
1085 	    }
1086 	} else {
1087 	  plain_copy:
1088 	    lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1089 	    bufptr = PL_parser->bufptr;
1090 	    Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1091 	    SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1092 	    PL_parser->bufend += len;
1093 	    Copy(pv, bufptr, len, char);
1094 	}
1095     }
1096 }
1097 
1098 /*
1099 =for apidoc lex_stuff_pv
1100 
1101 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1102 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1103 reallocating the buffer if necessary.  This means that lexing code that
1104 runs later will see the characters as if they had appeared in the input.
1105 It is not recommended to do this as part of normal parsing, and most
1106 uses of this facility run the risk of the inserted characters being
1107 interpreted in an unintended manner.
1108 
1109 The string to be inserted is represented by octets starting at C<pv>
1110 and continuing to the first nul.  These octets are interpreted as either
1111 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1112 in C<flags>.  The characters are recoded for the lexer buffer, according
1113 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1114 If it is not convenient to nul-terminate a string to be inserted, the
1115 L</lex_stuff_pvn> function is more appropriate.
1116 
1117 =cut
1118 */
1119 
1120 void
Perl_lex_stuff_pv(pTHX_ const char * pv,U32 flags)1121 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1122 {
1123     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1124     lex_stuff_pvn(pv, strlen(pv), flags);
1125 }
1126 
1127 /*
1128 =for apidoc lex_stuff_sv
1129 
1130 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1131 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1132 reallocating the buffer if necessary.  This means that lexing code that
1133 runs later will see the characters as if they had appeared in the input.
1134 It is not recommended to do this as part of normal parsing, and most
1135 uses of this facility run the risk of the inserted characters being
1136 interpreted in an unintended manner.
1137 
1138 The string to be inserted is the string value of C<sv>.  The characters
1139 are recoded for the lexer buffer, according to how the buffer is currently
1140 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1141 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1142 need to construct a scalar.
1143 
1144 =cut
1145 */
1146 
1147 void
Perl_lex_stuff_sv(pTHX_ SV * sv,U32 flags)1148 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1149 {
1150     char *pv;
1151     STRLEN len;
1152     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1153     if (flags)
1154 	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1155     pv = SvPV(sv, len);
1156     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1157 }
1158 
1159 /*
1160 =for apidoc lex_unstuff
1161 
1162 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1163 C<ptr>.  Text following C<ptr> will be moved, and the buffer shortened.
1164 This hides the discarded text from any lexing code that runs later,
1165 as if the text had never appeared.
1166 
1167 This is not the normal way to consume lexed text.  For that, use
1168 L</lex_read_to>.
1169 
1170 =cut
1171 */
1172 
1173 void
Perl_lex_unstuff(pTHX_ char * ptr)1174 Perl_lex_unstuff(pTHX_ char *ptr)
1175 {
1176     char *buf, *bufend;
1177     STRLEN unstuff_len;
1178     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1179     buf = PL_parser->bufptr;
1180     if (ptr < buf)
1181 	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1182     if (ptr == buf)
1183 	return;
1184     bufend = PL_parser->bufend;
1185     if (ptr > bufend)
1186 	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1187     unstuff_len = ptr - buf;
1188     Move(ptr, buf, bufend+1-ptr, char);
1189     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1190     PL_parser->bufend = bufend - unstuff_len;
1191 }
1192 
1193 /*
1194 =for apidoc lex_read_to
1195 
1196 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1197 to C<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match C<ptr>,
1198 performing the correct bookkeeping whenever a newline character is passed.
1199 This is the normal way to consume lexed text.
1200 
1201 Interpretation of the buffer's octets can be abstracted out by
1202 using the slightly higher-level functions L</lex_peek_unichar> and
1203 L</lex_read_unichar>.
1204 
1205 =cut
1206 */
1207 
1208 void
Perl_lex_read_to(pTHX_ char * ptr)1209 Perl_lex_read_to(pTHX_ char *ptr)
1210 {
1211     char *s;
1212     PERL_ARGS_ASSERT_LEX_READ_TO;
1213     s = PL_parser->bufptr;
1214     if (ptr < s || ptr > PL_parser->bufend)
1215 	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1216     for (; s != ptr; s++)
1217 	if (*s == '\n') {
1218 	    COPLINE_INC_WITH_HERELINES;
1219 	    PL_parser->linestart = s+1;
1220 	}
1221     PL_parser->bufptr = ptr;
1222 }
1223 
1224 /*
1225 =for apidoc lex_discard_to
1226 
1227 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1228 up to C<ptr>.  The remaining content of the buffer will be moved, and
1229 all pointers into the buffer updated appropriately.  C<ptr> must not
1230 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1231 it is not permitted to discard text that has yet to be lexed.
1232 
1233 Normally it is not necessarily to do this directly, because it suffices to
1234 use the implicit discarding behaviour of L</lex_next_chunk> and things
1235 based on it.  However, if a token stretches across multiple lines,
1236 and the lexing code has kept multiple lines of text in the buffer for
1237 that purpose, then after completion of the token it would be wise to
1238 explicitly discard the now-unneeded earlier lines, to avoid future
1239 multi-line tokens growing the buffer without bound.
1240 
1241 =cut
1242 */
1243 
1244 void
Perl_lex_discard_to(pTHX_ char * ptr)1245 Perl_lex_discard_to(pTHX_ char *ptr)
1246 {
1247     char *buf;
1248     STRLEN discard_len;
1249     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1250     buf = SvPVX(PL_parser->linestr);
1251     if (ptr < buf)
1252 	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1253     if (ptr == buf)
1254 	return;
1255     if (ptr > PL_parser->bufptr)
1256 	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1257     discard_len = ptr - buf;
1258     if (PL_parser->oldbufptr < ptr)
1259 	PL_parser->oldbufptr = ptr;
1260     if (PL_parser->oldoldbufptr < ptr)
1261 	PL_parser->oldoldbufptr = ptr;
1262     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1263 	PL_parser->last_uni = NULL;
1264     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1265 	PL_parser->last_lop = NULL;
1266     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1267     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1268     PL_parser->bufend -= discard_len;
1269     PL_parser->bufptr -= discard_len;
1270     PL_parser->oldbufptr -= discard_len;
1271     PL_parser->oldoldbufptr -= discard_len;
1272     if (PL_parser->last_uni)
1273 	PL_parser->last_uni -= discard_len;
1274     if (PL_parser->last_lop)
1275 	PL_parser->last_lop -= discard_len;
1276 }
1277 
1278 void
Perl_notify_parser_that_changed_to_utf8(pTHX)1279 Perl_notify_parser_that_changed_to_utf8(pTHX)
1280 {
1281     /* Called when $^H is changed to indicate that HINT_UTF8 has changed from
1282      * off to on.  At compile time, this has the effect of entering a 'use
1283      * utf8' section.  This means that any input was not previously checked for
1284      * UTF-8 (because it was off), but now we do need to check it, or our
1285      * assumptions about the input being sane could be wrong, and we could
1286      * segfault.  This routine just sets a flag so that the next time we look
1287      * at the input we do the well-formed UTF-8 check.  If we aren't in the
1288      * proper phase, there may not be a parser object, but if there is, setting
1289      * the flag is harmless */
1290 
1291     if (PL_parser) {
1292         PL_parser->recheck_utf8_validity = TRUE;
1293     }
1294 }
1295 
1296 /*
1297 =for apidoc lex_next_chunk
1298 
1299 Reads in the next chunk of text to be lexed, appending it to
1300 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1301 looked to the end of the current chunk and wants to know more.  It is
1302 usual, but not necessary, for lexing to have consumed the entirety of
1303 the current chunk at this time.
1304 
1305 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1306 chunk (i.e., the current chunk has been entirely consumed), normally the
1307 current chunk will be discarded at the same time that the new chunk is
1308 read in.  If C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, the current chunk
1309 will not be discarded.  If the current chunk has not been entirely
1310 consumed, then it will not be discarded regardless of the flag.
1311 
1312 Returns true if some new text was added to the buffer, or false if the
1313 buffer has reached the end of the input text.
1314 
1315 =for apidoc Amnh||LEX_KEEP_PREVIOUS
1316 
1317 =cut
1318 */
1319 
1320 #define LEX_FAKE_EOF 0x80000000
1321 #define LEX_NO_TERM  0x40000000 /* here-doc */
1322 
1323 bool
Perl_lex_next_chunk(pTHX_ U32 flags)1324 Perl_lex_next_chunk(pTHX_ U32 flags)
1325 {
1326     SV *linestr;
1327     char *buf;
1328     STRLEN old_bufend_pos, new_bufend_pos;
1329     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1330     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1331     bool got_some_for_debugger = 0;
1332     bool got_some;
1333 
1334     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1335 	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1336     if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
1337 	return FALSE;
1338     linestr = PL_parser->linestr;
1339     buf = SvPVX(linestr);
1340     if (!(flags & LEX_KEEP_PREVIOUS)
1341           && PL_parser->bufptr == PL_parser->bufend)
1342     {
1343 	old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1344 	linestart_pos = 0;
1345 	if (PL_parser->last_uni != PL_parser->bufend)
1346 	    PL_parser->last_uni = NULL;
1347 	if (PL_parser->last_lop != PL_parser->bufend)
1348 	    PL_parser->last_lop = NULL;
1349 	last_uni_pos = last_lop_pos = 0;
1350 	*buf = 0;
1351 	SvCUR_set(linestr, 0);
1352     } else {
1353 	old_bufend_pos = PL_parser->bufend - buf;
1354 	bufptr_pos = PL_parser->bufptr - buf;
1355 	oldbufptr_pos = PL_parser->oldbufptr - buf;
1356 	oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1357 	linestart_pos = PL_parser->linestart - buf;
1358 	last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1359 	last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1360     }
1361     if (flags & LEX_FAKE_EOF) {
1362 	goto eof;
1363     } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1364 	got_some = 0;
1365     } else if (filter_gets(linestr, old_bufend_pos)) {
1366 	got_some = 1;
1367 	got_some_for_debugger = 1;
1368     } else if (flags & LEX_NO_TERM) {
1369 	got_some = 0;
1370     } else {
1371 	if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1372             SvPVCLEAR(linestr);
1373 	eof:
1374 	/* End of real input.  Close filehandle (unless it was STDIN),
1375 	 * then add implicit termination.
1376 	 */
1377 	if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1378 	    PerlIO_clearerr(PL_parser->rsfp);
1379 	else if (PL_parser->rsfp)
1380 	    (void)PerlIO_close(PL_parser->rsfp);
1381 	PL_parser->rsfp = NULL;
1382 	PL_parser->in_pod = PL_parser->filtered = 0;
1383 	if (!PL_in_eval && PL_minus_p) {
1384 	    sv_catpvs(linestr,
1385 		/*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1386 	    PL_minus_n = PL_minus_p = 0;
1387 	} else if (!PL_in_eval && PL_minus_n) {
1388 	    sv_catpvs(linestr, /*{*/";}");
1389 	    PL_minus_n = 0;
1390 	} else
1391 	    sv_catpvs(linestr, ";");
1392 	got_some = 1;
1393     }
1394     buf = SvPVX(linestr);
1395     new_bufend_pos = SvCUR(linestr);
1396     PL_parser->bufend = buf + new_bufend_pos;
1397     PL_parser->bufptr = buf + bufptr_pos;
1398 
1399     if (UTF) {
1400         const U8* first_bad_char_loc;
1401         if (UNLIKELY(! is_utf8_string_loc(
1402                             (U8 *) PL_parser->bufptr,
1403                                    PL_parser->bufend - PL_parser->bufptr,
1404                                    &first_bad_char_loc)))
1405         {
1406             _force_out_malformed_utf8_message(first_bad_char_loc,
1407                                               (U8 *) PL_parser->bufend,
1408                                               0,
1409                                               1 /* 1 means die */ );
1410             NOT_REACHED; /* NOTREACHED */
1411         }
1412     }
1413 
1414     PL_parser->oldbufptr = buf + oldbufptr_pos;
1415     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1416     PL_parser->linestart = buf + linestart_pos;
1417     if (PL_parser->last_uni)
1418 	PL_parser->last_uni = buf + last_uni_pos;
1419     if (PL_parser->last_lop)
1420 	PL_parser->last_lop = buf + last_lop_pos;
1421     if (PL_parser->preambling != NOLINE) {
1422 	CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1423 	PL_parser->preambling = NOLINE;
1424     }
1425     if (   got_some_for_debugger
1426         && PERLDB_LINE_OR_SAVESRC
1427         && PL_curstash != PL_debstash)
1428     {
1429 	/* debugger active and we're not compiling the debugger code,
1430 	 * so store the line into the debugger's array of lines
1431 	 */
1432 	update_debugger_info(NULL, buf+old_bufend_pos,
1433 	    new_bufend_pos-old_bufend_pos);
1434     }
1435     return got_some;
1436 }
1437 
1438 /*
1439 =for apidoc lex_peek_unichar
1440 
1441 Looks ahead one (Unicode) character in the text currently being lexed.
1442 Returns the codepoint (unsigned integer value) of the next character,
1443 or -1 if lexing has reached the end of the input text.  To consume the
1444 peeked character, use L</lex_read_unichar>.
1445 
1446 If the next character is in (or extends into) the next chunk of input
1447 text, the next chunk will be read in.  Normally the current chunk will be
1448 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1449 bit set, then the current chunk will not be discarded.
1450 
1451 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1452 is encountered, an exception is generated.
1453 
1454 =cut
1455 */
1456 
1457 I32
Perl_lex_peek_unichar(pTHX_ U32 flags)1458 Perl_lex_peek_unichar(pTHX_ U32 flags)
1459 {
1460     dVAR;
1461     char *s, *bufend;
1462     if (flags & ~(LEX_KEEP_PREVIOUS))
1463 	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1464     s = PL_parser->bufptr;
1465     bufend = PL_parser->bufend;
1466     if (UTF) {
1467 	U8 head;
1468 	I32 unichar;
1469 	STRLEN len, retlen;
1470 	if (s == bufend) {
1471 	    if (!lex_next_chunk(flags))
1472 		return -1;
1473 	    s = PL_parser->bufptr;
1474 	    bufend = PL_parser->bufend;
1475 	}
1476 	head = (U8)*s;
1477 	if (UTF8_IS_INVARIANT(head))
1478 	    return head;
1479 	if (UTF8_IS_START(head)) {
1480 	    len = UTF8SKIP(&head);
1481 	    while ((STRLEN)(bufend-s) < len) {
1482 		if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1483 		    break;
1484 		s = PL_parser->bufptr;
1485 		bufend = PL_parser->bufend;
1486 	    }
1487 	}
1488 	unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1489 	if (retlen == (STRLEN)-1) {
1490             _force_out_malformed_utf8_message((U8 *) s,
1491                                               (U8 *) bufend,
1492                                               0,
1493                                               1 /* 1 means die */ );
1494             NOT_REACHED; /* NOTREACHED */
1495 	}
1496 	return unichar;
1497     } else {
1498 	if (s == bufend) {
1499 	    if (!lex_next_chunk(flags))
1500 		return -1;
1501 	    s = PL_parser->bufptr;
1502 	}
1503 	return (U8)*s;
1504     }
1505 }
1506 
1507 /*
1508 =for apidoc lex_read_unichar
1509 
1510 Reads the next (Unicode) character in the text currently being lexed.
1511 Returns the codepoint (unsigned integer value) of the character read,
1512 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1513 if lexing has reached the end of the input text.  To non-destructively
1514 examine the next character, use L</lex_peek_unichar> instead.
1515 
1516 If the next character is in (or extends into) the next chunk of input
1517 text, the next chunk will be read in.  Normally the current chunk will be
1518 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1519 bit set, then the current chunk will not be discarded.
1520 
1521 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1522 is encountered, an exception is generated.
1523 
1524 =cut
1525 */
1526 
1527 I32
Perl_lex_read_unichar(pTHX_ U32 flags)1528 Perl_lex_read_unichar(pTHX_ U32 flags)
1529 {
1530     I32 c;
1531     if (flags & ~(LEX_KEEP_PREVIOUS))
1532 	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1533     c = lex_peek_unichar(flags);
1534     if (c != -1) {
1535 	if (c == '\n')
1536 	    COPLINE_INC_WITH_HERELINES;
1537 	if (UTF)
1538 	    PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1539 	else
1540 	    ++(PL_parser->bufptr);
1541     }
1542     return c;
1543 }
1544 
1545 /*
1546 =for apidoc lex_read_space
1547 
1548 Reads optional spaces, in Perl style, in the text currently being
1549 lexed.  The spaces may include ordinary whitespace characters and
1550 Perl-style comments.  C<#line> directives are processed if encountered.
1551 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1552 at a non-space character (or the end of the input text).
1553 
1554 If spaces extend into the next chunk of input text, the next chunk will
1555 be read in.  Normally the current chunk will be discarded at the same
1556 time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, then the current
1557 chunk will not be discarded.
1558 
1559 =cut
1560 */
1561 
1562 #define LEX_NO_INCLINE    0x40000000
1563 #define LEX_NO_NEXT_CHUNK 0x80000000
1564 
1565 void
Perl_lex_read_space(pTHX_ U32 flags)1566 Perl_lex_read_space(pTHX_ U32 flags)
1567 {
1568     char *s, *bufend;
1569     const bool can_incline = !(flags & LEX_NO_INCLINE);
1570     bool need_incline = 0;
1571     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1572 	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1573     s = PL_parser->bufptr;
1574     bufend = PL_parser->bufend;
1575     while (1) {
1576 	char c = *s;
1577 	if (c == '#') {
1578 	    do {
1579 		c = *++s;
1580 	    } while (!(c == '\n' || (c == 0 && s == bufend)));
1581 	} else if (c == '\n') {
1582 	    s++;
1583 	    if (can_incline) {
1584 		PL_parser->linestart = s;
1585 		if (s == bufend)
1586 		    need_incline = 1;
1587 		else
1588 		    incline(s, bufend);
1589 	    }
1590 	} else if (isSPACE(c)) {
1591 	    s++;
1592 	} else if (c == 0 && s == bufend) {
1593 	    bool got_more;
1594 	    line_t l;
1595 	    if (flags & LEX_NO_NEXT_CHUNK)
1596 		break;
1597 	    PL_parser->bufptr = s;
1598 	    l = CopLINE(PL_curcop);
1599 	    CopLINE(PL_curcop) += PL_parser->herelines + 1;
1600 	    got_more = lex_next_chunk(flags);
1601 	    CopLINE_set(PL_curcop, l);
1602 	    s = PL_parser->bufptr;
1603 	    bufend = PL_parser->bufend;
1604 	    if (!got_more)
1605 		break;
1606 	    if (can_incline && need_incline && PL_parser->rsfp) {
1607 		incline(s, bufend);
1608 		need_incline = 0;
1609 	    }
1610 	} else if (!c) {
1611 	    s++;
1612 	} else {
1613 	    break;
1614 	}
1615     }
1616     PL_parser->bufptr = s;
1617 }
1618 
1619 /*
1620 
1621 =for apidoc validate_proto
1622 
1623 This function performs syntax checking on a prototype, C<proto>.
1624 If C<warn> is true, any illegal characters or mismatched brackets
1625 will trigger illegalproto warnings, declaring that they were
1626 detected in the prototype for C<name>.
1627 
1628 The return value is C<true> if this is a valid prototype, and
1629 C<false> if it is not, regardless of whether C<warn> was C<true> or
1630 C<false>.
1631 
1632 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1633 
1634 =cut
1635 
1636  */
1637 
1638 bool
Perl_validate_proto(pTHX_ SV * name,SV * proto,bool warn,bool curstash)1639 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn, bool curstash)
1640 {
1641     STRLEN len, origlen;
1642     char *p;
1643     bool bad_proto = FALSE;
1644     bool in_brackets = FALSE;
1645     bool after_slash = FALSE;
1646     char greedy_proto = ' ';
1647     bool proto_after_greedy_proto = FALSE;
1648     bool must_be_last = FALSE;
1649     bool underscore = FALSE;
1650     bool bad_proto_after_underscore = FALSE;
1651 
1652     PERL_ARGS_ASSERT_VALIDATE_PROTO;
1653 
1654     if (!proto)
1655 	return TRUE;
1656 
1657     p = SvPV(proto, len);
1658     origlen = len;
1659     for (; len--; p++) {
1660 	if (!isSPACE(*p)) {
1661 	    if (must_be_last)
1662 		proto_after_greedy_proto = TRUE;
1663 	    if (underscore) {
1664 		if (!memCHRs(";@%", *p))
1665 		    bad_proto_after_underscore = TRUE;
1666 		underscore = FALSE;
1667 	    }
1668 	    if (!memCHRs("$@%*;[]&\\_+", *p) || *p == '\0') {
1669 		bad_proto = TRUE;
1670 	    }
1671 	    else {
1672 		if (*p == '[')
1673 		    in_brackets = TRUE;
1674 		else if (*p == ']')
1675 		    in_brackets = FALSE;
1676 		else if ((*p == '@' || *p == '%')
1677                          && !after_slash
1678                          && !in_brackets )
1679                 {
1680 		    must_be_last = TRUE;
1681 		    greedy_proto = *p;
1682 		}
1683 		else if (*p == '_')
1684 		    underscore = TRUE;
1685 	    }
1686 	    if (*p == '\\')
1687 		after_slash = TRUE;
1688 	    else
1689 		after_slash = FALSE;
1690 	}
1691     }
1692 
1693     if (warn) {
1694 	SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1695 	p -= origlen;
1696 	p = SvUTF8(proto)
1697 	    ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1698 	                     origlen, UNI_DISPLAY_ISPRINT)
1699 	    : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1700 
1701 	if (curstash && !memchr(SvPVX(name), ':', SvCUR(name))) {
1702 	    SV *name2 = sv_2mortal(newSVsv(PL_curstname));
1703 	    sv_catpvs(name2, "::");
1704 	    sv_catsv(name2, (SV *)name);
1705 	    name = name2;
1706 	}
1707 
1708 	if (proto_after_greedy_proto)
1709 	    Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1710 			"Prototype after '%c' for %" SVf " : %s",
1711 			greedy_proto, SVfARG(name), p);
1712 	if (in_brackets)
1713 	    Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1714 			"Missing ']' in prototype for %" SVf " : %s",
1715 			SVfARG(name), p);
1716 	if (bad_proto)
1717 	    Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1718 			"Illegal character in prototype for %" SVf " : %s",
1719 			SVfARG(name), p);
1720 	if (bad_proto_after_underscore)
1721 	    Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1722 			"Illegal character after '_' in prototype for %" SVf " : %s",
1723 			SVfARG(name), p);
1724     }
1725 
1726     return (! (proto_after_greedy_proto || bad_proto) );
1727 }
1728 
1729 /*
1730  * S_incline
1731  * This subroutine has nothing to do with tilting, whether at windmills
1732  * or pinball tables.  Its name is short for "increment line".  It
1733  * increments the current line number in CopLINE(PL_curcop) and checks
1734  * to see whether the line starts with a comment of the form
1735  *    # line 500 "foo.pm"
1736  * If so, it sets the current line number and file to the values in the comment.
1737  */
1738 
1739 STATIC void
S_incline(pTHX_ const char * s,const char * end)1740 S_incline(pTHX_ const char *s, const char *end)
1741 {
1742     const char *t;
1743     const char *n;
1744     const char *e;
1745     line_t line_num;
1746     UV uv;
1747 
1748     PERL_ARGS_ASSERT_INCLINE;
1749 
1750     assert(end >= s);
1751 
1752     COPLINE_INC_WITH_HERELINES;
1753     if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1754      && s+1 == PL_bufend && *s == ';') {
1755 	/* fake newline in string eval */
1756 	CopLINE_dec(PL_curcop);
1757 	return;
1758     }
1759     if (*s++ != '#')
1760 	return;
1761     while (SPACE_OR_TAB(*s))
1762 	s++;
1763     if (memBEGINs(s, (STRLEN) (end - s), "line"))
1764 	s += sizeof("line") - 1;
1765     else
1766 	return;
1767     if (SPACE_OR_TAB(*s))
1768 	s++;
1769     else
1770 	return;
1771     while (SPACE_OR_TAB(*s))
1772 	s++;
1773     if (!isDIGIT(*s))
1774 	return;
1775 
1776     n = s;
1777     while (isDIGIT(*s))
1778 	s++;
1779     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1780 	return;
1781     while (SPACE_OR_TAB(*s))
1782 	s++;
1783     if (*s == '"' && (t = (char *) memchr(s+1, '"', end - s))) {
1784 	s++;
1785 	e = t + 1;
1786     }
1787     else {
1788 	t = s;
1789 	while (*t && !isSPACE(*t))
1790 	    t++;
1791 	e = t;
1792     }
1793     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1794 	e++;
1795     if (*e != '\n' && *e != '\0')
1796 	return;		/* false alarm */
1797 
1798     if (!grok_atoUV(n, &uv, &e))
1799         return;
1800     line_num = ((line_t)uv) - 1;
1801 
1802     if (t - s > 0) {
1803 	const STRLEN len = t - s;
1804 
1805 	if (!PL_rsfp && !PL_parser->filtered) {
1806 	    /* must copy *{"::_<(eval N)[oldfilename:L]"}
1807 	     * to *{"::_<newfilename"} */
1808 	    /* However, the long form of evals is only turned on by the
1809 	       debugger - usually they're "(eval %lu)" */
1810 	    GV * const cfgv = CopFILEGV(PL_curcop);
1811 	    if (cfgv) {
1812 		char smallbuf[128];
1813 		STRLEN tmplen2 = len;
1814 		char *tmpbuf2;
1815 		GV *gv2;
1816 
1817 		if (tmplen2 + 2 <= sizeof smallbuf)
1818 		    tmpbuf2 = smallbuf;
1819 		else
1820 		    Newx(tmpbuf2, tmplen2 + 2, char);
1821 
1822 		tmpbuf2[0] = '_';
1823 		tmpbuf2[1] = '<';
1824 
1825 		memcpy(tmpbuf2 + 2, s, tmplen2);
1826 		tmplen2 += 2;
1827 
1828 		gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1829 		if (!isGV(gv2)) {
1830 		    gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1831 		    /* adjust ${"::_<newfilename"} to store the new file name */
1832 		    GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1833 		    /* The line number may differ. If that is the case,
1834 		       alias the saved lines that are in the array.
1835 		       Otherwise alias the whole array. */
1836 		    if (CopLINE(PL_curcop) == line_num) {
1837 			GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1838 			GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1839 		    }
1840 		    else if (GvAV(cfgv)) {
1841 			AV * const av = GvAV(cfgv);
1842 			const line_t start = CopLINE(PL_curcop)+1;
1843 			SSize_t items = AvFILLp(av) - start;
1844 			if (items > 0) {
1845 			    AV * const av2 = GvAVn(gv2);
1846 			    SV **svp = AvARRAY(av) + start;
1847 			    Size_t l = line_num+1;
1848 			    while (items-- && l < SSize_t_MAX && l == (line_t)l)
1849 				av_store(av2, (SSize_t)l++, SvREFCNT_inc(*svp++));
1850 			}
1851 		    }
1852 		}
1853 
1854 		if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1855 	    }
1856 	}
1857 	CopFILE_free(PL_curcop);
1858 	CopFILE_setn(PL_curcop, s, len);
1859     }
1860     CopLINE_set(PL_curcop, line_num);
1861 }
1862 
1863 STATIC void
S_update_debugger_info(pTHX_ SV * orig_sv,const char * const buf,STRLEN len)1864 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1865 {
1866     AV *av = CopFILEAVx(PL_curcop);
1867     if (av) {
1868 	SV * sv;
1869 	if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1870 	else {
1871 	    sv = *av_fetch(av, 0, 1);
1872 	    SvUPGRADE(sv, SVt_PVMG);
1873 	}
1874         if (!SvPOK(sv)) SvPVCLEAR(sv);
1875 	if (orig_sv)
1876 	    sv_catsv(sv, orig_sv);
1877 	else
1878 	    sv_catpvn(sv, buf, len);
1879 	if (!SvIOK(sv)) {
1880 	    (void)SvIOK_on(sv);
1881 	    SvIV_set(sv, 0);
1882 	}
1883 	if (PL_parser->preambling == NOLINE)
1884 	    av_store(av, CopLINE(PL_curcop), sv);
1885     }
1886 }
1887 
1888 /*
1889  * skipspace
1890  * Called to gobble the appropriate amount and type of whitespace.
1891  * Skips comments as well.
1892  * Returns the next character after the whitespace that is skipped.
1893  *
1894  * peekspace
1895  * Same thing, but look ahead without incrementing line numbers or
1896  * adjusting PL_linestart.
1897  */
1898 
1899 #define skipspace(s) skipspace_flags(s, 0)
1900 #define peekspace(s) skipspace_flags(s, LEX_NO_INCLINE)
1901 
1902 char *
Perl_skipspace_flags(pTHX_ char * s,U32 flags)1903 Perl_skipspace_flags(pTHX_ char *s, U32 flags)
1904 {
1905     PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1906     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1907 	while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s))
1908 	    s++;
1909     } else {
1910 	STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1911 	PL_bufptr = s;
1912 	lex_read_space(flags | LEX_KEEP_PREVIOUS |
1913 		(PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ?
1914 		    LEX_NO_NEXT_CHUNK : 0));
1915 	s = PL_bufptr;
1916 	PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1917 	if (PL_linestart > PL_bufptr)
1918 	    PL_bufptr = PL_linestart;
1919 	return s;
1920     }
1921     return s;
1922 }
1923 
1924 /*
1925  * S_check_uni
1926  * Check the unary operators to ensure there's no ambiguity in how they're
1927  * used.  An ambiguous piece of code would be:
1928  *     rand + 5
1929  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1930  * the +5 is its argument.
1931  */
1932 
1933 STATIC void
S_check_uni(pTHX)1934 S_check_uni(pTHX)
1935 {
1936     const char *s;
1937 
1938     if (PL_oldoldbufptr != PL_last_uni)
1939 	return;
1940     while (isSPACE(*PL_last_uni))
1941 	PL_last_uni++;
1942     s = PL_last_uni;
1943     while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-')
1944 	s += UTF ? UTF8SKIP(s) : 1;
1945     if (s < PL_bufptr && memchr(s, '(', PL_bufptr - s))
1946 	return;
1947 
1948     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1949 		     "Warning: Use of \"%" UTF8f "\" without parentheses is ambiguous",
1950 		     UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
1951 }
1952 
1953 /*
1954  * LOP : macro to build a list operator.  Its behaviour has been replaced
1955  * with a subroutine, S_lop() for which LOP is just another name.
1956  */
1957 
1958 #define LOP(f,x) return lop(f,x,s)
1959 
1960 /*
1961  * S_lop
1962  * Build a list operator (or something that might be one).  The rules:
1963  *  - if we have a next token, then it's a list operator (no parens) for
1964  *    which the next token has already been parsed; e.g.,
1965  *       sort foo @args
1966  *       sort foo (@args)
1967  *  - if the next thing is an opening paren, then it's a function
1968  *  - else it's a list operator
1969  */
1970 
1971 STATIC I32
S_lop(pTHX_ I32 f,U8 x,char * s)1972 S_lop(pTHX_ I32 f, U8 x, char *s)
1973 {
1974     PERL_ARGS_ASSERT_LOP;
1975 
1976     pl_yylval.ival = f;
1977     CLINE;
1978     PL_bufptr = s;
1979     PL_last_lop = PL_oldbufptr;
1980     PL_last_lop_op = (OPCODE)f;
1981     if (PL_nexttoke)
1982 	goto lstop;
1983     PL_expect = x;
1984     if (*s == '(')
1985 	return REPORT(FUNC);
1986     s = skipspace(s);
1987     if (*s == '(')
1988 	return REPORT(FUNC);
1989     else {
1990 	lstop:
1991 	if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1992 	    PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1993 	return REPORT(LSTOP);
1994     }
1995 }
1996 
1997 /*
1998  * S_force_next
1999  * When the lexer realizes it knows the next token (for instance,
2000  * it is reordering tokens for the parser) then it can call S_force_next
2001  * to know what token to return the next time the lexer is called.  Caller
2002  * will need to set PL_nextval[] and possibly PL_expect to ensure
2003  * the lexer handles the token correctly.
2004  */
2005 
2006 STATIC void
S_force_next(pTHX_ I32 type)2007 S_force_next(pTHX_ I32 type)
2008 {
2009 #ifdef DEBUGGING
2010     if (DEBUG_T_TEST) {
2011         PerlIO_printf(Perl_debug_log, "### forced token:\n");
2012 	tokereport(type, &NEXTVAL_NEXTTOKE);
2013     }
2014 #endif
2015     assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
2016     PL_nexttype[PL_nexttoke] = type;
2017     PL_nexttoke++;
2018 }
2019 
2020 /*
2021  * S_postderef
2022  *
2023  * This subroutine handles postfix deref syntax after the arrow has already
2024  * been emitted.  @* $* etc. are emitted as two separate tokens right here.
2025  * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
2026  * only the first, leaving yylex to find the next.
2027  */
2028 
2029 static int
S_postderef(pTHX_ int const funny,char const next)2030 S_postderef(pTHX_ int const funny, char const next)
2031 {
2032     assert(funny == DOLSHARP || memCHRs("$@%&*", funny));
2033     if (next == '*') {
2034 	PL_expect = XOPERATOR;
2035 	if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
2036 	    assert('@' == funny || '$' == funny || DOLSHARP == funny);
2037 	    PL_lex_state = LEX_INTERPEND;
2038 	    if ('@' == funny)
2039 		force_next(POSTJOIN);
2040 	}
2041 	force_next(next);
2042 	PL_bufptr+=2;
2043     }
2044     else {
2045 	if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL
2046 	 && !PL_lex_brackets)
2047 	    PL_lex_dojoin = 2;
2048 	PL_expect = XOPERATOR;
2049 	PL_bufptr++;
2050     }
2051     return funny;
2052 }
2053 
2054 void
Perl_yyunlex(pTHX)2055 Perl_yyunlex(pTHX)
2056 {
2057     int yyc = PL_parser->yychar;
2058     if (yyc != YYEMPTY) {
2059 	if (yyc) {
2060 	    NEXTVAL_NEXTTOKE = PL_parser->yylval;
2061 	    if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
2062 		PL_lex_allbrackets--;
2063 		PL_lex_brackets--;
2064 		yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2065 	    } else if (yyc == '('/*)*/) {
2066 		PL_lex_allbrackets--;
2067 		yyc |= (2<<24);
2068 	    }
2069 	    force_next(yyc);
2070 	}
2071 	PL_parser->yychar = YYEMPTY;
2072     }
2073 }
2074 
2075 STATIC SV *
S_newSV_maybe_utf8(pTHX_ const char * const start,STRLEN len)2076 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2077 {
2078     SV * const sv = newSVpvn_utf8(start, len,
2079                     ! IN_BYTES
2080                   &&  UTF
2081                   &&  len != 0
2082                   &&  is_utf8_non_invariant_string((const U8*)start, len));
2083     return sv;
2084 }
2085 
2086 /*
2087  * S_force_word
2088  * When the lexer knows the next thing is a word (for instance, it has
2089  * just seen -> and it knows that the next char is a word char, then
2090  * it calls S_force_word to stick the next word into the PL_nexttoke/val
2091  * lookahead.
2092  *
2093  * Arguments:
2094  *   char *start : buffer position (must be within PL_linestr)
2095  *   int token   : PL_next* will be this type of bare word
2096  *                 (e.g., METHOD,BAREWORD)
2097  *   int check_keyword : if true, Perl checks to make sure the word isn't
2098  *       a keyword (do this if the word is a label, e.g. goto FOO)
2099  *   int allow_pack : if true, : characters will also be allowed (require,
2100  *       use, etc. do this)
2101  */
2102 
2103 STATIC char *
S_force_word(pTHX_ char * start,int token,int check_keyword,int allow_pack)2104 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2105 {
2106     char *s;
2107     STRLEN len;
2108 
2109     PERL_ARGS_ASSERT_FORCE_WORD;
2110 
2111     start = skipspace(start);
2112     s = start;
2113     if (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
2114         || (allow_pack && *s == ':' && s[1] == ':') )
2115     {
2116 	s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2117 	if (check_keyword) {
2118 	  char *s2 = PL_tokenbuf;
2119 	  STRLEN len2 = len;
2120 	  if (allow_pack && memBEGINPs(s2, len, "CORE::")) {
2121 	    s2 += sizeof("CORE::") - 1;
2122             len2 -= sizeof("CORE::") - 1;
2123           }
2124 	  if (keyword(s2, len2, 0))
2125 	    return start;
2126 	}
2127 	if (token == METHOD) {
2128 	    s = skipspace(s);
2129 	    if (*s == '(')
2130 		PL_expect = XTERM;
2131 	    else {
2132 		PL_expect = XOPERATOR;
2133 	    }
2134 	}
2135 	NEXTVAL_NEXTTOKE.opval
2136             = newSVOP(OP_CONST,0,
2137 			   S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2138 	NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2139 	force_next(token);
2140     }
2141     return s;
2142 }
2143 
2144 /*
2145  * S_force_ident
2146  * Called when the lexer wants $foo *foo &foo etc, but the program
2147  * text only contains the "foo" portion.  The first argument is a pointer
2148  * to the "foo", and the second argument is the type symbol to prefix.
2149  * Forces the next token to be a "BAREWORD".
2150  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2151  */
2152 
2153 STATIC void
S_force_ident(pTHX_ const char * s,int kind)2154 S_force_ident(pTHX_ const char *s, int kind)
2155 {
2156     PERL_ARGS_ASSERT_FORCE_IDENT;
2157 
2158     if (s[0]) {
2159 	const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2160         OP* const o = newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2161                                                                 UTF ? SVf_UTF8 : 0));
2162 	NEXTVAL_NEXTTOKE.opval = o;
2163 	force_next(BAREWORD);
2164 	if (kind) {
2165 	    o->op_private = OPpCONST_ENTERED;
2166 	    /* XXX see note in pp_entereval() for why we forgo typo
2167 	       warnings if the symbol must be introduced in an eval.
2168 	       GSAR 96-10-12 */
2169 	    gv_fetchpvn_flags(s, len,
2170 			      (PL_in_eval ? GV_ADDMULTI
2171 			      : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2172 			      kind == '$' ? SVt_PV :
2173 			      kind == '@' ? SVt_PVAV :
2174 			      kind == '%' ? SVt_PVHV :
2175 			      SVt_PVGV
2176 			      );
2177 	}
2178     }
2179 }
2180 
2181 static void
S_force_ident_maybe_lex(pTHX_ char pit)2182 S_force_ident_maybe_lex(pTHX_ char pit)
2183 {
2184     NEXTVAL_NEXTTOKE.ival = pit;
2185     force_next('p');
2186 }
2187 
2188 NV
Perl_str_to_version(pTHX_ SV * sv)2189 Perl_str_to_version(pTHX_ SV *sv)
2190 {
2191     NV retval = 0.0;
2192     NV nshift = 1.0;
2193     STRLEN len;
2194     const char *start = SvPV_const(sv,len);
2195     const char * const end = start + len;
2196     const bool utf = cBOOL(SvUTF8(sv));
2197 
2198     PERL_ARGS_ASSERT_STR_TO_VERSION;
2199 
2200     while (start < end) {
2201 	STRLEN skip;
2202 	UV n;
2203 	if (utf)
2204 	    n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2205 	else {
2206 	    n = *(U8*)start;
2207 	    skip = 1;
2208 	}
2209 	retval += ((NV)n)/nshift;
2210 	start += skip;
2211 	nshift *= 1000;
2212     }
2213     return retval;
2214 }
2215 
2216 /*
2217  * S_force_version
2218  * Forces the next token to be a version number.
2219  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2220  * and if "guessing" is TRUE, then no new token is created (and the caller
2221  * must use an alternative parsing method).
2222  */
2223 
2224 STATIC char *
S_force_version(pTHX_ char * s,int guessing)2225 S_force_version(pTHX_ char *s, int guessing)
2226 {
2227     OP *version = NULL;
2228     char *d;
2229 
2230     PERL_ARGS_ASSERT_FORCE_VERSION;
2231 
2232     s = skipspace(s);
2233 
2234     d = s;
2235     if (*d == 'v')
2236 	d++;
2237     if (isDIGIT(*d)) {
2238 	while (isDIGIT(*d) || *d == '_' || *d == '.')
2239 	    d++;
2240         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2241 	    SV *ver;
2242             s = scan_num(s, &pl_yylval);
2243             version = pl_yylval.opval;
2244 	    ver = cSVOPx(version)->op_sv;
2245 	    if (SvPOK(ver) && !SvNIOK(ver)) {
2246 		SvUPGRADE(ver, SVt_PVNV);
2247 		SvNV_set(ver, str_to_version(ver));
2248 		SvNOK_on(ver);		/* hint that it is a version */
2249 	    }
2250         }
2251 	else if (guessing) {
2252 	    return s;
2253 	}
2254     }
2255 
2256     /* NOTE: The parser sees the package name and the VERSION swapped */
2257     NEXTVAL_NEXTTOKE.opval = version;
2258     force_next(BAREWORD);
2259 
2260     return s;
2261 }
2262 
2263 /*
2264  * S_force_strict_version
2265  * Forces the next token to be a version number using strict syntax rules.
2266  */
2267 
2268 STATIC char *
S_force_strict_version(pTHX_ char * s)2269 S_force_strict_version(pTHX_ char *s)
2270 {
2271     OP *version = NULL;
2272     const char *errstr = NULL;
2273 
2274     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2275 
2276     while (isSPACE(*s)) /* leading whitespace */
2277 	s++;
2278 
2279     if (is_STRICT_VERSION(s,&errstr)) {
2280 	SV *ver = newSV(0);
2281 	s = (char *)scan_version(s, ver, 0);
2282 	version = newSVOP(OP_CONST, 0, ver);
2283     }
2284     else if ((*s != ';' && *s != '{' && *s != '}' )
2285              && (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' )))
2286     {
2287 	PL_bufptr = s;
2288 	if (errstr)
2289 	    yyerror(errstr); /* version required */
2290 	return s;
2291     }
2292 
2293     /* NOTE: The parser sees the package name and the VERSION swapped */
2294     NEXTVAL_NEXTTOKE.opval = version;
2295     force_next(BAREWORD);
2296 
2297     return s;
2298 }
2299 
2300 /*
2301  * S_tokeq
2302  * Turns any \\ into \ in a quoted string passed in in 'sv', returning 'sv',
2303  * modified as necessary.  However, if HINT_NEW_STRING is on, 'sv' is
2304  * unchanged, and a new SV containing the modified input is returned.
2305  */
2306 
2307 STATIC SV *
S_tokeq(pTHX_ SV * sv)2308 S_tokeq(pTHX_ SV *sv)
2309 {
2310     char *s;
2311     char *send;
2312     char *d;
2313     SV *pv = sv;
2314 
2315     PERL_ARGS_ASSERT_TOKEQ;
2316 
2317     assert (SvPOK(sv));
2318     assert (SvLEN(sv));
2319     assert (!SvIsCOW(sv));
2320     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2321 	goto finish;
2322     s = SvPVX(sv);
2323     send = SvEND(sv);
2324     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2325     while (s < send && !(*s == '\\' && s[1] == '\\'))
2326 	s++;
2327     if (s == send)
2328 	goto finish;
2329     d = s;
2330     if ( PL_hints & HINT_NEW_STRING ) {
2331 	pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2332 			    SVs_TEMP | SvUTF8(sv));
2333     }
2334     while (s < send) {
2335 	if (*s == '\\') {
2336 	    if (s + 1 < send && (s[1] == '\\'))
2337 		s++;		/* all that, just for this */
2338 	}
2339 	*d++ = *s++;
2340     }
2341     *d = '\0';
2342     SvCUR_set(sv, d - SvPVX_const(sv));
2343   finish:
2344     if ( PL_hints & HINT_NEW_STRING )
2345        return new_constant(NULL, 0, "q", sv, pv, "q", 1, NULL);
2346     return sv;
2347 }
2348 
2349 /*
2350  * Now come three functions related to double-quote context,
2351  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2352  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2353  * interact with PL_lex_state, and create fake ( ... ) argument lists
2354  * to handle functions and concatenation.
2355  * For example,
2356  *   "foo\lbar"
2357  * is tokenised as
2358  *    stringify ( const[foo] concat lcfirst ( const[bar] ) )
2359  */
2360 
2361 /*
2362  * S_sublex_start
2363  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2364  *
2365  * Pattern matching will set PL_lex_op to the pattern-matching op to
2366  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2367  *
2368  * OP_CONST is easy--just make the new op and return.
2369  *
2370  * Everything else becomes a FUNC.
2371  *
2372  * Sets PL_lex_state to LEX_INTERPPUSH unless ival was OP_NULL or we
2373  * had an OP_CONST.  This just sets us up for a
2374  * call to S_sublex_push().
2375  */
2376 
2377 STATIC I32
S_sublex_start(pTHX)2378 S_sublex_start(pTHX)
2379 {
2380     const I32 op_type = pl_yylval.ival;
2381 
2382     if (op_type == OP_NULL) {
2383 	pl_yylval.opval = PL_lex_op;
2384 	PL_lex_op = NULL;
2385 	return THING;
2386     }
2387     if (op_type == OP_CONST) {
2388 	SV *sv = PL_lex_stuff;
2389 	PL_lex_stuff = NULL;
2390 	sv = tokeq(sv);
2391 
2392 	if (SvTYPE(sv) == SVt_PVIV) {
2393 	    /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2394 	    STRLEN len;
2395 	    const char * const p = SvPV_const(sv, len);
2396 	    SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2397 	    SvREFCNT_dec(sv);
2398 	    sv = nsv;
2399 	}
2400         pl_yylval.opval = newSVOP(op_type, 0, sv);
2401 	return THING;
2402     }
2403 
2404     PL_parser->lex_super_state = PL_lex_state;
2405     PL_parser->lex_sub_inwhat = (U16)op_type;
2406     PL_parser->lex_sub_op = PL_lex_op;
2407     PL_parser->sub_no_recover = FALSE;
2408     PL_parser->sub_error_count = PL_error_count;
2409     PL_lex_state = LEX_INTERPPUSH;
2410 
2411     PL_expect = XTERM;
2412     if (PL_lex_op) {
2413 	pl_yylval.opval = PL_lex_op;
2414 	PL_lex_op = NULL;
2415 	return PMFUNC;
2416     }
2417     else
2418 	return FUNC;
2419 }
2420 
2421 /*
2422  * S_sublex_push
2423  * Create a new scope to save the lexing state.  The scope will be
2424  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2425  * to the uc, lc, etc. found before.
2426  * Sets PL_lex_state to LEX_INTERPCONCAT.
2427  */
2428 
2429 STATIC I32
S_sublex_push(pTHX)2430 S_sublex_push(pTHX)
2431 {
2432     LEXSHARED *shared;
2433     const bool is_heredoc = PL_multi_close == '<';
2434     ENTER;
2435 
2436     PL_lex_state = PL_parser->lex_super_state;
2437     SAVEI8(PL_lex_dojoin);
2438     SAVEI32(PL_lex_brackets);
2439     SAVEI32(PL_lex_allbrackets);
2440     SAVEI32(PL_lex_formbrack);
2441     SAVEI8(PL_lex_fakeeof);
2442     SAVEI32(PL_lex_casemods);
2443     SAVEI32(PL_lex_starts);
2444     SAVEI8(PL_lex_state);
2445     SAVESPTR(PL_lex_repl);
2446     SAVEVPTR(PL_lex_inpat);
2447     SAVEI16(PL_lex_inwhat);
2448     if (is_heredoc)
2449     {
2450 	SAVECOPLINE(PL_curcop);
2451 	SAVEI32(PL_multi_end);
2452 	SAVEI32(PL_parser->herelines);
2453 	PL_parser->herelines = 0;
2454     }
2455     SAVEIV(PL_multi_close);
2456     SAVEPPTR(PL_bufptr);
2457     SAVEPPTR(PL_bufend);
2458     SAVEPPTR(PL_oldbufptr);
2459     SAVEPPTR(PL_oldoldbufptr);
2460     SAVEPPTR(PL_last_lop);
2461     SAVEPPTR(PL_last_uni);
2462     SAVEPPTR(PL_linestart);
2463     SAVESPTR(PL_linestr);
2464     SAVEGENERICPV(PL_lex_brackstack);
2465     SAVEGENERICPV(PL_lex_casestack);
2466     SAVEGENERICPV(PL_parser->lex_shared);
2467     SAVEBOOL(PL_parser->lex_re_reparsing);
2468     SAVEI32(PL_copline);
2469 
2470     /* The here-doc parser needs to be able to peek into outer lexing
2471        scopes to find the body of the here-doc.  So we put PL_linestr and
2472        PL_bufptr into lex_shared, to ‘share’ those values.
2473      */
2474     PL_parser->lex_shared->ls_linestr = PL_linestr;
2475     PL_parser->lex_shared->ls_bufptr  = PL_bufptr;
2476 
2477     PL_linestr = PL_lex_stuff;
2478     PL_lex_repl = PL_parser->lex_sub_repl;
2479     PL_lex_stuff = NULL;
2480     PL_parser->lex_sub_repl = NULL;
2481 
2482     /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets
2483        set for an inner quote-like operator and then an error causes scope-
2484        popping.  We must not have a PL_lex_stuff value left dangling, as
2485        that breaks assumptions elsewhere.  See bug #123617.  */
2486     SAVEGENERICSV(PL_lex_stuff);
2487     SAVEGENERICSV(PL_parser->lex_sub_repl);
2488 
2489     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2490 	= SvPVX(PL_linestr);
2491     PL_bufend += SvCUR(PL_linestr);
2492     PL_last_lop = PL_last_uni = NULL;
2493     SAVEFREESV(PL_linestr);
2494     if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2495 
2496     PL_lex_dojoin = FALSE;
2497     PL_lex_brackets = PL_lex_formbrack = 0;
2498     PL_lex_allbrackets = 0;
2499     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2500     Newx(PL_lex_brackstack, 120, char);
2501     Newx(PL_lex_casestack, 12, char);
2502     PL_lex_casemods = 0;
2503     *PL_lex_casestack = '\0';
2504     PL_lex_starts = 0;
2505     PL_lex_state = LEX_INTERPCONCAT;
2506     if (is_heredoc)
2507 	CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2508     PL_copline = NOLINE;
2509 
2510     Newxz(shared, 1, LEXSHARED);
2511     shared->ls_prev = PL_parser->lex_shared;
2512     PL_parser->lex_shared = shared;
2513 
2514     PL_lex_inwhat = PL_parser->lex_sub_inwhat;
2515     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2516     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2517 	PL_lex_inpat = PL_parser->lex_sub_op;
2518     else
2519 	PL_lex_inpat = NULL;
2520 
2521     PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2522     PL_in_eval &= ~EVAL_RE_REPARSING;
2523 
2524     return SUBLEXSTART;
2525 }
2526 
2527 /*
2528  * S_sublex_done
2529  * Restores lexer state after a S_sublex_push.
2530  */
2531 
2532 STATIC I32
S_sublex_done(pTHX)2533 S_sublex_done(pTHX)
2534 {
2535     if (!PL_lex_starts++) {
2536 	SV * const sv = newSVpvs("");
2537 	if (SvUTF8(PL_linestr))
2538 	    SvUTF8_on(sv);
2539 	PL_expect = XOPERATOR;
2540         pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
2541 	return THING;
2542     }
2543 
2544     if (PL_lex_casemods) {		/* oops, we've got some unbalanced parens */
2545 	PL_lex_state = LEX_INTERPCASEMOD;
2546 	return yylex();
2547     }
2548 
2549     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2550     assert(PL_lex_inwhat != OP_TRANSR);
2551     if (PL_lex_repl) {
2552 	assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2553 	PL_linestr = PL_lex_repl;
2554 	PL_lex_inpat = 0;
2555 	PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2556 	PL_bufend += SvCUR(PL_linestr);
2557 	PL_last_lop = PL_last_uni = NULL;
2558 	PL_lex_dojoin = FALSE;
2559 	PL_lex_brackets = 0;
2560 	PL_lex_allbrackets = 0;
2561 	PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2562 	PL_lex_casemods = 0;
2563 	*PL_lex_casestack = '\0';
2564 	PL_lex_starts = 0;
2565 	if (SvEVALED(PL_lex_repl)) {
2566 	    PL_lex_state = LEX_INTERPNORMAL;
2567 	    PL_lex_starts++;
2568 	    /*	we don't clear PL_lex_repl here, so that we can check later
2569 		whether this is an evalled subst; that means we rely on the
2570 		logic to ensure sublex_done() is called again only via the
2571 		branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2572 	}
2573 	else {
2574 	    PL_lex_state = LEX_INTERPCONCAT;
2575 	    PL_lex_repl = NULL;
2576 	}
2577 	if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2578 	    CopLINE(PL_curcop) +=
2579 		((XPVNV*)SvANY(PL_linestr))->xnv_u.xnv_lines
2580 		 + PL_parser->herelines;
2581 	    PL_parser->herelines = 0;
2582 	}
2583 	return '/';
2584     }
2585     else {
2586 	const line_t l = CopLINE(PL_curcop);
2587 	LEAVE;
2588         if (PL_parser->sub_error_count != PL_error_count) {
2589             if (PL_parser->sub_no_recover) {
2590                 yyquit();
2591                 NOT_REACHED;
2592             }
2593         }
2594 	if (PL_multi_close == '<')
2595 	    PL_parser->herelines += l - PL_multi_end;
2596 	PL_bufend = SvPVX(PL_linestr);
2597 	PL_bufend += SvCUR(PL_linestr);
2598 	PL_expect = XOPERATOR;
2599 	return SUBLEXEND;
2600     }
2601 }
2602 
2603 HV *
Perl_load_charnames(pTHX_ SV * char_name,const char * context,const STRLEN context_len,const char ** error_msg)2604 Perl_load_charnames(pTHX_ SV * char_name, const char * context,
2605                           const STRLEN context_len, const char ** error_msg)
2606 {
2607     /* Load the official _charnames module if not already there.  The
2608      * parameters are just to give info for any error messages generated:
2609      *  char_name   a name to look up which is the reason for loading this
2610      *  context     'char_name' in the context in the input in which it appears
2611      *  context_len how many bytes 'context' occupies
2612      *  error_msg   *error_msg will be set to any error
2613      *
2614      *  Returns the ^H table if success; otherwise NULL */
2615 
2616     unsigned int i;
2617     HV * table;
2618     SV **cvp;
2619     SV * res;
2620 
2621     PERL_ARGS_ASSERT_LOAD_CHARNAMES;
2622 
2623     /* This loop is executed 1 1/2 times.  On the first time through, if it
2624      * isn't already loaded, try loading it, and iterate just once to see if it
2625      * worked.  */
2626     for (i = 0; i < 2; i++) {
2627         table = GvHV(PL_hintgv);		 /* ^H */
2628 
2629         if (    table
2630             && (PL_hints & HINT_LOCALIZE_HH)
2631             && (cvp = hv_fetchs(table, "charnames", FALSE))
2632             &&  SvOK(*cvp))
2633         {
2634             return table;   /* Quit if already loaded */
2635         }
2636 
2637         if (i == 0) {
2638             Perl_load_module(aTHX_
2639                 0,
2640                 newSVpvs("_charnames"),
2641 
2642                 /* version parameter; no need to specify it, as if we get too early
2643                 * a version, will fail anyway, not being able to find 'charnames'
2644                 * */
2645                 NULL,
2646                 newSVpvs(":full"),
2647                 newSVpvs(":short"),
2648                 NULL);
2649         }
2650     }
2651 
2652     /* Here, it failed; new_constant will give appropriate error messages */
2653     *error_msg = NULL;
2654     res = new_constant( NULL, 0, "charnames", char_name, NULL,
2655                         context, context_len, error_msg);
2656     SvREFCNT_dec(res);
2657 
2658     return NULL;
2659 }
2660 
2661 STATIC SV*
S_get_and_check_backslash_N_name_wrapper(pTHX_ const char * s,const char * const e)2662 S_get_and_check_backslash_N_name_wrapper(pTHX_ const char* s, const char* const e)
2663 {
2664     /* This justs wraps get_and_check_backslash_N_name() to output any error
2665      * message it returns. */
2666 
2667     const char * error_msg = NULL;
2668     SV * result;
2669 
2670     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME_WRAPPER;
2671 
2672     /* charnames doesn't work well if there have been errors found */
2673     if (PL_error_count > 0) {
2674 	return NULL;
2675     }
2676 
2677     result = get_and_check_backslash_N_name(s, e, cBOOL(UTF), &error_msg);
2678 
2679     if (error_msg) {
2680         yyerror_pv(error_msg, UTF ? SVf_UTF8 : 0);
2681     }
2682 
2683     return result;
2684 }
2685 
2686 SV*
Perl_get_and_check_backslash_N_name(pTHX_ const char * s,const char * const e,const bool is_utf8,const char ** error_msg)2687 Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
2688                                           const char* const e,
2689                                           const bool is_utf8,
2690                                           const char ** error_msg)
2691 {
2692     /* <s> points to first character of interior of \N{}, <e> to one beyond the
2693      * interior, hence to the "}".  Finds what the name resolves to, returning
2694      * an SV* containing it; NULL if no valid one found.
2695      *
2696      * 'is_utf8' is TRUE if we know we want the result to be UTF-8 even if it
2697      * doesn't have to be. */
2698 
2699     SV* char_name;
2700     SV* res;
2701     HV * table;
2702     SV **cvp;
2703     SV *cv;
2704     SV *rv;
2705     HV *stash;
2706 
2707     /* Points to the beginning of the \N{... so that any messages include the
2708      * context of what's failing*/
2709     const char* context = s - 3;
2710     STRLEN context_len = e - context + 1; /* include all of \N{...} */
2711 
2712     dVAR;
2713 
2714     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2715 
2716     assert(e >= s);
2717     assert(s > (char *) 3);
2718 
2719     char_name = newSVpvn_flags(s, e - s, (is_utf8) ? SVf_UTF8 : 0);
2720 
2721     if (!SvCUR(char_name)) {
2722         SvREFCNT_dec_NN(char_name);
2723         /* diag_listed_as: Unknown charname '%s' */
2724         *error_msg = Perl_form(aTHX_ "Unknown charname ''");
2725         return NULL;
2726     }
2727 
2728     /* Autoload the charnames module */
2729 
2730     table = load_charnames(char_name, context, context_len, error_msg);
2731     if (table == NULL) {
2732         return NULL;
2733     }
2734 
2735     *error_msg = NULL;
2736     res = new_constant( NULL, 0, "charnames", char_name, NULL,
2737                         context, context_len, error_msg);
2738     if (*error_msg) {
2739         *error_msg = Perl_form(aTHX_ "Unknown charname '%s'", SvPVX(char_name));
2740 
2741         SvREFCNT_dec(res);
2742         return NULL;
2743     }
2744 
2745     /* See if the charnames handler is the Perl core's, and if so, we can skip
2746      * the validation needed for a user-supplied one, as Perl's does its own
2747      * validation. */
2748     cvp = hv_fetchs(table, "charnames", FALSE);
2749     if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2750         SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2751     {
2752         const char * const name = HvNAME(stash);
2753          if (memEQs(name, HvNAMELEN(stash), "_charnames")) {
2754            return res;
2755        }
2756     }
2757 
2758     /* Here, it isn't Perl's charname handler.  We can't rely on a
2759      * user-supplied handler to validate the input name.  For non-ut8 input,
2760      * look to see that the first character is legal.  Then loop through the
2761      * rest checking that each is a continuation */
2762 
2763     /* This code makes the reasonable assumption that the only Latin1-range
2764      * characters that begin a character name alias are alphabetic, otherwise
2765      * would have to create a isCHARNAME_BEGIN macro */
2766 
2767     if (! is_utf8) {
2768         if (! isALPHAU(*s)) {
2769             goto bad_charname;
2770         }
2771         s++;
2772         while (s < e) {
2773             if (! isCHARNAME_CONT(*s)) {
2774                 goto bad_charname;
2775             }
2776 	    if (*s == ' ' && *(s-1) == ' ') {
2777                 goto multi_spaces;
2778             }
2779             s++;
2780         }
2781     }
2782     else {
2783         /* Similarly for utf8.  For invariants can check directly; for other
2784          * Latin1, can calculate their code point and check; otherwise  use an
2785          * inversion list */
2786         if (UTF8_IS_INVARIANT(*s)) {
2787             if (! isALPHAU(*s)) {
2788                 goto bad_charname;
2789             }
2790             s++;
2791         } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2792             if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) {
2793                 goto bad_charname;
2794             }
2795             s += 2;
2796         }
2797         else {
2798             if (! _invlist_contains_cp(PL_utf8_charname_begin,
2799                                        utf8_to_uvchr_buf((U8 *) s,
2800                                                          (U8 *) e,
2801                                                          NULL)))
2802             {
2803                 goto bad_charname;
2804             }
2805             s += UTF8SKIP(s);
2806         }
2807 
2808         while (s < e) {
2809             if (UTF8_IS_INVARIANT(*s)) {
2810                 if (! isCHARNAME_CONT(*s)) {
2811                     goto bad_charname;
2812                 }
2813                 if (*s == ' ' && *(s-1) == ' ') {
2814                     goto multi_spaces;
2815                 }
2816                 s++;
2817             }
2818             else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2819                 if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1))))
2820                 {
2821                     goto bad_charname;
2822                 }
2823                 s += 2;
2824             }
2825             else {
2826                 if (! _invlist_contains_cp(PL_utf8_charname_continue,
2827                                            utf8_to_uvchr_buf((U8 *) s,
2828                                                              (U8 *) e,
2829                                                              NULL)))
2830                 {
2831                     goto bad_charname;
2832                 }
2833                 s += UTF8SKIP(s);
2834             }
2835         }
2836     }
2837     if (*(s-1) == ' ') {
2838         /* diag_listed_as: charnames alias definitions may not contain
2839                            trailing white-space; marked by <-- HERE in %s
2840          */
2841         *error_msg = Perl_form(aTHX_
2842             "charnames alias definitions may not contain trailing "
2843             "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
2844             (int)(s - context + 1), context,
2845             (int)(e - s + 1), s + 1);
2846         return NULL;
2847     }
2848 
2849     if (SvUTF8(res)) { /* Don't accept malformed charname value */
2850         const U8* first_bad_char_loc;
2851         STRLEN len;
2852         const char* const str = SvPV_const(res, len);
2853         if (UNLIKELY(! is_utf8_string_loc((U8 *) str, len,
2854                                           &first_bad_char_loc)))
2855         {
2856             _force_out_malformed_utf8_message(first_bad_char_loc,
2857                                               (U8 *) PL_parser->bufend,
2858                                               0,
2859                                               0 /* 0 means don't die */ );
2860             /* diag_listed_as: Malformed UTF-8 returned by \N{%s}
2861                                immediately after '%s' */
2862             *error_msg = Perl_form(aTHX_
2863                 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2864                  (int) context_len, context,
2865                  (int) ((char *) first_bad_char_loc - str), str);
2866             return NULL;
2867         }
2868     }
2869 
2870     return res;
2871 
2872   bad_charname: {
2873 
2874         /* The final %.*s makes sure that should the trailing NUL be missing
2875          * that this print won't run off the end of the string */
2876         /* diag_listed_as: Invalid character in \N{...}; marked by <-- HERE
2877                            in \N{%s} */
2878         *error_msg = Perl_form(aTHX_
2879             "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2880             (int)(s - context + 1), context,
2881             (int)(e - s + 1), s + 1);
2882         return NULL;
2883     }
2884 
2885   multi_spaces:
2886         /* diag_listed_as: charnames alias definitions may not contain a
2887                            sequence of multiple spaces; marked by <-- HERE
2888                            in %s */
2889         *error_msg = Perl_form(aTHX_
2890             "charnames alias definitions may not contain a sequence of "
2891             "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
2892             (int)(s - context + 1), context,
2893             (int)(e - s + 1), s + 1);
2894         return NULL;
2895 }
2896 
2897 /*
2898   scan_const
2899 
2900   Extracts the next constant part of a pattern, double-quoted string,
2901   or transliteration.  This is terrifying code.
2902 
2903   For example, in parsing the double-quoted string "ab\x63$d", it would
2904   stop at the '$' and return an OP_CONST containing 'abc'.
2905 
2906   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2907   processing a pattern (PL_lex_inpat is true), a transliteration
2908   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2909 
2910   Returns a pointer to the character scanned up to. If this is
2911   advanced from the start pointer supplied (i.e. if anything was
2912   successfully parsed), will leave an OP_CONST for the substring scanned
2913   in pl_yylval. Caller must intuit reason for not parsing further
2914   by looking at the next characters herself.
2915 
2916   In patterns:
2917     expand:
2918       \N{FOO}  => \N{U+hex_for_character_FOO}
2919       (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
2920 
2921     pass through:
2922 	all other \-char, including \N and \N{ apart from \N{ABC}
2923 
2924     stops on:
2925 	@ and $ where it appears to be a var, but not for $ as tail anchor
2926         \l \L \u \U \Q \E
2927 	(?{  or  (??{
2928 
2929   In transliterations:
2930     characters are VERY literal, except for - not at the start or end
2931     of the string, which indicates a range.  However some backslash sequences
2932     are recognized: \r, \n, and the like
2933                     \007 \o{}, \x{}, \N{}
2934     If all elements in the transliteration are below 256,
2935     scan_const expands the range to the full set of intermediate
2936     characters. If the range is in utf8, the hyphen is replaced with
2937     a certain range mark which will be handled by pmtrans() in op.c.
2938 
2939   In double-quoted strings:
2940     backslashes:
2941       all those recognized in transliterations
2942       deprecated backrefs: \1 (in substitution replacements)
2943       case and quoting: \U \Q \E
2944     stops on @ and $
2945 
2946   scan_const does *not* construct ops to handle interpolated strings.
2947   It stops processing as soon as it finds an embedded $ or @ variable
2948   and leaves it to the caller to work out what's going on.
2949 
2950   embedded arrays (whether in pattern or not) could be:
2951       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2952 
2953   $ in double-quoted strings must be the symbol of an embedded scalar.
2954 
2955   $ in pattern could be $foo or could be tail anchor.  Assumption:
2956   it's a tail anchor if $ is the last thing in the string, or if it's
2957   followed by one of "()| \r\n\t"
2958 
2959   \1 (backreferences) are turned into $1 in substitutions
2960 
2961   The structure of the code is
2962       while (there's a character to process) {
2963 	  handle transliteration ranges
2964 	  skip regexp comments /(?#comment)/ and codes /(?{code})/
2965 	  skip #-initiated comments in //x patterns
2966 	  check for embedded arrays
2967 	  check for embedded scalars
2968 	  if (backslash) {
2969 	      deprecate \1 in substitution replacements
2970 	      handle string-changing backslashes \l \U \Q \E, etc.
2971 	      switch (what was escaped) {
2972 		  handle \- in a transliteration (becomes a literal -)
2973 		  if a pattern and not \N{, go treat as regular character
2974 		  handle \132 (octal characters)
2975 		  handle \x15 and \x{1234} (hex characters)
2976 		  handle \N{name} (named characters, also \N{3,5} in a pattern)
2977 		  handle \cV (control characters)
2978 		  handle printf-style backslashes (\f, \r, \n, etc)
2979 	      } (end switch)
2980 	      continue
2981 	  } (end if backslash)
2982           handle regular character
2983     } (end while character to read)
2984 
2985 */
2986 
2987 STATIC char *
S_scan_const(pTHX_ char * start)2988 S_scan_const(pTHX_ char *start)
2989 {
2990     char *send = PL_bufend;		/* end of the constant */
2991     SV *sv = newSV(send - start);       /* sv for the constant.  See note below
2992                                            on sizing. */
2993     char *s = start;			/* start of the constant */
2994     char *d = SvPVX(sv);		/* destination for copies */
2995     bool dorange = FALSE;               /* are we in a translit range? */
2996     bool didrange = FALSE;              /* did we just finish a range? */
2997     bool in_charclass = FALSE;          /* within /[...]/ */
2998     bool s_is_utf8 = cBOOL(UTF);        /* Is the source string assumed to be
2999                                            UTF8?  But, this can show as true
3000                                            when the source isn't utf8, as for
3001                                            example when it is entirely composed
3002                                            of hex constants */
3003     bool d_is_utf8 = FALSE;             /* Output constant is UTF8 */
3004     STRLEN utf8_variant_count = 0;      /* When not in UTF-8, this counts the
3005                                            number of characters found so far
3006                                            that will expand (into 2 bytes)
3007                                            should we have to convert to
3008                                            UTF-8) */
3009     SV *res;		                /* result from charnames */
3010     STRLEN offset_to_max = 0;   /* The offset in the output to where the range
3011                                    high-end character is temporarily placed */
3012 
3013     /* Does something require special handling in tr/// ?  This avoids extra
3014      * work in a less likely case.  As such, khw didn't feel it was worth
3015      * adding any branches to the more mainline code to handle this, which
3016      * means that this doesn't get set in some circumstances when things like
3017      * \x{100} get expanded out.  As a result there needs to be extra testing
3018      * done in the tr code */
3019     bool has_above_latin1 = FALSE;
3020 
3021     /* Note on sizing:  The scanned constant is placed into sv, which is
3022      * initialized by newSV() assuming one byte of output for every byte of
3023      * input.  This routine expects newSV() to allocate an extra byte for a
3024      * trailing NUL, which this routine will append if it gets to the end of
3025      * the input.  There may be more bytes of input than output (eg., \N{LATIN
3026      * CAPITAL LETTER A}), or more output than input if the constant ends up
3027      * recoded to utf8, but each time a construct is found that might increase
3028      * the needed size, SvGROW() is called.  Its size parameter each time is
3029      * based on the best guess estimate at the time, namely the length used so
3030      * far, plus the length the current construct will occupy, plus room for
3031      * the trailing NUL, plus one byte for every input byte still unscanned */
3032 
3033     UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
3034                        before set */
3035 #ifdef EBCDIC
3036     int backslash_N = 0;            /* ? was the character from \N{} */
3037     int non_portable_endpoint = 0;  /* ? In a range is an endpoint
3038                                        platform-specific like \x65 */
3039 #endif
3040 
3041     PERL_ARGS_ASSERT_SCAN_CONST;
3042 
3043     assert(PL_lex_inwhat != OP_TRANSR);
3044 
3045     /* Protect sv from errors and fatal warnings. */
3046     ENTER_with_name("scan_const");
3047     SAVEFREESV(sv);
3048 
3049     /* A bunch of code in the loop below assumes that if s[n] exists and is not
3050      * NUL, then s[n+1] exists.  This assertion makes sure that assumption is
3051      * valid */
3052     assert(*send == '\0');
3053 
3054     while (s < send
3055            || dorange   /* Handle tr/// range at right edge of input */
3056     ) {
3057 
3058         /* get transliterations out of the way (they're most literal) */
3059 	if (PL_lex_inwhat == OP_TRANS) {
3060 
3061             /* But there isn't any special handling necessary unless there is a
3062              * range, so for most cases we just drop down and handle the value
3063              * as any other.  There are two exceptions.
3064              *
3065              * 1.  A hyphen indicates that we are actually going to have a
3066              *     range.  In this case, skip the '-', set a flag, then drop
3067              *     down to handle what should be the end range value.
3068              * 2.  After we've handled that value, the next time through, that
3069              *     flag is set and we fix up the range.
3070              *
3071              * Ranges entirely within Latin1 are expanded out entirely, in
3072              * order to make the transliteration a simple table look-up.
3073              * Ranges that extend above Latin1 have to be done differently, so
3074              * there is no advantage to expanding them here, so they are
3075              * stored here as Min, RANGE_INDICATOR, Max.  'RANGE_INDICATOR' is
3076              * a byte that can't occur in legal UTF-8, and hence can signify a
3077              * hyphen without any possible ambiguity.  On EBCDIC machines, if
3078              * the range is expressed as Unicode, the Latin1 portion is
3079              * expanded out even if the range extends above Latin1.  This is
3080              * because each code point in it has to be processed here
3081              * individually to get its native translation */
3082 
3083 	    if (! dorange) {
3084 
3085                 /* Here, we don't think we're in a range.  If the new character
3086                  * is not a hyphen; or if it is a hyphen, but it's too close to
3087                  * either edge to indicate a range, or if we haven't output any
3088                  * characters yet then it's a regular character. */
3089                 if (*s != '-' || s >= send - 1 || s == start || d == SvPVX(sv))
3090                 {
3091 
3092                     /* A regular character.  Process like any other, but first
3093                      * clear any flags */
3094                     didrange = FALSE;
3095                     dorange = FALSE;
3096 #ifdef EBCDIC
3097                     non_portable_endpoint = 0;
3098                     backslash_N = 0;
3099 #endif
3100                     /* The tests here for being above Latin1 and similar ones
3101                      * in the following 'else' suffice to find all such
3102                      * occurences in the constant, except those added by a
3103                      * backslash escape sequence, like \x{100}.  Mostly, those
3104                      * set 'has_above_latin1' as appropriate */
3105                     if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3106                         has_above_latin1 = TRUE;
3107                     }
3108 
3109                     /* Drops down to generic code to process current byte */
3110                 }
3111                 else {  /* Is a '-' in the context where it means a range */
3112                     if (didrange) { /* Something like y/A-C-Z// */
3113                         Perl_croak(aTHX_ "Ambiguous range in transliteration"
3114                                          " operator");
3115                     }
3116 
3117                     dorange = TRUE;
3118 
3119                     s++;    /* Skip past the hyphen */
3120 
3121                     /* d now points to where the end-range character will be
3122                      * placed.  Drop down to get that character.  We'll finish
3123                      * processing the range the next time through the loop */
3124 
3125                     if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3126                         has_above_latin1 = TRUE;
3127                     }
3128 
3129                     /* Drops down to generic code to process current byte */
3130                 }
3131             }  /* End of not a range */
3132             else {
3133                 /* Here we have parsed a range.  Now must handle it.  At this
3134                  * point:
3135                  * 'sv' is a SV* that contains the output string we are
3136                  *      constructing.  The final two characters in that string
3137                  *      are the range start and range end, in order.
3138                  * 'd'  points to just beyond the range end in the 'sv' string,
3139                  *      where we would next place something
3140                  */
3141                 char * max_ptr;
3142                 char * min_ptr;
3143                 IV range_min;
3144 		IV range_max;	/* last character in range */
3145                 STRLEN grow;
3146                 Size_t offset_to_min = 0;
3147                 Size_t extras = 0;
3148 #ifdef EBCDIC
3149                 bool convert_unicode;
3150                 IV real_range_max = 0;
3151 #endif
3152                 /* Get the code point values of the range ends. */
3153                 max_ptr = (d_is_utf8) ? (char *) utf8_hop( (U8*) d, -1) : d - 1;
3154                 offset_to_max = max_ptr - SvPVX_const(sv);
3155                 if (d_is_utf8) {
3156                     /* We know the utf8 is valid, because we just constructed
3157                      * it ourselves in previous loop iterations */
3158                     min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
3159                     range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL);
3160                     range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL);
3161 
3162                     /* This compensates for not all code setting
3163                      * 'has_above_latin1', so that we don't skip stuff that
3164                      * should be executed */
3165                     if (range_max > 255) {
3166                         has_above_latin1 = TRUE;
3167                     }
3168                 }
3169                 else {
3170                     min_ptr = max_ptr - 1;
3171                     range_min = * (U8*) min_ptr;
3172                     range_max = * (U8*) max_ptr;
3173                 }
3174 
3175                 /* If the range is just a single code point, like tr/a-a/.../,
3176                  * that code point is already in the output, twice.  We can
3177                  * just back up over the second instance and avoid all the rest
3178                  * of the work.  But if it is a variant character, it's been
3179                  * counted twice, so decrement.  (This unlikely scenario is
3180                  * special cased, like the one for a range of 2 code points
3181                  * below, only because the main-line code below needs a range
3182                  * of 3 or more to work without special casing.  Might as well
3183                  * get it out of the way now.) */
3184                 if (UNLIKELY(range_max == range_min)) {
3185                     d = max_ptr;
3186                     if (! d_is_utf8 && ! UVCHR_IS_INVARIANT(range_max)) {
3187                         utf8_variant_count--;
3188                     }
3189                     goto range_done;
3190                 }
3191 
3192 #ifdef EBCDIC
3193                 /* On EBCDIC platforms, we may have to deal with portable
3194                  * ranges.  These happen if at least one range endpoint is a
3195                  * Unicode value (\N{...}), or if the range is a subset of
3196                  * [A-Z] or [a-z], and both ends are literal characters,
3197                  * like 'A', and not like \x{C1} */
3198                 convert_unicode =
3199                                cBOOL(backslash_N)   /* \N{} forces Unicode,
3200                                                        hence portable range */
3201                     || (     ! non_portable_endpoint
3202                         && ((  isLOWER_A(range_min) && isLOWER_A(range_max))
3203                            || (isUPPER_A(range_min) && isUPPER_A(range_max))));
3204                 if (convert_unicode) {
3205 
3206                     /* Special handling is needed for these portable ranges.
3207                      * They are defined to be in Unicode terms, which includes
3208                      * all the Unicode code points between the end points.
3209                      * Convert to Unicode to get the Unicode range.  Later we
3210                      * will convert each code point in the range back to
3211                      * native.  */
3212                     range_min = NATIVE_TO_UNI(range_min);
3213                     range_max = NATIVE_TO_UNI(range_max);
3214                 }
3215 #endif
3216 
3217                 if (range_min > range_max) {
3218 #ifdef EBCDIC
3219                     if (convert_unicode) {
3220                         /* Need to convert back to native for meaningful
3221                          * messages for this platform */
3222                         range_min = UNI_TO_NATIVE(range_min);
3223                         range_max = UNI_TO_NATIVE(range_max);
3224                     }
3225 #endif
3226                     /* Use the characters themselves for the error message if
3227                      * ASCII printables; otherwise some visible representation
3228                      * of them */
3229                     if (isPRINT_A(range_min) && isPRINT_A(range_max)) {
3230                         Perl_croak(aTHX_
3231 			 "Invalid range \"%c-%c\" in transliteration operator",
3232 			 (char)range_min, (char)range_max);
3233                     }
3234 #ifdef EBCDIC
3235                     else if (convert_unicode) {
3236         /* diag_listed_as: Invalid range "%s" in transliteration operator */
3237                         Perl_croak(aTHX_
3238                            "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04"
3239                            UVXf "}\" in transliteration operator",
3240                            range_min, range_max);
3241                     }
3242 #endif
3243                     else {
3244         /* diag_listed_as: Invalid range "%s" in transliteration operator */
3245                         Perl_croak(aTHX_
3246                            "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\""
3247                            " in transliteration operator",
3248                            range_min, range_max);
3249                     }
3250                 }
3251 
3252                 /* If the range is exactly two code points long, they are
3253                  * already both in the output */
3254                 if (UNLIKELY(range_min + 1 == range_max)) {
3255                     goto range_done;
3256                 }
3257 
3258                 /* Here the range contains at least 3 code points */
3259 
3260 		if (d_is_utf8) {
3261 
3262                     /* If everything in the transliteration is below 256, we
3263                      * can avoid special handling later.  A translation table
3264                      * for each of those bytes is created by op.c.  So we
3265                      * expand out all ranges to their constituent code points.
3266                      * But if we've encountered something above 255, the
3267                      * expanding won't help, so skip doing that.  But if it's
3268                      * EBCDIC, we may have to look at each character below 256
3269                      * if we have to convert to/from Unicode values */
3270                     if (   has_above_latin1
3271 #ifdef EBCDIC
3272 		        && (range_min > 255 || ! convert_unicode)
3273 #endif
3274                     ) {
3275                         const STRLEN off = d - SvPVX(sv);
3276                         const STRLEN extra = 1 + (send - s) + 1;
3277                         char *e;
3278 
3279                         /* Move the high character one byte to the right; then
3280                          * insert between it and the range begin, an illegal
3281                          * byte which serves to indicate this is a range (using
3282                          * a '-' would be ambiguous). */
3283 
3284                         if (off + extra > SvLEN(sv)) {
3285                             d = off + SvGROW(sv, off + extra);
3286                             max_ptr = d - off + offset_to_max;
3287                         }
3288 
3289                         e = d++;
3290                         while (e-- > max_ptr) {
3291                             *(e + 1) = *e;
3292                         }
3293                         *(e + 1) = (char) RANGE_INDICATOR;
3294                         goto range_done;
3295                     }
3296 
3297                     /* Here, we're going to expand out the range.  For EBCDIC
3298                      * the range can extend above 255 (not so in ASCII), so
3299                      * for EBCDIC, split it into the parts above and below
3300                      * 255/256 */
3301 #ifdef EBCDIC
3302                     if (range_max > 255) {
3303                         real_range_max = range_max;
3304                         range_max = 255;
3305                     }
3306 #endif
3307 		}
3308 
3309                 /* Here we need to expand out the string to contain each
3310                  * character in the range.  Grow the output to handle this.
3311                  * For non-UTF8, we need a byte for each code point in the
3312                  * range, minus the three that we've already allocated for: the
3313                  * hyphen, the min, and the max.  For UTF-8, we need this
3314                  * plus an extra byte for each code point that occupies two
3315                  * bytes (is variant) when in UTF-8 (except we've already
3316                  * allocated for the end points, including if they are
3317                  * variants).  For ASCII platforms and Unicode ranges on EBCDIC
3318                  * platforms, it's easy to calculate a precise number.  To
3319                  * start, we count the variants in the range, which we need
3320                  * elsewhere in this function anyway.  (For the case where it
3321                  * isn't easy to calculate, 'extras' has been initialized to 0,
3322                  * and the calculation is done in a loop further down.) */
3323 #ifdef EBCDIC
3324                 if (convert_unicode)
3325 #endif
3326                 {
3327                     /* This is executed unconditionally on ASCII, and for
3328                      * Unicode ranges on EBCDIC.  Under these conditions, all
3329                      * code points above a certain value are variant; and none
3330                      * under that value are.  We just need to find out how much
3331                      * of the range is above that value.  We don't count the
3332                      * end points here, as they will already have been counted
3333                      * as they were parsed. */
3334                     if (range_min >= UTF_CONTINUATION_MARK) {
3335 
3336                         /* The whole range is made up of variants */
3337                         extras = (range_max - 1) - (range_min + 1) + 1;
3338                     }
3339                     else if (range_max >= UTF_CONTINUATION_MARK) {
3340 
3341                         /* Only the higher portion of the range is variants */
3342                         extras = (range_max - 1) - UTF_CONTINUATION_MARK + 1;
3343                     }
3344 
3345                     utf8_variant_count += extras;
3346                 }
3347 
3348                 /* The base growth is the number of code points in the range,
3349                  * not including the endpoints, which have already been sized
3350                  * for (and output).  We don't subtract for the hyphen, as it
3351                  * has been parsed but not output, and the SvGROW below is
3352                  * based only on what's been output plus what's left to parse.
3353                  * */
3354                 grow = (range_max - 1) - (range_min + 1) + 1;
3355 
3356                 if (d_is_utf8) {
3357 #ifdef EBCDIC
3358                     /* In some cases in EBCDIC, we haven't yet calculated a
3359                      * precise amount needed for the UTF-8 variants.  Just
3360                      * assume the worst case, that everything will expand by a
3361                      * byte */
3362                     if (! convert_unicode) {
3363                         grow *= 2;
3364                     }
3365                     else
3366 #endif
3367                     {
3368                         /* Otherwise we know exactly how many variants there
3369                          * are in the range. */
3370                         grow += extras;
3371                     }
3372                 }
3373 
3374                 /* Grow, but position the output to overwrite the range min end
3375                  * point, because in some cases we overwrite that */
3376                 SvCUR_set(sv, d - SvPVX_const(sv));
3377                 offset_to_min = min_ptr - SvPVX_const(sv);
3378 
3379                 /* See Note on sizing above. */
3380                 d = offset_to_min + SvGROW(sv, SvCUR(sv)
3381                                              + (send - s)
3382                                              + grow
3383                                              + 1 /* Trailing NUL */ );
3384 
3385                 /* Now, we can expand out the range. */
3386 #ifdef EBCDIC
3387                 if (convert_unicode) {
3388                     SSize_t i;
3389 
3390                     /* Recall that the min and max are now in Unicode terms, so
3391                      * we have to convert each character to its native
3392                      * equivalent */
3393                     if (d_is_utf8) {
3394                         for (i = range_min; i <= range_max; i++) {
3395                             append_utf8_from_native_byte(
3396                                                     LATIN1_TO_NATIVE((U8) i),
3397                                                     (U8 **) &d);
3398                         }
3399                     }
3400                     else {
3401                         for (i = range_min; i <= range_max; i++) {
3402                             *d++ = (char)LATIN1_TO_NATIVE((U8) i);
3403                         }
3404 		    }
3405 		}
3406                 else
3407 #endif
3408                 /* Always gets run for ASCII, and sometimes for EBCDIC. */
3409                 {
3410                     /* Here, no conversions are necessary, which means that the
3411                      * first character in the range is already in 'd' and
3412                      * valid, so we can skip overwriting it */
3413                     if (d_is_utf8) {
3414                         SSize_t i;
3415                         d += UTF8SKIP(d);
3416                         for (i = range_min + 1; i <= range_max; i++) {
3417                             append_utf8_from_native_byte((U8) i, (U8 **) &d);
3418                         }
3419                     }
3420                     else {
3421                         SSize_t i;
3422                         d++;
3423                         assert(range_min + 1 <= range_max);
3424                         for (i = range_min + 1; i < range_max; i++) {
3425 #ifdef EBCDIC
3426                             /* In this case on EBCDIC, we haven't calculated
3427                              * the variants.  Do it here, as we go along */
3428                             if (! UVCHR_IS_INVARIANT(i)) {
3429                                 utf8_variant_count++;
3430                             }
3431 #endif
3432                             *d++ = (char)i;
3433                         }
3434 
3435                         /* The range_max is done outside the loop so as to
3436                          * avoid having to special case not incrementing
3437                          * 'utf8_variant_count' on EBCDIC (it's already been
3438                          * counted when originally parsed) */
3439                         *d++ = (char) range_max;
3440 		    }
3441 		}
3442 
3443 #ifdef EBCDIC
3444                 /* If the original range extended above 255, add in that
3445                  * portion. */
3446                 if (real_range_max) {
3447                     *d++ = (char) UTF8_TWO_BYTE_HI(0x100);
3448                     *d++ = (char) UTF8_TWO_BYTE_LO(0x100);
3449                     if (real_range_max > 0x100) {
3450                         if (real_range_max > 0x101) {
3451                             *d++ = (char) RANGE_INDICATOR;
3452                         }
3453                         d = (char*)uvchr_to_utf8((U8*)d, real_range_max);
3454                     }
3455                 }
3456 #endif
3457 
3458               range_done:
3459 		/* mark the range as done, and continue */
3460 		didrange = TRUE;
3461 		dorange = FALSE;
3462 #ifdef EBCDIC
3463 		non_portable_endpoint = 0;
3464                 backslash_N = 0;
3465 #endif
3466 		continue;
3467 	    } /* End of is a range */
3468         } /* End of transliteration.  Joins main code after these else's */
3469 	else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3470 	    char *s1 = s-1;
3471 	    int esc = 0;
3472 	    while (s1 >= start && *s1-- == '\\')
3473 		esc = !esc;
3474 	    if (!esc)
3475 		in_charclass = TRUE;
3476 	}
3477 	else if (*s == ']' && PL_lex_inpat && in_charclass) {
3478 	    char *s1 = s-1;
3479 	    int esc = 0;
3480 	    while (s1 >= start && *s1-- == '\\')
3481 		esc = !esc;
3482 	    if (!esc)
3483 		in_charclass = FALSE;
3484 	}
3485             /* skip for regexp comments /(?#comment)/, except for the last
3486              * char, which will be done separately.  Stop on (?{..}) and
3487              * friends */
3488 	else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3489 	    if (s[2] == '#') {
3490                 if (s_is_utf8) {
3491                     PERL_UINT_FAST8_T  len = UTF8SKIP(s);
3492 
3493                     while (s + len < send && *s != ')') {
3494                         Copy(s, d, len, U8);
3495                         d += len;
3496                         s += len;
3497                         len = UTF8_SAFE_SKIP(s, send);
3498                     }
3499                 }
3500                 else while (s+1 < send && *s != ')') {
3501                     *d++ = *s++;
3502                 }
3503 	    }
3504 	    else if (!PL_lex_casemods
3505                      && (    s[2] == '{' /* This should match regcomp.c */
3506 		         || (s[2] == '?' && s[3] == '{')))
3507 	    {
3508 		break;
3509 	    }
3510 	}
3511             /* likewise skip #-initiated comments in //x patterns */
3512 	else if (*s == '#'
3513                  && PL_lex_inpat
3514                  && !in_charclass
3515                  && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED)
3516         {
3517 	    while (s < send && *s != '\n')
3518 		*d++ = *s++;
3519 	}
3520             /* no further processing of single-quoted regex */
3521 	else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3522 	    goto default_action;
3523 
3524             /* check for embedded arrays
3525              * (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3526              */
3527 	else if (*s == '@' && s[1]) {
3528 	    if (UTF
3529                ? isIDFIRST_utf8_safe(s+1, send)
3530                : isWORDCHAR_A(s[1]))
3531             {
3532 		break;
3533             }
3534 	    if (memCHRs(":'{$", s[1]))
3535 		break;
3536 	    if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3537 		break; /* in regexp, neither @+ nor @- are interpolated */
3538 	}
3539             /* check for embedded scalars.  only stop if we're sure it's a
3540              * variable.  */
3541 	else if (*s == '$') {
3542 	    if (!PL_lex_inpat)	/* not a regexp, so $ must be var */
3543 		break;
3544 	    if (s + 1 < send && !memCHRs("()| \r\n\t", s[1])) {
3545 		if (s[1] == '\\') {
3546 		    Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3547 				   "Possible unintended interpolation of $\\ in regex");
3548 		}
3549 		break;		/* in regexp, $ might be tail anchor */
3550             }
3551 	}
3552 
3553 	/* End of else if chain - OP_TRANS rejoin rest */
3554 
3555         if (UNLIKELY(s >= send)) {
3556             assert(s == send);
3557             break;
3558         }
3559 
3560 	/* backslashes */
3561 	if (*s == '\\' && s+1 < send) {
3562 	    char* e;	/* Can be used for ending '}', etc. */
3563 
3564 	    s++;
3565 
3566 	    /* warn on \1 - \9 in substitution replacements, but note that \11
3567 	     * is an octal; and \19 is \1 followed by '9' */
3568 	    if (PL_lex_inwhat == OP_SUBST
3569                 && !PL_lex_inpat
3570                 && isDIGIT(*s)
3571                 && *s != '0'
3572                 && !isDIGIT(s[1]))
3573 	    {
3574 		/* diag_listed_as: \%d better written as $%d */
3575 		Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3576 		*--s = '$';
3577 		break;
3578 	    }
3579 
3580 	    /* string-change backslash escapes */
3581 	    if (PL_lex_inwhat != OP_TRANS && *s && memCHRs("lLuUEQF", *s)) {
3582 		--s;
3583 		break;
3584 	    }
3585 	    /* In a pattern, process \N, but skip any other backslash escapes.
3586 	     * This is because we don't want to translate an escape sequence
3587 	     * into a meta symbol and have the regex compiler use the meta
3588 	     * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
3589 	     * in spite of this, we do have to process \N here while the proper
3590 	     * charnames handler is in scope.  See bugs #56444 and #62056.
3591              *
3592 	     * There is a complication because \N in a pattern may also stand
3593 	     * for 'match a non-nl', and not mean a charname, in which case its
3594 	     * processing should be deferred to the regex compiler.  To be a
3595 	     * charname it must be followed immediately by a '{', and not look
3596 	     * like \N followed by a curly quantifier, i.e., not something like
3597 	     * \N{3,}.  regcurly returns a boolean indicating if it is a legal
3598 	     * quantifier */
3599 	    else if (PL_lex_inpat
3600 		    && (*s != 'N'
3601 			|| s[1] != '{'
3602 			|| regcurly(s + 1)))
3603 	    {
3604 		*d++ = '\\';
3605 		goto default_action;
3606 	    }
3607 
3608 	    switch (*s) {
3609 	    default:
3610 	        {
3611 		    if ((isALPHANUMERIC(*s)))
3612 			Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3613 				       "Unrecognized escape \\%c passed through",
3614 				       *s);
3615 		    /* default action is to copy the quoted character */
3616 		    goto default_action;
3617 		}
3618 
3619 	    /* eg. \132 indicates the octal constant 0132 */
3620 	    case '0': case '1': case '2': case '3':
3621 	    case '4': case '5': case '6': case '7':
3622 		{
3623                     I32 flags = PERL_SCAN_SILENT_ILLDIGIT
3624                               | PERL_SCAN_NOTIFY_ILLDIGIT;
3625                     STRLEN len = 3;
3626                     uv = grok_oct(s, &len, &flags, NULL);
3627                     s += len;
3628                     if (  (flags & PERL_SCAN_NOTIFY_ILLDIGIT)
3629                         && s < send
3630                         && isDIGIT(*s)  /* like \08, \178 */
3631                         && ckWARN(WARN_MISC))
3632                     {
3633                         Perl_warner(aTHX_ packWARN(WARN_MISC), "%s",
3634                             form_alien_digit_msg(8, len, s, send, UTF, FALSE));
3635                     }
3636 		}
3637 		goto NUM_ESCAPE_INSERT;
3638 
3639 	    /* eg. \o{24} indicates the octal constant \024 */
3640 	    case 'o':
3641 		{
3642 		    const char* error;
3643 
3644 		    if (! grok_bslash_o(&s, send,
3645                                                &uv, &error,
3646                                                NULL,
3647                                                FALSE, /* Not strict */
3648                                                FALSE, /* No illegal cp's */
3649                                                UTF))
3650                     {
3651 			yyerror(error);
3652 			uv = 0; /* drop through to ensure range ends are set */
3653 		    }
3654 		    goto NUM_ESCAPE_INSERT;
3655 		}
3656 
3657 	    /* eg. \x24 indicates the hex constant 0x24 */
3658 	    case 'x':
3659 		{
3660 		    const char* error;
3661 
3662 		    if (! grok_bslash_x(&s, send,
3663                                                &uv, &error,
3664                                                NULL,
3665                                                FALSE, /* Not strict */
3666                                                FALSE, /* No illegal cp's */
3667                                                UTF))
3668                     {
3669 			yyerror(error);
3670 			uv = 0; /* drop through to ensure range ends are set */
3671 		    }
3672 		}
3673 
3674 	      NUM_ESCAPE_INSERT:
3675 		/* Insert oct or hex escaped character. */
3676 
3677 		/* Here uv is the ordinal of the next character being added */
3678 		if (UVCHR_IS_INVARIANT(uv)) {
3679 		    *d++ = (char) uv;
3680 		}
3681 		else {
3682 		    if (!d_is_utf8 && uv > 255) {
3683 
3684                         /* Here, 'uv' won't fit unless we convert to UTF-8.
3685                          * If we've only seen invariants so far, all we have to
3686                          * do is turn on the flag */
3687                         if (utf8_variant_count == 0) {
3688                             SvUTF8_on(sv);
3689                         }
3690                         else {
3691                             SvCUR_set(sv, d - SvPVX_const(sv));
3692                             SvPOK_on(sv);
3693                             *d = '\0';
3694 
3695                             sv_utf8_upgrade_flags_grow(
3696                                            sv,
3697                                            SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3698 
3699                                            /* Since we're having to grow here,
3700                                             * make sure we have enough room for
3701                                             * this escape and a NUL, so the
3702                                             * code immediately below won't have
3703                                             * to actually grow again */
3704                                           UVCHR_SKIP(uv)
3705                                         + (STRLEN)(send - s) + 1);
3706                             d = SvPVX(sv) + SvCUR(sv);
3707                         }
3708 
3709                         has_above_latin1 = TRUE;
3710                         d_is_utf8 = TRUE;
3711                     }
3712 
3713                     if (! d_is_utf8) {
3714 		        *d++ = (char)uv;
3715                         utf8_variant_count++;
3716                     }
3717 		    else {
3718                        /* Usually, there will already be enough room in 'sv'
3719                         * since such escapes are likely longer than any UTF-8
3720                         * sequence they can end up as.  This isn't the case on
3721                         * EBCDIC where \x{40000000} contains 12 bytes, and the
3722                         * UTF-8 for it contains 14.  And, we have to allow for
3723                         * a trailing NUL.  It probably can't happen on ASCII
3724                         * platforms, but be safe.  See Note on sizing above. */
3725                         const STRLEN needed = d - SvPVX(sv)
3726                                             + UVCHR_SKIP(uv)
3727                                             + (send - s)
3728                                             + 1;
3729                         if (UNLIKELY(needed > SvLEN(sv))) {
3730                             SvCUR_set(sv, d - SvPVX_const(sv));
3731                             d = SvCUR(sv) + SvGROW(sv, needed);
3732                         }
3733 
3734 		        d = (char*) uvchr_to_utf8_flags((U8*)d, uv,
3735                                                    (ckWARN(WARN_PORTABLE))
3736                                                    ? UNICODE_WARN_PERL_EXTENDED
3737                                                    : 0);
3738 		    }
3739 		}
3740 #ifdef EBCDIC
3741                 non_portable_endpoint++;
3742 #endif
3743 		continue;
3744 
3745  	    case 'N':
3746                 /* In a non-pattern \N must be like \N{U+0041}, or it can be a
3747                  * named character, like \N{LATIN SMALL LETTER A}, or a named
3748                  * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
3749                  * GRAVE} (except y/// can't handle the latter, croaking).  For
3750                  * convenience all three forms are referred to as "named
3751                  * characters" below.
3752                  *
3753                  * For patterns, \N also can mean to match a non-newline.  Code
3754                  * before this 'switch' statement should already have handled
3755                  * this situation, and hence this code only has to deal with
3756                  * the named character cases.
3757                  *
3758                  * For non-patterns, the named characters are converted to
3759                  * their string equivalents.  In patterns, named characters are
3760                  * not converted to their ultimate forms for the same reasons
3761                  * that other escapes aren't (mainly that the ultimate
3762                  * character could be considered a meta-symbol by the regex
3763                  * compiler).  Instead, they are converted to the \N{U+...}
3764                  * form to get the value from the charnames that is in effect
3765                  * right now, while preserving the fact that it was a named
3766                  * character, so that the regex compiler knows this.
3767                  *
3768 		 * The structure of this section of code (besides checking for
3769 		 * errors and upgrading to utf8) is:
3770                  *    If the named character is of the form \N{U+...}, pass it
3771                  *      through if a pattern; otherwise convert the code point
3772                  *      to utf8
3773                  *    Otherwise must be some \N{NAME}: convert to
3774                  *      \N{U+c1.c2...} if a pattern; otherwise convert to utf8
3775                  *
3776                  * Transliteration is an exception.  The conversion to utf8 is
3777                  * only done if the code point requires it to be representable.
3778                  *
3779                  * Here, 's' points to the 'N'; the test below is guaranteed to
3780 		 * succeed if we are being called on a pattern, as we already
3781                  * know from a test above that the next character is a '{'.  A
3782                  * non-pattern \N must mean 'named character', which requires
3783                  * braces */
3784 		s++;
3785 		if (*s != '{') {
3786 		    yyerror("Missing braces on \\N{}");
3787                     *d++ = '\0';
3788 		    continue;
3789 		}
3790 		s++;
3791 
3792 		/* If there is no matching '}', it is an error. */
3793 		if (! (e = (char *) memchr(s, '}', send - s))) {
3794 		    if (! PL_lex_inpat) {
3795 			yyerror("Missing right brace on \\N{}");
3796 		    } else {
3797 			yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3798 		    }
3799                     yyquit(); /* Have exhausted the input. */
3800 		}
3801 
3802 		/* Here it looks like a named character */
3803 
3804 		if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3805 		    s += 2;	    /* Skip to next char after the 'U+' */
3806 		    if (PL_lex_inpat) {
3807 
3808                         /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */
3809                         /* Check the syntax.  */
3810                         const char *orig_s;
3811                         orig_s = s - 5;
3812                         if (!isXDIGIT(*s)) {
3813                           bad_NU:
3814                             yyerror(
3815                                 "Invalid hexadecimal number in \\N{U+...}"
3816                             );
3817                             s = e + 1;
3818                             *d++ = '\0';
3819                             continue;
3820                         }
3821                         while (++s < e) {
3822                             if (isXDIGIT(*s))
3823                                 continue;
3824                             else if ((*s == '.' || *s == '_')
3825                                   && isXDIGIT(s[1]))
3826                                 continue;
3827                             goto bad_NU;
3828                         }
3829 
3830                         /* Pass everything through unchanged.
3831                          * +1 is for the '}' */
3832                         Copy(orig_s, d, e - orig_s + 1, char);
3833                         d += e - orig_s + 1;
3834 		    }
3835 		    else {  /* Not a pattern: convert the hex to string */
3836                         I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3837 				  | PERL_SCAN_SILENT_ILLDIGIT
3838 				  | PERL_SCAN_SILENT_OVERFLOW
3839 				  | PERL_SCAN_DISALLOW_PREFIX;
3840                         STRLEN len = e - s;
3841 
3842                         uv = grok_hex(s, &len, &flags, NULL);
3843                         if (len == 0 || (len != (STRLEN)(e - s)))
3844                             goto bad_NU;
3845 
3846                         if (    uv > MAX_LEGAL_CP
3847                             || (flags & PERL_SCAN_GREATER_THAN_UV_MAX))
3848                         {
3849                             yyerror(form_cp_too_large_msg(16, s, len, 0));
3850                             uv = 0; /* drop through to ensure range ends are
3851                                        set */
3852                         }
3853 
3854                          /* For non-tr///, if the destination is not in utf8,
3855                           * unconditionally recode it to be so.  This is
3856                           * because \N{} implies Unicode semantics, and scalars
3857                           * have to be in utf8 to guarantee those semantics.
3858                           * tr/// doesn't care about Unicode rules, so no need
3859                           * there to upgrade to UTF-8 for small enough code
3860                           * points */
3861 			if (! d_is_utf8 && (   uv > 0xFF
3862                                            || PL_lex_inwhat != OP_TRANS))
3863                         {
3864 			    /* See Note on sizing above.  */
3865                             const STRLEN extra = OFFUNISKIP(uv) + (send - e) + 1;
3866 
3867 			    SvCUR_set(sv, d - SvPVX_const(sv));
3868 			    SvPOK_on(sv);
3869 			    *d = '\0';
3870 
3871                             if (utf8_variant_count == 0) {
3872                                 SvUTF8_on(sv);
3873                                 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
3874                             }
3875                             else {
3876                                 sv_utf8_upgrade_flags_grow(
3877                                                sv,
3878                                                SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3879                                                extra);
3880                                 d = SvPVX(sv) + SvCUR(sv);
3881                             }
3882 
3883 			    d_is_utf8 = TRUE;
3884                             has_above_latin1 = TRUE;
3885 			}
3886 
3887                         /* Add the (Unicode) code point to the output. */
3888 			if (! d_is_utf8 || OFFUNI_IS_INVARIANT(uv)) {
3889 			    *d++ = (char) LATIN1_TO_NATIVE(uv);
3890 			}
3891 			else {
3892                             d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv,
3893                                                    (ckWARN(WARN_PORTABLE))
3894                                                    ? UNICODE_WARN_PERL_EXTENDED
3895                                                    : 0);
3896                         }
3897 		    }
3898 		}
3899 		else /* Here is \N{NAME} but not \N{U+...}. */
3900                      if (! (res = get_and_check_backslash_N_name_wrapper(s, e)))
3901                 {   /* Failed.  We should die eventually, but for now use a NUL
3902                        to keep parsing */
3903                     *d++ = '\0';
3904                 }
3905                 else {  /* Successfully evaluated the name */
3906                     STRLEN len;
3907                     const char *str = SvPV_const(res, len);
3908                     if (PL_lex_inpat) {
3909 
3910 			if (! len) { /* The name resolved to an empty string */
3911                             const char empty_N[] = "\\N{_}";
3912                             Copy(empty_N, d, sizeof(empty_N) - 1, char);
3913                             d += sizeof(empty_N) - 1;
3914 			}
3915 			else {
3916 			    /* In order to not lose information for the regex
3917 			    * compiler, pass the result in the specially made
3918 			    * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3919 			    * the code points in hex of each character
3920 			    * returned by charnames */
3921 
3922 			    const char *str_end = str + len;
3923 			    const STRLEN off = d - SvPVX_const(sv);
3924 
3925                             if (! SvUTF8(res)) {
3926                                 /* For the non-UTF-8 case, we can determine the
3927                                  * exact length needed without having to parse
3928                                  * through the string.  Each character takes up
3929                                  * 2 hex digits plus either a trailing dot or
3930                                  * the "}" */
3931                                 const char initial_text[] = "\\N{U+";
3932                                 const STRLEN initial_len = sizeof(initial_text)
3933                                                            - 1;
3934                                 d = off + SvGROW(sv, off
3935                                                     + 3 * len
3936 
3937                                                     /* +1 for trailing NUL */
3938                                                     + initial_len + 1
3939 
3940                                                     + (STRLEN)(send - e));
3941                                 Copy(initial_text, d, initial_len, char);
3942                                 d += initial_len;
3943                                 while (str < str_end) {
3944                                     char hex_string[4];
3945                                     int len =
3946                                         my_snprintf(hex_string,
3947                                                   sizeof(hex_string),
3948                                                   "%02X.",
3949 
3950                                                   /* The regex compiler is
3951                                                    * expecting Unicode, not
3952                                                    * native */
3953                                                   NATIVE_TO_LATIN1(*str));
3954                                     PERL_MY_SNPRINTF_POST_GUARD(len,
3955                                                            sizeof(hex_string));
3956                                     Copy(hex_string, d, 3, char);
3957                                     d += 3;
3958                                     str++;
3959                                 }
3960                                 d--;    /* Below, we will overwrite the final
3961                                            dot with a right brace */
3962                             }
3963                             else {
3964                                 STRLEN char_length; /* cur char's byte length */
3965 
3966                                 /* and the number of bytes after this is
3967                                  * translated into hex digits */
3968                                 STRLEN output_length;
3969 
3970                                 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3971                                  * for max('U+', '.'); and 1 for NUL */
3972                                 char hex_string[2 * UTF8_MAXBYTES + 5];
3973 
3974                                 /* Get the first character of the result. */
3975                                 U32 uv = utf8n_to_uvchr((U8 *) str,
3976                                                         len,
3977                                                         &char_length,
3978                                                         UTF8_ALLOW_ANYUV);
3979                                 /* Convert first code point to Unicode hex,
3980                                  * including the boiler plate before it. */
3981                                 output_length =
3982                                     my_snprintf(hex_string, sizeof(hex_string),
3983                                              "\\N{U+%X",
3984                                              (unsigned int) NATIVE_TO_UNI(uv));
3985 
3986                                 /* Make sure there is enough space to hold it */
3987                                 d = off + SvGROW(sv, off
3988                                                     + output_length
3989                                                     + (STRLEN)(send - e)
3990                                                     + 2);	/* '}' + NUL */
3991                                 /* And output it */
3992                                 Copy(hex_string, d, output_length, char);
3993                                 d += output_length;
3994 
3995                                 /* For each subsequent character, append dot and
3996                                 * its Unicode code point in hex */
3997                                 while ((str += char_length) < str_end) {
3998                                     const STRLEN off = d - SvPVX_const(sv);
3999                                     U32 uv = utf8n_to_uvchr((U8 *) str,
4000                                                             str_end - str,
4001                                                             &char_length,
4002                                                             UTF8_ALLOW_ANYUV);
4003                                     output_length =
4004                                         my_snprintf(hex_string,
4005                                              sizeof(hex_string),
4006                                              ".%X",
4007                                              (unsigned int) NATIVE_TO_UNI(uv));
4008 
4009                                     d = off + SvGROW(sv, off
4010                                                         + output_length
4011                                                         + (STRLEN)(send - e)
4012                                                         + 2);	/* '}' +  NUL */
4013                                     Copy(hex_string, d, output_length, char);
4014                                     d += output_length;
4015                                 }
4016 			    }
4017 
4018 			    *d++ = '}';	/* Done.  Add the trailing brace */
4019 			}
4020 		    }
4021 		    else { /* Here, not in a pattern.  Convert the name to a
4022 			    * string. */
4023 
4024                         if (PL_lex_inwhat == OP_TRANS) {
4025                             str = SvPV_const(res, len);
4026                             if (len > ((SvUTF8(res))
4027                                        ? UTF8SKIP(str)
4028                                        : 1U))
4029                             {
4030                                 yyerror(Perl_form(aTHX_
4031                                     "%.*s must not be a named sequence"
4032                                     " in transliteration operator",
4033                                         /*  +1 to include the "}" */
4034                                     (int) (e + 1 - start), start));
4035                                 *d++ = '\0';
4036                                 goto end_backslash_N;
4037                             }
4038 
4039                             if (SvUTF8(res) && UTF8_IS_ABOVE_LATIN1(*str)) {
4040                                 has_above_latin1 = TRUE;
4041                             }
4042 
4043                         }
4044                         else if (! SvUTF8(res)) {
4045                             /* Make sure \N{} return is UTF-8.  This is because
4046                              * \N{} implies Unicode semantics, and scalars have
4047                              * to be in utf8 to guarantee those semantics; but
4048                              * not needed in tr/// */
4049                             sv_utf8_upgrade_flags(res, 0);
4050                             str = SvPV_const(res, len);
4051                         }
4052 
4053                          /* Upgrade destination to be utf8 if this new
4054                           * component is */
4055 			if (! d_is_utf8 && SvUTF8(res)) {
4056 			    /* See Note on sizing above.  */
4057                             const STRLEN extra = len + (send - s) + 1;
4058 
4059 			    SvCUR_set(sv, d - SvPVX_const(sv));
4060 			    SvPOK_on(sv);
4061 			    *d = '\0';
4062 
4063                             if (utf8_variant_count == 0) {
4064                                 SvUTF8_on(sv);
4065                                 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
4066                             }
4067                             else {
4068                                 sv_utf8_upgrade_flags_grow(sv,
4069 						SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4070 						extra);
4071                                 d = SvPVX(sv) + SvCUR(sv);
4072                             }
4073 			    d_is_utf8 = TRUE;
4074 			} else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
4075 
4076 			    /* See Note on sizing above.  (NOTE: SvCUR() is not
4077 			     * set correctly here). */
4078                             const STRLEN extra = len + (send - e) + 1;
4079 			    const STRLEN off = d - SvPVX_const(sv);
4080 			    d = off + SvGROW(sv, off + extra);
4081 			}
4082 			Copy(str, d, len, char);
4083 			d += len;
4084 		    }
4085 
4086 		    SvREFCNT_dec(res);
4087 
4088 		} /* End \N{NAME} */
4089 
4090               end_backslash_N:
4091 #ifdef EBCDIC
4092                 backslash_N++; /* \N{} is defined to be Unicode */
4093 #endif
4094 		s = e + 1;  /* Point to just after the '}' */
4095 		continue;
4096 
4097 	    /* \c is a control character */
4098 	    case 'c':
4099 		s++;
4100 		if (s < send) {
4101                     const char * message;
4102 
4103 		    if (! grok_bslash_c(*s, (U8 *) d, &message, NULL)) {
4104                         yyerror(message);
4105                         yyquit();   /* Have always immediately croaked on
4106                                        errors in this */
4107                     }
4108 		    d++;
4109 		}
4110 		else {
4111 		    yyerror("Missing control char name in \\c");
4112 		    yyquit();   /* Are at end of input, no sense continuing */
4113 		}
4114 #ifdef EBCDIC
4115                 non_portable_endpoint++;
4116 #endif
4117                 break;
4118 
4119 	    /* printf-style backslashes, formfeeds, newlines, etc */
4120 	    case 'b':
4121 		*d++ = '\b';
4122 		break;
4123 	    case 'n':
4124 		*d++ = '\n';
4125 		break;
4126 	    case 'r':
4127 		*d++ = '\r';
4128 		break;
4129 	    case 'f':
4130 		*d++ = '\f';
4131 		break;
4132 	    case 't':
4133 		*d++ = '\t';
4134 		break;
4135 	    case 'e':
4136 		*d++ = ESC_NATIVE;
4137 		break;
4138 	    case 'a':
4139 		*d++ = '\a';
4140 		break;
4141 	    } /* end switch */
4142 
4143 	    s++;
4144 	    continue;
4145 	} /* end if (backslash) */
4146 
4147     default_action:
4148         /* Just copy the input to the output, though we may have to convert
4149          * to/from UTF-8.
4150          *
4151          * If the input has the same representation in UTF-8 as not, it will be
4152          * a single byte, and we don't care about UTF8ness; just copy the byte */
4153         if (NATIVE_BYTE_IS_INVARIANT((U8)(*s))) {
4154 	    *d++ = *s++;
4155         }
4156         else if (! s_is_utf8 && ! d_is_utf8) {
4157             /* If neither source nor output is UTF-8, is also a single byte,
4158              * just copy it; but this byte counts should we later have to
4159              * convert to UTF-8 */
4160 	    *d++ = *s++;
4161             utf8_variant_count++;
4162         }
4163         else if (s_is_utf8 && d_is_utf8) {   /* Both UTF-8, can just copy */
4164 	    const STRLEN len = UTF8SKIP(s);
4165 
4166             /* We expect the source to have already been checked for
4167              * malformedness */
4168             assert(isUTF8_CHAR((U8 *) s, (U8 *) send));
4169 
4170             Copy(s, d, len, U8);
4171             d += len;
4172             s += len;
4173         }
4174         else if (s_is_utf8) { /* UTF8ness matters: convert output to utf8 */
4175             STRLEN need = send - s + 1; /* See Note on sizing above. */
4176 
4177             SvCUR_set(sv, d - SvPVX_const(sv));
4178             SvPOK_on(sv);
4179             *d = '\0';
4180 
4181             if (utf8_variant_count == 0) {
4182                 SvUTF8_on(sv);
4183                 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + need);
4184             }
4185             else {
4186                 sv_utf8_upgrade_flags_grow(sv,
4187                                            SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4188                                            need);
4189                 d = SvPVX(sv) + SvCUR(sv);
4190             }
4191             d_is_utf8 = TRUE;
4192             goto default_action; /* Redo, having upgraded so both are UTF-8 */
4193         }
4194         else {  /* UTF8ness matters: convert this non-UTF8 source char to
4195                    UTF-8 for output.  It will occupy 2 bytes, but don't include
4196                    the input byte since we haven't incremented 's' yet. See
4197                    Note on sizing above. */
4198             const STRLEN off = d - SvPVX(sv);
4199             const STRLEN extra = 2 + (send - s - 1) + 1;
4200             if (off + extra > SvLEN(sv)) {
4201 		d = off + SvGROW(sv, off + extra);
4202 	    }
4203             *d++ = UTF8_EIGHT_BIT_HI(*s);
4204             *d++ = UTF8_EIGHT_BIT_LO(*s);
4205             s++;
4206 	}
4207     } /* while loop to process each character */
4208 
4209     {
4210         const STRLEN off = d - SvPVX(sv);
4211 
4212         /* See if room for the terminating NUL */
4213         if (UNLIKELY(off >= SvLEN(sv))) {
4214 
4215 #ifndef DEBUGGING
4216 
4217             if (off > SvLEN(sv))
4218 #endif
4219                 Perl_croak(aTHX_ "panic: constant overflowed allocated space,"
4220                         " %" UVuf " >= %" UVuf, (UV)off, (UV)SvLEN(sv));
4221 
4222             /* Whew!  Here we don't have room for the terminating NUL, but
4223              * everything else so far has fit.  It's not too late to grow
4224              * to fit the NUL and continue on.  But it is a bug, as the code
4225              * above was supposed to have made room for this, so under
4226              * DEBUGGING builds, we panic anyway.  */
4227             d = off + SvGROW(sv, off + 1);
4228         }
4229     }
4230 
4231     /* terminate the string and set up the sv */
4232     *d = '\0';
4233     SvCUR_set(sv, d - SvPVX_const(sv));
4234 
4235     SvPOK_on(sv);
4236     if (d_is_utf8) {
4237 	SvUTF8_on(sv);
4238     }
4239 
4240     /* shrink the sv if we allocated more than we used */
4241     if (SvCUR(sv) + 5 < SvLEN(sv)) {
4242 	SvPV_shrink_to_cur(sv);
4243     }
4244 
4245     /* return the substring (via pl_yylval) only if we parsed anything */
4246     if (s > start) {
4247 	char *s2 = start;
4248 	for (; s2 < s; s2++) {
4249 	    if (*s2 == '\n')
4250 		COPLINE_INC_WITH_HERELINES;
4251 	}
4252 	SvREFCNT_inc_simple_void_NN(sv);
4253 	if (   (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
4254             && ! PL_parser->lex_re_reparsing)
4255         {
4256 	    const char *const key = PL_lex_inpat ? "qr" : "q";
4257 	    const STRLEN keylen = PL_lex_inpat ? 2 : 1;
4258 	    const char *type;
4259 	    STRLEN typelen;
4260 
4261 	    if (PL_lex_inwhat == OP_TRANS) {
4262 		type = "tr";
4263 		typelen = 2;
4264 	    } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
4265 		type = "s";
4266 		typelen = 1;
4267 	    } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
4268 		type = "q";
4269 		typelen = 1;
4270 	    } else {
4271 		type = "qq";
4272 		typelen = 2;
4273 	    }
4274 
4275 	    sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
4276 				type, typelen, NULL);
4277 	}
4278         pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
4279     }
4280     LEAVE_with_name("scan_const");
4281     return s;
4282 }
4283 
4284 /* S_intuit_more
4285  * Returns TRUE if there's more to the expression (e.g., a subscript),
4286  * FALSE otherwise.
4287  *
4288  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
4289  *
4290  * ->[ and ->{ return TRUE
4291  * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
4292  * { and [ outside a pattern are always subscripts, so return TRUE
4293  * if we're outside a pattern and it's not { or [, then return FALSE
4294  * if we're in a pattern and the first char is a {
4295  *   {4,5} (any digits around the comma) returns FALSE
4296  * if we're in a pattern and the first char is a [
4297  *   [] returns FALSE
4298  *   [SOMETHING] has a funky algorithm to decide whether it's a
4299  *      character class or not.  It has to deal with things like
4300  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
4301  * anything else returns TRUE
4302  */
4303 
4304 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
4305 
4306 STATIC int
S_intuit_more(pTHX_ char * s,char * e)4307 S_intuit_more(pTHX_ char *s, char *e)
4308 {
4309     PERL_ARGS_ASSERT_INTUIT_MORE;
4310 
4311     if (PL_lex_brackets)
4312 	return TRUE;
4313     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
4314 	return TRUE;
4315     if (*s == '-' && s[1] == '>'
4316      && FEATURE_POSTDEREF_QQ_IS_ENABLED
4317      && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
4318 	||(s[2] == '@' && memCHRs("*[{",s[3])) ))
4319 	return TRUE;
4320     if (*s != '{' && *s != '[')
4321 	return FALSE;
4322     PL_parser->sub_no_recover = TRUE;
4323     if (!PL_lex_inpat)
4324 	return TRUE;
4325 
4326     /* In a pattern, so maybe we have {n,m}. */
4327     if (*s == '{') {
4328 	if (regcurly(s)) {
4329 	    return FALSE;
4330 	}
4331 	return TRUE;
4332     }
4333 
4334     /* On the other hand, maybe we have a character class */
4335 
4336     s++;
4337     if (*s == ']' || *s == '^')
4338 	return FALSE;
4339     else {
4340         /* this is terrifying, and it works */
4341 	int weight;
4342 	char seen[256];
4343 	const char * const send = (char *) memchr(s, ']', e - s);
4344 	unsigned char un_char, last_un_char;
4345 	char tmpbuf[sizeof PL_tokenbuf * 4];
4346 
4347 	if (!send)		/* has to be an expression */
4348 	    return TRUE;
4349 	weight = 2;		/* let's weigh the evidence */
4350 
4351 	if (*s == '$')
4352 	    weight -= 3;
4353 	else if (isDIGIT(*s)) {
4354 	    if (s[1] != ']') {
4355 		if (isDIGIT(s[1]) && s[2] == ']')
4356 		    weight -= 10;
4357 	    }
4358 	    else
4359 		weight -= 100;
4360 	}
4361 	Zero(seen,256,char);
4362 	un_char = 255;
4363 	for (; s < send; s++) {
4364 	    last_un_char = un_char;
4365 	    un_char = (unsigned char)*s;
4366 	    switch (*s) {
4367 	    case '@':
4368 	    case '&':
4369 	    case '$':
4370 		weight -= seen[un_char] * 10;
4371 	        if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) {
4372 		    int len;
4373 		    scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
4374 		    len = (int)strlen(tmpbuf);
4375 		    if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
4376                                                     UTF ? SVf_UTF8 : 0, SVt_PV))
4377 			weight -= 100;
4378 		    else
4379 			weight -= 10;
4380 		}
4381 		else if (*s == '$'
4382                          && s[1]
4383                          && memCHRs("[#!%*<>()-=",s[1]))
4384                 {
4385 		    if (/*{*/ memCHRs("])} =",s[2]))
4386 			weight -= 10;
4387 		    else
4388 			weight -= 1;
4389 		}
4390 		break;
4391 	    case '\\':
4392 		un_char = 254;
4393 		if (s[1]) {
4394 		    if (memCHRs("wds]",s[1]))
4395 			weight += 100;
4396 		    else if (seen[(U8)'\''] || seen[(U8)'"'])
4397 			weight += 1;
4398 		    else if (memCHRs("rnftbxcav",s[1]))
4399 			weight += 40;
4400 		    else if (isDIGIT(s[1])) {
4401 			weight += 40;
4402 			while (s[1] && isDIGIT(s[1]))
4403 			    s++;
4404 		    }
4405 		}
4406 		else
4407 		    weight += 100;
4408 		break;
4409 	    case '-':
4410 		if (s[1] == '\\')
4411 		    weight += 50;
4412 		if (memCHRs("aA01! ",last_un_char))
4413 		    weight += 30;
4414 		if (memCHRs("zZ79~",s[1]))
4415 		    weight += 30;
4416 		if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4417 		    weight -= 5;	/* cope with negative subscript */
4418 		break;
4419 	    default:
4420 		if (!isWORDCHAR(last_un_char)
4421 		    && !(last_un_char == '$' || last_un_char == '@'
4422 			 || last_un_char == '&')
4423 		    && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4424 		    char *d = s;
4425 		    while (isALPHA(*s))
4426 			s++;
4427 		    if (keyword(d, s - d, 0))
4428 			weight -= 150;
4429 		}
4430 		if (un_char == last_un_char + 1)
4431 		    weight += 5;
4432 		weight -= seen[un_char];
4433 		break;
4434 	    }
4435 	    seen[un_char]++;
4436 	}
4437 	if (weight >= 0)	/* probably a character class */
4438 	    return FALSE;
4439     }
4440 
4441     return TRUE;
4442 }
4443 
4444 /*
4445  * S_intuit_method
4446  *
4447  * Does all the checking to disambiguate
4448  *   foo bar
4449  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
4450  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4451  *
4452  * First argument is the stuff after the first token, e.g. "bar".
4453  *
4454  * Not a method if foo is a filehandle.
4455  * Not a method if foo is a subroutine prototyped to take a filehandle.
4456  * Not a method if it's really "Foo $bar"
4457  * Method if it's "foo $bar"
4458  * Not a method if it's really "print foo $bar"
4459  * Method if it's really "foo package::" (interpreted as package->foo)
4460  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4461  * Not a method if bar is a filehandle or package, but is quoted with
4462  *   =>
4463  */
4464 
4465 STATIC int
S_intuit_method(pTHX_ char * start,SV * ioname,CV * cv)4466 S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
4467 {
4468     char *s = start + (*start == '$');
4469     char tmpbuf[sizeof PL_tokenbuf];
4470     STRLEN len;
4471     GV* indirgv;
4472 	/* Mustn't actually add anything to a symbol table.
4473 	   But also don't want to "initialise" any placeholder
4474 	   constants that might already be there into full
4475 	   blown PVGVs with attached PVCV.  */
4476     GV * const gv =
4477 	ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL;
4478 
4479     PERL_ARGS_ASSERT_INTUIT_METHOD;
4480 
4481     if (!FEATURE_INDIRECT_IS_ENABLED)
4482         return 0;
4483 
4484     if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4485 	    return 0;
4486     if (cv && SvPOK(cv)) {
4487 	const char *proto = CvPROTO(cv);
4488 	if (proto) {
4489 	    while (*proto && (isSPACE(*proto) || *proto == ';'))
4490 		proto++;
4491 	    if (*proto == '*')
4492 		return 0;
4493 	}
4494     }
4495 
4496     if (*start == '$') {
4497         SSize_t start_off = start - SvPVX(PL_linestr);
4498 	if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY
4499             || isUPPER(*PL_tokenbuf))
4500 	    return 0;
4501         /* this could be $# */
4502         if (isSPACE(*s))
4503             s = skipspace(s);
4504 	PL_bufptr = SvPVX(PL_linestr) + start_off;
4505 	PL_expect = XREF;
4506 	return *s == '(' ? FUNCMETH : METHOD;
4507     }
4508 
4509     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4510     /* start is the beginning of the possible filehandle/object,
4511      * and s is the end of it
4512      * tmpbuf is a copy of it (but with single quotes as double colons)
4513      */
4514 
4515     if (!keyword(tmpbuf, len, 0)) {
4516 	if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4517 	    len -= 2;
4518 	    tmpbuf[len] = '\0';
4519 	    goto bare_package;
4520 	}
4521 	indirgv = gv_fetchpvn_flags(tmpbuf, len,
4522 				    GV_NOADD_NOINIT|( UTF ? SVf_UTF8 : 0 ),
4523 				    SVt_PVCV);
4524 	if (indirgv && SvTYPE(indirgv) != SVt_NULL
4525 	 && (!isGV(indirgv) || GvCVu(indirgv)))
4526 	    return 0;
4527 	/* filehandle or package name makes it a method */
4528 	if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4529 	    s = skipspace(s);
4530 	    if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4531 		return 0;	/* no assumptions -- "=>" quotes bareword */
4532       bare_package:
4533             NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0,
4534 						  S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4535 	    NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4536 	    PL_expect = XTERM;
4537 	    force_next(BAREWORD);
4538 	    PL_bufptr = s;
4539 	    return *s == '(' ? FUNCMETH : METHOD;
4540 	}
4541     }
4542     return 0;
4543 }
4544 
4545 /* Encoded script support. filter_add() effectively inserts a
4546  * 'pre-processing' function into the current source input stream.
4547  * Note that the filter function only applies to the current source file
4548  * (e.g., it will not affect files 'require'd or 'use'd by this one).
4549  *
4550  * The datasv parameter (which may be NULL) can be used to pass
4551  * private data to this instance of the filter. The filter function
4552  * can recover the SV using the FILTER_DATA macro and use it to
4553  * store private buffers and state information.
4554  *
4555  * The supplied datasv parameter is upgraded to a PVIO type
4556  * and the IoDIRP/IoANY field is used to store the function pointer,
4557  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4558  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4559  * private use must be set using malloc'd pointers.
4560  */
4561 
4562 SV *
Perl_filter_add(pTHX_ filter_t funcp,SV * datasv)4563 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4564 {
4565     if (!funcp)
4566 	return NULL;
4567 
4568     if (!PL_parser)
4569 	return NULL;
4570 
4571     if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4572 	Perl_croak(aTHX_ "Source filters apply only to byte streams");
4573 
4574     if (!PL_rsfp_filters)
4575 	PL_rsfp_filters = newAV();
4576     if (!datasv)
4577 	datasv = newSV(0);
4578     SvUPGRADE(datasv, SVt_PVIO);
4579     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4580     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4581     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4582 			  FPTR2DPTR(void *, IoANY(datasv)),
4583 			  SvPV_nolen(datasv)));
4584     av_unshift(PL_rsfp_filters, 1);
4585     av_store(PL_rsfp_filters, 0, datasv) ;
4586     if (
4587 	!PL_parser->filtered
4588      && PL_parser->lex_flags & LEX_EVALBYTES
4589      && PL_bufptr < PL_bufend
4590     ) {
4591 	const char *s = PL_bufptr;
4592 	while (s < PL_bufend) {
4593 	    if (*s == '\n') {
4594 		SV *linestr = PL_parser->linestr;
4595 		char *buf = SvPVX(linestr);
4596 		STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4597 		STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4598 		STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4599 		STRLEN const linestart_pos = PL_parser->linestart - buf;
4600 		STRLEN const last_uni_pos =
4601 		    PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4602 		STRLEN const last_lop_pos =
4603 		    PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4604 		av_push(PL_rsfp_filters, linestr);
4605 		PL_parser->linestr =
4606 		    newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4607 		buf = SvPVX(PL_parser->linestr);
4608 		PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4609 		PL_parser->bufptr = buf + bufptr_pos;
4610 		PL_parser->oldbufptr = buf + oldbufptr_pos;
4611 		PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4612 		PL_parser->linestart = buf + linestart_pos;
4613 		if (PL_parser->last_uni)
4614 		    PL_parser->last_uni = buf + last_uni_pos;
4615 		if (PL_parser->last_lop)
4616 		    PL_parser->last_lop = buf + last_lop_pos;
4617 		SvLEN_set(linestr, SvCUR(linestr));
4618 		SvCUR_set(linestr, s - SvPVX(linestr));
4619 		PL_parser->filtered = 1;
4620 		break;
4621 	    }
4622 	    s++;
4623 	}
4624     }
4625     return(datasv);
4626 }
4627 
4628 
4629 /* Delete most recently added instance of this filter function.	*/
4630 void
Perl_filter_del(pTHX_ filter_t funcp)4631 Perl_filter_del(pTHX_ filter_t funcp)
4632 {
4633     SV *datasv;
4634 
4635     PERL_ARGS_ASSERT_FILTER_DEL;
4636 
4637 #ifdef DEBUGGING
4638     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4639 			  FPTR2DPTR(void*, funcp)));
4640 #endif
4641     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4642 	return;
4643     /* if filter is on top of stack (usual case) just pop it off */
4644     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4645     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4646 	sv_free(av_pop(PL_rsfp_filters));
4647 
4648         return;
4649     }
4650     /* we need to search for the correct entry and clear it	*/
4651     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4652 }
4653 
4654 
4655 /* Invoke the idxth filter function for the current rsfp.	 */
4656 /* maxlen 0 = read one text line */
4657 I32
Perl_filter_read(pTHX_ int idx,SV * buf_sv,int maxlen)4658 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4659 {
4660     filter_t funcp;
4661     I32 ret;
4662     SV *datasv = NULL;
4663     /* This API is bad. It should have been using unsigned int for maxlen.
4664        Not sure if we want to change the API, but if not we should sanity
4665        check the value here.  */
4666     unsigned int correct_length = maxlen < 0 ?  PERL_INT_MAX : maxlen;
4667 
4668     PERL_ARGS_ASSERT_FILTER_READ;
4669 
4670     if (!PL_parser || !PL_rsfp_filters)
4671 	return -1;
4672     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?	*/
4673 	/* Provide a default input filter to make life easy.	*/
4674 	/* Note that we append to the line. This is handy.	*/
4675 	DEBUG_P(PerlIO_printf(Perl_debug_log,
4676 			      "filter_read %d: from rsfp\n", idx));
4677 	if (correct_length) {
4678  	    /* Want a block */
4679 	    int len ;
4680 	    const int old_len = SvCUR(buf_sv);
4681 
4682 	    /* ensure buf_sv is large enough */
4683 	    SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4684 	    if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4685 				   correct_length)) <= 0) {
4686 		if (PerlIO_error(PL_rsfp))
4687 	            return -1;		/* error */
4688 	        else
4689 		    return 0 ;		/* end of file */
4690 	    }
4691 	    SvCUR_set(buf_sv, old_len + len) ;
4692 	    SvPVX(buf_sv)[old_len + len] = '\0';
4693 	} else {
4694 	    /* Want a line */
4695             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4696 		if (PerlIO_error(PL_rsfp))
4697 	            return -1;		/* error */
4698 	        else
4699 		    return 0 ;		/* end of file */
4700 	    }
4701 	}
4702 	return SvCUR(buf_sv);
4703     }
4704     /* Skip this filter slot if filter has been deleted	*/
4705     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4706 	DEBUG_P(PerlIO_printf(Perl_debug_log,
4707 			      "filter_read %d: skipped (filter deleted)\n",
4708 			      idx));
4709 	return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4710     }
4711     if (SvTYPE(datasv) != SVt_PVIO) {
4712 	if (correct_length) {
4713  	    /* Want a block */
4714 	    const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4715 	    if (!remainder) return 0; /* eof */
4716 	    if (correct_length > remainder) correct_length = remainder;
4717 	    sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4718 	    SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4719 	} else {
4720 	    /* Want a line */
4721 	    const char *s = SvEND(datasv);
4722 	    const char *send = SvPVX(datasv) + SvLEN(datasv);
4723 	    while (s < send) {
4724 		if (*s == '\n') {
4725 		    s++;
4726 		    break;
4727 		}
4728 		s++;
4729 	    }
4730 	    if (s == send) return 0; /* eof */
4731 	    sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4732 	    SvCUR_set(datasv, s-SvPVX(datasv));
4733 	}
4734 	return SvCUR(buf_sv);
4735     }
4736     /* Get function pointer hidden within datasv	*/
4737     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4738     DEBUG_P(PerlIO_printf(Perl_debug_log,
4739 			  "filter_read %d: via function %p (%s)\n",
4740 			  idx, (void*)datasv, SvPV_nolen_const(datasv)));
4741     /* Call function. The function is expected to 	*/
4742     /* call "FILTER_READ(idx+1, buf_sv)" first.		*/
4743     /* Return: <0:error, =0:eof, >0:not eof 		*/
4744     ENTER;
4745     save_scalar(PL_errgv);
4746     ret = (*funcp)(aTHX_ idx, buf_sv, correct_length);
4747     LEAVE;
4748     return ret;
4749 }
4750 
4751 STATIC char *
S_filter_gets(pTHX_ SV * sv,STRLEN append)4752 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4753 {
4754     PERL_ARGS_ASSERT_FILTER_GETS;
4755 
4756 #ifdef PERL_CR_FILTER
4757     if (!PL_rsfp_filters) {
4758 	filter_add(S_cr_textfilter,NULL);
4759     }
4760 #endif
4761     if (PL_rsfp_filters) {
4762 	if (!append)
4763             SvCUR_set(sv, 0);	/* start with empty line	*/
4764         if (FILTER_READ(0, sv, 0) > 0)
4765             return ( SvPVX(sv) ) ;
4766         else
4767 	    return NULL ;
4768     }
4769     else
4770         return (sv_gets(sv, PL_rsfp, append));
4771 }
4772 
4773 STATIC HV *
S_find_in_my_stash(pTHX_ const char * pkgname,STRLEN len)4774 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4775 {
4776     GV *gv;
4777 
4778     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4779 
4780     if (memEQs(pkgname, len, "__PACKAGE__"))
4781         return PL_curstash;
4782 
4783     if (len > 2
4784         && (pkgname[len - 2] == ':' && pkgname[len - 1] == ':')
4785         && (gv = gv_fetchpvn_flags(pkgname,
4786                                    len,
4787                                    ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4788     {
4789         return GvHV(gv);			/* Foo:: */
4790     }
4791 
4792     /* use constant CLASS => 'MyClass' */
4793     gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4794     if (gv && GvCV(gv)) {
4795 	SV * const sv = cv_const_sv(GvCV(gv));
4796 	if (sv)
4797 	    return gv_stashsv(sv, 0);
4798     }
4799 
4800     return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4801 }
4802 
4803 
4804 STATIC char *
S_tokenize_use(pTHX_ int is_use,char * s)4805 S_tokenize_use(pTHX_ int is_use, char *s) {
4806     PERL_ARGS_ASSERT_TOKENIZE_USE;
4807 
4808     if (PL_expect != XSTATE)
4809 	/* diag_listed_as: "use" not allowed in expression */
4810 	yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4811 		    is_use ? "use" : "no"));
4812     PL_expect = XTERM;
4813     s = skipspace(s);
4814     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4815 	s = force_version(s, TRUE);
4816 	if (*s == ';' || *s == '}'
4817 		|| (s = skipspace(s), (*s == ';' || *s == '}'))) {
4818 	    NEXTVAL_NEXTTOKE.opval = NULL;
4819 	    force_next(BAREWORD);
4820 	}
4821 	else if (*s == 'v') {
4822 	    s = force_word(s,BAREWORD,FALSE,TRUE);
4823 	    s = force_version(s, FALSE);
4824 	}
4825     }
4826     else {
4827 	s = force_word(s,BAREWORD,FALSE,TRUE);
4828 	s = force_version(s, FALSE);
4829     }
4830     pl_yylval.ival = is_use;
4831     return s;
4832 }
4833 #ifdef DEBUGGING
4834     static const char* const exp_name[] =
4835 	{ "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4836 	  "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
4837 	  "SIGVAR", "TERMORDORDOR"
4838 	};
4839 #endif
4840 
4841 #define word_takes_any_delimiter(p,l) S_word_takes_any_delimiter(p,l)
4842 STATIC bool
S_word_takes_any_delimiter(char * p,STRLEN len)4843 S_word_takes_any_delimiter(char *p, STRLEN len)
4844 {
4845     return (len == 1 && memCHRs("msyq", p[0]))
4846             || (len == 2
4847                 && ((p[0] == 't' && p[1] == 'r')
4848                     || (p[0] == 'q' && memCHRs("qwxr", p[1]))));
4849 }
4850 
4851 static void
S_check_scalar_slice(pTHX_ char * s)4852 S_check_scalar_slice(pTHX_ char *s)
4853 {
4854     s++;
4855     while (SPACE_OR_TAB(*s)) s++;
4856     if (*s == 'q' && s[1] == 'w' && !isWORDCHAR_lazy_if_safe(s+2,
4857                                                              PL_bufend,
4858                                                              UTF))
4859     {
4860 	return;
4861     }
4862     while (    isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)
4863            || (*s && memCHRs(" \t$#+-'\"", *s)))
4864     {
4865         s += UTF ? UTF8SKIP(s) : 1;
4866     }
4867     if (*s == '}' || *s == ']')
4868 	pl_yylval.ival = OPpSLICEWARNING;
4869 }
4870 
4871 #define lex_token_boundary() S_lex_token_boundary(aTHX)
4872 static void
S_lex_token_boundary(pTHX)4873 S_lex_token_boundary(pTHX)
4874 {
4875     PL_oldoldbufptr = PL_oldbufptr;
4876     PL_oldbufptr = PL_bufptr;
4877 }
4878 
4879 #define vcs_conflict_marker(s) S_vcs_conflict_marker(aTHX_ s)
4880 static char *
S_vcs_conflict_marker(pTHX_ char * s)4881 S_vcs_conflict_marker(pTHX_ char *s)
4882 {
4883     lex_token_boundary();
4884     PL_bufptr = s;
4885     yyerror("Version control conflict marker");
4886     while (s < PL_bufend && *s != '\n')
4887 	s++;
4888     return s;
4889 }
4890 
4891 static int
yyl_sigvar(pTHX_ char * s)4892 yyl_sigvar(pTHX_ char *s)
4893 {
4894     /* we expect the sigil and optional var name part of a
4895      * signature element here. Since a '$' is not necessarily
4896      * followed by a var name, handle it specially here; the general
4897      * yylex code would otherwise try to interpret whatever follows
4898      * as a var; e.g. ($, ...) would be seen as the var '$,'
4899      */
4900 
4901     U8 sigil;
4902 
4903     s = skipspace(s);
4904     sigil = *s++;
4905     PL_bufptr = s; /* for error reporting */
4906     switch (sigil) {
4907     case '$':
4908     case '@':
4909     case '%':
4910         /* spot stuff that looks like an prototype */
4911         if (memCHRs("$:@%&*;\\[]", *s)) {
4912             yyerror("Illegal character following sigil in a subroutine signature");
4913             break;
4914         }
4915         /* '$#' is banned, while '$ # comment' isn't */
4916         if (*s == '#') {
4917             yyerror("'#' not allowed immediately following a sigil in a subroutine signature");
4918             break;
4919         }
4920         s = skipspace(s);
4921         if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
4922             char *dest = PL_tokenbuf + 1;
4923             /* read var name, including sigil, into PL_tokenbuf */
4924             PL_tokenbuf[0] = sigil;
4925             parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1,
4926                 0, cBOOL(UTF), FALSE, FALSE);
4927             *dest = '\0';
4928             assert(PL_tokenbuf[1]); /* we have a variable name */
4929         }
4930         else {
4931             *PL_tokenbuf = 0;
4932             PL_in_my = 0;
4933         }
4934 
4935         s = skipspace(s);
4936         /* parse the = for the default ourselves to avoid '+=' etc being accepted here
4937          * as the ASSIGNOP, and exclude other tokens that start with =
4938          */
4939         if (*s == '=' && (!s[1] || memCHRs("=~>", s[1]) == 0)) {
4940             /* save now to report with the same context as we did when
4941              * all ASSIGNOPS were accepted */
4942             PL_oldbufptr = s;
4943 
4944             ++s;
4945             NEXTVAL_NEXTTOKE.ival = 0;
4946             force_next(ASSIGNOP);
4947             PL_expect = XTERM;
4948         }
4949         else if (*s == ',' || *s == ')') {
4950             PL_expect = XOPERATOR;
4951         }
4952         else {
4953             /* make sure the context shows the unexpected character and
4954              * hopefully a bit more */
4955             if (*s) ++s;
4956             while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
4957                 s++;
4958             PL_bufptr = s; /* for error reporting */
4959             yyerror("Illegal operator following parameter in a subroutine signature");
4960             PL_in_my = 0;
4961         }
4962         if (*PL_tokenbuf) {
4963             NEXTVAL_NEXTTOKE.ival = sigil;
4964             force_next('p'); /* force a signature pending identifier */
4965         }
4966         break;
4967 
4968     case ')':
4969         PL_expect = XBLOCK;
4970         break;
4971     case ',': /* handle ($a,,$b) */
4972         break;
4973 
4974     default:
4975         PL_in_my = 0;
4976         yyerror("A signature parameter must start with '$', '@' or '%'");
4977         /* very crude error recovery: skip to likely next signature
4978          * element */
4979         while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
4980             s++;
4981         break;
4982     }
4983 
4984     TOKEN(sigil);
4985 }
4986 
4987 static int
yyl_dollar(pTHX_ char * s)4988 yyl_dollar(pTHX_ char *s)
4989 {
4990     CLINE;
4991 
4992     if (PL_expect == XPOSTDEREF) {
4993         if (s[1] == '#') {
4994             s++;
4995             POSTDEREF(DOLSHARP);
4996         }
4997         POSTDEREF('$');
4998     }
4999 
5000     if (   s[1] == '#'
5001         && (   isIDFIRST_lazy_if_safe(s+2, PL_bufend, UTF)
5002             || memCHRs("{$:+-@", s[2])))
5003     {
5004         PL_tokenbuf[0] = '@';
5005         s = scan_ident(s + 1, PL_tokenbuf + 1,
5006                        sizeof PL_tokenbuf - 1, FALSE);
5007         if (PL_expect == XOPERATOR) {
5008             char *d = s;
5009             if (PL_bufptr > s) {
5010                 d = PL_bufptr-1;
5011                 PL_bufptr = PL_oldbufptr;
5012             }
5013             no_op("Array length", d);
5014         }
5015         if (!PL_tokenbuf[1])
5016             PREREF(DOLSHARP);
5017         PL_expect = XOPERATOR;
5018         force_ident_maybe_lex('#');
5019         TOKEN(DOLSHARP);
5020     }
5021 
5022     PL_tokenbuf[0] = '$';
5023     s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5024     if (PL_expect == XOPERATOR) {
5025         char *d = s;
5026         if (PL_bufptr > s) {
5027             d = PL_bufptr-1;
5028             PL_bufptr = PL_oldbufptr;
5029         }
5030         no_op("Scalar", d);
5031     }
5032     if (!PL_tokenbuf[1]) {
5033         if (s == PL_bufend)
5034             yyerror("Final $ should be \\$ or $name");
5035         PREREF('$');
5036     }
5037 
5038     {
5039         const char tmp = *s;
5040         if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
5041             s = skipspace(s);
5042 
5043         if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5044             && intuit_more(s, PL_bufend)) {
5045             if (*s == '[') {
5046                 PL_tokenbuf[0] = '@';
5047                 if (ckWARN(WARN_SYNTAX)) {
5048                     char *t = s+1;
5049 
5050                     while ( t < PL_bufend ) {
5051                         if (isSPACE(*t)) {
5052                             do { t += UTF ? UTF8SKIP(t) : 1; } while (t < PL_bufend && isSPACE(*t));
5053                             /* consumed one or more space chars */
5054                         } else if (*t == '$' || *t == '@') {
5055                             /* could be more than one '$' like $$ref or @$ref */
5056                             do { t++; } while (t < PL_bufend && *t == '$');
5057 
5058                             /* could be an abigail style identifier like $ foo */
5059                             while (t < PL_bufend && *t == ' ') t++;
5060 
5061                             /* strip off the name of the var */
5062                             while (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
5063                                 t += UTF ? UTF8SKIP(t) : 1;
5064                             /* consumed a varname */
5065                         } else if (isDIGIT(*t)) {
5066                             /* deal with hex constants like 0x11 */
5067                             if (t[0] == '0' && t[1] == 'x') {
5068                                 t += 2;
5069                                 while (t < PL_bufend && isXDIGIT(*t)) t++;
5070                             } else {
5071                                 /* deal with decimal/octal constants like 1 and 0123 */
5072                                 do { t++; } while (isDIGIT(*t));
5073                                 if (t<PL_bufend && *t == '.') {
5074                                     do { t++; } while (isDIGIT(*t));
5075                                 }
5076                             }
5077                             /* consumed a number */
5078                         } else {
5079                             /* not a var nor a space nor a number */
5080                             break;
5081                         }
5082                     }
5083                     if (t < PL_bufend && *t++ == ',') {
5084                         PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
5085                         while (t < PL_bufend && *t != ']')
5086                             t++;
5087                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5088                                     "Multidimensional syntax %" UTF8f " not supported",
5089                                     UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
5090                     }
5091                 }
5092             }
5093             else if (*s == '{') {
5094                 char *t;
5095                 PL_tokenbuf[0] = '%';
5096                 if (    strEQ(PL_tokenbuf+1, "SIG")
5097                     && ckWARN(WARN_SYNTAX)
5098                     && (t = (char *) memchr(s, '}', PL_bufend - s))
5099                     && (t = (char *) memchr(t, '=', PL_bufend - t)))
5100                 {
5101                     char tmpbuf[sizeof PL_tokenbuf];
5102                     do {
5103                         t++;
5104                     } while (isSPACE(*t));
5105                     if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) {
5106                         STRLEN len;
5107                         t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
5108                                         &len);
5109                         while (isSPACE(*t))
5110                             t++;
5111                         if (  *t == ';'
5112                             && get_cvn_flags(tmpbuf, len, UTF
5113                                                             ? SVf_UTF8
5114                                                             : 0))
5115                         {
5116                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5117                                 "You need to quote \"%" UTF8f "\"",
5118                                     UTF8fARG(UTF, len, tmpbuf));
5119                         }
5120                     }
5121                 }
5122             }
5123         }
5124 
5125         PL_expect = XOPERATOR;
5126         if ((PL_lex_state == LEX_NORMAL || PL_lex_brackets) && isSPACE((char)tmp)) {
5127             const bool islop = (PL_last_lop == PL_oldoldbufptr);
5128             if (!islop || PL_last_lop_op == OP_GREPSTART)
5129                 PL_expect = XOPERATOR;
5130             else if (memCHRs("$@\"'`q", *s))
5131                 PL_expect = XTERM;		/* e.g. print $fh "foo" */
5132             else if (   memCHRs("&*<%", *s)
5133                      && isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF))
5134             {
5135                 PL_expect = XTERM;		/* e.g. print $fh &sub */
5136             }
5137             else if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5138                 char tmpbuf[sizeof PL_tokenbuf];
5139                 int t2;
5140                 STRLEN len;
5141                 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5142                 if ((t2 = keyword(tmpbuf, len, 0))) {
5143                     /* binary operators exclude handle interpretations */
5144                     switch (t2) {
5145                     case -KEY_x:
5146                     case -KEY_eq:
5147                     case -KEY_ne:
5148                     case -KEY_gt:
5149                     case -KEY_lt:
5150                     case -KEY_ge:
5151                     case -KEY_le:
5152                     case -KEY_cmp:
5153                         break;
5154                     default:
5155                         PL_expect = XTERM;	/* e.g. print $fh length() */
5156                         break;
5157                     }
5158                 }
5159                 else {
5160                     PL_expect = XTERM;	/* e.g. print $fh subr() */
5161                 }
5162             }
5163             else if (isDIGIT(*s))
5164                 PL_expect = XTERM;		/* e.g. print $fh 3 */
5165             else if (*s == '.' && isDIGIT(s[1]))
5166                 PL_expect = XTERM;		/* e.g. print $fh .3 */
5167             else if ((*s == '?' || *s == '-' || *s == '+')
5168                      && !isSPACE(s[1]) && s[1] != '=')
5169                 PL_expect = XTERM;		/* e.g. print $fh -1 */
5170             else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5171                      && s[1] != '/')
5172                 PL_expect = XTERM;		/* e.g. print $fh /.../
5173                                                XXX except DORDOR operator
5174                                             */
5175             else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5176                      && s[2] != '=')
5177                 PL_expect = XTERM;		/* print $fh <<"EOF" */
5178         }
5179     }
5180     force_ident_maybe_lex('$');
5181     TOKEN('$');
5182 }
5183 
5184 static int
yyl_sub(pTHX_ char * s,const int key)5185 yyl_sub(pTHX_ char *s, const int key)
5186 {
5187     char * const tmpbuf = PL_tokenbuf + 1;
5188     bool have_name, have_proto;
5189     STRLEN len;
5190     SV *format_name = NULL;
5191     bool is_sigsub = FEATURE_SIGNATURES_IS_ENABLED;
5192 
5193     SSize_t off = s-SvPVX(PL_linestr);
5194     char *d;
5195 
5196     s = skipspace(s); /* can move PL_linestr */
5197 
5198     d = SvPVX(PL_linestr)+off;
5199 
5200     SAVEBOOL(PL_parser->sig_seen);
5201     PL_parser->sig_seen = FALSE;
5202 
5203     if (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
5204         || *s == '\''
5205         || (*s == ':' && s[1] == ':'))
5206     {
5207 
5208         PL_expect = XATTRBLOCK;
5209         d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
5210                       &len);
5211         if (key == KEY_format)
5212             format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
5213         *PL_tokenbuf = '&';
5214         if (memchr(tmpbuf, ':', len) || key != KEY_sub
5215          || pad_findmy_pvn(
5216                 PL_tokenbuf, len + 1, 0
5217             ) != NOT_IN_PAD)
5218             sv_setpvn(PL_subname, tmpbuf, len);
5219         else {
5220             sv_setsv(PL_subname,PL_curstname);
5221             sv_catpvs(PL_subname,"::");
5222             sv_catpvn(PL_subname,tmpbuf,len);
5223         }
5224         if (SvUTF8(PL_linestr))
5225             SvUTF8_on(PL_subname);
5226         have_name = TRUE;
5227 
5228         s = skipspace(d);
5229     }
5230     else {
5231         if (key == KEY_my || key == KEY_our || key==KEY_state) {
5232             *d = '\0';
5233             /* diag_listed_as: Missing name in "%s sub" */
5234             Perl_croak(aTHX_
5235                       "Missing name in \"%s\"", PL_bufptr);
5236         }
5237         PL_expect = XATTRTERM;
5238         sv_setpvs(PL_subname,"?");
5239         have_name = FALSE;
5240     }
5241 
5242     if (key == KEY_format) {
5243         if (format_name) {
5244             NEXTVAL_NEXTTOKE.opval
5245                 = newSVOP(OP_CONST,0, format_name);
5246             NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
5247             force_next(BAREWORD);
5248         }
5249         PREBLOCK(FORMAT);
5250     }
5251 
5252     /* Look for a prototype */
5253     if (*s == '(' && !is_sigsub) {
5254         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
5255         if (!s)
5256             Perl_croak(aTHX_ "Prototype not terminated");
5257         COPLINE_SET_FROM_MULTI_END;
5258         (void)validate_proto(PL_subname, PL_lex_stuff,
5259                              ckWARN(WARN_ILLEGALPROTO), 0);
5260         have_proto = TRUE;
5261 
5262         s = skipspace(s);
5263     }
5264     else
5265         have_proto = FALSE;
5266 
5267     if (  !(*s == ':' && s[1] != ':')
5268         && (*s != '{' && *s != '(') && key != KEY_format)
5269     {
5270         assert(key == KEY_sub || key == KEY_AUTOLOAD ||
5271                key == KEY_DESTROY || key == KEY_BEGIN ||
5272                key == KEY_UNITCHECK || key == KEY_CHECK ||
5273                key == KEY_INIT || key == KEY_END ||
5274                key == KEY_my || key == KEY_state ||
5275                key == KEY_our);
5276         if (!have_name)
5277             Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
5278         else if (*s != ';' && *s != '}')
5279             Perl_croak(aTHX_ "Illegal declaration of subroutine %" SVf, SVfARG(PL_subname));
5280     }
5281 
5282     if (have_proto) {
5283         NEXTVAL_NEXTTOKE.opval =
5284             newSVOP(OP_CONST, 0, PL_lex_stuff);
5285         PL_lex_stuff = NULL;
5286         force_next(THING);
5287     }
5288     if (!have_name) {
5289         if (PL_curstash)
5290             sv_setpvs(PL_subname, "__ANON__");
5291         else
5292             sv_setpvs(PL_subname, "__ANON__::__ANON__");
5293         if (is_sigsub)
5294             TOKEN(ANON_SIGSUB);
5295         else
5296             TOKEN(ANONSUB);
5297     }
5298     force_ident_maybe_lex('&');
5299     if (is_sigsub)
5300         TOKEN(SIGSUB);
5301     else
5302         TOKEN(SUB);
5303 }
5304 
5305 static int
yyl_interpcasemod(pTHX_ char * s)5306 yyl_interpcasemod(pTHX_ char *s)
5307 {
5308 #ifdef DEBUGGING
5309     if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
5310         Perl_croak(aTHX_
5311                    "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
5312                    PL_bufptr, PL_bufend, *PL_bufptr);
5313 #endif
5314 
5315     if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
5316         /* if at a \E */
5317         if (PL_lex_casemods) {
5318             const char oldmod = PL_lex_casestack[--PL_lex_casemods];
5319             PL_lex_casestack[PL_lex_casemods] = '\0';
5320 
5321             if (PL_bufptr != PL_bufend
5322                 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
5323                     || oldmod == 'F')) {
5324                 PL_bufptr += 2;
5325                 PL_lex_state = LEX_INTERPCONCAT;
5326             }
5327             PL_lex_allbrackets--;
5328             return REPORT(')');
5329         }
5330         else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
5331            /* Got an unpaired \E */
5332            Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
5333                     "Useless use of \\E");
5334         }
5335         if (PL_bufptr != PL_bufend)
5336             PL_bufptr += 2;
5337         PL_lex_state = LEX_INTERPCONCAT;
5338         return yylex();
5339     }
5340     else {
5341         DEBUG_T({
5342             PerlIO_printf(Perl_debug_log, "### Saw case modifier\n");
5343         });
5344         s = PL_bufptr + 1;
5345         if (s[1] == '\\' && s[2] == 'E') {
5346             PL_bufptr = s + 3;
5347             PL_lex_state = LEX_INTERPCONCAT;
5348             return yylex();
5349         }
5350         else {
5351             I32 tmp;
5352             if (   memBEGINs(s, (STRLEN) (PL_bufend - s), "L\\u")
5353                 || memBEGINs(s, (STRLEN) (PL_bufend - s), "U\\l"))
5354             {
5355                 tmp = *s, *s = s[2], s[2] = (char)tmp;	/* misordered... */
5356             }
5357             if ((*s == 'L' || *s == 'U' || *s == 'F')
5358                 && (strpbrk(PL_lex_casestack, "LUF")))
5359             {
5360                 PL_lex_casestack[--PL_lex_casemods] = '\0';
5361                 PL_lex_allbrackets--;
5362                 return REPORT(')');
5363             }
5364             if (PL_lex_casemods > 10)
5365                 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
5366             PL_lex_casestack[PL_lex_casemods++] = *s;
5367             PL_lex_casestack[PL_lex_casemods] = '\0';
5368             PL_lex_state = LEX_INTERPCONCAT;
5369             NEXTVAL_NEXTTOKE.ival = 0;
5370             force_next((2<<24)|'(');
5371             if (*s == 'l')
5372                 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
5373             else if (*s == 'u')
5374                 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
5375             else if (*s == 'L')
5376                 NEXTVAL_NEXTTOKE.ival = OP_LC;
5377             else if (*s == 'U')
5378                 NEXTVAL_NEXTTOKE.ival = OP_UC;
5379             else if (*s == 'Q')
5380                 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
5381             else if (*s == 'F')
5382                 NEXTVAL_NEXTTOKE.ival = OP_FC;
5383             else
5384                 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
5385             PL_bufptr = s + 1;
5386         }
5387         force_next(FUNC);
5388         if (PL_lex_starts) {
5389             s = PL_bufptr;
5390             PL_lex_starts = 0;
5391             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5392             if (PL_lex_casemods == 1 && PL_lex_inpat)
5393                 TOKEN(',');
5394             else
5395                 AopNOASSIGN(OP_CONCAT);
5396         }
5397         else
5398             return yylex();
5399     }
5400 }
5401 
5402 static int
yyl_secondclass_keyword(pTHX_ char * s,STRLEN len,int key,I32 * orig_keyword,GV ** pgv,GV *** pgvp)5403 yyl_secondclass_keyword(pTHX_ char *s, STRLEN len, int key, I32 *orig_keyword,
5404                         GV **pgv, GV ***pgvp)
5405 {
5406     GV *ogv = NULL;	/* override (winner) */
5407     GV *hgv = NULL;	/* hidden (loser) */
5408     GV *gv = *pgv;
5409 
5410     if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5411         CV *cv;
5412         if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5413                                     (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
5414                                     SVt_PVCV))
5415             && (cv = GvCVu(gv)))
5416         {
5417             if (GvIMPORTED_CV(gv))
5418                 ogv = gv;
5419             else if (! CvMETHOD(cv))
5420                 hgv = gv;
5421         }
5422         if (!ogv
5423             && (*pgvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf, len, FALSE))
5424             && (gv = **pgvp)
5425             && (isGV_with_GP(gv)
5426                 ? GvCVu(gv) && GvIMPORTED_CV(gv)
5427                 :   SvPCS_IMPORTED(gv)
5428                 && (gv_init(gv, PL_globalstash, PL_tokenbuf,
5429                                                          len, 0), 1)))
5430         {
5431             ogv = gv;
5432         }
5433     }
5434 
5435     *pgv = gv;
5436 
5437     if (ogv) {
5438         *orig_keyword = key;
5439         return 0;		/* overridden by import or by GLOBAL */
5440     }
5441     else if (gv && !*pgvp
5442              && -key==KEY_lock	/* XXX generalizable kludge */
5443              && GvCVu(gv))
5444     {
5445         return 0;		/* any sub overrides "weak" keyword */
5446     }
5447     else {			/* no override */
5448         key = -key;
5449         if (key == KEY_dump) {
5450             Perl_croak(aTHX_ "dump() must be written as CORE::dump() as of Perl 5.30");
5451         }
5452         *pgv = NULL;
5453         *pgvp = 0;
5454         if (hgv && key != KEY_x)	/* never ambiguous */
5455             Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5456                            "Ambiguous call resolved as CORE::%s(), "
5457                            "qualify as such or use &",
5458                            GvENAME(hgv));
5459         return key;
5460     }
5461 }
5462 
5463 static int
yyl_qw(pTHX_ char * s,STRLEN len)5464 yyl_qw(pTHX_ char *s, STRLEN len)
5465 {
5466     OP *words = NULL;
5467 
5468     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
5469     if (!s)
5470         missingterm(NULL, 0);
5471 
5472     COPLINE_SET_FROM_MULTI_END;
5473     PL_expect = XOPERATOR;
5474     if (SvCUR(PL_lex_stuff)) {
5475         int warned_comma = !ckWARN(WARN_QW);
5476         int warned_comment = warned_comma;
5477         char *d = SvPV_force(PL_lex_stuff, len);
5478         while (len) {
5479             for (; isSPACE(*d) && len; --len, ++d)
5480                 /**/;
5481             if (len) {
5482                 SV *sv;
5483                 const char *b = d;
5484                 if (!warned_comma || !warned_comment) {
5485                     for (; !isSPACE(*d) && len; --len, ++d) {
5486                         if (!warned_comma && *d == ',') {
5487                             Perl_warner(aTHX_ packWARN(WARN_QW),
5488                                 "Possible attempt to separate words with commas");
5489                             ++warned_comma;
5490                         }
5491                         else if (!warned_comment && *d == '#') {
5492                             Perl_warner(aTHX_ packWARN(WARN_QW),
5493                                 "Possible attempt to put comments in qw() list");
5494                             ++warned_comment;
5495                         }
5496                     }
5497                 }
5498                 else {
5499                     for (; !isSPACE(*d) && len; --len, ++d)
5500                         /**/;
5501                 }
5502                 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
5503                 words = op_append_elem(OP_LIST, words,
5504                                        newSVOP(OP_CONST, 0, tokeq(sv)));
5505             }
5506         }
5507     }
5508     if (!words)
5509         words = newNULLLIST();
5510     SvREFCNT_dec_NN(PL_lex_stuff);
5511     PL_lex_stuff = NULL;
5512     PL_expect = XOPERATOR;
5513     pl_yylval.opval = sawparens(words);
5514     TOKEN(QWLIST);
5515 }
5516 
5517 static int
yyl_hyphen(pTHX_ char * s)5518 yyl_hyphen(pTHX_ char *s)
5519 {
5520     if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5521         I32 ftst = 0;
5522         char tmp;
5523 
5524         s++;
5525         PL_bufptr = s;
5526         tmp = *s++;
5527 
5528         while (s < PL_bufend && SPACE_OR_TAB(*s))
5529             s++;
5530 
5531         if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=>")) {
5532             s = force_word(PL_bufptr,BAREWORD,FALSE,FALSE);
5533             DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5534             OPERATOR('-');              /* unary minus */
5535         }
5536         switch (tmp) {
5537         case 'r': ftst = OP_FTEREAD;    break;
5538         case 'w': ftst = OP_FTEWRITE;   break;
5539         case 'x': ftst = OP_FTEEXEC;    break;
5540         case 'o': ftst = OP_FTEOWNED;   break;
5541         case 'R': ftst = OP_FTRREAD;    break;
5542         case 'W': ftst = OP_FTRWRITE;   break;
5543         case 'X': ftst = OP_FTREXEC;    break;
5544         case 'O': ftst = OP_FTROWNED;   break;
5545         case 'e': ftst = OP_FTIS;       break;
5546         case 'z': ftst = OP_FTZERO;     break;
5547         case 's': ftst = OP_FTSIZE;     break;
5548         case 'f': ftst = OP_FTFILE;     break;
5549         case 'd': ftst = OP_FTDIR;      break;
5550         case 'l': ftst = OP_FTLINK;     break;
5551         case 'p': ftst = OP_FTPIPE;     break;
5552         case 'S': ftst = OP_FTSOCK;     break;
5553         case 'u': ftst = OP_FTSUID;     break;
5554         case 'g': ftst = OP_FTSGID;     break;
5555         case 'k': ftst = OP_FTSVTX;     break;
5556         case 'b': ftst = OP_FTBLK;      break;
5557         case 'c': ftst = OP_FTCHR;      break;
5558         case 't': ftst = OP_FTTTY;      break;
5559         case 'T': ftst = OP_FTTEXT;     break;
5560         case 'B': ftst = OP_FTBINARY;   break;
5561         case 'M': case 'A': case 'C':
5562             gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5563             switch (tmp) {
5564             case 'M': ftst = OP_FTMTIME; break;
5565             case 'A': ftst = OP_FTATIME; break;
5566             case 'C': ftst = OP_FTCTIME; break;
5567             default:                     break;
5568             }
5569             break;
5570         default:
5571             break;
5572         }
5573         if (ftst) {
5574             PL_last_uni = PL_oldbufptr;
5575             PL_last_lop_op = (OPCODE)ftst;
5576             DEBUG_T( {
5577                 PerlIO_printf(Perl_debug_log, "### Saw file test %c\n", (int)tmp);
5578             } );
5579             FTST(ftst);
5580         }
5581         else {
5582             /* Assume it was a minus followed by a one-letter named
5583              * subroutine call (or a -bareword), then. */
5584             DEBUG_T( {
5585                 PerlIO_printf(Perl_debug_log,
5586                     "### '-%c' looked like a file test but was not\n",
5587                     (int) tmp);
5588             } );
5589             s = --PL_bufptr;
5590         }
5591     }
5592     {
5593         const char tmp = *s++;
5594         if (*s == tmp) {
5595             s++;
5596             if (PL_expect == XOPERATOR)
5597                 TERM(POSTDEC);
5598             else
5599                 OPERATOR(PREDEC);
5600         }
5601         else if (*s == '>') {
5602             s++;
5603             s = skipspace(s);
5604             if (((*s == '$' || *s == '&') && s[1] == '*')
5605               ||(*s == '$' && s[1] == '#' && s[2] == '*')
5606               ||((*s == '@' || *s == '%') && memCHRs("*[{", s[1]))
5607               ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5608              )
5609             {
5610                 PL_expect = XPOSTDEREF;
5611                 TOKEN(ARROW);
5612             }
5613             if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5614                 s = force_word(s,METHOD,FALSE,TRUE);
5615                 TOKEN(ARROW);
5616             }
5617             else if (*s == '$')
5618                 OPERATOR(ARROW);
5619             else
5620                 TERM(ARROW);
5621         }
5622         if (PL_expect == XOPERATOR) {
5623             if (*s == '='
5624                 && !PL_lex_allbrackets
5625                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5626             {
5627                 s--;
5628                 TOKEN(0);
5629             }
5630             Aop(OP_SUBTRACT);
5631         }
5632         else {
5633             if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5634                 check_uni();
5635             OPERATOR('-');              /* unary minus */
5636         }
5637     }
5638 }
5639 
5640 static int
yyl_plus(pTHX_ char * s)5641 yyl_plus(pTHX_ char *s)
5642 {
5643     const char tmp = *s++;
5644     if (*s == tmp) {
5645         s++;
5646         if (PL_expect == XOPERATOR)
5647             TERM(POSTINC);
5648         else
5649             OPERATOR(PREINC);
5650     }
5651     if (PL_expect == XOPERATOR) {
5652         if (*s == '='
5653             && !PL_lex_allbrackets
5654             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5655         {
5656             s--;
5657             TOKEN(0);
5658         }
5659         Aop(OP_ADD);
5660     }
5661     else {
5662         if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5663             check_uni();
5664         OPERATOR('+');
5665     }
5666 }
5667 
5668 static int
yyl_star(pTHX_ char * s)5669 yyl_star(pTHX_ char *s)
5670 {
5671     if (PL_expect == XPOSTDEREF)
5672         POSTDEREF('*');
5673 
5674     if (PL_expect != XOPERATOR) {
5675         s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5676         PL_expect = XOPERATOR;
5677         force_ident(PL_tokenbuf, '*');
5678         if (!*PL_tokenbuf)
5679             PREREF('*');
5680         TERM('*');
5681     }
5682 
5683     s++;
5684     if (*s == '*') {
5685         s++;
5686         if (*s == '=' && !PL_lex_allbrackets
5687             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5688         {
5689             s -= 2;
5690             TOKEN(0);
5691         }
5692         PWop(OP_POW);
5693     }
5694 
5695     if (*s == '='
5696         && !PL_lex_allbrackets
5697         && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5698     {
5699         s--;
5700         TOKEN(0);
5701     }
5702 
5703     Mop(OP_MULTIPLY);
5704 }
5705 
5706 static int
yyl_percent(pTHX_ char * s)5707 yyl_percent(pTHX_ char *s)
5708 {
5709     if (PL_expect == XOPERATOR) {
5710         if (s[1] == '='
5711             && !PL_lex_allbrackets
5712             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5713         {
5714             TOKEN(0);
5715         }
5716         ++s;
5717         Mop(OP_MODULO);
5718     }
5719     else if (PL_expect == XPOSTDEREF)
5720         POSTDEREF('%');
5721 
5722     PL_tokenbuf[0] = '%';
5723     s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5724     pl_yylval.ival = 0;
5725     if (!PL_tokenbuf[1]) {
5726         PREREF('%');
5727     }
5728     if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5729         && intuit_more(s, PL_bufend)) {
5730         if (*s == '[')
5731             PL_tokenbuf[0] = '@';
5732     }
5733     PL_expect = XOPERATOR;
5734     force_ident_maybe_lex('%');
5735     TERM('%');
5736 }
5737 
5738 static int
yyl_caret(pTHX_ char * s)5739 yyl_caret(pTHX_ char *s)
5740 {
5741     char *d = s;
5742     const bool bof = cBOOL(FEATURE_BITWISE_IS_ENABLED);
5743     if (bof && s[1] == '.')
5744         s++;
5745     if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5746             (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5747     {
5748         s = d;
5749         TOKEN(0);
5750     }
5751     s++;
5752     BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR);
5753 }
5754 
5755 static int
yyl_colon(pTHX_ char * s)5756 yyl_colon(pTHX_ char *s)
5757 {
5758     OP *attrs;
5759 
5760     switch (PL_expect) {
5761     case XOPERATOR:
5762         if (!PL_in_my || (PL_lex_state != LEX_NORMAL && !PL_lex_brackets))
5763             break;
5764         PL_bufptr = s;	/* update in case we back off */
5765         if (*s == '=') {
5766             Perl_croak(aTHX_
5767                        "Use of := for an empty attribute list is not allowed");
5768         }
5769         goto grabattrs;
5770     case XATTRBLOCK:
5771         PL_expect = XBLOCK;
5772         goto grabattrs;
5773     case XATTRTERM:
5774         PL_expect = XTERMBLOCK;
5775      grabattrs:
5776         /* NB: as well as parsing normal attributes, we also end up
5777          * here if there is something looking like attributes
5778          * following a signature (which is illegal, but used to be
5779          * legal in 5.20..5.26). If the latter, we still parse the
5780          * attributes so that error messages(s) are less confusing,
5781          * but ignore them (parser->sig_seen).
5782          */
5783         s = skipspace(s);
5784         attrs = NULL;
5785         while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5786             bool sig = PL_parser->sig_seen;
5787             I32 tmp;
5788             SV *sv;
5789             STRLEN len;
5790             char *d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5791             if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5792                 if (tmp < 0) tmp = -tmp;
5793                 switch (tmp) {
5794                 case KEY_or:
5795                 case KEY_and:
5796                 case KEY_for:
5797                 case KEY_foreach:
5798                 case KEY_unless:
5799                 case KEY_if:
5800                 case KEY_while:
5801                 case KEY_until:
5802                     goto got_attrs;
5803                 default:
5804                     break;
5805                 }
5806             }
5807             sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5808             if (*d == '(') {
5809                 d = scan_str(d,TRUE,TRUE,FALSE,NULL);
5810                 if (!d) {
5811                     if (attrs)
5812                         op_free(attrs);
5813                     sv_free(sv);
5814                     Perl_croak(aTHX_ "Unterminated attribute parameter in attribute list");
5815                 }
5816                 COPLINE_SET_FROM_MULTI_END;
5817             }
5818             if (PL_lex_stuff) {
5819                 sv_catsv(sv, PL_lex_stuff);
5820                 attrs = op_append_elem(OP_LIST, attrs,
5821                                     newSVOP(OP_CONST, 0, sv));
5822                 SvREFCNT_dec_NN(PL_lex_stuff);
5823                 PL_lex_stuff = NULL;
5824             }
5825             else {
5826                 /* NOTE: any CV attrs applied here need to be part of
5827                    the CVf_BUILTIN_ATTRS define in cv.h! */
5828                 if (!PL_in_my && memEQs(SvPVX(sv), len, "lvalue")) {
5829                     sv_free(sv);
5830                     if (!sig)
5831                         CvLVALUE_on(PL_compcv);
5832                 }
5833                 else if (!PL_in_my && memEQs(SvPVX(sv), len, "method")) {
5834                     sv_free(sv);
5835                     if (!sig)
5836                         CvMETHOD_on(PL_compcv);
5837                 }
5838                 else if (!PL_in_my && memEQs(SvPVX(sv), len, "const")) {
5839                     sv_free(sv);
5840                     if (!sig) {
5841                         Perl_ck_warner_d(aTHX_
5842                             packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
5843                            ":const is experimental"
5844                         );
5845                         CvANONCONST_on(PL_compcv);
5846                         if (!CvANON(PL_compcv))
5847                             yyerror(":const is not permitted on named "
5848                                     "subroutines");
5849                     }
5850                 }
5851                 /* After we've set the flags, it could be argued that
5852                    we don't need to do the attributes.pm-based setting
5853                    process, and shouldn't bother appending recognized
5854                    flags.  To experiment with that, uncomment the
5855                    following "else".  (Note that's already been
5856                    uncommented.  That keeps the above-applied built-in
5857                    attributes from being intercepted (and possibly
5858                    rejected) by a package's attribute routines, but is
5859                    justified by the performance win for the common case
5860                    of applying only built-in attributes.) */
5861                 else
5862                     attrs = op_append_elem(OP_LIST, attrs,
5863                                         newSVOP(OP_CONST, 0,
5864                                                 sv));
5865             }
5866             s = skipspace(d);
5867             if (*s == ':' && s[1] != ':')
5868                 s = skipspace(s+1);
5869             else if (s == d)
5870                 break;	/* require real whitespace or :'s */
5871             /* XXX losing whitespace on sequential attributes here */
5872         }
5873 
5874         if (*s != ';'
5875             && *s != '}'
5876             && !(PL_expect == XOPERATOR
5877                  ? (*s == '=' ||  *s == ')')
5878                  : (*s == '{' ||  *s == '(')))
5879         {
5880             const char q = ((*s == '\'') ? '"' : '\'');
5881             /* If here for an expression, and parsed no attrs, back off. */
5882             if (PL_expect == XOPERATOR && !attrs) {
5883                 s = PL_bufptr;
5884                 break;
5885             }
5886             /* MUST advance bufptr here to avoid bogus "at end of line"
5887                context messages from yyerror().
5888             */
5889             PL_bufptr = s;
5890             yyerror( (const char *)
5891                      (*s
5892                       ? Perl_form(aTHX_ "Invalid separator character "
5893                                   "%c%c%c in attribute list", q, *s, q)
5894                       : "Unterminated attribute list" ) );
5895             if (attrs)
5896                 op_free(attrs);
5897             OPERATOR(':');
5898         }
5899 
5900     got_attrs:
5901         if (PL_parser->sig_seen) {
5902             /* see comment about about sig_seen and parser error
5903              * handling */
5904             if (attrs)
5905                 op_free(attrs);
5906             Perl_croak(aTHX_ "Subroutine attributes must come "
5907                              "before the signature");
5908         }
5909         if (attrs) {
5910             NEXTVAL_NEXTTOKE.opval = attrs;
5911             force_next(THING);
5912         }
5913         TOKEN(COLONATTR);
5914     }
5915 
5916     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5917         s--;
5918         TOKEN(0);
5919     }
5920 
5921     PL_lex_allbrackets--;
5922     OPERATOR(':');
5923 }
5924 
5925 static int
yyl_subproto(pTHX_ char * s,CV * cv)5926 yyl_subproto(pTHX_ char *s, CV *cv)
5927 {
5928     STRLEN protolen = CvPROTOLEN(cv);
5929     const char *proto = CvPROTO(cv);
5930     bool optional;
5931 
5932     proto = S_strip_spaces(aTHX_ proto, &protolen);
5933     if (!protolen)
5934         TERM(FUNC0SUB);
5935     if ((optional = *proto == ';')) {
5936         do {
5937             proto++;
5938         } while (*proto == ';');
5939     }
5940 
5941     if (
5942         (
5943             (
5944                 *proto == '$' || *proto == '_'
5945              || *proto == '*' || *proto == '+'
5946             )
5947          && proto[1] == '\0'
5948         )
5949      || (
5950          *proto == '\\' && proto[1] && proto[2] == '\0'
5951         )
5952     ) {
5953         UNIPROTO(UNIOPSUB,optional);
5954     }
5955 
5956     if (*proto == '\\' && proto[1] == '[') {
5957         const char *p = proto + 2;
5958         while(*p && *p != ']')
5959             ++p;
5960         if(*p == ']' && !p[1])
5961             UNIPROTO(UNIOPSUB,optional);
5962     }
5963 
5964     if (*proto == '&' && *s == '{') {
5965         if (PL_curstash)
5966             sv_setpvs(PL_subname, "__ANON__");
5967         else
5968             sv_setpvs(PL_subname, "__ANON__::__ANON__");
5969         if (!PL_lex_allbrackets
5970             && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
5971         {
5972             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
5973         }
5974         PREBLOCK(LSTOPSUB);
5975     }
5976 
5977     return KEY_NULL;
5978 }
5979 
5980 static int
yyl_leftcurly(pTHX_ char * s,const U8 formbrack)5981 yyl_leftcurly(pTHX_ char *s, const U8 formbrack)
5982 {
5983     char *d;
5984     if (PL_lex_brackets > 100) {
5985         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5986     }
5987 
5988     switch (PL_expect) {
5989     case XTERM:
5990     case XTERMORDORDOR:
5991         PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5992         PL_lex_allbrackets++;
5993         OPERATOR(HASHBRACK);
5994     case XOPERATOR:
5995         while (s < PL_bufend && SPACE_OR_TAB(*s))
5996             s++;
5997         d = s;
5998         PL_tokenbuf[0] = '\0';
5999         if (d < PL_bufend && *d == '-') {
6000             PL_tokenbuf[0] = '-';
6001             d++;
6002             while (d < PL_bufend && SPACE_OR_TAB(*d))
6003                 d++;
6004         }
6005         if (d < PL_bufend && isIDFIRST_lazy_if_safe(d, PL_bufend, UTF)) {
6006             STRLEN len;
6007             d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6008                           FALSE, &len);
6009             while (d < PL_bufend && SPACE_OR_TAB(*d))
6010                 d++;
6011             if (*d == '}') {
6012                 const char minus = (PL_tokenbuf[0] == '-');
6013                 s = force_word(s + minus, BAREWORD, FALSE, TRUE);
6014                 if (minus)
6015                     force_next('-');
6016             }
6017         }
6018         /* FALLTHROUGH */
6019     case XATTRTERM:
6020     case XTERMBLOCK:
6021         PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6022         PL_lex_allbrackets++;
6023         PL_expect = XSTATE;
6024         break;
6025     case XATTRBLOCK:
6026     case XBLOCK:
6027         PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
6028         PL_lex_allbrackets++;
6029         PL_expect = XSTATE;
6030         break;
6031     case XBLOCKTERM:
6032         PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6033         PL_lex_allbrackets++;
6034         PL_expect = XSTATE;
6035         break;
6036     default: {
6037             const char *t;
6038             if (PL_oldoldbufptr == PL_last_lop)
6039                 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6040             else
6041                 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6042             PL_lex_allbrackets++;
6043             s = skipspace(s);
6044             if (*s == '}') {
6045                 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6046                     PL_expect = XTERM;
6047                     /* This hack is to get the ${} in the message. */
6048                     PL_bufptr = s+1;
6049                     yyerror("syntax error");
6050                     break;
6051                 }
6052                 OPERATOR(HASHBRACK);
6053             }
6054             if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
6055                 /* ${...} or @{...} etc., but not print {...}
6056                  * Skip the disambiguation and treat this as a block.
6057                  */
6058                 goto block_expectation;
6059             }
6060             /* This hack serves to disambiguate a pair of curlies
6061              * as being a block or an anon hash.  Normally, expectation
6062              * determines that, but in cases where we're not in a
6063              * position to expect anything in particular (like inside
6064              * eval"") we have to resolve the ambiguity.  This code
6065              * covers the case where the first term in the curlies is a
6066              * quoted string.  Most other cases need to be explicitly
6067              * disambiguated by prepending a "+" before the opening
6068              * curly in order to force resolution as an anon hash.
6069              *
6070              * XXX should probably propagate the outer expectation
6071              * into eval"" to rely less on this hack, but that could
6072              * potentially break current behavior of eval"".
6073              * GSAR 97-07-21
6074              */
6075             t = s;
6076             if (*s == '\'' || *s == '"' || *s == '`') {
6077                 /* common case: get past first string, handling escapes */
6078                 for (t++; t < PL_bufend && *t != *s;)
6079                     if (*t++ == '\\')
6080                         t++;
6081                 t++;
6082             }
6083             else if (*s == 'q') {
6084                 if (++t < PL_bufend
6085                     && (!isWORDCHAR(*t)
6086                         || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6087                             && !isWORDCHAR(*t))))
6088                 {
6089                     /* skip q//-like construct */
6090                     const char *tmps;
6091                     char open, close, term;
6092                     I32 brackets = 1;
6093 
6094                     while (t < PL_bufend && isSPACE(*t))
6095                         t++;
6096                     /* check for q => */
6097                     if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6098                         OPERATOR(HASHBRACK);
6099                     }
6100                     term = *t;
6101                     open = term;
6102                     if (term && (tmps = memCHRs("([{< )]}> )]}>",term)))
6103                         term = tmps[5];
6104                     close = term;
6105                     if (open == close)
6106                         for (t++; t < PL_bufend; t++) {
6107                             if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6108                                 t++;
6109                             else if (*t == open)
6110                                 break;
6111                         }
6112                     else {
6113                         for (t++; t < PL_bufend; t++) {
6114                             if (*t == '\\' && t+1 < PL_bufend)
6115                                 t++;
6116                             else if (*t == close && --brackets <= 0)
6117                                 break;
6118                             else if (*t == open)
6119                                 brackets++;
6120                         }
6121                     }
6122                     t++;
6123                 }
6124                 else
6125                     /* skip plain q word */
6126                     while (   t < PL_bufend
6127                            && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6128                     {
6129                         t += UTF ? UTF8SKIP(t) : 1;
6130                     }
6131             }
6132             else if (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) {
6133                 t += UTF ? UTF8SKIP(t) : 1;
6134                 while (   t < PL_bufend
6135                        && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6136                 {
6137                     t += UTF ? UTF8SKIP(t) : 1;
6138                 }
6139             }
6140             while (t < PL_bufend && isSPACE(*t))
6141                 t++;
6142             /* if comma follows first term, call it an anon hash */
6143             /* XXX it could be a comma expression with loop modifiers */
6144             if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6145                                || (*t == '=' && t[1] == '>')))
6146                 OPERATOR(HASHBRACK);
6147             if (PL_expect == XREF) {
6148               block_expectation:
6149                 /* If there is an opening brace or 'sub:', treat it
6150                    as a term to make ${{...}}{k} and &{sub:attr...}
6151                    dwim.  Otherwise, treat it as a statement, so
6152                    map {no strict; ...} works.
6153                  */
6154                 s = skipspace(s);
6155                 if (*s == '{') {
6156                     PL_expect = XTERM;
6157                     break;
6158                 }
6159                 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "sub")) {
6160                     PL_bufptr = s;
6161                     d = s + 3;
6162                     d = skipspace(d);
6163                     s = PL_bufptr;
6164                     if (*d == ':') {
6165                         PL_expect = XTERM;
6166                         break;
6167                     }
6168                 }
6169                 PL_expect = XSTATE;
6170             }
6171             else {
6172                 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6173                 PL_expect = XSTATE;
6174             }
6175         }
6176         break;
6177     }
6178 
6179     pl_yylval.ival = CopLINE(PL_curcop);
6180     PL_copline = NOLINE;   /* invalidate current command line number */
6181     TOKEN(formbrack ? '=' : '{');
6182 }
6183 
6184 static int
yyl_rightcurly(pTHX_ char * s,const U8 formbrack)6185 yyl_rightcurly(pTHX_ char *s, const U8 formbrack)
6186 {
6187     assert(s != PL_bufend);
6188     s++;
6189 
6190     if (PL_lex_brackets <= 0)
6191         /* diag_listed_as: Unmatched right %s bracket */
6192         yyerror("Unmatched right curly bracket");
6193     else
6194         PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6195 
6196     PL_lex_allbrackets--;
6197 
6198     if (PL_lex_state == LEX_INTERPNORMAL) {
6199         if (PL_lex_brackets == 0) {
6200             if (PL_expect & XFAKEBRACK) {
6201                 PL_expect &= XENUMMASK;
6202                 PL_lex_state = LEX_INTERPEND;
6203                 PL_bufptr = s;
6204                 return yylex();	/* ignore fake brackets */
6205             }
6206             if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6207              && SvEVALED(PL_lex_repl))
6208                 PL_lex_state = LEX_INTERPEND;
6209             else if (*s == '-' && s[1] == '>')
6210                 PL_lex_state = LEX_INTERPENDMAYBE;
6211             else if (*s != '[' && *s != '{')
6212                 PL_lex_state = LEX_INTERPEND;
6213         }
6214     }
6215 
6216     if (PL_expect & XFAKEBRACK) {
6217         PL_expect &= XENUMMASK;
6218         PL_bufptr = s;
6219         return yylex();		/* ignore fake brackets */
6220     }
6221 
6222     force_next(formbrack ? '.' : '}');
6223     if (formbrack) LEAVE_with_name("lex_format");
6224     if (formbrack == 2) { /* means . where arguments were expected */
6225         force_next(';');
6226         TOKEN(FORMRBRACK);
6227     }
6228 
6229     TOKEN(';');
6230 }
6231 
6232 static int
yyl_ampersand(pTHX_ char * s)6233 yyl_ampersand(pTHX_ char *s)
6234 {
6235     if (PL_expect == XPOSTDEREF)
6236         POSTDEREF('&');
6237 
6238     s++;
6239     if (*s++ == '&') {
6240         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6241                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6242             s -= 2;
6243             TOKEN(0);
6244         }
6245         AOPERATOR(ANDAND);
6246     }
6247     s--;
6248 
6249     if (PL_expect == XOPERATOR) {
6250         char *d;
6251         bool bof;
6252         if (   PL_bufptr == PL_linestart
6253             && ckWARN(WARN_SEMICOLON)
6254             && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
6255         {
6256             CopLINE_dec(PL_curcop);
6257             Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6258             CopLINE_inc(PL_curcop);
6259         }
6260         d = s;
6261         if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6262             s++;
6263         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6264                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6265             s = d;
6266             s--;
6267             TOKEN(0);
6268         }
6269         if (d == s)
6270             BAop(bof ? OP_NBIT_AND : OP_BIT_AND);
6271         else
6272             BAop(OP_SBIT_AND);
6273     }
6274 
6275     PL_tokenbuf[0] = '&';
6276     s = scan_ident(s - 1, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
6277     pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6278 
6279     if (PL_tokenbuf[1])
6280         force_ident_maybe_lex('&');
6281     else
6282         PREREF('&');
6283 
6284     TERM('&');
6285 }
6286 
6287 static int
yyl_verticalbar(pTHX_ char * s)6288 yyl_verticalbar(pTHX_ char *s)
6289 {
6290     char *d;
6291     bool bof;
6292 
6293     s++;
6294     if (*s++ == '|') {
6295         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6296                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6297             s -= 2;
6298             TOKEN(0);
6299         }
6300         AOPERATOR(OROR);
6301     }
6302 
6303     s--;
6304     d = s;
6305     if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6306         s++;
6307 
6308     if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6309             (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6310         s = d - 1;
6311         TOKEN(0);
6312     }
6313 
6314     BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR);
6315 }
6316 
6317 static int
yyl_bang(pTHX_ char * s)6318 yyl_bang(pTHX_ char *s)
6319 {
6320     const char tmp = *s++;
6321     if (tmp == '=') {
6322         /* was this !=~ where !~ was meant?
6323          * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6324 
6325         if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6326             const char *t = s+1;
6327 
6328             while (t < PL_bufend && isSPACE(*t))
6329                 ++t;
6330 
6331             if (*t == '/' || *t == '?'
6332                 || ((*t == 'm' || *t == 's' || *t == 'y')
6333                     && !isWORDCHAR(t[1]))
6334                 || (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6335                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6336                             "!=~ should be !~");
6337         }
6338 
6339         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6340             s -= 2;
6341             TOKEN(0);
6342         }
6343 
6344         ChEop(OP_NE);
6345     }
6346 
6347     if (tmp == '~')
6348         PMop(OP_NOT);
6349 
6350     s--;
6351     OPERATOR('!');
6352 }
6353 
6354 static int
yyl_snail(pTHX_ char * s)6355 yyl_snail(pTHX_ char *s)
6356 {
6357     if (PL_expect == XPOSTDEREF)
6358         POSTDEREF('@');
6359     PL_tokenbuf[0] = '@';
6360     s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6361     if (PL_expect == XOPERATOR) {
6362         char *d = s;
6363         if (PL_bufptr > s) {
6364             d = PL_bufptr-1;
6365             PL_bufptr = PL_oldbufptr;
6366         }
6367         no_op("Array", d);
6368     }
6369     pl_yylval.ival = 0;
6370     if (!PL_tokenbuf[1]) {
6371         PREREF('@');
6372     }
6373     if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6374         s = skipspace(s);
6375     if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6376         && intuit_more(s, PL_bufend))
6377     {
6378         if (*s == '{')
6379             PL_tokenbuf[0] = '%';
6380 
6381         /* Warn about @ where they meant $. */
6382         if (*s == '[' || *s == '{') {
6383             if (ckWARN(WARN_SYNTAX)) {
6384                 S_check_scalar_slice(aTHX_ s);
6385             }
6386         }
6387     }
6388     PL_expect = XOPERATOR;
6389     force_ident_maybe_lex('@');
6390     TERM('@');
6391 }
6392 
6393 static int
yyl_slash(pTHX_ char * s)6394 yyl_slash(pTHX_ char *s)
6395 {
6396     if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') {
6397         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6398                 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6399             TOKEN(0);
6400         s += 2;
6401         AOPERATOR(DORDOR);
6402     }
6403     else if (PL_expect == XOPERATOR) {
6404         s++;
6405         if (*s == '=' && !PL_lex_allbrackets
6406             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6407         {
6408             s--;
6409             TOKEN(0);
6410         }
6411         Mop(OP_DIVIDE);
6412     }
6413     else {
6414         /* Disable warning on "study /blah/" */
6415         if (    PL_oldoldbufptr == PL_last_uni
6416             && (   *PL_last_uni != 's' || s - PL_last_uni < 5
6417                 || memNE(PL_last_uni, "study", 5)
6418                 || isWORDCHAR_lazy_if_safe(PL_last_uni+5, PL_bufend, UTF)
6419          ))
6420             check_uni();
6421         s = scan_pat(s,OP_MATCH);
6422         TERM(sublex_start());
6423     }
6424 }
6425 
6426 static int
yyl_leftsquare(pTHX_ char * s)6427 yyl_leftsquare(pTHX_ char *s)
6428 {
6429     char tmp;
6430 
6431     if (PL_lex_brackets > 100)
6432         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6433     PL_lex_brackstack[PL_lex_brackets++] = 0;
6434     PL_lex_allbrackets++;
6435     tmp = *s++;
6436     OPERATOR(tmp);
6437 }
6438 
6439 static int
yyl_rightsquare(pTHX_ char * s)6440 yyl_rightsquare(pTHX_ char *s)
6441 {
6442     if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6443         TOKEN(0);
6444     s++;
6445     if (PL_lex_brackets <= 0)
6446         /* diag_listed_as: Unmatched right %s bracket */
6447         yyerror("Unmatched right square bracket");
6448     else
6449         --PL_lex_brackets;
6450     PL_lex_allbrackets--;
6451     if (PL_lex_state == LEX_INTERPNORMAL) {
6452         if (PL_lex_brackets == 0) {
6453             if (*s == '-' && s[1] == '>')
6454                 PL_lex_state = LEX_INTERPENDMAYBE;
6455             else if (*s != '[' && *s != '{')
6456                 PL_lex_state = LEX_INTERPEND;
6457         }
6458     }
6459     TERM(']');
6460 }
6461 
6462 static int
yyl_tilde(pTHX_ char * s)6463 yyl_tilde(pTHX_ char *s)
6464 {
6465     bool bof;
6466     if (s[1] == '~' && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)) {
6467         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6468             TOKEN(0);
6469         s += 2;
6470         Perl_ck_warner_d(aTHX_
6471             packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
6472             "Smartmatch is experimental");
6473         NCEop(OP_SMARTMATCH);
6474     }
6475     s++;
6476     if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') {
6477         s++;
6478         BCop(OP_SCOMPLEMENT);
6479     }
6480     BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT);
6481 }
6482 
6483 static int
yyl_leftparen(pTHX_ char * s)6484 yyl_leftparen(pTHX_ char *s)
6485 {
6486     if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
6487         PL_oldbufptr = PL_oldoldbufptr;		/* allow print(STDOUT 123) */
6488     else
6489         PL_expect = XTERM;
6490     s = skipspace(s);
6491     PL_lex_allbrackets++;
6492     TOKEN('(');
6493 }
6494 
6495 static int
yyl_rightparen(pTHX_ char * s)6496 yyl_rightparen(pTHX_ char *s)
6497 {
6498     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
6499         TOKEN(0);
6500     s++;
6501     PL_lex_allbrackets--;
6502     s = skipspace(s);
6503     if (*s == '{')
6504         PREBLOCK(')');
6505     TERM(')');
6506 }
6507 
6508 static int
yyl_leftpointy(pTHX_ char * s)6509 yyl_leftpointy(pTHX_ char *s)
6510 {
6511     char tmp;
6512 
6513     if (PL_expect != XOPERATOR) {
6514         if (s[1] != '<' && !memchr(s,'>', PL_bufend - s))
6515             check_uni();
6516         if (s[1] == '<' && s[2] != '>')
6517             s = scan_heredoc(s);
6518         else
6519             s = scan_inputsymbol(s);
6520         PL_expect = XOPERATOR;
6521         TOKEN(sublex_start());
6522     }
6523 
6524     s++;
6525 
6526     tmp = *s++;
6527     if (tmp == '<') {
6528         if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6529             s -= 2;
6530             TOKEN(0);
6531         }
6532         SHop(OP_LEFT_SHIFT);
6533     }
6534     if (tmp == '=') {
6535         tmp = *s++;
6536         if (tmp == '>') {
6537             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6538                 s -= 3;
6539                 TOKEN(0);
6540             }
6541             NCEop(OP_NCMP);
6542         }
6543         s--;
6544         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6545             s -= 2;
6546             TOKEN(0);
6547         }
6548         ChRop(OP_LE);
6549     }
6550 
6551     s--;
6552     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6553         s--;
6554         TOKEN(0);
6555     }
6556 
6557     ChRop(OP_LT);
6558 }
6559 
6560 static int
yyl_rightpointy(pTHX_ char * s)6561 yyl_rightpointy(pTHX_ char *s)
6562 {
6563     const char tmp = *s++;
6564 
6565     if (tmp == '>') {
6566         if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6567             s -= 2;
6568             TOKEN(0);
6569         }
6570         SHop(OP_RIGHT_SHIFT);
6571     }
6572     else if (tmp == '=') {
6573         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6574             s -= 2;
6575             TOKEN(0);
6576         }
6577         ChRop(OP_GE);
6578     }
6579 
6580     s--;
6581     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6582         s--;
6583         TOKEN(0);
6584     }
6585 
6586     ChRop(OP_GT);
6587 }
6588 
6589 static int
yyl_sglquote(pTHX_ char * s)6590 yyl_sglquote(pTHX_ char *s)
6591 {
6592     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6593     if (!s)
6594         missingterm(NULL, 0);
6595     COPLINE_SET_FROM_MULTI_END;
6596     DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6597     if (PL_expect == XOPERATOR) {
6598         no_op("String",s);
6599     }
6600     pl_yylval.ival = OP_CONST;
6601     TERM(sublex_start());
6602 }
6603 
6604 static int
yyl_dblquote(pTHX_ char * s)6605 yyl_dblquote(pTHX_ char *s)
6606 {
6607     char *d;
6608     STRLEN len;
6609     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6610     DEBUG_T( {
6611         if (s)
6612             printbuf("### Saw string before %s\n", s);
6613         else
6614             PerlIO_printf(Perl_debug_log,
6615                          "### Saw unterminated string\n");
6616     } );
6617     if (PL_expect == XOPERATOR) {
6618             no_op("String",s);
6619     }
6620     if (!s)
6621         missingterm(NULL, 0);
6622     pl_yylval.ival = OP_CONST;
6623     /* FIXME. I think that this can be const if char *d is replaced by
6624        more localised variables.  */
6625     for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6626         if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6627             pl_yylval.ival = OP_STRINGIFY;
6628             break;
6629         }
6630     }
6631     if (pl_yylval.ival == OP_CONST)
6632         COPLINE_SET_FROM_MULTI_END;
6633     TERM(sublex_start());
6634 }
6635 
6636 static int
yyl_backtick(pTHX_ char * s)6637 yyl_backtick(pTHX_ char *s)
6638 {
6639     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6640     DEBUG_T( {
6641         if (s)
6642             printbuf("### Saw backtick string before %s\n", s);
6643         else
6644             PerlIO_printf(Perl_debug_log,
6645                          "### Saw unterminated backtick string\n");
6646     } );
6647     if (PL_expect == XOPERATOR)
6648         no_op("Backticks",s);
6649     if (!s)
6650         missingterm(NULL, 0);
6651     pl_yylval.ival = OP_BACKTICK;
6652     TERM(sublex_start());
6653 }
6654 
6655 static int
yyl_backslash(pTHX_ char * s)6656 yyl_backslash(pTHX_ char *s)
6657 {
6658     if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr && isDIGIT(*s))
6659         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6660                        *s, *s);
6661     if (PL_expect == XOPERATOR)
6662         no_op("Backslash",s);
6663     OPERATOR(REFGEN);
6664 }
6665 
6666 static void
yyl_data_handle(pTHX)6667 yyl_data_handle(pTHX)
6668 {
6669     HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
6670                             ? PL_curstash
6671                             : PL_defstash;
6672     GV *gv = (GV *)*hv_fetchs(stash, "DATA", 1);
6673 
6674     if (!isGV(gv))
6675         gv_init(gv,stash,"DATA",4,0);
6676 
6677     GvMULTI_on(gv);
6678     if (!GvIO(gv))
6679         GvIOp(gv) = newIO();
6680     IoIFP(GvIOp(gv)) = PL_rsfp;
6681 
6682     /* Mark this internal pseudo-handle as clean */
6683     IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6684     if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6685         IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6686     else
6687         IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6688 
6689 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6690     /* if the script was opened in binmode, we need to revert
6691      * it to text mode for compatibility; but only iff it has CRs
6692      * XXX this is a questionable hack at best. */
6693     if (PL_bufend-PL_bufptr > 2
6694         && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6695     {
6696         Off_t loc = 0;
6697         if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6698             loc = PerlIO_tell(PL_rsfp);
6699             (void)PerlIO_seek(PL_rsfp, 0L, 0);
6700         }
6701         if (PerlLIO_setmode(RSFP_FILENO, O_TEXT) != -1) {
6702             if (loc > 0)
6703                 PerlIO_seek(PL_rsfp, loc, 0);
6704         }
6705     }
6706 #endif
6707 
6708 #ifdef PERLIO_LAYERS
6709     if (!IN_BYTES) {
6710         if (UTF)
6711             PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6712     }
6713 #endif
6714 
6715     PL_rsfp = NULL;
6716 }
6717 
6718 PERL_STATIC_NO_RET void yyl_croak_unrecognised(pTHX_ char*)
6719     __attribute__noreturn__;
6720 
6721 PERL_STATIC_NO_RET void
yyl_croak_unrecognised(pTHX_ char * s)6722 yyl_croak_unrecognised(pTHX_ char *s)
6723 {
6724     SV *dsv = newSVpvs_flags("", SVs_TEMP);
6725     const char *c;
6726     char *d;
6727     STRLEN len;
6728 
6729     if (UTF) {
6730         STRLEN skiplen = UTF8SKIP(s);
6731         STRLEN stravail = PL_bufend - s;
6732         c = sv_uni_display(dsv, newSVpvn_flags(s,
6733                                                skiplen > stravail ? stravail : skiplen,
6734                                                SVs_TEMP | SVf_UTF8),
6735                            10, UNI_DISPLAY_ISPRINT);
6736     }
6737     else {
6738         c = Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
6739     }
6740 
6741     if (s >= PL_linestart) {
6742         d = PL_linestart;
6743     }
6744     else {
6745         /* somehow (probably due to a parse failure), PL_linestart has advanced
6746          * pass PL_bufptr, get a reasonable beginning of line
6747          */
6748         d = s;
6749         while (d > SvPVX(PL_linestr) && d[-1] && d[-1] != '\n')
6750             --d;
6751     }
6752     len = UTF ? Perl_utf8_length(aTHX_ (U8 *) d, (U8 *) s) : (STRLEN) (s - d);
6753     if (len > UNRECOGNIZED_PRECEDE_COUNT) {
6754         d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)d) : s - UNRECOGNIZED_PRECEDE_COUNT;
6755     }
6756 
6757     Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %" UTF8f "<-- HERE near column %d", c,
6758                       UTF8fARG(UTF, (s - d), d),
6759                      (int) len + 1);
6760 }
6761 
6762 static int
yyl_require(pTHX_ char * s,I32 orig_keyword)6763 yyl_require(pTHX_ char *s, I32 orig_keyword)
6764 {
6765     s = skipspace(s);
6766     if (isDIGIT(*s)) {
6767         s = force_version(s, FALSE);
6768     }
6769     else if (*s != 'v' || !isDIGIT(s[1])
6770             || (s = force_version(s, TRUE), *s == 'v'))
6771     {
6772         *PL_tokenbuf = '\0';
6773         s = force_word(s,BAREWORD,TRUE,TRUE);
6774         if (isIDFIRST_lazy_if_safe(PL_tokenbuf,
6775                                    PL_tokenbuf + sizeof(PL_tokenbuf),
6776                                    UTF))
6777         {
6778             gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
6779                         GV_ADD | (UTF ? SVf_UTF8 : 0));
6780         }
6781         else if (*s == '<')
6782             yyerror("<> at require-statement should be quotes");
6783     }
6784 
6785     if (orig_keyword == KEY_require)
6786         pl_yylval.ival = 1;
6787     else
6788         pl_yylval.ival = 0;
6789 
6790     PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
6791     PL_bufptr = s;
6792     PL_last_uni = PL_oldbufptr;
6793     PL_last_lop_op = OP_REQUIRE;
6794     s = skipspace(s);
6795     return REPORT( (int)REQUIRE );
6796 }
6797 
6798 static int
yyl_foreach(pTHX_ char * s)6799 yyl_foreach(pTHX_ char *s)
6800 {
6801     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
6802         return REPORT(0);
6803     pl_yylval.ival = CopLINE(PL_curcop);
6804     s = skipspace(s);
6805     if (PL_expect == XSTATE && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
6806         char *p = s;
6807         SSize_t s_off = s - SvPVX(PL_linestr);
6808         STRLEN len;
6809 
6810         if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "my") && isSPACE(p[2])) {
6811             p += 2;
6812         }
6813         else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "our") && isSPACE(p[3])) {
6814             p += 3;
6815         }
6816 
6817         p = skipspace(p);
6818         /* skip optional package name, as in "for my abc $x (..)" */
6819         if (isIDFIRST_lazy_if_safe(p, PL_bufend, UTF)) {
6820             p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6821             p = skipspace(p);
6822         }
6823         if (*p != '$' && *p != '\\')
6824             Perl_croak(aTHX_ "Missing $ on loop variable");
6825 
6826         /* The buffer may have been reallocated, update s */
6827         s = SvPVX(PL_linestr) + s_off;
6828     }
6829     OPERATOR(FOR);
6830 }
6831 
6832 static int
yyl_do(pTHX_ char * s,I32 orig_keyword)6833 yyl_do(pTHX_ char *s, I32 orig_keyword)
6834 {
6835     s = skipspace(s);
6836     if (*s == '{')
6837         PRETERMBLOCK(DO);
6838     if (*s != '\'') {
6839         char *d;
6840         STRLEN len;
6841         *PL_tokenbuf = '&';
6842         d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6843                       1, &len);
6844         if (len && memNEs(PL_tokenbuf+1, len, "CORE")
6845          && !keyword(PL_tokenbuf + 1, len, 0)) {
6846             SSize_t off = s-SvPVX(PL_linestr);
6847             d = skipspace(d);
6848             s = SvPVX(PL_linestr)+off;
6849             if (*d == '(') {
6850                 force_ident_maybe_lex('&');
6851                 s = d;
6852             }
6853         }
6854     }
6855     if (orig_keyword == KEY_do)
6856         pl_yylval.ival = 1;
6857     else
6858         pl_yylval.ival = 0;
6859     OPERATOR(DO);
6860 }
6861 
6862 static int
yyl_my(pTHX_ char * s,I32 my)6863 yyl_my(pTHX_ char *s, I32 my)
6864 {
6865     if (PL_in_my) {
6866         PL_bufptr = s;
6867         yyerror(Perl_form(aTHX_
6868                           "Can't redeclare \"%s\" in \"%s\"",
6869                            my       == KEY_my    ? "my" :
6870                            my       == KEY_state ? "state" : "our",
6871                            PL_in_my == KEY_my    ? "my" :
6872                            PL_in_my == KEY_state ? "state" : "our"));
6873     }
6874     PL_in_my = (U16)my;
6875     s = skipspace(s);
6876     if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
6877         STRLEN len;
6878         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6879         if (memEQs(PL_tokenbuf, len, "sub"))
6880             return yyl_sub(aTHX_ s, my);
6881         PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
6882         if (!PL_in_my_stash) {
6883             char tmpbuf[1024];
6884             int i;
6885             PL_bufptr = s;
6886             i = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
6887             PERL_MY_SNPRINTF_POST_GUARD(i, sizeof(tmpbuf));
6888             yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
6889         }
6890     }
6891     else if (*s == '\\') {
6892         if (!FEATURE_MYREF_IS_ENABLED)
6893             Perl_croak(aTHX_ "The experimental declared_refs "
6894                              "feature is not enabled");
6895         Perl_ck_warner_d(aTHX_
6896              packWARN(WARN_EXPERIMENTAL__DECLARED_REFS),
6897             "Declaring references is experimental");
6898     }
6899     OPERATOR(MY);
6900 }
6901 
6902 static int yyl_try(pTHX_ char*);
6903 
6904 static bool
yyl_eol_needs_semicolon(pTHX_ char ** ps)6905 yyl_eol_needs_semicolon(pTHX_ char **ps)
6906 {
6907     char *s = *ps;
6908     if (PL_lex_state != LEX_NORMAL
6909         || (PL_in_eval && !PL_rsfp && !PL_parser->filtered))
6910     {
6911         const bool in_comment = *s == '#';
6912         char *d;
6913         if (*s == '#' && s == PL_linestart && PL_in_eval
6914          && !PL_rsfp && !PL_parser->filtered) {
6915             /* handle eval qq[#line 1 "foo"\n ...] */
6916             CopLINE_dec(PL_curcop);
6917             incline(s, PL_bufend);
6918         }
6919         d = s;
6920         while (d < PL_bufend && *d != '\n')
6921             d++;
6922         if (d < PL_bufend)
6923             d++;
6924         s = d;
6925         if (in_comment && d == PL_bufend
6926             && PL_lex_state == LEX_INTERPNORMAL
6927             && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6928             && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
6929         else
6930             incline(s, PL_bufend);
6931         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
6932             PL_lex_state = LEX_FORMLINE;
6933             force_next(FORMRBRACK);
6934             *ps = s;
6935             return TRUE;
6936         }
6937     }
6938     else {
6939         while (s < PL_bufend && *s != '\n')
6940             s++;
6941         if (s < PL_bufend) {
6942             s++;
6943             if (s < PL_bufend)
6944                 incline(s, PL_bufend);
6945         }
6946     }
6947     *ps = s;
6948     return FALSE;
6949 }
6950 
6951 static int
yyl_fake_eof(pTHX_ U32 fake_eof,bool bof,char * s)6952 yyl_fake_eof(pTHX_ U32 fake_eof, bool bof, char *s)
6953 {
6954     char *d;
6955 
6956     goto start;
6957 
6958     do {
6959         fake_eof = 0;
6960         bof = cBOOL(PL_rsfp);
6961       start:
6962 
6963         PL_bufptr = PL_bufend;
6964         COPLINE_INC_WITH_HERELINES;
6965         if (!lex_next_chunk(fake_eof)) {
6966             CopLINE_dec(PL_curcop);
6967             s = PL_bufptr;
6968             TOKEN(';');	/* not infinite loop because rsfp is NULL now */
6969         }
6970         CopLINE_dec(PL_curcop);
6971         s = PL_bufptr;
6972         /* If it looks like the start of a BOM or raw UTF-16,
6973          * check if it in fact is. */
6974         if (bof && PL_rsfp
6975             && (   *s == 0
6976                 || *(U8*)s == BOM_UTF8_FIRST_BYTE
6977                 || *(U8*)s >= 0xFE
6978                 || s[1] == 0))
6979         {
6980             Off_t offset = (IV)PerlIO_tell(PL_rsfp);
6981             bof = (offset == (Off_t)SvCUR(PL_linestr));
6982 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
6983             /* offset may include swallowed CR */
6984             if (!bof)
6985                 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
6986 #endif
6987             if (bof) {
6988                 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
6989                 s = swallow_bom((U8*)s);
6990             }
6991         }
6992         if (PL_parser->in_pod) {
6993             /* Incest with pod. */
6994             if (    memBEGINPs(s, (STRLEN) (PL_bufend - s), "=cut")
6995                 && !isALPHA(s[4]))
6996             {
6997                 SvPVCLEAR(PL_linestr);
6998                 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
6999                 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7000                 PL_last_lop = PL_last_uni = NULL;
7001                 PL_parser->in_pod = 0;
7002             }
7003         }
7004         if (PL_rsfp || PL_parser->filtered)
7005             incline(s, PL_bufend);
7006     } while (PL_parser->in_pod);
7007 
7008     PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
7009     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7010     PL_last_lop = PL_last_uni = NULL;
7011     if (CopLINE(PL_curcop) == 1) {
7012         while (s < PL_bufend && isSPACE(*s))
7013             s++;
7014         if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
7015             s++;
7016         d = NULL;
7017         if (!PL_in_eval) {
7018             if (*s == '#' && *(s+1) == '!')
7019                 d = s + 2;
7020 #ifdef ALTERNATE_SHEBANG
7021             else {
7022                 static char const as[] = ALTERNATE_SHEBANG;
7023                 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
7024                     d = s + (sizeof(as) - 1);
7025             }
7026 #endif /* ALTERNATE_SHEBANG */
7027         }
7028         if (d) {
7029             char *ipath;
7030             char *ipathend;
7031 
7032             while (isSPACE(*d))
7033                 d++;
7034             ipath = d;
7035             while (*d && !isSPACE(*d))
7036                 d++;
7037             ipathend = d;
7038 
7039 #ifdef ARG_ZERO_IS_SCRIPT
7040             if (ipathend > ipath) {
7041                 /*
7042                  * HP-UX (at least) sets argv[0] to the script name,
7043                  * which makes $^X incorrect.  And Digital UNIX and Linux,
7044                  * at least, set argv[0] to the basename of the Perl
7045                  * interpreter. So, having found "#!", we'll set it right.
7046                  */
7047                 SV* copfilesv = CopFILESV(PL_curcop);
7048                 if (copfilesv) {
7049                     SV * const x =
7050                         GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
7051                                          SVt_PV)); /* $^X */
7052                     assert(SvPOK(x) || SvGMAGICAL(x));
7053                     if (sv_eq(x, copfilesv)) {
7054                         sv_setpvn(x, ipath, ipathend - ipath);
7055                         SvSETMAGIC(x);
7056                     }
7057                     else {
7058                         STRLEN blen;
7059                         STRLEN llen;
7060                         const char *bstart = SvPV_const(copfilesv, blen);
7061                         const char * const lstart = SvPV_const(x, llen);
7062                         if (llen < blen) {
7063                             bstart += blen - llen;
7064                             if (strnEQ(bstart, lstart, llen) &&	bstart[-1] == '/') {
7065                                 sv_setpvn(x, ipath, ipathend - ipath);
7066                                 SvSETMAGIC(x);
7067                             }
7068                         }
7069                     }
7070                 }
7071                 else {
7072                     /* Anything to do if no copfilesv? */
7073                 }
7074                 TAINT_NOT;	/* $^X is always tainted, but that's OK */
7075             }
7076 #endif /* ARG_ZERO_IS_SCRIPT */
7077 
7078             /*
7079              * Look for options.
7080              */
7081             d = instr(s,"perl -");
7082             if (!d) {
7083                 d = instr(s,"perl");
7084 #if defined(DOSISH)
7085                 /* avoid getting into infinite loops when shebang
7086                  * line contains "Perl" rather than "perl" */
7087                 if (!d) {
7088                     for (d = ipathend-4; d >= ipath; --d) {
7089                         if (isALPHA_FOLD_EQ(*d, 'p')
7090                             && !ibcmp(d, "perl", 4))
7091                         {
7092                             break;
7093                         }
7094                     }
7095                     if (d < ipath)
7096                         d = NULL;
7097                 }
7098 #endif
7099             }
7100 #ifdef ALTERNATE_SHEBANG
7101             /*
7102              * If the ALTERNATE_SHEBANG on this system starts with a
7103              * character that can be part of a Perl expression, then if
7104              * we see it but not "perl", we're probably looking at the
7105              * start of Perl code, not a request to hand off to some
7106              * other interpreter.  Similarly, if "perl" is there, but
7107              * not in the first 'word' of the line, we assume the line
7108              * contains the start of the Perl program.
7109              */
7110             if (d && *s != '#') {
7111                 const char *c = ipath;
7112                 while (*c && !memCHRs("; \t\r\n\f\v#", *c))
7113                     c++;
7114                 if (c < d)
7115                     d = NULL;	/* "perl" not in first word; ignore */
7116                 else
7117                     *s = '#';	/* Don't try to parse shebang line */
7118             }
7119 #endif /* ALTERNATE_SHEBANG */
7120             if (!d
7121                 && *s == '#'
7122                 && ipathend > ipath
7123                 && !PL_minus_c
7124                 && !instr(s,"indir")
7125                 && instr(PL_origargv[0],"perl"))
7126             {
7127                 dVAR;
7128                 char **newargv;
7129 
7130                 *ipathend = '\0';
7131                 s = ipathend + 1;
7132                 while (s < PL_bufend && isSPACE(*s))
7133                     s++;
7134                 if (s < PL_bufend) {
7135                     Newx(newargv,PL_origargc+3,char*);
7136                     newargv[1] = s;
7137                     while (s < PL_bufend && !isSPACE(*s))
7138                         s++;
7139                     *s = '\0';
7140                     Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
7141                 }
7142                 else
7143                     newargv = PL_origargv;
7144                 newargv[0] = ipath;
7145                 PERL_FPU_PRE_EXEC
7146                 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
7147                 PERL_FPU_POST_EXEC
7148                 Perl_croak(aTHX_ "Can't exec %s", ipath);
7149             }
7150             if (d) {
7151                 while (*d && !isSPACE(*d))
7152                     d++;
7153                 while (SPACE_OR_TAB(*d))
7154                     d++;
7155 
7156                 if (*d++ == '-') {
7157                     const bool switches_done = PL_doswitches;
7158                     const U32 oldpdb = PL_perldb;
7159                     const bool oldn = PL_minus_n;
7160                     const bool oldp = PL_minus_p;
7161                     const char *d1 = d;
7162 
7163                     do {
7164                         bool baduni = FALSE;
7165                         if (*d1 == 'C') {
7166                             const char *d2 = d1 + 1;
7167                             if (parse_unicode_opts((const char **)&d2)
7168                                 != PL_unicode)
7169                                 baduni = TRUE;
7170                         }
7171                         if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) {
7172                             const char * const m = d1;
7173                             while (*d1 && !isSPACE(*d1))
7174                                 d1++;
7175                             Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
7176                                   (int)(d1 - m), m);
7177                         }
7178                         d1 = moreswitches(d1);
7179                     } while (d1);
7180                     if (PL_doswitches && !switches_done) {
7181                         int argc = PL_origargc;
7182                         char **argv = PL_origargv;
7183                         do {
7184                             argc--,argv++;
7185                         } while (argc && argv[0][0] == '-' && argv[0][1]);
7186                         init_argv_symbols(argc,argv);
7187                     }
7188                     if (   (PERLDB_LINE_OR_SAVESRC && !oldpdb)
7189                         || ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
7190                           /* if we have already added "LINE: while (<>) {",
7191                              we must not do it again */
7192                     {
7193                         SvPVCLEAR(PL_linestr);
7194                         PL_bufptr = PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
7195                         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7196                         PL_last_lop = PL_last_uni = NULL;
7197                         PL_preambled = FALSE;
7198                         if (PERLDB_LINE_OR_SAVESRC)
7199                             (void)gv_fetchfile(PL_origfilename);
7200                         return YYL_RETRY;
7201                     }
7202                 }
7203             }
7204         }
7205     }
7206 
7207     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
7208         PL_lex_state = LEX_FORMLINE;
7209         force_next(FORMRBRACK);
7210         TOKEN(';');
7211     }
7212 
7213     PL_bufptr = s;
7214     return YYL_RETRY;
7215 }
7216 
7217 static int
yyl_fatcomma(pTHX_ char * s,STRLEN len)7218 yyl_fatcomma(pTHX_ char *s, STRLEN len)
7219 {
7220     CLINE;
7221     pl_yylval.opval
7222         = newSVOP(OP_CONST, 0,
7223                        S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
7224     pl_yylval.opval->op_private = OPpCONST_BARE;
7225     TERM(BAREWORD);
7226 }
7227 
7228 static int
yyl_safe_bareword(pTHX_ char * s,const char lastchar)7229 yyl_safe_bareword(pTHX_ char *s, const char lastchar)
7230 {
7231     if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7232         && PL_parser->saw_infix_sigil)
7233     {
7234         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7235                          "Operator or semicolon missing before %c%" UTF8f,
7236                          lastchar,
7237                          UTF8fARG(UTF, strlen(PL_tokenbuf),
7238                                   PL_tokenbuf));
7239         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7240                          "Ambiguous use of %c resolved as operator %c",
7241                          lastchar, lastchar);
7242     }
7243     TOKEN(BAREWORD);
7244 }
7245 
7246 static int
yyl_constant_op(pTHX_ char * s,SV * sv,CV * cv,OP * rv2cv_op,PADOFFSET off)7247 yyl_constant_op(pTHX_ char *s, SV *sv, CV *cv, OP *rv2cv_op, PADOFFSET off)
7248 {
7249     if (sv) {
7250         op_free(rv2cv_op);
7251         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7252         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7253         if (SvTYPE(sv) == SVt_PVAV)
7254             pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7255                                       pl_yylval.opval);
7256         else {
7257             pl_yylval.opval->op_private = 0;
7258             pl_yylval.opval->op_folded = 1;
7259             pl_yylval.opval->op_flags |= OPf_SPECIAL;
7260         }
7261         TOKEN(BAREWORD);
7262     }
7263 
7264     op_free(pl_yylval.opval);
7265     pl_yylval.opval =
7266         off ? newCVREF(0, rv2cv_op) : rv2cv_op;
7267     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7268     PL_last_lop = PL_oldbufptr;
7269     PL_last_lop_op = OP_ENTERSUB;
7270 
7271     /* Is there a prototype? */
7272     if (SvPOK(cv)) {
7273         int k = yyl_subproto(aTHX_ s, cv);
7274         if (k != KEY_NULL)
7275             return k;
7276     }
7277 
7278     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7279     PL_expect = XTERM;
7280     force_next(off ? PRIVATEREF : BAREWORD);
7281     if (!PL_lex_allbrackets
7282         && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7283     {
7284         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7285     }
7286 
7287     TOKEN(NOAMP);
7288 }
7289 
7290 /* Honour "reserved word" warnings, and enforce strict subs */
7291 static void
yyl_strictwarn_bareword(pTHX_ const char lastchar)7292 yyl_strictwarn_bareword(pTHX_ const char lastchar)
7293 {
7294     /* after "print" and similar functions (corresponding to
7295      * "F? L" in opcode.pl), whatever wasn't already parsed as
7296      * a filehandle should be subject to "strict subs".
7297      * Likewise for the optional indirect-object argument to system
7298      * or exec, which can't be a bareword */
7299     if ((PL_last_lop_op == OP_PRINT
7300             || PL_last_lop_op == OP_PRTF
7301             || PL_last_lop_op == OP_SAY
7302             || PL_last_lop_op == OP_SYSTEM
7303             || PL_last_lop_op == OP_EXEC)
7304         && (PL_hints & HINT_STRICT_SUBS))
7305     {
7306         pl_yylval.opval->op_private |= OPpCONST_STRICT;
7307     }
7308 
7309     if (lastchar != '-' && ckWARN(WARN_RESERVED)) {
7310         char *d = PL_tokenbuf;
7311         while (isLOWER(*d))
7312             d++;
7313         if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0)) {
7314             /* PL_warn_reserved is constant */
7315             GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
7316             Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7317                         PL_tokenbuf);
7318             GCC_DIAG_RESTORE_STMT;
7319         }
7320     }
7321 }
7322 
7323 static int
yyl_just_a_word(pTHX_ char * s,STRLEN len,I32 orig_keyword,struct code c)7324 yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 orig_keyword, struct code c)
7325 {
7326     int pkgname = 0;
7327     const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7328     bool safebw;
7329     bool no_op_error = FALSE;
7330     /* Use this var to track whether intuit_method has been
7331        called.  intuit_method returns 0 or > 255.  */
7332     int key = 1;
7333 
7334     if (PL_expect == XOPERATOR) {
7335         if (PL_bufptr == PL_linestart) {
7336             CopLINE_dec(PL_curcop);
7337             Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7338             CopLINE_inc(PL_curcop);
7339         }
7340         else
7341             /* We want to call no_op with s pointing after the
7342                bareword, so defer it.  But we want it to come
7343                before the Bad name croak.  */
7344             no_op_error = TRUE;
7345     }
7346 
7347     /* Get the rest if it looks like a package qualifier */
7348 
7349     if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7350         STRLEN morelen;
7351         s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7352                       TRUE, &morelen);
7353         if (no_op_error) {
7354             no_op("Bareword",s);
7355             no_op_error = FALSE;
7356         }
7357         if (!morelen)
7358             Perl_croak(aTHX_ "Bad name after %" UTF8f "%s",
7359                     UTF8fARG(UTF, len, PL_tokenbuf),
7360                     *s == '\'' ? "'" : "::");
7361         len += morelen;
7362         pkgname = 1;
7363     }
7364 
7365     if (no_op_error)
7366         no_op("Bareword",s);
7367 
7368     /* See if the name is "Foo::",
7369        in which case Foo is a bareword
7370        (and a package name). */
7371 
7372     if (len > 2 && PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':') {
7373         if (ckWARN(WARN_BAREWORD)
7374             && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7375             Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7376                         "Bareword \"%" UTF8f
7377                         "\" refers to nonexistent package",
7378                         UTF8fARG(UTF, len, PL_tokenbuf));
7379         len -= 2;
7380         PL_tokenbuf[len] = '\0';
7381         c.gv = NULL;
7382         c.gvp = 0;
7383         safebw = TRUE;
7384     }
7385     else {
7386         safebw = FALSE;
7387     }
7388 
7389     /* if we saw a global override before, get the right name */
7390 
7391     if (!c.sv)
7392         c.sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len);
7393     if (c.gvp) {
7394         SV *sv = newSVpvs("CORE::GLOBAL::");
7395         sv_catsv(sv, c.sv);
7396         SvREFCNT_dec(c.sv);
7397         c.sv = sv;
7398     }
7399 
7400     /* Presume this is going to be a bareword of some sort. */
7401     CLINE;
7402     pl_yylval.opval = newSVOP(OP_CONST, 0, c.sv);
7403     pl_yylval.opval->op_private = OPpCONST_BARE;
7404 
7405     /* And if "Foo::", then that's what it certainly is. */
7406     if (safebw)
7407         return yyl_safe_bareword(aTHX_ s, lastchar);
7408 
7409     if (!c.off) {
7410         OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(c.sv));
7411         const_op->op_private = OPpCONST_BARE;
7412         c.rv2cv_op = newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op);
7413         c.cv = c.lex
7414             ? isGV(c.gv)
7415                 ? GvCV(c.gv)
7416                 : SvROK(c.gv) && SvTYPE(SvRV(c.gv)) == SVt_PVCV
7417                     ? (CV *)SvRV(c.gv)
7418                     : ((CV *)c.gv)
7419             : rv2cv_op_cv(c.rv2cv_op, RV2CVOPCV_RETURN_STUB);
7420     }
7421 
7422     /* See if it's the indirect object for a list operator. */
7423 
7424     if (PL_oldoldbufptr
7425         && PL_oldoldbufptr < PL_bufptr
7426         && (PL_oldoldbufptr == PL_last_lop
7427             || PL_oldoldbufptr == PL_last_uni)
7428         && /* NO SKIPSPACE BEFORE HERE! */
7429            (PL_expect == XREF
7430             || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7)
7431                                                    == OA_FILEREF))
7432     {
7433         bool immediate_paren = *s == '(';
7434         SSize_t s_off;
7435 
7436         /* (Now we can afford to cross potential line boundary.) */
7437         s = skipspace(s);
7438 
7439         /* intuit_method() can indirectly call lex_next_chunk(),
7440          * invalidating s
7441          */
7442         s_off = s - SvPVX(PL_linestr);
7443         /* Two barewords in a row may indicate method call. */
7444         if (   (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
7445                 || *s == '$')
7446             && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv)))
7447         {
7448             /* the code at method: doesn't use s */
7449             goto method;
7450         }
7451         s = SvPVX(PL_linestr) + s_off;
7452 
7453         /* If not a declared subroutine, it's an indirect object. */
7454         /* (But it's an indir obj regardless for sort.) */
7455         /* Also, if "_" follows a filetest operator, it's a bareword */
7456 
7457         if (
7458             ( !immediate_paren && (PL_last_lop_op == OP_SORT
7459              || (!c.cv
7460                  && (PL_last_lop_op != OP_MAPSTART
7461                      && PL_last_lop_op != OP_GREPSTART))))
7462            || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7463                 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK)
7464                                                 == OA_FILESTATOP))
7465            )
7466         {
7467             PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7468             yyl_strictwarn_bareword(aTHX_ lastchar);
7469             op_free(c.rv2cv_op);
7470             return yyl_safe_bareword(aTHX_ s, lastchar);
7471         }
7472     }
7473 
7474     PL_expect = XOPERATOR;
7475     s = skipspace(s);
7476 
7477     /* Is this a word before a => operator? */
7478     if (*s == '=' && s[1] == '>' && !pkgname) {
7479         op_free(c.rv2cv_op);
7480         CLINE;
7481         if (c.gvp || (c.lex && !c.off)) {
7482             assert (cSVOPx(pl_yylval.opval)->op_sv == c.sv);
7483             /* This is our own scalar, created a few lines
7484                above, so this is safe. */
7485             SvREADONLY_off(c.sv);
7486             sv_setpv(c.sv, PL_tokenbuf);
7487             if (UTF && !IN_BYTES
7488              && is_utf8_string((U8*)PL_tokenbuf, len))
7489                   SvUTF8_on(c.sv);
7490             SvREADONLY_on(c.sv);
7491         }
7492         TERM(BAREWORD);
7493     }
7494 
7495     /* If followed by a paren, it's certainly a subroutine. */
7496     if (*s == '(') {
7497         CLINE;
7498         if (c.cv) {
7499             char *d = s + 1;
7500             while (SPACE_OR_TAB(*d))
7501                 d++;
7502             if (*d == ')' && (c.sv = cv_const_sv_or_av(c.cv)))
7503                 return yyl_constant_op(aTHX_ d + 1, c.sv, c.cv, c.rv2cv_op, c.off);
7504         }
7505         NEXTVAL_NEXTTOKE.opval =
7506             c.off ? c.rv2cv_op : pl_yylval.opval;
7507         if (c.off)
7508              op_free(pl_yylval.opval), force_next(PRIVATEREF);
7509         else op_free(c.rv2cv_op),      force_next(BAREWORD);
7510         pl_yylval.ival = 0;
7511         TOKEN('&');
7512     }
7513 
7514     /* If followed by var or block, call it a method (unless sub) */
7515 
7516     if ((*s == '$' || *s == '{') && !c.cv && FEATURE_INDIRECT_IS_ENABLED) {
7517         op_free(c.rv2cv_op);
7518         PL_last_lop = PL_oldbufptr;
7519         PL_last_lop_op = OP_METHOD;
7520         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7521             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7522         PL_expect = XBLOCKTERM;
7523         PL_bufptr = s;
7524         return REPORT(METHOD);
7525     }
7526 
7527     /* If followed by a bareword, see if it looks like indir obj. */
7528 
7529     if (   key == 1
7530         && !orig_keyword
7531         && (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '$')
7532         && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv)))
7533     {
7534       method:
7535         if (c.lex && !c.off) {
7536             assert(cSVOPx(pl_yylval.opval)->op_sv == c.sv);
7537             SvREADONLY_off(c.sv);
7538             sv_setpvn(c.sv, PL_tokenbuf, len);
7539             if (UTF && !IN_BYTES
7540              && is_utf8_string((U8*)PL_tokenbuf, len))
7541                 SvUTF8_on(c.sv);
7542             else SvUTF8_off(c.sv);
7543         }
7544         op_free(c.rv2cv_op);
7545         if (key == METHOD && !PL_lex_allbrackets
7546             && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7547         {
7548             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7549         }
7550         return REPORT(key);
7551     }
7552 
7553     /* Not a method, so call it a subroutine (if defined) */
7554 
7555     if (c.cv) {
7556         /* Check for a constant sub */
7557         c.sv = cv_const_sv_or_av(c.cv);
7558         return yyl_constant_op(aTHX_ s, c.sv, c.cv, c.rv2cv_op, c.off);
7559     }
7560 
7561     /* Call it a bare word */
7562 
7563     if (PL_hints & HINT_STRICT_SUBS)
7564         pl_yylval.opval->op_private |= OPpCONST_STRICT;
7565     else
7566         yyl_strictwarn_bareword(aTHX_ lastchar);
7567 
7568     op_free(c.rv2cv_op);
7569 
7570     return yyl_safe_bareword(aTHX_ s, lastchar);
7571 }
7572 
7573 static int
yyl_word_or_keyword(pTHX_ char * s,STRLEN len,I32 key,I32 orig_keyword,struct code c)7574 yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct code c)
7575 {
7576     switch (key) {
7577     default:			/* not a keyword */
7578         return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
7579 
7580     case KEY___FILE__:
7581         FUN0OP( newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0)) );
7582 
7583     case KEY___LINE__:
7584         FUN0OP(
7585             newSVOP(OP_CONST, 0,
7586                 Perl_newSVpvf(aTHX_ "%" IVdf, (IV)CopLINE(PL_curcop)))
7587         );
7588 
7589     case KEY___PACKAGE__:
7590         FUN0OP(
7591             newSVOP(OP_CONST, 0, (PL_curstash
7592                                      ? newSVhek(HvNAME_HEK(PL_curstash))
7593                                      : &PL_sv_undef))
7594         );
7595 
7596     case KEY___DATA__:
7597     case KEY___END__:
7598         if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D'))
7599             yyl_data_handle(aTHX);
7600         return yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s);
7601 
7602     case KEY___SUB__:
7603         FUN0OP(CvCLONE(PL_compcv)
7604                     ? newOP(OP_RUNCV, 0)
7605                     : newPVOP(OP_RUNCV,0,NULL));
7606 
7607     case KEY_AUTOLOAD:
7608     case KEY_DESTROY:
7609     case KEY_BEGIN:
7610     case KEY_UNITCHECK:
7611     case KEY_CHECK:
7612     case KEY_INIT:
7613     case KEY_END:
7614         if (PL_expect == XSTATE)
7615             return yyl_sub(aTHX_ PL_bufptr, key);
7616         return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
7617 
7618     case KEY_abs:
7619         UNI(OP_ABS);
7620 
7621     case KEY_alarm:
7622         UNI(OP_ALARM);
7623 
7624     case KEY_accept:
7625         LOP(OP_ACCEPT,XTERM);
7626 
7627     case KEY_and:
7628         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7629             return REPORT(0);
7630         OPERATOR(ANDOP);
7631 
7632     case KEY_atan2:
7633         LOP(OP_ATAN2,XTERM);
7634 
7635     case KEY_bind:
7636         LOP(OP_BIND,XTERM);
7637 
7638     case KEY_binmode:
7639         LOP(OP_BINMODE,XTERM);
7640 
7641     case KEY_bless:
7642         LOP(OP_BLESS,XTERM);
7643 
7644     case KEY_break:
7645         FUN0(OP_BREAK);
7646 
7647     case KEY_chop:
7648         UNI(OP_CHOP);
7649 
7650     case KEY_continue:
7651         /* We have to disambiguate the two senses of
7652           "continue". If the next token is a '{' then
7653           treat it as the start of a continue block;
7654           otherwise treat it as a control operator.
7655          */
7656         s = skipspace(s);
7657         if (*s == '{')
7658             PREBLOCK(CONTINUE);
7659         else
7660             FUN0(OP_CONTINUE);
7661 
7662     case KEY_chdir:
7663         /* may use HOME */
7664         (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7665         UNI(OP_CHDIR);
7666 
7667     case KEY_close:
7668         UNI(OP_CLOSE);
7669 
7670     case KEY_closedir:
7671         UNI(OP_CLOSEDIR);
7672 
7673     case KEY_cmp:
7674         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7675             return REPORT(0);
7676         NCEop(OP_SCMP);
7677 
7678     case KEY_caller:
7679         UNI(OP_CALLER);
7680 
7681     case KEY_crypt:
7682 #ifdef FCRYPT
7683         if (!PL_cryptseen) {
7684             PL_cryptseen = TRUE;
7685             init_des();
7686         }
7687 #endif
7688         LOP(OP_CRYPT,XTERM);
7689 
7690     case KEY_chmod:
7691         LOP(OP_CHMOD,XTERM);
7692 
7693     case KEY_chown:
7694         LOP(OP_CHOWN,XTERM);
7695 
7696     case KEY_connect:
7697         LOP(OP_CONNECT,XTERM);
7698 
7699     case KEY_chr:
7700         UNI(OP_CHR);
7701 
7702     case KEY_cos:
7703         UNI(OP_COS);
7704 
7705     case KEY_chroot:
7706         UNI(OP_CHROOT);
7707 
7708     case KEY_default:
7709         PREBLOCK(DEFAULT);
7710 
7711     case KEY_do:
7712         return yyl_do(aTHX_ s, orig_keyword);
7713 
7714     case KEY_die:
7715         PL_hints |= HINT_BLOCK_SCOPE;
7716         LOP(OP_DIE,XTERM);
7717 
7718     case KEY_defined:
7719         UNI(OP_DEFINED);
7720 
7721     case KEY_delete:
7722         UNI(OP_DELETE);
7723 
7724     case KEY_dbmopen:
7725         Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7726                           STR_WITH_LEN("NDBM_File::"),
7727                           STR_WITH_LEN("DB_File::"),
7728                           STR_WITH_LEN("GDBM_File::"),
7729                           STR_WITH_LEN("SDBM_File::"),
7730                           STR_WITH_LEN("ODBM_File::"),
7731                           NULL);
7732         LOP(OP_DBMOPEN,XTERM);
7733 
7734     case KEY_dbmclose:
7735         UNI(OP_DBMCLOSE);
7736 
7737     case KEY_dump:
7738         LOOPX(OP_DUMP);
7739 
7740     case KEY_else:
7741         PREBLOCK(ELSE);
7742 
7743     case KEY_elsif:
7744         pl_yylval.ival = CopLINE(PL_curcop);
7745         OPERATOR(ELSIF);
7746 
7747     case KEY_eq:
7748         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7749             return REPORT(0);
7750         ChEop(OP_SEQ);
7751 
7752     case KEY_exists:
7753         UNI(OP_EXISTS);
7754 
7755     case KEY_exit:
7756         UNI(OP_EXIT);
7757 
7758     case KEY_eval:
7759         s = skipspace(s);
7760         if (*s == '{') { /* block eval */
7761             PL_expect = XTERMBLOCK;
7762             UNIBRACK(OP_ENTERTRY);
7763         }
7764         else { /* string eval */
7765             PL_expect = XTERM;
7766             UNIBRACK(OP_ENTEREVAL);
7767         }
7768 
7769     case KEY_evalbytes:
7770         PL_expect = XTERM;
7771         UNIBRACK(-OP_ENTEREVAL);
7772 
7773     case KEY_eof:
7774         UNI(OP_EOF);
7775 
7776     case KEY_exp:
7777         UNI(OP_EXP);
7778 
7779     case KEY_each:
7780         UNI(OP_EACH);
7781 
7782     case KEY_exec:
7783         LOP(OP_EXEC,XREF);
7784 
7785     case KEY_endhostent:
7786         FUN0(OP_EHOSTENT);
7787 
7788     case KEY_endnetent:
7789         FUN0(OP_ENETENT);
7790 
7791     case KEY_endservent:
7792         FUN0(OP_ESERVENT);
7793 
7794     case KEY_endprotoent:
7795         FUN0(OP_EPROTOENT);
7796 
7797     case KEY_endpwent:
7798         FUN0(OP_EPWENT);
7799 
7800     case KEY_endgrent:
7801         FUN0(OP_EGRENT);
7802 
7803     case KEY_for:
7804     case KEY_foreach:
7805         return yyl_foreach(aTHX_ s);
7806 
7807     case KEY_formline:
7808         LOP(OP_FORMLINE,XTERM);
7809 
7810     case KEY_fork:
7811         FUN0(OP_FORK);
7812 
7813     case KEY_fc:
7814         UNI(OP_FC);
7815 
7816     case KEY_fcntl:
7817         LOP(OP_FCNTL,XTERM);
7818 
7819     case KEY_fileno:
7820         UNI(OP_FILENO);
7821 
7822     case KEY_flock:
7823         LOP(OP_FLOCK,XTERM);
7824 
7825     case KEY_gt:
7826         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7827             return REPORT(0);
7828         ChRop(OP_SGT);
7829 
7830     case KEY_ge:
7831         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7832             return REPORT(0);
7833         ChRop(OP_SGE);
7834 
7835     case KEY_grep:
7836         LOP(OP_GREPSTART, XREF);
7837 
7838     case KEY_goto:
7839         LOOPX(OP_GOTO);
7840 
7841     case KEY_gmtime:
7842         UNI(OP_GMTIME);
7843 
7844     case KEY_getc:
7845         UNIDOR(OP_GETC);
7846 
7847     case KEY_getppid:
7848         FUN0(OP_GETPPID);
7849 
7850     case KEY_getpgrp:
7851         UNI(OP_GETPGRP);
7852 
7853     case KEY_getpriority:
7854         LOP(OP_GETPRIORITY,XTERM);
7855 
7856     case KEY_getprotobyname:
7857         UNI(OP_GPBYNAME);
7858 
7859     case KEY_getprotobynumber:
7860         LOP(OP_GPBYNUMBER,XTERM);
7861 
7862     case KEY_getprotoent:
7863         FUN0(OP_GPROTOENT);
7864 
7865     case KEY_getpwent:
7866         FUN0(OP_GPWENT);
7867 
7868     case KEY_getpwnam:
7869         UNI(OP_GPWNAM);
7870 
7871     case KEY_getpwuid:
7872         UNI(OP_GPWUID);
7873 
7874     case KEY_getpeername:
7875         UNI(OP_GETPEERNAME);
7876 
7877     case KEY_gethostbyname:
7878         UNI(OP_GHBYNAME);
7879 
7880     case KEY_gethostbyaddr:
7881         LOP(OP_GHBYADDR,XTERM);
7882 
7883     case KEY_gethostent:
7884         FUN0(OP_GHOSTENT);
7885 
7886     case KEY_getnetbyname:
7887         UNI(OP_GNBYNAME);
7888 
7889     case KEY_getnetbyaddr:
7890         LOP(OP_GNBYADDR,XTERM);
7891 
7892     case KEY_getnetent:
7893         FUN0(OP_GNETENT);
7894 
7895     case KEY_getservbyname:
7896         LOP(OP_GSBYNAME,XTERM);
7897 
7898     case KEY_getservbyport:
7899         LOP(OP_GSBYPORT,XTERM);
7900 
7901     case KEY_getservent:
7902         FUN0(OP_GSERVENT);
7903 
7904     case KEY_getsockname:
7905         UNI(OP_GETSOCKNAME);
7906 
7907     case KEY_getsockopt:
7908         LOP(OP_GSOCKOPT,XTERM);
7909 
7910     case KEY_getgrent:
7911         FUN0(OP_GGRENT);
7912 
7913     case KEY_getgrnam:
7914         UNI(OP_GGRNAM);
7915 
7916     case KEY_getgrgid:
7917         UNI(OP_GGRGID);
7918 
7919     case KEY_getlogin:
7920         FUN0(OP_GETLOGIN);
7921 
7922     case KEY_given:
7923         pl_yylval.ival = CopLINE(PL_curcop);
7924         Perl_ck_warner_d(aTHX_ packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
7925                          "given is experimental");
7926         OPERATOR(GIVEN);
7927 
7928     case KEY_glob:
7929         LOP( orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB, XTERM );
7930 
7931     case KEY_hex:
7932         UNI(OP_HEX);
7933 
7934     case KEY_if:
7935         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7936             return REPORT(0);
7937         pl_yylval.ival = CopLINE(PL_curcop);
7938         OPERATOR(IF);
7939 
7940     case KEY_index:
7941         LOP(OP_INDEX,XTERM);
7942 
7943     case KEY_int:
7944         UNI(OP_INT);
7945 
7946     case KEY_ioctl:
7947         LOP(OP_IOCTL,XTERM);
7948 
7949     case KEY_isa:
7950         Perl_ck_warner_d(aTHX_
7951             packWARN(WARN_EXPERIMENTAL__ISA), "isa is experimental");
7952         NCRop(OP_ISA);
7953 
7954     case KEY_join:
7955         LOP(OP_JOIN,XTERM);
7956 
7957     case KEY_keys:
7958         UNI(OP_KEYS);
7959 
7960     case KEY_kill:
7961         LOP(OP_KILL,XTERM);
7962 
7963     case KEY_last:
7964         LOOPX(OP_LAST);
7965 
7966     case KEY_lc:
7967         UNI(OP_LC);
7968 
7969     case KEY_lcfirst:
7970         UNI(OP_LCFIRST);
7971 
7972     case KEY_local:
7973         OPERATOR(LOCAL);
7974 
7975     case KEY_length:
7976         UNI(OP_LENGTH);
7977 
7978     case KEY_lt:
7979         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7980             return REPORT(0);
7981         ChRop(OP_SLT);
7982 
7983     case KEY_le:
7984         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7985             return REPORT(0);
7986         ChRop(OP_SLE);
7987 
7988     case KEY_localtime:
7989         UNI(OP_LOCALTIME);
7990 
7991     case KEY_log:
7992         UNI(OP_LOG);
7993 
7994     case KEY_link:
7995         LOP(OP_LINK,XTERM);
7996 
7997     case KEY_listen:
7998         LOP(OP_LISTEN,XTERM);
7999 
8000     case KEY_lock:
8001         UNI(OP_LOCK);
8002 
8003     case KEY_lstat:
8004         UNI(OP_LSTAT);
8005 
8006     case KEY_m:
8007         s = scan_pat(s,OP_MATCH);
8008         TERM(sublex_start());
8009 
8010     case KEY_map:
8011         LOP(OP_MAPSTART, XREF);
8012 
8013     case KEY_mkdir:
8014         LOP(OP_MKDIR,XTERM);
8015 
8016     case KEY_msgctl:
8017         LOP(OP_MSGCTL,XTERM);
8018 
8019     case KEY_msgget:
8020         LOP(OP_MSGGET,XTERM);
8021 
8022     case KEY_msgrcv:
8023         LOP(OP_MSGRCV,XTERM);
8024 
8025     case KEY_msgsnd:
8026         LOP(OP_MSGSND,XTERM);
8027 
8028     case KEY_our:
8029     case KEY_my:
8030     case KEY_state:
8031         return yyl_my(aTHX_ s, key);
8032 
8033     case KEY_next:
8034         LOOPX(OP_NEXT);
8035 
8036     case KEY_ne:
8037         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8038             return REPORT(0);
8039         ChEop(OP_SNE);
8040 
8041     case KEY_no:
8042         s = tokenize_use(0, s);
8043         TOKEN(USE);
8044 
8045     case KEY_not:
8046         if (*s == '(' || (s = skipspace(s), *s == '('))
8047             FUN1(OP_NOT);
8048         else {
8049             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8050                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8051             OPERATOR(NOTOP);
8052         }
8053 
8054     case KEY_open:
8055         s = skipspace(s);
8056         if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
8057             const char *t;
8058             char *d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8059             for (t=d; isSPACE(*t);)
8060                 t++;
8061             if ( *t && memCHRs("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8062                 /* [perl #16184] */
8063                 && !(t[0] == '=' && t[1] == '>')
8064                 && !(t[0] == ':' && t[1] == ':')
8065                 && !keyword(s, d-s, 0)
8066             ) {
8067                 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8068                    "Precedence problem: open %" UTF8f " should be open(%" UTF8f ")",
8069                     UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8070             }
8071         }
8072         LOP(OP_OPEN,XTERM);
8073 
8074     case KEY_or:
8075         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8076             return REPORT(0);
8077         pl_yylval.ival = OP_OR;
8078         OPERATOR(OROP);
8079 
8080     case KEY_ord:
8081         UNI(OP_ORD);
8082 
8083     case KEY_oct:
8084         UNI(OP_OCT);
8085 
8086     case KEY_opendir:
8087         LOP(OP_OPEN_DIR,XTERM);
8088 
8089     case KEY_print:
8090         checkcomma(s,PL_tokenbuf,"filehandle");
8091         LOP(OP_PRINT,XREF);
8092 
8093     case KEY_printf:
8094         checkcomma(s,PL_tokenbuf,"filehandle");
8095         LOP(OP_PRTF,XREF);
8096 
8097     case KEY_prototype:
8098         UNI(OP_PROTOTYPE);
8099 
8100     case KEY_push:
8101         LOP(OP_PUSH,XTERM);
8102 
8103     case KEY_pop:
8104         UNIDOR(OP_POP);
8105 
8106     case KEY_pos:
8107         UNIDOR(OP_POS);
8108 
8109     case KEY_pack:
8110         LOP(OP_PACK,XTERM);
8111 
8112     case KEY_package:
8113         s = force_word(s,BAREWORD,FALSE,TRUE);
8114         s = skipspace(s);
8115         s = force_strict_version(s);
8116         PREBLOCK(PACKAGE);
8117 
8118     case KEY_pipe:
8119         LOP(OP_PIPE_OP,XTERM);
8120 
8121     case KEY_q:
8122         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8123         if (!s)
8124             missingterm(NULL, 0);
8125         COPLINE_SET_FROM_MULTI_END;
8126         pl_yylval.ival = OP_CONST;
8127         TERM(sublex_start());
8128 
8129     case KEY_quotemeta:
8130         UNI(OP_QUOTEMETA);
8131 
8132     case KEY_qw:
8133         return yyl_qw(aTHX_ s, len);
8134 
8135     case KEY_qq:
8136         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8137         if (!s)
8138             missingterm(NULL, 0);
8139         pl_yylval.ival = OP_STRINGIFY;
8140         if (SvIVX(PL_lex_stuff) == '\'')
8141             SvIV_set(PL_lex_stuff, 0);	/* qq'$foo' should interpolate */
8142         TERM(sublex_start());
8143 
8144     case KEY_qr:
8145         s = scan_pat(s,OP_QR);
8146         TERM(sublex_start());
8147 
8148     case KEY_qx:
8149         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8150         if (!s)
8151             missingterm(NULL, 0);
8152         pl_yylval.ival = OP_BACKTICK;
8153         TERM(sublex_start());
8154 
8155     case KEY_return:
8156         OLDLOP(OP_RETURN);
8157 
8158     case KEY_require:
8159         return yyl_require(aTHX_ s, orig_keyword);
8160 
8161     case KEY_reset:
8162         UNI(OP_RESET);
8163 
8164     case KEY_redo:
8165         LOOPX(OP_REDO);
8166 
8167     case KEY_rename:
8168         LOP(OP_RENAME,XTERM);
8169 
8170     case KEY_rand:
8171         UNI(OP_RAND);
8172 
8173     case KEY_rmdir:
8174         UNI(OP_RMDIR);
8175 
8176     case KEY_rindex:
8177         LOP(OP_RINDEX,XTERM);
8178 
8179     case KEY_read:
8180         LOP(OP_READ,XTERM);
8181 
8182     case KEY_readdir:
8183         UNI(OP_READDIR);
8184 
8185     case KEY_readline:
8186         UNIDOR(OP_READLINE);
8187 
8188     case KEY_readpipe:
8189         UNIDOR(OP_BACKTICK);
8190 
8191     case KEY_rewinddir:
8192         UNI(OP_REWINDDIR);
8193 
8194     case KEY_recv:
8195         LOP(OP_RECV,XTERM);
8196 
8197     case KEY_reverse:
8198         LOP(OP_REVERSE,XTERM);
8199 
8200     case KEY_readlink:
8201         UNIDOR(OP_READLINK);
8202 
8203     case KEY_ref:
8204         UNI(OP_REF);
8205 
8206     case KEY_s:
8207         s = scan_subst(s);
8208         if (pl_yylval.opval)
8209             TERM(sublex_start());
8210         else
8211             TOKEN(1);	/* force error */
8212 
8213     case KEY_say:
8214         checkcomma(s,PL_tokenbuf,"filehandle");
8215         LOP(OP_SAY,XREF);
8216 
8217     case KEY_chomp:
8218         UNI(OP_CHOMP);
8219 
8220     case KEY_scalar:
8221         UNI(OP_SCALAR);
8222 
8223     case KEY_select:
8224         LOP(OP_SELECT,XTERM);
8225 
8226     case KEY_seek:
8227         LOP(OP_SEEK,XTERM);
8228 
8229     case KEY_semctl:
8230         LOP(OP_SEMCTL,XTERM);
8231 
8232     case KEY_semget:
8233         LOP(OP_SEMGET,XTERM);
8234 
8235     case KEY_semop:
8236         LOP(OP_SEMOP,XTERM);
8237 
8238     case KEY_send:
8239         LOP(OP_SEND,XTERM);
8240 
8241     case KEY_setpgrp:
8242         LOP(OP_SETPGRP,XTERM);
8243 
8244     case KEY_setpriority:
8245         LOP(OP_SETPRIORITY,XTERM);
8246 
8247     case KEY_sethostent:
8248         UNI(OP_SHOSTENT);
8249 
8250     case KEY_setnetent:
8251         UNI(OP_SNETENT);
8252 
8253     case KEY_setservent:
8254         UNI(OP_SSERVENT);
8255 
8256     case KEY_setprotoent:
8257         UNI(OP_SPROTOENT);
8258 
8259     case KEY_setpwent:
8260         FUN0(OP_SPWENT);
8261 
8262     case KEY_setgrent:
8263         FUN0(OP_SGRENT);
8264 
8265     case KEY_seekdir:
8266         LOP(OP_SEEKDIR,XTERM);
8267 
8268     case KEY_setsockopt:
8269         LOP(OP_SSOCKOPT,XTERM);
8270 
8271     case KEY_shift:
8272         UNIDOR(OP_SHIFT);
8273 
8274     case KEY_shmctl:
8275         LOP(OP_SHMCTL,XTERM);
8276 
8277     case KEY_shmget:
8278         LOP(OP_SHMGET,XTERM);
8279 
8280     case KEY_shmread:
8281         LOP(OP_SHMREAD,XTERM);
8282 
8283     case KEY_shmwrite:
8284         LOP(OP_SHMWRITE,XTERM);
8285 
8286     case KEY_shutdown:
8287         LOP(OP_SHUTDOWN,XTERM);
8288 
8289     case KEY_sin:
8290         UNI(OP_SIN);
8291 
8292     case KEY_sleep:
8293         UNI(OP_SLEEP);
8294 
8295     case KEY_socket:
8296         LOP(OP_SOCKET,XTERM);
8297 
8298     case KEY_socketpair:
8299         LOP(OP_SOCKPAIR,XTERM);
8300 
8301     case KEY_sort:
8302         checkcomma(s,PL_tokenbuf,"subroutine name");
8303         s = skipspace(s);
8304         PL_expect = XTERM;
8305         s = force_word(s,BAREWORD,TRUE,TRUE);
8306         LOP(OP_SORT,XREF);
8307 
8308     case KEY_split:
8309         LOP(OP_SPLIT,XTERM);
8310 
8311     case KEY_sprintf:
8312         LOP(OP_SPRINTF,XTERM);
8313 
8314     case KEY_splice:
8315         LOP(OP_SPLICE,XTERM);
8316 
8317     case KEY_sqrt:
8318         UNI(OP_SQRT);
8319 
8320     case KEY_srand:
8321         UNI(OP_SRAND);
8322 
8323     case KEY_stat:
8324         UNI(OP_STAT);
8325 
8326     case KEY_study:
8327         UNI(OP_STUDY);
8328 
8329     case KEY_substr:
8330         LOP(OP_SUBSTR,XTERM);
8331 
8332     case KEY_format:
8333     case KEY_sub:
8334         return yyl_sub(aTHX_ s, key);
8335 
8336     case KEY_system:
8337         LOP(OP_SYSTEM,XREF);
8338 
8339     case KEY_symlink:
8340         LOP(OP_SYMLINK,XTERM);
8341 
8342     case KEY_syscall:
8343         LOP(OP_SYSCALL,XTERM);
8344 
8345     case KEY_sysopen:
8346         LOP(OP_SYSOPEN,XTERM);
8347 
8348     case KEY_sysseek:
8349         LOP(OP_SYSSEEK,XTERM);
8350 
8351     case KEY_sysread:
8352         LOP(OP_SYSREAD,XTERM);
8353 
8354     case KEY_syswrite:
8355         LOP(OP_SYSWRITE,XTERM);
8356 
8357     case KEY_tr:
8358     case KEY_y:
8359         s = scan_trans(s);
8360         TERM(sublex_start());
8361 
8362     case KEY_tell:
8363         UNI(OP_TELL);
8364 
8365     case KEY_telldir:
8366         UNI(OP_TELLDIR);
8367 
8368     case KEY_tie:
8369         LOP(OP_TIE,XTERM);
8370 
8371     case KEY_tied:
8372         UNI(OP_TIED);
8373 
8374     case KEY_time:
8375         FUN0(OP_TIME);
8376 
8377     case KEY_times:
8378         FUN0(OP_TMS);
8379 
8380     case KEY_truncate:
8381         LOP(OP_TRUNCATE,XTERM);
8382 
8383     case KEY_uc:
8384         UNI(OP_UC);
8385 
8386     case KEY_ucfirst:
8387         UNI(OP_UCFIRST);
8388 
8389     case KEY_untie:
8390         UNI(OP_UNTIE);
8391 
8392     case KEY_until:
8393         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8394             return REPORT(0);
8395         pl_yylval.ival = CopLINE(PL_curcop);
8396         OPERATOR(UNTIL);
8397 
8398     case KEY_unless:
8399         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8400             return REPORT(0);
8401         pl_yylval.ival = CopLINE(PL_curcop);
8402         OPERATOR(UNLESS);
8403 
8404     case KEY_unlink:
8405         LOP(OP_UNLINK,XTERM);
8406 
8407     case KEY_undef:
8408         UNIDOR(OP_UNDEF);
8409 
8410     case KEY_unpack:
8411         LOP(OP_UNPACK,XTERM);
8412 
8413     case KEY_utime:
8414         LOP(OP_UTIME,XTERM);
8415 
8416     case KEY_umask:
8417         UNIDOR(OP_UMASK);
8418 
8419     case KEY_unshift:
8420         LOP(OP_UNSHIFT,XTERM);
8421 
8422     case KEY_use:
8423         s = tokenize_use(1, s);
8424         TOKEN(USE);
8425 
8426     case KEY_values:
8427         UNI(OP_VALUES);
8428 
8429     case KEY_vec:
8430         LOP(OP_VEC,XTERM);
8431 
8432     case KEY_when:
8433         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8434             return REPORT(0);
8435         pl_yylval.ival = CopLINE(PL_curcop);
8436         Perl_ck_warner_d(aTHX_
8437             packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8438             "when is experimental");
8439         OPERATOR(WHEN);
8440 
8441     case KEY_while:
8442         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8443             return REPORT(0);
8444         pl_yylval.ival = CopLINE(PL_curcop);
8445         OPERATOR(WHILE);
8446 
8447     case KEY_warn:
8448         PL_hints |= HINT_BLOCK_SCOPE;
8449         LOP(OP_WARN,XTERM);
8450 
8451     case KEY_wait:
8452         FUN0(OP_WAIT);
8453 
8454     case KEY_waitpid:
8455         LOP(OP_WAITPID,XTERM);
8456 
8457     case KEY_wantarray:
8458         FUN0(OP_WANTARRAY);
8459 
8460     case KEY_write:
8461         /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8462          * we use the same number on EBCDIC */
8463         gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8464         UNI(OP_ENTERWRITE);
8465 
8466     case KEY_x:
8467         if (PL_expect == XOPERATOR) {
8468             if (*s == '=' && !PL_lex_allbrackets
8469                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8470             {
8471                 return REPORT(0);
8472             }
8473             Mop(OP_REPEAT);
8474         }
8475         check_uni();
8476         return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
8477 
8478     case KEY_xor:
8479         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8480             return REPORT(0);
8481         pl_yylval.ival = OP_XOR;
8482         OPERATOR(OROP);
8483     }
8484 }
8485 
8486 static int
yyl_key_core(pTHX_ char * s,STRLEN len,struct code c)8487 yyl_key_core(pTHX_ char *s, STRLEN len, struct code c)
8488 {
8489     I32 key = 0;
8490     I32 orig_keyword = 0;
8491     STRLEN olen = len;
8492     char *d = s;
8493     s += 2;
8494     s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8495     if ((*s == ':' && s[1] == ':')
8496         || (!(key = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
8497     {
8498         Copy(PL_bufptr, PL_tokenbuf, olen, char);
8499         return yyl_just_a_word(aTHX_ d, olen, 0, c);
8500     }
8501     if (!key)
8502         Perl_croak(aTHX_ "CORE::%" UTF8f " is not a keyword",
8503                           UTF8fARG(UTF, len, PL_tokenbuf));
8504     if (key < 0)
8505         key = -key;
8506     else if (key == KEY_require || key == KEY_do
8507           || key == KEY_glob)
8508         /* that's a way to remember we saw "CORE::" */
8509         orig_keyword = key;
8510 
8511     /* Known to be a reserved word at this point */
8512     return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c);
8513 }
8514 
8515 static int
yyl_keylookup(pTHX_ char * s,GV * gv)8516 yyl_keylookup(pTHX_ char *s, GV *gv)
8517 {
8518     dVAR;
8519     STRLEN len;
8520     bool anydelim;
8521     I32 key;
8522     struct code c = no_code;
8523     I32 orig_keyword = 0;
8524     char *d;
8525 
8526     c.gv = gv;
8527 
8528     PL_bufptr = s;
8529     s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8530 
8531     /* Some keywords can be followed by any delimiter, including ':' */
8532     anydelim = word_takes_any_delimiter(PL_tokenbuf, len);
8533 
8534     /* x::* is just a word, unless x is "CORE" */
8535     if (!anydelim && *s == ':' && s[1] == ':') {
8536         if (memEQs(PL_tokenbuf, len, "CORE"))
8537             return yyl_key_core(aTHX_ s, len, c);
8538         return yyl_just_a_word(aTHX_ s, len, 0, c);
8539     }
8540 
8541     d = s;
8542     while (d < PL_bufend && isSPACE(*d))
8543             d++;	/* no comments skipped here, or s### is misparsed */
8544 
8545     /* Is this a word before a => operator? */
8546     if (*d == '=' && d[1] == '>') {
8547         return yyl_fatcomma(aTHX_ s, len);
8548     }
8549 
8550     /* Check for plugged-in keyword */
8551     {
8552         OP *o;
8553         int result;
8554         char *saved_bufptr = PL_bufptr;
8555         PL_bufptr = s;
8556         result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
8557         s = PL_bufptr;
8558         if (result == KEYWORD_PLUGIN_DECLINE) {
8559             /* not a plugged-in keyword */
8560             PL_bufptr = saved_bufptr;
8561         } else if (result == KEYWORD_PLUGIN_STMT) {
8562             pl_yylval.opval = o;
8563             CLINE;
8564             if (!PL_nexttoke) PL_expect = XSTATE;
8565             return REPORT(PLUGSTMT);
8566         } else if (result == KEYWORD_PLUGIN_EXPR) {
8567             pl_yylval.opval = o;
8568             CLINE;
8569             if (!PL_nexttoke) PL_expect = XOPERATOR;
8570             return REPORT(PLUGEXPR);
8571         } else {
8572             Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'", PL_tokenbuf);
8573         }
8574     }
8575 
8576     /* Is this a label? */
8577     if (!anydelim && PL_expect == XSTATE
8578           && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
8579         s = d + 1;
8580         pl_yylval.opval =
8581             newSVOP(OP_CONST, 0,
8582                 newSVpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0));
8583         CLINE;
8584         TOKEN(LABEL);
8585     }
8586 
8587     /* Check for lexical sub */
8588     if (PL_expect != XOPERATOR) {
8589         char tmpbuf[sizeof PL_tokenbuf + 1];
8590         *tmpbuf = '&';
8591         Copy(PL_tokenbuf, tmpbuf+1, len, char);
8592         c.off = pad_findmy_pvn(tmpbuf, len+1, 0);
8593         if (c.off != NOT_IN_PAD) {
8594             assert(c.off); /* we assume this is boolean-true below */
8595             if (PAD_COMPNAME_FLAGS_isOUR(c.off)) {
8596                 HV *  const stash = PAD_COMPNAME_OURSTASH(c.off);
8597                 HEK * const stashname = HvNAME_HEK(stash);
8598                 c.sv = newSVhek(stashname);
8599                 sv_catpvs(c.sv, "::");
8600                 sv_catpvn_flags(c.sv, PL_tokenbuf, len,
8601                                 (UTF ? SV_CATUTF8 : SV_CATBYTES));
8602                 c.gv = gv_fetchsv(c.sv, GV_NOADD_NOINIT | SvUTF8(c.sv),
8603                                   SVt_PVCV);
8604                 c.off = 0;
8605                 if (!c.gv) {
8606                     sv_free(c.sv);
8607                     c.sv = NULL;
8608                     return yyl_just_a_word(aTHX_ s, len, 0, c);
8609                 }
8610             }
8611             else {
8612                 c.rv2cv_op = newOP(OP_PADANY, 0);
8613                 c.rv2cv_op->op_targ = c.off;
8614                 c.cv = find_lexical_cv(c.off);
8615             }
8616             c.lex = TRUE;
8617             return yyl_just_a_word(aTHX_ s, len, 0, c);
8618         }
8619         c.off = 0;
8620     }
8621 
8622     /* Check for built-in keyword */
8623     key = keyword(PL_tokenbuf, len, 0);
8624 
8625     if (key < 0)
8626         key = yyl_secondclass_keyword(aTHX_ s, len, key, &orig_keyword, &c.gv, &c.gvp);
8627 
8628     if (key && key != KEY___DATA__ && key != KEY___END__
8629      && (!anydelim || *s != '#')) {
8630         /* no override, and not s### either; skipspace is safe here
8631          * check for => on following line */
8632         bool arrow;
8633         STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
8634         STRLEN   soff = s         - SvPVX(PL_linestr);
8635         s = peekspace(s);
8636         arrow = *s == '=' && s[1] == '>';
8637         PL_bufptr = SvPVX(PL_linestr) + bufoff;
8638         s         = SvPVX(PL_linestr) +   soff;
8639         if (arrow)
8640             return yyl_fatcomma(aTHX_ s, len);
8641     }
8642 
8643     return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c);
8644 }
8645 
8646 static int
yyl_try(pTHX_ char * s)8647 yyl_try(pTHX_ char *s)
8648 {
8649     char *d;
8650     GV *gv = NULL;
8651     int tok;
8652 
8653   retry:
8654     switch (*s) {
8655     default:
8656         if (UTF ? isIDFIRST_utf8_safe(s, PL_bufend) : isALNUMC(*s)) {
8657             if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
8658                 return tok;
8659             goto retry_bufptr;
8660         }
8661         yyl_croak_unrecognised(aTHX_ s);
8662 
8663     case 4:
8664     case 26:
8665         /* emulate EOF on ^D or ^Z */
8666         if ((tok = yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s)) != YYL_RETRY)
8667             return tok;
8668     retry_bufptr:
8669         s = PL_bufptr;
8670         goto retry;
8671 
8672     case 0:
8673 	if ((!PL_rsfp || PL_lex_inwhat)
8674 	 && (!PL_parser->filtered || s+1 < PL_bufend)) {
8675 	    PL_last_uni = 0;
8676 	    PL_last_lop = 0;
8677 	    if (PL_lex_brackets
8678                 && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF)
8679             {
8680 		yyerror((const char *)
8681 			(PL_lex_formbrack
8682 			 ? "Format not terminated"
8683 			 : "Missing right curly or square bracket"));
8684 	    }
8685             DEBUG_T({
8686                 PerlIO_printf(Perl_debug_log, "### Tokener got EOF\n");
8687             });
8688 	    TOKEN(0);
8689 	}
8690 	if (s++ < PL_bufend)
8691 	    goto retry;  /* ignore stray nulls */
8692 	PL_last_uni = 0;
8693 	PL_last_lop = 0;
8694 	if (!PL_in_eval && !PL_preambled) {
8695 	    PL_preambled = TRUE;
8696 	    if (PL_perldb) {
8697 		/* Generate a string of Perl code to load the debugger.
8698 		 * If PERL5DB is set, it will return the contents of that,
8699 		 * otherwise a compile-time require of perl5db.pl.  */
8700 
8701 		const char * const pdb = PerlEnv_getenv("PERL5DB");
8702 
8703 		if (pdb) {
8704 		    sv_setpv(PL_linestr, pdb);
8705 		    sv_catpvs(PL_linestr,";");
8706 		} else {
8707 		    SETERRNO(0,SS_NORMAL);
8708 		    sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
8709 		}
8710 		PL_parser->preambling = CopLINE(PL_curcop);
8711 	    } else
8712                 SvPVCLEAR(PL_linestr);
8713 	    if (PL_preambleav) {
8714 		SV **svp = AvARRAY(PL_preambleav);
8715 		SV **const end = svp + AvFILLp(PL_preambleav);
8716 		while(svp <= end) {
8717 		    sv_catsv(PL_linestr, *svp);
8718 		    ++svp;
8719 		    sv_catpvs(PL_linestr, ";");
8720 		}
8721 		sv_free(MUTABLE_SV(PL_preambleav));
8722 		PL_preambleav = NULL;
8723 	    }
8724 	    if (PL_minus_E)
8725 		sv_catpvs(PL_linestr,
8726 			  "use feature ':5." STRINGIFY(PERL_VERSION) "';");
8727 	    if (PL_minus_n || PL_minus_p) {
8728 		sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
8729 		if (PL_minus_l)
8730 		    sv_catpvs(PL_linestr,"chomp;");
8731 		if (PL_minus_a) {
8732 		    if (PL_minus_F) {
8733                         if (   (   *PL_splitstr == '/'
8734                                 || *PL_splitstr == '\''
8735                                 || *PL_splitstr == '"')
8736                             && strchr(PL_splitstr + 1, *PL_splitstr))
8737                         {
8738                             /* strchr is ok, because -F pattern can't contain
8739                              * embeddded NULs */
8740 			    Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
8741                         }
8742 			else {
8743 			    /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
8744 			       bytes can be used as quoting characters.  :-) */
8745 			    const char *splits = PL_splitstr;
8746 			    sv_catpvs(PL_linestr, "our @F=split(q\0");
8747 			    do {
8748 				/* Need to \ \s  */
8749 				if (*splits == '\\')
8750 				    sv_catpvn(PL_linestr, splits, 1);
8751 				sv_catpvn(PL_linestr, splits, 1);
8752 			    } while (*splits++);
8753 			    /* This loop will embed the trailing NUL of
8754 			       PL_linestr as the last thing it does before
8755 			       terminating.  */
8756 			    sv_catpvs(PL_linestr, ");");
8757 			}
8758 		    }
8759 		    else
8760 		        sv_catpvs(PL_linestr,"our @F=split(' ');");
8761 		}
8762 	    }
8763 	    sv_catpvs(PL_linestr, "\n");
8764 	    PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
8765 	    PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
8766 	    PL_last_lop = PL_last_uni = NULL;
8767 	    if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash)
8768 		update_debugger_info(PL_linestr, NULL, 0);
8769 	    goto retry;
8770 	}
8771         if ((tok = yyl_fake_eof(aTHX_ 0, cBOOL(PL_rsfp), s)) != YYL_RETRY)
8772             return tok;
8773         goto retry_bufptr;
8774 
8775     case '\r':
8776 #ifdef PERL_STRICT_CR
8777 	Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
8778 	Perl_croak(aTHX_
8779       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
8780 #endif
8781     case ' ': case '\t': case '\f': case '\v':
8782 	s++;
8783 	goto retry;
8784 
8785     case '#':
8786     case '\n': {
8787         const bool needs_semicolon = yyl_eol_needs_semicolon(aTHX_ &s);
8788         if (needs_semicolon)
8789             TOKEN(';');
8790         else
8791             goto retry;
8792     }
8793 
8794     case '-':
8795         return yyl_hyphen(aTHX_ s);
8796 
8797     case '+':
8798         return yyl_plus(aTHX_ s);
8799 
8800     case '*':
8801         return yyl_star(aTHX_ s);
8802 
8803     case '%':
8804         return yyl_percent(aTHX_ s);
8805 
8806     case '^':
8807         return yyl_caret(aTHX_ s);
8808 
8809     case '[':
8810         return yyl_leftsquare(aTHX_ s);
8811 
8812     case '~':
8813         return yyl_tilde(aTHX_ s);
8814 
8815     case ',':
8816 	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
8817 	    TOKEN(0);
8818 	s++;
8819 	OPERATOR(',');
8820     case ':':
8821 	if (s[1] == ':')
8822             return yyl_just_a_word(aTHX_ s, 0, 0, no_code);
8823         return yyl_colon(aTHX_ s + 1);
8824 
8825     case '(':
8826         return yyl_leftparen(aTHX_ s + 1);
8827 
8828     case ';':
8829 	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8830 	    TOKEN(0);
8831 	CLINE;
8832 	s++;
8833 	PL_expect = XSTATE;
8834 	TOKEN(';');
8835 
8836     case ')':
8837         return yyl_rightparen(aTHX_ s);
8838 
8839     case ']':
8840         return yyl_rightsquare(aTHX_ s);
8841 
8842     case '{':
8843         return yyl_leftcurly(aTHX_ s + 1, 0);
8844 
8845     case '}':
8846 	if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
8847 	    TOKEN(0);
8848         return yyl_rightcurly(aTHX_ s, 0);
8849 
8850     case '&':
8851         return yyl_ampersand(aTHX_ s);
8852 
8853     case '|':
8854         return yyl_verticalbar(aTHX_ s);
8855 
8856     case '=':
8857         if (s[1] == '=' && (s == PL_linestart || s[-1] == '\n')
8858             && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "====="))
8859         {
8860             s = vcs_conflict_marker(s + 7);
8861             goto retry;
8862         }
8863 
8864 	s++;
8865 	{
8866 	    const char tmp = *s++;
8867 	    if (tmp == '=') {
8868 		if (!PL_lex_allbrackets
8869                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8870                 {
8871 		    s -= 2;
8872 		    TOKEN(0);
8873 		}
8874 		ChEop(OP_EQ);
8875 	    }
8876 	    if (tmp == '>') {
8877 		if (!PL_lex_allbrackets
8878                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
8879                 {
8880 		    s -= 2;
8881 		    TOKEN(0);
8882 		}
8883 		OPERATOR(',');
8884 	    }
8885 	    if (tmp == '~')
8886 		PMop(OP_MATCH);
8887 	    if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
8888 		&& memCHRs("+-*/%.^&|<",tmp))
8889 		Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8890 			    "Reversed %c= operator",(int)tmp);
8891 	    s--;
8892 	    if (PL_expect == XSTATE
8893                 && isALPHA(tmp)
8894                 && (s == PL_linestart+1 || s[-2] == '\n') )
8895             {
8896                 if (   (PL_in_eval && !PL_rsfp && !PL_parser->filtered)
8897                     || PL_lex_state != LEX_NORMAL)
8898                 {
8899                     d = PL_bufend;
8900                     while (s < d) {
8901                         if (*s++ == '\n') {
8902                             incline(s, PL_bufend);
8903                             if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=cut"))
8904                             {
8905                                 s = (char *) memchr(s,'\n', d - s);
8906                                 if (s)
8907                                     s++;
8908                                 else
8909                                     s = d;
8910                                 incline(s, PL_bufend);
8911                                 goto retry;
8912                             }
8913                         }
8914                     }
8915                     goto retry;
8916                 }
8917                 s = PL_bufend;
8918                 PL_parser->in_pod = 1;
8919                 goto retry;
8920             }
8921 	}
8922 	if (PL_expect == XBLOCK) {
8923 	    const char *t = s;
8924 #ifdef PERL_STRICT_CR
8925 	    while (SPACE_OR_TAB(*t))
8926 #else
8927 	    while (SPACE_OR_TAB(*t) || *t == '\r')
8928 #endif
8929 		t++;
8930 	    if (*t == '\n' || *t == '#') {
8931 		ENTER_with_name("lex_format");
8932 		SAVEI8(PL_parser->form_lex_state);
8933 		SAVEI32(PL_lex_formbrack);
8934 		PL_parser->form_lex_state = PL_lex_state;
8935 		PL_lex_formbrack = PL_lex_brackets + 1;
8936                 PL_parser->sub_error_count = PL_error_count;
8937                 return yyl_leftcurly(aTHX_ s, 1);
8938 	    }
8939 	}
8940 	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
8941 	    s--;
8942 	    TOKEN(0);
8943 	}
8944 	pl_yylval.ival = 0;
8945 	OPERATOR(ASSIGNOP);
8946 
8947     case '!':
8948         return yyl_bang(aTHX_ s + 1);
8949 
8950     case '<':
8951         if (s[1] == '<' && (s == PL_linestart || s[-1] == '\n')
8952             && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "<<<<<"))
8953         {
8954             s = vcs_conflict_marker(s + 7);
8955             goto retry;
8956         }
8957         return yyl_leftpointy(aTHX_ s);
8958 
8959     case '>':
8960         if (s[1] == '>' && (s == PL_linestart || s[-1] == '\n')
8961             && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), ">>>>>"))
8962         {
8963             s = vcs_conflict_marker(s + 7);
8964             goto retry;
8965         }
8966         return yyl_rightpointy(aTHX_ s + 1);
8967 
8968     case '$':
8969         return yyl_dollar(aTHX_ s);
8970 
8971     case '@':
8972         return yyl_snail(aTHX_ s);
8973 
8974     case '/':			/* may be division, defined-or, or pattern */
8975         return yyl_slash(aTHX_ s);
8976 
8977      case '?':			/* conditional */
8978 	s++;
8979 	if (!PL_lex_allbrackets
8980             && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE)
8981         {
8982 	    s--;
8983 	    TOKEN(0);
8984 	}
8985 	PL_lex_allbrackets++;
8986 	OPERATOR('?');
8987 
8988     case '.':
8989 	if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
8990 #ifdef PERL_STRICT_CR
8991 	    && s[1] == '\n'
8992 #else
8993 	    && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
8994 #endif
8995 	    && (s == PL_linestart || s[-1] == '\n') )
8996 	{
8997 	    PL_expect = XSTATE;
8998             /* formbrack==2 means dot seen where arguments expected */
8999             return yyl_rightcurly(aTHX_ s, 2);
9000 	}
9001 	if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
9002 	    s += 3;
9003 	    OPERATOR(YADAYADA);
9004 	}
9005 	if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
9006 	    char tmp = *s++;
9007 	    if (*s == tmp) {
9008 		if (!PL_lex_allbrackets
9009                     && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE)
9010                 {
9011 		    s--;
9012 		    TOKEN(0);
9013 		}
9014 		s++;
9015 		if (*s == tmp) {
9016 		    s++;
9017 		    pl_yylval.ival = OPf_SPECIAL;
9018 		}
9019 		else
9020 		    pl_yylval.ival = 0;
9021 		OPERATOR(DOTDOT);
9022 	    }
9023 	    if (*s == '=' && !PL_lex_allbrackets
9024                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
9025             {
9026 		s--;
9027 		TOKEN(0);
9028 	    }
9029 	    Aop(OP_CONCAT);
9030 	}
9031 	/* FALLTHROUGH */
9032     case '0': case '1': case '2': case '3': case '4':
9033     case '5': case '6': case '7': case '8': case '9':
9034 	s = scan_num(s, &pl_yylval);
9035 	DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
9036 	if (PL_expect == XOPERATOR)
9037 	    no_op("Number",s);
9038 	TERM(THING);
9039 
9040     case '\'':
9041         return yyl_sglquote(aTHX_ s);
9042 
9043     case '"':
9044         return yyl_dblquote(aTHX_ s);
9045 
9046     case '`':
9047         return yyl_backtick(aTHX_ s);
9048 
9049     case '\\':
9050         return yyl_backslash(aTHX_ s + 1);
9051 
9052     case 'v':
9053 	if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
9054 	    char *start = s + 2;
9055 	    while (isDIGIT(*start) || *start == '_')
9056 		start++;
9057 	    if (*start == '.' && isDIGIT(start[1])) {
9058 		s = scan_num(s, &pl_yylval);
9059 		TERM(THING);
9060 	    }
9061 	    else if ((*start == ':' && start[1] == ':')
9062                      || (PL_expect == XSTATE && *start == ':')) {
9063                 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9064                     return tok;
9065                 goto retry_bufptr;
9066             }
9067 	    else if (PL_expect == XSTATE) {
9068 		d = start;
9069 		while (d < PL_bufend && isSPACE(*d)) d++;
9070 		if (*d == ':') {
9071                     if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9072                         return tok;
9073                     goto retry_bufptr;
9074                 }
9075 	    }
9076 	    /* avoid v123abc() or $h{v1}, allow C<print v10;> */
9077 	    if (!isALPHA(*start) && (PL_expect == XTERM
9078 			|| PL_expect == XREF || PL_expect == XSTATE
9079 			|| PL_expect == XTERMORDORDOR)) {
9080 		GV *const gv = gv_fetchpvn_flags(s, start - s,
9081                                                     UTF ? SVf_UTF8 : 0, SVt_PVCV);
9082 		if (!gv) {
9083 		    s = scan_num(s, &pl_yylval);
9084 		    TERM(THING);
9085 		}
9086 	    }
9087 	}
9088         if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9089             return tok;
9090         goto retry_bufptr;
9091 
9092     case 'x':
9093 	if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
9094 	    s++;
9095 	    Mop(OP_REPEAT);
9096 	}
9097         if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9098             return tok;
9099         goto retry_bufptr;
9100 
9101     case '_':
9102     case 'a': case 'A':
9103     case 'b': case 'B':
9104     case 'c': case 'C':
9105     case 'd': case 'D':
9106     case 'e': case 'E':
9107     case 'f': case 'F':
9108     case 'g': case 'G':
9109     case 'h': case 'H':
9110     case 'i': case 'I':
9111     case 'j': case 'J':
9112     case 'k': case 'K':
9113     case 'l': case 'L':
9114     case 'm': case 'M':
9115     case 'n': case 'N':
9116     case 'o': case 'O':
9117     case 'p': case 'P':
9118     case 'q': case 'Q':
9119     case 'r': case 'R':
9120     case 's': case 'S':
9121     case 't': case 'T':
9122     case 'u': case 'U':
9123 	      case 'V':
9124     case 'w': case 'W':
9125 	      case 'X':
9126     case 'y': case 'Y':
9127     case 'z': case 'Z':
9128         if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9129             return tok;
9130         goto retry_bufptr;
9131     }
9132 }
9133 
9134 
9135 /*
9136   yylex
9137 
9138   Works out what to call the token just pulled out of the input
9139   stream.  The yacc parser takes care of taking the ops we return and
9140   stitching them into a tree.
9141 
9142   Returns:
9143     The type of the next token
9144 
9145   Structure:
9146       Check if we have already built the token; if so, use it.
9147       Switch based on the current state:
9148 	  - if we have a case modifier in a string, deal with that
9149 	  - handle other cases of interpolation inside a string
9150 	  - scan the next line if we are inside a format
9151       In the normal state, switch on the next character:
9152 	  - default:
9153 	    if alphabetic, go to key lookup
9154 	    unrecognized character - croak
9155 	  - 0/4/26: handle end-of-line or EOF
9156 	  - cases for whitespace
9157 	  - \n and #: handle comments and line numbers
9158 	  - various operators, brackets and sigils
9159 	  - numbers
9160 	  - quotes
9161 	  - 'v': vstrings (or go to key lookup)
9162 	  - 'x' repetition operator (or go to key lookup)
9163 	  - other ASCII alphanumerics (key lookup begins here):
9164 	      word before => ?
9165 	      keyword plugin
9166 	      scan built-in keyword (but do nothing with it yet)
9167 	      check for statement label
9168 	      check for lexical subs
9169 		  return yyl_just_a_word if there is one
9170 	      see whether built-in keyword is overridden
9171 	      switch on keyword number:
9172 		  - default: return yyl_just_a_word:
9173 		      not a built-in keyword; handle bareword lookup
9174 		      disambiguate between method and sub call
9175 		      fall back to bareword
9176 		  - cases for built-in keywords
9177 */
9178 
9179 #ifdef NETWARE
9180 #define RSFP_FILENO (PL_rsfp)
9181 #else
9182 #define RSFP_FILENO (PerlIO_fileno(PL_rsfp))
9183 #endif
9184 
9185 
9186 int
Perl_yylex(pTHX)9187 Perl_yylex(pTHX)
9188 {
9189     dVAR;
9190     char *s = PL_bufptr;
9191 
9192     if (UNLIKELY(PL_parser->recheck_utf8_validity)) {
9193         const U8* first_bad_char_loc;
9194         if (UTF && UNLIKELY(! is_utf8_string_loc((U8 *) PL_bufptr,
9195                                                         PL_bufend - PL_bufptr,
9196                                                         &first_bad_char_loc)))
9197         {
9198             _force_out_malformed_utf8_message(first_bad_char_loc,
9199                                               (U8 *) PL_bufend,
9200                                               0,
9201                                               1 /* 1 means die */ );
9202             NOT_REACHED; /* NOTREACHED */
9203         }
9204         PL_parser->recheck_utf8_validity = FALSE;
9205     }
9206     DEBUG_T( {
9207 	SV* tmp = newSVpvs("");
9208 	PerlIO_printf(Perl_debug_log, "### %" IVdf ":LEX_%s/X%s %s\n",
9209 	    (IV)CopLINE(PL_curcop),
9210 	    lex_state_names[PL_lex_state],
9211 	    exp_name[PL_expect],
9212 	    pv_display(tmp, s, strlen(s), 0, 60));
9213 	SvREFCNT_dec(tmp);
9214     } );
9215 
9216     /* when we've already built the next token, just pull it out of the queue */
9217     if (PL_nexttoke) {
9218 	PL_nexttoke--;
9219 	pl_yylval = PL_nextval[PL_nexttoke];
9220 	{
9221 	    I32 next_type;
9222 	    next_type = PL_nexttype[PL_nexttoke];
9223 	    if (next_type & (7<<24)) {
9224 		if (next_type & (1<<24)) {
9225 		    if (PL_lex_brackets > 100)
9226 			Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
9227 		    PL_lex_brackstack[PL_lex_brackets++] =
9228 			(char) ((next_type >> 16) & 0xff);
9229 		}
9230 		if (next_type & (2<<24))
9231 		    PL_lex_allbrackets++;
9232 		if (next_type & (4<<24))
9233 		    PL_lex_allbrackets--;
9234 		next_type &= 0xffff;
9235 	    }
9236 	    return REPORT(next_type == 'p' ? pending_ident() : next_type);
9237 	}
9238     }
9239 
9240     switch (PL_lex_state) {
9241     case LEX_NORMAL:
9242     case LEX_INTERPNORMAL:
9243 	break;
9244 
9245     /* interpolated case modifiers like \L \U, including \Q and \E.
9246        when we get here, PL_bufptr is at the \
9247     */
9248     case LEX_INTERPCASEMOD:
9249 	/* handle \E or end of string */
9250         return yyl_interpcasemod(aTHX_ s);
9251 
9252     case LEX_INTERPPUSH:
9253         return REPORT(sublex_push());
9254 
9255     case LEX_INTERPSTART:
9256 	if (PL_bufptr == PL_bufend)
9257 	    return REPORT(sublex_done());
9258 	DEBUG_T({
9259             if(*PL_bufptr != '(')
9260                 PerlIO_printf(Perl_debug_log, "### Interpolated variable\n");
9261         });
9262 	PL_expect = XTERM;
9263         /* for /@a/, we leave the joining for the regex engine to do
9264          * (unless we're within \Q etc) */
9265 	PL_lex_dojoin = (*PL_bufptr == '@'
9266                             && (!PL_lex_inpat || PL_lex_casemods));
9267 	PL_lex_state = LEX_INTERPNORMAL;
9268 	if (PL_lex_dojoin) {
9269 	    NEXTVAL_NEXTTOKE.ival = 0;
9270 	    force_next(',');
9271 	    force_ident("\"", '$');
9272 	    NEXTVAL_NEXTTOKE.ival = 0;
9273 	    force_next('$');
9274 	    NEXTVAL_NEXTTOKE.ival = 0;
9275 	    force_next((2<<24)|'(');
9276 	    NEXTVAL_NEXTTOKE.ival = OP_JOIN;	/* emulate join($", ...) */
9277 	    force_next(FUNC);
9278 	}
9279 	/* Convert (?{...}) and friends to 'do {...}' */
9280 	if (PL_lex_inpat && *PL_bufptr == '(') {
9281 	    PL_parser->lex_shared->re_eval_start = PL_bufptr;
9282 	    PL_bufptr += 2;
9283 	    if (*PL_bufptr != '{')
9284 		PL_bufptr++;
9285 	    PL_expect = XTERMBLOCK;
9286 	    force_next(DO);
9287 	}
9288 
9289 	if (PL_lex_starts++) {
9290 	    s = PL_bufptr;
9291 	    /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
9292 	    if (!PL_lex_casemods && PL_lex_inpat)
9293 		TOKEN(',');
9294 	    else
9295 		AopNOASSIGN(OP_CONCAT);
9296 	}
9297 	return yylex();
9298 
9299     case LEX_INTERPENDMAYBE:
9300 	if (intuit_more(PL_bufptr, PL_bufend)) {
9301 	    PL_lex_state = LEX_INTERPNORMAL;	/* false alarm, more expr */
9302 	    break;
9303 	}
9304 	/* FALLTHROUGH */
9305 
9306     case LEX_INTERPEND:
9307 	if (PL_lex_dojoin) {
9308 	    const U8 dojoin_was = PL_lex_dojoin;
9309 	    PL_lex_dojoin = FALSE;
9310 	    PL_lex_state = LEX_INTERPCONCAT;
9311 	    PL_lex_allbrackets--;
9312 	    return REPORT(dojoin_was == 1 ? (int)')' : (int)POSTJOIN);
9313 	}
9314 	if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
9315 	    && SvEVALED(PL_lex_repl))
9316 	{
9317 	    if (PL_bufptr != PL_bufend)
9318 		Perl_croak(aTHX_ "Bad evalled substitution pattern");
9319 	    PL_lex_repl = NULL;
9320 	}
9321 	/* Paranoia.  re_eval_start is adjusted when S_scan_heredoc sets
9322 	   re_eval_str.  If the here-doc body’s length equals the previous
9323 	   value of re_eval_start, re_eval_start will now be null.  So
9324 	   check re_eval_str as well. */
9325 	if (PL_parser->lex_shared->re_eval_start
9326 	 || PL_parser->lex_shared->re_eval_str) {
9327 	    SV *sv;
9328 	    if (*PL_bufptr != ')')
9329 		Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
9330 	    PL_bufptr++;
9331 	    /* having compiled a (?{..}) expression, return the original
9332 	     * text too, as a const */
9333 	    if (PL_parser->lex_shared->re_eval_str) {
9334 		sv = PL_parser->lex_shared->re_eval_str;
9335 		PL_parser->lex_shared->re_eval_str = NULL;
9336 		SvCUR_set(sv,
9337 			 PL_bufptr - PL_parser->lex_shared->re_eval_start);
9338 		SvPV_shrink_to_cur(sv);
9339 	    }
9340 	    else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
9341 			 PL_bufptr - PL_parser->lex_shared->re_eval_start);
9342 	    NEXTVAL_NEXTTOKE.opval =
9343                     newSVOP(OP_CONST, 0,
9344 				 sv);
9345 	    force_next(THING);
9346 	    PL_parser->lex_shared->re_eval_start = NULL;
9347 	    PL_expect = XTERM;
9348 	    return REPORT(',');
9349 	}
9350 
9351 	/* FALLTHROUGH */
9352     case LEX_INTERPCONCAT:
9353 #ifdef DEBUGGING
9354 	if (PL_lex_brackets)
9355 	    Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
9356 		       (long) PL_lex_brackets);
9357 #endif
9358 	if (PL_bufptr == PL_bufend)
9359 	    return REPORT(sublex_done());
9360 
9361 	/* m'foo' still needs to be parsed for possible (?{...}) */
9362 	if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
9363 	    SV *sv = newSVsv(PL_linestr);
9364 	    sv = tokeq(sv);
9365             pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
9366 	    s = PL_bufend;
9367 	}
9368 	else {
9369             int save_error_count = PL_error_count;
9370 
9371 	    s = scan_const(PL_bufptr);
9372 
9373             /* Set flag if this was a pattern and there were errors.  op.c will
9374              * refuse to compile a pattern with this flag set.  Otherwise, we
9375              * could get segfaults, etc. */
9376             if (PL_lex_inpat && PL_error_count > save_error_count) {
9377                 ((PMOP*)PL_lex_inpat)->op_pmflags |= PMf_HAS_ERROR;
9378             }
9379 	    if (*s == '\\')
9380 		PL_lex_state = LEX_INTERPCASEMOD;
9381 	    else
9382 		PL_lex_state = LEX_INTERPSTART;
9383 	}
9384 
9385 	if (s != PL_bufptr) {
9386 	    NEXTVAL_NEXTTOKE = pl_yylval;
9387 	    PL_expect = XTERM;
9388 	    force_next(THING);
9389 	    if (PL_lex_starts++) {
9390 		/* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
9391 		if (!PL_lex_casemods && PL_lex_inpat)
9392 		    TOKEN(',');
9393 		else
9394 		    AopNOASSIGN(OP_CONCAT);
9395 	    }
9396 	    else {
9397 		PL_bufptr = s;
9398 		return yylex();
9399 	    }
9400 	}
9401 
9402 	return yylex();
9403     case LEX_FORMLINE:
9404         if (PL_parser->sub_error_count != PL_error_count) {
9405             /* There was an error parsing a formline, which tends to
9406                mess up the parser.
9407                Unlike interpolated sub-parsing, we can't treat any of
9408                these as recoverable, so no need to check sub_no_recover.
9409             */
9410             yyquit();
9411         }
9412 	assert(PL_lex_formbrack);
9413 	s = scan_formline(PL_bufptr);
9414 	if (!PL_lex_formbrack)
9415             return yyl_rightcurly(aTHX_ s, 1);
9416 	PL_bufptr = s;
9417 	return yylex();
9418     }
9419 
9420     /* We really do *not* want PL_linestr ever becoming a COW. */
9421     assert (!SvIsCOW(PL_linestr));
9422     s = PL_bufptr;
9423     PL_oldoldbufptr = PL_oldbufptr;
9424     PL_oldbufptr = s;
9425 
9426     if (PL_in_my == KEY_sigvar) {
9427         PL_parser->saw_infix_sigil = 0;
9428         return yyl_sigvar(aTHX_ s);
9429     }
9430 
9431     {
9432         /* yyl_try() and its callees might consult PL_parser->saw_infix_sigil.
9433            On its return, we then need to set it to indicate whether the token
9434            we just encountered was an infix operator that (if we hadn't been
9435            expecting an operator) have been a sigil.
9436         */
9437         bool expected_operator = (PL_expect == XOPERATOR);
9438         int ret = yyl_try(aTHX_ s);
9439         switch (pl_yylval.ival) {
9440         case OP_BIT_AND:
9441         case OP_MODULO:
9442         case OP_MULTIPLY:
9443         case OP_NBIT_AND:
9444             if (expected_operator) {
9445                 PL_parser->saw_infix_sigil = 1;
9446                 break;
9447             }
9448             /* FALLTHROUGH */
9449         default:
9450             PL_parser->saw_infix_sigil = 0;
9451         }
9452         return ret;
9453     }
9454 }
9455 
9456 
9457 /*
9458   S_pending_ident
9459 
9460   Looks up an identifier in the pad or in a package
9461 
9462   PL_in_my == KEY_sigvar indicates that this is a subroutine signature variable
9463   rather than a plain pad var.
9464 
9465   Returns:
9466     PRIVATEREF if this is a lexical name.
9467     BAREWORD   if this belongs to a package.
9468 
9469   Structure:
9470       if we're in a my declaration
9471 	  croak if they tried to say my($foo::bar)
9472 	  build the ops for a my() declaration
9473       if it's an access to a my() variable
9474 	  build ops for access to a my() variable
9475       if in a dq string, and they've said @foo and we can't find @foo
9476 	  warn
9477       build ops for a bareword
9478 */
9479 
9480 static int
S_pending_ident(pTHX)9481 S_pending_ident(pTHX)
9482 {
9483     PADOFFSET tmp = 0;
9484     const char pit = (char)pl_yylval.ival;
9485     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
9486     /* All routes through this function want to know if there is a colon.  */
9487     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
9488 
9489     DEBUG_T({ PerlIO_printf(Perl_debug_log,
9490           "### Pending identifier '%s'\n", PL_tokenbuf); });
9491     assert(tokenbuf_len >= 2);
9492 
9493     /* if we're in a my(), we can't allow dynamics here.
9494        $foo'bar has already been turned into $foo::bar, so
9495        just check for colons.
9496 
9497        if it's a legal name, the OP is a PADANY.
9498     */
9499     if (PL_in_my) {
9500         if (PL_in_my == KEY_our) {	/* "our" is merely analogous to "my" */
9501             if (has_colon)
9502                 /* diag_listed_as: No package name allowed for variable %s
9503                                    in "our" */
9504                 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
9505                                   "%s %s in \"our\"",
9506                                   *PL_tokenbuf=='&' ? "subroutine" : "variable",
9507                                   PL_tokenbuf), UTF ? SVf_UTF8 : 0);
9508             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
9509         }
9510         else {
9511             OP *o;
9512             if (has_colon) {
9513                 /* "my" variable %s can't be in a package */
9514                 /* PL_no_myglob is constant */
9515                 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
9516                 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
9517                             PL_in_my == KEY_my ? "my" : "state",
9518                             *PL_tokenbuf == '&' ? "subroutine" : "variable",
9519                             PL_tokenbuf),
9520                             UTF ? SVf_UTF8 : 0);
9521                 GCC_DIAG_RESTORE_STMT;
9522             }
9523 
9524             if (PL_in_my == KEY_sigvar) {
9525                 /* A signature 'padop' needs in addition, an op_first to
9526                  * point to a child sigdefelem, and an extra field to hold
9527                  * the signature index. We can achieve both by using an
9528                  * UNOP_AUX and (ab)using the op_aux field to hold the
9529                  * index. If we ever need more fields, use a real malloced
9530                  * aux strut instead.
9531                  */
9532                 o = newUNOP_AUX(OP_ARGELEM, 0, NULL,
9533                                     INT2PTR(UNOP_AUX_item *,
9534                                         (PL_parser->sig_elems)));
9535                 o->op_private |= (  PL_tokenbuf[0] == '$' ? OPpARGELEM_SV
9536                                   : PL_tokenbuf[0] == '@' ? OPpARGELEM_AV
9537                                   :                         OPpARGELEM_HV);
9538             }
9539             else
9540                 o = newOP(OP_PADANY, 0);
9541             o->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
9542                                                         UTF ? SVf_UTF8 : 0);
9543             if (PL_in_my == KEY_sigvar)
9544                 PL_in_my = 0;
9545 
9546             pl_yylval.opval = o;
9547 	    return PRIVATEREF;
9548         }
9549     }
9550 
9551     /*
9552        build the ops for accesses to a my() variable.
9553     */
9554 
9555     if (!has_colon) {
9556 	if (!PL_in_my)
9557 	    tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
9558                                  0);
9559         if (tmp != NOT_IN_PAD) {
9560             /* might be an "our" variable" */
9561             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9562                 /* build ops for a bareword */
9563 		HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
9564 		HEK * const stashname = HvNAME_HEK(stash);
9565 		SV *  const sym = newSVhek(stashname);
9566                 sv_catpvs(sym, "::");
9567                 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
9568                 pl_yylval.opval = newSVOP(OP_CONST, 0, sym);
9569                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9570                 if (pit != '&')
9571                   gv_fetchsv(sym,
9572                     GV_ADDMULTI,
9573                     ((PL_tokenbuf[0] == '$') ? SVt_PV
9574                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9575                      : SVt_PVHV));
9576                 return BAREWORD;
9577             }
9578 
9579             pl_yylval.opval = newOP(OP_PADANY, 0);
9580             pl_yylval.opval->op_targ = tmp;
9581             return PRIVATEREF;
9582         }
9583     }
9584 
9585     /*
9586        Whine if they've said @foo or @foo{key} in a doublequoted string,
9587        and @foo (or %foo) isn't a variable we can find in the symbol
9588        table.
9589     */
9590     if (ckWARN(WARN_AMBIGUOUS)
9591         && pit == '@'
9592         && PL_lex_state != LEX_NORMAL
9593         && !PL_lex_brackets)
9594     {
9595         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9596                                          ( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG,
9597                                          SVt_PVAV);
9598         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
9599 	   )
9600         {
9601             /* Downgraded from fatal to warning 20000522 mjd */
9602             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9603 			"Possible unintended interpolation of %" UTF8f
9604 			" in string",
9605 			UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
9606         }
9607     }
9608 
9609     /* build ops for a bareword */
9610     pl_yylval.opval = newSVOP(OP_CONST, 0,
9611 				   newSVpvn_flags(PL_tokenbuf + 1,
9612                                                       tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9613                                                       UTF ? SVf_UTF8 : 0 ));
9614     pl_yylval.opval->op_private = OPpCONST_ENTERED;
9615     if (pit != '&')
9616         gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9617 		     (PL_in_eval ? GV_ADDMULTI : GV_ADD)
9618                      | ( UTF ? SVf_UTF8 : 0 ),
9619 		     ((PL_tokenbuf[0] == '$') ? SVt_PV
9620 		      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9621 		      : SVt_PVHV));
9622     return BAREWORD;
9623 }
9624 
9625 STATIC void
S_checkcomma(pTHX_ const char * s,const char * name,const char * what)9626 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
9627 {
9628     PERL_ARGS_ASSERT_CHECKCOMMA;
9629 
9630     if (*s == ' ' && s[1] == '(') {	/* XXX gotta be a better way */
9631 	if (ckWARN(WARN_SYNTAX)) {
9632 	    int level = 1;
9633 	    const char *w;
9634 	    for (w = s+2; *w && level; w++) {
9635 		if (*w == '(')
9636 		    ++level;
9637 		else if (*w == ')')
9638 		    --level;
9639 	    }
9640 	    while (isSPACE(*w))
9641 		++w;
9642 	    /* the list of chars below is for end of statements or
9643 	     * block / parens, boolean operators (&&, ||, //) and branch
9644 	     * constructs (or, and, if, until, unless, while, err, for).
9645 	     * Not a very solid hack... */
9646 	    if (!*w || !memCHRs(";&/|})]oaiuwef!=", *w))
9647 		Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9648 			    "%s (...) interpreted as function",name);
9649 	}
9650     }
9651     while (s < PL_bufend && isSPACE(*s))
9652 	s++;
9653     if (*s == '(')
9654 	s++;
9655     while (s < PL_bufend && isSPACE(*s))
9656 	s++;
9657     if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
9658 	const char * const w = s;
9659         s += UTF ? UTF8SKIP(s) : 1;
9660 	while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
9661 	    s += UTF ? UTF8SKIP(s) : 1;
9662 	while (s < PL_bufend && isSPACE(*s))
9663 	    s++;
9664 	if (*s == ',') {
9665 	    GV* gv;
9666 	    if (keyword(w, s - w, 0))
9667 		return;
9668 
9669 	    gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
9670 	    if (gv && GvCVu(gv))
9671 		return;
9672 	    if (s - w <= 254) {
9673                 PADOFFSET off;
9674 		char tmpbuf[256];
9675 		Copy(w, tmpbuf+1, s - w, char);
9676 		*tmpbuf = '&';
9677 		off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
9678 		if (off != NOT_IN_PAD) return;
9679 	    }
9680 	    Perl_croak(aTHX_ "No comma allowed after %s", what);
9681 	}
9682     }
9683 }
9684 
9685 /* S_new_constant(): do any overload::constant lookup.
9686 
9687    Either returns sv, or mortalizes/frees sv and returns a new SV*.
9688    Best used as sv=new_constant(..., sv, ...).
9689    If s, pv are NULL, calls subroutine with one argument,
9690    and <type> is used with error messages only.
9691    <type> is assumed to be well formed UTF-8.
9692 
9693    If error_msg is not NULL, *error_msg will be set to any error encountered.
9694    Otherwise yyerror() will be used to output it */
9695 
9696 STATIC SV *
S_new_constant(pTHX_ const char * s,STRLEN len,const char * key,STRLEN keylen,SV * sv,SV * pv,const char * type,STRLEN typelen,const char ** error_msg)9697 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9698 	       SV *sv, SV *pv, const char *type, STRLEN typelen,
9699                const char ** error_msg)
9700 {
9701     dSP;
9702     HV * table = GvHV(PL_hintgv);		 /* ^H */
9703     SV *res;
9704     SV *errsv = NULL;
9705     SV **cvp;
9706     SV *cv, *typesv;
9707     const char *why1 = "", *why2 = "", *why3 = "";
9708     const char * optional_colon = ":";  /* Only some messages have a colon */
9709     char *msg;
9710 
9711     PERL_ARGS_ASSERT_NEW_CONSTANT;
9712     /* We assume that this is true: */
9713     assert(type || s);
9714 
9715     sv_2mortal(sv);			/* Parent created it permanently */
9716 
9717     if (   ! table
9718 	|| ! (PL_hints & HINT_LOCALIZE_HH))
9719     {
9720         why1 = "unknown";
9721         optional_colon = "";
9722         goto report;
9723     }
9724 
9725     cvp = hv_fetch(table, key, keylen, FALSE);
9726     if (!cvp || !SvOK(*cvp)) {
9727         why1 = "$^H{";
9728         why2 = key;
9729         why3 = "} is not defined";
9730         goto report;
9731     }
9732 
9733     cv = *cvp;
9734     if (!pv && s)
9735   	pv = newSVpvn_flags(s, len, SVs_TEMP);
9736     if (type && pv)
9737   	typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
9738     else
9739   	typesv = &PL_sv_undef;
9740 
9741     PUSHSTACKi(PERLSI_OVERLOAD);
9742     ENTER ;
9743     SAVETMPS;
9744 
9745     PUSHMARK(SP) ;
9746     EXTEND(sp, 3);
9747     if (pv)
9748  	PUSHs(pv);
9749     PUSHs(sv);
9750     if (pv)
9751  	PUSHs(typesv);
9752     PUTBACK;
9753     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9754 
9755     SPAGAIN ;
9756 
9757     /* Check the eval first */
9758     if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
9759 	STRLEN errlen;
9760 	const char * errstr;
9761 	sv_catpvs(errsv, "Propagated");
9762 	errstr = SvPV_const(errsv, errlen);
9763 	yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
9764 	(void)POPs;
9765 	res = SvREFCNT_inc_simple_NN(sv);
9766     }
9767     else {
9768  	res = POPs;
9769 	SvREFCNT_inc_simple_void_NN(res);
9770     }
9771 
9772     PUTBACK ;
9773     FREETMPS ;
9774     LEAVE ;
9775     POPSTACK;
9776 
9777     if (SvOK(res)) {
9778         return res;
9779     }
9780 
9781     sv = res;
9782     (void)sv_2mortal(sv);
9783 
9784     why1 = "Call to &{$^H{";
9785     why2 = key;
9786     why3 = "}} did not return a defined value";
9787 
9788   report:
9789 
9790     msg = Perl_form(aTHX_ "Constant(%.*s)%s %s%s%s",
9791                         (int)(type ? typelen : len),
9792                         (type ? type: s),
9793                         optional_colon,
9794                         why1, why2, why3);
9795     if (error_msg) {
9796         *error_msg = msg;
9797     }
9798     else {
9799         yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
9800     }
9801     return SvREFCNT_inc_simple_NN(sv);
9802 }
9803 
9804 PERL_STATIC_INLINE void
S_parse_ident(pTHX_ char ** s,char ** d,char * const e,int allow_package,bool is_utf8,bool check_dollar,bool tick_warn)9805 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
9806                     bool is_utf8, bool check_dollar, bool tick_warn)
9807 {
9808     int saw_tick = 0;
9809     const char *olds = *s;
9810     PERL_ARGS_ASSERT_PARSE_IDENT;
9811 
9812     while (*s < PL_bufend) {
9813         if (*d >= e)
9814             Perl_croak(aTHX_ "%s", ident_too_long);
9815         if (is_utf8 && isIDFIRST_utf8_safe(*s, PL_bufend)) {
9816              /* The UTF-8 case must come first, otherwise things
9817              * like c\N{COMBINING TILDE} would start failing, as the
9818              * isWORDCHAR_A case below would gobble the 'c' up.
9819              */
9820 
9821             char *t = *s + UTF8SKIP(*s);
9822             while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) {
9823                 t += UTF8SKIP(t);
9824             }
9825             if (*d + (t - *s) > e)
9826                 Perl_croak(aTHX_ "%s", ident_too_long);
9827             Copy(*s, *d, t - *s, char);
9828             *d += t - *s;
9829             *s = t;
9830         }
9831         else if ( isWORDCHAR_A(**s) ) {
9832             do {
9833                 *(*d)++ = *(*s)++;
9834             } while (isWORDCHAR_A(**s) && *d < e);
9835         }
9836         else if (   allow_package
9837                  && **s == '\''
9838                  && isIDFIRST_lazy_if_safe((*s)+1, PL_bufend, is_utf8))
9839         {
9840             *(*d)++ = ':';
9841             *(*d)++ = ':';
9842             (*s)++;
9843             saw_tick++;
9844         }
9845         else if (allow_package && **s == ':' && (*s)[1] == ':'
9846            /* Disallow things like Foo::$bar. For the curious, this is
9847             * the code path that triggers the "Bad name after" warning
9848             * when looking for barewords.
9849             */
9850            && !(check_dollar && (*s)[2] == '$')) {
9851             *(*d)++ = *(*s)++;
9852             *(*d)++ = *(*s)++;
9853         }
9854         else
9855             break;
9856     }
9857     if (UNLIKELY(tick_warn && saw_tick && PL_lex_state == LEX_INTERPNORMAL
9858               && !PL_lex_brackets && ckWARN(WARN_SYNTAX))) {
9859         char *this_d;
9860 	char *d2;
9861         Newx(this_d, *s - olds + saw_tick + 2, char); /* +2 for $# */
9862         d2 = this_d;
9863         SAVEFREEPV(this_d);
9864         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9865                          "Old package separator used in string");
9866         if (olds[-1] == '#')
9867             *d2++ = olds[-2];
9868         *d2++ = olds[-1];
9869         while (olds < *s) {
9870             if (*olds == '\'') {
9871                 *d2++ = '\\';
9872                 *d2++ = *olds++;
9873             }
9874 	    else
9875                 *d2++ = *olds++;
9876         }
9877         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9878                          "\t(Did you mean \"%" UTF8f "\" instead?)\n",
9879                           UTF8fARG(is_utf8, d2-this_d, this_d));
9880     }
9881     return;
9882 }
9883 
9884 /* Returns a NUL terminated string, with the length of the string written to
9885    *slp
9886    */
9887 char *
Perl_scan_word(pTHX_ char * s,char * dest,STRLEN destlen,int allow_package,STRLEN * slp)9888 Perl_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9889 {
9890     char *d = dest;
9891     char * const e = d + destlen - 3;  /* two-character token, ending NUL */
9892     bool is_utf8 = cBOOL(UTF);
9893 
9894     PERL_ARGS_ASSERT_SCAN_WORD;
9895 
9896     parse_ident(&s, &d, e, allow_package, is_utf8, TRUE, FALSE);
9897     *d = '\0';
9898     *slp = d - dest;
9899     return s;
9900 }
9901 
9902 /* Is the byte 'd' a legal single character identifier name?  'u' is true
9903  * iff Unicode semantics are to be used.  The legal ones are any of:
9904  *  a) all ASCII characters except:
9905  *          1) control and space-type ones, like NUL, SOH, \t, and SPACE;
9906  *          2) '{'
9907  *     The final case currently doesn't get this far in the program, so we
9908  *     don't test for it.  If that were to change, it would be ok to allow it.
9909  *  b) When not under Unicode rules, any upper Latin1 character
9910  *  c) Otherwise, when unicode rules are used, all XIDS characters.
9911  *
9912  *      Because all ASCII characters have the same representation whether
9913  *      encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
9914  *      '{' without knowing if is UTF-8 or not. */
9915 #define VALID_LEN_ONE_IDENT(s, e, is_utf8)                                  \
9916     (isGRAPH_A(*(s)) || ((is_utf8)                                          \
9917                          ? isIDFIRST_utf8_safe(s, e)                        \
9918                          : (isGRAPH_L1(*s)                                  \
9919                             && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
9920 
9921 STATIC char *
S_scan_ident(pTHX_ char * s,char * dest,STRLEN destlen,I32 ck_uni)9922 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
9923 {
9924     I32 herelines = PL_parser->herelines;
9925     SSize_t bracket = -1;
9926     char funny = *s++;
9927     char *d = dest;
9928     char * const e = d + destlen - 3;    /* two-character token, ending NUL */
9929     bool is_utf8 = cBOOL(UTF);
9930     I32 orig_copline = 0, tmp_copline = 0;
9931 
9932     PERL_ARGS_ASSERT_SCAN_IDENT;
9933 
9934     if (isSPACE(*s) || !*s)
9935 	s = skipspace(s);
9936     if (isDIGIT(*s)) { /* handle $0 and $1 $2 and $10 and etc */
9937         bool is_zero= *s == '0' ? TRUE : FALSE;
9938         char *digit_start= d;
9939         *d++ = *s++;
9940         while (s < PL_bufend && isDIGIT(*s)) {
9941             if (d >= e)
9942                 Perl_croak(aTHX_ "%s", ident_too_long);
9943             *d++ = *s++;
9944         }
9945         if (is_zero && d - digit_start > 1)
9946             Perl_croak(aTHX_ ident_var_zero_multi_digit);
9947     }
9948     else {  /* See if it is a "normal" identifier */
9949         parse_ident(&s, &d, e, 1, is_utf8, FALSE, TRUE);
9950     }
9951     *d = '\0';
9952     d = dest;
9953     if (*d) {
9954         /* Either a digit variable, or parse_ident() found an identifier
9955            (anything valid as a bareword), so job done and return.  */
9956 	if (PL_lex_state != LEX_NORMAL)
9957 	    PL_lex_state = LEX_INTERPENDMAYBE;
9958 	return s;
9959     }
9960 
9961     /* Here, it is not a run-of-the-mill identifier name */
9962 
9963     if (*s == '$' && s[1]
9964         && (   isIDFIRST_lazy_if_safe(s+1, PL_bufend, is_utf8)
9965             || isDIGIT_A((U8)s[1])
9966             || s[1] == '$'
9967             || s[1] == '{'
9968             || memBEGINs(s+1, (STRLEN) (PL_bufend - (s+1)), "::")) )
9969     {
9970         /* Dereferencing a value in a scalar variable.
9971            The alternatives are different syntaxes for a scalar variable.
9972            Using ' as a leading package separator isn't allowed. :: is.   */
9973 	return s;
9974     }
9975     /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...}  */
9976     if (*s == '{') {
9977 	bracket = s - SvPVX(PL_linestr);
9978 	s++;
9979 	orig_copline = CopLINE(PL_curcop);
9980         if (s < PL_bufend && isSPACE(*s)) {
9981             s = skipspace(s);
9982         }
9983     }
9984     if ((s <= PL_bufend - ((is_utf8)
9985                           ? UTF8SKIP(s)
9986                           : 1))
9987         && VALID_LEN_ONE_IDENT(s, PL_bufend, is_utf8))
9988     {
9989         if (is_utf8) {
9990             const STRLEN skip = UTF8SKIP(s);
9991             STRLEN i;
9992             d[skip] = '\0';
9993             for ( i = 0; i < skip; i++ )
9994                 d[i] = *s++;
9995         }
9996         else {
9997             *d = *s++;
9998             /* special case to handle ${10}, ${11} the same way we handle ${1} etc */
9999             if (isDIGIT(*d)) {
10000                 bool is_zero= *d == '0' ? TRUE : FALSE;
10001                 char *digit_start= d;
10002                 while (s < PL_bufend && isDIGIT(*s)) {
10003                     d++;
10004                     if (d >= e)
10005                         Perl_croak(aTHX_ "%s", ident_too_long);
10006                     *d= *s++;
10007                 }
10008                 if (is_zero && d - digit_start > 1)
10009                     Perl_croak(aTHX_ ident_var_zero_multi_digit);
10010             }
10011             d[1] = '\0';
10012         }
10013     }
10014     /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
10015     if (*d == '^' && *s && isCONTROLVAR(*s)) {
10016 	*d = toCTRL(*s);
10017 	s++;
10018     }
10019     /* Warn about ambiguous code after unary operators if {...} notation isn't
10020        used.  There's no difference in ambiguity; it's merely a heuristic
10021        about when not to warn.  */
10022     else if (ck_uni && bracket == -1)
10023 	check_uni();
10024     if (bracket != -1) {
10025         bool skip;
10026         char *s2;
10027         /* If we were processing {...} notation then...  */
10028         if (isIDFIRST_lazy_if_safe(d, e, is_utf8)
10029             || (!isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
10030                  && isWORDCHAR(*s))
10031         ) {
10032             /* note we have to check for a normal identifier first,
10033              * as it handles utf8 symbols, and only after that has
10034              * been ruled out can we look at the caret words */
10035             if (isIDFIRST_lazy_if_safe(d, e, is_utf8) ) {
10036                 /* if it starts as a valid identifier, assume that it is one.
10037                    (the later check for } being at the expected point will trap
10038                    cases where this doesn't pan out.)  */
10039                 d += is_utf8 ? UTF8SKIP(d) : 1;
10040                 parse_ident(&s, &d, e, 1, is_utf8, TRUE, TRUE);
10041                 *d = '\0';
10042             }
10043             else { /* caret word: ${^Foo} ${^CAPTURE[0]} */
10044                 d++;
10045                 while (isWORDCHAR(*s) && d < e) {
10046                     *d++ = *s++;
10047                 }
10048                 if (d >= e)
10049                     Perl_croak(aTHX_ "%s", ident_too_long);
10050                 *d = '\0';
10051             }
10052             tmp_copline = CopLINE(PL_curcop);
10053             if (s < PL_bufend && isSPACE(*s)) {
10054                 s = skipspace(s);
10055             }
10056 	    if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
10057                 /* ${foo[0]} and ${foo{bar}} and ${^CAPTURE[0]} notation.  */
10058 		if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10059 		    const char * const brack =
10060 			(const char *)
10061 			((*s == '[') ? "[...]" : "{...}");
10062                     orig_copline = CopLINE(PL_curcop);
10063                     CopLINE_set(PL_curcop, tmp_copline);
10064    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
10065 		    Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10066 			"Ambiguous use of %c{%s%s} resolved to %c%s%s",
10067 			funny, dest, brack, funny, dest, brack);
10068                     CopLINE_set(PL_curcop, orig_copline);
10069 		}
10070 		bracket++;
10071 		PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
10072 		PL_lex_allbrackets++;
10073 		return s;
10074 	    }
10075 	}
10076 
10077         if ( !tmp_copline )
10078             tmp_copline = CopLINE(PL_curcop);
10079         if ((skip = s < PL_bufend && isSPACE(*s))) {
10080             /* Avoid incrementing line numbers or resetting PL_linestart,
10081                in case we have to back up.  */
10082             STRLEN s_off = s - SvPVX(PL_linestr);
10083             s2 = peekspace(s);
10084             s = SvPVX(PL_linestr) + s_off;
10085         }
10086         else
10087             s2 = s;
10088 
10089         /* Expect to find a closing } after consuming any trailing whitespace.
10090          */
10091         if (*s2 == '}') {
10092             /* Now increment line numbers if applicable.  */
10093             if (skip)
10094                 s = skipspace(s);
10095 	    s++;
10096 	    if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10097 		PL_lex_state = LEX_INTERPEND;
10098 		PL_expect = XREF;
10099 	    }
10100 	    if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) {
10101 		if (ckWARN(WARN_AMBIGUOUS)
10102                     && (keyword(dest, d - dest, 0)
10103 		        || get_cvn_flags(dest, d - dest, is_utf8
10104                            ? SVf_UTF8
10105                            : 0)))
10106 		{
10107                     SV *tmp = newSVpvn_flags( dest, d - dest,
10108                                         SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
10109 		    if (funny == '#')
10110 			funny = '@';
10111                     orig_copline = CopLINE(PL_curcop);
10112                     CopLINE_set(PL_curcop, tmp_copline);
10113 		    Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10114 			"Ambiguous use of %c{%" SVf "} resolved to %c%" SVf,
10115 			funny, SVfARG(tmp), funny, SVfARG(tmp));
10116                     CopLINE_set(PL_curcop, orig_copline);
10117 		}
10118 	    }
10119 	}
10120 	else {
10121             /* Didn't find the closing } at the point we expected, so restore
10122                state such that the next thing to process is the opening { and */
10123 	    s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
10124             CopLINE_set(PL_curcop, orig_copline);
10125             PL_parser->herelines = herelines;
10126 	    *dest = '\0';
10127             PL_parser->sub_no_recover = TRUE;
10128 	}
10129     }
10130     else if (   PL_lex_state == LEX_INTERPNORMAL
10131              && !PL_lex_brackets
10132              && !intuit_more(s, PL_bufend))
10133 	PL_lex_state = LEX_INTERPEND;
10134     return s;
10135 }
10136 
10137 static bool
S_pmflag(pTHX_ const char * const valid_flags,U32 * pmfl,char ** s,char * charset,unsigned int * x_mod_count)10138 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) {
10139 
10140     /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag
10141      * found in the parse starting at 's', based on the subset that are valid
10142      * in this context input to this routine in 'valid_flags'. Advances s.
10143      * Returns TRUE if the input should be treated as a valid flag, so the next
10144      * char may be as well; otherwise FALSE. 'charset' should point to a NUL
10145      * upon first call on the current regex.  This routine will set it to any
10146      * charset modifier found.  The caller shouldn't change it.  This way,
10147      * another charset modifier encountered in the parse can be detected as an
10148      * error, as we have decided to allow only one */
10149 
10150     const char c = **s;
10151     STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
10152 
10153     if ( charlen != 1 || ! strchr(valid_flags, c) ) {
10154         if (isWORDCHAR_lazy_if_safe( *s, PL_bufend, UTF)) {
10155             yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
10156                        UTF ? SVf_UTF8 : 0);
10157             (*s) += charlen;
10158             /* Pretend that it worked, so will continue processing before
10159              * dieing */
10160             return TRUE;
10161         }
10162         return FALSE;
10163     }
10164 
10165     switch (c) {
10166 
10167         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count);
10168         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
10169         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
10170         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
10171         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
10172         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
10173 	case LOCALE_PAT_MOD:
10174 	    if (*charset) {
10175 		goto multiple_charsets;
10176 	    }
10177 	    set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
10178 	    *charset = c;
10179 	    break;
10180 	case UNICODE_PAT_MOD:
10181 	    if (*charset) {
10182 		goto multiple_charsets;
10183 	    }
10184 	    set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
10185 	    *charset = c;
10186 	    break;
10187 	case ASCII_RESTRICT_PAT_MOD:
10188 	    if (! *charset) {
10189 		set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
10190 	    }
10191 	    else {
10192 
10193 		/* Error if previous modifier wasn't an 'a', but if it was, see
10194 		 * if, and accept, a second occurrence (only) */
10195 		if (*charset != 'a'
10196 		    || get_regex_charset(*pmfl)
10197 			!= REGEX_ASCII_RESTRICTED_CHARSET)
10198 		{
10199 			goto multiple_charsets;
10200 		}
10201 		set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
10202 	    }
10203 	    *charset = c;
10204 	    break;
10205 	case DEPENDS_PAT_MOD:
10206 	    if (*charset) {
10207 		goto multiple_charsets;
10208 	    }
10209 	    set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
10210 	    *charset = c;
10211 	    break;
10212     }
10213 
10214     (*s)++;
10215     return TRUE;
10216 
10217     multiple_charsets:
10218 	if (*charset != c) {
10219 	    yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
10220 	}
10221 	else if (c == 'a') {
10222   /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
10223 	    yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
10224 	}
10225 	else {
10226 	    yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
10227 	}
10228 
10229 	/* Pretend that it worked, so will continue processing before dieing */
10230 	(*s)++;
10231 	return TRUE;
10232 }
10233 
10234 STATIC char *
S_scan_pat(pTHX_ char * start,I32 type)10235 S_scan_pat(pTHX_ char *start, I32 type)
10236 {
10237     PMOP *pm;
10238     char *s;
10239     const char * const valid_flags =
10240 	(const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
10241     char charset = '\0';    /* character set modifier */
10242     unsigned int x_mod_count = 0;
10243 
10244     PERL_ARGS_ASSERT_SCAN_PAT;
10245 
10246     s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
10247     if (!s)
10248 	Perl_croak(aTHX_ "Search pattern not terminated");
10249 
10250     pm = (PMOP*)newPMOP(type, 0);
10251     if (PL_multi_open == '?') {
10252 	/* This is the only point in the code that sets PMf_ONCE:  */
10253 	pm->op_pmflags |= PMf_ONCE;
10254 
10255 	/* Hence it's safe to do this bit of PMOP book-keeping here, which
10256 	   allows us to restrict the list needed by reset to just the ??
10257 	   matches.  */
10258 	assert(type != OP_TRANS);
10259 	if (PL_curstash) {
10260 	    MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
10261 	    U32 elements;
10262 	    if (!mg) {
10263 		mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
10264 				 0);
10265 	    }
10266 	    elements = mg->mg_len / sizeof(PMOP**);
10267 	    Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
10268 	    ((PMOP**)mg->mg_ptr) [elements++] = pm;
10269 	    mg->mg_len = elements * sizeof(PMOP**);
10270 	    PmopSTASH_set(pm,PL_curstash);
10271 	}
10272     }
10273 
10274     /* if qr/...(?{..}).../, then need to parse the pattern within a new
10275      * anon CV. False positives like qr/[(?{]/ are harmless */
10276 
10277     if (type == OP_QR) {
10278 	STRLEN len;
10279 	char *e, *p = SvPV(PL_lex_stuff, len);
10280 	e = p + len;
10281 	for (; p < e; p++) {
10282 	    if (p[0] == '(' && p[1] == '?'
10283 		&& (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
10284 	    {
10285 		pm->op_pmflags |= PMf_HAS_CV;
10286 		break;
10287 	    }
10288 	}
10289 	pm->op_pmflags |= PMf_IS_QR;
10290     }
10291 
10292     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
10293                                 &s, &charset, &x_mod_count))
10294     {};
10295     /* issue a warning if /c is specified,but /g is not */
10296     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
10297     {
10298         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
10299 		       "Use of /c modifier is meaningless without /g" );
10300     }
10301 
10302     PL_lex_op = (OP*)pm;
10303     pl_yylval.ival = OP_MATCH;
10304     return s;
10305 }
10306 
10307 STATIC char *
S_scan_subst(pTHX_ char * start)10308 S_scan_subst(pTHX_ char *start)
10309 {
10310     char *s;
10311     PMOP *pm;
10312     I32 first_start;
10313     line_t first_line;
10314     line_t linediff = 0;
10315     I32 es = 0;
10316     char charset = '\0';    /* character set modifier */
10317     unsigned int x_mod_count = 0;
10318     char *t;
10319 
10320     PERL_ARGS_ASSERT_SCAN_SUBST;
10321 
10322     pl_yylval.ival = OP_NULL;
10323 
10324     s = scan_str(start, TRUE, FALSE, FALSE, &t);
10325 
10326     if (!s)
10327 	Perl_croak(aTHX_ "Substitution pattern not terminated");
10328 
10329     s = t;
10330 
10331     first_start = PL_multi_start;
10332     first_line = CopLINE(PL_curcop);
10333     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
10334     if (!s) {
10335 	SvREFCNT_dec_NN(PL_lex_stuff);
10336 	PL_lex_stuff = NULL;
10337 	Perl_croak(aTHX_ "Substitution replacement not terminated");
10338     }
10339     PL_multi_start = first_start;	/* so whole substitution is taken together */
10340 
10341     pm = (PMOP*)newPMOP(OP_SUBST, 0);
10342 
10343 
10344     while (*s) {
10345 	if (*s == EXEC_PAT_MOD) {
10346 	    s++;
10347 	    es++;
10348 	}
10349 	else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
10350                                   &s, &charset, &x_mod_count))
10351 	{
10352 	    break;
10353 	}
10354     }
10355 
10356     if ((pm->op_pmflags & PMf_CONTINUE)) {
10357         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
10358     }
10359 
10360     if (es) {
10361 	SV * const repl = newSVpvs("");
10362 
10363 	PL_multi_end = 0;
10364 	pm->op_pmflags |= PMf_EVAL;
10365         for (; es > 1; es--) {
10366             sv_catpvs(repl, "eval ");
10367         }
10368         sv_catpvs(repl, "do {");
10369 	sv_catsv(repl, PL_parser->lex_sub_repl);
10370 	sv_catpvs(repl, "}");
10371 	SvREFCNT_dec(PL_parser->lex_sub_repl);
10372 	PL_parser->lex_sub_repl = repl;
10373     }
10374 
10375 
10376     linediff = CopLINE(PL_curcop) - first_line;
10377     if (linediff)
10378 	CopLINE_set(PL_curcop, first_line);
10379 
10380     if (linediff || es) {
10381         /* the IVX field indicates that the replacement string is a s///e;
10382          * the NVX field indicates how many src code lines the replacement
10383          * spreads over */
10384         sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV);
10385         ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = linediff;
10386         ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen =
10387                                                                     cBOOL(es);
10388     }
10389 
10390     PL_lex_op = (OP*)pm;
10391     pl_yylval.ival = OP_SUBST;
10392     return s;
10393 }
10394 
10395 STATIC char *
S_scan_trans(pTHX_ char * start)10396 S_scan_trans(pTHX_ char *start)
10397 {
10398     char* s;
10399     OP *o;
10400     U8 squash;
10401     U8 del;
10402     U8 complement;
10403     bool nondestruct = 0;
10404     char *t;
10405 
10406     PERL_ARGS_ASSERT_SCAN_TRANS;
10407 
10408     pl_yylval.ival = OP_NULL;
10409 
10410     s = scan_str(start,FALSE,FALSE,FALSE,&t);
10411     if (!s)
10412 	Perl_croak(aTHX_ "Transliteration pattern not terminated");
10413 
10414     s = t;
10415 
10416     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
10417     if (!s) {
10418 	SvREFCNT_dec_NN(PL_lex_stuff);
10419 	PL_lex_stuff = NULL;
10420 	Perl_croak(aTHX_ "Transliteration replacement not terminated");
10421     }
10422 
10423     complement = del = squash = 0;
10424     while (1) {
10425 	switch (*s) {
10426 	case 'c':
10427 	    complement = OPpTRANS_COMPLEMENT;
10428 	    break;
10429 	case 'd':
10430 	    del = OPpTRANS_DELETE;
10431 	    break;
10432 	case 's':
10433 	    squash = OPpTRANS_SQUASH;
10434 	    break;
10435 	case 'r':
10436 	    nondestruct = 1;
10437 	    break;
10438 	default:
10439 	    goto no_more;
10440 	}
10441 	s++;
10442     }
10443   no_more:
10444 
10445     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
10446     o->op_private &= ~OPpTRANS_ALL;
10447     o->op_private |= del|squash|complement;
10448 
10449     PL_lex_op = o;
10450     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
10451 
10452 
10453     return s;
10454 }
10455 
10456 /* scan_heredoc
10457    Takes a pointer to the first < in <<FOO.
10458    Returns a pointer to the byte following <<FOO.
10459 
10460    This function scans a heredoc, which involves different methods
10461    depending on whether we are in a string eval, quoted construct, etc.
10462    This is because PL_linestr could containing a single line of input, or
10463    a whole string being evalled, or the contents of the current quote-
10464    like operator.
10465 
10466    The two basic methods are:
10467     - Steal lines from the input stream
10468     - Scan the heredoc in PL_linestr and remove it therefrom
10469 
10470    In a file scope or filtered eval, the first method is used; in a
10471    string eval, the second.
10472 
10473    In a quote-like operator, we have to choose between the two,
10474    depending on where we can find a newline.  We peek into outer lex-
10475    ing scopes until we find one with a newline in it.  If we reach the
10476    outermost lexing scope and it is a file, we use the stream method.
10477    Otherwise it is treated as an eval.
10478 */
10479 
10480 STATIC char *
S_scan_heredoc(pTHX_ char * s)10481 S_scan_heredoc(pTHX_ char *s)
10482 {
10483     I32 op_type = OP_SCALAR;
10484     I32 len;
10485     SV *tmpstr;
10486     char term;
10487     char *d;
10488     char *e;
10489     char *peek;
10490     char *indent = 0;
10491     I32 indent_len = 0;
10492     bool indented = FALSE;
10493     const bool infile = PL_rsfp || PL_parser->filtered;
10494     const line_t origline = CopLINE(PL_curcop);
10495     LEXSHARED *shared = PL_parser->lex_shared;
10496 
10497     PERL_ARGS_ASSERT_SCAN_HEREDOC;
10498 
10499     s += 2;
10500     d = PL_tokenbuf + 1;
10501     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
10502     *PL_tokenbuf = '\n';
10503     peek = s;
10504 
10505     if (*peek == '~') {
10506 	indented = TRUE;
10507 	peek++; s++;
10508     }
10509 
10510     while (SPACE_OR_TAB(*peek))
10511 	peek++;
10512 
10513     if (*peek == '`' || *peek == '\'' || *peek =='"') {
10514 	s = peek;
10515 	term = *s++;
10516 	s = delimcpy(d, e, s, PL_bufend, term, &len);
10517 	if (s == PL_bufend)
10518 	    Perl_croak(aTHX_ "Unterminated delimiter for here document");
10519 	d += len;
10520 	s++;
10521     }
10522     else {
10523 	if (*s == '\\')
10524             /* <<\FOO is equivalent to <<'FOO' */
10525 	    s++, term = '\'';
10526 	else
10527 	    term = '"';
10528 
10529 	if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
10530 	    Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden");
10531 
10532 	peek = s;
10533 
10534         while (isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF)) {
10535 	    peek += UTF ? UTF8SKIP(peek) : 1;
10536 	}
10537 
10538 	len = (peek - s >= e - d) ? (e - d) : (peek - s);
10539 	Copy(s, d, len, char);
10540 	s += len;
10541 	d += len;
10542     }
10543 
10544     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
10545 	Perl_croak(aTHX_ "Delimiter for here document is too long");
10546 
10547     *d++ = '\n';
10548     *d = '\0';
10549     len = d - PL_tokenbuf;
10550 
10551 #ifndef PERL_STRICT_CR
10552     d = (char *) memchr(s, '\r', PL_bufend - s);
10553     if (d) {
10554 	char * const olds = s;
10555 	s = d;
10556 	while (s < PL_bufend) {
10557 	    if (*s == '\r') {
10558 		*d++ = '\n';
10559 		if (*++s == '\n')
10560 		    s++;
10561 	    }
10562 	    else if (*s == '\n' && s[1] == '\r') {	/* \015\013 on a mac? */
10563 		*d++ = *s++;
10564 		s++;
10565 	    }
10566 	    else
10567 		*d++ = *s++;
10568 	}
10569 	*d = '\0';
10570 	PL_bufend = d;
10571 	SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10572 	s = olds;
10573     }
10574 #endif
10575 
10576     tmpstr = newSV_type(SVt_PVIV);
10577     SvGROW(tmpstr, 80);
10578     if (term == '\'') {
10579 	op_type = OP_CONST;
10580 	SvIV_set(tmpstr, -1);
10581     }
10582     else if (term == '`') {
10583 	op_type = OP_BACKTICK;
10584 	SvIV_set(tmpstr, '\\');
10585     }
10586 
10587     PL_multi_start = origline + 1 + PL_parser->herelines;
10588     PL_multi_open = PL_multi_close = '<';
10589 
10590     /* inside a string eval or quote-like operator */
10591     if (!infile || PL_lex_inwhat) {
10592 	SV *linestr;
10593 	char *bufend;
10594 	char * const olds = s;
10595 	PERL_CONTEXT * const cx = CX_CUR();
10596 	/* These two fields are not set until an inner lexing scope is
10597 	   entered.  But we need them set here. */
10598 	shared->ls_bufptr  = s;
10599 	shared->ls_linestr = PL_linestr;
10600 
10601         if (PL_lex_inwhat) {
10602             /* Look for a newline.  If the current buffer does not have one,
10603              peek into the line buffer of the parent lexing scope, going
10604              up as many levels as necessary to find one with a newline
10605              after bufptr.
10606             */
10607 	    while (!(s = (char *)memchr(
10608                                 (void *)shared->ls_bufptr, '\n',
10609                                 SvEND(shared->ls_linestr)-shared->ls_bufptr
10610 		)))
10611             {
10612                 shared = shared->ls_prev;
10613                 /* shared is only null if we have gone beyond the outermost
10614                    lexing scope.  In a file, we will have broken out of the
10615                    loop in the previous iteration.  In an eval, the string buf-
10616                    fer ends with "\n;", so the while condition above will have
10617                    evaluated to false.  So shared can never be null.  Or so you
10618                    might think.  Odd syntax errors like s;@{<<; can gobble up
10619                    the implicit semicolon at the end of a flie, causing the
10620                    file handle to be closed even when we are not in a string
10621                    eval.  So shared may be null in that case.
10622                    (Closing '>>}' here to balance the earlier open brace for
10623                    editors that look for matched pairs.) */
10624                 if (UNLIKELY(!shared))
10625                     goto interminable;
10626                 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
10627                    most lexing scope.  In a file, shared->ls_linestr at that
10628                    level is just one line, so there is no body to steal. */
10629                 if (infile && !shared->ls_prev) {
10630                     s = olds;
10631                     goto streaming;
10632                 }
10633             }
10634         }
10635 	else {	/* eval or we've already hit EOF */
10636 	    s = (char*)memchr((void*)s, '\n', PL_bufend - s);
10637 	    if (!s)
10638                 goto interminable;
10639 	}
10640 
10641 	linestr = shared->ls_linestr;
10642 	bufend = SvEND(linestr);
10643 	d = s;
10644 	if (indented) {
10645 	    char *myolds = s;
10646 
10647 	    while (s < bufend - len + 1) {
10648 		if (*s++ == '\n')
10649 		    ++PL_parser->herelines;
10650 
10651 		if (memEQ(s, PL_tokenbuf + 1, len - 1)) {
10652 		    char *backup = s;
10653 		    indent_len = 0;
10654 
10655 		    /* Only valid if it's preceded by whitespace only */
10656 		    while (backup != myolds && --backup >= myolds) {
10657 			if (! SPACE_OR_TAB(*backup)) {
10658 			    break;
10659 			}
10660 			indent_len++;
10661 		    }
10662 
10663 		    /* No whitespace or all! */
10664 		    if (backup == s || *backup == '\n') {
10665 			Newx(indent, indent_len + 1, char);
10666 			memcpy(indent, backup + 1, indent_len);
10667 			indent[indent_len] = 0;
10668 			s--; /* before our delimiter */
10669 			PL_parser->herelines--; /* this line doesn't count */
10670 			break;
10671 		    }
10672 		}
10673 	    }
10674 	}
10675         else {
10676 	    while (s < bufend - len + 1
10677 	           && memNE(s,PL_tokenbuf,len) )
10678 	    {
10679 		if (*s++ == '\n')
10680 		    ++PL_parser->herelines;
10681 	    }
10682 	}
10683 
10684 	if (s >= bufend - len + 1) {
10685 	    goto interminable;
10686 	}
10687 
10688 	sv_setpvn(tmpstr,d+1,s-d);
10689 	s += len - 1;
10690 	/* the preceding stmt passes a newline */
10691 	PL_parser->herelines++;
10692 
10693 	/* s now points to the newline after the heredoc terminator.
10694 	   d points to the newline before the body of the heredoc.
10695 	 */
10696 
10697 	/* We are going to modify linestr in place here, so set
10698 	   aside copies of the string if necessary for re-evals or
10699 	   (caller $n)[6]. */
10700 	/* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
10701 	   check shared->re_eval_str. */
10702 	if (shared->re_eval_start || shared->re_eval_str) {
10703 	    /* Set aside the rest of the regexp */
10704 	    if (!shared->re_eval_str)
10705 		shared->re_eval_str =
10706 		       newSVpvn(shared->re_eval_start,
10707 				bufend - shared->re_eval_start);
10708 	    shared->re_eval_start -= s-d;
10709 	}
10710 
10711 	if (cxstack_ix >= 0
10712             && CxTYPE(cx) == CXt_EVAL
10713             && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
10714             && cx->blk_eval.cur_text == linestr)
10715         {
10716 	    cx->blk_eval.cur_text = newSVsv(linestr);
10717 	    cx->blk_u16 |= 0x40; /* indicate cur_text is ref counted */
10718 	}
10719 
10720 	/* Copy everything from s onwards back to d. */
10721 	Move(s,d,bufend-s + 1,char);
10722 	SvCUR_set(linestr, SvCUR(linestr) - (s-d));
10723 	/* Setting PL_bufend only applies when we have not dug deeper
10724 	   into other scopes, because sublex_done sets PL_bufend to
10725 	   SvEND(PL_linestr). */
10726 	if (shared == PL_parser->lex_shared)
10727             PL_bufend = SvEND(linestr);
10728 	s = olds;
10729     }
10730     else {
10731         SV *linestr_save;
10732         char *oldbufptr_save;
10733         char *oldoldbufptr_save;
10734       streaming:
10735         SvPVCLEAR(tmpstr);   /* avoid "uninitialized" warning */
10736         term = PL_tokenbuf[1];
10737         len--;
10738         linestr_save = PL_linestr; /* must restore this afterwards */
10739         d = s;			 /* and this */
10740         oldbufptr_save = PL_oldbufptr;
10741         oldoldbufptr_save = PL_oldoldbufptr;
10742         PL_linestr = newSVpvs("");
10743         PL_bufend = SvPVX(PL_linestr);
10744 
10745         while (1) {
10746             PL_bufptr = PL_bufend;
10747             CopLINE_set(PL_curcop,
10748                         origline + 1 + PL_parser->herelines);
10749 
10750             if (   !lex_next_chunk(LEX_NO_TERM)
10751                 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n'))
10752             {
10753                 /* Simply freeing linestr_save might seem simpler here, as it
10754                    does not matter what PL_linestr points to, since we are
10755                    about to croak; but in a quote-like op, linestr_save
10756                    will have been prospectively freed already, via
10757                    SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
10758                    restore PL_linestr. */
10759                 SvREFCNT_dec_NN(PL_linestr);
10760                 PL_linestr = linestr_save;
10761                 PL_oldbufptr = oldbufptr_save;
10762                 PL_oldoldbufptr = oldoldbufptr_save;
10763                 goto interminable;
10764             }
10765 
10766             CopLINE_set(PL_curcop, origline);
10767 
10768             if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
10769                 s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
10770                 /* ^That should be enough to avoid this needing to grow:  */
10771                 sv_catpvs(PL_linestr, "\n\0");
10772                 assert(s == SvPVX(PL_linestr));
10773                 PL_bufend = SvEND(PL_linestr);
10774             }
10775 
10776             s = PL_bufptr;
10777             PL_parser->herelines++;
10778             PL_last_lop = PL_last_uni = NULL;
10779 
10780 #ifndef PERL_STRICT_CR
10781             if (PL_bufend - PL_linestart >= 2) {
10782                 if (   (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
10783                     || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
10784                 {
10785                     PL_bufend[-2] = '\n';
10786                     PL_bufend--;
10787                     SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10788                 }
10789                 else if (PL_bufend[-1] == '\r')
10790                     PL_bufend[-1] = '\n';
10791             }
10792             else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10793                 PL_bufend[-1] = '\n';
10794 #endif
10795 
10796             if (indented && (PL_bufend-s) >= len) {
10797                 char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len));
10798 
10799                 if (found) {
10800                     char *backup = found;
10801                     indent_len = 0;
10802 
10803                     /* Only valid if it's preceded by whitespace only */
10804                     while (backup != s && --backup >= s) {
10805                         if (! SPACE_OR_TAB(*backup)) {
10806                             break;
10807                         }
10808                         indent_len++;
10809                     }
10810 
10811                     /* All whitespace or none! */
10812                     if (backup == found || SPACE_OR_TAB(*backup)) {
10813                         Newx(indent, indent_len + 1, char);
10814                         memcpy(indent, backup, indent_len);
10815                         indent[indent_len] = 0;
10816                         SvREFCNT_dec(PL_linestr);
10817                         PL_linestr = linestr_save;
10818                         PL_linestart = SvPVX(linestr_save);
10819                         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10820                         PL_oldbufptr = oldbufptr_save;
10821                         PL_oldoldbufptr = oldoldbufptr_save;
10822                         s = d;
10823                         break;
10824                     }
10825                 }
10826 
10827                 /* Didn't find it */
10828                 sv_catsv(tmpstr,PL_linestr);
10829             }
10830             else {
10831                 if (*s == term && PL_bufend-s >= len
10832                     && memEQ(s,PL_tokenbuf + 1,len))
10833                 {
10834                     SvREFCNT_dec(PL_linestr);
10835                     PL_linestr = linestr_save;
10836                     PL_linestart = SvPVX(linestr_save);
10837                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10838                     PL_oldbufptr = oldbufptr_save;
10839                     PL_oldoldbufptr = oldoldbufptr_save;
10840                     s = d;
10841                     break;
10842                 }
10843                 else {
10844                     sv_catsv(tmpstr,PL_linestr);
10845                 }
10846             }
10847         } /* while (1) */
10848     }
10849 
10850     PL_multi_end = origline + PL_parser->herelines;
10851 
10852     if (indented && indent) {
10853 	STRLEN linecount = 1;
10854 	STRLEN herelen = SvCUR(tmpstr);
10855 	char *ss = SvPVX(tmpstr);
10856 	char *se = ss + herelen;
10857         SV *newstr = newSV(herelen+1);
10858         SvPOK_on(newstr);
10859 
10860 	/* Trim leading whitespace */
10861 	while (ss < se) {
10862 	    /* newline only? Copy and move on */
10863 	    if (*ss == '\n') {
10864 		sv_catpvs(newstr,"\n");
10865 		ss++;
10866 		linecount++;
10867 
10868 	    /* Found our indentation? Strip it */
10869 	    }
10870             else if (se - ss >= indent_len
10871 	               && memEQ(ss, indent, indent_len))
10872 	    {
10873 		STRLEN le = 0;
10874 		ss += indent_len;
10875 
10876 		while ((ss + le) < se && *(ss + le) != '\n')
10877 		    le++;
10878 
10879 		sv_catpvn(newstr, ss, le);
10880 		ss += le;
10881 
10882 	    /* Line doesn't begin with our indentation? Croak */
10883 	    }
10884             else {
10885                 Safefree(indent);
10886 		Perl_croak(aTHX_
10887 		    "Indentation on line %d of here-doc doesn't match delimiter",
10888 		    (int)linecount
10889 		);
10890 	    }
10891 	} /* while */
10892 
10893         /* avoid sv_setsv() as we dont wan't to COW here */
10894         sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr));
10895 	Safefree(indent);
10896 	SvREFCNT_dec_NN(newstr);
10897     }
10898 
10899     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
10900 	SvPV_shrink_to_cur(tmpstr);
10901     }
10902 
10903     if (!IN_BYTES) {
10904 	if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
10905 	    SvUTF8_on(tmpstr);
10906     }
10907 
10908     PL_lex_stuff = tmpstr;
10909     pl_yylval.ival = op_type;
10910     return s;
10911 
10912   interminable:
10913     if (indent)
10914 	Safefree(indent);
10915     SvREFCNT_dec(tmpstr);
10916     CopLINE_set(PL_curcop, origline);
10917     missingterm(PL_tokenbuf + 1, sizeof(PL_tokenbuf) - 1);
10918 }
10919 
10920 
10921 /* scan_inputsymbol
10922    takes: position of first '<' in input buffer
10923    returns: position of first char following the matching '>' in
10924 	    input buffer
10925    side-effects: pl_yylval and lex_op are set.
10926 
10927    This code handles:
10928 
10929    <>		read from ARGV
10930    <<>>		read from ARGV without magic open
10931    <FH> 	read from filehandle
10932    <pkg::FH>	read from package qualified filehandle
10933    <pkg'FH>	read from package qualified filehandle
10934    <$fh>	read from filehandle in $fh
10935    <*.h>	filename glob
10936 
10937 */
10938 
10939 STATIC char *
S_scan_inputsymbol(pTHX_ char * start)10940 S_scan_inputsymbol(pTHX_ char *start)
10941 {
10942     char *s = start;		/* current position in buffer */
10943     char *end;
10944     I32 len;
10945     bool nomagicopen = FALSE;
10946     char *d = PL_tokenbuf;					/* start of temp holding space */
10947     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;	/* end of temp holding space */
10948 
10949     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
10950 
10951     end = (char *) memchr(s, '\n', PL_bufend - s);
10952     if (!end)
10953 	end = PL_bufend;
10954     if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
10955         nomagicopen = TRUE;
10956         *d = '\0';
10957         len = 0;
10958         s += 3;
10959     }
10960     else
10961         s = delimcpy(d, e, s + 1, end, '>', &len);	/* extract until > */
10962 
10963     /* die if we didn't have space for the contents of the <>,
10964        or if it didn't end, or if we see a newline
10965     */
10966 
10967     if (len >= (I32)sizeof PL_tokenbuf)
10968 	Perl_croak(aTHX_ "Excessively long <> operator");
10969     if (s >= end)
10970 	Perl_croak(aTHX_ "Unterminated <> operator");
10971 
10972     s++;
10973 
10974     /* check for <$fh>
10975        Remember, only scalar variables are interpreted as filehandles by
10976        this code.  Anything more complex (e.g., <$fh{$num}>) will be
10977        treated as a glob() call.
10978        This code makes use of the fact that except for the $ at the front,
10979        a scalar variable and a filehandle look the same.
10980     */
10981     if (*d == '$' && d[1]) d++;
10982 
10983     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
10984     while (isWORDCHAR_lazy_if_safe(d, e, UTF) || *d == '\'' || *d == ':') {
10985 	d += UTF ? UTF8SKIP(d) : 1;
10986     }
10987 
10988     /* If we've tried to read what we allow filehandles to look like, and
10989        there's still text left, then it must be a glob() and not a getline.
10990        Use scan_str to pull out the stuff between the <> and treat it
10991        as nothing more than a string.
10992     */
10993 
10994     if (d - PL_tokenbuf != len) {
10995 	pl_yylval.ival = OP_GLOB;
10996 	s = scan_str(start,FALSE,FALSE,FALSE,NULL);
10997 	if (!s)
10998 	   Perl_croak(aTHX_ "Glob not terminated");
10999 	return s;
11000     }
11001     else {
11002 	bool readline_overriden = FALSE;
11003 	GV *gv_readline;
11004     	/* we're in a filehandle read situation */
11005 	d = PL_tokenbuf;
11006 
11007 	/* turn <> into <ARGV> */
11008 	if (!len)
11009 	    Copy("ARGV",d,5,char);
11010 
11011 	/* Check whether readline() is overriden */
11012 	if ((gv_readline = gv_override("readline",8)))
11013 	    readline_overriden = TRUE;
11014 
11015 	/* if <$fh>, create the ops to turn the variable into a
11016 	   filehandle
11017 	*/
11018 	if (*d == '$') {
11019 	    /* try to find it in the pad for this block, otherwise find
11020 	       add symbol table ops
11021 	    */
11022 	    const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
11023 	    if (tmp != NOT_IN_PAD) {
11024 		if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
11025 		    HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
11026 		    HEK * const stashname = HvNAME_HEK(stash);
11027 		    SV * const sym = sv_2mortal(newSVhek(stashname));
11028 		    sv_catpvs(sym, "::");
11029 		    sv_catpv(sym, d+1);
11030 		    d = SvPVX(sym);
11031 		    goto intro_sym;
11032 		}
11033 		else {
11034 		    OP * const o = newOP(OP_PADSV, 0);
11035 		    o->op_targ = tmp;
11036 		    PL_lex_op = readline_overriden
11037                         ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11038 				op_append_elem(OP_LIST, o,
11039 				    newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
11040                         : newUNOP(OP_READLINE, 0, o);
11041 		}
11042 	    }
11043 	    else {
11044 		GV *gv;
11045 		++d;
11046               intro_sym:
11047 		gv = gv_fetchpv(d,
11048 				GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
11049 				SVt_PV);
11050 		PL_lex_op = readline_overriden
11051                     ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11052 			    op_append_elem(OP_LIST,
11053 				newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
11054 				newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11055                     : newUNOP(OP_READLINE, 0,
11056 			    newUNOP(OP_RV2SV, 0,
11057 				newGVOP(OP_GV, 0, gv)));
11058 	    }
11059 	    /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
11060 	    pl_yylval.ival = OP_NULL;
11061 	}
11062 
11063 	/* If it's none of the above, it must be a literal filehandle
11064 	   (<Foo::BAR> or <FOO>) so build a simple readline OP */
11065 	else {
11066 	    GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
11067 	    PL_lex_op = readline_overriden
11068                 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11069 			op_append_elem(OP_LIST,
11070 			    newGVOP(OP_GV, 0, gv),
11071 			    newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11072                 : newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
11073 	    pl_yylval.ival = OP_NULL;
11074 	}
11075     }
11076 
11077     return s;
11078 }
11079 
11080 
11081 /* scan_str
11082    takes:
11083 	start			position in buffer
11084         keep_bracketed_quoted   preserve \ quoting of embedded delimiters, but
11085                                 only if they are of the open/close form
11086 	keep_delims		preserve the delimiters around the string
11087 	re_reparse		compiling a run-time /(?{})/:
11088 				   collapse // to /,  and skip encoding src
11089 	delimp			if non-null, this is set to the position of
11090 				the closing delimiter, or just after it if
11091 				the closing and opening delimiters differ
11092 				(i.e., the opening delimiter of a substitu-
11093 				tion replacement)
11094    returns: position to continue reading from buffer
11095    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
11096    	updates the read buffer.
11097 
11098    This subroutine pulls a string out of the input.  It is called for:
11099    	q		single quotes		q(literal text)
11100 	'		single quotes		'literal text'
11101 	qq		double quotes		qq(interpolate $here please)
11102 	"		double quotes		"interpolate $here please"
11103 	qx		backticks		qx(/bin/ls -l)
11104 	`		backticks		`/bin/ls -l`
11105 	qw		quote words		@EXPORT_OK = qw( func() $spam )
11106 	m//		regexp match		m/this/
11107 	s///		regexp substitute	s/this/that/
11108 	tr///		string transliterate	tr/this/that/
11109 	y///		string transliterate	y/this/that/
11110 	($*@)		sub prototypes		sub foo ($)
11111 	(stuff)		sub attr parameters	sub foo : attr(stuff)
11112 	<>		readline or globs	<FOO>, <>, <$fh>, or <*.c>
11113 
11114    In most of these cases (all but <>, patterns and transliterate)
11115    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
11116    calls scan_str().  s/// makes yylex() call scan_subst() which calls
11117    scan_str().  tr/// and y/// make yylex() call scan_trans() which
11118    calls scan_str().
11119 
11120    It skips whitespace before the string starts, and treats the first
11121    character as the delimiter.  If the delimiter is one of ([{< then
11122    the corresponding "close" character )]}> is used as the closing
11123    delimiter.  It allows quoting of delimiters, and if the string has
11124    balanced delimiters ([{<>}]) it allows nesting.
11125 
11126    On success, the SV with the resulting string is put into lex_stuff or,
11127    if that is already non-NULL, into lex_repl. The second case occurs only
11128    when parsing the RHS of the special constructs s/// and tr/// (y///).
11129    For convenience, the terminating delimiter character is stuffed into
11130    SvIVX of the SV.
11131 */
11132 
11133 char *
Perl_scan_str(pTHX_ char * start,int keep_bracketed_quoted,int keep_delims,int re_reparse,char ** delimp)11134 Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
11135 		 char **delimp
11136     )
11137 {
11138     SV *sv;			/* scalar value: string */
11139     const char *tmps;		/* temp string, used for delimiter matching */
11140     char *s = start;		/* current position in the buffer */
11141     char term;			/* terminating character */
11142     char *to;			/* current position in the sv's data */
11143     I32 brackets = 1;		/* bracket nesting level */
11144     bool d_is_utf8 = FALSE;	/* is there any utf8 content? */
11145     IV termcode;		/* terminating char. code */
11146     U8 termstr[UTF8_MAXBYTES+1]; /* terminating string */
11147     STRLEN termlen;		/* length of terminating string */
11148     line_t herelines;
11149 
11150     /* The delimiters that have a mirror-image closing one */
11151     const char * opening_delims = "([{<";
11152     const char * closing_delims = ")]}>";
11153 
11154     /* The only non-UTF character that isn't a stand alone grapheme is
11155      * white-space, hence can't be a delimiter. */
11156     const char * non_grapheme_msg = "Use of unassigned code point or"
11157                                     " non-standalone grapheme for a delimiter"
11158                                     " is not allowed";
11159     PERL_ARGS_ASSERT_SCAN_STR;
11160 
11161     /* skip space before the delimiter */
11162     if (isSPACE(*s)) {
11163 	s = skipspace(s);
11164     }
11165 
11166     /* mark where we are, in case we need to report errors */
11167     CLINE;
11168 
11169     /* after skipping whitespace, the next character is the terminator */
11170     term = *s;
11171     if (!UTF || UTF8_IS_INVARIANT(term)) {
11172 	termcode = termstr[0] = term;
11173 	termlen = 1;
11174     }
11175     else {
11176 	termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
11177         if (UTF && UNLIKELY(! is_grapheme((U8 *) start,
11178                                            (U8 *) s,
11179                                            (U8 *) PL_bufend,
11180                                                   termcode)))
11181         {
11182             yyerror(non_grapheme_msg);
11183         }
11184 
11185 	Copy(s, termstr, termlen, U8);
11186     }
11187 
11188     /* mark where we are */
11189     PL_multi_start = CopLINE(PL_curcop);
11190     PL_multi_open = termcode;
11191     herelines = PL_parser->herelines;
11192 
11193     /* If the delimiter has a mirror-image closing one, get it */
11194     if (term && (tmps = strchr(opening_delims, term))) {
11195         termcode = termstr[0] = term = closing_delims[tmps - opening_delims];
11196     }
11197 
11198     PL_multi_close = termcode;
11199 
11200     if (PL_multi_open == PL_multi_close) {
11201         keep_bracketed_quoted = FALSE;
11202     }
11203 
11204     /* create a new SV to hold the contents.  79 is the SV's initial length.
11205        What a random number. */
11206     sv = newSV_type(SVt_PVIV);
11207     SvGROW(sv, 80);
11208     SvIV_set(sv, termcode);
11209     (void)SvPOK_only(sv);		/* validate pointer */
11210 
11211     /* move past delimiter and try to read a complete string */
11212     if (keep_delims)
11213 	sv_catpvn(sv, s, termlen);
11214     s += termlen;
11215     for (;;) {
11216     	/* extend sv if need be */
11217 	SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
11218 	/* set 'to' to the next character in the sv's string */
11219 	to = SvPVX(sv)+SvCUR(sv);
11220 
11221 	/* if open delimiter is the close delimiter read unbridle */
11222 	if (PL_multi_open == PL_multi_close) {
11223 	    for (; s < PL_bufend; s++,to++) {
11224 	    	/* embedded newlines increment the current line number */
11225 		if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
11226 		    COPLINE_INC_WITH_HERELINES;
11227 		/* handle quoted delimiters */
11228 		if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
11229 		    if (!keep_bracketed_quoted
11230 		        && (s[1] == term
11231 			    || (re_reparse && s[1] == '\\'))
11232 		    )
11233 			s++;
11234 		    else /* any other quotes are simply copied straight through */
11235 			*to++ = *s++;
11236 		}
11237 		/* terminate when run out of buffer (the for() condition), or
11238 		   have found the terminator */
11239 		else if (*s == term) {  /* First byte of terminator matches */
11240 		    if (termlen == 1)   /* If is the only byte, are done */
11241 			break;
11242 
11243                     /* If the remainder of the terminator matches, also are
11244                      * done, after checking that is a separate grapheme */
11245                     if (   s + termlen <= PL_bufend
11246                         && memEQ(s + 1, (char*)termstr + 1, termlen - 1))
11247                     {
11248                         if (   UTF
11249                             && UNLIKELY(! is_grapheme((U8 *) start,
11250                                                        (U8 *) s,
11251                                                        (U8 *) PL_bufend,
11252                                                               termcode)))
11253                         {
11254                             yyerror(non_grapheme_msg);
11255                         }
11256 			break;
11257                     }
11258 		}
11259 		else if (!d_is_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) {
11260 		    d_is_utf8 = TRUE;
11261                 }
11262 
11263 		*to = *s;
11264 	    }
11265 	}
11266 
11267 	/* if the terminator isn't the same as the start character (e.g.,
11268 	   matched brackets), we have to allow more in the quoting, and
11269 	   be prepared for nested brackets.
11270 	*/
11271 	else {
11272 	    /* read until we run out of string, or we find the terminator */
11273 	    for (; s < PL_bufend; s++,to++) {
11274 	    	/* embedded newlines increment the line count */
11275 		if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
11276 		    COPLINE_INC_WITH_HERELINES;
11277 		/* backslashes can escape the open or closing characters */
11278 		if (*s == '\\' && s+1 < PL_bufend) {
11279 		    if (!keep_bracketed_quoted
11280                        && ( ((UV)s[1] == PL_multi_open)
11281                          || ((UV)s[1] == PL_multi_close) ))
11282                     {
11283 			s++;
11284                     }
11285 		    else
11286 			*to++ = *s++;
11287                 }
11288 		/* allow nested opens and closes */
11289 		else if ((UV)*s == PL_multi_close && --brackets <= 0)
11290 		    break;
11291 		else if ((UV)*s == PL_multi_open)
11292 		    brackets++;
11293 		else if (!d_is_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11294 		    d_is_utf8 = TRUE;
11295 		*to = *s;
11296 	    }
11297 	}
11298 	/* terminate the copied string and update the sv's end-of-string */
11299 	*to = '\0';
11300 	SvCUR_set(sv, to - SvPVX_const(sv));
11301 
11302 	/*
11303 	 * this next chunk reads more into the buffer if we're not done yet
11304 	 */
11305 
11306   	if (s < PL_bufend)
11307 	    break;		/* handle case where we are done yet :-) */
11308 
11309 #ifndef PERL_STRICT_CR
11310 	if (to - SvPVX_const(sv) >= 2) {
11311 	    if (   (to[-2] == '\r' && to[-1] == '\n')
11312                 || (to[-2] == '\n' && to[-1] == '\r'))
11313 	    {
11314 		to[-2] = '\n';
11315 		to--;
11316 		SvCUR_set(sv, to - SvPVX_const(sv));
11317 	    }
11318 	    else if (to[-1] == '\r')
11319 		to[-1] = '\n';
11320 	}
11321 	else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
11322 	    to[-1] = '\n';
11323 #endif
11324 
11325 	/* if we're out of file, or a read fails, bail and reset the current
11326 	   line marker so we can report where the unterminated string began
11327 	*/
11328 	COPLINE_INC_WITH_HERELINES;
11329 	PL_bufptr = PL_bufend;
11330 	if (!lex_next_chunk(0)) {
11331 	    sv_free(sv);
11332 	    CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11333 	    return NULL;
11334 	}
11335 	s = start = PL_bufptr;
11336     }
11337 
11338     /* at this point, we have successfully read the delimited string */
11339 
11340     if (keep_delims)
11341 	    sv_catpvn(sv, s, termlen);
11342     s += termlen;
11343 
11344     if (d_is_utf8)
11345 	SvUTF8_on(sv);
11346 
11347     PL_multi_end = CopLINE(PL_curcop);
11348     CopLINE_set(PL_curcop, PL_multi_start);
11349     PL_parser->herelines = herelines;
11350 
11351     /* if we allocated too much space, give some back */
11352     if (SvCUR(sv) + 5 < SvLEN(sv)) {
11353 	SvLEN_set(sv, SvCUR(sv) + 1);
11354 	SvPV_renew(sv, SvLEN(sv));
11355     }
11356 
11357     /* decide whether this is the first or second quoted string we've read
11358        for this op
11359     */
11360 
11361     if (PL_lex_stuff)
11362 	PL_parser->lex_sub_repl = sv;
11363     else
11364 	PL_lex_stuff = sv;
11365     if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s;
11366     return s;
11367 }
11368 
11369 /*
11370   scan_num
11371   takes: pointer to position in buffer
11372   returns: pointer to new position in buffer
11373   side-effects: builds ops for the constant in pl_yylval.op
11374 
11375   Read a number in any of the formats that Perl accepts:
11376 
11377   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)	12 12.34 12.
11378   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)			.34
11379   0b[01](_?[01])*                                       binary integers
11380   0[0-7](_?[0-7])*                                      octal integers
11381   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*                         hexadecimal integers
11382   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+   hexadecimal floats
11383 
11384   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
11385   thing it reads.
11386 
11387   If it reads a number without a decimal point or an exponent, it will
11388   try converting the number to an integer and see if it can do so
11389   without loss of precision.
11390 */
11391 
11392 char *
Perl_scan_num(pTHX_ const char * start,YYSTYPE * lvalp)11393 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
11394 {
11395     const char *s = start;	/* current position in buffer */
11396     char *d;			/* destination in temp buffer */
11397     char *e;			/* end of temp buffer */
11398     NV nv;				/* number read, as a double */
11399     SV *sv = NULL;			/* place to put the converted number */
11400     bool floatit;			/* boolean: int or float? */
11401     const char *lastub = NULL;		/* position of last underbar */
11402     static const char* const number_too_long = "Number too long";
11403     bool warned_about_underscore = 0;
11404     I32 shift; /* shift per digit for hex/oct/bin, hoisted here for fp */
11405 #define WARN_ABOUT_UNDERSCORE() \
11406 	do { \
11407 	    if (!warned_about_underscore) { \
11408 		warned_about_underscore = 1; \
11409 		Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), \
11410 			       "Misplaced _ in number"); \
11411 	    } \
11412 	} while(0)
11413     /* Hexadecimal floating point.
11414      *
11415      * In many places (where we have quads and NV is IEEE 754 double)
11416      * we can fit the mantissa bits of a NV into an unsigned quad.
11417      * (Note that UVs might not be quads even when we have quads.)
11418      * This will not work everywhere, though (either no quads, or
11419      * using long doubles), in which case we have to resort to NV,
11420      * which will probably mean horrible loss of precision due to
11421      * multiple fp operations. */
11422     bool hexfp = FALSE;
11423     int total_bits = 0;
11424     int significant_bits = 0;
11425 #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
11426 #  define HEXFP_UQUAD
11427     Uquad_t hexfp_uquad = 0;
11428     int hexfp_frac_bits = 0;
11429 #else
11430 #  define HEXFP_NV
11431     NV hexfp_nv = 0.0;
11432 #endif
11433     NV hexfp_mult = 1.0;
11434     UV high_non_zero = 0; /* highest digit */
11435     int non_zero_integer_digits = 0;
11436 
11437     PERL_ARGS_ASSERT_SCAN_NUM;
11438 
11439     /* We use the first character to decide what type of number this is */
11440 
11441     switch (*s) {
11442     default:
11443 	Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
11444 
11445     /* if it starts with a 0, it could be an octal number, a decimal in
11446        0.13 disguise, or a hexadecimal number, or a binary number. */
11447     case '0':
11448 	{
11449 	  /* variables:
11450 	     u		holds the "number so far"
11451 	     overflowed	was the number more than we can hold?
11452 
11453 	     Shift is used when we add a digit.  It also serves as an "are
11454 	     we in octal/hex/binary?" indicator to disallow hex characters
11455 	     when in octal mode.
11456 	   */
11457 	    NV n = 0.0;
11458 	    UV u = 0;
11459 	    bool overflowed = FALSE;
11460 	    bool just_zero  = TRUE;	/* just plain 0 or binary number? */
11461             bool has_digs = FALSE;
11462 	    static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
11463 	    static const char* const bases[5] =
11464 	      { "", "binary", "", "octal", "hexadecimal" };
11465 	    static const char* const Bases[5] =
11466 	      { "", "Binary", "", "Octal", "Hexadecimal" };
11467 	    static const char* const maxima[5] =
11468 	      { "",
11469 		"0b11111111111111111111111111111111",
11470 		"",
11471 		"037777777777",
11472 		"0xffffffff" };
11473 	    const char *base, *Base, *max;
11474 
11475 	    /* check for hex */
11476 	    if (isALPHA_FOLD_EQ(s[1], 'x')) {
11477 		shift = 4;
11478 		s += 2;
11479 		just_zero = FALSE;
11480 	    } else if (isALPHA_FOLD_EQ(s[1], 'b')) {
11481 		shift = 1;
11482 		s += 2;
11483 		just_zero = FALSE;
11484 	    }
11485 	    /* check for a decimal in disguise */
11486 	    else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e'))
11487 		goto decimal;
11488 	    /* so it must be octal */
11489 	    else {
11490 		shift = 3;
11491 		s++;
11492 	    }
11493 
11494 	    if (*s == '_') {
11495 		WARN_ABOUT_UNDERSCORE();
11496 	       lastub = s++;
11497 	    }
11498 
11499 	    base = bases[shift];
11500 	    Base = Bases[shift];
11501 	    max  = maxima[shift];
11502 
11503 	    /* read the rest of the number */
11504 	    for (;;) {
11505 		/* x is used in the overflow test,
11506 		   b is the digit we're adding on. */
11507 		UV x, b;
11508 
11509 		switch (*s) {
11510 
11511 		/* if we don't mention it, we're done */
11512 		default:
11513 		    goto out;
11514 
11515 		/* _ are ignored -- but warned about if consecutive */
11516 		case '_':
11517 		    if (lastub && s == lastub + 1)
11518 			WARN_ABOUT_UNDERSCORE();
11519 		    lastub = s++;
11520 		    break;
11521 
11522 		/* 8 and 9 are not octal */
11523 		case '8': case '9':
11524 		    if (shift == 3)
11525 			yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
11526 		    /* FALLTHROUGH */
11527 
11528 	        /* octal digits */
11529 		case '2': case '3': case '4':
11530 		case '5': case '6': case '7':
11531 		    if (shift == 1)
11532 			yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
11533 		    /* FALLTHROUGH */
11534 
11535 		case '0': case '1':
11536 		    b = *s++ & 15;		/* ASCII digit -> value of digit */
11537 		    goto digit;
11538 
11539 	        /* hex digits */
11540 		case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
11541 		case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
11542 		    /* make sure they said 0x */
11543 		    if (shift != 4)
11544 			goto out;
11545 		    b = (*s++ & 7) + 9;
11546 
11547 		    /* Prepare to put the digit we have onto the end
11548 		       of the number so far.  We check for overflows.
11549 		    */
11550 
11551 		  digit:
11552 		    just_zero = FALSE;
11553                     has_digs = TRUE;
11554 		    if (!overflowed) {
11555 			assert(shift >= 0);
11556 			x = u << shift;	/* make room for the digit */
11557 
11558                         total_bits += shift;
11559 
11560 			if ((x >> shift) != u
11561 			    && !(PL_hints & HINT_NEW_BINARY)) {
11562 			    overflowed = TRUE;
11563 			    n = (NV) u;
11564 			    Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11565 					     "Integer overflow in %s number",
11566 					     base);
11567 			} else
11568 			    u = x | b;		/* add the digit to the end */
11569 		    }
11570 		    if (overflowed) {
11571 			n *= nvshift[shift];
11572 			/* If an NV has not enough bits in its
11573 			 * mantissa to represent an UV this summing of
11574 			 * small low-order numbers is a waste of time
11575 			 * (because the NV cannot preserve the
11576 			 * low-order bits anyway): we could just
11577 			 * remember when did we overflow and in the
11578 			 * end just multiply n by the right
11579 			 * amount. */
11580 			n += (NV) b;
11581 		    }
11582 
11583                     if (high_non_zero == 0 && b > 0)
11584                         high_non_zero = b;
11585 
11586                     if (high_non_zero)
11587                         non_zero_integer_digits++;
11588 
11589                     /* this could be hexfp, but peek ahead
11590                      * to avoid matching ".." */
11591                     if (UNLIKELY(HEXFP_PEEK(s))) {
11592                         goto out;
11593                     }
11594 
11595 		    break;
11596 		}
11597 	    }
11598 
11599 	  /* if we get here, we had success: make a scalar value from
11600 	     the number.
11601 	  */
11602 	  out:
11603 
11604 	    /* final misplaced underbar check */
11605 	    if (s[-1] == '_')
11606 		WARN_ABOUT_UNDERSCORE();
11607 
11608             if (UNLIKELY(HEXFP_PEEK(s))) {
11609                 /* Do sloppy (on the underbars) but quick detection
11610                  * (and value construction) for hexfp, the decimal
11611                  * detection will shortly be more thorough with the
11612                  * underbar checks. */
11613                 const char* h = s;
11614                 significant_bits = non_zero_integer_digits * shift;
11615 #ifdef HEXFP_UQUAD
11616                 hexfp_uquad = u;
11617 #else /* HEXFP_NV */
11618                 hexfp_nv = u;
11619 #endif
11620                 /* Ignore the leading zero bits of
11621                  * the high (first) non-zero digit. */
11622                 if (high_non_zero) {
11623                     if (high_non_zero < 0x8)
11624                         significant_bits--;
11625                     if (high_non_zero < 0x4)
11626                         significant_bits--;
11627                     if (high_non_zero < 0x2)
11628                         significant_bits--;
11629                 }
11630 
11631                 if (*h == '.') {
11632 #ifdef HEXFP_NV
11633                     NV nv_mult = 1.0;
11634 #endif
11635                     bool accumulate = TRUE;
11636                     U8 b;
11637                     int lim = 1 << shift;
11638                     for (h++; ((isXDIGIT(*h) && (b = XDIGIT_VALUE(*h)) < lim) ||
11639                                *h == '_'); h++) {
11640                         if (isXDIGIT(*h)) {
11641                             significant_bits += shift;
11642 #ifdef HEXFP_UQUAD
11643                             if (accumulate) {
11644                                 if (significant_bits < NV_MANT_DIG) {
11645                                     /* We are in the long "run" of xdigits,
11646                                      * accumulate the full four bits. */
11647 				    assert(shift >= 0);
11648                                     hexfp_uquad <<= shift;
11649                                     hexfp_uquad |= b;
11650                                     hexfp_frac_bits += shift;
11651                                 } else if (significant_bits - shift < NV_MANT_DIG) {
11652                                     /* We are at a hexdigit either at,
11653                                      * or straddling, the edge of mantissa.
11654                                      * We will try grabbing as many as
11655                                      * possible bits. */
11656                                     int tail =
11657                                       significant_bits - NV_MANT_DIG;
11658                                     if (tail <= 0)
11659                                        tail += shift;
11660 				    assert(tail >= 0);
11661                                     hexfp_uquad <<= tail;
11662 				    assert((shift - tail) >= 0);
11663                                     hexfp_uquad |= b >> (shift - tail);
11664                                     hexfp_frac_bits += tail;
11665 
11666                                     /* Ignore the trailing zero bits
11667                                      * of the last non-zero xdigit.
11668                                      *
11669                                      * The assumption here is that if
11670                                      * one has input of e.g. the xdigit
11671                                      * eight (0x8), there is only one
11672                                      * bit being input, not the full
11673                                      * four bits.  Conversely, if one
11674                                      * specifies a zero xdigit, the
11675                                      * assumption is that one really
11676                                      * wants all those bits to be zero. */
11677                                     if (b) {
11678                                         if ((b & 0x1) == 0x0) {
11679                                             significant_bits--;
11680                                             if ((b & 0x2) == 0x0) {
11681                                                 significant_bits--;
11682                                                 if ((b & 0x4) == 0x0) {
11683                                                     significant_bits--;
11684                                                 }
11685                                             }
11686                                         }
11687                                     }
11688 
11689                                     accumulate = FALSE;
11690                                 }
11691                             } else {
11692                                 /* Keep skipping the xdigits, and
11693                                  * accumulating the significant bits,
11694                                  * but do not shift the uquad
11695                                  * (which would catastrophically drop
11696                                  * high-order bits) or accumulate the
11697                                  * xdigits anymore. */
11698                             }
11699 #else /* HEXFP_NV */
11700                             if (accumulate) {
11701                                 nv_mult /= nvshift[shift];
11702                                 if (nv_mult > 0.0)
11703                                     hexfp_nv += b * nv_mult;
11704                                 else
11705                                     accumulate = FALSE;
11706                             }
11707 #endif
11708                         }
11709                         if (significant_bits >= NV_MANT_DIG)
11710                             accumulate = FALSE;
11711                     }
11712                 }
11713 
11714                 if ((total_bits > 0 || significant_bits > 0) &&
11715                     isALPHA_FOLD_EQ(*h, 'p')) {
11716                     bool negexp = FALSE;
11717                     h++;
11718                     if (*h == '+')
11719                         h++;
11720                     else if (*h == '-') {
11721                         negexp = TRUE;
11722                         h++;
11723                     }
11724                     if (isDIGIT(*h)) {
11725                         I32 hexfp_exp = 0;
11726                         while (isDIGIT(*h) || *h == '_') {
11727                             if (isDIGIT(*h)) {
11728                                 hexfp_exp *= 10;
11729                                 hexfp_exp += *h - '0';
11730 #ifdef NV_MIN_EXP
11731                                 if (negexp
11732                                     && -hexfp_exp < NV_MIN_EXP - 1) {
11733                                     /* NOTE: this means that the exponent
11734                                      * underflow warning happens for
11735                                      * the IEEE 754 subnormals (denormals),
11736                                      * because DBL_MIN_EXP etc are the lowest
11737                                      * possible binary (or, rather, DBL_RADIX-base)
11738                                      * exponent for normals, not subnormals.
11739                                      *
11740                                      * This may or may not be a good thing. */
11741                                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11742                                                    "Hexadecimal float: exponent underflow");
11743                                     break;
11744                                 }
11745 #endif
11746 #ifdef NV_MAX_EXP
11747                                 if (!negexp
11748                                     && hexfp_exp > NV_MAX_EXP - 1) {
11749                                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11750                                                    "Hexadecimal float: exponent overflow");
11751                                     break;
11752                                 }
11753 #endif
11754                             }
11755                             h++;
11756                         }
11757                         if (negexp)
11758                             hexfp_exp = -hexfp_exp;
11759 #ifdef HEXFP_UQUAD
11760                         hexfp_exp -= hexfp_frac_bits;
11761 #endif
11762                         hexfp_mult = Perl_pow(2.0, hexfp_exp);
11763                         hexfp = TRUE;
11764                         goto decimal;
11765                     }
11766                 }
11767             }
11768 
11769             if (shift != 3 && !has_digs) {
11770                 /* 0x or 0b with no digits, treat it as an error.
11771                    Originally this backed up the parse before the b or
11772                    x, but that has the potential for silent changes in
11773                    behaviour, like for: "0x.3" and "0x+$foo".
11774                 */
11775                 const char *d = s;
11776                 char *oldbp = PL_bufptr;
11777                 if (*d) ++d; /* so the user sees the bad non-digit */
11778                 PL_bufptr = (char *)d; /* so yyerror reports the context */
11779                 yyerror(Perl_form(aTHX_ "No digits found for %s literal",
11780                                   shift == 4 ? "hexadecimal" : "binary"));
11781                 PL_bufptr = oldbp;
11782             }
11783 
11784 	    if (overflowed) {
11785 		if (n > 4294967295.0)
11786 		    Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11787 				   "%s number > %s non-portable",
11788 				   Base, max);
11789 		sv = newSVnv(n);
11790 	    }
11791 	    else {
11792 #if UVSIZE > 4
11793 		if (u > 0xffffffff)
11794 		    Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11795 				   "%s number > %s non-portable",
11796 				   Base, max);
11797 #endif
11798 		sv = newSVuv(u);
11799 	    }
11800 	    if (just_zero && (PL_hints & HINT_NEW_INTEGER))
11801 		sv = new_constant(start, s - start, "integer",
11802 				  sv, NULL, NULL, 0, NULL);
11803 	    else if (PL_hints & HINT_NEW_BINARY)
11804 		sv = new_constant(start, s - start, "binary",
11805                                   sv, NULL, NULL, 0, NULL);
11806 	}
11807 	break;
11808 
11809     /*
11810       handle decimal numbers.
11811       we're also sent here when we read a 0 as the first digit
11812     */
11813     case '1': case '2': case '3': case '4': case '5':
11814     case '6': case '7': case '8': case '9': case '.':
11815       decimal:
11816 	d = PL_tokenbuf;
11817 	e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
11818         floatit = FALSE;
11819         if (hexfp) {
11820             floatit = TRUE;
11821             *d++ = '0';
11822             switch (shift) {
11823             case 4:
11824                 *d++ = 'x';
11825                 s = start + 2;
11826                 break;
11827             case 3:
11828                 s = start + 1;
11829                 break;
11830             case 1:
11831                 *d++ = 'b';
11832                 s = start + 2;
11833                 break;
11834             default:
11835                 NOT_REACHED; /* NOTREACHED */
11836             }
11837         }
11838 
11839 	/* read next group of digits and _ and copy into d */
11840 	while (isDIGIT(*s)
11841                || *s == '_'
11842                || UNLIKELY(hexfp && isXDIGIT(*s)))
11843         {
11844 	    /* skip underscores, checking for misplaced ones
11845 	       if -w is on
11846 	    */
11847 	    if (*s == '_') {
11848 		if (lastub && s == lastub + 1)
11849 		    WARN_ABOUT_UNDERSCORE();
11850 		lastub = s++;
11851 	    }
11852 	    else {
11853 	        /* check for end of fixed-length buffer */
11854 		if (d >= e)
11855 		    Perl_croak(aTHX_ "%s", number_too_long);
11856 		/* if we're ok, copy the character */
11857 		*d++ = *s++;
11858 	    }
11859 	}
11860 
11861 	/* final misplaced underbar check */
11862 	if (lastub && s == lastub + 1)
11863 	    WARN_ABOUT_UNDERSCORE();
11864 
11865 	/* read a decimal portion if there is one.  avoid
11866 	   3..5 being interpreted as the number 3. followed
11867 	   by .5
11868 	*/
11869 	if (*s == '.' && s[1] != '.') {
11870 	    floatit = TRUE;
11871 	    *d++ = *s++;
11872 
11873 	    if (*s == '_') {
11874 		WARN_ABOUT_UNDERSCORE();
11875 		lastub = s;
11876 	    }
11877 
11878 	    /* copy, ignoring underbars, until we run out of digits.
11879 	    */
11880 	    for (; isDIGIT(*s)
11881                    || *s == '_'
11882                    || UNLIKELY(hexfp && isXDIGIT(*s));
11883                  s++)
11884             {
11885 	        /* fixed length buffer check */
11886 		if (d >= e)
11887 		    Perl_croak(aTHX_ "%s", number_too_long);
11888 		if (*s == '_') {
11889 		   if (lastub && s == lastub + 1)
11890 			WARN_ABOUT_UNDERSCORE();
11891 		   lastub = s;
11892 		}
11893 		else
11894 		    *d++ = *s;
11895 	    }
11896 	    /* fractional part ending in underbar? */
11897 	    if (s[-1] == '_')
11898 		WARN_ABOUT_UNDERSCORE();
11899 	    if (*s == '.' && isDIGIT(s[1])) {
11900 		/* oops, it's really a v-string, but without the "v" */
11901 		s = start;
11902 		goto vstring;
11903 	    }
11904 	}
11905 
11906 	/* read exponent part, if present */
11907 	if ((isALPHA_FOLD_EQ(*s, 'e')
11908               || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
11909             && memCHRs("+-0123456789_", s[1]))
11910         {
11911             int exp_digits = 0;
11912             const char *save_s = s;
11913             char * save_d = d;
11914 
11915             /* regardless of whether user said 3E5 or 3e5, use lower 'e',
11916                ditto for p (hexfloats) */
11917             if ((isALPHA_FOLD_EQ(*s, 'e'))) {
11918 		/* At least some Mach atof()s don't grok 'E' */
11919                 *d++ = 'e';
11920             }
11921             else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) {
11922                 *d++ = 'p';
11923             }
11924 
11925 	    s++;
11926 
11927 
11928 	    /* stray preinitial _ */
11929 	    if (*s == '_') {
11930 		WARN_ABOUT_UNDERSCORE();
11931 	        lastub = s++;
11932 	    }
11933 
11934 	    /* allow positive or negative exponent */
11935 	    if (*s == '+' || *s == '-')
11936 		*d++ = *s++;
11937 
11938 	    /* stray initial _ */
11939 	    if (*s == '_') {
11940 		WARN_ABOUT_UNDERSCORE();
11941 	        lastub = s++;
11942 	    }
11943 
11944 	    /* read digits of exponent */
11945 	    while (isDIGIT(*s) || *s == '_') {
11946 	        if (isDIGIT(*s)) {
11947                     ++exp_digits;
11948 		    if (d >= e)
11949 		        Perl_croak(aTHX_ "%s", number_too_long);
11950 		    *d++ = *s++;
11951 		}
11952 		else {
11953 		   if (((lastub && s == lastub + 1)
11954                         || (!isDIGIT(s[1]) && s[1] != '_')))
11955 			WARN_ABOUT_UNDERSCORE();
11956 		   lastub = s++;
11957 		}
11958 	    }
11959 
11960             if (!exp_digits) {
11961                 /* no exponent digits, the [eEpP] could be for something else,
11962                  * though in practice we don't get here for p since that's preparsed
11963                  * earlier, and results in only the 0xX being consumed, so behave similarly
11964                  * for decimal floats and consume only the D.DD, leaving the [eE] to the
11965                  * next token.
11966                  */
11967                 s = save_s;
11968                 d = save_d;
11969             }
11970             else {
11971                 floatit = TRUE;
11972             }
11973 	}
11974 
11975 
11976 	/*
11977            We try to do an integer conversion first if no characters
11978            indicating "float" have been found.
11979 	 */
11980 
11981 	if (!floatit) {
11982     	    UV uv;
11983 	    const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
11984 
11985             if (flags == IS_NUMBER_IN_UV) {
11986               if (uv <= IV_MAX)
11987 		sv = newSViv(uv); /* Prefer IVs over UVs. */
11988               else
11989 	    	sv = newSVuv(uv);
11990             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
11991               if (uv <= (UV) IV_MIN)
11992                 sv = newSViv(-(IV)uv);
11993               else
11994 	    	floatit = TRUE;
11995             } else
11996               floatit = TRUE;
11997         }
11998 	if (floatit) {
11999 	    /* terminate the string */
12000 	    *d = '\0';
12001             if (UNLIKELY(hexfp)) {
12002 #  ifdef NV_MANT_DIG
12003                 if (significant_bits > NV_MANT_DIG)
12004                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
12005                                    "Hexadecimal float: mantissa overflow");
12006 #  endif
12007 #ifdef HEXFP_UQUAD
12008                 nv = hexfp_uquad * hexfp_mult;
12009 #else /* HEXFP_NV */
12010                 nv = hexfp_nv * hexfp_mult;
12011 #endif
12012             } else {
12013                 nv = Atof(PL_tokenbuf);
12014             }
12015             sv = newSVnv(nv);
12016 	}
12017 
12018 	if ( floatit
12019 	     ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
12020 	    const char *const key = floatit ? "float" : "integer";
12021 	    const STRLEN keylen = floatit ? 5 : 7;
12022 	    sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
12023 				key, keylen, sv, NULL, NULL, 0, NULL);
12024 	}
12025 	break;
12026 
12027     /* if it starts with a v, it could be a v-string */
12028     case 'v':
12029     vstring:
12030 		sv = newSV(5); /* preallocate storage space */
12031 		ENTER_with_name("scan_vstring");
12032 		SAVEFREESV(sv);
12033 		s = scan_vstring(s, PL_bufend, sv);
12034 		SvREFCNT_inc_simple_void_NN(sv);
12035 		LEAVE_with_name("scan_vstring");
12036 	break;
12037     }
12038 
12039     /* make the op for the constant and return */
12040 
12041     if (sv)
12042 	lvalp->opval = newSVOP(OP_CONST, 0, sv);
12043     else
12044 	lvalp->opval = NULL;
12045 
12046     return (char *)s;
12047 }
12048 
12049 STATIC char *
S_scan_formline(pTHX_ char * s)12050 S_scan_formline(pTHX_ char *s)
12051 {
12052     SV * const stuff = newSVpvs("");
12053     bool needargs = FALSE;
12054     bool eofmt = FALSE;
12055 
12056     PERL_ARGS_ASSERT_SCAN_FORMLINE;
12057 
12058     while (!needargs) {
12059         char *eol;
12060 	if (*s == '.') {
12061             char *t = s+1;
12062 #ifdef PERL_STRICT_CR
12063 	    while (SPACE_OR_TAB(*t))
12064 		t++;
12065 #else
12066 	    while (SPACE_OR_TAB(*t) || *t == '\r')
12067 		t++;
12068 #endif
12069 	    if (*t == '\n' || t == PL_bufend) {
12070 	        eofmt = TRUE;
12071 		break;
12072             }
12073 	}
12074 	eol = (char *) memchr(s,'\n',PL_bufend-s);
12075 	if (!eol++)
12076 		eol = PL_bufend;
12077 	if (*s != '#') {
12078             char *t;
12079 	    for (t = s; t < eol; t++) {
12080 		if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
12081 		    needargs = FALSE;
12082 		    goto enough;	/* ~~ must be first line in formline */
12083 		}
12084 		if (*t == '@' || *t == '^')
12085 		    needargs = TRUE;
12086 	    }
12087 	    if (eol > s) {
12088 	        sv_catpvn(stuff, s, eol-s);
12089 #ifndef PERL_STRICT_CR
12090 		if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
12091 		    char *end = SvPVX(stuff) + SvCUR(stuff);
12092 		    end[-2] = '\n';
12093 		    end[-1] = '\0';
12094 		    SvCUR_set(stuff, SvCUR(stuff) - 1);
12095 		}
12096 #endif
12097 	    }
12098 	    else
12099 	      break;
12100 	}
12101 	s = (char*)eol;
12102 	if ((PL_rsfp || PL_parser->filtered)
12103 	 && PL_parser->form_lex_state == LEX_NORMAL) {
12104 	    bool got_some;
12105 	    PL_bufptr = PL_bufend;
12106 	    COPLINE_INC_WITH_HERELINES;
12107 	    got_some = lex_next_chunk(0);
12108 	    CopLINE_dec(PL_curcop);
12109 	    s = PL_bufptr;
12110 	    if (!got_some)
12111 		break;
12112 	}
12113 	incline(s, PL_bufend);
12114     }
12115   enough:
12116     if (!SvCUR(stuff) || needargs)
12117 	PL_lex_state = PL_parser->form_lex_state;
12118     if (SvCUR(stuff)) {
12119 	PL_expect = XSTATE;
12120 	if (needargs) {
12121 	    const char *s2 = s;
12122 	    while (isSPACE(*s2) && *s2 != '\n')
12123 		s2++;
12124 	    if (*s2 == '{') {
12125 		PL_expect = XTERMBLOCK;
12126 		NEXTVAL_NEXTTOKE.ival = 0;
12127 		force_next(DO);
12128 	    }
12129 	    NEXTVAL_NEXTTOKE.ival = 0;
12130 	    force_next(FORMLBRACK);
12131 	}
12132 	if (!IN_BYTES) {
12133 	    if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
12134 		SvUTF8_on(stuff);
12135 	}
12136         NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0, stuff);
12137 	force_next(THING);
12138     }
12139     else {
12140 	SvREFCNT_dec(stuff);
12141 	if (eofmt)
12142 	    PL_lex_formbrack = 0;
12143     }
12144     return s;
12145 }
12146 
12147 I32
Perl_start_subparse(pTHX_ I32 is_format,U32 flags)12148 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
12149 {
12150     const I32 oldsavestack_ix = PL_savestack_ix;
12151     CV* const outsidecv = PL_compcv;
12152 
12153     SAVEI32(PL_subline);
12154     save_item(PL_subname);
12155     SAVESPTR(PL_compcv);
12156 
12157     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
12158     CvFLAGS(PL_compcv) |= flags;
12159 
12160     PL_subline = CopLINE(PL_curcop);
12161     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
12162     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
12163     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
12164     if (outsidecv && CvPADLIST(outsidecv))
12165 	CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
12166 
12167     return oldsavestack_ix;
12168 }
12169 
12170 
12171 /* Do extra initialisation of a CV (typically one just created by
12172  * start_subparse()) if that CV is for a named sub
12173  */
12174 
12175 void
Perl_init_named_cv(pTHX_ CV * cv,OP * nameop)12176 Perl_init_named_cv(pTHX_ CV *cv, OP *nameop)
12177 {
12178     PERL_ARGS_ASSERT_INIT_NAMED_CV;
12179 
12180     if (nameop->op_type == OP_CONST) {
12181         const char *const name = SvPV_nolen_const(((SVOP*)nameop)->op_sv);
12182         if (   strEQ(name, "BEGIN")
12183             || strEQ(name, "END")
12184             || strEQ(name, "INIT")
12185             || strEQ(name, "CHECK")
12186             || strEQ(name, "UNITCHECK")
12187         )
12188           CvSPECIAL_on(cv);
12189     }
12190     else
12191     /* State subs inside anonymous subs need to be
12192      clonable themselves. */
12193     if (   CvANON(CvOUTSIDE(cv))
12194         || CvCLONE(CvOUTSIDE(cv))
12195         || !PadnameIsSTATE(PadlistNAMESARRAY(CvPADLIST(
12196                         CvOUTSIDE(cv)
12197                      ))[nameop->op_targ])
12198     )
12199       CvCLONE_on(cv);
12200 }
12201 
12202 
12203 static int
S_yywarn(pTHX_ const char * const s,U32 flags)12204 S_yywarn(pTHX_ const char *const s, U32 flags)
12205 {
12206     PERL_ARGS_ASSERT_YYWARN;
12207 
12208     PL_in_eval |= EVAL_WARNONLY;
12209     yyerror_pv(s, flags);
12210     return 0;
12211 }
12212 
12213 void
Perl_abort_execution(pTHX_ const char * const msg,const char * const name)12214 Perl_abort_execution(pTHX_ const char * const msg, const char * const name)
12215 {
12216     PERL_ARGS_ASSERT_ABORT_EXECUTION;
12217 
12218     if (PL_minus_c)
12219         Perl_croak(aTHX_ "%s%s had compilation errors.\n", msg, name);
12220     else {
12221         Perl_croak(aTHX_
12222                 "%sExecution of %s aborted due to compilation errors.\n", msg, name);
12223     }
12224     NOT_REACHED; /* NOTREACHED */
12225 }
12226 
12227 void
Perl_yyquit(pTHX)12228 Perl_yyquit(pTHX)
12229 {
12230     /* Called, after at least one error has been found, to abort the parse now,
12231      * instead of trying to forge ahead */
12232 
12233     yyerror_pvn(NULL, 0, 0);
12234 }
12235 
12236 int
Perl_yyerror(pTHX_ const char * const s)12237 Perl_yyerror(pTHX_ const char *const s)
12238 {
12239     PERL_ARGS_ASSERT_YYERROR;
12240     return yyerror_pvn(s, strlen(s), 0);
12241 }
12242 
12243 int
Perl_yyerror_pv(pTHX_ const char * const s,U32 flags)12244 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
12245 {
12246     PERL_ARGS_ASSERT_YYERROR_PV;
12247     return yyerror_pvn(s, strlen(s), flags);
12248 }
12249 
12250 int
Perl_yyerror_pvn(pTHX_ const char * const s,STRLEN len,U32 flags)12251 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
12252 {
12253     const char *context = NULL;
12254     int contlen = -1;
12255     SV *msg;
12256     SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
12257     int yychar  = PL_parser->yychar;
12258 
12259     /* Output error message 's' with length 'len'.  'flags' are SV flags that
12260      * apply.  If the number of errors found is large enough, it abandons
12261      * parsing.  If 's' is NULL, there is no message, and it abandons
12262      * processing unconditionally */
12263 
12264     if (s != NULL) {
12265         if (!yychar || (yychar == ';' && !PL_rsfp))
12266             sv_catpvs(where_sv, "at EOF");
12267         else if (   PL_oldoldbufptr
12268                  && PL_bufptr > PL_oldoldbufptr
12269                  && PL_bufptr - PL_oldoldbufptr < 200
12270                  && PL_oldoldbufptr != PL_oldbufptr
12271                  && PL_oldbufptr != PL_bufptr)
12272         {
12273             /*
12274                     Only for NetWare:
12275                     The code below is removed for NetWare because it
12276                     abends/crashes on NetWare when the script has error such as
12277                     not having the closing quotes like:
12278                         if ($var eq "value)
12279                     Checking of white spaces is anyway done in NetWare code.
12280             */
12281 #ifndef NETWARE
12282             while (isSPACE(*PL_oldoldbufptr))
12283                 PL_oldoldbufptr++;
12284 #endif
12285             context = PL_oldoldbufptr;
12286             contlen = PL_bufptr - PL_oldoldbufptr;
12287         }
12288         else if (  PL_oldbufptr
12289                 && PL_bufptr > PL_oldbufptr
12290                 && PL_bufptr - PL_oldbufptr < 200
12291                 && PL_oldbufptr != PL_bufptr) {
12292             /*
12293                     Only for NetWare:
12294                     The code below is removed for NetWare because it
12295                     abends/crashes on NetWare when the script has error such as
12296                     not having the closing quotes like:
12297                         if ($var eq "value)
12298                     Checking of white spaces is anyway done in NetWare code.
12299             */
12300 #ifndef NETWARE
12301             while (isSPACE(*PL_oldbufptr))
12302                 PL_oldbufptr++;
12303 #endif
12304             context = PL_oldbufptr;
12305             contlen = PL_bufptr - PL_oldbufptr;
12306         }
12307         else if (yychar > 255)
12308             sv_catpvs(where_sv, "next token ???");
12309         else if (yychar == YYEMPTY) {
12310             if (PL_lex_state == LEX_NORMAL)
12311                 sv_catpvs(where_sv, "at end of line");
12312             else if (PL_lex_inpat)
12313                 sv_catpvs(where_sv, "within pattern");
12314             else
12315                 sv_catpvs(where_sv, "within string");
12316         }
12317         else {
12318             sv_catpvs(where_sv, "next char ");
12319             if (yychar < 32)
12320                 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
12321             else if (isPRINT_LC(yychar)) {
12322                 const char string = yychar;
12323                 sv_catpvn(where_sv, &string, 1);
12324             }
12325             else
12326                 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
12327         }
12328         msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
12329         Perl_sv_catpvf(aTHX_ msg, " at %s line %" IVdf ", ",
12330             OutCopFILE(PL_curcop),
12331             (IV)(PL_parser->preambling == NOLINE
12332                    ? CopLINE(PL_curcop)
12333                    : PL_parser->preambling));
12334         if (context)
12335             Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n",
12336                                  UTF8fARG(UTF, contlen, context));
12337         else
12338             Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv));
12339         if (   PL_multi_start < PL_multi_end
12340             && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1)
12341         {
12342             Perl_sv_catpvf(aTHX_ msg,
12343             "  (Might be a runaway multi-line %c%c string starting on"
12344             " line %" IVdf ")\n",
12345                     (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
12346             PL_multi_end = 0;
12347         }
12348         if (PL_in_eval & EVAL_WARNONLY) {
12349             PL_in_eval &= ~EVAL_WARNONLY;
12350             Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg));
12351         }
12352         else {
12353             qerror(msg);
12354         }
12355     }
12356     if (s == NULL || PL_error_count >= 10) {
12357         const char * msg = "";
12358         const char * const name = OutCopFILE(PL_curcop);
12359 
12360 	if (PL_in_eval) {
12361             SV * errsv = ERRSV;
12362             if (SvCUR(errsv)) {
12363                 msg = Perl_form(aTHX_ "%" SVf, SVfARG(errsv));
12364             }
12365         }
12366 
12367         if (s == NULL) {
12368             abort_execution(msg, name);
12369         }
12370         else {
12371             Perl_croak(aTHX_ "%s%s has too many errors.\n", msg, name);
12372         }
12373     }
12374     PL_in_my = 0;
12375     PL_in_my_stash = NULL;
12376     return 0;
12377 }
12378 
12379 STATIC char*
S_swallow_bom(pTHX_ U8 * s)12380 S_swallow_bom(pTHX_ U8 *s)
12381 {
12382     const STRLEN slen = SvCUR(PL_linestr);
12383 
12384     PERL_ARGS_ASSERT_SWALLOW_BOM;
12385 
12386     switch (s[0]) {
12387     case 0xFF:
12388 	if (s[1] == 0xFE) {
12389 	    /* UTF-16 little-endian? (or UTF-32LE?) */
12390 	    if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
12391 		/* diag_listed_as: Unsupported script encoding %s */
12392 		Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
12393 #ifndef PERL_NO_UTF16_FILTER
12394 #ifdef DEBUGGING
12395 	    if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
12396 #endif
12397 	    s += 2;
12398 	    if (PL_bufend > (char*)s) {
12399 		s = add_utf16_textfilter(s, TRUE);
12400 	    }
12401 #else
12402 	    /* diag_listed_as: Unsupported script encoding %s */
12403 	    Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
12404 #endif
12405 	}
12406 	break;
12407     case 0xFE:
12408 	if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
12409 #ifndef PERL_NO_UTF16_FILTER
12410 #ifdef DEBUGGING
12411 	    if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
12412 #endif
12413 	    s += 2;
12414 	    if (PL_bufend > (char *)s) {
12415 		s = add_utf16_textfilter(s, FALSE);
12416 	    }
12417 #else
12418 	    /* diag_listed_as: Unsupported script encoding %s */
12419 	    Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
12420 #endif
12421 	}
12422 	break;
12423     case BOM_UTF8_FIRST_BYTE: {
12424         if (memBEGINs(s+1, slen - 1, BOM_UTF8_TAIL)) {
12425 #ifdef DEBUGGING
12426             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12427 #endif
12428             s += sizeof(BOM_UTF8) - 1;                     /* UTF-8 */
12429         }
12430         break;
12431     }
12432     case 0:
12433 	if (slen > 3) {
12434 	     if (s[1] == 0) {
12435 		  if (s[2] == 0xFE && s[3] == 0xFF) {
12436 		       /* UTF-32 big-endian */
12437 		       /* diag_listed_as: Unsupported script encoding %s */
12438 		       Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
12439 		  }
12440 	     }
12441 	     else if (s[2] == 0 && s[3] != 0) {
12442 		  /* Leading bytes
12443 		   * 00 xx 00 xx
12444 		   * are a good indicator of UTF-16BE. */
12445 #ifndef PERL_NO_UTF16_FILTER
12446 #ifdef DEBUGGING
12447 		  if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
12448 #endif
12449 		  s = add_utf16_textfilter(s, FALSE);
12450 #else
12451 		  /* diag_listed_as: Unsupported script encoding %s */
12452 		  Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
12453 #endif
12454 	     }
12455 	}
12456         break;
12457 
12458     default:
12459 	 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
12460 		  /* Leading bytes
12461 		   * xx 00 xx 00
12462 		   * are a good indicator of UTF-16LE. */
12463 #ifndef PERL_NO_UTF16_FILTER
12464 #ifdef DEBUGGING
12465 	      if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
12466 #endif
12467 	      s = add_utf16_textfilter(s, TRUE);
12468 #else
12469 	      /* diag_listed_as: Unsupported script encoding %s */
12470 	      Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
12471 #endif
12472 	 }
12473     }
12474     return (char*)s;
12475 }
12476 
12477 
12478 #ifndef PERL_NO_UTF16_FILTER
12479 static I32
S_utf16_textfilter(pTHX_ int idx,SV * sv,int maxlen)12480 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12481 {
12482     SV *const filter = FILTER_DATA(idx);
12483     /* We re-use this each time round, throwing the contents away before we
12484        return.  */
12485     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
12486     SV *const utf8_buffer = filter;
12487     IV status = IoPAGE(filter);
12488     const bool reverse = cBOOL(IoLINES(filter));
12489     I32 retval;
12490 
12491     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
12492 
12493     /* As we're automatically added, at the lowest level, and hence only called
12494        from this file, we can be sure that we're not called in block mode. Hence
12495        don't bother writing code to deal with block mode.  */
12496     if (maxlen) {
12497 	Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
12498     }
12499     if (status < 0) {
12500 	Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%" IVdf ")", status);
12501     }
12502     DEBUG_P(PerlIO_printf(Perl_debug_log,
12503 			  "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
12504 			  FPTR2DPTR(void *, S_utf16_textfilter),
12505 			  reverse ? 'l' : 'b', idx, maxlen, status,
12506 			  (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
12507 
12508     while (1) {
12509 	STRLEN chars;
12510 	STRLEN have;
12511 	Size_t newlen;
12512 	U8 *end;
12513 	/* First, look in our buffer of existing UTF-8 data:  */
12514 	char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
12515 
12516 	if (nl) {
12517 	    ++nl;
12518 	} else if (status == 0) {
12519 	    /* EOF */
12520 	    IoPAGE(filter) = 0;
12521 	    nl = SvEND(utf8_buffer);
12522 	}
12523 	if (nl) {
12524 	    STRLEN got = nl - SvPVX(utf8_buffer);
12525 	    /* Did we have anything to append?  */
12526 	    retval = got != 0;
12527 	    sv_catpvn(sv, SvPVX(utf8_buffer), got);
12528 	    /* Everything else in this code works just fine if SVp_POK isn't
12529 	       set.  This, however, needs it, and we need it to work, else
12530 	       we loop infinitely because the buffer is never consumed.  */
12531 	    sv_chop(utf8_buffer, nl);
12532 	    break;
12533 	}
12534 
12535 	/* OK, not a complete line there, so need to read some more UTF-16.
12536 	   Read an extra octect if the buffer currently has an odd number. */
12537 	while (1) {
12538 	    if (status <= 0)
12539 		break;
12540 	    if (SvCUR(utf16_buffer) >= 2) {
12541 		/* Location of the high octet of the last complete code point.
12542 		   Gosh, UTF-16 is a pain. All the benefits of variable length,
12543 		   *coupled* with all the benefits of partial reads and
12544 		   endianness.  */
12545 		const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
12546 		    + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
12547 
12548 		if (*last_hi < 0xd8 || *last_hi > 0xdb) {
12549 		    break;
12550 		}
12551 
12552 		/* We have the first half of a surrogate. Read more.  */
12553 		DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
12554 	    }
12555 
12556 	    status = FILTER_READ(idx + 1, utf16_buffer,
12557 				 160 + (SvCUR(utf16_buffer) & 1));
12558 	    DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%" IVdf " SvCUR(sv)=%" UVuf "\n", status, (UV)SvCUR(utf16_buffer)));
12559 	    DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
12560 	    if (status < 0) {
12561 		/* Error */
12562 		IoPAGE(filter) = status;
12563 		return status;
12564 	    }
12565 	}
12566 
12567         /* 'chars' isn't quite the right name, as code points above 0xFFFF
12568          * require 4 bytes per char */
12569 	chars = SvCUR(utf16_buffer) >> 1;
12570 	have = SvCUR(utf8_buffer);
12571 
12572         /* Assume the worst case size as noted by the functions: twice the
12573          * number of input bytes */
12574 	SvGROW(utf8_buffer, have + chars * 4 + 1);
12575 
12576 	if (reverse) {
12577 	    end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
12578 					 (U8*)SvPVX_const(utf8_buffer) + have,
12579 					 chars * 2, &newlen);
12580 	} else {
12581 	    end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
12582 				(U8*)SvPVX_const(utf8_buffer) + have,
12583 				chars * 2, &newlen);
12584 	}
12585 	SvCUR_set(utf8_buffer, have + newlen);
12586 	*end = '\0';
12587 
12588 	/* No need to keep this SV "well-formed" with a '\0' after the end, as
12589 	   it's private to us, and utf16_to_utf8{,reversed} take a
12590 	   (pointer,length) pair, rather than a NUL-terminated string.  */
12591 	if(SvCUR(utf16_buffer) & 1) {
12592 	    *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
12593 	    SvCUR_set(utf16_buffer, 1);
12594 	} else {
12595 	    SvCUR_set(utf16_buffer, 0);
12596 	}
12597     }
12598     DEBUG_P(PerlIO_printf(Perl_debug_log,
12599 			  "utf16_textfilter: returns, status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
12600 			  status,
12601 			  (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
12602     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
12603     return retval;
12604 }
12605 
12606 static U8 *
S_add_utf16_textfilter(pTHX_ U8 * const s,bool reversed)12607 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
12608 {
12609     SV *filter = filter_add(S_utf16_textfilter, NULL);
12610 
12611     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
12612 
12613     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
12614     SvPVCLEAR(filter);
12615     IoLINES(filter) = reversed;
12616     IoPAGE(filter) = 1; /* Not EOF */
12617 
12618     /* Sadly, we have to return a valid pointer, come what may, so we have to
12619        ignore any error return from this.  */
12620     SvCUR_set(PL_linestr, 0);
12621     if (FILTER_READ(0, PL_linestr, 0)) {
12622 	SvUTF8_on(PL_linestr);
12623     } else {
12624 	SvUTF8_on(PL_linestr);
12625     }
12626     PL_bufend = SvEND(PL_linestr);
12627     return (U8*)SvPVX(PL_linestr);
12628 }
12629 #endif
12630 
12631 /*
12632 Returns a pointer to the next character after the parsed
12633 vstring, as well as updating the passed in sv.
12634 
12635 Function must be called like
12636 
12637 	sv = sv_2mortal(newSV(5));
12638 	s = scan_vstring(s,e,sv);
12639 
12640 where s and e are the start and end of the string.
12641 The sv should already be large enough to store the vstring
12642 passed in, for performance reasons.
12643 
12644 This function may croak if fatal warnings are enabled in the
12645 calling scope, hence the sv_2mortal in the example (to prevent
12646 a leak).  Make sure to do SvREFCNT_inc afterwards if you use
12647 sv_2mortal.
12648 
12649 */
12650 
12651 char *
Perl_scan_vstring(pTHX_ const char * s,const char * const e,SV * sv)12652 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
12653 {
12654     const char *pos = s;
12655     const char *start = s;
12656 
12657     PERL_ARGS_ASSERT_SCAN_VSTRING;
12658 
12659     if (*pos == 'v') pos++;  /* get past 'v' */
12660     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12661 	pos++;
12662     if ( *pos != '.') {
12663 	/* this may not be a v-string if followed by => */
12664 	const char *next = pos;
12665 	while (next < e && isSPACE(*next))
12666 	    ++next;
12667 	if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
12668 	    /* return string not v-string */
12669 	    sv_setpvn(sv,(char *)s,pos-s);
12670 	    return (char *)pos;
12671 	}
12672     }
12673 
12674     if (!isALPHA(*pos)) {
12675 	U8 tmpbuf[UTF8_MAXBYTES+1];
12676 
12677 	if (*s == 'v')
12678 	    s++;  /* get past 'v' */
12679 
12680         SvPVCLEAR(sv);
12681 
12682 	for (;;) {
12683 	    /* this is atoi() that tolerates underscores */
12684 	    U8 *tmpend;
12685 	    UV rev = 0;
12686 	    const char *end = pos;
12687 	    UV mult = 1;
12688 	    while (--end >= s) {
12689 		if (*end != '_') {
12690 		    const UV orev = rev;
12691 		    rev += (*end - '0') * mult;
12692 		    mult *= 10;
12693 		    if (orev > rev)
12694 			/* diag_listed_as: Integer overflow in %s number */
12695 			Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
12696 					 "Integer overflow in decimal number");
12697 		}
12698 	    }
12699 
12700 	    /* Append native character for the rev point */
12701 	    tmpend = uvchr_to_utf8(tmpbuf, rev);
12702 	    sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
12703 	    if (!UVCHR_IS_INVARIANT(rev))
12704 		 SvUTF8_on(sv);
12705 	    if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
12706 		 s = ++pos;
12707 	    else {
12708 		 s = pos;
12709 		 break;
12710 	    }
12711 	    while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12712 		 pos++;
12713 	}
12714 	SvPOK_on(sv);
12715 	sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
12716 	SvRMAGICAL_on(sv);
12717     }
12718     return (char *)s;
12719 }
12720 
12721 int
Perl_keyword_plugin_standard(pTHX_ char * keyword_ptr,STRLEN keyword_len,OP ** op_ptr)12722 Perl_keyword_plugin_standard(pTHX_
12723 	char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
12724 {
12725     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
12726     PERL_UNUSED_CONTEXT;
12727     PERL_UNUSED_ARG(keyword_ptr);
12728     PERL_UNUSED_ARG(keyword_len);
12729     PERL_UNUSED_ARG(op_ptr);
12730     return KEYWORD_PLUGIN_DECLINE;
12731 }
12732 
12733 /*
12734 =for apidoc wrap_keyword_plugin
12735 
12736 Puts a C function into the chain of keyword plugins.  This is the
12737 preferred way to manipulate the L</PL_keyword_plugin> variable.
12738 C<new_plugin> is a pointer to the C function that is to be added to the
12739 keyword plugin chain, and C<old_plugin_p> points to the storage location
12740 where a pointer to the next function in the chain will be stored.  The
12741 value of C<new_plugin> is written into the L</PL_keyword_plugin> variable,
12742 while the value previously stored there is written to C<*old_plugin_p>.
12743 
12744 L</PL_keyword_plugin> is global to an entire process, and a module wishing
12745 to hook keyword parsing may find itself invoked more than once per
12746 process, typically in different threads.  To handle that situation, this
12747 function is idempotent.  The location C<*old_plugin_p> must initially
12748 (once per process) contain a null pointer.  A C variable of static
12749 duration (declared at file scope, typically also marked C<static> to give
12750 it internal linkage) will be implicitly initialised appropriately, if it
12751 does not have an explicit initialiser.  This function will only actually
12752 modify the plugin chain if it finds C<*old_plugin_p> to be null.  This
12753 function is also thread safe on the small scale.  It uses appropriate
12754 locking to avoid race conditions in accessing L</PL_keyword_plugin>.
12755 
12756 When this function is called, the function referenced by C<new_plugin>
12757 must be ready to be called, except for C<*old_plugin_p> being unfilled.
12758 In a threading situation, C<new_plugin> may be called immediately, even
12759 before this function has returned.  C<*old_plugin_p> will always be
12760 appropriately set before C<new_plugin> is called.  If C<new_plugin>
12761 decides not to do anything special with the identifier that it is given
12762 (which is the usual case for most calls to a keyword plugin), it must
12763 chain the plugin function referenced by C<*old_plugin_p>.
12764 
12765 Taken all together, XS code to install a keyword plugin should typically
12766 look something like this:
12767 
12768     static Perl_keyword_plugin_t next_keyword_plugin;
12769     static OP *my_keyword_plugin(pTHX_
12770         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
12771     {
12772         if (memEQs(keyword_ptr, keyword_len,
12773                    "my_new_keyword")) {
12774             ...
12775         } else {
12776             return next_keyword_plugin(aTHX_
12777                 keyword_ptr, keyword_len, op_ptr);
12778         }
12779     }
12780     BOOT:
12781         wrap_keyword_plugin(my_keyword_plugin,
12782                             &next_keyword_plugin);
12783 
12784 Direct access to L</PL_keyword_plugin> should be avoided.
12785 
12786 =cut
12787 */
12788 
12789 void
Perl_wrap_keyword_plugin(pTHX_ Perl_keyword_plugin_t new_plugin,Perl_keyword_plugin_t * old_plugin_p)12790 Perl_wrap_keyword_plugin(pTHX_
12791     Perl_keyword_plugin_t new_plugin, Perl_keyword_plugin_t *old_plugin_p)
12792 {
12793     dVAR;
12794 
12795     PERL_UNUSED_CONTEXT;
12796     PERL_ARGS_ASSERT_WRAP_KEYWORD_PLUGIN;
12797     if (*old_plugin_p) return;
12798     KEYWORD_PLUGIN_MUTEX_LOCK;
12799     if (!*old_plugin_p) {
12800         *old_plugin_p = PL_keyword_plugin;
12801         PL_keyword_plugin = new_plugin;
12802     }
12803     KEYWORD_PLUGIN_MUTEX_UNLOCK;
12804 }
12805 
12806 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
12807 static void
S_parse_recdescent(pTHX_ int gramtype,I32 fakeeof)12808 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
12809 {
12810     SAVEI32(PL_lex_brackets);
12811     if (PL_lex_brackets > 100)
12812 	Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
12813     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
12814     SAVEI32(PL_lex_allbrackets);
12815     PL_lex_allbrackets = 0;
12816     SAVEI8(PL_lex_fakeeof);
12817     PL_lex_fakeeof = (U8)fakeeof;
12818     if(yyparse(gramtype) && !PL_parser->error_count)
12819 	qerror(Perl_mess(aTHX_ "Parse error"));
12820 }
12821 
12822 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
12823 static OP *
S_parse_recdescent_for_op(pTHX_ int gramtype,I32 fakeeof)12824 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
12825 {
12826     OP *o;
12827     ENTER;
12828     SAVEVPTR(PL_eval_root);
12829     PL_eval_root = NULL;
12830     parse_recdescent(gramtype, fakeeof);
12831     o = PL_eval_root;
12832     LEAVE;
12833     return o;
12834 }
12835 
12836 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
12837 static OP *
S_parse_expr(pTHX_ I32 fakeeof,U32 flags)12838 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
12839 {
12840     OP *exprop;
12841     if (flags & ~PARSE_OPTIONAL)
12842 	Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
12843     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
12844     if (!exprop && !(flags & PARSE_OPTIONAL)) {
12845 	if (!PL_parser->error_count)
12846 	    qerror(Perl_mess(aTHX_ "Parse error"));
12847 	exprop = newOP(OP_NULL, 0);
12848     }
12849     return exprop;
12850 }
12851 
12852 /*
12853 =for apidoc parse_arithexpr
12854 
12855 Parse a Perl arithmetic expression.  This may contain operators of precedence
12856 down to the bit shift operators.  The expression must be followed (and thus
12857 terminated) either by a comparison or lower-precedence operator or by
12858 something that would normally terminate an expression such as semicolon.
12859 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
12860 otherwise it is mandatory.  It is up to the caller to ensure that the
12861 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12862 the source of the code to be parsed and the lexical context for the
12863 expression.
12864 
12865 The op tree representing the expression is returned.  If an optional
12866 expression is absent, a null pointer is returned, otherwise the pointer
12867 will be non-null.
12868 
12869 If an error occurs in parsing or compilation, in most cases a valid op
12870 tree is returned anyway.  The error is reflected in the parser state,
12871 normally resulting in a single exception at the top level of parsing
12872 which covers all the compilation errors that occurred.  Some compilation
12873 errors, however, will throw an exception immediately.
12874 
12875 =for apidoc Amnh||PARSE_OPTIONAL
12876 
12877 =cut
12878 
12879 */
12880 
12881 OP *
Perl_parse_arithexpr(pTHX_ U32 flags)12882 Perl_parse_arithexpr(pTHX_ U32 flags)
12883 {
12884     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
12885 }
12886 
12887 /*
12888 =for apidoc parse_termexpr
12889 
12890 Parse a Perl term expression.  This may contain operators of precedence
12891 down to the assignment operators.  The expression must be followed (and thus
12892 terminated) either by a comma or lower-precedence operator or by
12893 something that would normally terminate an expression such as semicolon.
12894 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
12895 otherwise it is mandatory.  It is up to the caller to ensure that the
12896 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12897 the source of the code to be parsed and the lexical context for the
12898 expression.
12899 
12900 The op tree representing the expression is returned.  If an optional
12901 expression is absent, a null pointer is returned, otherwise the pointer
12902 will be non-null.
12903 
12904 If an error occurs in parsing or compilation, in most cases a valid op
12905 tree is returned anyway.  The error is reflected in the parser state,
12906 normally resulting in a single exception at the top level of parsing
12907 which covers all the compilation errors that occurred.  Some compilation
12908 errors, however, will throw an exception immediately.
12909 
12910 =cut
12911 */
12912 
12913 OP *
Perl_parse_termexpr(pTHX_ U32 flags)12914 Perl_parse_termexpr(pTHX_ U32 flags)
12915 {
12916     return parse_expr(LEX_FAKEEOF_COMMA, flags);
12917 }
12918 
12919 /*
12920 =for apidoc parse_listexpr
12921 
12922 Parse a Perl list expression.  This may contain operators of precedence
12923 down to the comma operator.  The expression must be followed (and thus
12924 terminated) either by a low-precedence logic operator such as C<or> or by
12925 something that would normally terminate an expression such as semicolon.
12926 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
12927 otherwise it is mandatory.  It is up to the caller to ensure that the
12928 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12929 the source of the code to be parsed and the lexical context for the
12930 expression.
12931 
12932 The op tree representing the expression is returned.  If an optional
12933 expression is absent, a null pointer is returned, otherwise the pointer
12934 will be non-null.
12935 
12936 If an error occurs in parsing or compilation, in most cases a valid op
12937 tree is returned anyway.  The error is reflected in the parser state,
12938 normally resulting in a single exception at the top level of parsing
12939 which covers all the compilation errors that occurred.  Some compilation
12940 errors, however, will throw an exception immediately.
12941 
12942 =cut
12943 */
12944 
12945 OP *
Perl_parse_listexpr(pTHX_ U32 flags)12946 Perl_parse_listexpr(pTHX_ U32 flags)
12947 {
12948     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
12949 }
12950 
12951 /*
12952 =for apidoc parse_fullexpr
12953 
12954 Parse a single complete Perl expression.  This allows the full
12955 expression grammar, including the lowest-precedence operators such
12956 as C<or>.  The expression must be followed (and thus terminated) by a
12957 token that an expression would normally be terminated by: end-of-file,
12958 closing bracketing punctuation, semicolon, or one of the keywords that
12959 signals a postfix expression-statement modifier.  If C<flags> has the
12960 C<PARSE_OPTIONAL> bit set, then the expression is optional, otherwise it is
12961 mandatory.  It is up to the caller to ensure that the dynamic parser
12962 state (L</PL_parser> et al) is correctly set to reflect the source of
12963 the code to be parsed and the lexical context for the expression.
12964 
12965 The op tree representing the expression is returned.  If an optional
12966 expression is absent, a null pointer is returned, otherwise the pointer
12967 will be non-null.
12968 
12969 If an error occurs in parsing or compilation, in most cases a valid op
12970 tree is returned anyway.  The error is reflected in the parser state,
12971 normally resulting in a single exception at the top level of parsing
12972 which covers all the compilation errors that occurred.  Some compilation
12973 errors, however, will throw an exception immediately.
12974 
12975 =cut
12976 */
12977 
12978 OP *
Perl_parse_fullexpr(pTHX_ U32 flags)12979 Perl_parse_fullexpr(pTHX_ U32 flags)
12980 {
12981     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
12982 }
12983 
12984 /*
12985 =for apidoc parse_block
12986 
12987 Parse a single complete Perl code block.  This consists of an opening
12988 brace, a sequence of statements, and a closing brace.  The block
12989 constitutes a lexical scope, so C<my> variables and various compile-time
12990 effects can be contained within it.  It is up to the caller to ensure
12991 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12992 reflect the source of the code to be parsed and the lexical context for
12993 the statement.
12994 
12995 The op tree representing the code block is returned.  This is always a
12996 real op, never a null pointer.  It will normally be a C<lineseq> list,
12997 including C<nextstate> or equivalent ops.  No ops to construct any kind
12998 of runtime scope are included by virtue of it being a block.
12999 
13000 If an error occurs in parsing or compilation, in most cases a valid op
13001 tree (most likely null) is returned anyway.  The error is reflected in
13002 the parser state, normally resulting in a single exception at the top
13003 level of parsing which covers all the compilation errors that occurred.
13004 Some compilation errors, however, will throw an exception immediately.
13005 
13006 The C<flags> parameter is reserved for future use, and must always
13007 be zero.
13008 
13009 =cut
13010 */
13011 
13012 OP *
Perl_parse_block(pTHX_ U32 flags)13013 Perl_parse_block(pTHX_ U32 flags)
13014 {
13015     if (flags)
13016 	Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
13017     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
13018 }
13019 
13020 /*
13021 =for apidoc parse_barestmt
13022 
13023 Parse a single unadorned Perl statement.  This may be a normal imperative
13024 statement or a declaration that has compile-time effect.  It does not
13025 include any label or other affixture.  It is up to the caller to ensure
13026 that the dynamic parser state (L</PL_parser> et al) is correctly set to
13027 reflect the source of the code to be parsed and the lexical context for
13028 the statement.
13029 
13030 The op tree representing the statement is returned.  This may be a
13031 null pointer if the statement is null, for example if it was actually
13032 a subroutine definition (which has compile-time side effects).  If not
13033 null, it will be ops directly implementing the statement, suitable to
13034 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
13035 equivalent op (except for those embedded in a scope contained entirely
13036 within the statement).
13037 
13038 If an error occurs in parsing or compilation, in most cases a valid op
13039 tree (most likely null) is returned anyway.  The error is reflected in
13040 the parser state, normally resulting in a single exception at the top
13041 level of parsing which covers all the compilation errors that occurred.
13042 Some compilation errors, however, will throw an exception immediately.
13043 
13044 The C<flags> parameter is reserved for future use, and must always
13045 be zero.
13046 
13047 =cut
13048 */
13049 
13050 OP *
Perl_parse_barestmt(pTHX_ U32 flags)13051 Perl_parse_barestmt(pTHX_ U32 flags)
13052 {
13053     if (flags)
13054 	Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
13055     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
13056 }
13057 
13058 /*
13059 =for apidoc parse_label
13060 
13061 Parse a single label, possibly optional, of the type that may prefix a
13062 Perl statement.  It is up to the caller to ensure that the dynamic parser
13063 state (L</PL_parser> et al) is correctly set to reflect the source of
13064 the code to be parsed.  If C<flags> has the C<PARSE_OPTIONAL> bit set, then the
13065 label is optional, otherwise it is mandatory.
13066 
13067 The name of the label is returned in the form of a fresh scalar.  If an
13068 optional label is absent, a null pointer is returned.
13069 
13070 If an error occurs in parsing, which can only occur if the label is
13071 mandatory, a valid label is returned anyway.  The error is reflected in
13072 the parser state, normally resulting in a single exception at the top
13073 level of parsing which covers all the compilation errors that occurred.
13074 
13075 =cut
13076 */
13077 
13078 SV *
Perl_parse_label(pTHX_ U32 flags)13079 Perl_parse_label(pTHX_ U32 flags)
13080 {
13081     if (flags & ~PARSE_OPTIONAL)
13082 	Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
13083     if (PL_nexttoke) {
13084 	PL_parser->yychar = yylex();
13085 	if (PL_parser->yychar == LABEL) {
13086 	    SV * const labelsv = cSVOPx(pl_yylval.opval)->op_sv;
13087 	    PL_parser->yychar = YYEMPTY;
13088 	    cSVOPx(pl_yylval.opval)->op_sv = NULL;
13089 	    op_free(pl_yylval.opval);
13090 	    return labelsv;
13091 	} else {
13092 	    yyunlex();
13093 	    goto no_label;
13094 	}
13095     } else {
13096 	char *s, *t;
13097 	STRLEN wlen, bufptr_pos;
13098 	lex_read_space(0);
13099 	t = s = PL_bufptr;
13100         if (!isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
13101 	    goto no_label;
13102 	t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
13103 	if (word_takes_any_delimiter(s, wlen))
13104 	    goto no_label;
13105 	bufptr_pos = s - SvPVX(PL_linestr);
13106 	PL_bufptr = t;
13107 	lex_read_space(LEX_KEEP_PREVIOUS);
13108 	t = PL_bufptr;
13109 	s = SvPVX(PL_linestr) + bufptr_pos;
13110 	if (t[0] == ':' && t[1] != ':') {
13111 	    PL_oldoldbufptr = PL_oldbufptr;
13112 	    PL_oldbufptr = s;
13113 	    PL_bufptr = t+1;
13114 	    return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
13115 	} else {
13116 	    PL_bufptr = s;
13117 	    no_label:
13118 	    if (flags & PARSE_OPTIONAL) {
13119 		return NULL;
13120 	    } else {
13121 		qerror(Perl_mess(aTHX_ "Parse error"));
13122 		return newSVpvs("x");
13123 	    }
13124 	}
13125     }
13126 }
13127 
13128 /*
13129 =for apidoc parse_fullstmt
13130 
13131 Parse a single complete Perl statement.  This may be a normal imperative
13132 statement or a declaration that has compile-time effect, and may include
13133 optional labels.  It is up to the caller to ensure that the dynamic
13134 parser state (L</PL_parser> et al) is correctly set to reflect the source
13135 of the code to be parsed and the lexical context for the statement.
13136 
13137 The op tree representing the statement is returned.  This may be a
13138 null pointer if the statement is null, for example if it was actually
13139 a subroutine definition (which has compile-time side effects).  If not
13140 null, it will be the result of a L</newSTATEOP> call, normally including
13141 a C<nextstate> or equivalent op.
13142 
13143 If an error occurs in parsing or compilation, in most cases a valid op
13144 tree (most likely null) is returned anyway.  The error is reflected in
13145 the parser state, normally resulting in a single exception at the top
13146 level of parsing which covers all the compilation errors that occurred.
13147 Some compilation errors, however, will throw an exception immediately.
13148 
13149 The C<flags> parameter is reserved for future use, and must always
13150 be zero.
13151 
13152 =cut
13153 */
13154 
13155 OP *
Perl_parse_fullstmt(pTHX_ U32 flags)13156 Perl_parse_fullstmt(pTHX_ U32 flags)
13157 {
13158     if (flags)
13159 	Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
13160     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
13161 }
13162 
13163 /*
13164 =for apidoc parse_stmtseq
13165 
13166 Parse a sequence of zero or more Perl statements.  These may be normal
13167 imperative statements, including optional labels, or declarations
13168 that have compile-time effect, or any mixture thereof.  The statement
13169 sequence ends when a closing brace or end-of-file is encountered in a
13170 place where a new statement could have validly started.  It is up to
13171 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
13172 is correctly set to reflect the source of the code to be parsed and the
13173 lexical context for the statements.
13174 
13175 The op tree representing the statement sequence is returned.  This may
13176 be a null pointer if the statements were all null, for example if there
13177 were no statements or if there were only subroutine definitions (which
13178 have compile-time side effects).  If not null, it will be a C<lineseq>
13179 list, normally including C<nextstate> or equivalent ops.
13180 
13181 If an error occurs in parsing or compilation, in most cases a valid op
13182 tree is returned anyway.  The error is reflected in the parser state,
13183 normally resulting in a single exception at the top level of parsing
13184 which covers all the compilation errors that occurred.  Some compilation
13185 errors, however, will throw an exception immediately.
13186 
13187 The C<flags> parameter is reserved for future use, and must always
13188 be zero.
13189 
13190 =cut
13191 */
13192 
13193 OP *
Perl_parse_stmtseq(pTHX_ U32 flags)13194 Perl_parse_stmtseq(pTHX_ U32 flags)
13195 {
13196     OP *stmtseqop;
13197     I32 c;
13198     if (flags)
13199 	Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
13200     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
13201     c = lex_peek_unichar(0);
13202     if (c != -1 && c != /*{*/'}')
13203 	qerror(Perl_mess(aTHX_ "Parse error"));
13204     return stmtseqop;
13205 }
13206 
13207 /*
13208 =for apidoc parse_subsignature
13209 
13210 Parse a subroutine signature declaration. This is the contents of the
13211 parentheses following a named or anonymous subroutine declaration when the
13212 C<signatures> feature is enabled. Note that this function neither expects
13213 nor consumes the opening and closing parentheses around the signature; it
13214 is the caller's job to handle these.
13215 
13216 This function must only be called during parsing of a subroutine; after
13217 L</start_subparse> has been called. It might allocate lexical variables on
13218 the pad for the current subroutine.
13219 
13220 The op tree to unpack the arguments from the stack at runtime is returned.
13221 This op tree should appear at the beginning of the compiled function. The
13222 caller may wish to use L</op_append_list> to build their function body
13223 after it, or splice it together with the body before calling L</newATTRSUB>.
13224 
13225 The C<flags> parameter is reserved for future use, and must always
13226 be zero.
13227 
13228 =cut
13229 */
13230 
13231 OP *
Perl_parse_subsignature(pTHX_ U32 flags)13232 Perl_parse_subsignature(pTHX_ U32 flags)
13233 {
13234     if (flags)
13235         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_subsignature");
13236     return parse_recdescent_for_op(GRAMSUBSIGNATURE, LEX_FAKEEOF_NONEXPR);
13237 }
13238 
13239 /*
13240  * ex: set ts=8 sts=4 sw=4 et:
13241  */
13242