1 %{
2 /*
3  *  R : A Computer Language for Statistical Data Analysis
4  *  Copyright (C) 1995, 1996, 1997  Robert Gentleman and Ross Ihaka
5  *  Copyright (C) 1997--2021  The R Core Team
6  *  Copyright (C) 2009--2011  Romain Francois
7  *
8  *  This program is free software; you can redistribute it and/or modify
9  *  it under the terms of the GNU General Public License as published by
10  *  the Free Software Foundation; either version 2 of the License, or
11  *  (at your option) any later version.
12  *
13  *  This program is distributed in the hope that it will be useful,
14  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  *  GNU General Public License for more details.
17  *
18  *  You should have received a copy of the GNU General Public License
19  *  along with this program; if not, a copy is available at
20  *  https://www.R-project.org/Licenses/
21  */
22 
23 #ifdef HAVE_CONFIG_H
24 #include <config.h>
25 #endif
26 
27 #define R_USE_SIGNALS 1
28 #include "IOStuff.h"		/*-> Defn.h */
29 #include "Fileio.h"
30 #include "Parse.h"
31 #include <R_ext/Print.h>
32 
33 #if !defined(__STDC_ISO_10646__) && (defined(__APPLE__) || defined(__FreeBSD__) || defined(__sun))
34 /* This may not be 100% true (see the comment in rlocale.h),
35    but it seems true in normal locales.
36  */
37 # define __STDC_ISO_10646__
38 #endif
39 
40 /* #define YYDEBUG 1 */
41 #define YYERROR_VERBOSE 1
42 #define PARSE_ERROR_SIZE 256	    /* Parse error messages saved here */
43 #define PARSE_CONTEXT_SIZE 256	    /* Recent parse context kept in a circular buffer */
44 
45 static Rboolean busy = FALSE;
46 static SEXP R_NullSymbol = NULL;
47 
48 static int identifier ;
49 static void incrementId(void);
50 static void initData(void);
51 static void initId(void);
52 static void record_( int, int, int, int, int, int, char* ) ;
53 
54 static void yyerror(const char *);
55 static int yylex();
56 int yyparse(void);
57 
58 static FILE *fp_parse;
59 static int (*ptr_getc)(void);
60 
61 static int	SavedToken;
62 static SEXP	SavedLval;
63 
64 #define yyconst const
65 
66 typedef struct yyltype
67 {
68   int first_line;
69   int first_column;
70   int first_byte;
71 
72   int last_line;
73   int last_column;
74   int last_byte;
75 
76   int first_parsed;
77   int last_parsed;
78 
79   int id;
80 } yyltype;
81 
82 
83 #define INIT_DATA_COUNT 16384    	/* init parser data to this size */
84 #define MAX_DATA_COUNT   65536		/* release it at the end if it is this size or larger*/
85 
86 #define DATA_COUNT  (length( PS_DATA ) / DATA_ROWS)
87 #define ID_COUNT    ((length( PS_IDS ) / 2) - 1)
88 
89 static void finalizeData( ) ;
90 static void growData( ) ;
91 static void growID( int ) ;
92 
93 #define DATA_ROWS 8
94 
95 #define _FIRST_PARSED( i ) INTEGER( PS_DATA )[ DATA_ROWS*(i)     ]
96 #define _FIRST_COLUMN( i ) INTEGER( PS_DATA )[ DATA_ROWS*(i) + 1 ]
97 #define _LAST_PARSED( i )  INTEGER( PS_DATA )[ DATA_ROWS*(i) + 2 ]
98 #define _LAST_COLUMN( i )  INTEGER( PS_DATA )[ DATA_ROWS*(i) + 3 ]
99 #define _TERMINAL( i )     INTEGER( PS_DATA )[ DATA_ROWS*(i) + 4 ]
100 #define _TOKEN( i )        INTEGER( PS_DATA )[ DATA_ROWS*(i) + 5 ]
101 #define _ID( i )           INTEGER( PS_DATA )[ DATA_ROWS*(i) + 6 ]
102 #define _PARENT(i)         INTEGER( PS_DATA )[ DATA_ROWS*(i) + 7 ]
103 
104 #define ID_ID( i )      INTEGER(PS_IDS)[ 2*(i) ]
105 #define ID_PARENT( i )  INTEGER(PS_IDS)[ 2*(i) + 1 ]
106 
107 static void modif_token( yyltype*, int ) ;
108 static void recordParents( int, yyltype*, int) ;
109 
110 static int _current_token ;
111 
112 /**
113  * Records the current non-terminal token expression and gives it an id
114  *
115  * @param loc the location of the expression
116  */
setId(yyltype loc)117 static void setId(yyltype loc){
118     record_(
119 	    (loc).first_parsed, (loc).first_column, (loc).last_parsed, (loc).last_column,
120 	    _current_token, (loc).id, 0 ) ;
121 }
122 
123 # define YYLTYPE yyltype
124 # define YYLLOC_DEFAULT(Current, Rhs, N)				\
125     do	{ 								\
126 	if (N){								\
127 	    (Current).first_line   = YYRHSLOC (Rhs, 1).first_line;	\
128 	    (Current).first_column = YYRHSLOC (Rhs, 1).first_column;	\
129 	    (Current).first_byte   = YYRHSLOC (Rhs, 1).first_byte;	\
130 	    (Current).last_line    = YYRHSLOC (Rhs, N).last_line;	\
131 	    (Current).last_column  = YYRHSLOC (Rhs, N).last_column;	\
132 	    (Current).last_byte    = YYRHSLOC (Rhs, N).last_byte;	\
133 	    (Current).first_parsed = YYRHSLOC (Rhs, 1).first_parsed;    \
134 	    (Current).last_parsed  = YYRHSLOC (Rhs, N).last_parsed;	\
135 	    incrementId( ) ; 						\
136 	    (Current).id = identifier ; 				\
137 	    _current_token = yyr1[yyn] ; 				\
138 	    if (ParseState.keepSrcRefs && ParseState.keepParseData) {	\
139 	        yyltype childs[N];					\
140 	        int ii = 0; 						\
141 	        for(ii=0; ii<N; ii++){					\
142 		      childs[ii] = YYRHSLOC (Rhs, (ii+1) ) ; 		\
143 	        } 							\
144 	        recordParents( identifier, childs, N) ; 		\
145 	    }								\
146 	} else	{							\
147 	  (Current).first_line   = (Current).last_line   =		\
148 	    YYRHSLOC (Rhs, 0).last_line;				\
149 	  (Current).first_parsed   = (Current).last_parsed   =		\
150 	    YYRHSLOC (Rhs, 0).last_parsed;				\
151 	  (Current).first_column = YYRHSLOC (Rhs, 0).last_column;	\
152 	  (Current).last_column = (Current).first_column - 1;		\
153 	  (Current).first_byte = YYRHSLOC (Rhs, 0).last_byte;		\
154 	  (Current).last_byte = (Current).first_byte - 1;		\
155 	  (Current).id = NA_INTEGER;                                    \
156 	} 								\
157     } while (0)
158 
159 
160 # define YY_LOCATION_PRINT(File,Loc)					\
161  fprintf ( File, "%d.%d.%d-%d.%d.%d (%d)",				\
162  	(Loc).first_line, (Loc).first_column,	(Loc).first_byte, 	\
163  	(Loc).last_line,  (Loc).last_column, 	(Loc).last_byte, 	\
164 	(Loc).id )
165 
166 /* Useful defines so editors don't get confused ... */
167 
168 #define LBRACE	'{'
169 #define RBRACE	'}'
170 
171 /* Functions used in the parsing process */
172 
173 static void	CheckFormalArgs(SEXP, SEXP, YYLTYPE *);
174 static SEXP	FirstArg(SEXP, SEXP); /* create list with one element */
175 static void 	GrowList(SEXP, SEXP); /* add element to list end */
176 
177 static void	SetSingleSrcRef(SEXP);
178 static void	AppendToSrcRefs(SEXP);
179 static void	PrependToSrcRefs(SEXP);
180 static SEXP	SrcRefsToVectorList();
181 
182 static void	IfPush(void);
183 static int	KeywordLookup(const char *);
184 static SEXP	NewList(void);
185 static void	NextArg(SEXP, SEXP, SEXP); /* add named element to list end */
186 static SEXP	TagArg(SEXP, SEXP, YYLTYPE *);
187 static int 	processLineDirective();
188 
189 static SEXP R_PipeBindSymbol = NULL;
190 
191 /* These routines allocate constants */
192 
193 static SEXP	mkComplex(const char *);
194 SEXP		mkFalse(void);
195 static SEXP     mkFloat(const char *);
196 static SEXP 	mkInt(const char *);
197 static SEXP	mkNA(void);
198 SEXP		mkTrue(void);
199 
200 /* Internal lexer / parser state variables */
201 
202 static int	EatLines = 0;
203 static int	GenerateCode = 0;
204 static int	EndOfFile = 0;
205 static int	xxgetc();
206 static int	xxungetc(int);
207 static int	xxcharcount, xxcharsave;
208 static int	xxlinesave, xxbytesave, xxcolsave, xxparsesave;
209 
210 static SrcRefState ParseState;
211 
212 #define PS_SET_SRCREFS(x)   SET_VECTOR_ELT(ParseState.sexps, 0, (x))
213 #define PS_SET_SRCFILE(x)   SET_VECTOR_ELT(ParseState.sexps, 1, (x))
214 #define PS_SET_ORIGINAL(x)  SET_VECTOR_ELT(ParseState.sexps, 2, (x))
215 
216 /* direct pointer to data is kept for performance of finalizeData() */
217 #define PS_SET_DATA(x)      do {                \
218     SEXP __x__ = (x);                           \
219     SET_VECTOR_ELT(ParseState.sexps, 3, __x__); \
220     ParseState.data = __x__;                    \
221 } while(0);
222 
223 #define PS_SET_TEXT(x)      SET_VECTOR_ELT(ParseState.sexps, 4, (x))
224 #define PS_SET_IDS(x)       SET_VECTOR_ELT(ParseState.sexps, 5, (x))
225 #define PS_SET_SVS(x)       SET_VECTOR_ELT(ParseState.sexps, 6, (x))
226 
227 #define PS_SRCREFS          VECTOR_ELT(ParseState.sexps, 0)
228 #define PS_SRCFILE          VECTOR_ELT(ParseState.sexps, 1)
229 #define PS_ORIGINAL         VECTOR_ELT(ParseState.sexps, 2)
230 #define PS_DATA             ParseState.data
231 #define PS_TEXT             VECTOR_ELT(ParseState.sexps, 4)
232 #define PS_IDS              VECTOR_ELT(ParseState.sexps, 5)
233 #define PS_SVS              VECTOR_ELT(ParseState.sexps, 6)
234 
235 /* Memory protection in the parser
236 
237    The generated code of the parser keeps semantic values (SEXPs) on its
238    semantic values stack. Values are added to the stack during shift and
239    reduce operations and are removed during reduce operations or error
240    handling. Values are created by the lexer before they are added to the
241    stack. Values are also held in a local SEXP variable once removed from
242    the stack but still needed. The stack is automatically expanded on demand.
243 
244    For memory protection, it would be natural to have that stack on the R heap
245    and to use PROTECT/UNPROTECT to protect values in local SEXP variables.
246    Unfortunately, bison does not seem to be customizable enough to allow this.
247 
248    Hence, semantic values, when created by the lexer or reduce operations, are
249    placed on parser state precious multi-set via PRESERVE_SV. They are removed
250    from the multi-set in reduce operations using RELEASE_SV, because by design
251    of the bison parsers such values are subsequently removed from the stack.
252    They are also automatically removed when the parsing finishes, including
253    parser error (also on R error, via the context on-end action).
254 
255    Previously semantic values were protected via PROTECT/UNPROTECT_PTR with
256    similar semantics but using protect stack shared with PROTECT/UNPROTECT.
257    Using a separate precious multi-set is safe even with interleaving of the
258    two protection schemes.
259 */
260 
261 #define INIT_SVS()     PS_SET_SVS(R_NewPreciousMSet(200))
262 #define PRESERVE_SV(x) R_PreserveInMSet((x), PS_SVS)
263 #define RELEASE_SV(x)  R_ReleaseFromMSet((x), PS_SVS)
264 #define CLEAR_SVS()    R_ReleaseMSet(PS_SVS, 500)
265 
266 /* Memory leak
267 
268    yyparse(), as generated by bison, allocates extra space for the parser
269    stack using malloc(). Unfortunately this means that there is a memory
270    leak in case of an R error (long-jump). In principle, we could define
271    yyoverflow() to relocate the parser stacks for bison and allocate say on
272    the R heap, but yyoverflow() is undocumented and somewhat complicated
273    (we would have to replicate some macros from the generated parser here).
274    The same problem exists at least in the Rd and LaTeX parsers in tools.
275 */
276 
277 #include <rlocale.h>
278 #ifdef HAVE_LANGINFO_CODESET
279 # include <langinfo.h>
280 #endif
281 
mbcs_get_next(int c,wchar_t * wc)282 static int mbcs_get_next(int c, wchar_t *wc)
283 {
284     int i, res, clen = 1; char s[9];
285     mbstate_t mb_st;
286 
287     s[0] = (char) c;
288     /* This assumes (probably OK) that all MBCS embed ASCII as single-byte
289        lead bytes, including control chars */
290     if((unsigned int) c < 0x80) {
291 	*wc = (wchar_t) c;
292 	return 1;
293     }
294     if(utf8locale) {
295 	clen = utf8clen((char) c);
296 	for(i = 1; i < clen; i++) {
297 	    c = xxgetc();
298 	    if(c == R_EOF) error(_("EOF whilst reading MBCS char at line %d"), ParseState.xxlineno);
299 	    s[i] = (char) c;
300 	}
301 	s[clen] ='\0'; /* x86 Solaris requires this */
302 	res = (int) mbrtowc(wc, s, clen, NULL);
303 	if(res == -1) error(_("invalid multibyte character in parser at line %d"), ParseState.xxlineno);
304     } else {
305 	/* This is not necessarily correct for stateful MBCS */
306 	while(clen <= R_MB_CUR_MAX) {
307 	    mbs_init(&mb_st);
308 	    res = (int) mbrtowc(wc, s, clen, &mb_st);
309 	    if(res >= 0) break;
310 	    if(res == -1)
311 		error(_("invalid multibyte character in parser at line %d"), ParseState.xxlineno);
312 	    /* so res == -2 */
313 	    c = xxgetc();
314 	    if(c == R_EOF) error(_("EOF whilst reading MBCS char at line %d"), ParseState.xxlineno);
315 	    s[clen++] = (char) c;
316 	} /* we've tried enough, so must be complete or invalid by now */
317     }
318     for(i = clen - 1; i > 0; i--) xxungetc(s[i]);
319     return clen;
320 }
321 
322 /* Soon to be defunct entry points */
323 
324 void		R_SetInput(int);
325 int		R_fgetc(FILE*);
326 
327 /* Routines used to build the parse tree */
328 
329 static SEXP	xxnullformal(void);
330 static SEXP	xxfirstformal0(SEXP);
331 static SEXP	xxfirstformal1(SEXP, SEXP);
332 static SEXP	xxaddformal0(SEXP, SEXP, YYLTYPE *);
333 static SEXP	xxaddformal1(SEXP, SEXP, SEXP, YYLTYPE *);
334 static SEXP	xxexprlist0();
335 static SEXP	xxexprlist1(SEXP, YYLTYPE *);
336 static SEXP	xxexprlist2(SEXP, SEXP, YYLTYPE *);
337 static SEXP	xxsub0(void);
338 static SEXP	xxsub1(SEXP, YYLTYPE *);
339 static SEXP	xxsymsub0(SEXP, YYLTYPE *);
340 static SEXP	xxsymsub1(SEXP, SEXP, YYLTYPE *);
341 static SEXP	xxnullsub0(YYLTYPE *);
342 static SEXP	xxnullsub1(SEXP, YYLTYPE *);
343 static SEXP	xxsublist1(SEXP);
344 static SEXP	xxsublist2(SEXP, SEXP);
345 static SEXP	xxcond(SEXP);
346 static SEXP	xxifcond(SEXP);
347 static SEXP	xxif(SEXP, SEXP, SEXP);
348 static SEXP	xxifelse(SEXP, SEXP, SEXP, SEXP);
349 static SEXP	xxforcond(SEXP, SEXP);
350 static SEXP	xxfor(SEXP, SEXP, SEXP);
351 static SEXP	xxwhile(SEXP, SEXP, SEXP);
352 static SEXP	xxrepeat(SEXP, SEXP);
353 static SEXP	xxnxtbrk(SEXP);
354 static SEXP	xxfuncall(SEXP, SEXP);
355 static SEXP	xxdefun(SEXP, SEXP, SEXP, YYLTYPE *);
356 static SEXP	xxpipe(SEXP, SEXP);
357 static SEXP	xxpipebind(SEXP, SEXP, SEXP);
358 static SEXP	xxunary(SEXP, SEXP);
359 static SEXP	xxbinary(SEXP, SEXP, SEXP);
360 static SEXP	xxparen(SEXP, SEXP);
361 static SEXP	xxsubscript(SEXP, SEXP, SEXP);
362 static SEXP	xxexprlist(SEXP, YYLTYPE *, SEXP);
363 static int	xxvalue(SEXP, int, YYLTYPE *);
364 
365 #define YYSTYPE		SEXP
366 
367 %}
368 
369 %token-table
370 
371 %token		END_OF_INPUT ERROR
372 %token		STR_CONST NUM_CONST NULL_CONST SYMBOL FUNCTION
373 %token		INCOMPLETE_STRING
374 %token		LEFT_ASSIGN EQ_ASSIGN RIGHT_ASSIGN LBB
375 %token		FOR IN IF ELSE WHILE NEXT BREAK REPEAT
376 %token		GT GE LT LE EQ NE AND OR AND2 OR2
377 %token		NS_GET NS_GET_INT
378 %token		COMMENT LINE_DIRECTIVE
379 %token		SYMBOL_FORMALS
380 %token		EQ_FORMALS
381 %token		EQ_SUB SYMBOL_SUB
382 %token		SYMBOL_FUNCTION_CALL
383 %token		SYMBOL_PACKAGE
384 /* no longer used: %token COLON_ASSIGN */
385 %token		SLOT
386 %token		PIPE
387 %token          PIPEBIND
388 
389 /* This is the precedence table, low to high */
390 %left		'?'
391 %left		LOW WHILE FOR REPEAT
392 %right		IF
393 %left		ELSE
394 %right		LEFT_ASSIGN
395 %right		EQ_ASSIGN
396 %left		RIGHT_ASSIGN
397 %left		'~' TILDE
398 %left		OR OR2
399 %left		AND AND2
400 %left		UNOT NOT
401 %nonassoc   	GT GE LT LE EQ NE
402 %left		'+' '-'
403 %left		'*' '/'
404 %left		SPECIAL PIPE
405 %left		PIPEBIND
406 %left		':'
407 %left		UMINUS UPLUS
408 %right		'^'
409 %left		'$' '@'
410 %left		NS_GET NS_GET_INT
411 %nonassoc	'(' '[' LBB
412 
413 %%
414 
415 prog	:	END_OF_INPUT			{ YYACCEPT; }
416 	|	'\n'				{ yyresult = xxvalue(NULL,2,NULL);	goto yyreturn; }
417 	|	expr_or_assign_or_help '\n'	{ yyresult = xxvalue($1,3,&@1);	goto yyreturn; }
418 	|	expr_or_assign_or_help ';'	{ yyresult = xxvalue($1,4,&@1);	goto yyreturn; }
419 	|	error	 			{ YYABORT; }
420 	;
421 
422 expr_or_assign_or_help  :    expr               { $$ = $1; }
423                 |    expr_or_assign_or_help EQ_ASSIGN expr_or_assign_or_help    { $$ = xxbinary($2,$1,$3); setId(@$); }
424                 |    expr_or_assign_or_help '?'  expr_or_assign_or_help		{ $$ = xxbinary($2,$1,$3); setId(@$); }
425                 ;
426 
427 expr_or_help  :    expr				    { $$ = $1; }
428 	      |    expr_or_help '?' expr_or_help    { $$ = xxbinary($2,$1,$3); setId(@$); }
429               ;
430 
431 expr	: 	NUM_CONST			{ $$ = $1;	setId(@$); }
432 	|	STR_CONST			{ $$ = $1;	setId(@$); }
433 	|	NULL_CONST			{ $$ = $1;	setId(@$); }
434 	|	SYMBOL				{ $$ = $1;	setId(@$); }
435 
436 	|	'{' exprlist '}'		{ $$ = xxexprlist($1,&@1,$2); setId(@$); }
437 	|	'(' expr_or_assign_or_help ')'	{ $$ = xxparen($1,$2);	setId(@$); }
438 
439 	|	'-' expr %prec UMINUS		{ $$ = xxunary($1,$2);	setId(@$); }
440 	|	'+' expr %prec UMINUS		{ $$ = xxunary($1,$2);	setId(@$); }
441 	|	'!' expr %prec UNOT		{ $$ = xxunary($1,$2);	setId(@$); }
442 	|	'~' expr %prec TILDE		{ $$ = xxunary($1,$2);	setId(@$); }
443 	|	'?' expr_or_assign_or_help	{ $$ = xxunary($1,$2);	setId(@$); }
444 
445 	|	expr ':'  expr			{ $$ = xxbinary($2,$1,$3);	setId(@$); }
446 	|	expr '+'  expr			{ $$ = xxbinary($2,$1,$3);	setId(@$); }
447 	|	expr '-' expr			{ $$ = xxbinary($2,$1,$3);	setId(@$); }
448 	|	expr '*' expr			{ $$ = xxbinary($2,$1,$3);	setId(@$); }
449 	|	expr '/' expr			{ $$ = xxbinary($2,$1,$3);	setId(@$); }
450 	|	expr '^' expr 			{ $$ = xxbinary($2,$1,$3);	setId(@$); }
451 	|	expr SPECIAL expr		{ $$ = xxbinary($2,$1,$3);	setId(@$); }
452 	|	expr '~' expr			{ $$ = xxbinary($2,$1,$3);	setId(@$); }
453 	|	expr LT expr			{ $$ = xxbinary($2,$1,$3);	setId(@$); }
454 	|	expr LE expr			{ $$ = xxbinary($2,$1,$3);	setId(@$); }
455 	|	expr EQ expr			{ $$ = xxbinary($2,$1,$3);	setId(@$); }
456 	|	expr NE expr			{ $$ = xxbinary($2,$1,$3);	setId(@$); }
457 	|	expr GE expr			{ $$ = xxbinary($2,$1,$3);	setId(@$); }
458 	|	expr GT expr			{ $$ = xxbinary($2,$1,$3);	setId(@$); }
459 	|	expr AND expr			{ $$ = xxbinary($2,$1,$3);	setId(@$); }
460 	|	expr OR expr			{ $$ = xxbinary($2,$1,$3);	setId(@$); }
461 	|	expr AND2 expr			{ $$ = xxbinary($2,$1,$3);	setId(@$); }
462 	|	expr OR2 expr			{ $$ = xxbinary($2,$1,$3);	setId(@$); }
463 	|	expr PIPE expr			{ $$ = xxpipe($1,$3);  setId(@$); }
464 	|	expr PIPEBIND expr		{ $$ = xxpipebind($2,$1,$3);	setId(@$); }
465 	|	expr LEFT_ASSIGN expr 		{ $$ = xxbinary($2,$1,$3);	setId(@$); }
466 	|	expr RIGHT_ASSIGN expr 		{ $$ = xxbinary($2,$3,$1);	setId(@$); }
467 	|	FUNCTION '(' formlist ')' cr expr_or_assign_or_help %prec LOW
468 						{ $$ = xxdefun($1,$3,$6,&@$); 	setId(@$); }
469 	|	'\\' '(' formlist ')' cr expr_or_assign_or_help %prec LOW							{ $$ = xxdefun(R_FunctionSymbol,$3,$6,&@$); 	setId(@$); }
470 	|	expr '(' sublist ')'		{ $$ = xxfuncall($1,$3);  setId(@$); modif_token( &@1, SYMBOL_FUNCTION_CALL ) ; }
471 	|	IF ifcond expr_or_assign_or_help 	{ $$ = xxif($1,$2,$3);	setId(@$); }
472 	|	IF ifcond expr_or_assign_or_help ELSE expr_or_assign_or_help	{ $$ = xxifelse($1,$2,$3,$5);	setId(@$); }
473 	|	FOR forcond expr_or_assign_or_help %prec FOR	{ $$ = xxfor($1,$2,$3);	setId(@$); }
474 	|	WHILE cond expr_or_assign_or_help   { $$ = xxwhile($1,$2,$3);	setId(@$); }
475 	|	REPEAT expr_or_assign_or_help	    { $$ = xxrepeat($1,$2);	setId(@$); }
476 	|	expr LBB sublist ']' ']'	{ $$ = xxsubscript($1,$2,$3);	setId(@$); }
477 	|	expr '[' sublist ']'		{ $$ = xxsubscript($1,$2,$3);	setId(@$); }
478 	|	SYMBOL NS_GET SYMBOL		{ $$ = xxbinary($2,$1,$3);      setId(@$); modif_token( &@1, SYMBOL_PACKAGE ) ; }
479 	|	SYMBOL NS_GET STR_CONST		{ $$ = xxbinary($2,$1,$3);      setId(@$); modif_token( &@1, SYMBOL_PACKAGE ) ; }
480 	|	STR_CONST NS_GET SYMBOL		{ $$ = xxbinary($2,$1,$3);	setId(@$); }
481 	|	STR_CONST NS_GET STR_CONST	{ $$ = xxbinary($2,$1,$3);	setId(@$); }
482 	|	SYMBOL NS_GET_INT SYMBOL	{ $$ = xxbinary($2,$1,$3);      setId(@$); modif_token( &@1, SYMBOL_PACKAGE ) ;}
483 	|	SYMBOL NS_GET_INT STR_CONST	{ $$ = xxbinary($2,$1,$3);      setId(@$); modif_token( &@1, SYMBOL_PACKAGE ) ;}
484 	|	STR_CONST NS_GET_INT SYMBOL	{ $$ = xxbinary($2,$1,$3);	setId(@$); }
485 	|	STR_CONST NS_GET_INT STR_CONST	{ $$ = xxbinary($2,$1,$3);	setId(@$); }
486 	|	expr '$' SYMBOL			{ $$ = xxbinary($2,$1,$3);	setId(@$); }
487 	|	expr '$' STR_CONST		{ $$ = xxbinary($2,$1,$3);	setId(@$); }
488 	|	expr '@' SYMBOL			{ $$ = xxbinary($2,$1,$3);      setId(@$); modif_token( &@3, SLOT ) ; }
489 	|	expr '@' STR_CONST		{ $$ = xxbinary($2,$1,$3);	setId(@$); }
490 	|	NEXT				{ $$ = xxnxtbrk($1);	setId(@$); }
491 	|	BREAK				{ $$ = xxnxtbrk($1);	setId(@$); }
492 	;
493 
494 
495 cond	:	'(' expr_or_help ')'			{ $$ = xxcond($2);   }
496 	;
497 
498 ifcond	:	'(' expr_or_help ')'			{ $$ = xxifcond($2); }
499 	;
500 
501 forcond :	'(' SYMBOL IN expr_or_help ')' 		{ $$ = xxforcond($2,$4);	setId(@$); }
502 	;
503 
504 
505 exprlist:					{ $$ = xxexprlist0();	setId(@$); }
506 	|	expr_or_assign_or_help			{ $$ = xxexprlist1($1, &@1); }
507 	|	exprlist ';' expr_or_assign_or_help	{ $$ = xxexprlist2($1, $3, &@3); }
508 	|	exprlist ';'			{ $$ = $1;		setId(@$); }
509 	|	exprlist '\n' expr_or_assign_or_help	{ $$ = xxexprlist2($1, $3, &@3); }
510 	|	exprlist '\n'			{ $$ = $1;}
511 	;
512 
513 sublist	:	sub				{ $$ = xxsublist1($1);	  }
514 	|	sublist cr ',' sub		{ $$ = xxsublist2($1,$4); }
515 	;
516 
517 sub	:					{ $$ = xxsub0();	 }
518 	|	expr_or_help			{ $$ = xxsub1($1, &@1);  }
519 	|	SYMBOL EQ_ASSIGN 		{ $$ = xxsymsub0($1, &@1); 	modif_token( &@2, EQ_SUB ) ; modif_token( &@1, SYMBOL_SUB ) ; }
520 	|	SYMBOL EQ_ASSIGN expr_or_help	{ $$ = xxsymsub1($1,$3, &@1); 	modif_token( &@2, EQ_SUB ) ; modif_token( &@1, SYMBOL_SUB ) ; }
521 	|	STR_CONST EQ_ASSIGN 		{ $$ = xxsymsub0($1, &@1); 	modif_token( &@2, EQ_SUB ) ; }
522 	|	STR_CONST EQ_ASSIGN expr_or_help    { $$ = xxsymsub1($1,$3, &@1); 	modif_token( &@2, EQ_SUB ) ; }
523 	|	NULL_CONST EQ_ASSIGN 		{ $$ = xxnullsub0(&@1); 	modif_token( &@2, EQ_SUB ) ; }
524 	|	NULL_CONST EQ_ASSIGN expr_or_help   { $$ = xxnullsub1($3, &@1); 	modif_token( &@2, EQ_SUB ) ; }
525 	;
526 
527 formlist:					{ $$ = xxnullformal(); }
528 	|	SYMBOL				{ $$ = xxfirstformal0($1); 	modif_token( &@1, SYMBOL_FORMALS ) ; }
529 	|	SYMBOL EQ_ASSIGN expr_or_help	{ $$ = xxfirstformal1($1,$3); 	modif_token( &@1, SYMBOL_FORMALS ) ; modif_token( &@2, EQ_FORMALS ) ; }
530 	|	formlist ',' SYMBOL		{ $$ = xxaddformal0($1,$3, &@3);   modif_token( &@3, SYMBOL_FORMALS ) ; }
531 	|	formlist ',' SYMBOL EQ_ASSIGN expr_or_help
532 						{ $$ = xxaddformal1($1,$3,$5,&@3); modif_token( &@3, SYMBOL_FORMALS ) ; modif_token( &@4, EQ_FORMALS ) ;}
533 	;
534 
535 cr	:					{ EatLines = 1; }
536 	;
537 %%
538 
539 
540 /*----------------------------------------------------------------------------*/
541 
542 static int (*ptr_getc)(void);
543 
544 /* Private pushback, since file ungetc only guarantees one byte.
545    We need up to one MBCS-worth */
546 #define DECLARE_YYTEXT_BUFP(bp) char *bp = yytext ;
547 #define YYTEXT_PUSH(c, bp) do { \
548     if ((bp) - yytext >= sizeof(yytext) - 1){ \
549 		error(_("input buffer overflow at line %d"), ParseState.xxlineno); \
550 	} \
551     *(bp)++ = ((char)c);			\
552 } while(0) ;
553 
554 #define PUSHBACK_BUFSIZE 16
555 static int pushback[PUSHBACK_BUFSIZE];
556 static unsigned int npush = 0;
557 
558 static int prevpos = 0;
559 static int prevlines[PUSHBACK_BUFSIZE];
560 static int prevcols[PUSHBACK_BUFSIZE];
561 static int prevbytes[PUSHBACK_BUFSIZE];
562 static int prevparse[PUSHBACK_BUFSIZE];
563 
xxgetc(void)564 static int xxgetc(void)
565 {
566     int c;
567 
568     if(npush) c = pushback[--npush]; else  c = ptr_getc();
569 
570     prevpos = (prevpos + 1) % PUSHBACK_BUFSIZE;
571     prevbytes[prevpos] = ParseState.xxbyteno;
572     prevlines[prevpos] = ParseState.xxlineno;
573     prevparse[prevpos] = ParseState.xxparseno;
574     prevcols[prevpos] = ParseState.xxcolno;
575 
576     if (c == EOF) {
577 	EndOfFile = 1;
578 	return R_EOF;
579     }
580     R_ParseContextLast = (R_ParseContextLast + 1) % PARSE_CONTEXT_SIZE;
581     R_ParseContext[R_ParseContextLast] = (char) c;
582 
583     if (c == '\n') {
584 	ParseState.xxlineno += 1;
585 	ParseState.xxcolno = 0;
586     	ParseState.xxbyteno = 0;
587     	ParseState.xxparseno += 1;
588     } else {
589         /* We only advance the column for the 1st byte in UTF-8, so handle later bytes specially */
590 	if (!known_to_be_utf8 || (unsigned char)c < 0x80 || 0xC0 <= (unsigned char)c)
591             ParseState.xxcolno++;
592     	ParseState.xxbyteno++;
593     }
594 
595     if (c == '\t') ParseState.xxcolno = ((ParseState.xxcolno + 7) & ~7);
596 
597     R_ParseContextLine = ParseState.xxlineno;
598 
599     xxcharcount++;
600     return c;
601 }
602 
xxungetc(int c)603 static int xxungetc(int c)
604 {
605     /* this assumes that c was the result of xxgetc; if not, some edits will be needed */
606     ParseState.xxlineno = prevlines[prevpos];
607     ParseState.xxbyteno = prevbytes[prevpos];
608     ParseState.xxcolno  = prevcols[prevpos];
609     ParseState.xxparseno = prevparse[prevpos];
610 
611     prevpos = (prevpos + PUSHBACK_BUFSIZE - 1) % PUSHBACK_BUFSIZE;
612 
613     R_ParseContextLine = ParseState.xxlineno;
614 
615     xxcharcount--;
616     R_ParseContext[R_ParseContextLast] = '\0';
617     /* precaution as to how % is implemented for < 0 numbers */
618     R_ParseContextLast = (R_ParseContextLast + PARSE_CONTEXT_SIZE -1) % PARSE_CONTEXT_SIZE;
619     if(npush >= PUSHBACK_BUFSIZE) return EOF;
620     pushback[npush++] = c;
621     return c;
622 }
623 
624 /* Only used from finish_mbcs_in_parse_context. */
add_mbcs_byte_to_parse_context()625 static int add_mbcs_byte_to_parse_context()
626 {
627     int c;
628 
629     if (EndOfFile)
630 	error(_("invalid multibyte character in parser at line %d"),
631 	      ParseState.xxlineno);
632     if(npush)
633 	c = pushback[--npush];
634     else
635 	c = ptr_getc();
636     if (c == EOF)
637 	error(_("invalid multibyte character in parser at line %d"),
638 	      ParseState.xxlineno);
639 
640     R_ParseContextLast = (R_ParseContextLast + 1) % PARSE_CONTEXT_SIZE;
641     R_ParseContext[R_ParseContextLast] = (char) c;
642     return c;
643 }
644 
645 /* On error, the parse context may end inside a multi-byte character. Add
646    the missing bytes to the context to so that it contains full characters. */
finish_mbcs_in_parse_context()647 static void finish_mbcs_in_parse_context()
648 {
649     int i, c, nbytes = 0, first;
650     Rboolean mbcs = FALSE;
651 
652     /* find the first byte of the context */
653     for(i = R_ParseContextLast;
654         R_ParseContext[i];
655         i = (i + PARSE_CONTEXT_SIZE - 1) % PARSE_CONTEXT_SIZE) {
656 
657 	nbytes++;
658 	if (nbytes == PARSE_CONTEXT_SIZE)
659 	    break;
660     }
661     if (!nbytes)
662 	return;
663     if (!R_ParseContext[i])
664 	first = (i + 1) % PARSE_CONTEXT_SIZE;
665     else
666 	/* The beginning of the context has been overwritten and for a general
667 	   encoding there is not way to recover it. It is possible for UTF-8,
668 	   though. */
669 	return;
670 
671     /* decode multi-byte characters */
672     for(i = 0; i < nbytes; i++) {
673 	c = R_ParseContext[(first + i) % PARSE_CONTEXT_SIZE];
674 	if ((unsigned int)c < 0x80) continue; /* ASCII */
675 
676 	if (utf8locale) {
677 	    /* UTF-8 could be handled more efficiently, searching from the end
678 	       of the string */
679 	    i += utf8clen((char) c) - 1;
680 	    if (i >= nbytes) {
681 		while (i >= nbytes) {
682 		    add_mbcs_byte_to_parse_context();
683 		    nbytes++;
684 		}
685 		return;
686 	    }
687 	} else
688 	    mbcs = TRUE;
689     }
690     if (!mbcs)
691 	return;
692 
693     /* copy the context to a linear buffer */
694     char buf[nbytes + R_MB_CUR_MAX];
695 
696     for(i = 0; i < nbytes; i++)
697 	buf[i] = R_ParseContext[(first + i) % PARSE_CONTEXT_SIZE];
698 
699     for(i = 0; i < nbytes; i++) {
700 	wchar_t wc;
701 	int res;
702 	mbstate_t mb_st;
703 
704 	mbs_init(&mb_st);
705 	res = (int) mbrtowc(&wc, buf + i, nbytes - i, &mb_st);
706 	while (res == -2 && nbytes < sizeof(buf)) {
707 	    /* This is not necessarily correct for stateful MBCS */
708 	    buf[nbytes++] = (char) add_mbcs_byte_to_parse_context();
709 	    mbs_init(&mb_st);
710 	    res = (int) mbrtowc(&wc, buf + i, nbytes - i, &mb_st);
711 	}
712 	if (res == -1)
713 	    error(_("invalid multibyte character in parser at line %d"),
714 		  ParseState.xxlineno);
715 	i += res - 1;
716     }
717 }
718 
719 /*
720  * Increments/inits the token/grouping counter
721  */
incrementId(void)722 static void incrementId(void){
723 	identifier++;
724 }
725 
initId(void)726 static void initId(void){
727 	identifier = 0 ;
728 }
729 
makeSrcref(YYLTYPE * lloc,SEXP srcfile)730 static SEXP makeSrcref(YYLTYPE *lloc, SEXP srcfile)
731 {
732     SEXP val;
733 
734     PROTECT(val = allocVector(INTSXP, 8));
735     INTEGER(val)[0] = lloc->first_line;
736     INTEGER(val)[1] = lloc->first_byte;
737     INTEGER(val)[2] = lloc->last_line;
738     INTEGER(val)[3] = lloc->last_byte;
739     INTEGER(val)[4] = lloc->first_column;
740     INTEGER(val)[5] = lloc->last_column;
741     INTEGER(val)[6] = lloc->first_parsed;
742     INTEGER(val)[7] = lloc->last_parsed;
743     setAttrib(val, R_SrcfileSymbol, srcfile);
744     setAttrib(val, R_ClassSymbol, mkString("srcref"));
745     UNPROTECT(1); /* val */
746     return val;
747 }
748 
attachSrcrefs(SEXP val)749 static void attachSrcrefs(SEXP val)
750 {
751     SEXP srval;
752 
753     PROTECT(srval = SrcRefsToVectorList());
754 
755     setAttrib(val, R_SrcrefSymbol, srval);
756     setAttrib(val, R_SrcfileSymbol, PS_SRCFILE);
757     {
758 	YYLTYPE wholeFile;
759 	wholeFile.first_line = 1;
760 	wholeFile.first_byte = 0;
761 	wholeFile.first_column = 0;
762 	wholeFile.last_line = ParseState.xxlineno;
763 	wholeFile.last_byte = ParseState.xxbyteno;
764 	wholeFile.last_column = ParseState.xxcolno;
765 	wholeFile.first_parsed = 1;
766 	wholeFile.last_parsed = ParseState.xxparseno;
767 	setAttrib(val, R_WholeSrcrefSymbol, makeSrcref(&wholeFile, PS_SRCFILE));
768     }
769     PS_SET_SRCREFS(R_NilValue);
770     ParseState.didAttach = TRUE;
771     UNPROTECT(1); /* srval */
772 }
773 
xxvalue(SEXP v,int k,YYLTYPE * lloc)774 static int xxvalue(SEXP v, int k, YYLTYPE *lloc)
775 {
776     if (k > 2) {
777 	if (ParseState.keepSrcRefs) {
778 	    SEXP s = PROTECT(makeSrcref(lloc, PS_SRCFILE));
779 	    AppendToSrcRefs(s);
780 	    UNPROTECT(1); /* s */
781 	}
782 	RELEASE_SV(v);
783     }
784     R_CurrentExpr = v;
785     return k;
786 }
787 
xxnullformal()788 static SEXP xxnullformal()
789 {
790     SEXP ans;
791     PRESERVE_SV(ans = R_NilValue);
792     return ans;
793 }
794 
xxfirstformal0(SEXP sym)795 static SEXP xxfirstformal0(SEXP sym)
796 {
797     SEXP ans;
798     if (GenerateCode)
799 	PRESERVE_SV(ans = FirstArg(R_MissingArg, sym));
800     else
801 	PRESERVE_SV(ans = R_NilValue);
802     RELEASE_SV(sym);
803     return ans;
804 }
805 
xxfirstformal1(SEXP sym,SEXP expr)806 static SEXP xxfirstformal1(SEXP sym, SEXP expr)
807 {
808     SEXP ans;
809     if (GenerateCode)
810 	PRESERVE_SV(ans = FirstArg(expr, sym));
811     else
812 	PRESERVE_SV(ans = R_NilValue);
813     RELEASE_SV(expr);
814     RELEASE_SV(sym);
815     return ans;
816 }
817 
xxaddformal0(SEXP formlist,SEXP sym,YYLTYPE * lloc)818 static SEXP xxaddformal0(SEXP formlist, SEXP sym, YYLTYPE *lloc)
819 {
820     SEXP ans;
821     if (GenerateCode) {
822 	CheckFormalArgs(formlist, sym, lloc);
823 	NextArg(formlist, R_MissingArg, sym);
824 	ans = formlist;
825     } else {
826 	RELEASE_SV(formlist);
827 	PRESERVE_SV(ans = R_NilValue);
828     }
829     RELEASE_SV(sym);
830     return ans;
831 }
832 
xxaddformal1(SEXP formlist,SEXP sym,SEXP expr,YYLTYPE * lloc)833 static SEXP xxaddformal1(SEXP formlist, SEXP sym, SEXP expr, YYLTYPE *lloc)
834 {
835     SEXP ans;
836     if (GenerateCode) {
837 	CheckFormalArgs(formlist, sym, lloc);
838 	NextArg(formlist, expr, sym);
839 	ans = formlist;
840     } else {
841 	RELEASE_SV(formlist);
842 	PRESERVE_SV(ans = R_NilValue);
843     }
844     RELEASE_SV(expr);
845     RELEASE_SV(sym);
846     return ans;
847 }
848 
xxexprlist0(void)849 static SEXP xxexprlist0(void)
850 {
851     SEXP ans;
852     if (GenerateCode) {
853 	PRESERVE_SV(ans = NewList());
854 	if (ParseState.keepSrcRefs) {
855 	    setAttrib(ans, R_SrcrefSymbol, PS_SRCREFS);
856 	    PS_SET_SRCREFS(R_NilValue);
857 	}
858     }
859     else
860 	PRESERVE_SV(ans = R_NilValue);
861     return ans;
862 }
863 
xxexprlist1(SEXP expr,YYLTYPE * lloc)864 static SEXP xxexprlist1(SEXP expr, YYLTYPE *lloc)
865 {
866     SEXP ans;
867     if (GenerateCode) {
868 	PRESERVE_SV(ans = NewList());
869 	if (ParseState.keepSrcRefs) {
870 	    setAttrib(ans, R_SrcrefSymbol, PS_SRCREFS);
871 	    SEXP s = PROTECT(makeSrcref(lloc, PS_SRCFILE));
872 	    SetSingleSrcRef(s);
873 	    UNPROTECT(1); /* s */
874 	}
875 	GrowList(ans, expr);
876     }
877     else
878 	PRESERVE_SV(ans = R_NilValue);
879     RELEASE_SV(expr);
880     return ans;
881 }
882 
xxexprlist2(SEXP exprlist,SEXP expr,YYLTYPE * lloc)883 static SEXP xxexprlist2(SEXP exprlist, SEXP expr, YYLTYPE *lloc)
884 {
885     SEXP ans;
886     if (GenerateCode) {
887 	if (ParseState.keepSrcRefs) {
888 	    SEXP s = PROTECT(makeSrcref(lloc, PS_SRCFILE));
889 	    AppendToSrcRefs(s);
890 	    UNPROTECT(1); /* s */
891 	}
892 	GrowList(exprlist, expr);
893 	ans = exprlist;
894     } else {
895 	RELEASE_SV(exprlist);
896 	PRESERVE_SV(ans = R_NilValue);
897     }
898     RELEASE_SV(expr);
899     return ans;
900 }
901 
xxsub0(void)902 static SEXP xxsub0(void)
903 {
904     SEXP ans;
905     if (GenerateCode)
906 	PRESERVE_SV(ans = lang2(R_MissingArg,R_NilValue));
907     else
908 	PRESERVE_SV(ans = R_NilValue);
909     return ans;
910 }
911 
xxsub1(SEXP expr,YYLTYPE * lloc)912 static SEXP xxsub1(SEXP expr, YYLTYPE *lloc)
913 {
914     SEXP ans;
915     if (GenerateCode)
916 	PRESERVE_SV(ans = TagArg(expr, R_NilValue, lloc));
917     else
918 	PRESERVE_SV(ans = R_NilValue);
919     RELEASE_SV(expr);
920     return ans;
921 }
922 
xxsymsub0(SEXP sym,YYLTYPE * lloc)923 static SEXP xxsymsub0(SEXP sym, YYLTYPE *lloc)
924 {
925     SEXP ans;
926     if (GenerateCode)
927 	PRESERVE_SV(ans = TagArg(R_MissingArg, sym, lloc));
928     else
929 	PRESERVE_SV(ans = R_NilValue);
930     RELEASE_SV(sym);
931     return ans;
932 }
933 
xxsymsub1(SEXP sym,SEXP expr,YYLTYPE * lloc)934 static SEXP xxsymsub1(SEXP sym, SEXP expr, YYLTYPE *lloc)
935 {
936     SEXP ans;
937     if (GenerateCode)
938 	PRESERVE_SV(ans = TagArg(expr, sym, lloc));
939     else
940 	PRESERVE_SV(ans = R_NilValue);
941     RELEASE_SV(expr);
942     RELEASE_SV(sym);
943     return ans;
944 }
945 
xxnullsub0(YYLTYPE * lloc)946 static SEXP xxnullsub0(YYLTYPE *lloc)
947 {
948     SEXP ans;
949     if (GenerateCode)
950 	PRESERVE_SV(ans = TagArg(R_MissingArg, R_NullSymbol, lloc));
951     else
952 	PRESERVE_SV(ans = R_NilValue);
953     RELEASE_SV(R_NilValue);
954     return ans;
955 }
956 
xxnullsub1(SEXP expr,YYLTYPE * lloc)957 static SEXP xxnullsub1(SEXP expr, YYLTYPE *lloc)
958 {
959     SEXP ans;
960     if (GenerateCode)
961 	PRESERVE_SV(ans = TagArg(expr, R_NullSymbol, lloc));
962     else
963 	PRESERVE_SV(ans = R_NilValue);
964     RELEASE_SV(R_NilValue);
965     RELEASE_SV(expr);
966     return ans;
967 }
968 
969 
xxsublist1(SEXP sub)970 static SEXP xxsublist1(SEXP sub)
971 {
972     SEXP ans;
973     if (GenerateCode)
974 	PRESERVE_SV(ans = FirstArg(CAR(sub),CADR(sub)));
975     else
976 	PRESERVE_SV(ans = R_NilValue);
977     RELEASE_SV(sub);
978     return ans;
979 }
980 
xxsublist2(SEXP sublist,SEXP sub)981 static SEXP xxsublist2(SEXP sublist, SEXP sub)
982 {
983     SEXP ans;
984     if (GenerateCode) {
985 	NextArg(sublist, CAR(sub), CADR(sub));
986 	ans = sublist;
987     } else {
988 	RELEASE_SV(sublist);
989 	PRESERVE_SV(ans = R_NilValue);
990     }
991     RELEASE_SV(sub);
992     return ans;
993 }
994 
xxcond(SEXP expr)995 static SEXP xxcond(SEXP expr)
996 {
997     EatLines = 1;
998     return expr;
999 }
1000 
xxifcond(SEXP expr)1001 static SEXP xxifcond(SEXP expr)
1002 {
1003     EatLines = 1;
1004     return expr;
1005 }
1006 
xxif(SEXP ifsym,SEXP cond,SEXP expr)1007 static SEXP xxif(SEXP ifsym, SEXP cond, SEXP expr)
1008 {
1009     SEXP ans;
1010     if (GenerateCode)
1011 	PRESERVE_SV(ans = lang3(ifsym, cond, expr));
1012     else
1013 	PRESERVE_SV(ans = R_NilValue);
1014     RELEASE_SV(expr);
1015     RELEASE_SV(cond);
1016     return ans;
1017 }
1018 
xxifelse(SEXP ifsym,SEXP cond,SEXP ifexpr,SEXP elseexpr)1019 static SEXP xxifelse(SEXP ifsym, SEXP cond, SEXP ifexpr, SEXP elseexpr)
1020 {
1021     SEXP ans;
1022     if (GenerateCode)
1023 	PRESERVE_SV(ans = lang4(ifsym, cond, ifexpr, elseexpr));
1024     else
1025 	PRESERVE_SV(ans = R_NilValue);
1026     RELEASE_SV(elseexpr);
1027     RELEASE_SV(ifexpr);
1028     RELEASE_SV(cond);
1029     return ans;
1030 }
1031 
xxforcond(SEXP sym,SEXP expr)1032 static SEXP xxforcond(SEXP sym, SEXP expr)
1033 {
1034     SEXP ans;
1035     EatLines = 1;
1036     if (GenerateCode)
1037 	PRESERVE_SV(ans = LCONS(sym, expr));
1038     else
1039 	PRESERVE_SV(ans = R_NilValue);
1040     RELEASE_SV(expr);
1041     RELEASE_SV(sym);
1042     return ans;
1043 }
1044 
xxfor(SEXP forsym,SEXP forcond,SEXP body)1045 static SEXP xxfor(SEXP forsym, SEXP forcond, SEXP body)
1046 {
1047     SEXP ans;
1048     if (GenerateCode)
1049 	PRESERVE_SV(ans = lang4(forsym, CAR(forcond), CDR(forcond), body));
1050     else
1051 	PRESERVE_SV(ans = R_NilValue);
1052     RELEASE_SV(body);
1053     RELEASE_SV(forcond);
1054     return ans;
1055 }
1056 
xxwhile(SEXP whilesym,SEXP cond,SEXP body)1057 static SEXP xxwhile(SEXP whilesym, SEXP cond, SEXP body)
1058 {
1059     SEXP ans;
1060     if (GenerateCode)
1061 	PRESERVE_SV(ans = lang3(whilesym, cond, body));
1062     else
1063 	PRESERVE_SV(ans = R_NilValue);
1064     RELEASE_SV(body);
1065     RELEASE_SV(cond);
1066     return ans;
1067 }
1068 
xxrepeat(SEXP repeatsym,SEXP body)1069 static SEXP xxrepeat(SEXP repeatsym, SEXP body)
1070 {
1071     SEXP ans;
1072     if (GenerateCode)
1073 	PRESERVE_SV(ans = lang2(repeatsym, body));
1074     else
1075 	PRESERVE_SV(ans = R_NilValue);
1076     RELEASE_SV(body);
1077     return ans;
1078 }
1079 
xxnxtbrk(SEXP keyword)1080 static SEXP xxnxtbrk(SEXP keyword)
1081 {
1082     if (GenerateCode)
1083 	PRESERVE_SV(keyword = lang1(keyword));
1084     else
1085 	PRESERVE_SV(keyword = R_NilValue);
1086     return keyword;
1087 }
1088 
xxfuncall(SEXP expr,SEXP args)1089 static SEXP xxfuncall(SEXP expr, SEXP args)
1090 {
1091     SEXP ans, sav_expr = expr;
1092     if (GenerateCode) {
1093 	if (isString(expr))
1094 	    expr = installTrChar(STRING_ELT(expr, 0));
1095 	PROTECT(expr);
1096 	if (length(CDR(args)) == 1 && CADR(args) == R_MissingArg && TAG(CDR(args)) == R_NilValue )
1097 	    ans = lang1(expr);
1098 	else
1099 	    ans = LCONS(expr, CDR(args));
1100 	UNPROTECT(1); /* expr */
1101 	PRESERVE_SV(ans);
1102     } else
1103 	PRESERVE_SV(ans = R_NilValue);
1104 
1105     RELEASE_SV(args);
1106     RELEASE_SV(sav_expr);
1107     return ans;
1108 }
1109 
mkChar2(const char * name)1110 static SEXP mkChar2(const char *name)
1111 {
1112     cetype_t enc = CE_NATIVE;
1113 
1114     if(known_to_be_latin1) enc = CE_LATIN1;
1115     else if(known_to_be_utf8) enc = CE_UTF8;
1116 
1117     return mkCharLenCE(name, (int) strlen(name), enc);
1118 }
1119 
mkString2(const char * s,size_t len,Rboolean escaped)1120 static SEXP mkString2(const char *s, size_t len, Rboolean escaped)
1121 {
1122     SEXP t;
1123     cetype_t enc = CE_NATIVE;
1124 
1125     if(known_to_be_latin1) enc = CE_LATIN1;
1126     else if(!escaped && known_to_be_utf8) enc = CE_UTF8;
1127 
1128     PROTECT(t = allocVector(STRSXP, 1));
1129     SET_STRING_ELT(t, 0, mkCharLenCE(s, (int) len, enc));
1130     UNPROTECT(1); /* t */
1131     return t;
1132 }
1133 
xxdefun(SEXP fname,SEXP formals,SEXP body,YYLTYPE * lloc)1134 static SEXP xxdefun(SEXP fname, SEXP formals, SEXP body, YYLTYPE *lloc)
1135 {
1136     SEXP ans, srcref;
1137 
1138     if (GenerateCode) {
1139     	if (ParseState.keepSrcRefs) {
1140 	    srcref = makeSrcref(lloc, PS_SRCFILE);
1141     	    ParseState.didAttach = TRUE;
1142     	} else
1143     	    srcref = R_NilValue;
1144 	PRESERVE_SV(ans = lang4(fname, CDR(formals), body, srcref));
1145     } else
1146 	PRESERVE_SV(ans = R_NilValue);
1147     RELEASE_SV(body);
1148     RELEASE_SV(formals);
1149     return ans;
1150 }
1151 
xxunary(SEXP op,SEXP arg)1152 static SEXP xxunary(SEXP op, SEXP arg)
1153 {
1154     SEXP ans;
1155     if (GenerateCode)
1156 	PRESERVE_SV(ans = lang2(op, arg));
1157     else
1158 	PRESERVE_SV(ans = R_NilValue);
1159     RELEASE_SV(arg);
1160     return ans;
1161 }
1162 
xxbinary(SEXP n1,SEXP n2,SEXP n3)1163 static SEXP xxbinary(SEXP n1, SEXP n2, SEXP n3)
1164 {
1165     SEXP ans;
1166     if (GenerateCode)
1167 	PRESERVE_SV(ans = lang3(n1, n2, n3));
1168     else
1169 	PRESERVE_SV(ans = R_NilValue);
1170     RELEASE_SV(n2);
1171     RELEASE_SV(n3);
1172     return ans;
1173 }
1174 
1175 static SEXP findPlaceholderCell(SEXP, SEXP);
1176 
check_rhs(SEXP rhs)1177 static void check_rhs(SEXP rhs)
1178 {
1179     if (TYPEOF(rhs) != LANGSXP)
1180 	error(_("The pipe operator requires a function call as RHS"));
1181 
1182     /* rule out syntactically special functions */
1183     /* the IS_SPECIAL_SYMBOL bit is set in names.c */
1184     SEXP fun = CAR(rhs);
1185     if (TYPEOF(fun) == SYMSXP && IS_SPECIAL_SYMBOL(fun))
1186 	error("function '%s' not supported in RHS call of a pipe",
1187 	      CHAR(PRINTNAME(fun)));
1188 }
1189 
xxpipe(SEXP lhs,SEXP rhs)1190 static SEXP xxpipe(SEXP lhs, SEXP rhs)
1191 {
1192     SEXP ans;
1193     if (GenerateCode) {
1194 	/* allow x => log(x) on RHS */
1195 	if (TYPEOF(rhs) == LANGSXP && CAR(rhs) == R_PipeBindSymbol) {
1196 	    SEXP var = CADR(rhs);
1197 	    SEXP expr = CADDR(rhs);
1198 	    check_rhs(expr);
1199 	    SEXP phcell = findPlaceholderCell(var, expr);
1200 	    if (phcell == NULL)
1201 		error(_("no placeholder found on RHS"));
1202 	    SETCAR(phcell, lhs);
1203 	    return expr;
1204 	}
1205 
1206 	check_rhs(rhs);
1207 
1208         SEXP fun = CAR(rhs);
1209         SEXP args = CDR(rhs);
1210 	PRESERVE_SV(ans = lcons(fun, lcons(lhs, args)));
1211     }
1212     else {
1213 	PRESERVE_SV(ans = R_NilValue);
1214     }
1215     RELEASE_SV(lhs);
1216     RELEASE_SV(rhs);
1217     return ans;
1218 }
1219 
xxpipebind(SEXP fn,SEXP lhs,SEXP rhs)1220 static SEXP xxpipebind(SEXP fn, SEXP lhs, SEXP rhs)
1221 {
1222     static int use_pipebind = 0;
1223     if (use_pipebind != 1) {
1224 	char *lookup = getenv("_R_USE_PIPEBIND_");
1225 	use_pipebind = ((lookup != NULL) && StringTrue(lookup)) ? 1 : 0;
1226     }
1227 
1228     if (use_pipebind)
1229 	return xxbinary(fn, lhs, rhs);
1230     else
1231 	error("'=>' is disabled; set '_R_USE_PIPEBIND_' envvar to a true value to enable it");
1232 }
1233 
xxparen(SEXP n1,SEXP n2)1234 static SEXP xxparen(SEXP n1, SEXP n2)
1235 {
1236     SEXP ans;
1237     if (GenerateCode)
1238 	PRESERVE_SV(ans = lang2(n1, n2));
1239     else
1240 	PRESERVE_SV(ans = R_NilValue);
1241     RELEASE_SV(n2);
1242     return ans;
1243 }
1244 
1245 
1246 /* This should probably use CONS rather than LCONS, but
1247    it shouldn't matter and we would rather not meddle
1248    See PR#7055 */
1249 
xxsubscript(SEXP a1,SEXP a2,SEXP a3)1250 static SEXP xxsubscript(SEXP a1, SEXP a2, SEXP a3)
1251 {
1252     SEXP ans;
1253     if (GenerateCode)
1254 	PRESERVE_SV(ans = LCONS(a2, CONS(a1, CDR(a3))));
1255     else
1256 	PRESERVE_SV(ans = R_NilValue);
1257     RELEASE_SV(a3);
1258     RELEASE_SV(a1);
1259     return ans;
1260 }
1261 
xxexprlist(SEXP a1,YYLTYPE * lloc,SEXP a2)1262 static SEXP xxexprlist(SEXP a1, YYLTYPE *lloc, SEXP a2)
1263 {
1264     SEXP ans;
1265     SEXP prevSrcrefs;
1266 
1267     EatLines = 0;
1268     if (GenerateCode) {
1269 	SET_TYPEOF(a2, LANGSXP);
1270 	SETCAR(a2, a1);
1271 	if (ParseState.keepSrcRefs) {
1272 	    PROTECT(prevSrcrefs = getAttrib(a2, R_SrcrefSymbol));
1273 	    SEXP s = PROTECT(makeSrcref(lloc, PS_SRCFILE));
1274 	    PrependToSrcRefs(s);
1275 	    attachSrcrefs(a2);
1276 	    UNPROTECT(2); /* prevSrcrefs, s */
1277 #ifndef SWITCH_TO_REFCNT
1278 	    /* SrcRefs got NAMED by being an attribute, preventively
1279 	       getAttrib(), but it has not in fact been referenced. Set NAMED
1280 	       to 0 to avoid overhead in further setAttrib calls due to cycle
1281 	       detection. */
1282 	    SET_NAMED(prevSrcrefs, 0);
1283 #endif
1284 	    PS_SET_SRCREFS(prevSrcrefs);
1285 	}
1286 	PRESERVE_SV(ans = a2);
1287     }
1288     else
1289 	PRESERVE_SV(ans = R_NilValue);
1290     RELEASE_SV(a2);
1291     return ans;
1292 }
1293 
1294 /*--------------------------------------------------------------------------*/
1295 
TagArg(SEXP arg,SEXP tag,YYLTYPE * lloc)1296 static SEXP TagArg(SEXP arg, SEXP tag, YYLTYPE *lloc)
1297 {
1298     switch (TYPEOF(tag)) {
1299     case STRSXP:
1300 	tag = installTrChar(STRING_ELT(tag, 0));
1301     case NILSXP:
1302     case SYMSXP:
1303 	return lang2(arg, tag);
1304     default:
1305 	error(_("incorrect tag type at line %d"), lloc->first_line); return R_NilValue/* -Wall */;
1306     }
1307 }
1308 
1309 
1310 /* Stretchy List Structures : Lists are created and grown using a special */
1311 /* dotted pair.  The CAR of the list points to the last cons-cell in the */
1312 /* list and the CDR points to the first.  The list can be extracted from */
1313 /* the pair by taking its CDR, while the CAR gives fast access to the end */
1314 /* of the list. */
1315 
1316 /* These functions must be called with arguments protected */
1317 
1318 /* Create a stretchy-list dotted pair */
NewList(void)1319 static SEXP NewList(void)
1320 {
1321     SEXP s = CONS(R_NilValue, R_NilValue);
1322     SETCAR(s, s);
1323     return s;
1324 }
1325 
1326 /* Add a new element at the end of a stretchy list */
GrowList(SEXP l,SEXP s)1327 static void GrowList(SEXP l, SEXP s)
1328 {
1329     SEXP tmp;
1330     tmp = CONS(s, R_NilValue);
1331     SETCDR(CAR(l), tmp);
1332     SETCAR(l, tmp);
1333 }
1334 
1335 /* Create a stretchy list with a single named element */
FirstArg(SEXP s,SEXP tag)1336 static SEXP FirstArg(SEXP s, SEXP tag)
1337 {
1338     SEXP tmp;
1339     PROTECT(tmp = NewList());
1340     GrowList(tmp, s);
1341     SET_TAG(CAR(tmp), tag);
1342     UNPROTECT(1); /* tmp */
1343     return tmp;
1344 }
1345 
1346 /* Add named element to the end of a stretchy list */
NextArg(SEXP l,SEXP s,SEXP tag)1347 static void NextArg(SEXP l, SEXP s, SEXP tag)
1348 {
1349     GrowList(l, s);
1350     SET_TAG(CAR(l), tag);
1351 }
1352 
1353 /* SrcRefs (PS_SRCREFS) are represented as R_NilValue (empty) or by
1354    a stretchy list (which includes another representation for empty)
1355    for fast append operation. */
1356 
SetSingleSrcRef(SEXP r)1357 static void SetSingleSrcRef(SEXP r)
1358 {
1359     SEXP l;
1360 
1361     PROTECT(l = NewList());
1362     GrowList(l, r);
1363     PS_SET_SRCREFS(l);
1364     UNPROTECT(1); /* l */
1365 }
1366 
AppendToSrcRefs(SEXP r)1367 static void AppendToSrcRefs(SEXP r)
1368 {
1369     SEXP l = PS_SRCREFS;
1370     if (l == R_NilValue)
1371 	SetSingleSrcRef(r);
1372     else
1373 	GrowList(l, r);
1374 }
1375 
PrependToSrcRefs(SEXP r)1376 static void PrependToSrcRefs(SEXP r)
1377 {
1378     SEXP l = PS_SRCREFS;
1379     if (l == R_NilValue)
1380 	SetSingleSrcRef(r);
1381     else if (CDR(l) == R_NilValue)
1382 	/* adding to empty stretchy list */
1383 	GrowList(l, r);
1384     else {
1385 	SEXP tmp = CONS(r, CDR(l));
1386 	SETCDR(l, tmp);
1387     }
1388 }
1389 
SrcRefsToVectorList()1390 static SEXP SrcRefsToVectorList() {
1391     SEXP l = PS_SRCREFS;
1392     if (l == R_NilValue)
1393 	return PairToVectorList(l);
1394     else
1395 	return PairToVectorList(CDR(l));
1396 }
1397 
1398 /*--------------------------------------------------------------------------*/
1399 
1400 /*
1401  *  Parsing Entry Points:
1402  *
1403  *  The Following entry points provide language parsing facilities.
1404  *  Note that there are separate entry points for parsing IoBuffers
1405  *  (i.e. interactve use), files and R character strings.
1406  *
1407  *  The entry points provide the same functionality, they just
1408  *  set things up in slightly different ways.
1409  *
1410  *  The following routines parse a single expression:
1411  *
1412  *
1413  *	SEXP R_Parse1File(FILE *fp, int gencode, ParseStatus *status, Rboolean first)
1414  *   (used for R_ReplFile in main.c)
1415  *
1416  *	SEXP R_Parse1Buffer(IoBuffer *buffer, int gencode, ParseStatus *status, Rboolean first)
1417  *   (used for ReplIteration and R_ReplDLLdo1 in main.c)
1418  *
1419  *  The success of the parse is indicated as folllows:
1420  *
1421  *
1422  *	status = PARSE_NULL       - there was no statement to parse
1423  *		 PARSE_OK	  - complete statement
1424  *		 PARSE_INCOMPLETE - incomplete statement
1425  *		 PARSE_ERROR      - syntax error
1426  *		 PARSE_EOF	  - end of file
1427  *
1428  *
1429  *  The following routines parse several expressions and return
1430  *  their values in a single expression vector.
1431  *
1432  *	SEXP R_ParseFile(FILE *fp, int n, ParseStatus *status, SEXP srcfile)
1433  *    (used for do_edit in file edit.c)
1434  *
1435  *	SEXP R_ParseVector(SEXP *text, int n, ParseStatus *status, SEXP srcfile)
1436  *    (public, and used by parse(text=) in file source.c)
1437  *
1438  *	SEXP R_ParseBuffer(IoBuffer *buffer, int n, ParseStatus *status, SEXP prompt, SEXP srcfile)
1439  *    (used by parse(file="") in file source.c)
1440  *
1441  *      SEXP R_ParseConn(Rconnection con, int n, ParseStatus *status, SEXP srcfile)
1442  *    (used by parse(file=) in file source.c)
1443  *
1444  *  Here, status is 1 for a successful parse and 0 if parsing failed
1445  *  for some reason.
1446  */
1447 
1448 #define CONTEXTSTACK_SIZE 50
1449 static int	SavedToken;
1450 static SEXP	SavedLval;
1451 static char	contextstack[CONTEXTSTACK_SIZE], *contextp;
1452 
1453 static void PutSrcRefState(SrcRefState *state);
1454 static void UseSrcRefState(SrcRefState *state);
1455 
1456 /* This is called once when R starts up. */
1457 attribute_hidden
InitParser(void)1458 void InitParser(void)
1459 {
1460     ParseState.sexps = allocVector(VECSXP, 7); /* initialized to R_NilValue */
1461     ParseState.data = R_NilValue;
1462     INIT_SVS();
1463     R_PreserveObject(ParseState.sexps); /* never released in an R session */
1464     R_NullSymbol = install("NULL");
1465     R_PipeBindSymbol = install("=>");
1466 }
1467 
FinalizeSrcRefStateOnError(void * dummy)1468 static void FinalizeSrcRefStateOnError(void *dummy)
1469 {
1470     R_FinalizeSrcRefState();
1471 }
1472 
1473 /* This is called each time a new parse sequence begins */
1474 attribute_hidden
R_InitSrcRefState(RCNTXT * cptr)1475 void R_InitSrcRefState(RCNTXT* cptr)
1476 {
1477     if (busy) {
1478     	SrcRefState *prev = malloc(sizeof(SrcRefState));
1479 	if (prev == NULL)
1480 	    error(_("allocation of source reference state failed"));
1481     	PutSrcRefState(prev);
1482 	ParseState.prevState = prev;
1483 	ParseState.sexps = allocVector(VECSXP, 7);
1484 	ParseState.data = R_NilValue;
1485 	INIT_SVS();
1486 	R_PreserveObject(ParseState.sexps);
1487 	/* ParseState.sexps released in R_FinalizeSrcRefState */
1488     } else
1489 	/* re-use data, text, ids arrays */
1490         ParseState.prevState = NULL;
1491     /* set up context _after_ PutSrcRefState */
1492     begincontext(cptr, CTXT_CCODE, R_NilValue, R_BaseEnv, R_BaseEnv,
1493                  R_NilValue, R_NilValue);
1494     cptr->cend = &FinalizeSrcRefStateOnError;
1495     cptr->cenddata = NULL;
1496     ParseState.keepSrcRefs = FALSE;
1497     ParseState.keepParseData = TRUE;
1498     ParseState.didAttach = FALSE;
1499     PS_SET_SRCFILE(R_NilValue);
1500     PS_SET_ORIGINAL(R_NilValue);
1501     ParseState.data_count = 0;
1502     ParseState.xxlineno = 1;
1503     ParseState.xxcolno = 0;
1504     ParseState.xxbyteno = 0;
1505     ParseState.xxparseno = 1;
1506     busy = TRUE;
1507 }
1508 
1509 attribute_hidden
R_FinalizeSrcRefState(void)1510 void R_FinalizeSrcRefState(void)
1511 {
1512     PS_SET_SRCFILE(R_NilValue);
1513     PS_SET_ORIGINAL(R_NilValue);
1514     CLEAR_SVS();
1515 
1516     /* Free the data, text and ids if we are restoring a previous state,
1517        or if they have grown too large */
1518     if (PS_DATA != R_NilValue) {
1519     	if (ParseState.prevState || DATA_COUNT > MAX_DATA_COUNT) {
1520 	    PS_SET_DATA(R_NilValue);
1521 	    PS_SET_TEXT(R_NilValue);
1522 	} else /* Remove all the strings from the text vector so they don't take up memory, and clean up data */
1523 	    for (int i=0; i < ParseState.data_count; i++) {
1524 		SET_STRING_ELT(PS_TEXT, i, R_BlankString);
1525 		_PARENT(i) = 0;
1526 	    }
1527     }
1528     if (PS_IDS != R_NilValue) {
1529 	if (ParseState.prevState || ID_COUNT > MAX_DATA_COUNT) {
1530 	    PS_SET_IDS(R_NilValue);
1531         } else {/* Remove the parent records */
1532             if (identifier > ID_COUNT) identifier = ID_COUNT;
1533             for (int i=0; i < identifier; i++) {
1534 		ID_ID(i) = 0;
1535 	        ID_PARENT(i) = 0;
1536 	    }
1537 	}
1538     }
1539     ParseState.data_count = NA_INTEGER;
1540     if (ParseState.prevState) {
1541 	R_ReleaseObject(ParseState.sexps);
1542         SrcRefState *prev = ParseState.prevState;
1543     	UseSrcRefState(prev);
1544     	free(prev);
1545     } else
1546         busy = FALSE;
1547 }
1548 
UseSrcRefState(SrcRefState * state)1549 static void UseSrcRefState(SrcRefState *state)
1550 {
1551     ParseState.keepSrcRefs = state->keepSrcRefs;
1552     ParseState.keepParseData = state->keepParseData;
1553     ParseState.sexps = state->sexps;
1554     ParseState.data = state->data;
1555     ParseState.data_count = state->data_count;
1556     ParseState.xxlineno = state->xxlineno;
1557     ParseState.xxcolno = state->xxcolno;
1558     ParseState.xxbyteno = state->xxbyteno;
1559     ParseState.xxparseno = state->xxparseno;
1560     ParseState.prevState = state->prevState;
1561     busy = TRUE;
1562 }
1563 
PutSrcRefState(SrcRefState * state)1564 static void PutSrcRefState(SrcRefState *state)
1565 {
1566     state->keepSrcRefs = ParseState.keepSrcRefs;
1567     state->keepParseData = ParseState.keepParseData;
1568     state->sexps = ParseState.sexps;
1569     state->data = ParseState.data;
1570     state->data_count = ParseState.data_count;
1571     state->xxlineno = ParseState.xxlineno;
1572     state->xxcolno = ParseState.xxcolno;
1573     state->xxbyteno = ParseState.xxbyteno;
1574     state->xxparseno = ParseState.xxparseno;
1575     state->prevState = ParseState.prevState;
1576 }
1577 
ParseInit(void)1578 static void ParseInit(void)
1579 {
1580     contextp = contextstack;
1581     *contextp = ' ';
1582     SavedToken = 0;
1583     SavedLval = R_NilValue;
1584     EatLines = 0;
1585     EndOfFile = 0;
1586     xxcharcount = 0;
1587     npush = 0;
1588 }
1589 
initData(void)1590 static void initData(void)
1591 {
1592     ParseState.data_count = 0 ;
1593 }
1594 
1595 
ParseContextInit(void)1596 static void ParseContextInit(void)
1597 {
1598     R_ParseContextLast = 0;
1599     R_ParseContext[0] = '\0';
1600 
1601     /* starts the identifier counter*/
1602     initId();
1603     initData();
1604 }
1605 
R_Parse1(ParseStatus * status)1606 static SEXP R_Parse1(ParseStatus *status)
1607 {
1608     switch(yyparse()) {
1609     case 0:                     /* End of file */
1610 	*status = PARSE_EOF;
1611 	if (EndOfFile == 2) *status = PARSE_INCOMPLETE;
1612 	break;
1613     case 1:                     /* Syntax error / incomplete */
1614 	*status = PARSE_ERROR;
1615 	if (EndOfFile) *status = PARSE_INCOMPLETE;
1616 	break;
1617     case 2:                     /* Empty Line */
1618 	*status = PARSE_NULL;
1619 	break;
1620     case 3:                     /* Valid expr '\n' terminated */
1621     case 4:                     /* Valid expr ';' terminated */
1622 	*status = PARSE_OK;
1623 	break;
1624     }
1625     return R_CurrentExpr;
1626 }
1627 
1628 static FILE *fp_parse;
1629 
file_getc(void)1630 static int file_getc(void)
1631 {
1632     return R_fgetc(fp_parse);
1633 }
1634 
1635 /* used in main.c */
1636 attribute_hidden
R_Parse1File(FILE * fp,int gencode,ParseStatus * status)1637 SEXP R_Parse1File(FILE *fp, int gencode, ParseStatus *status)
1638 {
1639     ParseInit();
1640     ParseContextInit();
1641     GenerateCode = gencode;
1642     fp_parse = fp;
1643     ptr_getc = file_getc;
1644     R_Parse1(status);
1645     CLEAR_SVS();
1646     return R_CurrentExpr;
1647 }
1648 
1649 static IoBuffer *iob;
1650 
buffer_getc(void)1651 static int buffer_getc(void)
1652 {
1653     return R_IoBufferGetc(iob);
1654 }
1655 
1656 /* Used only in main.c */
1657 attribute_hidden
R_Parse1Buffer(IoBuffer * buffer,int gencode,ParseStatus * status)1658 SEXP R_Parse1Buffer(IoBuffer *buffer, int gencode, ParseStatus *status)
1659 {
1660     Rboolean keepSource = FALSE;
1661     RCNTXT cntxt;
1662 
1663     R_InitSrcRefState(&cntxt);
1664     if (gencode) {
1665     	keepSource = asLogical(GetOption1(install("keep.source")));
1666     	if (keepSource) {
1667     	    ParseState.keepSrcRefs = TRUE;
1668 	    ParseState.keepParseData =
1669 		asLogical(GetOption1(install("keep.parse.data")));
1670 	    PS_SET_SRCFILE(NewEnvironment(R_NilValue, R_NilValue, R_EmptyEnv));
1671 	    PS_SET_ORIGINAL(PS_SRCFILE);
1672 	    PS_SET_SRCREFS(R_NilValue);
1673 	}
1674     }
1675     ParseInit();
1676     ParseContextInit();
1677     GenerateCode = gencode;
1678     iob = buffer;
1679     ptr_getc = buffer_getc;
1680     R_Parse1(status);
1681     if (gencode && keepSource) {
1682     	if (ParseState.didAttach) {
1683    	    int buflen = R_IoBufferReadOffset(buffer);
1684    	    char buf[buflen+1];
1685    	    SEXP class;
1686    	    R_IoBufferReadReset(buffer);
1687    	    for (int i=0; i<buflen; i++)
1688    	    	buf[i] = (char) R_IoBufferGetc(buffer);
1689 
1690    	    buf[buflen] = 0;
1691 	    SEXP s_filename = install("filename");
1692 	    defineVar(s_filename, ScalarString(mkChar("")), PS_ORIGINAL);
1693 	    SEXP s_lines = install("lines");
1694 	    defineVar(s_lines, ScalarString(mkChar2(buf)), PS_ORIGINAL);
1695     	    PROTECT(class = allocVector(STRSXP, 2));
1696             SET_STRING_ELT(class, 0, mkChar("srcfilecopy"));
1697             SET_STRING_ELT(class, 1, mkChar("srcfile"));
1698 	    setAttrib(PS_ORIGINAL, R_ClassSymbol, class);
1699 	    UNPROTECT(1); /* class */
1700 	}
1701     }
1702     PROTECT(R_CurrentExpr);
1703     endcontext(&cntxt);
1704     R_FinalizeSrcRefState();
1705     UNPROTECT(1); /* R_CurrentExpr */
1706     return R_CurrentExpr;
1707 }
1708 
1709 static TextBuffer *txtb;
1710 
text_getc(void)1711 static int text_getc(void)
1712 {
1713     return R_TextBufferGetc(txtb);
1714 }
1715 
R_Parse(int n,ParseStatus * status,SEXP srcfile)1716 static SEXP R_Parse(int n, ParseStatus *status, SEXP srcfile)
1717 {
1718     int i;
1719     SEXP t, rval;
1720     RCNTXT cntxt;
1721 
1722     R_InitSrcRefState(&cntxt);
1723     ParseContextInit();
1724 
1725     PS_SET_SRCFILE(srcfile);
1726     PS_SET_ORIGINAL(srcfile);
1727 
1728     if (isEnvironment(srcfile)) {
1729     	ParseState.keepSrcRefs = TRUE;
1730 	ParseState.keepParseData =
1731 	    asLogical(GetOption1(install("keep.parse.data")));
1732 	PS_SET_SRCREFS(R_NilValue);
1733     }
1734 
1735     PROTECT(t = NewList());
1736     for(i = 0; ; ) {
1737 	if(n >= 0 && i >= n) break;
1738 	ParseInit();
1739 	rval = R_Parse1(status);
1740 	switch(*status) {
1741 	case PARSE_NULL:
1742 	    break;
1743 	case PARSE_OK:
1744 	    PROTECT(rval);
1745 	    GrowList(t, rval);
1746 	    UNPROTECT(1); /* rval */
1747 	    i++;
1748 	    break;
1749 	case PARSE_INCOMPLETE:
1750 	case PARSE_ERROR:
1751 	    UNPROTECT(1); /* t */
1752 	    if (ParseState.keepSrcRefs && ParseState.keepParseData)
1753 	        finalizeData();
1754 	    endcontext(&cntxt);
1755 	    R_FinalizeSrcRefState();
1756 	    return R_NilValue;
1757 	    break;
1758 	case PARSE_EOF:
1759 	    goto finish;
1760 	    break;
1761 	}
1762     }
1763 
1764 finish:
1765 
1766     t = CDR(t);
1767     PROTECT(rval = allocVector(EXPRSXP, length(t)));
1768     for (n = 0 ; n < LENGTH(rval) ; n++, t = CDR(t))
1769 	SET_VECTOR_ELT(rval, n, CAR(t));
1770     if (ParseState.keepSrcRefs) {
1771 	if (ParseState.keepParseData)
1772 	    finalizeData();
1773 	attachSrcrefs(rval);
1774     }
1775     UNPROTECT(2); /* t, rval */
1776     PROTECT(rval);
1777     endcontext(&cntxt);
1778     R_FinalizeSrcRefState();
1779     UNPROTECT(1); /* rval */
1780     *status = PARSE_OK;
1781     return rval;
1782 }
1783 
1784 /* used in edit.c */
1785 attribute_hidden
R_ParseFile(FILE * fp,int n,ParseStatus * status,SEXP srcfile)1786 SEXP R_ParseFile(FILE *fp, int n, ParseStatus *status, SEXP srcfile)
1787 {
1788     GenerateCode = 1;
1789     fp_parse = fp;
1790     ptr_getc = file_getc;
1791     return R_Parse(n, status, srcfile);
1792 }
1793 
1794 #include "Rconnections.h"
1795 static Rconnection con_parse;
1796 
1797 /* need to handle incomplete last line */
con_getc(void)1798 static int con_getc(void)
1799 {
1800     int c;
1801     static int last=-1000;
1802 
1803     c = Rconn_fgetc(con_parse);
1804     if (c == EOF && last != '\n') c = '\n';
1805     return (last = c);
1806 }
1807 
1808 /* used in source.c */
1809 attribute_hidden
R_ParseConn(Rconnection con,int n,ParseStatus * status,SEXP srcfile)1810 SEXP R_ParseConn(Rconnection con, int n, ParseStatus *status, SEXP srcfile)
1811 {
1812     GenerateCode = 1;
1813     con_parse = con;
1814     ptr_getc = con_getc;
1815     return R_Parse(n, status, srcfile);
1816 }
1817 
1818 /* This one is public, and used in source.c */
R_ParseVector(SEXP text,int n,ParseStatus * status,SEXP srcfile)1819 SEXP R_ParseVector(SEXP text, int n, ParseStatus *status, SEXP srcfile)
1820 {
1821     SEXP rval;
1822     TextBuffer textb;
1823     R_TextBufferInit(&textb, text);
1824     txtb = &textb;
1825     GenerateCode = 1;
1826     ptr_getc = text_getc;
1827     rval = R_Parse(n, status, srcfile);
1828     R_TextBufferFree(&textb);
1829     return rval;
1830 }
1831 
Prompt(SEXP prompt,int type)1832 static const char *Prompt(SEXP prompt, int type)
1833 {
1834     if(type == 1) {
1835 	if(length(prompt) <= 0) {
1836 	    return CHAR(STRING_ELT(GetOption1(install("prompt")), 0));
1837 	}
1838 	else
1839 	    return CHAR(STRING_ELT(prompt, 0));
1840     }
1841     else {
1842 	return CHAR(STRING_ELT(GetOption1(install("continue")), 0));
1843     }
1844 }
1845 
1846 /* used in source.c */
1847 attribute_hidden
R_ParseBuffer(IoBuffer * buffer,int n,ParseStatus * status,SEXP prompt,SEXP srcfile)1848 SEXP R_ParseBuffer(IoBuffer *buffer, int n, ParseStatus *status, SEXP prompt,
1849 		   SEXP srcfile)
1850 {
1851     SEXP rval, t;
1852     char *bufp, buf[CONSOLE_BUFFER_SIZE];
1853     int c, i, prompt_type = 1;
1854     RCNTXT cntxt;
1855 
1856     R_IoBufferWriteReset(buffer);
1857     buf[0] = '\0';
1858     bufp = buf;
1859     R_InitSrcRefState(&cntxt);
1860     ParseContextInit();
1861 
1862     GenerateCode = 1;
1863     iob = buffer;
1864     ptr_getc = buffer_getc;
1865 
1866     PS_SET_SRCFILE(srcfile);
1867     PS_SET_ORIGINAL(srcfile);
1868 
1869     if (isEnvironment(srcfile)) {
1870     	ParseState.keepSrcRefs = TRUE;
1871 	ParseState.keepParseData =
1872 	    asLogical(GetOption1(install("keep.parse.data")));
1873 	PS_SET_SRCREFS(R_NilValue);
1874     }
1875 
1876     PROTECT(t = NewList());
1877     for(i = 0; ; ) {
1878 	if(n >= 0 && i >= n) break;
1879 	if (!*bufp) {
1880 	    if(R_ReadConsole((char *) Prompt(prompt, prompt_type),
1881 			     (unsigned char *)buf, CONSOLE_BUFFER_SIZE, 1) == 0)
1882 		goto finish;
1883 	    bufp = buf;
1884 	}
1885 	while ((c = *bufp++)) {
1886 	    R_IoBufferPutc(c, buffer);
1887 	    if (c == ';' || c == '\n') break;
1888 	}
1889 
1890 	/* Was a call to R_Parse1Buffer, but we don't want to reset
1891 	   xxlineno and xxcolno */
1892 	ParseInit();
1893 	/* Not calling ParseContextInit() as it resets parse data, and
1894 	   to be consistent with R_Parse */
1895 	R_Parse1(status);
1896 	rval = R_CurrentExpr;
1897 
1898 	switch(*status) {
1899 	case PARSE_NULL:
1900 	    break;
1901 	case PARSE_OK:
1902 	    PROTECT(rval);
1903 	    GrowList(t, rval);
1904 	    UNPROTECT(1); /* rval */
1905 	    i++;
1906 	    break;
1907 	case PARSE_INCOMPLETE:
1908 	case PARSE_ERROR:
1909 	    UNPROTECT(1); /* t */
1910 	    R_IoBufferWriteReset(buffer);
1911 	    endcontext(&cntxt);
1912 	    R_FinalizeSrcRefState();
1913 	    return R_NilValue;
1914 	    break;
1915 	case PARSE_EOF:
1916 	    goto finish;
1917 	    break;
1918 	}
1919     }
1920 finish:
1921     R_IoBufferWriteReset(buffer);
1922     t = CDR(t);
1923     PROTECT(rval = allocVector(EXPRSXP, length(t)));
1924     for (n = 0 ; n < LENGTH(rval) ; n++, t = CDR(t))
1925 	SET_VECTOR_ELT(rval, n, CAR(t));
1926     if (ParseState.keepSrcRefs) {
1927 	if (ParseState.keepParseData)
1928 	    finalizeData();
1929 	attachSrcrefs(rval);
1930     }
1931     UNPROTECT(2); /* t, rval */
1932     PROTECT(rval);
1933     endcontext(&cntxt);
1934     R_FinalizeSrcRefState();
1935     UNPROTECT(1); /* rval */
1936     *status = PARSE_OK;
1937     return rval;
1938 }
1939 
1940 
1941 /*----------------------------------------------------------------------------
1942  *
1943  *  The Lexical Analyzer:
1944  *
1945  *  Basic lexical analysis is performed by the following
1946  *  routines.  Input is read a line at a time, and, if the
1947  *  program is in batch mode, each input line is echoed to
1948  *  standard output after it is read.
1949  *
1950  *  The function yylex() scans the input, breaking it into
1951  *  tokens which are then passed to the parser.  The lexical
1952  *  analyser maintains a symbol table (in a very messy fashion).
1953  *
1954  *  The fact that if statements need to parse differently
1955  *  depending on whether the statement is being interpreted or
1956  *  part of the body of a function causes the need for ifpop
1957  *  and IfPush.  When an if statement is encountered an 'i' is
1958  *  pushed on a stack (provided there are parentheses active).
1959  *  At later points this 'i' needs to be popped off of the if
1960  *  stack.
1961  *
1962  */
1963 
IfPush(void)1964 static void IfPush(void)
1965 {
1966     if (*contextp==LBRACE ||
1967 	*contextp=='['    ||
1968 	*contextp=='('    ||
1969 	*contextp == 'i') {
1970 	if(contextp - contextstack >= CONTEXTSTACK_SIZE)
1971 	    error(_("contextstack overflow"));
1972 	*++contextp = 'i';
1973     }
1974 
1975 }
1976 
ifpop(void)1977 static void ifpop(void)
1978 {
1979     if (*contextp=='i')
1980 	*contextp-- = 0;
1981 }
1982 
1983 /* This is only called following ., so we only care if it is
1984    an ANSI digit or not */
typeofnext(void)1985 static int typeofnext(void)
1986 {
1987     int k, c;
1988 
1989     c = xxgetc();
1990     if (isdigit(c)) k = 1; else k = 2;
1991     xxungetc(c);
1992     return k;
1993 }
1994 
nextchar(int expect)1995 static int nextchar(int expect)
1996 {
1997     int c = xxgetc();
1998     if (c == expect)
1999 	return 1;
2000     else
2001 	xxungetc(c);
2002     return 0;
2003 }
2004 
2005 /* Special Symbols */
2006 /* Syntactic Keywords + Symbolic Constants */
2007 
2008 struct {
2009     char *name;
2010     int token;
2011 }
2012 static keywords[] = {
2013     { "NULL",	    NULL_CONST },
2014     { "NA",	    NUM_CONST  },
2015     { "TRUE",	    NUM_CONST  },
2016     { "FALSE",	    NUM_CONST  },
2017     { "Inf",	    NUM_CONST  },
2018     { "NaN",	    NUM_CONST  },
2019     { "NA_integer_", NUM_CONST  },
2020     { "NA_real_",    NUM_CONST  },
2021     { "NA_character_", NUM_CONST  },
2022     { "NA_complex_", NUM_CONST  },
2023     { "function",   FUNCTION   },
2024     { "while",	    WHILE      },
2025     { "repeat",	    REPEAT     },
2026     { "for",	    FOR	       },
2027     { "if",	    IF	       },
2028     { "in",	    IN	       },
2029     { "else",	    ELSE       },
2030     { "next",	    NEXT       },
2031     { "break",	    BREAK      },
2032     { "...",	    SYMBOL     },
2033     { 0,	    0	       }
2034 };
2035 
2036 /* KeywordLookup has side effects, it sets yylval */
2037 
KeywordLookup(const char * s)2038 static int KeywordLookup(const char *s)
2039 {
2040     int i;
2041     for (i = 0; keywords[i].name; i++) {
2042 	if (strcmp(keywords[i].name, s) == 0) {
2043 	    switch (keywords[i].token) {
2044 	    case NULL_CONST:
2045 		PRESERVE_SV(yylval = R_NilValue);
2046 		break;
2047 	    case NUM_CONST:
2048 		if(GenerateCode) {
2049 		    switch(i) {
2050 		    case 1:
2051 			PRESERVE_SV(yylval = mkNA());
2052 			break;
2053 		    case 2:
2054 			PRESERVE_SV(yylval = mkTrue());
2055 			break;
2056 		    case 3:
2057 			PRESERVE_SV(yylval = mkFalse());
2058 			break;
2059 		    case 4:
2060 			PRESERVE_SV(yylval = allocVector(REALSXP, 1));
2061 			REAL(yylval)[0] = R_PosInf;
2062 			break;
2063 		    case 5:
2064 			PRESERVE_SV(yylval = allocVector(REALSXP, 1));
2065 			REAL(yylval)[0] = R_NaN;
2066 			break;
2067 		    case 6:
2068 			PRESERVE_SV(yylval = allocVector(INTSXP, 1));
2069 			INTEGER(yylval)[0] = NA_INTEGER;
2070 			break;
2071 		    case 7:
2072 			PRESERVE_SV(yylval = allocVector(REALSXP, 1));
2073 			REAL(yylval)[0] = NA_REAL;
2074 			break;
2075 		    case 8:
2076 			PRESERVE_SV(yylval = allocVector(STRSXP, 1));
2077 			SET_STRING_ELT(yylval, 0, NA_STRING);
2078 			break;
2079 		    case 9:
2080 			PRESERVE_SV(yylval = allocVector(CPLXSXP, 1));
2081 			COMPLEX(yylval)[0].r = COMPLEX(yylval)[0].i = NA_REAL;
2082 			break;
2083 		    }
2084 		} else
2085 		    PRESERVE_SV(yylval = R_NilValue);
2086 		break;
2087 	    case FUNCTION:
2088 	    case WHILE:
2089 	    case REPEAT:
2090 	    case FOR:
2091 	    case IF:
2092 	    case NEXT:
2093 	    case BREAK:
2094 		yylval = install(s);
2095 		break;
2096 	    case IN:
2097 	    case ELSE:
2098 		break;
2099 	    case SYMBOL:
2100 		PRESERVE_SV(yylval = install(s));
2101 		break;
2102 	    }
2103 	    return keywords[i].token;
2104 	}
2105     }
2106     return 0;
2107 }
2108 
mkFloat(const char * s)2109 static SEXP mkFloat(const char *s)
2110 {
2111     return ScalarReal(R_atof(s));
2112 }
2113 
mkInt(const char * s)2114 static SEXP mkInt(const char *s)
2115 {
2116     double f = R_atof(s);  /* or R_strtol? */
2117     return ScalarInteger((int) f);
2118 }
2119 
mkComplex(const char * s)2120 static SEXP mkComplex(const char *s)
2121 {
2122     SEXP t = R_NilValue;
2123     double f;
2124     f = R_atof(s); /* FIXME: make certain the value is legitimate. */
2125     t = allocVector(CPLXSXP, 1);
2126     COMPLEX(t)[0].r = 0;
2127     COMPLEX(t)[0].i = f;
2128     return t;
2129 }
2130 
mkNA(void)2131 static SEXP mkNA(void)
2132 {
2133     SEXP t = allocVector(LGLSXP, 1);
2134     LOGICAL(t)[0] = NA_LOGICAL;
2135     return t;
2136 }
2137 
2138 attribute_hidden
mkTrue(void)2139 SEXP mkTrue(void)
2140 {
2141     SEXP s = allocVector(LGLSXP, 1);
2142     LOGICAL(s)[0] = 1;
2143     return s;
2144 }
2145 
mkFalse(void)2146 SEXP mkFalse(void)
2147 {
2148     SEXP s = allocVector(LGLSXP, 1);
2149     LOGICAL(s)[0] = 0;
2150     return s;
2151 }
2152 
yyerror(const char * s)2153 static void yyerror(const char *s)
2154 {
2155     static const char *const yytname_translations[] =
2156     {
2157     /* the left column are strings coming from bison, the right
2158        column are translations for users.
2159        The first YYENGLISH from the right column are English to be translated,
2160        the rest are to be copied literally.  The #if 0 block below allows xgettext
2161        to see these.
2162     */
2163 #define YYENGLISH 8
2164 	"$undefined",	"input",
2165 	"END_OF_INPUT",	"end of input",
2166 	"ERROR",	"input",
2167 	"STR_CONST",	"string constant",
2168 	"NUM_CONST",	"numeric constant",
2169 	"SYMBOL",	"symbol",
2170 	"LEFT_ASSIGN",	"assignment",
2171 	"'\\n'",	"end of line",
2172 	"NULL_CONST",	"'NULL'",
2173 	"FUNCTION",	"'function'",
2174 	"EQ_ASSIGN",	"'='",
2175 	"RIGHT_ASSIGN",	"'->'",
2176 	"LBB",		"'[['",
2177 	"FOR",		"'for'",
2178 	"IN",		"'in'",
2179 	"IF",		"'if'",
2180 	"ELSE",		"'else'",
2181 	"WHILE",	"'while'",
2182 	"NEXT",		"'next'",
2183 	"BREAK",	"'break'",
2184 	"REPEAT",	"'repeat'",
2185 	"GT",		"'>'",
2186 	"GE",		"'>='",
2187 	"LT",		"'<'",
2188 	"LE",		"'<='",
2189 	"EQ",		"'=='",
2190 	"NE",		"'!='",
2191 	"AND",		"'&'",
2192 	"OR",		"'|'",
2193 	"AND2",		"'&&'",
2194 	"OR2",		"'||'",
2195 	"NS_GET",	"'::'",
2196 	"NS_GET_INT",	"':::'",
2197 	"PIPE",         "'|>'",
2198 	"PIPEBIND",     "'=>'",
2199 	0
2200     };
2201     static char const yyunexpected[] = "syntax error, unexpected ";
2202     static char const yyexpecting[] = ", expecting ";
2203     char *expecting;
2204 
2205     if (!EndOfFile)
2206 	/* On EndOfFile, there are no more bytes to add, but trying to do
2207 	   so may have non-trivial performance overhead and this can be
2208 	   reached also in non-error situations, e.g. from repl.
2209 	*/
2210 	finish_mbcs_in_parse_context();
2211 
2212     R_ParseError     = yylloc.first_line;
2213     R_ParseErrorCol  = yylloc.first_column;
2214     R_ParseErrorFile = PS_SRCFILE;
2215 
2216     if (!strncmp(s, yyunexpected, sizeof yyunexpected -1)) {
2217 	int i;
2218 	/* Edit the error message */
2219 	expecting = strstr(s + sizeof yyunexpected -1, yyexpecting);
2220 	if (expecting) *expecting = '\0';
2221 	for (i = 0; yytname_translations[i]; i += 2) {
2222 	    if (!strcmp(s + sizeof yyunexpected - 1, yytname_translations[i])) {
2223                 switch(i/2)
2224                 {
2225                 case 0:
2226                         snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE, _("unexpected input"));
2227                                 break;
2228                 case 1:
2229                         snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE, _("unexpected end of input"));
2230                                 break;
2231                 case 2:
2232                         snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE, _("unexpected input"));
2233                                 break;
2234                 case 3:
2235                         snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE, _("unexpected string constant"));
2236                                 break;
2237                 case 4:
2238                         snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE, _("unexpected numeric constant"));
2239                                 break;
2240                 case 5:
2241                         snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE, _("unexpected symbol"));
2242                                 break;
2243                 case 6:
2244                         snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE, _("unexpected assignment"));
2245                                 break;
2246                 case 7:
2247                         snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE, _("unexpected end of line"));
2248                                 break;
2249                 default:
2250                   snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE, _("unexpected %s"),
2251                            yytname_translations[i+1]);
2252                                 break;
2253                 }
2254 
2255 		return;
2256 	    }
2257 	}
2258 	snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE - 1, _("unexpected %s"),
2259                  s + sizeof yyunexpected - 1);
2260     } else {
2261 	strncpy(R_ParseErrorMsg, s, PARSE_ERROR_SIZE - 1);
2262         R_ParseErrorMsg[PARSE_ERROR_SIZE - 1] = '\0';
2263     }
2264 }
2265 
CheckFormalArgs(SEXP formlist,SEXP _new,YYLTYPE * lloc)2266 static void CheckFormalArgs(SEXP formlist, SEXP _new, YYLTYPE *lloc)
2267 {
2268     while (formlist != R_NilValue) {
2269 	if (TAG(formlist) == _new) {
2270 	    error(_("repeated formal argument '%s' on line %d"), EncodeChar(PRINTNAME(_new)),
2271 								 lloc->first_line);
2272 	}
2273 	formlist = CDR(formlist);
2274     }
2275 }
2276 
2277 /* This is used as the buffer for NumericValue, SpecialValue and
2278    SymbolValue.  None of these could conceivably need 8192 bytes.
2279 
2280    It has not been used as the buffer for input character strings
2281    since Oct 2007 (released as 2.7.0), and for comments since 2.8.0
2282  */
2283 static char yytext[MAXELTSIZE];
2284 
SkipSpace(void)2285 static int SkipSpace(void)
2286 {
2287     int c;
2288 
2289 #if defined(USE_RI18N_FNS) // includes Win32
2290     static wctype_t blankwct = 0;
2291     if (!blankwct)
2292 	blankwct = Ri18n_wctype("blank");
2293 #endif
2294 
2295 #ifdef Win32
2296     if(!mbcslocale) { /* 0xa0 is NBSP in all 8-bit Windows locales */
2297 	while ((c = xxgetc()) == ' ' || c == '\t' || c == '\f' ||
2298 	       (unsigned int) c == 0xa0) ;
2299 	return c;
2300     } else {
2301 	int i, clen;
2302 	wchar_t wc;
2303 	while (1) {
2304 	    c = xxgetc();
2305 	    if (c == ' ' || c == '\t' || c == '\f') continue;
2306 	    if (c == '\n' || c == R_EOF) break;
2307 	    if ((unsigned int) c < 0x80) break;
2308 	    clen = mbcs_get_next(c, &wc);  /* always 2 */
2309 	    if(! Ri18n_iswctype(wc, blankwct) ) break;
2310 	    for(i = 1; i < clen; i++) c = xxgetc();
2311 	}
2312 	return c;
2313     }
2314 #endif
2315 #if defined(__STDC_ISO_10646__)
2316     if(mbcslocale) { /* wctype functions need Unicode wchar_t */
2317 	int i, clen;
2318 	wchar_t wc;
2319 	while (1) {
2320 	    c = xxgetc();
2321 	    if (c == ' ' || c == '\t' || c == '\f') continue;
2322 	    if (c == '\n' || c == R_EOF) break;
2323 	    if ((unsigned int) c < 0x80) break;
2324 	    clen = mbcs_get_next(c, &wc);
2325 #if defined(USE_RI18N_FNS)
2326 	    if(! Ri18n_iswctype(wc, blankwct) ) break;
2327 #else
2328 	    if(! iswblank(wc) ) break;
2329 #endif
2330 	    for(i = 1; i < clen; i++) c = xxgetc();
2331 	}
2332     } else
2333 #endif
2334 	// does not support non-ASCII spaces, unlike Windows
2335 	while ((c = xxgetc()) == ' ' || c == '\t' || c == '\f') ;
2336     return c;
2337 }
2338 
2339 /* Note that with interactive use, EOF cannot occur inside */
2340 /* a comment.  However, semicolons inside comments make it */
2341 /* appear that this does happen.  For this reason we use the */
2342 /* special assignment EndOfFile=2 to indicate that this is */
2343 /* going on.  This is detected and dealt with in Parse1Buffer. */
2344 
SkipComment(void)2345 static int SkipComment(void)
2346 {
2347     int c='#', i;
2348 
2349     /* locations before the # character was read */
2350     int _first_column = ParseState.xxcolno ;
2351     int _first_parsed = ParseState.xxparseno ;
2352     int type = COMMENT ;
2353 
2354     Rboolean maybeLine = (ParseState.xxcolno == 1);
2355     Rboolean doSave;
2356 
2357     DECLARE_YYTEXT_BUFP(yyp);
2358 
2359     if (maybeLine) {
2360     	char lineDirective[] = "#line";
2361     	YYTEXT_PUSH(c, yyp);
2362     	for (i=1; i<5; i++) {
2363     	    c = xxgetc();
2364   	    if (c != (int)(lineDirective[i])) {
2365   	    	maybeLine = FALSE;
2366   	    	break;
2367   	    }
2368             YYTEXT_PUSH(c, yyp);
2369   	}
2370   	if (maybeLine)
2371 	    c = processLineDirective(&type);
2372     }
2373     // we want to track down the character
2374     // __before__ the new line character
2375     int _last_column  = ParseState.xxcolno ;
2376     int _last_parsed  = ParseState.xxparseno ;
2377 
2378     if (c == '\n') {
2379         _last_column = prevcols[prevpos];
2380         _last_parsed = prevparse[prevpos];
2381     }
2382 
2383     doSave = !maybeLine;
2384 
2385     while (c != '\n' && c != R_EOF) {
2386         // Comments can be any length; we only record the ones that fit in yytext.
2387         if (doSave) {
2388             YYTEXT_PUSH(c, yyp);
2389             doSave = (yyp - yytext) < sizeof(yytext) - 2;
2390         }
2391  	_last_column = ParseState.xxcolno ;
2392 	_last_parsed = ParseState.xxparseno ;
2393 	c = xxgetc();
2394     }
2395     if (c == R_EOF) EndOfFile = 2;
2396     incrementId( ) ;
2397     YYTEXT_PUSH('\0', yyp);
2398     record_( _first_parsed, _first_column, _last_parsed, _last_column,
2399 	     type, identifier, doSave ? yytext : 0 ) ;
2400     return c;
2401 }
2402 
NumericValue(int c)2403 static int NumericValue(int c)
2404 {
2405     int seendot = (c == '.');
2406     int seenexp = 0;
2407     int last = c;
2408     int nd = 0;
2409     int asNumeric = 0;
2410     int count = 1; /* The number of characters seen */
2411 
2412     DECLARE_YYTEXT_BUFP(yyp);
2413     YYTEXT_PUSH(c, yyp);
2414     /* We don't care about other than ASCII digits */
2415     while (isdigit(c = xxgetc()) || c == '.' || c == 'e' || c == 'E'
2416 	   || c == 'x' || c == 'X' || c == 'L')
2417     {
2418 	count++;
2419 	if (c == 'L') /* must be at the end.  Won't allow 1Le3 (at present). */
2420 	{   YYTEXT_PUSH(c, yyp);
2421 	    break;
2422 	}
2423 
2424 	if (c == 'x' || c == 'X') {
2425 	    if (count > 2 || last != '0') break;  /* 0x must be first */
2426 	    YYTEXT_PUSH(c, yyp);
2427 	    while(isdigit(c = xxgetc()) || ('a' <= c && c <= 'f') ||
2428 		  ('A' <= c && c <= 'F') || c == '.') {
2429 		if (c == '.') {
2430 		    if (seendot) return ERROR;
2431 		    seendot = 1;
2432 		}
2433 		YYTEXT_PUSH(c, yyp);
2434 		nd++;
2435 	    }
2436 	    if (nd == 0) return ERROR;
2437 	    if (c == 'p' || c == 'P') {
2438 	        seenexp = 1;
2439 		YYTEXT_PUSH(c, yyp);
2440 		c = xxgetc();
2441 		if (!isdigit(c) && c != '+' && c != '-') return ERROR;
2442 		if (c == '+' || c == '-') {
2443 		    YYTEXT_PUSH(c, yyp);
2444 		    c = xxgetc();
2445 		}
2446 		for(nd = 0; isdigit(c); c = xxgetc(), nd++)
2447 		    YYTEXT_PUSH(c, yyp);
2448 		if (nd == 0) return ERROR;
2449 	    }
2450             if (seendot && !seenexp) return ERROR;
2451 	    if (c == 'L') /* for getParseData */
2452 	    {
2453 		// seenexp will be checked later
2454 		YYTEXT_PUSH(c, yyp);
2455 		break;
2456 	    }
2457 	    break;
2458 	}
2459 	if (c == 'E' || c == 'e') {
2460 	    if (seenexp)
2461 		break;
2462 	    seenexp = 1;
2463 	    seendot = seendot == 1 ? seendot : 2;
2464 	    YYTEXT_PUSH(c, yyp);
2465 	    c = xxgetc();
2466 	    if (!isdigit(c) && c != '+' && c != '-') return ERROR;
2467 	    if (c == '+' || c == '-') {
2468 		YYTEXT_PUSH(c, yyp);
2469 		c = xxgetc();
2470 		if (!isdigit(c)) return ERROR;
2471 	    }
2472 	}
2473 	if (c == '.') {
2474 	    if (seendot)
2475 		break;
2476 	    seendot = 1;
2477 	}
2478 	YYTEXT_PUSH(c, yyp);
2479 	last = c;
2480     }
2481 
2482     if(c == 'i')
2483 	YYTEXT_PUSH(c, yyp); /* for getParseData */
2484 
2485     YYTEXT_PUSH('\0', yyp);
2486     /* Make certain that things are okay. */
2487     if(c == 'L') {
2488 	double a = R_atof(yytext);
2489 	int b = (int) a;
2490 	/* We are asked to create an integer via the L, so we check that the
2491 	   double and int values are the same. If not, this is a problem and we
2492 	   will not lose information and so use the numeric value.
2493 	*/
2494 	if(a != (double) b) {
2495 	    if(GenerateCode) {
2496 		if(seendot == 1 && seenexp == 0)
2497 		    warning(_("integer literal %s contains decimal; using numeric value"), yytext);
2498 		else {
2499 		    /* hide the L for the warning message */
2500 		    warning(_("non-integer value %s qualified with L; using numeric value"), yytext);
2501 		}
2502 	    }
2503 	    asNumeric = 1;
2504 	    seenexp = 1;
2505 	}
2506     }
2507 
2508     if(c == 'i') {
2509 	yylval = GenerateCode ? mkComplex(yytext) : R_NilValue;
2510     } else if(c == 'L' && asNumeric == 0) {
2511 	if(GenerateCode && seendot == 1 && seenexp == 0)
2512 	    warning(_("integer literal %s contains unnecessary decimal point"), yytext);
2513 	yylval = GenerateCode ? mkInt(yytext) : R_NilValue;
2514 #if 0  /* do this to make 123 integer not double */
2515     } else if(!(seendot || seenexp)) {
2516 	if(c != 'L') xxungetc(c);
2517 	if (GenerateCode) {
2518 	    double a = R_atof(yytext);
2519 	    int b = (int) a;
2520 	    yylval = (a != (double) b) ? mkFloat(yytext) : mkInt(yytext);
2521 	} else yylval = R_NilValue;
2522 #endif
2523     } else {
2524 	if(c != 'L')
2525 	    xxungetc(c);
2526 	yylval = GenerateCode ? mkFloat(yytext) : R_NilValue;
2527     }
2528 
2529     PRESERVE_SV(yylval);
2530     return NUM_CONST;
2531 }
2532 
2533 /* Strings may contain the standard ANSI escapes and octal */
2534 /* specifications of the form \o, \oo or \ooo, where 'o' */
2535 /* is an octal digit. */
2536 
2537 /* The buffer is reallocated on the R heap if needed; not by malloc */
2538 /* to avoid memory leak in case of R error (long jump) */
2539 #define STEXT_PUSH(c) do {                  \
2540 	size_t nc = bp - stext;             \
2541 	if (nc >= nstext - 1) {             \
2542 	    char *old = stext;              \
2543 	    SEXP st1;		            \
2544 	    nstext *= 2;                    \
2545 	    PROTECT(st1 = allocVector(RAWSXP, nstext)); \
2546 	    stext = (char *)RAW(st1);       \
2547 	    memmove(stext, old, nc);        \
2548 	    REPROTECT(st1, sti);	    \
2549 	    UNPROTECT(1); /* st1 */         \
2550 	    bp = stext+nc; }		    \
2551 	*bp++ = ((char) c);		    \
2552 } while(0)
2553 
2554 
2555 /* The idea here is that if a string contains \u escapes that are not
2556    valid in the current locale, we should switch to UTF-8 for that
2557    string.  Needs Unicode wide-char support or out substitutes.
2558 
2559    Defining __STDC_ISO_10646__ is done by the OS (or not) in wchar.t.
2560    Some (e.g. macOS, Solaris, FreeBSD) have Unicode wchar_t but do not
2561    define it: we override macOS and FreeBSD earlier in this file.
2562 */
2563 
2564 #if defined(Win32) || defined(__STDC_ISO_10646__)
2565 typedef wchar_t ucs_t;
2566 # define mbcs_get_next2 mbcs_get_next
2567 #else
2568 typedef unsigned int ucs_t;
2569 # define WC_NOT_UNICODE
2570 // which is used to select our mbtoucs rather than system mbrtowc
mbcs_get_next2(int c,ucs_t * wc)2571 static int mbcs_get_next2(int c, ucs_t *wc)
2572 {
2573     int i, res, clen = 1; char s[9];
2574 
2575     s[0] = c;
2576     /* This assumes (probably OK) that all MBCS embed ASCII as single-byte
2577        lead bytes, including control chars */
2578     if((unsigned int) c < 0x80) {
2579 	*wc = (wchar_t) c;
2580 	return 1;
2581     }
2582     if(utf8locale) {
2583 	clen = utf8clen(c);
2584 	for(i = 1; i < clen; i++) {
2585 	    c = xxgetc();
2586 	    if(c == R_EOF) error(_("EOF whilst reading MBCS char at line %d"), ParseState.xxlineno);
2587 	    s[i] = (char) c;
2588 	}
2589 	s[clen] ='\0'; /* x86 Solaris requires this */
2590 	res = mbtoucs(wc, s, clen);
2591 	if(res == -1) error(_("invalid multibyte character in parser at line %d"), ParseState.xxlineno);
2592     } else {
2593 	/* This is not necessarily correct for stateful MBCS */
2594 	while(clen <= R_MB_CUR_MAX) {
2595 	    res = mbtoucs(wc, s, clen);
2596 	    if(res >= 0) break;
2597 	    if(res == -1)
2598 		error(_("invalid multibyte character in parser at line %d"), ParseState.xxlineno);
2599 	    /* so res == -2 */
2600 	    c = xxgetc();
2601 	    if(c == R_EOF) error(_("EOF whilst reading MBCS char at line %d"), ParseState.xxlineno);
2602 	    s[clen++] = c;
2603 	} /* we've tried enough, so must be complete or invalid by now */
2604     }
2605     for(i = clen - 1; i > 0; i--) xxungetc(s[i]);
2606     return clen;
2607 }
2608 #endif
2609 
2610 #define WTEXT_PUSH(c) do { if(wcnt < 10000) wcs[wcnt++] = c; } while(0)
2611 
mkStringUTF8(const ucs_t * wcs,int cnt)2612 static SEXP mkStringUTF8(const ucs_t *wcs, int cnt)
2613 {
2614     SEXP t;
2615     int nb;
2616 
2617 /* NB: cnt includes the terminator */
2618 #ifdef Win32
2619     nb = cnt*4; /* UCS-2/UTF-16 so max 4 bytes per wchar_t */
2620 #else
2621     nb = cnt*6;
2622 #endif
2623     R_CheckStack2(nb);
2624     char s[nb];
2625     memset(s, 0, nb); /* safety */
2626     // This used to differentiate WC_NOT_UNICODE but not needed
2627     wcstoutf8(s, (const wchar_t *)wcs, sizeof(s));
2628     PROTECT(t = allocVector(STRSXP, 1));
2629     SET_STRING_ELT(t, 0, mkCharCE(s, CE_UTF8));
2630     UNPROTECT(1); /* t */
2631     return t;
2632 }
2633 
2634 #define CTEXT_PUSH(c) do { \
2635 	if (ct - currtext >= 1000) { \
2636 	    memmove(currtext, currtext+100, 901); memmove(currtext, "... ", 4); ct -= 100; \
2637 	    currtext_truncated = TRUE; \
2638 	} \
2639 	*ct++ = ((char) c);  \
2640 } while(0)
2641 #define CTEXT_POP() ct--
2642 
2643 
2644 /* forSymbol is true when parsing backticked symbols */
StringValue(int c,Rboolean forSymbol)2645 static int StringValue(int c, Rboolean forSymbol)
2646 {
2647     int quote = c;
2648     char currtext[1010], *ct = currtext;
2649     char st0[MAXELTSIZE];
2650     unsigned int nstext = MAXELTSIZE;
2651     char *stext = st0, *bp = st0;
2652     PROTECT_INDEX sti;
2653     int wcnt = 0;
2654     ucs_t wcs[10001];
2655     Rboolean oct_or_hex = FALSE, use_wcs = FALSE, currtext_truncated = FALSE;
2656 
2657     PROTECT_WITH_INDEX(R_NilValue, &sti);
2658     CTEXT_PUSH(c);
2659     while ((c = xxgetc()) != R_EOF && c != quote) {
2660 	CTEXT_PUSH(c);
2661 	if (c == '\n') {
2662 	    xxungetc(c); CTEXT_POP();
2663 	    /* Fix suggested by Mark Bravington to allow multiline strings
2664 	     * by pretending we've seen a backslash. Was:
2665 	     * return ERROR;
2666 	     */
2667 	    c = '\\';
2668 	}
2669 	if (c == '\\') {
2670 	    c = xxgetc(); CTEXT_PUSH(c);
2671 	    if ('0' <= c && c <= '7') {
2672 		int octal = c - '0';
2673 		if ('0' <= (c = xxgetc()) && c <= '7') {
2674 		    CTEXT_PUSH(c);
2675 		    octal = 8 * octal + c - '0';
2676 		    if ('0' <= (c = xxgetc()) && c <= '7') {
2677 			CTEXT_PUSH(c);
2678 			octal = 8 * octal + c - '0';
2679 		    } else {
2680 			xxungetc(c);
2681 			CTEXT_POP();
2682 		    }
2683 		} else {
2684 		    xxungetc(c);
2685 		    CTEXT_POP();
2686 		}
2687 		if (!octal)
2688 		    error(_("nul character not allowed (line %d)"), ParseState.xxlineno);
2689 		c = octal;
2690 		oct_or_hex = TRUE;
2691 	    }
2692 	    else if(c == 'x') {
2693 		int val = 0; int i, ext;
2694 		for(i = 0; i < 2; i++) {
2695 		    c = xxgetc(); CTEXT_PUSH(c);
2696 		    if(c >= '0' && c <= '9') ext = c - '0';
2697 		    else if (c >= 'A' && c <= 'F') ext = c - 'A' + 10;
2698 		    else if (c >= 'a' && c <= 'f') ext = c - 'a' + 10;
2699 		    else {
2700 			xxungetc(c);
2701 			CTEXT_POP();
2702 			if (i == 0) { /* was just \x */
2703 			    *ct = '\0';
2704 			    errorcall(R_NilValue, _("'\\x' used without hex digits in character string starting \"%s\""), currtext);
2705 			}
2706 			break;
2707 		    }
2708 		    val = 16*val + ext;
2709 		}
2710 		if (!val)
2711 		    error(_("nul character not allowed (line %d)"), ParseState.xxlineno);
2712 		c = val;
2713 		oct_or_hex = TRUE;
2714 	    }
2715 	    else if(c == 'u') {
2716 		unsigned int val = 0; int i, ext;
2717 		Rboolean delim = FALSE;
2718 
2719 		if(forSymbol)
2720 		    error(_("\\uxxxx sequences not supported inside backticks (line %d)"), ParseState.xxlineno);
2721 		if((c = xxgetc()) == '{') {
2722 		    delim = TRUE;
2723 		    CTEXT_PUSH(c);
2724 		} else xxungetc(c);
2725 		for(i = 0; i < 4; i++) {
2726 		    c = xxgetc(); CTEXT_PUSH(c);
2727 		    if(c >= '0' && c <= '9') ext = c - '0';
2728 		    else if (c >= 'A' && c <= 'F') ext = c - 'A' + 10;
2729 		    else if (c >= 'a' && c <= 'f') ext = c - 'a' + 10;
2730 		    else {
2731 			xxungetc(c);
2732 			CTEXT_POP();
2733 			if (i == 0) { /* was just \u */
2734 			    *ct = '\0';
2735 			    errorcall(R_NilValue, _("'\\u' used without hex digits in character string starting \"%s\""), currtext);
2736 			}
2737 			break;
2738 		    }
2739 		    val = 16*val + ext;
2740 		}
2741 		if(delim) {
2742 		    if((c = xxgetc()) != '}')
2743 			error(_("invalid \\u{xxxx} sequence (line %d)"),
2744 			      ParseState.xxlineno);
2745 		    else CTEXT_PUSH(c);
2746 		}
2747 		if (!val)
2748 		    error(_("nul character not allowed (line %d)"), ParseState.xxlineno);
2749 		WTEXT_PUSH(val); /* this assumes wchar_t is Unicode */
2750 		use_wcs = TRUE;
2751 		continue;
2752 	    }
2753 	    else if(c == 'U') {
2754 		unsigned int val = 0; int i, ext;
2755 		Rboolean delim = FALSE;
2756 		if(forSymbol)
2757 		    error(_("\\Uxxxxxxxx sequences not supported inside backticks (line %d)"), ParseState.xxlineno);
2758 		if((c = xxgetc()) == '{') {
2759 		    delim = TRUE;
2760 		    CTEXT_PUSH(c);
2761 		} else xxungetc(c);
2762 		for(i = 0; i < 8; i++) {
2763 		    c = xxgetc(); CTEXT_PUSH(c);
2764 		    if(c >= '0' && c <= '9') ext = c - '0';
2765 		    else if (c >= 'A' && c <= 'F') ext = c - 'A' + 10;
2766 		    else if (c >= 'a' && c <= 'f') ext = c - 'a' + 10;
2767 		    else {
2768 			xxungetc(c);
2769 			CTEXT_POP();
2770 			if (i == 0) { /* was just \U */
2771 			    *ct = '\0';
2772 			    errorcall(R_NilValue, _("'\\U' used without hex digits in character string starting \"%s\""), currtext);
2773 			}
2774 			break;
2775 		    }
2776 		    val = 16*val + ext;
2777 		}
2778 		if(delim) {
2779 		    if((c = xxgetc()) != '}')
2780 			error(_("invalid \\U{xxxxxxxx} sequence (line %d)"),
2781 			      ParseState.xxlineno);
2782 		    else CTEXT_PUSH(c);
2783 		}
2784 		if (!val)
2785 		    error(_("nul character not allowed (line %d)"),
2786 			  ParseState.xxlineno);
2787 		if (val > 0x10FFFF) {
2788 		    if(delim)
2789 			error(_("invalid \\U{xxxxxxxx} value %6x (line %d)"),
2790 			      val, ParseState.xxlineno);
2791 		    else
2792 			error(_("invalid \\Uxxxxxxxx value %6x (line %d)"),
2793 			      val, ParseState.xxlineno);
2794 		}
2795 #ifdef Win32
2796 		if (0x010000 <= val && val <= 0x10FFFF) {   /* Need surrogate pair in Windows */
2797 		    val = val - 0x010000;
2798 		    WTEXT_PUSH( 0xD800 | (val >> 10) );
2799 		    val = 0xDC00 | (val & 0x03FF);
2800 		}
2801 #endif
2802 		WTEXT_PUSH(val);
2803 		use_wcs = TRUE;
2804 		continue;
2805 	    }
2806 	    else {
2807 		switch (c) {
2808 		case 'a':
2809 		    c = '\a';
2810 		    break;
2811 		case 'b':
2812 		    c = '\b';
2813 		    break;
2814 		case 'f':
2815 		    c = '\f';
2816 		    break;
2817 		case 'n':
2818 		    c = '\n';
2819 		    break;
2820 		case 'r':
2821 		    c = '\r';
2822 		    break;
2823 		case 't':
2824 		    c = '\t';
2825 		    break;
2826 		case 'v':
2827 		    c = '\v';
2828 		    break;
2829 		case '\\':
2830 		    c = '\\';
2831 		    break;
2832 		case '"':
2833 		case '\'':
2834 		case '`':
2835 		case ' ':
2836 		case '\n':
2837 		    break;
2838 		default:
2839 		    *ct = '\0';
2840 		    errorcall(R_NilValue, _("'\\%c' is an unrecognized escape in character string starting \"%s\""), c, currtext);
2841 		}
2842 	    }
2843 	} else if(mbcslocale) {
2844 	    int i, clen;
2845 	    ucs_t wc;
2846 	    clen = mbcs_get_next2(c, &wc);
2847 	    WTEXT_PUSH(wc);
2848 	    ParseState.xxbyteno += clen-1;
2849 
2850 	    for(i = 0; i < clen - 1; i++){
2851 		STEXT_PUSH(c);
2852 		c = xxgetc();
2853 		if (c == R_EOF) break;
2854 		CTEXT_PUSH(c);
2855 		if (c == '\n') {
2856 		    xxungetc(c); CTEXT_POP();
2857 		    c = '\\';
2858 		}
2859 	    }
2860 	    if (c == R_EOF) break;
2861 	    STEXT_PUSH(c);
2862 	    continue;
2863 	}
2864 	STEXT_PUSH(c);
2865 	if ((unsigned int) c < 0x80) WTEXT_PUSH(c);
2866 	else { /* have an 8-bit char in the current encoding */
2867 #ifdef WC_NOT_UNICODE
2868 	    ucs_t wc;
2869 	    char s[2] = " ";
2870 	    s[0] = (char) c;
2871 	    mbtoucs(&wc, s, 2);
2872 #else
2873 	    wchar_t wc;
2874 	    char s[2] = " ";
2875 	    s[0] = (char) c;
2876 	    mbrtowc(&wc, s, 2, NULL);
2877 #endif
2878 	    WTEXT_PUSH(wc);
2879 	}
2880     }
2881     STEXT_PUSH('\0');
2882     WTEXT_PUSH(0);
2883     yytext[0] = '\0';
2884     if (c == R_EOF) {
2885 	PRESERVE_SV(yylval = R_NilValue);
2886 	UNPROTECT(1); /* release stext */
2887     	return INCOMPLETE_STRING;
2888     } else {
2889     	CTEXT_PUSH(c);
2890     	CTEXT_PUSH('\0');
2891     }
2892     if (!currtext_truncated)
2893     	strcpy(yytext, currtext);
2894     else if (forSymbol || !use_wcs) {
2895         size_t total = strlen(stext);
2896         snprintf(yytext, MAXELTSIZE, "[%u chars quoted with '%c']", (unsigned int)total, quote);
2897     } else
2898         snprintf(yytext, MAXELTSIZE, "[%d wide chars quoted with '%c']", wcnt, quote);
2899     if(forSymbol) {
2900 	PRESERVE_SV(yylval = install(stext));
2901 	UNPROTECT(1); /* release stext */
2902 	return SYMBOL;
2903     } else {
2904 	if(use_wcs) {
2905 	    if(oct_or_hex)
2906 		error(_("mixing Unicode and octal/hex escapes in a string is not allowed"));
2907 	    if(wcnt < 10000)
2908 		PRESERVE_SV(yylval = mkStringUTF8(wcs, wcnt)); /* include terminator */
2909 	    else
2910 		error(_("string at line %d containing Unicode escapes not in this locale\nis too long (max 10000 chars)"), ParseState.xxlineno);
2911 	} else
2912 	    PRESERVE_SV(yylval = mkString2(stext,  bp - stext - 1, oct_or_hex));
2913 	UNPROTECT(1); /* release stext */
2914 	return STR_CONST;
2915     }
2916 }
2917 
RawStringValue(int c0,int c)2918 static int RawStringValue(int c0, int c)
2919 {
2920     int quote = c;
2921     int delim = ')';
2922     char currtext[1010], *ct = currtext;
2923     char st0[MAXELTSIZE];
2924     unsigned int nstext = MAXELTSIZE;
2925     char *stext = st0, *bp = st0;
2926     PROTECT_INDEX sti;
2927     int wcnt = 0;
2928     ucs_t wcs[10001];
2929     Rboolean oct_or_hex = FALSE, use_wcs = FALSE, currtext_truncated = FALSE;
2930 
2931     CTEXT_PUSH(c0); /* 'r' or 'R' */
2932     CTEXT_PUSH(c);  /* opening quote */
2933 
2934     /* count dashes between the opening quote and opening delimiter */
2935     int ndash = 0;
2936     while (nextchar('-')) { CTEXT_PUSH('-'); ndash++; }
2937 
2938     c = xxgetc();
2939     CTEXT_PUSH(c);
2940     switch(c) {
2941     case '(': delim = ')'; break;
2942     case '[': delim = ']'; break;
2943     case '{': delim = '}'; break;
2944     case '|': delim = '|'; break;
2945     default:
2946 	error(_("malformed raw string literal at line %d"),
2947 	      ParseState.xxlineno);
2948     }
2949 
2950     PROTECT_WITH_INDEX(R_NilValue, &sti);
2951     while ((c = xxgetc()) != R_EOF) {
2952 	if (c == delim) {
2953 	    /* count the dashes after the closing delimiter */
2954 	    int nd = 0;
2955 	    while (nd < ndash && nextchar('-')) nd++;
2956 
2957 	    if (nd == ndash && nextchar(quote))
2958 		/* right number of dashes, right quote: were done! */
2959 		break;
2960 	    else {
2961 		/* not done: emit closing delimiter, dashes, and continue */
2962 		CTEXT_PUSH(delim);
2963 		STEXT_PUSH(delim);
2964 		WTEXT_PUSH(delim);
2965 		for (int i = 0; i < nd; i++) {
2966 		    CTEXT_PUSH('-');
2967 		    STEXT_PUSH('-');
2968 		    WTEXT_PUSH('-');
2969 		}
2970 		continue;
2971 	    }
2972 	}
2973 	CTEXT_PUSH(c);
2974 	if(mbcslocale) {
2975 	    int i, clen;
2976 	    ucs_t wc;
2977 	    clen = mbcs_get_next2(c, &wc);
2978 	    WTEXT_PUSH(wc);
2979 	    ParseState.xxbyteno += clen-1;
2980 
2981 	    for(i = 0; i < clen - 1; i++){
2982 		STEXT_PUSH(c);
2983 		c = xxgetc();
2984 		if (c == R_EOF) break;
2985 		CTEXT_PUSH(c);
2986 	    }
2987 	    if (c == R_EOF) break;
2988 	    STEXT_PUSH(c);
2989 	    continue;
2990 	}
2991 	STEXT_PUSH(c);
2992 	if ((unsigned int) c < 0x80) WTEXT_PUSH(c);
2993 	else { /* have an 8-bit char in the current encoding */
2994 #ifdef WC_NOT_UNICODE
2995 	    ucs_t wc;
2996 	    char s[2] = " ";
2997 	    s[0] = (char) c;
2998 	    mbtoucs(&wc, s, 2);
2999 #else
3000 	    wchar_t wc;
3001 	    char s[2] = " ";
3002 	    s[0] = (char) c;
3003 	    mbrtowc(&wc, s, 2, NULL);
3004 #endif
3005 	    WTEXT_PUSH(wc);
3006 	}
3007     }
3008     STEXT_PUSH('\0');
3009     WTEXT_PUSH(0);
3010     yytext[0] = '\0';
3011     if (c == R_EOF) {
3012 	PRESERVE_SV(yylval = R_NilValue);
3013 	UNPROTECT(1); /* release stext */
3014     	return INCOMPLETE_STRING;
3015     } else {
3016 	/* record delim, dashes, and quote, and terminate string */
3017 	CTEXT_PUSH(delim);
3018 	for (int i = 0; i < ndash; i++)
3019 	    CTEXT_PUSH('-');
3020 	CTEXT_PUSH(quote);
3021     	CTEXT_PUSH('\0');
3022     }
3023     if (!currtext_truncated)
3024     	strcpy(yytext, currtext);
3025     else if (!use_wcs) {
3026         size_t total = strlen(stext);
3027         snprintf(yytext, MAXELTSIZE, "[%u chars quoted with '%c']", (unsigned int)total, quote);
3028     } else
3029         snprintf(yytext, MAXELTSIZE, "[%d wide chars quoted with '%c']", wcnt, quote);
3030     if(use_wcs) {
3031 	if(oct_or_hex)
3032 	    error(_("mixing Unicode and octal/hex escapes in a string is not allowed"));
3033 	if(wcnt < 10000)
3034 	    PRESERVE_SV(yylval = mkStringUTF8(wcs, wcnt)); /* include terminator */
3035 	else
3036 	    error(_("string at line %d containing Unicode escapes not in this locale\nis too long (max 10000 chars)"), ParseState.xxlineno);
3037     } else
3038 	PRESERVE_SV(yylval = mkString2(stext,  bp - stext - 1, oct_or_hex));
3039     UNPROTECT(1); /* release stext */
3040     return STR_CONST;
3041 }
3042 
SpecialValue(int c)3043 static int SpecialValue(int c)
3044 {
3045     DECLARE_YYTEXT_BUFP(yyp);
3046     YYTEXT_PUSH(c, yyp);
3047     while ((c = xxgetc()) != R_EOF && c != '%') {
3048 	if (c == '\n') {
3049 	    xxungetc(c);
3050 	    return ERROR;
3051 	}
3052 	YYTEXT_PUSH(c, yyp);
3053     }
3054     if (c == '%')
3055 	YYTEXT_PUSH(c, yyp);
3056     YYTEXT_PUSH('\0', yyp);
3057     yylval = install(yytext);
3058     return SPECIAL;
3059 }
3060 
3061 /* return 1 if name is a valid name 0 otherwise */
3062 attribute_hidden
isValidName(const char * name)3063 int isValidName(const char *name)
3064 {
3065     const char *p = name;
3066     int i;
3067 
3068     if(mbcslocale) {
3069 	/* the only way to establish which chars are alpha etc is to
3070 	   use the wchar variants */
3071 	size_t n = strlen(name), used;
3072 	wchar_t wc;
3073 	used = Mbrtowc(&wc, p, n, NULL); p += used; n -= used;
3074 	if(used == 0) return 0;
3075 	if (wc != L'.' && !iswalpha(wc) ) return 0;
3076 	if (wc == L'.') {
3077 	    /* We don't care about other than ASCII digits */
3078 	    if(isdigit(0xff & (int)*p)) return 0;
3079 	    /* Mbrtowc(&wc, p, n, NULL); if(iswdigit(wc)) return 0; */
3080 	}
3081 	while((used = Mbrtowc(&wc, p, n, NULL))) {
3082 	    if (!(iswalnum(wc) || wc == L'.' || wc == L'_')) break;
3083 	    p += used; n -= used;
3084 	}
3085 	if (*p != '\0') return 0;
3086     } else {
3087 	int c = 0xff & *p++;
3088 	if (c != '.' && !isalpha(c) ) return 0;
3089 	if (c == '.' && isdigit(0xff & (int)*p)) return 0;
3090 	while ( c = 0xff & *p++, (isalnum(c) || c == '.' || c == '_') ) ;
3091 	if (c != '\0') return 0;
3092     }
3093 
3094     if (strcmp(name, "...") == 0) return 1;
3095 
3096     for (i = 0; keywords[i].name != NULL; i++)
3097 	if (strcmp(keywords[i].name, name) == 0) return 0;
3098 
3099     return 1;
3100 }
3101 
3102 
SymbolValue(int c)3103 static int SymbolValue(int c)
3104 {
3105     int kw;
3106     DECLARE_YYTEXT_BUFP(yyp);
3107     if(mbcslocale) {
3108 	// FIXME potentially need R_wchar_t with UTF-8 Windows.
3109 	wchar_t wc; int i, clen;
3110 	clen = mbcs_get_next(c, &wc);
3111 	while(1) {
3112 	    /* at this point we have seen one char, so push its bytes
3113 	       and get one more */
3114 	    for(i = 0; i < clen; i++) {
3115 		YYTEXT_PUSH(c, yyp);
3116 		c = xxgetc();
3117 	    }
3118 	    if(c == R_EOF) break;
3119 	    if(c == '.' || c == '_') {
3120 		clen = 1;
3121 		continue;
3122 	    }
3123 	    clen = mbcs_get_next(c, &wc);
3124 	    if(!iswalnum(wc)) break;
3125 	}
3126     } else
3127 	do {
3128 	    YYTEXT_PUSH(c, yyp);
3129 	} while ((c = xxgetc()) != R_EOF &&
3130 		 (isalnum(c) || c == '.' || c == '_'));
3131     xxungetc(c);
3132     YYTEXT_PUSH('\0', yyp);
3133     if ((kw = KeywordLookup(yytext)))
3134 	return kw;
3135 
3136     PRESERVE_SV(yylval = install(yytext));
3137     return SYMBOL;
3138 }
3139 
setParseFilename(SEXP newname)3140 static void setParseFilename(SEXP newname) {
3141     SEXP class;
3142 
3143     if (isEnvironment(PS_SRCFILE)) {
3144 	SEXP oldname = findVar(install("filename"), PS_SRCFILE);
3145     	if (isString(oldname) && length(oldname) > 0 &&
3146     	    strcmp(CHAR(STRING_ELT(oldname, 0)),
3147     	           CHAR(STRING_ELT(newname, 0))) == 0) return;
3148 	PS_SET_SRCFILE(NewEnvironment(R_NilValue, R_NilValue, R_EmptyEnv));
3149 	defineVar(install("filename"), newname, PS_SRCFILE);
3150 	defineVar(install("original"), PS_ORIGINAL, PS_SRCFILE);
3151 
3152 	PROTECT(class = allocVector(STRSXP, 2));
3153 	SET_STRING_ELT(class, 0, mkChar("srcfilealias"));
3154 	SET_STRING_ELT(class, 1, mkChar("srcfile"));
3155 	setAttrib(PS_SRCFILE, R_ClassSymbol, class);
3156 	UNPROTECT(1); /* class */
3157     } else
3158 	PS_SET_SRCFILE(duplicate(newname));
3159     RELEASE_SV(newname);
3160 }
3161 
processLineDirective(int * type)3162 static int processLineDirective(int *type)
3163 {
3164     int c, tok, linenumber;
3165     c = SkipSpace();
3166     if (!isdigit(c)) return(c);
3167     tok = NumericValue(c);
3168     linenumber = atoi(yytext);
3169     c = SkipSpace();
3170     if (c == '"')
3171 	tok = StringValue(c, FALSE);
3172     else
3173     	xxungetc(c);
3174     if (tok == STR_CONST)
3175 	setParseFilename(yylval);
3176     while ((c = xxgetc()) != '\n' && c != R_EOF) /* skip */ ;
3177     ParseState.xxlineno = linenumber;
3178     *type = LINE_DIRECTIVE;
3179     /* we don't change xxparseno here:  it counts parsed lines, not official lines */
3180     R_ParseContext[R_ParseContextLast] = '\0';  /* Context report shouldn't show the directive */
3181     return(c);
3182 }
3183 
3184 /* Get the R symbol, and set yytext at the same time */
install_and_save(char * text)3185 static SEXP install_and_save(char * text)
3186 {
3187     strcpy(yytext, text);
3188     return install(text);
3189 }
3190 
3191 /* Get an R symbol, and set different yytext.  Used for translation of -> to <-. ->> to <<- */
install_and_save2(char * text,char * savetext)3192 static SEXP install_and_save2(char * text, char * savetext)
3193 {
3194     strcpy(yytext, savetext);
3195     return install(text);
3196 }
3197 
3198 
3199 /* Split the input stream into tokens. */
3200 /* This is the lowest of the parsing levels. */
3201 
token(void)3202 static int token(void)
3203 {
3204     int c;
3205     wchar_t wc;
3206 
3207     if (SavedToken) {
3208 	c = SavedToken;
3209 	yylval = SavedLval;
3210 	SavedLval = R_NilValue;
3211 	SavedToken = 0;
3212 	yylloc.first_line = xxlinesave;
3213 	yylloc.first_column = xxcolsave;
3214 	yylloc.first_byte = xxbytesave;
3215 	yylloc.first_parsed = xxparsesave;
3216 	return c;
3217     }
3218     xxcharsave = xxcharcount; /* want to be able to go back one token */
3219 
3220     c = SkipSpace();
3221     if (c == '#') c = SkipComment();
3222 
3223     yylloc.first_line = ParseState.xxlineno;
3224     yylloc.first_column = ParseState.xxcolno;
3225     yylloc.first_byte = ParseState.xxbyteno;
3226     yylloc.first_parsed = ParseState.xxparseno;
3227 
3228     if (c == R_EOF) return END_OF_INPUT;
3229 
3230     /* Either digits or symbols can start with a "." */
3231     /* so we need to decide which it is and jump to  */
3232     /* the correct spot. */
3233 
3234     if (c == '.' && typeofnext() >= 2) goto symbol;
3235 
3236     /* literal numbers */
3237 
3238     if (c == '.') return NumericValue(c);
3239     /* We don't care about other than ASCII digits */
3240     if (isdigit(c)) return NumericValue(c);
3241 
3242     /* raw string literal */
3243 
3244     if (c == 'r' || c == 'R') {
3245 	if (nextchar('"'))
3246 	    return RawStringValue(c, '"');
3247 	else if (nextchar('\''))
3248 	    return RawStringValue(c, '\'');
3249     }
3250 
3251     /* literal strings */
3252 
3253     if (c == '\"' || c == '\'')
3254 	return StringValue(c, FALSE);
3255 
3256     /* special functions */
3257 
3258     if (c == '%')
3259 	return SpecialValue(c);
3260 
3261     /* functions, constants and variables */
3262 
3263     if (c == '`')
3264 	return StringValue(c, TRUE);
3265  symbol:
3266 
3267     if (c == '.') return SymbolValue(c);
3268     if(mbcslocale) {
3269 	// FIXME potentially need R_wchar_t with UTF-8 Windows.
3270 	mbcs_get_next(c, &wc);
3271 	if (iswalpha(wc)) return SymbolValue(c);
3272     } else
3273 	if (isalpha(c)) return SymbolValue(c);
3274 
3275     /* compound tokens */
3276 
3277     switch (c) {
3278     case '<':
3279 	if (nextchar('=')) {
3280 	    yylval = install_and_save("<=");
3281 	    return LE;
3282 	}
3283 	if (nextchar('-')) {
3284 	    yylval = install_and_save("<-");
3285 	    return LEFT_ASSIGN;
3286 	}
3287 	if (nextchar('<')) {
3288 	    if (nextchar('-')) {
3289 		yylval = install_and_save("<<-");
3290 		return LEFT_ASSIGN;
3291 	    }
3292 	    else
3293 		return ERROR;
3294 	}
3295 	yylval = install_and_save("<");
3296 	return LT;
3297     case '-':
3298 	if (nextchar('>')) {
3299 	    if (nextchar('>')) {
3300 		yylval = install_and_save2("<<-", "->>");
3301 		return RIGHT_ASSIGN;
3302 	    }
3303 	    else {
3304 		yylval = install_and_save2("<-", "->");
3305 		return RIGHT_ASSIGN;
3306 	    }
3307 	}
3308 	yylval = install_and_save("-");
3309 	return '-';
3310     case '>':
3311 	if (nextchar('=')) {
3312 	    yylval = install_and_save(">=");
3313 	    return GE;
3314 	}
3315 	yylval = install_and_save(">");
3316 	return GT;
3317     case '!':
3318 	if (nextchar('=')) {
3319 	    yylval = install_and_save("!=");
3320 	    return NE;
3321 	}
3322 	yylval = install_and_save("!");
3323 	return '!';
3324     case '=':
3325 	if (nextchar('=')) {
3326 	    yylval = install_and_save("==");
3327 	    return EQ;
3328 	}
3329 	else if (nextchar('>')) {
3330 	    yylval = install_and_save("=>");
3331 	    return PIPEBIND;
3332 	}
3333 	yylval = install_and_save("=");
3334 	return EQ_ASSIGN;
3335     case ':':
3336 	if (nextchar(':')) {
3337 	    if (nextchar(':')) {
3338 		yylval = install_and_save(":::");
3339 		return NS_GET_INT;
3340 	    }
3341 	    else {
3342 		yylval = install_and_save("::");
3343 		return NS_GET;
3344 	    }
3345 	}
3346 	if (nextchar('=')) {
3347 	    yylval = install_and_save(":=");
3348 	    return LEFT_ASSIGN;
3349 	}
3350 	yylval = install_and_save(":");
3351 	return ':';
3352     case '&':
3353 	if (nextchar('&')) {
3354 	    yylval = install_and_save("&&");
3355 	    return AND2;
3356 	}
3357 	yylval = install_and_save("&");
3358 	return AND;
3359     case '|':
3360 	if (nextchar('|')) {
3361 	    yylval = install_and_save("||");
3362 	    return OR2;
3363 	}
3364 	else if (nextchar('>')) {
3365 	    yylval = install_and_save("|>");
3366 	    return PIPE;
3367 	}
3368 	yylval = install_and_save("|");
3369 	return OR;
3370     case LBRACE:
3371 	yylval = install_and_save("{");
3372 	return c;
3373     case RBRACE:
3374         strcpy(yytext, "}");
3375 	return c;
3376     case '(':
3377 	yylval = install_and_save("(");
3378 	return c;
3379     case ')':
3380         strcpy(yytext, ")");
3381 	return c;
3382     case '[':
3383 	if (nextchar('[')) {
3384 	    yylval = install_and_save("[[");
3385 	    return LBB;
3386 	}
3387 	yylval = install_and_save("[");
3388 	return c;
3389     case ']':
3390         strcpy(yytext, "]");
3391 	return c;
3392     case '?':
3393 	yylval = install_and_save("?");
3394 	return c;
3395     case '*':
3396 	/* Replace ** by ^.  This has been here since 1998, but is
3397 	   undocumented (at least in the obvious places).  It is in
3398 	   the index of the Blue Book with a reference to p. 431, the
3399 	   help for 'Deprecated'.  S-PLUS 6.2 still allowed this, so
3400 	   presumably it was for compatibility with S. */
3401 	if (nextchar('*')) {
3402 	    yylval = install_and_save2("^", "**");
3403 	    return '^';
3404 	} else
3405 	    yylval = install_and_save("*");
3406 	return c;
3407     case '+':
3408     case '/':
3409     case '^':
3410     case '~':
3411     case '$':
3412     case '@':
3413     case '\\':
3414 	yytext[0] = (char) c;
3415 	yytext[1] = '\0';
3416 	yylval = install(yytext);
3417 	return c;
3418     default:
3419         yytext[0] = (char) c;
3420         yytext[1] = '\0';
3421 	return c;
3422     }
3423 }
3424 
3425 /**
3426  * Sets the first elements of the yyloc structure with current
3427  * information
3428  */
setfirstloc(void)3429 static void setfirstloc(void)
3430 {
3431     yylloc.first_line   = ParseState.xxlineno;
3432     yylloc.first_column = ParseState.xxcolno;
3433     yylloc.first_byte   = ParseState.xxbyteno;
3434     yylloc.first_parsed = ParseState.xxparseno;
3435 }
3436 
setlastloc(void)3437 static void setlastloc(void)
3438 {
3439     yylloc.last_line = ParseState.xxlineno;
3440     yylloc.last_column = ParseState.xxcolno;
3441     yylloc.last_byte = ParseState.xxbyteno;
3442     yylloc.last_parsed = ParseState.xxparseno;
3443 }
3444 
3445 /**
3446  * Wrap around the token function. Returns the same result
3447  * but increments the identifier, after a call to token_,
3448  * the identifier variable contains the id of the token
3449  * just returned
3450  *
3451  * @return the same as token
3452  */
3453 
token_(void)3454 static int token_(void){
3455     // capture the position before retrieving the token
3456     setfirstloc( ) ;
3457 
3458     // get the token
3459     int res = token( ) ;
3460 
3461     // capture the position after
3462     int _last_col  = ParseState.xxcolno ;
3463     int _last_parsed = ParseState.xxparseno ;
3464 
3465     _current_token = res ;
3466     incrementId( ) ;
3467     yylloc.id = identifier ;
3468 
3469     // record the position
3470     if( res != '\n' && res != END_OF_INPUT)
3471 	record_( yylloc.first_parsed, yylloc.first_column,
3472 	         _last_parsed, _last_col,
3473 		res, identifier, yytext );
3474 
3475     return res;
3476 }
3477 
3478 
yylex(void)3479 static int yylex(void)
3480 {
3481     int tok;
3482 
3483  again:
3484 
3485     tok = token_();
3486 
3487     /* Newlines must be handled in a context */
3488     /* sensitive way.  The following block of */
3489     /* deals directly with newlines in the */
3490     /* body of "if" statements. */
3491 
3492     if (tok == '\n') {
3493 
3494 	if (EatLines || *contextp == '[' || *contextp == '(')
3495 	    goto again;
3496 
3497 	/* The essence of this is that in the body of */
3498 	/* an "if", any newline must be checked to */
3499 	/* see if it is followed by an "else". */
3500 	/* such newlines are discarded. */
3501 
3502 	if (*contextp == 'i') {
3503 
3504 	    /* Find the next non-newline token */
3505 
3506 	    while(tok == '\n')
3507 		tok = token_();
3508 
3509 	    /* If we encounter "}", ")" or "]" then */
3510 	    /* we know that all immediately preceding */
3511 	    /* "if" bodies have been terminated. */
3512 	    /* The corresponding "i" values are */
3513 	    /* popped off the context stack. */
3514 
3515 	    if (tok == RBRACE || tok == ')' || tok == ']' ) {
3516 		while (*contextp == 'i')
3517 		    ifpop();
3518 		*contextp-- = 0;
3519 		setlastloc();
3520 		return tok;
3521 	    }
3522 
3523 	    /* When a "," is encountered, it terminates */
3524 	    /* just the immediately preceding "if" body */
3525 	    /* so we pop just a single "i" of the */
3526 	    /* context stack. */
3527 
3528 	    if (tok == ',') {
3529 		ifpop();
3530 		setlastloc();
3531 		return tok;
3532 	    }
3533 
3534 	    /* Tricky! If we find an "else" we must */
3535 	    /* ignore the preceding newline.  Any other */
3536 	    /* token means that we must return the newline */
3537 	    /* to terminate the "if" and "push back" that */
3538 	    /* token so that we will obtain it on the next */
3539 	    /* call to token.  In either case sensitivity */
3540 	    /* is lost, so we pop the "i" from the context */
3541 	    /* stack. */
3542 
3543 	    if(tok == ELSE) {
3544 		EatLines = 1;
3545 		ifpop();
3546 		setlastloc();
3547 		return ELSE;
3548 	    }
3549 	    else {
3550 		ifpop();
3551 		SavedToken = tok;
3552 		xxlinesave = yylloc.first_line;
3553 		xxcolsave  = yylloc.first_column;
3554 		xxbytesave = yylloc.first_byte;
3555 		xxparsesave = yylloc.first_parsed;
3556 		SavedLval = yylval;
3557 		setlastloc();
3558 		if (ParseState.keepSrcRefs && ParseState.keepParseData &&
3559 		    yytext[0])
3560 
3561 		    /* unrecord the pushed back token if not null */
3562 		    ParseState.data_count--;
3563 		return '\n';
3564 	    }
3565 	}
3566 	else {
3567 	    setlastloc();
3568 	    return '\n';
3569 	}
3570     }
3571 
3572     /* Additional context sensitivities */
3573 
3574     switch(tok) {
3575 
3576 	/* Any newlines immediately following the */
3577 	/* the following tokens are discarded. The */
3578 	/* expressions are clearly incomplete. */
3579 
3580     case '+':
3581     case '-':
3582     case '*':
3583     case '/':
3584     case '^':
3585     case LT:
3586     case LE:
3587     case GE:
3588     case GT:
3589     case EQ:
3590     case NE:
3591     case OR:
3592     case AND:
3593     case OR2:
3594     case AND2:
3595     case PIPE:
3596     case PIPEBIND:
3597     case SPECIAL:
3598     case FUNCTION:
3599     case WHILE:
3600     case REPEAT:
3601     case FOR:
3602     case IN:
3603     case '?':
3604     case '!':
3605     case '=':
3606     case ':':
3607     case '~':
3608     case '$':
3609     case '@':
3610     case LEFT_ASSIGN:
3611     case RIGHT_ASSIGN:
3612     case EQ_ASSIGN:
3613 	EatLines = 1;
3614 	break;
3615 
3616 	/* Push any "if" statements found and */
3617 	/* discard any immediately following newlines. */
3618 
3619     case IF:
3620 	IfPush();
3621 	EatLines = 1;
3622 	break;
3623 
3624 	/* Terminate any immediately preceding "if" */
3625 	/* statements and discard any immediately */
3626 	/* following newlines. */
3627 
3628     case ELSE:
3629 	ifpop();
3630 	EatLines = 1;
3631 	break;
3632 
3633 	/* These tokens terminate any immediately */
3634 	/* preceding "if" statements. */
3635 
3636     case ';':
3637     case ',':
3638 	ifpop();
3639 	break;
3640 
3641 	/* Any newlines following these tokens can */
3642 	/* indicate the end of an expression. */
3643 
3644     case SYMBOL:
3645     case STR_CONST:
3646     case NUM_CONST:
3647     case NULL_CONST:
3648     case NEXT:
3649     case BREAK:
3650 	EatLines = 0;
3651 	break;
3652 
3653 	/* Handle brackets, braces and parentheses */
3654 
3655     case LBB:
3656 	if(contextp - contextstack >= CONTEXTSTACK_SIZE - 1)
3657 	    error(_("contextstack overflow at line %d"), ParseState.xxlineno);
3658 	*++contextp = '[';
3659 	*++contextp = '[';
3660 	break;
3661 
3662     case '[':
3663 	if(contextp - contextstack >= CONTEXTSTACK_SIZE)
3664 	    error(_("contextstack overflow at line %d"), ParseState.xxlineno);
3665 	*++contextp = (char) tok;
3666 	break;
3667 
3668     case LBRACE:
3669 	if(contextp - contextstack >= CONTEXTSTACK_SIZE)
3670 	    error(_("contextstack overflow at line %d"), ParseState.xxlineno);
3671 	*++contextp = (char) tok;
3672 	EatLines = 1;
3673 	break;
3674 
3675     case '(':
3676 	if(contextp - contextstack >= CONTEXTSTACK_SIZE)
3677 	    error(_("contextstack overflow at line %d"), ParseState.xxlineno);
3678 	*++contextp = (char) tok;
3679 	break;
3680 
3681     case ']':
3682 	while (*contextp == 'i')
3683 	    ifpop();
3684 	*contextp-- = 0;
3685 	EatLines = 0;
3686 	break;
3687 
3688     case RBRACE:
3689 	while (*contextp == 'i')
3690 	    ifpop();
3691 	*contextp-- = 0;
3692 	break;
3693 
3694     case ')':
3695 	while (*contextp == 'i')
3696 	    ifpop();
3697 	*contextp-- = 0;
3698 	EatLines = 0;
3699 	break;
3700 
3701     }
3702     setlastloc();
3703     return tok;
3704 }
3705 /**
3706  * Records location information about a symbol. The information is
3707  * used to fill the data
3708  *
3709  */
record_(int first_parsed,int first_column,int last_parsed,int last_column,int token,int id,char * text_in)3710 static void record_( int first_parsed, int first_column, int last_parsed, int last_column,
3711 	int token, int id, char* text_in ){
3712 
3713 	if (!ParseState.keepSrcRefs || !ParseState.keepParseData
3714 	    || id == NA_INTEGER) return;
3715 
3716 	// don't care about zero sized things
3717 	if( !yytext[0] ) return ;
3718 
3719 	if (ParseState.data_count == DATA_COUNT)
3720 	    growData();
3721 
3722 	_FIRST_COLUMN( ParseState.data_count ) = first_column;
3723 	_FIRST_PARSED( ParseState.data_count ) = first_parsed;
3724 	_LAST_COLUMN( ParseState.data_count )  = last_column;
3725 	_LAST_PARSED( ParseState.data_count )  = last_parsed;
3726 	_TOKEN( ParseState.data_count )        = token;
3727 	_ID( ParseState.data_count )           = id ;
3728 	_PARENT(ParseState.data_count)         = 0 ;
3729 	if ( text_in )
3730 	    SET_STRING_ELT(PS_TEXT, ParseState.data_count, mkChar2(text_in));
3731 	else
3732 	    SET_STRING_ELT(PS_TEXT, ParseState.data_count, mkChar(""));
3733 
3734 	if( id > ID_COUNT )
3735 	    growID(id) ;
3736 
3737 	ID_ID( id ) = ParseState.data_count ;
3738 
3739 	ParseState.data_count++ ;
3740 }
3741 
3742 /**
3743  * records parent as the parent of all its childs. This grows the
3744  * parents list with a new vector. The first element of the new
3745  * vector is the parent id, and other elements are childs id
3746  *
3747  * @param parent id of the parent expression
3748  * @param childs array of location information for all child symbols
3749  * @param nchilds number of childs
3750  */
recordParents(int parent,yyltype * childs,int nchilds)3751 static void recordParents( int parent, yyltype * childs, int nchilds){
3752 
3753 	if( parent > ID_COUNT ){
3754 		growID(parent) ;
3755 	}
3756 
3757 	/* some of the childs might be an empty token (like cr)
3758 	   which we do not want to track */
3759 	int ii;    /* loop index */
3760 	yyltype loc ;
3761 	for( ii=0; ii<nchilds; ii++){
3762 		loc = childs[ii] ;
3763 		if( loc.id == NA_INTEGER || (loc.first_line == loc.last_line && loc.first_byte > loc.last_byte) )
3764 			continue ;
3765 		/*  This shouldn't happen... */
3766 		if (loc.id < 0 || loc.id > identifier) {
3767 		    error(_("internal parser error at line %d"),  ParseState.xxlineno);
3768 		}
3769 		ID_PARENT( loc.id ) = parent;
3770 	}
3771 
3772 }
3773 
3774 /**
3775  * The token pointed by the location has the wrong token type,
3776  * This updates the type
3777  *
3778  * @param loc location information for the token to track
3779  */
modif_token(yyltype * loc,int tok)3780 static void modif_token( yyltype* loc, int tok ){
3781 
3782 	int id = loc->id ;
3783 
3784 	if (!ParseState.keepSrcRefs || !ParseState.keepParseData
3785 	    || id < 0 || id > ID_COUNT) return;
3786 
3787 	if( tok == SYMBOL_FUNCTION_CALL ){
3788 		// looking for first child of id
3789 		int j = ID_ID( id ) ;
3790 		int parent = id ;
3791 
3792 		if (j < 0 || j > ID_COUNT)
3793 	            return;
3794 
3795 		while( ID_PARENT( _ID(j) ) != parent ){
3796 		    j-- ;
3797 		    if (j < 0)
3798 	        	return;
3799 		}
3800 
3801 		if( _TOKEN(j) == SYMBOL ){
3802 		    _TOKEN(j) = SYMBOL_FUNCTION_CALL ;
3803 		}
3804 
3805 	} else{
3806 		_TOKEN( ID_ID(id) ) = tok ;
3807 	}
3808 
3809 }
3810 
3811 /* this local version of lengthgets() always copies and doesn't fill with NA */
lengthgets2(SEXP x,int len)3812 static SEXP lengthgets2(SEXP x, int len) {
3813     SEXP result;
3814     PROTECT(result = allocVector( TYPEOF(x), len ));
3815 
3816     len = (len < length(x)) ? len : length(x);
3817     switch(TYPEOF(x)) {
3818     	case INTSXP:
3819     	    for (int i = 0; i < len; i++)
3820     	    	INTEGER(result)[i] = INTEGER(x)[i];
3821 	    for (int i = len; i < length(result); i++)
3822 		INTEGER(result)[i] = 0;
3823     	    break;
3824     	case STRSXP:
3825     	    for (int i = 0; i < len; i++)
3826     	    	SET_STRING_ELT(result, i, STRING_ELT(x, i));
3827     	    break;
3828     	default:
3829 	    UNIMPLEMENTED_TYPE("lengthgets2", x);
3830     }
3831     UNPROTECT(1); /* result */
3832     return result;
3833 }
3834 
finalizeData()3835 static void finalizeData( ){
3836 
3837     int nloc = ParseState.data_count ;
3838 
3839     int i, j, id ;
3840     int parent ;
3841 
3842     /* store parents in the data */
3843     for( i=0; i<nloc; i++){
3844 	id = _ID(i);
3845 	parent = ID_PARENT( id ) ;
3846 	while( parent != 0 && ID_ID(parent) == 0 )
3847 	    parent = ID_PARENT( parent ) ;
3848 	_PARENT(i) = parent ;
3849 
3850 #define FD_FAST_UPDATE_PARENTS
3851 #ifdef FD_FAST_UPDATE_PARENTS
3852 	/*
3853 	   With long generated expressions, updating the parents can take
3854 	   a lot of time due to long chains of nodes not represented in the
3855 	   parse data. To reduce the overhead somewhat, we create shortcuts
3856 	   in the IDS array to point directly to the parent that is in the
3857 	   parse data.
3858 	*/
3859 	int data_parent = parent;
3860 	parent = ID_PARENT( id ) ;
3861 	while( parent != data_parent ){
3862 	    ID_PARENT( id ) = data_parent; /* set shortcut */
3863 	    id = parent;
3864 	    parent = ID_PARENT( parent );
3865 	}
3866 #endif
3867     }
3868 
3869     /* attach comments to closest enclosing symbol */
3870     /* not updating ID_PARENT anymore */
3871 
3872 #define FD_FAST_ASSIGN_COMMENTS
3873 #ifdef FD_FAST_ASSIGN_COMMENTS
3874     /*
3875        All terminals (tokens) are ordered by start and end location, including
3876        the comments, in the data.
3877 
3878        All non-terminals, including to be found parents of the comments, are
3879        ordered by their end location. When they have the same end location
3880        in the code, they are ordered by their decreasing start location
3881        (children before parents).
3882 
3883        All terminals and non-terminals are also before their parents (if any),
3884        so a comment is also befor its parent in the data.
3885 
3886        Consequently: the first non-terminal after a comment that encloses the
3887        comment is its (immediate) parent. The original algorithm for every
3888        comment linearly searches for the first enclosing non-terminal and
3889        returns it, but it has quadratic complexity and dominates the whole
3890        parsing for long inputs (used when FD_FAST_ASSIGN_COMMENTS is not
3891        defined).
3892 
3893        This algorithm uses the parental information available on nodes that
3894        follow the comments. That information has been filled by the parser
3895        during reductions (but not for comments, because those are not in the
3896        grammar). A node following a comment is either the parent of the
3897        comment, or some of its parents are, or is an orphan.
3898 
3899        Note that a non-terminal may end before a terminal (e.g. comment) in the
3900        code but be after the terminal in the data (due to look-ahead). It seems
3901        that the parent of the comment has to be within parents of the
3902        non-terminal as well, but I am not sure how to prove it, so the algorithm
3903        just skips non-terminals preceding the comment in the code (so is not
3904        strictly linear).
3905       */
3906 
3907     for(i = nloc-1; i >= 0; i--) {
3908 	if (_TOKEN(i) == COMMENT) {
3909 	    int orphan = 1;
3910 	    int istartl = _FIRST_PARSED(i);
3911 	    int istartc = _FIRST_COLUMN(i);
3912 
3913 	    /* look for first node j that does not end before the comment i */
3914 	    for(j = i + 1; j < nloc && _LAST_PARSED(j) <= istartl; j++);
3915 
3916 	    if (j < nloc) {
3917 		for(;;) {
3918 		    int jstartl = _FIRST_PARSED(j);
3919 		    int jstartc = _FIRST_COLUMN(j);
3920 
3921 		    if (jstartl < istartl || (jstartl == istartl
3922 		                              && jstartc <= istartc)) {
3923 			/* j starts before or at the comment */
3924 			_PARENT(i) = _ID(j);
3925 			orphan = 0;
3926 			break;
3927 		    }
3928 		    /* find parent of j */
3929 		    int jparent = _PARENT(j);
3930 		    if (jparent == 0)
3931 			break; /* orphan */
3932 		    j = ID_ID(jparent);
3933 		}
3934 	    }
3935 	    if (orphan)
3936 		_PARENT(i) = 0;
3937 	}
3938     }
3939 #else
3940     /* the original algorithm, which is slow for large inputs */
3941 
3942     int comment_line, comment_first_col;
3943     int this_first_parsed, this_last_parsed, this_first_col ;
3944     int orphan ;
3945 
3946     for( i=0; i<nloc; i++){
3947 	if( _TOKEN(i) == COMMENT ){
3948 	    comment_line = _FIRST_PARSED( i ) ;
3949 	    comment_first_col = _FIRST_COLUMN( i ) ;
3950 
3951 	    orphan = 1 ;
3952 	    for( j=i+1; j<nloc; j++){
3953 		this_first_parsed = _FIRST_PARSED( j ) ;
3954 		this_first_col = _FIRST_COLUMN( j ) ;
3955 		this_last_parsed  = _LAST_PARSED( j ) ;
3956 
3957 		/* the comment needs to start after the current symbol */
3958 		if( comment_line < this_first_parsed ) continue ;
3959 		if( (comment_line == this_first_parsed) & (comment_first_col < this_first_col) ) continue ;
3960 
3961 		/* the current symbol must finish after the comment */
3962 		if( this_last_parsed <= comment_line ) continue ;
3963 
3964 		/* we have a match, record the parent and stop looking */
3965 		_PARENT(i) = _ID(j);
3966 		orphan = 0;
3967 		break ;
3968 	    }
3969 	    if(orphan){
3970 		_PARENT(i) = 0 ;
3971 	    }
3972 	}
3973     }
3974 #endif
3975 
3976 
3977     /* now rework the parents of comments, we try to attach
3978     comments that are not already attached (parent=0) to the next
3979     enclosing top-level expression */
3980 
3981     for( i=0; i<nloc; i++){
3982 	int token = _TOKEN(i);
3983 	if( token == COMMENT && _PARENT(i) == 0 ){
3984 	    for( j=i; j<nloc; j++){
3985 		int token_j = _TOKEN(j);
3986 		if( token_j == COMMENT ) continue ;
3987 		if( _PARENT(j) != 0 ) continue ;
3988 		_PARENT(i) = - _ID(j) ;
3989 		break ;
3990 	    }
3991 	}
3992     }
3993 
3994     /* attach the token names as an attribute so we don't need to switch to a dataframe, and decide on terminals */
3995     SEXP tokens;
3996     PROTECT(tokens = allocVector( STRSXP, nloc ) );
3997     for (int i=0; i<nloc; i++) {
3998         int token = _TOKEN(i);
3999         int xlat = yytranslate[token];
4000         if (xlat == 2) /* "unknown" */
4001             xlat = token;
4002         if (xlat < YYNTOKENS + YYNNTS)
4003     	    SET_STRING_ELT(tokens, i, mkChar(yytname[xlat]));
4004     	else { /* we have a token which doesn't have a name, e.g. an illegal character as in PR#15518 */
4005     	    char name[2];
4006     	    name[0] = (char) xlat;
4007     	    name[1] = 0;
4008     	    SET_STRING_ELT(tokens, i, mkChar(name));
4009     	}
4010     	_TERMINAL(i) = xlat < YYNTOKENS;
4011     }
4012     SEXP dims, newdata, newtext;
4013     if (nloc) {
4014 	PROTECT( newdata = lengthgets2(PS_DATA, nloc * DATA_ROWS));
4015 	PROTECT( newtext = lengthgets2(PS_TEXT, nloc));
4016     } else {
4017 	PROTECT( newdata = allocVector( INTSXP, 0));
4018 	PROTECT( newtext = allocVector( STRSXP, 0));
4019     }
4020     PROTECT( dims = allocVector( INTSXP, 2 ) ) ;
4021     INTEGER(dims)[0] = DATA_ROWS ;
4022     INTEGER(dims)[1] = nloc ;
4023     setAttrib( newdata, install( "dim" ), dims ) ;
4024     setAttrib( newdata, install("tokens"), tokens );
4025     setAttrib( newdata, install("text"), newtext );
4026 
4027     setAttrib(newdata, R_ClassSymbol, mkString("parseData"));
4028 
4029     /* Put it into the srcfile environment */
4030     if (isEnvironment(PS_SRCFILE))
4031 	defineVar(install("parseData"), newdata, PS_SRCFILE);
4032     UNPROTECT(4); /* tokens, newdata, newtext, dims */
4033 }
4034 
4035 /**
4036  * Grows the data
4037  */
growData()4038 static void growData(){
4039 
4040     int new_data_count;
4041     if (PS_DATA == R_NilValue) {
4042         new_data_count = INIT_DATA_COUNT;
4043 	PS_SET_DATA(allocVector(INTSXP, 0));
4044 	PS_SET_TEXT(allocVector(STRSXP, 0));
4045     } else
4046         new_data_count = 2*DATA_COUNT;
4047 
4048     PS_SET_DATA(lengthgets2(PS_DATA, new_data_count * DATA_ROWS));
4049     PS_SET_TEXT(lengthgets2(PS_TEXT, new_data_count));
4050 }
4051 
4052 /**
4053  * Grows the ids vector so that ID_ID(target) can be called
4054  */
growID(int target)4055 static void growID( int target ){
4056 
4057     int new_count;
4058     if (PS_IDS == R_NilValue) {
4059         new_count = INIT_DATA_COUNT/2 - 1;
4060         PS_SET_IDS(allocVector(INTSXP, 0));
4061     } else
4062     	new_count = ID_COUNT;
4063 
4064     while (target > new_count)
4065     	new_count = 2*new_count + 1;
4066 
4067     if (new_count <= ID_COUNT)
4068     	return;
4069 
4070     int new_size = (1 + new_count)*2;
4071     PS_SET_IDS(lengthgets2(PS_IDS, new_size));
4072 }
4073 
checkForPlaceholder(SEXP placeholder,SEXP arg)4074 static int checkForPlaceholder(SEXP placeholder, SEXP arg)
4075 {
4076     if (arg == placeholder)
4077 	return TRUE;
4078     else if (TYPEOF(arg) == LANGSXP)
4079 	for (SEXP cur = arg; cur != R_NilValue; cur = CDR(cur))
4080 	    if (checkForPlaceholder(placeholder, CAR(cur)))
4081 		return TRUE;
4082     return FALSE;
4083 }
4084 
signal_ph_error(SEXP rhs,SEXP ph)4085 static void NORET signal_ph_error(SEXP rhs, SEXP ph) {
4086     errorcall(rhs, _("pipe placeholder must only appear as a top-level "
4087 		     "argument in the RHS call"));
4088 }
4089 
findPlaceholderCell(SEXP placeholder,SEXP rhs)4090 static SEXP findPlaceholderCell(SEXP placeholder, SEXP rhs)
4091 {
4092     SEXP phcell = NULL;
4093     int count = 0;
4094     if (checkForPlaceholder(placeholder, CAR(rhs)))
4095 	signal_ph_error(rhs, placeholder);
4096     for (SEXP a = CDR(rhs); a != R_NilValue; a = CDR(a))
4097 	if (CAR(a) == placeholder) {
4098 	    if (phcell == NULL)
4099 		phcell = a;
4100 	    count++;
4101 	}
4102 	else if (checkForPlaceholder(placeholder, CAR(a)))
4103 	    signal_ph_error(rhs, placeholder);
4104     if (count > 1)
4105 	errorcall(rhs, _("pipe placeholder may only appear once"));
4106     return phcell;
4107 }
4108