1 %{
2 /*
3 * R : A Computer Language for Statistical Data Analysis
4 * Copyright (C) 1995, 1996, 1997 Robert Gentleman and Ross Ihaka
5 * Copyright (C) 1997--2021 The R Core Team
6 * Copyright (C) 2009--2011 Romain Francois
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, a copy is available at
20 * https://www.R-project.org/Licenses/
21 */
22
23 #ifdef HAVE_CONFIG_H
24 #include <config.h>
25 #endif
26
27 #define R_USE_SIGNALS 1
28 #include "IOStuff.h" /*-> Defn.h */
29 #include "Fileio.h"
30 #include "Parse.h"
31 #include <R_ext/Print.h>
32
33 #if !defined(__STDC_ISO_10646__) && (defined(__APPLE__) || defined(__FreeBSD__) || defined(__sun))
34 /* This may not be 100% true (see the comment in rlocale.h),
35 but it seems true in normal locales.
36 */
37 # define __STDC_ISO_10646__
38 #endif
39
40 /* #define YYDEBUG 1 */
41 #define YYERROR_VERBOSE 1
42 #define PARSE_ERROR_SIZE 256 /* Parse error messages saved here */
43 #define PARSE_CONTEXT_SIZE 256 /* Recent parse context kept in a circular buffer */
44
45 static Rboolean busy = FALSE;
46 static SEXP R_NullSymbol = NULL;
47
48 static int identifier ;
49 static void incrementId(void);
50 static void initData(void);
51 static void initId(void);
52 static void record_( int, int, int, int, int, int, char* ) ;
53
54 static void yyerror(const char *);
55 static int yylex();
56 int yyparse(void);
57
58 static FILE *fp_parse;
59 static int (*ptr_getc)(void);
60
61 static int SavedToken;
62 static SEXP SavedLval;
63
64 #define yyconst const
65
66 typedef struct yyltype
67 {
68 int first_line;
69 int first_column;
70 int first_byte;
71
72 int last_line;
73 int last_column;
74 int last_byte;
75
76 int first_parsed;
77 int last_parsed;
78
79 int id;
80 } yyltype;
81
82
83 #define INIT_DATA_COUNT 16384 /* init parser data to this size */
84 #define MAX_DATA_COUNT 65536 /* release it at the end if it is this size or larger*/
85
86 #define DATA_COUNT (length( PS_DATA ) / DATA_ROWS)
87 #define ID_COUNT ((length( PS_IDS ) / 2) - 1)
88
89 static void finalizeData( ) ;
90 static void growData( ) ;
91 static void growID( int ) ;
92
93 #define DATA_ROWS 8
94
95 #define _FIRST_PARSED( i ) INTEGER( PS_DATA )[ DATA_ROWS*(i) ]
96 #define _FIRST_COLUMN( i ) INTEGER( PS_DATA )[ DATA_ROWS*(i) + 1 ]
97 #define _LAST_PARSED( i ) INTEGER( PS_DATA )[ DATA_ROWS*(i) + 2 ]
98 #define _LAST_COLUMN( i ) INTEGER( PS_DATA )[ DATA_ROWS*(i) + 3 ]
99 #define _TERMINAL( i ) INTEGER( PS_DATA )[ DATA_ROWS*(i) + 4 ]
100 #define _TOKEN( i ) INTEGER( PS_DATA )[ DATA_ROWS*(i) + 5 ]
101 #define _ID( i ) INTEGER( PS_DATA )[ DATA_ROWS*(i) + 6 ]
102 #define _PARENT(i) INTEGER( PS_DATA )[ DATA_ROWS*(i) + 7 ]
103
104 #define ID_ID( i ) INTEGER(PS_IDS)[ 2*(i) ]
105 #define ID_PARENT( i ) INTEGER(PS_IDS)[ 2*(i) + 1 ]
106
107 static void modif_token( yyltype*, int ) ;
108 static void recordParents( int, yyltype*, int) ;
109
110 static int _current_token ;
111
112 /**
113 * Records the current non-terminal token expression and gives it an id
114 *
115 * @param loc the location of the expression
116 */
setId(yyltype loc)117 static void setId(yyltype loc){
118 record_(
119 (loc).first_parsed, (loc).first_column, (loc).last_parsed, (loc).last_column,
120 _current_token, (loc).id, 0 ) ;
121 }
122
123 # define YYLTYPE yyltype
124 # define YYLLOC_DEFAULT(Current, Rhs, N) \
125 do { \
126 if (N){ \
127 (Current).first_line = YYRHSLOC (Rhs, 1).first_line; \
128 (Current).first_column = YYRHSLOC (Rhs, 1).first_column; \
129 (Current).first_byte = YYRHSLOC (Rhs, 1).first_byte; \
130 (Current).last_line = YYRHSLOC (Rhs, N).last_line; \
131 (Current).last_column = YYRHSLOC (Rhs, N).last_column; \
132 (Current).last_byte = YYRHSLOC (Rhs, N).last_byte; \
133 (Current).first_parsed = YYRHSLOC (Rhs, 1).first_parsed; \
134 (Current).last_parsed = YYRHSLOC (Rhs, N).last_parsed; \
135 incrementId( ) ; \
136 (Current).id = identifier ; \
137 _current_token = yyr1[yyn] ; \
138 if (ParseState.keepSrcRefs && ParseState.keepParseData) { \
139 yyltype childs[N]; \
140 int ii = 0; \
141 for(ii=0; ii<N; ii++){ \
142 childs[ii] = YYRHSLOC (Rhs, (ii+1) ) ; \
143 } \
144 recordParents( identifier, childs, N) ; \
145 } \
146 } else { \
147 (Current).first_line = (Current).last_line = \
148 YYRHSLOC (Rhs, 0).last_line; \
149 (Current).first_parsed = (Current).last_parsed = \
150 YYRHSLOC (Rhs, 0).last_parsed; \
151 (Current).first_column = YYRHSLOC (Rhs, 0).last_column; \
152 (Current).last_column = (Current).first_column - 1; \
153 (Current).first_byte = YYRHSLOC (Rhs, 0).last_byte; \
154 (Current).last_byte = (Current).first_byte - 1; \
155 (Current).id = NA_INTEGER; \
156 } \
157 } while (0)
158
159
160 # define YY_LOCATION_PRINT(File,Loc) \
161 fprintf ( File, "%d.%d.%d-%d.%d.%d (%d)", \
162 (Loc).first_line, (Loc).first_column, (Loc).first_byte, \
163 (Loc).last_line, (Loc).last_column, (Loc).last_byte, \
164 (Loc).id )
165
166 /* Useful defines so editors don't get confused ... */
167
168 #define LBRACE '{'
169 #define RBRACE '}'
170
171 /* Functions used in the parsing process */
172
173 static void CheckFormalArgs(SEXP, SEXP, YYLTYPE *);
174 static SEXP FirstArg(SEXP, SEXP); /* create list with one element */
175 static void GrowList(SEXP, SEXP); /* add element to list end */
176
177 static void SetSingleSrcRef(SEXP);
178 static void AppendToSrcRefs(SEXP);
179 static void PrependToSrcRefs(SEXP);
180 static SEXP SrcRefsToVectorList();
181
182 static void IfPush(void);
183 static int KeywordLookup(const char *);
184 static SEXP NewList(void);
185 static void NextArg(SEXP, SEXP, SEXP); /* add named element to list end */
186 static SEXP TagArg(SEXP, SEXP, YYLTYPE *);
187 static int processLineDirective();
188
189 static SEXP R_PipeBindSymbol = NULL;
190
191 /* These routines allocate constants */
192
193 static SEXP mkComplex(const char *);
194 SEXP mkFalse(void);
195 static SEXP mkFloat(const char *);
196 static SEXP mkInt(const char *);
197 static SEXP mkNA(void);
198 SEXP mkTrue(void);
199
200 /* Internal lexer / parser state variables */
201
202 static int EatLines = 0;
203 static int GenerateCode = 0;
204 static int EndOfFile = 0;
205 static int xxgetc();
206 static int xxungetc(int);
207 static int xxcharcount, xxcharsave;
208 static int xxlinesave, xxbytesave, xxcolsave, xxparsesave;
209
210 static SrcRefState ParseState;
211
212 #define PS_SET_SRCREFS(x) SET_VECTOR_ELT(ParseState.sexps, 0, (x))
213 #define PS_SET_SRCFILE(x) SET_VECTOR_ELT(ParseState.sexps, 1, (x))
214 #define PS_SET_ORIGINAL(x) SET_VECTOR_ELT(ParseState.sexps, 2, (x))
215
216 /* direct pointer to data is kept for performance of finalizeData() */
217 #define PS_SET_DATA(x) do { \
218 SEXP __x__ = (x); \
219 SET_VECTOR_ELT(ParseState.sexps, 3, __x__); \
220 ParseState.data = __x__; \
221 } while(0);
222
223 #define PS_SET_TEXT(x) SET_VECTOR_ELT(ParseState.sexps, 4, (x))
224 #define PS_SET_IDS(x) SET_VECTOR_ELT(ParseState.sexps, 5, (x))
225 #define PS_SET_SVS(x) SET_VECTOR_ELT(ParseState.sexps, 6, (x))
226
227 #define PS_SRCREFS VECTOR_ELT(ParseState.sexps, 0)
228 #define PS_SRCFILE VECTOR_ELT(ParseState.sexps, 1)
229 #define PS_ORIGINAL VECTOR_ELT(ParseState.sexps, 2)
230 #define PS_DATA ParseState.data
231 #define PS_TEXT VECTOR_ELT(ParseState.sexps, 4)
232 #define PS_IDS VECTOR_ELT(ParseState.sexps, 5)
233 #define PS_SVS VECTOR_ELT(ParseState.sexps, 6)
234
235 /* Memory protection in the parser
236
237 The generated code of the parser keeps semantic values (SEXPs) on its
238 semantic values stack. Values are added to the stack during shift and
239 reduce operations and are removed during reduce operations or error
240 handling. Values are created by the lexer before they are added to the
241 stack. Values are also held in a local SEXP variable once removed from
242 the stack but still needed. The stack is automatically expanded on demand.
243
244 For memory protection, it would be natural to have that stack on the R heap
245 and to use PROTECT/UNPROTECT to protect values in local SEXP variables.
246 Unfortunately, bison does not seem to be customizable enough to allow this.
247
248 Hence, semantic values, when created by the lexer or reduce operations, are
249 placed on parser state precious multi-set via PRESERVE_SV. They are removed
250 from the multi-set in reduce operations using RELEASE_SV, because by design
251 of the bison parsers such values are subsequently removed from the stack.
252 They are also automatically removed when the parsing finishes, including
253 parser error (also on R error, via the context on-end action).
254
255 Previously semantic values were protected via PROTECT/UNPROTECT_PTR with
256 similar semantics but using protect stack shared with PROTECT/UNPROTECT.
257 Using a separate precious multi-set is safe even with interleaving of the
258 two protection schemes.
259 */
260
261 #define INIT_SVS() PS_SET_SVS(R_NewPreciousMSet(200))
262 #define PRESERVE_SV(x) R_PreserveInMSet((x), PS_SVS)
263 #define RELEASE_SV(x) R_ReleaseFromMSet((x), PS_SVS)
264 #define CLEAR_SVS() R_ReleaseMSet(PS_SVS, 500)
265
266 /* Memory leak
267
268 yyparse(), as generated by bison, allocates extra space for the parser
269 stack using malloc(). Unfortunately this means that there is a memory
270 leak in case of an R error (long-jump). In principle, we could define
271 yyoverflow() to relocate the parser stacks for bison and allocate say on
272 the R heap, but yyoverflow() is undocumented and somewhat complicated
273 (we would have to replicate some macros from the generated parser here).
274 The same problem exists at least in the Rd and LaTeX parsers in tools.
275 */
276
277 #include <rlocale.h>
278 #ifdef HAVE_LANGINFO_CODESET
279 # include <langinfo.h>
280 #endif
281
mbcs_get_next(int c,wchar_t * wc)282 static int mbcs_get_next(int c, wchar_t *wc)
283 {
284 int i, res, clen = 1; char s[9];
285 mbstate_t mb_st;
286
287 s[0] = (char) c;
288 /* This assumes (probably OK) that all MBCS embed ASCII as single-byte
289 lead bytes, including control chars */
290 if((unsigned int) c < 0x80) {
291 *wc = (wchar_t) c;
292 return 1;
293 }
294 if(utf8locale) {
295 clen = utf8clen((char) c);
296 for(i = 1; i < clen; i++) {
297 c = xxgetc();
298 if(c == R_EOF) error(_("EOF whilst reading MBCS char at line %d"), ParseState.xxlineno);
299 s[i] = (char) c;
300 }
301 s[clen] ='\0'; /* x86 Solaris requires this */
302 res = (int) mbrtowc(wc, s, clen, NULL);
303 if(res == -1) error(_("invalid multibyte character in parser at line %d"), ParseState.xxlineno);
304 } else {
305 /* This is not necessarily correct for stateful MBCS */
306 while(clen <= R_MB_CUR_MAX) {
307 mbs_init(&mb_st);
308 res = (int) mbrtowc(wc, s, clen, &mb_st);
309 if(res >= 0) break;
310 if(res == -1)
311 error(_("invalid multibyte character in parser at line %d"), ParseState.xxlineno);
312 /* so res == -2 */
313 c = xxgetc();
314 if(c == R_EOF) error(_("EOF whilst reading MBCS char at line %d"), ParseState.xxlineno);
315 s[clen++] = (char) c;
316 } /* we've tried enough, so must be complete or invalid by now */
317 }
318 for(i = clen - 1; i > 0; i--) xxungetc(s[i]);
319 return clen;
320 }
321
322 /* Soon to be defunct entry points */
323
324 void R_SetInput(int);
325 int R_fgetc(FILE*);
326
327 /* Routines used to build the parse tree */
328
329 static SEXP xxnullformal(void);
330 static SEXP xxfirstformal0(SEXP);
331 static SEXP xxfirstformal1(SEXP, SEXP);
332 static SEXP xxaddformal0(SEXP, SEXP, YYLTYPE *);
333 static SEXP xxaddformal1(SEXP, SEXP, SEXP, YYLTYPE *);
334 static SEXP xxexprlist0();
335 static SEXP xxexprlist1(SEXP, YYLTYPE *);
336 static SEXP xxexprlist2(SEXP, SEXP, YYLTYPE *);
337 static SEXP xxsub0(void);
338 static SEXP xxsub1(SEXP, YYLTYPE *);
339 static SEXP xxsymsub0(SEXP, YYLTYPE *);
340 static SEXP xxsymsub1(SEXP, SEXP, YYLTYPE *);
341 static SEXP xxnullsub0(YYLTYPE *);
342 static SEXP xxnullsub1(SEXP, YYLTYPE *);
343 static SEXP xxsublist1(SEXP);
344 static SEXP xxsublist2(SEXP, SEXP);
345 static SEXP xxcond(SEXP);
346 static SEXP xxifcond(SEXP);
347 static SEXP xxif(SEXP, SEXP, SEXP);
348 static SEXP xxifelse(SEXP, SEXP, SEXP, SEXP);
349 static SEXP xxforcond(SEXP, SEXP);
350 static SEXP xxfor(SEXP, SEXP, SEXP);
351 static SEXP xxwhile(SEXP, SEXP, SEXP);
352 static SEXP xxrepeat(SEXP, SEXP);
353 static SEXP xxnxtbrk(SEXP);
354 static SEXP xxfuncall(SEXP, SEXP);
355 static SEXP xxdefun(SEXP, SEXP, SEXP, YYLTYPE *);
356 static SEXP xxpipe(SEXP, SEXP);
357 static SEXP xxpipebind(SEXP, SEXP, SEXP);
358 static SEXP xxunary(SEXP, SEXP);
359 static SEXP xxbinary(SEXP, SEXP, SEXP);
360 static SEXP xxparen(SEXP, SEXP);
361 static SEXP xxsubscript(SEXP, SEXP, SEXP);
362 static SEXP xxexprlist(SEXP, YYLTYPE *, SEXP);
363 static int xxvalue(SEXP, int, YYLTYPE *);
364
365 #define YYSTYPE SEXP
366
367 %}
368
369 %token-table
370
371 %token END_OF_INPUT ERROR
372 %token STR_CONST NUM_CONST NULL_CONST SYMBOL FUNCTION
373 %token INCOMPLETE_STRING
374 %token LEFT_ASSIGN EQ_ASSIGN RIGHT_ASSIGN LBB
375 %token FOR IN IF ELSE WHILE NEXT BREAK REPEAT
376 %token GT GE LT LE EQ NE AND OR AND2 OR2
377 %token NS_GET NS_GET_INT
378 %token COMMENT LINE_DIRECTIVE
379 %token SYMBOL_FORMALS
380 %token EQ_FORMALS
381 %token EQ_SUB SYMBOL_SUB
382 %token SYMBOL_FUNCTION_CALL
383 %token SYMBOL_PACKAGE
384 /* no longer used: %token COLON_ASSIGN */
385 %token SLOT
386 %token PIPE
387 %token PIPEBIND
388
389 /* This is the precedence table, low to high */
390 %left '?'
391 %left LOW WHILE FOR REPEAT
392 %right IF
393 %left ELSE
394 %right LEFT_ASSIGN
395 %right EQ_ASSIGN
396 %left RIGHT_ASSIGN
397 %left '~' TILDE
398 %left OR OR2
399 %left AND AND2
400 %left UNOT NOT
401 %nonassoc GT GE LT LE EQ NE
402 %left '+' '-'
403 %left '*' '/'
404 %left SPECIAL PIPE
405 %left PIPEBIND
406 %left ':'
407 %left UMINUS UPLUS
408 %right '^'
409 %left '$' '@'
410 %left NS_GET NS_GET_INT
411 %nonassoc '(' '[' LBB
412
413 %%
414
415 prog : END_OF_INPUT { YYACCEPT; }
416 | '\n' { yyresult = xxvalue(NULL,2,NULL); goto yyreturn; }
417 | expr_or_assign_or_help '\n' { yyresult = xxvalue($1,3,&@1); goto yyreturn; }
418 | expr_or_assign_or_help ';' { yyresult = xxvalue($1,4,&@1); goto yyreturn; }
419 | error { YYABORT; }
420 ;
421
422 expr_or_assign_or_help : expr { $$ = $1; }
423 | expr_or_assign_or_help EQ_ASSIGN expr_or_assign_or_help { $$ = xxbinary($2,$1,$3); setId(@$); }
424 | expr_or_assign_or_help '?' expr_or_assign_or_help { $$ = xxbinary($2,$1,$3); setId(@$); }
425 ;
426
427 expr_or_help : expr { $$ = $1; }
428 | expr_or_help '?' expr_or_help { $$ = xxbinary($2,$1,$3); setId(@$); }
429 ;
430
431 expr : NUM_CONST { $$ = $1; setId(@$); }
432 | STR_CONST { $$ = $1; setId(@$); }
433 | NULL_CONST { $$ = $1; setId(@$); }
434 | SYMBOL { $$ = $1; setId(@$); }
435
436 | '{' exprlist '}' { $$ = xxexprlist($1,&@1,$2); setId(@$); }
437 | '(' expr_or_assign_or_help ')' { $$ = xxparen($1,$2); setId(@$); }
438
439 | '-' expr %prec UMINUS { $$ = xxunary($1,$2); setId(@$); }
440 | '+' expr %prec UMINUS { $$ = xxunary($1,$2); setId(@$); }
441 | '!' expr %prec UNOT { $$ = xxunary($1,$2); setId(@$); }
442 | '~' expr %prec TILDE { $$ = xxunary($1,$2); setId(@$); }
443 | '?' expr_or_assign_or_help { $$ = xxunary($1,$2); setId(@$); }
444
445 | expr ':' expr { $$ = xxbinary($2,$1,$3); setId(@$); }
446 | expr '+' expr { $$ = xxbinary($2,$1,$3); setId(@$); }
447 | expr '-' expr { $$ = xxbinary($2,$1,$3); setId(@$); }
448 | expr '*' expr { $$ = xxbinary($2,$1,$3); setId(@$); }
449 | expr '/' expr { $$ = xxbinary($2,$1,$3); setId(@$); }
450 | expr '^' expr { $$ = xxbinary($2,$1,$3); setId(@$); }
451 | expr SPECIAL expr { $$ = xxbinary($2,$1,$3); setId(@$); }
452 | expr '~' expr { $$ = xxbinary($2,$1,$3); setId(@$); }
453 | expr LT expr { $$ = xxbinary($2,$1,$3); setId(@$); }
454 | expr LE expr { $$ = xxbinary($2,$1,$3); setId(@$); }
455 | expr EQ expr { $$ = xxbinary($2,$1,$3); setId(@$); }
456 | expr NE expr { $$ = xxbinary($2,$1,$3); setId(@$); }
457 | expr GE expr { $$ = xxbinary($2,$1,$3); setId(@$); }
458 | expr GT expr { $$ = xxbinary($2,$1,$3); setId(@$); }
459 | expr AND expr { $$ = xxbinary($2,$1,$3); setId(@$); }
460 | expr OR expr { $$ = xxbinary($2,$1,$3); setId(@$); }
461 | expr AND2 expr { $$ = xxbinary($2,$1,$3); setId(@$); }
462 | expr OR2 expr { $$ = xxbinary($2,$1,$3); setId(@$); }
463 | expr PIPE expr { $$ = xxpipe($1,$3); setId(@$); }
464 | expr PIPEBIND expr { $$ = xxpipebind($2,$1,$3); setId(@$); }
465 | expr LEFT_ASSIGN expr { $$ = xxbinary($2,$1,$3); setId(@$); }
466 | expr RIGHT_ASSIGN expr { $$ = xxbinary($2,$3,$1); setId(@$); }
467 | FUNCTION '(' formlist ')' cr expr_or_assign_or_help %prec LOW
468 { $$ = xxdefun($1,$3,$6,&@$); setId(@$); }
469 | '\\' '(' formlist ')' cr expr_or_assign_or_help %prec LOW { $$ = xxdefun(R_FunctionSymbol,$3,$6,&@$); setId(@$); }
470 | expr '(' sublist ')' { $$ = xxfuncall($1,$3); setId(@$); modif_token( &@1, SYMBOL_FUNCTION_CALL ) ; }
471 | IF ifcond expr_or_assign_or_help { $$ = xxif($1,$2,$3); setId(@$); }
472 | IF ifcond expr_or_assign_or_help ELSE expr_or_assign_or_help { $$ = xxifelse($1,$2,$3,$5); setId(@$); }
473 | FOR forcond expr_or_assign_or_help %prec FOR { $$ = xxfor($1,$2,$3); setId(@$); }
474 | WHILE cond expr_or_assign_or_help { $$ = xxwhile($1,$2,$3); setId(@$); }
475 | REPEAT expr_or_assign_or_help { $$ = xxrepeat($1,$2); setId(@$); }
476 | expr LBB sublist ']' ']' { $$ = xxsubscript($1,$2,$3); setId(@$); }
477 | expr '[' sublist ']' { $$ = xxsubscript($1,$2,$3); setId(@$); }
478 | SYMBOL NS_GET SYMBOL { $$ = xxbinary($2,$1,$3); setId(@$); modif_token( &@1, SYMBOL_PACKAGE ) ; }
479 | SYMBOL NS_GET STR_CONST { $$ = xxbinary($2,$1,$3); setId(@$); modif_token( &@1, SYMBOL_PACKAGE ) ; }
480 | STR_CONST NS_GET SYMBOL { $$ = xxbinary($2,$1,$3); setId(@$); }
481 | STR_CONST NS_GET STR_CONST { $$ = xxbinary($2,$1,$3); setId(@$); }
482 | SYMBOL NS_GET_INT SYMBOL { $$ = xxbinary($2,$1,$3); setId(@$); modif_token( &@1, SYMBOL_PACKAGE ) ;}
483 | SYMBOL NS_GET_INT STR_CONST { $$ = xxbinary($2,$1,$3); setId(@$); modif_token( &@1, SYMBOL_PACKAGE ) ;}
484 | STR_CONST NS_GET_INT SYMBOL { $$ = xxbinary($2,$1,$3); setId(@$); }
485 | STR_CONST NS_GET_INT STR_CONST { $$ = xxbinary($2,$1,$3); setId(@$); }
486 | expr '$' SYMBOL { $$ = xxbinary($2,$1,$3); setId(@$); }
487 | expr '$' STR_CONST { $$ = xxbinary($2,$1,$3); setId(@$); }
488 | expr '@' SYMBOL { $$ = xxbinary($2,$1,$3); setId(@$); modif_token( &@3, SLOT ) ; }
489 | expr '@' STR_CONST { $$ = xxbinary($2,$1,$3); setId(@$); }
490 | NEXT { $$ = xxnxtbrk($1); setId(@$); }
491 | BREAK { $$ = xxnxtbrk($1); setId(@$); }
492 ;
493
494
495 cond : '(' expr_or_help ')' { $$ = xxcond($2); }
496 ;
497
498 ifcond : '(' expr_or_help ')' { $$ = xxifcond($2); }
499 ;
500
501 forcond : '(' SYMBOL IN expr_or_help ')' { $$ = xxforcond($2,$4); setId(@$); }
502 ;
503
504
505 exprlist: { $$ = xxexprlist0(); setId(@$); }
506 | expr_or_assign_or_help { $$ = xxexprlist1($1, &@1); }
507 | exprlist ';' expr_or_assign_or_help { $$ = xxexprlist2($1, $3, &@3); }
508 | exprlist ';' { $$ = $1; setId(@$); }
509 | exprlist '\n' expr_or_assign_or_help { $$ = xxexprlist2($1, $3, &@3); }
510 | exprlist '\n' { $$ = $1;}
511 ;
512
513 sublist : sub { $$ = xxsublist1($1); }
514 | sublist cr ',' sub { $$ = xxsublist2($1,$4); }
515 ;
516
517 sub : { $$ = xxsub0(); }
518 | expr_or_help { $$ = xxsub1($1, &@1); }
519 | SYMBOL EQ_ASSIGN { $$ = xxsymsub0($1, &@1); modif_token( &@2, EQ_SUB ) ; modif_token( &@1, SYMBOL_SUB ) ; }
520 | SYMBOL EQ_ASSIGN expr_or_help { $$ = xxsymsub1($1,$3, &@1); modif_token( &@2, EQ_SUB ) ; modif_token( &@1, SYMBOL_SUB ) ; }
521 | STR_CONST EQ_ASSIGN { $$ = xxsymsub0($1, &@1); modif_token( &@2, EQ_SUB ) ; }
522 | STR_CONST EQ_ASSIGN expr_or_help { $$ = xxsymsub1($1,$3, &@1); modif_token( &@2, EQ_SUB ) ; }
523 | NULL_CONST EQ_ASSIGN { $$ = xxnullsub0(&@1); modif_token( &@2, EQ_SUB ) ; }
524 | NULL_CONST EQ_ASSIGN expr_or_help { $$ = xxnullsub1($3, &@1); modif_token( &@2, EQ_SUB ) ; }
525 ;
526
527 formlist: { $$ = xxnullformal(); }
528 | SYMBOL { $$ = xxfirstformal0($1); modif_token( &@1, SYMBOL_FORMALS ) ; }
529 | SYMBOL EQ_ASSIGN expr_or_help { $$ = xxfirstformal1($1,$3); modif_token( &@1, SYMBOL_FORMALS ) ; modif_token( &@2, EQ_FORMALS ) ; }
530 | formlist ',' SYMBOL { $$ = xxaddformal0($1,$3, &@3); modif_token( &@3, SYMBOL_FORMALS ) ; }
531 | formlist ',' SYMBOL EQ_ASSIGN expr_or_help
532 { $$ = xxaddformal1($1,$3,$5,&@3); modif_token( &@3, SYMBOL_FORMALS ) ; modif_token( &@4, EQ_FORMALS ) ;}
533 ;
534
535 cr : { EatLines = 1; }
536 ;
537 %%
538
539
540 /*----------------------------------------------------------------------------*/
541
542 static int (*ptr_getc)(void);
543
544 /* Private pushback, since file ungetc only guarantees one byte.
545 We need up to one MBCS-worth */
546 #define DECLARE_YYTEXT_BUFP(bp) char *bp = yytext ;
547 #define YYTEXT_PUSH(c, bp) do { \
548 if ((bp) - yytext >= sizeof(yytext) - 1){ \
549 error(_("input buffer overflow at line %d"), ParseState.xxlineno); \
550 } \
551 *(bp)++ = ((char)c); \
552 } while(0) ;
553
554 #define PUSHBACK_BUFSIZE 16
555 static int pushback[PUSHBACK_BUFSIZE];
556 static unsigned int npush = 0;
557
558 static int prevpos = 0;
559 static int prevlines[PUSHBACK_BUFSIZE];
560 static int prevcols[PUSHBACK_BUFSIZE];
561 static int prevbytes[PUSHBACK_BUFSIZE];
562 static int prevparse[PUSHBACK_BUFSIZE];
563
xxgetc(void)564 static int xxgetc(void)
565 {
566 int c;
567
568 if(npush) c = pushback[--npush]; else c = ptr_getc();
569
570 prevpos = (prevpos + 1) % PUSHBACK_BUFSIZE;
571 prevbytes[prevpos] = ParseState.xxbyteno;
572 prevlines[prevpos] = ParseState.xxlineno;
573 prevparse[prevpos] = ParseState.xxparseno;
574 prevcols[prevpos] = ParseState.xxcolno;
575
576 if (c == EOF) {
577 EndOfFile = 1;
578 return R_EOF;
579 }
580 R_ParseContextLast = (R_ParseContextLast + 1) % PARSE_CONTEXT_SIZE;
581 R_ParseContext[R_ParseContextLast] = (char) c;
582
583 if (c == '\n') {
584 ParseState.xxlineno += 1;
585 ParseState.xxcolno = 0;
586 ParseState.xxbyteno = 0;
587 ParseState.xxparseno += 1;
588 } else {
589 /* We only advance the column for the 1st byte in UTF-8, so handle later bytes specially */
590 if (!known_to_be_utf8 || (unsigned char)c < 0x80 || 0xC0 <= (unsigned char)c)
591 ParseState.xxcolno++;
592 ParseState.xxbyteno++;
593 }
594
595 if (c == '\t') ParseState.xxcolno = ((ParseState.xxcolno + 7) & ~7);
596
597 R_ParseContextLine = ParseState.xxlineno;
598
599 xxcharcount++;
600 return c;
601 }
602
xxungetc(int c)603 static int xxungetc(int c)
604 {
605 /* this assumes that c was the result of xxgetc; if not, some edits will be needed */
606 ParseState.xxlineno = prevlines[prevpos];
607 ParseState.xxbyteno = prevbytes[prevpos];
608 ParseState.xxcolno = prevcols[prevpos];
609 ParseState.xxparseno = prevparse[prevpos];
610
611 prevpos = (prevpos + PUSHBACK_BUFSIZE - 1) % PUSHBACK_BUFSIZE;
612
613 R_ParseContextLine = ParseState.xxlineno;
614
615 xxcharcount--;
616 R_ParseContext[R_ParseContextLast] = '\0';
617 /* precaution as to how % is implemented for < 0 numbers */
618 R_ParseContextLast = (R_ParseContextLast + PARSE_CONTEXT_SIZE -1) % PARSE_CONTEXT_SIZE;
619 if(npush >= PUSHBACK_BUFSIZE) return EOF;
620 pushback[npush++] = c;
621 return c;
622 }
623
624 /* Only used from finish_mbcs_in_parse_context. */
add_mbcs_byte_to_parse_context()625 static int add_mbcs_byte_to_parse_context()
626 {
627 int c;
628
629 if (EndOfFile)
630 error(_("invalid multibyte character in parser at line %d"),
631 ParseState.xxlineno);
632 if(npush)
633 c = pushback[--npush];
634 else
635 c = ptr_getc();
636 if (c == EOF)
637 error(_("invalid multibyte character in parser at line %d"),
638 ParseState.xxlineno);
639
640 R_ParseContextLast = (R_ParseContextLast + 1) % PARSE_CONTEXT_SIZE;
641 R_ParseContext[R_ParseContextLast] = (char) c;
642 return c;
643 }
644
645 /* On error, the parse context may end inside a multi-byte character. Add
646 the missing bytes to the context to so that it contains full characters. */
finish_mbcs_in_parse_context()647 static void finish_mbcs_in_parse_context()
648 {
649 int i, c, nbytes = 0, first;
650 Rboolean mbcs = FALSE;
651
652 /* find the first byte of the context */
653 for(i = R_ParseContextLast;
654 R_ParseContext[i];
655 i = (i + PARSE_CONTEXT_SIZE - 1) % PARSE_CONTEXT_SIZE) {
656
657 nbytes++;
658 if (nbytes == PARSE_CONTEXT_SIZE)
659 break;
660 }
661 if (!nbytes)
662 return;
663 if (!R_ParseContext[i])
664 first = (i + 1) % PARSE_CONTEXT_SIZE;
665 else
666 /* The beginning of the context has been overwritten and for a general
667 encoding there is not way to recover it. It is possible for UTF-8,
668 though. */
669 return;
670
671 /* decode multi-byte characters */
672 for(i = 0; i < nbytes; i++) {
673 c = R_ParseContext[(first + i) % PARSE_CONTEXT_SIZE];
674 if ((unsigned int)c < 0x80) continue; /* ASCII */
675
676 if (utf8locale) {
677 /* UTF-8 could be handled more efficiently, searching from the end
678 of the string */
679 i += utf8clen((char) c) - 1;
680 if (i >= nbytes) {
681 while (i >= nbytes) {
682 add_mbcs_byte_to_parse_context();
683 nbytes++;
684 }
685 return;
686 }
687 } else
688 mbcs = TRUE;
689 }
690 if (!mbcs)
691 return;
692
693 /* copy the context to a linear buffer */
694 char buf[nbytes + R_MB_CUR_MAX];
695
696 for(i = 0; i < nbytes; i++)
697 buf[i] = R_ParseContext[(first + i) % PARSE_CONTEXT_SIZE];
698
699 for(i = 0; i < nbytes; i++) {
700 wchar_t wc;
701 int res;
702 mbstate_t mb_st;
703
704 mbs_init(&mb_st);
705 res = (int) mbrtowc(&wc, buf + i, nbytes - i, &mb_st);
706 while (res == -2 && nbytes < sizeof(buf)) {
707 /* This is not necessarily correct for stateful MBCS */
708 buf[nbytes++] = (char) add_mbcs_byte_to_parse_context();
709 mbs_init(&mb_st);
710 res = (int) mbrtowc(&wc, buf + i, nbytes - i, &mb_st);
711 }
712 if (res == -1)
713 error(_("invalid multibyte character in parser at line %d"),
714 ParseState.xxlineno);
715 i += res - 1;
716 }
717 }
718
719 /*
720 * Increments/inits the token/grouping counter
721 */
incrementId(void)722 static void incrementId(void){
723 identifier++;
724 }
725
initId(void)726 static void initId(void){
727 identifier = 0 ;
728 }
729
makeSrcref(YYLTYPE * lloc,SEXP srcfile)730 static SEXP makeSrcref(YYLTYPE *lloc, SEXP srcfile)
731 {
732 SEXP val;
733
734 PROTECT(val = allocVector(INTSXP, 8));
735 INTEGER(val)[0] = lloc->first_line;
736 INTEGER(val)[1] = lloc->first_byte;
737 INTEGER(val)[2] = lloc->last_line;
738 INTEGER(val)[3] = lloc->last_byte;
739 INTEGER(val)[4] = lloc->first_column;
740 INTEGER(val)[5] = lloc->last_column;
741 INTEGER(val)[6] = lloc->first_parsed;
742 INTEGER(val)[7] = lloc->last_parsed;
743 setAttrib(val, R_SrcfileSymbol, srcfile);
744 setAttrib(val, R_ClassSymbol, mkString("srcref"));
745 UNPROTECT(1); /* val */
746 return val;
747 }
748
attachSrcrefs(SEXP val)749 static void attachSrcrefs(SEXP val)
750 {
751 SEXP srval;
752
753 PROTECT(srval = SrcRefsToVectorList());
754
755 setAttrib(val, R_SrcrefSymbol, srval);
756 setAttrib(val, R_SrcfileSymbol, PS_SRCFILE);
757 {
758 YYLTYPE wholeFile;
759 wholeFile.first_line = 1;
760 wholeFile.first_byte = 0;
761 wholeFile.first_column = 0;
762 wholeFile.last_line = ParseState.xxlineno;
763 wholeFile.last_byte = ParseState.xxbyteno;
764 wholeFile.last_column = ParseState.xxcolno;
765 wholeFile.first_parsed = 1;
766 wholeFile.last_parsed = ParseState.xxparseno;
767 setAttrib(val, R_WholeSrcrefSymbol, makeSrcref(&wholeFile, PS_SRCFILE));
768 }
769 PS_SET_SRCREFS(R_NilValue);
770 ParseState.didAttach = TRUE;
771 UNPROTECT(1); /* srval */
772 }
773
xxvalue(SEXP v,int k,YYLTYPE * lloc)774 static int xxvalue(SEXP v, int k, YYLTYPE *lloc)
775 {
776 if (k > 2) {
777 if (ParseState.keepSrcRefs) {
778 SEXP s = PROTECT(makeSrcref(lloc, PS_SRCFILE));
779 AppendToSrcRefs(s);
780 UNPROTECT(1); /* s */
781 }
782 RELEASE_SV(v);
783 }
784 R_CurrentExpr = v;
785 return k;
786 }
787
xxnullformal()788 static SEXP xxnullformal()
789 {
790 SEXP ans;
791 PRESERVE_SV(ans = R_NilValue);
792 return ans;
793 }
794
xxfirstformal0(SEXP sym)795 static SEXP xxfirstformal0(SEXP sym)
796 {
797 SEXP ans;
798 if (GenerateCode)
799 PRESERVE_SV(ans = FirstArg(R_MissingArg, sym));
800 else
801 PRESERVE_SV(ans = R_NilValue);
802 RELEASE_SV(sym);
803 return ans;
804 }
805
xxfirstformal1(SEXP sym,SEXP expr)806 static SEXP xxfirstformal1(SEXP sym, SEXP expr)
807 {
808 SEXP ans;
809 if (GenerateCode)
810 PRESERVE_SV(ans = FirstArg(expr, sym));
811 else
812 PRESERVE_SV(ans = R_NilValue);
813 RELEASE_SV(expr);
814 RELEASE_SV(sym);
815 return ans;
816 }
817
xxaddformal0(SEXP formlist,SEXP sym,YYLTYPE * lloc)818 static SEXP xxaddformal0(SEXP formlist, SEXP sym, YYLTYPE *lloc)
819 {
820 SEXP ans;
821 if (GenerateCode) {
822 CheckFormalArgs(formlist, sym, lloc);
823 NextArg(formlist, R_MissingArg, sym);
824 ans = formlist;
825 } else {
826 RELEASE_SV(formlist);
827 PRESERVE_SV(ans = R_NilValue);
828 }
829 RELEASE_SV(sym);
830 return ans;
831 }
832
xxaddformal1(SEXP formlist,SEXP sym,SEXP expr,YYLTYPE * lloc)833 static SEXP xxaddformal1(SEXP formlist, SEXP sym, SEXP expr, YYLTYPE *lloc)
834 {
835 SEXP ans;
836 if (GenerateCode) {
837 CheckFormalArgs(formlist, sym, lloc);
838 NextArg(formlist, expr, sym);
839 ans = formlist;
840 } else {
841 RELEASE_SV(formlist);
842 PRESERVE_SV(ans = R_NilValue);
843 }
844 RELEASE_SV(expr);
845 RELEASE_SV(sym);
846 return ans;
847 }
848
xxexprlist0(void)849 static SEXP xxexprlist0(void)
850 {
851 SEXP ans;
852 if (GenerateCode) {
853 PRESERVE_SV(ans = NewList());
854 if (ParseState.keepSrcRefs) {
855 setAttrib(ans, R_SrcrefSymbol, PS_SRCREFS);
856 PS_SET_SRCREFS(R_NilValue);
857 }
858 }
859 else
860 PRESERVE_SV(ans = R_NilValue);
861 return ans;
862 }
863
xxexprlist1(SEXP expr,YYLTYPE * lloc)864 static SEXP xxexprlist1(SEXP expr, YYLTYPE *lloc)
865 {
866 SEXP ans;
867 if (GenerateCode) {
868 PRESERVE_SV(ans = NewList());
869 if (ParseState.keepSrcRefs) {
870 setAttrib(ans, R_SrcrefSymbol, PS_SRCREFS);
871 SEXP s = PROTECT(makeSrcref(lloc, PS_SRCFILE));
872 SetSingleSrcRef(s);
873 UNPROTECT(1); /* s */
874 }
875 GrowList(ans, expr);
876 }
877 else
878 PRESERVE_SV(ans = R_NilValue);
879 RELEASE_SV(expr);
880 return ans;
881 }
882
xxexprlist2(SEXP exprlist,SEXP expr,YYLTYPE * lloc)883 static SEXP xxexprlist2(SEXP exprlist, SEXP expr, YYLTYPE *lloc)
884 {
885 SEXP ans;
886 if (GenerateCode) {
887 if (ParseState.keepSrcRefs) {
888 SEXP s = PROTECT(makeSrcref(lloc, PS_SRCFILE));
889 AppendToSrcRefs(s);
890 UNPROTECT(1); /* s */
891 }
892 GrowList(exprlist, expr);
893 ans = exprlist;
894 } else {
895 RELEASE_SV(exprlist);
896 PRESERVE_SV(ans = R_NilValue);
897 }
898 RELEASE_SV(expr);
899 return ans;
900 }
901
xxsub0(void)902 static SEXP xxsub0(void)
903 {
904 SEXP ans;
905 if (GenerateCode)
906 PRESERVE_SV(ans = lang2(R_MissingArg,R_NilValue));
907 else
908 PRESERVE_SV(ans = R_NilValue);
909 return ans;
910 }
911
xxsub1(SEXP expr,YYLTYPE * lloc)912 static SEXP xxsub1(SEXP expr, YYLTYPE *lloc)
913 {
914 SEXP ans;
915 if (GenerateCode)
916 PRESERVE_SV(ans = TagArg(expr, R_NilValue, lloc));
917 else
918 PRESERVE_SV(ans = R_NilValue);
919 RELEASE_SV(expr);
920 return ans;
921 }
922
xxsymsub0(SEXP sym,YYLTYPE * lloc)923 static SEXP xxsymsub0(SEXP sym, YYLTYPE *lloc)
924 {
925 SEXP ans;
926 if (GenerateCode)
927 PRESERVE_SV(ans = TagArg(R_MissingArg, sym, lloc));
928 else
929 PRESERVE_SV(ans = R_NilValue);
930 RELEASE_SV(sym);
931 return ans;
932 }
933
xxsymsub1(SEXP sym,SEXP expr,YYLTYPE * lloc)934 static SEXP xxsymsub1(SEXP sym, SEXP expr, YYLTYPE *lloc)
935 {
936 SEXP ans;
937 if (GenerateCode)
938 PRESERVE_SV(ans = TagArg(expr, sym, lloc));
939 else
940 PRESERVE_SV(ans = R_NilValue);
941 RELEASE_SV(expr);
942 RELEASE_SV(sym);
943 return ans;
944 }
945
xxnullsub0(YYLTYPE * lloc)946 static SEXP xxnullsub0(YYLTYPE *lloc)
947 {
948 SEXP ans;
949 if (GenerateCode)
950 PRESERVE_SV(ans = TagArg(R_MissingArg, R_NullSymbol, lloc));
951 else
952 PRESERVE_SV(ans = R_NilValue);
953 RELEASE_SV(R_NilValue);
954 return ans;
955 }
956
xxnullsub1(SEXP expr,YYLTYPE * lloc)957 static SEXP xxnullsub1(SEXP expr, YYLTYPE *lloc)
958 {
959 SEXP ans;
960 if (GenerateCode)
961 PRESERVE_SV(ans = TagArg(expr, R_NullSymbol, lloc));
962 else
963 PRESERVE_SV(ans = R_NilValue);
964 RELEASE_SV(R_NilValue);
965 RELEASE_SV(expr);
966 return ans;
967 }
968
969
xxsublist1(SEXP sub)970 static SEXP xxsublist1(SEXP sub)
971 {
972 SEXP ans;
973 if (GenerateCode)
974 PRESERVE_SV(ans = FirstArg(CAR(sub),CADR(sub)));
975 else
976 PRESERVE_SV(ans = R_NilValue);
977 RELEASE_SV(sub);
978 return ans;
979 }
980
xxsublist2(SEXP sublist,SEXP sub)981 static SEXP xxsublist2(SEXP sublist, SEXP sub)
982 {
983 SEXP ans;
984 if (GenerateCode) {
985 NextArg(sublist, CAR(sub), CADR(sub));
986 ans = sublist;
987 } else {
988 RELEASE_SV(sublist);
989 PRESERVE_SV(ans = R_NilValue);
990 }
991 RELEASE_SV(sub);
992 return ans;
993 }
994
xxcond(SEXP expr)995 static SEXP xxcond(SEXP expr)
996 {
997 EatLines = 1;
998 return expr;
999 }
1000
xxifcond(SEXP expr)1001 static SEXP xxifcond(SEXP expr)
1002 {
1003 EatLines = 1;
1004 return expr;
1005 }
1006
xxif(SEXP ifsym,SEXP cond,SEXP expr)1007 static SEXP xxif(SEXP ifsym, SEXP cond, SEXP expr)
1008 {
1009 SEXP ans;
1010 if (GenerateCode)
1011 PRESERVE_SV(ans = lang3(ifsym, cond, expr));
1012 else
1013 PRESERVE_SV(ans = R_NilValue);
1014 RELEASE_SV(expr);
1015 RELEASE_SV(cond);
1016 return ans;
1017 }
1018
xxifelse(SEXP ifsym,SEXP cond,SEXP ifexpr,SEXP elseexpr)1019 static SEXP xxifelse(SEXP ifsym, SEXP cond, SEXP ifexpr, SEXP elseexpr)
1020 {
1021 SEXP ans;
1022 if (GenerateCode)
1023 PRESERVE_SV(ans = lang4(ifsym, cond, ifexpr, elseexpr));
1024 else
1025 PRESERVE_SV(ans = R_NilValue);
1026 RELEASE_SV(elseexpr);
1027 RELEASE_SV(ifexpr);
1028 RELEASE_SV(cond);
1029 return ans;
1030 }
1031
xxforcond(SEXP sym,SEXP expr)1032 static SEXP xxforcond(SEXP sym, SEXP expr)
1033 {
1034 SEXP ans;
1035 EatLines = 1;
1036 if (GenerateCode)
1037 PRESERVE_SV(ans = LCONS(sym, expr));
1038 else
1039 PRESERVE_SV(ans = R_NilValue);
1040 RELEASE_SV(expr);
1041 RELEASE_SV(sym);
1042 return ans;
1043 }
1044
xxfor(SEXP forsym,SEXP forcond,SEXP body)1045 static SEXP xxfor(SEXP forsym, SEXP forcond, SEXP body)
1046 {
1047 SEXP ans;
1048 if (GenerateCode)
1049 PRESERVE_SV(ans = lang4(forsym, CAR(forcond), CDR(forcond), body));
1050 else
1051 PRESERVE_SV(ans = R_NilValue);
1052 RELEASE_SV(body);
1053 RELEASE_SV(forcond);
1054 return ans;
1055 }
1056
xxwhile(SEXP whilesym,SEXP cond,SEXP body)1057 static SEXP xxwhile(SEXP whilesym, SEXP cond, SEXP body)
1058 {
1059 SEXP ans;
1060 if (GenerateCode)
1061 PRESERVE_SV(ans = lang3(whilesym, cond, body));
1062 else
1063 PRESERVE_SV(ans = R_NilValue);
1064 RELEASE_SV(body);
1065 RELEASE_SV(cond);
1066 return ans;
1067 }
1068
xxrepeat(SEXP repeatsym,SEXP body)1069 static SEXP xxrepeat(SEXP repeatsym, SEXP body)
1070 {
1071 SEXP ans;
1072 if (GenerateCode)
1073 PRESERVE_SV(ans = lang2(repeatsym, body));
1074 else
1075 PRESERVE_SV(ans = R_NilValue);
1076 RELEASE_SV(body);
1077 return ans;
1078 }
1079
xxnxtbrk(SEXP keyword)1080 static SEXP xxnxtbrk(SEXP keyword)
1081 {
1082 if (GenerateCode)
1083 PRESERVE_SV(keyword = lang1(keyword));
1084 else
1085 PRESERVE_SV(keyword = R_NilValue);
1086 return keyword;
1087 }
1088
xxfuncall(SEXP expr,SEXP args)1089 static SEXP xxfuncall(SEXP expr, SEXP args)
1090 {
1091 SEXP ans, sav_expr = expr;
1092 if (GenerateCode) {
1093 if (isString(expr))
1094 expr = installTrChar(STRING_ELT(expr, 0));
1095 PROTECT(expr);
1096 if (length(CDR(args)) == 1 && CADR(args) == R_MissingArg && TAG(CDR(args)) == R_NilValue )
1097 ans = lang1(expr);
1098 else
1099 ans = LCONS(expr, CDR(args));
1100 UNPROTECT(1); /* expr */
1101 PRESERVE_SV(ans);
1102 } else
1103 PRESERVE_SV(ans = R_NilValue);
1104
1105 RELEASE_SV(args);
1106 RELEASE_SV(sav_expr);
1107 return ans;
1108 }
1109
mkChar2(const char * name)1110 static SEXP mkChar2(const char *name)
1111 {
1112 cetype_t enc = CE_NATIVE;
1113
1114 if(known_to_be_latin1) enc = CE_LATIN1;
1115 else if(known_to_be_utf8) enc = CE_UTF8;
1116
1117 return mkCharLenCE(name, (int) strlen(name), enc);
1118 }
1119
mkString2(const char * s,size_t len,Rboolean escaped)1120 static SEXP mkString2(const char *s, size_t len, Rboolean escaped)
1121 {
1122 SEXP t;
1123 cetype_t enc = CE_NATIVE;
1124
1125 if(known_to_be_latin1) enc = CE_LATIN1;
1126 else if(!escaped && known_to_be_utf8) enc = CE_UTF8;
1127
1128 PROTECT(t = allocVector(STRSXP, 1));
1129 SET_STRING_ELT(t, 0, mkCharLenCE(s, (int) len, enc));
1130 UNPROTECT(1); /* t */
1131 return t;
1132 }
1133
xxdefun(SEXP fname,SEXP formals,SEXP body,YYLTYPE * lloc)1134 static SEXP xxdefun(SEXP fname, SEXP formals, SEXP body, YYLTYPE *lloc)
1135 {
1136 SEXP ans, srcref;
1137
1138 if (GenerateCode) {
1139 if (ParseState.keepSrcRefs) {
1140 srcref = makeSrcref(lloc, PS_SRCFILE);
1141 ParseState.didAttach = TRUE;
1142 } else
1143 srcref = R_NilValue;
1144 PRESERVE_SV(ans = lang4(fname, CDR(formals), body, srcref));
1145 } else
1146 PRESERVE_SV(ans = R_NilValue);
1147 RELEASE_SV(body);
1148 RELEASE_SV(formals);
1149 return ans;
1150 }
1151
xxunary(SEXP op,SEXP arg)1152 static SEXP xxunary(SEXP op, SEXP arg)
1153 {
1154 SEXP ans;
1155 if (GenerateCode)
1156 PRESERVE_SV(ans = lang2(op, arg));
1157 else
1158 PRESERVE_SV(ans = R_NilValue);
1159 RELEASE_SV(arg);
1160 return ans;
1161 }
1162
xxbinary(SEXP n1,SEXP n2,SEXP n3)1163 static SEXP xxbinary(SEXP n1, SEXP n2, SEXP n3)
1164 {
1165 SEXP ans;
1166 if (GenerateCode)
1167 PRESERVE_SV(ans = lang3(n1, n2, n3));
1168 else
1169 PRESERVE_SV(ans = R_NilValue);
1170 RELEASE_SV(n2);
1171 RELEASE_SV(n3);
1172 return ans;
1173 }
1174
1175 static SEXP findPlaceholderCell(SEXP, SEXP);
1176
check_rhs(SEXP rhs)1177 static void check_rhs(SEXP rhs)
1178 {
1179 if (TYPEOF(rhs) != LANGSXP)
1180 error(_("The pipe operator requires a function call as RHS"));
1181
1182 /* rule out syntactically special functions */
1183 /* the IS_SPECIAL_SYMBOL bit is set in names.c */
1184 SEXP fun = CAR(rhs);
1185 if (TYPEOF(fun) == SYMSXP && IS_SPECIAL_SYMBOL(fun))
1186 error("function '%s' not supported in RHS call of a pipe",
1187 CHAR(PRINTNAME(fun)));
1188 }
1189
xxpipe(SEXP lhs,SEXP rhs)1190 static SEXP xxpipe(SEXP lhs, SEXP rhs)
1191 {
1192 SEXP ans;
1193 if (GenerateCode) {
1194 /* allow x => log(x) on RHS */
1195 if (TYPEOF(rhs) == LANGSXP && CAR(rhs) == R_PipeBindSymbol) {
1196 SEXP var = CADR(rhs);
1197 SEXP expr = CADDR(rhs);
1198 check_rhs(expr);
1199 SEXP phcell = findPlaceholderCell(var, expr);
1200 if (phcell == NULL)
1201 error(_("no placeholder found on RHS"));
1202 SETCAR(phcell, lhs);
1203 return expr;
1204 }
1205
1206 check_rhs(rhs);
1207
1208 SEXP fun = CAR(rhs);
1209 SEXP args = CDR(rhs);
1210 PRESERVE_SV(ans = lcons(fun, lcons(lhs, args)));
1211 }
1212 else {
1213 PRESERVE_SV(ans = R_NilValue);
1214 }
1215 RELEASE_SV(lhs);
1216 RELEASE_SV(rhs);
1217 return ans;
1218 }
1219
xxpipebind(SEXP fn,SEXP lhs,SEXP rhs)1220 static SEXP xxpipebind(SEXP fn, SEXP lhs, SEXP rhs)
1221 {
1222 static int use_pipebind = 0;
1223 if (use_pipebind != 1) {
1224 char *lookup = getenv("_R_USE_PIPEBIND_");
1225 use_pipebind = ((lookup != NULL) && StringTrue(lookup)) ? 1 : 0;
1226 }
1227
1228 if (use_pipebind)
1229 return xxbinary(fn, lhs, rhs);
1230 else
1231 error("'=>' is disabled; set '_R_USE_PIPEBIND_' envvar to a true value to enable it");
1232 }
1233
xxparen(SEXP n1,SEXP n2)1234 static SEXP xxparen(SEXP n1, SEXP n2)
1235 {
1236 SEXP ans;
1237 if (GenerateCode)
1238 PRESERVE_SV(ans = lang2(n1, n2));
1239 else
1240 PRESERVE_SV(ans = R_NilValue);
1241 RELEASE_SV(n2);
1242 return ans;
1243 }
1244
1245
1246 /* This should probably use CONS rather than LCONS, but
1247 it shouldn't matter and we would rather not meddle
1248 See PR#7055 */
1249
xxsubscript(SEXP a1,SEXP a2,SEXP a3)1250 static SEXP xxsubscript(SEXP a1, SEXP a2, SEXP a3)
1251 {
1252 SEXP ans;
1253 if (GenerateCode)
1254 PRESERVE_SV(ans = LCONS(a2, CONS(a1, CDR(a3))));
1255 else
1256 PRESERVE_SV(ans = R_NilValue);
1257 RELEASE_SV(a3);
1258 RELEASE_SV(a1);
1259 return ans;
1260 }
1261
xxexprlist(SEXP a1,YYLTYPE * lloc,SEXP a2)1262 static SEXP xxexprlist(SEXP a1, YYLTYPE *lloc, SEXP a2)
1263 {
1264 SEXP ans;
1265 SEXP prevSrcrefs;
1266
1267 EatLines = 0;
1268 if (GenerateCode) {
1269 SET_TYPEOF(a2, LANGSXP);
1270 SETCAR(a2, a1);
1271 if (ParseState.keepSrcRefs) {
1272 PROTECT(prevSrcrefs = getAttrib(a2, R_SrcrefSymbol));
1273 SEXP s = PROTECT(makeSrcref(lloc, PS_SRCFILE));
1274 PrependToSrcRefs(s);
1275 attachSrcrefs(a2);
1276 UNPROTECT(2); /* prevSrcrefs, s */
1277 #ifndef SWITCH_TO_REFCNT
1278 /* SrcRefs got NAMED by being an attribute, preventively
1279 getAttrib(), but it has not in fact been referenced. Set NAMED
1280 to 0 to avoid overhead in further setAttrib calls due to cycle
1281 detection. */
1282 SET_NAMED(prevSrcrefs, 0);
1283 #endif
1284 PS_SET_SRCREFS(prevSrcrefs);
1285 }
1286 PRESERVE_SV(ans = a2);
1287 }
1288 else
1289 PRESERVE_SV(ans = R_NilValue);
1290 RELEASE_SV(a2);
1291 return ans;
1292 }
1293
1294 /*--------------------------------------------------------------------------*/
1295
TagArg(SEXP arg,SEXP tag,YYLTYPE * lloc)1296 static SEXP TagArg(SEXP arg, SEXP tag, YYLTYPE *lloc)
1297 {
1298 switch (TYPEOF(tag)) {
1299 case STRSXP:
1300 tag = installTrChar(STRING_ELT(tag, 0));
1301 case NILSXP:
1302 case SYMSXP:
1303 return lang2(arg, tag);
1304 default:
1305 error(_("incorrect tag type at line %d"), lloc->first_line); return R_NilValue/* -Wall */;
1306 }
1307 }
1308
1309
1310 /* Stretchy List Structures : Lists are created and grown using a special */
1311 /* dotted pair. The CAR of the list points to the last cons-cell in the */
1312 /* list and the CDR points to the first. The list can be extracted from */
1313 /* the pair by taking its CDR, while the CAR gives fast access to the end */
1314 /* of the list. */
1315
1316 /* These functions must be called with arguments protected */
1317
1318 /* Create a stretchy-list dotted pair */
NewList(void)1319 static SEXP NewList(void)
1320 {
1321 SEXP s = CONS(R_NilValue, R_NilValue);
1322 SETCAR(s, s);
1323 return s;
1324 }
1325
1326 /* Add a new element at the end of a stretchy list */
GrowList(SEXP l,SEXP s)1327 static void GrowList(SEXP l, SEXP s)
1328 {
1329 SEXP tmp;
1330 tmp = CONS(s, R_NilValue);
1331 SETCDR(CAR(l), tmp);
1332 SETCAR(l, tmp);
1333 }
1334
1335 /* Create a stretchy list with a single named element */
FirstArg(SEXP s,SEXP tag)1336 static SEXP FirstArg(SEXP s, SEXP tag)
1337 {
1338 SEXP tmp;
1339 PROTECT(tmp = NewList());
1340 GrowList(tmp, s);
1341 SET_TAG(CAR(tmp), tag);
1342 UNPROTECT(1); /* tmp */
1343 return tmp;
1344 }
1345
1346 /* Add named element to the end of a stretchy list */
NextArg(SEXP l,SEXP s,SEXP tag)1347 static void NextArg(SEXP l, SEXP s, SEXP tag)
1348 {
1349 GrowList(l, s);
1350 SET_TAG(CAR(l), tag);
1351 }
1352
1353 /* SrcRefs (PS_SRCREFS) are represented as R_NilValue (empty) or by
1354 a stretchy list (which includes another representation for empty)
1355 for fast append operation. */
1356
SetSingleSrcRef(SEXP r)1357 static void SetSingleSrcRef(SEXP r)
1358 {
1359 SEXP l;
1360
1361 PROTECT(l = NewList());
1362 GrowList(l, r);
1363 PS_SET_SRCREFS(l);
1364 UNPROTECT(1); /* l */
1365 }
1366
AppendToSrcRefs(SEXP r)1367 static void AppendToSrcRefs(SEXP r)
1368 {
1369 SEXP l = PS_SRCREFS;
1370 if (l == R_NilValue)
1371 SetSingleSrcRef(r);
1372 else
1373 GrowList(l, r);
1374 }
1375
PrependToSrcRefs(SEXP r)1376 static void PrependToSrcRefs(SEXP r)
1377 {
1378 SEXP l = PS_SRCREFS;
1379 if (l == R_NilValue)
1380 SetSingleSrcRef(r);
1381 else if (CDR(l) == R_NilValue)
1382 /* adding to empty stretchy list */
1383 GrowList(l, r);
1384 else {
1385 SEXP tmp = CONS(r, CDR(l));
1386 SETCDR(l, tmp);
1387 }
1388 }
1389
SrcRefsToVectorList()1390 static SEXP SrcRefsToVectorList() {
1391 SEXP l = PS_SRCREFS;
1392 if (l == R_NilValue)
1393 return PairToVectorList(l);
1394 else
1395 return PairToVectorList(CDR(l));
1396 }
1397
1398 /*--------------------------------------------------------------------------*/
1399
1400 /*
1401 * Parsing Entry Points:
1402 *
1403 * The Following entry points provide language parsing facilities.
1404 * Note that there are separate entry points for parsing IoBuffers
1405 * (i.e. interactve use), files and R character strings.
1406 *
1407 * The entry points provide the same functionality, they just
1408 * set things up in slightly different ways.
1409 *
1410 * The following routines parse a single expression:
1411 *
1412 *
1413 * SEXP R_Parse1File(FILE *fp, int gencode, ParseStatus *status, Rboolean first)
1414 * (used for R_ReplFile in main.c)
1415 *
1416 * SEXP R_Parse1Buffer(IoBuffer *buffer, int gencode, ParseStatus *status, Rboolean first)
1417 * (used for ReplIteration and R_ReplDLLdo1 in main.c)
1418 *
1419 * The success of the parse is indicated as folllows:
1420 *
1421 *
1422 * status = PARSE_NULL - there was no statement to parse
1423 * PARSE_OK - complete statement
1424 * PARSE_INCOMPLETE - incomplete statement
1425 * PARSE_ERROR - syntax error
1426 * PARSE_EOF - end of file
1427 *
1428 *
1429 * The following routines parse several expressions and return
1430 * their values in a single expression vector.
1431 *
1432 * SEXP R_ParseFile(FILE *fp, int n, ParseStatus *status, SEXP srcfile)
1433 * (used for do_edit in file edit.c)
1434 *
1435 * SEXP R_ParseVector(SEXP *text, int n, ParseStatus *status, SEXP srcfile)
1436 * (public, and used by parse(text=) in file source.c)
1437 *
1438 * SEXP R_ParseBuffer(IoBuffer *buffer, int n, ParseStatus *status, SEXP prompt, SEXP srcfile)
1439 * (used by parse(file="") in file source.c)
1440 *
1441 * SEXP R_ParseConn(Rconnection con, int n, ParseStatus *status, SEXP srcfile)
1442 * (used by parse(file=) in file source.c)
1443 *
1444 * Here, status is 1 for a successful parse and 0 if parsing failed
1445 * for some reason.
1446 */
1447
1448 #define CONTEXTSTACK_SIZE 50
1449 static int SavedToken;
1450 static SEXP SavedLval;
1451 static char contextstack[CONTEXTSTACK_SIZE], *contextp;
1452
1453 static void PutSrcRefState(SrcRefState *state);
1454 static void UseSrcRefState(SrcRefState *state);
1455
1456 /* This is called once when R starts up. */
1457 attribute_hidden
InitParser(void)1458 void InitParser(void)
1459 {
1460 ParseState.sexps = allocVector(VECSXP, 7); /* initialized to R_NilValue */
1461 ParseState.data = R_NilValue;
1462 INIT_SVS();
1463 R_PreserveObject(ParseState.sexps); /* never released in an R session */
1464 R_NullSymbol = install("NULL");
1465 R_PipeBindSymbol = install("=>");
1466 }
1467
FinalizeSrcRefStateOnError(void * dummy)1468 static void FinalizeSrcRefStateOnError(void *dummy)
1469 {
1470 R_FinalizeSrcRefState();
1471 }
1472
1473 /* This is called each time a new parse sequence begins */
1474 attribute_hidden
R_InitSrcRefState(RCNTXT * cptr)1475 void R_InitSrcRefState(RCNTXT* cptr)
1476 {
1477 if (busy) {
1478 SrcRefState *prev = malloc(sizeof(SrcRefState));
1479 if (prev == NULL)
1480 error(_("allocation of source reference state failed"));
1481 PutSrcRefState(prev);
1482 ParseState.prevState = prev;
1483 ParseState.sexps = allocVector(VECSXP, 7);
1484 ParseState.data = R_NilValue;
1485 INIT_SVS();
1486 R_PreserveObject(ParseState.sexps);
1487 /* ParseState.sexps released in R_FinalizeSrcRefState */
1488 } else
1489 /* re-use data, text, ids arrays */
1490 ParseState.prevState = NULL;
1491 /* set up context _after_ PutSrcRefState */
1492 begincontext(cptr, CTXT_CCODE, R_NilValue, R_BaseEnv, R_BaseEnv,
1493 R_NilValue, R_NilValue);
1494 cptr->cend = &FinalizeSrcRefStateOnError;
1495 cptr->cenddata = NULL;
1496 ParseState.keepSrcRefs = FALSE;
1497 ParseState.keepParseData = TRUE;
1498 ParseState.didAttach = FALSE;
1499 PS_SET_SRCFILE(R_NilValue);
1500 PS_SET_ORIGINAL(R_NilValue);
1501 ParseState.data_count = 0;
1502 ParseState.xxlineno = 1;
1503 ParseState.xxcolno = 0;
1504 ParseState.xxbyteno = 0;
1505 ParseState.xxparseno = 1;
1506 busy = TRUE;
1507 }
1508
1509 attribute_hidden
R_FinalizeSrcRefState(void)1510 void R_FinalizeSrcRefState(void)
1511 {
1512 PS_SET_SRCFILE(R_NilValue);
1513 PS_SET_ORIGINAL(R_NilValue);
1514 CLEAR_SVS();
1515
1516 /* Free the data, text and ids if we are restoring a previous state,
1517 or if they have grown too large */
1518 if (PS_DATA != R_NilValue) {
1519 if (ParseState.prevState || DATA_COUNT > MAX_DATA_COUNT) {
1520 PS_SET_DATA(R_NilValue);
1521 PS_SET_TEXT(R_NilValue);
1522 } else /* Remove all the strings from the text vector so they don't take up memory, and clean up data */
1523 for (int i=0; i < ParseState.data_count; i++) {
1524 SET_STRING_ELT(PS_TEXT, i, R_BlankString);
1525 _PARENT(i) = 0;
1526 }
1527 }
1528 if (PS_IDS != R_NilValue) {
1529 if (ParseState.prevState || ID_COUNT > MAX_DATA_COUNT) {
1530 PS_SET_IDS(R_NilValue);
1531 } else {/* Remove the parent records */
1532 if (identifier > ID_COUNT) identifier = ID_COUNT;
1533 for (int i=0; i < identifier; i++) {
1534 ID_ID(i) = 0;
1535 ID_PARENT(i) = 0;
1536 }
1537 }
1538 }
1539 ParseState.data_count = NA_INTEGER;
1540 if (ParseState.prevState) {
1541 R_ReleaseObject(ParseState.sexps);
1542 SrcRefState *prev = ParseState.prevState;
1543 UseSrcRefState(prev);
1544 free(prev);
1545 } else
1546 busy = FALSE;
1547 }
1548
UseSrcRefState(SrcRefState * state)1549 static void UseSrcRefState(SrcRefState *state)
1550 {
1551 ParseState.keepSrcRefs = state->keepSrcRefs;
1552 ParseState.keepParseData = state->keepParseData;
1553 ParseState.sexps = state->sexps;
1554 ParseState.data = state->data;
1555 ParseState.data_count = state->data_count;
1556 ParseState.xxlineno = state->xxlineno;
1557 ParseState.xxcolno = state->xxcolno;
1558 ParseState.xxbyteno = state->xxbyteno;
1559 ParseState.xxparseno = state->xxparseno;
1560 ParseState.prevState = state->prevState;
1561 busy = TRUE;
1562 }
1563
PutSrcRefState(SrcRefState * state)1564 static void PutSrcRefState(SrcRefState *state)
1565 {
1566 state->keepSrcRefs = ParseState.keepSrcRefs;
1567 state->keepParseData = ParseState.keepParseData;
1568 state->sexps = ParseState.sexps;
1569 state->data = ParseState.data;
1570 state->data_count = ParseState.data_count;
1571 state->xxlineno = ParseState.xxlineno;
1572 state->xxcolno = ParseState.xxcolno;
1573 state->xxbyteno = ParseState.xxbyteno;
1574 state->xxparseno = ParseState.xxparseno;
1575 state->prevState = ParseState.prevState;
1576 }
1577
ParseInit(void)1578 static void ParseInit(void)
1579 {
1580 contextp = contextstack;
1581 *contextp = ' ';
1582 SavedToken = 0;
1583 SavedLval = R_NilValue;
1584 EatLines = 0;
1585 EndOfFile = 0;
1586 xxcharcount = 0;
1587 npush = 0;
1588 }
1589
initData(void)1590 static void initData(void)
1591 {
1592 ParseState.data_count = 0 ;
1593 }
1594
1595
ParseContextInit(void)1596 static void ParseContextInit(void)
1597 {
1598 R_ParseContextLast = 0;
1599 R_ParseContext[0] = '\0';
1600
1601 /* starts the identifier counter*/
1602 initId();
1603 initData();
1604 }
1605
R_Parse1(ParseStatus * status)1606 static SEXP R_Parse1(ParseStatus *status)
1607 {
1608 switch(yyparse()) {
1609 case 0: /* End of file */
1610 *status = PARSE_EOF;
1611 if (EndOfFile == 2) *status = PARSE_INCOMPLETE;
1612 break;
1613 case 1: /* Syntax error / incomplete */
1614 *status = PARSE_ERROR;
1615 if (EndOfFile) *status = PARSE_INCOMPLETE;
1616 break;
1617 case 2: /* Empty Line */
1618 *status = PARSE_NULL;
1619 break;
1620 case 3: /* Valid expr '\n' terminated */
1621 case 4: /* Valid expr ';' terminated */
1622 *status = PARSE_OK;
1623 break;
1624 }
1625 return R_CurrentExpr;
1626 }
1627
1628 static FILE *fp_parse;
1629
file_getc(void)1630 static int file_getc(void)
1631 {
1632 return R_fgetc(fp_parse);
1633 }
1634
1635 /* used in main.c */
1636 attribute_hidden
R_Parse1File(FILE * fp,int gencode,ParseStatus * status)1637 SEXP R_Parse1File(FILE *fp, int gencode, ParseStatus *status)
1638 {
1639 ParseInit();
1640 ParseContextInit();
1641 GenerateCode = gencode;
1642 fp_parse = fp;
1643 ptr_getc = file_getc;
1644 R_Parse1(status);
1645 CLEAR_SVS();
1646 return R_CurrentExpr;
1647 }
1648
1649 static IoBuffer *iob;
1650
buffer_getc(void)1651 static int buffer_getc(void)
1652 {
1653 return R_IoBufferGetc(iob);
1654 }
1655
1656 /* Used only in main.c */
1657 attribute_hidden
R_Parse1Buffer(IoBuffer * buffer,int gencode,ParseStatus * status)1658 SEXP R_Parse1Buffer(IoBuffer *buffer, int gencode, ParseStatus *status)
1659 {
1660 Rboolean keepSource = FALSE;
1661 RCNTXT cntxt;
1662
1663 R_InitSrcRefState(&cntxt);
1664 if (gencode) {
1665 keepSource = asLogical(GetOption1(install("keep.source")));
1666 if (keepSource) {
1667 ParseState.keepSrcRefs = TRUE;
1668 ParseState.keepParseData =
1669 asLogical(GetOption1(install("keep.parse.data")));
1670 PS_SET_SRCFILE(NewEnvironment(R_NilValue, R_NilValue, R_EmptyEnv));
1671 PS_SET_ORIGINAL(PS_SRCFILE);
1672 PS_SET_SRCREFS(R_NilValue);
1673 }
1674 }
1675 ParseInit();
1676 ParseContextInit();
1677 GenerateCode = gencode;
1678 iob = buffer;
1679 ptr_getc = buffer_getc;
1680 R_Parse1(status);
1681 if (gencode && keepSource) {
1682 if (ParseState.didAttach) {
1683 int buflen = R_IoBufferReadOffset(buffer);
1684 char buf[buflen+1];
1685 SEXP class;
1686 R_IoBufferReadReset(buffer);
1687 for (int i=0; i<buflen; i++)
1688 buf[i] = (char) R_IoBufferGetc(buffer);
1689
1690 buf[buflen] = 0;
1691 SEXP s_filename = install("filename");
1692 defineVar(s_filename, ScalarString(mkChar("")), PS_ORIGINAL);
1693 SEXP s_lines = install("lines");
1694 defineVar(s_lines, ScalarString(mkChar2(buf)), PS_ORIGINAL);
1695 PROTECT(class = allocVector(STRSXP, 2));
1696 SET_STRING_ELT(class, 0, mkChar("srcfilecopy"));
1697 SET_STRING_ELT(class, 1, mkChar("srcfile"));
1698 setAttrib(PS_ORIGINAL, R_ClassSymbol, class);
1699 UNPROTECT(1); /* class */
1700 }
1701 }
1702 PROTECT(R_CurrentExpr);
1703 endcontext(&cntxt);
1704 R_FinalizeSrcRefState();
1705 UNPROTECT(1); /* R_CurrentExpr */
1706 return R_CurrentExpr;
1707 }
1708
1709 static TextBuffer *txtb;
1710
text_getc(void)1711 static int text_getc(void)
1712 {
1713 return R_TextBufferGetc(txtb);
1714 }
1715
R_Parse(int n,ParseStatus * status,SEXP srcfile)1716 static SEXP R_Parse(int n, ParseStatus *status, SEXP srcfile)
1717 {
1718 int i;
1719 SEXP t, rval;
1720 RCNTXT cntxt;
1721
1722 R_InitSrcRefState(&cntxt);
1723 ParseContextInit();
1724
1725 PS_SET_SRCFILE(srcfile);
1726 PS_SET_ORIGINAL(srcfile);
1727
1728 if (isEnvironment(srcfile)) {
1729 ParseState.keepSrcRefs = TRUE;
1730 ParseState.keepParseData =
1731 asLogical(GetOption1(install("keep.parse.data")));
1732 PS_SET_SRCREFS(R_NilValue);
1733 }
1734
1735 PROTECT(t = NewList());
1736 for(i = 0; ; ) {
1737 if(n >= 0 && i >= n) break;
1738 ParseInit();
1739 rval = R_Parse1(status);
1740 switch(*status) {
1741 case PARSE_NULL:
1742 break;
1743 case PARSE_OK:
1744 PROTECT(rval);
1745 GrowList(t, rval);
1746 UNPROTECT(1); /* rval */
1747 i++;
1748 break;
1749 case PARSE_INCOMPLETE:
1750 case PARSE_ERROR:
1751 UNPROTECT(1); /* t */
1752 if (ParseState.keepSrcRefs && ParseState.keepParseData)
1753 finalizeData();
1754 endcontext(&cntxt);
1755 R_FinalizeSrcRefState();
1756 return R_NilValue;
1757 break;
1758 case PARSE_EOF:
1759 goto finish;
1760 break;
1761 }
1762 }
1763
1764 finish:
1765
1766 t = CDR(t);
1767 PROTECT(rval = allocVector(EXPRSXP, length(t)));
1768 for (n = 0 ; n < LENGTH(rval) ; n++, t = CDR(t))
1769 SET_VECTOR_ELT(rval, n, CAR(t));
1770 if (ParseState.keepSrcRefs) {
1771 if (ParseState.keepParseData)
1772 finalizeData();
1773 attachSrcrefs(rval);
1774 }
1775 UNPROTECT(2); /* t, rval */
1776 PROTECT(rval);
1777 endcontext(&cntxt);
1778 R_FinalizeSrcRefState();
1779 UNPROTECT(1); /* rval */
1780 *status = PARSE_OK;
1781 return rval;
1782 }
1783
1784 /* used in edit.c */
1785 attribute_hidden
R_ParseFile(FILE * fp,int n,ParseStatus * status,SEXP srcfile)1786 SEXP R_ParseFile(FILE *fp, int n, ParseStatus *status, SEXP srcfile)
1787 {
1788 GenerateCode = 1;
1789 fp_parse = fp;
1790 ptr_getc = file_getc;
1791 return R_Parse(n, status, srcfile);
1792 }
1793
1794 #include "Rconnections.h"
1795 static Rconnection con_parse;
1796
1797 /* need to handle incomplete last line */
con_getc(void)1798 static int con_getc(void)
1799 {
1800 int c;
1801 static int last=-1000;
1802
1803 c = Rconn_fgetc(con_parse);
1804 if (c == EOF && last != '\n') c = '\n';
1805 return (last = c);
1806 }
1807
1808 /* used in source.c */
1809 attribute_hidden
R_ParseConn(Rconnection con,int n,ParseStatus * status,SEXP srcfile)1810 SEXP R_ParseConn(Rconnection con, int n, ParseStatus *status, SEXP srcfile)
1811 {
1812 GenerateCode = 1;
1813 con_parse = con;
1814 ptr_getc = con_getc;
1815 return R_Parse(n, status, srcfile);
1816 }
1817
1818 /* This one is public, and used in source.c */
R_ParseVector(SEXP text,int n,ParseStatus * status,SEXP srcfile)1819 SEXP R_ParseVector(SEXP text, int n, ParseStatus *status, SEXP srcfile)
1820 {
1821 SEXP rval;
1822 TextBuffer textb;
1823 R_TextBufferInit(&textb, text);
1824 txtb = &textb;
1825 GenerateCode = 1;
1826 ptr_getc = text_getc;
1827 rval = R_Parse(n, status, srcfile);
1828 R_TextBufferFree(&textb);
1829 return rval;
1830 }
1831
Prompt(SEXP prompt,int type)1832 static const char *Prompt(SEXP prompt, int type)
1833 {
1834 if(type == 1) {
1835 if(length(prompt) <= 0) {
1836 return CHAR(STRING_ELT(GetOption1(install("prompt")), 0));
1837 }
1838 else
1839 return CHAR(STRING_ELT(prompt, 0));
1840 }
1841 else {
1842 return CHAR(STRING_ELT(GetOption1(install("continue")), 0));
1843 }
1844 }
1845
1846 /* used in source.c */
1847 attribute_hidden
R_ParseBuffer(IoBuffer * buffer,int n,ParseStatus * status,SEXP prompt,SEXP srcfile)1848 SEXP R_ParseBuffer(IoBuffer *buffer, int n, ParseStatus *status, SEXP prompt,
1849 SEXP srcfile)
1850 {
1851 SEXP rval, t;
1852 char *bufp, buf[CONSOLE_BUFFER_SIZE];
1853 int c, i, prompt_type = 1;
1854 RCNTXT cntxt;
1855
1856 R_IoBufferWriteReset(buffer);
1857 buf[0] = '\0';
1858 bufp = buf;
1859 R_InitSrcRefState(&cntxt);
1860 ParseContextInit();
1861
1862 GenerateCode = 1;
1863 iob = buffer;
1864 ptr_getc = buffer_getc;
1865
1866 PS_SET_SRCFILE(srcfile);
1867 PS_SET_ORIGINAL(srcfile);
1868
1869 if (isEnvironment(srcfile)) {
1870 ParseState.keepSrcRefs = TRUE;
1871 ParseState.keepParseData =
1872 asLogical(GetOption1(install("keep.parse.data")));
1873 PS_SET_SRCREFS(R_NilValue);
1874 }
1875
1876 PROTECT(t = NewList());
1877 for(i = 0; ; ) {
1878 if(n >= 0 && i >= n) break;
1879 if (!*bufp) {
1880 if(R_ReadConsole((char *) Prompt(prompt, prompt_type),
1881 (unsigned char *)buf, CONSOLE_BUFFER_SIZE, 1) == 0)
1882 goto finish;
1883 bufp = buf;
1884 }
1885 while ((c = *bufp++)) {
1886 R_IoBufferPutc(c, buffer);
1887 if (c == ';' || c == '\n') break;
1888 }
1889
1890 /* Was a call to R_Parse1Buffer, but we don't want to reset
1891 xxlineno and xxcolno */
1892 ParseInit();
1893 /* Not calling ParseContextInit() as it resets parse data, and
1894 to be consistent with R_Parse */
1895 R_Parse1(status);
1896 rval = R_CurrentExpr;
1897
1898 switch(*status) {
1899 case PARSE_NULL:
1900 break;
1901 case PARSE_OK:
1902 PROTECT(rval);
1903 GrowList(t, rval);
1904 UNPROTECT(1); /* rval */
1905 i++;
1906 break;
1907 case PARSE_INCOMPLETE:
1908 case PARSE_ERROR:
1909 UNPROTECT(1); /* t */
1910 R_IoBufferWriteReset(buffer);
1911 endcontext(&cntxt);
1912 R_FinalizeSrcRefState();
1913 return R_NilValue;
1914 break;
1915 case PARSE_EOF:
1916 goto finish;
1917 break;
1918 }
1919 }
1920 finish:
1921 R_IoBufferWriteReset(buffer);
1922 t = CDR(t);
1923 PROTECT(rval = allocVector(EXPRSXP, length(t)));
1924 for (n = 0 ; n < LENGTH(rval) ; n++, t = CDR(t))
1925 SET_VECTOR_ELT(rval, n, CAR(t));
1926 if (ParseState.keepSrcRefs) {
1927 if (ParseState.keepParseData)
1928 finalizeData();
1929 attachSrcrefs(rval);
1930 }
1931 UNPROTECT(2); /* t, rval */
1932 PROTECT(rval);
1933 endcontext(&cntxt);
1934 R_FinalizeSrcRefState();
1935 UNPROTECT(1); /* rval */
1936 *status = PARSE_OK;
1937 return rval;
1938 }
1939
1940
1941 /*----------------------------------------------------------------------------
1942 *
1943 * The Lexical Analyzer:
1944 *
1945 * Basic lexical analysis is performed by the following
1946 * routines. Input is read a line at a time, and, if the
1947 * program is in batch mode, each input line is echoed to
1948 * standard output after it is read.
1949 *
1950 * The function yylex() scans the input, breaking it into
1951 * tokens which are then passed to the parser. The lexical
1952 * analyser maintains a symbol table (in a very messy fashion).
1953 *
1954 * The fact that if statements need to parse differently
1955 * depending on whether the statement is being interpreted or
1956 * part of the body of a function causes the need for ifpop
1957 * and IfPush. When an if statement is encountered an 'i' is
1958 * pushed on a stack (provided there are parentheses active).
1959 * At later points this 'i' needs to be popped off of the if
1960 * stack.
1961 *
1962 */
1963
IfPush(void)1964 static void IfPush(void)
1965 {
1966 if (*contextp==LBRACE ||
1967 *contextp=='[' ||
1968 *contextp=='(' ||
1969 *contextp == 'i') {
1970 if(contextp - contextstack >= CONTEXTSTACK_SIZE)
1971 error(_("contextstack overflow"));
1972 *++contextp = 'i';
1973 }
1974
1975 }
1976
ifpop(void)1977 static void ifpop(void)
1978 {
1979 if (*contextp=='i')
1980 *contextp-- = 0;
1981 }
1982
1983 /* This is only called following ., so we only care if it is
1984 an ANSI digit or not */
typeofnext(void)1985 static int typeofnext(void)
1986 {
1987 int k, c;
1988
1989 c = xxgetc();
1990 if (isdigit(c)) k = 1; else k = 2;
1991 xxungetc(c);
1992 return k;
1993 }
1994
nextchar(int expect)1995 static int nextchar(int expect)
1996 {
1997 int c = xxgetc();
1998 if (c == expect)
1999 return 1;
2000 else
2001 xxungetc(c);
2002 return 0;
2003 }
2004
2005 /* Special Symbols */
2006 /* Syntactic Keywords + Symbolic Constants */
2007
2008 struct {
2009 char *name;
2010 int token;
2011 }
2012 static keywords[] = {
2013 { "NULL", NULL_CONST },
2014 { "NA", NUM_CONST },
2015 { "TRUE", NUM_CONST },
2016 { "FALSE", NUM_CONST },
2017 { "Inf", NUM_CONST },
2018 { "NaN", NUM_CONST },
2019 { "NA_integer_", NUM_CONST },
2020 { "NA_real_", NUM_CONST },
2021 { "NA_character_", NUM_CONST },
2022 { "NA_complex_", NUM_CONST },
2023 { "function", FUNCTION },
2024 { "while", WHILE },
2025 { "repeat", REPEAT },
2026 { "for", FOR },
2027 { "if", IF },
2028 { "in", IN },
2029 { "else", ELSE },
2030 { "next", NEXT },
2031 { "break", BREAK },
2032 { "...", SYMBOL },
2033 { 0, 0 }
2034 };
2035
2036 /* KeywordLookup has side effects, it sets yylval */
2037
KeywordLookup(const char * s)2038 static int KeywordLookup(const char *s)
2039 {
2040 int i;
2041 for (i = 0; keywords[i].name; i++) {
2042 if (strcmp(keywords[i].name, s) == 0) {
2043 switch (keywords[i].token) {
2044 case NULL_CONST:
2045 PRESERVE_SV(yylval = R_NilValue);
2046 break;
2047 case NUM_CONST:
2048 if(GenerateCode) {
2049 switch(i) {
2050 case 1:
2051 PRESERVE_SV(yylval = mkNA());
2052 break;
2053 case 2:
2054 PRESERVE_SV(yylval = mkTrue());
2055 break;
2056 case 3:
2057 PRESERVE_SV(yylval = mkFalse());
2058 break;
2059 case 4:
2060 PRESERVE_SV(yylval = allocVector(REALSXP, 1));
2061 REAL(yylval)[0] = R_PosInf;
2062 break;
2063 case 5:
2064 PRESERVE_SV(yylval = allocVector(REALSXP, 1));
2065 REAL(yylval)[0] = R_NaN;
2066 break;
2067 case 6:
2068 PRESERVE_SV(yylval = allocVector(INTSXP, 1));
2069 INTEGER(yylval)[0] = NA_INTEGER;
2070 break;
2071 case 7:
2072 PRESERVE_SV(yylval = allocVector(REALSXP, 1));
2073 REAL(yylval)[0] = NA_REAL;
2074 break;
2075 case 8:
2076 PRESERVE_SV(yylval = allocVector(STRSXP, 1));
2077 SET_STRING_ELT(yylval, 0, NA_STRING);
2078 break;
2079 case 9:
2080 PRESERVE_SV(yylval = allocVector(CPLXSXP, 1));
2081 COMPLEX(yylval)[0].r = COMPLEX(yylval)[0].i = NA_REAL;
2082 break;
2083 }
2084 } else
2085 PRESERVE_SV(yylval = R_NilValue);
2086 break;
2087 case FUNCTION:
2088 case WHILE:
2089 case REPEAT:
2090 case FOR:
2091 case IF:
2092 case NEXT:
2093 case BREAK:
2094 yylval = install(s);
2095 break;
2096 case IN:
2097 case ELSE:
2098 break;
2099 case SYMBOL:
2100 PRESERVE_SV(yylval = install(s));
2101 break;
2102 }
2103 return keywords[i].token;
2104 }
2105 }
2106 return 0;
2107 }
2108
mkFloat(const char * s)2109 static SEXP mkFloat(const char *s)
2110 {
2111 return ScalarReal(R_atof(s));
2112 }
2113
mkInt(const char * s)2114 static SEXP mkInt(const char *s)
2115 {
2116 double f = R_atof(s); /* or R_strtol? */
2117 return ScalarInteger((int) f);
2118 }
2119
mkComplex(const char * s)2120 static SEXP mkComplex(const char *s)
2121 {
2122 SEXP t = R_NilValue;
2123 double f;
2124 f = R_atof(s); /* FIXME: make certain the value is legitimate. */
2125 t = allocVector(CPLXSXP, 1);
2126 COMPLEX(t)[0].r = 0;
2127 COMPLEX(t)[0].i = f;
2128 return t;
2129 }
2130
mkNA(void)2131 static SEXP mkNA(void)
2132 {
2133 SEXP t = allocVector(LGLSXP, 1);
2134 LOGICAL(t)[0] = NA_LOGICAL;
2135 return t;
2136 }
2137
2138 attribute_hidden
mkTrue(void)2139 SEXP mkTrue(void)
2140 {
2141 SEXP s = allocVector(LGLSXP, 1);
2142 LOGICAL(s)[0] = 1;
2143 return s;
2144 }
2145
mkFalse(void)2146 SEXP mkFalse(void)
2147 {
2148 SEXP s = allocVector(LGLSXP, 1);
2149 LOGICAL(s)[0] = 0;
2150 return s;
2151 }
2152
yyerror(const char * s)2153 static void yyerror(const char *s)
2154 {
2155 static const char *const yytname_translations[] =
2156 {
2157 /* the left column are strings coming from bison, the right
2158 column are translations for users.
2159 The first YYENGLISH from the right column are English to be translated,
2160 the rest are to be copied literally. The #if 0 block below allows xgettext
2161 to see these.
2162 */
2163 #define YYENGLISH 8
2164 "$undefined", "input",
2165 "END_OF_INPUT", "end of input",
2166 "ERROR", "input",
2167 "STR_CONST", "string constant",
2168 "NUM_CONST", "numeric constant",
2169 "SYMBOL", "symbol",
2170 "LEFT_ASSIGN", "assignment",
2171 "'\\n'", "end of line",
2172 "NULL_CONST", "'NULL'",
2173 "FUNCTION", "'function'",
2174 "EQ_ASSIGN", "'='",
2175 "RIGHT_ASSIGN", "'->'",
2176 "LBB", "'[['",
2177 "FOR", "'for'",
2178 "IN", "'in'",
2179 "IF", "'if'",
2180 "ELSE", "'else'",
2181 "WHILE", "'while'",
2182 "NEXT", "'next'",
2183 "BREAK", "'break'",
2184 "REPEAT", "'repeat'",
2185 "GT", "'>'",
2186 "GE", "'>='",
2187 "LT", "'<'",
2188 "LE", "'<='",
2189 "EQ", "'=='",
2190 "NE", "'!='",
2191 "AND", "'&'",
2192 "OR", "'|'",
2193 "AND2", "'&&'",
2194 "OR2", "'||'",
2195 "NS_GET", "'::'",
2196 "NS_GET_INT", "':::'",
2197 "PIPE", "'|>'",
2198 "PIPEBIND", "'=>'",
2199 0
2200 };
2201 static char const yyunexpected[] = "syntax error, unexpected ";
2202 static char const yyexpecting[] = ", expecting ";
2203 char *expecting;
2204
2205 if (!EndOfFile)
2206 /* On EndOfFile, there are no more bytes to add, but trying to do
2207 so may have non-trivial performance overhead and this can be
2208 reached also in non-error situations, e.g. from repl.
2209 */
2210 finish_mbcs_in_parse_context();
2211
2212 R_ParseError = yylloc.first_line;
2213 R_ParseErrorCol = yylloc.first_column;
2214 R_ParseErrorFile = PS_SRCFILE;
2215
2216 if (!strncmp(s, yyunexpected, sizeof yyunexpected -1)) {
2217 int i;
2218 /* Edit the error message */
2219 expecting = strstr(s + sizeof yyunexpected -1, yyexpecting);
2220 if (expecting) *expecting = '\0';
2221 for (i = 0; yytname_translations[i]; i += 2) {
2222 if (!strcmp(s + sizeof yyunexpected - 1, yytname_translations[i])) {
2223 switch(i/2)
2224 {
2225 case 0:
2226 snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE, _("unexpected input"));
2227 break;
2228 case 1:
2229 snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE, _("unexpected end of input"));
2230 break;
2231 case 2:
2232 snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE, _("unexpected input"));
2233 break;
2234 case 3:
2235 snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE, _("unexpected string constant"));
2236 break;
2237 case 4:
2238 snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE, _("unexpected numeric constant"));
2239 break;
2240 case 5:
2241 snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE, _("unexpected symbol"));
2242 break;
2243 case 6:
2244 snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE, _("unexpected assignment"));
2245 break;
2246 case 7:
2247 snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE, _("unexpected end of line"));
2248 break;
2249 default:
2250 snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE, _("unexpected %s"),
2251 yytname_translations[i+1]);
2252 break;
2253 }
2254
2255 return;
2256 }
2257 }
2258 snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE - 1, _("unexpected %s"),
2259 s + sizeof yyunexpected - 1);
2260 } else {
2261 strncpy(R_ParseErrorMsg, s, PARSE_ERROR_SIZE - 1);
2262 R_ParseErrorMsg[PARSE_ERROR_SIZE - 1] = '\0';
2263 }
2264 }
2265
CheckFormalArgs(SEXP formlist,SEXP _new,YYLTYPE * lloc)2266 static void CheckFormalArgs(SEXP formlist, SEXP _new, YYLTYPE *lloc)
2267 {
2268 while (formlist != R_NilValue) {
2269 if (TAG(formlist) == _new) {
2270 error(_("repeated formal argument '%s' on line %d"), EncodeChar(PRINTNAME(_new)),
2271 lloc->first_line);
2272 }
2273 formlist = CDR(formlist);
2274 }
2275 }
2276
2277 /* This is used as the buffer for NumericValue, SpecialValue and
2278 SymbolValue. None of these could conceivably need 8192 bytes.
2279
2280 It has not been used as the buffer for input character strings
2281 since Oct 2007 (released as 2.7.0), and for comments since 2.8.0
2282 */
2283 static char yytext[MAXELTSIZE];
2284
SkipSpace(void)2285 static int SkipSpace(void)
2286 {
2287 int c;
2288
2289 #if defined(USE_RI18N_FNS) // includes Win32
2290 static wctype_t blankwct = 0;
2291 if (!blankwct)
2292 blankwct = Ri18n_wctype("blank");
2293 #endif
2294
2295 #ifdef Win32
2296 if(!mbcslocale) { /* 0xa0 is NBSP in all 8-bit Windows locales */
2297 while ((c = xxgetc()) == ' ' || c == '\t' || c == '\f' ||
2298 (unsigned int) c == 0xa0) ;
2299 return c;
2300 } else {
2301 int i, clen;
2302 wchar_t wc;
2303 while (1) {
2304 c = xxgetc();
2305 if (c == ' ' || c == '\t' || c == '\f') continue;
2306 if (c == '\n' || c == R_EOF) break;
2307 if ((unsigned int) c < 0x80) break;
2308 clen = mbcs_get_next(c, &wc); /* always 2 */
2309 if(! Ri18n_iswctype(wc, blankwct) ) break;
2310 for(i = 1; i < clen; i++) c = xxgetc();
2311 }
2312 return c;
2313 }
2314 #endif
2315 #if defined(__STDC_ISO_10646__)
2316 if(mbcslocale) { /* wctype functions need Unicode wchar_t */
2317 int i, clen;
2318 wchar_t wc;
2319 while (1) {
2320 c = xxgetc();
2321 if (c == ' ' || c == '\t' || c == '\f') continue;
2322 if (c == '\n' || c == R_EOF) break;
2323 if ((unsigned int) c < 0x80) break;
2324 clen = mbcs_get_next(c, &wc);
2325 #if defined(USE_RI18N_FNS)
2326 if(! Ri18n_iswctype(wc, blankwct) ) break;
2327 #else
2328 if(! iswblank(wc) ) break;
2329 #endif
2330 for(i = 1; i < clen; i++) c = xxgetc();
2331 }
2332 } else
2333 #endif
2334 // does not support non-ASCII spaces, unlike Windows
2335 while ((c = xxgetc()) == ' ' || c == '\t' || c == '\f') ;
2336 return c;
2337 }
2338
2339 /* Note that with interactive use, EOF cannot occur inside */
2340 /* a comment. However, semicolons inside comments make it */
2341 /* appear that this does happen. For this reason we use the */
2342 /* special assignment EndOfFile=2 to indicate that this is */
2343 /* going on. This is detected and dealt with in Parse1Buffer. */
2344
SkipComment(void)2345 static int SkipComment(void)
2346 {
2347 int c='#', i;
2348
2349 /* locations before the # character was read */
2350 int _first_column = ParseState.xxcolno ;
2351 int _first_parsed = ParseState.xxparseno ;
2352 int type = COMMENT ;
2353
2354 Rboolean maybeLine = (ParseState.xxcolno == 1);
2355 Rboolean doSave;
2356
2357 DECLARE_YYTEXT_BUFP(yyp);
2358
2359 if (maybeLine) {
2360 char lineDirective[] = "#line";
2361 YYTEXT_PUSH(c, yyp);
2362 for (i=1; i<5; i++) {
2363 c = xxgetc();
2364 if (c != (int)(lineDirective[i])) {
2365 maybeLine = FALSE;
2366 break;
2367 }
2368 YYTEXT_PUSH(c, yyp);
2369 }
2370 if (maybeLine)
2371 c = processLineDirective(&type);
2372 }
2373 // we want to track down the character
2374 // __before__ the new line character
2375 int _last_column = ParseState.xxcolno ;
2376 int _last_parsed = ParseState.xxparseno ;
2377
2378 if (c == '\n') {
2379 _last_column = prevcols[prevpos];
2380 _last_parsed = prevparse[prevpos];
2381 }
2382
2383 doSave = !maybeLine;
2384
2385 while (c != '\n' && c != R_EOF) {
2386 // Comments can be any length; we only record the ones that fit in yytext.
2387 if (doSave) {
2388 YYTEXT_PUSH(c, yyp);
2389 doSave = (yyp - yytext) < sizeof(yytext) - 2;
2390 }
2391 _last_column = ParseState.xxcolno ;
2392 _last_parsed = ParseState.xxparseno ;
2393 c = xxgetc();
2394 }
2395 if (c == R_EOF) EndOfFile = 2;
2396 incrementId( ) ;
2397 YYTEXT_PUSH('\0', yyp);
2398 record_( _first_parsed, _first_column, _last_parsed, _last_column,
2399 type, identifier, doSave ? yytext : 0 ) ;
2400 return c;
2401 }
2402
NumericValue(int c)2403 static int NumericValue(int c)
2404 {
2405 int seendot = (c == '.');
2406 int seenexp = 0;
2407 int last = c;
2408 int nd = 0;
2409 int asNumeric = 0;
2410 int count = 1; /* The number of characters seen */
2411
2412 DECLARE_YYTEXT_BUFP(yyp);
2413 YYTEXT_PUSH(c, yyp);
2414 /* We don't care about other than ASCII digits */
2415 while (isdigit(c = xxgetc()) || c == '.' || c == 'e' || c == 'E'
2416 || c == 'x' || c == 'X' || c == 'L')
2417 {
2418 count++;
2419 if (c == 'L') /* must be at the end. Won't allow 1Le3 (at present). */
2420 { YYTEXT_PUSH(c, yyp);
2421 break;
2422 }
2423
2424 if (c == 'x' || c == 'X') {
2425 if (count > 2 || last != '0') break; /* 0x must be first */
2426 YYTEXT_PUSH(c, yyp);
2427 while(isdigit(c = xxgetc()) || ('a' <= c && c <= 'f') ||
2428 ('A' <= c && c <= 'F') || c == '.') {
2429 if (c == '.') {
2430 if (seendot) return ERROR;
2431 seendot = 1;
2432 }
2433 YYTEXT_PUSH(c, yyp);
2434 nd++;
2435 }
2436 if (nd == 0) return ERROR;
2437 if (c == 'p' || c == 'P') {
2438 seenexp = 1;
2439 YYTEXT_PUSH(c, yyp);
2440 c = xxgetc();
2441 if (!isdigit(c) && c != '+' && c != '-') return ERROR;
2442 if (c == '+' || c == '-') {
2443 YYTEXT_PUSH(c, yyp);
2444 c = xxgetc();
2445 }
2446 for(nd = 0; isdigit(c); c = xxgetc(), nd++)
2447 YYTEXT_PUSH(c, yyp);
2448 if (nd == 0) return ERROR;
2449 }
2450 if (seendot && !seenexp) return ERROR;
2451 if (c == 'L') /* for getParseData */
2452 {
2453 // seenexp will be checked later
2454 YYTEXT_PUSH(c, yyp);
2455 break;
2456 }
2457 break;
2458 }
2459 if (c == 'E' || c == 'e') {
2460 if (seenexp)
2461 break;
2462 seenexp = 1;
2463 seendot = seendot == 1 ? seendot : 2;
2464 YYTEXT_PUSH(c, yyp);
2465 c = xxgetc();
2466 if (!isdigit(c) && c != '+' && c != '-') return ERROR;
2467 if (c == '+' || c == '-') {
2468 YYTEXT_PUSH(c, yyp);
2469 c = xxgetc();
2470 if (!isdigit(c)) return ERROR;
2471 }
2472 }
2473 if (c == '.') {
2474 if (seendot)
2475 break;
2476 seendot = 1;
2477 }
2478 YYTEXT_PUSH(c, yyp);
2479 last = c;
2480 }
2481
2482 if(c == 'i')
2483 YYTEXT_PUSH(c, yyp); /* for getParseData */
2484
2485 YYTEXT_PUSH('\0', yyp);
2486 /* Make certain that things are okay. */
2487 if(c == 'L') {
2488 double a = R_atof(yytext);
2489 int b = (int) a;
2490 /* We are asked to create an integer via the L, so we check that the
2491 double and int values are the same. If not, this is a problem and we
2492 will not lose information and so use the numeric value.
2493 */
2494 if(a != (double) b) {
2495 if(GenerateCode) {
2496 if(seendot == 1 && seenexp == 0)
2497 warning(_("integer literal %s contains decimal; using numeric value"), yytext);
2498 else {
2499 /* hide the L for the warning message */
2500 warning(_("non-integer value %s qualified with L; using numeric value"), yytext);
2501 }
2502 }
2503 asNumeric = 1;
2504 seenexp = 1;
2505 }
2506 }
2507
2508 if(c == 'i') {
2509 yylval = GenerateCode ? mkComplex(yytext) : R_NilValue;
2510 } else if(c == 'L' && asNumeric == 0) {
2511 if(GenerateCode && seendot == 1 && seenexp == 0)
2512 warning(_("integer literal %s contains unnecessary decimal point"), yytext);
2513 yylval = GenerateCode ? mkInt(yytext) : R_NilValue;
2514 #if 0 /* do this to make 123 integer not double */
2515 } else if(!(seendot || seenexp)) {
2516 if(c != 'L') xxungetc(c);
2517 if (GenerateCode) {
2518 double a = R_atof(yytext);
2519 int b = (int) a;
2520 yylval = (a != (double) b) ? mkFloat(yytext) : mkInt(yytext);
2521 } else yylval = R_NilValue;
2522 #endif
2523 } else {
2524 if(c != 'L')
2525 xxungetc(c);
2526 yylval = GenerateCode ? mkFloat(yytext) : R_NilValue;
2527 }
2528
2529 PRESERVE_SV(yylval);
2530 return NUM_CONST;
2531 }
2532
2533 /* Strings may contain the standard ANSI escapes and octal */
2534 /* specifications of the form \o, \oo or \ooo, where 'o' */
2535 /* is an octal digit. */
2536
2537 /* The buffer is reallocated on the R heap if needed; not by malloc */
2538 /* to avoid memory leak in case of R error (long jump) */
2539 #define STEXT_PUSH(c) do { \
2540 size_t nc = bp - stext; \
2541 if (nc >= nstext - 1) { \
2542 char *old = stext; \
2543 SEXP st1; \
2544 nstext *= 2; \
2545 PROTECT(st1 = allocVector(RAWSXP, nstext)); \
2546 stext = (char *)RAW(st1); \
2547 memmove(stext, old, nc); \
2548 REPROTECT(st1, sti); \
2549 UNPROTECT(1); /* st1 */ \
2550 bp = stext+nc; } \
2551 *bp++ = ((char) c); \
2552 } while(0)
2553
2554
2555 /* The idea here is that if a string contains \u escapes that are not
2556 valid in the current locale, we should switch to UTF-8 for that
2557 string. Needs Unicode wide-char support or out substitutes.
2558
2559 Defining __STDC_ISO_10646__ is done by the OS (or not) in wchar.t.
2560 Some (e.g. macOS, Solaris, FreeBSD) have Unicode wchar_t but do not
2561 define it: we override macOS and FreeBSD earlier in this file.
2562 */
2563
2564 #if defined(Win32) || defined(__STDC_ISO_10646__)
2565 typedef wchar_t ucs_t;
2566 # define mbcs_get_next2 mbcs_get_next
2567 #else
2568 typedef unsigned int ucs_t;
2569 # define WC_NOT_UNICODE
2570 // which is used to select our mbtoucs rather than system mbrtowc
mbcs_get_next2(int c,ucs_t * wc)2571 static int mbcs_get_next2(int c, ucs_t *wc)
2572 {
2573 int i, res, clen = 1; char s[9];
2574
2575 s[0] = c;
2576 /* This assumes (probably OK) that all MBCS embed ASCII as single-byte
2577 lead bytes, including control chars */
2578 if((unsigned int) c < 0x80) {
2579 *wc = (wchar_t) c;
2580 return 1;
2581 }
2582 if(utf8locale) {
2583 clen = utf8clen(c);
2584 for(i = 1; i < clen; i++) {
2585 c = xxgetc();
2586 if(c == R_EOF) error(_("EOF whilst reading MBCS char at line %d"), ParseState.xxlineno);
2587 s[i] = (char) c;
2588 }
2589 s[clen] ='\0'; /* x86 Solaris requires this */
2590 res = mbtoucs(wc, s, clen);
2591 if(res == -1) error(_("invalid multibyte character in parser at line %d"), ParseState.xxlineno);
2592 } else {
2593 /* This is not necessarily correct for stateful MBCS */
2594 while(clen <= R_MB_CUR_MAX) {
2595 res = mbtoucs(wc, s, clen);
2596 if(res >= 0) break;
2597 if(res == -1)
2598 error(_("invalid multibyte character in parser at line %d"), ParseState.xxlineno);
2599 /* so res == -2 */
2600 c = xxgetc();
2601 if(c == R_EOF) error(_("EOF whilst reading MBCS char at line %d"), ParseState.xxlineno);
2602 s[clen++] = c;
2603 } /* we've tried enough, so must be complete or invalid by now */
2604 }
2605 for(i = clen - 1; i > 0; i--) xxungetc(s[i]);
2606 return clen;
2607 }
2608 #endif
2609
2610 #define WTEXT_PUSH(c) do { if(wcnt < 10000) wcs[wcnt++] = c; } while(0)
2611
mkStringUTF8(const ucs_t * wcs,int cnt)2612 static SEXP mkStringUTF8(const ucs_t *wcs, int cnt)
2613 {
2614 SEXP t;
2615 int nb;
2616
2617 /* NB: cnt includes the terminator */
2618 #ifdef Win32
2619 nb = cnt*4; /* UCS-2/UTF-16 so max 4 bytes per wchar_t */
2620 #else
2621 nb = cnt*6;
2622 #endif
2623 R_CheckStack2(nb);
2624 char s[nb];
2625 memset(s, 0, nb); /* safety */
2626 // This used to differentiate WC_NOT_UNICODE but not needed
2627 wcstoutf8(s, (const wchar_t *)wcs, sizeof(s));
2628 PROTECT(t = allocVector(STRSXP, 1));
2629 SET_STRING_ELT(t, 0, mkCharCE(s, CE_UTF8));
2630 UNPROTECT(1); /* t */
2631 return t;
2632 }
2633
2634 #define CTEXT_PUSH(c) do { \
2635 if (ct - currtext >= 1000) { \
2636 memmove(currtext, currtext+100, 901); memmove(currtext, "... ", 4); ct -= 100; \
2637 currtext_truncated = TRUE; \
2638 } \
2639 *ct++ = ((char) c); \
2640 } while(0)
2641 #define CTEXT_POP() ct--
2642
2643
2644 /* forSymbol is true when parsing backticked symbols */
StringValue(int c,Rboolean forSymbol)2645 static int StringValue(int c, Rboolean forSymbol)
2646 {
2647 int quote = c;
2648 char currtext[1010], *ct = currtext;
2649 char st0[MAXELTSIZE];
2650 unsigned int nstext = MAXELTSIZE;
2651 char *stext = st0, *bp = st0;
2652 PROTECT_INDEX sti;
2653 int wcnt = 0;
2654 ucs_t wcs[10001];
2655 Rboolean oct_or_hex = FALSE, use_wcs = FALSE, currtext_truncated = FALSE;
2656
2657 PROTECT_WITH_INDEX(R_NilValue, &sti);
2658 CTEXT_PUSH(c);
2659 while ((c = xxgetc()) != R_EOF && c != quote) {
2660 CTEXT_PUSH(c);
2661 if (c == '\n') {
2662 xxungetc(c); CTEXT_POP();
2663 /* Fix suggested by Mark Bravington to allow multiline strings
2664 * by pretending we've seen a backslash. Was:
2665 * return ERROR;
2666 */
2667 c = '\\';
2668 }
2669 if (c == '\\') {
2670 c = xxgetc(); CTEXT_PUSH(c);
2671 if ('0' <= c && c <= '7') {
2672 int octal = c - '0';
2673 if ('0' <= (c = xxgetc()) && c <= '7') {
2674 CTEXT_PUSH(c);
2675 octal = 8 * octal + c - '0';
2676 if ('0' <= (c = xxgetc()) && c <= '7') {
2677 CTEXT_PUSH(c);
2678 octal = 8 * octal + c - '0';
2679 } else {
2680 xxungetc(c);
2681 CTEXT_POP();
2682 }
2683 } else {
2684 xxungetc(c);
2685 CTEXT_POP();
2686 }
2687 if (!octal)
2688 error(_("nul character not allowed (line %d)"), ParseState.xxlineno);
2689 c = octal;
2690 oct_or_hex = TRUE;
2691 }
2692 else if(c == 'x') {
2693 int val = 0; int i, ext;
2694 for(i = 0; i < 2; i++) {
2695 c = xxgetc(); CTEXT_PUSH(c);
2696 if(c >= '0' && c <= '9') ext = c - '0';
2697 else if (c >= 'A' && c <= 'F') ext = c - 'A' + 10;
2698 else if (c >= 'a' && c <= 'f') ext = c - 'a' + 10;
2699 else {
2700 xxungetc(c);
2701 CTEXT_POP();
2702 if (i == 0) { /* was just \x */
2703 *ct = '\0';
2704 errorcall(R_NilValue, _("'\\x' used without hex digits in character string starting \"%s\""), currtext);
2705 }
2706 break;
2707 }
2708 val = 16*val + ext;
2709 }
2710 if (!val)
2711 error(_("nul character not allowed (line %d)"), ParseState.xxlineno);
2712 c = val;
2713 oct_or_hex = TRUE;
2714 }
2715 else if(c == 'u') {
2716 unsigned int val = 0; int i, ext;
2717 Rboolean delim = FALSE;
2718
2719 if(forSymbol)
2720 error(_("\\uxxxx sequences not supported inside backticks (line %d)"), ParseState.xxlineno);
2721 if((c = xxgetc()) == '{') {
2722 delim = TRUE;
2723 CTEXT_PUSH(c);
2724 } else xxungetc(c);
2725 for(i = 0; i < 4; i++) {
2726 c = xxgetc(); CTEXT_PUSH(c);
2727 if(c >= '0' && c <= '9') ext = c - '0';
2728 else if (c >= 'A' && c <= 'F') ext = c - 'A' + 10;
2729 else if (c >= 'a' && c <= 'f') ext = c - 'a' + 10;
2730 else {
2731 xxungetc(c);
2732 CTEXT_POP();
2733 if (i == 0) { /* was just \u */
2734 *ct = '\0';
2735 errorcall(R_NilValue, _("'\\u' used without hex digits in character string starting \"%s\""), currtext);
2736 }
2737 break;
2738 }
2739 val = 16*val + ext;
2740 }
2741 if(delim) {
2742 if((c = xxgetc()) != '}')
2743 error(_("invalid \\u{xxxx} sequence (line %d)"),
2744 ParseState.xxlineno);
2745 else CTEXT_PUSH(c);
2746 }
2747 if (!val)
2748 error(_("nul character not allowed (line %d)"), ParseState.xxlineno);
2749 WTEXT_PUSH(val); /* this assumes wchar_t is Unicode */
2750 use_wcs = TRUE;
2751 continue;
2752 }
2753 else if(c == 'U') {
2754 unsigned int val = 0; int i, ext;
2755 Rboolean delim = FALSE;
2756 if(forSymbol)
2757 error(_("\\Uxxxxxxxx sequences not supported inside backticks (line %d)"), ParseState.xxlineno);
2758 if((c = xxgetc()) == '{') {
2759 delim = TRUE;
2760 CTEXT_PUSH(c);
2761 } else xxungetc(c);
2762 for(i = 0; i < 8; i++) {
2763 c = xxgetc(); CTEXT_PUSH(c);
2764 if(c >= '0' && c <= '9') ext = c - '0';
2765 else if (c >= 'A' && c <= 'F') ext = c - 'A' + 10;
2766 else if (c >= 'a' && c <= 'f') ext = c - 'a' + 10;
2767 else {
2768 xxungetc(c);
2769 CTEXT_POP();
2770 if (i == 0) { /* was just \U */
2771 *ct = '\0';
2772 errorcall(R_NilValue, _("'\\U' used without hex digits in character string starting \"%s\""), currtext);
2773 }
2774 break;
2775 }
2776 val = 16*val + ext;
2777 }
2778 if(delim) {
2779 if((c = xxgetc()) != '}')
2780 error(_("invalid \\U{xxxxxxxx} sequence (line %d)"),
2781 ParseState.xxlineno);
2782 else CTEXT_PUSH(c);
2783 }
2784 if (!val)
2785 error(_("nul character not allowed (line %d)"),
2786 ParseState.xxlineno);
2787 if (val > 0x10FFFF) {
2788 if(delim)
2789 error(_("invalid \\U{xxxxxxxx} value %6x (line %d)"),
2790 val, ParseState.xxlineno);
2791 else
2792 error(_("invalid \\Uxxxxxxxx value %6x (line %d)"),
2793 val, ParseState.xxlineno);
2794 }
2795 #ifdef Win32
2796 if (0x010000 <= val && val <= 0x10FFFF) { /* Need surrogate pair in Windows */
2797 val = val - 0x010000;
2798 WTEXT_PUSH( 0xD800 | (val >> 10) );
2799 val = 0xDC00 | (val & 0x03FF);
2800 }
2801 #endif
2802 WTEXT_PUSH(val);
2803 use_wcs = TRUE;
2804 continue;
2805 }
2806 else {
2807 switch (c) {
2808 case 'a':
2809 c = '\a';
2810 break;
2811 case 'b':
2812 c = '\b';
2813 break;
2814 case 'f':
2815 c = '\f';
2816 break;
2817 case 'n':
2818 c = '\n';
2819 break;
2820 case 'r':
2821 c = '\r';
2822 break;
2823 case 't':
2824 c = '\t';
2825 break;
2826 case 'v':
2827 c = '\v';
2828 break;
2829 case '\\':
2830 c = '\\';
2831 break;
2832 case '"':
2833 case '\'':
2834 case '`':
2835 case ' ':
2836 case '\n':
2837 break;
2838 default:
2839 *ct = '\0';
2840 errorcall(R_NilValue, _("'\\%c' is an unrecognized escape in character string starting \"%s\""), c, currtext);
2841 }
2842 }
2843 } else if(mbcslocale) {
2844 int i, clen;
2845 ucs_t wc;
2846 clen = mbcs_get_next2(c, &wc);
2847 WTEXT_PUSH(wc);
2848 ParseState.xxbyteno += clen-1;
2849
2850 for(i = 0; i < clen - 1; i++){
2851 STEXT_PUSH(c);
2852 c = xxgetc();
2853 if (c == R_EOF) break;
2854 CTEXT_PUSH(c);
2855 if (c == '\n') {
2856 xxungetc(c); CTEXT_POP();
2857 c = '\\';
2858 }
2859 }
2860 if (c == R_EOF) break;
2861 STEXT_PUSH(c);
2862 continue;
2863 }
2864 STEXT_PUSH(c);
2865 if ((unsigned int) c < 0x80) WTEXT_PUSH(c);
2866 else { /* have an 8-bit char in the current encoding */
2867 #ifdef WC_NOT_UNICODE
2868 ucs_t wc;
2869 char s[2] = " ";
2870 s[0] = (char) c;
2871 mbtoucs(&wc, s, 2);
2872 #else
2873 wchar_t wc;
2874 char s[2] = " ";
2875 s[0] = (char) c;
2876 mbrtowc(&wc, s, 2, NULL);
2877 #endif
2878 WTEXT_PUSH(wc);
2879 }
2880 }
2881 STEXT_PUSH('\0');
2882 WTEXT_PUSH(0);
2883 yytext[0] = '\0';
2884 if (c == R_EOF) {
2885 PRESERVE_SV(yylval = R_NilValue);
2886 UNPROTECT(1); /* release stext */
2887 return INCOMPLETE_STRING;
2888 } else {
2889 CTEXT_PUSH(c);
2890 CTEXT_PUSH('\0');
2891 }
2892 if (!currtext_truncated)
2893 strcpy(yytext, currtext);
2894 else if (forSymbol || !use_wcs) {
2895 size_t total = strlen(stext);
2896 snprintf(yytext, MAXELTSIZE, "[%u chars quoted with '%c']", (unsigned int)total, quote);
2897 } else
2898 snprintf(yytext, MAXELTSIZE, "[%d wide chars quoted with '%c']", wcnt, quote);
2899 if(forSymbol) {
2900 PRESERVE_SV(yylval = install(stext));
2901 UNPROTECT(1); /* release stext */
2902 return SYMBOL;
2903 } else {
2904 if(use_wcs) {
2905 if(oct_or_hex)
2906 error(_("mixing Unicode and octal/hex escapes in a string is not allowed"));
2907 if(wcnt < 10000)
2908 PRESERVE_SV(yylval = mkStringUTF8(wcs, wcnt)); /* include terminator */
2909 else
2910 error(_("string at line %d containing Unicode escapes not in this locale\nis too long (max 10000 chars)"), ParseState.xxlineno);
2911 } else
2912 PRESERVE_SV(yylval = mkString2(stext, bp - stext - 1, oct_or_hex));
2913 UNPROTECT(1); /* release stext */
2914 return STR_CONST;
2915 }
2916 }
2917
RawStringValue(int c0,int c)2918 static int RawStringValue(int c0, int c)
2919 {
2920 int quote = c;
2921 int delim = ')';
2922 char currtext[1010], *ct = currtext;
2923 char st0[MAXELTSIZE];
2924 unsigned int nstext = MAXELTSIZE;
2925 char *stext = st0, *bp = st0;
2926 PROTECT_INDEX sti;
2927 int wcnt = 0;
2928 ucs_t wcs[10001];
2929 Rboolean oct_or_hex = FALSE, use_wcs = FALSE, currtext_truncated = FALSE;
2930
2931 CTEXT_PUSH(c0); /* 'r' or 'R' */
2932 CTEXT_PUSH(c); /* opening quote */
2933
2934 /* count dashes between the opening quote and opening delimiter */
2935 int ndash = 0;
2936 while (nextchar('-')) { CTEXT_PUSH('-'); ndash++; }
2937
2938 c = xxgetc();
2939 CTEXT_PUSH(c);
2940 switch(c) {
2941 case '(': delim = ')'; break;
2942 case '[': delim = ']'; break;
2943 case '{': delim = '}'; break;
2944 case '|': delim = '|'; break;
2945 default:
2946 error(_("malformed raw string literal at line %d"),
2947 ParseState.xxlineno);
2948 }
2949
2950 PROTECT_WITH_INDEX(R_NilValue, &sti);
2951 while ((c = xxgetc()) != R_EOF) {
2952 if (c == delim) {
2953 /* count the dashes after the closing delimiter */
2954 int nd = 0;
2955 while (nd < ndash && nextchar('-')) nd++;
2956
2957 if (nd == ndash && nextchar(quote))
2958 /* right number of dashes, right quote: were done! */
2959 break;
2960 else {
2961 /* not done: emit closing delimiter, dashes, and continue */
2962 CTEXT_PUSH(delim);
2963 STEXT_PUSH(delim);
2964 WTEXT_PUSH(delim);
2965 for (int i = 0; i < nd; i++) {
2966 CTEXT_PUSH('-');
2967 STEXT_PUSH('-');
2968 WTEXT_PUSH('-');
2969 }
2970 continue;
2971 }
2972 }
2973 CTEXT_PUSH(c);
2974 if(mbcslocale) {
2975 int i, clen;
2976 ucs_t wc;
2977 clen = mbcs_get_next2(c, &wc);
2978 WTEXT_PUSH(wc);
2979 ParseState.xxbyteno += clen-1;
2980
2981 for(i = 0; i < clen - 1; i++){
2982 STEXT_PUSH(c);
2983 c = xxgetc();
2984 if (c == R_EOF) break;
2985 CTEXT_PUSH(c);
2986 }
2987 if (c == R_EOF) break;
2988 STEXT_PUSH(c);
2989 continue;
2990 }
2991 STEXT_PUSH(c);
2992 if ((unsigned int) c < 0x80) WTEXT_PUSH(c);
2993 else { /* have an 8-bit char in the current encoding */
2994 #ifdef WC_NOT_UNICODE
2995 ucs_t wc;
2996 char s[2] = " ";
2997 s[0] = (char) c;
2998 mbtoucs(&wc, s, 2);
2999 #else
3000 wchar_t wc;
3001 char s[2] = " ";
3002 s[0] = (char) c;
3003 mbrtowc(&wc, s, 2, NULL);
3004 #endif
3005 WTEXT_PUSH(wc);
3006 }
3007 }
3008 STEXT_PUSH('\0');
3009 WTEXT_PUSH(0);
3010 yytext[0] = '\0';
3011 if (c == R_EOF) {
3012 PRESERVE_SV(yylval = R_NilValue);
3013 UNPROTECT(1); /* release stext */
3014 return INCOMPLETE_STRING;
3015 } else {
3016 /* record delim, dashes, and quote, and terminate string */
3017 CTEXT_PUSH(delim);
3018 for (int i = 0; i < ndash; i++)
3019 CTEXT_PUSH('-');
3020 CTEXT_PUSH(quote);
3021 CTEXT_PUSH('\0');
3022 }
3023 if (!currtext_truncated)
3024 strcpy(yytext, currtext);
3025 else if (!use_wcs) {
3026 size_t total = strlen(stext);
3027 snprintf(yytext, MAXELTSIZE, "[%u chars quoted with '%c']", (unsigned int)total, quote);
3028 } else
3029 snprintf(yytext, MAXELTSIZE, "[%d wide chars quoted with '%c']", wcnt, quote);
3030 if(use_wcs) {
3031 if(oct_or_hex)
3032 error(_("mixing Unicode and octal/hex escapes in a string is not allowed"));
3033 if(wcnt < 10000)
3034 PRESERVE_SV(yylval = mkStringUTF8(wcs, wcnt)); /* include terminator */
3035 else
3036 error(_("string at line %d containing Unicode escapes not in this locale\nis too long (max 10000 chars)"), ParseState.xxlineno);
3037 } else
3038 PRESERVE_SV(yylval = mkString2(stext, bp - stext - 1, oct_or_hex));
3039 UNPROTECT(1); /* release stext */
3040 return STR_CONST;
3041 }
3042
SpecialValue(int c)3043 static int SpecialValue(int c)
3044 {
3045 DECLARE_YYTEXT_BUFP(yyp);
3046 YYTEXT_PUSH(c, yyp);
3047 while ((c = xxgetc()) != R_EOF && c != '%') {
3048 if (c == '\n') {
3049 xxungetc(c);
3050 return ERROR;
3051 }
3052 YYTEXT_PUSH(c, yyp);
3053 }
3054 if (c == '%')
3055 YYTEXT_PUSH(c, yyp);
3056 YYTEXT_PUSH('\0', yyp);
3057 yylval = install(yytext);
3058 return SPECIAL;
3059 }
3060
3061 /* return 1 if name is a valid name 0 otherwise */
3062 attribute_hidden
isValidName(const char * name)3063 int isValidName(const char *name)
3064 {
3065 const char *p = name;
3066 int i;
3067
3068 if(mbcslocale) {
3069 /* the only way to establish which chars are alpha etc is to
3070 use the wchar variants */
3071 size_t n = strlen(name), used;
3072 wchar_t wc;
3073 used = Mbrtowc(&wc, p, n, NULL); p += used; n -= used;
3074 if(used == 0) return 0;
3075 if (wc != L'.' && !iswalpha(wc) ) return 0;
3076 if (wc == L'.') {
3077 /* We don't care about other than ASCII digits */
3078 if(isdigit(0xff & (int)*p)) return 0;
3079 /* Mbrtowc(&wc, p, n, NULL); if(iswdigit(wc)) return 0; */
3080 }
3081 while((used = Mbrtowc(&wc, p, n, NULL))) {
3082 if (!(iswalnum(wc) || wc == L'.' || wc == L'_')) break;
3083 p += used; n -= used;
3084 }
3085 if (*p != '\0') return 0;
3086 } else {
3087 int c = 0xff & *p++;
3088 if (c != '.' && !isalpha(c) ) return 0;
3089 if (c == '.' && isdigit(0xff & (int)*p)) return 0;
3090 while ( c = 0xff & *p++, (isalnum(c) || c == '.' || c == '_') ) ;
3091 if (c != '\0') return 0;
3092 }
3093
3094 if (strcmp(name, "...") == 0) return 1;
3095
3096 for (i = 0; keywords[i].name != NULL; i++)
3097 if (strcmp(keywords[i].name, name) == 0) return 0;
3098
3099 return 1;
3100 }
3101
3102
SymbolValue(int c)3103 static int SymbolValue(int c)
3104 {
3105 int kw;
3106 DECLARE_YYTEXT_BUFP(yyp);
3107 if(mbcslocale) {
3108 // FIXME potentially need R_wchar_t with UTF-8 Windows.
3109 wchar_t wc; int i, clen;
3110 clen = mbcs_get_next(c, &wc);
3111 while(1) {
3112 /* at this point we have seen one char, so push its bytes
3113 and get one more */
3114 for(i = 0; i < clen; i++) {
3115 YYTEXT_PUSH(c, yyp);
3116 c = xxgetc();
3117 }
3118 if(c == R_EOF) break;
3119 if(c == '.' || c == '_') {
3120 clen = 1;
3121 continue;
3122 }
3123 clen = mbcs_get_next(c, &wc);
3124 if(!iswalnum(wc)) break;
3125 }
3126 } else
3127 do {
3128 YYTEXT_PUSH(c, yyp);
3129 } while ((c = xxgetc()) != R_EOF &&
3130 (isalnum(c) || c == '.' || c == '_'));
3131 xxungetc(c);
3132 YYTEXT_PUSH('\0', yyp);
3133 if ((kw = KeywordLookup(yytext)))
3134 return kw;
3135
3136 PRESERVE_SV(yylval = install(yytext));
3137 return SYMBOL;
3138 }
3139
setParseFilename(SEXP newname)3140 static void setParseFilename(SEXP newname) {
3141 SEXP class;
3142
3143 if (isEnvironment(PS_SRCFILE)) {
3144 SEXP oldname = findVar(install("filename"), PS_SRCFILE);
3145 if (isString(oldname) && length(oldname) > 0 &&
3146 strcmp(CHAR(STRING_ELT(oldname, 0)),
3147 CHAR(STRING_ELT(newname, 0))) == 0) return;
3148 PS_SET_SRCFILE(NewEnvironment(R_NilValue, R_NilValue, R_EmptyEnv));
3149 defineVar(install("filename"), newname, PS_SRCFILE);
3150 defineVar(install("original"), PS_ORIGINAL, PS_SRCFILE);
3151
3152 PROTECT(class = allocVector(STRSXP, 2));
3153 SET_STRING_ELT(class, 0, mkChar("srcfilealias"));
3154 SET_STRING_ELT(class, 1, mkChar("srcfile"));
3155 setAttrib(PS_SRCFILE, R_ClassSymbol, class);
3156 UNPROTECT(1); /* class */
3157 } else
3158 PS_SET_SRCFILE(duplicate(newname));
3159 RELEASE_SV(newname);
3160 }
3161
processLineDirective(int * type)3162 static int processLineDirective(int *type)
3163 {
3164 int c, tok, linenumber;
3165 c = SkipSpace();
3166 if (!isdigit(c)) return(c);
3167 tok = NumericValue(c);
3168 linenumber = atoi(yytext);
3169 c = SkipSpace();
3170 if (c == '"')
3171 tok = StringValue(c, FALSE);
3172 else
3173 xxungetc(c);
3174 if (tok == STR_CONST)
3175 setParseFilename(yylval);
3176 while ((c = xxgetc()) != '\n' && c != R_EOF) /* skip */ ;
3177 ParseState.xxlineno = linenumber;
3178 *type = LINE_DIRECTIVE;
3179 /* we don't change xxparseno here: it counts parsed lines, not official lines */
3180 R_ParseContext[R_ParseContextLast] = '\0'; /* Context report shouldn't show the directive */
3181 return(c);
3182 }
3183
3184 /* Get the R symbol, and set yytext at the same time */
install_and_save(char * text)3185 static SEXP install_and_save(char * text)
3186 {
3187 strcpy(yytext, text);
3188 return install(text);
3189 }
3190
3191 /* Get an R symbol, and set different yytext. Used for translation of -> to <-. ->> to <<- */
install_and_save2(char * text,char * savetext)3192 static SEXP install_and_save2(char * text, char * savetext)
3193 {
3194 strcpy(yytext, savetext);
3195 return install(text);
3196 }
3197
3198
3199 /* Split the input stream into tokens. */
3200 /* This is the lowest of the parsing levels. */
3201
token(void)3202 static int token(void)
3203 {
3204 int c;
3205 wchar_t wc;
3206
3207 if (SavedToken) {
3208 c = SavedToken;
3209 yylval = SavedLval;
3210 SavedLval = R_NilValue;
3211 SavedToken = 0;
3212 yylloc.first_line = xxlinesave;
3213 yylloc.first_column = xxcolsave;
3214 yylloc.first_byte = xxbytesave;
3215 yylloc.first_parsed = xxparsesave;
3216 return c;
3217 }
3218 xxcharsave = xxcharcount; /* want to be able to go back one token */
3219
3220 c = SkipSpace();
3221 if (c == '#') c = SkipComment();
3222
3223 yylloc.first_line = ParseState.xxlineno;
3224 yylloc.first_column = ParseState.xxcolno;
3225 yylloc.first_byte = ParseState.xxbyteno;
3226 yylloc.first_parsed = ParseState.xxparseno;
3227
3228 if (c == R_EOF) return END_OF_INPUT;
3229
3230 /* Either digits or symbols can start with a "." */
3231 /* so we need to decide which it is and jump to */
3232 /* the correct spot. */
3233
3234 if (c == '.' && typeofnext() >= 2) goto symbol;
3235
3236 /* literal numbers */
3237
3238 if (c == '.') return NumericValue(c);
3239 /* We don't care about other than ASCII digits */
3240 if (isdigit(c)) return NumericValue(c);
3241
3242 /* raw string literal */
3243
3244 if (c == 'r' || c == 'R') {
3245 if (nextchar('"'))
3246 return RawStringValue(c, '"');
3247 else if (nextchar('\''))
3248 return RawStringValue(c, '\'');
3249 }
3250
3251 /* literal strings */
3252
3253 if (c == '\"' || c == '\'')
3254 return StringValue(c, FALSE);
3255
3256 /* special functions */
3257
3258 if (c == '%')
3259 return SpecialValue(c);
3260
3261 /* functions, constants and variables */
3262
3263 if (c == '`')
3264 return StringValue(c, TRUE);
3265 symbol:
3266
3267 if (c == '.') return SymbolValue(c);
3268 if(mbcslocale) {
3269 // FIXME potentially need R_wchar_t with UTF-8 Windows.
3270 mbcs_get_next(c, &wc);
3271 if (iswalpha(wc)) return SymbolValue(c);
3272 } else
3273 if (isalpha(c)) return SymbolValue(c);
3274
3275 /* compound tokens */
3276
3277 switch (c) {
3278 case '<':
3279 if (nextchar('=')) {
3280 yylval = install_and_save("<=");
3281 return LE;
3282 }
3283 if (nextchar('-')) {
3284 yylval = install_and_save("<-");
3285 return LEFT_ASSIGN;
3286 }
3287 if (nextchar('<')) {
3288 if (nextchar('-')) {
3289 yylval = install_and_save("<<-");
3290 return LEFT_ASSIGN;
3291 }
3292 else
3293 return ERROR;
3294 }
3295 yylval = install_and_save("<");
3296 return LT;
3297 case '-':
3298 if (nextchar('>')) {
3299 if (nextchar('>')) {
3300 yylval = install_and_save2("<<-", "->>");
3301 return RIGHT_ASSIGN;
3302 }
3303 else {
3304 yylval = install_and_save2("<-", "->");
3305 return RIGHT_ASSIGN;
3306 }
3307 }
3308 yylval = install_and_save("-");
3309 return '-';
3310 case '>':
3311 if (nextchar('=')) {
3312 yylval = install_and_save(">=");
3313 return GE;
3314 }
3315 yylval = install_and_save(">");
3316 return GT;
3317 case '!':
3318 if (nextchar('=')) {
3319 yylval = install_and_save("!=");
3320 return NE;
3321 }
3322 yylval = install_and_save("!");
3323 return '!';
3324 case '=':
3325 if (nextchar('=')) {
3326 yylval = install_and_save("==");
3327 return EQ;
3328 }
3329 else if (nextchar('>')) {
3330 yylval = install_and_save("=>");
3331 return PIPEBIND;
3332 }
3333 yylval = install_and_save("=");
3334 return EQ_ASSIGN;
3335 case ':':
3336 if (nextchar(':')) {
3337 if (nextchar(':')) {
3338 yylval = install_and_save(":::");
3339 return NS_GET_INT;
3340 }
3341 else {
3342 yylval = install_and_save("::");
3343 return NS_GET;
3344 }
3345 }
3346 if (nextchar('=')) {
3347 yylval = install_and_save(":=");
3348 return LEFT_ASSIGN;
3349 }
3350 yylval = install_and_save(":");
3351 return ':';
3352 case '&':
3353 if (nextchar('&')) {
3354 yylval = install_and_save("&&");
3355 return AND2;
3356 }
3357 yylval = install_and_save("&");
3358 return AND;
3359 case '|':
3360 if (nextchar('|')) {
3361 yylval = install_and_save("||");
3362 return OR2;
3363 }
3364 else if (nextchar('>')) {
3365 yylval = install_and_save("|>");
3366 return PIPE;
3367 }
3368 yylval = install_and_save("|");
3369 return OR;
3370 case LBRACE:
3371 yylval = install_and_save("{");
3372 return c;
3373 case RBRACE:
3374 strcpy(yytext, "}");
3375 return c;
3376 case '(':
3377 yylval = install_and_save("(");
3378 return c;
3379 case ')':
3380 strcpy(yytext, ")");
3381 return c;
3382 case '[':
3383 if (nextchar('[')) {
3384 yylval = install_and_save("[[");
3385 return LBB;
3386 }
3387 yylval = install_and_save("[");
3388 return c;
3389 case ']':
3390 strcpy(yytext, "]");
3391 return c;
3392 case '?':
3393 yylval = install_and_save("?");
3394 return c;
3395 case '*':
3396 /* Replace ** by ^. This has been here since 1998, but is
3397 undocumented (at least in the obvious places). It is in
3398 the index of the Blue Book with a reference to p. 431, the
3399 help for 'Deprecated'. S-PLUS 6.2 still allowed this, so
3400 presumably it was for compatibility with S. */
3401 if (nextchar('*')) {
3402 yylval = install_and_save2("^", "**");
3403 return '^';
3404 } else
3405 yylval = install_and_save("*");
3406 return c;
3407 case '+':
3408 case '/':
3409 case '^':
3410 case '~':
3411 case '$':
3412 case '@':
3413 case '\\':
3414 yytext[0] = (char) c;
3415 yytext[1] = '\0';
3416 yylval = install(yytext);
3417 return c;
3418 default:
3419 yytext[0] = (char) c;
3420 yytext[1] = '\0';
3421 return c;
3422 }
3423 }
3424
3425 /**
3426 * Sets the first elements of the yyloc structure with current
3427 * information
3428 */
setfirstloc(void)3429 static void setfirstloc(void)
3430 {
3431 yylloc.first_line = ParseState.xxlineno;
3432 yylloc.first_column = ParseState.xxcolno;
3433 yylloc.first_byte = ParseState.xxbyteno;
3434 yylloc.first_parsed = ParseState.xxparseno;
3435 }
3436
setlastloc(void)3437 static void setlastloc(void)
3438 {
3439 yylloc.last_line = ParseState.xxlineno;
3440 yylloc.last_column = ParseState.xxcolno;
3441 yylloc.last_byte = ParseState.xxbyteno;
3442 yylloc.last_parsed = ParseState.xxparseno;
3443 }
3444
3445 /**
3446 * Wrap around the token function. Returns the same result
3447 * but increments the identifier, after a call to token_,
3448 * the identifier variable contains the id of the token
3449 * just returned
3450 *
3451 * @return the same as token
3452 */
3453
token_(void)3454 static int token_(void){
3455 // capture the position before retrieving the token
3456 setfirstloc( ) ;
3457
3458 // get the token
3459 int res = token( ) ;
3460
3461 // capture the position after
3462 int _last_col = ParseState.xxcolno ;
3463 int _last_parsed = ParseState.xxparseno ;
3464
3465 _current_token = res ;
3466 incrementId( ) ;
3467 yylloc.id = identifier ;
3468
3469 // record the position
3470 if( res != '\n' && res != END_OF_INPUT)
3471 record_( yylloc.first_parsed, yylloc.first_column,
3472 _last_parsed, _last_col,
3473 res, identifier, yytext );
3474
3475 return res;
3476 }
3477
3478
yylex(void)3479 static int yylex(void)
3480 {
3481 int tok;
3482
3483 again:
3484
3485 tok = token_();
3486
3487 /* Newlines must be handled in a context */
3488 /* sensitive way. The following block of */
3489 /* deals directly with newlines in the */
3490 /* body of "if" statements. */
3491
3492 if (tok == '\n') {
3493
3494 if (EatLines || *contextp == '[' || *contextp == '(')
3495 goto again;
3496
3497 /* The essence of this is that in the body of */
3498 /* an "if", any newline must be checked to */
3499 /* see if it is followed by an "else". */
3500 /* such newlines are discarded. */
3501
3502 if (*contextp == 'i') {
3503
3504 /* Find the next non-newline token */
3505
3506 while(tok == '\n')
3507 tok = token_();
3508
3509 /* If we encounter "}", ")" or "]" then */
3510 /* we know that all immediately preceding */
3511 /* "if" bodies have been terminated. */
3512 /* The corresponding "i" values are */
3513 /* popped off the context stack. */
3514
3515 if (tok == RBRACE || tok == ')' || tok == ']' ) {
3516 while (*contextp == 'i')
3517 ifpop();
3518 *contextp-- = 0;
3519 setlastloc();
3520 return tok;
3521 }
3522
3523 /* When a "," is encountered, it terminates */
3524 /* just the immediately preceding "if" body */
3525 /* so we pop just a single "i" of the */
3526 /* context stack. */
3527
3528 if (tok == ',') {
3529 ifpop();
3530 setlastloc();
3531 return tok;
3532 }
3533
3534 /* Tricky! If we find an "else" we must */
3535 /* ignore the preceding newline. Any other */
3536 /* token means that we must return the newline */
3537 /* to terminate the "if" and "push back" that */
3538 /* token so that we will obtain it on the next */
3539 /* call to token. In either case sensitivity */
3540 /* is lost, so we pop the "i" from the context */
3541 /* stack. */
3542
3543 if(tok == ELSE) {
3544 EatLines = 1;
3545 ifpop();
3546 setlastloc();
3547 return ELSE;
3548 }
3549 else {
3550 ifpop();
3551 SavedToken = tok;
3552 xxlinesave = yylloc.first_line;
3553 xxcolsave = yylloc.first_column;
3554 xxbytesave = yylloc.first_byte;
3555 xxparsesave = yylloc.first_parsed;
3556 SavedLval = yylval;
3557 setlastloc();
3558 if (ParseState.keepSrcRefs && ParseState.keepParseData &&
3559 yytext[0])
3560
3561 /* unrecord the pushed back token if not null */
3562 ParseState.data_count--;
3563 return '\n';
3564 }
3565 }
3566 else {
3567 setlastloc();
3568 return '\n';
3569 }
3570 }
3571
3572 /* Additional context sensitivities */
3573
3574 switch(tok) {
3575
3576 /* Any newlines immediately following the */
3577 /* the following tokens are discarded. The */
3578 /* expressions are clearly incomplete. */
3579
3580 case '+':
3581 case '-':
3582 case '*':
3583 case '/':
3584 case '^':
3585 case LT:
3586 case LE:
3587 case GE:
3588 case GT:
3589 case EQ:
3590 case NE:
3591 case OR:
3592 case AND:
3593 case OR2:
3594 case AND2:
3595 case PIPE:
3596 case PIPEBIND:
3597 case SPECIAL:
3598 case FUNCTION:
3599 case WHILE:
3600 case REPEAT:
3601 case FOR:
3602 case IN:
3603 case '?':
3604 case '!':
3605 case '=':
3606 case ':':
3607 case '~':
3608 case '$':
3609 case '@':
3610 case LEFT_ASSIGN:
3611 case RIGHT_ASSIGN:
3612 case EQ_ASSIGN:
3613 EatLines = 1;
3614 break;
3615
3616 /* Push any "if" statements found and */
3617 /* discard any immediately following newlines. */
3618
3619 case IF:
3620 IfPush();
3621 EatLines = 1;
3622 break;
3623
3624 /* Terminate any immediately preceding "if" */
3625 /* statements and discard any immediately */
3626 /* following newlines. */
3627
3628 case ELSE:
3629 ifpop();
3630 EatLines = 1;
3631 break;
3632
3633 /* These tokens terminate any immediately */
3634 /* preceding "if" statements. */
3635
3636 case ';':
3637 case ',':
3638 ifpop();
3639 break;
3640
3641 /* Any newlines following these tokens can */
3642 /* indicate the end of an expression. */
3643
3644 case SYMBOL:
3645 case STR_CONST:
3646 case NUM_CONST:
3647 case NULL_CONST:
3648 case NEXT:
3649 case BREAK:
3650 EatLines = 0;
3651 break;
3652
3653 /* Handle brackets, braces and parentheses */
3654
3655 case LBB:
3656 if(contextp - contextstack >= CONTEXTSTACK_SIZE - 1)
3657 error(_("contextstack overflow at line %d"), ParseState.xxlineno);
3658 *++contextp = '[';
3659 *++contextp = '[';
3660 break;
3661
3662 case '[':
3663 if(contextp - contextstack >= CONTEXTSTACK_SIZE)
3664 error(_("contextstack overflow at line %d"), ParseState.xxlineno);
3665 *++contextp = (char) tok;
3666 break;
3667
3668 case LBRACE:
3669 if(contextp - contextstack >= CONTEXTSTACK_SIZE)
3670 error(_("contextstack overflow at line %d"), ParseState.xxlineno);
3671 *++contextp = (char) tok;
3672 EatLines = 1;
3673 break;
3674
3675 case '(':
3676 if(contextp - contextstack >= CONTEXTSTACK_SIZE)
3677 error(_("contextstack overflow at line %d"), ParseState.xxlineno);
3678 *++contextp = (char) tok;
3679 break;
3680
3681 case ']':
3682 while (*contextp == 'i')
3683 ifpop();
3684 *contextp-- = 0;
3685 EatLines = 0;
3686 break;
3687
3688 case RBRACE:
3689 while (*contextp == 'i')
3690 ifpop();
3691 *contextp-- = 0;
3692 break;
3693
3694 case ')':
3695 while (*contextp == 'i')
3696 ifpop();
3697 *contextp-- = 0;
3698 EatLines = 0;
3699 break;
3700
3701 }
3702 setlastloc();
3703 return tok;
3704 }
3705 /**
3706 * Records location information about a symbol. The information is
3707 * used to fill the data
3708 *
3709 */
record_(int first_parsed,int first_column,int last_parsed,int last_column,int token,int id,char * text_in)3710 static void record_( int first_parsed, int first_column, int last_parsed, int last_column,
3711 int token, int id, char* text_in ){
3712
3713 if (!ParseState.keepSrcRefs || !ParseState.keepParseData
3714 || id == NA_INTEGER) return;
3715
3716 // don't care about zero sized things
3717 if( !yytext[0] ) return ;
3718
3719 if (ParseState.data_count == DATA_COUNT)
3720 growData();
3721
3722 _FIRST_COLUMN( ParseState.data_count ) = first_column;
3723 _FIRST_PARSED( ParseState.data_count ) = first_parsed;
3724 _LAST_COLUMN( ParseState.data_count ) = last_column;
3725 _LAST_PARSED( ParseState.data_count ) = last_parsed;
3726 _TOKEN( ParseState.data_count ) = token;
3727 _ID( ParseState.data_count ) = id ;
3728 _PARENT(ParseState.data_count) = 0 ;
3729 if ( text_in )
3730 SET_STRING_ELT(PS_TEXT, ParseState.data_count, mkChar2(text_in));
3731 else
3732 SET_STRING_ELT(PS_TEXT, ParseState.data_count, mkChar(""));
3733
3734 if( id > ID_COUNT )
3735 growID(id) ;
3736
3737 ID_ID( id ) = ParseState.data_count ;
3738
3739 ParseState.data_count++ ;
3740 }
3741
3742 /**
3743 * records parent as the parent of all its childs. This grows the
3744 * parents list with a new vector. The first element of the new
3745 * vector is the parent id, and other elements are childs id
3746 *
3747 * @param parent id of the parent expression
3748 * @param childs array of location information for all child symbols
3749 * @param nchilds number of childs
3750 */
recordParents(int parent,yyltype * childs,int nchilds)3751 static void recordParents( int parent, yyltype * childs, int nchilds){
3752
3753 if( parent > ID_COUNT ){
3754 growID(parent) ;
3755 }
3756
3757 /* some of the childs might be an empty token (like cr)
3758 which we do not want to track */
3759 int ii; /* loop index */
3760 yyltype loc ;
3761 for( ii=0; ii<nchilds; ii++){
3762 loc = childs[ii] ;
3763 if( loc.id == NA_INTEGER || (loc.first_line == loc.last_line && loc.first_byte > loc.last_byte) )
3764 continue ;
3765 /* This shouldn't happen... */
3766 if (loc.id < 0 || loc.id > identifier) {
3767 error(_("internal parser error at line %d"), ParseState.xxlineno);
3768 }
3769 ID_PARENT( loc.id ) = parent;
3770 }
3771
3772 }
3773
3774 /**
3775 * The token pointed by the location has the wrong token type,
3776 * This updates the type
3777 *
3778 * @param loc location information for the token to track
3779 */
modif_token(yyltype * loc,int tok)3780 static void modif_token( yyltype* loc, int tok ){
3781
3782 int id = loc->id ;
3783
3784 if (!ParseState.keepSrcRefs || !ParseState.keepParseData
3785 || id < 0 || id > ID_COUNT) return;
3786
3787 if( tok == SYMBOL_FUNCTION_CALL ){
3788 // looking for first child of id
3789 int j = ID_ID( id ) ;
3790 int parent = id ;
3791
3792 if (j < 0 || j > ID_COUNT)
3793 return;
3794
3795 while( ID_PARENT( _ID(j) ) != parent ){
3796 j-- ;
3797 if (j < 0)
3798 return;
3799 }
3800
3801 if( _TOKEN(j) == SYMBOL ){
3802 _TOKEN(j) = SYMBOL_FUNCTION_CALL ;
3803 }
3804
3805 } else{
3806 _TOKEN( ID_ID(id) ) = tok ;
3807 }
3808
3809 }
3810
3811 /* this local version of lengthgets() always copies and doesn't fill with NA */
lengthgets2(SEXP x,int len)3812 static SEXP lengthgets2(SEXP x, int len) {
3813 SEXP result;
3814 PROTECT(result = allocVector( TYPEOF(x), len ));
3815
3816 len = (len < length(x)) ? len : length(x);
3817 switch(TYPEOF(x)) {
3818 case INTSXP:
3819 for (int i = 0; i < len; i++)
3820 INTEGER(result)[i] = INTEGER(x)[i];
3821 for (int i = len; i < length(result); i++)
3822 INTEGER(result)[i] = 0;
3823 break;
3824 case STRSXP:
3825 for (int i = 0; i < len; i++)
3826 SET_STRING_ELT(result, i, STRING_ELT(x, i));
3827 break;
3828 default:
3829 UNIMPLEMENTED_TYPE("lengthgets2", x);
3830 }
3831 UNPROTECT(1); /* result */
3832 return result;
3833 }
3834
finalizeData()3835 static void finalizeData( ){
3836
3837 int nloc = ParseState.data_count ;
3838
3839 int i, j, id ;
3840 int parent ;
3841
3842 /* store parents in the data */
3843 for( i=0; i<nloc; i++){
3844 id = _ID(i);
3845 parent = ID_PARENT( id ) ;
3846 while( parent != 0 && ID_ID(parent) == 0 )
3847 parent = ID_PARENT( parent ) ;
3848 _PARENT(i) = parent ;
3849
3850 #define FD_FAST_UPDATE_PARENTS
3851 #ifdef FD_FAST_UPDATE_PARENTS
3852 /*
3853 With long generated expressions, updating the parents can take
3854 a lot of time due to long chains of nodes not represented in the
3855 parse data. To reduce the overhead somewhat, we create shortcuts
3856 in the IDS array to point directly to the parent that is in the
3857 parse data.
3858 */
3859 int data_parent = parent;
3860 parent = ID_PARENT( id ) ;
3861 while( parent != data_parent ){
3862 ID_PARENT( id ) = data_parent; /* set shortcut */
3863 id = parent;
3864 parent = ID_PARENT( parent );
3865 }
3866 #endif
3867 }
3868
3869 /* attach comments to closest enclosing symbol */
3870 /* not updating ID_PARENT anymore */
3871
3872 #define FD_FAST_ASSIGN_COMMENTS
3873 #ifdef FD_FAST_ASSIGN_COMMENTS
3874 /*
3875 All terminals (tokens) are ordered by start and end location, including
3876 the comments, in the data.
3877
3878 All non-terminals, including to be found parents of the comments, are
3879 ordered by their end location. When they have the same end location
3880 in the code, they are ordered by their decreasing start location
3881 (children before parents).
3882
3883 All terminals and non-terminals are also before their parents (if any),
3884 so a comment is also befor its parent in the data.
3885
3886 Consequently: the first non-terminal after a comment that encloses the
3887 comment is its (immediate) parent. The original algorithm for every
3888 comment linearly searches for the first enclosing non-terminal and
3889 returns it, but it has quadratic complexity and dominates the whole
3890 parsing for long inputs (used when FD_FAST_ASSIGN_COMMENTS is not
3891 defined).
3892
3893 This algorithm uses the parental information available on nodes that
3894 follow the comments. That information has been filled by the parser
3895 during reductions (but not for comments, because those are not in the
3896 grammar). A node following a comment is either the parent of the
3897 comment, or some of its parents are, or is an orphan.
3898
3899 Note that a non-terminal may end before a terminal (e.g. comment) in the
3900 code but be after the terminal in the data (due to look-ahead). It seems
3901 that the parent of the comment has to be within parents of the
3902 non-terminal as well, but I am not sure how to prove it, so the algorithm
3903 just skips non-terminals preceding the comment in the code (so is not
3904 strictly linear).
3905 */
3906
3907 for(i = nloc-1; i >= 0; i--) {
3908 if (_TOKEN(i) == COMMENT) {
3909 int orphan = 1;
3910 int istartl = _FIRST_PARSED(i);
3911 int istartc = _FIRST_COLUMN(i);
3912
3913 /* look for first node j that does not end before the comment i */
3914 for(j = i + 1; j < nloc && _LAST_PARSED(j) <= istartl; j++);
3915
3916 if (j < nloc) {
3917 for(;;) {
3918 int jstartl = _FIRST_PARSED(j);
3919 int jstartc = _FIRST_COLUMN(j);
3920
3921 if (jstartl < istartl || (jstartl == istartl
3922 && jstartc <= istartc)) {
3923 /* j starts before or at the comment */
3924 _PARENT(i) = _ID(j);
3925 orphan = 0;
3926 break;
3927 }
3928 /* find parent of j */
3929 int jparent = _PARENT(j);
3930 if (jparent == 0)
3931 break; /* orphan */
3932 j = ID_ID(jparent);
3933 }
3934 }
3935 if (orphan)
3936 _PARENT(i) = 0;
3937 }
3938 }
3939 #else
3940 /* the original algorithm, which is slow for large inputs */
3941
3942 int comment_line, comment_first_col;
3943 int this_first_parsed, this_last_parsed, this_first_col ;
3944 int orphan ;
3945
3946 for( i=0; i<nloc; i++){
3947 if( _TOKEN(i) == COMMENT ){
3948 comment_line = _FIRST_PARSED( i ) ;
3949 comment_first_col = _FIRST_COLUMN( i ) ;
3950
3951 orphan = 1 ;
3952 for( j=i+1; j<nloc; j++){
3953 this_first_parsed = _FIRST_PARSED( j ) ;
3954 this_first_col = _FIRST_COLUMN( j ) ;
3955 this_last_parsed = _LAST_PARSED( j ) ;
3956
3957 /* the comment needs to start after the current symbol */
3958 if( comment_line < this_first_parsed ) continue ;
3959 if( (comment_line == this_first_parsed) & (comment_first_col < this_first_col) ) continue ;
3960
3961 /* the current symbol must finish after the comment */
3962 if( this_last_parsed <= comment_line ) continue ;
3963
3964 /* we have a match, record the parent and stop looking */
3965 _PARENT(i) = _ID(j);
3966 orphan = 0;
3967 break ;
3968 }
3969 if(orphan){
3970 _PARENT(i) = 0 ;
3971 }
3972 }
3973 }
3974 #endif
3975
3976
3977 /* now rework the parents of comments, we try to attach
3978 comments that are not already attached (parent=0) to the next
3979 enclosing top-level expression */
3980
3981 for( i=0; i<nloc; i++){
3982 int token = _TOKEN(i);
3983 if( token == COMMENT && _PARENT(i) == 0 ){
3984 for( j=i; j<nloc; j++){
3985 int token_j = _TOKEN(j);
3986 if( token_j == COMMENT ) continue ;
3987 if( _PARENT(j) != 0 ) continue ;
3988 _PARENT(i) = - _ID(j) ;
3989 break ;
3990 }
3991 }
3992 }
3993
3994 /* attach the token names as an attribute so we don't need to switch to a dataframe, and decide on terminals */
3995 SEXP tokens;
3996 PROTECT(tokens = allocVector( STRSXP, nloc ) );
3997 for (int i=0; i<nloc; i++) {
3998 int token = _TOKEN(i);
3999 int xlat = yytranslate[token];
4000 if (xlat == 2) /* "unknown" */
4001 xlat = token;
4002 if (xlat < YYNTOKENS + YYNNTS)
4003 SET_STRING_ELT(tokens, i, mkChar(yytname[xlat]));
4004 else { /* we have a token which doesn't have a name, e.g. an illegal character as in PR#15518 */
4005 char name[2];
4006 name[0] = (char) xlat;
4007 name[1] = 0;
4008 SET_STRING_ELT(tokens, i, mkChar(name));
4009 }
4010 _TERMINAL(i) = xlat < YYNTOKENS;
4011 }
4012 SEXP dims, newdata, newtext;
4013 if (nloc) {
4014 PROTECT( newdata = lengthgets2(PS_DATA, nloc * DATA_ROWS));
4015 PROTECT( newtext = lengthgets2(PS_TEXT, nloc));
4016 } else {
4017 PROTECT( newdata = allocVector( INTSXP, 0));
4018 PROTECT( newtext = allocVector( STRSXP, 0));
4019 }
4020 PROTECT( dims = allocVector( INTSXP, 2 ) ) ;
4021 INTEGER(dims)[0] = DATA_ROWS ;
4022 INTEGER(dims)[1] = nloc ;
4023 setAttrib( newdata, install( "dim" ), dims ) ;
4024 setAttrib( newdata, install("tokens"), tokens );
4025 setAttrib( newdata, install("text"), newtext );
4026
4027 setAttrib(newdata, R_ClassSymbol, mkString("parseData"));
4028
4029 /* Put it into the srcfile environment */
4030 if (isEnvironment(PS_SRCFILE))
4031 defineVar(install("parseData"), newdata, PS_SRCFILE);
4032 UNPROTECT(4); /* tokens, newdata, newtext, dims */
4033 }
4034
4035 /**
4036 * Grows the data
4037 */
growData()4038 static void growData(){
4039
4040 int new_data_count;
4041 if (PS_DATA == R_NilValue) {
4042 new_data_count = INIT_DATA_COUNT;
4043 PS_SET_DATA(allocVector(INTSXP, 0));
4044 PS_SET_TEXT(allocVector(STRSXP, 0));
4045 } else
4046 new_data_count = 2*DATA_COUNT;
4047
4048 PS_SET_DATA(lengthgets2(PS_DATA, new_data_count * DATA_ROWS));
4049 PS_SET_TEXT(lengthgets2(PS_TEXT, new_data_count));
4050 }
4051
4052 /**
4053 * Grows the ids vector so that ID_ID(target) can be called
4054 */
growID(int target)4055 static void growID( int target ){
4056
4057 int new_count;
4058 if (PS_IDS == R_NilValue) {
4059 new_count = INIT_DATA_COUNT/2 - 1;
4060 PS_SET_IDS(allocVector(INTSXP, 0));
4061 } else
4062 new_count = ID_COUNT;
4063
4064 while (target > new_count)
4065 new_count = 2*new_count + 1;
4066
4067 if (new_count <= ID_COUNT)
4068 return;
4069
4070 int new_size = (1 + new_count)*2;
4071 PS_SET_IDS(lengthgets2(PS_IDS, new_size));
4072 }
4073
checkForPlaceholder(SEXP placeholder,SEXP arg)4074 static int checkForPlaceholder(SEXP placeholder, SEXP arg)
4075 {
4076 if (arg == placeholder)
4077 return TRUE;
4078 else if (TYPEOF(arg) == LANGSXP)
4079 for (SEXP cur = arg; cur != R_NilValue; cur = CDR(cur))
4080 if (checkForPlaceholder(placeholder, CAR(cur)))
4081 return TRUE;
4082 return FALSE;
4083 }
4084
signal_ph_error(SEXP rhs,SEXP ph)4085 static void NORET signal_ph_error(SEXP rhs, SEXP ph) {
4086 errorcall(rhs, _("pipe placeholder must only appear as a top-level "
4087 "argument in the RHS call"));
4088 }
4089
findPlaceholderCell(SEXP placeholder,SEXP rhs)4090 static SEXP findPlaceholderCell(SEXP placeholder, SEXP rhs)
4091 {
4092 SEXP phcell = NULL;
4093 int count = 0;
4094 if (checkForPlaceholder(placeholder, CAR(rhs)))
4095 signal_ph_error(rhs, placeholder);
4096 for (SEXP a = CDR(rhs); a != R_NilValue; a = CDR(a))
4097 if (CAR(a) == placeholder) {
4098 if (phcell == NULL)
4099 phcell = a;
4100 count++;
4101 }
4102 else if (checkForPlaceholder(placeholder, CAR(a)))
4103 signal_ph_error(rhs, placeholder);
4104 if (count > 1)
4105 errorcall(rhs, _("pipe placeholder may only appear once"));
4106 return phcell;
4107 }
4108