1 %{
2 /*
3 * R : A Computer Language for Statistical Data Analysis
4 * Copyright (C) 1995, 1996, 1997 Robert Gentleman and Ross Ihaka
5 * Copyright (C) 1997--2019 The R Core Team
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, a copy is available at
19 * https://www.R-project.org/Licenses/
20 */
21
22 #ifdef HAVE_CONFIG_H
23 #include <config.h>
24 #endif
25
26 #define R_USE_SIGNALS 1
27 #include <Defn.h>
28 #include <Parse.h>
29 #define STRICT_R_HEADERS
30 #include <R_ext/RS.h> /* for R_chk_* allocation */
31 #include <ctype.h>
32 #include <Rmath.h> /* for imax2(.),..*/
33 #undef _
34 #ifdef ENABLE_NLS
35 #include <libintl.h>
36 #define _(String) dgettext ("tools", String)
37 #else
38 #define _(String) (String)
39 #endif
40
41 /* bison creates a non-static symbol yylloc (and other) in both gramLatex.o
42 and gramRd.o, so remap */
43
44 #define yylloc yyllocR
45 #undef yynerrs /* from Defn.h */
46 #define yynerrs yynerrsR
47 #undef yychar /* from Defn.h */
48 #define yychar yycharR
49 #undef yylval /* from Defn.h */
50 #define yylval yylvalR
51
52 #define DEBUGVALS 0 /* 1 causes detailed internal state output to R console */
53 #define DEBUGMODE 0 /* 1 causes Bison output of parse state, to stdout or stderr */
54
55 static Rboolean wCalls = TRUE;
56 static Rboolean warnDups = FALSE;
57
58 #define YYERROR_VERBOSE 1
59
60 static void yyerror(const char *);
61 static int yylex();
62 static int yyparse(void);
63
64 #define yyconst const
65
66 typedef struct yyltype
67 {
68 int first_line;
69 int first_column;
70 int first_byte;
71
72 int last_line;
73 int last_column;
74 int last_byte;
75 } yyltype;
76
77 # define YYLTYPE yyltype
78 # define YYLLOC_DEFAULT(Current, Rhs, N) \
79 do \
80 if (N) \
81 { \
82 (Current).first_line = YYRHSLOC (Rhs, 1).first_line; \
83 (Current).first_column = YYRHSLOC (Rhs, 1).first_column; \
84 (Current).first_byte = YYRHSLOC (Rhs, 1).first_byte; \
85 (Current).last_line = YYRHSLOC (Rhs, N).last_line; \
86 (Current).last_column = YYRHSLOC (Rhs, N).last_column; \
87 (Current).last_byte = YYRHSLOC (Rhs, N).last_byte; \
88 } \
89 else \
90 { \
91 (Current).first_line = (Current).last_line = \
92 YYRHSLOC (Rhs, 0).last_line; \
93 (Current).first_column = (Current).last_column = \
94 YYRHSLOC (Rhs, 0).last_column; \
95 (Current).first_byte = (Current).last_byte = \
96 YYRHSLOC (Rhs, 0).last_byte; \
97 } \
98 while (0)
99
100 /* Useful defines so editors don't get confused ... */
101
102 #define LBRACE '{'
103 #define RBRACE '}'
104
105 /* Functions used in the parsing process */
106
107 static void GrowList(SEXP, SEXP);
108 static int KeywordLookup(const char *);
109 static SEXP UserMacroLookup(const char *);
110 static SEXP InstallKeywords();
111 static SEXP NewList(void);
112 static SEXP makeSrcref(YYLTYPE *, SEXP);
113 static int xxgetc();
114 static int xxungetc(int);
115
116 /* Flags used to mark need for postprocessing in the dynamicFlag attribute */
117
118 #define STATIC 0
119 #define HAS_IFDEF 1
120 #define HAS_SEXPR 2
121
122 /* Internal lexer / parser state variables */
123
124 static char const yyunknown[] = "unknown macro"; /* our message, not bison's */
125
126
127 typedef struct ParseState ParseState;
128 struct ParseState {
129 int xxinRString, xxQuoteLine, xxQuoteCol;
130 int xxinEqn;
131 int xxNewlineInString;
132 int xxlineno, xxbyteno, xxcolno;
133 int xxmode, xxitemType, xxbraceDepth; /* context for lexer */
134 int xxDebugTokens; /* non-zero causes debug output to R console */
135 const char* xxBasename; /* basename of file for error messages */
136 SEXP Value;
137 int xxinitvalue;
138 SEXP xxMacroList;/* A hashed environment containing all the standard and user-defined macro names */
139 SEXP mset; /* Precious mset for protecting parser semantic values */
140 ParseState *prevState;
141 };
142
143 static Rboolean busy = FALSE;
144 static ParseState parseState;
145
146 #define PRESERVE_SV(x) R_PreserveInMSet((x), parseState.mset)
147 #define RELEASE_SV(x) R_ReleaseFromMSet((x), parseState.mset)
148
149 #define RLIKE 1 /* Includes R strings; xxinRString holds the opening quote char, or 0 outside a string */
150 #define LATEXLIKE 2
151 #define VERBATIM 3
152 #define INOPTION 4
153 #define COMMENTMODE 5 /* only used in deparsing */
154 #define UNKNOWNMODE 6 /* ditto */
155
156 static SEXP SrcFile; /* parse_Rd will *always* supply a srcfile */
157
158 /* Routines used to build the parse tree */
159
160 static SEXP xxpushMode(int, int, int);
161 static void xxpopMode(SEXP);
162 static SEXP xxnewlist(SEXP);
163 static SEXP xxnewlist2(SEXP, SEXP);
164 static SEXP xxnewlist3(SEXP, SEXP, SEXP);
165 static SEXP xxnewlist4(SEXP, SEXP, SEXP, SEXP);
166 static SEXP xxnewlist5(SEXP, SEXP, SEXP, SEXP, SEXP);
167 static SEXP xxnewlist6(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
168 static SEXP xxnewlist7(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
169 static SEXP xxnewlist8(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
170 static SEXP xxnewlist9(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
171
172 static SEXP xxlist(SEXP, SEXP);
173 static SEXP xxmarkup(SEXP, SEXP, int, YYLTYPE *);
174 static SEXP xxmarkup2(SEXP, SEXP, SEXP, int, int, YYLTYPE *);
175 static SEXP xxmarkup3(SEXP, SEXP, SEXP, SEXP, int, YYLTYPE *);
176 static SEXP xxOptionmarkup(SEXP, SEXP, SEXP, int, YYLTYPE *);
177 static SEXP xxtag(SEXP, int, YYLTYPE *);
178 static void xxsavevalue(SEXP, YYLTYPE *);
179 static void xxWarnNewline();
180 static SEXP xxnewcommand(SEXP, SEXP, SEXP, YYLTYPE *);
181 static SEXP xxusermacro(SEXP, SEXP, YYLTYPE *);
182 static int mkMarkup(int);
183 static int mkIfdef(int);
184 static int mkCode(int);
185 static int mkText(int);
186 static int mkVerb(int);
187 static int mkComment(int);
188
189 static SEXP R_RdTagSymbol = NULL;
190 static SEXP R_RdOptionSymbol = NULL;
191 static SEXP R_DefinitionSymbol = NULL;
192 static SEXP R_DynamicFlagSymbol = NULL;
193 static SEXP R_MacroSymbol = NULL;
194
195 #define YYSTYPE SEXP
196
197 %}
198
199 %token END_OF_INPUT ERROR
200 %token SECTIONHEADER RSECTIONHEADER VSECTIONHEADER
201 %token SECTIONHEADER2
202 %token RCODEMACRO SEXPR RDOPTS LATEXMACRO VERBMACRO OPTMACRO ESCAPE
203 %token LISTSECTION ITEMIZE DESCRIPTION NOITEM
204 %token LATEXMACRO2 VERBMACRO2 VERBLATEX
205 %token LATEXMACRO3
206 %token NEWCOMMAND USERMACRO USERMACRO1 USERMACRO2 USERMACRO3 USERMACRO4
207 %token USERMACRO5 USERMACRO6 USERMACRO7 USERMACRO8 USERMACRO9
208 %token IFDEF ENDIF
209 %token TEXT RCODE VERB COMMENT UNKNOWN
210 %token STARTFILE STARTFRAGMENT /* fake tokens to have two entry points */
211
212 /* Recent bison has <> to represent all of the destructors below, but we don't assume it */
213
214 /* I think we need to list everything here which occurs before the last item in a
215 pattern, just in case the last item is unmatched and we need to back out. But
216 it is safe to list more, so we do. */
217
218 %destructor { RELEASE_SV($$); } SECTIONHEADER RSECTIONHEADER
219 VSECTIONHEADER SECTIONHEADER2 RCODEMACRO SEXPR LATEXMACRO VERBMACRO
220 OPTMACRO ESCAPE LISTSECTION ITEMIZE DESCRIPTION NOITEM LATEXMACRO2
221 VERBMACRO2 VERBLATEX LATEXMACRO3 IFDEF ENDIF TEXT RCODE VERB COMMENT UNKNOWN
222 NEWCOMMAND USERMACRO USERMACRO1 USERMACRO2 USERMACRO3 USERMACRO4
223 USERMACRO5 USERMACRO6 USERMACRO7 USERMACRO8 USERMACRO9
224 STARTFILE STARTFRAGMENT goLatexLike goRLike goRLike2 goOption
225 goVerbatim goVerbatim1 goVerbatim2 goItem0 goItem2 LatexArg RLikeArg2
226 VerbatimArg1 VerbatimArg2 IfDefTarget ArgItems Option
227
228 %%
229
230 Init: STARTFILE RdFile END_OF_INPUT { xxsavevalue($2, &@$); RELEASE_SV($1); YYACCEPT; }
231 | STARTFRAGMENT RdFragment END_OF_INPUT { xxsavevalue($2, &@$); RELEASE_SV($1); YYACCEPT; }
232 | error { PRESERVE_SV(parseState.Value = R_NilValue); YYABORT; }
233 ;
234
235 RdFragment : goLatexLike ArgItems { $$ = $2; RELEASE_SV($1); }
236 ;
237
238 RdFile : SectionList { $$ = $1; }
239 ;
240
241 SectionList: Section { $$ = xxnewlist($1); }
242 | SectionList Section { $$ = xxlist($1, $2); }
243
244 Section: VSECTIONHEADER VerbatimArg { $$ = xxmarkup($1, $2, STATIC, &@$); }
245 | RDOPTS VerbatimArg { $$ = xxmarkup($1, $2, HAS_SEXPR, &@$); }
246 | RSECTIONHEADER RLikeArg { $$ = xxmarkup($1, $2, STATIC, &@$); }
247 | SECTIONHEADER LatexArg { $$ = xxmarkup($1, $2, STATIC, &@$); }
248 | LISTSECTION Item2Arg { $$ = xxmarkup($1, $2, STATIC, &@$); }
249 | SECTIONHEADER2 LatexArg LatexArg2 { $$ = xxmarkup2($1, $2, $3, 2, STATIC, &@$); }
250 | IFDEF IfDefTarget SectionList ENDIF { $$ = xxmarkup2($1, $2, $3, 2, HAS_IFDEF, &@$); RELEASE_SV($4); }
251 | IFDEF IfDefTarget SectionList error { $$ = xxmarkup2($1, $2, $3, 2, HAS_IFDEF, &@$); }
252 | SEXPR goOption RLikeArg2 { $$ = xxmarkup($1, $3, HAS_SEXPR, &@$); xxpopMode($2); }
253 | SEXPR goOption Option RLikeArg2 { $$ = xxOptionmarkup($1, $3, $4, HAS_SEXPR, &@$); xxpopMode($2); }
254 | COMMENT { $$ = xxtag($1, COMMENT, &@$); }
255 | TEXT { $$ = xxtag($1, TEXT, &@$); } /* must be whitespace */
256 | UserMacro { $$ = $1; }
257 | error Section { $$ = $2; }
258
259 ArgItems: Item { $$ = xxnewlist($1); }
260 | ArgItems Item { $$ = xxlist($1, $2); }
261
262 Item: TEXT { $$ = xxtag($1, TEXT, &@$); }
263 | RCODE { $$ = xxtag($1, RCODE, &@$); }
264 | VERB { $$ = xxtag($1, VERB, &@$); }
265 | COMMENT { $$ = xxtag($1, COMMENT, &@$); }
266 | UNKNOWN { $$ = xxtag($1, UNKNOWN, &@$); yyerror(yyunknown); }
267 | Arg { $$ = xxmarkup(R_NilValue, $1, STATIC, &@$); }
268 | Markup { $$ = $1; }
269 | UserMacro { $$ = $1; }
270 | error Item { $$ = $2; }
271
272 Markup: LATEXMACRO LatexArg { $$ = xxmarkup($1, $2, STATIC, &@$); }
273 | LATEXMACRO2 LatexArg LatexArg2 { $$ = xxmarkup2($1, $2, $3, 2, STATIC, &@$); }
274 | LATEXMACRO3 LatexArg LatexArg2 LatexArg2 { $$ = xxmarkup3($1, $2, $3, $4, STATIC, &@$); }
275 | ITEMIZE Item0Arg { $$ = xxmarkup($1, $2, STATIC, &@$); }
276 | DESCRIPTION Item2Arg { $$ = xxmarkup($1, $2, STATIC, &@$); }
277 | OPTMACRO goOption LatexArg { $$ = xxmarkup($1, $3, STATIC, &@$); xxpopMode($2); }
278 | OPTMACRO goOption Option LatexArg { $$ = xxOptionmarkup($1, $3, $4, STATIC, &@$); xxpopMode($2); }
279 | RCODEMACRO RLikeArg { $$ = xxmarkup($1, $2, STATIC, &@$); }
280 | SEXPR goOption RLikeArg2 { $$ = xxmarkup($1, $3, HAS_SEXPR, &@$); xxpopMode($2); }
281 | SEXPR goOption Option RLikeArg2 { $$ = xxOptionmarkup($1, $3, $4, HAS_SEXPR, &@$); xxpopMode($2); }
282 | VERBMACRO VerbatimArg { $$ = xxmarkup($1, $2, STATIC, &@$); }
283 | VERBMACRO2 VerbatimArg1 { $$ = xxmarkup2($1, $2, R_NilValue, 1, STATIC, &@$); }
284 | VERBMACRO2 VerbatimArg1 VerbatimArg2 { $$ = xxmarkup2($1, $2, $3, 2, STATIC, &@$); }
285 | ESCAPE { $$ = xxmarkup($1, R_NilValue, STATIC, &@$); }
286 | IFDEF IfDefTarget ArgItems ENDIF { $$ = xxmarkup2($1, $2, $3, 2, HAS_IFDEF, &@$); RELEASE_SV($4); }
287 | IFDEF IfDefTarget ArgItems error { $$ = xxmarkup2($1, $2, $3, 2, HAS_IFDEF, &@$); }
288 | VERBLATEX VerbatimArg LatexArg2 { $$ = xxmarkup2($1, $2, $3, 2, STATIC, &@$); }
289
290 UserMacro: NEWCOMMAND VerbatimArg1 VerbatimArg { $$ = xxnewcommand($1, $2, $3, &@$); }
291 | USERMACRO { $$ = xxusermacro($1, xxnewlist(NULL), &@$); }
292 | USERMACRO1 VerbatimArg { $$ = xxusermacro($1, xxnewlist($2), &@$); }
293 | USERMACRO2 VerbatimArg VerbatimArg
294 { $$ = xxusermacro($1, xxnewlist2($2, $3), &@$); }
295 | USERMACRO3 VerbatimArg VerbatimArg VerbatimArg
296 { $$ = xxusermacro($1, xxnewlist3($2, $3, $4), &@$); }
297 | USERMACRO4 VerbatimArg VerbatimArg VerbatimArg VerbatimArg
298 { $$ = xxusermacro($1, xxnewlist4($2, $3, $4, $5), &@$); }
299 | USERMACRO5 VerbatimArg VerbatimArg VerbatimArg VerbatimArg VerbatimArg
300 { $$ = xxusermacro($1, xxnewlist5($2, $3, $4, $5, $6), &@$); }
301 | USERMACRO6 VerbatimArg VerbatimArg VerbatimArg VerbatimArg VerbatimArg
302 VerbatimArg { $$ = xxusermacro($1, xxnewlist6($2, $3, $4, $5, $6, $7), &@$); }
303 | USERMACRO7 VerbatimArg VerbatimArg VerbatimArg VerbatimArg VerbatimArg VerbatimArg
304 VerbatimArg VerbatimArg
305 { $$ = xxusermacro($1, xxnewlist7($2, $3, $4, $5, $6, $7, $8), &@$); }
306 | USERMACRO8 VerbatimArg VerbatimArg VerbatimArg VerbatimArg VerbatimArg VerbatimArg
307 VerbatimArg VerbatimArg VerbatimArg
308 { $$ = xxusermacro($1, xxnewlist8($2, $3, $4, $5, $6, $7, $8, $9), &@$); }
309 | USERMACRO9 VerbatimArg VerbatimArg VerbatimArg VerbatimArg VerbatimArg VerbatimArg
310 VerbatimArg VerbatimArg VerbatimArg VerbatimArg
311 { $$ = xxusermacro($1, xxnewlist9($2, $3, $4, $5, $6, $7, $8, $9, $10), &@$); }
312
313
314 LatexArg: goLatexLike Arg { xxpopMode($1); $$ = $2; }
315
316 LatexArg2: goLatexLike Arg { xxpopMode($1); $$ = $2; }
317 | goLatexLike TEXT { xxpopMode($1); $$ = xxnewlist($2);
318 if(wCalls)
319 warning(_("bad markup (extra space?) at %s:%d:%d"),
320 parseState.xxBasename, @2.first_line, @2.first_column);
321 else
322 warningcall(R_NilValue, _("bad markup (extra space?) at %s:%d:%d"),
323 parseState.xxBasename, @2.first_line, @2.first_column);
324 }
325
326 Item0Arg: goItem0 Arg { xxpopMode($1); $$ = $2; }
327
328 Item2Arg: goItem2 Arg { xxpopMode($1); $$ = $2; }
329
330 RLikeArg: goRLike Arg { xxpopMode($1); $$ = $2; }
331
332 /* This one is like VerbatimArg2 below: it does the push after seeing the brace */
333
334 RLikeArg2: '{' goRLike2 ArgItems '}' { xxpopMode($2); $$ = $3; }
335 | '{' goRLike2 '}' { xxpopMode($2); $$ = xxnewlist(NULL); }
336
337 VerbatimArg: goVerbatim Arg { xxpopMode($1); $$ = $2; }
338
339 VerbatimArg1: goVerbatim1 Arg { xxpopMode($1); $$ = $2; }
340
341 /* This one executes the push after seeing the brace starting the optional second arg */
342
343 VerbatimArg2: '{' goVerbatim2 ArgItems '}' { xxpopMode($2); $$ = $3; }
344 | '{' goVerbatim2 '}' { xxpopMode($2); $$ = xxnewlist(NULL); }
345
346 IfDefTarget: goLatexLike TEXT { xxpopMode($1); $$ = xxnewlist(xxtag($2, TEXT, &@$)); }
347
348
349 goLatexLike: /* empty */ { $$ = xxpushMode(LATEXLIKE, UNKNOWN, FALSE); }
350
351 goRLike: /* empty */ { $$ = xxpushMode(RLIKE, UNKNOWN, FALSE); }
352
353 goRLike2: /* empty */ { parseState.xxbraceDepth--; $$ = xxpushMode(RLIKE, UNKNOWN, FALSE); parseState.xxbraceDepth++; }
354
355 goOption: /* empty */ { $$ = xxpushMode(INOPTION, UNKNOWN, FALSE); }
356
357 goVerbatim: /* empty */ { $$ = xxpushMode(VERBATIM, UNKNOWN, FALSE); }
358
359 goVerbatim1: /* empty */ { $$ = xxpushMode(VERBATIM, UNKNOWN, TRUE); }
360
361 goVerbatim2: /* empty */ { parseState.xxbraceDepth--; $$ = xxpushMode(VERBATIM, UNKNOWN, FALSE); parseState.xxbraceDepth++; }
362
363 goItem0: /* empty */ { $$ = xxpushMode(LATEXLIKE, ESCAPE, FALSE); }
364
365 goItem2: /* empty */ { $$ = xxpushMode(LATEXLIKE, LATEXMACRO2, FALSE); }
366
367 Arg: '{' ArgItems '}' { $$ = $2; }
368 | '{' '}' { $$ = xxnewlist(NULL); }
369 | '{' ArgItems error '}' { $$ = $2; }
370 | '{' error '}' { $$ = xxnewlist(NULL); }
371 | '{' ArgItems error END_OF_INPUT { $$ = $2; }
372
373 Option: '[' Item ']' { $$ = $2; }
374
375 %%
376
377 static SEXP xxpushMode(int newmode, int newitem, int neweqn)
378 {
379 SEXP ans;
380
381 PRESERVE_SV(ans = allocVector(INTSXP, 7));
382 INTEGER(ans)[0] = parseState.xxmode; /* Lexer mode */
383 INTEGER(ans)[1] = parseState.xxitemType; /* What is \item? */
384 INTEGER(ans)[2] = parseState.xxbraceDepth; /* Brace depth used in RCODE and VERBATIM */
385 INTEGER(ans)[3] = parseState.xxinRString; /* Quote char that started a string */
386 INTEGER(ans)[4] = parseState.xxQuoteLine; /* Where the quote was */
387 INTEGER(ans)[5] = parseState.xxQuoteCol; /* " */
388 INTEGER(ans)[6] = parseState.xxinEqn; /* In the first arg to \eqn or \deqn: no escapes */
389
390 #if DEBUGMODE
391 Rprintf("xxpushMode(%d, %s) pushes %d, %s, %d\n", newmode, yytname[YYTRANSLATE(newitem)],
392 parseState.xxmode, yytname[YYTRANSLATE(parseState.xxitemType)], parseState.xxbraceDepth);
393 #endif
394 parseState.xxmode = newmode;
395 parseState.xxitemType = newitem;
396 parseState.xxbraceDepth = 0;
397 parseState.xxinRString = 0;
398 parseState.xxinEqn = neweqn;
399
400 return ans;
401 }
402
xxpopMode(SEXP oldmode)403 static void xxpopMode(SEXP oldmode)
404 {
405 #if DEBUGVALS
406 Rprintf("xxpopMode(%d, %s, %d) replaces %d, %s, %d\n", INTEGER(oldmode)[0], yytname[YYTRANSLATE(INTEGER(oldmode)[1])], INTEGER(oldmode)[2],
407 parseState.xxmode, yytname[YYTRANSLATE(parseState.xxitemType)], parseState.xxbraceDepth);
408 #endif
409 parseState.xxmode = INTEGER(oldmode)[0];
410 parseState.xxitemType = INTEGER(oldmode)[1];
411 parseState.xxbraceDepth = INTEGER(oldmode)[2];
412 parseState.xxinRString = INTEGER(oldmode)[3];
413 parseState.xxQuoteLine = INTEGER(oldmode)[4];
414 parseState.xxQuoteCol = INTEGER(oldmode)[5];
415 parseState.xxinEqn = INTEGER(oldmode)[6];
416
417 RELEASE_SV(oldmode);
418 }
419
getDynamicFlag(SEXP item)420 static int getDynamicFlag(SEXP item)
421 {
422 SEXP flag = getAttrib(item, R_DynamicFlagSymbol);
423 if (isNull(flag)) return 0;
424 else return INTEGER(flag)[0];
425 }
426
setDynamicFlag(SEXP item,int flag)427 static void setDynamicFlag(SEXP item, int flag)
428 {
429 if (flag)
430 setAttrib(item, R_DynamicFlagSymbol, ScalarInteger(flag));
431 }
432
xxnewlist(SEXP item)433 static SEXP xxnewlist(SEXP item)
434 {
435 SEXP ans;
436 #if DEBUGVALS
437 Rprintf("xxnewlist(item=%p)", item);
438 #endif
439 PRESERVE_SV(ans = NewList());
440 if (item) {
441 int flag = getDynamicFlag(item);
442 GrowList(ans, item);
443 setDynamicFlag(ans, flag);
444 RELEASE_SV(item);
445 }
446 #if DEBUGVALS
447 Rprintf(" result: %p is length %d\n", ans, length(ans));
448 #endif
449 return ans;
450 }
451
xxnewlist2(SEXP item1,SEXP item2)452 static SEXP xxnewlist2(SEXP item1, SEXP item2)
453 {
454 return xxlist(xxnewlist(item1), item2);
455 }
456
xxnewlist3(SEXP item1,SEXP item2,SEXP item3)457 static SEXP xxnewlist3(SEXP item1, SEXP item2, SEXP item3)
458 {
459 return xxlist(xxnewlist2(item1, item2), item3);
460 }
461
xxnewlist4(SEXP item1,SEXP item2,SEXP item3,SEXP item4)462 static SEXP xxnewlist4(SEXP item1, SEXP item2, SEXP item3, SEXP item4)
463 {
464 return xxlist(xxnewlist3(item1, item2, item3), item4);
465 }
466
xxnewlist5(SEXP item1,SEXP item2,SEXP item3,SEXP item4,SEXP item5)467 static SEXP xxnewlist5(SEXP item1, SEXP item2, SEXP item3, SEXP item4, SEXP item5)
468 {
469 return xxlist(xxnewlist4(item1, item2, item3, item4), item5);
470 }
471
xxnewlist6(SEXP item1,SEXP item2,SEXP item3,SEXP item4,SEXP item5,SEXP item6)472 static SEXP xxnewlist6(SEXP item1, SEXP item2, SEXP item3, SEXP item4, SEXP item5,
473 SEXP item6)
474 {
475 return xxlist(xxnewlist5(item1, item2, item3, item4, item5), item6);
476 }
477
xxnewlist7(SEXP item1,SEXP item2,SEXP item3,SEXP item4,SEXP item5,SEXP item6,SEXP item7)478 static SEXP xxnewlist7(SEXP item1, SEXP item2, SEXP item3, SEXP item4, SEXP item5,
479 SEXP item6, SEXP item7)
480 {
481 return xxlist(xxnewlist6(item1, item2, item3, item4, item5, item6), item7);
482 }
483
xxnewlist8(SEXP item1,SEXP item2,SEXP item3,SEXP item4,SEXP item5,SEXP item6,SEXP item7,SEXP item8)484 static SEXP xxnewlist8(SEXP item1, SEXP item2, SEXP item3, SEXP item4, SEXP item5,
485 SEXP item6, SEXP item7, SEXP item8)
486 {
487 return xxlist(xxnewlist7(item1, item2, item3, item4, item5, item6, item7), item8);
488 }
489
xxnewlist9(SEXP item1,SEXP item2,SEXP item3,SEXP item4,SEXP item5,SEXP item6,SEXP item7,SEXP item8,SEXP item9)490 static SEXP xxnewlist9(SEXP item1, SEXP item2, SEXP item3, SEXP item4, SEXP item5,
491 SEXP item6, SEXP item7, SEXP item8, SEXP item9)
492 {
493 return xxlist(xxnewlist8(item1, item2, item3, item4, item5, item6, item7, item8),
494 item9);
495 }
496
xxlist(SEXP list,SEXP item)497 static SEXP xxlist(SEXP list, SEXP item)
498 {
499 int flag = getDynamicFlag(list) | getDynamicFlag(item);
500 #if DEBUGVALS
501 Rprintf("xxlist(list=%p, item=%p)", list, item);
502 #endif
503 GrowList(list, item);
504 RELEASE_SV(item);
505 setDynamicFlag(list, flag);
506 #if DEBUGVALS
507 Rprintf(" result: %p is length %d\n", list, length(list));
508 #endif
509 return list;
510 }
511
xxmarkup(SEXP header,SEXP body,int flag,YYLTYPE * lloc)512 static SEXP xxmarkup(SEXP header, SEXP body, int flag, YYLTYPE *lloc)
513 {
514 SEXP ans;
515 #if DEBUGVALS
516 Rprintf("xxmarkup(header=%p, body=%p)", header, body);
517 #endif
518 if (isNull(body))
519 PRESERVE_SV(ans = allocVector(VECSXP, 0));
520 else {
521 flag |= getDynamicFlag(body);
522 PRESERVE_SV(ans = PairToVectorList(CDR(body)));
523 RELEASE_SV(body);
524 }
525 if (isNull(header))
526 setAttrib(ans, R_RdTagSymbol, mkString("LIST"));
527 else {
528 setAttrib(ans, R_RdTagSymbol, header);
529 RELEASE_SV(header);
530 }
531 setAttrib(ans, R_SrcrefSymbol, makeSrcref(lloc, SrcFile));
532 setDynamicFlag(ans, flag);
533 #if DEBUGVALS
534 Rprintf(" result: %p\n", ans);
535 #endif
536 return ans;
537 }
538
xxnewcommand(SEXP cmd,SEXP name,SEXP defn,YYLTYPE * lloc)539 static SEXP xxnewcommand(SEXP cmd, SEXP name, SEXP defn, YYLTYPE *lloc)
540 {
541 SEXP ans, prev, thename, thedefn;
542 char buffer[128];
543 const char *c;
544 int maxarg = 0;
545 #if DEBUGVALS
546 Rprintf("xxnewcommand(cmd=%p, name=%p, defn=%p)", cmd, name, defn);
547 #endif
548 thename = CADR(name);
549 thedefn = CADR(defn);
550 if (TYPEOF(thedefn) == STRSXP)
551 PROTECT(thedefn = mkString(CHAR(STRING_ELT(thedefn,0))));
552 else
553 PROTECT(thedefn = mkString(""));
554 if (warnDups) {
555 prev = findVar(installTrChar(STRING_ELT(thename, 0)), parseState.xxMacroList);
556 if (prev != R_UnboundValue && strcmp(CHAR(STRING_ELT(cmd,0)), "\\renewcommand")) {
557 snprintf(buffer, sizeof(buffer), _("Macro '%s' previously defined."),
558 CHAR(STRING_ELT(thename, 0)));
559 yyerror(buffer);
560 }
561 }
562 for (c = CHAR(STRING_ELT(thedefn, 0)); *c; c++) {
563 if (*c == '#' && isdigit(*(c+1)))
564 maxarg = imax2(maxarg, *(c+1) - '0');
565 }
566 if (maxarg > 4) {
567 snprintf(buffer, sizeof(buffer), _("At most 4 arguments are allowed for user defined macros."));
568 yyerror(buffer);
569 }
570 PROTECT(ans = ScalarInteger(USERMACRO + maxarg));
571 setAttrib(ans, R_RdTagSymbol, cmd);
572 setAttrib(ans, R_DefinitionSymbol, thedefn);
573 setAttrib(ans, R_SrcrefSymbol, makeSrcref(lloc, SrcFile));
574 defineVar(installTrChar(STRING_ELT(thename, 0)), ans, parseState.xxMacroList);
575 UNPROTECT(2); /* thedefn, ans */
576
577 PRESERVE_SV(ans);
578 RELEASE_SV(cmd);
579 RELEASE_SV(name);
580 RELEASE_SV(defn);
581 return ans;
582 }
583
584 #define START_MACRO -2
585 #define END_MACRO -3
586
isComment(SEXP elt)587 static Rboolean isComment(SEXP elt)
588 {
589 SEXP a = getAttrib(elt, R_RdTagSymbol);
590 return isString(a) && LENGTH(a) == 1 &&
591 !strcmp(CHAR(STRING_ELT(a, 0)), "COMMENT");
592 }
593
xxusermacro(SEXP macro,SEXP args,YYLTYPE * lloc)594 static SEXP xxusermacro(SEXP macro, SEXP args, YYLTYPE *lloc)
595 {
596 SEXP ans, value, nextarg;
597 int i,len;
598 const char *c, *start ;
599
600 #if DEBUGVALS
601 Rprintf("xxusermacro(macro=%p, args=%p)", macro, args);
602 #endif
603 len = length(args)-1;
604 PRESERVE_SV(ans = allocVector(STRSXP, len + 1));
605 value = UserMacroLookup(CHAR(STRING_ELT(macro,0)));
606 if (TYPEOF(value) == STRSXP)
607 SET_STRING_ELT(ans, 0, STRING_ELT(value, 0));
608 else
609 error(_("No macro definition for '%s'."), CHAR(STRING_ELT(macro,0)));
610
611 for (i = 0, nextarg=args; i < len; i++, nextarg = CDR(nextarg)) {
612 if (isNull(CDR(CADR(nextarg)))) {
613 /* This happens for an empty argument {} and for invocation
614 of a macro with zero parameters. In that case, the ""
615 element of ans is not needed but does no harm. */
616 SET_STRING_ELT(ans, i+1, mkChar(""));
617 continue;
618 }
619 if (isNull(CDR(CDR(CADR(nextarg))))) {
620 /* The common case: argument without newline nor comment.
621 (when the length is 1, there can be no comment) */
622 SEXP s = CADR(CADR(nextarg));
623 if (TYPEOF(s) == STRSXP && LENGTH(s) == 1)
624 SET_STRING_ELT(ans, i+1, STRING_ELT(s, 0));
625 else
626 error("internal error: invalid argument to xxusermacro");
627 continue;
628 }
629
630 /* An argument with a newline or comment or both. Exclude comments and
631 concatenate VERBs from different lines (newline characters are
632 in the VERBs already. */
633 const void *vmax = vmaxget();
634 SEXP si;
635 size_t ilen = 0;
636 for(si = CDR(CADR(nextarg)); si != R_NilValue; si = CDR(si)) {
637 SEXP stri = CAR(si);
638 if (TYPEOF(stri) == STRSXP && LENGTH(stri) == 1) {
639 if (!isComment(stri))
640 ilen += LENGTH(STRING_ELT(stri, 0));
641 } else
642 error("internal error: invalid argument to xxusermacro");
643 }
644
645 char *str = (char *)R_alloc(ilen + 1, sizeof(char));
646 size_t offset = 0;
647 for(si = CDR(CADR(nextarg)); si != R_NilValue; si = CDR(si)) {
648 SEXP stri = CAR(si);
649 if (!isComment(stri)) {
650 int nc = LENGTH(STRING_ELT(stri, 0));
651 memcpy(str + offset, CHAR(STRING_ELT(stri, 0)), nc);
652 offset += nc;
653 }
654 }
655 str[offset] = '\0';
656 SET_STRING_ELT(ans, i+1, mkChar(str));
657 vmaxset(vmax);
658 }
659 RELEASE_SV(args);
660
661 /* Now push the expanded macro onto the input stream, in reverse order */
662 xxungetc(END_MACRO);
663 start = CHAR(STRING_ELT(ans, 0));
664 for (c = start + strlen(start); c > start; c--) {
665 if (c > start + 1 && *(c-2) == '#' && isdigit(*(c-1))) {
666 int which = *(c-1) - '0';
667 if (which >= len + 1)
668 /* currently this won't happen, because the parser gets
669 confused whenever there is invalid number of {} arguments
670 to a user macro */
671 error(_("Not enough arguments passed to user macro '%s'"),
672 CHAR(STRING_ELT(macro,0)));
673 const char *arg = CHAR(STRING_ELT(ans, which));
674 for (size_t ii = strlen(arg); ii > 0; ii--) xxungetc(arg[ii-1]);
675 c--;
676 } else
677 xxungetc(*(c-1));
678 }
679 xxungetc(START_MACRO);
680
681 setAttrib(ans, R_RdTagSymbol, mkString("USERMACRO"));
682 setAttrib(ans, R_SrcrefSymbol, makeSrcref(lloc, SrcFile));
683 setAttrib(ans, R_MacroSymbol, macro);
684 RELEASE_SV(macro);
685 #if DEBUGVALS
686 Rprintf(" result: %p\n", ans);
687 #endif
688 return ans;
689 }
690
xxOptionmarkup(SEXP header,SEXP option,SEXP body,int flag,YYLTYPE * lloc)691 static SEXP xxOptionmarkup(SEXP header, SEXP option, SEXP body, int flag, YYLTYPE *lloc)
692 {
693 SEXP ans;
694 #if DEBUGVALS
695 Rprintf("xxOptionmarkup(header=%p, option=%p, body=%p)", header, option, body);
696 #endif
697 flag |= getDynamicFlag(body);
698 PRESERVE_SV(ans = PairToVectorList(CDR(body)));
699 RELEASE_SV(body);
700 setAttrib(ans, R_RdTagSymbol, header);
701 RELEASE_SV(header);
702 flag |= getDynamicFlag(option);
703 setAttrib(ans, R_RdOptionSymbol, option);
704 RELEASE_SV(option);
705 setAttrib(ans, R_SrcrefSymbol, makeSrcref(lloc, SrcFile));
706 setDynamicFlag(ans, flag);
707 #if DEBUGVALS
708 Rprintf(" result: %p\n", ans);
709 #endif
710 return ans;
711 }
712
xxmarkup2(SEXP header,SEXP body1,SEXP body2,int argcount,int flag,YYLTYPE * lloc)713 static SEXP xxmarkup2(SEXP header, SEXP body1, SEXP body2, int argcount, int flag, YYLTYPE *lloc)
714 {
715 SEXP ans;
716 #if DEBUGVALS
717 Rprintf("xxmarkup2(header=%p, body1=%p, body2=%p)", header, body1, body2);
718 #endif
719
720 PRESERVE_SV(ans = allocVector(VECSXP, argcount));
721 if (!isNull(body1)) {
722 int flag1 = getDynamicFlag(body1);
723 SET_VECTOR_ELT(ans, 0, PairToVectorList(CDR(body1)));
724 RELEASE_SV(body1);
725 setDynamicFlag(VECTOR_ELT(ans, 0), flag1);
726 flag |= flag1;
727 }
728 if (!isNull(body2)) {
729 int flag2;
730 if (argcount < 2) error("internal error: inconsistent argument count");
731 flag2 = getDynamicFlag(body2);
732 SET_VECTOR_ELT(ans, 1, PairToVectorList(CDR(body2)));
733 RELEASE_SV(body2);
734 setDynamicFlag(VECTOR_ELT(ans, 1), flag2);
735 flag |= flag2;
736 }
737 setAttrib(ans, R_RdTagSymbol, header);
738 RELEASE_SV(header);
739 setAttrib(ans, R_SrcrefSymbol, makeSrcref(lloc, SrcFile));
740 setDynamicFlag(ans, flag);
741 #if DEBUGVALS
742 Rprintf(" result: %p\n", ans);
743 #endif
744 return ans;
745 }
746
xxmarkup3(SEXP header,SEXP body1,SEXP body2,SEXP body3,int flag,YYLTYPE * lloc)747 static SEXP xxmarkup3(SEXP header, SEXP body1, SEXP body2, SEXP body3, int flag, YYLTYPE *lloc)
748 {
749 SEXP ans;
750 #if DEBUGVALS
751 Rprintf("xxmarkup2(header=%p, body1=%p, body2=%p, body3=%p)", header, body1, body2, body3);
752 #endif
753
754 PRESERVE_SV(ans = allocVector(VECSXP, 3));
755 if (!isNull(body1)) {
756 int flag1 = getDynamicFlag(body1);
757 SET_VECTOR_ELT(ans, 0, PairToVectorList(CDR(body1)));
758 RELEASE_SV(body1);
759 setDynamicFlag(VECTOR_ELT(ans, 0), flag1);
760 flag |= flag1;
761 }
762 if (!isNull(body2)) {
763 int flag2;
764 flag2 = getDynamicFlag(body2);
765 SET_VECTOR_ELT(ans, 1, PairToVectorList(CDR(body2)));
766 RELEASE_SV(body2);
767 setDynamicFlag(VECTOR_ELT(ans, 1), flag2);
768 flag |= flag2;
769 }
770 if (!isNull(body3)) {
771 int flag3;
772 flag3 = getDynamicFlag(body3);
773 SET_VECTOR_ELT(ans, 2, PairToVectorList(CDR(body3)));
774 RELEASE_SV(body3);
775 setDynamicFlag(VECTOR_ELT(ans, 2), flag3);
776 flag |= flag3;
777 }
778 setAttrib(ans, R_RdTagSymbol, header);
779 RELEASE_SV(header);
780 setAttrib(ans, R_SrcrefSymbol, makeSrcref(lloc, SrcFile));
781 setDynamicFlag(ans, flag);
782 #if DEBUGVALS
783 Rprintf(" result: %p\n", ans);
784 #endif
785 return ans;
786 }
787
xxsavevalue(SEXP Rd,YYLTYPE * lloc)788 static void xxsavevalue(SEXP Rd, YYLTYPE *lloc)
789 {
790 int flag = getDynamicFlag(Rd);
791 PRESERVE_SV(parseState.Value = PairToVectorList(CDR(Rd)));
792 if (!isNull(parseState.Value)) {
793 setAttrib(parseState.Value, R_ClassSymbol, mkString("Rd"));
794 setAttrib(parseState.Value, R_SrcrefSymbol, makeSrcref(lloc, SrcFile));
795 setDynamicFlag(parseState.Value, flag);
796 }
797 RELEASE_SV(Rd);
798 }
799
xxtag(SEXP item,int type,YYLTYPE * lloc)800 static SEXP xxtag(SEXP item, int type, YYLTYPE *lloc)
801 {
802 setAttrib(item, R_RdTagSymbol, mkString(yytname[YYTRANSLATE(type)]));
803 setAttrib(item, R_SrcrefSymbol, makeSrcref(lloc, SrcFile));
804 return item;
805 }
806
xxWarnNewline()807 static void xxWarnNewline()
808 {
809 if (parseState.xxNewlineInString) {
810 if(wCalls)
811 warning(_("newline within quoted string at %s:%d"),
812 parseState.xxBasename, parseState.xxNewlineInString);
813 else
814 warningcall(R_NilValue,
815 _("newline within quoted string at %s:%d"),
816 parseState.xxBasename, parseState.xxNewlineInString);
817 }
818 }
819
820
821 /*----------------------------------------------------------------------------*/
822
823
824 static int (*ptr_getc)(void);
825
826 /* Private pushback, since file ungetc only guarantees one byte.
827 We need arbitrarily large size, since this is how macros are expanded. */
828
829 #define PUSH_BACK(c) do { \
830 if (npush >= pushsize - 1) { \
831 int *old = pushbase; \
832 pushsize *= 2; \
833 pushbase = malloc(pushsize*sizeof(int)); \
834 if(!pushbase) error(_("unable to allocate buffer for long macro at line %d"), parseState.xxlineno);\
835 memmove(pushbase, old, npush*sizeof(int)); \
836 if(old != pushback) free(old); } \
837 pushbase[npush++] = (c); \
838 } while(0)
839
840
841
842 #define PUSHBACK_BUFSIZE 32
843
844 static int pushback[PUSHBACK_BUFSIZE];
845 static int *pushbase;
846 static unsigned int npush, pushsize;
847 static int macrolevel;
848 static int prevpos = 0;
849 static int prevlines[PUSHBACK_BUFSIZE];
850 static int prevcols[PUSHBACK_BUFSIZE];
851 static int prevbytes[PUSHBACK_BUFSIZE];
852
853
xxgetc(void)854 static int xxgetc(void)
855 {
856 int c, oldpos;
857
858 do {
859 if(npush) {
860 c = pushbase[--npush];
861 if (c == START_MACRO) {
862 macrolevel++;
863 if (macrolevel > 1000)
864 error(_("macros nested too deeply: infinite recursion?"));
865 } else if (c == END_MACRO) macrolevel--;
866 } else c = ptr_getc();
867 } while (c == START_MACRO || c == END_MACRO);
868
869 if (!macrolevel) {
870 oldpos = prevpos;
871 prevpos = (prevpos + 1) % PUSHBACK_BUFSIZE;
872 prevbytes[prevpos] = parseState.xxbyteno;
873 prevlines[prevpos] = parseState.xxlineno;
874 /* We only advance the column for the 1st byte in UTF-8, so handle later bytes specially */
875 if (0x80 <= (unsigned char)c && (unsigned char)c <= 0xBF) {
876 parseState.xxcolno--;
877 prevcols[prevpos] = prevcols[oldpos];
878 } else
879 prevcols[prevpos] = parseState.xxcolno;
880
881 if (c == EOF) return R_EOF;
882
883 R_ParseContextLast = (R_ParseContextLast + 1) % PARSE_CONTEXT_SIZE;
884 R_ParseContext[R_ParseContextLast] = (char) c;
885
886 if (c == '\n') {
887 parseState.xxlineno += 1;
888 parseState.xxcolno = 1;
889 parseState.xxbyteno = 1;
890 } else {
891 parseState.xxcolno++;
892 parseState.xxbyteno++;
893 }
894
895 if (c == '\t') parseState.xxcolno = ((parseState.xxcolno + 6) & ~7) + 1;
896
897 R_ParseContextLine = parseState.xxlineno;
898 }
899 /* Rprintf("get %c\n", c); */
900 return c;
901 }
902
xxungetc(int c)903 static int xxungetc(int c)
904 {
905 /* this assumes that c was the result of xxgetc; if not, some edits will be needed */
906 if (c == END_MACRO) macrolevel++;
907 if (!macrolevel) {
908 parseState.xxlineno = prevlines[prevpos];
909 parseState.xxbyteno = prevbytes[prevpos];
910 parseState.xxcolno = prevcols[prevpos];
911 prevpos = (prevpos + PUSHBACK_BUFSIZE - 1) % PUSHBACK_BUFSIZE;
912
913 R_ParseContextLine = parseState.xxlineno;
914
915 R_ParseContext[R_ParseContextLast] = '\0';
916 /* macOS requires us to keep this non-negative */
917 R_ParseContextLast = (R_ParseContextLast + PARSE_CONTEXT_SIZE - 1)
918 % PARSE_CONTEXT_SIZE;
919 }
920 if (c == START_MACRO) macrolevel--;
921 PUSH_BACK(c);
922 /* Rprintf("unget %c;", c); */
923 return c;
924 }
925
makeSrcref(YYLTYPE * lloc,SEXP srcfile)926 static SEXP makeSrcref(YYLTYPE *lloc, SEXP srcfile)
927 {
928 SEXP val;
929
930 PROTECT(val = allocVector(INTSXP, 6));
931 INTEGER(val)[0] = lloc->first_line;
932 INTEGER(val)[1] = lloc->first_byte;
933 INTEGER(val)[2] = lloc->last_line;
934 INTEGER(val)[3] = lloc->last_byte;
935 INTEGER(val)[4] = lloc->first_column;
936 INTEGER(val)[5] = lloc->last_column;
937 setAttrib(val, R_SrcfileSymbol, srcfile);
938 setAttrib(val, R_ClassSymbol, mkString("srcref"));
939 UNPROTECT(1); /* val */
940 return val;
941 }
942
mkString2(const char * s,size_t len)943 static SEXP mkString2(const char *s, size_t len)
944 {
945 SEXP t;
946 cetype_t enc = CE_UTF8;
947
948 PROTECT(t = allocVector(STRSXP, 1));
949 SET_STRING_ELT(t, 0, mkCharLenCE(s, (int) len, enc));
950 UNPROTECT(1); /* t */
951 return t;
952 }
953
954
955 /* Stretchy List Structures : Lists are created and grown using a special */
956 /* dotted pair. The CAR of the list points to the last cons-cell in the */
957 /* list and the CDR points to the first. The list can be extracted from */
958 /* the pair by taking its CDR, while the CAR gives fast access to the end */
959 /* of the list. */
960
961
962 /* Create a stretchy-list dotted pair */
963
NewList(void)964 static SEXP NewList(void)
965 {
966 SEXP s = CONS(R_NilValue, R_NilValue);
967 SETCAR(s, s);
968 return s;
969 }
970
971 /* Add a new element at the end of a stretchy list */
972
GrowList(SEXP l,SEXP s)973 static void GrowList(SEXP l, SEXP s)
974 {
975 SEXP tmp;
976 tmp = CONS(s, R_NilValue);
977 SETCDR(CAR(l), tmp);
978 SETCAR(l, tmp);
979 }
980
981 /*--------------------------------------------------------------------------*/
982
InitSymbols(void)983 static void InitSymbols(void)
984 {
985 if (!R_RdTagSymbol)
986 R_RdTagSymbol = install("Rd_tag");
987 if (!R_RdOptionSymbol)
988 R_RdOptionSymbol = install("Rd_option");
989 if (!R_DefinitionSymbol)
990 R_DefinitionSymbol = install("definition");
991 if (!R_DynamicFlagSymbol)
992 R_DynamicFlagSymbol = install("dynamicFlag");
993 if (!R_MacroSymbol)
994 R_MacroSymbol = install("macro");
995 }
996
ParseRd(ParseStatus * status,SEXP srcfile,Rboolean fragment,SEXP macros)997 static SEXP ParseRd(ParseStatus *status, SEXP srcfile, Rboolean fragment, SEXP macros)
998 {
999 Rboolean keepmacros = !isLogical(macros) || asLogical(macros);
1000
1001 InitSymbols();
1002 R_ParseContextLast = 0;
1003 R_ParseContext[0] = '\0';
1004
1005 parseState.xxlineno = 1;
1006 parseState.xxcolno = 1;
1007 parseState.xxbyteno = 1;
1008
1009 SrcFile = srcfile;
1010
1011 npush = 0;
1012 pushbase = pushback;
1013 pushsize = PUSHBACK_BUFSIZE;
1014 macrolevel = 0;
1015
1016 parseState.xxmode = LATEXLIKE;
1017 parseState.xxitemType = UNKNOWN;
1018 parseState.xxbraceDepth = 0;
1019 parseState.xxinRString = 0;
1020 parseState.xxNewlineInString = 0;
1021 parseState.xxinEqn = 0;
1022 if (fragment) parseState.xxinitvalue = STARTFRAGMENT;
1023 else parseState.xxinitvalue = STARTFILE;
1024
1025 if (!isEnvironment(macros))
1026 macros = InstallKeywords();
1027
1028 PROTECT(macros);
1029 PROTECT(parseState.xxMacroList = R_NewHashedEnv(macros, ScalarInteger(0)));
1030 PROTECT(parseState.mset = R_NewPreciousMSet(50));
1031
1032 parseState.Value = R_NilValue;
1033
1034 if (yyparse()) *status = PARSE_ERROR;
1035 else *status = PARSE_OK;
1036
1037 if (keepmacros && !isNull(parseState.Value))
1038 setAttrib(parseState.Value, install("macros"), parseState.xxMacroList);
1039
1040 #if DEBUGVALS
1041 Rprintf("ParseRd result: %p\n", parseState.Value);
1042 #endif
1043 RELEASE_SV(parseState.Value);
1044 UNPROTECT(3); /* macros, parseState.xxMacroList, parseState.mset */
1045
1046 if (pushbase != pushback) free(pushbase);
1047
1048 return parseState.Value;
1049 }
1050
1051 #include "Rconnections.h"
1052 static Rconnection con_parse;
1053
1054 /* need to handle incomplete last line */
con_getc(void)1055 static int con_getc(void)
1056 {
1057 int c;
1058 static int last=-1000;
1059
1060 c = Rconn_fgetc(con_parse);
1061 if (c == EOF && last != '\n') c = '\n';
1062 return (last = c);
1063 }
1064
1065 static
R_ParseRd(Rconnection con,ParseStatus * status,SEXP srcfile,Rboolean fragment,SEXP macros)1066 SEXP R_ParseRd(Rconnection con, ParseStatus *status, SEXP srcfile, Rboolean fragment, SEXP macros)
1067 {
1068 con_parse = con;
1069 ptr_getc = con_getc;
1070 return ParseRd(status, srcfile, fragment, macros);
1071 }
1072
1073 /*----------------------------------------------------------------------------
1074 *
1075 * The Lexical Analyzer:
1076 *
1077 * Basic lexical analysis is performed by the following
1078 * routines.
1079 *
1080 * The function yylex() scans the input, breaking it into
1081 * tokens which are then passed to the parser.
1082 *
1083 */
1084
1085
1086 /* Special Symbols */
1087 /* Section and R code headers */
1088
1089 struct {
1090 char *name;
1091 int token;
1092 }
1093
1094 /* When adding keywords here, make sure all the handlers
1095 are also modified: checkRd, Rd2HTML, Rd2latex, Rd2txt, any other new ones... */
1096
1097 static keywords[] = {
1098 /* These sections contain Latex-like text */
1099
1100 { "\\author", SECTIONHEADER },
1101 { "\\concept", SECTIONHEADER },
1102 { "\\description",SECTIONHEADER },
1103 { "\\details", SECTIONHEADER },
1104 { "\\docType", SECTIONHEADER },
1105
1106 { "\\encoding",SECTIONHEADER },
1107 { "\\format", SECTIONHEADER },
1108 { "\\keyword", SECTIONHEADER },
1109 { "\\note", SECTIONHEADER },
1110 { "\\references", SECTIONHEADER },
1111
1112 { "\\section", SECTIONHEADER2 },
1113 { "\\seealso", SECTIONHEADER },
1114 { "\\source", SECTIONHEADER },
1115 { "\\title", SECTIONHEADER },
1116
1117 /* These sections contain R-like text */
1118
1119 { "\\examples",RSECTIONHEADER },
1120 { "\\usage", RSECTIONHEADER },
1121
1122 /* These sections contain verbatim text */
1123
1124 { "\\alias", VSECTIONHEADER },
1125 { "\\name", VSECTIONHEADER },
1126 { "\\synopsis",VSECTIONHEADER },
1127 { "\\Rdversion",VSECTIONHEADER },
1128
1129 /* These macros take no arguments. One character non-alpha escapes get the
1130 same token value */
1131
1132 { "\\cr", ESCAPE },
1133 { "\\dots", ESCAPE },
1134 { "\\ldots", ESCAPE },
1135 { "\\R", ESCAPE },
1136 { "\\tab", ESCAPE },
1137
1138 /* These macros take one LaTeX-like argument. */
1139
1140 { "\\acronym", LATEXMACRO },
1141 { "\\bold", LATEXMACRO },
1142 { "\\cite", LATEXMACRO },
1143 { "\\command", LATEXMACRO },
1144 { "\\dfn", LATEXMACRO },
1145 { "\\dQuote", LATEXMACRO },
1146 { "\\email", LATEXMACRO },
1147
1148 { "\\emph", LATEXMACRO },
1149 { "\\file", LATEXMACRO },
1150 { "\\linkS4class", LATEXMACRO },
1151 { "\\pkg", LATEXMACRO },
1152 { "\\sQuote", LATEXMACRO },
1153
1154 { "\\strong", LATEXMACRO },
1155
1156 { "\\var", LATEXMACRO },
1157
1158 /* These are like SECTIONHEADER/LATEXMACRO, but they change the interpretation of \item */
1159
1160 { "\\arguments",LISTSECTION },
1161 { "\\value", LISTSECTION },
1162
1163 { "\\describe",DESCRIPTION },
1164 { "\\enumerate",ITEMIZE },
1165 { "\\itemize", ITEMIZE },
1166
1167 { "\\item", NOITEM }, /* will change to UNKNOWN, ESCAPE, or LATEXMACRO2 depending on context */
1168
1169 /* These macros take two LaTeX-like arguments. */
1170
1171 { "\\enc", LATEXMACRO2 },
1172 { "\\if", LATEXMACRO2 },
1173 { "\\method", LATEXMACRO2 },
1174 { "\\S3method",LATEXMACRO2 },
1175 { "\\S4method",LATEXMACRO2 },
1176 { "\\tabular", LATEXMACRO2 },
1177 { "\\subsection", LATEXMACRO2 },
1178
1179 /* This macro takes one verbatim and one LaTeX-like argument. */
1180
1181 { "\\href", VERBLATEX },
1182
1183 /* This macro takes three LaTeX-like arguments. */
1184
1185 { "\\ifelse", LATEXMACRO3 },
1186
1187 /* These macros take one optional bracketed option and always take
1188 one LaTeX-like argument */
1189
1190 { "\\link", OPTMACRO },
1191
1192 /* These markup macros require an R-like text argument */
1193
1194 { "\\code", RCODEMACRO },
1195 { "\\dontshow",RCODEMACRO },
1196 { "\\donttest",RCODEMACRO },
1197 { "\\testonly",RCODEMACRO },
1198
1199 /* This macro takes one optional bracketed option and one R-like argument */
1200
1201 { "\\Sexpr", SEXPR },
1202
1203 /* This is just like a VSECTIONHEADER, but it needs SEXPR processing */
1204
1205 { "\\RdOpts", RDOPTS },
1206
1207 /* These macros take one verbatim arg and ignore everything except braces */
1208
1209 { "\\dontrun", VERBMACRO }, /* at least for now */
1210 { "\\env", VERBMACRO },
1211 { "\\kbd", VERBMACRO },
1212 { "\\option", VERBMACRO },
1213 { "\\out", VERBMACRO },
1214 { "\\preformatted", VERBMACRO },
1215
1216 { "\\samp", VERBMACRO },
1217 { "\\special", RCODEMACRO },
1218 { "\\url", VERBMACRO },
1219 { "\\verb", VERBMACRO },
1220
1221 /* These ones take one or two verbatim args */
1222
1223 { "\\eqn", VERBMACRO2 },
1224 { "\\deqn", VERBMACRO2 },
1225 { "\\figure", VERBMACRO2 },
1226
1227 /* We parse IFDEF/IFNDEF as markup, not as a separate preprocessor step */
1228
1229 { "#ifdef", IFDEF },
1230 { "#ifndef", IFDEF },
1231 { "#endif", ENDIF },
1232
1233 /* These allow user defined macros */
1234 { "\\newcommand", NEWCOMMAND },
1235 { "\\renewcommand", NEWCOMMAND },
1236
1237 { 0, 0 }
1238 /* All other markup macros are rejected. */
1239 };
1240
1241 /* Record the longest # directive here */
1242 #define DIRECTIVE_LEN 7
1243
InstallKeywords()1244 static SEXP InstallKeywords()
1245 {
1246 int i, num;
1247 SEXP result, name, val;
1248 num = sizeof(keywords)/sizeof(keywords[0]);
1249 PROTECT(result = R_NewHashedEnv(R_EmptyEnv, ScalarInteger(num)));
1250 for (i = 0; keywords[i].name; i++) {
1251 name = install(keywords[i].name);
1252 PROTECT(val = ScalarInteger(keywords[i].token));
1253 defineVar(name, val, result);
1254 UNPROTECT(1); /* val */
1255 }
1256 UNPROTECT(1); /* result */
1257 return result;
1258 }
1259
KeywordLookup(const char * s)1260 static int KeywordLookup(const char *s)
1261 {
1262 SEXP rec = findVar(install(s), parseState.xxMacroList);
1263 if (rec == R_UnboundValue) return UNKNOWN;
1264 else return INTEGER(rec)[0];
1265 }
1266
UserMacroLookup(const char * s)1267 static SEXP UserMacroLookup(const char *s)
1268 {
1269 SEXP rec = findVar(install(s), parseState.xxMacroList);
1270 if (rec == R_UnboundValue) error(_("Unable to find macro %s"), s);
1271 PROTECT(rec);
1272 SEXP res = getAttrib(rec, R_DefinitionSymbol);
1273 UNPROTECT(1); /* rec */
1274 return res;
1275 }
1276
yyerror(const char * s)1277 static void yyerror(const char *s)
1278 {
1279 static const char *const yytname_translations[] =
1280 {
1281 /* the left column are strings coming from bison, the right
1282 column are translations for users.
1283 The first YYENGLISH from the right column are English to be translated,
1284 the rest are to be copied literally. The #if 0 block below allows xgettext
1285 to see these.
1286 */
1287 #define YYENGLISH 17
1288 "$undefined", "input",
1289 "SECTIONHEADER","section header",
1290 "RSECTIONHEADER","section header",
1291 "VSECTIONHEADER","section header",
1292 "LISTSECTION", "section header",
1293
1294 "LATEXMACRO", "macro",
1295 "LATEXMACRO2", "macro",
1296 "LATEXMACRO3", "macro",
1297 "RCODEMACRO", "macro",
1298 "VERBMACRO", "macro",
1299 "VERBMACRO2", "macro",
1300
1301 "ESCAPE", "macro",
1302 "ITEMIZE", "macro",
1303 "IFDEF", "conditional",
1304 "SECTIONHEADER2","section header",
1305 "OPTMACRO", "macro",
1306
1307 "DESCRIPTION", "macro",
1308 "VERB", "VERBATIM TEXT",
1309 0, 0
1310 };
1311 static char const yyunexpected[] = "syntax error, unexpected ";
1312 static char const yyexpecting[] = ", expecting ";
1313 static char const yyshortunexpected[] = "unexpected %s";
1314 static char const yylongunexpected[] = "unexpected %s '%s'";
1315 char *expecting;
1316 char ParseErrorMsg[PARSE_ERROR_SIZE];
1317 SEXP filename;
1318 char ParseErrorFilename[PARSE_ERROR_SIZE];
1319
1320 xxWarnNewline(); /* post newline warning if necessary */
1321
1322 /*
1323 R_ParseError = yylloc.first_line;
1324 R_ParseErrorCol = yylloc.first_column;
1325 R_ParseErrorFile = SrcFile;
1326 */
1327
1328 if (!strncmp(s, yyunexpected, sizeof yyunexpected -1)) {
1329 int i, translated = FALSE;
1330 /* Edit the error message */
1331 expecting = strstr(s + sizeof yyunexpected -1, yyexpecting);
1332 if (expecting) *expecting = '\0';
1333 for (i = 0; yytname_translations[i]; i += 2) {
1334 if (!strcmp(s + sizeof yyunexpected - 1, yytname_translations[i])) {
1335 if (yychar < 256)
1336 snprintf(ParseErrorMsg, PARSE_ERROR_SIZE,
1337 _(yyshortunexpected),
1338 i/2 < YYENGLISH ? _(yytname_translations[i+1])
1339 : yytname_translations[i+1]);
1340 else
1341 snprintf(ParseErrorMsg, PARSE_ERROR_SIZE,
1342 _(yylongunexpected),
1343 i/2 < YYENGLISH ? _(yytname_translations[i+1])
1344 : yytname_translations[i+1],
1345 CHAR(STRING_ELT(yylval, 0)));
1346 translated = TRUE;
1347 break;
1348 }
1349 }
1350 if (!translated) {
1351 if (yychar < 256)
1352 snprintf(ParseErrorMsg, PARSE_ERROR_SIZE, _(yyshortunexpected),
1353 s + sizeof yyunexpected - 1);
1354 else
1355 snprintf(ParseErrorMsg, PARSE_ERROR_SIZE, _(yylongunexpected),
1356 s + sizeof yyunexpected - 1, CHAR(STRING_ELT(yylval, 0)));
1357 }
1358 if (expecting) {
1359 translated = FALSE;
1360 for (i = 0; yytname_translations[i]; i += 2) {
1361 if (!strcmp(expecting + sizeof yyexpecting - 1, yytname_translations[i])) {
1362 strcat(ParseErrorMsg, _(yyexpecting));
1363 strcat(ParseErrorMsg, i/2 < YYENGLISH ? _(yytname_translations[i+1])
1364 : yytname_translations[i+1]);
1365 translated = TRUE;
1366 break;
1367 }
1368 }
1369 if (!translated) {
1370 strcat(ParseErrorMsg, _(yyexpecting));
1371 strcat(ParseErrorMsg, expecting + sizeof yyexpecting - 1);
1372 }
1373 }
1374 } else if (!strncmp(s, yyunknown, sizeof yyunknown-1)) {
1375 snprintf(ParseErrorMsg, PARSE_ERROR_SIZE,
1376 "%s '%s'", s, CHAR(STRING_ELT(yylval, 0)));
1377 } else {
1378 snprintf(ParseErrorMsg, PARSE_ERROR_SIZE, "%s", s);
1379 }
1380 filename = findVar(install("filename"), SrcFile);
1381 if (isString(filename) && LENGTH(filename))
1382 strncpy(ParseErrorFilename, CHAR(STRING_ELT(filename, 0)), PARSE_ERROR_SIZE - 1);
1383 else
1384 ParseErrorFilename[0] = '\0';
1385 if (wCalls) {
1386 if (yylloc.first_line != yylloc.last_line)
1387 warning("%s:%d-%d: %s",
1388 ParseErrorFilename, yylloc.first_line, yylloc.last_line, ParseErrorMsg);
1389 else
1390 warning("%s:%d: %s",
1391 ParseErrorFilename, yylloc.first_line, ParseErrorMsg);
1392 } else {
1393 if (yylloc.first_line != yylloc.last_line)
1394 warningcall(R_NilValue, "%s:%d-%d: %s",
1395 ParseErrorFilename, yylloc.first_line, yylloc.last_line, ParseErrorMsg);
1396 else
1397 warningcall(R_NilValue, "%s:%d: %s",
1398 ParseErrorFilename, yylloc.first_line, ParseErrorMsg);
1399 }
1400 }
1401
1402 #define TEXT_PUSH(c) do { \
1403 size_t nc = bp - stext; \
1404 if (nc >= nstext - 1) { \
1405 char *old = stext; \
1406 nstext *= 2; \
1407 stext = malloc(nstext); \
1408 if(!stext) error(_("unable to allocate buffer for long string at line %d"), parseState.xxlineno);\
1409 memmove(stext, old, nc); \
1410 if(old != st0) free(old); \
1411 bp = stext+nc; } \
1412 *bp++ = ((char) c); \
1413 } while(0)
1414
setfirstloc(void)1415 static void setfirstloc(void)
1416 {
1417 yylloc.first_line = parseState.xxlineno;
1418 yylloc.first_column = parseState.xxcolno;
1419 yylloc.first_byte = parseState.xxbyteno;
1420 }
1421
setlastloc(void)1422 static void setlastloc(void)
1423 {
1424 yylloc.last_line = prevlines[prevpos];
1425 yylloc.last_column = prevcols[prevpos];
1426 yylloc.last_byte = prevbytes[prevpos];
1427 }
1428
1429 /* Split the input stream into tokens. */
1430 /* This is the lowest of the parsing levels. */
1431
token(void)1432 static int token(void)
1433 {
1434 int c, lookahead;
1435 int outsideLiteral = parseState.xxmode == LATEXLIKE || parseState.xxmode == INOPTION || parseState.xxbraceDepth == 0;
1436
1437 if (parseState.xxinitvalue) {
1438 yylloc.first_line = 0;
1439 yylloc.first_column = 0;
1440 yylloc.first_byte = 0;
1441 yylloc.last_line = 0;
1442 yylloc.last_column = 0;
1443 yylloc.last_byte = 0;
1444 PRESERVE_SV(yylval = mkString(""));
1445 c = parseState.xxinitvalue;
1446 parseState.xxinitvalue = 0;
1447 return(c);
1448 }
1449
1450 setfirstloc();
1451 c = xxgetc();
1452
1453 switch (c) {
1454 case '%': if (!parseState.xxinEqn) return mkComment(c);
1455 break;
1456 case '\\':
1457 if (!parseState.xxinEqn) {
1458 lookahead = xxungetc(xxgetc());
1459 if (isalpha(lookahead) && parseState.xxmode != VERBATIM
1460 /* In R strings, only link or var is allowed as markup */
1461 && (lookahead == 'l' || lookahead == 'v' || !parseState.xxinRString))
1462 return mkMarkup(c);
1463 }
1464 break;
1465 case R_EOF:
1466 if (parseState.xxinRString) {
1467 xxWarnNewline();
1468 error(_("Unexpected end of input (in %c quoted string opened at %s:%d:%d)"),
1469 parseState.xxinRString, parseState.xxBasename, parseState.xxQuoteLine, parseState.xxQuoteCol);
1470 }
1471 return END_OF_INPUT;
1472 case '#':
1473 if (!parseState.xxinEqn && yylloc.first_column == 1) return mkIfdef(c);
1474 break;
1475 case LBRACE:
1476 if (!parseState.xxinRString) {
1477 parseState.xxbraceDepth++;
1478 if (outsideLiteral) return c;
1479 }
1480 break;
1481 case RBRACE:
1482 if (!parseState.xxinRString) {
1483 parseState.xxbraceDepth--;
1484 if (outsideLiteral || parseState.xxbraceDepth == 0) return c;
1485 }
1486 break;
1487 case '[':
1488 case ']':
1489 if (parseState.xxmode == INOPTION ) return c;
1490 break;
1491 }
1492
1493 switch (parseState.xxmode) {
1494 case RLIKE: return mkCode(c);
1495 case INOPTION:
1496 case LATEXLIKE: return mkText(c);
1497 case VERBATIM: return mkVerb(c);
1498 }
1499
1500 return ERROR; /* We shouldn't get here. */
1501 }
1502
1503 #define INITBUFSIZE 128
1504
mkText(int c)1505 static int mkText(int c)
1506 {
1507 char st0[INITBUFSIZE];
1508 unsigned int nstext = INITBUFSIZE;
1509 char *stext = st0, *bp = st0, lookahead;
1510
1511 while(1) {
1512 switch (c) {
1513 case '\\':
1514 lookahead = (char) xxgetc();
1515 if (lookahead == LBRACE || lookahead == RBRACE ||
1516 lookahead == '%' || lookahead == '\\') {
1517 c = lookahead;
1518 break;
1519 }
1520 xxungetc(lookahead);
1521 if (isalpha(lookahead)) goto stop;
1522 case ']':
1523 if (parseState.xxmode == INOPTION) goto stop;
1524 break;
1525 case '%':
1526 case LBRACE:
1527 case RBRACE:
1528 case R_EOF:
1529 goto stop;
1530 }
1531 TEXT_PUSH(c);
1532 if (c == '\n') goto stop;
1533 c = xxgetc();
1534 };
1535 stop:
1536 if (c != '\n') xxungetc(c); /* newline causes a break, but we keep it */
1537 PRESERVE_SV(yylval = mkString2(stext, bp - stext));
1538 if(stext != st0) free(stext);
1539 return TEXT;
1540 }
1541
mkComment(int c)1542 static int mkComment(int c)
1543 {
1544 char st0[INITBUFSIZE];
1545 unsigned int nstext = INITBUFSIZE;
1546 char *stext = st0, *bp = st0;
1547
1548 do TEXT_PUSH(c);
1549 while ((c = xxgetc()) != '\n' && c != R_EOF);
1550
1551 xxungetc(c);
1552
1553 PRESERVE_SV(yylval = mkString2(stext, bp - stext));
1554 if(stext != st0) free(stext);
1555 return COMMENT;
1556 }
1557
1558 #define EAT_DASHES(n_var) do { \
1559 for (c = xxgetc(); c == '-'; c = xxgetc()) { \
1560 n_var++; \
1561 TEXT_PUSH(c); \
1562 } \
1563 } while (0)
1564
1565 #define EAT_CHARS_TO_DELIM_OR_EOF(delim) do { \
1566 while (c != delim && c != R_EOF) { \
1567 TEXT_PUSH(c); \
1568 c = xxgetc(); \
1569 } \
1570 } while (0)
1571
closingRawStringDelim(int c)1572 static int closingRawStringDelim(int c)
1573 {
1574 switch(c) {
1575 case '(': return ')';
1576 case '{': return '}';
1577 case '[': return ']';
1578 case '|': return '|';
1579 default: return 0;
1580 }
1581 }
1582
mkCode(int c)1583 static int mkCode(int c)
1584 {
1585 char st0[INITBUFSIZE];
1586 unsigned int nstext = INITBUFSIZE;
1587 char *stext = st0, *bp = st0;
1588
1589 /* Avoid double counting initial braces */
1590 if (c == LBRACE && !parseState.xxinRString) parseState.xxbraceDepth--;
1591 if (c == RBRACE && !parseState.xxinRString) parseState.xxbraceDepth++;
1592
1593 while(1) {
1594 /* handle a raw string */
1595 if (parseState.xxinRString == 0 && (c == 'r' || c == 'R')) {
1596 int lookahead = xxgetc();
1597 if (lookahead == '"' || lookahead == '\'') {
1598 TEXT_PUSH(c);
1599 int quote = lookahead;
1600 parseState.xxinRString = quote;
1601 parseState.xxQuoteLine = parseState.xxlineno;
1602 parseState.xxQuoteCol = parseState.xxcolno;
1603 TEXT_PUSH(quote);
1604 int ndash = 0;
1605 EAT_DASHES(ndash);
1606 int delim = closingRawStringDelim(c);
1607 if (delim != 0) {
1608 int done = FALSE;
1609 do {
1610 EAT_CHARS_TO_DELIM_OR_EOF(delim);
1611 if (c == delim) {
1612 TEXT_PUSH(c);
1613 int nndash = 0;
1614 EAT_DASHES(nndash);
1615 if (nndash == ndash && c == quote)
1616 done = TRUE; // close quote is handled below
1617 }
1618 else done = TRUE; // EOF; move on
1619 } while (! done);
1620 }
1621 }
1622 else xxungetc(lookahead);
1623 }
1624
1625 int escaped = 0;
1626 if (c == '\\') {
1627 int lookahead = xxgetc();
1628 if (lookahead == '\\' || lookahead == '%') {
1629 c = lookahead;
1630 escaped = 1;
1631 } else xxungetc(lookahead);
1632 }
1633 if ((!escaped && c == '%') || c == R_EOF) break;
1634 if (parseState.xxinRString) {
1635 /* This stuff is messy, because there are two levels of escaping:
1636 The Rd escaping and the R code string escaping. */
1637 if (c == '\\') {
1638 int lookahead = xxgetc();
1639 if (lookahead == '\\') { /* This must be the 3rd backslash */
1640 lookahead = xxgetc();
1641 if (lookahead == parseState.xxinRString || lookahead == '\\') {
1642 TEXT_PUSH(c);
1643 c = lookahead;
1644 escaped = 1;
1645 } else {
1646 xxungetc(lookahead); /* put back the 4th char */
1647 xxungetc('\\'); /* and the 3rd */
1648 }
1649 } else if (lookahead == parseState.xxinRString) { /* There could be one or two before this */
1650 TEXT_PUSH(c);
1651 c = lookahead;
1652 escaped = 1;
1653 } else if (!escaped && (lookahead == 'l' || lookahead == 'v')) {
1654 /* assume \link or \var; this breaks vertical tab, but does anyone ever use that? */
1655 xxungetc(lookahead);
1656 break;
1657 } else xxungetc(lookahead);
1658 }
1659 if (!escaped && c == parseState.xxinRString)
1660 parseState.xxinRString = 0;
1661 } else {
1662 if (c == '#') {
1663 do {
1664 int escaped = 0;
1665 TEXT_PUSH(c);
1666 c = xxgetc();
1667 if (c == '\\') {
1668 int lookahead = xxgetc();
1669 if (lookahead == '\\' || lookahead == '%' || lookahead == LBRACE || lookahead == RBRACE) {
1670 c = lookahead;
1671 escaped = 1;
1672 } else xxungetc(lookahead);
1673 }
1674 if (c == LBRACE && !escaped) parseState.xxbraceDepth++;
1675 else if (c == RBRACE && !escaped) parseState.xxbraceDepth--;
1676 } while (c != '\n' && c != R_EOF && parseState.xxbraceDepth > 0);
1677 if (c == RBRACE && !escaped) parseState.xxbraceDepth++; /* avoid double counting */
1678 }
1679 if (c == '\'' || c == '"' || c == '`') {
1680 parseState.xxinRString = c;
1681 parseState.xxQuoteLine = parseState.xxlineno;
1682 parseState.xxQuoteCol = parseState.xxcolno;
1683 } else if (c == '\\' && !escaped) {
1684 int lookahead = xxgetc();
1685 if (lookahead == LBRACE || lookahead == RBRACE) {
1686 c = lookahead;
1687 } else if (isalpha(lookahead)) {
1688 xxungetc(lookahead);
1689 c = '\\';
1690 break;
1691 } else {
1692 TEXT_PUSH('\\');
1693 c = lookahead;
1694 }
1695 } else if (c == LBRACE) {
1696 parseState.xxbraceDepth++;
1697 } else if (c == RBRACE) {
1698 if (parseState.xxbraceDepth == 1) break;
1699 else parseState.xxbraceDepth--;
1700 } else if (c == R_EOF) break;
1701 }
1702 TEXT_PUSH(c);
1703 if (c == '\n') {
1704 if (parseState.xxinRString && !parseState.xxNewlineInString)
1705 parseState.xxNewlineInString = parseState.xxlineno-1;
1706 break;
1707 }
1708 c = xxgetc();
1709 }
1710 if (c != '\n') xxungetc(c);
1711 PRESERVE_SV(yylval = mkString2(stext, bp - stext));
1712 if(stext != st0) free(stext);
1713 return RCODE;
1714 }
1715
mkMarkup(int c)1716 static int mkMarkup(int c)
1717 {
1718 char st0[INITBUFSIZE];
1719 unsigned int nstext = INITBUFSIZE;
1720 char *stext = st0, *bp = st0;
1721 int retval = 0, attempt = 0;
1722
1723 TEXT_PUSH(c);
1724 while (isalnum((c = xxgetc()))) TEXT_PUSH(c);
1725
1726 while (attempt++ < 2) {
1727 /* character escapes are processed as text, not markup */
1728 if (bp == stext+1) {
1729 TEXT_PUSH(c);
1730 TEXT_PUSH('\0');
1731 retval = TEXT;
1732 c = xxgetc();
1733 break;
1734 } else {
1735 TEXT_PUSH('\0');
1736 retval = KeywordLookup(stext);
1737 if (retval == UNKNOWN && attempt == 1) { /* try again, non-digits only */
1738 bp--; /* pop the \0 */
1739 while (isdigit(*(bp-1))) {
1740 xxungetc(c);
1741 c = *(--bp); /* pop the last letter into c */
1742 }
1743 } else {
1744 if (retval == NOITEM)
1745 retval = parseState.xxitemType;
1746 break;
1747 }
1748 }
1749 }
1750 PRESERVE_SV(yylval = mkString2(stext, bp - stext - 1));
1751 if(stext != st0) free(stext);
1752 xxungetc(c);
1753 return retval;
1754 }
1755
mkIfdef(int c)1756 static int mkIfdef(int c)
1757 {
1758 char st0[INITBUFSIZE];
1759 unsigned int nstext = INITBUFSIZE;
1760 char *stext = st0, *bp = st0;
1761 int retval;
1762
1763 TEXT_PUSH(c);
1764 while (isalpha((c = xxgetc())) && bp - stext <= DIRECTIVE_LEN) TEXT_PUSH(c);
1765 TEXT_PUSH('\0');
1766 xxungetc(c);
1767
1768 retval = KeywordLookup(stext);
1769 PRESERVE_SV(yylval = mkString2(stext, bp - stext - 1));
1770
1771 switch (retval) {
1772 case ENDIF: /* eat chars to the end of the line */
1773 do { c = xxgetc(); }
1774 while (c != '\n' && c != R_EOF);
1775 break;
1776 case UNKNOWN:
1777 RELEASE_SV(yylval);
1778 bp--; bp--;
1779 for (; bp > stext; bp--)
1780 xxungetc(*bp);
1781 switch (parseState.xxmode) {
1782 case RLIKE:
1783 retval = mkCode(*bp);
1784 break;
1785 case INOPTION:
1786 case LATEXLIKE:
1787 retval = mkText(*bp);
1788 break;
1789 case VERBATIM:
1790 retval = mkVerb(*bp);
1791 break;
1792 }
1793 break;
1794 }
1795 if(stext != st0) free(stext);
1796 return retval;
1797 }
1798
mkVerb(int c)1799 static int mkVerb(int c)
1800 {
1801 char st0[INITBUFSIZE];
1802 unsigned int nstext = INITBUFSIZE;
1803 char *stext = st0, *bp = st0;
1804
1805 /* Avoid double counting initial braces */
1806 if (c == LBRACE) parseState.xxbraceDepth--;
1807 if (c == RBRACE) parseState.xxbraceDepth++;
1808
1809 while(1) {
1810 int escaped = 0;
1811 if (c == '\\') {
1812 int lookahead = xxgetc();
1813 if (lookahead == '\\' || lookahead == '%' || lookahead == LBRACE || lookahead == RBRACE) {
1814 escaped = 1;
1815 if (parseState.xxinEqn) TEXT_PUSH(c);
1816 c = lookahead;
1817 } else xxungetc(lookahead);
1818 }
1819 if (c == R_EOF) break;
1820 if (!escaped) {
1821 if (c == '%' && !parseState.xxinEqn) break;
1822 else if (c == LBRACE) parseState.xxbraceDepth++;
1823 else if (c == RBRACE) {
1824 if (parseState.xxbraceDepth == 1) break;
1825 else parseState.xxbraceDepth--;
1826 }
1827 }
1828 TEXT_PUSH(c);
1829 if (c == '\n') break;
1830 c = xxgetc();
1831 };
1832 if (c != '\n') xxungetc(c);
1833 PRESERVE_SV(yylval = mkString2(stext, bp - stext));
1834 if(stext != st0) free(stext);
1835 return VERB;
1836 }
1837
yylex(void)1838 static int yylex(void)
1839 {
1840 int tok = token();
1841
1842 if (parseState.xxDebugTokens) {
1843 Rprintf("%d:%d: %s", yylloc.first_line, yylloc.first_column, yytname[YYTRANSLATE(tok)]);
1844 if (parseState.xxinRString) Rprintf("(in %c%c)", parseState.xxinRString, parseState.xxinRString);
1845 if (tok > 255 && tok != END_OF_INPUT)
1846 Rprintf(": %s", CHAR(STRING_ELT(yylval, 0)));
1847 Rprintf("\n");
1848 }
1849 setlastloc();
1850 return tok;
1851 }
1852
con_cleanup(void * data)1853 static void con_cleanup(void *data)
1854 {
1855 Rconnection con = data;
1856 if(con->isopen) con->close(con);
1857 }
1858
PutState(ParseState * state)1859 static void PutState(ParseState *state) {
1860 state->xxinRString = parseState.xxinRString;
1861 state->xxQuoteLine = parseState.xxQuoteLine;
1862 state->xxQuoteCol = parseState.xxQuoteCol;
1863 state->xxinEqn = parseState.xxinEqn;
1864 state->xxNewlineInString = parseState.xxNewlineInString;
1865 state->xxlineno = parseState.xxlineno;
1866 state->xxbyteno = parseState.xxbyteno;
1867 state->xxcolno = parseState.xxcolno;
1868 state->xxmode = parseState.xxmode;
1869 state->xxitemType = parseState.xxitemType;
1870 state->xxbraceDepth = parseState.xxbraceDepth;
1871 state->xxDebugTokens = parseState.xxDebugTokens;
1872 state->xxBasename = parseState.xxBasename;
1873 state->Value = parseState.Value;
1874 state->xxinitvalue = parseState.xxinitvalue;
1875 state->xxMacroList = parseState.xxMacroList;
1876 state->prevState = parseState.prevState;
1877 }
1878
UseState(ParseState * state)1879 static void UseState(ParseState *state) {
1880 parseState.xxinRString = state->xxinRString;
1881 parseState.xxQuoteLine = state->xxQuoteLine;
1882 parseState.xxQuoteCol = state->xxQuoteCol;
1883 parseState.xxinEqn = state->xxinEqn;
1884 parseState.xxNewlineInString = state->xxNewlineInString;
1885 parseState.xxlineno = state->xxlineno;
1886 parseState.xxbyteno = state->xxbyteno;
1887 parseState.xxcolno = state->xxcolno;
1888 parseState.xxmode = state->xxmode;
1889 parseState.xxitemType = state->xxitemType;
1890 parseState.xxbraceDepth = state->xxbraceDepth;
1891 parseState.xxDebugTokens = state->xxDebugTokens;
1892 parseState.xxBasename = state->xxBasename;
1893 parseState.Value = state->Value;
1894 parseState.xxinitvalue = state->xxinitvalue;
1895 parseState.xxMacroList = state->xxMacroList;
1896 parseState.prevState = state->prevState;
1897 }
1898
PushState()1899 static void PushState() {
1900 if (busy) {
1901 ParseState *prev = malloc(sizeof(ParseState));
1902 if (prev == NULL) error("unable to allocate in PushState");
1903 PutState(prev);
1904 parseState.prevState = prev;
1905 } else
1906 parseState.prevState = NULL;
1907 busy = TRUE;
1908 }
1909
PopState()1910 static void PopState() {
1911 if (parseState.prevState) {
1912 ParseState *prev = parseState.prevState;
1913 UseState(prev);
1914 free(prev);
1915 } else
1916 busy = FALSE;
1917 }
1918
1919 /* "do_parseRd"
1920
1921 .External2(C_parseRd,file, srcfile, encoding, verbose, basename, warningCalls, macros, warndups)
1922 If there is text then that is read and the other arguments are ignored.
1923 */
1924
parseRd(SEXP call,SEXP op,SEXP args,SEXP env)1925 SEXP parseRd(SEXP call, SEXP op, SEXP args, SEXP env)
1926 {
1927 args = CDR(args);
1928
1929 SEXP s = R_NilValue, source;
1930 Rconnection con;
1931 Rboolean wasopen, fragment;
1932 int ifile, wcall;
1933 ParseStatus status;
1934 RCNTXT cntxt;
1935 SEXP macros;
1936
1937 #if DEBUGMODE
1938 yydebug = 1;
1939 #endif
1940
1941 R_ParseError = 0;
1942 R_ParseErrorMsg[0] = '\0';
1943
1944 PushState();
1945
1946 ifile = asInteger(CAR(args)); args = CDR(args);
1947
1948 con = getConnection(ifile);
1949 wasopen = con->isopen;
1950 source = CAR(args); args = CDR(args);
1951 /* encoding is unused */
1952 args = CDR(args);
1953 if(!isLogical(CAR(args)) || LENGTH(CAR(args)) != 1)
1954 error(_("invalid '%s' value"), "verbose");
1955 parseState.xxDebugTokens = asInteger(CAR(args)); args = CDR(args);
1956 parseState.xxBasename = CHAR(STRING_ELT(CAR(args), 0)); args = CDR(args);
1957 fragment = asLogical(CAR(args)); args = CDR(args);
1958 wcall = asLogical(CAR(args)); args = CDR(args);
1959 if (wcall == NA_LOGICAL)
1960 error(_("invalid '%s' value"), "warningCalls");
1961 wCalls = wcall;
1962 macros = CAR(args); args = CDR(args);
1963 warnDups = asLogical(CAR(args));
1964
1965 if (ifile >= 3) {/* file != "" */
1966 if(!wasopen) {
1967 if(!con->open(con)) error(_("cannot open the connection"));
1968 /* Set up a context which will close the connection on error */
1969 begincontext(&cntxt, CTXT_CCODE, R_NilValue, R_BaseEnv, R_BaseEnv,
1970 R_NilValue, R_NilValue);
1971 cntxt.cend = &con_cleanup;
1972 cntxt.cenddata = con;
1973 }
1974 if(!con->canread) error(_("cannot read from this connection"));
1975 s = R_ParseRd(con, &status, source, fragment, macros);
1976 if(!wasopen) endcontext(&cntxt);
1977 PopState();
1978 if (status != PARSE_OK) parseError(call, R_ParseError);
1979 }
1980 else {
1981 PopState();
1982 error(_("invalid Rd file"));
1983 }
1984 return s;
1985 }
1986
1987 /* "do_deparseRd"
1988
1989 .External2(C_deparseRd, element, state)
1990 */
1991
deparseRd(SEXP e,SEXP state)1992 SEXP deparseRd(SEXP e, SEXP state)
1993 {
1994 SEXP result;
1995 int outlen, *statevals, quoteBraces, inRComment;
1996 const char *c;
1997 char *outbuf, *out, lookahead;
1998 Rboolean escape;
1999
2000 if(!isString(e) || LENGTH(e) != 1)
2001 error(_("'deparseRd' only supports deparsing character elements"));
2002 e = STRING_ELT(e, 0);
2003
2004 if(!isInteger(state) || LENGTH(state) != 5) error(_("bad state"));
2005
2006 PushState();
2007
2008 parseState.xxbraceDepth = INTEGER(state)[0];
2009 parseState.xxinRString = INTEGER(state)[1];
2010 parseState.xxmode = INTEGER(state)[2];
2011 parseState.xxinEqn = INTEGER(state)[3];
2012 quoteBraces = INTEGER(state)[4];
2013
2014 if (parseState.xxmode != LATEXLIKE && parseState.xxmode != RLIKE && parseState.xxmode != VERBATIM && parseState.xxmode != COMMENTMODE
2015 && parseState.xxmode != INOPTION && parseState.xxmode != UNKNOWNMODE) {
2016 PopState();
2017 error(_("bad text mode %d in 'deparseRd'"), parseState.xxmode);
2018 }
2019
2020 for (c = CHAR(e), outlen=0; *c; c++) {
2021 outlen++;
2022 /* any special char might be escaped */
2023 if (*c == '{' || *c == '}' || *c == '%' || *c == '\\') outlen++;
2024 }
2025 out = outbuf = R_chk_calloc(outlen+1, sizeof(char));
2026 inRComment = FALSE;
2027 for (c = CHAR(e); *c; c++) {
2028 escape = FALSE;
2029 if (parseState.xxmode != UNKNOWNMODE) {
2030 switch (*c) {
2031 case '\\':
2032 if (parseState.xxmode == RLIKE && parseState.xxinRString) {
2033 lookahead = *(c+1);
2034 if (lookahead == '\\' || lookahead == parseState.xxinRString || lookahead == 'l')
2035 escape = TRUE;
2036 break;
2037 } /* fall through to % case for non-strings... */
2038 case '%':
2039 if (parseState.xxmode != COMMENTMODE && !parseState.xxinEqn)
2040 escape = TRUE;
2041 break;
2042 case LBRACE:
2043 case RBRACE:
2044 if (quoteBraces)
2045 escape = TRUE;
2046 else if (!parseState.xxinRString && !parseState.xxinEqn && (parseState.xxmode == RLIKE || parseState.xxmode == VERBATIM)) {
2047 if (*c == LBRACE) parseState.xxbraceDepth++;
2048 else if (parseState.xxbraceDepth <= 0) escape = TRUE;
2049 else parseState.xxbraceDepth--;
2050 }
2051 break;
2052 case '\'':
2053 case '"':
2054 case '`':
2055 if (parseState.xxmode == RLIKE) {
2056 if (parseState.xxinRString) {
2057 if (parseState.xxinRString == *c) parseState.xxinRString = 0;
2058 } else if (!inRComment) parseState.xxinRString = *c;
2059 }
2060 break;
2061 case '#':
2062 if (parseState.xxmode == RLIKE && !parseState.xxinRString)
2063 inRComment = TRUE;
2064 break;
2065 case '\n':
2066 inRComment = FALSE;
2067 break;
2068 }
2069 }
2070 if (escape)
2071 *out++ = '\\';
2072 *out++ = *c;
2073 }
2074 *out = '\0';
2075 PROTECT(result = allocVector(VECSXP, 2));
2076 SET_VECTOR_ELT(result, 0, ScalarString(mkChar(outbuf)));
2077 SET_VECTOR_ELT(result, 1, duplicate(state));
2078 R_chk_free(outbuf);
2079
2080 statevals = INTEGER( VECTOR_ELT(result, 1) );
2081 statevals[0] = parseState.xxbraceDepth;
2082 statevals[1] = parseState.xxinRString;
2083
2084 PopState();
2085
2086 UNPROTECT(1); /* result */
2087 return result;
2088 }
2089
2090