1 %{
2 /*
3  *  R : A Computer Language for Statistical Data Analysis
4  *  Copyright (C) 1995, 1996, 1997  Robert Gentleman and Ross Ihaka
5  *  Copyright (C) 1997--2019  The R Core Team
6  *
7  *  This program is free software; you can redistribute it and/or modify
8  *  it under the terms of the GNU General Public License as published by
9  *  the Free Software Foundation; either version 2 of the License, or
10  *  (at your option) any later version.
11  *
12  *  This program is distributed in the hope that it will be useful,
13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *  GNU General Public License for more details.
16  *
17  *  You should have received a copy of the GNU General Public License
18  *  along with this program; if not, a copy is available at
19  *  https://www.R-project.org/Licenses/
20  */
21 
22 #ifdef HAVE_CONFIG_H
23 #include <config.h>
24 #endif
25 
26 #define R_USE_SIGNALS 1
27 #include <Defn.h>
28 #include <Parse.h>
29 #define STRICT_R_HEADERS
30 #include <R_ext/RS.h>           /* for R_chk_* allocation */
31 #include <ctype.h>
32 #include <Rmath.h> /* for imax2(.),..*/
33 #undef _
34 #ifdef ENABLE_NLS
35 #include <libintl.h>
36 #define _(String) dgettext ("tools", String)
37 #else
38 #define _(String) (String)
39 #endif
40 
41 /* bison creates a non-static symbol yylloc (and other) in both gramLatex.o
42    and gramRd.o, so remap */
43 
44 #define yylloc yyllocR
45 #undef yynerrs /* from Defn.h */
46 #define yynerrs yynerrsR
47 #undef yychar /* from Defn.h */
48 #define yychar yycharR
49 #undef yylval /* from Defn.h */
50 #define yylval yylvalR
51 
52 #define DEBUGVALS 0		/* 1 causes detailed internal state output to R console */
53 #define DEBUGMODE 0		/* 1 causes Bison output of parse state, to stdout or stderr */
54 
55 static Rboolean wCalls = TRUE;
56 static Rboolean warnDups = FALSE;
57 
58 #define YYERROR_VERBOSE 1
59 
60 static void yyerror(const char *);
61 static int yylex();
62 static int yyparse(void);
63 
64 #define yyconst const
65 
66 typedef struct yyltype
67 {
68   int first_line;
69   int first_column;
70   int first_byte;
71 
72   int last_line;
73   int last_column;
74   int last_byte;
75 } yyltype;
76 
77 # define YYLTYPE yyltype
78 # define YYLLOC_DEFAULT(Current, Rhs, N)				\
79     do									\
80 	if (N)								\
81 	{								\
82 	  (Current).first_line   = YYRHSLOC (Rhs, 1).first_line;	\
83 	  (Current).first_column = YYRHSLOC (Rhs, 1).first_column;	\
84 	  (Current).first_byte   = YYRHSLOC (Rhs, 1).first_byte;	\
85 	  (Current).last_line    = YYRHSLOC (Rhs, N).last_line;		\
86 	  (Current).last_column  = YYRHSLOC (Rhs, N).last_column;	\
87 	  (Current).last_byte    = YYRHSLOC (Rhs, N).last_byte;		\
88 	}								\
89       else								\
90 	{								\
91 	  (Current).first_line   = (Current).last_line   =		\
92 	    YYRHSLOC (Rhs, 0).last_line;				\
93 	  (Current).first_column = (Current).last_column =		\
94 	    YYRHSLOC (Rhs, 0).last_column;				\
95 	  (Current).first_byte   = (Current).last_byte =		\
96 	    YYRHSLOC (Rhs, 0).last_byte;				\
97 	}								\
98     while (0)
99 
100 /* Useful defines so editors don't get confused ... */
101 
102 #define LBRACE	'{'
103 #define RBRACE	'}'
104 
105 /* Functions used in the parsing process */
106 
107 static void	GrowList(SEXP, SEXP);
108 static int	KeywordLookup(const char *);
109 static SEXP	UserMacroLookup(const char *);
110 static SEXP	InstallKeywords();
111 static SEXP	NewList(void);
112 static SEXP     makeSrcref(YYLTYPE *, SEXP);
113 static int	xxgetc();
114 static int	xxungetc(int);
115 
116 /* Flags used to mark need for postprocessing in the dynamicFlag attribute */
117 
118 #define STATIC 0
119 #define HAS_IFDEF 1
120 #define HAS_SEXPR 2
121 
122 /* Internal lexer / parser state variables */
123 
124 static char const yyunknown[] = "unknown macro"; /* our message, not bison's */
125 
126 
127 typedef struct ParseState ParseState;
128 struct ParseState {
129     int xxinRString, xxQuoteLine, xxQuoteCol;
130     int	xxinEqn;
131     int	xxNewlineInString;
132     int	xxlineno, xxbyteno, xxcolno;
133     int	xxmode, xxitemType, xxbraceDepth;  /* context for lexer */
134     int	xxDebugTokens;  /* non-zero causes debug output to R console */
135     const char* xxBasename;     /* basename of file for error messages */
136     SEXP	Value;
137     int	xxinitvalue;
138     SEXP	xxMacroList;/* A hashed environment containing all the standard and user-defined macro names */
139     SEXP mset; /* Precious mset for protecting parser semantic values */
140     ParseState *prevState;
141 };
142 
143 static Rboolean busy = FALSE;
144 static ParseState parseState;
145 
146 #define PRESERVE_SV(x) R_PreserveInMSet((x), parseState.mset)
147 #define RELEASE_SV(x)  R_ReleaseFromMSet((x), parseState.mset)
148 
149 #define RLIKE 1		/* Includes R strings; xxinRString holds the opening quote char, or 0 outside a string */
150 #define LATEXLIKE 2
151 #define VERBATIM 3
152 #define INOPTION 4
153 #define COMMENTMODE 5   /* only used in deparsing */
154 #define UNKNOWNMODE 6   /* ditto */
155 
156 static SEXP     SrcFile;  /* parse_Rd will *always* supply a srcfile */
157 
158 /* Routines used to build the parse tree */
159 
160 static SEXP	xxpushMode(int, int, int);
161 static void	xxpopMode(SEXP);
162 static SEXP	xxnewlist(SEXP);
163 static SEXP	xxnewlist2(SEXP, SEXP);
164 static SEXP	xxnewlist3(SEXP, SEXP, SEXP);
165 static SEXP	xxnewlist4(SEXP, SEXP, SEXP, SEXP);
166 static SEXP	xxnewlist5(SEXP, SEXP, SEXP, SEXP, SEXP);
167 static SEXP	xxnewlist6(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
168 static SEXP	xxnewlist7(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
169 static SEXP	xxnewlist8(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
170 static SEXP	xxnewlist9(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
171 
172 static SEXP	xxlist(SEXP, SEXP);
173 static SEXP	xxmarkup(SEXP, SEXP, int, YYLTYPE *);
174 static SEXP	xxmarkup2(SEXP, SEXP, SEXP, int, int, YYLTYPE *);
175 static SEXP	xxmarkup3(SEXP, SEXP, SEXP, SEXP, int, YYLTYPE *);
176 static SEXP	xxOptionmarkup(SEXP, SEXP, SEXP, int, YYLTYPE *);
177 static SEXP	xxtag(SEXP, int, YYLTYPE *);
178 static void	xxsavevalue(SEXP, YYLTYPE *);
179 static void	xxWarnNewline();
180 static SEXP	xxnewcommand(SEXP, SEXP, SEXP, YYLTYPE *);
181 static SEXP	xxusermacro(SEXP, SEXP, YYLTYPE *);
182 static int	mkMarkup(int);
183 static int      mkIfdef(int);
184 static int	mkCode(int);
185 static int	mkText(int);
186 static int	mkVerb(int);
187 static int 	mkComment(int);
188 
189 static SEXP R_RdTagSymbol = NULL;
190 static SEXP R_RdOptionSymbol = NULL;
191 static SEXP R_DefinitionSymbol = NULL;
192 static SEXP R_DynamicFlagSymbol = NULL;
193 static SEXP R_MacroSymbol = NULL;
194 
195 #define YYSTYPE		SEXP
196 
197 %}
198 
199 %token		END_OF_INPUT ERROR
200 %token		SECTIONHEADER RSECTIONHEADER VSECTIONHEADER
201 %token		SECTIONHEADER2
202 %token		RCODEMACRO SEXPR RDOPTS LATEXMACRO VERBMACRO OPTMACRO ESCAPE
203 %token		LISTSECTION ITEMIZE DESCRIPTION NOITEM
204 %token		LATEXMACRO2 VERBMACRO2 VERBLATEX
205 %token		LATEXMACRO3
206 %token		NEWCOMMAND USERMACRO USERMACRO1 USERMACRO2 USERMACRO3 USERMACRO4
207 %token		USERMACRO5 USERMACRO6 USERMACRO7 USERMACRO8 USERMACRO9
208 %token		IFDEF ENDIF
209 %token		TEXT RCODE VERB COMMENT UNKNOWN
210 %token		STARTFILE STARTFRAGMENT	/* fake tokens to have two entry points */
211 
212 /* Recent bison has <> to represent all of the destructors below, but we don't assume it */
213 
214 /* I think we need to list everything here which occurs before the last item in a
215    pattern, just in case the last item is unmatched and we need to back out.  But
216    it is safe to list more, so we do. */
217 
218 %destructor { RELEASE_SV($$); } SECTIONHEADER RSECTIONHEADER
219 VSECTIONHEADER SECTIONHEADER2 RCODEMACRO SEXPR LATEXMACRO VERBMACRO
220 OPTMACRO ESCAPE LISTSECTION ITEMIZE DESCRIPTION NOITEM LATEXMACRO2
221 VERBMACRO2 VERBLATEX LATEXMACRO3 IFDEF ENDIF TEXT RCODE VERB COMMENT UNKNOWN
222 NEWCOMMAND USERMACRO USERMACRO1 USERMACRO2 USERMACRO3 USERMACRO4
223 USERMACRO5 USERMACRO6 USERMACRO7 USERMACRO8 USERMACRO9
224 STARTFILE STARTFRAGMENT goLatexLike goRLike goRLike2 goOption
225 goVerbatim goVerbatim1 goVerbatim2 goItem0 goItem2 LatexArg RLikeArg2
226 VerbatimArg1 VerbatimArg2 IfDefTarget ArgItems Option
227 
228 %%
229 
230 Init:		STARTFILE RdFile END_OF_INPUT		{ xxsavevalue($2, &@$); RELEASE_SV($1); YYACCEPT; }
231 	|	STARTFRAGMENT RdFragment END_OF_INPUT	{ xxsavevalue($2, &@$); RELEASE_SV($1); YYACCEPT; }
232 	|	error					{ PRESERVE_SV(parseState.Value = R_NilValue);  YYABORT; }
233 	;
234 
235 RdFragment :    goLatexLike ArgItems  		{ $$ = $2; RELEASE_SV($1); }
236 	;
237 
238 RdFile	:	SectionList			{ $$ = $1; }
239 	;
240 
241 SectionList:	Section				{ $$ = xxnewlist($1); }
242 	|	SectionList Section		{ $$ = xxlist($1, $2); }
243 
244 Section:	VSECTIONHEADER VerbatimArg	{ $$ = xxmarkup($1, $2, STATIC, &@$); }
245 	|	RDOPTS VerbatimArg		{ $$ = xxmarkup($1, $2, HAS_SEXPR, &@$); }
246 	|	RSECTIONHEADER RLikeArg		{ $$ = xxmarkup($1, $2, STATIC, &@$); }
247 	|	SECTIONHEADER  LatexArg  	{ $$ = xxmarkup($1, $2, STATIC, &@$); }
248 	|	LISTSECTION    Item2Arg		{ $$ = xxmarkup($1, $2, STATIC, &@$); }
249 	|	SECTIONHEADER2 LatexArg LatexArg2 { $$ = xxmarkup2($1, $2, $3, 2, STATIC, &@$); }
250 	|	IFDEF IfDefTarget SectionList ENDIF { $$ = xxmarkup2($1, $2, $3, 2, HAS_IFDEF, &@$); RELEASE_SV($4); }
251 	|	IFDEF IfDefTarget SectionList error { $$ = xxmarkup2($1, $2, $3, 2, HAS_IFDEF, &@$); }
252 	|	SEXPR       goOption RLikeArg2   { $$ = xxmarkup($1, $3, HAS_SEXPR, &@$); xxpopMode($2); }
253 	|	SEXPR       goOption Option RLikeArg2 { $$ = xxOptionmarkup($1, $3, $4, HAS_SEXPR, &@$); xxpopMode($2); }
254 	|	COMMENT				{ $$ = xxtag($1, COMMENT, &@$); }
255 	|	TEXT				{ $$ = xxtag($1, TEXT, &@$); } /* must be whitespace */
256 	|	UserMacro			{ $$ = $1; }
257 	|	error Section			{ $$ = $2; }
258 
259 ArgItems:	Item				{ $$ = xxnewlist($1); }
260 	|	ArgItems Item			{ $$ = xxlist($1, $2); }
261 
262 Item:		TEXT				{ $$ = xxtag($1, TEXT, &@$); }
263 	|	RCODE				{ $$ = xxtag($1, RCODE, &@$); }
264 	|	VERB				{ $$ = xxtag($1, VERB, &@$); }
265 	|	COMMENT				{ $$ = xxtag($1, COMMENT, &@$); }
266 	|	UNKNOWN				{ $$ = xxtag($1, UNKNOWN, &@$); yyerror(yyunknown); }
267 	|	Arg				{ $$ = xxmarkup(R_NilValue, $1, STATIC, &@$); }
268 	|	Markup				{ $$ = $1; }
269 	|	UserMacro			{ $$ = $1; }
270 	|	error Item			{ $$ = $2; }
271 
272 Markup:		LATEXMACRO  LatexArg 		{ $$ = xxmarkup($1, $2, STATIC, &@$); }
273 	|	LATEXMACRO2 LatexArg LatexArg2  { $$ = xxmarkup2($1, $2, $3, 2, STATIC, &@$); }
274 	|	LATEXMACRO3 LatexArg LatexArg2 LatexArg2 { $$ = xxmarkup3($1, $2, $3, $4, STATIC, &@$); }
275 	|	ITEMIZE     Item0Arg		{ $$ = xxmarkup($1, $2, STATIC, &@$); }
276 	|	DESCRIPTION Item2Arg		{ $$ = xxmarkup($1, $2, STATIC, &@$); }
277 	|	OPTMACRO    goOption LatexArg  	{ $$ = xxmarkup($1, $3, STATIC, &@$); xxpopMode($2); }
278 	|	OPTMACRO    goOption Option LatexArg { $$ = xxOptionmarkup($1, $3, $4, STATIC, &@$); xxpopMode($2); }
279 	|	RCODEMACRO  RLikeArg     	{ $$ = xxmarkup($1, $2, STATIC, &@$); }
280 	|	SEXPR       goOption RLikeArg2   { $$ = xxmarkup($1, $3, HAS_SEXPR, &@$); xxpopMode($2); }
281 	|	SEXPR       goOption Option RLikeArg2 { $$ = xxOptionmarkup($1, $3, $4, HAS_SEXPR, &@$); xxpopMode($2); }
282 	|	VERBMACRO   VerbatimArg		{ $$ = xxmarkup($1, $2, STATIC, &@$); }
283 	|	VERBMACRO2  VerbatimArg1	{ $$ = xxmarkup2($1, $2, R_NilValue, 1, STATIC, &@$); }
284 	|       VERBMACRO2  VerbatimArg1 VerbatimArg2 { $$ = xxmarkup2($1, $2, $3, 2, STATIC, &@$); }
285 	|	ESCAPE				{ $$ = xxmarkup($1, R_NilValue, STATIC, &@$); }
286 	|	IFDEF IfDefTarget ArgItems ENDIF { $$ = xxmarkup2($1, $2, $3, 2, HAS_IFDEF, &@$); RELEASE_SV($4); }
287 	|	IFDEF IfDefTarget ArgItems error { $$ = xxmarkup2($1, $2, $3, 2, HAS_IFDEF, &@$); }
288 	|	VERBLATEX   VerbatimArg LatexArg2 { $$ = xxmarkup2($1, $2, $3, 2, STATIC, &@$); }
289 
290 UserMacro:	NEWCOMMAND  VerbatimArg1 VerbatimArg { $$ = xxnewcommand($1, $2, $3, &@$); }
291 	|	USERMACRO			{ $$ = xxusermacro($1, xxnewlist(NULL), &@$); }
292 	|	USERMACRO1  VerbatimArg		{ $$ = xxusermacro($1, xxnewlist($2), &@$); }
293 	|	USERMACRO2  VerbatimArg VerbatimArg
294 						{ $$ = xxusermacro($1, xxnewlist2($2, $3), &@$); }
295 	|	USERMACRO3  VerbatimArg VerbatimArg VerbatimArg
296 						{ $$ = xxusermacro($1, xxnewlist3($2, $3, $4), &@$); }
297 	|	USERMACRO4  VerbatimArg VerbatimArg VerbatimArg VerbatimArg
298 						{ $$ = xxusermacro($1, xxnewlist4($2, $3, $4, $5), &@$); }
299 	|	USERMACRO5  VerbatimArg VerbatimArg VerbatimArg VerbatimArg VerbatimArg
300 						{ $$ = xxusermacro($1, xxnewlist5($2, $3, $4, $5, $6), &@$); }
301 	|	USERMACRO6  VerbatimArg VerbatimArg VerbatimArg VerbatimArg VerbatimArg
302 			    VerbatimArg		{ $$ = xxusermacro($1, xxnewlist6($2, $3, $4, $5, $6, $7), &@$); }
303 	|	USERMACRO7  VerbatimArg VerbatimArg VerbatimArg VerbatimArg VerbatimArg VerbatimArg
304 			    VerbatimArg VerbatimArg
305 			    			{ $$ = xxusermacro($1, xxnewlist7($2, $3, $4, $5, $6, $7, $8), &@$); }
306 	|	USERMACRO8  VerbatimArg VerbatimArg VerbatimArg VerbatimArg VerbatimArg VerbatimArg
307 			    VerbatimArg VerbatimArg VerbatimArg
308 			    			{ $$ = xxusermacro($1, xxnewlist8($2, $3, $4, $5, $6, $7, $8, $9), &@$); }
309 	|	USERMACRO9  VerbatimArg VerbatimArg VerbatimArg VerbatimArg VerbatimArg VerbatimArg
310 			    VerbatimArg VerbatimArg VerbatimArg VerbatimArg
311 			    			{ $$ = xxusermacro($1, xxnewlist9($2, $3, $4, $5, $6, $7, $8, $9, $10), &@$); }
312 
313 
314 LatexArg:	goLatexLike Arg		 	{ xxpopMode($1); $$ = $2; }
315 
316 LatexArg2:	goLatexLike Arg			{ xxpopMode($1); $$ = $2; }
317 	|	goLatexLike TEXT		{ xxpopMode($1); $$ = xxnewlist($2);
318      						  if(wCalls)
319     	    					      warning(_("bad markup (extra space?) at %s:%d:%d"),
320     	    					            parseState.xxBasename, @2.first_line, @2.first_column);
321      						  else
322     	    					      warningcall(R_NilValue, _("bad markup (extra space?) at %s:%d:%d"),
323     	    					            parseState.xxBasename, @2.first_line, @2.first_column);
324 						}
325 
326 Item0Arg:	goItem0 Arg		 	{ xxpopMode($1); $$ = $2; }
327 
328 Item2Arg:	goItem2 Arg			{ xxpopMode($1); $$ = $2; }
329 
330 RLikeArg:	goRLike Arg			{ xxpopMode($1); $$ = $2; }
331 
332 /* This one is like VerbatimArg2 below:  it does the push after seeing the brace */
333 
334 RLikeArg2:	'{' goRLike2 ArgItems '}'	{ xxpopMode($2); $$ = $3; }
335 	|	'{' goRLike2 '}'		{ xxpopMode($2); $$ = xxnewlist(NULL); }
336 
337 VerbatimArg:	goVerbatim Arg		 	{ xxpopMode($1); $$ = $2; }
338 
339 VerbatimArg1:	goVerbatim1 Arg			{ xxpopMode($1); $$ = $2; }
340 
341 /* This one executes the push after seeing the brace starting the optional second arg */
342 
343 VerbatimArg2:   '{' goVerbatim2 ArgItems '}'    { xxpopMode($2); $$ = $3; }
344 	|	'{' goVerbatim2 '}'		{ xxpopMode($2); $$ = xxnewlist(NULL); }
345 
346 IfDefTarget:	goLatexLike TEXT	{ xxpopMode($1); $$ = xxnewlist(xxtag($2, TEXT, &@$)); }
347 
348 
349 goLatexLike:	/* empty */			{ $$ = xxpushMode(LATEXLIKE, UNKNOWN, FALSE); }
350 
351 goRLike:	/* empty */			{ $$ = xxpushMode(RLIKE, UNKNOWN, FALSE); }
352 
353 goRLike2:	/* empty */			{ parseState.xxbraceDepth--; $$ = xxpushMode(RLIKE, UNKNOWN, FALSE); parseState.xxbraceDepth++; }
354 
355 goOption:	/* empty */			{ $$ = xxpushMode(INOPTION, UNKNOWN, FALSE); }
356 
357 goVerbatim:	/* empty */			{ $$ = xxpushMode(VERBATIM, UNKNOWN, FALSE); }
358 
359 goVerbatim1:	/* empty */			{ $$ = xxpushMode(VERBATIM, UNKNOWN, TRUE); }
360 
361 goVerbatim2:    /* empty */			{ parseState.xxbraceDepth--; $$ = xxpushMode(VERBATIM, UNKNOWN, FALSE); parseState.xxbraceDepth++; }
362 
363 goItem0:	/* empty */			{ $$ = xxpushMode(LATEXLIKE, ESCAPE, FALSE); }
364 
365 goItem2:	/* empty */			{ $$ = xxpushMode(LATEXLIKE, LATEXMACRO2, FALSE); }
366 
367 Arg:		'{' ArgItems  '}'		{ $$ = $2; }
368 	|	'{' '}'				{ $$ = xxnewlist(NULL); }
369 	|	'{' ArgItems error '}'		{ $$ = $2; }
370 	|	'{' error '}'			{ $$ = xxnewlist(NULL); }
371 	|	'{' ArgItems error END_OF_INPUT { $$ = $2; }
372 
373 Option:		'[' Item ']'			{ $$ = $2; }
374 
375 %%
376 
377 static SEXP xxpushMode(int newmode, int newitem, int neweqn)
378 {
379     SEXP ans;
380 
381     PRESERVE_SV(ans = allocVector(INTSXP, 7));
382     INTEGER(ans)[0] = parseState.xxmode;		/* Lexer mode */
383     INTEGER(ans)[1] = parseState.xxitemType;	/* What is \item? */
384     INTEGER(ans)[2] = parseState.xxbraceDepth;	/* Brace depth used in RCODE and VERBATIM */
385     INTEGER(ans)[3] = parseState.xxinRString;      /* Quote char that started a string */
386     INTEGER(ans)[4] = parseState.xxQuoteLine;      /* Where the quote was */
387     INTEGER(ans)[5] = parseState.xxQuoteCol;       /*           "         */
388     INTEGER(ans)[6] = parseState.xxinEqn;          /* In the first arg to \eqn or \deqn:  no escapes */
389 
390 #if DEBUGMODE
391     Rprintf("xxpushMode(%d, %s) pushes %d, %s, %d\n", newmode, yytname[YYTRANSLATE(newitem)],
392     						parseState.xxmode, yytname[YYTRANSLATE(parseState.xxitemType)], parseState.xxbraceDepth);
393 #endif
394     parseState.xxmode = newmode;
395     parseState.xxitemType = newitem;
396     parseState.xxbraceDepth = 0;
397     parseState.xxinRString = 0;
398     parseState.xxinEqn = neweqn;
399 
400     return ans;
401 }
402 
xxpopMode(SEXP oldmode)403 static void xxpopMode(SEXP oldmode)
404 {
405 #if DEBUGVALS
406     Rprintf("xxpopMode(%d, %s, %d) replaces %d, %s, %d\n", INTEGER(oldmode)[0], yytname[YYTRANSLATE(INTEGER(oldmode)[1])], INTEGER(oldmode)[2],
407     					parseState.xxmode, yytname[YYTRANSLATE(parseState.xxitemType)], parseState.xxbraceDepth);
408 #endif
409     parseState.xxmode = INTEGER(oldmode)[0];
410     parseState.xxitemType = INTEGER(oldmode)[1];
411     parseState.xxbraceDepth = INTEGER(oldmode)[2];
412     parseState.xxinRString = INTEGER(oldmode)[3];
413     parseState.xxQuoteLine = INTEGER(oldmode)[4];
414     parseState.xxQuoteCol  = INTEGER(oldmode)[5];
415     parseState.xxinEqn	= INTEGER(oldmode)[6];
416 
417     RELEASE_SV(oldmode);
418 }
419 
getDynamicFlag(SEXP item)420 static int getDynamicFlag(SEXP item)
421 {
422     SEXP flag = getAttrib(item, R_DynamicFlagSymbol);
423     if (isNull(flag)) return 0;
424     else return INTEGER(flag)[0];
425 }
426 
setDynamicFlag(SEXP item,int flag)427 static void setDynamicFlag(SEXP item, int flag)
428 {
429     if (flag)
430 	setAttrib(item, R_DynamicFlagSymbol, ScalarInteger(flag));
431 }
432 
xxnewlist(SEXP item)433 static SEXP xxnewlist(SEXP item)
434 {
435     SEXP ans;
436 #if DEBUGVALS
437     Rprintf("xxnewlist(item=%p)", item);
438 #endif
439     PRESERVE_SV(ans = NewList());
440     if (item) {
441     	int flag = getDynamicFlag(item);
442 	GrowList(ans, item);
443     	setDynamicFlag(ans, flag);
444 	RELEASE_SV(item);
445     }
446 #if DEBUGVALS
447     Rprintf(" result: %p is length %d\n", ans, length(ans));
448 #endif
449     return ans;
450 }
451 
xxnewlist2(SEXP item1,SEXP item2)452 static SEXP xxnewlist2(SEXP item1, SEXP item2)
453 {
454     return xxlist(xxnewlist(item1), item2);
455 }
456 
xxnewlist3(SEXP item1,SEXP item2,SEXP item3)457 static SEXP xxnewlist3(SEXP item1, SEXP item2, SEXP item3)
458 {
459     return xxlist(xxnewlist2(item1, item2), item3);
460 }
461 
xxnewlist4(SEXP item1,SEXP item2,SEXP item3,SEXP item4)462 static SEXP xxnewlist4(SEXP item1, SEXP item2, SEXP item3, SEXP item4)
463 {
464     return xxlist(xxnewlist3(item1, item2, item3), item4);
465 }
466 
xxnewlist5(SEXP item1,SEXP item2,SEXP item3,SEXP item4,SEXP item5)467 static SEXP xxnewlist5(SEXP item1, SEXP item2, SEXP item3, SEXP item4, SEXP item5)
468 {
469     return xxlist(xxnewlist4(item1, item2, item3, item4), item5);
470 }
471 
xxnewlist6(SEXP item1,SEXP item2,SEXP item3,SEXP item4,SEXP item5,SEXP item6)472 static SEXP xxnewlist6(SEXP item1, SEXP item2, SEXP item3, SEXP item4, SEXP item5,
473 		       SEXP item6)
474 {
475     return xxlist(xxnewlist5(item1, item2, item3, item4, item5), item6);
476 }
477 
xxnewlist7(SEXP item1,SEXP item2,SEXP item3,SEXP item4,SEXP item5,SEXP item6,SEXP item7)478 static SEXP xxnewlist7(SEXP item1, SEXP item2, SEXP item3, SEXP item4, SEXP item5,
479 		       SEXP item6, SEXP item7)
480 {
481     return xxlist(xxnewlist6(item1, item2, item3, item4, item5, item6), item7);
482 }
483 
xxnewlist8(SEXP item1,SEXP item2,SEXP item3,SEXP item4,SEXP item5,SEXP item6,SEXP item7,SEXP item8)484 static SEXP xxnewlist8(SEXP item1, SEXP item2, SEXP item3, SEXP item4, SEXP item5,
485 		       SEXP item6, SEXP item7, SEXP item8)
486 {
487     return xxlist(xxnewlist7(item1, item2, item3, item4, item5, item6, item7), item8);
488 }
489 
xxnewlist9(SEXP item1,SEXP item2,SEXP item3,SEXP item4,SEXP item5,SEXP item6,SEXP item7,SEXP item8,SEXP item9)490 static SEXP xxnewlist9(SEXP item1, SEXP item2, SEXP item3, SEXP item4, SEXP item5,
491 		       SEXP item6, SEXP item7, SEXP item8, SEXP item9)
492 {
493     return xxlist(xxnewlist8(item1, item2, item3, item4, item5, item6, item7, item8),
494                   item9);
495 }
496 
xxlist(SEXP list,SEXP item)497 static SEXP xxlist(SEXP list, SEXP item)
498 {
499     int flag = getDynamicFlag(list) | getDynamicFlag(item);
500 #if DEBUGVALS
501     Rprintf("xxlist(list=%p, item=%p)", list, item);
502 #endif
503     GrowList(list, item);
504     RELEASE_SV(item);
505     setDynamicFlag(list, flag);
506 #if DEBUGVALS
507     Rprintf(" result: %p is length %d\n", list, length(list));
508 #endif
509     return list;
510 }
511 
xxmarkup(SEXP header,SEXP body,int flag,YYLTYPE * lloc)512 static SEXP xxmarkup(SEXP header, SEXP body, int flag, YYLTYPE *lloc)
513 {
514     SEXP ans;
515 #if DEBUGVALS
516     Rprintf("xxmarkup(header=%p, body=%p)", header, body);
517 #endif
518     if (isNull(body))
519         PRESERVE_SV(ans = allocVector(VECSXP, 0));
520     else {
521         flag |= getDynamicFlag(body);
522 	PRESERVE_SV(ans = PairToVectorList(CDR(body)));
523 	RELEASE_SV(body);
524     }
525     if (isNull(header))
526 	setAttrib(ans, R_RdTagSymbol, mkString("LIST"));
527     else {
528 	setAttrib(ans, R_RdTagSymbol, header);
529 	RELEASE_SV(header);
530     }
531     setAttrib(ans, R_SrcrefSymbol, makeSrcref(lloc, SrcFile));
532     setDynamicFlag(ans, flag);
533 #if DEBUGVALS
534     Rprintf(" result: %p\n", ans);
535 #endif
536     return ans;
537 }
538 
xxnewcommand(SEXP cmd,SEXP name,SEXP defn,YYLTYPE * lloc)539 static SEXP xxnewcommand(SEXP cmd, SEXP name, SEXP defn, YYLTYPE *lloc)
540 {
541     SEXP ans, prev, thename, thedefn;
542     char buffer[128];
543     const char *c;
544     int maxarg = 0;
545 #if DEBUGVALS
546     Rprintf("xxnewcommand(cmd=%p, name=%p, defn=%p)", cmd, name, defn);
547 #endif
548     thename = CADR(name);
549     thedefn = CADR(defn);
550     if (TYPEOF(thedefn) == STRSXP)
551     	PROTECT(thedefn = mkString(CHAR(STRING_ELT(thedefn,0))));
552     else
553     	PROTECT(thedefn = mkString(""));
554     if (warnDups) {
555 	prev = findVar(installTrChar(STRING_ELT(thename, 0)), parseState.xxMacroList);
556     	if (prev != R_UnboundValue && strcmp(CHAR(STRING_ELT(cmd,0)), "\\renewcommand")) {
557 	    snprintf(buffer, sizeof(buffer), _("Macro '%s' previously defined."),
558                  CHAR(STRING_ELT(thename, 0)));
559             yyerror(buffer);
560         }
561     }
562     for (c = CHAR(STRING_ELT(thedefn, 0)); *c; c++) {
563     	if (*c == '#' && isdigit(*(c+1)))
564     	    maxarg = imax2(maxarg, *(c+1) - '0');
565     }
566     if (maxarg > 4) {
567     	snprintf(buffer, sizeof(buffer), _("At most 4 arguments are allowed for user defined macros."));
568 	yyerror(buffer);
569     }
570     PROTECT(ans = ScalarInteger(USERMACRO + maxarg));
571     setAttrib(ans, R_RdTagSymbol, cmd);
572     setAttrib(ans, R_DefinitionSymbol, thedefn);
573     setAttrib(ans, R_SrcrefSymbol, makeSrcref(lloc, SrcFile));
574     defineVar(installTrChar(STRING_ELT(thename, 0)), ans, parseState.xxMacroList);
575     UNPROTECT(2); /* thedefn, ans */
576 
577     PRESERVE_SV(ans);
578     RELEASE_SV(cmd);
579     RELEASE_SV(name);
580     RELEASE_SV(defn);
581     return ans;
582 }
583 
584 #define START_MACRO -2
585 #define END_MACRO -3
586 
isComment(SEXP elt)587 static Rboolean isComment(SEXP elt)
588 {
589     SEXP a = getAttrib(elt, R_RdTagSymbol);
590     return isString(a) && LENGTH(a) == 1 &&
591            !strcmp(CHAR(STRING_ELT(a, 0)), "COMMENT");
592 }
593 
xxusermacro(SEXP macro,SEXP args,YYLTYPE * lloc)594 static SEXP xxusermacro(SEXP macro, SEXP args, YYLTYPE *lloc)
595 {
596     SEXP ans, value, nextarg;
597     int i,len;
598     const char *c, *start ;
599 
600 #if DEBUGVALS
601     Rprintf("xxusermacro(macro=%p, args=%p)", macro, args);
602 #endif
603     len = length(args)-1;
604     PRESERVE_SV(ans = allocVector(STRSXP, len + 1));
605     value = UserMacroLookup(CHAR(STRING_ELT(macro,0)));
606     if (TYPEOF(value) == STRSXP)
607     	SET_STRING_ELT(ans, 0, STRING_ELT(value, 0));
608     else
609     	error(_("No macro definition for '%s'."), CHAR(STRING_ELT(macro,0)));
610 
611     for (i = 0, nextarg=args; i < len; i++, nextarg = CDR(nextarg)) {
612 	if (isNull(CDR(CADR(nextarg)))) {
613 	    /* This happens for an empty argument {} and for invocation
614 	       of a macro with zero parameters. In that case, the ""
615 	       element of ans is not needed but does no harm. */
616 	    SET_STRING_ELT(ans, i+1, mkChar(""));
617 	    continue;
618 	}
619 	if (isNull(CDR(CDR(CADR(nextarg))))) {
620 	    /* The common case: argument without newline nor comment.
621 	       (when the length is 1, there can be no comment) */
622 	    SEXP s = CADR(CADR(nextarg));
623 	    if (TYPEOF(s) == STRSXP && LENGTH(s) == 1)
624 		SET_STRING_ELT(ans, i+1, STRING_ELT(s, 0));
625 	    else
626 		error("internal error: invalid argument to xxusermacro");
627 	    continue;
628 	}
629 
630 	/* An argument with a newline or comment or both. Exclude comments and
631 	   concatenate VERBs from different lines (newline characters are
632 	   in the VERBs already. */
633 	const void *vmax = vmaxget();
634 	SEXP si;
635 	size_t ilen = 0;
636 	for(si = CDR(CADR(nextarg)); si != R_NilValue; si = CDR(si)) {
637 	    SEXP stri = CAR(si);
638 	    if (TYPEOF(stri) == STRSXP && LENGTH(stri) == 1) {
639 		if (!isComment(stri))
640 		    ilen += LENGTH(STRING_ELT(stri, 0));
641 	    } else
642 		error("internal error: invalid argument to xxusermacro");
643 	}
644 
645 	char *str = (char *)R_alloc(ilen + 1, sizeof(char));
646 	size_t offset = 0;
647 	for(si = CDR(CADR(nextarg)); si != R_NilValue; si = CDR(si)) {
648 	    SEXP stri = CAR(si);
649 	    if (!isComment(stri)) {
650 		int nc = LENGTH(STRING_ELT(stri, 0));
651 		memcpy(str + offset, CHAR(STRING_ELT(stri, 0)), nc);
652 		offset += nc;
653 	    }
654 	}
655 	str[offset] = '\0';
656 	SET_STRING_ELT(ans, i+1, mkChar(str));
657         vmaxset(vmax);
658     }
659     RELEASE_SV(args);
660 
661     /* Now push the expanded macro onto the input stream, in reverse order */
662     xxungetc(END_MACRO);
663     start = CHAR(STRING_ELT(ans, 0));
664     for (c = start + strlen(start); c > start; c--) {
665     	if (c > start + 1 && *(c-2) == '#' && isdigit(*(c-1))) {
666     	    int which = *(c-1) - '0';
667 	    if (which >= len + 1)
668 		/* currently this won't happen, because the parser gets
669 		   confused whenever there is invalid number of {} arguments
670 		   to a user macro */
671 		error(_("Not enough arguments passed to user macro '%s'"),
672 		        CHAR(STRING_ELT(macro,0)));
673     	    const char *arg = CHAR(STRING_ELT(ans, which));
674     	    for (size_t ii = strlen(arg); ii > 0; ii--) xxungetc(arg[ii-1]);
675     	    c--;
676 	} else
677     	    xxungetc(*(c-1));
678     }
679     xxungetc(START_MACRO);
680 
681     setAttrib(ans, R_RdTagSymbol, mkString("USERMACRO"));
682     setAttrib(ans, R_SrcrefSymbol, makeSrcref(lloc, SrcFile));
683     setAttrib(ans, R_MacroSymbol, macro);
684     RELEASE_SV(macro);
685 #if DEBUGVALS
686     Rprintf(" result: %p\n", ans);
687 #endif
688     return ans;
689 }
690 
xxOptionmarkup(SEXP header,SEXP option,SEXP body,int flag,YYLTYPE * lloc)691 static SEXP xxOptionmarkup(SEXP header, SEXP option, SEXP body, int flag, YYLTYPE *lloc)
692 {
693     SEXP ans;
694 #if DEBUGVALS
695     Rprintf("xxOptionmarkup(header=%p, option=%p, body=%p)", header, option, body);
696 #endif
697     flag |= getDynamicFlag(body);
698     PRESERVE_SV(ans = PairToVectorList(CDR(body)));
699     RELEASE_SV(body);
700     setAttrib(ans, R_RdTagSymbol, header);
701     RELEASE_SV(header);
702     flag |= getDynamicFlag(option);
703     setAttrib(ans, R_RdOptionSymbol, option);
704     RELEASE_SV(option);
705     setAttrib(ans, R_SrcrefSymbol, makeSrcref(lloc, SrcFile));
706     setDynamicFlag(ans, flag);
707 #if DEBUGVALS
708     Rprintf(" result: %p\n", ans);
709 #endif
710     return ans;
711 }
712 
xxmarkup2(SEXP header,SEXP body1,SEXP body2,int argcount,int flag,YYLTYPE * lloc)713 static SEXP xxmarkup2(SEXP header, SEXP body1, SEXP body2, int argcount, int flag, YYLTYPE *lloc)
714 {
715     SEXP ans;
716 #if DEBUGVALS
717     Rprintf("xxmarkup2(header=%p, body1=%p, body2=%p)", header, body1, body2);
718 #endif
719 
720     PRESERVE_SV(ans = allocVector(VECSXP, argcount));
721     if (!isNull(body1)) {
722     	int flag1 = getDynamicFlag(body1);
723     	SET_VECTOR_ELT(ans, 0, PairToVectorList(CDR(body1)));
724 	RELEASE_SV(body1);
725     	setDynamicFlag(VECTOR_ELT(ans, 0), flag1);
726     	flag |= flag1;
727     }
728     if (!isNull(body2)) {
729     	int flag2;
730 	if (argcount < 2) error("internal error: inconsistent argument count");
731 	flag2 = getDynamicFlag(body2);
732     	SET_VECTOR_ELT(ans, 1, PairToVectorList(CDR(body2)));
733 	RELEASE_SV(body2);
734     	setDynamicFlag(VECTOR_ELT(ans, 1), flag2);
735     	flag |= flag2;
736     }
737     setAttrib(ans, R_RdTagSymbol, header);
738     RELEASE_SV(header);
739     setAttrib(ans, R_SrcrefSymbol, makeSrcref(lloc, SrcFile));
740     setDynamicFlag(ans, flag);
741 #if DEBUGVALS
742     Rprintf(" result: %p\n", ans);
743 #endif
744     return ans;
745 }
746 
xxmarkup3(SEXP header,SEXP body1,SEXP body2,SEXP body3,int flag,YYLTYPE * lloc)747 static SEXP xxmarkup3(SEXP header, SEXP body1, SEXP body2, SEXP body3, int flag, YYLTYPE *lloc)
748 {
749     SEXP ans;
750 #if DEBUGVALS
751     Rprintf("xxmarkup2(header=%p, body1=%p, body2=%p, body3=%p)", header, body1, body2, body3);
752 #endif
753 
754     PRESERVE_SV(ans = allocVector(VECSXP, 3));
755     if (!isNull(body1)) {
756     	int flag1 = getDynamicFlag(body1);
757     	SET_VECTOR_ELT(ans, 0, PairToVectorList(CDR(body1)));
758 	RELEASE_SV(body1);
759     	setDynamicFlag(VECTOR_ELT(ans, 0), flag1);
760     	flag |= flag1;
761     }
762     if (!isNull(body2)) {
763     	int flag2;
764 	flag2 = getDynamicFlag(body2);
765     	SET_VECTOR_ELT(ans, 1, PairToVectorList(CDR(body2)));
766 	RELEASE_SV(body2);
767     	setDynamicFlag(VECTOR_ELT(ans, 1), flag2);
768     	flag |= flag2;
769     }
770     if (!isNull(body3)) {
771     	int flag3;
772 	flag3 = getDynamicFlag(body3);
773     	SET_VECTOR_ELT(ans, 2, PairToVectorList(CDR(body3)));
774 	RELEASE_SV(body3);
775     	setDynamicFlag(VECTOR_ELT(ans, 2), flag3);
776     	flag |= flag3;
777     }
778     setAttrib(ans, R_RdTagSymbol, header);
779     RELEASE_SV(header);
780     setAttrib(ans, R_SrcrefSymbol, makeSrcref(lloc, SrcFile));
781     setDynamicFlag(ans, flag);
782 #if DEBUGVALS
783     Rprintf(" result: %p\n", ans);
784 #endif
785     return ans;
786 }
787 
xxsavevalue(SEXP Rd,YYLTYPE * lloc)788 static void xxsavevalue(SEXP Rd, YYLTYPE *lloc)
789 {
790     int flag = getDynamicFlag(Rd);
791     PRESERVE_SV(parseState.Value = PairToVectorList(CDR(Rd)));
792     if (!isNull(parseState.Value)) {
793     	setAttrib(parseState.Value, R_ClassSymbol, mkString("Rd"));
794     	setAttrib(parseState.Value, R_SrcrefSymbol, makeSrcref(lloc, SrcFile));
795     	setDynamicFlag(parseState.Value, flag);
796     }
797     RELEASE_SV(Rd);
798 }
799 
xxtag(SEXP item,int type,YYLTYPE * lloc)800 static SEXP xxtag(SEXP item, int type, YYLTYPE *lloc)
801 {
802     setAttrib(item, R_RdTagSymbol, mkString(yytname[YYTRANSLATE(type)]));
803     setAttrib(item, R_SrcrefSymbol, makeSrcref(lloc, SrcFile));
804     return item;
805 }
806 
xxWarnNewline()807 static void xxWarnNewline()
808 {
809     if (parseState.xxNewlineInString) {
810 	if(wCalls)
811 	    warning(_("newline within quoted string at %s:%d"),
812 		    parseState.xxBasename, parseState.xxNewlineInString);
813 	else
814 	    warningcall(R_NilValue,
815 			_("newline within quoted string at %s:%d"),
816 			parseState.xxBasename, parseState.xxNewlineInString);
817     }
818 }
819 
820 
821 /*----------------------------------------------------------------------------*/
822 
823 
824 static int (*ptr_getc)(void);
825 
826 /* Private pushback, since file ungetc only guarantees one byte.
827    We need arbitrarily large size, since this is how macros are expanded. */
828 
829 #define PUSH_BACK(c) do {                  \
830 	if (npush >= pushsize - 1) {             \
831 	    int *old = pushbase;              \
832             pushsize *= 2;                    \
833 	    pushbase = malloc(pushsize*sizeof(int));         \
834 	    if(!pushbase) error(_("unable to allocate buffer for long macro at line %d"), parseState.xxlineno);\
835 	    memmove(pushbase, old, npush*sizeof(int));        \
836 	    if(old != pushback) free(old); }	    \
837 	pushbase[npush++] = (c);                        \
838 } while(0)
839 
840 
841 
842 #define PUSHBACK_BUFSIZE 32
843 
844 static int pushback[PUSHBACK_BUFSIZE];
845 static int *pushbase;
846 static unsigned int npush, pushsize;
847 static int macrolevel;
848 static int prevpos = 0;
849 static int prevlines[PUSHBACK_BUFSIZE];
850 static int prevcols[PUSHBACK_BUFSIZE];
851 static int prevbytes[PUSHBACK_BUFSIZE];
852 
853 
xxgetc(void)854 static int xxgetc(void)
855 {
856     int c, oldpos;
857 
858     do {
859     	if(npush) {
860     	    c = pushbase[--npush];
861     	    if (c == START_MACRO) {
862     	    	macrolevel++;
863     	    	if (macrolevel > 1000)
864     	    	    error(_("macros nested too deeply: infinite recursion?"));
865     	    } else if (c == END_MACRO) macrolevel--;
866     	} else  c = ptr_getc();
867     } while (c == START_MACRO || c == END_MACRO);
868 
869     if (!macrolevel) {
870 	oldpos = prevpos;
871 	prevpos = (prevpos + 1) % PUSHBACK_BUFSIZE;
872 	prevbytes[prevpos] = parseState.xxbyteno;
873 	prevlines[prevpos] = parseState.xxlineno;
874 	/* We only advance the column for the 1st byte in UTF-8, so handle later bytes specially */
875 	if (0x80 <= (unsigned char)c && (unsigned char)c <= 0xBF) {
876 	    parseState.xxcolno--;
877 	    prevcols[prevpos] = prevcols[oldpos];
878 	} else
879 	    prevcols[prevpos] = parseState.xxcolno;
880 
881 	if (c == EOF) return R_EOF;
882 
883 	R_ParseContextLast = (R_ParseContextLast + 1) % PARSE_CONTEXT_SIZE;
884 	R_ParseContext[R_ParseContextLast] = (char) c;
885 
886 	if (c == '\n') {
887 	    parseState.xxlineno += 1;
888 	    parseState.xxcolno = 1;
889 	    parseState.xxbyteno = 1;
890 	} else {
891 	    parseState.xxcolno++;
892 	    parseState.xxbyteno++;
893 	}
894 
895 	if (c == '\t') parseState.xxcolno = ((parseState.xxcolno + 6) & ~7) + 1;
896 
897 	R_ParseContextLine = parseState.xxlineno;
898     }
899     /* Rprintf("get %c\n", c); */
900     return c;
901 }
902 
xxungetc(int c)903 static int xxungetc(int c)
904 {
905     /* this assumes that c was the result of xxgetc; if not, some edits will be needed */
906     if (c == END_MACRO) macrolevel++;
907     if (!macrolevel) {
908     	parseState.xxlineno = prevlines[prevpos];
909     	parseState.xxbyteno = prevbytes[prevpos];
910     	parseState.xxcolno  = prevcols[prevpos];
911     	prevpos = (prevpos + PUSHBACK_BUFSIZE - 1) % PUSHBACK_BUFSIZE;
912 
913     	R_ParseContextLine = parseState.xxlineno;
914 
915     	R_ParseContext[R_ParseContextLast] = '\0';
916     	/* macOS requires us to keep this non-negative */
917     	R_ParseContextLast = (R_ParseContextLast + PARSE_CONTEXT_SIZE - 1)
918 		% PARSE_CONTEXT_SIZE;
919     }
920     if (c == START_MACRO) macrolevel--;
921     PUSH_BACK(c);
922     /* Rprintf("unget %c;", c); */
923     return c;
924 }
925 
makeSrcref(YYLTYPE * lloc,SEXP srcfile)926 static SEXP makeSrcref(YYLTYPE *lloc, SEXP srcfile)
927 {
928     SEXP val;
929 
930     PROTECT(val = allocVector(INTSXP, 6));
931     INTEGER(val)[0] = lloc->first_line;
932     INTEGER(val)[1] = lloc->first_byte;
933     INTEGER(val)[2] = lloc->last_line;
934     INTEGER(val)[3] = lloc->last_byte;
935     INTEGER(val)[4] = lloc->first_column;
936     INTEGER(val)[5] = lloc->last_column;
937     setAttrib(val, R_SrcfileSymbol, srcfile);
938     setAttrib(val, R_ClassSymbol, mkString("srcref"));
939     UNPROTECT(1); /* val */
940     return val;
941 }
942 
mkString2(const char * s,size_t len)943 static SEXP mkString2(const char *s, size_t len)
944 {
945     SEXP t;
946     cetype_t enc = CE_UTF8;
947 
948     PROTECT(t = allocVector(STRSXP, 1));
949     SET_STRING_ELT(t, 0, mkCharLenCE(s, (int) len, enc));
950     UNPROTECT(1); /* t */
951     return t;
952 }
953 
954 
955 /* Stretchy List Structures : Lists are created and grown using a special */
956 /* dotted pair.  The CAR of the list points to the last cons-cell in the */
957 /* list and the CDR points to the first.  The list can be extracted from */
958 /* the pair by taking its CDR, while the CAR gives fast access to the end */
959 /* of the list. */
960 
961 
962 /* Create a stretchy-list dotted pair */
963 
NewList(void)964 static SEXP NewList(void)
965 {
966     SEXP s = CONS(R_NilValue, R_NilValue);
967     SETCAR(s, s);
968     return s;
969 }
970 
971 /* Add a new element at the end of a stretchy list */
972 
GrowList(SEXP l,SEXP s)973 static void GrowList(SEXP l, SEXP s)
974 {
975     SEXP tmp;
976     tmp = CONS(s, R_NilValue);
977     SETCDR(CAR(l), tmp);
978     SETCAR(l, tmp);
979 }
980 
981 /*--------------------------------------------------------------------------*/
982 
InitSymbols(void)983 static void InitSymbols(void)
984 {
985     if (!R_RdTagSymbol)
986 	R_RdTagSymbol = install("Rd_tag");
987     if (!R_RdOptionSymbol)
988 	R_RdOptionSymbol = install("Rd_option");
989     if (!R_DefinitionSymbol)
990 	R_DefinitionSymbol = install("definition");
991     if (!R_DynamicFlagSymbol)
992 	R_DynamicFlagSymbol = install("dynamicFlag");
993     if (!R_MacroSymbol)
994 	R_MacroSymbol = install("macro");
995 }
996 
ParseRd(ParseStatus * status,SEXP srcfile,Rboolean fragment,SEXP macros)997 static SEXP ParseRd(ParseStatus *status, SEXP srcfile, Rboolean fragment, SEXP macros)
998 {
999     Rboolean keepmacros = !isLogical(macros) || asLogical(macros);
1000 
1001     InitSymbols();
1002     R_ParseContextLast = 0;
1003     R_ParseContext[0] = '\0';
1004 
1005     parseState.xxlineno = 1;
1006     parseState.xxcolno = 1;
1007     parseState.xxbyteno = 1;
1008 
1009     SrcFile = srcfile;
1010 
1011     npush = 0;
1012     pushbase = pushback;
1013     pushsize = PUSHBACK_BUFSIZE;
1014     macrolevel = 0;
1015 
1016     parseState.xxmode = LATEXLIKE;
1017     parseState.xxitemType = UNKNOWN;
1018     parseState.xxbraceDepth = 0;
1019     parseState.xxinRString = 0;
1020     parseState.xxNewlineInString = 0;
1021     parseState.xxinEqn = 0;
1022     if (fragment) parseState.xxinitvalue = STARTFRAGMENT;
1023     else	  parseState.xxinitvalue = STARTFILE;
1024 
1025     if (!isEnvironment(macros))
1026 	macros = InstallKeywords();
1027 
1028     PROTECT(macros);
1029     PROTECT(parseState.xxMacroList = R_NewHashedEnv(macros, ScalarInteger(0)));
1030     PROTECT(parseState.mset = R_NewPreciousMSet(50));
1031 
1032     parseState.Value = R_NilValue;
1033 
1034     if (yyparse()) *status = PARSE_ERROR;
1035     else *status = PARSE_OK;
1036 
1037     if (keepmacros && !isNull(parseState.Value))
1038 	setAttrib(parseState.Value, install("macros"), parseState.xxMacroList);
1039 
1040 #if DEBUGVALS
1041     Rprintf("ParseRd result: %p\n", parseState.Value);
1042 #endif
1043     RELEASE_SV(parseState.Value);
1044     UNPROTECT(3); /* macros, parseState.xxMacroList, parseState.mset */
1045 
1046     if (pushbase != pushback) free(pushbase);
1047 
1048     return parseState.Value;
1049 }
1050 
1051 #include "Rconnections.h"
1052 static Rconnection con_parse;
1053 
1054 /* need to handle incomplete last line */
con_getc(void)1055 static int con_getc(void)
1056 {
1057     int c;
1058     static int last=-1000;
1059 
1060     c = Rconn_fgetc(con_parse);
1061     if (c == EOF && last != '\n') c = '\n';
1062     return (last = c);
1063 }
1064 
1065 static
R_ParseRd(Rconnection con,ParseStatus * status,SEXP srcfile,Rboolean fragment,SEXP macros)1066 SEXP R_ParseRd(Rconnection con, ParseStatus *status, SEXP srcfile, Rboolean fragment, SEXP macros)
1067 {
1068     con_parse = con;
1069     ptr_getc = con_getc;
1070     return ParseRd(status, srcfile, fragment, macros);
1071 }
1072 
1073 /*----------------------------------------------------------------------------
1074  *
1075  *  The Lexical Analyzer:
1076  *
1077  *  Basic lexical analysis is performed by the following
1078  *  routines.
1079  *
1080  *  The function yylex() scans the input, breaking it into
1081  *  tokens which are then passed to the parser.
1082  *
1083  */
1084 
1085 
1086 /* Special Symbols */
1087 /* Section and R code headers */
1088 
1089 struct {
1090     char *name;
1091     int token;
1092 }
1093 
1094 /* When adding keywords here, make sure all the handlers
1095    are also modified:  checkRd, Rd2HTML, Rd2latex, Rd2txt, any other new ones... */
1096 
1097 static keywords[] = {
1098     /* These sections contain Latex-like text */
1099 
1100     { "\\author",  SECTIONHEADER },
1101     { "\\concept", SECTIONHEADER },
1102     { "\\description",SECTIONHEADER },
1103     { "\\details", SECTIONHEADER },
1104     { "\\docType", SECTIONHEADER },
1105 
1106     { "\\encoding",SECTIONHEADER },
1107     { "\\format",  SECTIONHEADER },
1108     { "\\keyword", SECTIONHEADER },
1109     { "\\note",    SECTIONHEADER },
1110     { "\\references", SECTIONHEADER },
1111 
1112     { "\\section", SECTIONHEADER2 },
1113     { "\\seealso", SECTIONHEADER },
1114     { "\\source",  SECTIONHEADER },
1115     { "\\title",   SECTIONHEADER },
1116 
1117     /* These sections contain R-like text */
1118 
1119     { "\\examples",RSECTIONHEADER },
1120     { "\\usage",   RSECTIONHEADER },
1121 
1122     /* These sections contain verbatim text */
1123 
1124     { "\\alias",   VSECTIONHEADER },
1125     { "\\name",    VSECTIONHEADER },
1126     { "\\synopsis",VSECTIONHEADER },
1127     { "\\Rdversion",VSECTIONHEADER },
1128 
1129     /* These macros take no arguments.  One character non-alpha escapes get the
1130        same token value */
1131 
1132     { "\\cr",      ESCAPE },
1133     { "\\dots",    ESCAPE },
1134     { "\\ldots",   ESCAPE },
1135     { "\\R",       ESCAPE },
1136     { "\\tab",     ESCAPE },
1137 
1138     /* These macros take one LaTeX-like argument. */
1139 
1140     { "\\acronym", LATEXMACRO },
1141     { "\\bold",    LATEXMACRO },
1142     { "\\cite",    LATEXMACRO },
1143     { "\\command", LATEXMACRO },
1144     { "\\dfn",     LATEXMACRO },
1145     { "\\dQuote",  LATEXMACRO },
1146     { "\\email",   LATEXMACRO },
1147 
1148     { "\\emph",    LATEXMACRO },
1149     { "\\file",    LATEXMACRO },
1150     { "\\linkS4class", LATEXMACRO },
1151     { "\\pkg",	   LATEXMACRO },
1152     { "\\sQuote",  LATEXMACRO },
1153 
1154     { "\\strong",  LATEXMACRO },
1155 
1156     { "\\var",     LATEXMACRO },
1157 
1158     /* These are like SECTIONHEADER/LATEXMACRO, but they change the interpretation of \item */
1159 
1160     { "\\arguments",LISTSECTION },
1161     { "\\value",   LISTSECTION },
1162 
1163     { "\\describe",DESCRIPTION },
1164     { "\\enumerate",ITEMIZE },
1165     { "\\itemize", ITEMIZE },
1166 
1167     { "\\item",    NOITEM }, /* will change to UNKNOWN, ESCAPE, or LATEXMACRO2 depending on context */
1168 
1169     /* These macros take two LaTeX-like arguments. */
1170 
1171     { "\\enc",     LATEXMACRO2 },
1172     { "\\if",      LATEXMACRO2 },
1173     { "\\method",  LATEXMACRO2 },
1174     { "\\S3method",LATEXMACRO2 },
1175     { "\\S4method",LATEXMACRO2 },
1176     { "\\tabular", LATEXMACRO2 },
1177     { "\\subsection", LATEXMACRO2 },
1178 
1179     /* This macro takes one verbatim and one LaTeX-like argument. */
1180 
1181     { "\\href",    VERBLATEX },
1182 
1183     /* This macro takes three LaTeX-like arguments. */
1184 
1185     { "\\ifelse",  LATEXMACRO3 },
1186 
1187     /* These macros take one optional bracketed option and always take
1188        one LaTeX-like argument */
1189 
1190     { "\\link",    OPTMACRO },
1191 
1192     /* These markup macros require an R-like text argument */
1193 
1194     { "\\code",    RCODEMACRO },
1195     { "\\dontshow",RCODEMACRO },
1196     { "\\donttest",RCODEMACRO },
1197     { "\\testonly",RCODEMACRO },
1198 
1199     /* This macro takes one optional bracketed option and one R-like argument */
1200 
1201     { "\\Sexpr",   SEXPR },
1202 
1203     /* This is just like a VSECTIONHEADER, but it needs SEXPR processing */
1204 
1205     { "\\RdOpts",   RDOPTS },
1206 
1207     /* These macros take one verbatim arg and ignore everything except braces */
1208 
1209     { "\\dontrun", VERBMACRO }, /* at least for now */
1210     { "\\env",     VERBMACRO },
1211     { "\\kbd", 	   VERBMACRO },
1212     { "\\option",  VERBMACRO },
1213     { "\\out",     VERBMACRO },
1214     { "\\preformatted", VERBMACRO },
1215 
1216     { "\\samp",    VERBMACRO },
1217     { "\\special", RCODEMACRO },
1218     { "\\url",     VERBMACRO },
1219     { "\\verb",    VERBMACRO },
1220 
1221     /* These ones take one or two verbatim args */
1222 
1223     { "\\eqn",     VERBMACRO2 },
1224     { "\\deqn",    VERBMACRO2 },
1225     { "\\figure",  VERBMACRO2 },
1226 
1227     /* We parse IFDEF/IFNDEF as markup, not as a separate preprocessor step */
1228 
1229     { "#ifdef",    IFDEF },
1230     { "#ifndef",   IFDEF },
1231     { "#endif",    ENDIF },
1232 
1233     /* These allow user defined macros */
1234     { "\\newcommand", NEWCOMMAND },
1235     { "\\renewcommand", NEWCOMMAND },
1236 
1237     { 0,	   0	      }
1238     /* All other markup macros are rejected. */
1239 };
1240 
1241 /* Record the longest # directive here */
1242 #define DIRECTIVE_LEN 7
1243 
InstallKeywords()1244 static SEXP InstallKeywords()
1245 {
1246     int i, num;
1247     SEXP result, name, val;
1248     num = sizeof(keywords)/sizeof(keywords[0]);
1249     PROTECT(result = R_NewHashedEnv(R_EmptyEnv, ScalarInteger(num)));
1250     for (i = 0; keywords[i].name; i++) {
1251         name = install(keywords[i].name);
1252         PROTECT(val = ScalarInteger(keywords[i].token));
1253     	defineVar(name, val, result);
1254 	UNPROTECT(1); /* val */
1255     }
1256     UNPROTECT(1); /* result */
1257     return result;
1258 }
1259 
KeywordLookup(const char * s)1260 static int KeywordLookup(const char *s)
1261 {
1262     SEXP rec = findVar(install(s), parseState.xxMacroList);
1263     if (rec == R_UnboundValue) return UNKNOWN;
1264     else return INTEGER(rec)[0];
1265 }
1266 
UserMacroLookup(const char * s)1267 static SEXP UserMacroLookup(const char *s)
1268 {
1269     SEXP rec = findVar(install(s), parseState.xxMacroList);
1270     if (rec == R_UnboundValue) error(_("Unable to find macro %s"), s);
1271     PROTECT(rec);
1272     SEXP res = getAttrib(rec, R_DefinitionSymbol);
1273     UNPROTECT(1); /* rec */
1274     return res;
1275 }
1276 
yyerror(const char * s)1277 static void yyerror(const char *s)
1278 {
1279     static const char *const yytname_translations[] =
1280     {
1281     /* the left column are strings coming from bison, the right
1282        column are translations for users.
1283        The first YYENGLISH from the right column are English to be translated,
1284        the rest are to be copied literally.  The #if 0 block below allows xgettext
1285        to see these.
1286     */
1287 #define YYENGLISH 17
1288 	"$undefined",	"input",
1289 	"SECTIONHEADER","section header",
1290 	"RSECTIONHEADER","section header",
1291 	"VSECTIONHEADER","section header",
1292 	"LISTSECTION",	"section header",
1293 
1294 	"LATEXMACRO",	"macro",
1295 	"LATEXMACRO2",  "macro",
1296 	"LATEXMACRO3",  "macro",
1297 	"RCODEMACRO",	"macro",
1298 	"VERBMACRO",    "macro",
1299 	"VERBMACRO2",	"macro",
1300 
1301 	"ESCAPE",	"macro",
1302 	"ITEMIZE",	"macro",
1303 	"IFDEF",	"conditional",
1304 	"SECTIONHEADER2","section header",
1305 	"OPTMACRO",	"macro",
1306 
1307 	"DESCRIPTION",	"macro",
1308 	"VERB",		"VERBATIM TEXT",
1309 	0,		0
1310     };
1311     static char const yyunexpected[] = "syntax error, unexpected ";
1312     static char const yyexpecting[] = ", expecting ";
1313     static char const yyshortunexpected[] = "unexpected %s";
1314     static char const yylongunexpected[] = "unexpected %s '%s'";
1315     char *expecting;
1316     char ParseErrorMsg[PARSE_ERROR_SIZE];
1317     SEXP filename;
1318     char ParseErrorFilename[PARSE_ERROR_SIZE];
1319 
1320     xxWarnNewline();	/* post newline warning if necessary */
1321 
1322     /*
1323     R_ParseError     = yylloc.first_line;
1324     R_ParseErrorCol  = yylloc.first_column;
1325     R_ParseErrorFile = SrcFile;
1326     */
1327 
1328     if (!strncmp(s, yyunexpected, sizeof yyunexpected -1)) {
1329 	int i, translated = FALSE;
1330     	/* Edit the error message */
1331     	expecting = strstr(s + sizeof yyunexpected -1, yyexpecting);
1332     	if (expecting) *expecting = '\0';
1333     	for (i = 0; yytname_translations[i]; i += 2) {
1334     	    if (!strcmp(s + sizeof yyunexpected - 1, yytname_translations[i])) {
1335     	    	if (yychar < 256)
1336     	    	    snprintf(ParseErrorMsg, PARSE_ERROR_SIZE,
1337 			     _(yyshortunexpected),
1338 			     i/2 < YYENGLISH ? _(yytname_translations[i+1])
1339 			     : yytname_translations[i+1]);
1340     	    	else
1341     	    	    snprintf(ParseErrorMsg, PARSE_ERROR_SIZE,
1342 			     _(yylongunexpected),
1343 			     i/2 < YYENGLISH ? _(yytname_translations[i+1])
1344 			     : yytname_translations[i+1],
1345 			     CHAR(STRING_ELT(yylval, 0)));
1346     	    	translated = TRUE;
1347     	    	break;
1348     	    }
1349     	}
1350     	if (!translated) {
1351     	    if (yychar < 256)
1352     		snprintf(ParseErrorMsg, PARSE_ERROR_SIZE, _(yyshortunexpected),
1353 			s + sizeof yyunexpected - 1);
1354     	    else
1355     	    	snprintf(ParseErrorMsg, PARSE_ERROR_SIZE, _(yylongunexpected),
1356 			 s + sizeof yyunexpected - 1, CHAR(STRING_ELT(yylval, 0)));
1357 	}
1358     	if (expecting) {
1359  	    translated = FALSE;
1360     	    for (i = 0; yytname_translations[i]; i += 2) {
1361     	    	if (!strcmp(expecting + sizeof yyexpecting - 1, yytname_translations[i])) {
1362     	    	    strcat(ParseErrorMsg, _(yyexpecting));
1363     	    	    strcat(ParseErrorMsg, i/2 < YYENGLISH ? _(yytname_translations[i+1])
1364     	    	                    : yytname_translations[i+1]);
1365     	    	    translated = TRUE;
1366 		    break;
1367 		}
1368 	    }
1369 	    if (!translated) {
1370 	    	strcat(ParseErrorMsg, _(yyexpecting));
1371 	    	strcat(ParseErrorMsg, expecting + sizeof yyexpecting - 1);
1372 	    }
1373 	}
1374     } else if (!strncmp(s, yyunknown, sizeof yyunknown-1)) {
1375     	snprintf(ParseErrorMsg, PARSE_ERROR_SIZE,
1376 		"%s '%s'", s, CHAR(STRING_ELT(yylval, 0)));
1377     } else {
1378     	snprintf(ParseErrorMsg, PARSE_ERROR_SIZE, "%s", s);
1379     }
1380     filename = findVar(install("filename"), SrcFile);
1381     if (isString(filename) && LENGTH(filename))
1382     	strncpy(ParseErrorFilename, CHAR(STRING_ELT(filename, 0)), PARSE_ERROR_SIZE - 1);
1383     else
1384         ParseErrorFilename[0] = '\0';
1385     if (wCalls) {
1386 	if (yylloc.first_line != yylloc.last_line)
1387 	    warning("%s:%d-%d: %s",
1388 		    ParseErrorFilename, yylloc.first_line, yylloc.last_line, ParseErrorMsg);
1389 	else
1390 	    warning("%s:%d: %s",
1391 		    ParseErrorFilename, yylloc.first_line, ParseErrorMsg);
1392     } else {
1393 	if (yylloc.first_line != yylloc.last_line)
1394 	    warningcall(R_NilValue, "%s:%d-%d: %s",
1395 		    ParseErrorFilename, yylloc.first_line, yylloc.last_line, ParseErrorMsg);
1396 	else
1397 	    warningcall(R_NilValue, "%s:%d: %s",
1398 			ParseErrorFilename, yylloc.first_line, ParseErrorMsg);
1399     }
1400 }
1401 
1402 #define TEXT_PUSH(c) do {                  \
1403 	size_t nc = bp - stext;       \
1404 	if (nc >= nstext - 1) {             \
1405 	    char *old = stext;              \
1406             nstext *= 2;                    \
1407 	    stext = malloc(nstext);         \
1408 	    if(!stext) error(_("unable to allocate buffer for long string at line %d"), parseState.xxlineno);\
1409 	    memmove(stext, old, nc);        \
1410 	    if(old != st0) free(old);	    \
1411 	    bp = stext+nc; }		    \
1412 	*bp++ = ((char) c);		    \
1413 } while(0)
1414 
setfirstloc(void)1415 static void setfirstloc(void)
1416 {
1417     yylloc.first_line = parseState.xxlineno;
1418     yylloc.first_column = parseState.xxcolno;
1419     yylloc.first_byte = parseState.xxbyteno;
1420 }
1421 
setlastloc(void)1422 static void setlastloc(void)
1423 {
1424     yylloc.last_line = prevlines[prevpos];
1425     yylloc.last_column = prevcols[prevpos];
1426     yylloc.last_byte = prevbytes[prevpos];
1427 }
1428 
1429 /* Split the input stream into tokens. */
1430 /* This is the lowest of the parsing levels. */
1431 
token(void)1432 static int token(void)
1433 {
1434     int c, lookahead;
1435     int outsideLiteral = parseState.xxmode == LATEXLIKE || parseState.xxmode == INOPTION || parseState.xxbraceDepth == 0;
1436 
1437     if (parseState.xxinitvalue) {
1438         yylloc.first_line = 0;
1439         yylloc.first_column = 0;
1440         yylloc.first_byte = 0;
1441         yylloc.last_line = 0;
1442         yylloc.last_column = 0;
1443         yylloc.last_byte = 0;
1444 	PRESERVE_SV(yylval = mkString(""));
1445         c = parseState.xxinitvalue;
1446     	parseState.xxinitvalue = 0;
1447     	return(c);
1448     }
1449 
1450     setfirstloc();
1451     c = xxgetc();
1452 
1453     switch (c) {
1454     	case '%': if (!parseState.xxinEqn) return mkComment(c);
1455     	    break;
1456 	case '\\':
1457 	    if (!parseState.xxinEqn) {
1458 		lookahead = xxungetc(xxgetc());
1459 		if (isalpha(lookahead) && parseState.xxmode != VERBATIM
1460 		    /* In R strings, only link or var is allowed as markup */
1461 		    && (lookahead == 'l' || lookahead == 'v' || !parseState.xxinRString))
1462 		    return mkMarkup(c);
1463 	    }
1464 	    break;
1465         case R_EOF:
1466             if (parseState.xxinRString) {
1467        		xxWarnNewline();
1468        		error(_("Unexpected end of input (in %c quoted string opened at %s:%d:%d)"),
1469  			parseState.xxinRString, parseState.xxBasename, parseState.xxQuoteLine, parseState.xxQuoteCol);
1470     	    }
1471     	    return END_OF_INPUT;
1472     	case '#':
1473     	    if (!parseState.xxinEqn && yylloc.first_column == 1) return mkIfdef(c);
1474     	    break;
1475     	case LBRACE:
1476     	    if (!parseState.xxinRString) {
1477     	    	parseState.xxbraceDepth++;
1478     	    	if (outsideLiteral) return c;
1479     	    }
1480     	    break;
1481     	case RBRACE:
1482     	    if (!parseState.xxinRString) {
1483     	    	parseState.xxbraceDepth--;
1484     	    	if (outsideLiteral || parseState.xxbraceDepth == 0) return c;
1485     	    }
1486     	    break;
1487     	case '[':
1488     	case ']':
1489     	    if (parseState.xxmode == INOPTION ) return c;
1490     	    break;
1491     }
1492 
1493     switch (parseState.xxmode) {
1494 	case RLIKE:     return mkCode(c);
1495 	case INOPTION:
1496 	case LATEXLIKE: return mkText(c);
1497 	case VERBATIM:  return mkVerb(c);
1498     }
1499 
1500     return ERROR; /* We shouldn't get here. */
1501 }
1502 
1503 #define INITBUFSIZE 128
1504 
mkText(int c)1505 static int mkText(int c)
1506 {
1507     char st0[INITBUFSIZE];
1508     unsigned int nstext = INITBUFSIZE;
1509     char *stext = st0, *bp = st0, lookahead;
1510 
1511     while(1) {
1512     	switch (c) {
1513     	case '\\':
1514     	    lookahead = (char) xxgetc();
1515     	    if (lookahead == LBRACE || lookahead == RBRACE ||
1516     	        lookahead == '%' || lookahead == '\\') {
1517     	    	c = lookahead;
1518     	    	break;
1519     	    }
1520     	    xxungetc(lookahead);
1521     	    if (isalpha(lookahead)) goto stop;
1522     	case ']':
1523     	    if (parseState.xxmode == INOPTION) goto stop;
1524             break;
1525     	case '%':
1526     	case LBRACE:
1527     	case RBRACE:
1528     	case R_EOF:
1529     	    goto stop;
1530     	}
1531     	TEXT_PUSH(c);
1532     	if (c == '\n') goto stop;
1533     	c = xxgetc();
1534     };
1535 stop:
1536     if (c != '\n') xxungetc(c); /* newline causes a break, but we keep it */
1537     PRESERVE_SV(yylval = mkString2(stext, bp - stext));
1538     if(stext != st0) free(stext);
1539     return TEXT;
1540 }
1541 
mkComment(int c)1542 static int mkComment(int c)
1543 {
1544     char st0[INITBUFSIZE];
1545     unsigned int nstext = INITBUFSIZE;
1546     char *stext = st0, *bp = st0;
1547 
1548     do TEXT_PUSH(c);
1549     while ((c = xxgetc()) != '\n' && c != R_EOF);
1550 
1551     xxungetc(c);
1552 
1553     PRESERVE_SV(yylval = mkString2(stext, bp - stext));
1554     if(stext != st0) free(stext);
1555     return COMMENT;
1556 }
1557 
1558 #define EAT_DASHES(n_var) do {				\
1559 	for (c = xxgetc(); c == '-'; c = xxgetc()) {	\
1560 	    n_var++;					\
1561 	    TEXT_PUSH(c);				\
1562 	}						\
1563     } while (0)
1564 
1565 #define EAT_CHARS_TO_DELIM_OR_EOF(delim) do {	\
1566 	while (c != delim && c != R_EOF) {	\
1567 	    TEXT_PUSH(c);			\
1568 	    c = xxgetc();			\
1569 	}					\
1570     } while (0)
1571 
closingRawStringDelim(int c)1572 static int closingRawStringDelim(int c)
1573 {
1574     switch(c) {
1575     case '(': return ')';
1576     case '{': return '}';
1577     case '[': return ']';
1578     case '|': return '|';
1579     default:  return 0;
1580     }
1581 }
1582 
mkCode(int c)1583 static int mkCode(int c)
1584 {
1585     char st0[INITBUFSIZE];
1586     unsigned int nstext = INITBUFSIZE;
1587     char *stext = st0, *bp = st0;
1588 
1589     /* Avoid double counting initial braces */
1590     if (c == LBRACE && !parseState.xxinRString) parseState.xxbraceDepth--;
1591     if (c == RBRACE && !parseState.xxinRString) parseState.xxbraceDepth++;
1592 
1593     while(1) {
1594 	/* handle a raw string */
1595 	if (parseState.xxinRString == 0 && (c == 'r' || c == 'R')) {
1596     	    int lookahead = xxgetc();
1597 	    if (lookahead == '"' || lookahead == '\'') {
1598 		TEXT_PUSH(c);
1599 		int quote = lookahead;
1600 		parseState.xxinRString = quote;
1601     	    	parseState.xxQuoteLine = parseState.xxlineno;
1602     	    	parseState.xxQuoteCol  = parseState.xxcolno;
1603 		TEXT_PUSH(quote);
1604 		int ndash = 0;
1605 		EAT_DASHES(ndash);
1606 		int delim = closingRawStringDelim(c);
1607 		if (delim != 0) {
1608 		    int done = FALSE;
1609 		    do {
1610 			EAT_CHARS_TO_DELIM_OR_EOF(delim);
1611 			if (c == delim) {
1612 			    TEXT_PUSH(c);
1613 			    int nndash = 0;
1614 			    EAT_DASHES(nndash);
1615 			    if (nndash == ndash && c == quote)
1616 				done = TRUE; // close quote is handled below
1617 			}
1618 			else done = TRUE; // EOF; move on
1619 		    } while (! done);
1620 		}
1621 	    }
1622 	    else xxungetc(lookahead);
1623 	}
1624 
1625 	int escaped = 0;
1626     	if (c == '\\') {
1627     	    int lookahead = xxgetc();
1628     	    if (lookahead == '\\' || lookahead == '%') {
1629     	         c = lookahead;
1630     	         escaped = 1;
1631     	    } else xxungetc(lookahead);
1632     	}
1633     	if ((!escaped && c == '%') || c == R_EOF) break;
1634     	if (parseState.xxinRString) {
1635     	    /* This stuff is messy, because there are two levels of escaping:
1636     	       The Rd escaping and the R code string escaping. */
1637     	    if (c == '\\') {
1638     		int lookahead = xxgetc();
1639     		if (lookahead == '\\') { /* This must be the 3rd backslash */
1640     		    lookahead = xxgetc();
1641     		    if (lookahead == parseState.xxinRString || lookahead == '\\') {
1642     	    	    	TEXT_PUSH(c);
1643     	    	    	c = lookahead;
1644     	    	    	escaped = 1;
1645     	    	    } else {
1646     	    	    	xxungetc(lookahead); /* put back the 4th char */
1647     	    	    	xxungetc('\\');	     /* and the 3rd */
1648     	    	    }
1649     	    	} else if (lookahead == parseState.xxinRString) { /* There could be one or two before this */
1650     	    	    TEXT_PUSH(c);
1651     	    	    c = lookahead;
1652     	    	    escaped = 1;
1653     	    	} else if (!escaped && (lookahead == 'l' || lookahead == 'v')) {
1654     	    	    /* assume \link or \var; this breaks vertical tab, but does anyone ever use that? */
1655     	    	    xxungetc(lookahead);
1656     	    	    break;
1657     	    	} else xxungetc(lookahead);
1658     	    }
1659     	    if (!escaped && c == parseState.xxinRString)
1660     	    	parseState.xxinRString = 0;
1661     	} else {
1662     	    if (c == '#') {
1663     	    	do {
1664     	    	    int escaped = 0;
1665     	    	    TEXT_PUSH(c);
1666     	    	    c = xxgetc();
1667     	    	    if (c == '\\') {
1668 		        int lookahead = xxgetc();
1669 		        if (lookahead == '\\' || lookahead == '%' || lookahead == LBRACE || lookahead == RBRACE) {
1670 		            c = lookahead;
1671 		            escaped = 1;
1672 		        } else xxungetc(lookahead);
1673     		    }
1674     	    	    if (c == LBRACE && !escaped) parseState.xxbraceDepth++;
1675     	    	    else if (c == RBRACE && !escaped) parseState.xxbraceDepth--;
1676     	    	} while (c != '\n' && c != R_EOF && parseState.xxbraceDepth > 0);
1677     	    	if (c == RBRACE && !escaped) parseState.xxbraceDepth++; /* avoid double counting */
1678     	    }
1679     	    if (c == '\'' || c == '"' || c == '`') {
1680     	    	parseState.xxinRString = c;
1681     	    	parseState.xxQuoteLine = parseState.xxlineno;
1682     	    	parseState.xxQuoteCol  = parseState.xxcolno;
1683     	    } else if (c == '\\' && !escaped) {
1684     	    	int lookahead = xxgetc();
1685     	    	if (lookahead == LBRACE || lookahead == RBRACE) {
1686 		    c = lookahead;
1687 		} else if (isalpha(lookahead)) {
1688     	    	    xxungetc(lookahead);
1689     	    	    c = '\\';
1690     	    	    break;
1691     	    	} else {
1692     	    	    TEXT_PUSH('\\');
1693     	    	    c = lookahead;
1694     	    	}
1695     	    } else if (c == LBRACE) {
1696     	    	parseState.xxbraceDepth++;
1697     	    } else if (c == RBRACE) {
1698     	    	if (parseState.xxbraceDepth == 1) break;
1699     	    	else parseState.xxbraceDepth--;
1700     	    } else if (c == R_EOF) break;
1701     	}
1702     	TEXT_PUSH(c);
1703     	if (c == '\n') {
1704     	    if (parseState.xxinRString && !parseState.xxNewlineInString)
1705     	    	parseState.xxNewlineInString = parseState.xxlineno-1;
1706     	    break;
1707     	}
1708     	c = xxgetc();
1709     }
1710     if (c != '\n') xxungetc(c);
1711     PRESERVE_SV(yylval = mkString2(stext, bp - stext));
1712     if(stext != st0) free(stext);
1713     return RCODE;
1714 }
1715 
mkMarkup(int c)1716 static int mkMarkup(int c)
1717 {
1718     char st0[INITBUFSIZE];
1719     unsigned int nstext = INITBUFSIZE;
1720     char *stext = st0, *bp = st0;
1721     int retval = 0, attempt = 0;
1722 
1723     TEXT_PUSH(c);
1724     while (isalnum((c = xxgetc()))) TEXT_PUSH(c);
1725 
1726     while (attempt++ < 2) {
1727     	/* character escapes are processed as text, not markup */
1728     	if (bp == stext+1) {
1729     	    TEXT_PUSH(c);
1730     	    TEXT_PUSH('\0');
1731     	    retval = TEXT;
1732     	    c = xxgetc();
1733     	    break;
1734     	} else {
1735     	    TEXT_PUSH('\0');
1736     	    retval = KeywordLookup(stext);
1737     	    if (retval == UNKNOWN && attempt == 1) { /* try again, non-digits only */
1738     	    	bp--; 				     /* pop the \0 */
1739     	        while (isdigit(*(bp-1))) {
1740             	    xxungetc(c);
1741     	            c = *(--bp);                     /* pop the last letter into c */
1742             	}
1743             } else {
1744             	if (retval == NOITEM)
1745     	    	    retval = parseState.xxitemType;
1746     	    	break;
1747     	    }
1748         }
1749     }
1750     PRESERVE_SV(yylval = mkString2(stext, bp - stext - 1));
1751     if(stext != st0) free(stext);
1752     xxungetc(c);
1753     return retval;
1754 }
1755 
mkIfdef(int c)1756 static int mkIfdef(int c)
1757 {
1758     char st0[INITBUFSIZE];
1759     unsigned int nstext = INITBUFSIZE;
1760     char *stext = st0, *bp = st0;
1761     int retval;
1762 
1763     TEXT_PUSH(c);
1764     while (isalpha((c = xxgetc())) && bp - stext <= DIRECTIVE_LEN) TEXT_PUSH(c);
1765     TEXT_PUSH('\0');
1766     xxungetc(c);
1767 
1768     retval = KeywordLookup(stext);
1769     PRESERVE_SV(yylval = mkString2(stext, bp - stext - 1));
1770 
1771     switch (retval) {
1772     case ENDIF:  /* eat chars to the end of the line */
1773     	do { c = xxgetc(); }
1774     	while (c != '\n' && c != R_EOF);
1775     	break;
1776     case UNKNOWN:
1777 	RELEASE_SV(yylval);
1778     	bp--; bp--;
1779     	for (; bp > stext; bp--)
1780     	    xxungetc(*bp);
1781     	switch (parseState.xxmode) {
1782     	case RLIKE:
1783     	    retval = mkCode(*bp);
1784     	    break;
1785     	case INOPTION:
1786     	case LATEXLIKE:
1787     	    retval = mkText(*bp);
1788     	    break;
1789     	case VERBATIM:
1790     	    retval = mkVerb(*bp);
1791     	    break;
1792 	}
1793 	break;
1794     }
1795     if(stext != st0) free(stext);
1796     return retval;
1797 }
1798 
mkVerb(int c)1799 static int mkVerb(int c)
1800 {
1801     char st0[INITBUFSIZE];
1802     unsigned int nstext = INITBUFSIZE;
1803     char *stext = st0, *bp = st0;
1804 
1805     /* Avoid double counting initial braces */
1806     if (c == LBRACE) parseState.xxbraceDepth--;
1807     if (c == RBRACE) parseState.xxbraceDepth++;
1808 
1809     while(1) {
1810     	int escaped = 0;
1811         if (c == '\\') {
1812             int lookahead = xxgetc();
1813             if (lookahead == '\\' || lookahead == '%' || lookahead == LBRACE || lookahead == RBRACE) {
1814 		escaped = 1;
1815 		if (parseState.xxinEqn) TEXT_PUSH(c);
1816 		c = lookahead;
1817 	    } else xxungetc(lookahead);
1818         }
1819         if (c == R_EOF) break;
1820         if (!escaped) {
1821     	    if (c == '%' && !parseState.xxinEqn) break;
1822 	    else if (c == LBRACE) parseState.xxbraceDepth++;
1823     	    else if (c == RBRACE) {
1824 	    	if (parseState.xxbraceDepth == 1) break;
1825 	    	else parseState.xxbraceDepth--;
1826 	    }
1827 	}
1828     	TEXT_PUSH(c);
1829     	if (c == '\n') break;
1830     	c = xxgetc();
1831     };
1832     if (c != '\n') xxungetc(c);
1833     PRESERVE_SV(yylval = mkString2(stext, bp - stext));
1834     if(stext != st0) free(stext);
1835     return VERB;
1836 }
1837 
yylex(void)1838 static int yylex(void)
1839 {
1840     int tok = token();
1841 
1842     if (parseState.xxDebugTokens) {
1843         Rprintf("%d:%d: %s", yylloc.first_line, yylloc.first_column, yytname[YYTRANSLATE(tok)]);
1844     	if (parseState.xxinRString) Rprintf("(in %c%c)", parseState.xxinRString, parseState.xxinRString);
1845     	if (tok > 255 && tok != END_OF_INPUT)
1846     	    Rprintf(": %s", CHAR(STRING_ELT(yylval, 0)));
1847 	Rprintf("\n");
1848     }
1849     setlastloc();
1850     return tok;
1851 }
1852 
con_cleanup(void * data)1853 static void con_cleanup(void *data)
1854 {
1855     Rconnection con = data;
1856     if(con->isopen) con->close(con);
1857 }
1858 
PutState(ParseState * state)1859 static void PutState(ParseState *state) {
1860     state->xxinRString = parseState.xxinRString;
1861     state->xxQuoteLine = parseState.xxQuoteLine;
1862     state->xxQuoteCol = parseState.xxQuoteCol;
1863     state->xxinEqn = parseState.xxinEqn;
1864     state->xxNewlineInString = parseState.xxNewlineInString;
1865     state->xxlineno = parseState.xxlineno;
1866     state->xxbyteno = parseState.xxbyteno;
1867     state->xxcolno = parseState.xxcolno;
1868     state->xxmode = parseState.xxmode;
1869     state->xxitemType = parseState.xxitemType;
1870     state->xxbraceDepth = parseState.xxbraceDepth;
1871     state->xxDebugTokens = parseState.xxDebugTokens;
1872     state->xxBasename = parseState.xxBasename;
1873     state->Value = parseState.Value;
1874     state->xxinitvalue = parseState.xxinitvalue;
1875     state->xxMacroList = parseState.xxMacroList;
1876     state->prevState = parseState.prevState;
1877 }
1878 
UseState(ParseState * state)1879 static void UseState(ParseState *state) {
1880     parseState.xxinRString = state->xxinRString;
1881     parseState.xxQuoteLine = state->xxQuoteLine;
1882     parseState.xxQuoteCol = state->xxQuoteCol;
1883     parseState.xxinEqn = state->xxinEqn;
1884     parseState.xxNewlineInString = state->xxNewlineInString;
1885     parseState.xxlineno = state->xxlineno;
1886     parseState.xxbyteno = state->xxbyteno;
1887     parseState.xxcolno = state->xxcolno;
1888     parseState.xxmode = state->xxmode;
1889     parseState.xxitemType = state->xxitemType;
1890     parseState.xxbraceDepth = state->xxbraceDepth;
1891     parseState.xxDebugTokens = state->xxDebugTokens;
1892     parseState.xxBasename = state->xxBasename;
1893     parseState.Value = state->Value;
1894     parseState.xxinitvalue = state->xxinitvalue;
1895     parseState.xxMacroList = state->xxMacroList;
1896     parseState.prevState = state->prevState;
1897 }
1898 
PushState()1899 static void PushState() {
1900     if (busy) {
1901     	ParseState *prev = malloc(sizeof(ParseState));
1902 	if (prev == NULL) error("unable to allocate in PushState");
1903     	PutState(prev);
1904     	parseState.prevState = prev;
1905     } else
1906         parseState.prevState = NULL;
1907     busy = TRUE;
1908 }
1909 
PopState()1910 static void PopState() {
1911     if (parseState.prevState) {
1912     	ParseState *prev = parseState.prevState;
1913     	UseState(prev);
1914     	free(prev);
1915     } else
1916     	busy = FALSE;
1917 }
1918 
1919 /* "do_parseRd"
1920 
1921  .External2(C_parseRd,file, srcfile, encoding, verbose, basename, warningCalls, macros, warndups)
1922  If there is text then that is read and the other arguments are ignored.
1923 */
1924 
parseRd(SEXP call,SEXP op,SEXP args,SEXP env)1925 SEXP parseRd(SEXP call, SEXP op, SEXP args, SEXP env)
1926 {
1927     args = CDR(args);
1928 
1929     SEXP s = R_NilValue, source;
1930     Rconnection con;
1931     Rboolean wasopen, fragment;
1932     int ifile, wcall;
1933     ParseStatus status;
1934     RCNTXT cntxt;
1935     SEXP macros;
1936 
1937 #if DEBUGMODE
1938     yydebug = 1;
1939 #endif
1940 
1941     R_ParseError = 0;
1942     R_ParseErrorMsg[0] = '\0';
1943 
1944     PushState();
1945 
1946     ifile = asInteger(CAR(args));                       args = CDR(args);
1947 
1948     con = getConnection(ifile);
1949     wasopen = con->isopen;
1950     source = CAR(args);					args = CDR(args);
1951     /* encoding is unused */
1952     args = CDR(args);
1953     if(!isLogical(CAR(args)) || LENGTH(CAR(args)) != 1)
1954     	error(_("invalid '%s' value"), "verbose");
1955     parseState.xxDebugTokens = asInteger(CAR(args));		args = CDR(args);
1956     parseState.xxBasename = CHAR(STRING_ELT(CAR(args), 0));	args = CDR(args);
1957     fragment = asLogical(CAR(args));				args = CDR(args);
1958     wcall = asLogical(CAR(args));				args = CDR(args);
1959     if (wcall == NA_LOGICAL)
1960     	error(_("invalid '%s' value"), "warningCalls");
1961     wCalls = wcall;
1962     macros = CAR(args);						args = CDR(args);
1963     warnDups = asLogical(CAR(args));
1964 
1965     if (ifile >= 3) {/* file != "" */
1966 	if(!wasopen) {
1967 	    if(!con->open(con)) error(_("cannot open the connection"));
1968 	    /* Set up a context which will close the connection on error */
1969 	    begincontext(&cntxt, CTXT_CCODE, R_NilValue, R_BaseEnv, R_BaseEnv,
1970 			 R_NilValue, R_NilValue);
1971 	    cntxt.cend = &con_cleanup;
1972 	    cntxt.cenddata = con;
1973 	}
1974 	if(!con->canread) error(_("cannot read from this connection"));
1975 	s = R_ParseRd(con, &status, source, fragment, macros);
1976 	if(!wasopen) endcontext(&cntxt);
1977 	PopState();
1978 	if (status != PARSE_OK) parseError(call, R_ParseError);
1979     }
1980     else {
1981       PopState();
1982       error(_("invalid Rd file"));
1983     }
1984     return s;
1985 }
1986 
1987 /* "do_deparseRd"
1988 
1989  .External2(C_deparseRd, element, state)
1990 */
1991 
deparseRd(SEXP e,SEXP state)1992 SEXP deparseRd(SEXP e, SEXP state)
1993 {
1994     SEXP result;
1995     int  outlen, *statevals, quoteBraces, inRComment;
1996     const char *c;
1997     char *outbuf, *out, lookahead;
1998     Rboolean escape;
1999 
2000     if(!isString(e) || LENGTH(e) != 1)
2001     	error(_("'deparseRd' only supports deparsing character elements"));
2002     e = STRING_ELT(e, 0);
2003 
2004     if(!isInteger(state) || LENGTH(state) != 5) error(_("bad state"));
2005 
2006     PushState();
2007 
2008     parseState.xxbraceDepth = INTEGER(state)[0];
2009     parseState.xxinRString = INTEGER(state)[1];
2010     parseState.xxmode = INTEGER(state)[2];
2011     parseState.xxinEqn = INTEGER(state)[3];
2012     quoteBraces = INTEGER(state)[4];
2013 
2014     if (parseState.xxmode != LATEXLIKE && parseState.xxmode != RLIKE && parseState.xxmode != VERBATIM && parseState.xxmode != COMMENTMODE
2015      && parseState.xxmode != INOPTION  && parseState.xxmode != UNKNOWNMODE) {
2016         PopState();
2017     	error(_("bad text mode %d in 'deparseRd'"), parseState.xxmode);
2018     }
2019 
2020     for (c = CHAR(e), outlen=0; *c; c++) {
2021     	outlen++;
2022     	/* any special char might be escaped */
2023     	if (*c == '{' || *c == '}' || *c == '%' || *c == '\\') outlen++;
2024     }
2025     out = outbuf = R_chk_calloc(outlen+1, sizeof(char));
2026     inRComment = FALSE;
2027     for (c = CHAR(e); *c; c++) {
2028     	escape = FALSE;
2029     	if (parseState.xxmode != UNKNOWNMODE) {
2030 	    switch (*c) {
2031 	    case '\\':
2032 		if (parseState.xxmode == RLIKE && parseState.xxinRString) {
2033 		    lookahead = *(c+1);
2034 		    if (lookahead == '\\' || lookahead == parseState.xxinRString || lookahead == 'l')
2035 		    	escape = TRUE;
2036 		    break;
2037 		}          /* fall through to % case for non-strings... */
2038 	    case '%':
2039 		if (parseState.xxmode != COMMENTMODE && !parseState.xxinEqn)
2040 		    escape = TRUE;
2041 		break;
2042 	    case LBRACE:
2043 	    case RBRACE:
2044 		if (quoteBraces)
2045 		    escape = TRUE;
2046 		else if (!parseState.xxinRString && !parseState.xxinEqn && (parseState.xxmode == RLIKE || parseState.xxmode == VERBATIM)) {
2047 		    if (*c == LBRACE) parseState.xxbraceDepth++;
2048 		    else if (parseState.xxbraceDepth <= 0) escape = TRUE;
2049 		    else parseState.xxbraceDepth--;
2050 		}
2051 		break;
2052 	    case '\'':
2053 	    case '"':
2054 	    case '`':
2055 	    	if (parseState.xxmode == RLIKE) {
2056 		    if (parseState.xxinRString) {
2057 			if (parseState.xxinRString == *c) parseState.xxinRString = 0;
2058 		    } else if (!inRComment) parseState.xxinRString = *c;
2059 		}
2060 		break;
2061 	    case '#':
2062 	    	if (parseState.xxmode == RLIKE && !parseState.xxinRString)
2063 	    	    inRComment = TRUE;
2064 	    	break;
2065 	    case '\n':
2066 	    	inRComment = FALSE;
2067 	    	break;
2068 	    }
2069 	}
2070     	if (escape)
2071     	    *out++ = '\\';
2072     	*out++ = *c;
2073     }
2074     *out = '\0';
2075     PROTECT(result = allocVector(VECSXP, 2));
2076     SET_VECTOR_ELT(result, 0, ScalarString(mkChar(outbuf)));
2077     SET_VECTOR_ELT(result, 1, duplicate(state));
2078     R_chk_free(outbuf);
2079 
2080     statevals = INTEGER( VECTOR_ELT(result, 1) );
2081     statevals[0] = parseState.xxbraceDepth;
2082     statevals[1] = parseState.xxinRString;
2083 
2084     PopState();
2085 
2086     UNPROTECT(1); /* result */
2087     return result;
2088 }
2089 
2090