1 /*
2 *   Copyright (c) 2003-2004, Ascher Stefan <stievie@utanet.at>
3 *   Copyright (c) 2020, Masatake YAMATO <yamato@redhat.com>
4 *   Copyright (c) 2020, Red Hat, Inc.
5 *
6 *   This source code is released for free distribution under the terms of the
7 *   GNU General Public License version 2 or (at your option) any later version.
8 *
9 *   This module contains functions for generating tags for R language files.
10 *   R is a programming language for statistical computing.
11 *   R is GPL Software, get it from http://www.r-project.org/
12 *
13 *   The language references are available at
14 *   https://cran.r-project.org/manuals.html, and
15 *   https://cran.r-project.org/doc/manuals/r-release/R-lang.html
16 *
17 *   The base library (including library and source functions) release is at
18 *   https://stat.ethz.ch/R-manual/R-devel/library/base/html/00Index.html
19 */
20 
21 /*
22 *   INCLUDE FILES
23 */
24 #include "general.h"	/* must always come first */
25 
26 #include "debug.h"
27 #include "entry.h"
28 #include "keyword.h"
29 #include "parse.h"
30 #include "read.h"
31 #include "selectors.h"
32 #include "tokeninfo.h"
33 #include "trace.h"
34 #include "vstring.h"
35 #include "subparser.h"
36 #include "r.h"
37 
38 #include <string.h>
39 #include <ctype.h>	/* to define isalpha(), isalnum(), isspace() */
40 
41 
42 /*
43 *   MACROS
44 */
45 #ifdef DEBUG
46 #define R_TRACE_TOKEN_TEXT(TXT,T,Q) TRACE_PRINT("<%s> token: %s (%s), parent: %s", \
47 												(TXT),					\
48 												tokenIsTypeVal(T, '\n')? "\\n": tokenString(T), \
49 												tokenTypeStr(T->type),	\
50 												(Q) == CORK_NIL? "": getEntryInCorkQueue(Q)->name)
51 #define R_TRACE_TOKEN(T,Q) TRACE_PRINT("token: %s (%s), parent: %s", \
52 									   tokenIsTypeVal((T), '\n')? "\\n": tokenString(T), \
53 									   tokenTypeStr((T)->type),			\
54 									   (Q) == CORK_NIL? "": getEntryInCorkQueue(Q)->name)
55 
56 #define R_TRACE_ENTER() TRACE_ENTER_TEXT("token: %s (%s), parent: %s", \
57 										 tokenIsTypeVal(token, '\n')? "\\n": tokenString(token), \
58 										 tokenTypeStr(token->type), \
59 										 parent == CORK_NIL? "": getEntryInCorkQueue(parent)->name)
60 #define R_TRACE_LEAVE() TRACE_LEAVE()
61 #else
62 #define R_TRACE_TOKEN_TEXT(TXT,T,Q) do {} while (0);
63 #define R_TRACE_TOKEN(T,Q) do {} while (0);
64 #define R_TRACE_ENTER() do {} while (0);
65 #define R_TRACE_LEAVE() do {} while (0);
66 #endif
67 
68 
69 /*
70 *   DATA DEFINITIONS
71 */
72 typedef enum {
73 	K_UNDEFINED = -1,
74 	K_FUNCTION,
75 	K_LIBRARY,
76 	K_SOURCE,
77 	K_GLOBALVAR,
78 	K_FUNCVAR,
79 	K_PARAM,
80 	K_VECTOR,
81 	K_LIST,
82 	K_DATAFRAME,
83 	K_NAMEATTR,
84 	KIND_COUNT
85 } rKind;
86 
87 typedef enum {
88 	R_LIBRARY_ATTACHED_BY_LIBRARY,
89 	R_LIBRARY_ATTACHED_BY_REQUIRE,
90 } rLibraryRole;
91 
92 typedef enum {
93 	R_SOURCE_LOADED_BY_SOURCE,
94 } rSourceRole;
95 
96 static roleDefinition RLibraryRoles [] = {
97 	{ true, "library", "library attached by library function" },
98 	{ true, "require", "library attached by require function" },
99 };
100 
101 static roleDefinition RSourceRoles [] = {
102 	{ true, "source", "source loaded by source fucntion" },
103 };
104 
105 static kindDefinition RKinds[KIND_COUNT] = {
106 	{true, 'f', "function", "functions"},
107 	{true, 'l', "library", "libraries",
108 	 .referenceOnly = true, ATTACH_ROLES (RLibraryRoles) },
109 	{true, 's', "source", "sources",
110 	 .referenceOnly = true, ATTACH_ROLES (RSourceRoles) },
111 	{true, 'g', "globalVar", "global variables having values other than function()"},
112 	{true, 'v', "functionVar", "function variables having values other than function()"},
113 	{false,'z', "parameter",  "function parameters inside function definitions" },
114 	{true, 'c', "vector", "vectors explicitly created with `c()'" },
115 	{true, 'L', "list", "lists explicitly created with `list()'" },
116 	{true, 'd', "dataframe", "data frame explicitly created with `data.frame()'" },
117 	{true, 'n', "nameattr", "names attribtes in vectors, lists, or dataframes" },
118 };
119 
120 struct sKindExtraInfo {
121 	const char *anon_prefix;
122 	const char *ctor;
123 };
124 
125 static struct sKindExtraInfo kindExtraInfo[KIND_COUNT] = {
126 	[K_FUNCTION] = {
127 		"anonFunc",
128 		"function",
129 	},
130 	[K_VECTOR] = {
131 		"anonVec",
132 		"c",
133 	},
134 	[K_LIST] = {
135 		"anonList",
136 		"list",
137 	},
138 	[K_DATAFRAME] = {
139 		"anonDataFrame",
140 		"data.frame",
141 	},
142 };
143 
144 typedef enum {
145 	F_ASSIGNMENT_OPERATOR,
146 	F_CONSTRUCTOR,
147 } rField;
148 
149 static fieldDefinition RFields [] = {
150 	{
151 		.name = "assignmentop",
152 		.description = "operator for assignment",
153 		.enabled = false,
154 	},
155 	{
156 		.name = "constructor",
157 		.description = "function used for making value assigned to the nameattr tag",
158 		.enabled = true,
159 	}
160 };
161 
162 typedef int keywordId;			/* to allow KEYWORD_NONE */
163 
164 static const keywordTable RKeywordTable [] = {
165 	{ "c",        KEYWORD_R_C        },
166 	{ "list",     KEYWORD_R_LIST     },
167 	{ "data.frame",KEYWORD_R_DATAFRAME },
168 	{ "function", KEYWORD_R_FUNCTION },
169 	{ "if",       KEYWORD_R_IF       },
170 	{ "else",     KEYWORD_R_ELSE     },
171 	{ "for",      KEYWORD_R_FOR      },
172 	{ "while",    KEYWORD_R_WHILE    },
173 	{ "repeat",   KEYWORD_R_REPEAT   },
174 	{ "in",       KEYWORD_R_IN       },
175 	{ "next",     KEYWORD_R_NEXT     },
176 	{ "break",    KEYWORD_R_BREAK    },
177 	{ "TRUE",     KEYWORD_R_TRUE,    },
178 	{ "FALSE",    KEYWORD_R_FALSE,   },
179 	{ "NULL",     KEYWORD_R_NULL,    },
180 	{ "Inf",      KEYWORD_R_INF,     },
181 	{ "NaN",      KEYWORD_R_NAN,     },
182 	{ "NA",       KEYWORD_R_NA,      },
183 	{ "NA_integer_",   KEYWORD_R_NA, },
184 	{ "NA_real_",      KEYWORD_R_NA, },
185 	{ "NA_complex_",   KEYWORD_R_NA, },
186 	{ "NA_character_", KEYWORD_R_NA, },
187 	{ "source",   KEYWORD_R_SOURCE   },
188 	{ "library",  KEYWORD_R_LIBRARY  },
189 	{ "require",  KEYWORD_R_LIBRARY  },
190 };
191 
192 #ifdef DEBUG
193 static const char *tokenTypeStr(enum RTokenType e);
194 #endif
195 
196 static struct tokenTypePair typePairs [] = {
197 	{ '{', '}' },
198 	{ '[', ']' },
199 	{ '(', ')' },
200 };
201 
202 typedef struct sRToken {
203 	tokenInfo base;
204 	int scopeIndex;
205 	int parenDepth;
206 	vString *signature;
207 	int kindIndexForParams;		/* Used only when gathering parameters */
208 } rToken;
209 
210 #define R(TOKEN) ((rToken *)TOKEN)
211 
212 static int blackHoleIndex;
213 
214 static langType Lang_R;
215 
216 static void readToken (tokenInfo *const token, void *data);
217 static void clearToken (tokenInfo *token);
218 static struct tokenInfoClass rTokenInfoClass = {
219 	.nPreAlloc        = 4,
220 	.typeForUndefined = TOKEN_R_UNDEFINED,
221 	.keywordNone      = KEYWORD_NONE,
222 	.typeForKeyword   = TOKEN_R_KEYWORD,
223 	.typeForEOF       = TOKEN_R_EOF,
224 	.extraSpace       = sizeof (rToken) - sizeof (tokenInfo),
225 	.pairs            = typePairs,
226 	.pairCount        = ARRAY_SIZE (typePairs),
227 	.init             = NULL,
228 	.read             = readToken,
229 	.clear            = clearToken,
230 	.copy             = NULL,
231 };
232 
233 
234 /*
235  * FUNCTION PROTOTYPES
236  */
237 static bool parseStatement (tokenInfo *const token, int parent, bool in_arglist, bool in_continuous_pair);
238 static void parsePair (tokenInfo *const token, int parent, tokenInfo *const funcall);
239 
240 static  int notifyReadRightSideSymbol (tokenInfo *const symbol,
241 									   const char *const assignmentOperator,
242 									   int parent,
243 									   tokenInfo *const token);
244 static  int makeSimpleSubparserTag (int langType, tokenInfo *const token, int parent,
245 									bool in_func, int kindInR, const char *assignmentOperator);
246 static  bool askSubparserTagAcceptancy (tagEntryInfo *pe);
247 static  bool askSubparserTagHasFunctionAlikeKind (tagEntryInfo *e);
248 static  int notifyReadFuncall (tokenInfo *const func, tokenInfo *const token, int parent);
249 
250 /*
251 *   FUNCTION DEFINITIONS
252 */
hasKindsOrCtors(tagEntryInfo * e,int kinds[],size_t count)253 static bool hasKindsOrCtors (tagEntryInfo * e, int kinds[], size_t count)
254 {
255        if (e->langType == Lang_R)
256 	   {
257 		   for (size_t i = 0; i < count; i++)
258 		   {
259 			   if (e->kindIndex == kinds[i])
260 				   return true;
261 		   }
262 	   }
263 	   else
264 	   {
265 		   bool function = false;
266 		   for (size_t i = 0; i < count; i++)
267 		   {
268 			   if (K_FUNCTION == kinds[i])
269 			   {
270 				   function = true;
271 				   break;
272 			   }
273 		   }
274 		   if (function && askSubparserTagHasFunctionAlikeKind (e))
275 			   return true;
276 	   }
277 
278 	   const char *tmp = getParserFieldValueForType (e,
279 													 RFields [F_CONSTRUCTOR].ftype);
280 	   if (tmp == NULL)
281 		   return false;
282 
283 	   for (size_t i = 0; i < count; i++)
284 	   {
285 		   const char * ctor = kindExtraInfo [kinds[i]].ctor;
286 		   if (ctor && strcmp (tmp, ctor) == 0)
287                return true;
288 	   }
289 
290        return false;
291 }
292 
searchScopeOtherThan(int scope,int kinds[],size_t count)293 static int searchScopeOtherThan (int scope, int kinds[], size_t count)
294 {
295 	do
296 	{
297 		tagEntryInfo * e = getEntryInCorkQueue (scope);
298 		if (!e)
299 			return CORK_NIL;
300 
301 		if (!hasKindsOrCtors (e, kinds, count))
302 			return scope;
303 
304 		scope = e->extensionFields.scopeIndex;
305 	}
306 	while (1);
307 }
308 
makeSimpleRTagR(tokenInfo * const token,int parent,int kind,const char * assignmentOp)309 static int makeSimpleRTagR (tokenInfo *const token, int parent, int kind,
310 							const char * assignmentOp)
311 {
312 	if (assignmentOp && (strlen (assignmentOp) == 3))
313 	{
314 		/* <<- or ->> is used here. */
315 		if (anyKindsEntryInScopeRecursive (parent, tokenString (token),
316 										   (int[]){K_FUNCTION,
317 												   K_GLOBALVAR,
318 												   K_FUNCVAR,
319 												   K_PARAM}, 4) != CORK_NIL)
320 			return CORK_NIL;
321 
322 		parent = CORK_NIL;
323 	}
324 
325 	/* If the tag (T) to be created is defined in a scope and
326 	   the scope already has another tag having the same name
327 	   as T, T should not be created. */
328 	tagEntryInfo *pe = getEntryInCorkQueue (parent);
329 	int cousin = CORK_NIL;
330 	if (pe && ((pe->langType == Lang_R && pe->kindIndex == K_FUNCTION)
331 			   || (pe->langType != Lang_R && askSubparserTagHasFunctionAlikeKind (pe))))
332 	{
333 		cousin = anyEntryInScope (parent, tokenString (token));
334 		if (kind == K_GLOBALVAR)
335 			kind = K_FUNCVAR;
336 	}
337 	else if (pe && (kind == K_GLOBALVAR)
338 			 && hasKindsOrCtors (pe, (int[]){K_VECTOR, K_LIST, K_DATAFRAME}, 3))
339 	{
340 		parent = searchScopeOtherThan (pe->extensionFields.scopeIndex,
341 									   (int[]){K_VECTOR, K_LIST, K_DATAFRAME}, 3);
342 		if (parent == CORK_NIL)
343 			cousin = anyKindEntryInScope (parent, tokenString (token), K_GLOBALVAR);
344 		else
345 		{
346 			cousin = anyKindEntryInScope (parent, tokenString (token), K_FUNCVAR);
347 			kind = K_FUNCVAR;
348 		}
349 	}
350 	else if (pe)
351 	{
352 		/* The condition for tagging is a bit relaxed here.
353 		   Even if the same name tag is created in the scope, a name
354 		   is tagged if kinds are different. */
355 		cousin = anyKindEntryInScope (parent, tokenString (token), kind);
356 	}
357 	if (cousin != CORK_NIL)
358 		return CORK_NIL;
359 
360 	int corkIndex = makeSimpleTag (token->string, kind);
361 	tagEntryInfo *tag = getEntryInCorkQueue (corkIndex);
362 	if (tag)
363 	{
364 		tag->extensionFields.scopeIndex = parent;
365 		if (assignmentOp)
366 		{
367 			if (strlen (assignmentOp) > 0)
368 				attachParserField (tag, true,
369 								   RFields [F_ASSIGNMENT_OPERATOR].ftype,
370 								   assignmentOp);
371 			else
372 				markTagExtraBit (tag, XTAG_ANONYMOUS);
373 		}
374 		registerEntry (corkIndex);
375 	}
376 	return corkIndex;
377 }
378 
makeSimpleRTag(tokenInfo * const token,int parent,bool in_func,int kind,const char * assignmentOp)379 static int makeSimpleRTag (tokenInfo *const token, int parent, bool in_func, int kind,
380 						   const char * assignmentOp)
381 {
382 	int r;
383 	const char *ctor = kindExtraInfo [kind].ctor;
384 	tagEntryInfo *pe = (parent == CORK_NIL)? NULL: getEntryInCorkQueue (parent);
385 
386 	/* makeTagWithTranslation method for subparsers
387 	   called from makeSimpleSubparserTag expects
388 	   kind should be resolved. */
389 	if (pe && hasKindsOrCtors (pe, (int[]){K_VECTOR, K_LIST, K_DATAFRAME}, 3))
390 	{
391 		if (assignmentOp
392 			&& strcmp (assignmentOp, "=") == 0)
393 			kind = K_NAMEATTR;
394 	}
395 
396 	bool foreign_tag = false;
397 	if (pe == NULL || pe->langType == Lang_R ||
398 		!askSubparserTagAcceptancy (pe))
399 		r = makeSimpleRTagR (token, parent, kind, assignmentOp);
400 	else
401 	{
402 		foreign_tag = true;
403 		r = makeSimpleSubparserTag (pe->langType, token, parent, in_func,
404 									kind, assignmentOp);
405 	}
406 
407 	if ((kind == K_NAMEATTR || foreign_tag) && ctor)
408 	{
409 		tagEntryInfo *e = getEntryInCorkQueue (r);
410 		if (e)
411 			attachParserField (e, true,
412 							   RFields [F_CONSTRUCTOR].ftype,
413 							   ctor);
414 	}
415 
416 	return r;
417 }
418 
clearToken(tokenInfo * token)419 static void clearToken (tokenInfo *token)
420 {
421 	R (token)->parenDepth = 0;
422 	R (token)->scopeIndex = CORK_NIL;
423 	R (token)->kindIndexForParams = KIND_GHOST_INDEX;
424 	if (R (token)->signature)
425 	{
426 		vStringDelete (R (token)->signature);
427 		R (token)->signature = NULL;
428 	}
429 }
430 
readString(tokenInfo * const token,void * data)431 static void readString (tokenInfo *const token, void *data)
432 {
433 	int c;
434 	bool escaped = false;
435 
436 	int c0 = tokenString(token)[0];
437 
438 	while (1)
439 	{
440 		c = getcFromInputFile ();
441 		switch (c)
442 		{
443 		case EOF:
444 			return;
445 		case '\'':
446 		case '"':
447 		case '`':
448 			tokenPutc (token, c);
449 			if (!escaped && c == c0)
450 				return;
451 			escaped = false;
452 			break;
453 		case '\\':
454 			tokenPutc (token, c);
455 			escaped = !escaped;
456 			break;
457 		default:
458 			tokenPutc (token, c);
459 			escaped = false;
460 			break;
461 		}
462 	}
463 }
464 
readNumber(tokenInfo * const token,void * data)465 static void readNumber (tokenInfo *const token, void *data)
466 {
467 	int c;
468 
469 	/* 10.3.1 Constants
470 	 *
471 	 * Valid numeric constants: 1 10 0.1 .2 1e-7 1.2e+7
472 	 * Valid integer constants:  1L, 0x10L, 1000000L, 1e6L
473 	 * Valid numeric constants:  1.1L, 1e-3L, 0x1.1p-2
474 	 * Valid complex constants: 2i 4.1i 1e-2i
475 	 */
476 	while ((c = getcFromInputFile ()))
477 	{
478 		if (isxdigit (c) || c == '.' || c == 'E'
479 			|| c == '+' || c == '-'
480 			|| c == 'L' || c == 'x' || c == 'p'
481 			|| c == 'i')
482 			tokenPutc (token, c);
483 		else
484 		{
485 			ungetcToInputFile (c);
486 			break;
487 		}
488 	}
489 }
490 
readSymbol(tokenInfo * const token,void * data)491 static void readSymbol (tokenInfo *const token, void *data)
492 {
493 	int c;
494 	while ((c = getcFromInputFile ()))
495 	{
496 		if (isalnum (c) || c == '.' || c == '_')
497 			tokenPutc (token, c);
498 		else
499 		{
500 			ungetcToInputFile (c);
501 			break;
502 		}
503 	}
504 }
505 
resolveKeyword(vString * string)506 static keywordId resolveKeyword (vString *string)
507 {
508 	char *s = vStringValue (string);
509 	static langType lang = LANG_AUTO;
510 
511 	if (lang == LANG_AUTO)
512 		lang = getInputLanguage ();
513 
514 	return lookupCaseKeyword (s, lang);
515 }
516 
signatureExpectingParameter(vString * signature)517 static bool signatureExpectingParameter (vString *signature)
518 {
519 	if (vStringLast (signature) == '(')
520 		return true;
521 
522 	for (size_t i = vStringLength (signature); i > 0; i--)
523 	{
524 		char c = vStringChar (signature, i - 1);
525 		if (c == ' ')
526 			continue;
527 		else if (c == ',')
528 			return true;
529 		break;
530 	}
531 	return false;
532 }
533 
readToken(tokenInfo * const token,void * data)534 static void readToken (tokenInfo *const token, void *data)
535 {
536 	int c, c0;
537 
538 	token->type = TOKEN_R_UNDEFINED;
539 	token->keyword = KEYWORD_NONE;
540 	vStringClear (token->string);
541 
542 	do
543 		c = getcFromInputFile ();
544 	while (c == ' ' || c== '\t' || c == '\f');
545 
546 	token->lineNumber   = getInputLineNumber ();
547 	token->filePosition = getInputFilePosition ();
548 
549 	switch (c)
550 	{
551 	case EOF:
552 		token->type = TOKEN_R_EOF;
553 		break;
554 	case '#':
555 		while (1)
556 		{
557 			c = getcFromInputFile ();
558 			if (c == EOF)
559 			{
560 				token->type = TOKEN_R_EOF;
561 				break;
562 			}
563 			else if (c == '\n')
564 			{
565 				token->type = c;
566 				tokenPutc (token, c);
567 				break;
568 			}
569 		}
570 		break;
571 	case '\n':
572 	case ';':
573 		token->type = c;
574 		tokenPutc (token, c);
575 		break;
576 	case '\'':
577 	case '"':
578 	case '`':
579 		token->type = TOKEN_R_STRING;
580 		tokenPutc (token, c);
581 		readString (token, data);
582 		break;
583 	case '+':
584 	case '/':
585 	case '^':
586 	case '~':
587 		token->type = TOKEN_R_OPERATOR;
588 		tokenPutc (token, c);
589 		break;
590 	case ':':
591 		token->type = TOKEN_R_OPERATOR;
592 		tokenPutc (token, c);
593 		c = getcFromInputFile ();
594 		if (c == ':')
595 		{
596 			tokenPutc (token, c);
597 			token->type = TOKEN_R_SCOPE;
598 			c = getcFromInputFile ();
599 			if (c == ':')
600 				tokenPutc (token, c);
601 			else
602 				ungetcToInputFile (c);
603 		}
604 		else
605 			ungetcToInputFile (c);
606 		break;
607 	case '&':
608 	case '|':
609 	case '*':
610 		token->type = TOKEN_R_OPERATOR;
611 		tokenPutc (token, c);
612 		c0 = getcFromInputFile ();
613 		if (c == c0)
614 			tokenPutc (token, c0);
615 		else
616 			ungetcToInputFile (c0);
617 		break;
618 	case '=':
619 		token->type = TOKEN_R_OPERATOR;
620 		tokenPutc (token, c);
621 		c = getcFromInputFile ();
622 		if (c == '=')
623 			tokenPutc (token, c);
624 		else
625 		{
626 			token->type = '=';
627 			ungetcToInputFile (c);
628 		}
629 		break;
630 	case '-':
631 		token->type = TOKEN_R_OPERATOR;
632 		tokenPutc (token, c);
633 		c = getcFromInputFile ();
634 		if (c == '>')
635 		{
636 			token->type = TOKEN_R_RASSIGN;
637 			tokenPutc (token, c);
638 			c = getcFromInputFile ();
639 			if (c == '>')
640 				tokenPutc (token, c);
641 			else
642 				ungetcToInputFile (c);
643 		}
644 		else
645 			ungetcToInputFile (c);
646 		break;
647 	case '>':
648 		token->type = TOKEN_R_OPERATOR;
649 		tokenPutc (token, c);
650 		c = getcFromInputFile ();
651 		if (c == '=')
652 			tokenPutc (token, c);
653 		else
654 			ungetcToInputFile (c);
655 		break;
656 	case '<':
657 		token->type = TOKEN_R_OPERATOR;
658 		tokenPutc (token, c);
659 		c = getcFromInputFile ();
660 
661 		/* <<- */
662 		if (c == '<')
663 		{
664 			tokenPutc (token, c);
665 			c = getcFromInputFile ();
666 		}
667 
668 		if (c == '-')
669 		{
670 			token->type = TOKEN_R_LASSIGN;
671 			tokenPutc (token, c);
672 		}
673 		else if (c == '=')
674 			tokenPutc (token, c);
675 		else
676 			ungetcToInputFile (c);
677 		break;
678 	case '%':
679 		token->type = TOKEN_R_OPERATOR;
680 		tokenPutc (token, c);
681 		do
682 		{
683 			c = getcFromInputFile ();
684 			if (c == EOF)
685 				break;
686 
687 			tokenPutc (token, c);
688 			if (c == '%')
689 				break;
690 		}
691 		while (1);
692 		break;
693 	case '!':
694 		token->type = TOKEN_R_OPERATOR;
695 		tokenPutc (token, c);
696 		c = getcFromInputFile ();
697 		if (c == '=')
698 			tokenPutc (token, c);
699 		else
700 			ungetcToInputFile (c);
701 		break;
702 	case '{':
703 	case '}':
704 	case '(':
705 	case ')':
706 	case '[':
707 	case ']':
708 	case ',':
709 	case '$':
710 	case '@':
711 		token->type = c;
712 		tokenPutc (token, c);
713 		break;
714 	case '.':
715 		tokenPutc (token, c);
716 		c = getcFromInputFile ();
717 		if (isdigit(c))
718 		{
719 			token->type = TOKEN_R_NUMBER;
720 			tokenPutc (token, c);
721 			readNumber(token, data);
722 		}
723 		else if (isalpha (c) || c == '_')
724 		{
725 			token->type = TOKEN_R_SYMBOL;
726 			tokenPutc (token, c);
727 			readSymbol (token, data);
728 
729 			token->keyword = resolveKeyword (token->string);
730 			if (token->keyword != KEYWORD_NONE)
731 				token->type = TOKEN_R_KEYWORD;
732 		}
733 		else if (c == '.')
734 		{
735 			token->type = TOKEN_R_DOTS;
736 			tokenPutc (token, c);
737 
738 			c = getcFromInputFile ();
739 			if (c == '.')
740 				tokenPutc (token, c);
741 			else if (isdigit(c))
742 			{
743 				token->type = TOKEN_R_DOTS_N;
744 				do
745 				{
746 					tokenPutc (token, c);
747 					c = getcFromInputFile ();
748 				}
749 				while (isdigit(c));
750 				ungetcToInputFile (c);
751 			}
752 			else if (isalpha (c) || c == '_')
753 			{
754 				token->type = TOKEN_R_SYMBOL;
755 				tokenPutc (token, c);
756 				readSymbol (token, data);
757 
758 				token->keyword = resolveKeyword (token->string);
759 				if (token->keyword != KEYWORD_NONE)
760 					token->type = TOKEN_R_KEYWORD;
761 			}
762 			else
763 			{
764 				token->type = TOKEN_R_UNDEFINED;
765 				ungetcToInputFile (c);
766 			}
767 		}
768 		break;
769 	default:
770 		tokenPutc (token, c);
771 		if (isdigit (c))
772 		{
773 			token->type = TOKEN_R_NUMBER;
774 			readNumber(token, data);
775 		}
776 		else if (isalpha (c))
777 		{
778 			token->type = TOKEN_R_SYMBOL;
779 			readSymbol (token, data);
780 
781 			token->keyword = resolveKeyword (token->string);
782 			if (token->keyword != KEYWORD_NONE)
783 				token->type = TOKEN_R_KEYWORD;
784 		}
785 		else
786 			token->type = TOKEN_R_UNDEFINED;
787 		break;
788 	}
789 
790 	/* Handle parameters in a signature */
791 	if (R(token)->signature && !tokenIsType(token, R_EOF) && !tokenIsTypeVal(token, '\n'))
792 	{
793 		vString *signature = R (token)->signature;
794 
795 		if (tokenIsTypeVal (token, '('))
796 			R (token)->parenDepth++;
797 		else if (tokenIsTypeVal (token, ')'))
798 			R (token)->parenDepth--;
799 
800 		if (R (token)->kindIndexForParams != KIND_GHOST_INDEX
801 			&& R (token)->parenDepth == 1 && tokenIsType (token, R_SYMBOL)
802 			&& signatureExpectingParameter (signature))
803 			makeSimpleRTag (token, R (token)->scopeIndex, false,
804 							R (token)->kindIndexForParams, NULL);
805 
806 		if (vStringLast (signature) != '(' &&
807 			!tokenIsTypeVal (token, ',') &&
808 			!tokenIsTypeVal (token, ')'))
809 			vStringPut (signature, ' ');
810 		vStringCat (signature, token->string);
811 	}
812 }
813 
814 #define newRToken rNewToken
rNewToken(void)815 extern tokenInfo *rNewToken (void)
816 {
817 	return newToken (&rTokenInfoClass);
818 }
819 
820 #define tokenReadNoNewline rTokenReadNoNewline
rTokenReadNoNewline(tokenInfo * const token)821 extern void rTokenReadNoNewline (tokenInfo *const token)
822 {
823 	while (1)
824 	{
825 		tokenRead(token);
826 		if (!tokenIsTypeVal (token, '\n'))
827 			break;
828 	}
829 }
830 
setupCollectingSignature(tokenInfo * const token,vString * signature,int kindIndexForParams,int corkIndex)831 static void setupCollectingSignature (tokenInfo *const token,
832 									  vString   *signature,
833 									  int kindIndexForParams,
834 									  int corkIndex)
835 {
836 	R (token)->signature = signature;
837 	R (token)->kindIndexForParams = kindIndexForParams;
838 	R (token)->scopeIndex = corkIndex;
839 	R (token)->parenDepth = 1;
840 }
841 
rSetupCollectingSignature(tokenInfo * const token,vString * signature)842 extern void rSetupCollectingSignature (tokenInfo *const token,
843 									   vString   *signature)
844 {
845 	setupCollectingSignature (token, signature,
846 							  KIND_GHOST_INDEX, CORK_NIL);
847 }
848 
teardownCollectingSignature(tokenInfo * const token)849 static void teardownCollectingSignature (tokenInfo *const token)
850 {
851 	R (token)->parenDepth = 0;
852 	R (token)->scopeIndex = CORK_NIL;
853 	R (token)->kindIndexForParams = KIND_GHOST_INDEX;
854 	R (token)->signature = NULL;
855 }
856 
rTeardownCollectingSignature(tokenInfo * const token)857 extern void rTeardownCollectingSignature (tokenInfo *const token)
858 {
859 	teardownCollectingSignature (token);
860 }
861 
getKindForToken(tokenInfo * const token)862 static int getKindForToken (tokenInfo *const token)
863 {
864 	if (tokenIsKeyword (token, R_FUNCTION))
865 		return K_FUNCTION;
866 	else if (tokenIsKeyword (token, R_C))
867 		return K_VECTOR;
868 	else if (tokenIsKeyword (token, R_LIST))
869 		return K_LIST;
870 	else if (tokenIsKeyword (token, R_DATAFRAME))
871 		return K_DATAFRAME;
872 	return K_GLOBALVAR;
873 }
874 
findNonPlaceholder(int corkIndex,tagEntryInfo * entry,void * data)875 static bool findNonPlaceholder (int corkIndex, tagEntryInfo *entry, void *data)
876 {
877 	bool *any_non_placehoders = data;
878 	if (!entry->placeholder)
879 	{
880 		*any_non_placehoders = true;
881 		return false;
882 	}
883 	return true;
884 }
885 
parseRightSide(tokenInfo * const token,tokenInfo * const symbol,int parent)886 static void parseRightSide (tokenInfo *const token, tokenInfo *const symbol, int parent)
887 {
888 	R_TRACE_ENTER();
889 
890 	char *const assignment_operator = eStrdup (tokenString (token));
891 	vString *signature = NULL;
892 
893 	tokenReadNoNewline (token);
894 
895 	int kind = getKindForToken (token);
896 
897 	/* Call sub parsers */
898 	int corkIndex = notifyReadRightSideSymbol (symbol,
899 											   assignment_operator,
900 											   parent,
901 											   token);
902 	if (corkIndex == CORK_NIL)
903 	{
904 		/* No subparser handle the symbol */
905 		corkIndex = makeSimpleRTag (symbol, parent, kind == K_FUNCTION,
906 									kind,
907 									assignment_operator);
908 	}
909 
910 	if (kind == K_FUNCTION)
911 	{
912 		/* parse signature */
913 		tokenReadNoNewline (token);
914 		if (tokenIsTypeVal (token, '('))
915 		{
916 			if (corkIndex == CORK_NIL)
917 				tokenSkipOverPair (token);
918 			else
919 			{
920 				signature = vStringNewInit("(");
921 				setupCollectingSignature (token, signature, K_PARAM, corkIndex);
922 				tokenSkipOverPair (token);
923 				teardownCollectingSignature (token);
924 			}
925 			tokenReadNoNewline (token);
926 		}
927 		parent = (corkIndex == CORK_NIL
928 				  ? blackHoleIndex
929 				  : corkIndex);
930 	}
931 	else if (kind == K_VECTOR || kind == K_LIST || kind == K_DATAFRAME)
932 	{
933 		tokenRead (token);
934 		parsePair (token, corkIndex, NULL);
935 		tokenRead (token);
936 		parent = corkIndex;
937 	}
938 
939 	R_TRACE_TOKEN_TEXT("body", token, parent);
940 
941 	parseStatement (token, parent, false, false);
942 
943 	tagEntryInfo *tag = getEntryInCorkQueue (corkIndex);
944 	if (tag)
945 	{
946 		tag->extensionFields.endLine = token->lineNumber;
947 		if (signature)
948 		{
949 			tag->extensionFields.signature = vStringDeleteUnwrap(signature);
950 			signature = NULL;
951 		}
952 		/* If a vector has no named attribte and it has no lval,
953 		 * we don't make a tag for the vector. */
954 		if ((kind == K_VECTOR || kind == K_LIST || kind == K_DATAFRAME)
955 			&& *assignment_operator == '\0')
956 		{
957 			bool any_non_placehoders = false;
958 			foreachEntriesInScope (corkIndex, NULL,
959 								   findNonPlaceholder, &any_non_placehoders);
960 			if (!any_non_placehoders)
961 				tag->placeholder = 1;
962 		}
963 	}
964 
965 	vStringDelete (signature);	/* NULL is acceptable. */
966 	eFree (assignment_operator);
967 	R_TRACE_LEAVE();
968 }
969 
970 /* Parse arguments for library and source. */
preParseExternalEntitiy(tokenInfo * const token,tokenInfo * const funcall)971 static bool preParseExternalEntitiy (tokenInfo *const token, tokenInfo *const funcall)
972 {
973 	TRACE_ENTER();
974 
975 	bool r = true;
976 	tokenInfo *prefetch_token = newRToken ();
977 
978 	tokenReadNoNewline (prefetch_token);
979 	if (tokenIsType (prefetch_token, R_SYMBOL)
980 		|| tokenIsType (prefetch_token, R_STRING))
981 	{
982 		tokenInfo *const loaded_obj_token = newTokenByCopying (prefetch_token);
983 		tokenReadNoNewline (prefetch_token);
984 		if (tokenIsTypeVal (prefetch_token, ')')
985 			|| tokenIsTypeVal (prefetch_token, ','))
986 		{
987 			if (tokenIsTypeVal (prefetch_token, ')'))
988 				r = false;
989 
990 			makeSimpleRefTag (loaded_obj_token->string,
991 							  (tokenIsKeyword (funcall, R_LIBRARY)
992 							   ? K_LIBRARY
993 							   : K_SOURCE),
994 							  (tokenIsKeyword (funcall, R_LIBRARY)
995 							   ? (strcmp (tokenString(funcall), "library") == 0
996 								  ? R_LIBRARY_ATTACHED_BY_LIBRARY
997 								  : R_LIBRARY_ATTACHED_BY_REQUIRE)
998 							   : R_SOURCE_LOADED_BY_SOURCE));
999 			tokenDelete (loaded_obj_token);
1000 		}
1001 		else if (tokenIsEOF (prefetch_token))
1002 		{
1003 			tokenCopy (token, prefetch_token);
1004 			tokenDelete (loaded_obj_token);
1005 			r = false;
1006 		}
1007 		else
1008 		{
1009 			tokenUnread (prefetch_token);
1010 			tokenUnread (loaded_obj_token);
1011 			tokenDelete (loaded_obj_token);
1012 		}
1013 	}
1014 	else if (tokenIsEOF (prefetch_token))
1015 	{
1016 		tokenCopy (token, prefetch_token);
1017 		r = false;
1018 	}
1019 	else
1020 		tokenUnread (prefetch_token);
1021 
1022 	tokenDelete (prefetch_token);
1023 
1024 	TRACE_LEAVE_TEXT(r
1025 					 ? "unread tokens and request parsing again to the upper context"
1026 					 : "parse all arguments");
1027 	return r;
1028 }
1029 
preParseLoopCounter(tokenInfo * const token,int parent)1030 static bool preParseLoopCounter(tokenInfo *const token, int parent)
1031 {
1032 	bool r = true;
1033 	TRACE_ENTER();
1034 
1035 	tokenReadNoNewline (token);
1036 	if (tokenIsType (token, R_SYMBOL))
1037 		makeSimpleRTag (token, parent, false, K_GLOBALVAR, NULL);
1038 
1039 	if (tokenIsEOF (token)
1040 		|| tokenIsTypeVal (token, ')'))
1041 		r = false;
1042 
1043 	TRACE_LEAVE_TEXT(r
1044 					 ? "unread tokens and request parsing again to the upper context"
1045 					 : "parse all arguments");
1046 	return r;
1047 }
1048 
1049 
1050 /* If funcall is non-NULL, this pair represents the argument list for the function
1051  * call for FUNCALL. */
parsePair(tokenInfo * const token,int parent,tokenInfo * const funcall)1052 static void parsePair (tokenInfo *const token, int parent, tokenInfo *const funcall)
1053 {
1054 	R_TRACE_ENTER();
1055 
1056 	bool in_continuous_pair = tokenIsTypeVal (token, '(')
1057 		|| tokenIsTypeVal (token, '[');
1058 	bool is_funcall = funcall && tokenIsTypeVal (token, '(');
1059 	bool done = false;
1060 
1061 	if (is_funcall)
1062 	{
1063 		if 	(tokenIsKeyword (funcall, R_LIBRARY) ||
1064 			 tokenIsKeyword (funcall, R_SOURCE))
1065 			done = !preParseExternalEntitiy (token, funcall);
1066 		else if (tokenIsKeyword (funcall, R_FOR))
1067 			done = !preParseLoopCounter (token, parent);
1068 		else if (notifyReadFuncall (funcall, token, parent) != CORK_NIL)
1069 			done = true;
1070 	}
1071 
1072 	if (done)
1073 	{
1074 		R_TRACE_LEAVE();
1075 		return;
1076 	}
1077 
1078 	do
1079 	{
1080 		tokenRead (token);
1081 		R_TRACE_TOKEN_TEXT("inside pair", token, parent);
1082 		parseStatement (token, parent, (funcall != NULL), in_continuous_pair);
1083 	}
1084 	while (! (tokenIsEOF (token)
1085 			  || tokenIsTypeVal (token, ')')
1086 			  || tokenIsTypeVal (token, '}')
1087 			  || tokenIsTypeVal (token, ']')));
1088 	R_TRACE_LEAVE();
1089 }
1090 
isAtConstructorInvocation(void)1091 static bool isAtConstructorInvocation (void)
1092 {
1093 	bool r = false;
1094 
1095 	tokenInfo *const token = newRToken ();
1096 	tokenRead (token);
1097 	if (tokenIsTypeVal (token, '('))
1098 		r = true;
1099 	tokenUnread (token);
1100 	tokenDelete (token);
1101 	return r;
1102 }
1103 
parseStatement(tokenInfo * const token,int parent,bool in_arglist,bool in_continuous_pair)1104 static bool parseStatement (tokenInfo *const token, int parent,
1105 							bool in_arglist, bool in_continuous_pair)
1106 {
1107 	R_TRACE_ENTER();
1108 	int last_count = rTokenInfoClass.read_counter;
1109 
1110 	do
1111 	{
1112 		if (tokenIsEOF (token))
1113 			break;
1114 		else if (tokenIsTypeVal (token, ';'))
1115 		{
1116 			R_TRACE_TOKEN_TEXT ("break with ;", token, parent);
1117 			break;
1118 		}
1119 		else if (tokenIsTypeVal (token, '\n'))
1120 		{
1121 			R_TRACE_TOKEN_TEXT ("break with \\n", token, parent);
1122 			break;
1123 		}
1124 		else if ((tokenIsKeyword (token, R_FUNCTION)
1125 				  || ((tokenIsKeyword (token, R_C)
1126 					   || tokenIsKeyword (token, R_LIST)
1127 					   || tokenIsKeyword (token, R_DATAFRAME))
1128 					  && isAtConstructorInvocation ())))
1129 		{
1130 			/* This statement doesn't start with a symbol.
1131 			 * This function is not assigned to any symbol. */
1132 			tokenInfo *const anonfunc = newTokenByCopying (token);
1133 			int kind = getKindForToken (token);
1134 			anonGenerate (anonfunc->string,
1135 						  kindExtraInfo [kind].anon_prefix, kind);
1136 			tokenUnread (token);
1137 			vStringClear (token->string);
1138 			parseRightSide (token, anonfunc, parent);
1139 			tokenDelete (anonfunc);
1140 		}
1141 		else if (tokenIsType (token, R_SYMBOL)
1142 				 || tokenIsType (token, R_STRING)
1143 				 || tokenIsType (token, R_KEYWORD))
1144 		{
1145 			tokenInfo *const symbol = newTokenByCopying (token);
1146 
1147 			if (in_continuous_pair)
1148 				tokenReadNoNewline (token);
1149 			else
1150 				tokenRead (token);
1151 
1152 			if (tokenIsType (token, R_LASSIGN))
1153 			{
1154 				/* Assignment */
1155 				parseRightSide (token, symbol, parent);
1156 				R_TRACE_TOKEN_TEXT ("break with right side", token, parent);
1157 				tokenDelete(symbol);
1158 				break;
1159 			}
1160 			else if (tokenIsTypeVal (token, '='))
1161 			{
1162 				/* Assignment */
1163 				if (in_arglist)
1164 				{
1165 					/* Ignore the left side symbol. */
1166 					tokenRead (token);
1167 					R_TRACE_TOKEN_TEXT("(in arg list) after = body", token, parent);
1168 				}
1169 				else
1170 				{
1171 					parseRightSide (token, symbol, parent);
1172 					R_TRACE_TOKEN_TEXT ("break with right side", token, parent);
1173 					tokenDelete(symbol);
1174 					break;
1175 				}
1176 			}
1177 			else if (tokenIsTypeVal (token, '('))
1178 			{
1179 				/* function call */
1180 				parsePair (token, parent, symbol);
1181 				tokenRead (token);
1182 				R_TRACE_TOKEN_TEXT("after arglist", token, parent);
1183 			}
1184 			else if (tokenIsTypeVal (token, '$')
1185 					 || tokenIsTypeVal (token, '@')
1186 					 || tokenIsType (token, R_SCOPE))
1187 			{
1188 				tokenReadNoNewline (token); /* Skip the next identifier */
1189 				tokenRead (token);
1190 				R_TRACE_TOKEN_TEXT("after $", token, parent);
1191 			}
1192 			else
1193 				R_TRACE_TOKEN_TEXT("else after symbol", token, parent);
1194 			tokenDelete(symbol);
1195 		}
1196 		else if (tokenIsType (token, R_RASSIGN))
1197 		{
1198 			char *const assignment_operator = eStrdup (tokenString (token));
1199 			tokenReadNoNewline (token);
1200 			if (tokenIsType (token, R_SYMBOL)
1201 				|| tokenIsType (token, R_STRING))
1202 			{
1203 				makeSimpleRTag (token, parent, false,
1204 								K_GLOBALVAR, assignment_operator);
1205 				tokenRead (token);
1206 			}
1207 			eFree (assignment_operator);
1208 			R_TRACE_TOKEN_TEXT("after ->", token, parent);
1209 		}
1210 		else if (tokenIsType (token, R_OPERATOR))
1211 		{
1212 			tokenReadNoNewline (token);
1213 			R_TRACE_TOKEN_TEXT("after operator", token, parent);
1214 		}
1215 		else if (tokenIsTypeVal (token, '(')
1216 				 || tokenIsTypeVal (token, '{')
1217 				 || tokenIsTypeVal (token, '['))
1218 		{
1219 			parsePair (token, parent, NULL);
1220 			tokenRead (token);
1221 			R_TRACE_TOKEN_TEXT("after pair", token, parent);
1222 		}
1223 		else if (tokenIsTypeVal (token, ')')
1224 				 || tokenIsTypeVal (token, '}')
1225 				 || tokenIsTypeVal (token, ']'))
1226 		{
1227 			R_TRACE_TOKEN_TEXT ("break with close", token, parent);
1228 			break;
1229 		}
1230 		else if (tokenIsTypeVal (token, '$')
1231 				 || tokenIsTypeVal (token, '@')
1232 				 || tokenIsType (token, R_SCOPE))
1233 		{
1234 			tokenReadNoNewline (token); /* Skip the next identifier */
1235 			tokenRead (token);
1236 			R_TRACE_TOKEN_TEXT("after $", token, parent);
1237 		}
1238 		else
1239 		{
1240 			tokenRead (token);
1241 			R_TRACE_TOKEN_TEXT("else", token, parent);
1242 		}
1243 	}
1244 	while (!tokenIsEOF (token));
1245 
1246 	R_TRACE_LEAVE();
1247 
1248 	return (last_count != rTokenInfoClass.read_counter);
1249 }
1250 
rParseStatement(tokenInfo * const token,int parentIndex,bool in_arglist)1251 extern bool rParseStatement (tokenInfo *const token, int parentIndex, bool in_arglist)
1252 {
1253 	pushLanguage (Lang_R);
1254 	bool r = parseStatement (token, parentIndex, in_arglist, true);
1255 	popLanguage ();
1256 	return r;
1257 }
1258 
notifyReadRightSideSymbol(tokenInfo * const symbol,const char * const assignmentOperator,int parent,tokenInfo * const token)1259 static  int notifyReadRightSideSymbol (tokenInfo *const symbol,
1260 									   const char *const assignmentOperator,
1261 									   int parent,
1262 									   tokenInfo *const token)
1263 {
1264 	subparser *sub;
1265 	int q = CORK_NIL;
1266 
1267 	foreachSubparser (sub, false)
1268 	{
1269 		rSubparser *rsub = (rSubparser *)sub;
1270 		if (rsub->readRightSideSymbol)
1271 		{
1272 			enterSubparser (sub);
1273 			q = rsub->readRightSideSymbol (rsub, symbol, assignmentOperator, parent, token);
1274 			leaveSubparser ();
1275 			if (q != CORK_NIL)
1276 				break;
1277 		}
1278 	}
1279 
1280 	return q;
1281 }
1282 
makeSimpleSubparserTag(int langType,tokenInfo * const token,int parent,bool in_func,int kindInR,const char * assignmentOperator)1283 static  int makeSimpleSubparserTag (int langType,
1284 									tokenInfo *const token, int parent,
1285 									bool in_func, int kindInR,
1286 									const char *assignmentOperator)
1287 {
1288 	int q = CORK_NIL;
1289 	subparser *sub = getLanguageSubparser (langType, false);
1290 	if (sub)
1291 	{
1292 		rSubparser *rsub = (rSubparser *)sub;
1293 		if (rsub->makeTagWithTranslation)
1294 		{
1295 			enterSubparser (sub);
1296 			q = rsub->makeTagWithTranslation (rsub,
1297 											  token, parent,
1298 											  in_func, kindInR,
1299 											  assignmentOperator);
1300 			leaveSubparser ();
1301 		}
1302 	}
1303 	return q;
1304 }
1305 
askSubparserTagAcceptancy(tagEntryInfo * pe)1306 static  bool askSubparserTagAcceptancy (tagEntryInfo *pe)
1307 {
1308 	bool q = false;
1309 	subparser *sub = getLanguageSubparser (pe->langType, false);
1310 	{
1311 		rSubparser *rsub = (rSubparser *)sub;
1312 		if (rsub->askTagAcceptancy)
1313 		{
1314 			enterSubparser (sub);
1315 			q = rsub->askTagAcceptancy (rsub, pe);
1316 			leaveSubparser ();
1317 		}
1318 	}
1319 	return q;
1320 }
1321 
askSubparserTagHasFunctionAlikeKind(tagEntryInfo * e)1322 static  bool askSubparserTagHasFunctionAlikeKind (tagEntryInfo *e)
1323 {
1324 	bool q = false;
1325 	pushLanguage (Lang_R);
1326 	subparser *sub = getLanguageSubparser (e->langType, false);
1327 	Assert (sub);
1328 	popLanguage ();
1329 	rSubparser *rsub = (rSubparser *)sub;
1330 	if (rsub->hasFunctionAlikeKind)
1331 	{
1332 		enterSubparser (sub);
1333 		q = rsub->hasFunctionAlikeKind (rsub, e);
1334 		leaveSubparser ();
1335 	}
1336 	return q;
1337 }
1338 
notifyReadFuncall(tokenInfo * const func,tokenInfo * const token,int parent)1339 static  int notifyReadFuncall (tokenInfo *const func,
1340 							   tokenInfo *const token,
1341 							   int parent)
1342 {
1343 	int q = CORK_NIL;
1344 	subparser *sub;
1345 	foreachSubparser (sub, false)
1346 	{
1347 		rSubparser *rsub = (rSubparser *)sub;
1348 		if (rsub->readFuncall)
1349 		{
1350 			enterSubparser (sub);
1351 			q = rsub->readFuncall (rsub, func, token, parent);
1352 			leaveSubparser ();
1353 			if (q != CORK_NIL)
1354 				break;
1355 		}
1356 	}
1357 	return q;
1358 }
1359 
findRTags(void)1360 static void findRTags (void)
1361 {
1362 	tokenInfo *const token = newRToken ();
1363 
1364 	blackHoleIndex = makePlaceholder ("**BLACK-HOLE/DON'T TAG ME**");
1365 	registerEntry (blackHoleIndex);
1366 
1367 	TRACE_PRINT ("install blackhole: %d", blackHoleIndex);
1368 
1369 	do
1370 	{
1371 		tokenRead(token);
1372 		R_TRACE_TOKEN(token, CORK_NIL);
1373 		parseStatement (token, CORK_NIL, false, false);
1374 	}
1375 	while (!tokenIsEOF (token));
1376 
1377 	TRACE_PRINT ("run blackhole", blackHoleIndex);
1378 	markAllEntriesInScopeAsPlaceholder (blackHoleIndex);
1379 
1380 	tokenDelete (token);
1381 }
1382 
initializeRParser(const langType language)1383 static void initializeRParser (const langType language)
1384 {
1385 	Lang_R = language;
1386 }
1387 
RParser(void)1388 extern parserDefinition *RParser (void)
1389 {
1390 	static const char *const extensions[] = { "r", "R", "s", "q", NULL };
1391 	parserDefinition *const def = parserNew ("R");
1392 	static selectLanguage selectors[] = { selectByArrowOfR,
1393 										  NULL };
1394 
1395 	def->extensions = extensions;
1396 	def->kindTable = RKinds;
1397 	def->kindCount = ARRAY_SIZE(RKinds);
1398 	def->fieldTable = RFields;
1399 	def->fieldCount = ARRAY_SIZE (RFields);
1400 	def->keywordTable = RKeywordTable;
1401 	def->keywordCount = ARRAY_SIZE(RKeywordTable);
1402 	def->useCork = CORK_QUEUE | CORK_SYMTAB;
1403 	def->parser = findRTags;
1404 	def->selectLanguage = selectors;
1405 	def->initialize = initializeRParser;
1406 
1407 	return def;
1408 }
1409 
rExtractNameFromString(vString * str)1410 extern vString *rExtractNameFromString (vString* str)
1411 {
1412 	int offset = 0;
1413 
1414 	if (vStringLength (str) == 0)
1415 		return NULL;
1416 
1417 	char b = vStringChar (str, 0);
1418 	if (b == '\'' || b == '"' || b == '`')
1419 		offset = 1;
1420 
1421 	if (offset && vStringLength (str) < 3)
1422 		return NULL;
1423 
1424 	vString *n = vStringNewInit (vStringValue (str) + offset);
1425 	if (vStringChar (n, vStringLength (n) - 1) == b)
1426 		vStringChop (n);
1427 
1428 	return n;
1429 }
1430 
1431 #ifdef DEBUG
tokenTypeStr(enum RTokenType e)1432 static const char *tokenTypeStr(enum RTokenType e)
1433 { /* Generated by misc/enumstr.sh with cmdline:
1434      parsers/r.c RTokenType tokenTypeStr TOKEN_R_ --use-lower-bits-as-is */
1435 	switch (e)
1436 	{
1437 		case            TOKEN_R_EOF: return "EOF";
1438 		case      TOKEN_R_UNDEFINED: return "UNDEFINED";
1439 		case        TOKEN_R_KEYWORD: return "KEYWORD";
1440 		case        TOKEN_R_NEWLINE: return "NEWLINE";
1441 		case         TOKEN_R_NUMBER: return "NUMBER";
1442 		case         TOKEN_R_SYMBOL: return "SYMBOL";
1443 		case         TOKEN_R_STRING: return "STRING";
1444 		case       TOKEN_R_OPERATOR: return "OPERATOR";
1445 		case           TOKEN_R_DOTS: return "DOTS";
1446 		case         TOKEN_R_DOTS_N: return "DOTS_N";
1447 		case        TOKEN_R_LASSIGN: return "LASSIGN";
1448 		case        TOKEN_R_RASSIGN: return "RASSIGN";
1449 		case          TOKEN_R_SCOPE: return "SCOPE";
1450 		default:                   break;
1451 	}
1452 	static char buf[3];
1453 	if (isprint (e))
1454 	{
1455 		buf[0] = e;
1456 		buf[1] = '\0';
1457 	}
1458 	else if (e == '\n')
1459 	{
1460 		buf[0] = '\\';
1461 		buf[1] = 'n';
1462 		buf[2] = '\0';
1463 	}
1464 	else
1465 	{
1466 		buf[0] = '\0';
1467 	}
1468 	return buf;
1469 }
1470 #endif
1471