1 /*
2 *   $Id: get.c 559 2007-06-17 03:30:09Z elliotth $
3 *
4 *   Copyright (c) 1996-2002, Darren Hiebert
5 *
6 *   This source code is released for free distribution under the terms of the
7 *   GNU General Public License.
8 *
9 *   This module contains the high level source read functions (preprocessor
10 *   directives are handled within this level).
11 */
12 
13 /*
14 *   INCLUDE FILES
15 */
16 #include "general.h"  /* must always come first */
17 
18 #include <string.h>
19 
20 #include "debug.h"
21 #include "entry.h"
22 #include "get.h"
23 #include "options.h"
24 #include "read.h"
25 #include "vstring.h"
26 
27 /*
28 *   MACROS
29 */
/*  Non-zero when the C strings "s1" and "s2" compare equal under strcmp().  */
#define stringMatch(s1,s2)		(! strcmp (s1,s2))
/*  Non-zero when "c" is the SPACE or TAB character. Note that "c" may be
 *  evaluated twice.
 */
#define isspacetab(c)			((c) == SPACE || (c) == TAB)
32 
33 /*
34 *   DATA DECLARATIONS
35 */
/*  Kind of comment introduced by a '/' character, as reported by
 *  isComment ().
 */
typedef enum {
	COMMENT_NONE,   /* the slash does not start a comment */
	COMMENT_C,      /* slash followed by '*' */
	COMMENT_CPLUS   /* slash followed by a second slash */
} Comment;
37 
/*  Fixed capacities used by the preprocessor state.
 */
enum eCppLimits {
	MaxCppNestingLevel = 20,  /* number of entries in the conditional stack */
	MaxDirectiveName   = 10   /* size of the directive name buffer */
};
42 
43 /*  Defines the one nesting level of a preprocessor conditional.
44  */
45 typedef struct sConditionalInfo {
46 	boolean ignoreAllBranches;  /* ignoring parent conditional branch */
47 	boolean singleBranch;       /* choose only one branch */
48 	boolean branchChosen;       /* branch already selected */
49 	boolean ignoring;           /* current ignore state */
50 } conditionalInfo;
51 
/*  Identifies which directive, if any, is currently being processed.
 */
enum eState {
	DRCTV_NONE = 0,  /* no known directive - ignore to end of line */
	DRCTV_DEFINE,    /* "#define" encountered */
	DRCTV_HASH,      /* initial '#' read; determine directive */
	DRCTV_IF,        /* "#if" or "#ifdef" encountered */
	DRCTV_PRAGMA,    /* #pragma encountered */
	DRCTV_UNDEF      /* "#undef" encountered */
};
60 
61 /*  Defines the current state of the pre-processor.
62  */
63 typedef struct sCppState {
64 	int		ungetch, ungetch2;   /* ungotten characters, if any */
65 	boolean resolveRequired;     /* must resolve if/else/elif/endif branch */
66 	boolean hasAtLiteralStrings; /* supports @"c:\" strings */
67 	struct sDirective {
68 		enum eState state;       /* current directive being processed */
69 		boolean	accept;          /* is a directive syntactically permitted? */
70 		vString * name;          /* macro name */
71 		unsigned int nestLevel;  /* level 0 is not used */
72 		conditionalInfo ifdef [MaxCppNestingLevel];
73 	} directive;
74 } cppState;
75 
76 /*
77 *   DATA DEFINITIONS
78 */
79 
80 /*  Use brace formatting to detect end of block.
81  */
82 static boolean BraceFormat = FALSE;
83 
84 static cppState Cpp = {
85 	'\0', '\0',  /* ungetch characters */
86 	FALSE,       /* resolveRequired */
87 	FALSE,       /* hasAtLiteralStrings */
88 	{
89 		DRCTV_NONE,  /* state */
90 		FALSE,       /* accept */
91 		NULL,        /* tag name */
92 		0,           /* nestLevel */
93 		{ {FALSE,FALSE,FALSE,FALSE} }  /* ifdef array */
94 	}  /* directive */
95 };
96 
97 /*
98 *   FUNCTION DEFINITIONS
99 */
100 
isBraceFormat(void)101 extern boolean isBraceFormat (void)
102 {
103 	return BraceFormat;
104 }
105 
getDirectiveNestLevel(void)106 extern unsigned int getDirectiveNestLevel (void)
107 {
108 	return Cpp.directive.nestLevel;
109 }
110 
cppInit(const boolean state,const boolean hasAtLiteralStrings)111 extern void cppInit (const boolean state, const boolean hasAtLiteralStrings)
112 {
113 	BraceFormat = state;
114 
115 	Cpp.ungetch         = '\0';
116 	Cpp.ungetch2        = '\0';
117 	Cpp.resolveRequired = FALSE;
118 	Cpp.hasAtLiteralStrings = hasAtLiteralStrings;
119 
120 	Cpp.directive.state     = DRCTV_NONE;
121 	Cpp.directive.accept    = TRUE;
122 	Cpp.directive.nestLevel = 0;
123 
124 	Cpp.directive.ifdef [0].ignoreAllBranches = FALSE;
125 	Cpp.directive.ifdef [0].singleBranch = FALSE;
126 	Cpp.directive.ifdef [0].branchChosen = FALSE;
127 	Cpp.directive.ifdef [0].ignoring     = FALSE;
128 
129 	if (Cpp.directive.name == NULL)
130 		Cpp.directive.name = vStringNew ();
131 	else
132 		vStringClear (Cpp.directive.name);
133 }
134 
cppTerminate(void)135 extern void cppTerminate (void)
136 {
137 	if (Cpp.directive.name != NULL)
138 	{
139 		vStringDelete (Cpp.directive.name);
140 		Cpp.directive.name = NULL;
141 	}
142 }
143 
cppBeginStatement(void)144 extern void cppBeginStatement (void)
145 {
146 	Cpp.resolveRequired = TRUE;
147 }
148 
cppEndStatement(void)149 extern void cppEndStatement (void)
150 {
151 	Cpp.resolveRequired = FALSE;
152 }
153 
154 /*
155 *   Scanning functions
156 *
157 *   This section handles preprocessor directives.  It strips out all
158 *   directives and may emit a tag for #define directives.
159 */
160 
161 /*  This puts a character back into the input queue for the source File.
162  *  Up to two characters may be ungotten.
163  */
cppUngetc(const int c)164 extern void cppUngetc (const int c)
165 {
166 	Assert (Cpp.ungetch2 == '\0');
167 	Cpp.ungetch2 = Cpp.ungetch;
168 	Cpp.ungetch = c;
169 }
170 
171 /*  Reads a directive, whose first character is given by "c", into "name".
172  */
readDirective(int c,char * const name,unsigned int maxLength)173 static boolean readDirective (int c, char *const name, unsigned int maxLength)
174 {
175 	unsigned int i;
176 
177 	for (i = 0  ;  i < maxLength - 1  ;  ++i)
178 	{
179 		if (i > 0)
180 		{
181 			c = fileGetc ();
182 			if (c == EOF  ||  ! isalpha (c))
183 			{
184 				fileUngetc (c);
185 				break;
186 			}
187 		}
188 		name [i] = c;
189 	}
190 	name [i] = '\0';  /* null terminate */
191 
192 	return (boolean) isspacetab (c);
193 }
194 
195 /*  Reads an identifier, whose first character is given by "c", into "tag",
196  *  together with the file location and corresponding line number.
197  */
readIdentifier(int c,vString * const name)198 static void readIdentifier (int c, vString *const name)
199 {
200 	vStringClear (name);
201 	do
202 	{
203 		vStringPut (name, c);
204 	} while (c = fileGetc (), (c != EOF  &&  isident (c)));
205 	fileUngetc (c);
206 	vStringTerminate (name);
207 }
208 
currentConditional(void)209 static conditionalInfo *currentConditional (void)
210 {
211 	return &Cpp.directive.ifdef [Cpp.directive.nestLevel];
212 }
213 
isIgnore(void)214 static boolean isIgnore (void)
215 {
216 	return Cpp.directive.ifdef [Cpp.directive.nestLevel].ignoring;
217 }
218 
setIgnore(const boolean ignore)219 static boolean setIgnore (const boolean ignore)
220 {
221 	return Cpp.directive.ifdef [Cpp.directive.nestLevel].ignoring = ignore;
222 }
223 
isIgnoreBranch(void)224 static boolean isIgnoreBranch (void)
225 {
226 	conditionalInfo *const ifdef = currentConditional ();
227 
228 	/*  Force a single branch if an incomplete statement is discovered
229 	 *  en route. This may have allowed earlier branches containing complete
230 	 *  statements to be followed, but we must follow no further branches.
231 	 */
232 	if (Cpp.resolveRequired  &&  ! BraceFormat)
233 		ifdef->singleBranch = TRUE;
234 
235 	/*  We will ignore this branch in the following cases:
236 	 *
237 	 *  1.  We are ignoring all branches (conditional was within an ignored
238 	 *        branch of the parent conditional)
239 	 *  2.  A branch has already been chosen and either of:
240 	 *      a.  A statement was incomplete upon entering the conditional
241 	 *      b.  A statement is incomplete upon encountering a branch
242 	 */
243 	return (boolean) (ifdef->ignoreAllBranches ||
244 					 (ifdef->branchChosen  &&  ifdef->singleBranch));
245 }
246 
chooseBranch(void)247 static void chooseBranch (void)
248 {
249 	if (! BraceFormat)
250 	{
251 		conditionalInfo *const ifdef = currentConditional ();
252 
253 		ifdef->branchChosen = (boolean) (ifdef->singleBranch ||
254 										Cpp.resolveRequired);
255 	}
256 }
257 
258 /*  Pushes one nesting level for an #if directive, indicating whether or not
259  *  the branch should be ignored and whether a branch has already been chosen.
260  */
pushConditional(const boolean firstBranchChosen)261 static boolean pushConditional (const boolean firstBranchChosen)
262 {
263 	const boolean ignoreAllBranches = isIgnore ();  /* current ignore */
264 	boolean ignoreBranch = FALSE;
265 
266 	if (Cpp.directive.nestLevel < (unsigned int) MaxCppNestingLevel - 1)
267 	{
268 		conditionalInfo *ifdef;
269 
270 		++Cpp.directive.nestLevel;
271 		ifdef = currentConditional ();
272 
273 		/*  We take a snapshot of whether there is an incomplete statement in
274 		 *  progress upon encountering the preprocessor conditional. If so,
275 		 *  then we will flag that only a single branch of the conditional
276 		 *  should be followed.
277 		 */
278 		ifdef->ignoreAllBranches = ignoreAllBranches;
279 		ifdef->singleBranch      = Cpp.resolveRequired;
280 		ifdef->branchChosen      = firstBranchChosen;
281 		ifdef->ignoring = (boolean) (ignoreAllBranches || (
282 				! firstBranchChosen  &&  ! BraceFormat  &&
283 				(ifdef->singleBranch || !Option.if0)));
284 		ignoreBranch = ifdef->ignoring;
285 	}
286 	return ignoreBranch;
287 }
288 
289 /*  Pops one nesting level for an #endif directive.
290  */
popConditional(void)291 static boolean popConditional (void)
292 {
293 	if (Cpp.directive.nestLevel > 0)
294 		--Cpp.directive.nestLevel;
295 
296 	return isIgnore ();
297 }
298 
makeDefineTag(const char * const name)299 static void makeDefineTag (const char *const name)
300 {
301 	const boolean isFileScope = (boolean) (! isHeaderFile ());
302 
303 	if (includingDefineTags () &&
304 		(! isFileScope  ||  Option.include.fileScope))
305 	{
306 		tagEntryInfo e;
307 		initTagEntry (&e, name);
308 		e.lineNumberEntry = (boolean) (Option.locate != EX_PATTERN);
309 		e.isFileScope  = isFileScope;
310 		e.truncateLine = TRUE;
311 		e.kindName     = "macro";
312 		e.kind         = 'd';
313 		makeTagEntry (&e);
314 	}
315 }
316 
directiveDefine(const int c)317 static void directiveDefine (const int c)
318 {
319 	if (isident1 (c))
320 	{
321 		readIdentifier (c, Cpp.directive.name);
322 		if (! isIgnore ())
323 			makeDefineTag (vStringValue (Cpp.directive.name));
324 	}
325 	Cpp.directive.state = DRCTV_NONE;
326 }
327 
directivePragma(int c)328 static void directivePragma (int c)
329 {
330 	if (isident1 (c))
331 	{
332 		readIdentifier (c, Cpp.directive.name);
333 		if (stringMatch (vStringValue (Cpp.directive.name), "weak"))
334 		{
335 			/* generate macro tag for weak name */
336 			do
337 			{
338 				c = fileGetc ();
339 			} while (c == SPACE);
340 			if (isident1 (c))
341 			{
342 				readIdentifier (c, Cpp.directive.name);
343 				makeDefineTag (vStringValue (Cpp.directive.name));
344 			}
345 		}
346 	}
347 	Cpp.directive.state = DRCTV_NONE;
348 }
349 
directiveIf(const int c)350 static boolean directiveIf (const int c)
351 {
352 	DebugStatement ( const boolean ignore0 = isIgnore (); )
353 	const boolean ignore = pushConditional ((boolean) (c != '0'));
354 
355 	Cpp.directive.state = DRCTV_NONE;
356 	DebugStatement ( debugCppNest (TRUE, Cpp.directive.nestLevel);
357 	                 if (ignore != ignore0) debugCppIgnore (ignore); )
358 
359 	return ignore;
360 }
361 
directiveHash(const int c)362 static boolean directiveHash (const int c)
363 {
364 	boolean ignore = FALSE;
365 	char directive [MaxDirectiveName];
366 	DebugStatement ( const boolean ignore0 = isIgnore (); )
367 
368 	readDirective (c, directive, MaxDirectiveName);
369 	if (stringMatch (directive, "define"))
370 		Cpp.directive.state = DRCTV_DEFINE;
371 	else if (stringMatch (directive, "undef"))
372 		Cpp.directive.state = DRCTV_UNDEF;
373 	else if (strncmp (directive, "if", (size_t) 2) == 0)
374 		Cpp.directive.state = DRCTV_IF;
375 	else if (stringMatch (directive, "elif")  ||
376 			stringMatch (directive, "else"))
377 	{
378 		ignore = setIgnore (isIgnoreBranch ());
379 		if (! ignore  &&  stringMatch (directive, "else"))
380 			chooseBranch ();
381 		Cpp.directive.state = DRCTV_NONE;
382 		DebugStatement ( if (ignore != ignore0) debugCppIgnore (ignore); )
383 	}
384 	else if (stringMatch (directive, "endif"))
385 	{
386 		DebugStatement ( debugCppNest (FALSE, Cpp.directive.nestLevel); )
387 		ignore = popConditional ();
388 		Cpp.directive.state = DRCTV_NONE;
389 		DebugStatement ( if (ignore != ignore0) debugCppIgnore (ignore); )
390 	}
391 	else if (stringMatch (directive, "pragma"))
392 		Cpp.directive.state = DRCTV_PRAGMA;
393 	else
394 		Cpp.directive.state = DRCTV_NONE;
395 
396 	return ignore;
397 }
398 
399 /*  Handles a pre-processor directive whose first character is given by "c".
400  */
handleDirective(const int c)401 static boolean handleDirective (const int c)
402 {
403 	boolean ignore = isIgnore ();
404 
405 	switch (Cpp.directive.state)
406 	{
407 		case DRCTV_NONE:    ignore = isIgnore ();        break;
408 		case DRCTV_DEFINE:  directiveDefine (c);         break;
409 		case DRCTV_HASH:    ignore = directiveHash (c);  break;
410 		case DRCTV_IF:      ignore = directiveIf (c);    break;
411 		case DRCTV_PRAGMA:  directivePragma (c);         break;
412 		case DRCTV_UNDEF:   directiveDefine (c);         break;
413 	}
414 	return ignore;
415 }
416 
417 /*  Called upon reading of a slash ('/') characters, determines whether a
418  *  comment is encountered, and its type.
419  */
isComment(void)420 static Comment isComment (void)
421 {
422 	Comment comment;
423 	const int next = fileGetc ();
424 
425 	if (next == '*')
426 		comment = COMMENT_C;
427 	else if (next == '/')
428 		comment = COMMENT_CPLUS;
429 	else
430 	{
431 		fileUngetc (next);
432 		comment = COMMENT_NONE;
433 	}
434 	return comment;
435 }
436 
437 /*  Skips over a C style comment. According to ANSI specification a comment
438  *  is treated as white space, so we perform this substitution.
439  */
skipOverCComment(void)440 int skipOverCComment (void)
441 {
442 	int c = fileGetc ();
443 
444 	while (c != EOF)
445 	{
446 		if (c != '*')
447 			c = fileGetc ();
448 		else
449 		{
450 			const int next = fileGetc ();
451 
452 			if (next != '/')
453 				c = next;
454 			else
455 			{
456 				c = SPACE;  /* replace comment with space */
457 				break;
458 			}
459 		}
460 	}
461 	return c;
462 }
463 
464 /*  Skips over a C++ style comment.
465  */
skipOverCplusComment(void)466 static int skipOverCplusComment (void)
467 {
468 	int c;
469 
470 	while ((c = fileGetc ()) != EOF)
471 	{
472 		if (c == BACKSLASH)
473 			fileGetc ();  /* throw away next character, too */
474 		else if (c == NEWLINE)
475 			break;
476 	}
477 	return c;
478 }
479 
480 /*  Skips to the end of a string, returning a special character to
481  *  symbolically represent a generic string.
482  */
skipToEndOfString(boolean ignoreBackslash)483 static int skipToEndOfString (boolean ignoreBackslash)
484 {
485 	int c;
486 
487 	while ((c = fileGetc ()) != EOF)
488 	{
489 		if (c == BACKSLASH && ! ignoreBackslash)
490 			fileGetc ();  /* throw away next character, too */
491 		else if (c == DOUBLE_QUOTE)
492 			break;
493 	}
494 	return STRING_SYMBOL;  /* symbolic representation of string */
495 }
496 
497 /*  Skips to the end of the three (possibly four) 'c' sequence, returning a
498  *  special character to symbolically represent a generic character.
499  *  Also detects Vera numbers that include a base specifier (ie. 'b1010).
500  */
skipToEndOfChar(void)501 static int skipToEndOfChar (void)
502 {
503 	int c;
504 	int count = 0, veraBase = '\0';
505 
506 	while ((c = fileGetc ()) != EOF)
507 	{
508 	    ++count;
509 		if (c == BACKSLASH)
510 			fileGetc ();  /* throw away next character, too */
511 		else if (c == SINGLE_QUOTE)
512 			break;
513 		else if (c == NEWLINE)
514 		{
515 			fileUngetc (c);
516 			break;
517 		}
518 		else if (count == 1  &&  strchr ("DHOB", toupper (c)) != NULL)
519 			veraBase = c;
520 		else if (veraBase != '\0'  &&  ! isalnum (c))
521 		{
522 			fileUngetc (c);
523 			break;
524 		}
525 	}
526 	return CHAR_SYMBOL;  /* symbolic representation of character */
527 }
528 
529 /*  This function returns the next character, stripping out comments,
530  *  C pre-processor directives, and the contents of single and double
531  *  quoted strings. In short, strip anything which places a burden upon
532  *  the tokenizer.
533  */
cppGetc(void)534 extern int cppGetc (void)
535 {
536 	boolean directive = FALSE;
537 	boolean ignore = FALSE;
538 	int c;
539 
540 	if (Cpp.ungetch != '\0')
541 	{
542 		c = Cpp.ungetch;
543 		Cpp.ungetch = Cpp.ungetch2;
544 		Cpp.ungetch2 = '\0';
545 		return c;  /* return here to avoid re-calling debugPutc () */
546 	}
547 	else do
548 	{
549 		c = fileGetc ();
550 process:
551 		switch (c)
552 		{
553 			case EOF:
554 				ignore    = FALSE;
555 				directive = FALSE;
556 				break;
557 
558 			case TAB:
559 			case SPACE:
560 				break;  /* ignore most white space */
561 
562 			case NEWLINE:
563 				if (directive  &&  ! ignore)
564 					directive = FALSE;
565 				Cpp.directive.accept = TRUE;
566 				break;
567 
568 			case DOUBLE_QUOTE:
569 				Cpp.directive.accept = FALSE;
570 				c = skipToEndOfString (FALSE);
571 				break;
572 
573 			case '#':
574 				if (Cpp.directive.accept)
575 				{
576 					directive = TRUE;
577 					Cpp.directive.state  = DRCTV_HASH;
578 					Cpp.directive.accept = FALSE;
579 				}
580 				break;
581 
582 			case SINGLE_QUOTE:
583 				Cpp.directive.accept = FALSE;
584 				c = skipToEndOfChar ();
585 				break;
586 
587 			case '/':
588 			{
589 				const Comment comment = isComment ();
590 
591 				if (comment == COMMENT_C)
592 					c = skipOverCComment ();
593 				else if (comment == COMMENT_CPLUS)
594 				{
595 					c = skipOverCplusComment ();
596 					if (c == NEWLINE)
597 						fileUngetc (c);
598 				}
599 				else
600 					Cpp.directive.accept = FALSE;
601 				break;
602 			}
603 
604 			case BACKSLASH:
605 			{
606 				int next = fileGetc ();
607 
608 				if (next == NEWLINE)
609 					continue;
610 				else if (next == '?')
611 					cppUngetc (next);
612 				else
613 					fileUngetc (next);
614 				break;
615 			}
616 
617 			case '?':
618 			{
619 				int next = fileGetc ();
620 				if (next != '?')
621 					fileUngetc (next);
622 				else
623 				{
624 					next = fileGetc ();
625 					switch (next)
626 					{
627 						case '(':          c = '[';       break;
628 						case ')':          c = ']';       break;
629 						case '<':          c = '{';       break;
630 						case '>':          c = '}';       break;
631 						case '/':          c = BACKSLASH; goto process;
632 						case '!':          c = '|';       break;
633 						case SINGLE_QUOTE: c = '^';       break;
634 						case '-':          c = '~';       break;
635 						case '=':          c = '#';       goto process;
636 						default:
637 							fileUngetc (next);
638 							cppUngetc ('?');
639 							break;
640 					}
641 				}
642 			} break;
643 
644 			default:
645 				if (c == '@' && Cpp.hasAtLiteralStrings)
646 				{
647 					int next = fileGetc ();
648 					if (next == DOUBLE_QUOTE)
649 					{
650 						Cpp.directive.accept = FALSE;
651 						c = skipToEndOfString (TRUE);
652 						break;
653 					}
654 				}
655 				Cpp.directive.accept = FALSE;
656 				if (directive)
657 					ignore = handleDirective (c);
658 				break;
659 		}
660 	} while (directive || ignore);
661 
662 	DebugStatement ( debugPutc (DEBUG_CPP, c); )
663 	DebugStatement ( if (c == NEWLINE)
664 				debugPrintf (DEBUG_CPP, "%6ld: ", getInputLineNumber () + 1); )
665 
666 	return c;
667 }
668 
669 /* vi:set tabstop=4 shiftwidth=4: */
670