1 /*
2 * Copyright (c) 2003 - 2006, Nils R. Weller
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
19 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 * POSSIBILITY OF SUCH DAMAGE.
26 *
27 * Preprocessor driver
28 */
29 #include "preprocess.h"
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <limits.h>
34 #include <assert.h>
35 #include <ctype.h>
36 #include <sys/types.h>
37 #include <sys/mman.h>
38 #include <sys/stat.h>
39 #include <fcntl.h>
40 #include <unistd.h>
41 #include <errno.h>
42 #include "token.h"
43 #include "defs.h"
44 #include "error.h"
45 #include "expr.h"
46 #include "numlimits.h"
47 #include "type.h"
48 #include "n_libc.h"
49 #include "macros.h"
50
51
#ifdef DEBUG
static void print_token_list(struct token *list);
#endif

/*
 * Forward declaration - do_include() calls this to re-evaluate the include
 * guard of a header that has been seen before
 */
static int
complete_directive(
    FILE *out,
    struct include_file *incf,
    struct pp_directive *dir,
    struct token **toklist,
    int *has_data);


/* Performs macro substitution on a token list */
struct token *
do_macro_subst(struct input_file *in, FILE *out,
    struct token *toklist,
    struct token **tailp,
    int dontoutput);


const char          *cur_inc;
int                 cur_inc_is_std;
int                 g_recording_tokens;
char                g_textbuf[2048];
/* List of directories that do_include() searches for header files */
struct include_dir  *include_dirs;

static int          pre_directive = 1;
/* Current line number; kept up to date by get_next_char()/unget_char() */
int                 lineno = 1;
/* Name of the file passed to set_compiler_line() after an #include or #line */
char                *curfile = NULL;
81
82 static int
try_mmap(struct input_file * infile,const char * input,int silent)83 try_mmap(struct input_file *infile, const char *input, int silent) {
84 int fd;
85 struct stat s;
86 int saved_errno;
87
88 #if 0
89 errno = 0;
90 return -1;
91 #endif
92 fd = open(input, O_RDONLY);
93 if (fd == -1) {
94 saved_errno = errno;
95 if (!silent) {
96 perror(input);
97 }
98 errno = saved_errno;
99 return -1;
100 } else if (fstat(fd, &s) == -1 || !S_ISREG(s.st_mode)) {
101 saved_errno = errno;
102 (void) close(fd);
103 errno = saved_errno;
104 return -1;
105 }
106 infile->filesize = s.st_size;
107 infile->filemap = mmap(0, s.st_size, PROT_READ, MAP_SHARED, fd, 0);
108 if (infile->filemap == MAP_FAILED) {
109 saved_errno = errno;
110 if (!silent) {
111 perror("mmap");
112 }
113 errno = saved_errno;
114 return -1;
115 }
116 (void) close(fd);
117 infile->filemapend = infile->filemap + infile->filesize;
118 infile->filep = infile->filemap;
119 return 0;
120 }
121
122 int
open_input_file(struct input_file * inf,const char * input,int silent)123 open_input_file(struct input_file *inf, const char *input, int silent) {
124 inf->unread_idx = 0;
125
126 if (try_mmap(inf, input, silent) == 0) {
127 inf->fd = NULL;
128 return 0;
129 }
130 inf->filemap = NULL;
131 if (errno == ENOENT) {
132 /* File doesn't exist, give up */
133 inf->fd = NULL;
134 return -1;
135 }
136
137 if ((inf->fd = fopen(input, "r")) == NULL) {
138 if (!silent) {
139 perror(input);
140 }
141 return -1;
142 }
143 return 0;
144 }
145
146 int
file_is_open(struct input_file * inf)147 file_is_open(struct input_file *inf) {
148 return inf->fd != NULL || inf->filemap != NULL;
149 }
150
151 void
close_input_file(struct input_file * inf)152 close_input_file(struct input_file *inf) {
153 if (inf->fd != NULL) {
154 (void) fclose(inf->fd);
155 } else {
156 (void) munmap(inf->filemap, inf->filesize);
157 }
158 }
159
160 static int
do_get_next_char(struct input_file * inf)161 do_get_next_char(struct input_file *inf) {
162 if (inf->unread_idx > 0) {
163 int ch = inf->unread_chars[--inf->unread_idx];
164 return ch;
165 }
166 if (inf->fd != NULL) {
167 return getc(inf->fd);
168 } else {
169 if (inf->filep == inf->filemapend) {
170 return EOF;
171 }
172 /*
173 * 05/23/09: Holy cow, this assignment was doing
174 * sign-extension behind our back... So unprintable
175 * 0xff chars were getting mixed up with EOF. Of
176 * course we want getc()-like semantics instead
177 * where the character is converted to unsigned
178 * char
179 */
180 return (unsigned char)*inf->filep++;
181 }
182 }
183
184 int
get_next_char(struct input_file * inf)185 get_next_char(struct input_file *inf) {
186 int ch = do_get_next_char(inf);
187
188 static int cnt;
189 ++cnt;
190
191 //if (ch == '(' || ch == ')')
192 if (ch != EOF)
193 {
194 // printf(" %d: GOT %c = %d\n",cnt, ch, ch);
195 }
196
197 if (ch == '\n') {
198 lex_line_ptr = lex_file_map + lex_chars_read;
199 err_setlineptr(lex_line_ptr);
200 ++lineno;
201 } else if (ch == '\\') {
202 if ((ch = do_get_next_char(inf)) == '\n') {
203 /*
204 * Line continued - XXX wow this doesn't handle
205 * multiple adjacent \\\n constructs
206 */
207 ++lex_chars_read;
208 ++lineno;
209 if ((ch = do_get_next_char(inf)) == '\n') {
210 ++lex_chars_read;
211 ++lineno;
212 }
213 return ch;
214 } else {
215 --lex_chars_read;
216 unget_char(ch, inf);
217 return '\\';
218 }
219 }
220
221 return ch;
222 }
223
224 int
unget_char(int ch,struct input_file * inf)225 unget_char(int ch, struct input_file *inf) {
226 static int seq;
227 ++seq;
228
229 if (inf->unread_idx + 1>
230 (int)(sizeof inf->unread_chars / sizeof inf->unread_chars[0])) {
231 (void) fprintf(stderr, "BUG: unget_char() with too many chars\n");
232 abort();
233 }
234 inf->unread_chars[inf->unread_idx++] = ch;
235 if (ch == '\n') {
236 --lineno;
237 }
238 return 0;
239 }
240
241 static int
get_string(struct input_file * inf,char * buf,size_t bufsiz,char * buf_verbatim,int * lastch,int * len,int * key0)242 get_string(struct input_file *inf, char *buf, size_t bufsiz,
243 char *buf_verbatim,
244 int *lastch, int *len, int *key0) {
245 char *p;
246 int ch;
247 int key = 0;
248
249 if (buf_verbatim != NULL) {
250 *buf_verbatim++ = '#';
251 }
252
253 /* Get directive */
254 while ((ch = FGETC(inf)) != EOF && isspace(ch)) {
255 if (buf_verbatim != NULL) {
256 *buf_verbatim++ = ch;
257 }
258 if (ch == '\n') {
259 UNGETC(ch, inf);
260 *buf = 0;
261 return 0;
262 }
263 }
264
265 for (p = buf;;) {
266 if (ch == EOF) {
267 break;
268 } else if (!isalnum(ch) && ch != '_' && ch != '$') {
269 UNGETC(ch, inf);
270 break;
271 } else if (p == buf + bufsiz - 2) {
272 return -1;
273 }
274 *p++ = ch;
275 if (buf_verbatim != NULL) {
276 *buf_verbatim++ = ch;
277 }
278 key = (key * 33 + ch) & N_HASHLIST_MOD;
279 ch = FGETC(inf);
280 }
281 *p = 0;
282 if (buf_verbatim != NULL) {
283 *buf_verbatim = 0;
284 }
285 *lastch = ch;
286
287 if (len != NULL) {
288 *len = p - buf;
289 *key0 = key;
290 }
291 return 0;
292 }
293
/* Directive codes stored in pp_directive.code */
#define CMD_DEFINE          1
#define CMD_UNDEF           2
#define CMD_IF              3
#define CMD_ELSE            4
#define CMD_ELIF            5
#define CMD_ENDIF           6
#define CMD_ERROR           7
#define CMD_WARNING         8
#define CMD_LINE            9
#define CMD_INCLUDE         10
#define CMD_IFDEF           11
#define CMD_IFNDEF          12
#define CMD_INCLUDE_NEXT    13 /* GNU C... */
#define CMD_IDENT           14 /* common extension */
#define CMD_PRAGMA          15
#define CMD_PASSTHRU        16 /* line is handed through verbatim */


/*
 * Directory in which the most recent header was found (null if it was
 * opened relative to the current directory); starting point for
 * #include_next
 */
static struct include_dir *lastdir;


/*
 * A preprocessor directive. do_directive() fills in a copy of the
 * matching entry of its directive table plus the argument data it could
 * already parse.
 */
struct pp_directive {
    const char          *name;      /* directive name without the # */
    int                 code;       /* CMD_* value */
    int                 takes_arg;  /* nonzero: a missing argument is an error */
    int                 determined; /* NOTE(review): not used in the code visible here */
    int                 significant; /* NOTE(review): only read by complete_directive() - confirm meaning */
    /*
     * #define: the struct macro being defined
     * #ifdef/#ifndef: copy of the identifier
     * #include/#include_next: the file name including its delimiters
     * CMD_PASSTHRU: the text to pass through
     */
    void                *data;
    int                 len;        /* identifier length as computed by get_string() */
    int                 key;        /* identifier hash key as computed by get_string() */
    struct pp_directive *next;
    struct pp_directive *prev;
};
327
328 static void
set_compiler_line(FILE * out,int line,const char * file)329 set_compiler_line(FILE *out, int line, const char *file) {
330 int old_g_ignore_text = g_ignore_text;
331
332 g_ignore_text = 0;
333 x_fprintf(out, "# %d \"%s\"\n", line, file);
334 g_ignore_text = old_g_ignore_text;
335 }
336
337 static int
check_comment(struct input_file * inf)338 check_comment(struct input_file *inf) {
339 int ch;
340
341 if ((ch = FGETC(inf)) == EOF) {
342 return -1;
343 } else if (ch == '*') {
344 int sline = lineno;
345
346 /* C style comment */
347 for (;;) {
348 ch = FGETC(inf);
349 if (ch == EOF) {
350 lineno = sline;
351 lexerror("Unterminated comment "
352 "(started in line %d)", sline);
353 return -1;
354 } else if (ch == '\n') {
355 /*x_fputc('\n', out);*/
356 } else if (ch == '*') {
357 if ((ch = FGETC(inf)) == '/') {
358 /* comment complete */
359 break;
360 }
361 }
362 }
363 return 0;
364 } else if (ch == '/' /* && standard != C89 */) {
365 /* C99/C++ style comment */
366 while ((ch = FGETC(inf)) != '\n' && ch != EOF)
367 ;
368 if (ch != EOF) {
369 UNGETC('\n', inf);
370 }
371 return 0;
372 } else {
373 return 1;
374 }
375 }
376
/*
 * Consume the rest of the current line including the newline (or
 * everything up to EOF). At least one character is always read. ``ch''
 * must be an int lvalue; it receives the last character read.
 */
#define EAT_LINE(in, ch) \
    do (ch) = FGETC(in); while ((ch) != EOF && (ch) != '\n')
379
380
/*
 * Check the remainder of a directive line for junk. ``ch'' is the
 * character the caller has looked at last and ``dir'' is the directive
 * name used in the diagnostic. White space and comments are skipped; if
 * anything else is found before the end of the line, a warning is given
 * and the rest of the line is discarded.
 *
 * NOTE(review): the callers in this file are not consistent about whether
 * ``ch'' is still pending in the input (pushed back by get_string()) or
 * has already been consumed - confirm before changing the logic below.
 */
static void
check_garbage(struct input_file *inf, int ch, const char *dir) {
    if (ch == '\n') {
        /* Nothing follows; consume the newline */
        (void) FGETC(inf);
        return;
    }

    for (;;) {
        int rc;

        if (ch == '\n' || ch == EOF) {
            return;
        } else if (isspace(ch)) {
            do {
                ch = FGETC(inf);
                if (ch == '\n') {
                    return;
                }
            } while (ch != EOF && isspace(ch));
        } else if (ch == '/') {
            rc = check_comment(inf);
            if (rc == -1) {
                return;
            } else if (rc == 1) {
                /* A lone slash is junk */
                break;
            }
            ch = FGETC(inf);
        } else {
            break;
        }
    }

    if (ch != EOF) {
        lexwarning("Ignoring junk after preprocessor directive `%s'", dir);
        EAT_LINE(inf, ch);
    }
}
415
416 void
dump_toklist(struct token * t)417 dump_toklist(struct token *t) {
418 fprintf(stderr, "----HERE GOES THE TOKLIST--------");
419 for (; t; t = t->next) {
420 fprintf(stderr, " LOL %d = %s (%p)\n",
421 t->type, t->ascii, t);
422 }
423 }
424
425
426 static unsigned long cur_directive_start;
427
428 /*
429 * do_directive() is called by preprocess() when a line begins with a
430 * # as first non-whitespace char. For the #undef and #endif directives,
431 * proecssing can be completed here already. For others, it is necessary
432 * to record subsequent tokens on the same line and then to evaluate them.
433 * To do so we just return to preprocess(), which will record the tokens and
434 * call complete_directive() when the line ends
435 */
436 static int
do_directive(struct input_file * inf,struct pp_directive * dir)437 do_directive(struct input_file *inf, struct pp_directive *dir) {
438 char buf[128];
439 char buf_verbatim[256];
440 int ch;
441 int i;
442 int len = 0;
443 int key = -1;
444 static const struct pp_directive directives[] = {
445 { "define", CMD_DEFINE, 1, 0, 0, NULL, 0,0,0,0 },
446 { "undef", CMD_UNDEF, 1, 0, 0, NULL, 0,0,0,0 },
447 { "include", CMD_INCLUDE, 1, 0, 0, NULL, 0,0,0,0 },
448 { "include_next", CMD_INCLUDE_NEXT, 1, 0, 0, NULL, 0,0,0,0 },
449 { "if", CMD_IF, 1, 0, 0, NULL, 0,0,0,0 },
450 { "else", CMD_ELSE, 0, 0, 0, NULL, 0,0,0,0 },
451 { "elif", CMD_ELIF, 1, 0, 0, NULL, 0,0,0,0 },
452 { "endif", CMD_ENDIF, 0, 0, 0, NULL, 0,0,0,0 },
453 { "ifdef", CMD_IFDEF, 0, 0, 0, NULL, 0,0,0,0 },
454 { "ifndef", CMD_IFNDEF, 0, 0, 0, NULL, 0,0,0,0 },
455 { "warning", CMD_WARNING, 0, 0, 0, NULL, 0,0,0,0 },
456 { "error", CMD_ERROR, 0, 0, 0, NULL, 0,0,0,0 },
457 { "line", CMD_LINE, 1, 0, 0, NULL, 0,0,0,0 },
458 { "ident", CMD_IDENT, 1, 0, 0, NULL, 0,0,0,0 },
459 { "pragma", CMD_PRAGMA, 0, 0, 0, NULL, 0,0,0,0 },
460 { NULL, 0, 0, 0, 0, NULL, 0,0,0,0 }
461 };
462
463 dir->data = NULL;
464 cur_directive_start = lex_chars_read;
465
466 if (get_string(inf, buf, sizeof buf,
467 buf_verbatim, &ch, NULL, NULL) != 0
468 /*|| !isspace(ch)*/) {
469 lexerror("Invalid preprocessor directive");
470 EAT_LINE(inf, ch);
471 return -1;
472 }
473
474 if (*buf == 0 || isdigit((unsigned char)*buf)) {
475 /*
476 * Hmm, GNU cpp just passes # through
477 * 05/21/09: Line number setting directives
478 * such as # 20 "/usr/include/stdio.h"
479 * are also passed through
480 */
481 if (strchr(buf_verbatim, '\n') != NULL) {
482 char *p = strchr(buf_verbatim, 0);
483
484 while ((ch = FGETC(inf)) != EOF) {
485 *p++ = ch;
486 if (ch == '\n') {
487 break;
488 }
489 }
490 *p = 0;
491 }
492 dir->code = CMD_PASSTHRU;
493 dir->data = n_xstrdup(buf_verbatim);
494 return 0;
495 }
496
497 for (i = 0; directives[i].name != NULL; ++i) {
498 if (strcmp(directives[i].name, buf) == 0) {
499 break;
500 }
501 }
502 if (directives[i].name == NULL) {
503 lexerror("Unknown preprocessor directive `%s'", buf);
504 EAT_LINE(inf, ch);
505 return -1;
506 }
507 *dir = directives[i];
508 if (dir->takes_arg && ch == '\n') {
509 lexerror("No argument given for preprocessor "
510 "directive `%s'", buf);
511 return -1;
512 }
513 if (dir->code == CMD_DEFINE) {
514 struct macro *m = alloc_macro();
515
516 if (get_string(inf, buf, sizeof buf, NULL, &ch, &len, &key) != -1) {
517 if (g_ignore_text) {
518 EAT_LINE(inf, ch);
519 return 1;
520 }
521 m->name = n_xmemdup(buf, len+1); /*n_xstrdup(buf);*/
522 if (ch == EOF || ch == '\n') {
523 empty:
524 /* #define EMPTY */
525 m->empty = 1;
526 EAT_LINE(inf, ch);
527 } else if (ch == '(') {
528 struct macro_arg *arglist = NULL;
529 struct macro_arg *arglist_tail = NULL;
530 struct macro_arg *ma;
531 static struct macro_arg nullma;
532
533 /* function-like macro */
534 (void) FGETC(inf); /* eat ( */
535 m->functionlike = 1;
536
537 /* Read parameters */
538 for (;;) {
539 int trailing_last = 0;
540
541 if (get_string(inf, buf,
542 sizeof buf, NULL, &ch,0,0) == -1) {
543 EAT_LINE(inf, ch);
544 return -1;
545 }
546 switch (ch) {
547 case ',':
548 case ')':
549 (void) FGETC(inf);
550 break;
551 default:
552 if (ch == '.') {
553 (void) FGETC(inf);
554 if (FGETC(inf) != '.'
555 || FGETC(inf) != '.') {
556 goto garbage;
557 }
558 if (m->trailing_last) {
559 lexerror("Macro "
560 "already has a trailing last argument");
561 EAT_LINE(inf,ch);
562 return -1;
563 }
564 trailing_last = 1;
565 } else if (!isspace(ch)) {
566 garbage:
567 lexerror("Garbage in "
568 "macro parameter list"
569 " - `%c'", ch);
570 EAT_LINE(inf, ch);
571 return -1;
572 } else if (ch == '\n') {
573 lexerror("Incomplete macro"
574 "parameter list");
575 EAT_LINE(inf, ch);
576 return -1;
577 }
578 }
579 if (*buf == 0 && ch == ')') {
580 break;
581 } else if (*buf == 0 && arglist != NULL) {
582 lexerror("Empty macro parameter");
583 EAT_LINE(inf, ch);
584 }
585 ma = n_xmalloc(sizeof *ma);
586 *ma = nullma;
587 ma->name = n_xstrdup(buf);
588 if (arglist == NULL) {
589 arglist = arglist_tail = ma;
590 } else {
591 arglist_tail->next = ma;
592 arglist_tail =
593 arglist_tail->next;
594 }
595 if (trailing_last) {
596 m->trailing_last = ma;
597 }
598 if (ch == ')') {
599 /* done */
600 break;
601 }
602 }
603 m->arglist = arglist;
604 } else if (!isspace(ch)) {
605 /*
606 * 05/23/09: Lack of whitespace does not
607 * warrant an error. This may be e.g. a
608 * comment
609 */
610 #if 0
611 lexerror("Invalid character `%c' after macro "
612 "name `%s'", ch, buf);
613 return -1;
614 #endif
615 UNGETC(ch, inf);
616 } else {
617 do {
618 if ((ch = FGETC(inf)) == EOF
619 || ch == '\n') {
620 if (ch == '\n') {
621 UNGETC(ch, inf);
622 }
623 goto empty;
624 }
625 } while (isspace(ch));
626 UNGETC(ch, inf);
627 }
628
629 /*
630 * Only empty macros can already be stored
631 * because the macro body is needed to
632 * check identical redefinitions
633 */
634 if (m->empty
635 && (m = put_macro(m, len, key)) == NULL) {
636 return -1;
637 }
638 }
639 dir->data = m;
640 dir->len = len;
641 dir->key = key;
642 } else if (dir->code == CMD_UNDEF
643 || dir->code == CMD_IFDEF
644 || dir->code == CMD_IFNDEF) {
645 /* These directives just take an identifier */
646 if (get_string(inf, buf, sizeof buf, NULL, &ch, &len, &key) != -1) {
647 check_garbage(inf, ch, dir->name);
648 }
649 if (dir->code == CMD_UNDEF) {
650 if (!g_ignore_text) {
651 (void) drop_macro(buf, len, key);
652 }
653 } else {
654 dir->data = n_xmemdup(buf, len+1); /*n_xstrdup(buf);*/
655 dir->len = len;
656 dir->key = key;
657 }
658 return 0;
659 } else if (dir->code == CMD_LINE) {
660 ;
661 } else if (dir->code == CMD_INCLUDE
662 || dir->code == CMD_INCLUDE_NEXT) {
663 if (isspace(ch) && ch != '\n') {
664 do {
665 ch = FGETC(inf);
666 } while (isspace(ch) && ch != EOF && ch != '\n');
667 }
668 if (ch == '<' || ch == '"') {
669 char *p = NULL;
670 int lookingfor = ch == '<'? '>': '"';
671
672 store_char(NULL, 0);
673 store_char(&p, ch);
674 while ((ch = FGETC(inf)) != EOF) {
675 store_char(&p, ch);
676 if (ch == '\n') {
677 lexerror("Incomplete #include directive");
678 return -1;
679 } else if (ch == lookingfor) {
680 /* done! */
681 store_char(&p, 0);
682 break;
683 }
684 }
685 if (ch == EOF) {
686 lexerror("Premature end of file");
687 return -1;
688 }
689 dir->data = p;
690 dir->len = 0;
691 dir->key = -1;
692 if ((ch = FGETC(inf)) != EOF && ch != '\n') {
693 check_garbage(inf, ch, dir->name);
694 }
695 } else if (ch == '\n' || ch == EOF) {
696 lexerror("Empty #include directive");
697 return -1;
698 } else {
699 /* #include pp-tok */
700 UNGETC(ch, inf);
701 }
702 return 0;
703 } else if (dir->code == CMD_IF
704 || dir->code == CMD_ELIF) {
705 ;
706 } else if (dir->code == CMD_ERROR
707 || dir->code == CMD_WARNING) {
708 if (ch == '\n') {
709 /* Done! */
710 if (!g_ignore_text) {
711 if (dir->code == CMD_ERROR) {
712 lexerror("#error");
713 } else {
714 lexwarning("#warning");
715 }
716 }
717 return 0;
718 } else if (isspace(ch)) {
719 do ch = FGETC(inf); while (isspace(ch) && ch != '\n');
720 if (ch == '\n') {
721 if (dir->code == CMD_ERROR) {
722 lexerror("#error");
723 } else {
724 lexwarning("#warning");
725 }
726 return 0;
727 }
728 UNGETC(ch, inf);
729 }
730
731 /*
732 * If you think we can just dump ``a line of stuff''
733 * back to the user - think twice. The syntax is
734 * ``#error pp-tokens<opt> newline''
735 * So we need to check token validity, e.g. for string
736 * constants (ucpp doesn't, and accepts #error "foo).
737 */
738 } else if (dir->code == CMD_ENDIF
739 || dir->code == CMD_ELSE) {
740 #if 0
741 check_garbage(in, ch, dir->name);
742 (void) FGETC(in);
743 #endif
744 } else if (dir->code == CMD_IDENT) {
745 char *p = NULL;
746
747 store_char(NULL, 0);
748 if (isspace(ch) && ch != '\n') {
749 do {
750 ch = FGETC(inf);
751 } while (isspace(ch) && ch != EOF && ch != '\n');
752 }
753 if (ch != '"') {
754 puts("invalid #ident directive");
755 exit(1);
756 #if 0
757 /* irix doesn't seem to like this :( */
758 error("Invalid #ident directive");
759 return -1;
760 #endif
761 }
762 while ((ch = FGETC(inf)) != EOF) {
763 store_char(&p, ch);
764 if (ch == '\n') {
765 lexerror("Incomplete #ident directive");
766 return -1;
767 } else if (ch == '"') {
768 /* done! */
769 store_char(&p, 0);
770 break;
771 }
772 }
773 if ((ch = FGETC(inf)) != EOF && ch != '\n') {
774 check_garbage(inf, ch, dir->name);
775 }
776 return 0;
777 } else if (dir->code == CMD_PRAGMA) {
778 /* Ignore for now */
779 if (ch != '\n') {
780 do {
781 ch = FGETC(inf);
782 } while (ch != EOF && ch != '\n');
783 }
784 }
785 return 0;
786 }
787
788
789 /*
790 * The overwhelming majority of include files uses include guards. But
791 * if those files are included more than once, they still may have to
792 * be read and processed to find the #endif belonging to the include
793 * gaurd. In order to prevent this some programmers tend to use
794 * ``redundant include guards''. (I haven't been using such guards
795 * since 2003 because I think they suck.) In order to implement this
796 * in the preprocessor, we record the position of the first #if/#ifdef/
797 * #ifndef and the corresponding #endif for every include file, and
798 * whether that covers the entire header. The helps us skip a lot of
799 * stuff, particularly in the system headers.
800 */
801 static struct include_file *current_include;
802 static struct include_dir current_working_directory; /* misnomer */
803
804 static struct include_file *
lookup_include(struct include_dir * dir,const char * name)805 lookup_include(struct include_dir *dir, const char *name) {
806 struct include_file *inf;
807 size_t len = strlen(name);
808
809 for (inf = dir->inc_files; inf != NULL; inf = inf->next) {
810 if (inf->namelen == len) {
811 if (memcmp(inf->name, name, len) == 0) {
812 return inf;
813 }
814 }
815 }
816
817 return NULL;
818 }
819
820 static void
put_include(struct include_dir * dir,struct include_file * inc)821 put_include(struct include_dir *dir, struct include_file *inc) {
822 inc->namelen = strlen(inc->name);
823 if (dir->inc_files == NULL) {
824 dir->inc_files = dir->inc_files_tail = inc;
825 } else {
826 dir->inc_files_tail->next = inc;
827 dir->inc_files_tail = inc;
828 }
829 }
830
831
832 static int
do_include(FILE * out,char * str,struct token * toklist,int type)833 do_include(FILE *out, char *str, struct token *toklist, int type) {
834 char *p;
835 char *oldname;
836 int rc;
837 int oldline;
838 char *oldfile = curfile;
839 struct macro *mp;
840 static int nesting;
841 struct input_file inf;
842 struct include_file *old_current_include = NULL;
843 static struct input_file nullf;
844 struct include_dir *source_dir = NULL;
845 struct include_file *cached_file;
846 struct include_file *new_cached_file = NULL;
847 size_t old_lex_chars_read;
848
849 if (++nesting > 1000) {
850 lexerror("Include file nesting way too deep!");
851 return -1;
852 }
853
854 inf = nullf;
855 if (str == NULL) {
856 /* First build include argument from token list */
857 toklist = do_macro_subst(NULL, NULL, toklist, NULL, 1);
858 str = toklist_to_string(toklist);
859 }
860 p = strchr(str, 0);
861 if (*str == 0 || --p == str+1) {
862 lexerror("Empty include file name");
863 return -1;
864 } else if ((*p != '"' && *p != '>')
865 || (*p == '"' && *str != '"')
866 || (*p == '>' && *str != '<')) {
867 lexerror("#include sytnax is \"file\" or <file>");
868 return -1;
869 }
870 *p = 0; /* cut " or < */
871
872 if (*str == '"') {
873 /*
874 * Try opening file in . first, then fall back to standard
875 * directories (actually absolute paths are ok too.)
876 */
877 #if 0
878 if ((fd = fopen(str+1, "r")) != NULL) {
879 #endif
880 if (open_input_file(&inf, str+1, 1) == 0) {
881 /**p = '"';*/
882 inf.path = str+1;
883 lastdir = NULL;
884 source_dir = ¤t_working_directory;
885 }
886 }
887 #if 0
888 if (fd == NULL) {
889 #endif
890 if (!file_is_open(&inf)) {
891 /* Try standard includes */
892 char *buf;
893 struct include_dir *id;
894
895 if (type == CMD_INCLUDE_NEXT && lastdir != NULL) {
896 if ((id = lastdir->next) == NULL) {
897 id = include_dirs;
898 }
899 } else {
900 id = include_dirs;
901 }
902 for (;; id = id->next) {
903 if (id == NULL) {
904 /* End of directory list reached */
905 if (type == CMD_INCLUDE_NEXT) {
906 if (lastdir != NULL) {
907 /*
908 * We started somewhere in the
909 * middle of the directory
910 * list; wrap around!
911 */
912 id = include_dirs;
913 lastdir = NULL;
914 } else {
915 break;
916 }
917 } else {
918 break;
919 }
920 }
921
922 buf = n_xmalloc(strlen(id->path) +
923 sizeof "/" + strlen(str+1));
924 sprintf(buf, "%s/%s", id->path, str+1);
925 #if 0
926 if ((fd = fopen(buf, "r")) != NULL) {
927 #endif
928 if (open_input_file(&inf, buf, 1) == 0) {
929 lastdir = id;
930 inf.path = buf;
931 source_dir = id;
932 break;
933 } else {
934 free(buf);
935 }
936 }
937 }
938
939 #if 0
940 if ((inf.fd = fd) == NULL) {
941 #endif
942 if (!file_is_open(&inf)) {
943 lexerror("Cannot open include file `%s'", str+1);;
944 return -1;
945 }
946
947 inf.is_header = 1;
948 old_current_include = current_include;
949 if ((cached_file = lookup_include(source_dir, str+1)) != NULL) {
950 if (cached_file->has_guard) {
951 if (cached_file->fully_guarded) {
952 if (complete_directive(NULL, cached_file,
953 NULL, NULL, NULL) == 0) {
954 /*
955 * Include guard condition evaluates
956 * to false - as expected - so the
957 * file need not be read
958 */
959 #if 0
960 (void) fclose(inf.fd); /* XXX */
961 #endif
962 close_input_file(&inf);
963 goto out;
964 }
965 }
966 }
967 /* file is already known - don't record guard */
968 current_include = NULL;
969 } else {
970 /* Processing new file */
971 static struct include_file nullif;
972
973 new_cached_file = n_xmalloc(sizeof *new_cached_file);
974 *new_cached_file = nullif;
975 new_cached_file->name = str+1;
976 current_include = new_cached_file;
977 }
978
979 #if 0
980 fprintf(stderr, "processing %s\n", inf.path);
981 #endif
982 /*
983 * __FILE__ and compiler line information must be updated!
984 */
985 if ((mp = lookup_macro("__FILE__", 0, -1)) != NULL) {
986 oldname = mp->builtin;
987 mp->builtin = inf.path;
988 }
989
990 old_lex_chars_read = lex_chars_read;
991 lex_chars_read = 0;
992
993 /* Inform the compiler that we're doing a new file */
994 set_compiler_line(out, 1, inf.path);
995 oldline = lineno;
996 lineno = 1;
997
998 rc = preprocess(&inf, out);
999 #if 0
1000 (void) fclose(inf.fd);
1001 #endif
1002 close_input_file(&inf);
1003 /* free(inf.path); */
1004
1005 lex_chars_read = old_lex_chars_read;
1006
1007 /* Restore old line number */
1008 lineno = oldline;
1009 curfile = oldfile;
1010 set_compiler_line(out, lineno, curfile);
1011
1012 if (mp != NULL) {
1013 mp->builtin = oldname;
1014 }
1015 if (new_cached_file != NULL) {
1016 put_include(source_dir, new_cached_file);
1017 }
1018 current_include = old_current_include;
1019
1020
1021 out:
1022 --nesting;
1023 return rc;
1024 }
1025
/*
 * cond_dir_list records the conditional preprocessor directives such as #if,
 * #elif, #ifdef, etc. It is used to match new occurrences of such directives
 * with previous ones. The list should be read like a stack (LIFO), where the
 * tail points to the top. When a directive is terminated by an #endif, all
 * directives belonging to it are removed from the tail.
 */
static struct pp_directive  *cond_dir_list;
static struct pp_directive  *cond_dir_cur_start;
static struct pp_directive  *cond_dir_list_tail;
/*
 * Consulted by the directive handling to skip work while set; it is
 * cleared temporarily by set_compiler_line() while a line marker is written
 */
int                         g_ignore_text = 0;
1037
1038 /* XXX move to evalexpr.c */
1039 int
1040 value_is_nonzero(struct tyval *cv) {
1041 int evaluates_true = 0;
1042
1043 /*
1044 * The resulting expression must have integral
1045 * type
1046 */
1047 #define EVALTRUE(cv, ty) *(ty *)cv->value != 0
1048 switch (cv->type->code) {
1049 case TY_INT:
1050 evaluates_true = EVALTRUE(cv, int);
1051 break;
1052 case TY_UINT:
1053 evaluates_true = EVALTRUE(cv, unsigned int);
1054 break;
1055 case TY_LONG:
1056 evaluates_true = EVALTRUE(cv, long);
1057 break;
1058 case TY_ULONG:
1059 evaluates_true = EVALTRUE(cv, unsigned long);
1060 break;
1061 case TY_LLONG:
1062 case TY_ULLONG: {
1063 /*
1064 * Avoid relying on compiler support for
1065 * long long, but assume 64 bits
1066 */
1067 unsigned char *p =cv->value;
1068 int i;
1069
1070 for (i = 0; i < 8; ++i) {
1071 evaluates_true |= !!*p++;
1072 }
1073 }
1074 default:
1075 printf("BUG: preprocessor expression has "
1076 "type %d (not integral!)\n",
1077 cv->type->code);
1078
1079 }
1080 return evaluates_true;
1081 }
1082
1083 /*
1084 * This function is unfortunately overloaded to serve two distinct purposes:
1085 *
1086 * 1) complete a preprocessor directive. If it is an #if/#ifdef/#elif/#endif/etc
1087 * directive, g_ignore_text will be modified as necessary, and include guards
1088 * are also recorded if current_include is non-null.
1089 * All this is done if incf is null (and dir thusly non-null.)
1090 *
1091 * 2) check whether the include guard in include file ``incf'' (start_dir)
1092 * evaluates to true. Only the result of this evaluation is returned, and no
1093 * side effects take place.
1094 * This is done if incf is non-null
1095 *
1096 * XXX we should have an evaluates_true() instead!
1097 */
1098 static int
1099 complete_directive(
1100 FILE *out,
1101 struct include_file *incf,
1102 struct pp_directive *dir,
1103 struct token **toklist,
1104 int *has_data) {
1105
1106 struct macro *mp;
1107 struct expr *ex;
1108 struct token *t;
1109 struct token *last = NULL;
1110 struct token *ltoklist;
1111 int evaluates_true = 0;
1112 int recording_guard = 0;
1113
1114 if (incf != NULL) {
1115 dir = incf->start_dir;
1116 ltoklist = incf->toklist;
1117 toklist = <oklist;
1118 }
1119
1120 if (g_ignore_text) {
1121 if (dir->code == CMD_ERROR
1122 || dir->code == CMD_WARNING
1123 || dir->code == CMD_LINE
1124 || dir->code == CMD_DEFINE
1125 || dir->code == CMD_INCLUDE
1126 || dir->code == CMD_INCLUDE_NEXT) {
1127 return 0;
1128 }
1129 }
1130
1131 if (dir->code == CMD_ERROR || dir->code == CMD_WARNING) {
1132 char *p = toklist_to_string(*toklist);
1133
1134 if (dir->code == CMD_WARNING) {
1135 lexwarning("#warning: %s", p);
1136 } else {
1137 lexerror("#error: %s", p);
1138 }
1139 free(p);
1140 *toklist = NULL;
1141 return 0;
1142 } else if (dir->code == CMD_LINE) {
1143 struct token *t = *toklist;
1144 struct token *t2;
1145
1146 if ((t = skip_ws(t)) == NULL) {
1147 lexerror("Empty #line directive");
1148 return -1;
1149 }
1150 if (t->type != TY_INT) {
1151 lexerror("Invalid #line directive x");
1152 return -1;
1153 }
1154 if ((t2 = skip_ws(t->next)) != NULL) {
1155 if (t2->type != TOK_STRING_LITERAL) {
1156 lexerror("Invalid #line directive");
1157 return -1;
1158 }
1159 curfile = t2->data+1;
1160 (void) strtok(curfile, "\"");
1161 }
1162 lineno = *(int *)t->data;
1163 set_compiler_line(out, lineno, curfile);
1164 *toklist = NULL;
1165 return 0;
1166 } else if (dir->code == CMD_DEFINE) {
1167 mp = dir->data;
1168 mp->toklist = *toklist;
1169 *toklist = NULL;
1170 if (put_macro(mp, dir->len, dir->key) == NULL) {
1171 return -1;
1172 }
1173 return 0;
1174 } else if (dir->code == CMD_INCLUDE
1175 || dir->code == CMD_INCLUDE_NEXT) {
1176 if (!g_ignore_text) {
1177 int rc;
1178
1179 rc = do_include(out, NULL, *toklist, dir->code);
1180 *toklist = NULL;
1181 return rc;
1182 } else {
1183 return 0;
1184 }
1185 }
1186
1187
1188 /*
1189 * At this point, we are only dealing with conditional directives
1190 * anymore, i.e. #if/#elif/#ifndef/#endif etc. Those need to be
1191 * stored, so a copy must be made
1192 */
1193 if (dir->code != CMD_ENDIF && incf == NULL) {
1194 dir = n_xmemdup(dir, sizeof *dir);
1195 }
1196
1197 if (incf == NULL) {
1198 /*
1199 * A newline has already been read - to ensure a correct __LINE__,
1200 * that must be undone
1201 */
1202 --lineno;
1203 }
1204
1205 if (dir->code == CMD_ELIF || dir->code == CMD_ELSE) {
1206 /* These can only continue an existent directive chain! */
1207 if (cond_dir_cur_start == NULL) {
1208 lexerror("Use of #%s directive without preceding "
1209 "#if/#ifdef/#ifndef", dir->name);
1210 ++lineno;
1211 return -1;
1212 } else if (cond_dir_list_tail->code == CMD_ELSE
1213 && dir->code == CMD_ELIF) {
1214 lexerror("#else followed by #elif directive");
1215 ++lineno;
1216 return -1;
1217 }
1218 } else if (dir->code == CMD_IF
1219 || dir->code == CMD_IFDEF
1220 || dir->code == CMD_IFNDEF) {
1221 /* These can only introduce a new directive chain! */
1222 if (incf == NULL) {
1223 cond_dir_cur_start = dir;
1224 }
1225 }
1226
1227 switch (dir->code) {
1228 case CMD_IFDEF:
1229 case CMD_IFNDEF:
1230 case CMD_IF:
1231 case CMD_ELIF:
1232 if (incf == NULL
1233 && current_include != NULL
1234 && !current_include->has_guard) {
1235 /* This may be the guard we are looking for */
1236 if (!*has_data) {
1237 current_include->fully_guarded = 1;
1238 }
1239 recording_guard = 1;
1240 current_include->startp = dir;
1241 current_include->start_dir = dir;
1242 current_include->has_guard = 1;
1243 if (toklist != NULL) {
1244 current_include->toklist = *toklist;
1245 }
1246 current_include->start_guard = cur_directive_start;
1247 }
1248 if (dir->code == CMD_IFDEF || dir->code == CMD_IFNDEF) {
1249 if ((mp = lookup_macro(dir->data, dir->len, dir->key)) != NULL) {
1250 /* Macro exists! */
1251 if (dir->code == CMD_IFDEF) {
1252 evaluates_true = 1;
1253 } else {
1254 evaluates_true = 0;
1255 }
1256 } else {
1257 if (dir->code == CMD_IFDEF) {
1258 evaluates_true = 0;
1259 } else {
1260 evaluates_true = 1;
1261 }
1262 }
1263 if (incf == NULL && !recording_guard) {
1264 free(dir->data);
1265 }
1266 } else if (!g_ignore_text || cond_dir_cur_start->significant) {
1267 struct token *toklist_tail = NULL;
1268
1269 /*
1270 * This is an #if or #elif - macro substituion was
1271 * disabled while reading tokens - process them
1272 * now!
1273 */
1274 *toklist = do_macro_subst(NULL, NULL, *toklist,
1275 &toklist_tail, 1);
1276
1277 /*
1278 * #if/#elif <constant expression>
1279 *
1280 * First cut all whitespace, then append a newline as
1281 * terminator for parse_expr()
1282 */
1283 for (t = *toklist; t != NULL;) {
1284 struct token *next = t->next;
1285
1286 if (t->type == TOK_WS) {
1287 if (t->prev) {
1288 t->prev->next = t->next;
1289 if (t->next) {
1290 t->next->prev = t->prev;
1291 }
1292 } else {
1293 *toklist = t->next;
1294 t->next->prev = NULL;
1295 }
1296 free(t);
1297 } else {
1298 last = t;
1299 }
1300 t = next;
1301 }
1302 if (last == NULL) {
1303 lexerror("Empty #%s directive", dir->name);
1304 ++lineno;
1305 return -1;
1306 } else {
1307 static struct token terminator;
1308
1309 terminator.type = TOK_NEWLINE;
1310 last->next = &terminator;
1311 }
1312
1313 ex = parse_expr(toklist, TOK_NEWLINE, 0, EXPR_CONST, 1);
1314 if (ex == NULL) {
1315 ++lineno;
1316 return -1;
1317 } else if (ex->const_value == NULL) {
1318 puts("BUG: const_value = NULL?????");
1319 abort();
1320 }
1321
1322 evaluates_true = value_is_nonzero(ex->const_value);
1323 if (dir->code == CMD_ELIF) {
1324 if (cond_dir_cur_start->determined) {
1325 /* Branch to take already determined */
1326 evaluates_true = 0;
1327 }
1328 }
1329 }
1330 break;
1331 case CMD_ELSE:
1332 if (!cond_dir_cur_start->determined) {
1333 /* No branch determined yet, so #else wins */
1334 evaluates_true = 1;
1335 } else {
1336 evaluates_true = 0;
1337 }
1338 break;
1339 case CMD_ENDIF:
1340 break;
1341 default:
1342 abort();
1343 }
1344
1345 if (incf != NULL) {
1346 return evaluates_true;
1347 } else if (dir->code != CMD_ENDIF) {
1348 /*
1349 * The result of the evaluation only matters if text
1350 * is not already being ignored
1351 */
1352 if (!g_ignore_text) {
1353 if (!evaluates_true) {
1354 g_ignore_text = 1;
1355 dir->significant = 1;
1356 } else {
1357 cond_dir_cur_start->determined = 1;
1358 }
1359 } else {
1360 if (dir->code == CMD_ELSE) {
1361 if (cond_dir_list_tail->significant
1362 && evaluates_true) {
1363 /*
1364 * Previous directive ends - text is
1365 * no longer ignored
1366 */
1367 g_ignore_text = 0;
1368 cond_dir_cur_start->determined = 1;
1369 }
1370 } else if (dir->code == CMD_ELIF) {
1371 if (cond_dir_list_tail->significant
1372 && evaluates_true) {
1373 g_ignore_text = 0;
1374 cond_dir_cur_start->determined = 1;
1375 } else if (cond_dir_list_tail->significant) {
1376 dir->significant = 1;
1377 }
1378 }
1379 }
1380
1381 if (cond_dir_list == NULL) {
1382 cond_dir_list = cond_dir_list_tail = dir;
1383 } else {
1384 cond_dir_list_tail->next = dir;
1385 dir->prev = cond_dir_list_tail;
1386 cond_dir_list_tail = dir;
1387 }
1388 } else {
1389 /* Current chain is done! */
1390 struct pp_directive *ppd;
1391 struct pp_directive *tmp;
1392 int code;
1393
1394 for (ppd = cond_dir_list_tail; ppd != NULL;) {
1395 if (ppd->significant) {
1396 /*
1397 * An ignored (by ppd's controlling expression)
1398 * text passage ends here
1399 */
1400 g_ignore_text = 0;
1401 }
1402 tmp = ppd;
1403 code = ppd->code;
1404 ppd = ppd->prev;
1405 if (code == CMD_IF
1406 || code == CMD_IFDEF
1407 || code == CMD_IFNDEF) {
1408 struct pp_directive *startp = tmp;
1409
1410 /*
1411 * Start of chain reached, we are done. Now we
1412 * have to return to dealing with the previous,
1413 * outer chain (if any!)
1414 */
1415 if ((cond_dir_list_tail = ppd) == NULL) {
1416 /* No outer one */
1417 for (ppd = cond_dir_list /*->next*/; ppd;) {
1418 tmp = ppd;
1419 ppd = ppd->next;
1420 if (current_include != NULL
1421 && current_include->startp
1422 == tmp) {
1423 continue;
1424 }
1425 free(tmp);
1426 }
1427 cond_dir_list = NULL;
1428 } else {
1429 /* Yes, outer */
1430 for (tmp = ppd;
1431 tmp != NULL;
1432 tmp = tmp->prev) {
1433 code = tmp->code;
1434 if (code == CMD_IF
1435 || code == CMD_IFDEF
1436 || code == CMD_IFNDEF) {
1437 cond_dir_cur_start =
1438 tmp;
1439 break;
1440 }
1441 }
1442 for (ppd = cond_dir_list_tail->next;
1443 ppd != NULL;) {
1444 tmp = ppd;
1445 ppd = ppd->next;
1446 if (current_include != NULL
1447 && current_include->
1448 startp == tmp) {
1449 continue;
1450 }
1451 free(tmp);
1452 }
1453 cond_dir_list_tail->next = NULL;
1454 }
1455 if (current_include != NULL
1456 && current_include->has_guard
1457 && current_include->end_guard == 0
1458 && startp ==
1459 current_include->startp) {
1460 /* Record end of inc guard */
1461 current_include->end_guard =
1462 cur_directive_start;
1463 current_include->end_dir = dir;
1464 *has_data = 0;
1465 }
1466 break;
1467 }
1468 }
1469 }
1470
1471 if (toklist) *toklist = NULL;
1472 ++lineno;
1473 return 0;
1474 }
1475
1476 extern int collect_parens;
1477
1478 int
1479 preprocess(struct input_file *inf, FILE *out) {
1480 int ch;
1481 int tmpi;
1482 int compound = 0;
1483 int array = 0;
1484 int parentheses = 0;
1485 int prevch = 0;
1486 int first_byte = 1;
1487 int *dummyptr = n_xmalloc(sizeof *dummyptr);
1488 int err;
1489 struct token *toklist = NULL;
1490 struct token *toklist_tail = NULL;
1491 struct token *t;
1492 struct pp_directive dir;
1493 struct macro *mp = NULL;
1494 static struct macro nullm;
1495 char *p;
1496 char *tmpc;
1497 int doing_funclike = 0;
1498 int substitute_macros = 1;
1499 int doing_pre = 0;
1500 int has_data = 0;
1501 int maybe_funclike = 0;
1502
1503 if (/*options.showline*/ inf->fd) {
1504 int fd = fileno(inf->fd);
1505 struct stat s;
1506 if (fstat(fd, &s) == -1) {
1507 perror("fstat");
1508 exit(EXIT_FAILURE);
1509 }
1510 lex_file_map = mmap(0, s.st_size, PROT_READ, MAP_SHARED, fd, 0);
1511 if (lex_file_map == MAP_FAILED) {
1512 /* 05/21/09: XXX This seems to error for empty stdin?!!? */
1513 perror("mmap");
1514 exit(EXIT_FAILURE);
1515 }
1516 lex_file_map_end = lex_file_map + s.st_size;
1517 lex_line_ptr = lex_file_map;
1518 } else {
1519 lex_file_map = inf->filemap;
1520 lex_file_map_end = inf->filemapend;
1521 lex_line_ptr = lex_file_map;
1522 }
1523
1524 /* Initialize error message module */
1525 err_setfile(curfile = inf->path);
1526 token_setfile(curfile);
1527
1528 if (!inf->is_header && !inf->is_cmdline) {
1529 /* Processing new .c file */
1530 lineno = 1;
1531 set_compiler_line(out, lineno, curfile);
1532 err_setline(&lineno);
1533
1534 /*
1535 * Set predefined macros
1536 */
1537 mp = n_xmalloc(sizeof *mp);
1538 *mp = nullm;
1539 mp->name = n_xstrdup("__LINE__");
1540 mp->builtin = &lineno;
1541 (void) put_macro(mp, 0, -1);
1542 mp = n_xmalloc(sizeof *mp);
1543 *mp = nullm;
1544 mp->name = n_xstrdup("__FILE__");
1545 mp->builtin = n_xmalloc(strlen(curfile) + 3);
1546 sprintf(mp->builtin, "\"%s\"", curfile);
1547 (void) put_macro(mp, 0, -1);
1548 }
1549 errors = 0;
1550 warnings = 0;
1551
1552 /*
1553 * Initialize the digit limits of integral constants so
1554 * get_num_literal() can warn about overflow.
1555 */
1556 init_max_digits();
1557
1558 while ((ch = FGETC(inf)) != EOF) {
1559 lex_tok_ptr = lex_file_map + lex_chars_read;
1560 if (!isspace(ch) && ch != '#') {
1561 pre_directive = 0;
1562 /*
1563 * If this is a comment, has_data must remain zero -
1564 * check below
1565 */
1566 if (ch != '/') {
1567 has_data = 1;
1568 }
1569 }
1570 if (g_ignore_text
1571 && !doing_pre
1572 && ch != '\n'
1573 && ch != '#'
1574 && ch != '/'
1575 && ch != '\''
1576 && ch != '"') {
1577 /*
1578 * This is a blunt way of avoiding processing
1579 * data in an ignored (e.g. by ``#if 0'')
1580 * text passage. Comments, newlines, preprocessor
1581 * directives and string/character constants must
1582 * still be processed though. I hope I didn't
1583 * miss anything here ...
1584 */
1585 continue;
1586 }
1587
1588 if (maybe_funclike && !isspace(ch) && ch != '/' && ch != '(') {
1589 /*
1590 * Function-like macro identifier not followed by
1591 * opening parentheses!
1592 * (if this is a ``/'' it may be a comment - check
1593 * below.)
1594 */
1595
1596 output_token_list(out, toklist);
1597 free_token_list(toklist);
1598 toklist = NULL;
1599 maybe_funclike = 0;
1600 g_recording_tokens = 0;
1601 }
1602
1603 switch (ch) {
1604 case '#':
1605 if (pre_directive || first_byte) {
1606 int done = 0;
1607
1608 /* Preprocessor directive */
1609 pre_directive = 0;
1610 /* XXX predef isn't an input_file */
1611 if (do_directive(inf, &dir) != 0) {
1612 pre_directive = 1;
1613 break;
1614 }
1615
1616 /*
1617 * Check whether subsequent tokens on same
1618 * line (including \-continued ones!) need
1619 * to be recorded for expression evaluation
1620 * or macro definitions
1621 */
1622 switch (dir.code) {
1623 case CMD_PASSTHRU:
1624 fprintf(out, "%s", (char *)dir.data);
1625 break;
1626 case CMD_UNDEF:
1627 case CMD_ENDIF:
1628 case CMD_IFDEF:
1629 case CMD_IFNDEF:
1630 case CMD_IDENT:
1631 case CMD_PRAGMA:
1632 g_recording_tokens = 0;
1633 doing_pre = 0;
1634 if (dir.code != CMD_UNDEF
1635 && dir.code != CMD_IDENT
1636 && dir.code != CMD_PRAGMA) {
1637 complete_directive(out, NULL,
1638 &dir, NULL, &has_data);
1639 }
1640 toklist = NULL;
1641 done = 1;
1642 break;
1643 case CMD_DEFINE:
1644 mp = dir.data;
1645 if (mp->empty) {
1646 /* No tokens needed */
1647 done = 1;
1648 break;
1649 }
1650 doing_pre = 1;
1651 g_recording_tokens = 1;
1652 substitute_macros = 0;
1653 break;
1654 case CMD_INCLUDE:
1655 case CMD_INCLUDE_NEXT:
1656 if (dir.data != NULL) {
1657 if (!g_ignore_text) {
1658 (void) do_include(
1659 out, dir.data, NULL,
1660 dir.code);
1661 }
1662 doing_pre = 0;
1663 substitute_macros = 1;
1664 g_recording_tokens = 0;
1665 done = 1;
1666 } else {
1667 g_recording_tokens = 1;
1668 substitute_macros = 0;
1669 doing_pre = 1;
1670 }
1671 break;
1672 case CMD_LINE:
1673 case CMD_IF:
1674 case CMD_ELIF:
1675 if (dir.code == CMD_LINE) {
1676 substitute_macros = 1;
1677 } else {
1678 /*
1679 * Maco replacement is done
1680 * later
1681 */
1682 substitute_macros = 0;
1683 }
1684 g_recording_tokens = 1;
1685 doing_pre = 1;
1686 break;
1687 case CMD_ERROR:
1688 case CMD_WARNING:
1689 /* Macro replacement is never done */
1690 substitute_macros = 0;
1691 g_recording_tokens = 1;
1692 doing_pre = 1;
1693 break;
1694 default:
1695 if (dir.code != CMD_ERROR
1696 && dir.code != CMD_WARNING) {
1697 substitute_macros = 0;
1698 }
1699 if (dir.data != NULL) {
1700 done = 1;
1701 g_recording_tokens = 0;
1702 } else {
1703 doing_pre = 1;
1704 g_recording_tokens = 1;
1705 }
1706 break;
1707 }
1708 if (done) {
1709 /*UNGETC('\n', in);*/
1710 pre_directive = 1;
1711 }
1712 } else {
1713 if (!g_recording_tokens) {
1714 x_fputc('#', out);
1715 } else {
1716 if ((ch = FGETC(inf)) == '#') {
1717 store_token(&toklist,
1718 &toklist_tail,
1719 n_xstrdup("##"),
1720 TOK_HASHHASH, lineno,
1721 NULL);
1722 } else {
1723 UNGETC(ch, inf);
1724 store_token(&toklist,
1725 &toklist_tail,
1726 n_xstrdup("#"),
1727 TOK_HASH, lineno,
1728 NULL);
1729 }
1730 }
1731 }
1732 break;
1733 case ' ':
1734 case '\f':
1735 case '\t':
1736 case '\r':
1737 p = g_textbuf;
1738 *p++ = ch;
1739
1740 while (isspace(ch = FGETC(inf)) && ch != '\n') {
1741 *p++ = ch; /* XXX */
1742 }
1743 *p = 0;
1744 UNGETC(ch, inf);
1745 if (!g_recording_tokens) {
1746 if (pre_directive) {
1747 x_fprintf(out, g_textbuf);
1748 } else {
1749 x_fputc(' ', out);
1750 }
1751 } else {
1752 /*
1753 * 05/24/09: Do not store leading
1754 * whitespace tokens for macro
1755 * bodies!
1756 *
1757 * #define foo() bar
1758 *
1759 * will always expand to just "bar"
1760 */
1761 if (mp != NULL
1762 && doing_pre
1763 && dir.code == CMD_DEFINE
1764 && toklist == NULL) {
1765 /* Is macro body whitespace */
1766 ;
1767 } else {
1768 store_token(&toklist,
1769 &toklist_tail,
1770 n_xstrdup(" "),
1771 TOK_WS, lineno, NULL);
1772 }
1773 }
1774 break;
1775 case '\n':
1776 if (g_recording_tokens && doing_pre) {
1777 g_recording_tokens = 0;
1778 substitute_macros = 1;
1779 doing_pre = 0;
1780 /*
1781 * 05/24/09: Do not store trailing whitespace
1782 * tokens!
1783 */
1784 if (dir.code == CMD_DEFINE
1785 && toklist != NULL
1786 && toklist_tail->type == TOK_WS) {
1787 if (toklist == toklist_tail) {
1788 /* Only token is whitespace */
1789 toklist = toklist_tail = NULL;
1790 } else {
1791 toklist_tail->prev->next = NULL;
1792 toklist_tail = toklist_tail->prev;
1793 }
1794 }
1795 complete_directive(out, NULL, &dir, &toklist,
1796 &has_data);
1797 } else {
1798 mp = NULL; /* XXX hm?!? */
1799 }
1800
1801 pre_directive = 1;
1802 x_fputc('\n', out);
1803 break;
1804 case '/':
1805 if ((ch = FGETC(inf)) == '*') {
1806 int sline = lineno;
1807 char *sfile = curfile;
1808
1809 /* C style comment */
1810 for (;;) {
1811 ch = FGETC(inf);
1812 if (ch == EOF) {
1813 err_setfile(sfile);
1814 lineno = sline;
1815 lexerror("Unterminated comment "
1816 "(started in line %d,"
1817 " file %s)",
1818 sline, sfile);
1819 return -1;
1820 #if 0
1821 /* TODO: warn about nested comments */
1822 } else if (ch == '/') {
1823 #endif
1824 } else if (ch == '\n') {
1825 x_fputc('\n', out);
1826 ++lineno;
1827 } else if (ch == '*') {
1828 if ((ch = FGETC(inf)) == '/') {
1829 /* comment complete */
1830 break;
1831 } else {
1832 UNGETC(ch, inf);
1833 }
1834 }
1835 }
1836 UNGETC(' ', inf);
1837 } else if (ch == '/' /* && standard != C89 */) {
1838 /* C99/C++ style comment */
1839 while ((ch = FGETC(inf)) != '\n' && ch != EOF)
1840 ;
1841 UNGETC(' ', inf);
1842 if (ch != EOF) {
1843 UNGETC('\n', inf);
1844 }
1845 } else {
1846 has_data = 1;
1847 /* Not a comment */
1848 if (maybe_funclike) {
1849 maybe_funclike = 0;
1850 g_recording_tokens = 0;
1851 output_token_list(out, toklist);
1852 free_token_list(toklist);
1853 toklist = NULL;
1854 }
1855 UNGETC(ch, inf);
1856 ch = '/';
1857 goto do_operator;
1858 }
1859 break;
1860 case '\'':
1861 err = 0;
1862 tmpi = get_char_literal(inf, &err, &tmpc);
1863 if (!err) {
1864 /*
1865 * Character literals are really treated
1866 * like integer constants
1867 */
1868 int *tmpip = malloc(sizeof(int));
1869 if (tmpip == NULL) {
1870 perror("malloc");
1871 exit(EXIT_FAILURE);
1872 }
1873 *tmpip = tmpi;
1874 if (g_recording_tokens) {
1875 char *tmpc2;
1876
1877 tmpc2 = n_xmalloc(strlen(tmpc)+3);
1878 sprintf(tmpc2, "'%s'", tmpc);
1879 store_token(&toklist,
1880 &toklist_tail,
1881 tmpip,
1882 TY_INT, lineno, tmpc2);
1883 /* XXX .. */
1884 /* t->ascii = tmpc2;*/
1885 } else {
1886 x_fprintf(out, "'%s'", tmpc);
1887 }
1888 }
1889 break;
1890 case '"': {
1891 struct ty_string *ts;
1892
1893 ts = get_string_literal(inf);
1894 if (ts != NULL) {
1895 if (g_recording_tokens) {
1896 store_token(&toklist,
1897 &toklist_tail,
1898 ts->str,
1899 TOK_STRING_LITERAL, lineno,
1900 ts->str);
1901 } else {
1902 x_fprintf(out, "%s", ts->str);
1903 }
1904 }
1905 break;
1906 }
1907 case '(':
1908 case ')':
1909 if (ch == '(') {
1910 if (maybe_funclike) {
1911 /* Is function-like macro! */
1912 doing_funclike = 1;
1913 maybe_funclike = 0;
1914 parentheses = 1;
1915 substitute_macros = 0;
1916 g_recording_tokens = 1;
1917
1918 /*
1919 * There may only be whitespace tokens
1920 * between the identifier and the
1921 * parentheses - cut those!
1922 */
1923 if (toklist->next) {
1924 free_token_list(toklist->next);
1925 toklist->next = NULL;
1926 toklist_tail = toklist;
1927 }
1928
1929 /*
1930 * fall through so that ( is appended
1931 * below
1932 */
1933 } else {
1934
1935 /*
1936 * Don't increment/decrement paren count
1937 * for things like
1938 * #define foo (lol
1939 */
1940 if (!doing_pre) ++parentheses;
1941 }
1942 } else {
1943 if (!doing_pre) --parentheses;
1944 }
1945 if (g_recording_tokens) {
1946 store_token(&toklist, &toklist_tail, dummyptr,
1947 ch == '(' ? TOK_PAREN_OPEN :
1948 TOK_PAREN_CLOSE, lineno, NULL);
1949 } else {
1950 x_fputc(ch, out);
1951 }
1952 if (doing_funclike && parentheses == 0) {
1953 /*
1954 * XXX take g_recording_tokens into
1955 * account :-(
1956 */
1957 toklist = do_macro_subst(inf, out, toklist,
1958 &toklist_tail, 0);
1959 if (toklist == NULL) {
1960 /* done */
1961 doing_funclike = 0;
1962 g_recording_tokens = 0;
1963 substitute_macros = 1;
1964 } else {
1965 parentheses = collect_parens;
1966 }
1967 }
1968
1969 break;
1970 case '{':
1971 case '}':
1972 if (ch == '{') {
1973 ++compound;
1974 } else {
1975 if (compound == 0) {
1976 lexerror("No matching opening brace.");
1977 }
1978 --compound;
1979 }
1980 if (g_recording_tokens) {
1981 store_token(&toklist, &toklist_tail, dummyptr,
1982 ch == '{'? TOK_COMP_OPEN: TOK_COMP_CLOSE,
1983 lineno, NULL);
1984 } else {
1985 x_fputc(ch, out);
1986 }
1987 break;
1988 case '[':
1989 case ']':
1990 if (ch == '[') {
1991 ++array;
1992 } else {
1993 if (array == 0) {
1994 lexerror("Not a valid subscript.");
1995 ++array;
1996 }
1997 --array;
1998 }
1999 if (g_recording_tokens) {
2000 store_token(&toklist, &toklist_tail, dummyptr,
2001 ch == '[' ? TOK_ARRAY_OPEN : TOK_ARRAY_CLOSE,
2002 lineno, NULL);
2003 } else {
2004 x_fputc(ch, out);
2005 }
2006 break;
2007 case ';':
2008 if (g_recording_tokens) {
2009 store_token(&toklist, &toklist_tail ,dummyptr,
2010 TOK_SEMICOLON, lineno, NULL);
2011 } else {
2012 x_fputc(';', out);
2013 }
2014 break;
2015 case '.':
2016 /*
2017 * This might be either a structure / union
2018 * indirection operator or a floating point
2019 * value like .5 (equivalent to 0.5). If the
2020 * latter is the case, call get_num_literal(),
2021 * else fall through
2022 */
2023 if ((tmpi = FGETC(inf)) == EOF) {
2024 lexerror("Unexpected end of file.");
2025 return 1;
2026 }
2027 UNGETC(tmpi, inf);
2028 if (isdigit((unsigned char)tmpi)) {
2029 struct num *n = get_num_literal(ch, inf);
2030
2031 if (n != NULL) {
2032 if (g_recording_tokens) {
2033 store_token(&toklist,
2034 &toklist_tail,
2035 n->value,
2036 n->type, lineno, n->ascii);
2037 } else {
2038 }
2039 }
2040 break;
2041 }
2042 /* FALLTHRU */
2043 default:
2044 if (ch == '?') {
2045 int trig;
2046 /* Might be trigraph */
2047 if ((trig = get_trigraph(inf)) == -1) {
2048 /*
2049 * Not a trigraph - LOOKUP_OP()
2050 * will catch the ``?''
2051 */
2052 ;
2053 } else if (trig == 0) {
2054 /*
2055 * The source file contained a ``??''
2056 * that isn't isn't part of a trigraph -
2057 * this is a syntax error, since it
2058 * cannot be the conditional operator
2059 */
2060 lexerror("Syntax error at ``?\?''");
2061 } else {
2062 /* Valid trigraph! */
2063 UNGETC(trig, inf);
2064 break;
2065 }
2066 }
2067
2068 do_operator:
2069 if (LOOKUP_OP(ch)) {
2070 int *ptri = malloc(sizeof(int));
2071 /* struct operator *opp;*/
2072 char *opname;
2073
2074 if (ptri == NULL) {
2075 perror("malloc");
2076 exit(EXIT_FAILURE);
2077 }
2078 tmpi = get_operator(ch, inf, &opname /*&opp*/);
2079 if (tmpi == -1) {
2080 lexerror("INVALID OPERATOR!!!");
2081 break;
2082 }
2083
2084 if (g_recording_tokens) {
2085 *ptri = tmpi;
2086 store_token(&toklist,
2087 &toklist_tail,
2088 ptri,
2089 TOK_OPERATOR, lineno, NULL);
2090 } else {
2091 x_fprintf(out, "%s",
2092 opname /*opp->name*/);
2093 }
2094 } else if (isdigit((unsigned char)ch)) {
2095 struct num *n = get_num_literal(ch, inf);
2096
2097 if (n != NULL) {
2098 if (g_recording_tokens) {
2099 store_token(&toklist,
2100 &toklist_tail,
2101 n->value,
2102 n->type, lineno, n->ascii);
2103 } else {
2104 x_fprintf(out, "%s", n->ascii);
2105 }
2106 } else {
2107 lexerror("Couldn't read numeric literal");
2108 }
2109 } else if (isalpha((unsigned char)ch) || ch == '_') {
2110 struct macro *mp;
2111 struct macro_arg *ma = NULL;
2112 int slen;
2113 int hash_key;
2114
2115 if (ch == 'L') {
2116 int tmpch;
2117
2118 tmpch = FGETC(inf);
2119 if (tmpch != EOF) {
2120 UNGETC(tmpch, inf);
2121 if (tmpch == '\'' || tmpch == '"') {
2122 /*
2123 * Long constant - treat like
2124 * ordinary one
2125 */
2126 break;
2127 }
2128 }
2129 }
2130 tmpc = get_identifier(ch, inf, &slen, &hash_key);
2131
2132 if (tmpc == NULL) {
2133 break;
2134 }
2135
2136 if (g_recording_tokens
2137 && dir.code == CMD_DEFINE
2138 && ((struct macro *)dir.data)->
2139 functionlike) {
2140 /*
2141 * Macro definition tokens are only
2142 * subject to macro substitution when
2143 * the macro is instantiated, thus;
2144 * #define x lol
2145 * #define foo() x
2146 * #undef x
2147 * foo()
2148 * ... must yield x rather than lol
2149 */
2150 mp = dir.data;
2151 for (ma = mp->arglist;
2152 ma != NULL;
2153 ma = ma->next) {
2154 if (strcmp(ma->name, tmpc)
2155 == 0) {
2156 break;
2157 }
2158 }
2159 } else if (substitute_macros
2160 && (mp = lookup_macro(tmpc,
2161 slen, hash_key)) != NULL
2162 /* && !mp->dontexpand maybe?!?!? */ ) {
2163 toklist = NULL;
2164 store_token(&toklist,
2165 &toklist_tail,
2166 tmpc,
2167 TOK_IDENTIFIER, lineno, NULL);
2168
2169 if (mp->functionlike) {
2170 /*
2171 * We have to wait until a (
2172 * comes along
2173 */
2174 maybe_funclike = 1;
2175 g_recording_tokens = 1;
2176 } else {
2177 toklist = do_macro_subst(inf, out,
2178 toklist, &toklist_tail,
2179 0);
2180 if (toklist != NULL) {
2181 doing_funclike = 1;
2182 parentheses =
2183 collect_parens;
2184 }
2185 }
2186 if (doing_funclike) {
2187
2188 /*
2189 * Don't process nested macros
2190 * just yet
2191 */
2192 substitute_macros = 0;
2193 g_recording_tokens = 1;
2194 }
2195 break;
2196 }
2197
2198 if (g_recording_tokens) {
2199 t = store_token(&toklist,
2200 &toklist_tail,
2201 tmpc,
2202 TOK_IDENTIFIER, lineno, NULL);
2203 if (ma != NULL) {
2204 t->maps_to_arg = ma;
2205 }
2206 if (t->type == TOK_IDENTIFIER) {
2207 /*
2208 * Store length and hash
2209 * key
2210 */
2211 t->slen = slen;
2212 t->hashkey = hash_key;
2213 }
2214 } else {
2215 x_fprintf(out, "%s", tmpc);
2216 }
2217 } else {
2218 printf("LOOKUP_OP(%d) = %d\n",
2219 ch, LOOKUP_OP(ch));
2220 lexerror("Unknown token - %c (code %d)\n", ch, ch);
2221 }
2222 }
2223 first_byte = 0;
2224 prevch = ch;
2225
2226 /*
2227 * Check whether the file ends here in order to make last lines
2228 * without newline character work
2229 */
2230 if ((ch = FGETC(inf)) == EOF) {
2231 if (prevch != '\n') {
2232 UNGETC(ch, inf);
2233 }
2234 } else {
2235 UNGETC(ch, inf);
2236 }
2237 }
2238 if (has_data) {
2239 if (current_include != NULL) {
2240 current_include->fully_guarded = 0;
2241 }
2242 }
2243 #if 0
2244 puts("nope, not fully guarded");
2245 } else {
2246 puts("hahha lol");
2247 }
2248 } else if (current_include && current_include->fully_guarded) {
2249 printf("%s IS FULLY GUARDED!!!!!!!!!!!!\n",
2250 current_include->name);
2251 }
2252 #endif
2253 #if 0
2254 store_token(&toklist, NULL, 0, lineno);
2255 #endif
2256 return errors;
2257 }
2258
2259
2260 #ifdef DEBUG
2261 static void
2262 print_token_list(struct token *list) {
2263 (void)list;
2264 puts("-------------------------------------------------------------");
2265 for (; list /*->data*/ != NULL; list = list->next) {
2266 if (list->type == TOK_OPERATOR) {
2267 int i;
2268 for (i = 0; operators[i].name != NULL; ++i) {
2269 if (*(int *)list->data == operators[i].value
2270 || *(int *)list->data
2271 == operators[i].is_ambig) {
2272 printf("%s", operators[i].name);
2273 break;
2274 }
2275 }
2276 if (operators[i].name == NULL) {
2277 (void) fprintf(stderr, "FATAL -- Unknown "
2278 "operator %d\n",
2279 *(int *)list->data);
2280 }
2281 } else if (IS_CONSTANT(list->type)) {
2282 rv_setrc_print(list->data, list->type, 0);
2283 } else if (IS_KEYWORD(list->type)) {
2284 printf(" %s ", (char *)list->data);
2285 } else if (list->type == TOK_IDENTIFIER) {
2286 printf(" %s ", (char *)list->data);
2287 } else if (list->type == TOK_STRING_LITERAL) {
2288 struct ty_string *ts = list->data;
2289 printf("\"%s\"", ts->str);
2290 } else if (list->type == TOK_PAREN_OPEN) {
2291 printf("(");
2292 } else if (list->type == TOK_PAREN_CLOSE){
2293 printf(")");
2294 } else if (list->type == TOK_ARRAY_OPEN) {
2295 printf("[");
2296 } else if (list->type == TOK_ARRAY_CLOSE) {
2297 printf("]");
2298 } else if (list->type == TOK_COMP_OPEN) {
2299 printf("{\n");
2300 } else if (list->type == TOK_COMP_CLOSE) {
2301 printf("}\n");
2302 } else if (list->type == TOK_SEMICOLON) {
2303 printf(";\n");
2304 } else {
2305 printf("Unknown - code %d\n", list->type);
2306 }
2307 }
2308 puts("-------------------------------------------------------------");
2309 }
2310 #endif
2311
2312