1 /*
2 * C and T preprocessor, and integrated lexer
3 * (c) Thomas Pornin 1999 - 2002
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 4. The name of the authors may not be used to endorse or promote
14 * products derived from this software without specific prior written
15 * permission.
16 *
17 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
23 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
24 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
26 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 *
29 */
30
/* version number of this program: VERS_MAJ.VERS_MIN */
#define VERS_MAJ	1
#define VERS_MIN	3

/*
 * STAND_ALONE is normally set with a compiler flag; if that is not
 * possible, uncomment the following line instead.
 */
/* #define STAND_ALONE */
35
36 #include "tune.h"
37 #include <stdio.h>
38 #include <string.h>
39 #include <stdarg.h>
40 #include <setjmp.h>
41 #include <stddef.h>
42 #include <limits.h>
43 #include <time.h>
44 #include "ucppi.h"
45 #include "mem.h"
46 #include "nhash.h"
47 #ifdef UCPP_MMAP
48 #include <unistd.h>
49 #include <sys/types.h>
50 #include <sys/mman.h>
51 #include <fcntl.h>
52 #endif
53
/*
 * Include search path.
 *
 * include_path_std is the built-in list of standard directories; it is
 * only compiled in for the stand-alone program. include_path is the
 * list that find_file() and find_file_next() walk through, and
 * include_path_nb is the number of entries in that list.
 */
#ifdef STAND_ALONE
static char *include_path_std[] = { STD_INCLUDE_PATH, 0 };
#endif
static char **include_path;
static size_t include_path_nb = 0;

/*
 * Global switches. emit_output is the stream on which find_file()
 * writes the names of the files it opens, when emit_dependencies is
 * set to 1 or 2.
 */
int no_special_macros = 0;
int emit_dependencies = 0;
int emit_defines = 0;
int emit_assertions = 0;
FILE *emit_output;

/*
 * Zero-terminated default lists used by the stand-alone program.
 * NOTE(review): the only entry of system_macros_def looks like a
 * directory name rather than a macro definition -- confirm.
 */
#ifdef STAND_ALONE
static char *system_macros_def[] = { "/usr/include", 0 };
static char *system_assertions_def[] = { "", 0 };
#endif

/*
 * The file being processed: its name as given, its "long" name (set by
 * find_file() to the name under which the file is cached), and the
 * index in include_path of the directory where it was found (-1 if it
 * was not found through the include path).
 */
char *current_filename = 0;
char *current_long_filename = 0;
static int current_incdir = -1;
74
75 #ifndef NO_UCPP_ERROR_FUNCTIONS
76 /*
77 * "ouch" is the name for an internal ucpp error. If AUDIT is not defined,
78 * no code calling this function will be generated; a "ouch" may still be
79 * emitted by getmem() (in mem.c) if MEM_CHECK is defined, but this "ouch"
80 * does not use this function.
81 */
ucpp_ouch(char * fmt,...)82 void ucpp_ouch(char *fmt, ...)
83 {
84 va_list ap;
85
86 va_start(ap, fmt);
87 fprintf(stderr, "%s: ouch, ", current_filename);
88 vfprintf(stderr, fmt, ap);
89 fprintf(stderr, "\n");
90 va_end(ap);
91 die();
92 }
93
94 /*
95 * report an error, with current_filename, line, and printf-like syntax
96 */
ucpp_error(long line,char * fmt,...)97 void ucpp_error(long line, char *fmt, ...)
98 {
99 va_list ap;
100
101 va_start(ap, fmt);
102 if (line > 0)
103 fprintf(stderr, "%s: line %ld: ", current_filename, line);
104 else if (line == 0) fprintf(stderr, "%s: ", current_filename);
105 vfprintf(stderr, fmt, ap);
106 fprintf(stderr, "\n");
107 if (line >= 0) {
108 struct stack_context *sc = report_context();
109 size_t i;
110
111 for (i = 0; sc[i].line >= 0; i ++)
112 fprintf(stderr, "\tincluded from %s:%ld\n",
113 sc[i].long_name ? sc[i].long_name : sc[i].name,
114 sc[i].line);
115 freemem(sc);
116 }
117 va_end(ap);
118 }
119
120 /*
121 * like error(), with the mention "warning"
122 */
ucpp_warning(long line,char * fmt,...)123 void ucpp_warning(long line, char *fmt, ...)
124 {
125 va_list ap;
126
127 va_start(ap, fmt);
128 if (line > 0)
129 fprintf(stderr, "%s: warning: line %ld: ",
130 current_filename, line);
131 else if (line == 0)
132 fprintf(stderr, "%s: warning: ", current_filename);
133 else fprintf(stderr, "warning: ");
134 vfprintf(stderr, fmt, ap);
135 fprintf(stderr, "\n");
136 if (line >= 0) {
137 struct stack_context *sc = report_context();
138 size_t i;
139
140 for (i = 0; sc[i].line >= 0; i ++)
141 fprintf(stderr, "\tincluded from %s:%ld\n",
142 sc[i].long_name ? sc[i].long_name : sc[i].name,
143 sc[i].line);
144 freemem(sc);
145 }
146 va_end(ap);
147 }
148 #endif /* NO_UCPP_ERROR_FUNCTIONS */
149
/*
 * Some memory allocations are garbage-collected by hand: essentially,
 * the strings duplicated in the process of macro replacement. Each
 * such string is referenced in a garbage_fifo, which is emptied when
 * all nested macros have been resolved.
 */
struct garbage_fifo {
	char **garbage;		/* the strings waiting to be freed */
	size_t ngarb;		/* number of strings referenced in garbage */
	size_t memgarb;		/* number of slots allocated for garbage */
};
161
162 /*
163 * throw_away() marks a string to be collected later
164 */
throw_away(struct garbage_fifo * gf,char * n)165 void throw_away(struct garbage_fifo *gf, char *n)
166 {
167 wan(gf->garbage, gf->ngarb, n, gf->memgarb);
168 }
169
170 /*
171 * free marked strings
172 */
garbage_collect(struct garbage_fifo * gf)173 void garbage_collect(struct garbage_fifo *gf)
174 {
175 size_t i;
176
177 for (i = 0; i < gf->ngarb; i ++) freemem(gf->garbage[i]);
178 gf->ngarb = 0;
179 }
180
init_garbage_fifo(struct garbage_fifo * gf)181 static void init_garbage_fifo(struct garbage_fifo *gf)
182 {
183 gf->garbage = getmem((gf->memgarb = GARBAGE_LIST_MEMG)
184 * sizeof(char *));
185 gf->ngarb = 0;
186 }
187
free_garbage_fifo(struct garbage_fifo * gf)188 static void free_garbage_fifo(struct garbage_fifo *gf)
189 {
190 garbage_collect(gf);
191 freemem(gf->garbage);
192 freemem(gf);
193 }
194
/*
 * Text of the tokens, indexed by token type (see tname()).
 *
 * Order is important: it must match the token-constants declared as an
 * enum in the header file.
 */
char *operators_name[] = {
	" ", "\n", " ",
	"0000", "name", "bunch", "pragma", "context",
	"\"dummy string\"", "'dummy char'",
	"/", "/=",
	"-", "--", "-=", "->",
	"+", "++", "+=",
	"<", "<=", "<<", "<<=",
	">", ">=", ">>", ">>=",
	"=", "==",
#ifdef CAST_OP
	"=>",
#endif
	"~", "!=",
	"&", "&&", "&=",
	"|", "||", "|=",
	"%", "%=",
	"*", "*=",
	"^", "^=",
	"!",
	"{", "}", "[", "]", "(", ")",
	",", "?", ";", ":", ".", "...",
	"#", "##",
	" ", "ouch",
	"<:", ":>", "<%", "%>", "%:", "%:%:"
};
214
/*
 * tname(x): the ascii representation of the token x (a struct token,
 * not a pointer). Tokens for which S_TOKEN() is true carry their own
 * text in their name field; the text of the other ones is taken from
 * operators_name[]. When SEMPER_FIDELIS is defined, tokens for which
 * ttWHI() is true are rendered as a single space.
 */
#ifdef SEMPER_FIDELIS
#define tname(x) \
	(ttWHI((x).type) ? " " \
		: S_TOKEN((x).type) ? (x).name : operators_name[(x).type])
#else
#define tname(x) \
	(S_TOKEN((x).type) ? (x).name : operators_name[(x).type])
#endif
223
token_name(struct token * t)224 char *token_name(struct token *t)
225 {
226 return tname(*t);
227 }
228
229 /*
230 * To speed up deeply nested and repeated inclusions, we:
231 * -- use a hash table to remember where we found each file
232 * -- remember when the file is protected by a #ifndef/#define/#endif
233 * construction; we can then avoid including several times a file
234 * when this is not necessary.
235 * -- remember in which directory, in the include path, the file was found.
236 */
237 struct found_file {
238 hash_item_header head; /* first field */
239 char *name;
240 char *protect;
241 };
242
243 /*
244 * For files from system include path.
245 */
246 struct found_file_sys {
247 hash_item_header head; /* first field */
248 struct found_file *rff;
249 int incdir;
250 };
251
252 static HTT found_files, found_files_sys;
253 static int found_files_init_done = 0, found_files_sys_init_done = 0;
254
new_found_file(void)255 static struct found_file *new_found_file(void)
256 {
257 struct found_file *ff = getmem(sizeof(struct found_file));
258
259 ff->name = 0;
260 ff->protect = 0;
261 return ff;
262 }
263
del_found_file(void * m)264 static void del_found_file(void *m)
265 {
266 struct found_file *ff = (struct found_file *)m;
267
268 if (ff->name) freemem(ff->name);
269 if (ff->protect) freemem(ff->protect);
270 freemem(ff);
271 }
272
new_found_file_sys(void)273 static struct found_file_sys *new_found_file_sys(void)
274 {
275 struct found_file_sys *ffs = getmem(sizeof(struct found_file_sys));
276
277 ffs->rff = 0;
278 ffs->incdir = -1;
279 return ffs;
280 }
281
/*
 * del_found_file_sys() releases a struct found_file_sys. The struct
 * found_file it refers to is not touched. It takes a void * because it
 * is also given to HTT_init() as the deletion function of the
 * found_files_sys table.
 */
static void del_found_file_sys(void *m)
{
	struct found_file_sys *victim = (struct found_file_sys *)m;

	freemem(victim);
}
288
289 /*
290 * To keep up with the #ifndef/#define/#endif protection mechanism
291 * detection.
292 */
293 struct protect protect_detect;
294 static struct protect *protect_detect_stack = 0;
295
set_init_filename(char * x,int real_file)296 void set_init_filename(char *x, int real_file)
297 {
298 if (current_filename) freemem(current_filename);
299 current_filename = sdup(x);
300 current_long_filename = 0;
301 current_incdir = -1;
302 if (real_file) {
303 protect_detect.macro = 0;
304 protect_detect.state = 1;
305 protect_detect.ff = new_found_file();
306 protect_detect.ff->name = sdup(x);
307 HTT_put(&found_files, protect_detect.ff, x);
308 } else {
309 protect_detect.state = 0;
310 }
311 }
312
init_found_files(void)313 static void init_found_files(void)
314 {
315 if (found_files_init_done) HTT_kill(&found_files);
316 HTT_init(&found_files, del_found_file);
317 found_files_init_done = 1;
318 if (found_files_sys_init_done) HTT_kill(&found_files_sys);
319 HTT_init(&found_files_sys, del_found_file_sys);
320 found_files_sys_init_done = 1;
321 }
322
323 /*
324 * Set the lexer state at the beginning of a file.
325 */
reinit_lexer_state(struct lexer_state * ls,int wb)326 static void reinit_lexer_state(struct lexer_state *ls, int wb)
327 {
328 #ifndef NO_UCPP_BUF
329 ls->input_buf = wb ? getmem(INPUT_BUF_MEMG) : 0;
330 #ifdef UCPP_MMAP
331 ls->from_mmap = 0;
332 #endif
333 #endif
334 ls->input = 0;
335 ls->ebuf = ls->pbuf = 0;
336 ls->nlka = 0;
337 ls->macfile = 0;
338 ls->discard = 1;
339 ls->last = 0; /* we suppose '\n' is not 0 */
340 ls->line = 1;
341 ls->ltwnl = 1;
342 ls->oline = 1;
343 ls->pending_token = 0;
344 ls->cli = 0;
345 ls->copy_line[COPY_LINE_LENGTH - 1] = 0;
346 ls->ifnest = 0;
347 ls->condf[0] = ls->condf[1] = 0;
348 }
349
350 /*
351 * Initialize the struct lexer_state, with optional input and output buffers.
352 */
init_buf_lexer_state(struct lexer_state * ls,int wb)353 void init_buf_lexer_state(struct lexer_state *ls, int wb)
354 {
355 reinit_lexer_state(ls, wb);
356 #ifndef NO_UCPP_BUF
357 ls->output_buf = wb ? getmem(OUTPUT_BUF_MEMG) : 0;
358 #endif
359 ls->sbuf = 0;
360 ls->output_fifo = 0;
361
362 ls->ctok = getmem(sizeof(struct token));
363 ls->ctok->name = getmem(ls->tknl = TOKEN_NAME_MEMG);
364 ls->pending_token = 0;
365
366 ls->flags = 0;
367 ls->count_trigraphs = 0;
368 ls->gf = getmem(sizeof(struct garbage_fifo));
369 init_garbage_fifo(ls->gf);
370 ls->condcomp = 1;
371 ls->condnest = 0;
372 #ifdef INMACRO_FLAG
373 ls->inmacro = 0;
374 ls->macro_count = 0;
375 #endif
376 }
377
378 /*
379 * Initialize the (complex) struct lexer_state.
380 */
init_lexer_state(struct lexer_state * ls)381 void init_lexer_state(struct lexer_state *ls)
382 {
383 init_buf_lexer_state(ls, 1);
384 ls->input = 0;
385 }
386
387 /*
388 * Restore what is needed from a lexer_state. This is used for #include.
389 */
restore_lexer_state(struct lexer_state * ls,struct lexer_state * lsbak)390 static void restore_lexer_state(struct lexer_state *ls,
391 struct lexer_state *lsbak)
392 {
393 #ifndef NO_UCPP_BUF
394 freemem(ls->input_buf);
395 ls->input_buf = lsbak->input_buf;
396 #ifdef UCPP_MMAP
397 ls->from_mmap = lsbak->from_mmap;
398 ls->input_buf_sav = lsbak->input_buf_sav;
399 #endif
400 #endif
401 ls->input = lsbak->input;
402 ls->ebuf = lsbak->ebuf;
403 ls->pbuf = lsbak->pbuf;
404 ls->nlka = lsbak->nlka;
405 ls->discard = lsbak->discard;
406 ls->line = lsbak->line;
407 ls->oline = lsbak->oline;
408 ls->ifnest = lsbak->ifnest;
409 ls->condf[0] = lsbak->condf[0];
410 ls->condf[1] = lsbak->condf[1];
411 }
412
413 /*
414 * close input file operations on a struct lexer_state
415 */
close_input(struct lexer_state * ls)416 static void close_input(struct lexer_state *ls)
417 {
418 #ifdef UCPP_MMAP
419 if (ls->from_mmap) {
420 munmap((void *)ls->input_buf, ls->ebuf);
421 ls->from_mmap = 0;
422 ls->input_buf = ls->input_buf_sav;
423 }
424 #endif
425 if (ls->input) {
426 fclose(ls->input);
427 ls->input = 0;
428 }
429 }
430
431 /*
432 * file_context (and the two functions push_ and pop_) are used to save
433 * all that is needed when including a file.
434 */
435 static struct file_context {
436 struct lexer_state ls;
437 char *name, *long_name;
438 int incdir;
439 } *ls_stack;
440 static size_t ls_depth = 0;
441
push_file_context(struct lexer_state * ls)442 static void push_file_context(struct lexer_state *ls)
443 {
444 struct file_context fc;
445
446 fc.name = current_filename;
447 fc.long_name = current_long_filename;
448 fc.incdir = current_incdir;
449 mmv(&(fc.ls), ls, sizeof(struct lexer_state));
450 aol(ls_stack, ls_depth, fc, LS_STACK_MEMG);
451 ls_depth --;
452 aol(protect_detect_stack, ls_depth, protect_detect, LS_STACK_MEMG);
453 protect_detect.macro = 0;
454 }
455
pop_file_context(struct lexer_state * ls)456 static void pop_file_context(struct lexer_state *ls)
457 {
458 #ifdef AUDIT
459 if (ls_depth <= 0) ouch("prepare to meet thy creator");
460 #endif
461 close_input(ls);
462 restore_lexer_state(ls, &(ls_stack[-- ls_depth].ls));
463 if (protect_detect.macro) freemem(protect_detect.macro);
464 protect_detect = protect_detect_stack[ls_depth];
465 if (current_filename) freemem(current_filename);
466 current_filename = ls_stack[ls_depth].name;
467 current_long_filename = ls_stack[ls_depth].long_name;
468 current_incdir = ls_stack[ls_depth].incdir;
469 if (ls_depth == 0) {
470 freemem(ls_stack);
471 freemem(protect_detect_stack);
472 }
473 }
474
475 /*
476 * report_context() returns the list of successive includers of the
477 * current file, ending with a dummy entry with a negative line number.
478 * The caller is responsible for freeing the returned pointer.
479 */
report_context(void)480 struct stack_context *report_context(void)
481 {
482 struct stack_context *sc;
483 size_t i;
484
485 sc = getmem((ls_depth + 1) * sizeof(struct stack_context));
486 for (i = 0; i < ls_depth; i ++) {
487 sc[i].name = ls_stack[ls_depth - i - 1].name;
488 sc[i].long_name = ls_stack[ls_depth - i - 1].long_name;
489 sc[i].line = ls_stack[ls_depth - i - 1].ls.line - 1;
490 }
491 sc[ls_depth].line = -1;
492 return sc;
493 }
494
495 /*
496 * init_lexer_mode() is used to end initialization of a struct lexer_state
497 * if it must be used for a lexer
498 */
init_lexer_mode(struct lexer_state * ls)499 void init_lexer_mode(struct lexer_state *ls)
500 {
501 ls->flags = DEFAULT_LEXER_FLAGS;
502 ls->output_fifo = getmem(sizeof(struct token_fifo));
503 ls->output_fifo->art = ls->output_fifo->nt = 0;
504 ls->toplevel_of = ls->output_fifo;
505 ls->save_ctok = ls->ctok;
506 }
507
508 /*
509 * release memory used by a struct lexer_state; this implies closing
510 * any input stream held by this structure.
511 */
free_lexer_state(struct lexer_state * ls)512 void free_lexer_state(struct lexer_state *ls)
513 {
514 close_input(ls);
515 #ifndef NO_UCPP_BUF
516 if (ls->input_buf) {
517 freemem(ls->input_buf);
518 ls->input_buf = 0;
519 }
520 if (ls->output_buf) {
521 freemem(ls->output_buf);
522 ls->output_buf = 0;
523 }
524 #endif
525 if (ls->ctok && (!ls->output_fifo || ls->output_fifo->nt == 0)) {
526 freemem(ls->ctok->name);
527 freemem(ls->ctok);
528 ls->ctok = 0;
529 }
530 if (ls->gf) {
531 free_garbage_fifo(ls->gf);
532 ls->gf = 0;
533 }
534 if (ls->output_fifo) {
535 freemem(ls->output_fifo);
536 ls->output_fifo = 0;
537 }
538 }
539
540 /*
541 * Print line information.
542 */
print_line_info(struct lexer_state * ls,unsigned long flags)543 static void print_line_info(struct lexer_state *ls, unsigned long flags)
544 {
545 char *fn = current_long_filename ?
546 current_long_filename : current_filename;
547 char *b, *d;
548
549 b = getmem(50 + strlen(fn));
550 if (flags & GCC_LINE_NUM) {
551 sprintf(b, "# %ld \"%s\"\n", ls->line, fn);
552 } else {
553 sprintf(b, "#line %ld \"%s\"\n", ls->line, fn);
554 }
555 for (d = b; *d; d ++) put_char(ls, (unsigned char)(*d));
556 freemem(b);
557 }
558
559 /*
560 * Enter a file; this implies the possible emission of a #line directive.
561 * The flags used are passed as second parameter instead of being
562 * extracted from the struct lexer_state.
563 *
564 * As a command-line option, gcc-like directives (with only a '#',
565 * without 'line') may be produced.
566 *
567 * enter_file() returns 1 if a (CONTEXT) token was produced, 0 otherwise.
568 */
enter_file(struct lexer_state * ls,unsigned long flags)569 int enter_file(struct lexer_state *ls, unsigned long flags)
570 {
571 char *fn = current_long_filename ?
572 current_long_filename : current_filename;
573
574 if (!(flags & LINE_NUM)) return 0;
575 if ((flags & LEXER) && !(flags & TEXT_OUTPUT)) {
576 struct token t;
577
578 t.type = CONTEXT;
579 t.line = ls->line;
580 t.name = fn;
581 print_token(ls, &t, 0);
582 return 1;
583 }
584 print_line_info(ls, flags);
585 ls->oline --; /* emitted #line troubled oline */
586 return 0;
587 }
588
589 #ifdef UCPP_MMAP
/*
 * We open() the file, lseek() to its end, then fdopen() it. If the
 * lseek() worked, we try to mmap() the file, up to the point where we
 * arrived.
 * On an architecture where end-of-lines are multibytes and translated
 * into single '\n', bad things could happen. We strongly hope that, if
 * we could seek to the end but could not mmap(), then we can get back.
 *
 * find_file_map is the address of the mapping made by the last call to
 * fopen_mmap_file(), or 0 if there is none; map_length is its length.
 * Both are read by set_input_file().
 */
static void *find_file_map;
static size_t map_length;

/*
 * fopen_mmap_file() opens the file called name for reading, and tries
 * to map it into memory.
 *
 * return value: a stream on the file, or 0 if the file could not be
 * opened (or if it could be neither mapped nor rewound). When the file
 * could not be mapped, find_file_map is 0 and the stream must be read
 * the usual way.
 */
FILE *fopen_mmap_file(char *name)
{
	FILE *stream;
	int fd;
	long end_pos;

	find_file_map = 0;
	fd = open(name, O_RDONLY, 0);
	if (fd < 0) {
		return 0;
	}
	end_pos = lseek(fd, 0, SEEK_END);
	stream = fdopen(fd, "r");
	if (!stream) {
		close(fd);
		return 0;
	}
	if (end_pos < 0) {
		/* not seekable */
		return stream;
	}

	map_length = end_pos;
	find_file_map = mmap(0, map_length, PROT_READ, MAP_PRIVATE, fd, 0);
	if (find_file_map != MAP_FAILED) {
		return stream;
	}

	/* we could not mmap() the file; get back */
	find_file_map = 0;
	if (fseek(stream, 0, SEEK_SET)) {
		/* bwaah... can't get back. This file is cursed. */
		fclose(stream);
		return 0;
	}
	return stream;
}
630
set_input_file(struct lexer_state * ls,FILE * f)631 void set_input_file(struct lexer_state *ls, FILE *f)
632 {
633 ls->input = f;
634 if (find_file_map) {
635 ls->from_mmap = 1;
636 ls->input_buf_sav = ls->input_buf;
637 ls->input_buf = find_file_map;
638 ls->pbuf = 0;
639 ls->ebuf = map_length;
640 } else {
641 ls->from_mmap = 0;
642 }
643 }
644 #endif
645
646 /*
647 * Find a file by looking through the include path.
648 * return value: a FILE * on the file, opened in "r" mode, or 0.
649 *
650 * find_file_error will contain:
651 * FF_ERROR on error (file not found or impossible to read)
652 * FF_PROTECT file is protected and therefore useless to read
653 * FF_KNOWN file is already known
654 * FF_UNKNOWN file was not already known
655 */
656 static int find_file_error;
657
658 enum { FF_ERROR, FF_PROTECT, FF_KNOWN, FF_UNKNOWN };
659
find_file(char * name,int localdir)660 static FILE *find_file(char *name, int localdir)
661 {
662 FILE *f;
663 int i, incdir = -1;
664 size_t nl = strlen(name);
665 char *s = 0;
666 struct found_file *ff = 0, *nff;
667 int lf = 0;
668 int nffa = 0;
669
670 find_file_error = FF_ERROR;
671 protect_detect.state = -1;
672 protect_detect.macro = 0;
673 if (localdir) {
674 int i;
675 char *rfn = current_long_filename ? current_long_filename
676 : current_filename;
677
678 for (i = strlen(rfn) - 1; i >= 0; i --)
679 #ifdef MSDOS
680 if (rfn[i] == '\\') break;
681 #else
682 if (rfn[i] == '/') break;
683 #endif
684 #if defined MSDOS
685 if (i >= 0 && *name != '\\' && (nl < 2 || name[1] != ':'))
686 #elif defined AMIGA
687 if (i >= 0 && *name != '/' && (nl < 2 || name[1] != ':'))
688 #else
689 if (i >= 0 && *name != '/')
690 #endif
691 {
692 /*
693 * current file is somewhere else, and the provided
694 * file name is not absolute, so we must adjust the
695 * base for looking for the file; besides,
696 * found_files and found_files_loc are irrelevant
697 * for this search.
698 */
699 s = getmem(i + 2 + nl);
700 mmv(s, rfn, i);
701 #ifdef MSDOS
702 s[i] = '\\';
703 #else
704 s[i] = '/';
705 #endif
706 mmv(s + i + 1, name, nl);
707 s[i + 1 + nl] = 0;
708 ff = HTT_get(&found_files, s);
709 } else ff = HTT_get(&found_files, name);
710 }
711 if (!ff) {
712 struct found_file_sys *ffs = HTT_get(&found_files_sys, name);
713
714 if (ffs) {
715 ff = ffs->rff;
716 incdir = ffs->incdir;
717 }
718 }
719 /*
720 * At that point: if the file was found in the cache, ff points to
721 * the cached descriptive structure; its name is s if s is not 0,
722 * name otherwise.
723 */
724 if (ff) goto found_file_cache;
725
726 /*
727 * This is the first time we find the file, or it was not protected.
728 */
729 protect_detect.ff = new_found_file();
730 nffa = 1;
731 if (localdir &&
732 #ifdef UCPP_MMAP
733 (f = fopen_mmap_file(s ? s : name))
734 #else
735 (f = fopen(s ? s : name, "r"))
736 #endif
737 ) {
738 lf = 1;
739 goto found_file;
740 }
741 /*
742 * If s contains a name, that name is now irrelevant: it was a
743 * filename for a search in the current directory, and the file
744 * was not found.
745 */
746 if (s) {
747 freemem(s);
748 s = 0;
749 }
750 for (i = 0; (size_t)i < include_path_nb; i ++) {
751 size_t ni = strlen(include_path[i]);
752
753 s = getmem(ni + nl + 2);
754 mmv(s, include_path[i], ni);
755 #ifdef AMIGA
756 /* contributed by Volker Barthelmann */
757 if (ni == 1 && *s == '.') {
758 *s = 0;
759 ni = 0;
760 }
761 if (ni > 0 && s[ni - 1] != ':' && s[ni - 1] != '/') {
762 s[ni] = '/';
763 mmv(s + ni + 1, name, nl + 1);
764 } else {
765 mmv(s + ni, name, nl + 1);
766 }
767 #else
768 s[ni] = '/';
769 mmv(s + ni + 1, name, nl + 1);
770 #endif
771 #ifdef MSDOS
772 /* on msdos systems, replace all / by \ */
773 {
774 char *c;
775
776 for (c = s; *c; c ++) if (*c == '/') *c = '\\';
777 }
778 #endif
779 incdir = i;
780 if ((ff = HTT_get(&found_files, s)) != 0) {
781 /*
782 * The file is known, but not as a system include
783 * file under the name provided.
784 */
785 struct found_file_sys *ffs = new_found_file_sys();
786
787 ffs->rff = ff;
788 ffs->incdir = incdir;
789 HTT_put(&found_files_sys, ffs, name);
790 freemem(s);
791 s = 0;
792 if (nffa) {
793 del_found_file(protect_detect.ff);
794 protect_detect.ff = 0;
795 nffa = 0;
796 }
797 goto found_file_cache;
798 }
799 #ifdef UCPP_MMAP
800 f = fopen_mmap_file(s);
801 #else
802 f = fopen(s, "r");
803 #endif
804 if (f) goto found_file;
805 freemem(s);
806 s = 0;
807 }
808 zero_out:
809 if (s) freemem(s);
810 if (nffa) {
811 del_found_file(protect_detect.ff);
812 protect_detect.ff = 0;
813 nffa = 0;
814 }
815 return 0;
816
817 /*
818 * This part is invoked when the file was found in the
819 * cache.
820 */
821 found_file_cache:
822 if (ff->protect) {
823 if (get_macro(ff->protect)) {
824 /* file is protected, do not include it */
825 find_file_error = FF_PROTECT;
826 goto zero_out;
827 }
828 /* file is protected but the guardian macro is
829 not available; disable guardian detection. */
830 protect_detect.state = 0;
831 }
832 protect_detect.ff = ff;
833 #ifdef UCPP_MMAP
834 f = fopen_mmap_file(HASH_ITEM_NAME(ff));
835 #else
836 f = fopen(HASH_ITEM_NAME(ff), "r");
837 #endif
838 if (!f) goto zero_out;
839 find_file_error = FF_KNOWN;
840 goto found_file_2;
841
842 /*
843 * This part is invoked when we found a new file, which was not
844 * yet referenced. If lf == 1, then the file was found directly,
845 * otherwise it was found in some system include directory.
846 * A new found_file structure has been allocated and is in
847 * protect_detect.ff
848 */
849 found_file:
850 if (f && ((emit_dependencies == 1 && lf && current_incdir == -1)
851 || emit_dependencies == 2)) {
852 fprintf(emit_output, " %s", s ? s : name);
853 }
854 nff = protect_detect.ff;
855 nff->name = sdup(name);
856 #ifdef AUDIT
857 if (
858 #endif
859 HTT_put(&found_files, nff, s ? s : name)
860 #ifdef AUDIT
861 ) ouch("filename collided with a wraith")
862 #endif
863 ;
864 if (!lf) {
865 struct found_file_sys *ffs = new_found_file_sys();
866
867 ffs->rff = nff;
868 ffs->incdir = incdir;
869 HTT_put(&found_files_sys, ffs, name);
870 }
871 if (s) freemem(s);
872 s = 0;
873 find_file_error = FF_UNKNOWN;
874 ff = nff;
875
876 found_file_2:
877 if (s) freemem(s);
878 current_long_filename = HASH_ITEM_NAME(ff);
879 #ifdef NO_LIBC_BUF
880 setbuf(f, 0);
881 #endif
882 current_incdir = incdir;
883 return f;
884 }
885
886 /*
887 * Find the named file by looking through the end of the include path.
888 * This is for #include_next directives.
889 * #include_next <foo> and #include_next "foo" are considered identical,
890 * for all practical purposes.
891 */
find_file_next(char * name)892 static FILE *find_file_next(char *name)
893 {
894 int i;
895 size_t nl = strlen(name);
896 FILE *f;
897 struct found_file *ff;
898
899 find_file_error = FF_ERROR;
900 protect_detect.state = -1;
901 protect_detect.macro = 0;
902 for (i = current_incdir + 1; (size_t)i < include_path_nb; i ++) {
903 char *s;
904 size_t ni = strlen(include_path[i]);
905
906 s = getmem(ni + nl + 2);
907 mmv(s, include_path[i], ni);
908 s[ni] = '/';
909 mmv(s + ni + 1, name, nl + 1);
910 #ifdef MSDOS
911 /* on msdos systems, replace all / by \ */
912 {
913 char *c;
914
915 for (c = s; *c; c ++) if (*c == '/') *c = '\\';
916 }
917 #endif
918 ff = HTT_get(&found_files, s);
919 if (ff) {
920 /* file was found in the cache */
921 if (ff->protect) {
922 if (get_macro(ff->protect)) {
923 find_file_error = FF_PROTECT;
924 freemem(s);
925 return 0;
926 }
927 /* file is protected but the guardian macro is
928 not available; disable guardian detection. */
929 protect_detect.state = 0;
930 }
931 protect_detect.ff = ff;
932 #ifdef UCPP_MMAP
933 f = fopen_mmap_file(HASH_ITEM_NAME(ff));
934 #else
935 f = fopen(HASH_ITEM_NAME(ff), "r");
936 #endif
937 if (!f) {
938 /* file is referenced but yet unavailable. */
939 freemem(s);
940 return 0;
941 }
942 find_file_error = FF_KNOWN;
943 freemem(s);
944 s = HASH_ITEM_NAME(ff);
945 } else {
946 #ifdef UCPP_MMAP
947 f = fopen_mmap_file(s);
948 #else
949 f = fopen(s, "r");
950 #endif
951 if (f) {
952 if (emit_dependencies == 2) {
953 fprintf(emit_output, " %s", s);
954 }
955 ff = protect_detect.ff = new_found_file();
956 ff->name = sdup(s);
957 #ifdef AUDIT
958 if (
959 #endif
960 HTT_put(&found_files, ff, s)
961 #ifdef AUDIT
962 ) ouch("filename collided with a wraith")
963 #endif
964 ;
965 find_file_error = FF_UNKNOWN;
966 freemem(s);
967 s = HASH_ITEM_NAME(ff);
968 }
969 }
970 if (f) {
971 current_long_filename = s;
972 current_incdir = i;
973 return f;
974 }
975 freemem(s);
976 }
977 return 0;
978 }
979
980 /*
981 * The #if directive. This function parse the expression, performs macro
982 * expansion (and handles the "defined" operator), and call eval_expr.
983 * return value: 1 if the expression is true, 0 if it is false, -1 on error.
984 */
handle_if(struct lexer_state * ls)985 static int handle_if(struct lexer_state *ls)
986 {
987 struct token_fifo tf, tf1, tf2, tf3, *save_tf;
988 long l = ls->line;
989 unsigned long z;
990 int ret = 0, ltww = 1;
991
992 /* first, get the whole line */
993 tf.art = tf.nt = 0;
994 while (!next_token(ls) && ls->ctok->type != NEWLINE) {
995 struct token t;
996
997 if (ltww && ttMWS(ls->ctok->type)) continue;
998 ltww = ttMWS(ls->ctok->type);
999 t.type = ls->ctok->type;
1000 t.line = l;
1001 if (S_TOKEN(ls->ctok->type)) {
1002 t.name = sdup(ls->ctok->name);
1003 throw_away(ls->gf, t.name);
1004 }
1005 aol(tf.t, tf.nt, t, TOKEN_LIST_MEMG);
1006 }
1007 if (ltww && tf.nt) if ((-- tf.nt) == 0) freemem(tf.t);
1008 if (tf.nt == 0) {
1009 error(l, "void condition for a #if/#elif");
1010 return -1;
1011 }
1012 /* handle the "defined" operator */
1013 tf1.art = tf1.nt = 0;
1014 while (tf.art < tf.nt) {
1015 struct token *ct, rt;
1016 struct macro *m;
1017 size_t nidx, eidx;
1018
1019 ct = tf.t + (tf.art ++);
1020 if (ct->type == NAME && !strcmp(ct->name, "defined")) {
1021 if (tf.art >= tf.nt) goto store_token;
1022 nidx = tf.art;
1023 if (ttMWS(tf.t[nidx].type))
1024 if (++ nidx >= tf.nt) goto store_token;
1025 if (tf.t[nidx].type == NAME) {
1026 eidx = nidx;
1027 goto check_macro;
1028 }
1029 if (tf.t[nidx].type != LPAR) goto store_token;
1030 if (++ nidx >= tf.nt) goto store_token;
1031 if (ttMWS(tf.t[nidx].type))
1032 if (++ nidx >= tf.nt) goto store_token;
1033 if (tf.t[nidx].type != NAME) goto store_token;
1034 eidx = nidx + 1;
1035 if (eidx >= tf.nt) goto store_token;
1036 if (ttMWS(tf.t[eidx].type))
1037 if (++ eidx >= tf.nt) goto store_token;
1038 if (tf.t[eidx].type != RPAR) goto store_token;
1039 goto check_macro;
1040 }
1041 store_token:
1042 aol(tf1.t, tf1.nt, *ct, TOKEN_LIST_MEMG);
1043 continue;
1044
1045 check_macro:
1046 m = get_macro(tf.t[nidx].name);
1047 rt.type = NUMBER;
1048 rt.name = m ? "1L" : "0L";
1049 aol(tf1.t, tf1.nt, rt, TOKEN_LIST_MEMG);
1050 tf.art = eidx + 1;
1051 }
1052 freemem(tf.t);
1053 if (tf1.nt == 0) {
1054 error(l, "void condition (after expansion) for a #if/#elif");
1055 return -1;
1056 }
1057
1058 /* perform all macro substitutions */
1059 tf2.art = tf2.nt = 0;
1060 save_tf = ls->output_fifo;
1061 ls->output_fifo = &tf2;
1062 while (tf1.art < tf1.nt) {
1063 struct token *ct;
1064
1065 ct = tf1.t + (tf1.art ++);
1066 if (ct->type == NAME) {
1067 struct macro *m = get_macro(ct->name);
1068
1069 if (m) {
1070 if (substitute_macro(ls, m, &tf1, 0,
1071 #ifdef NO_PRAGMA_IN_DIRECTIVE
1072 1,
1073 #else
1074 0,
1075 #endif
1076 ct->line)) {
1077 ls->output_fifo = save_tf;
1078 goto error1;
1079 }
1080 continue;
1081 }
1082 } else if ((ct->type == SHARP || ct->type == DIG_SHARP)
1083 && (ls->flags & HANDLE_ASSERTIONS)) {
1084 /* we have an assertion; parse it */
1085 int nnp, ltww = 1;
1086 size_t i = tf1.art;
1087 struct token_fifo atl;
1088 char *aname;
1089 struct assert *a;
1090 int av = 0;
1091 struct token rt;
1092
1093 atl.art = atl.nt = 0;
1094 while (i < tf1.nt && ttMWS(tf1.t[i].type)) i ++;
1095 if (i >= tf1.nt) goto assert_error;
1096 if (tf1.t[i].type != NAME) goto assert_error;
1097 aname = tf1.t[i ++].name;
1098 while (i < tf1.nt && ttMWS(tf1.t[i].type)) i ++;
1099 if (i >= tf1.nt) goto assert_generic;
1100 if (tf1.t[i].type != LPAR) goto assert_generic;
1101 i ++;
1102 for (nnp = 1; nnp && i < tf1.nt; i ++) {
1103 if (ltww && ttMWS(tf1.t[i].type)) continue;
1104 if (tf1.t[i].type == LPAR) nnp ++;
1105 else if (tf1.t[i].type == RPAR
1106 && (-- nnp) == 0) {
1107 tf1.art = i + 1;
1108 break;
1109 }
1110 ltww = ttMWS(tf1.t[i].type);
1111 aol(atl.t, atl.nt, tf1.t[i], TOKEN_LIST_MEMG);
1112 }
1113 if (nnp) goto assert_error;
1114 if (ltww && atl.nt && (-- atl.nt) == 0) freemem(atl.t);
1115 if (atl.nt == 0) goto assert_error;
1116
1117 /* the assertion is in aname and atl; check it */
1118 a = get_assertion(aname);
1119 if (a) for (i = 0; i < a->nbval; i ++)
1120 if (!cmp_token_list(&atl, a->val + i)) {
1121 av = 1;
1122 break;
1123 }
1124 rt.type = NUMBER;
1125 rt.name = av ? "1" : "0";
1126 aol(tf2.t, tf2.nt, rt, TOKEN_LIST_MEMG);
1127 if (atl.nt) freemem(atl.t);
1128 continue;
1129
1130 assert_generic:
1131 tf1.art = i;
1132 rt.type = NUMBER;
1133 rt.name = get_assertion(aname) ? "1" : "0";
1134 aol(tf2.t, tf2.nt, rt, TOKEN_LIST_MEMG);
1135 continue;
1136
1137 assert_error:
1138 error(l, "syntax error for assertion in #if");
1139 ls->output_fifo = save_tf;
1140 goto error1;
1141 }
1142 aol(tf2.t, tf2.nt, *ct, TOKEN_LIST_MEMG);
1143 }
1144 ls->output_fifo = save_tf;
1145 freemem(tf1.t);
1146 if (tf2.nt == 0) {
1147 error(l, "void condition (after expansion) for a #if/#elif");
1148 return -1;
1149 }
1150
1151 /*
1152 * suppress whitespace and replace rogue identifiers by 0
1153 */
1154 tf3.art = tf3.nt = 0;
1155 while (tf2.art < tf2.nt) {
1156 struct token *ct = tf2.t + (tf2.art ++);
1157
1158 if (ttMWS(ct->type)) continue;
1159 if (ct->type == NAME) {
1160 /*
1161 * a rogue identifier; we replace it with "0".
1162 */
1163 struct token rt;
1164
1165 rt.type = NUMBER;
1166 rt.name = "0";
1167 aol(tf3.t, tf3.nt, rt, TOKEN_LIST_MEMG);
1168 continue;
1169 }
1170 aol(tf3.t, tf3.nt, *ct, TOKEN_LIST_MEMG);
1171 }
1172 freemem(tf2.t);
1173
1174 if (tf3.nt == 0) {
1175 error(l, "void condition (after expansion) for a #if/#elif");
1176 return -1;
1177 }
1178 eval_line = l;
1179 z = eval_expr(&tf3, &ret, (ls->flags & WARN_STANDARD) != 0);
1180 freemem(tf3.t);
1181 if (ret) return -1;
1182 return (z != 0);
1183
1184 error1:
1185 if (tf1.nt) freemem(tf1.t);
1186 if (tf2.nt) freemem(tf2.t);
1187 return -1;
1188 }
1189
1190 /*
1191 * A #include was found; parse the end of line, replace macros if
1192 * necessary.
1193 *
1194 * If nex is set to non-zero, the directive is considered as a #include_next
1195 * (extension to C99, mimicked from GNU)
1196 */
handle_include(struct lexer_state * ls,unsigned long flags,int nex)1197 static int handle_include(struct lexer_state *ls, unsigned long flags, int nex)
1198 {
1199 int c, string_fname = 0;
1200 char *fname;
1201 unsigned char *fname2;
1202 size_t fname_ptr = 0;
1203 long l = ls->line;
1204 int x, y;
1205 FILE *f;
1206 struct token_fifo tf, tf2, *save_tf;
1207 size_t nl;
1208 int tgd;
1209 struct lexer_state alt_ls;
1210
1211 #define left_angle(t) ((t) == LT || (t) == LEQ || (t) == LSH \
1212 || (t) == ASLSH || (t) == DIG_LBRK || (t) == LBRA)
1213 #define right_angle(t) ((t) == GT || (t) == RSH || (t) == ARROW \
1214 || (t) == DIG_RBRK || (t) == DIG_RBRA)
1215
1216 while ((c = grap_char(ls)) >= 0 && c != '\n') {
1217 if (space_char(c)) {
1218 discard_char(ls);
1219 continue;
1220 }
1221 if (c == '<') {
1222 discard_char(ls);
1223 while ((c = grap_char(ls)) >= 0) {
1224 discard_char(ls);
1225 if (c == '\n') goto include_last_chance;
1226 if (c == '>') break;
1227 aol(fname, fname_ptr, (char)c, FNAME_MEMG);
1228 }
1229 aol(fname, fname_ptr, (char)0, FNAME_MEMG);
1230 string_fname = 0;
1231 goto do_include;
1232 } else if (c == '"') {
1233 discard_char(ls);
1234 while ((c = grap_char(ls)) >= 0) {
1235 discard_char(ls);
1236 if (c == '\n') {
1237 /* macro replacements won't save that one */
1238 if (fname_ptr) freemem(fname);
1239 goto include_error;
1240 }
1241 if (c == '"') break;
1242 aol(fname, fname_ptr, (char)c, FNAME_MEMG);
1243 }
1244 aol(fname, fname_ptr, (char)0, FNAME_MEMG);
1245 string_fname = 1;
1246 goto do_include;
1247 }
1248 goto include_macro;
1249 }
1250
1251 include_last_chance:
1252 /*
1253 * We found a '<' but not the trailing '>'; so we tokenize the
1254 * line, and try to act upon it. The standard lets us free in that
1255 * matter, and no sane programmer would use such a construct, but
1256 * it is no reason not to support it.
1257 */
1258 if (fname_ptr == 0) goto include_error;
1259 fname2 = getmem(fname_ptr + 1);
1260 mmv(fname2 + 1, fname, fname_ptr);
1261 fname2[0] = '<';
1262 /*
1263 * We merely copy the lexer_state structure; this should be ok,
1264 * since we do want to share the memory structure (garbage_fifo),
1265 * and do not touch any other context-full thing.
1266 */
1267 alt_ls = *ls;
1268 alt_ls.input = 0;
1269 alt_ls.input_string = fname2;
1270 alt_ls.pbuf = 0;
1271 alt_ls.ebuf = fname_ptr + 1;
1272 tf.art = tf.nt = 0;
1273 while (!next_token(&alt_ls)) {
1274 if (!ttMWS(alt_ls.ctok->type)) {
1275 struct token t;
1276
1277 t.type = alt_ls.ctok->type;
1278 t.line = l;
1279 if (S_TOKEN(alt_ls.ctok->type)) {
1280 t.name = sdup(alt_ls.ctok->name);
1281 throw_away(alt_ls.gf, t.name);
1282 }
1283 aol(tf.t, tf.nt, t, TOKEN_LIST_MEMG);
1284 }
1285 }
1286 freemem(fname2);
1287 if (alt_ls.pbuf < alt_ls.ebuf) goto include_error;
1288 /* tokenizing failed */
1289 goto include_macro2;
1290
1291 include_error:
1292 error(l, "invalid '#include'");
1293 return 1;
1294
1295 include_macro:
1296 tf.art = tf.nt = 0;
1297 while (!next_token(ls) && ls->ctok->type != NEWLINE) {
1298 if (!ttMWS(ls->ctok->type)) {
1299 struct token t;
1300
1301 t.type = ls->ctok->type;
1302 t.line = l;
1303 if (S_TOKEN(ls->ctok->type)) {
1304 t.name = sdup(ls->ctok->name);
1305 throw_away(ls->gf, t.name);
1306 }
1307 aol(tf.t, tf.nt, t, TOKEN_LIST_MEMG);
1308 }
1309 }
1310 include_macro2:
1311 tf2.art = tf2.nt = 0;
1312 save_tf = ls->output_fifo;
1313 ls->output_fifo = &tf2;
1314 while (tf.art < tf.nt) {
1315 struct token *ct;
1316
1317 ct = tf.t + (tf.art ++);
1318 if (ct->type == NAME) {
1319 struct macro *m = get_macro(ct->name);
1320 if (m) {
1321 if (substitute_macro(ls, m, &tf, 0,
1322 #ifdef NO_PRAGMA_IN_DIRECTIVE
1323 1,
1324 #else
1325 0,
1326 #endif
1327 ct->line)) {
1328 ls->output_fifo = save_tf;
1329 return -1;
1330 }
1331 continue;
1332 }
1333 }
1334 aol(tf2.t, tf2.nt, *ct, TOKEN_LIST_MEMG);
1335 }
1336 freemem(tf.t);
1337 ls->output_fifo = save_tf;
1338 for (x = 0; (size_t)x < tf2.nt && ttWHI(tf2.t[x].type); x ++);
1339 for (y = tf2.nt - 1; y >= 0 && ttWHI(tf2.t[y].type); y --);
1340 if ((size_t)x >= tf2.nt) goto include_macro_err;
1341 if (tf2.t[x].type == STRING) {
1342 if (y != x) goto include_macro_err;
1343 if (tf2.t[x].name[0] == 'L') {
1344 if (ls->flags & WARN_STANDARD)
1345 warning(l, "wide string for #include");
1346 fname = sdup(tf2.t[x].name);
1347 nl = strlen(fname);
1348 *(fname + nl - 1) = 0;
1349 mmvwo(fname, fname + 2, nl - 2);
1350 } else {
1351 fname = sdup(tf2.t[x].name);
1352 nl = strlen(fname);
1353 *(fname + nl - 1) = 0;
1354 mmvwo(fname, fname + 1, nl - 1);
1355 }
1356 string_fname = 1;
1357 } else if (left_angle(tf2.t[x].type) && right_angle(tf2.t[y].type)) {
1358 int i, j;
1359
1360 if (ls->flags & WARN_ANNOYING) warning(l, "reconstruction "
1361 "of <foo> in #include");
1362 for (j = 0, i = x; i <= y; i ++) if (!ttWHI(tf2.t[i].type))
1363 j += strlen(tname(tf2.t[i]));
1364 fname = getmem(j + 1);
1365 for (j = 0, i = x; i <= y; i ++) {
1366 if (ttWHI(tf2.t[i].type)) continue;
1367 strcpy(fname + j, tname(tf2.t[i]));
1368 j += strlen(tname(tf2.t[i]));
1369 }
1370 *(fname + j - 1) = 0;
1371 mmvwo(fname, fname + 1, j);
1372 string_fname = 0;
1373 } else goto include_macro_err;
1374 freemem(tf2.t);
1375 goto do_include_next;
1376
1377 include_macro_err:
1378 error(l, "macro expansion did not produce a valid filename "
1379 "for #include");
1380 if (tf2.nt) freemem(tf2.t);
1381 return 1;
1382
1383 do_include:
1384 tgd = 1;
1385 while (!next_token(ls)) {
1386 if (tgd && !ttWHI(ls->ctok->type)
1387 && (ls->flags & WARN_STANDARD)) {
1388 warning(l, "trailing garbage in #include");
1389 tgd = 0;
1390 }
1391 if (ls->ctok->type == NEWLINE) break;
1392 }
1393
1394 /* the increment of ls->line is intended so that the line
1395 numbering is reported correctly in report_context() even if
1396 the #include is at the end of the file with no trailing newline */
1397 if (ls->ctok->type != NEWLINE) ls->line ++;
1398 do_include_next:
1399 if (!(ls->flags & LEXER) && (ls->flags & KEEP_OUTPUT))
1400 put_char(ls, '\n');
1401 push_file_context(ls);
1402 reinit_lexer_state(ls, 1);
1403 #ifdef MSDOS
1404 /* on msdos systems, replace all / by \ */
1405 {
1406 char *d;
1407
1408 for (d = fname; *d; d ++) if (*d == '/') *d = '\\';
1409 }
1410 #endif
1411 f = nex ? find_file_next(fname) : find_file(fname, string_fname);
1412 if (!f) {
1413 current_filename = 0;
1414 pop_file_context(ls);
1415 if (find_file_error == FF_ERROR) {
1416 error(l, "file '%s' not found", fname);
1417 freemem(fname);
1418 return 1;
1419 }
1420 /* file was found, but it is useless to include it again */
1421 freemem(fname);
1422 return 0;
1423 }
1424 #ifdef UCPP_MMAP
1425 set_input_file(ls, f);
1426 #else
1427 ls->input = f;
1428 #endif
1429 current_filename = fname;
1430 enter_file(ls, flags);
1431 return 0;
1432
1433 #undef left_angle
1434 #undef right_angle
1435 }
1436
1437 /*
1438 * for #line directives
1439 */
handle_line(struct lexer_state * ls,unsigned long flags)1440 static int handle_line(struct lexer_state *ls, unsigned long flags)
1441 {
1442 char *fname;
1443 long l = ls->line;
1444 struct token_fifo tf, tf2, *save_tf;
1445 size_t nl, j;
1446 unsigned long z;
1447
1448 tf.art = tf.nt = 0;
1449 while (!next_token(ls) && ls->ctok->type != NEWLINE) {
1450 if (!ttMWS(ls->ctok->type)) {
1451 struct token t;
1452
1453 t.type = ls->ctok->type;
1454 t.line = l;
1455 if (S_TOKEN(ls->ctok->type)) {
1456 t.name = sdup(ls->ctok->name);
1457 throw_away(ls->gf, t.name);
1458 }
1459 aol(tf.t, tf.nt, t, TOKEN_LIST_MEMG);
1460 }
1461 }
1462 tf2.art = tf2.nt = 0;
1463 save_tf = ls->output_fifo;
1464 ls->output_fifo = &tf2;
1465 while (tf.art < tf.nt) {
1466 struct token *ct;
1467
1468 ct = tf.t + (tf.art ++);
1469 if (ct->type == NAME) {
1470 struct macro *m = get_macro(ct->name);
1471 if (m) {
1472 if (substitute_macro(ls, m, &tf, 0,
1473 #ifdef NO_PRAGMA_IN_DIRECTIVE
1474 1,
1475 #else
1476 0,
1477 #endif
1478 ct->line)) {
1479 ls->output_fifo = save_tf;
1480 return -1;
1481 }
1482 continue;
1483 }
1484 }
1485 aol(tf2.t, tf2.nt, *ct, TOKEN_LIST_MEMG);
1486 }
1487 freemem(tf.t);
1488 for (tf2.art = 0; tf2.art < tf2.nt && ttWHI(tf2.t[tf2.art].type);
1489 tf2.art ++);
1490 ls->output_fifo = save_tf;
1491 if (tf2.art == tf2.nt || (tf2.t[tf2.art].type != NUMBER
1492 && tf2.t[tf2.art].type != CHAR)) {
1493 error(l, "not a valid number for #line");
1494 goto line_macro_err;
1495 }
1496 for (j = 0; tf2.t[tf2.art].name[j]; j ++)
1497 if (tf2.t[tf2.art].name[j] < '0'
1498 || tf2.t[tf2.art].name[j] > '9')
1499 if (ls->flags & WARN_STANDARD)
1500 warning(l, "non-standard line number in #line");
1501 if (catch(eval_exception)) goto line_macro_err;
1502 z = strtoconst(tf2.t[tf2.art].name);
1503 if (j > 10 || z > 2147483647U) {
1504 error(l, "out-of-bound line number for #line");
1505 goto line_macro_err;
1506 }
1507 ls->oline = ls->line = z;
1508 if ((++ tf2.art) < tf2.nt) {
1509 size_t i;
1510
1511 for (i = tf2.art; i < tf2.nt && ttMWS(tf2.t[i].type); i ++);
1512 if (i < tf2.nt) {
1513 if (tf2.t[i].type != STRING) {
1514 error(l, "not a valid filename for #line");
1515 goto line_macro_err;
1516 }
1517 if (tf2.t[i].name[0] == 'L') {
1518 if (ls->flags & WARN_STANDARD) {
1519 warning(l, "wide string for #line");
1520 }
1521 fname = sdup(tf2.t[i].name);
1522 nl = strlen(fname);
1523 *(fname + nl - 1) = 0;
1524 mmvwo(fname, fname + 2, nl - 2);
1525 } else {
1526 fname = sdup(tf2.t[i].name);
1527 nl = strlen(fname);
1528 *(fname + nl - 1) = 0;
1529 mmvwo(fname, fname + 1, nl - 1);
1530 }
1531 if (current_filename) freemem(current_filename);
1532 current_filename = fname;
1533 }
1534 for (i ++; i < tf2.nt && ttMWS(tf2.t[i].type); i ++);
1535 if (i < tf2.nt && (ls->flags & WARN_STANDARD)) {
1536 warning(l, "trailing garbage in #line");
1537 }
1538 }
1539 freemem(tf2.t);
1540 enter_file(ls, flags);
1541 return 0;
1542
1543 line_macro_err:
1544 if (tf2.nt) freemem(tf2.t);
1545 return 1;
1546 }
1547
1548 /*
1549 * a #error directive: we emit the message without any modification
1550 * (except the usual backslash+newline and trigraphs)
1551 */
handle_error(struct lexer_state * ls)1552 static void handle_error(struct lexer_state *ls)
1553 {
1554 int c;
1555 size_t p = 0, lp = 128;
1556 long l = ls->line;
1557 unsigned char *buf = getmem(lp);
1558
1559 while ((c = grap_char(ls)) >= 0 && c != '\n') {
1560 discard_char(ls);
1561 wan(buf, p, (unsigned char)c, lp);
1562 }
1563 wan(buf, p, 0, lp);
1564 error(l, "#error%s", buf);
1565 freemem(buf);
1566 }
1567
1568 /*
1569 * convert digraph tokens to their standard equivalent.
1570 */
undig(int type)1571 static int undig(int type)
1572 {
1573 static int ud[6] = { LBRK, RBRK, LBRA, RBRA, SHARP, DSHARP };
1574
1575 return ud[type - DIG_LBRK];
1576 }
1577
1578 #ifdef PRAGMA_TOKENIZE
1579 /*
1580 * Make a compressed representation of a token list; the contents of
1581 * the token_fifo are freed. Values equal to 0 are replaced by
1582 * PRAGMA_TOKEN_END (by default, (unsigned char)'\n') and the compressed
1583 * string is padded by a 0 (so that it may be * handled like a string).
1584 * Digraph tokens are replaced by their non-digraph equivalents.
1585 */
compress_token_list(struct token_fifo * tf)1586 struct comp_token_fifo compress_token_list(struct token_fifo *tf)
1587 {
1588 struct comp_token_fifo ct;
1589 size_t l;
1590
1591 for (l = 0, tf->art = 0; tf->art < tf->nt; tf->art ++) {
1592 l ++;
1593 if (S_TOKEN(tf->t[tf->art].type))
1594 l += strlen(tf->t[tf->art].name) + 1;
1595 }
1596 ct.t = getmem((ct.length = l) + 1);
1597 for (l = 0, tf->art = 0; tf->art < tf->nt; tf->art ++) {
1598 int tt = tf->t[tf->art].type;
1599
1600 if (tt == 0) tt = PRAGMA_TOKEN_END;
1601 if (tt > DIGRAPH_TOKENS && tt < DIGRAPH_TOKENS_END)
1602 tt = undig(tt);
1603 ct.t[l ++] = tt;
1604 if (S_TOKEN(tt)) {
1605 char *tn = tf->t[tf->art].name;
1606 size_t sl = strlen(tn);
1607
1608 mmv(ct.t + l, tn, sl);
1609 l += sl;
1610 ct.t[l ++] = PRAGMA_TOKEN_END;
1611 freemem(tn);
1612 }
1613 }
1614 ct.t[l] = 0;
1615 if (tf->nt) freemem(tf->t);
1616 ct.rp = 0;
1617 return ct;
1618 }
1619 #endif
1620
1621 /*
1622 * A #pragma directive: we make a PRAGMA token containing the rest of
1623 * the line.
1624 *
1625 * We strongly hope that we are called only in LEXER mode.
1626 */
handle_pragma(struct lexer_state * ls)1627 static void handle_pragma(struct lexer_state *ls)
1628 {
1629 unsigned char *buf;
1630 struct token t;
1631 long l = ls->line;
1632
1633 #ifdef PRAGMA_TOKENIZE
1634 struct token_fifo tf;
1635
1636 tf.art = tf.nt = 0;
1637 while (!next_token(ls) && ls->ctok->type != NEWLINE)
1638 if (!ttMWS(ls->ctok->type)) break;
1639 if (ls->ctok->type != NEWLINE) {
1640 do {
1641 struct token t;
1642
1643 t.type = ls->ctok->type;
1644 if (ttMWS(t.type)) continue;
1645 if (S_TOKEN(t.type)) t.name = sdup(ls->ctok->name);
1646 aol(tf.t, tf.nt, t, TOKEN_LIST_MEMG);
1647 } while (!next_token(ls) && ls->ctok->type != NEWLINE);
1648 }
1649 if (tf.nt == 0) {
1650 /* void pragma are silently ignored */
1651 return;
1652 }
1653 buf = (compress_token_list(&tf)).t;
1654 #else
1655 int c, x = 1, y = 32;
1656
1657 while ((c = grap_char(ls)) >= 0 && c != '\n') {
1658 discard_char(ls);
1659 if (!space_char(c)) break;
1660 }
1661 /* void #pragma are ignored */
1662 if (c == '\n') return;
1663 buf = getmem(y);
1664 buf[0] = c;
1665 while ((c = grap_char(ls)) >= 0 && c != '\n') {
1666 discard_char(ls);
1667 wan(buf, x, c, y);
1668 }
1669 for (x --; x >= 0 && space_char(buf[x]); x --);
1670 x ++;
1671 wan(buf, x, 0, y);
1672 #endif
1673 t.type = PRAGMA;
1674 t.line = l;
1675 t.name = (char *)buf;
1676 aol(ls->output_fifo->t, ls->output_fifo->nt, t, TOKEN_LIST_MEMG);
1677 throw_away(ls->gf, (char *)buf);
1678 }
1679
1680 /*
1681 * We saw a # at the beginning of a line (or preceeded only by whitespace).
1682 * We check the directive name and act accordingly.
1683 */
handle_cpp(struct lexer_state * ls,int sharp_type)1684 static int handle_cpp(struct lexer_state *ls, int sharp_type)
1685 {
1686 #define condfset(x) do { \
1687 ls->condf[(x) / 32] |= 1UL << ((x) % 32); \
1688 } while (0)
1689 #define condfclr(x) do { \
1690 ls->condf[(x) / 32] &= ~(1UL << ((x) % 32)); \
1691 } while (0)
1692 #define condfval(x) ((ls->condf[(x) / 32] & (1UL << ((x) % 32))) != 0)
1693
1694 long l = ls->line;
1695 unsigned long save_flags = ls->flags;
1696 int ret = 0;
1697
1698 save_flags = ls->flags;
1699 ls->flags |= LEXER;
1700 while (!next_token(ls)) {
1701 int t = ls->ctok->type;
1702
1703 switch (t) {
1704 case COMMENT:
1705 if (ls->flags & WARN_ANNOYING) {
1706 warning(l, "comment in the middle of "
1707 "a cpp directive");
1708 }
1709 /* fall through */
1710 case NONE:
1711 continue;
1712 case NEWLINE:
1713 /* null directive */
1714 if (ls->flags & WARN_ANNOYING) {
1715 /* truly an annoying warning; null directives
1716 are rare but may increase readability of
1717 some source files, and they are legal */
1718 warning(l, "null cpp directive");
1719 }
1720 if (!(ls->flags & LEXER)) put_char(ls, '\n');
1721 goto handle_exit2;
1722 case NAME:
1723 break;
1724 default:
1725 if (ls->flags & FAIL_SHARP) {
1726 /* LPS 20050602 - ignores '#!' if on the first line */
1727 if( ( l == 1 ) &&
1728 ( ls->condcomp ) )
1729 {
1730 ret = 1;
1731 }
1732 else
1733 /* LPS 20050602 */
1734 if (ls->condcomp) {
1735 error(l, "rogue '#'");
1736 ret = 1;
1737 } else {
1738 if (ls->flags & WARN_STANDARD) {
1739 warning(l, "rogue '#' in code "
1740 "compiled out");
1741 ret = 0;
1742 }
1743 }
1744 ls->flags = save_flags;
1745 goto handle_warp_ign;
1746 } else {
1747 struct token u;
1748
1749 u.type = sharp_type;
1750 u.line = l;
1751 ls->flags = save_flags;
1752 print_token(ls, &u, 0);
1753 print_token(ls, ls->ctok, 0);
1754 if (ls->flags & WARN_ANNOYING) {
1755 warning(l, "rogue '#' dumped");
1756 }
1757 goto handle_exit3;
1758 }
1759 }
1760 if (ls->condcomp) {
1761 if (!strcmp(ls->ctok->name, "define")) {
1762 ret = handle_define(ls);
1763 goto handle_exit;
1764 } else if (!strcmp(ls->ctok->name, "undef")) {
1765 ret = handle_undef(ls);
1766 goto handle_exit;
1767 } else if (!strcmp(ls->ctok->name, "if")) {
1768 if ((++ ls->ifnest) > 63) goto too_many_if;
1769 condfclr(ls->ifnest - 1);
1770 ret = handle_if(ls);
1771 if (ret > 0) ret = 0;
1772 else if (ret == 0) {
1773 ls->condcomp = 0;
1774 ls->condmet = 0;
1775 ls->condnest = ls->ifnest - 1;
1776 }
1777 else ret = 1;
1778 goto handle_exit;
1779 } else if (!strcmp(ls->ctok->name, "ifdef")) {
1780 if ((++ ls->ifnest) > 63) goto too_many_if;
1781 condfclr(ls->ifnest - 1);
1782 ret = handle_ifdef(ls);
1783 if (ret > 0) ret = 0;
1784 else if (ret == 0) {
1785 ls->condcomp = 0;
1786 ls->condmet = 0;
1787 ls->condnest = ls->ifnest - 1;
1788 }
1789 else ret = 1;
1790 goto handle_exit;
1791 } else if (!strcmp(ls->ctok->name, "ifndef")) {
1792 if ((++ ls->ifnest) > 63) goto too_many_if;
1793 condfclr(ls->ifnest - 1);
1794 ret = handle_ifndef(ls);
1795 if (ret > 0) ret = 0;
1796 else if (ret == 0) {
1797 ls->condcomp = 0;
1798 ls->condmet = 0;
1799 ls->condnest = ls->ifnest - 1;
1800 }
1801 else ret = 1;
1802 goto handle_exit;
1803 } else if (!strcmp(ls->ctok->name, "else")) {
1804 if (ls->ifnest == 0
1805 || condfval(ls->ifnest - 1)) {
1806 error(l, "rogue #else");
1807 ret = 1;
1808 goto handle_warp;
1809 }
1810 condfset(ls->ifnest - 1);
1811 if (ls->ifnest == 1) protect_detect.state = 0;
1812 ls->condcomp = 0;
1813 ls->condmet = 1;
1814 ls->condnest = ls->ifnest - 1;
1815 goto handle_warp;
1816 } else if (!strcmp(ls->ctok->name, "elif")) {
1817 if (ls->ifnest == 0
1818 || condfval(ls->ifnest - 1)) {
1819 error(l, "rogue #elif");
1820 ret = 1;
1821 goto handle_warp_ign;
1822 }
1823 if (ls->ifnest == 1) protect_detect.state = 0;
1824 ls->condcomp = 0;
1825 ls->condmet = 1;
1826 ls->condnest = ls->ifnest - 1;
1827 goto handle_warp_ign;
1828 } else if (!strcmp(ls->ctok->name, "endif")) {
1829 if (ls->ifnest == 0) {
1830 error(l, "unmatched #endif");
1831 ret = 1;
1832 goto handle_warp;
1833 }
1834 if ((-- ls->ifnest) == 0
1835 && protect_detect.state == 2) {
1836 protect_detect.state = 3;
1837 }
1838 goto handle_warp;
1839 } else if (!strcmp(ls->ctok->name, "include")) {
1840 ret = handle_include(ls, save_flags, 0);
1841 goto handle_exit3;
1842 } else if (!strcmp(ls->ctok->name, "include_next")) {
1843 ret = handle_include(ls, save_flags, 1);
1844 goto handle_exit3;
1845 } else if (!strcmp(ls->ctok->name, "pragma")) {
1846 if (!(save_flags & LEXER)) {
1847 #ifdef PRAGMA_DUMP
1848 /* dump #pragma in output */
1849 struct token u;
1850
1851 u.type = sharp_type;
1852 u.line = l;
1853 ls->flags = save_flags;
1854 print_token(ls, &u, 0);
1855 print_token(ls, ls->ctok, 0);
1856 while (ls->flags |= LEXER,
1857 !next_token(ls)) {
1858 long save_line;
1859
1860 ls->flags &= ~LEXER;
1861 save_line = ls->line;
1862 ls->line = l;
1863 print_token(ls, ls->ctok, 0);
1864 ls->line = save_line;
1865 if (ls->ctok->type == NEWLINE)
1866 break;
1867 }
1868 goto handle_exit3;
1869 #else
1870 if (ls->flags & WARN_PRAGMA)
1871 warning(l, "#pragma ignored "
1872 "and not dumped");
1873 goto handle_warp_ign;
1874 #endif
1875 }
1876 if (!(ls->flags & HANDLE_PRAGMA))
1877 goto handle_warp_ign;
1878 handle_pragma(ls);
1879 goto handle_exit;
1880 } else if (!strcmp(ls->ctok->name, "error")) {
1881 ret = 1;
1882 handle_error(ls);
1883 goto handle_exit;
1884 } else if (!strcmp(ls->ctok->name, "line")) {
1885 ret = handle_line(ls, save_flags);
1886 goto handle_exit;
1887 } else if ((ls->flags & HANDLE_ASSERTIONS)
1888 && !strcmp(ls->ctok->name, "assert")) {
1889 ret = handle_assert(ls);
1890 goto handle_exit;
1891 } else if ((ls->flags & HANDLE_ASSERTIONS)
1892 && !strcmp(ls->ctok->name, "unassert")) {
1893 ret = handle_unassert(ls);
1894 goto handle_exit;
1895 }
1896 } else {
1897 if (!strcmp(ls->ctok->name, "else")) {
1898 if (condfval(ls->ifnest - 1)
1899 && (ls->flags & WARN_STANDARD)) {
1900 warning(l, "rogue #else in code "
1901 "compiled out");
1902 }
1903 if (ls->condnest == ls->ifnest - 1) {
1904 if (!ls->condmet) ls->condcomp = 1;
1905 }
1906 condfset(ls->ifnest - 1);
1907 if (ls->ifnest == 1) protect_detect.state = 0;
1908 goto handle_warp;
1909 } else if (!strcmp(ls->ctok->name, "elif")) {
1910 if (condfval(ls->ifnest - 1)
1911 && (ls->flags & WARN_STANDARD)) {
1912 warning(l, "rogue #elif in code "
1913 "compiled out");
1914 }
1915 if (ls->condnest != ls->ifnest - 1
1916 || ls->condmet)
1917 goto handle_warp_ign;
1918 if (ls->ifnest == 1) protect_detect.state = 0;
1919 ret = handle_if(ls);
1920 if (ret > 0) {
1921 ls->condcomp = 1;
1922 ls->condmet = 1;
1923 ret = 0;
1924 } else if (ret < 0) ret = 1;
1925 goto handle_exit;
1926 } else if (!strcmp(ls->ctok->name, "endif")) {
1927 if ((-- ls->ifnest) == ls->condnest) {
1928 if (ls->ifnest == 0 &&
1929 protect_detect.state == 2)
1930 protect_detect.state = 3;
1931 ls->condcomp = 1;
1932 }
1933 goto handle_warp;
1934 } else if (!strcmp(ls->ctok->name, "if")
1935 || !strcmp(ls->ctok->name, "ifdef")
1936 || !strcmp(ls->ctok->name, "ifndef")) {
1937 if ((++ ls->ifnest) > 63) goto too_many_if;
1938 condfclr(ls->ifnest - 1);
1939 }
1940 goto handle_warp_ign;
1941 }
1942 /*
1943 * Unrecognized directive. We emit either an error or
1944 * an annoying warning, depending on a command-line switch.
1945 */
1946 if (ls->flags & FAIL_SHARP) {
1947 error(l, "unknown cpp directive '#%s'",
1948 ls->ctok->name);
1949 goto handle_warp_ign;
1950 } else {
1951 struct token u;
1952
1953 u.type = sharp_type;
1954 u.line = l;
1955 ls->flags = save_flags;
1956 print_token(ls, &u, 0);
1957 print_token(ls, ls->ctok, 0);
1958 if (ls->flags & WARN_ANNOYING) {
1959 warning(l, "rogue '#' dumped");
1960 }
1961 }
1962 }
1963 return 1;
1964
1965 handle_warp_ign:
1966 while (!next_token(ls)) if (ls->ctok->type == NEWLINE) break;
1967 goto handle_exit;
1968 handle_warp:
1969 while (!next_token(ls)) {
1970 if (!ttWHI(ls->ctok->type) && (ls->flags & WARN_STANDARD)) {
1971 warning(l, "trailing garbage in "
1972 "preprocessing directive");
1973 }
1974 if (ls->ctok->type == NEWLINE) break;
1975 }
1976 handle_exit:
1977 if (!(ls->flags & LEXER)) put_char(ls, '\n');
1978 handle_exit3:
1979 if (protect_detect.state == 1) {
1980 protect_detect.state = 0;
1981 } else if (protect_detect.state == -1) {
1982 /* just after the #include */
1983 protect_detect.state = 1;
1984 }
1985 handle_exit2:
1986 ls->flags = save_flags;
1987 return ret;
1988 too_many_if:
1989 error(l, "too many levels of conditional inclusion (max 63)");
1990 ret = 1;
1991 goto handle_warp;
1992 #undef condfset
1993 #undef condfclr
1994 #undef condfval
1995 }
1996
1997 /*
1998 * This is the main entry function. It maintains count of #, and call the
1999 * appropriate functions when it encounters a cpp directive or a macro
2000 * name.
2001 * return value: positive on error; CPPERR_EOF means "end of input reached"
2002 */
cpp(struct lexer_state * ls)2003 int cpp(struct lexer_state *ls)
2004 {
2005 int r = 0;
2006
2007 while (next_token(ls)) {
2008 if (protect_detect.state == 3) {
2009 /*
2010 * At that point, protect_detect.ff->protect might
2011 * be non-zero, if the file has been recursively
2012 * included, and a guardian detected.
2013 */
2014 if (!protect_detect.ff->protect) {
2015 /* Cool ! A new guardian has been detected. */
2016 protect_detect.ff->protect =
2017 protect_detect.macro;
2018 } else if (protect_detect.macro) {
2019 /* We found a guardian but an old one. */
2020 freemem(protect_detect.macro);
2021 }
2022 protect_detect.macro = 0;
2023 }
2024 if (ls->ifnest) {
2025 error(ls->line, "unterminated #if construction "
2026 "(depth %ld)", ls->ifnest);
2027 r = CPPERR_NEST;
2028 }
2029 if (ls_depth == 0) return CPPERR_EOF;
2030 close_input(ls);
2031 if (!(ls->flags & LEXER) && !ls->ltwnl) {
2032 put_char(ls, '\n');
2033 ls->ltwnl = 1;
2034 }
2035 pop_file_context(ls);
2036 ls->oline ++;
2037 if (enter_file(ls, ls->flags)) {
2038 ls->ctok->type = NEWLINE;
2039 ls->ltwnl = 1;
2040 break;
2041 }
2042 }
2043 if (!(ls->ltwnl && (ls->ctok->type == SHARP
2044 || ls->ctok->type == DIG_SHARP))
2045 && protect_detect.state == 1 && !ttWHI(ls->ctok->type)) {
2046 /* the first non-whitespace token encountered is not
2047 a sharp introducing a cpp directive */
2048 protect_detect.state = 0;
2049 }
2050 if (protect_detect.state == 3 && !ttWHI(ls->ctok->type)) {
2051 /* a non-whitespace token encountered after the #endif */
2052 protect_detect.state = 0;
2053 }
2054 if (ls->condcomp) {
2055 if (ls->ltwnl && (ls->ctok->type == SHARP
2056 || ls->ctok->type == DIG_SHARP)) {
2057 int x = handle_cpp(ls, ls->ctok->type);
2058
2059 ls->ltwnl = 1;
2060 return r ? r : x;
2061 }
2062 if (ls->ctok->type == NAME) {
2063 struct macro *m;
2064
2065 if ((m = get_macro(ls->ctok->name)) != 0) {
2066 int x;
2067
2068 x = substitute_macro(ls, m, 0, 1, 0,
2069 ls->ctok->line);
2070 if (!(ls->flags & LEXER))
2071 garbage_collect(ls->gf);
2072 return r ? r : x;
2073 }
2074 if (!(ls->flags & LEXER))
2075 print_token(ls, ls->ctok, 0);
2076 }
2077 } else {
2078 if (ls->ltwnl && (ls->ctok->type == SHARP
2079 || ls->ctok->type == DIG_SHARP)) {
2080 int x = handle_cpp(ls, ls->ctok->type);
2081
2082 ls->ltwnl = 1;
2083 return r ? r : x;
2084 }
2085 }
2086 if (ls->ctok->type == NEWLINE) ls->ltwnl = 1;
2087 else if (!ttWHI(ls->ctok->type)) ls->ltwnl = 0;
2088 return r ? r : -1;
2089 }
2090
2091 #ifndef STAND_ALONE
2092 /*
2093 * llex() and lex() are the lexing functions, when the preprocessor is
2094 * linked to another code. llex() should be called only by lex().
2095 */
llex(struct lexer_state * ls)2096 static int llex(struct lexer_state *ls)
2097 {
2098 struct token_fifo *tf = ls->output_fifo;
2099 int r;
2100
2101 if (tf->nt != 0) {
2102 if (tf->art < tf->nt) {
2103 #ifdef INMACRO_FLAG
2104 if (!ls->inmacro) {
2105 ls->inmacro = 1;
2106 ls->macro_count ++;
2107 }
2108 #endif
2109 ls->ctok = tf->t + (tf->art ++);
2110 if (ls->ctok->type > DIGRAPH_TOKENS
2111 && ls->ctok->type < DIGRAPH_TOKENS_END) {
2112 ls->ctok->type = undig(ls->ctok->type);
2113 }
2114 return 0;
2115 } else {
2116 #ifdef INMACRO_FLAG
2117 ls->inmacro = 0;
2118 #endif
2119 freemem(tf->t);
2120 tf->art = tf->nt = 0;
2121 garbage_collect(ls->gf);
2122 ls->ctok = ls->save_ctok;
2123 }
2124 }
2125 r = cpp(ls);
2126 if (ls->ctok->type > DIGRAPH_TOKENS
2127 && ls->ctok->type < LAST_MEANINGFUL_TOKEN) {
2128 ls->ctok->type = undig(ls->ctok->type);
2129 }
2130 if (r > 0) return r;
2131 if (r < 0) return 0;
2132 return llex(ls);
2133 }
2134
2135 /*
2136 * lex() reads the next token from the processed stream and stores it
2137 * into ls->ctok.
2138 * return value: non zero on error (including CPPERR_EOF, which is not
2139 * quite an error)
2140 */
lex(struct lexer_state * ls)2141 int lex(struct lexer_state *ls)
2142 {
2143 int r;
2144
2145 do {
2146 r = llex(ls);
2147 #ifdef SEMPER_FIDELIS
2148 } while (!r && !ls->condcomp);
2149 #else
2150 } while (!r && (!ls->condcomp || (ttWHI(ls->ctok->type) &&
2151 (!(ls->flags & LINE_NUM) || ls->ctok->type != NEWLINE))));
2152 #endif
2153 return r;
2154 }
2155 #endif
2156
2157 /*
2158 * check_cpp_errors() must be called when the end of input is reached;
2159 * it checks pending errors due to truncated constructs (actually none,
2160 * this is reserved for future evolutions).
2161 */
check_cpp_errors(struct lexer_state * ls)2162 int check_cpp_errors(struct lexer_state *ls)
2163 {
2164 if (ls->flags & KEEP_OUTPUT) {
2165 put_char(ls, '\n');
2166 }
2167 if (emit_dependencies) fputc('\n', emit_output);
2168 #ifndef NO_UCPP_BUF
2169 if (!(ls->flags & LEXER)) {
2170 flush_output(ls);
2171 }
2172 #endif
2173 if ((ls->flags & WARN_TRIGRAPHS) && ls->count_trigraphs)
2174 warning(0, "%ld trigraph(s) encountered", ls->count_trigraphs);
2175 return 0;
2176 }
2177
/*
 * init_cpp() initializes static tables inside ucpp, by calling
 * init_cppm(). It need not be called more than once.
 */
void init_cpp(void)
{
	init_cppm();
}
2186
2187 /*
2188 * (re)init the global tables.
2189 * If standard_assertions is non 0, init the assertions table.
2190 */
init_tables(int with_assertions)2191 void init_tables(int with_assertions)
2192 {
2193 time_t t;
2194 struct tm *ct;
2195
2196 init_buf_lexer_state(&dsharp_lexer, 0);
2197 #ifdef PRAGMA_TOKENIZE
2198 init_buf_lexer_state(&tokenize_lexer, 0);
2199 #endif
2200 time(&t);
2201 ct = localtime(&t);
2202 #ifdef NOSTRFTIME
2203 /* we have a quite old compiler, that does not know the
2204 (standard since 1990) strftime() function. */
2205 {
2206 char *c = asctime(ct);
2207
2208 compile_time[0] = '"';
2209 mmv(compile_time + 1, c + 11, 8);
2210 compile_time[9] = '"';
2211 compile_time[10] = 0;
2212 compile_date[0] = '"';
2213 mmv(compile_date + 1, c + 4, 7);
2214 mmv(compile_date + 8, c + 20, 4);
2215 compile_date[12] = '"';
2216 compile_date[13] = 0;
2217 }
2218 #else
2219 strftime(compile_time, 12, "\"%H:%M:%S\"", ct);
2220 strftime(compile_date, 24, "\"%b %d %Y\"", ct);
2221 #endif
2222 init_macros();
2223 if (with_assertions) init_assertions();
2224 init_found_files();
2225 }
2226
2227 /*
2228 * Resets the include path.
2229 */
init_include_path(char * incpath[])2230 void init_include_path(char *incpath[])
2231 {
2232 if (include_path_nb) {
2233 size_t i;
2234
2235 for (i = 0; i < include_path_nb; i ++)
2236 freemem(include_path[i]);
2237 freemem(include_path);
2238 include_path_nb = 0;
2239 }
2240 if (incpath) {
2241 int i;
2242
2243 for (i = 0; incpath[i]; i ++)
2244 aol(include_path, include_path_nb,
2245 sdup(incpath[i]), INCPATH_MEMG);
2246 }
2247 }
2248
2249 /*
2250 * add_incpath() adds "path" to the standard include path.
2251 */
add_incpath(char * path)2252 void add_incpath(char *path)
2253 {
2254 aol(include_path, include_path_nb, sdup(path), INCPATH_MEMG);
2255 }
2256
2257 /*
2258 * This function cleans the memory. It should release all allocated
2259 * memory structures and may be called even if the current pre-processing
2260 * is not finished or reported an error.
2261 */
wipeout()2262 void wipeout()
2263 {
2264 struct lexer_state ls;
2265
2266 if (include_path_nb > 0) {
2267 size_t i;
2268
2269 for (i = 0; i < include_path_nb; i ++)
2270 freemem(include_path[i]);
2271 freemem(include_path);
2272 include_path = 0;
2273 include_path_nb = 0;
2274 }
2275 if (current_filename) freemem(current_filename);
2276 current_filename = 0;
2277 current_long_filename = 0;
2278 current_incdir = -1;
2279 protect_detect.state = 0;
2280 if (protect_detect.macro) freemem(protect_detect.macro);
2281 protect_detect.macro = 0;
2282 protect_detect.ff = 0;
2283 init_lexer_state(&ls);
2284 while (ls_depth > 0) pop_file_context(&ls);
2285 free_lexer_state(&ls);
2286 free_lexer_state(&dsharp_lexer);
2287 #ifdef PRAGMA_TOKENIZE
2288 free_lexer_state(&tokenize_lexer);
2289 #endif
2290 if (found_files_init_done) HTT_kill(&found_files);
2291 found_files_init_done = 0;
2292 if (found_files_sys_init_done) HTT_kill(&found_files_sys);
2293 found_files_sys_init_done = 0;
2294 wipe_macros();
2295 wipe_assertions();
2296 }
2297
2298 #ifdef STAND_ALONE
/*
 * usage() prints the command-line help on stderr; "command_name" is
 * the name under which the program is reported in the first line.
 */
static void usage(char *command_name)
{
	fprintf(stderr,
		"Usage: %s [options] [file]\n"
		"language options:\n"
		" -C keep comments in output\n"
		" -s keep '#' when no cpp directive is recognized\n"
		" -l do not emit line numbers\n"
		" -lg emit gcc-like line numbers\n"
		" -CC disable C++-like comments\n"
		" -a, -na, -a0 handle (or not) assertions\n"
		" -V disable macros with extra arguments\n"
		" -u understand UTF-8 in source\n"
		" -X enable -a, -u and -Y\n"
		" -c90 mimic C90 behaviour\n"
		" -t disable trigraph support\n"
		"warning options:\n"
		" -wt emit a final warning when trigraphs are encountered\n"
		" -wtt emit warnings for each trigraph encountered\n"
		" -wa emit warnings that are usually useless\n"
		" -w0 disable standard warnings\n"
		"directory options:\n"
		" -I directory add 'directory' before the standard include path\n"
		" -J directory add 'directory' after the standard include path\n"
		" -zI do not use the standard include path\n"
		" -M emit Makefile-like dependencies instead of normal "
		"output\n"
		" -Ma emit also dependencies for system files\n"
		" -o file store output in file\n"
		"macro and assertion options:\n"
		" -Dmacro predefine 'macro'\n"
		" -Dmacro=def predefine 'macro' with 'def' content\n"
		" -Umacro undefine 'macro'\n"
		" -Afoo(bar) assert foo(bar)\n"
		" -Bfoo(bar) unassert foo(bar)\n"
		" -Y predefine system-dependent macros\n"
		" -Z do not predefine special macros\n"
		" -d emit defined macros\n"
		" -e emit assertions\n"
		"misc options:\n"
		" -v print version number and settings\n"
		" -h show this help\n",
		command_name);
}
2346
2347 /*
2348 * print version and compile-time settings
2349 */
version(void)2350 static void version(void)
2351 {
2352 size_t i;
2353
2354 fprintf(stderr, "ucpp version %d.%d\n", VERS_MAJ, VERS_MIN);
2355 fprintf(stderr, "search path:\n");
2356 for (i = 0; i < include_path_nb; i ++)
2357 fprintf(stderr, " %s\n", include_path[i]);
2358 }
2359
2360 /*
2361 * parse_opt() initializes many things according to the command-line
2362 * options.
2363 * Return values:
2364 * 0 on success
2365 * 1 on semantic error (redefinition of a special macro, for instance)
2366 * 2 on syntaxic error (unknown options for instance)
2367 */
parse_opt(int argc,char * argv[],struct lexer_state * ls)2368 static int parse_opt(int argc, char *argv[], struct lexer_state *ls)
2369 {
2370 int i, ret = 0;
2371 char *filename = 0;
2372 int with_std_incpath = 1;
2373 int print_version = 0, print_defs = 0, print_asserts = 0;
2374 int system_macros = 0, standard_assertions = 0;
2375
2376 init_lexer_state(ls);
2377 ls->flags = DEFAULT_CPP_FLAGS;
2378 emit_output = ls->output = stdout;
2379 for (i = 1; i < argc; i ++) if (argv[i][0] == '-') {
2380 if (!strcmp(argv[i], "-h")) {
2381 return 2;
2382 } else if (!strcmp(argv[i], "-C")) {
2383 ls->flags &= ~DISCARD_COMMENTS;
2384 } else if (!strcmp(argv[i], "-CC")) {
2385 ls->flags &= ~CPLUSPLUS_COMMENTS;
2386 } else if (!strcmp(argv[i], "-a")) {
2387 ls->flags |= HANDLE_ASSERTIONS;
2388 } else if (!strcmp(argv[i], "-na")) {
2389 ls->flags |= HANDLE_ASSERTIONS;
2390 standard_assertions = 0;
2391 } else if (!strcmp(argv[i], "-a0")) {
2392 ls->flags &= ~HANDLE_ASSERTIONS;
2393 } else if (!strcmp(argv[i], "-V")) {
2394 ls->flags &= ~MACRO_VAARG;
2395 } else if (!strcmp(argv[i], "-u")) {
2396 ls->flags |= UTF8_SOURCE;
2397 } else if (!strcmp(argv[i], "-X")) {
2398 ls->flags |= HANDLE_ASSERTIONS;
2399 ls->flags |= UTF8_SOURCE;
2400 system_macros = 1;
2401 } else if (!strcmp(argv[i], "-c90")) {
2402 ls->flags &= ~MACRO_VAARG;
2403 ls->flags &= ~CPLUSPLUS_COMMENTS;
2404 c99_compliant = 0;
2405 c99_hosted = -1;
2406 } else if (!strcmp(argv[i], "-t")) {
2407 ls->flags &= ~HANDLE_TRIGRAPHS;
2408 } else if (!strcmp(argv[i], "-wt")) {
2409 ls->flags |= WARN_TRIGRAPHS;
2410 } else if (!strcmp(argv[i], "-wtt")) {
2411 ls->flags |= WARN_TRIGRAPHS_MORE;
2412 } else if (!strcmp(argv[i], "-wa")) {
2413 ls->flags |= WARN_ANNOYING;
2414 } else if (!strcmp(argv[i], "-w0")) {
2415 ls->flags &= ~WARN_STANDARD;
2416 ls->flags &= ~WARN_PRAGMA;
2417 } else if (!strcmp(argv[i], "-s")) {
2418 ls->flags &= ~FAIL_SHARP;
2419 } else if (!strcmp(argv[i], "-l")) {
2420 ls->flags &= ~LINE_NUM;
2421 } else if (!strcmp(argv[i], "-lg")) {
2422 ls->flags |= GCC_LINE_NUM;
2423 } else if (!strcmp(argv[i], "-M")) {
2424 ls->flags &= ~KEEP_OUTPUT;
2425 emit_dependencies = 1;
2426 } else if (!strcmp(argv[i], "-Ma")) {
2427 ls->flags &= ~KEEP_OUTPUT;
2428 emit_dependencies = 2;
2429 } else if (!strcmp(argv[i], "-Y")) {
2430 system_macros = 1;
2431 } else if (!strcmp(argv[i], "-Z")) {
2432 no_special_macros = 1;
2433 } else if (!strcmp(argv[i], "-d")) {
2434 ls->flags &= ~KEEP_OUTPUT;
2435 print_defs = 1;
2436 } else if (!strcmp(argv[i], "-e")) {
2437 ls->flags &= ~KEEP_OUTPUT;
2438 print_asserts = 1;
2439 } else if (!strcmp(argv[i], "-zI")) {
2440 with_std_incpath = 0;
2441 } else if (!strcmp(argv[i], "-I") || !strcmp(argv[i], "-J")) {
2442 i ++;
2443 } else if (!strcmp(argv[i], "-o")) {
2444 if ((++ i) >= argc) {
2445 error(-1, "missing filename after -o");
2446 return 2;
2447 }
2448 if (argv[i][0] == '-' && argv[i][1] == 0) {
2449 emit_output = ls->output = stdout;
2450 } else {
2451 ls->output = fopen(argv[i], "w");
2452 if (!ls->output) {
2453 error(-1, "failed to open for "
2454 "writing: %s", argv[i]);
2455 return 2;
2456 }
2457 emit_output = ls->output;
2458 }
2459 } else if (!strcmp(argv[i], "-v")) {
2460 print_version = 1;
2461 } else if (argv[i][1] != 'I' && argv[i][1] != 'J'
2462 && argv[i][1] != 'D' && argv[i][1] != 'U'
2463 && argv[i][1] != 'A' && argv[i][1] != 'B')
2464 warning(-1, "unknown option '%s'", argv[i]);
2465 } else {
2466 if (filename != 0) {
2467 error(-1, "spurious filename '%s'", argv[i]);
2468 return 2;
2469 }
2470 filename = argv[i];
2471 }
2472 init_tables(ls->flags & HANDLE_ASSERTIONS);
2473 init_include_path(0);
2474 if (filename) {
2475 #ifdef UCPP_MMAP
2476 FILE *f = fopen_mmap_file(filename);
2477
2478 ls->input = 0;
2479 if (f) set_input_file(ls, f);
2480 #else
2481 ls->input = fopen(filename, "r");
2482 #endif
2483 if (!ls->input) {
2484 error(-1, "file '%s' not found", filename);
2485 return 1;
2486 }
2487 #ifdef NO_LIBC_BUF
2488 setbuf(ls->input, 0);
2489 #endif
2490 set_init_filename(filename, 1);
2491 } else {
2492 ls->input = stdin;
2493 set_init_filename("<stdin>", 0);
2494 }
2495 for (i = 1; i < argc; i ++)
2496 if (argv[i][0] == '-' && argv[i][1] == 'I')
2497 add_incpath(argv[i][2] ? argv[i] + 2 : argv[i + 1]);
2498 if (system_macros) for (i = 0; system_macros_def[i]; i ++)
2499 ret = ret || define_macro(ls, system_macros_def[i]);
2500 for (i = 1; i < argc; i ++)
2501 if (argv[i][0] == '-' && argv[i][1] == 'D')
2502 ret = ret || define_macro(ls, argv[i] + 2);
2503 for (i = 1; i < argc; i ++)
2504 if (argv[i][0] == '-' && argv[i][1] == 'U')
2505 ret = ret || undef_macro(ls, argv[i] + 2);
2506 if (ls->flags & HANDLE_ASSERTIONS) {
2507 if (standard_assertions)
2508 for (i = 0; system_assertions_def[i]; i ++)
2509 make_assertion(system_assertions_def[i]);
2510 for (i = 1; i < argc; i ++)
2511 if (argv[i][0] == '-' && argv[i][1] == 'A')
2512 ret = ret || make_assertion(argv[i] + 2);
2513 for (i = 1; i < argc; i ++)
2514 if (argv[i][0] == '-' && argv[i][1] == 'B')
2515 ret = ret || destroy_assertion(argv[i] + 2);
2516 } else {
2517 for (i = 1; i < argc; i ++)
2518 if (argv[i][0] == '-'
2519 && (argv[i][1] == 'A' || argv[i][1] == 'B'))
2520 warning(-1, "assertions disabled");
2521 }
2522 if (with_std_incpath) {
2523 for (i = 0; include_path_std[i]; i ++)
2524 add_incpath(include_path_std[i]);
2525 }
2526 for (i = 1; i < argc; i ++)
2527 if (argv[i][0] == '-' && argv[i][1] == 'J')
2528 add_incpath(argv[i][2] ? argv[i] + 2 : argv[i + 1]);
2529
2530 if (print_version) {
2531 version();
2532 return 1;
2533 }
2534 if (print_defs) {
2535 print_defines();
2536 emit_defines = 1;
2537 }
2538 if (print_asserts && (ls->flags & HANDLE_ASSERTIONS)) {
2539 print_assertions();
2540 emit_assertions = 1;
2541 }
2542 return ret;
2543 }
2544
main(int argc,char * argv[])2545 int main(int argc, char *argv[])
2546 {
2547 struct lexer_state ls;
2548 int r, fr = 0;
2549
2550 init_cpp();
2551 if ((r = parse_opt(argc, argv, &ls)) != 0) {
2552 if (r == 2) usage(argv[0]);
2553 return EXIT_FAILURE;
2554 }
2555 enter_file(&ls, ls.flags);
2556 while ((r = cpp(&ls)) < CPPERR_EOF) fr = fr || (r > 0);
2557 fr = fr || check_cpp_errors(&ls);
2558 free_lexer_state(&ls);
2559 wipeout();
2560 #ifdef MEM_DEBUG
2561 report_leaks();
2562 #endif
2563 return fr ? EXIT_FAILURE : EXIT_SUCCESS;
2564 }
2565 #endif
2566