1 /*
2 * C and T preprocessor, and integrated lexer
3 * (c) Thomas Pornin 1999 - 2002
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 4. The name of the authors may not be used to endorse or promote
14 * products derived from this software without specific prior written
15 * permission.
16 *
17 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
23 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
24 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
26 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 *
29 */
30
31 #define VERS_MAJ 1
32 #define VERS_MIN 3
33 /* uncomment the following if you cannot set it with a compiler flag */
34 /* #define STAND_ALONE */
35
36 #include "tune.h"
37 #include <stdio.h>
38 #include <string.h>
39 #include <stdlib.h>
40 #include <stdarg.h>
41 #include <setjmp.h>
42 #include <stddef.h>
43 #include <limits.h>
44 #include <time.h>
45 #include "ucppi.h"
46 #include "mem.h"
47 #include "nhash.h"
48 #ifdef UCPP_MMAP
49 #include <unistd.h>
50 #include <sys/types.h>
51 #include <sys/mman.h>
52 #include <fcntl.h>
53 #endif
54
/*
 * Include search path: include_path holds include_path_nb directory
 * names. In stand-alone builds, the built-in list is taken from
 * STD_INCLUDE_PATH.
 */
#ifdef STAND_ALONE
static char *include_path_std[] = { STD_INCLUDE_PATH, 0 };
#endif
static char **include_path;
static size_t include_path_nb = 0;

/* global switches and the stream used for "emit" output */
int no_special_macros = 0;
int emit_dependencies = 0;
int emit_defines = 0;
int emit_assertions = 0;
FILE *emit_output;

/* built-in macro and assertion definitions (stand-alone builds only) */
#ifdef STAND_ALONE
static char *system_macros_def[] = { STD_MACROS, 0 };
static char *system_assertions_def[] = { STD_ASSERT, 0 };
#endif

/*
 * Name of the file being processed, as given and in its long form, and
 * the index in include_path where it was found (-1 if it was not found
 * through the include path).
 */
char *current_filename = 0;
char *current_long_filename = 0;
static int current_incdir = -1;
75
76 #ifndef NO_UCPP_ERROR_FUNCTIONS
77 /*
78 * "ouch" is the name for an internal ucpp error. If AUDIT is not defined,
79 * no code calling this function will be generated; a "ouch" may still be
80 * emitted by getmem() (in mem.c) if MEM_CHECK is defined, but this "ouch"
81 * does not use this function.
82 */
ucpp_ouch(char * fmt,...)83 void ucpp_ouch(char *fmt, ...)
84 {
85 va_list ap;
86
87 va_start(ap, fmt);
88 fprintf(stderr, "%s: ouch, ", current_filename);
89 vfprintf(stderr, fmt, ap);
90 fprintf(stderr, "\n");
91 va_end(ap);
92 die();
93 }
94
95 /*
96 * report an error, with current_filename, line, and printf-like syntax
97 */
ucpp_error(long line,char * fmt,...)98 void ucpp_error(long line, char *fmt, ...)
99 {
100 va_list ap;
101
102 va_start(ap, fmt);
103 if (line > 0)
104 fprintf(stderr, "%s: line %ld: ", current_filename, line);
105 else if (line == 0) fprintf(stderr, "%s: ", current_filename);
106 vfprintf(stderr, fmt, ap);
107 fprintf(stderr, "\n");
108 if (line >= 0) {
109 struct stack_context *sc = report_context();
110 size_t i;
111
112 for (i = 0; sc[i].line >= 0; i ++)
113 fprintf(stderr, "\tincluded from %s:%ld\n",
114 sc[i].long_name ? sc[i].long_name : sc[i].name,
115 sc[i].line);
116 freemem(sc);
117 }
118 va_end(ap);
119 }
120
121 /*
122 * like error(), with the mention "warning"
123 */
ucpp_warning(long line,char * fmt,...)124 void ucpp_warning(long line, char *fmt, ...)
125 {
126 va_list ap;
127
128 va_start(ap, fmt);
129 if (line > 0)
130 fprintf(stderr, "%s: warning: line %ld: ",
131 current_filename, line);
132 else if (line == 0)
133 fprintf(stderr, "%s: warning: ", current_filename);
134 else fprintf(stderr, "warning: ");
135 vfprintf(stderr, fmt, ap);
136 fprintf(stderr, "\n");
137 if (line >= 0) {
138 struct stack_context *sc = report_context();
139 size_t i;
140
141 for (i = 0; sc[i].line >= 0; i ++)
142 fprintf(stderr, "\tincluded from %s:%ld\n",
143 sc[i].long_name ? sc[i].long_name : sc[i].name,
144 sc[i].line);
145 freemem(sc);
146 }
147 va_end(ap);
148 }
149 #endif /* NO_UCPP_ERROR_FUNCTIONS */
150
/*
 * Some memory allocations are manually garbage-collected; essentially,
 * strings duplicated in the process of macro replacement. Each such
 * string is referenced in a garbage_fifo, which is cleared when all
 * nested macros have been resolved.
 */
struct garbage_fifo {
	char **garbage;		/* the strings waiting to be freed */
	size_t ngarb;		/* number of strings currently referenced */
	size_t memgarb;		/* number of slots allocated in garbage[] */
};
162
163 /*
164 * throw_away() marks a string to be collected later
165 */
throw_away(struct garbage_fifo * gf,char * n)166 void throw_away(struct garbage_fifo *gf, char *n)
167 {
168 wan(gf->garbage, gf->ngarb, n, gf->memgarb);
169 }
170
171 /*
172 * free marked strings
173 */
garbage_collect(struct garbage_fifo * gf)174 void garbage_collect(struct garbage_fifo *gf)
175 {
176 size_t i;
177
178 for (i = 0; i < gf->ngarb; i ++) freemem(gf->garbage[i]);
179 gf->ngarb = 0;
180 }
181
init_garbage_fifo(struct garbage_fifo * gf)182 static void init_garbage_fifo(struct garbage_fifo *gf)
183 {
184 gf->garbage = getmem((gf->memgarb = GARBAGE_LIST_MEMG)
185 * sizeof(char *));
186 gf->ngarb = 0;
187 }
188
free_garbage_fifo(struct garbage_fifo * gf)189 static void free_garbage_fifo(struct garbage_fifo *gf)
190 {
191 garbage_collect(gf);
192 freemem(gf->garbage);
193 freemem(gf);
194 }
195
/*
 * Fixed spellings of tokens, indexed by token type.
 * Order is important: it must match the token-constants declared as an
 * enum in the header file.
 */
char *operators_name[] = {
	" ",
	"\n",
	" ",
	"0000",
	"name",
	"bunch",
	"pragma",
	"context",
	"\"dummy string\"",
	"'dummy char'",
	"/", "/=",
	"-", "--", "-=", "->",
	"+", "++", "+=",
	"<", "<=", "<<", "<<=",
	">", ">=", ">>", ">>=",
	"=", "==",
#ifdef CAST_OP
	"=>",
#endif
	"~", "!=",
	"&", "&&", "&=",
	"|", "||", "|=",
	"%", "%=",
	"*", "*=",
	"^", "^=",
	"!",
	"{", "}",
	"[", "]",
	"(", ")",
	",", "?", ";", ":",
	".", "...",
	"#", "##",
	" ",
	"ouch",
	"<:", ":>", "<%", "%>", "%:", "%:%:"
};
215
/*
 * tname() yields the ascii representation of a token: the token's own
 * name when S_TOKEN() holds for its type, the fixed spelling from
 * operators_name[] otherwise. With SEMPER_FIDELIS, tokens whose type
 * satisfies ttWHI() are rendered as " ".
 */
#define tname_basic(x)	(S_TOKEN((x).type) ? (x).name \
			: operators_name[(x).type])
#ifdef SEMPER_FIDELIS
#define tname(x)	(ttWHI((x).type) ? " " : tname_basic(x))
#else
#define tname(x)	tname_basic(x)
#endif
224
token_name(struct token * t)225 char *token_name(struct token *t)
226 {
227 return tname(*t);
228 }
229
230 /*
231 * To speed up deeply nested and repeated inclusions, we:
232 * -- use a hash table to remember where we found each file
233 * -- remember when the file is protected by a #ifndef/#define/#endif
234 * construction; we can then avoid including several times a file
235 * when this is not necessary.
236 * -- remember in which directory, in the include path, the file was found.
237 */
238 struct found_file {
239 hash_item_header head; /* first field */
240 char *name;
241 char *protect;
242 };
243
244 /*
245 * For files from system include path.
246 */
247 struct found_file_sys {
248 hash_item_header head; /* first field */
249 struct found_file *rff;
250 int incdir;
251 };
252
253 static HTT found_files, found_files_sys;
254 static int found_files_init_done = 0, found_files_sys_init_done = 0;
255
new_found_file(void)256 static struct found_file *new_found_file(void)
257 {
258 struct found_file *ff = getmem(sizeof(struct found_file));
259
260 ff->name = 0;
261 ff->protect = 0;
262 return ff;
263 }
264
del_found_file(void * m)265 static void del_found_file(void *m)
266 {
267 struct found_file *ff = (struct found_file *)m;
268
269 if (ff->name) freemem(ff->name);
270 if (ff->protect) freemem(ff->protect);
271 freemem(ff);
272 }
273
new_found_file_sys(void)274 static struct found_file_sys *new_found_file_sys(void)
275 {
276 struct found_file_sys *ffs = getmem(sizeof(struct found_file_sys));
277
278 ffs->rff = 0;
279 ffs->incdir = -1;
280 return ffs;
281 }
282
/*
 * Release a found_file_sys. The found_file it refers to is not touched.
 * The generic pointer signature lets it be handed to HTT_init() as the
 * deletion callback.
 */
static void del_found_file_sys(void *m)
{
	struct found_file_sys *entry = m;

	freemem(entry);
}
289
290 /*
291 * To keep up with the #ifndef/#define/#endif protection mechanism
292 * detection.
293 */
294 struct protect protect_detect;
295 static struct protect *protect_detect_stack = 0;
296
set_init_filename(char * x,int real_file)297 void set_init_filename(char *x, int real_file)
298 {
299 if (current_filename) freemem(current_filename);
300 current_filename = sdup(x);
301 current_long_filename = 0;
302 current_incdir = -1;
303 if (real_file) {
304 protect_detect.macro = 0;
305 protect_detect.state = 1;
306 protect_detect.ff = new_found_file();
307 protect_detect.ff->name = sdup(x);
308 HTT_put(&found_files, protect_detect.ff, x);
309 } else {
310 protect_detect.state = 0;
311 }
312 }
313
init_found_files(void)314 static void init_found_files(void)
315 {
316 if (found_files_init_done) HTT_kill(&found_files);
317 HTT_init(&found_files, del_found_file);
318 found_files_init_done = 1;
319 if (found_files_sys_init_done) HTT_kill(&found_files_sys);
320 HTT_init(&found_files_sys, del_found_file_sys);
321 found_files_sys_init_done = 1;
322 }
323
324 /*
325 * Set the lexer state at the beginning of a file.
326 */
reinit_lexer_state(struct lexer_state * ls,int wb)327 static void reinit_lexer_state(struct lexer_state *ls, int wb)
328 {
329 #ifndef NO_UCPP_BUF
330 ls->input_buf = wb ? getmem(INPUT_BUF_MEMG + 1) : 0;
331 #ifdef UCPP_MMAP
332 ls->from_mmap = 0;
333 #endif
334 #endif
335 ls->input = 0;
336 ls->ebuf = ls->pbuf = 0;
337 ls->nlka = 0;
338 ls->discard = 1;
339 ls->last = 0; /* we suppose '\n' is not 0 */
340 ls->line = 1;
341 ls->ltwnl = 1;
342 ls->oline = 1;
343 ls->pending_token = 0;
344 ls->cli = 0;
345 ls->copy_line[COPY_LINE_LENGTH - 1] = 0;
346 ls->ifnest = 0;
347 ls->condf[0] = ls->condf[1] = 0;
348 }
349
350 /*
351 * Initialize the struct lexer_state, with optional input and output buffers.
352 */
init_buf_lexer_state(struct lexer_state * ls,int wb)353 void init_buf_lexer_state(struct lexer_state *ls, int wb)
354 {
355 reinit_lexer_state(ls, wb);
356 #ifndef NO_UCPP_BUF
357 ls->output_buf = wb ? getmem(OUTPUT_BUF_MEMG) : 0;
358 #endif
359 ls->sbuf = 0;
360 ls->output_fifo = 0;
361
362 ls->ctok = getmem(sizeof(struct token));
363 ls->ctok->name = getmem(ls->tknl = TOKEN_NAME_MEMG);
364 ls->pending_token = 0;
365
366 ls->flags = 0;
367 ls->count_trigraphs = 0;
368 ls->gf = getmem(sizeof(struct garbage_fifo));
369 init_garbage_fifo(ls->gf);
370 ls->condcomp = 1;
371 ls->condnest = 0;
372 #ifdef INMACRO_FLAG
373 ls->inmacro = 0;
374 ls->macro_count = 0;
375 #endif
376 }
377
378 /*
379 * Initialize the (complex) struct lexer_state.
380 */
init_lexer_state(struct lexer_state * ls)381 void init_lexer_state(struct lexer_state *ls)
382 {
383 init_buf_lexer_state(ls, 1);
384 ls->input = 0;
385 }
386
387 /*
388 * Restore what is needed from a lexer_state. This is used for #include.
389 */
restore_lexer_state(struct lexer_state * ls,struct lexer_state * lsbak)390 static void restore_lexer_state(struct lexer_state *ls,
391 struct lexer_state *lsbak)
392 {
393 #ifndef NO_UCPP_BUF
394 freemem(ls->input_buf);
395 ls->input_buf = lsbak->input_buf;
396 #ifdef UCPP_MMAP
397 ls->from_mmap = lsbak->from_mmap;
398 ls->input_buf_sav = lsbak->input_buf_sav;
399 #endif
400 #endif
401 ls->input = lsbak->input;
402 ls->ebuf = lsbak->ebuf;
403 ls->pbuf = lsbak->pbuf;
404 ls->nlka = lsbak->nlka;
405 ls->discard = lsbak->discard;
406 ls->line = lsbak->line;
407 ls->oline = lsbak->oline;
408 ls->ifnest = lsbak->ifnest;
409 ls->condf[0] = lsbak->condf[0];
410 ls->condf[1] = lsbak->condf[1];
411 }
412
413 /*
414 * close input file operations on a struct lexer_state
415 */
close_input(struct lexer_state * ls)416 static void close_input(struct lexer_state *ls)
417 {
418 #ifdef UCPP_MMAP
419 if (ls->from_mmap) {
420 munmap((void *)ls->input_buf, ls->ebuf);
421 ls->from_mmap = 0;
422 ls->input_buf = ls->input_buf_sav;
423 }
424 #endif
425 if (ls->input) {
426 fclose(ls->input);
427 ls->input = 0;
428 }
429 }
430
431 /*
432 * file_context (and the two functions push_ and pop_) are used to save
433 * all that is needed when including a file.
434 */
435 static struct file_context {
436 struct lexer_state ls;
437 char *name, *long_name;
438 int incdir;
439 } *ls_stack;
440 static size_t ls_depth = 0;
441
push_file_context(struct lexer_state * ls)442 static void push_file_context(struct lexer_state *ls)
443 {
444 struct file_context fc;
445
446 fc.name = current_filename;
447 fc.long_name = current_long_filename;
448 fc.incdir = current_incdir;
449 mmv(&(fc.ls), ls, sizeof(struct lexer_state));
450 aol(ls_stack, ls_depth, fc, LS_STACK_MEMG);
451 ls_depth --;
452 aol(protect_detect_stack, ls_depth, protect_detect, LS_STACK_MEMG);
453 protect_detect.macro = 0;
454 }
455
pop_file_context(struct lexer_state * ls)456 static void pop_file_context(struct lexer_state *ls)
457 {
458 #ifdef AUDIT
459 if (ls_depth <= 0) ouch("prepare to meet thy creator");
460 #endif
461 close_input(ls);
462 restore_lexer_state(ls, &(ls_stack[-- ls_depth].ls));
463 if (protect_detect.macro) freemem(protect_detect.macro);
464 protect_detect = protect_detect_stack[ls_depth];
465 if (current_filename) freemem(current_filename);
466 current_filename = ls_stack[ls_depth].name;
467 current_long_filename = ls_stack[ls_depth].long_name;
468 current_incdir = ls_stack[ls_depth].incdir;
469 if (ls_depth == 0) {
470 freemem(ls_stack);
471 freemem(protect_detect_stack);
472 }
473 }
474
475 /*
476 * report_context() returns the list of successive includers of the
477 * current file, ending with a dummy entry with a negative line number.
478 * The caller is responsible for freeing the returned pointer.
479 */
report_context(void)480 struct stack_context *report_context(void)
481 {
482 struct stack_context *sc;
483 size_t i;
484
485 sc = getmem((ls_depth + 1) * sizeof(struct stack_context));
486 for (i = 0; i < ls_depth; i ++) {
487 sc[i].name = ls_stack[ls_depth - i - 1].name;
488 sc[i].long_name = ls_stack[ls_depth - i - 1].long_name;
489 sc[i].line = ls_stack[ls_depth - i - 1].ls.line - 1;
490 }
491 sc[ls_depth].line = -1;
492 return sc;
493 }
494
495 /*
496 * init_lexer_mode() is used to end initialization of a struct lexer_state
497 * if it must be used for a lexer
498 */
init_lexer_mode(struct lexer_state * ls)499 void init_lexer_mode(struct lexer_state *ls)
500 {
501 ls->flags = DEFAULT_LEXER_FLAGS;
502 ls->output_fifo = getmem(sizeof(struct token_fifo));
503 ls->output_fifo->art = ls->output_fifo->nt = 0;
504 ls->toplevel_of = ls->output_fifo;
505 ls->save_ctok = ls->ctok;
506 }
507
508 /*
509 * release memory used by a struct lexer_state; this implies closing
510 * any input stream held by this structure.
511 */
free_lexer_state(struct lexer_state * ls)512 void free_lexer_state(struct lexer_state *ls)
513 {
514 close_input(ls);
515 #ifndef NO_UCPP_BUF
516 if (ls->input_buf) {
517 freemem(ls->input_buf);
518 ls->input_buf = 0;
519 }
520 if (ls->output_buf) {
521 freemem(ls->output_buf);
522 ls->output_buf = 0;
523 }
524 #endif
525 if (ls->ctok && (!ls->output_fifo || ls->output_fifo->nt == 0)) {
526 freemem(ls->ctok->name);
527 freemem(ls->ctok);
528 ls->ctok = 0;
529 }
530 if (ls->gf) {
531 free_garbage_fifo(ls->gf);
532 ls->gf = 0;
533 }
534 if (ls->output_fifo) {
535 freemem(ls->output_fifo);
536 ls->output_fifo = 0;
537 }
538 }
539
540 /*
541 * Print line information.
542 */
print_line_info(struct lexer_state * ls,unsigned long flags)543 static void print_line_info(struct lexer_state *ls, unsigned long flags)
544 {
545 char *fn = current_long_filename ?
546 current_long_filename : current_filename;
547 char *b, *d;
548
549 b = getmem(50 + strlen(fn));
550 if (flags & GCC_LINE_NUM) {
551 sprintf(b, "# %ld \"%s\"\n", ls->line, fn);
552 } else {
553 sprintf(b, "#line %ld \"%s\"\n", ls->line, fn);
554 }
555 for (d = b; *d; d ++) put_char(ls, (unsigned char)(*d));
556 freemem(b);
557 }
558
559 /*
560 * Enter a file; this implies the possible emission of a #line directive.
561 * The flags used are passed as second parameter instead of being
562 * extracted from the struct lexer_state.
563 *
564 * As a command-line option, gcc-like directives (with only a '#',
565 * without 'line') may be produced.
566 *
567 * enter_file() returns 1 if a (CONTEXT) token was produced, 0 otherwise.
568 */
enter_file(struct lexer_state * ls,unsigned long flags)569 int enter_file(struct lexer_state *ls, unsigned long flags)
570 {
571 char *fn = current_long_filename ?
572 current_long_filename : current_filename;
573
574 if (!(flags & LINE_NUM)) return 0;
575 if ((flags & LEXER) && !(flags & TEXT_OUTPUT)) {
576 struct token t;
577
578 t.type = CONTEXT;
579 t.line = ls->line;
580 t.name = fn;
581 print_token(ls, &t, 0);
582 return 1;
583 }
584 print_line_info(ls, flags);
585 ls->oline --; /* emitted #line troubled oline */
586 return 0;
587 }
588
589 #ifdef UCPP_MMAP
/*
 * We open() the file, find its size with lseek() and fdopen() it; then
 * we try to mmap() the file, up to the point where we arrived.
 * On an architecture where end-of-lines are multibytes and translated
 * into single '\n', bad things could happen. We strongly hope that, if
 * we could seek to the end but could not mmap(), then we can get back.
 *
 * On return, find_file_map is the mapping (0 if the file was not
 * mapped) and map_length its size; set_input_file() picks them up.
 */
static void *find_file_map;
static size_t map_length;

/*
 * Open the file called name for reading.
 * Returns the stream, or 0 if the file could not be opened, or if it
 * could not be mapped and the stream could not be rewound either.
 */
FILE *fopen_mmap_file(char *name)
{
	FILE *f;
	int fd;
	off_t end;	/* lseek() returns an off_t, possibly wider than long */

	find_file_map = 0;
	fd = open(name, O_RDONLY, 0);
	if (fd < 0) return 0;
	end = lseek(fd, 0, SEEK_END);
	f = fdopen(fd, "r");
	if (!f) {
		close(fd);
		return 0;
	}
	if (end < 0) return f;	/* not seekable */
	map_length = (size_t)end;
	/*
	 * Do not try to map an empty file (mmap() rejects a zero length)
	 * nor a file whose size does not fit in a size_t; both are read
	 * through the stream instead.
	 */
	if (end > 0 && (off_t)map_length == end)
		find_file_map = mmap(0, map_length, PROT_READ,
			MAP_PRIVATE, fd, 0);
	else
		find_file_map = MAP_FAILED;
	if (find_file_map == MAP_FAILED) {
		/* we could not mmap() the file; get back */
		find_file_map = 0;
		if (fseek(f, 0, SEEK_SET)) {
			/* bwaah... can't get back. This file is cursed. */
			fclose(f);
			return 0;
		}
	}
	return f;
}
630
set_input_file(struct lexer_state * ls,FILE * f)631 void set_input_file(struct lexer_state *ls, FILE *f)
632 {
633 ls->input = f;
634 if (find_file_map) {
635 ls->from_mmap = 1;
636 ls->input_buf_sav = ls->input_buf;
637 ls->input_buf = find_file_map;
638 ls->pbuf = 0;
639 ls->ebuf = map_length;
640 } else {
641 ls->from_mmap = 0;
642 }
643 }
644 #endif
645
/*
 * Find a file by looking through the include path.
 * return value: a FILE * on the file, opened in "r" mode, or 0.
 *
 * After a search, find_file_error contains one of:
 *   FF_ERROR    error (file not found or impossible to read)
 *   FF_PROTECT  file is protected and therefore useless to read
 *   FF_KNOWN    file is already known
 *   FF_UNKNOWN  file was not already known
 */
enum {
	FF_ERROR,
	FF_PROTECT,
	FF_KNOWN,
	FF_UNKNOWN
};

static int find_file_error;
659
find_file(char * name,int localdir)660 static FILE *find_file(char *name, int localdir)
661 {
662 FILE *f;
663 int i, incdir = -1;
664 size_t nl = strlen(name);
665 char *s = 0;
666 struct found_file *ff = 0, *nff;
667 int lf = 0;
668 int nffa = 0;
669
670 find_file_error = FF_ERROR;
671 protect_detect.state = -1;
672 protect_detect.macro = 0;
673 if (localdir) {
674 int i;
675 char *rfn = current_long_filename ? current_long_filename
676 : current_filename;
677
678 for (i = strlen(rfn) - 1; i >= 0; i --)
679 #ifdef MSDOS
680 if (rfn[i] == '\\') break;
681 #else
682 if (rfn[i] == '/') break;
683 #endif
684 #if defined MSDOS
685 if (i >= 0 && *name != '\\' && (nl < 2 || name[1] != ':'))
686 #elif defined AMIGA
687 if (i >= 0 && *name != '/' && (nl < 2 || name[1] != ':'))
688 #else
689 if (i >= 0 && *name != '/')
690 #endif
691 {
692 /*
693 * current file is somewhere else, and the provided
694 * file name is not absolute, so we must adjust the
695 * base for looking for the file; besides,
696 * found_files and found_files_loc are irrelevant
697 * for this search.
698 */
699 s = getmem(i + 2 + nl);
700 mmv(s, rfn, i);
701 #ifdef MSDOS
702 s[i] = '\\';
703 #else
704 s[i] = '/';
705 #endif
706 mmv(s + i + 1, name, nl);
707 s[i + 1 + nl] = 0;
708 ff = HTT_get(&found_files, s);
709 } else ff = HTT_get(&found_files, name);
710 }
711 if (!ff) {
712 struct found_file_sys *ffs = HTT_get(&found_files_sys, name);
713
714 if (ffs) {
715 ff = ffs->rff;
716 incdir = ffs->incdir;
717 }
718 }
719 /*
720 * At that point: if the file was found in the cache, ff points to
721 * the cached descriptive structure; its name is s if s is not 0,
722 * name otherwise.
723 */
724 if (ff) goto found_file_cache;
725
726 /*
727 * This is the first time we find the file, or it was not protected.
728 */
729 protect_detect.ff = new_found_file();
730 nffa = 1;
731 if (localdir &&
732 #ifdef UCPP_MMAP
733 (f = fopen_mmap_file(s ? s : name))
734 #else
735 (f = fopen(s ? s : name, "r"))
736 #endif
737 ) {
738 lf = 1;
739 goto found_file;
740 }
741 /*
742 * If s contains a name, that name is now irrelevant: it was a
743 * filename for a search in the current directory, and the file
744 * was not found.
745 */
746 if (s) {
747 freemem(s);
748 s = 0;
749 }
750 for (i = 0; (size_t)i < include_path_nb; i ++) {
751 size_t ni = strlen(include_path[i]);
752
753 s = getmem(ni + nl + 2);
754 mmv(s, include_path[i], ni);
755 #ifdef AMIGA
756 /* contributed by Volker Barthelmann */
757 if (ni == 1 && *s == '.') {
758 *s = 0;
759 ni = 0;
760 }
761 if (ni > 0 && s[ni - 1] != ':' && s[ni - 1] != '/') {
762 s[ni] = '/';
763 mmv(s + ni + 1, name, nl + 1);
764 } else {
765 mmv(s + ni, name, nl + 1);
766 }
767 #else
768 s[ni] = '/';
769 mmv(s + ni + 1, name, nl + 1);
770 #endif
771 #ifdef MSDOS
772 /* on msdos systems, replace all / by \ */
773 {
774 char *c;
775
776 for (c = s; *c; c ++) if (*c == '/') *c = '\\';
777 }
778 #endif
779 incdir = i;
780 if ((ff = HTT_get(&found_files, s)) != 0) {
781 /*
782 * The file is known, but not as a system include
783 * file under the name provided.
784 */
785 struct found_file_sys *ffs = new_found_file_sys();
786
787 ffs->rff = ff;
788 ffs->incdir = incdir;
789 HTT_put(&found_files_sys, ffs, name);
790 freemem(s);
791 s = 0;
792 if (nffa) {
793 del_found_file(protect_detect.ff);
794 protect_detect.ff = 0;
795 nffa = 0;
796 }
797 goto found_file_cache;
798 }
799 #ifdef UCPP_MMAP
800 f = fopen_mmap_file(s);
801 #else
802 f = fopen(s, "r");
803 #endif
804 if (f) goto found_file;
805 freemem(s);
806 s = 0;
807 }
808 zero_out:
809 if (s) freemem(s);
810 if (nffa) {
811 del_found_file(protect_detect.ff);
812 protect_detect.ff = 0;
813 nffa = 0;
814 }
815 return 0;
816
817 /*
818 * This part is invoked when the file was found in the
819 * cache.
820 */
821 found_file_cache:
822 if (ff->protect) {
823 if (get_macro(ff->protect)) {
824 /* file is protected, do not include it */
825 find_file_error = FF_PROTECT;
826 goto zero_out;
827 }
828 /* file is protected but the guardian macro is
829 not available; disable guardian detection. */
830 protect_detect.state = 0;
831 }
832 protect_detect.ff = ff;
833 #ifdef UCPP_MMAP
834 f = fopen_mmap_file(HASH_ITEM_NAME(ff));
835 #else
836 f = fopen(HASH_ITEM_NAME(ff), "r");
837 #endif
838 if (!f) goto zero_out;
839 find_file_error = FF_KNOWN;
840 goto found_file_2;
841
842 /*
843 * This part is invoked when we found a new file, which was not
844 * yet referenced. If lf == 1, then the file was found directly,
845 * otherwise it was found in some system include directory.
846 * A new found_file structure has been allocated and is in
847 * protect_detect.ff
848 */
849 found_file:
850 if (f && ((emit_dependencies == 1 && lf && current_incdir == -1)
851 || emit_dependencies == 2)) {
852 fprintf(emit_output, " %s", s ? s : name);
853 }
854 nff = protect_detect.ff;
855 nff->name = sdup(name);
856 #ifdef AUDIT
857 if (
858 #endif
859 HTT_put(&found_files, nff, s ? s : name)
860 #ifdef AUDIT
861 ) ouch("filename collided with a wraith")
862 #endif
863 ;
864 if (!lf) {
865 struct found_file_sys *ffs = new_found_file_sys();
866
867 ffs->rff = nff;
868 ffs->incdir = incdir;
869 HTT_put(&found_files_sys, ffs, name);
870 }
871 if (s) freemem(s);
872 s = 0;
873 find_file_error = FF_UNKNOWN;
874 ff = nff;
875
876 found_file_2:
877 if (s) freemem(s);
878 current_long_filename = HASH_ITEM_NAME(ff);
879 #ifdef NO_LIBC_BUF
880 setbuf(f, 0);
881 #endif
882 current_incdir = incdir;
883 return f;
884 }
885
886 /*
887 * Find the named file by looking through the end of the include path.
888 * This is for #include_next directives.
889 * #include_next <foo> and #include_next "foo" are considered identical,
890 * for all practical purposes.
891 */
find_file_next(char * name)892 static FILE *find_file_next(char *name)
893 {
894 int i;
895 size_t nl = strlen(name);
896 FILE *f;
897 struct found_file *ff;
898
899 find_file_error = FF_ERROR;
900 protect_detect.state = -1;
901 protect_detect.macro = 0;
902 for (i = current_incdir + 1; (size_t)i < include_path_nb; i ++) {
903 char *s;
904 size_t ni = strlen(include_path[i]);
905
906 s = getmem(ni + nl + 2);
907 mmv(s, include_path[i], ni);
908 s[ni] = '/';
909 mmv(s + ni + 1, name, nl + 1);
910 #ifdef MSDOS
911 /* on msdos systems, replace all / by \ */
912 {
913 char *c;
914
915 for (c = s; *c; c ++) if (*c == '/') *c = '\\';
916 }
917 #endif
918 ff = HTT_get(&found_files, s);
919 if (ff) {
920 /* file was found in the cache */
921 if (ff->protect) {
922 if (get_macro(ff->protect)) {
923 find_file_error = FF_PROTECT;
924 freemem(s);
925 return 0;
926 }
927 /* file is protected but the guardian macro is
928 not available; disable guardian detection. */
929 protect_detect.state = 0;
930 }
931 protect_detect.ff = ff;
932 #ifdef UCPP_MMAP
933 f = fopen_mmap_file(HASH_ITEM_NAME(ff));
934 #else
935 f = fopen(HASH_ITEM_NAME(ff), "r");
936 #endif
937 if (!f) {
938 /* file is referenced but yet unavailable. */
939 freemem(s);
940 return 0;
941 }
942 find_file_error = FF_KNOWN;
943 freemem(s);
944 s = HASH_ITEM_NAME(ff);
945 } else {
946 #ifdef UCPP_MMAP
947 f = fopen_mmap_file(s);
948 #else
949 f = fopen(s, "r");
950 #endif
951 if (f) {
952 if (emit_dependencies == 2) {
953 fprintf(emit_output, " %s", s);
954 }
955 ff = protect_detect.ff = new_found_file();
956 ff->name = sdup(s);
957 #ifdef AUDIT
958 if (
959 #endif
960 HTT_put(&found_files, ff, s)
961 #ifdef AUDIT
962 ) ouch("filename collided with a wraith")
963 #endif
964 ;
965 find_file_error = FF_UNKNOWN;
966 freemem(s);
967 s = HASH_ITEM_NAME(ff);
968 }
969 }
970 if (f) {
971 current_long_filename = s;
972 current_incdir = i;
973 return f;
974 }
975 freemem(s);
976 }
977 return 0;
978 }
979
980 /*
981 * The #if directive. This function parse the expression, performs macro
982 * expansion (and handles the "defined" operator), and call eval_expr.
983 * return value: 1 if the expression is true, 0 if it is false, -1 on error.
984 */
handle_if(struct lexer_state * ls)985 static int handle_if(struct lexer_state *ls)
986 {
987 struct token_fifo tf, tf1, tf2, tf3, *save_tf;
988 long l = ls->line;
989 unsigned long z;
990 int ret = 0, ltww = 1;
991
992 tf.t = NULL; // Prevent uninitialized use warning
993
994 /* first, get the whole line */
995 tf.art = tf.nt = 0;
996 while (!next_token(ls) && ls->ctok->type != NEWLINE) {
997 struct token t;
998
999 if (ltww && ttMWS(ls->ctok->type)) continue;
1000 ltww = ttMWS(ls->ctok->type);
1001 t.type = ls->ctok->type;
1002 t.line = l;
1003 if (S_TOKEN(ls->ctok->type)) {
1004 t.name = sdup(ls->ctok->name);
1005 throw_away(ls->gf, t.name);
1006 }
1007 aol(tf.t, tf.nt, t, TOKEN_LIST_MEMG);
1008 }
1009 if (ltww && tf.nt) if ((-- tf.nt) == 0) freemem(tf.t);
1010 if (tf.nt == 0) {
1011 error(l, "void condition for a #if/#elif");
1012 return -1;
1013 }
1014 /* handle the "defined" operator */
1015 tf1.art = tf1.nt = 0;
1016 while (tf.art < tf.nt) {
1017 struct token *ct, rt;
1018 struct macro *m;
1019 size_t nidx, eidx;
1020
1021 ct = tf.t + (tf.art ++);
1022 if (ct->type == NAME && !strcmp(ct->name, "defined")) {
1023 if (tf.art >= tf.nt) goto store_token;
1024 nidx = tf.art;
1025 if (ttMWS(tf.t[nidx].type))
1026 if (++ nidx >= tf.nt) goto store_token;
1027 if (tf.t[nidx].type == NAME) {
1028 eidx = nidx;
1029 goto check_macro;
1030 }
1031 if (tf.t[nidx].type != LPAR) goto store_token;
1032 if (++ nidx >= tf.nt) goto store_token;
1033 if (ttMWS(tf.t[nidx].type))
1034 if (++ nidx >= tf.nt) goto store_token;
1035 if (tf.t[nidx].type != NAME) goto store_token;
1036 eidx = nidx + 1;
1037 if (eidx >= tf.nt) goto store_token;
1038 if (ttMWS(tf.t[eidx].type))
1039 if (++ eidx >= tf.nt) goto store_token;
1040 if (tf.t[eidx].type != RPAR) goto store_token;
1041 goto check_macro;
1042 }
1043 store_token:
1044 aol(tf1.t, tf1.nt, *ct, TOKEN_LIST_MEMG);
1045 continue;
1046
1047 check_macro:
1048 m = get_macro(tf.t[nidx].name);
1049 rt.type = NUMBER;
1050 rt.name = m ? "1L" : "0L";
1051 aol(tf1.t, tf1.nt, rt, TOKEN_LIST_MEMG);
1052 tf.art = eidx + 1;
1053 }
1054 freemem(tf.t);
1055 if (tf1.nt == 0) {
1056 error(l, "void condition (after expansion) for a #if/#elif");
1057 return -1;
1058 }
1059
1060 /* perform all macro substitutions */
1061 tf2.art = tf2.nt = 0;
1062 save_tf = ls->output_fifo;
1063 ls->output_fifo = &tf2;
1064 while (tf1.art < tf1.nt) {
1065 struct token *ct;
1066
1067 ct = tf1.t + (tf1.art ++);
1068 if (ct->type == NAME) {
1069 struct macro *m = get_macro(ct->name);
1070
1071 if (m) {
1072 if (substitute_macro(ls, m, &tf1, 0,
1073 #ifdef NO_PRAGMA_IN_DIRECTIVE
1074 1,
1075 #else
1076 0,
1077 #endif
1078 ct->line)) {
1079 ls->output_fifo = save_tf;
1080 goto error1;
1081 }
1082 continue;
1083 }
1084 } else if ((ct->type == SHARP || ct->type == DIG_SHARP)
1085 && (ls->flags & HANDLE_ASSERTIONS)) {
1086 /* we have an assertion; parse it */
1087 int nnp, ltww = 1;
1088 size_t i = tf1.art;
1089 struct token_fifo atl;
1090 char *aname;
1091 struct assert *a;
1092 int av = 0;
1093 struct token rt;
1094
1095 atl.art = atl.nt = 0;
1096 while (i < tf1.nt && ttMWS(tf1.t[i].type)) i ++;
1097 if (i >= tf1.nt) goto assert_error;
1098 if (tf1.t[i].type != NAME) goto assert_error;
1099 aname = tf1.t[i ++].name;
1100 while (i < tf1.nt && ttMWS(tf1.t[i].type)) i ++;
1101 if (i >= tf1.nt) goto assert_generic;
1102 if (tf1.t[i].type != LPAR) goto assert_generic;
1103 i ++;
1104 for (nnp = 1; nnp && i < tf1.nt; i ++) {
1105 if (ltww && ttMWS(tf1.t[i].type)) continue;
1106 if (tf1.t[i].type == LPAR) nnp ++;
1107 else if (tf1.t[i].type == RPAR
1108 && (-- nnp) == 0) {
1109 tf1.art = i + 1;
1110 break;
1111 }
1112 ltww = ttMWS(tf1.t[i].type);
1113 aol(atl.t, atl.nt, tf1.t[i], TOKEN_LIST_MEMG);
1114 }
1115 if (nnp) goto assert_error;
1116 if (ltww && atl.nt && (-- atl.nt) == 0) freemem(atl.t);
1117 if (atl.nt == 0) goto assert_error;
1118
1119 /* the assertion is in aname and atl; check it */
1120 a = get_assertion(aname);
1121 if (a) for (i = 0; i < a->nbval; i ++)
1122 if (!cmp_token_list(&atl, a->val + i)) {
1123 av = 1;
1124 break;
1125 }
1126 rt.type = NUMBER;
1127 rt.name = av ? "1" : "0";
1128 aol(tf2.t, tf2.nt, rt, TOKEN_LIST_MEMG);
1129 if (atl.nt) freemem(atl.t);
1130 continue;
1131
1132 assert_generic:
1133 tf1.art = i;
1134 rt.type = NUMBER;
1135 rt.name = get_assertion(aname) ? "1" : "0";
1136 aol(tf2.t, tf2.nt, rt, TOKEN_LIST_MEMG);
1137 continue;
1138
1139 assert_error:
1140 error(l, "syntax error for assertion in #if");
1141 ls->output_fifo = save_tf;
1142 goto error1;
1143 }
1144 aol(tf2.t, tf2.nt, *ct, TOKEN_LIST_MEMG);
1145 }
1146 ls->output_fifo = save_tf;
1147 freemem(tf1.t);
1148 if (tf2.nt == 0) {
1149 error(l, "void condition (after expansion) for a #if/#elif");
1150 return -1;
1151 }
1152
1153 /*
1154 * suppress whitespace and replace rogue identifiers by 0
1155 */
1156 tf3.art = tf3.nt = 0;
1157 while (tf2.art < tf2.nt) {
1158 struct token *ct = tf2.t + (tf2.art ++);
1159
1160 if (ttMWS(ct->type)) continue;
1161 if (ct->type == NAME) {
1162 /*
1163 * a rogue identifier; we replace it with "0".
1164 */
1165 struct token rt;
1166
1167 rt.type = NUMBER;
1168 rt.name = "0";
1169 aol(tf3.t, tf3.nt, rt, TOKEN_LIST_MEMG);
1170 continue;
1171 }
1172 aol(tf3.t, tf3.nt, *ct, TOKEN_LIST_MEMG);
1173 }
1174 freemem(tf2.t);
1175
1176 if (tf3.nt == 0) {
1177 error(l, "void condition (after expansion) for a #if/#elif");
1178 return -1;
1179 }
1180 eval_line = l;
1181 z = eval_expr(&tf3, &ret, (ls->flags & WARN_STANDARD) != 0);
1182 freemem(tf3.t);
1183 if (ret) return -1;
1184 return (z != 0);
1185
1186 error1:
1187 if (tf1.nt) freemem(tf1.t);
1188 if (tf2.nt) freemem(tf2.t);
1189 return -1;
1190 }
1191
1192 /*
1193 * A #include was found; parse the end of line, replace macros if
1194 * necessary.
1195 *
1196 * If nex is set to non-zero, the directive is considered as a #include_next
1197 * (extension to C99, mimicked from GNU)
1198 */
handle_include(struct lexer_state * ls,unsigned long flags,int nex)1199 static int handle_include(struct lexer_state *ls, unsigned long flags, int nex)
1200 {
1201 int c, string_fname = 0;
1202 char *fname = NULL; // Prevent uninitialized use warning
1203 unsigned char *fname2;
1204 size_t fname_ptr = 0;
1205 long l = ls->line;
1206 int x, y;
1207 FILE *f;
1208 struct token_fifo tf, tf2, *save_tf;
1209 size_t nl;
1210 int tgd;
1211 struct lexer_state alt_ls;
1212
1213 #define left_angle(t) ((t) == LT || (t) == LEQ || (t) == LSH \
1214 || (t) == ASLSH || (t) == DIG_LBRK || (t) == LBRA)
1215 #define right_angle(t) ((t) == GT || (t) == RSH || (t) == ARROW \
1216 || (t) == DIG_RBRK || (t) == DIG_RBRA)
1217
1218 while ((c = grap_char(ls)) >= 0 && c != '\n') {
1219 if (space_char(c)) {
1220 discard_char(ls);
1221 continue;
1222 }
1223 if (c == '<') {
1224 discard_char(ls);
1225 while ((c = grap_char(ls)) >= 0) {
1226 discard_char(ls);
1227 if (c == '\n') goto include_last_chance;
1228 if (c == '>') break;
1229 aol(fname, fname_ptr, (char)c, FNAME_MEMG);
1230 }
1231 aol(fname, fname_ptr, (char)0, FNAME_MEMG);
1232 string_fname = 0;
1233 goto do_include;
1234 } else if (c == '"') {
1235 discard_char(ls);
1236 while ((c = grap_char(ls)) >= 0) {
1237 discard_char(ls);
1238 if (c == '\n') {
1239 /* macro replacements won't save that one */
1240 if (fname_ptr) freemem(fname);
1241 goto include_error;
1242 }
1243 if (c == '"') break;
1244 aol(fname, fname_ptr, (char)c, FNAME_MEMG);
1245 }
1246 aol(fname, fname_ptr, (char)0, FNAME_MEMG);
1247 string_fname = 1;
1248 goto do_include;
1249 }
1250 goto include_macro;
1251 }
1252
1253 include_last_chance:
1254 /*
1255 * We found a '<' but not the trailing '>'; so we tokenize the
1256 * line, and try to act upon it. The standard lets us free in that
1257 * matter, and no sane programmer would use such a construct, but
1258 * it is no reason not to support it.
1259 */
1260 if (fname_ptr == 0) goto include_error;
1261 fname2 = getmem(fname_ptr + 1);
1262 mmv(fname2 + 1, fname, fname_ptr);
1263 fname2[0] = '<';
1264 /*
1265 * We merely copy the lexer_state structure; this should be ok,
1266 * since we do want to share the memory structure (garbage_fifo),
1267 * and do not touch any other context-full thing.
1268 */
1269 alt_ls = *ls;
1270 alt_ls.input = 0;
1271 alt_ls.input_string = fname2;
1272 alt_ls.pbuf = 0;
1273 alt_ls.ebuf = fname_ptr + 1;
1274 tf.art = tf.nt = 0;
1275 while (!next_token(&alt_ls)) {
1276 if (!ttMWS(alt_ls.ctok->type)) {
1277 struct token t;
1278
1279 t.type = alt_ls.ctok->type;
1280 t.line = l;
1281 if (S_TOKEN(alt_ls.ctok->type)) {
1282 t.name = sdup(alt_ls.ctok->name);
1283 throw_away(alt_ls.gf, t.name);
1284 }
1285 aol(tf.t, tf.nt, t, TOKEN_LIST_MEMG);
1286 }
1287 }
1288 freemem(fname2);
1289 if (alt_ls.pbuf < alt_ls.ebuf) goto include_error;
1290 /* tokenizing failed */
1291 goto include_macro2;
1292
1293 include_error:
1294 error(l, "invalid '#include'");
1295 return 1;
1296
1297 include_macro:
1298 tf.art = tf.nt = 0;
1299 while (!next_token(ls) && ls->ctok->type != NEWLINE) {
1300 if (!ttMWS(ls->ctok->type)) {
1301 struct token t;
1302
1303 t.type = ls->ctok->type;
1304 t.line = l;
1305 if (S_TOKEN(ls->ctok->type)) {
1306 t.name = sdup(ls->ctok->name);
1307 throw_away(ls->gf, t.name);
1308 }
1309 aol(tf.t, tf.nt, t, TOKEN_LIST_MEMG);
1310 }
1311 }
1312 include_macro2:
1313 tf2.art = tf2.nt = 0;
1314 save_tf = ls->output_fifo;
1315 ls->output_fifo = &tf2;
1316 while (tf.art < tf.nt) {
1317 struct token *ct;
1318
1319 ct = tf.t + (tf.art ++);
1320 if (ct->type == NAME) {
1321 struct macro *m = get_macro(ct->name);
1322 if (m) {
1323 if (substitute_macro(ls, m, &tf, 0,
1324 #ifdef NO_PRAGMA_IN_DIRECTIVE
1325 1,
1326 #else
1327 0,
1328 #endif
1329 ct->line)) {
1330 ls->output_fifo = save_tf;
1331 return -1;
1332 }
1333 continue;
1334 }
1335 }
1336 aol(tf2.t, tf2.nt, *ct, TOKEN_LIST_MEMG);
1337 }
1338 freemem(tf.t);
1339 ls->output_fifo = save_tf;
1340 for (x = 0; (size_t)x < tf2.nt && ttWHI(tf2.t[x].type); x ++);
1341 for (y = tf2.nt - 1; y >= 0 && ttWHI(tf2.t[y].type); y --);
1342 if ((size_t)x >= tf2.nt) goto include_macro_err;
1343 if (tf2.t[x].type == STRING) {
1344 if (y != x) goto include_macro_err;
1345 if (tf2.t[x].name[0] == 'L') {
1346 if (ls->flags & WARN_STANDARD)
1347 warning(l, "wide string for #include");
1348 fname = sdup(tf2.t[x].name);
1349 nl = strlen(fname);
1350 *(fname + nl - 1) = 0;
1351 mmvwo(fname, fname + 2, nl - 2);
1352 } else {
1353 fname = sdup(tf2.t[x].name);
1354 nl = strlen(fname);
1355 *(fname + nl - 1) = 0;
1356 mmvwo(fname, fname + 1, nl - 1);
1357 }
1358 string_fname = 1;
1359 } else if (left_angle(tf2.t[x].type) && right_angle(tf2.t[y].type)) {
1360 int i, j;
1361
1362 if (ls->flags & WARN_ANNOYING) warning(l, "reconstruction "
1363 "of <foo> in #include");
1364 for (j = 0, i = x; i <= y; i ++) if (!ttWHI(tf2.t[i].type))
1365 j += strlen(tname(tf2.t[i]));
1366 fname = getmem(j + 1);
1367 for (j = 0, i = x; i <= y; i ++) {
1368 if (ttWHI(tf2.t[i].type)) continue;
1369 strcpy(fname + j, tname(tf2.t[i]));
1370 j += strlen(tname(tf2.t[i]));
1371 }
1372 *(fname + j - 1) = 0;
1373 mmvwo(fname, fname + 1, j);
1374 string_fname = 0;
1375 } else goto include_macro_err;
1376 freemem(tf2.t);
1377 goto do_include_next;
1378
1379 include_macro_err:
1380 error(l, "macro expansion did not produce a valid filename "
1381 "for #include");
1382 if (tf2.nt) freemem(tf2.t);
1383 return 1;
1384
1385 do_include:
1386 tgd = 1;
1387 while (!next_token(ls)) {
1388 if (tgd && !ttWHI(ls->ctok->type)
1389 && (ls->flags & WARN_STANDARD)) {
1390 warning(l, "trailing garbage in #include");
1391 tgd = 0;
1392 }
1393 if (ls->ctok->type == NEWLINE) break;
1394 }
1395
1396 /* the increment of ls->line is intended so that the line
1397 numbering is reported correctly in report_context() even if
1398 the #include is at the end of the file with no trailing newline */
1399 if (ls->ctok->type != NEWLINE) ls->line ++;
1400 do_include_next:
1401 if (!(ls->flags & LEXER) && (ls->flags & KEEP_OUTPUT))
1402 put_char(ls, '\n');
1403 push_file_context(ls);
1404 reinit_lexer_state(ls, 1);
1405 #ifdef MSDOS
1406 /* on msdos systems, replace all / by \ */
1407 {
1408 char *d;
1409
1410 for (d = fname; *d; d ++) if (*d == '/') *d = '\\';
1411 }
1412 #endif
1413 f = nex ? find_file_next(fname) : find_file(fname, string_fname);
1414 if (!f) {
1415 current_filename = 0;
1416 pop_file_context(ls);
1417 if (find_file_error == FF_ERROR) {
1418 error(l, "file '%s' not found", fname);
1419 freemem(fname);
1420 return 1;
1421 }
1422 /* file was found, but it is useless to include it again */
1423 freemem(fname);
1424 return 0;
1425 }
1426 #ifdef UCPP_MMAP
1427 set_input_file(ls, f);
1428 #else
1429 ls->input = f;
1430 #endif
1431 current_filename = fname;
1432 enter_file(ls, flags);
1433 return 0;
1434
1435 #undef left_angle
1436 #undef right_angle
1437 }
1438
1439 /*
1440 * for #line directives
1441 */
handle_line(struct lexer_state * ls,unsigned long flags)1442 static int handle_line(struct lexer_state *ls, unsigned long flags)
1443 {
1444 char *fname;
1445 long l = ls->line;
1446 struct token_fifo tf, tf2, *save_tf;
1447 size_t nl, j;
1448 unsigned long z;
1449
1450 tf.art = tf.nt = 0;
1451 while (!next_token(ls) && ls->ctok->type != NEWLINE) {
1452 if (!ttMWS(ls->ctok->type)) {
1453 struct token t;
1454
1455 t.type = ls->ctok->type;
1456 t.line = l;
1457 if (S_TOKEN(ls->ctok->type)) {
1458 t.name = sdup(ls->ctok->name);
1459 throw_away(ls->gf, t.name);
1460 }
1461 aol(tf.t, tf.nt, t, TOKEN_LIST_MEMG);
1462 }
1463 }
1464 tf2.art = tf2.nt = 0;
1465 save_tf = ls->output_fifo;
1466 ls->output_fifo = &tf2;
1467 while (tf.art < tf.nt) {
1468 struct token *ct;
1469
1470 ct = tf.t + (tf.art ++);
1471 if (ct->type == NAME) {
1472 struct macro *m = get_macro(ct->name);
1473 if (m) {
1474 if (substitute_macro(ls, m, &tf, 0,
1475 #ifdef NO_PRAGMA_IN_DIRECTIVE
1476 1,
1477 #else
1478 0,
1479 #endif
1480 ct->line)) {
1481 ls->output_fifo = save_tf;
1482 return -1;
1483 }
1484 continue;
1485 }
1486 }
1487 aol(tf2.t, tf2.nt, *ct, TOKEN_LIST_MEMG);
1488 }
1489 freemem(tf.t);
1490 for (tf2.art = 0; tf2.art < tf2.nt && ttWHI(tf2.t[tf2.art].type);
1491 tf2.art ++);
1492 ls->output_fifo = save_tf;
1493 if (tf2.art == tf2.nt || (tf2.t[tf2.art].type != NUMBER
1494 && tf2.t[tf2.art].type != CHAR)) {
1495 error(l, "not a valid number for #line");
1496 goto line_macro_err;
1497 }
1498 for (j = 0; tf2.t[tf2.art].name[j]; j ++)
1499 if (tf2.t[tf2.art].name[j] < '0'
1500 || tf2.t[tf2.art].name[j] > '9')
1501 if (ls->flags & WARN_STANDARD)
1502 warning(l, "non-standard line number in #line");
1503 if (catch(eval_exception)) goto line_macro_err;
1504 z = strtoconst(tf2.t[tf2.art].name);
1505 if (j > 10 || z > 2147483647U) {
1506 error(l, "out-of-bound line number for #line");
1507 goto line_macro_err;
1508 }
1509 ls->oline = ls->line = z;
1510 if ((++ tf2.art) < tf2.nt) {
1511 size_t i;
1512
1513 for (i = tf2.art; i < tf2.nt && ttMWS(tf2.t[i].type); i ++);
1514 if (i < tf2.nt) {
1515 if (tf2.t[i].type != STRING) {
1516 error(l, "not a valid filename for #line");
1517 goto line_macro_err;
1518 }
1519 if (tf2.t[i].name[0] == 'L') {
1520 if (ls->flags & WARN_STANDARD) {
1521 warning(l, "wide string for #line");
1522 }
1523 fname = sdup(tf2.t[i].name);
1524 nl = strlen(fname);
1525 *(fname + nl - 1) = 0;
1526 mmvwo(fname, fname + 2, nl - 2);
1527 } else {
1528 fname = sdup(tf2.t[i].name);
1529 nl = strlen(fname);
1530 *(fname + nl - 1) = 0;
1531 mmvwo(fname, fname + 1, nl - 1);
1532 }
1533 if (current_filename) freemem(current_filename);
1534 current_filename = fname;
1535 }
1536 for (i ++; i < tf2.nt && ttMWS(tf2.t[i].type); i ++);
1537 if (i < tf2.nt && (ls->flags & WARN_STANDARD)) {
1538 warning(l, "trailing garbage in #line");
1539 }
1540 }
1541 freemem(tf2.t);
1542 enter_file(ls, flags);
1543 return 0;
1544
1545 line_macro_err:
1546 if (tf2.nt) freemem(tf2.t);
1547 return 1;
1548 }
1549
1550 /*
1551 * a #error directive: we emit the message without any modification
1552 * (except the usual backslash+newline and trigraphs)
1553 */
handle_error(struct lexer_state * ls)1554 static void handle_error(struct lexer_state *ls)
1555 {
1556 int c;
1557 size_t p = 0, lp = 128;
1558 long l = ls->line;
1559 unsigned char *buf = getmem(lp);
1560
1561 while ((c = grap_char(ls)) >= 0 && c != '\n') {
1562 discard_char(ls);
1563 wan(buf, p, (unsigned char)c, lp);
1564 }
1565 wan(buf, p, 0, lp);
1566 error(l, "#error%s", buf);
1567 freemem(buf);
1568 }
1569
1570 /*
1571 * convert digraph tokens to their standard equivalent.
1572 */
undig(int type)1573 static int undig(int type)
1574 {
1575 static int ud[6] = { LBRK, RBRK, LBRA, RBRA, SHARP, DSHARP };
1576
1577 return ud[type - DIG_LBRK];
1578 }
1579
1580 #ifdef PRAGMA_TOKENIZE
1581 /*
1582 * Make a compressed representation of a token list; the contents of
1583 * the token_fifo are freed. Values equal to 0 are replaced by
1584 * PRAGMA_TOKEN_END (by default, (unsigned char)'\n') and the compressed
1585 * string is padded by a 0 (so that it may be * handled like a string).
1586 * Digraph tokens are replaced by their non-digraph equivalents.
1587 */
compress_token_list(struct token_fifo * tf)1588 struct comp_token_fifo compress_token_list(struct token_fifo *tf)
1589 {
1590 struct comp_token_fifo ct;
1591 size_t l;
1592
1593 for (l = 0, tf->art = 0; tf->art < tf->nt; tf->art ++) {
1594 l ++;
1595 if (S_TOKEN(tf->t[tf->art].type))
1596 l += strlen(tf->t[tf->art].name) + 1;
1597 }
1598 ct.t = getmem((ct.length = l) + 1);
1599 for (l = 0, tf->art = 0; tf->art < tf->nt; tf->art ++) {
1600 int tt = tf->t[tf->art].type;
1601
1602 if (tt == 0) tt = PRAGMA_TOKEN_END;
1603 if (tt > DIGRAPH_TOKENS && tt < DIGRAPH_TOKENS_END)
1604 tt = undig(tt);
1605 ct.t[l ++] = tt;
1606 if (S_TOKEN(tt)) {
1607 char *tn = tf->t[tf->art].name;
1608 size_t sl = strlen(tn);
1609
1610 mmv(ct.t + l, tn, sl);
1611 l += sl;
1612 ct.t[l ++] = PRAGMA_TOKEN_END;
1613 freemem(tn);
1614 }
1615 }
1616 ct.t[l] = 0;
1617 if (tf->nt) freemem(tf->t);
1618 ct.rp = 0;
1619 return ct;
1620 }
1621 #endif
1622
1623 /*
1624 * A #pragma directive: we make a PRAGMA token containing the rest of
1625 * the line.
1626 *
1627 * We strongly hope that we are called only in LEXER mode.
1628 */
handle_pragma(struct lexer_state * ls)1629 static void handle_pragma(struct lexer_state *ls)
1630 {
1631 unsigned char *buf;
1632 struct token t;
1633 long l = ls->line;
1634
1635 #ifdef PRAGMA_TOKENIZE
1636 struct token_fifo tf;
1637
1638 tf.art = tf.nt = 0;
1639 while (!next_token(ls) && ls->ctok->type != NEWLINE)
1640 if (!ttMWS(ls->ctok->type)) break;
1641 if (ls->ctok->type != NEWLINE) {
1642 do {
1643 struct token t;
1644
1645 t.type = ls->ctok->type;
1646 if (ttMWS(t.type)) continue;
1647 if (S_TOKEN(t.type)) t.name = sdup(ls->ctok->name);
1648 aol(tf.t, tf.nt, t, TOKEN_LIST_MEMG);
1649 } while (!next_token(ls) && ls->ctok->type != NEWLINE);
1650 }
1651 if (tf.nt == 0) {
1652 /* void pragma are silently ignored */
1653 return;
1654 }
1655 buf = (compress_token_list(&tf)).t;
1656 #else
1657 int c, x = 1, y = 32;
1658
1659 while ((c = grap_char(ls)) >= 0 && c != '\n') {
1660 discard_char(ls);
1661 if (!space_char(c)) break;
1662 }
1663 /* void #pragma are ignored */
1664 if (c == '\n') return;
1665 buf = getmem(y);
1666 buf[0] = c;
1667 while ((c = grap_char(ls)) >= 0 && c != '\n') {
1668 discard_char(ls);
1669 wan(buf, x, c, y);
1670 }
1671 for (x --; x >= 0 && space_char(buf[x]); x --);
1672 x ++;
1673 wan(buf, x, 0, y);
1674 #endif
1675 t.type = PRAGMA;
1676 t.line = l;
1677 t.name = (char *)buf;
1678 aol(ls->output_fifo->t, ls->output_fifo->nt, t, TOKEN_LIST_MEMG);
1679 throw_away(ls->gf, (char *)buf);
1680 }
1681
/*
 * We saw a # at the beginning of a line (or preceded only by whitespace).
 * We check the directive name and act accordingly.
 */
/*
 * handle_cpp(): identify the directive that follows a leading '#' and
 * run the matching handler.
 *
 * ls          lexer state; cpp() calls this function when the current
 *             token is a SHARP or DIG_SHARP found at the start of a line
 * sharp_type  token type of that '#'; it is used to print the '#' back
 *             when the line is dumped instead of being interpreted
 *
 * The LEXER flag is forced on while the directive is read; the flags
 * saved on entry are put back before returning through handle_exit2.
 *
 * Return value: ret, which is 0 by default, 1 after an error has been
 * reported, or the value returned by handle_define(), handle_undef(),
 * handle_include(), handle_line(), handle_assert() or handle_unassert().
 * For #if/#ifdef/#ifndef/#elif the handler result is mapped: a negative
 * result becomes 1, any other result becomes 0. The function returns 1
 * when next_token() returns non-zero before a directive is dispatched.
 */
handle_cpp(struct lexer_state * ls,int sharp_type)1686 static int handle_cpp(struct lexer_state *ls, int sharp_type)
1687 {
/*
 * ls->condf is used as a bit set indexed by nesting depth: the bit is
 * cleared when a #if/#ifdef/#ifndef opens the level, set when the #else
 * of that level is seen, and tested to detect a second #else or a
 * #elif coming after the #else.
 */
1688 #define condfset(x) do { \
1689 ls->condf[(x) / 32] |= 1UL << ((x) % 32); \
1690 } while (0)
1691 #define condfclr(x) do { \
1692 ls->condf[(x) / 32] &= ~(1UL << ((x) % 32)); \
1693 } while (0)
1694 #define condfval(x) ((ls->condf[(x) / 32] & (1UL << ((x) % 32))) != 0)
1695
1696 long l = ls->line;
1697 unsigned long save_flags = ls->flags;
1698 int ret = 0;
1699
/* NOTE(review): redundant, save_flags already holds this value */
1700 save_flags = ls->flags;
1701 ls->flags |= LEXER;
/* read tokens until something other than whitespace/comment shows up */
1702 while (!next_token(ls)) {
1703 int t = ls->ctok->type;
1704
1705 switch (t) {
1706 case COMMENT:
1707 if (ls->flags & WARN_ANNOYING) {
1708 warning(l, "comment in the middle of "
1709 "a cpp directive");
1710 }
1711 /* fall through */
1712 case NONE:
1713 continue;
1714 case NEWLINE:
1715 /* null directive */
1716 if (ls->flags & WARN_ANNOYING) {
1717 /* truly an annoying warning; null directives
1718 are rare but may increase readability of
1719 some source files, and they are legal */
1720 warning(l, "null cpp directive");
1721 }
1722 if (!(ls->flags & LEXER)) put_char(ls, '\n');
1723 goto handle_exit2;
1724 case NAME:
1725 break;
1726 default:
/* the '#' is not followed by a name: either fail, or print the '#'
   and the token back with the caller's flags */
1727 if (ls->flags & FAIL_SHARP) {
1728 /* LPS 20050602 - ignores '#!' if on the first line */
1729 if( ( l == 1 ) &&
1730 ( ls->condcomp ) )
1731 {
1732 ret = 1;
1733 }
1734 else
1735 /* LPS 20050602 */
1736 if (ls->condcomp) {
1737 error(l, "rogue '#'");
1738 ret = 1;
1739 } else {
1740 if (ls->flags & WARN_STANDARD) {
1741 warning(l, "rogue '#' in code "
1742 "compiled out");
1743 ret = 0;
1744 }
1745 }
1746 ls->flags = save_flags;
1747 goto handle_warp_ign;
1748 } else {
1749 struct token u;
1750
1751 u.type = sharp_type;
1752 u.line = l;
1753 ls->flags = save_flags;
1754 print_token(ls, &u, 0);
1755 print_token(ls, ls->ctok, 0);
1756 if (ls->flags & WARN_ANNOYING) {
1757 warning(l, "rogue '#' dumped");
1758 }
1759 goto handle_exit3;
1760 }
1761 }
1762 if (ls->condcomp) {
/* code is currently compiled: every known directive is honoured */
1763 if (!strcmp(ls->ctok->name, "define")) {
1764 ret = handle_define(ls);
1765 goto handle_exit;
1766 } else if (!strcmp(ls->ctok->name, "undef")) {
1767 ret = handle_undef(ls);
1768 goto handle_exit;
1769 } else if (!strcmp(ls->ctok->name, "if")) {
/* a false condition (handler returned 0) switches compilation off and
   records the level that has to be closed to switch it back on */
1770 if ((++ ls->ifnest) > 63) goto too_many_if;
1771 condfclr(ls->ifnest - 1);
1772 ret = handle_if(ls);
1773 if (ret > 0) ret = 0;
1774 else if (ret == 0) {
1775 ls->condcomp = 0;
1776 ls->condmet = 0;
1777 ls->condnest = ls->ifnest - 1;
1778 }
1779 else ret = 1;
1780 goto handle_exit;
1781 } else if (!strcmp(ls->ctok->name, "ifdef")) {
1782 if ((++ ls->ifnest) > 63) goto too_many_if;
1783 condfclr(ls->ifnest - 1);
1784 ret = handle_ifdef(ls);
1785 if (ret > 0) ret = 0;
1786 else if (ret == 0) {
1787 ls->condcomp = 0;
1788 ls->condmet = 0;
1789 ls->condnest = ls->ifnest - 1;
1790 }
1791 else ret = 1;
1792 goto handle_exit;
1793 } else if (!strcmp(ls->ctok->name, "ifndef")) {
1794 if ((++ ls->ifnest) > 63) goto too_many_if;
1795 condfclr(ls->ifnest - 1);
1796 ret = handle_ifndef(ls);
1797 if (ret > 0) ret = 0;
1798 else if (ret == 0) {
1799 ls->condcomp = 0;
1800 ls->condmet = 0;
1801 ls->condnest = ls->ifnest - 1;
1802 }
1803 else ret = 1;
1804 goto handle_exit;
1805 } else if (!strcmp(ls->ctok->name, "else")) {
/* the branch being compiled ends here: skip the #else part */
1806 if (ls->ifnest == 0
1807 || condfval(ls->ifnest - 1)) {
1808 error(l, "rogue #else");
1809 ret = 1;
1810 goto handle_warp;
1811 }
1812 condfset(ls->ifnest - 1);
1813 if (ls->ifnest == 1) protect_detect.state = 0;
1814 ls->condcomp = 0;
1815 ls->condmet = 1;
1816 ls->condnest = ls->ifnest - 1;
1817 goto handle_warp;
1818 } else if (!strcmp(ls->ctok->name, "elif")) {
/* a branch was already taken, so the #elif condition is not evaluated */
1819 if (ls->ifnest == 0
1820 || condfval(ls->ifnest - 1)) {
1821 error(l, "rogue #elif");
1822 ret = 1;
1823 goto handle_warp_ign;
1824 }
1825 if (ls->ifnest == 1) protect_detect.state = 0;
1826 ls->condcomp = 0;
1827 ls->condmet = 1;
1828 ls->condnest = ls->ifnest - 1;
1829 goto handle_warp_ign;
1830 } else if (!strcmp(ls->ctok->name, "endif")) {
1831 if (ls->ifnest == 0) {
1832 error(l, "unmatched #endif");
1833 ret = 1;
1834 goto handle_warp;
1835 }
1836 if ((-- ls->ifnest) == 0
1837 && protect_detect.state == 2) {
1838 protect_detect.state = 3;
1839 }
1840 goto handle_warp;
1841 } else if (!strcmp(ls->ctok->name, "include")) {
1842 ret = handle_include(ls, save_flags, 0);
1843 goto handle_exit3;
1844 } else if (!strcmp(ls->ctok->name, "include_next")) {
1845 ret = handle_include(ls, save_flags, 1);
1846 goto handle_exit3;
1847 } else if (!strcmp(ls->ctok->name, "pragma") ) {
/* when the caller is not in LEXER mode the pragma is either printed
   back (PRAGMA_DUMP) or dropped; otherwise it becomes a PRAGMA token
   if HANDLE_PRAGMA is set */
1848 if (!(save_flags & LEXER)) {
1849 #ifdef PRAGMA_DUMP
1850 /* dump #pragma in output */
1851 struct token u;
1852
1853 u.type = sharp_type;
1854 u.line = l;
1855 ls->flags = save_flags;
1856 print_token(ls, &u, 0);
1857 print_token(ls, ls->ctok, 0);
1858 while (ls->flags |= LEXER,
1859 !next_token(ls)) {
1860 long save_line;
1861
1862 ls->flags &= ~LEXER;
1863 save_line = ls->line;
1864 ls->line = l;
1865 print_token(ls, ls->ctok, 0);
1866 ls->line = save_line;
1867 if (ls->ctok->type == NEWLINE)
1868 break;
1869 }
1870 goto handle_exit3;
1871 #else
1872 if (ls->flags & WARN_PRAGMA)
1873 warning(l, "#pragma ignored "
1874 "and not dumped");
1875 goto handle_warp_ign;
1876 #endif
1877 }
1878 if (!(ls->flags & HANDLE_PRAGMA))
1879 goto handle_warp_ign;
1880 handle_pragma(ls);
1881 goto handle_exit;
1882 } else if (!strcmp(ls->ctok->name, "error")) {
1883 ret = 1;
1884 handle_error(ls);
1885 goto handle_exit;
1886 } else if (!strcmp(ls->ctok->name, "line")) {
1887 ret = handle_line(ls, save_flags);
1888 goto handle_exit;
1889 } else if ((ls->flags & HANDLE_ASSERTIONS)
1890 && !strcmp(ls->ctok->name, "assert")) {
1891 ret = handle_assert(ls);
1892 goto handle_exit;
1893 } else if ((ls->flags & HANDLE_ASSERTIONS)
1894 && !strcmp(ls->ctok->name, "unassert")) {
1895 ret = handle_unassert(ls);
1896 goto handle_exit;
1897 }
1898 } else {
/* inside a skipped region: only the conditional directives are looked
   at, to keep the nesting depth right and to find where compilation
   resumes; every other name is ignored */
1899 if (!strcmp(ls->ctok->name, "else")) {
1900 if (condfval(ls->ifnest - 1)
1901 && (ls->flags & WARN_STANDARD)) {
1902 warning(l, "rogue #else in code "
1903 "compiled out");
1904 }
1905 if (ls->condnest == ls->ifnest - 1) {
1906 if (!ls->condmet) ls->condcomp = 1;
1907 }
1908 condfset(ls->ifnest - 1);
1909 if (ls->ifnest == 1) protect_detect.state = 0;
1910 goto handle_warp;
1911 } else if (!strcmp(ls->ctok->name, "elif")) {
1912 if (condfval(ls->ifnest - 1)
1913 && (ls->flags & WARN_STANDARD)) {
1914 warning(l, "rogue #elif in code "
1915 "compiled out");
1916 }
/* the condition is only evaluated for the level that switched
   compilation off, and only if no branch of it was taken yet */
1917 if (ls->condnest != ls->ifnest - 1
1918 || ls->condmet)
1919 goto handle_warp_ign;
1920 if (ls->ifnest == 1) protect_detect.state = 0;
1921 ret = handle_if(ls);
1922 if (ret > 0) {
1923 ls->condcomp = 1;
1924 ls->condmet = 1;
1925 ret = 0;
1926 } else if (ret < 0) ret = 1;
1927 goto handle_exit;
1928 } else if (!strcmp(ls->ctok->name, "endif")) {
1929 if ((-- ls->ifnest) == ls->condnest) {
1930 if (ls->ifnest == 0 &&
1931 protect_detect.state == 2)
1932 protect_detect.state = 3;
1933 ls->condcomp = 1;
1934 }
1935 goto handle_warp;
1936 } else if (!strcmp(ls->ctok->name, "if")
1937 || !strcmp(ls->ctok->name, "ifdef")
1938 || !strcmp(ls->ctok->name, "ifndef")) {
1939 if ((++ ls->ifnest) > 63) goto too_many_if;
1940 condfclr(ls->ifnest - 1);
1941 }
1942 goto handle_warp_ign;
1943 }
1944 /*
1945 * Unrecognized directive. We emit either an error or
1946 * an annoying warning, depending on a command-line switch.
1947 */
1948 if (ls->flags & FAIL_SHARP) {
1949 error(l, "unknown cpp directive '#%s'",
1950 ls->ctok->name);
1951 goto handle_warp_ign;
1952 } else {
1953 struct token u;
1954
1955 u.type = sharp_type;
1956 u.line = l;
1957 ls->flags = save_flags;
1958 print_token(ls, &u, 0);
1959 print_token(ls, ls->ctok, 0);
1960 if (ls->flags & WARN_ANNOYING) {
1961 warning(l, "rogue '#' dumped");
1962 }
1963 }
1964 }
1965 return 1;
1966
/* exit ladder: handle_warp_ign silently skips the rest of the line,
   handle_warp does the same but warns about extra tokens when
   WARN_STANDARD is set; both continue with handle_exit */
1967 handle_warp_ign:
1968 while (!next_token(ls)) if (ls->ctok->type == NEWLINE) break;
1969 goto handle_exit;
1970 handle_warp:
1971 while (!next_token(ls)) {
1972 if (!ttWHI(ls->ctok->type) && (ls->flags & WARN_STANDARD)) {
1973 warning(l, "trailing garbage in "
1974 "preprocessing directive");
1975 }
1976 if (ls->ctok->type == NEWLINE) break;
1977 }
1978 handle_exit:
1979 if (!(ls->flags & LEXER)) put_char(ls, '\n');
1980 handle_exit3:
/* a directive has been processed: advance the protect_detect state
   (-1 becomes 1, 1 becomes 0) */
1981 if (protect_detect.state == 1) {
1982 protect_detect.state = 0;
1983 } else if (protect_detect.state == -1) {
1984 /* just after the #include */
1985 protect_detect.state = 1;
1986 }
1987 handle_exit2:
1988 ls->flags = save_flags;
1989 return ret;
1990 too_many_if:
1991 error(l, "too many levels of conditional inclusion (max 63)");
1992 ret = 1;
1993 goto handle_warp;
1994 #undef condfset
1995 #undef condfclr
1996 #undef condfval
1997 }
1998
1999 /*
2000 * This is the main entry function. It maintains count of #, and call the
2001 * appropriate functions when it encounters a cpp directive or a macro
2002 * name.
2003 * return value: positive on error; CPPERR_EOF means "end of input reached"
2004 */
cpp(struct lexer_state * ls)2005 int cpp(struct lexer_state *ls)
2006 {
2007 int r = 0;
2008
2009 while (next_token(ls)) {
2010 if (protect_detect.state == 3) {
2011 /*
2012 * At that point, protect_detect.ff->protect might
2013 * be non-zero, if the file has been recursively
2014 * included, and a guardian detected.
2015 */
2016 if (!protect_detect.ff->protect) {
2017 /* Cool ! A new guardian has been detected. */
2018 protect_detect.ff->protect =
2019 protect_detect.macro;
2020 } else if (protect_detect.macro) {
2021 /* We found a guardian but an old one. */
2022 freemem(protect_detect.macro);
2023 }
2024 protect_detect.macro = 0;
2025 }
2026 if (ls->ifnest) {
2027 error(ls->line, "unterminated #if construction "
2028 "(depth %ld)", ls->ifnest);
2029 r = CPPERR_NEST;
2030 }
2031 if (ls_depth == 0) return CPPERR_EOF;
2032 close_input(ls);
2033 if (!(ls->flags & LEXER) && !ls->ltwnl) {
2034 put_char(ls, '\n');
2035 ls->ltwnl = 1;
2036 }
2037 pop_file_context(ls);
2038 ls->oline ++;
2039 if (enter_file(ls, ls->flags)) {
2040 ls->ctok->type = NEWLINE;
2041 ls->ltwnl = 1;
2042 break;
2043 }
2044 }
2045 if (!(ls->ltwnl && (ls->ctok->type == SHARP
2046 || ls->ctok->type == DIG_SHARP))
2047 && protect_detect.state == 1 && !ttWHI(ls->ctok->type)) {
2048 /* the first non-whitespace token encountered is not
2049 a sharp introducing a cpp directive */
2050 protect_detect.state = 0;
2051 }
2052 if (protect_detect.state == 3 && !ttWHI(ls->ctok->type)) {
2053 /* a non-whitespace token encountered after the #endif */
2054 protect_detect.state = 0;
2055 }
2056 if (ls->condcomp) {
2057 if (ls->ltwnl && (ls->ctok->type == SHARP
2058 || ls->ctok->type == DIG_SHARP)) {
2059 int x = handle_cpp(ls, ls->ctok->type);
2060
2061 ls->ltwnl = 1;
2062 return r ? r : x;
2063 }
2064 if (ls->ctok->type == NAME) {
2065 struct macro *m;
2066
2067 if ((m = get_macro(ls->ctok->name)) != 0) {
2068 int x;
2069
2070 x = substitute_macro(ls, m, 0, 1, 0,
2071 ls->ctok->line);
2072 if (!(ls->flags & LEXER))
2073 garbage_collect(ls->gf);
2074 return r ? r : x;
2075 }
2076 if (!(ls->flags & LEXER))
2077 print_token(ls, ls->ctok, 0);
2078 }
2079 } else {
2080 if (ls->ltwnl && (ls->ctok->type == SHARP
2081 || ls->ctok->type == DIG_SHARP)) {
2082 int x = handle_cpp(ls, ls->ctok->type);
2083
2084 ls->ltwnl = 1;
2085 return r ? r : x;
2086 }
2087 }
2088 if (ls->ctok->type == NEWLINE) ls->ltwnl = 1;
2089 else if (!ttWHI(ls->ctok->type)) ls->ltwnl = 0;
2090 return r ? r : -1;
2091 }
2092
2093 #ifndef STAND_ALONE
2094 /*
2095 * llex() and lex() are the lexing functions, when the preprocessor is
2096 * linked to another code. llex() should be called only by lex().
2097 */
llex(struct lexer_state * ls)2098 static int llex(struct lexer_state *ls)
2099 {
2100 struct token_fifo *tf = ls->output_fifo;
2101 int r;
2102
2103 if (tf->nt != 0) {
2104 if (tf->art < tf->nt) {
2105 #ifdef INMACRO_FLAG
2106 if (!ls->inmacro) {
2107 ls->inmacro = 1;
2108 ls->macro_count ++;
2109 }
2110 #endif
2111 ls->ctok = tf->t + (tf->art ++);
2112 if (ls->ctok->type > DIGRAPH_TOKENS
2113 && ls->ctok->type < DIGRAPH_TOKENS_END) {
2114 ls->ctok->type = undig(ls->ctok->type);
2115 }
2116 return 0;
2117 } else {
2118 #ifdef INMACRO_FLAG
2119 ls->inmacro = 0;
2120 #endif
2121 freemem(tf->t);
2122 tf->art = tf->nt = 0;
2123 garbage_collect(ls->gf);
2124 ls->ctok = ls->save_ctok;
2125 }
2126 }
2127 r = cpp(ls);
2128 if (ls->ctok->type > DIGRAPH_TOKENS
2129 && ls->ctok->type < LAST_MEANINGFUL_TOKEN) {
2130 ls->ctok->type = undig(ls->ctok->type);
2131 }
2132 if (r > 0) return r;
2133 if (r < 0) return 0;
2134 return llex(ls);
2135 }
2136
2137 /*
2138 * lex() reads the next token from the processed stream and stores it
2139 * into ls->ctok.
2140 * return value: non zero on error (including CPPERR_EOF, which is not
2141 * quite an error)
2142 */
lex(struct lexer_state * ls)2143 int lex(struct lexer_state *ls)
2144 {
2145 int r;
2146
2147 do {
2148 r = llex(ls);
2149 #ifdef SEMPER_FIDELIS
2150 } while (!r && !ls->condcomp);
2151 #else
2152 } while (!r && (!ls->condcomp || (ttWHI(ls->ctok->type) &&
2153 (!(ls->flags & LINE_NUM) || ls->ctok->type != NEWLINE))));
2154 #endif
2155 return r;
2156 }
2157 #endif
2158
2159 /*
2160 * check_cpp_errors() must be called when the end of input is reached;
2161 * it checks pending errors due to truncated constructs (actually none,
2162 * this is reserved for future evolutions).
2163 */
check_cpp_errors(struct lexer_state * ls)2164 int check_cpp_errors(struct lexer_state *ls)
2165 {
2166 if (ls->flags & KEEP_OUTPUT) {
2167 put_char(ls, '\n');
2168 }
2169 if (emit_dependencies) fputc('\n', emit_output);
2170 #ifndef NO_UCPP_BUF
2171 if (!(ls->flags & LEXER)) {
2172 flush_output(ls);
2173 }
2174 #endif
2175 if ((ls->flags & WARN_TRIGRAPHS) && ls->count_trigraphs)
2176 warning(0, "%ld trigraph(s) encountered", ls->count_trigraphs);
2177 return 0;
2178 }
2179
/*
 * init_cpp() initializes the static tables inside ucpp, by calling
 * init_cppm(). It need not be called more than once.
 */
void init_cpp(void)
{
	init_cppm();
}
2188
2189 /*
2190 * (re)init the global tables.
2191 * If standard_assertions is non 0, init the assertions table.
2192 */
init_tables(int with_assertions)2193 void init_tables(int with_assertions)
2194 {
2195 time_t t;
2196 struct tm *ct;
2197
2198 init_buf_lexer_state(&dsharp_lexer, 0);
2199 #ifdef PRAGMA_TOKENIZE
2200 init_buf_lexer_state(&tokenize_lexer, 0);
2201 #endif
2202 time(&t);
2203 ct = localtime(&t);
2204 #ifdef NOSTRFTIME
2205 /* we have a quite old compiler, that does not know the
2206 (standard since 1990) strftime() function. */
2207 {
2208 char *c = asctime(ct);
2209
2210 compile_time[0] = '"';
2211 mmv(compile_time + 1, c + 11, 8);
2212 compile_time[9] = '"';
2213 compile_time[10] = 0;
2214 compile_date[0] = '"';
2215 mmv(compile_date + 1, c + 4, 7);
2216 mmv(compile_date + 8, c + 20, 4);
2217 compile_date[12] = '"';
2218 compile_date[13] = 0;
2219 }
2220 #else
2221 strftime(compile_time, 12, "\"%H:%M:%S\"", ct);
2222 strftime(compile_date, 24, "\"%b %d %Y\"", ct);
2223 #endif
2224 init_macros();
2225 if (with_assertions) init_assertions();
2226 init_found_files();
2227 }
2228
2229 /*
2230 * Resets the include path.
2231 */
init_include_path(char * incpath[])2232 void init_include_path(char *incpath[])
2233 {
2234 if (include_path_nb) {
2235 size_t i;
2236
2237 for (i = 0; i < include_path_nb; i ++)
2238 freemem(include_path[i]);
2239 freemem(include_path);
2240 include_path_nb = 0;
2241 }
2242 if (incpath) {
2243 int i;
2244
2245 for (i = 0; incpath[i]; i ++)
2246 aol(include_path, include_path_nb,
2247 sdup(incpath[i]), INCPATH_MEMG);
2248 }
2249 }
2250
2251 /*
2252 * add_incpath() adds "path" to the standard include path.
2253 */
add_incpath(char * path)2254 void add_incpath(char *path)
2255 {
2256 aol(include_path, include_path_nb, sdup(path), INCPATH_MEMG);
2257 }
2258
2259 /*
2260 * This function cleans the memory. It should release all allocated
2261 * memory structures and may be called even if the current pre-processing
2262 * is not finished or reported an error.
2263 */
wipeout()2264 void wipeout()
2265 {
2266 struct lexer_state ls;
2267
2268 if (include_path_nb > 0) {
2269 size_t i;
2270
2271 for (i = 0; i < include_path_nb; i ++)
2272 freemem(include_path[i]);
2273 freemem(include_path);
2274 include_path = 0;
2275 include_path_nb = 0;
2276 }
2277 if (current_filename) freemem(current_filename);
2278 current_filename = 0;
2279 current_long_filename = 0;
2280 current_incdir = -1;
2281 protect_detect.state = 0;
2282 if (protect_detect.macro) freemem(protect_detect.macro);
2283 protect_detect.macro = 0;
2284 protect_detect.ff = 0;
2285 init_lexer_state(&ls);
2286 while (ls_depth > 0) pop_file_context(&ls);
2287 free_lexer_state(&ls);
2288 free_lexer_state(&dsharp_lexer);
2289 #ifdef PRAGMA_TOKENIZE
2290 free_lexer_state(&tokenize_lexer);
2291 #endif
2292 if (found_files_init_done) HTT_kill(&found_files);
2293 found_files_init_done = 0;
2294 if (found_files_sys_init_done) HTT_kill(&found_files_sys);
2295 found_files_sys_init_done = 0;
2296 wipe_macros();
2297 wipe_assertions();
2298 }
2299
2300 #ifdef STAND_ALONE
/*
 * print some help
 *
 * The option summary is written to stderr; "command_name" is inserted
 * in the first line ("Usage: <command_name> [options] [file]").
 */
static void usage(char *command_name)
{
	fprintf(stderr,
	"Usage: %s [options] [file]\n"
	"language options:\n"
	" -C keep comments in output\n"
	" -s keep '#' when no cpp directive is recognized\n"
	" -l do not emit line numbers\n"
	" -lg emit gcc-like line numbers\n"
	" -CC disable C++-like comments\n"
	" -a, -na, -a0 handle (or not) assertions\n"
	" -V disable macros with extra arguments\n"
	" -u understand UTF-8 in source\n"
	" -X enable -a, -u and -Y\n"
	" -c90 mimic C90 behaviour\n"
	" -t disable trigraph support\n"
	"warning options:\n"
	" -wt emit a final warning when trigraphs are encountered\n"
	" -wtt emit warnings for each trigraph encountered\n"
	" -wa emit warnings that are usually useless\n"
	" -w0 disable standard warnings\n"
	"directory options:\n"
	" -I directory add 'directory' before the standard include path\n"
	" -J directory add 'directory' after the standard include path\n"
	" -zI do not use the standard include path\n"
	" -M emit Makefile-like dependencies instead of normal "
	"output\n"
	" -Ma emit also dependencies for system files\n"
	" -o file store output in file\n"
	"macro and assertion options:\n"
	" -Dmacro predefine 'macro'\n"
	" -Dmacro=def predefine 'macro' with 'def' content\n"
	" -Umacro undefine 'macro'\n"
	" -Afoo(bar) assert foo(bar)\n"
	" -Bfoo(bar) unassert foo(bar)\n"
	" -Y predefine system-dependent macros\n"
	" -Z do not predefine special macros\n"
	" -d emit defined macros\n"
	" -e emit assertions\n"
	"misc options:\n"
	" -v print version number and settings\n"
	" -h show this help\n",
	command_name);
}
2348
2349 /*
2350 * print version and compile-time settings
2351 */
version(void)2352 static void version(void)
2353 {
2354 size_t i;
2355
2356 fprintf(stderr, "ucpp version %d.%d\n", VERS_MAJ, VERS_MIN);
2357 fprintf(stderr, "search path:\n");
2358 for (i = 0; i < include_path_nb; i ++)
2359 fprintf(stderr, " %s\n", include_path[i]);
2360 }
2361
2362 /*
2363 * parse_opt() initializes many things according to the command-line
2364 * options.
2365 * Return values:
2366 * 0 on success
2367 * 1 on semantic error (redefinition of a special macro, for instance)
2368 * 2 on syntaxic error (unknown options for instance)
2369 */
parse_opt(int argc,char * argv[],struct lexer_state * ls)2370 static int parse_opt(int argc, char *argv[], struct lexer_state *ls)
2371 {
2372 int i, ret = 0;
2373 char *filename = 0;
2374 int with_std_incpath = 0;
2375 int print_version = 0, print_defs = 0, print_asserts = 0;
2376 int system_macros = 0, standard_assertions = 1;
2377
2378 init_lexer_state(ls);
2379 ls->flags = DEFAULT_CPP_FLAGS;
2380 emit_output = ls->output = stdout;
2381 for (i = 1; i < argc; i ++) if (argv[i][0] == '-') {
2382 if (!strcmp(argv[i], "-h")) {
2383 return 2;
2384 } else if (!strcmp(argv[i], "-C")) {
2385 ls->flags &= ~DISCARD_COMMENTS;
2386 } else if (!strcmp(argv[i], "-CC")) {
2387 ls->flags &= ~CPLUSPLUS_COMMENTS;
2388 } else if (!strcmp(argv[i], "-a")) {
2389 ls->flags |= HANDLE_ASSERTIONS;
2390 } else if (!strcmp(argv[i], "-na")) {
2391 ls->flags |= HANDLE_ASSERTIONS;
2392 standard_assertions = 0;
2393 } else if (!strcmp(argv[i], "-a0")) {
2394 ls->flags &= ~HANDLE_ASSERTIONS;
2395 } else if (!strcmp(argv[i], "-V")) {
2396 ls->flags &= ~MACRO_VAARG;
2397 } else if (!strcmp(argv[i], "-u")) {
2398 ls->flags |= UTF8_SOURCE;
2399 } else if (!strcmp(argv[i], "-X")) {
2400 ls->flags |= HANDLE_ASSERTIONS;
2401 ls->flags |= UTF8_SOURCE;
2402 system_macros = 1;
2403 } else if (!strcmp(argv[i], "-c90")) {
2404 ls->flags &= ~MACRO_VAARG;
2405 ls->flags &= ~CPLUSPLUS_COMMENTS;
2406 c99_compliant = 0;
2407 c99_hosted = -1;
2408 } else if (!strcmp(argv[i], "-t")) {
2409 ls->flags &= ~HANDLE_TRIGRAPHS;
2410 } else if (!strcmp(argv[i], "-wt")) {
2411 ls->flags |= WARN_TRIGRAPHS;
2412 } else if (!strcmp(argv[i], "-wtt")) {
2413 ls->flags |= WARN_TRIGRAPHS_MORE;
2414 } else if (!strcmp(argv[i], "-wa")) {
2415 ls->flags |= WARN_ANNOYING;
2416 } else if (!strcmp(argv[i], "-w0")) {
2417 ls->flags &= ~WARN_STANDARD;
2418 ls->flags &= ~WARN_PRAGMA;
2419 } else if (!strcmp(argv[i], "-s")) {
2420 ls->flags &= ~FAIL_SHARP;
2421 } else if (!strcmp(argv[i], "-l")) {
2422 ls->flags &= ~LINE_NUM;
2423 } else if (!strcmp(argv[i], "-lg")) {
2424 ls->flags |= GCC_LINE_NUM;
2425 } else if (!strcmp(argv[i], "-M")) {
2426 ls->flags &= ~KEEP_OUTPUT;
2427 emit_dependencies = 1;
2428 } else if (!strcmp(argv[i], "-Ma")) {
2429 ls->flags &= ~KEEP_OUTPUT;
2430 emit_dependencies = 2;
2431 } else if (!strcmp(argv[i], "-Y")) {
2432 system_macros = 1;
2433 } else if (!strcmp(argv[i], "-Z")) {
2434 no_special_macros = 1;
2435 } else if (!strcmp(argv[i], "-d")) {
2436 ls->flags &= ~KEEP_OUTPUT;
2437 print_defs = 1;
2438 } else if (!strcmp(argv[i], "-e")) {
2439 ls->flags &= ~KEEP_OUTPUT;
2440 print_asserts = 1;
2441 } else if (!strcmp(argv[i], "-zI")) {
2442 with_std_incpath = 0;
2443 } else if (!strcmp(argv[i], "-I") || !strcmp(argv[i], "-J")) {
2444 i ++;
2445 } else if (!strcmp(argv[i], "-iquote") || !strcmp(argv[i], "-isystem")) {
2446 i ++;
2447 } else if (!strcmp(argv[i], "-o")) {
2448 if ((++ i) >= argc) {
2449 error(-1, "missing filename after -o");
2450 return 2;
2451 }
2452 if (argv[i][0] == '-' && argv[i][1] == 0) {
2453 emit_output = ls->output = stdout;
2454 } else {
2455 ls->output = fopen(argv[i], "w");
2456 if (!ls->output) {
2457 error(-1, "failed to open for "
2458 "writing: %s", argv[i]);
2459 return 2;
2460 }
2461 emit_output = ls->output;
2462 }
2463 } else if (!strcmp(argv[i], "-v")) {
2464 print_version = 1;
2465 } else if (argv[i][1] != 'I' && argv[i][1] != 'J'
2466 && argv[i][1] != 'D' && argv[i][1] != 'U'
2467 && argv[i][1] != 'A' && argv[i][1] != 'B'
2468 && strncmp(argv[i]+1,"isystem",7)
2469 && strncmp(argv[i]+1,"iquote",6) )
2470 warning(-1, "unknown option '%s'", argv[i]);
2471 } else {
2472 if (filename != 0) {
2473 if ( ls->output != NULL ) {
2474 ls->output = fopen(argv[i], "w");
2475 if (!ls->output) {
2476 error(-1, "failed to open for "
2477 "writing: %s", argv[i]);
2478 return 2;
2479 }
2480 emit_output = ls->output;
2481 } else {
2482 error(-1, "spurious filename '%s'", argv[i]);
2483 return 2;
2484 }
2485 } else {
2486 filename = argv[i];
2487 }
2488 }
2489 init_tables(ls->flags & HANDLE_ASSERTIONS);
2490 init_include_path(0);
2491 if (filename) {
2492 #ifdef UCPP_MMAP
2493 FILE *f = fopen_mmap_file(filename);
2494
2495 ls->input = 0;
2496 if (f) set_input_file(ls, f);
2497 #else
2498 ls->input = fopen(filename, "r");
2499 #endif
2500 if (!ls->input) {
2501 error(-1, "file '%s' not found", filename);
2502 return 1;
2503 }
2504 #ifdef NO_LIBC_BUF
2505 setbuf(ls->input, 0);
2506 #endif
2507 set_init_filename(filename, 1);
2508 } else {
2509 ls->input = stdin;
2510 set_init_filename("<stdin>", 0);
2511 }
2512 for (i = 1; i < argc; i ++) {
2513 if (argv[i][0] == '-' && argv[i][1] == 'I')
2514 add_incpath(argv[i][2] ? argv[i] + 2 : argv[i + 1]);
2515 if (strncmp(argv[i],"-iquote",7) == 0 )
2516 add_incpath(argv[i][7] ? argv[i] + 7 : argv[i + 1]);
2517 }
2518 if (system_macros) for (i = 0; system_macros_def[i]; i ++)
2519 ret = ret || define_macro(ls, system_macros_def[i]);
2520 for (i = 1; i < argc; i ++)
2521 if (argv[i][0] == '-' && argv[i][1] == 'D')
2522 ret = ret || define_macro(ls, argv[i] + 2);
2523 for (i = 1; i < argc; i ++)
2524 if (argv[i][0] == '-' && argv[i][1] == 'U')
2525 ret = ret || undef_macro(ls, argv[i] + 2);
2526 if (ls->flags & HANDLE_ASSERTIONS) {
2527 if (standard_assertions)
2528 for (i = 0; system_assertions_def[i]; i ++)
2529 make_assertion(system_assertions_def[i]);
2530 for (i = 1; i < argc; i ++)
2531 if (argv[i][0] == '-' && argv[i][1] == 'A')
2532 ret = ret || make_assertion(argv[i] + 2);
2533 for (i = 1; i < argc; i ++)
2534 if (argv[i][0] == '-' && argv[i][1] == 'B')
2535 ret = ret || destroy_assertion(argv[i] + 2);
2536 } else {
2537 for (i = 1; i < argc; i ++)
2538 if (argv[i][0] == '-'
2539 && (argv[i][1] == 'A' || argv[i][1] == 'B'))
2540 warning(-1, "assertions disabled");
2541 }
2542 if (with_std_incpath) {
2543 for (i = 0; include_path_std[i]; i ++)
2544 add_incpath(include_path_std[i]);
2545 }
2546 for (i = 1; i < argc; i ++) {
2547 if (argv[i][0] == '-' && argv[i][1] == 'J')
2548 add_incpath(argv[i][2] ? argv[i] + 2 : argv[i + 1]);
2549 if (strncmp(argv[i],"-isystem",8) == 0 )
2550 add_incpath(argv[i][8] ? argv[i] + 8 : argv[i + 1]);
2551
2552 }
2553
2554 if (print_version) {
2555 version();
2556 return 1;
2557 }
2558 if (print_defs) {
2559 print_defines();
2560 emit_defines = 1;
2561 }
2562 if (print_asserts && (ls->flags & HANDLE_ASSERTIONS)) {
2563 print_assertions();
2564 emit_assertions = 1;
2565 }
2566 return ret;
2567 }
2568
main(int argc,char * argv[])2569 int main(int argc, char *argv[])
2570 {
2571 struct lexer_state ls;
2572 int r, fr = 0;
2573
2574 init_cpp();
2575 if ((r = parse_opt(argc, argv, &ls)) != 0) {
2576 if (r == 2) usage(argv[0]);
2577 return EXIT_FAILURE;
2578 }
2579 enter_file(&ls, ls.flags);
2580 while ((r = cpp(&ls)) < CPPERR_EOF) fr = fr || (r > 0);
2581 fr = fr || check_cpp_errors(&ls);
2582 free_lexer_state(&ls);
2583 wipeout();
2584 #ifdef MEM_DEBUG
2585 report_leaks();
2586 #endif
2587 return fr ? EXIT_FAILURE : EXIT_SUCCESS;
2588 }
2589 #endif
2590