/*
 * This file is part of DGD, https://github.com/dworkin/dgd
 * Copyright (C) 1993-2010 Dworkin B.V.
 * Copyright (C) 2010,2012-2013 DGD Authors (see the commit log for details)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
19
20 # define INCLUDE_FILE_IO
21 # define INCLUDE_CTYPE
22 # include "lex.h"
23 # include "path.h"
24 # include "macro.h"
25 # include "special.h"
26 # include "ppstr.h"
27 # include "token.h"
28
/*
 * The functions for getting a (possibly preprocessed) token from the input
 * stream.
 */
33
34 # define TCHUNKSZ 8
35
36 typedef struct _tbuf_ {
37 string **strs; /* input buffer array */
38 int nstr; /* number of input buffers */
39 char *buffer; /* token buffer */
40 char *p; /* token buffer pointer */
41 int inbuf; /* # chars in token buffer */
42 char ubuf[4]; /* unget buffer */
43 char *up; /* unget buffer pointer */
44 bool eof; /* TRUE if empty(buffer) -> EOF */
45 unsigned short line; /* line number */
46 int fd; /* file descriptor */
47 union {
48 char *filename; /* file name */
49 macro *mc; /* macro this buffer is an expansion of */
50 } u;
51 struct _tbuf_ *prev; /* previous token buffer */
52 } tbuf;
53
54 typedef struct _tchunk_ {
55 struct _tchunk_ *next; /* next in list */
56 tbuf t[TCHUNKSZ]; /* chunk of token buffers */
57 } tchunk;
58
59 char *yytext; /* for strings and identifiers */
60 static char *yytext1, *yytext2; /* internal buffers */
61 static char *yyend; /* end of current buffer */
62 int yyleng; /* length of token */
63 long yynumber; /* integer constant */
64 xfloat yyfloat; /* floating point constant */
65
66 static tchunk *tlist; /* list of token buffer chunks */
67 static int tchunksz; /* token buffer chunk size */
68 static tbuf *flist; /* free token buffer list */
69 static tbuf *tbuffer; /* current token buffer */
70 static tbuf *ibuffer; /* current input buffer */
71 static int pp_level; /* the recursive preprocesing level */
72 static bool do_include; /* treat < and strings specially */
73 static bool seen_nl; /* just seen a newline */
74
75 /*
76 * NAME: token->init()
77 * DESCRIPTION: initialize the new token input buffer
78 */
tk_init()79 void tk_init()
80 {
81 yytext1 = ALLOC(char, MAX_LINE_SIZE);
82 yytext2 = ALLOC(char, MAX_LINE_SIZE);
83 tlist = (tchunk *) NULL;
84 tchunksz = TCHUNKSZ;
85 flist = (tbuf *) NULL;
86 tbuffer = (tbuf *) NULL;
87 ibuffer = (tbuf *) NULL;
88 pp_level = 0;
89 do_include = FALSE;
90 }
91
92 /*
93 * NAME: push()
94 * DESCRIPTION: Push a buffer on the token input stream. If eof is false, then
95 * the buffer will automatically be dropped when all is read.
96 */
push(macro * mc,char * buffer,unsigned int buflen,bool eof)97 static void push(macro *mc, char *buffer, unsigned int buflen, bool eof)
98 {
99 tbuf *tb;
100
101 if (flist != (tbuf *) NULL) {
102 /* from free list */
103 tb = flist;
104 flist = tb->prev;
105 } else {
106 /* allocate new one */
107 if (tchunksz == TCHUNKSZ) {
108 tchunk *l;
109
110 l = ALLOC(tchunk, 1);
111 l->next = tlist;
112 tlist = l;
113 tchunksz = 0;
114 }
115 tb = &tlist->t[tchunksz++];
116 }
117 tb->strs = (string **) NULL;
118 tb->nstr = 0;
119 tb->p = tb->buffer = buffer;
120 tb->inbuf = buflen;
121 tb->up = tb->ubuf;
122 tb->eof = eof;
123 tb->fd = -2;
124 tb->u.mc = mc;
125 tb->prev = tbuffer;
126 tbuffer = tb;
127 }
128
129 /*
130 * NAME: pop()
131 * DESCRIPTION: Drop the current token input buffer. If the associated macro
132 * is function-like, the token buffer will have to be deallocated.
133 */
pop()134 static void pop()
135 {
136 tbuf *tb;
137
138 tb = tbuffer;
139 if (tb->fd < -1) {
140 if (tb->u.mc != (macro *) NULL) {
141 if (tb->u.mc->narg > 0) {
142 /* in the buffer a function-like macro has been expanded */
143 FREE(tb->buffer);
144 }
145 }
146 } else {
147 if (tb->fd >= 0) {
148 P_close(tb->fd);
149 FREE(tb->buffer);
150 } else if (tb->prev != (tbuf *) NULL) {
151 str_del(tb->strs[0]);
152 FREE(tb->strs);
153 }
154 ibuffer = tbuffer->prev;
155 FREE(tb->u.filename);
156 }
157 tbuffer = tb->prev;
158
159 tb->prev = flist;
160 flist = tb;
161 }
162
163 /*
164 * NAME: token->clear()
165 * DESCRIPTION: clear all of the token input buffers
166 */
tk_clear()167 void tk_clear()
168 {
169 tchunk *l, *f;
170
171 while (tbuffer != (tbuf *) NULL) {
172 pop();
173 }
174 for (l = tlist; l != (tchunk *) NULL; ) {
175 f = l;
176 l = l->next;
177 FREE(f);
178 }
179 tlist = (tchunk *) NULL;
180 if (yytext1 != (char *) NULL) {
181 FREE(yytext2);
182 FREE(yytext1);
183 yytext1 = (char *) NULL;
184 yytext2 = (char *) NULL;
185 }
186 }
187
188 /*
189 * NAME: token->include()
190 * DESCRIPTION: push a file on the input stream
191 */
tk_include(char * file,string ** strs,int nstr)192 bool tk_include(char *file, string **strs, int nstr)
193 {
194 int fd;
195 ssizet len;
196
197 if (file != (char *) NULL) {
198 if (strs == (string **) NULL) {
199 struct stat sbuf;
200
201 /* read from file */
202 fd = P_open(file, O_RDONLY | O_BINARY, 0);
203 if (fd < 0) {
204 return FALSE;
205 }
206
207 P_fstat(fd, &sbuf);
208 if ((sbuf.st_mode & S_IFMT) != S_IFREG) {
209 /* no source this */
210 P_close(fd);
211 return FALSE;
212 }
213
214 push((macro *) NULL, ALLOC(char, BUF_SIZE), 0, TRUE);
215 } else {
216 /* read from strings */
217 --strs;
218 push((macro *) NULL, strs[0]->text, strs[0]->len, TRUE);
219 tbuffer->strs = strs;
220 tbuffer->nstr = --nstr;
221 fd = -1;
222 }
223
224 ibuffer = tbuffer;
225 ibuffer->fd = fd;
226 len = strlen(file);
227 if (len >= STRINGSZ - 1) {
228 len = STRINGSZ - 2;
229 }
230 ibuffer->u.filename = ALLOC(char, len + 2);
231 strncpy(ibuffer->u.filename + 1, file, len);
232 ibuffer->u.filename[0] = '/';
233 ibuffer->u.filename[len + 1] = '\0';
234 ibuffer->line = 1;
235 seen_nl = TRUE;
236
237 return TRUE;
238 }
239
240 return FALSE;
241 }
242
243 /*
244 * NAME: token->endinclude()
245 * DESCRIPTION: end an #inclusion
246 */
tk_endinclude()247 void tk_endinclude()
248 {
249 pop();
250 seen_nl = TRUE;
251 }
252
253 /*
254 * NAME: token->line()
255 * DESCRIPTION: return the current line number (possibly adjusted)
256 */
tk_line()257 unsigned short tk_line()
258 {
259 return ibuffer->line - (unsigned short) seen_nl;
260 }
261
262 /*
263 * NAME: token->filename()
264 * DESCRIPTION: return the current file name
265 */
tk_filename()266 char *tk_filename()
267 {
268 return ibuffer->u.filename;
269 }
270
271 /*
272 * NAME: token->setline()
273 * DESCRIPTION: set the current line number
274 */
tk_setline(unsigned short line)275 void tk_setline(unsigned short line)
276 {
277 ibuffer->line = line;
278 }
279
280 /*
281 * NAME: token->setfilename()
282 * DESCRIPTION: set the current file name
283 */
tk_setfilename(char * file)284 void tk_setfilename(char *file)
285 {
286 unsigned int len;
287
288 len = strlen(file);
289 if (len >= STRINGSZ) {
290 len = STRINGSZ - 1;
291 }
292 ibuffer->u.filename = memcpy(REALLOC(ibuffer->u.filename, char, 0, len + 1),
293 file, len);
294 ibuffer->u.filename[len] = '\0';
295 }
296
297 /*
298 * NAME: token->header()
299 * DESCRIPTION: set the current include string mode. if TRUE, '<' will be
300 * specially processed.
301 */
tk_header(int incl)302 void tk_header(int incl)
303 {
304 do_include = incl;
305 }
306
307 /*
308 * NAME: token->setpp()
309 * DESCRIPTION: if the argument is true, do not translate escape sequences in
310 * strings, and don't report errors.
311 */
tk_setpp(int pp)312 void tk_setpp(int pp)
313 {
314 pp_level = (int) pp;
315 }
316
/*
 * Push character c back on the unget buffer of the current token buffer;
 * EOF is never pushed back.  Ungetting a newline that belongs to the current
 * input buffer also takes back the line count.
 * The argument is evaluated more than once, so it must not have side
 * effects.  No check is made against overflowing the unget buffer.
 * The do { } while (0) wrapper makes "uc(c);" a single statement, so that
 * it can be used safely as the body of an if that has an else.
 */
# define uc(c)	do { \
		    if ((c) != EOF) { \
			if ((c) == LF && tbuffer == ibuffer) ibuffer->line--; \
			*(tbuffer->up)++ = (c); \
		    } \
		} while (0)
323
324 /*
325 * NAME: gc()
326 * DESCRIPTION: get a character from the input
327 */
gc()328 static int gc()
329 {
330 tbuf *tb;
331 int c;
332 bool backslash;
333
334 tb = tbuffer;
335 backslash = FALSE;
336
337 for (;;) {
338 if (tb->up != tb->ubuf) {
339 /* get a character from unget buffer */
340 c = UCHAR(*--(tb->up));
341 } else {
342 if (tb->inbuf <= 0) {
343 /* Current input buffer is empty. Try a refill. */
344 if (tb->fd >= 0 &&
345 (tb->inbuf = P_read(tb->fd, tb->buffer, BUF_SIZE)) > 0) {
346 tb->p = tb->buffer;
347 } else if (backslash) {
348 return '\\';
349 } else if (tb->nstr != 0) {
350 if (tb->prev != (tbuf *) NULL) {
351 str_del(tb->strs[0]);
352 }
353 --(tb->strs);
354 --(tb->nstr);
355 tb->p = tb->buffer = tb->strs[0]->text;
356 tb->inbuf = tb->strs[0]->len;
357 continue;
358 } else if (tb->eof) {
359 return EOF;
360 } else {
361 /* otherwise, pop the current token input buffer */
362 pop();
363 tb = tbuffer;
364 continue;
365 }
366 }
367 tb->inbuf--;
368 c = UCHAR(*(tb->p)++);
369 }
370
371 if (c == LF && tb == ibuffer) {
372 ibuffer->line++;
373 if (!backslash) {
374 return c;
375 }
376 backslash = FALSE;
377 } else if (backslash) {
378 uc(c);
379 return '\\';
380 } else if (c == '\\' && tb == ibuffer) {
381 backslash = TRUE;
382 } else {
383 return c;
384 }
385 }
386 }
387
/*
 * NAME:	skip_comment()
 * DESCRIPTION:	skip the remainder of a comment up to and including the
 *		closing star-slash; report an error if EOF is found while
 *		looking for a '*'
 */
static void skip_comment()
{
    int c;

    for (;;) {
	/* look for the next '*' */
	while ((c = gc()) != '*') {
	    if (c == EOF) {
		error("EOF in comment");
		return;
	    }
	}

	/* skip a run of '*' */
	do {
	    c = gc();
	} while (c == '*');

	if (c == '/') {
	    return;
	}
    }
}
410
411 /*
412 * NAME: skip_alt_comment()
413 * DESCRIPTION: skip c++ style comment
414 */
skip_alt_comment()415 static void skip_alt_comment()
416 {
417 int c;
418
419 do {
420 c = gc();
421 if (c == EOF) {
422 return;
423 }
424 } while (c != LF);
425 uc(c);
426 }
427
428 /*
429 * NAME: comment()
430 * DESCRIPTION: skip comments and white space
431 */
comment(bool flag)432 static void comment(bool flag)
433 {
434 int c;
435
436 for (;;) {
437 /* first skip the current comment */
438 if (flag) {
439 skip_alt_comment();
440 } else {
441 skip_comment();
442 }
443
444 /* skip any whitespace */
445 do {
446 c = gc();
447 } while (c == ' ' || c == HT || c == VT || c == FF || c == CR);
448
449 /* check if a new comment follows */
450 if (c != '/') {
451 uc(c);
452 break;
453 }
454 c = gc();
455 if (c == '*') {
456 flag = FALSE;
457 # ifdef SLASHSLASH
458 } else if (c == '/') {
459 flag = TRUE;
460 # endif
461 } else {
462 uc(c);
463 c = '/';
464 uc(c);
465 break;
466 }
467 }
468 }
469
470 /*
471 * NAME: token->esc()
472 * DESCRIPTION: handle an escaped character, leaving the value in yynumber
473 */
tk_esc(char * p)474 static char *tk_esc(char *p)
475 {
476 int c, i, n;
477
478 switch (c = *p++ = gc()) {
479 case 'a': c = BEL; break;
480 case 'b': c = BS; break;
481 case 't': c = HT; break;
482 case 'n': c = LF; break;
483 case 'v': c = VT; break;
484 case 'f': c = FF; break;
485 case 'r': c = CR; break;
486
487 case LF:
488 /* newline in string or character constant */
489 uc(c);
490 return p - 1;
491
492 case '0': case '1': case '2': case '3':
493 case '4': case '5': case '6': case '7':
494 /* octal constant */
495 i = 0;
496 n = 3;
497 --p;
498 do {
499 *p++ = c;
500 i <<= 3;
501 i += c - '0';
502 c = gc();
503 } while (--n > 0 && c >= '0' && c <= '7');
504 uc(c);
505 c = UCHAR(i);
506 break;
507
508 case 'x':
509 /* hexadecimal constant */
510 c = gc();
511 if (isxdigit(c)) {
512 i = 0;
513 n = 3;
514 do {
515 *p++ = c;
516 i <<= 4;
517 if (isdigit(c)) {
518 i += c - '0';
519 } else {
520 i += toupper(c) + 10 - 'A';
521 }
522 c = gc();
523 } while (--n > 0 && isxdigit(c));
524 } else {
525 i = 'x';
526 }
527 uc(c);
528 c = UCHAR(i);
529 break;
530 }
531
532 yynumber = c;
533 return p;
534 }
535
536 /*
537 * NAME: token->string()
538 * DESCRIPTION: handle a string. If pp_level > 0, don't translate escape
539 * sequences.
540 */
tk_string(char quote)541 static int tk_string(char quote)
542 {
543 char *p;
544 int c, n;
545
546 p = yytext;
547 if (pp_level > 0) {
548 /* keep the quotes if not on top level */
549 p++;
550 n = 0;
551 } else {
552 n = 2;
553 }
554
555 for (;;) {
556 c = gc();
557 if (c == quote) {
558 if (pp_level > 0) {
559 /* keep the quotes if not on top level */
560 *p++ = c;
561 }
562 break;
563 } else if (c == '\\') {
564 if (pp_level > 0 || do_include) {
565 /* recognize, but do not translate escape sequence */
566 *p++ = c;
567 p = tk_esc(p);
568 c = *--p;
569 } else {
570 /* translate escape sequence */
571 n += tk_esc(p) - p;
572 c = yynumber;
573 }
574 } else if (c == LF || c == EOF) {
575 if (pp_level == 0) {
576 error("unterminated string");
577 }
578 uc(c);
579 break;
580 }
581 *p++ = c;
582 if (p > yyend - 4) {
583 n += p - (yyend - 4);
584 p = yyend - 4;
585 }
586 }
587
588 if (pp_level == 0 && p + n > yyend - 4) {
589 error("string too long");
590 }
591 *p = '\0';
592 yyleng = p - yytext;
593 return (quote == '>') ? INCL_CONST : STRING_CONST;
594 }
595
596 /*
597 * NAME: token->gettok()
598 * DESCRIPTION: get a token from the input stream.
599 */
tk_gettok()600 int tk_gettok()
601 {
602 int c;
603 long result;
604 char *p;
605 bool overflow;
606 bool is_float, badoctal;
607
608 # define TEST(x, tok) if (c == x) { c = tok; break; }
609 # define CHECK(x, tok) c = gc(); *p++ = c; TEST(x, tok); --p; uc(c)
610
611 result = 0;
612 overflow = FALSE;
613 is_float = FALSE;
614 yytext = (yytext == yytext1) ? yytext2 : yytext1;
615 yyend = yytext + MAX_LINE_SIZE - 1;
616 p = yytext;
617 c = gc();
618 *p++ = c;
619 switch (c) {
620 case LF:
621 if (tbuffer == ibuffer) {
622 seen_nl = TRUE;
623 *p = '\0';
624 return c;
625 }
626 c = (pp_level > 0) ? MARK : ' ';
627 break;
628
629 case HT:
630 if (tbuffer != ibuffer) {
631 /* expanding a macro: keep separator */
632 break;
633 }
634 /* fall through */
635 case ' ':
636 case VT:
637 case FF:
638 case CR:
639 /* white space */
640 do {
641 c = gc();
642 } while (c == ' ' || (c == HT && tbuffer == ibuffer) || c == VT ||
643 c == FF || c == CR);
644
645 /* check for comment after white space */
646 if (c == '/') {
647 c = gc();
648 if (c == '*') {
649 comment(FALSE);
650 # ifdef SLASHSLASH
651 } else if (c == '/') {
652 comment(TRUE);
653 # endif
654 } else {
655 uc(c);
656 c = '/';
657 uc(c);
658 }
659 } else {
660 uc(c);
661 }
662 yyleng = 1;
663 *p = '\0';
664 return p[-1] = ' ';
665
666 case '!':
667 CHECK('=', NE);
668 c = '!';
669 break;
670
671 case '#':
672 if (!seen_nl) {
673 CHECK('#', HASH_HASH);
674 c = HASH;
675 }
676 break;
677
678 case '%':
679 CHECK('=', MOD_EQ);
680 c = '%';
681 break;
682
683 case '&':
684 c = gc();
685 *p++ = c;
686 TEST('&', LAND);
687 TEST('=', AND_EQ);
688 --p; uc(c);
689 c = '&';
690 break;
691
692 case '*':
693 CHECK('=', MULT_EQ);
694 c = '*';
695 break;
696
697 case '+':
698 c = gc();
699 *p++ = c;
700 TEST('+', PLUS_PLUS);
701 TEST('=', PLUS_EQ);
702 --p; uc(c);
703 c = '+';
704 break;
705
706 case '-':
707 c = gc();
708 *p++ = c;
709 TEST('>', RARROW);
710 TEST('-', MIN_MIN);
711 TEST('=', MIN_EQ);
712 --p; uc(c);
713 c = '-';
714 break;
715
716 case '.':
717 c = gc();
718 if (isdigit(c)) {
719 /*
720 * Come here when a decimal '.' has been spotted; c holds the next
721 * character.
722 */
723 fraction:
724 is_float = TRUE;
725 while (isdigit(c)) {
726 if (p < yyend) {
727 *p++ = c;
728 }
729 c = gc();
730 }
731 if (c == 'e' || c == 'E') {
732 char *q, exp;
733 int sign;
734
735 /*
736 * Come here when 'e' or 'E' has been spotted after a number.
737 */
738 exponent:
739 exp = c;
740 sign = 0;
741 q = p;
742 if (p < yyend) {
743 *p++ = c;
744 }
745 c = gc();
746 if (c == '+' || c == '-') {
747 if (p < yyend) {
748 *p++ = c;
749 }
750 sign = c;
751 c = gc();
752 }
753 if (isdigit(c)) {
754 do {
755 if (p < yyend) {
756 *p++ = c;
757 }
758 c = gc();
759 } while (isdigit(c));
760 is_float = TRUE;
761 } else {
762 /*
763 * assume the e isn't part of this token
764 */
765 uc(c);
766 if (sign != 0) {
767 uc(sign);
768 }
769 c = exp;
770 p = q;
771 }
772 }
773 uc(c);
774
775 if (is_float) {
776 yyfloat.high = 0;
777 yyfloat.low = 0;
778 if (pp_level == 0) {
779 if (p == yyend) {
780 error("too long floating point constant");
781 } else {
782 char *buf;
783
784 *p = '\0';
785 buf = yytext;
786 if (!flt_atof(&buf, &yyfloat)) {
787 error("overflow in floating point constant");
788 }
789 }
790 }
791 c = FLOAT_CONST;
792 } else {
793 if (pp_level == 0) {
794 /* unclear if this was decimal or octal */
795 if (p == yyend) {
796 error("too long integer constant");
797 } else if (overflow) {
798 error("overflow in integer constant");
799 }
800 }
801 c = INT_CONST;
802 }
803 break;
804 } else if (c == '.') {
805 *p++ = c;
806 CHECK('.', ELLIPSIS);
807 c = DOT_DOT;
808 } else {
809 uc(c);
810 c = '.';
811 }
812 break;
813
814 case '/':
815 c = gc();
816 if (c == '*') {
817 comment(FALSE);
818 yyleng = 1;
819 *p = '\0';
820 return p[-1] = ' ';
821 # ifdef SLASHSLASH
822 } else if (c == '/') {
823 comment(TRUE);
824 yyleng = 1;
825 *p = '\0';
826 return p[-1] = ' ';
827 # endif
828 }
829 *p++ = c;
830 TEST('=', DIV_EQ);
831 --p; uc(c);
832 c = '/';
833 break;
834
835 case ':':
836 CHECK(':', COLON_COLON);
837 c = ':';
838 break;
839
840 case '<':
841 if (do_include) {
842 /* #include <header> */
843 seen_nl = FALSE;
844 return tk_string('>');
845 }
846 c = gc();
847 *p++ = c;
848 TEST('=', LE);
849 TEST('-', LARROW);
850 if (c == '<') {
851 CHECK('=', LSHIFT_EQ);
852 c = LSHIFT;
853 break;
854 }
855 --p; uc(c);
856 c = '<';
857 break;
858
859 case '=':
860 CHECK('=', EQ);
861 c = '=';
862 break;
863
864 case '>':
865 c = gc();
866 *p++ = c;
867 TEST('=', GE);
868 if (c == '>') {
869 CHECK('=', RSHIFT_EQ);
870 c = RSHIFT;
871 break;
872 }
873 --p; uc(c);
874 c = '>';
875 break;
876
877 case '^':
878 CHECK('=', XOR_EQ);
879 c = '^';
880 break;
881
882 case '|':
883 c = gc();
884 *p++ = c;
885 TEST('|', LOR);
886 TEST('=', OR_EQ);
887 --p; uc(c);
888 c = '|';
889 break;
890
891 case '0':
892 badoctal = FALSE;
893 c = gc();
894 if (c == 'x' || c == 'X') {
895 *p++ = c;
896 c = gc();
897 if (isxdigit(c)) {
898 do {
899 if (p < yyend) {
900 *p++ = c;
901 }
902 if (result > 0x0fffffffL) {
903 overflow = TRUE;
904 }
905 if (isdigit(c)) {
906 c -= '0';
907 } else {
908 c = toupper(c) + 10 - 'A';
909 }
910 result <<= 4;
911 result += c;
912 c = gc();
913 } while (isxdigit(c));
914 } else {
915 /* not a hexadecimal constant */
916 uc(c);
917 c = *--p;
918 }
919 yynumber = result;
920 } else {
921 while (c >= '0' && c <= '9') {
922 if (c >= '8') {
923 badoctal = TRUE;
924 }
925 if (p < yyend) {
926 *p++ = c;
927 }
928 if (result > 0x1fffffffL) {
929 overflow = TRUE;
930 }
931 result <<= 3;
932 result += c - '0';
933 c = gc();
934 }
935 yynumber = result;
936
937 if (c == '.') {
938 if (p < yyend) {
939 *p++ = c;
940 }
941 c = gc();
942 if (c != '.') {
943 goto fraction;
944 }
945 --p; uc(c);
946 } else if (c == 'e' || c == 'E') {
947 goto exponent;
948 }
949 }
950 uc(c);
951 if (pp_level == 0) {
952 if (p == yyend) {
953 error("too long integer constant");
954 } else if (badoctal) {
955 error("bad octal constant");
956 } else if (overflow) {
957 error("overflow in integer constant");
958 }
959 }
960 c = INT_CONST;
961 break;
962
963 case '1': case '2': case '3': case '4': case '5':
964 case '6': case '7': case '8': case '9':
965 for (;;) {
966 if (result >= 214748364L && (result > 214748364L || c >= '8')) {
967 overflow = TRUE;
968 }
969 result *= 10;
970 result += c - '0';
971 c = gc();
972 if (!isdigit(c)) {
973 break;
974 }
975 if (p < yyend) {
976 *p++ = c;
977 }
978 }
979 yynumber = result;
980
981 if (c == '.') {
982 if (p < yyend) {
983 *p++ = c;
984 }
985 c = gc();
986 if (c != '.') {
987 goto fraction;
988 }
989 --p; uc(c);
990 }
991 if (c == 'e' || c == 'E') {
992 goto exponent;
993 }
994 uc(c);
995 if (pp_level == 0) {
996 if (p == yyend) {
997 error("too long integer constant");
998 } else if (overflow) {
999 error("overflow in integer constant");
1000 }
1001 }
1002 c = INT_CONST;
1003 break;
1004
1005 case '_':
1006 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
1007 case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
1008 case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
1009 case 'v': case 'w': case 'x': case 'y': case 'z':
1010 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
1011 case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
1012 case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
1013 case 'V': case 'W': case 'X': case 'Y': case 'Z':
1014 for (;;) {
1015 c = gc();
1016 if (!isalnum(c) && c != '_') {
1017 break;
1018 }
1019 if (p < yyend) {
1020 *p++ = c;
1021 }
1022 }
1023 uc(c);
1024 if (pp_level == 0 && p == yyend) {
1025 error("too long identifier");
1026 }
1027 c = IDENTIFIER;
1028 break;
1029
1030 case '\'':
1031 c = gc();
1032 *p++ = c;
1033 if (c == '\'') {
1034 if (pp_level == 0) {
1035 error("too short character constant");
1036 }
1037 } else if (c == LF || c == EOF) {
1038 if (pp_level == 0) {
1039 error("unterminated character constant");
1040 }
1041 uc(c);
1042 } else {
1043 if (c == '\\') {
1044 p = tk_esc(p);
1045 } else {
1046 yynumber = c;
1047 }
1048 c = gc();
1049 *p++ = c;
1050 if (c != '\'') {
1051 if (pp_level == 0) {
1052 error("illegal character constant");
1053 }
1054 uc(c);
1055 }
1056 }
1057 c = INT_CONST;
1058 break;
1059
1060 case '"':
1061 seen_nl = FALSE;
1062 return tk_string('"');
1063 }
1064 *p = '\0';
1065 yyleng = p - yytext;
1066 seen_nl = FALSE;
1067
1068 return c;
1069 }
1070
1071 /*
1072 * NAME: token->skiptonl()
1073 * DESCRIPTION: skip tokens until a newline or EOF is found. If the argument is
1074 * TRUE, only whitespace is allowed.
1075 */
tk_skiptonl(int ws)1076 void tk_skiptonl(int ws)
1077 {
1078 pp_level++;
1079 for (;;) {
1080 switch (tk_gettok()) {
1081 case EOF:
1082 error("unterminated line");
1083 --pp_level;
1084 return;
1085
1086 case LF:
1087 --pp_level;
1088 return;
1089
1090 case ' ':
1091 case HT:
1092 break;
1093
1094 default:
1095 if (ws) {
1096 error("bad token in control");
1097 ws = FALSE;
1098 }
1099 break;
1100 }
1101 }
1102 }
1103
1104 /*
1105 * NAME: token->expand()
1106 * DESCRIPTION: expand a macro, pushing it on the input stream
1107 * return: -1 if the macro is nested and is not expanded
1108 * 0 if the macro is ftn-like and the call isn't
1109 * 1 if the macro was expanded
1110 */
tk_expand(macro * mc)1111 int tk_expand(macro *mc)
1112 {
1113 int token;
1114
1115 if (tbuffer != ibuffer) {
1116 tbuf *tb;
1117
1118 token = gc();
1119 if (token == LF) {
1120 return -1;
1121 }
1122 uc(token);
1123
1124 tb = tbuffer;
1125 do {
1126 if (tb->fd < -1 && tb->u.mc != (macro *) NULL &&
1127 strcmp(mc->chain.name, tb->u.mc->chain.name) == 0) {
1128 return -1;
1129 }
1130 tb = tb->prev;
1131 } while (tb != ibuffer);
1132 }
1133
1134 if (mc->narg >= 0) {
1135 char *args[MAX_NARG], *arg, ppbuf[MAX_REPL_SIZE];
1136 int narg;
1137 str *s;
1138 unsigned short startline, line;
1139 int errcount;
1140
1141 startline = ibuffer->line;
1142
1143 do {
1144 token = gc();
1145 if (token == '/') {
1146 token = gc();
1147 if (token == '*') {
1148 comment(FALSE);
1149 token = gc();
1150 # ifdef SLASHSLASH
1151 } else if (token == '/') {
1152 comment(TRUE);
1153 token = gc();
1154 # endif
1155 } else {
1156 uc(token);
1157 }
1158 break;
1159 }
1160 } while (token == ' ' || token == HT || token == LF);
1161
1162 if (token != '(') {
1163 /* macro is function-like, and this is not an invocation */
1164 uc(token);
1165 return 0;
1166 }
1167
1168 /* scan arguments */
1169 narg = 0;
1170 errcount = 0;
1171 pp_level++;
1172 s = pps_new(ppbuf, sizeof(ppbuf));
1173 do {
1174 token = tk_gettok();
1175 } while (token == ' ' || token == HT || token == LF);
1176
1177 if (token != ')' || mc->narg != 0) {
1178 int paren;
1179 bool seen_space, seen_sep;
1180
1181 paren = 0;
1182 seen_space = FALSE;
1183 seen_sep = FALSE;
1184
1185 for (;;) {
1186 if (token == EOF) { /* sigh */
1187 line = ibuffer->line;
1188 ibuffer->line = startline;
1189 error("EOF in macro call");
1190 ibuffer->line = line;
1191 errcount++;
1192 break;
1193 }
1194
1195 if ((token == ',' || token == ')') && paren == 0) {
1196 if (s->len < 0) {
1197 line = ibuffer->line;
1198 ibuffer->line = startline;
1199 error("macro argument too long");
1200 ibuffer->line = line;
1201 errcount++;
1202 } else if (narg < mc->narg) {
1203 arg = ALLOCA(char, s->len + 1);
1204 args[narg] = strcpy(arg, ppbuf);
1205 }
1206 narg++;
1207 if (token == ')') {
1208 break;
1209 }
1210
1211 s->len = 0;
1212
1213 do {
1214 token = tk_gettok();
1215 } while (token == ' ' || token == HT || token == LF);
1216 seen_space = FALSE;
1217 seen_sep = FALSE;
1218 } else {
1219 if (seen_space) {
1220 pps_ccat(s, ' ');
1221 seen_space = FALSE;
1222 seen_sep = FALSE;
1223 } else if (seen_sep) {
1224 pps_ccat(s, HT);
1225 seen_sep = FALSE;
1226 }
1227 pps_scat(s, yytext);
1228 if (token == '(') {
1229 paren++;
1230 } else if (token == ')') {
1231 --paren;
1232 }
1233
1234 for (;;) {
1235 token = tk_gettok();
1236 if (token == ' ' || token == LF) {
1237 seen_space = TRUE;
1238 } else if (token == HT) {
1239 seen_sep = TRUE;
1240 } else {
1241 break;
1242 }
1243 }
1244 }
1245 }
1246 }
1247 --pp_level;
1248
1249 if (errcount == 0 && narg != mc->narg) {
1250 error("macro argument count mismatch");
1251 errcount++;
1252 }
1253
1254 if (errcount > 0) {
1255 if (narg > mc->narg) {
1256 narg = mc->narg;
1257 }
1258 while (narg > 0) {
1259 --narg;
1260 AFREE(args[narg]);
1261 }
1262 pps_del(s);
1263 return 1; /* skip this macro */
1264 }
1265
1266 if (narg > 0) {
1267 push((macro *) NULL, mc->replace, strlen(mc->replace), TRUE);
1268 s->len = 0;
1269
1270 pp_level++;
1271 while ((token=tk_gettok()) != EOF) {
1272 if (token == MARK) { /* macro argument follows */
1273 token = gc();
1274 narg = token & MA_NARG;
1275 if (token & MA_STRING) {
1276 char *p;
1277
1278 /* copy it, inserting \ before \ and " */
1279 push((macro *) NULL, args[narg], strlen(args[narg]),
1280 TRUE);
1281 pps_ccat(s, '"');
1282 while ((token=tk_gettok()) != EOF) {
1283 if (token != HT) {
1284 p = yytext;
1285 if (*p == '\'' || *p == '"') {
1286 /* escape \ and " */
1287 do {
1288 if (*p == '"' || *p == '\\') {
1289 pps_ccat(s, '\\');
1290 }
1291 pps_ccat(s, *p++);
1292 } while (*p != '\0');
1293 } else {
1294 /* just add token */
1295 pps_scat(s, yytext);
1296 }
1297 }
1298 }
1299 pps_ccat(s, '"');
1300 pop();
1301 } else if (token & MA_NOEXPAND) {
1302
1303 /*
1304 * if the previous token was a not-to-expand macro,
1305 * make it a normal identifier
1306 */
1307 if (s->len > 0 && ppbuf[s->len - 1] == LF) {
1308 s->len--;
1309 }
1310
1311 push((macro *) NULL, args[narg], strlen(args[narg]),
1312 TRUE);
1313 token = tk_gettok();
1314 /*
1315 * if the first token of the argument is a
1316 * not-to-expand macro, make it a normal identifier
1317 */
1318 if (token == IDENTIFIER && (narg=gc()) != LF) {
1319 uc(narg);
1320 }
1321 while (token != EOF) {
1322 pps_scat(s, yytext);
1323 token = tk_gettok();
1324 }
1325 pop();
1326 } else {
1327
1328 /* preprocess the argument */
1329 push((macro *) NULL, args[narg], strlen(args[narg]),
1330 TRUE);
1331 while ((token=tk_gettok()) != EOF) {
1332 if (token == IDENTIFIER) {
1333 macro *m;
1334
1335 if ((m=mc_lookup(yytext)) != (macro *) NULL) {
1336 token = tk_expand(m);
1337 if (token > 0) {
1338 continue;
1339 }
1340 if (token < 0) {
1341 pps_scat(s, yytext);
1342 pps_ccat(s, LF);
1343 continue;
1344 }
1345 }
1346 }
1347 pps_scat(s, yytext);
1348 }
1349 pop();
1350 }
1351 } else {
1352 /* copy this token */
1353 pps_scat(s, yytext);
1354 }
1355 }
1356 --pp_level;
1357 pop();
1358
1359 /* cleanup */
1360 narg = mc->narg;
1361 do {
1362 --narg;
1363 AFREE(args[narg]);
1364 } while (narg > 0);
1365
1366 narg = s->len; /* so s can be deleted before the push */
1367 pps_del(s);
1368 if (narg < 0) {
1369 error("macro expansion too large");
1370 } else {
1371 push(mc, strcpy(ALLOC(char, narg + 1), ppbuf), narg, FALSE);
1372 }
1373 return 1;
1374 }
1375 }
1376
1377 /* manifest constant, or function-like macro without arguments */
1378 if (mc->replace != (char *) NULL) {
1379 push(mc, mc->replace, strlen(mc->replace), FALSE);
1380 } else {
1381 char *p;
1382
1383 p = special_replace(mc->chain.name);
1384 push(mc, p, strlen(p), FALSE);
1385 }
1386
1387 return 1;
1388 }
1389