1 /*
2  * This file is part of DGD, https://github.com/dworkin/dgd
3  * Copyright (C) 1993-2010 Dworkin B.V.
4  * Copyright (C) 2010,2012-2013 DGD Authors (see the commit log for details)
5  *
6  * This program is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Affero General Public License as
8  * published by the Free Software Foundation, either version 3 of the
9  * License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU Affero General Public License for more details.
15  *
16  * You should have received a copy of the GNU Affero General Public License
17  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 # define INCLUDE_FILE_IO
21 # define INCLUDE_CTYPE
22 # include "lex.h"
23 # include "path.h"
24 # include "macro.h"
25 # include "special.h"
26 # include "ppstr.h"
27 # include "token.h"
28 
29 /*
30  * The functions for getting a (possibly preprocessed) token from the input
31  * stream.
32  */
33 
/* number of token buffers that are allocated together in one tchunk */
# define TCHUNKSZ	8
35 
/*
 * A token buffer: one entry on the stack of input sources that gc() reads
 * characters from. Entries are linked through prev, both while on the
 * stack and while on the free list.
 */
typedef struct _tbuf_ {
    string **strs;		/* input buffer array */
    int nstr;			/* number of input buffers */
    char *buffer;		/* token buffer */
    char *p;			/* token buffer pointer */
    int inbuf;			/* # chars in token buffer */
    char ubuf[4];		/* unget buffer */
    char *up;			/* unget buffer pointer, == ubuf if empty */
    bool eof;			/* TRUE if empty(buffer) -> EOF */
    unsigned short line;	/* line number */
    int fd;			/* file descriptor: >= 0 for a file, -1 for
				   an array of strings, -2 as set by push() */
    union {
	char *filename;		/* file name */
	macro *mc;		/* macro this buffer is an expansion of */
    } u;
    struct _tbuf_ *prev;	/* previous token buffer */
} tbuf;
53 
/*
 * Token buffers are allocated TCHUNKSZ at a time; the chunks form a singly
 * linked list that is released as a whole by tk_clear().
 */
typedef struct _tchunk_ {
    struct _tchunk_ *next;	/* next in list */
    tbuf t[TCHUNKSZ];		/* chunk of token buffers */
} tchunk;
58 
char *yytext;			/* for strings and identifiers */
static char *yytext1, *yytext2;	/* internal buffers, used alternately */
static char *yyend;		/* end of current buffer */
int yyleng;			/* length of token */
long yynumber;			/* integer constant */
xfloat yyfloat;			/* floating point constant */

static tchunk *tlist;		/* list of token buffer chunks */
static int tchunksz;		/* # token buffers used in the first chunk */
static tbuf *flist;		/* free token buffer list */
static tbuf *tbuffer;		/* current token buffer */
static tbuf *ibuffer;		/* current input buffer */
static int pp_level;		/* the recursive preprocessing level */
static bool do_include;		/* treat < and strings specially */
static bool seen_nl;		/* just seen a newline */
74 
75 /*
76  * NAME:	token->init()
77  * DESCRIPTION:	initialize the new token input buffer
78  */
tk_init()79 void tk_init()
80 {
81     yytext1 = ALLOC(char, MAX_LINE_SIZE);
82     yytext2 = ALLOC(char, MAX_LINE_SIZE);
83     tlist = (tchunk *) NULL;
84     tchunksz = TCHUNKSZ;
85     flist = (tbuf *) NULL;
86     tbuffer = (tbuf *) NULL;
87     ibuffer = (tbuf *) NULL;
88     pp_level = 0;
89     do_include = FALSE;
90 }
91 
92 /*
93  * NAME:	push()
94  * DESCRIPTION:	Push a buffer on the token input stream. If eof is false, then
95  *		the buffer will automatically be dropped when all is read.
96  */
push(macro * mc,char * buffer,unsigned int buflen,bool eof)97 static void push(macro *mc, char *buffer, unsigned int buflen, bool eof)
98 {
99     tbuf *tb;
100 
101     if (flist != (tbuf *) NULL) {
102 	/* from free list */
103 	tb = flist;
104 	flist = tb->prev;
105     } else {
106 	/* allocate new one */
107 	if (tchunksz == TCHUNKSZ) {
108 	    tchunk *l;
109 
110 	    l = ALLOC(tchunk, 1);
111 	    l->next = tlist;
112 	    tlist = l;
113 	    tchunksz = 0;
114 	}
115 	tb = &tlist->t[tchunksz++];
116     }
117     tb->strs = (string **) NULL;
118     tb->nstr = 0;
119     tb->p = tb->buffer = buffer;
120     tb->inbuf = buflen;
121     tb->up = tb->ubuf;
122     tb->eof = eof;
123     tb->fd = -2;
124     tb->u.mc = mc;
125     tb->prev = tbuffer;
126     tbuffer = tb;
127 }
128 
129 /*
130  * NAME:	pop()
131  * DESCRIPTION:	Drop the current token input buffer. If the associated macro
132  *		is function-like, the token buffer will have to be deallocated.
133  */
pop()134 static void pop()
135 {
136     tbuf *tb;
137 
138     tb = tbuffer;
139     if (tb->fd < -1) {
140 	if (tb->u.mc != (macro *) NULL) {
141 	    if (tb->u.mc->narg > 0) {
142 		/* in the buffer a function-like macro has been expanded */
143 		FREE(tb->buffer);
144 	    }
145 	}
146     } else {
147 	if (tb->fd >= 0) {
148 	    P_close(tb->fd);
149 	    FREE(tb->buffer);
150 	} else if (tb->prev != (tbuf *) NULL) {
151 	    str_del(tb->strs[0]);
152 	    FREE(tb->strs);
153 	}
154 	ibuffer = tbuffer->prev;
155 	FREE(tb->u.filename);
156     }
157     tbuffer = tb->prev;
158 
159     tb->prev = flist;
160     flist = tb;
161 }
162 
163 /*
164  * NAME:	token->clear()
165  * DESCRIPTION:	clear all of the token input buffers
166  */
tk_clear()167 void tk_clear()
168 {
169     tchunk *l, *f;
170 
171     while (tbuffer != (tbuf *) NULL) {
172 	pop();
173     }
174     for (l = tlist; l != (tchunk *) NULL; ) {
175 	f = l;
176 	l = l->next;
177 	FREE(f);
178     }
179     tlist = (tchunk *) NULL;
180     if (yytext1 != (char *) NULL) {
181 	FREE(yytext2);
182 	FREE(yytext1);
183 	yytext1 = (char *) NULL;
184 	yytext2 = (char *) NULL;
185     }
186 }
187 
188 /*
189  * NAME:	token->include()
190  * DESCRIPTION:	push a file on the input stream
191  */
tk_include(char * file,string ** strs,int nstr)192 bool tk_include(char *file, string **strs, int nstr)
193 {
194     int fd;
195     ssizet len;
196 
197     if (file != (char *) NULL) {
198 	if (strs == (string **) NULL) {
199 	    struct stat sbuf;
200 
201 	    /* read from file */
202 	    fd = P_open(file, O_RDONLY | O_BINARY, 0);
203 	    if (fd < 0) {
204 		return FALSE;
205 	    }
206 
207 	    P_fstat(fd, &sbuf);
208 	    if ((sbuf.st_mode & S_IFMT) != S_IFREG) {
209 		/* no source this */
210 		P_close(fd);
211 		return FALSE;
212 	    }
213 
214 	    push((macro *) NULL, ALLOC(char, BUF_SIZE), 0, TRUE);
215 	} else {
216 	    /* read from strings */
217 	    --strs;
218 	    push((macro *) NULL, strs[0]->text, strs[0]->len, TRUE);
219 	    tbuffer->strs = strs;
220 	    tbuffer->nstr = --nstr;
221 	    fd = -1;
222 	}
223 
224 	ibuffer = tbuffer;
225 	ibuffer->fd = fd;
226 	len = strlen(file);
227 	if (len >= STRINGSZ - 1) {
228 	    len = STRINGSZ - 2;
229 	}
230 	ibuffer->u.filename = ALLOC(char, len + 2);
231 	strncpy(ibuffer->u.filename + 1, file, len);
232 	ibuffer->u.filename[0] = '/';
233 	ibuffer->u.filename[len + 1] = '\0';
234 	ibuffer->line = 1;
235 	seen_nl = TRUE;
236 
237 	return TRUE;
238     }
239 
240     return FALSE;
241 }
242 
243 /*
244  * NAME:	token->endinclude()
245  * DESCRIPTION:	end an #inclusion
246  */
tk_endinclude()247 void tk_endinclude()
248 {
249     pop();
250     seen_nl = TRUE;
251 }
252 
253 /*
254  * NAME:	token->line()
255  * DESCRIPTION:	return the current line number (possibly adjusted)
256  */
tk_line()257 unsigned short tk_line()
258 {
259     return ibuffer->line - (unsigned short) seen_nl;
260 }
261 
262 /*
263  * NAME:	token->filename()
264  * DESCRIPTION:	return the current file name
265  */
tk_filename()266 char *tk_filename()
267 {
268     return ibuffer->u.filename;
269 }
270 
271 /*
272  * NAME:	token->setline()
273  * DESCRIPTION:	set the current line number
274  */
tk_setline(unsigned short line)275 void tk_setline(unsigned short line)
276 {
277     ibuffer->line = line;
278 }
279 
280 /*
281  * NAME:	token->setfilename()
282  * DESCRIPTION:	set the current file name
283  */
tk_setfilename(char * file)284 void tk_setfilename(char *file)
285 {
286     unsigned int len;
287 
288     len = strlen(file);
289     if (len >= STRINGSZ) {
290 	len = STRINGSZ - 1;
291     }
292     ibuffer->u.filename = memcpy(REALLOC(ibuffer->u.filename, char, 0, len + 1),
293 				 file, len);
294     ibuffer->u.filename[len] = '\0';
295 }
296 
297 /*
298  * NAME:	token->header()
299  * DESCRIPTION:	set the current include string mode. if TRUE, '<' will be
300  *		specially processed.
301  */
tk_header(int incl)302 void tk_header(int incl)
303 {
304     do_include = incl;
305 }
306 
307 /*
308  * NAME:	token->setpp()
309  * DESCRIPTION:	if the argument is true, do not translate escape sequences in
310  *		strings, and don't report errors.
311  */
tk_setpp(int pp)312 void tk_setpp(int pp)
313 {
314     pp_level = (int) pp;
315 }
316 
/*
 * Unget a character: push it on the unget buffer of the current token
 * buffer, unless it is EOF. A LF that is put back into the current input
 * buffer also takes back the line count that gc() added for it.
 * The argument is evaluated more than once and the unget buffer is not
 * checked for overflow. Wrapped in do ... while (0) so that "uc(c);" is a
 * single statement, also in an unbraced if/else.
 */
# define uc(c)	do { \
		    if ((c) != EOF) { \
			if ((c) == LF && tbuffer == ibuffer) ibuffer->line--; \
			*(tbuffer->up)++ = (c); \
		    } \
		} while (0)
323 
/*
 * NAME:	gc()
 * DESCRIPTION:	get a character from the input. Characters in the unget
 *		buffer are returned first. An empty buffer is refilled from
 *		its file or from its next string; failing that, EOF is
 *		returned if the buffer is marked eof, and otherwise the
 *		buffer is popped and reading continues in the previous one.
 *		In the current input buffer, each LF read increases the line
 *		count and a backslash directly followed by a LF is skipped.
 */
static int gc()
{
    tbuf *tb;
    int c;
    bool backslash;	/* TRUE if a backslash was read and is held back */

    tb = tbuffer;
    backslash = FALSE;

    for (;;) {
	if (tb->up != tb->ubuf) {
	    /* get a character from unget buffer */
	    c = UCHAR(*--(tb->up));
	} else {
	    if (tb->inbuf <= 0) {
		/* Current input buffer is empty. Try a refill. */
		if (tb->fd >= 0 &&
		    (tb->inbuf = P_read(tb->fd, tb->buffer, BUF_SIZE)) > 0) {
		    tb->p = tb->buffer;
		} else if (backslash) {
		    /* nothing follows the held back backslash */
		    return '\\';
		} else if (tb->nstr != 0) {
		    /* continue with the next string of the array */
		    if (tb->prev != (tbuf *) NULL) {
			str_del(tb->strs[0]);
		    }
		    --(tb->strs);
		    --(tb->nstr);
		    tb->p = tb->buffer = tb->strs[0]->text;
		    tb->inbuf = tb->strs[0]->len;
		    continue;
		} else if (tb->eof) {
		    return EOF;
		} else {
		    /* otherwise, pop the current token input buffer */
		    pop();
		    tb = tbuffer;
		    continue;
		}
	    }
	    tb->inbuf--;
	    c = UCHAR(*(tb->p)++);
	}

	if (c == LF && tb == ibuffer) {
	    ibuffer->line++;
	    if (!backslash) {
		return c;
	    }
	    /* backslash-newline: drop both and read on */
	    backslash = FALSE;
	} else if (backslash) {
	    /* an ordinary backslash: put back what followed it */
	    uc(c);
	    return '\\';
	} else if (c == '\\' && tb == ibuffer) {
	    /* hold it back until the next character is known */
	    backslash = TRUE;
	} else {
	    return c;
	}
    }
}
387 
/*
 * NAME:	skip_comment()
 * DESCRIPTION: skip the remainder of a single comment, up to and including
 *		the closing star-slash; EOF inside the comment is reported
 *		as an error
 */
static void skip_comment()
{
    int c;

    for (;;) {
	/* look for the next '*' */
	while ((c=gc()) != '*') {
	    if (c == EOF) {
		error("EOF in comment");
		return;
	    }
	}

	/* skip the whole run of '*' */
	do {
	    c = gc();
	} while (c == '*');

	if (c == '/') {
	    /* the run was followed by '/': end of comment */
	    return;
	}
    }
}
410 
411 /*
412  * NAME:	skip_alt_comment()
413  * DESCRIPTION: skip c++ style comment
414  */
skip_alt_comment()415 static void skip_alt_comment()
416 {
417     int c;
418 
419     do {
420 	c = gc();
421 	if (c == EOF) {
422 	    return;
423 	}
424     } while (c != LF);
425     uc(c);
426 }
427 
428 /*
429  * NAME:	comment()
430  * DESCRIPTION:	skip comments and white space
431  */
comment(bool flag)432 static void comment(bool flag)
433 {
434     int c;
435 
436     for (;;) {
437 	/* first skip the current comment */
438 	if (flag) {
439 	   skip_alt_comment();
440 	} else {
441 	   skip_comment();
442 	}
443 
444 	/* skip any whitespace */
445 	do {
446 	    c = gc();
447 	} while (c == ' ' || c == HT || c == VT || c == FF || c == CR);
448 
449 	/* check if a new comment follows */
450 	if (c != '/') {
451 	    uc(c);
452 	    break;
453 	}
454 	c = gc();
455 	if (c == '*') {
456 	    flag = FALSE;
457 # ifdef SLASHSLASH
458 	} else if (c == '/') {
459 	    flag = TRUE;
460 # endif
461 	} else {
462 	    uc(c);
463 	    c = '/';
464 	    uc(c);
465 	    break;
466 	}
467     }
468 }
469 
/*
 * NAME:	token->esc()
 * DESCRIPTION:	handle an escaped character, leaving the value in yynumber.
 *		The backslash has already been read; the characters of the
 *		escape sequence are copied to p and a pointer just beyond the
 *		copy is returned. If the escaped character is a LF, it is put
 *		back, nothing is copied and yynumber is not changed.
 */
static char *tk_esc(char *p)
{
    int c, i, n;

    switch (c = *p++ = gc()) {
    case 'a': c = BEL; break;
    case 'b': c = BS; break;
    case 't': c = HT; break;
    case 'n': c = LF; break;
    case 'v': c = VT; break;
    case 'f': c = FF; break;
    case 'r': c = CR; break;

    case LF:
	/* newline in string or character constant */
	uc(c);
	return p - 1;

    case '0': case '1': case '2': case '3':
    case '4': case '5': case '6': case '7':
	/* octal constant */
	i = 0;
	n = 3;		/* at most three digits */
	--p;		/* the first digit is stored again by the loop */
	do {
	    *p++ = c;
	    i <<= 3;
	    i += c - '0';
	    c = gc();
	} while (--n > 0 && c >= '0' && c <= '7');
	uc(c);
	c = UCHAR(i);
	break;

    case 'x':
	/* hexadecimal constant */
	c = gc();
	if (isxdigit(c)) {
	    i = 0;
	    n = 3;	/* at most three digits */
	    do {
		*p++ = c;
		i <<= 4;
		if (isdigit(c)) {
		    i += c - '0';
		} else {
		    i += toupper(c) + 10 - 'A';
		}
		c = gc();
	    } while (--n > 0 && isxdigit(c));
	} else {
	    /* no digits: the value is that of the 'x' itself */
	    i = 'x';
	}
	uc(c);
	c = UCHAR(i);
	break;
    }

    /* any other character stands for itself */
    yynumber = c;
    return p;
}
535 
/*
 * NAME:	token->string()
 * DESCRIPTION:	handle a string. If pp_level > 0, don't translate escape
 *		sequences. The opening quote has already been read (and
 *		stored in yytext[0] by the caller); the string is read up to
 *		the closing quote, which is '>' for an #include <header>.
 *		The text is left in yytext and its length in yyleng. Return
 *		INCL_CONST if quote is '>', STRING_CONST otherwise.
 */
static int tk_string(char quote)
{
    char *p;
    int c, n;	/* n: # characters counted, but not stored in yytext */

    p = yytext;
    if (pp_level > 0) {
	/* keep the quotes if not on top level */
	p++;
	n = 0;
    } else {
	/* NOTE(review): presumably accounts for the two quotes - confirm */
	n = 2;
    }

    for (;;) {
	c = gc();
	if (c == quote) {
	    if (pp_level > 0) {
		/* keep the quotes if not on top level */
		*p++ = c;
	    }
	    break;
	} else if (c == '\\') {
	    if (pp_level > 0 || do_include) {
		/* recognize, but do not translate escape sequence */
		*p++ = c;
		p = tk_esc(p);
		/* the last character is stored again below */
		c = *--p;
	    } else {
		/* translate escape sequence */
		/* the sequence text at p is overwritten by its value below */
		n += tk_esc(p) - p;
		c = yynumber;
	    }
	} else if (c == LF || c == EOF) {
	    if (pp_level == 0) {
		error("unterminated string");
	    }
	    uc(c);
	    break;
	}
	*p++ = c;
	if (p > yyend - 4) {
	    /* buffer (nearly) full: count the character without keeping it */
	    n += p - (yyend - 4);
	    p = yyend - 4;
	}
    }

    if (pp_level == 0 && p + n > yyend - 4) {
	error("string too long");
    }
    *p = '\0';
    yyleng = p - yytext;
    return (quote == '>') ? INCL_CONST : STRING_CONST;
}
595 
/*
 * NAME:	token->gettok()
 * DESCRIPTION:	get a token from the input stream. The text of the token is
 *		left in yytext (which alternates between two buffers, so the
 *		text of the previous token stays intact) and its length in
 *		yyleng; integer and character constants leave their value in
 *		yynumber, floating point constants in yyfloat (top level
 *		only). The token code is returned; for characters without a
 *		case of their own that is the character itself, including
 *		EOF. White space and comments are returned as a single ' '.
 *		If pp_level > 0, errors are not reported.
 */
int tk_gettok()
{
    int c;
    long result;
    char *p;
    bool overflow;
    bool is_float, badoctal;

/* TEST: if the character just read is x, the token is tok */
# define TEST(x, tok)	if (c == x) { c = tok; break; }
/* CHECK: read a character; if it is x the token is tok, else put it back */
# define CHECK(x, tok)	c = gc(); *p++ = c; TEST(x, tok); --p; uc(c)

    result = 0;
    overflow = FALSE;
    is_float = FALSE;
    /* switch to the buffer that was not used for the previous token */
    yytext = (yytext == yytext1) ? yytext2 : yytext1;
    yyend = yytext + MAX_LINE_SIZE - 1;
    p = yytext;
    c = gc();
    *p++ = c;
    switch (c) {
    case LF:
	if (tbuffer == ibuffer) {
	    seen_nl = TRUE;
	    *p = '\0';
	    return c;
	}
	/* a LF that does not come from the current input buffer */
	c = (pp_level > 0) ? MARK : ' ';
	break;

    case HT:
	if (tbuffer != ibuffer) {
	    /* expanding a macro: keep separator */
	    break;
	}
	/* fall through */
    case ' ':
    case VT:
    case FF:
    case CR:
	/* white space */
	do {
	    c = gc();
	} while (c == ' ' || (c == HT && tbuffer == ibuffer) || c == VT ||
		 c == FF || c == CR);

	/* check for comment after white space */
	if (c == '/') {
	    c = gc();
	    if (c == '*') {
		comment(FALSE);
# ifdef SLASHSLASH
	    } else if (c == '/') {
		comment(TRUE);
# endif
	    } else {
		uc(c);
		c = '/';
		uc(c);
	    }
	} else {
	    uc(c);
	}
	yyleng = 1;
	*p = '\0';
	return p[-1] = ' ';

    case '!':
	CHECK('=', NE);
	c = '!';
	break;

    case '#':
	/* at the start of a line, '#' is returned as itself */
	if (!seen_nl) {
	    CHECK('#', HASH_HASH);
	    c = HASH;
	}
	break;

    case '%':
	CHECK('=', MOD_EQ);
	c = '%';
	break;

    case '&':
	c = gc();
	*p++ = c;
	TEST('&', LAND);
	TEST('=', AND_EQ);
	--p; uc(c);
	c = '&';
	break;

    case '*':
	CHECK('=', MULT_EQ);
	c = '*';
	break;

    case '+':
	c = gc();
	*p++ = c;
	TEST('+', PLUS_PLUS);
	TEST('=', PLUS_EQ);
	--p; uc(c);
	c = '+';
	break;

    case '-':
	c = gc();
	*p++ = c;
	TEST('>', RARROW);
	TEST('-', MIN_MIN);
	TEST('=', MIN_EQ);
	--p; uc(c);
	c = '-';
	break;

    case '.':
	c = gc();
	if (isdigit(c)) {
	    /*
	     * Come here when a decimal '.' has been spotted; c holds the next
	     * character.
	     */
	fraction:
	    is_float = TRUE;
	    while (isdigit(c)) {
		if (p < yyend) {
		    *p++ = c;
		}
		c = gc();
	    }
	    if (c == 'e' || c == 'E') {
		char *q, exp;
		int sign;

		/*
		 * Come here when 'e' or 'E' has been spotted after a number.
		 */
	    exponent:
		exp = c;
		sign = 0;
		q = p;		/* to back up to if this is no exponent */
		if (p < yyend) {
		    *p++ = c;
		}
		c = gc();
		if (c == '+' || c == '-') {
		    if (p < yyend) {
			*p++ = c;
		    }
		    sign = c;
		    c = gc();
		}
		if (isdigit(c)) {
		    do {
			if (p < yyend) {
			    *p++ = c;
			}
			c = gc();
		    } while (isdigit(c));
		    is_float = TRUE;
		} else {
		    /*
		     * assume the e isn't part of this token
		     */
		    uc(c);
		    if (sign != 0) {
			uc(sign);
		    }
		    c = exp;
		    p = q;
		}
	    }
	    uc(c);

	    if (is_float) {
		yyfloat.high = 0;
		yyfloat.low = 0;
		if (pp_level == 0) {
		    if (p == yyend) {
			error("too long floating point constant");
		    } else {
			char *buf;

			*p = '\0';
			buf = yytext;
			if (!flt_atof(&buf, &yyfloat)) {
			    error("overflow in floating point constant");
			}
		    }
		}
		c = FLOAT_CONST;
	    } else {
		/* an integer followed by an 'e' that was no exponent */
		if (pp_level == 0) {
		    /* unclear if this was decimal or octal */
		    if (p == yyend) {
			error("too long integer constant");
		    } else if (overflow) {
			error("overflow in integer constant");
		    }
		}
		c = INT_CONST;
	    }
	    break;
	} else if (c == '.') {
	    *p++ = c;
	    CHECK('.', ELLIPSIS);
	    c = DOT_DOT;
	} else {
	    uc(c);
	    c = '.';
	}
	break;

    case '/':
	c = gc();
	if (c == '*') {
	    comment(FALSE);
	    yyleng = 1;
	    *p = '\0';
	    return p[-1] = ' ';
# ifdef SLASHSLASH
	} else if (c == '/') {
	    comment(TRUE);
	    yyleng = 1;
	    *p = '\0';
	    return p[-1] = ' ';
# endif
	}
	*p++ = c;
	TEST('=', DIV_EQ);
	--p; uc(c);
	c = '/';
	break;

    case ':':
	CHECK(':', COLON_COLON);
	c = ':';
	break;

    case '<':
	if (do_include) {
	    /* #include <header> */
	    seen_nl = FALSE;
	    return tk_string('>');
	}
	c = gc();
	*p++ = c;
	TEST('=', LE);
	TEST('-', LARROW);
	if (c == '<') {
	    CHECK('=', LSHIFT_EQ);
	    c = LSHIFT;
	    break;
	}
	--p; uc(c);
	c = '<';
	break;

    case '=':
	CHECK('=', EQ);
	c = '=';
	break;

    case '>':
	c = gc();
	*p++ = c;
	TEST('=', GE);
	if (c == '>') {
	    CHECK('=', RSHIFT_EQ);
	    c = RSHIFT;
	    break;
	}
	--p; uc(c);
	c = '>';
	break;

    case '^':
	CHECK('=', XOR_EQ);
	c = '^';
	break;

    case '|':
	c = gc();
	*p++ = c;
	TEST('|', LOR);
	TEST('=', OR_EQ);
	--p; uc(c);
	c = '|';
	break;

    case '0':
	/* hexadecimal or octal constant, or the start of a float */
	badoctal = FALSE;
	c = gc();
	if (c == 'x' || c == 'X') {
	    *p++ = c;
	    c = gc();
	    if (isxdigit(c)) {
		do {
		    if (p < yyend) {
			*p++ = c;
		    }
		    /* the next shift would lose bits beyond 32 */
		    if (result > 0x0fffffffL) {
			overflow = TRUE;
		    }
		    if (isdigit(c)) {
			c -= '0';
		    } else {
			c = toupper(c) + 10 - 'A';
		    }
		    result <<= 4;
		    result += c;
		    c = gc();
		} while (isxdigit(c));
	    } else {
		/* not a hexadecimal constant */
		uc(c);
		c = *--p;
	    }
	    yynumber = result;
	} else {
	    while (c >= '0' && c <= '9') {
		if (c >= '8') {
		    badoctal = TRUE;
		}
		if (p < yyend) {
		    *p++ = c;
		}
		/* the next shift would lose bits beyond 32 */
		if (result > 0x1fffffffL) {
		    overflow = TRUE;
		}
		result <<= 3;
		result += c - '0';
		c = gc();
	    }
	    yynumber = result;

	    if (c == '.') {
		if (p < yyend) {
		    *p++ = c;
		}
		c = gc();
		if (c != '.') {
		    goto fraction;
		}
		/* ".." follows: put back one '.' here, the other below */
		--p; uc(c);
	    } else if (c == 'e' || c == 'E') {
		goto exponent;
	    }
	}
	uc(c);
	if (pp_level == 0) {
	    if (p == yyend) {
		error("too long integer constant");
	    } else if (badoctal) {
		error("bad octal constant");
	    } else if (overflow) {
		error("overflow in integer constant");
	    }
	}
	c = INT_CONST;
	break;

    case '1': case '2': case '3': case '4': case '5':
    case '6': case '7': case '8': case '9':
	/* decimal constant, or the start of a float */
	for (;;) {
	    /* adding this digit would exceed 2147483647 */
	    if (result >= 214748364L && (result > 214748364L || c >= '8')) {
		overflow = TRUE;
	    }
	    result *= 10;
	    result += c - '0';
	    c = gc();
	    if (!isdigit(c)) {
		break;
	    }
	    if (p < yyend) {
		*p++ = c;
	    }
	}
	yynumber = result;

	if (c == '.') {
	    if (p < yyend) {
		*p++ = c;
	    }
	    c = gc();
	    if (c != '.') {
		goto fraction;
	    }
	    /* ".." follows: put back one '.' here, the other below */
	    --p; uc(c);
	}
	if (c == 'e' || c == 'E') {
	    goto exponent;
	}
	uc(c);
	if (pp_level == 0) {
	    if (p == yyend) {
		error("too long integer constant");
	    } else if (overflow) {
		error("overflow in integer constant");
	    }
	}
	c = INT_CONST;
	break;

    case '_':
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
    case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
    case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
    case 'v': case 'w': case 'x': case 'y': case 'z':
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
    case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
    case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
    case 'V': case 'W': case 'X': case 'Y': case 'Z':
	/* identifier; characters beyond the buffer size are dropped */
	for (;;) {
	    c = gc();
	    if (!isalnum(c) && c != '_') {
		break;
	    }
	    if (p < yyend) {
		*p++ = c;
	    }
	}
	uc(c);
	if (pp_level == 0 && p == yyend) {
	    error("too long identifier");
	}
	c = IDENTIFIER;
	break;

    case '\'':
	/* character constant, the value is left in yynumber */
	c = gc();
	*p++ = c;
	if (c == '\'') {
	    if (pp_level == 0) {
		error("too short character constant");
	    }
	} else if (c == LF || c == EOF) {
	    if (pp_level == 0) {
		error("unterminated character constant");
	    }
	    uc(c);
	} else {
	    if (c == '\\') {
		p = tk_esc(p);
	    } else {
		yynumber = c;
	    }
	    c = gc();
	    *p++ = c;
	    if (c != '\'') {
		if (pp_level == 0) {
		    error("illegal character constant");
		}
		uc(c);
	    }
	}
	c = INT_CONST;
	break;

    case '"':
	seen_nl = FALSE;
	return tk_string('"');
    }
    *p = '\0';
    yyleng = p - yytext;
    seen_nl = FALSE;

    return c;
}
1070 
1071 /*
1072  * NAME:	token->skiptonl()
1073  * DESCRIPTION:	skip tokens until a newline or EOF is found. If the argument is
1074  *		TRUE, only whitespace is allowed.
1075  */
tk_skiptonl(int ws)1076 void tk_skiptonl(int ws)
1077 {
1078     pp_level++;
1079     for (;;) {
1080 	switch (tk_gettok()) {
1081 	case EOF:
1082 	    error("unterminated line");
1083 	    --pp_level;
1084 	    return;
1085 
1086 	case LF:
1087 	    --pp_level;
1088 	    return;
1089 
1090 	case ' ':
1091 	case HT:
1092 	    break;
1093 
1094 	default:
1095 	    if (ws) {
1096 		error("bad token in control");
1097 		ws = FALSE;
1098 	    }
1099 	    break;
1100 	}
1101     }
1102 }
1103 
/*
 * NAME:	token->expand()
 * DESCRIPTION:	expand a macro, pushing it on the input stream
 *		return: -1 if the macro is nested and is not expanded
 *			0 if the macro is ftn-like and the call isn't
 *			1 if the macro was expanded
 *		1 is also returned, without pushing anything, after an error
 *		in the arguments of a function-like macro or if the expansion
 *		is too large.
 */
int tk_expand(macro *mc)
{
    int token;

    if (tbuffer != ibuffer) {
	tbuf *tb;

	/* an identifier followed by a LF is marked as not to be expanded */
	token = gc();
	if (token == LF) {
	    return -1;
	}
	uc(token);

	/* don't expand a macro within its own expansion */
	tb = tbuffer;
	do {
	    if (tb->fd < -1 && tb->u.mc != (macro *) NULL &&
	      strcmp(mc->chain.name, tb->u.mc->chain.name) == 0) {
		return -1;
	    }
	    tb = tb->prev;
	} while (tb != ibuffer);
    }

    if (mc->narg >= 0) {
	char *args[MAX_NARG], *arg, ppbuf[MAX_REPL_SIZE];
	int narg;
	str *s;
	unsigned short startline, line;
	int errcount;

	/* errors in the call are reported for the line it started on */
	startline = ibuffer->line;

	/* skip white space, newlines and comments before a possible '(' */
	do {
	    token = gc();
	    if (token == '/') {
		token = gc();
		if (token == '*') {
		    comment(FALSE);
		    token = gc();
# ifdef SLASHSLASH
		} else if (token == '/') {
		    comment(TRUE);
		    token = gc();
# endif
		} else {
		    uc(token);
		}
		break;
	    }
	} while (token == ' ' || token == HT || token == LF);

	if (token != '(') {
	    /* macro is function-like, and this is not an invocation */
	    uc(token);
	    return 0;
	}

	/* scan arguments */
	narg = 0;
	errcount = 0;
	pp_level++;
	s = pps_new(ppbuf, sizeof(ppbuf));
	do {
	    token = tk_gettok();
	} while (token == ' ' || token == HT || token == LF);

	if (token != ')' || mc->narg != 0) {
	    int paren;
	    bool seen_space, seen_sep;

	    paren = 0;
	    seen_space = FALSE;
	    seen_sep = FALSE;

	    for (;;) {
		if (token == EOF) {	/* sigh */
		    line = ibuffer->line;
		    ibuffer->line = startline;
		    error("EOF in macro call");
		    ibuffer->line = line;
		    errcount++;
		    break;
		}

		if ((token == ',' || token == ')') && paren == 0) {
		    /* end of an argument */
		    if (s->len < 0) {
			line = ibuffer->line;
			ibuffer->line = startline;
			error("macro argument too long");
			ibuffer->line = line;
			errcount++;
		    } else if (narg < mc->narg) {
			arg = ALLOCA(char, s->len + 1);
			args[narg] = strcpy(arg, ppbuf);
		    }
		    narg++;
		    if (token == ')') {
			break;
		    }

		    s->len = 0;

		    /* skip leading white space of the next argument */
		    do {
			token = tk_gettok();
		    } while (token == ' ' || token == HT || token == LF);
		    seen_space = FALSE;
		    seen_sep = FALSE;
		} else {
		    /* white space within an argument becomes a single char */
		    if (seen_space) {
			pps_ccat(s, ' ');
			seen_space = FALSE;
			seen_sep = FALSE;
		    } else if (seen_sep) {
			pps_ccat(s, HT);
			seen_sep = FALSE;
		    }
		    pps_scat(s, yytext);
		    if (token == '(') {
			paren++;
		    } else if (token == ')') {
			--paren;
		    }

		    for (;;) {
			token = tk_gettok();
			if (token == ' ' || token == LF) {
			    seen_space = TRUE;
			} else if (token == HT) {
			    seen_sep = TRUE;
			} else {
			    break;
			}
		    }
		}
	    }
	}
	--pp_level;

	if (errcount == 0 && narg != mc->narg) {
	    error("macro argument count mismatch");
	    errcount++;
	}

	if (errcount > 0) {
	    if (narg > mc->narg) {
		narg = mc->narg;
	    }
	    while (narg > 0) {
		--narg;
		AFREE(args[narg]);
	    }
	    pps_del(s);
	    return 1;	/* skip this macro */
	}

	if (narg > 0) {
	    /* read the replacement text, substituting the arguments */
	    push((macro *) NULL, mc->replace, strlen(mc->replace), TRUE);
	    s->len = 0;

	    pp_level++;
	    while ((token=tk_gettok()) != EOF) {
		if (token == MARK) {	/* macro argument follows */
		    /* argument number and flags are in the next character */
		    token = gc();
		    narg = token & MA_NARG;
		    if (token & MA_STRING) {
			char *p;

			/* copy it, inserting \ before \ and " */
			push((macro *) NULL, args[narg], strlen(args[narg]),
			     TRUE);
			pps_ccat(s, '"');
			while ((token=tk_gettok()) != EOF) {
			    if (token != HT) {
				p = yytext;
				if (*p == '\'' || *p == '"') {
				    /* escape \ and " */
				    do {
					if (*p == '"' || *p == '\\') {
					    pps_ccat(s, '\\');
					}
					pps_ccat(s, *p++);
				    } while (*p != '\0');
				} else {
				    /* just add token */
				    pps_scat(s, yytext);
				}
			    }
			}
			pps_ccat(s, '"');
			pop();
		    } else if (token & MA_NOEXPAND) {

			/*
			 * if the previous token was a not-to-expand macro,
			 * make it a normal identifier
			 */
			if (s->len > 0 && ppbuf[s->len - 1] == LF) {
			    s->len--;
			}

			push((macro *) NULL, args[narg], strlen(args[narg]),
			     TRUE);
			token = tk_gettok();
			/*
			 * if the first token of the argument is a
			 * not-to-expand macro, make it a normal identifier
			 */
			if (token == IDENTIFIER && (narg=gc()) != LF) {
			    uc(narg);
			}
			while (token != EOF) {
			    pps_scat(s, yytext);
			    token = tk_gettok();
			}
			pop();
		    } else {

			/* preprocess the argument */
			push((macro *) NULL, args[narg], strlen(args[narg]),
			     TRUE);
			while ((token=tk_gettok()) != EOF) {
			    if (token == IDENTIFIER) {
				macro *m;

				if ((m=mc_lookup(yytext)) != (macro *) NULL) {
				    token = tk_expand(m);
				    if (token > 0) {
					/* expanded: read the expansion */
					continue;
				    }
				    if (token < 0) {
					/* mark it as not to be expanded */
					pps_scat(s, yytext);
					pps_ccat(s, LF);
					continue;
				    }
				}
			    }
			    pps_scat(s, yytext);
			}
			pop();
		    }
		} else {
		    /* copy this token */
		    pps_scat(s, yytext);
		}
	    }
	    --pp_level;
	    pop();

	    /* cleanup */
	    narg = mc->narg;
	    do {
		--narg;
		AFREE(args[narg]);
	    } while (narg > 0);

	    narg = s->len;	/* so s can be deleted before the push */
	    pps_del(s);
	    if (narg < 0) {
		error("macro expansion too large");
	    } else {
		/* the copy is freed by pop(), as mc is function-like */
		push(mc, strcpy(ALLOC(char, narg + 1), ppbuf), narg, FALSE);
	    }
	    return 1;
	}
    }

    /* manifest constant, or function-like macro without arguments */
    if (mc->replace != (char *) NULL) {
	push(mc, mc->replace, strlen(mc->replace), FALSE);
    } else {
	char *p;

	/* no replacement text: ask special_replace() for it */
	p = special_replace(mc->chain.name);
	push(mc, p, strlen(p), FALSE);
    }

    return 1;
}
1389