1 /* @(#)clex.c	1.24 19/01/16 Copyright 1985, 1999-2019 J. Schilling */
2 #include <schily/mconfig.h>
3 #ifndef lint
4 static	UConst char sccsid[] =
5 	"@(#)clex.c	1.24 19/01/16 Copyright 1985, 1999-2019 J. Schilling";
6 #endif
7 /*
8  *	A program to produce a static calltree for C-functions
9  *
 *	lexical section
11  *
12  *	Copyright (c) 1985, 1999-2019 J. Schilling
13  */
14 /*
15  * The contents of this file are subject to the terms of the
16  * Common Development and Distribution License, Version 1.0 only
17  * (the "License").  You may not use this file except in compliance
18  * with the License.
19  *
20  * See the file CDDL.Schily.txt in this distribution for details.
21  * A copy of the CDDL is also available via the Internet at
22  * http://www.opensource.org/licenses/cddl1.txt
23  *
24  * When distributing Covered Code, include this CDDL HEADER in each
25  * file and include the License file CDDL.Schily.txt from this distribution.
26  */
27 
28 #include <schily/stdio.h>
29 #include <schily/standard.h>
30 #include <schily/schily.h>
31 #include "sym.h"
32 #include "clex.h"
33 
34 extern BOOL debug;
35 
36 #define	LEXBSIZE	2048
37 #define	lexbend		&lexbuf[LEXBSIZE-1] /* no var to make it not trashable*/
38 
39 EXPORT	unsigned char	lexbuf[LEXBSIZE];
40 EXPORT	char	*lexfile;
41 EXPORT	int	lexline;
42 LOCAL	int	lnextc;
43 LOCAL	int	lextype;
44 
45 EXPORT	void	clexinit	__PR((void));
46 EXPORT	int	clex		__PR((FILE *fp));
47 LOCAL	void	sympanic	__PR((char *txt));
48 
49 /*
50  * Initialize lexter state.
51  */
52 EXPORT void
clexinit()53 clexinit()
54 {
55 	lnextc = ' ';
56 	lextype = T_NONE;
57 	lexfile = NULL;
58 	lexline = 1;
59 }
60 
61 /*
62  * Get next token from input.
63  */
64 EXPORT int
clex(fp)65 clex(fp)
66 	FILE	*fp;
67 {
68 	register unsigned char	*bp;
69 	register int		c;
70 
71 /* if (lnextc == '\n')*/
72 /*	lexline--;*/
73 
74 again:
75 	bp = lexbuf;
76 	*bp = c = lnextc;
77 	lextype = T_NONE;
78 
79 	for (; ; *++bp = c = getc(fp)) {
80 
81 /*		printf("<%c:%3o>:%s(%d)[%s]\n", *bp, *bp, lextnames[lextype], lextype, lexbuf);flush();*/
82 		if (bp >= lexbend)
83 			sympanic("in lexer");
84 		if (c == EOF) {
85 			if (lextype == T_NONE) {
86 				lextype = T_EOF;
87 			} else {
88 				break;
89 			}
90 
91 		} else switch (c) {
92 
93 		case  ' ': case  '\t': case  '\b':
94 		case  '\v': case  '\f':
95 		case  '\r': case  '\n':
96 			/*
97 			 * blank
98 			 */
99 			if (c == '\n')
100 				lexline++;
101 			if (lextype == T_NONE) {
102 				bp--;
103 			} else {
104 				if (c == '\n')
105 					lexline--;
106 				goto out;
107 			}
108 			break;
109 
110 		case '\'':
111 			/*
112 			 * a character
113 			 */
114 			if (lextype == T_NONE) {
115 				lextype = T_CHAR;
116 				if ((*++bp = getc(fp)) == '\\')
117 					*++bp = getc(fp);
118 				if (*bp == '\n')
119 					lexline++;
120 				if ((*++bp = getc(fp)) != '\'')
121 					lextype = T_ERROR;
122 				if (*bp == '\n')
123 					lexline++;
124 			} else {
125 				goto out;
126 			}
127 			break;
128 
129 		case '"':
130 			/*
131 			 * a string
132 			 */
133 			if (lextype == T_NONE) {
134 				lextype = T_STRING;
135 				while ((c = getc(fp)) != EOF) {
136 					*++bp = c;
137 					if (bp >= lexbend)
138 						sympanic("in string");
139 					if (c == '\n') {
140 						lexline++;
141 					} else if (c == '\\') {
142 						*++bp = getc(fp);
143 					} else if (c == '"') {
144 						*++bp = getc(fp);
145 						break;
146 					}
147 				}
148 			}
149 			goto out;
150 
151 		case '/':
152 			/*
153 			 * a divison or the start of a comment
154 			 */
155 			if (lextype == T_NONE) {
156 				lextype = T_OPER;
157 				if ((*++bp = getc(fp)) == '*') {
158 					lextype = T_COMMENT;
159 					while ((c = getc(fp)) != EOF) {
160 						*bp = c;
161 					comment:
162 						if (bp >= lexbend)
163 							sympanic("in comment");
164 						if (c == '\n') {
165 							lexline++;
166 						} else if (c == '*') {
167 							if ((*bp++ = getc(fp)) == '/') {
168 								break;
169 							} else {
170 								c = *--bp;
171 								goto comment;
172 							}
173 						}
174 					}
175 				} else {
176 					goto out;
177 				}
178 			} else {
179 				goto out;
180 			}
181 			break;
182 
183 		case ';':
184 			/*
185 			 * the end of a statement
186 			 */
187 			if (lextype == T_NONE)
188 				lextype = T_SEMI;
189 			else
190 				goto out;
191 			break;
192 
193 		case '(':
194 			/*
195 			 * This may be the start of an argument list...
196 			 */
197 			if (lextype == T_NONE)
198 				lextype = T_OPEN;
199 			else
200 				goto out;
201 			break;
202 
203 		case ')':
204 			/*
205 			 * This may be the end of an argument list...
206 			 */
207 			if (lextype == T_NONE)
208 				lextype = T_CLOSE;
209 			else
210 				goto out;
211 			break;
212 
213 		case ',':
214 			if (lextype == T_NONE)
215 				lextype = T_COMMA;
216 			else
217 				goto out;
218 			break;
219 
220 		case '{':
221 			/*
222 			 * Start of a block
223 			 */
224 			if (lextype == T_NONE)
225 				lextype = T_LCURLY;
226 			else
227 				goto out;
228 			break;
229 
230 		case '}':
231 			/*
232 			 * End of a block
233 			 */
234 			if (lextype == T_NONE)
235 				lextype = T_RCURLY;
236 			else
237 				goto out;
238 			break;
239 
240 		case '#':
241 			if (lextype == T_NONE) {
242 				lextype = T_HASH;
243 			} else {
244 				goto out;
245 			}
246 			break;
247 
248 		case '!': case '%': case '&': case '*': case '+': case '-':
249 		case '.': case ':': case '<': case '=': case '>': case '?':
250 		case '[': case '\\': case ']': case '^': case '|': case '~':
251 		case '`':			/* '`' is GCOS BCD constant */
252 			if (lextype == T_NONE) {
253 				lextype = T_OPER;
254 			} else {
255 				goto out;
256 			}
257 			break;
258 
259 		case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
260 		case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
261 		case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
262 		case 's': case 't': case 'u': case 'v': case 'w': case 'x':
263 		case 'y': case 'z':
264 		case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
265 		case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
266 		case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
267 		case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
268 		case 'Y': case 'Z':
269 		case '_':
270 		case '$':	/* Allow VMS style identifiers */
271 			/*
272 			 * alpha characters (may be an identifier)
273 			 */
274 			if (lextype == T_NONE)
275 				lextype = T_ALPHA;
276 			else if (lextype != T_ALPHA)
277 				goto out;
278 			break;
279 
280 		case '0': case '1': case '2': case '3': case '4':
281 		case '5': case '6': case '7': case '8': case '9':
282 			/*
283 			 * numeric characters
284 			 */
285 			if (lextype == T_NONE)
286 				lextype = T_NUMBER;
287 			else if ((lextype != T_NUMBER) && (lextype != T_ALPHA))
288 				goto out;
289 			break;
290 
291 		default:
292 /*			error("default: '%c'\n", c);*/
293 			/*
294 			 * anything else ...
295 			 */
296 			if (lextype == T_NONE)
297 				lextype = T_ERROR;
298 			else
299 				goto out;
300 			break;
301 		}
302 	}
303 out:
304 	if (c == EOF)
305 		lnextc = c;
306 	else
307 		lnextc = *bp;
308 	*bp = '\0';
309 	if (bp >= lexbend)
310 		sympanic("");
311 	if (lextype == T_ALPHA) {
312 		if (keyword((char *)lexbuf))
313 			lextype = T_KEYW;
314 	}
315 	if (lextype == T_COMMENT) {
316 		goto again;
317 	}
318 
319 	if (debug > 1) {
320 		error("%s:%d %s: %s\n",
321 				lexfile, lexline, lextnames[lextype], lexbuf);
322 	}
323 	return (lextype);
324 }
325 
326 LOCAL void
sympanic(txt)327 sympanic(txt)
328 	char	*txt;
329 {
330 	comerrno(EX_BAD, "PANIC: Symbol buffer too short %s line %d in file '%s'\n",
331 		txt,
332 		lexline,
333 		lexfile);
334 }
335 
/*
 * Printable names of the token types, terminated by a NULL entry.
 * clex() indexes this table with the token type for its debug output.
 * NOTE(review): the order presumably has to match the numeric values
 * of the T_* constants declared outside this file - verify before
 * adding or moving an entry.
 */
char	*lextnames[] = {
	/* markers */
	"NONE", "EOF", "ERROR",
	/* punctuation */
	"HASH", "LCURLY", "RCURLY", "OPEN", "CLOSE", "SEMI", "COMMA", "OPER",
	/* constants, words and comments */
	"CHAR", "STRING", "ALPHA", "NUMBER", "COMMENT", "KEYW",
	/* end of table */
	NULL
};
356