1 /*
2  * Copyright (c) 1998, 1999, 2000, 2002, 2003, 2008
3  *	Tama Communications Corporation
4  *
5  * This file is part of GNU GLOBAL.
6  *
7  * This program is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation, either version 3 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
19  */
20 
21 #ifdef HAVE_CONFIG_H
22 #include <config.h>
23 #endif
24 #include <ctype.h>
25 #include <stdio.h>
26 #ifdef HAVE_STRING_H
27 #include <string.h>
28 #else
29 #include <strings.h>
30 #endif
31 
32 #include "die.h"
33 #include "gparam.h"
34 #include "strlimcpy.h"
35 #include "token.h"
36 
/*
 * File input method.
 *
 * Tokenizer state shared with the parsers.  Only one file is handled at a
 * time: opentoken() resets this state and closetoken() releases the file.
 */
int lineno;			/* reset to 0 by opentoken(); reported in warnings
				 * (presumably advanced by nextchar() -- TODO confirm) */
/*
 * Pointers into the current line:
 *	sp: start of the line (atfirst_exceptspace() scans from here)
 *	cp: read position; pushbackchar() steps it back, and when it is
 *	    NULL pushbackchar() restores it from lp
 *	lp: position used in place of cp while cp is NULL
 */
const char *sp, *cp, *lp;
int crflag;			/* 1: return '\n', 0: doesn't return */
int cmode;			/* allow tokens which start with '#' */
int cppmode;			/* allow '::' as a token */
int ymode;			/* allow tokens which start with '%' */
char token[MAXTOKEN];		/* text of the token built by nexttoken() */
char curfile[MAXPATHLEN];	/* file name copied in by opentoken() */
int continued_line;		/* previous line ends with '\' */

static char ptok[MAXTOKEN];	/* token text saved by pushbacktoken() */
static int lasttok;		/* last value returned by nexttoken() */
static FILE *ip;		/* input file opened by opentoken() */
static STRBUF *ib;		/* string buffer opened by opentoken() */

/* Length of the token built so far; needs a local 'p' pointing into token[]. */
#define tlen	(p - &token[0])
static void pushbackchar(void);
57 
58 /**
59  * opentoken:
60  *
61  *	@param[in]	file
62  */
63 int
opentoken(const char * file)64 opentoken(const char *file)
65 {
66 	/*
67 	 * b flag is needed for WIN32 environment. Almost unix ignore it.
68 	 */
69 	if ((ip = fopen(file, "rb")) == NULL)
70 		return 0;
71 	ib = strbuf_open(MAXBUFLEN);
72 	strlimcpy(curfile, file, sizeof(curfile));
73 	sp = cp = lp = NULL; ptok[0] = '\0'; lineno = 0;
74 	crflag = cmode = cppmode = ymode = 0;
75 	continued_line = 0;
76 	return 1;
77 }
78 /**
79  * closetoken:
80  */
81 void
closetoken(void)82 closetoken(void)
83 {
84 	strbuf_close(ib);
85 	fclose(ip);
86 }
87 
88 /*
89  * nexttoken: get next token
90  *
91  *	@param[in]	interested	interested special character
92  *				if NULL then all character.
93  *	@param[in]	reserved	converter from token to token number
94  *				if this is specified, nexttoken() return
95  *				word number, else return symbol.
96  *	@return	EOF(-1)	end of file,
97  *		c ==0		symbol (SYMBOL; token has the value.),
98  *		c < 256		interested special character,
99  *		c > 1000	reserved word
100  *
101  * [Note] nexttoken() doesn't return followings:
102  * - comment
103  * - space (' ', '\t', '\f', '\v', '\r')
104  * - quoted string ("...", '.')
105  * - number
106  */
107 
108 int
nexttoken(const char * interested,int (* reserved)(const char *,int))109 nexttoken(const char *interested, int (*reserved)(const char *, int))
110 {
111 	int c;
112 	char *p;
113 	int sharp = 0;
114 	int percent = 0;
115 
116 	/* check push back buffer */
117 	if (ptok[0]) {
118 		strlimcpy(token, ptok, sizeof(token));
119 		ptok[0] = '\0';
120 		return lasttok;
121 	}
122 
123 	for (;;) {
124 		/* skip spaces */
125 		if (!crflag)
126 			while ((c = nextchar()) != EOF && isspace(c))
127 				;
128 		else
129 			while ((c = nextchar()) != EOF && isspace(c) && c != '\n')
130 				;
131 		if (c == EOF || c == '\n')
132 			break;
133 
134 		if (c == '"' || c == '\'') {	/* quoted string */
135 			int quote = c;
136 
137 			while ((c = nextchar()) != EOF) {
138 				if (c == quote)
139 					break;
140 				if (quote == '\'' && c == '\n')
141 					break;
142 				if (c == '\\' && (c = nextchar()) == EOF)
143 					break;
144 			}
145 		} else if (c == '/') {			/* comment */
146 			if ((c = nextchar()) == '/') {
147 				while ((c = nextchar()) != EOF)
148 					if (c == '\n') {
149 						pushbackchar();
150 						break;
151 					}
152 			} else if (c == '*') {
153 				while ((c = nextchar()) != EOF) {
154 					if (c == '*') {
155 						if ((c = nextchar()) == '/')
156 							break;
157 						pushbackchar();
158 					}
159 				}
160 			} else
161 				pushbackchar();
162 		} else if (c == '\\') {
163 			if (nextchar() == '\n')
164 				continued_line = 1;
165 		} else if (isdigit(c)) {		/* digit */
166 			while ((c = nextchar()) != EOF && (c == '.' || isalnum(c)))
167 				;
168 			pushbackchar();
169 		} else if (c == '#' && cmode) {
170 			/* recognize '##' as a token if it is reserved word. */
171 			if (peekc(1) == '#') {
172 				p = token;
173 				*p++ = c;
174 				*p++ = nextchar();
175 				*p   = 0;
176 				if (reserved && (c = (*reserved)(token, tlen)) == 0)
177 					break;
178 			} else if (!continued_line && atfirst_exceptspace()) {
179 				sharp = 1;
180 				continue;
181 			}
182 		} else if (c == ':' && cppmode && peekc(1) == ':') {
183 			p = token;
184 			*p++ = c;
185 			*p++ = nextchar();
186 			*p   = 0;
187 			if (reserved && (c = (*reserved)(token, tlen)) == 0)
188 				break;
189 		} else if (c == '%' && ymode) {
190 			/* recognize '%%' as a token if it is reserved word. */
191 			if (atfirst) {
192 				p = token;
193 				*p++ = c;
194 				if ((c = peekc(1)) == '%' || c == '{' || c == '}') {
195 					*p++ = nextchar();
196 					*p   = 0;
197 					if (reserved && (c = (*reserved)(token, tlen)) != 0)
198 						break;
199 				} else if (!isspace(c)) {
200 					percent = 1;
201 					continue;
202 				}
203 			}
204 		} else if (c & 0x80 || isalpha(c) || c == '_') {/* symbol */
205 			p = token;
206 			if (sharp) {
207 				sharp = 0;
208 				*p++ = '#';
209 			} else if (percent) {
210 				percent = 0;
211 				*p++ = '%';
212 			} else if (c == 'L') {
213 				int tmp = peekc(1);
214 
215 				if (tmp == '\"' || tmp == '\'')
216 					continue;
217 			}
218 			for (*p++ = c; (c = nextchar()) != EOF && (c & 0x80 || isalnum(c) || c == '_');) {
219 				if (tlen < sizeof(token))
220 					*p++ = c;
221 			}
222 			if (tlen == sizeof(token)) {
223 				warning("symbol name is too long. (Ignored) [+%d %s]", lineno, curfile);
224 				token[0] = '\0';
225 				continue;
226 			}
227 			*p = 0;
228 
229 			if (c != EOF)
230 				pushbackchar();
231 			/* convert token string into token number */
232 			c = SYMBOL;
233 			if (reserved)
234 				c = (*reserved)(token, tlen);
235 			break;
236 		} else {				/* special char */
237 			if (interested == NULL || strchr(interested, c))
238 				break;
239 			/* otherwise ignore it */
240 		}
241 		sharp = percent = 0;
242 	}
243 	return lasttok = c;
244 }
245 /**
246  * pushbacktoken: push back token
247  *
248  *	following nexttoken() return same token again.
249  */
250 void
pushbacktoken(void)251 pushbacktoken(void)
252 {
253 	strlimcpy(ptok, token, sizeof(ptok));
254 }
255 /**
256  * peekc: peek next char
257  *
258  *	@param[in]	immediate	0: ignore blank, 1: include blank
259  *
260  * peekc() read ahead following blanks but doesn't change line.
261  */
262 int
peekc(int immediate)263 peekc(int immediate)
264 {
265 	int c;
266 	long pos;
267     int comment = 0;
268 
269 	if (cp != NULL) {
270 		if (immediate)
271 			c = nextchar();
272 		else
273             while ((c = nextchar()) != EOF && c != '\n') {
274                 if (c == '/') {			/* comment */
275                     if ((c = nextchar()) == '/') {
276                         while ((c = nextchar()) != EOF)
277                             if (c == '\n') {
278                                 pushbackchar();
279                                 break;
280                             }
281                     } else if (c == '*') {
282                         comment = 1;
283                         while ((c = nextchar()) != EOF) {
284                             if (c == '*') {
285                                 if ((c = nextchar()) == '/')
286                                 {
287                                     comment = 0;
288                                     break;
289                                 }
290                             }
291                             else if (c == '\n')
292                             {
293                                 pushbackchar();
294                                 break;
295                             }
296                         }
297                     } else
298                         pushbackchar();
299                 }
300                 else if (!isspace(c))
301                     break;
302             }
303 		if (c != EOF)
304 			pushbackchar();
305 		if (c != '\n' || immediate)
306 			return c;
307 	}
308 	pos = ftell(ip);
309 	if (immediate)
310 		c = getc(ip);
311 	else
312         while ((c = getc(ip)) != EOF) {
313             if (comment) {
314                 while ((c = getc(ip)) != EOF) {
315                     if (c == '*') {
316                         if ((c = getc(ip)) == '/')
317                         {
318                             comment = 0;
319                             break;
320                         }
321                     }
322                 }
323             }
324             else if (c == '/') {			/* comment */
325                 if ((c = getc(ip)) == '/') {
326                     while ((c = getc(ip)) != EOF)
327                         if (c == '\n') {
328                             break;
329                         }
330                 } else if (c == '*') {
331                     while ((c = getc(ip)) != EOF) {
332                         if (c == '*') {
333                             if ((c = getc(ip)) == '/')
334                                 break;
335                         }
336                     }
337                 } else
338                     break;
339             }
340             else if (!isspace(c))
341                 break;
342         }
343 
344 	(void)fseek(ip, pos, SEEK_SET);
345 
346 	return c;
347 }
/**
 * throwaway_nextchar: read the next character and discard it
 */
void
throwaway_nextchar(void)
{
	nextchar();
}
356 /**
357  * atfirst_exceptspace: return if current position is the first column
358  *			except for space.
359  *	|      1 0
360  *      |      v v
361  *	|      # define
362  */
363 int
atfirst_exceptspace(void)364 atfirst_exceptspace(void)
365 {
366 	const char *start = sp;
367 	const char *end = cp ? cp - 1 : lp;
368 
369 	while (start < end && *start && isspace(*start))
370 		start++;
371 	return (start == end) ? 1 : 0;
372 }
373 /**
374  * pushbackchar: push back character.
375  *
376  *	following nextchar() return same character again.
377  *
378  */
379 static void
pushbackchar(void)380 pushbackchar(void)
381 {
382         if (sp == NULL)
383                 return;         /* nothing to do */
384         if (cp == NULL)
385                 cp = lp;
386         else
387                 --cp;
388 }
389