1 /*
2  * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
3  *
4  * This file is part of Jam - see jam.c for Copyright information.
5  */
6 
7 /*
8  * scan.c - the jam yacc scanner
9  *
10  * 12/26/93 (seiwald) - bump buf in yylex to 10240 - yuk.
11  * 09/16/94 (seiwald) - check for overflows, unmatched {}'s, etc.
12  *			Also handle tokens abutting EOF by remembering
 *			to return EOF no matter how many times yylex()
14  *			reinvokes yyline().
15  * 02/11/95 (seiwald) - honor only punctuation keywords if SCAN_PUNCT.
16  * 07/27/95 (seiwald) - Include jamgram.h after scan.h, so that YYSTYPE is
17  *			defined before Linux's yacc tries to redefine it.
18  * 01/10/01 (seiwald) - \ can now escape any whitespace char
19  * 11/04/02 (seiwald) - const-ing for string literals
20  */
21 
# include "jam.h"
# include "lists.h"
# include "parse.h"
# include "scan.h"
# include "jamgram.h"
# include "jambase.h"
# include "newstr.h"

# include <stdlib.h>	/* exit(), EXIT_FAILURE */
29 
30 struct keyword {
31 	const char *word;
32 	int type;
33 } keywords[] = {
34 # include "jamgramtab.h"
35 	{ 0, 0 }
36 } ;
37 
/*
 * struct include - one input source on the chain headed by incp.
 *
 * yyfparse() pushes a new entry; yyline() pops and frees it once the
 * source is exhausted.
 */

struct include {
	struct include	*next;		/* entry that was current before this one */
	const char	*string;	/* next unread character of current line */
	char		**strings;	/* remaining lines when reading jambase */
	FILE		*file;		/* opened by yyline() on first read, else 0 */
	const char	*fname;		/* source name, copied with copystr() */
	int		line;		/* lines read so far, for error messages */
	char		buf[ 512 ];	/* fgets() destination for file input */
} ;
47 
48 static struct include *incp = 0; /* current file; head of chain */
49 
50 static int scanmode = SCAN_NORMAL;
51 static int anyerrors = 0;
52 static char *symdump( YYSTYPE *s );
53 
54 # define BIGGEST_TOKEN 10240	/* no single token can be larger */
55 
56 /*
57  * Set parser mode: normal, string, or keyword
58  */
59 
60 void
yymode(int n)61 yymode( int n )
62 {
63 	scanmode = n;
64 }
65 
66 void
yyerror(const char * s)67 yyerror( const char *s )
68 {
69 	if( incp )
70 	    printf( "%s: line %d: ", incp->fname, incp->line );
71 
72 	printf( "%s at %s\n", s, symdump( &yylval ) );
73 
74 	++anyerrors;
75 }
76 
77 int
yyanyerrors()78 yyanyerrors()
79 {
80 	return anyerrors != 0;
81 }
82 
83 void
yyfparse(const char * s)84 yyfparse( const char *s )
85 {
86 	struct include *i = (struct include *)malloc( sizeof( *i ) );
87 
88 	/* Push this onto the incp chain. */
89 
90 	i->string = "";
91 	i->strings = 0;
92 	i->file = 0;
93 	i->fname = copystr( s );
94 	i->line = 0;
95 	i->next = incp;
96 	incp = i;
97 
98 	/* If the filename is "+", it means use the internal jambase. */
99 
100 	if( !strcmp( s, "+" ) )
101 	    i->strings = jambase;
102 }
103 
104 /*
105  * yyline() - read new line and return first character
106  *
107  * Fabricates a continuous stream of characters across include files,
108  * returning EOF at the bitter end.
109  */
110 
111 int
yyline()112 yyline()
113 {
114 	struct include *i = incp;
115 
116 	if( !incp )
117 	    return EOF;
118 
119 	/* Once we start reading from the input stream, we reset the */
120 	/* include insertion point so that the next include file becomes */
121 	/* the head of the list. */
122 
123 	/* If there is more data in this line, return it. */
124 
125 	if( *i->string )
126 	    return *i->string++;
127 
128 	/* If we're reading from an internal string list, go to the */
129 	/* next string. */
130 
131 	if( i->strings )
132 	{
133 	    if( !*i->strings )
134 		goto next;
135 
136 	    i->line++;
137 	    i->string = *(i->strings++);
138 	    return *i->string++;
139 	}
140 
141 	/* If necessary, open the file */
142 
143 	if( !i->file )
144 	{
145 	    FILE *f = stdin;
146 
147 	    if( strcmp( i->fname, "-" ) && !( f = fopen( i->fname, "r" ) ) )
148 		perror( i->fname );
149 
150 	    i->file = f;
151 	}
152 
153 	/* If there's another line in this file, start it. */
154 
155 	if( i->file && fgets( i->buf, sizeof( i->buf ), i->file ) )
156 	{
157 	    i->line++;
158 	    i->string = i->buf;
159 	    return *i->string++;
160 	}
161 
162     next:
163 	/* This include is done.  */
164 	/* Free it up and return EOF so yyparse() returns to parse_file(). */
165 
166 	incp = i->next;
167 
168 	/* Close file, free name */
169 
170 	if( i->file && i->file != stdin )
171 	    fclose( i->file );
172 	freestr( i->fname );
173 	free( (char *)i );
174 
175 	return EOF;
176 }
177 
178 /*
179  * yylex() - set yylval to current token; return its type
180  *
181  * Macros to move things along:
182  *
183  *	yychar() - return and advance character; invalid after EOF
184  *	yyprev() - back up one character; invalid before yychar()
185  *
186  * yychar() returns a continuous stream of characters, until it hits
187  * the EOF of the current include file.
188  */
189 
190 # define yychar() ( *incp->string ? *incp->string++ : yyline() )
191 # define yyprev() ( incp->string-- )
192 
193 int
yylex()194 yylex()
195 {
196 	int c;
197 	char buf[BIGGEST_TOKEN];
198 	char *b = buf;
199 
200 	if( !incp )
201 	    goto eof;
202 
203 	/* Get first character (whitespace or of token) */
204 
205 	c = yychar();
206 
207 	if( scanmode == SCAN_STRING )
208 	{
209 	    /* If scanning for a string (action's {}'s), look for the */
210 	    /* closing brace.  We handle matching braces, if they match! */
211 
212 	    int nest = 1;
213 
214 	    while( c != EOF && b < buf + sizeof( buf ) )
215 	    {
216 		    if( c == '{' )
217 			nest++;
218 
219 		    if( c == '}' && !--nest )
220 			break;
221 
222 		    *b++ = c;
223 
224 		    c = yychar();
225 	    }
226 
227 	    /* We ate the ending brace -- regurgitate it. */
228 
229 	    if( c != EOF )
230 		yyprev();
231 
232 	    /* Check obvious errors. */
233 
234 	    if( b == buf + sizeof( buf ) )
235 	    {
236 		yyerror( "action block too big" );
237 		goto eof;
238 	    }
239 
240 	    if( nest )
241 	    {
242 		yyerror( "unmatched {} in action block" );
243 		goto eof;
244 	    }
245 
246 	    *b = 0;
247 	    yylval.type = STRING;
248 	    yylval.string = newstr( buf );
249 
250 	}
251 	else
252 	{
253 	    char *b = buf;
254 	    struct keyword *k;
255 	    int inquote = 0;
256 	    int notkeyword;
257 
258 	    /* Eat white space */
259 
260 	    for( ;; )
261 	    {
262 		/* Skip past white space */
263 
264 		while( c != EOF && isspace( c ) )
265 			c = yychar();
266 
267 		/* Not a comment?  Swallow up comment line. */
268 
269 		if( c != '#' )
270 			break;
271 		while( ( c = yychar() ) != EOF && c != '\n' )
272 			;
273 	    }
274 
275 	    /* c now points to the first character of a token. */
276 
277 	    if( c == EOF )
278 		goto eof;
279 
280 	    /* While scanning the word, disqualify it for (expensive) */
281 	    /* keyword lookup when we can: $anything, "anything", \anything */
282 
283 	    notkeyword = c == '$';
284 
285 	    /* look for white space to delimit word */
286 	    /* "'s get stripped but preserve white space */
287 	    /* \ protects next character */
288 
289 	    while(
290 		c != EOF &&
291 		b < buf + sizeof( buf ) &&
292 		( inquote || !isspace( c ) ) )
293 	    {
294 		if( c == '"' )
295 		{
296 		    /* begin or end " */
297 		    inquote = !inquote;
298 		    notkeyword = 1;
299 		}
300 		else if( c != '\\' )
301 		{
302 		    /* normal char */
303 		    *b++ = c;
304 		}
305 		else if( ( c = yychar()) != EOF )
306 		{
307 		    /* \c */
308 		    *b++ = c;
309 		    notkeyword = 1;
310 		}
311 		else
312 		{
313 		    /* \EOF */
314 		    break;
315 		}
316 
317 		c = yychar();
318 	    }
319 
320 	    /* Check obvious errors. */
321 
322 	    if( b == buf + sizeof( buf ) )
323 	    {
324 		yyerror( "string too big" );
325 		goto eof;
326 	    }
327 
328 	    if( inquote )
329 	    {
330 		yyerror( "unmatched \" in string" );
331 		goto eof;
332 	    }
333 
334 	    /* We looked ahead a character - back up. */
335 
336 	    if( c != EOF )
337 		yyprev();
338 
339 	    /* scan token table */
340 	    /* don't scan if it's obviously not a keyword or if its */
341 	    /* an alphabetic when were looking for punctuation */
342 
343 	    *b = 0;
344 	    yylval.type = ARG;
345 
346 	    if( !notkeyword && !( isalpha( *buf ) && scanmode == SCAN_PUNCT ) )
347 	    {
348 		for( k = keywords; k->word; k++ )
349 		    if( *buf == *k->word && !strcmp( k->word, buf ) )
350 		{
351 		    yylval.type = k->type;
352 		    yylval.string = k->word;	/* used by symdump */
353 		    break;
354 		}
355 	    }
356 
357 	    if( yylval.type == ARG )
358 		yylval.string = newstr( buf );
359 	}
360 
361 	if( DEBUG_SCAN )
362 		printf( "scan %s\n", symdump( &yylval ) );
363 
364 	return yylval.type;
365 
366 eof:
367 	yylval.type = EOF;
368 	return yylval.type;
369 }
370 
371 static char *
symdump(YYSTYPE * s)372 symdump( YYSTYPE *s )
373 {
374 	static char buf[ BIGGEST_TOKEN + 20 ];
375 
376 	switch( s->type )
377 	{
378 	case EOF:
379 		sprintf( buf, "EOF" );
380 		break;
381 	case 0:
382 		sprintf( buf, "unknown symbol %s", s->string );
383 		break;
384 	case ARG:
385 		sprintf( buf, "argument %s", s->string );
386 		break;
387 	case STRING:
388 		sprintf( buf, "string \"%s\"", s->string );
389 		break;
390 	default:
391 		sprintf( buf, "keyword %s", s->string );
392 		break;
393 	}
394 	return buf;
395 }
396