1 /*
2  * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
3  *
4  * This file is part of Jam - see jam.c for Copyright information.
5  */
6 
7 /*
8  * scan.c - the jam yacc scanner
9  *
10  * 12/26/93 (seiwald) - bump buf in yylex to 10240 - yuk.
11  * 09/16/94 (seiwald) - check for overflows, unmatched {}'s, etc.
12  *			Also handle tokens abutting EOF by remembering
 *			to return EOF no matter how many times yylex()
14  *			reinvokes yyline().
15  * 02/11/95 (seiwald) - honor only punctuation keywords if SCAN_PUNCT.
16  * 07/27/95 (seiwald) - Include jamgram.h after scan.h, so that YYSTYPE is
17  *			defined before Linux's yacc tries to redefine it.
18  * 01/10/01 (seiwald) - \ can now escape any whitespace char
19  * 11/04/02 (seiwald) - const-ing for string literals
20  * 01/05/07 (seiwald) - new yyfname/yylineno for DEBUG_COMPILE
21  */
22 
23 # include "jam.h"
24 # include "lists.h"
25 # include "parse.h"
26 # include "scan.h"
27 # include "jamgram.h"
28 # include "jambase.h"
29 # include "newstr.h"
30 
31 struct keyword {
32 	const char *word;
33 	int type;
34 } keywords[] = {
35 # include "jamgramtab.h"
36 	{ 0, 0 }
37 } ;
38 
/*
 * struct include - one input source on the scanner's include chain
 */

struct include {
	/* next serial include file */
	struct include *next;

	/* pointer into current line */
	const char *string;

	/* for yyfparse() -- text to parse (null-terminated list of lines) */
	char **strings;

	/* for yyfparse() -- file being read */
	FILE *file;

	/* for yyfparse() -- file name */
	const char *fname;

	/* line counter for error messages */
	int line;

	/* for yyfparse() -- line buffer */
	char buf[ 512 ];
} ;
48 
49 static struct include *incp = 0; /* current file; head of chain */
50 
51 static int scanmode = SCAN_NORMAL;
52 static int anyerrors = 0;
53 static char *symdump( YYSTYPE *s );
54 
55 # define BIGGEST_TOKEN 10240	/* no single token can be larger */
56 
57 /*
58  * Set parser mode: normal, string, or keyword
59  */
60 
61 void
yymode(int n)62 yymode( int n )
63 {
64 	scanmode = n;
65 }
66 
67 void
yyerror(const char * s)68 yyerror( const char *s )
69 {
70 	if( incp )
71 	    printf( "%s: line %d: ", incp->fname, incp->line );
72 
73 	printf( "%s at %s\n", s, symdump( &yylval ) );
74 
75 	++anyerrors;
76 }
77 
78 int
yyanyerrors()79 yyanyerrors()
80 {
81 	return anyerrors != 0;
82 }
83 
84 const char *
yyfname()85 yyfname()
86 {
87 	return incp ? copystr( incp->fname ) : 0;
88 }
89 
90 int
yylineno()91 yylineno()
92 {
93 	return incp ? incp->line : 0;
94 }
95 
96 void
yyfparse(const char * s)97 yyfparse( const char *s )
98 {
99 	struct include *i = (struct include *)malloc( sizeof( *i ) );
100 
101 	/* Push this onto the incp chain. */
102 
103 	i->string = "";
104 	i->strings = 0;
105 	i->file = 0;
106 	i->fname = copystr( s );
107 	i->line = 0;
108 	i->next = incp;
109 	incp = i;
110 
111 	/* If the filename is "+", it means use the internal jambase. */
112 
113 	if( !strcmp( s, "+" ) )
114 	    i->strings = jambase;
115 }
116 
117 /*
118  * yyline() - read new line and return first character
119  *
120  * Fabricates a continuous stream of characters across include files,
121  * returning EOF at the bitter end.
122  */
123 
124 int
yyline()125 yyline()
126 {
127 	struct include *i = incp;
128 
129 	if( !incp )
130 	    return EOF;
131 
132 	/* Once we start reading from the input stream, we reset the */
133 	/* include insertion point so that the next include file becomes */
134 	/* the head of the list. */
135 
136 	/* If there is more data in this line, return it. */
137 
138 	if( *i->string )
139 	    return *i->string++;
140 
141 	/* If we're reading from an internal string list, go to the */
142 	/* next string. */
143 
144 	if( i->strings )
145 	{
146 	    if( !*i->strings )
147 		goto next;
148 
149 	    i->line++;
150 	    i->string = *(i->strings++);
151 	    return *i->string++;
152 	}
153 
154 	/* If necessary, open the file */
155 
156 	if( !i->file )
157 	{
158 	    FILE *f = stdin;
159 
160 	    if( strcmp( i->fname, "-" ) && !( f = fopen( i->fname, "r" ) ) )
161 		perror( i->fname );
162 
163 	    i->file = f;
164 	}
165 
166 	/* If there's another line in this file, start it. */
167 
168 	if( i->file && fgets( i->buf, sizeof( i->buf ), i->file ) )
169 	{
170 	    i->line++;
171 	    i->string = i->buf;
172 	    return *i->string++;
173 	}
174 
175     next:
176 	/* This include is done.  */
177 	/* Free it up and return EOF so yyparse() returns to parse_file(). */
178 
179 	incp = i->next;
180 
181 	/* Close file, free name */
182 
183 	if( i->file && i->file != stdin )
184 	    fclose( i->file );
185 	freestr( i->fname );
186 	free( (char *)i );
187 
188 	return EOF;
189 }
190 
191 /*
192  * yylex() - set yylval to current token; return its type
193  *
194  * Macros to move things along:
195  *
196  *	yychar() - return and advance character; invalid after EOF
197  *	yyprev() - back up one character; invalid before yychar()
198  *
199  * yychar() returns a continuous stream of characters, until it hits
200  * the EOF of the current include file.
201  */
202 
203 # define yychar() ( *incp->string ? *incp->string++ : yyline() )
204 # define yyprev() ( incp->string-- )
205 
206 int
yylex()207 yylex()
208 {
209 	int c;
210 	char buf[BIGGEST_TOKEN];
211 	char *b = buf;
212 
213 	if( !incp )
214 	    goto eof;
215 
216 	/* Get first character (whitespace or of token) */
217 
218 	c = yychar();
219 
220 	if( scanmode == SCAN_STRING )
221 	{
222 	    /* If scanning for a string (action's {}'s), look for the */
223 	    /* closing brace.  We handle matching braces, if they match! */
224 
225 	    int nest = 1;
226 
227 	    while( c != EOF && b < buf + sizeof( buf ) )
228 	    {
229 		    if( c == '{' )
230 			nest++;
231 
232 		    if( c == '}' && !--nest )
233 			break;
234 
235 		    *b++ = c;
236 
237 		    c = yychar();
238 	    }
239 
240 	    /* We ate the ending brace -- regurgitate it. */
241 
242 	    if( c != EOF )
243 		yyprev();
244 
245 	    /* Check obvious errors. */
246 
247 	    if( b == buf + sizeof( buf ) )
248 	    {
249 		yyerror( "action block too big" );
250 		goto eof;
251 	    }
252 
253 	    if( nest )
254 	    {
255 		yyerror( "unmatched {} in action block" );
256 		goto eof;
257 	    }
258 
259 	    *b = 0;
260 	    yylval.type = STRING;
261 	    yylval.string = newstr( buf );
262 
263 	}
264 	else
265 	{
266 	    char *b = buf;
267 	    struct keyword *k;
268 	    int inquote = 0;
269 	    int notkeyword;
270 
271 	    /* Eat white space */
272 
273 	    for( ;; )
274 	    {
275 		/* Skip past white space */
276 
277 		while( c != EOF && isspace( c ) )
278 			c = yychar();
279 
280 		/* Not a comment?  Swallow up comment line. */
281 
282 		if( c != '#' )
283 			break;
284 		while( ( c = yychar() ) != EOF && c != '\n' )
285 			;
286 	    }
287 
288 	    /* c now points to the first character of a token. */
289 
290 	    if( c == EOF )
291 		goto eof;
292 
293 	    /* While scanning the word, disqualify it for (expensive) */
294 	    /* keyword lookup when we can: $anything, "anything", \anything */
295 
296 	    notkeyword = c == '$';
297 
298 	    /* look for white space to delimit word */
299 	    /* "'s get stripped but preserve white space */
300 	    /* \ protects next character */
301 
302 	    while(
303 		c != EOF &&
304 		b < buf + sizeof( buf ) &&
305 		( inquote || !isspace( c ) ) )
306 	    {
307 		if( c == '"' )
308 		{
309 		    /* begin or end " */
310 		    inquote = !inquote;
311 		    notkeyword = 1;
312 		}
313 		else if( c != '\\' )
314 		{
315 		    /* normal char */
316 		    *b++ = c;
317 		}
318 		else if( ( c = yychar()) != EOF )
319 		{
320 		    /* \c */
321 		    *b++ = c;
322 		    notkeyword = 1;
323 		}
324 		else
325 		{
326 		    /* \EOF */
327 		    break;
328 		}
329 
330 		c = yychar();
331 	    }
332 
333 	    /* Check obvious errors. */
334 
335 	    if( b == buf + sizeof( buf ) )
336 	    {
337 		yyerror( "string too big" );
338 		goto eof;
339 	    }
340 
341 	    if( inquote )
342 	    {
343 		yyerror( "unmatched \" in string" );
344 		goto eof;
345 	    }
346 
347 	    /* We looked ahead a character - back up. */
348 
349 	    if( c != EOF )
350 		yyprev();
351 
352 	    /* scan token table */
353 	    /* don't scan if it's obviously not a keyword or if its */
354 	    /* an alphabetic when were looking for punctuation */
355 
356 	    *b = 0;
357 	    yylval.type = ARG;
358 
359 	    if( !notkeyword && !( isalpha( *buf ) && scanmode == SCAN_PUNCT ) )
360 	    {
361 		for( k = keywords; k->word; k++ )
362 		    if( *buf == *k->word && !strcmp( k->word, buf ) )
363 		{
364 		    yylval.type = k->type;
365 		    yylval.string = k->word;	/* used by symdump */
366 		    break;
367 		}
368 	    }
369 
370 	    if( yylval.type == ARG )
371 		yylval.string = newstr( buf );
372 	}
373 
374 	if( DEBUG_SCAN )
375 		printf( "scan %s\n", symdump( &yylval ) );
376 
377 	return yylval.type;
378 
379 eof:
380 	yylval.type = EOF;
381 	return yylval.type;
382 }
383 
384 static char *
symdump(YYSTYPE * s)385 symdump( YYSTYPE *s )
386 {
387 	static char buf[ BIGGEST_TOKEN + 20 ];
388 
389 	switch( s->type )
390 	{
391 	case EOF:
392 		sprintf( buf, "EOF" );
393 		break;
394 	case 0:
395 		sprintf( buf, "unknown symbol %s", s->string );
396 		break;
397 	case ARG:
398 		sprintf( buf, "argument %s", s->string );
399 		break;
400 	case STRING:
401 		sprintf( buf, "string \"%s\"", s->string );
402 		break;
403 	default:
404 		sprintf( buf, "keyword %s", s->string );
405 		break;
406 	}
407 	return buf;
408 }
409