1 /*
2 * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
3 *
4 * This file is part of Jam - see jam.c for Copyright information.
5 */
6
7 /*
8 * scan.c - the jam yacc scanner
9 *
10 * 12/26/93 (seiwald) - bump buf in yylex to 10240 - yuk.
11 * 09/16/94 (seiwald) - check for overflows, unmatched {}'s, etc.
12 * Also handle tokens abutting EOF by remembering
13 * to return EOF no matter how many times yylex()
14 * reinvokes yyline().
15 * 02/11/95 (seiwald) - honor only punctuation keywords if SCAN_PUNCT.
16 * 07/27/95 (seiwald) - Include jamgram.h after scan.h, so that YYSTYPE is
17 * defined before Linux's yacc tries to redefine it.
18 * 01/10/01 (seiwald) - \ can now escape any whitespace char
19 * 11/04/02 (seiwald) - const-ing for string literals
20 * 01/05/07 (seiwald) - new yyfname/yylineno for DEBUG_COMPILE
21 */
22
# include "jam.h"
# include "lists.h"
# include "parse.h"
# include "scan.h"
# include "jamgram.h"
# include "jambase.h"
# include "newstr.h"

# include <stdlib.h>
30
31 struct keyword {
32 const char *word;
33 int type;
34 } keywords[] = {
35 # include "jamgramtab.h"
36 { 0, 0 }
37 } ;
38
/*
 * struct include - one input source being scanned.
 *
 * Instances are linked through 'next' into the chain headed by incp.
 * An instance reads either from a list of in-memory strings ('strings')
 * or from a file ('file'/'fname'), one line at a time.
 */

struct include {
    /* next serial include file */
    struct include *next;

    /* next unread character of the current line */
    const char *string;

    /* for yyfparse() -- remaining in-memory lines to parse, if any */
    char **strings;

    /* for yyfparse() -- file being read; opened on demand by yyline() */
    FILE *file;

    /* for yyfparse() -- file name */
    const char *fname;

    /* line counter for error messages */
    int line;

    /* for yyfparse() -- buffer holding the line last read from 'file' */
    char buf[ 512 ];
} ;
48
49 static struct include *incp = 0; /* current file; head of chain */
50
51 static int scanmode = SCAN_NORMAL;
52 static int anyerrors = 0;
53 static char *symdump( YYSTYPE *s );
54
55 # define BIGGEST_TOKEN 10240 /* no single token can be larger */
56
57 /*
58 * Set parser mode: normal, string, or keyword
59 */
60
61 void
yymode(int n)62 yymode( int n )
63 {
64 scanmode = n;
65 }
66
67 void
yyerror(const char * s)68 yyerror( const char *s )
69 {
70 if( incp )
71 printf( "%s: line %d: ", incp->fname, incp->line );
72
73 printf( "%s at %s\n", s, symdump( &yylval ) );
74
75 ++anyerrors;
76 }
77
78 int
yyanyerrors()79 yyanyerrors()
80 {
81 return anyerrors != 0;
82 }
83
84 const char *
yyfname()85 yyfname()
86 {
87 return incp ? copystr( incp->fname ) : 0;
88 }
89
90 int
yylineno()91 yylineno()
92 {
93 return incp ? incp->line : 0;
94 }
95
96 void
yyfparse(const char * s)97 yyfparse( const char *s )
98 {
99 struct include *i = (struct include *)malloc( sizeof( *i ) );
100
101 /* Push this onto the incp chain. */
102
103 i->string = "";
104 i->strings = 0;
105 i->file = 0;
106 i->fname = copystr( s );
107 i->line = 0;
108 i->next = incp;
109 incp = i;
110
111 /* If the filename is "+", it means use the internal jambase. */
112
113 if( !strcmp( s, "+" ) )
114 i->strings = jambase;
115 }
116
117 /*
118 * yyline() - read new line and return first character
119 *
120 * Fabricates a continuous stream of characters across include files,
121 * returning EOF at the bitter end.
122 */
123
124 int
yyline()125 yyline()
126 {
127 struct include *i = incp;
128
129 if( !incp )
130 return EOF;
131
132 /* Once we start reading from the input stream, we reset the */
133 /* include insertion point so that the next include file becomes */
134 /* the head of the list. */
135
136 /* If there is more data in this line, return it. */
137
138 if( *i->string )
139 return *i->string++;
140
141 /* If we're reading from an internal string list, go to the */
142 /* next string. */
143
144 if( i->strings )
145 {
146 if( !*i->strings )
147 goto next;
148
149 i->line++;
150 i->string = *(i->strings++);
151 return *i->string++;
152 }
153
154 /* If necessary, open the file */
155
156 if( !i->file )
157 {
158 FILE *f = stdin;
159
160 if( strcmp( i->fname, "-" ) && !( f = fopen( i->fname, "r" ) ) )
161 perror( i->fname );
162
163 i->file = f;
164 }
165
166 /* If there's another line in this file, start it. */
167
168 if( i->file && fgets( i->buf, sizeof( i->buf ), i->file ) )
169 {
170 i->line++;
171 i->string = i->buf;
172 return *i->string++;
173 }
174
175 next:
176 /* This include is done. */
177 /* Free it up and return EOF so yyparse() returns to parse_file(). */
178
179 incp = i->next;
180
181 /* Close file, free name */
182
183 if( i->file && i->file != stdin )
184 fclose( i->file );
185 freestr( i->fname );
186 free( (char *)i );
187
188 return EOF;
189 }
190
191 /*
192 * yylex() - set yylval to current token; return its type
193 *
194 * Macros to move things along:
195 *
196 * yychar() - return and advance character; invalid after EOF
197 * yyprev() - back up one character; invalid before yychar()
198 *
199 * yychar() returns a continuous stream of characters, until it hits
200 * the EOF of the current include file.
201 */
202
203 # define yychar() ( *incp->string ? *incp->string++ : yyline() )
204 # define yyprev() ( incp->string-- )
205
206 int
yylex()207 yylex()
208 {
209 int c;
210 char buf[BIGGEST_TOKEN];
211 char *b = buf;
212
213 if( !incp )
214 goto eof;
215
216 /* Get first character (whitespace or of token) */
217
218 c = yychar();
219
220 if( scanmode == SCAN_STRING )
221 {
222 /* If scanning for a string (action's {}'s), look for the */
223 /* closing brace. We handle matching braces, if they match! */
224
225 int nest = 1;
226
227 while( c != EOF && b < buf + sizeof( buf ) )
228 {
229 if( c == '{' )
230 nest++;
231
232 if( c == '}' && !--nest )
233 break;
234
235 *b++ = c;
236
237 c = yychar();
238 }
239
240 /* We ate the ending brace -- regurgitate it. */
241
242 if( c != EOF )
243 yyprev();
244
245 /* Check obvious errors. */
246
247 if( b == buf + sizeof( buf ) )
248 {
249 yyerror( "action block too big" );
250 goto eof;
251 }
252
253 if( nest )
254 {
255 yyerror( "unmatched {} in action block" );
256 goto eof;
257 }
258
259 *b = 0;
260 yylval.type = STRING;
261 yylval.string = newstr( buf );
262
263 }
264 else
265 {
266 char *b = buf;
267 struct keyword *k;
268 int inquote = 0;
269 int notkeyword;
270
271 /* Eat white space */
272
273 for( ;; )
274 {
275 /* Skip past white space */
276
277 while( c != EOF && isspace( c ) )
278 c = yychar();
279
280 /* Not a comment? Swallow up comment line. */
281
282 if( c != '#' )
283 break;
284 while( ( c = yychar() ) != EOF && c != '\n' )
285 ;
286 }
287
288 /* c now points to the first character of a token. */
289
290 if( c == EOF )
291 goto eof;
292
293 /* While scanning the word, disqualify it for (expensive) */
294 /* keyword lookup when we can: $anything, "anything", \anything */
295
296 notkeyword = c == '$';
297
298 /* look for white space to delimit word */
299 /* "'s get stripped but preserve white space */
300 /* \ protects next character */
301
302 while(
303 c != EOF &&
304 b < buf + sizeof( buf ) &&
305 ( inquote || !isspace( c ) ) )
306 {
307 if( c == '"' )
308 {
309 /* begin or end " */
310 inquote = !inquote;
311 notkeyword = 1;
312 }
313 else if( c != '\\' )
314 {
315 /* normal char */
316 *b++ = c;
317 }
318 else if( ( c = yychar()) != EOF )
319 {
320 /* \c */
321 *b++ = c;
322 notkeyword = 1;
323 }
324 else
325 {
326 /* \EOF */
327 break;
328 }
329
330 c = yychar();
331 }
332
333 /* Check obvious errors. */
334
335 if( b == buf + sizeof( buf ) )
336 {
337 yyerror( "string too big" );
338 goto eof;
339 }
340
341 if( inquote )
342 {
343 yyerror( "unmatched \" in string" );
344 goto eof;
345 }
346
347 /* We looked ahead a character - back up. */
348
349 if( c != EOF )
350 yyprev();
351
352 /* scan token table */
353 /* don't scan if it's obviously not a keyword or if its */
354 /* an alphabetic when were looking for punctuation */
355
356 *b = 0;
357 yylval.type = ARG;
358
359 if( !notkeyword && !( isalpha( *buf ) && scanmode == SCAN_PUNCT ) )
360 {
361 for( k = keywords; k->word; k++ )
362 if( *buf == *k->word && !strcmp( k->word, buf ) )
363 {
364 yylval.type = k->type;
365 yylval.string = k->word; /* used by symdump */
366 break;
367 }
368 }
369
370 if( yylval.type == ARG )
371 yylval.string = newstr( buf );
372 }
373
374 if( DEBUG_SCAN )
375 printf( "scan %s\n", symdump( &yylval ) );
376
377 return yylval.type;
378
379 eof:
380 yylval.type = EOF;
381 return yylval.type;
382 }
383
384 static char *
symdump(YYSTYPE * s)385 symdump( YYSTYPE *s )
386 {
387 static char buf[ BIGGEST_TOKEN + 20 ];
388
389 switch( s->type )
390 {
391 case EOF:
392 sprintf( buf, "EOF" );
393 break;
394 case 0:
395 sprintf( buf, "unknown symbol %s", s->string );
396 break;
397 case ARG:
398 sprintf( buf, "argument %s", s->string );
399 break;
400 case STRING:
401 sprintf( buf, "string \"%s\"", s->string );
402 break;
403 default:
404 sprintf( buf, "keyword %s", s->string );
405 break;
406 }
407 return buf;
408 }
409