1 /*
2 * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
3 *
4 * This file is part of Jam - see jam.c for Copyright information.
5 */
6
7 /*
8 * scan.c - the jam yacc scanner
9 *
10 */
11
12 #include "jam.h"
13 #include "scan.h"
14 #include "output.h"
15
16 #include "constants.h"
17 #include "jambase.h"
18 #include "jamgram.h"
19
20
21 struct keyword
22 {
23 char * word;
24 int type;
25 } keywords[] =
26 {
27 #include "jamgramtab.h"
28 { 0, 0 }
29 };
30
31 typedef struct include include;
32 struct include
33 {
34 include * next; /* next serial include file */
35 char * string; /* pointer into current line */
36 char * * strings; /* for yyfparse() -- text to parse */
37 FILE * file; /* for yyfparse() -- file being read */
38 OBJECT * fname; /* for yyfparse() -- file name */
39 int line; /* line counter for error messages */
40 char buf[ 512 ]; /* for yyfparse() -- line buffer */
41 };
42
43 static include * incp = 0; /* current file; head of chain */
44
45 static int scanmode = SCAN_NORMAL;
46 static int anyerrors = 0;
47
48
49 static char * symdump( YYSTYPE * );
50
51 #define BIGGEST_TOKEN 10240 /* no single token can be larger */
52
53
54 /*
55 * Set parser mode: normal, string, or keyword.
56 */
57
yymode(int n)58 void yymode( int n )
59 {
60 scanmode = n;
61 }
62
63
yyerror(char const * s)64 void yyerror( char const * s )
65 {
66 /* We use yylval instead of incp to access the error location information as
67 * the incp pointer will already be reset to 0 in case the error occurred at
68 * EOF.
69 *
70 * The two may differ only if ran into an unexpected EOF or we get an error
71 * while reading a lexical token spanning multiple lines, e.g. a multi-line
72 * string literal or action body, in which case yylval location information
73 * will hold the information about where the token started while incp will
74 * hold the information about where reading it broke.
75 */
76 out_printf( "%s:%d: %s at %s\n", object_str( yylval.file ), yylval.line, s,
77 symdump( &yylval ) );
78 ++anyerrors;
79 }
80
81
yyanyerrors()82 int yyanyerrors()
83 {
84 return anyerrors != 0;
85 }
86
87
/*
 * yyfparse() - push a new input named 's' onto the head of the include chain.
 *
 * Nothing is opened here; the file member starts out as 0 and yyline() opens
 * the file on first read. The special name "+" selects the internal jambase
 * text instead of a file.
 */

void yyfparse( OBJECT * s )
{
    include * const inc = (include *)BJAM_MALLOC( sizeof( *inc ) );

    /* Start with an empty current line so the first read fetches a new one. */
    inc->string = "";
    inc->file = 0;
    inc->line = 0;
    inc->fname = object_copy( s );

    /* If the filename is "+", it means use the internal jambase. */
    inc->strings = strcmp( object_str( s ), "+" ) ? 0 : jambase;

    /* Push this onto the incp chain. */
    inc->next = incp;
    incp = inc;
}
105
106
107 /*
108 * yyline() - read new line and return first character.
109 *
110 * Fabricates a continuous stream of characters across include files, returning
111 * EOF at the bitter end.
112 */
113
yyline()114 int yyline()
115 {
116 include * const i = incp;
117
118 if ( !incp )
119 return EOF;
120
121 /* Once we start reading from the input stream, we reset the include
122 * insertion point so that the next include file becomes the head of the
123 * list.
124 */
125
126 /* If there is more data in this line, return it. */
127 if ( *i->string )
128 return *i->string++;
129
130 /* If we are reading from an internal string list, go to the next string. */
131 if ( i->strings )
132 {
133 if ( *i->strings )
134 {
135 ++i->line;
136 i->string = *(i->strings++);
137 return *i->string++;
138 }
139 }
140 else
141 {
142 /* If necessary, open the file. */
143 if ( !i->file )
144 {
145 FILE * f = stdin;
146 if ( strcmp( object_str( i->fname ), "-" ) && !( f = fopen( object_str( i->fname ), "r" ) ) )
147 perror( object_str( i->fname ) );
148 i->file = f;
149 }
150
151 /* If there is another line in this file, start it. */
152 if ( i->file && fgets( i->buf, sizeof( i->buf ), i->file ) )
153 {
154 ++i->line;
155 i->string = i->buf;
156 return *i->string++;
157 }
158 }
159
160 /* This include is done. Free it up and return EOF so yyparse() returns to
161 * parse_file().
162 */
163
164 incp = i->next;
165
166 /* Close file, free name. */
167 if ( i->file && ( i->file != stdin ) )
168 fclose( i->file );
169 object_free( i->fname );
170 BJAM_FREE( (char *)i );
171
172 return EOF;
173 }
174
175
176 /*
177 * yylex() - set yylval to current token; return its type.
178 *
179 * Macros to move things along:
180 *
181 * yychar() - return and advance character; invalid after EOF.
182 * yyprev() - back up one character; invalid before yychar().
183 *
184 * yychar() returns a continuous stream of characters, until it hits the EOF of
185 * the current include file.
186 */
187
188 #define yychar() ( *incp->string ? *incp->string++ : yyline() )
189 #define yyprev() ( incp->string-- )
190
yylex()191 int yylex()
192 {
193 int c;
194 char buf[ BIGGEST_TOKEN ];
195 char * b = buf;
196
197 if ( !incp )
198 goto eof;
199
200 /* Get first character (whitespace or of token). */
201 c = yychar();
202
203 if ( scanmode == SCAN_STRING )
204 {
205 /* If scanning for a string (action's {}'s), look for the closing brace.
206 * We handle matching braces, if they match.
207 */
208
209 int nest = 1;
210
211 while ( ( c != EOF ) && ( b < buf + sizeof( buf ) ) )
212 {
213 if ( c == '{' )
214 ++nest;
215
216 if ( ( c == '}' ) && !--nest )
217 break;
218
219 *b++ = c;
220
221 c = yychar();
222
223 /* Turn trailing "\r\n" sequences into plain "\n" for Cygwin. */
224 if ( ( c == '\n' ) && ( b[ -1 ] == '\r' ) )
225 --b;
226 }
227
228 /* We ate the ending brace -- regurgitate it. */
229 if ( c != EOF )
230 yyprev();
231
232 /* Check for obvious errors. */
233 if ( b == buf + sizeof( buf ) )
234 {
235 yyerror( "action block too big" );
236 goto eof;
237 }
238
239 if ( nest )
240 {
241 yyerror( "unmatched {} in action block" );
242 goto eof;
243 }
244
245 *b = 0;
246 yylval.type = STRING;
247 yylval.string = object_new( buf );
248 yylval.file = incp->fname;
249 yylval.line = incp->line;
250 }
251 else
252 {
253 char * b = buf;
254 struct keyword * k;
255 int inquote = 0;
256 int notkeyword;
257
258 /* Eat white space. */
259 for ( ; ; )
260 {
261 /* Skip past white space. */
262 while ( ( c != EOF ) && isspace( c ) )
263 c = yychar();
264
265 /* Not a comment? */
266 if ( c != '#' )
267 break;
268
269 /* Swallow up comment line. */
270 while ( ( ( c = yychar() ) != EOF ) && ( c != '\n' ) ) ;
271 }
272
273 /* c now points to the first character of a token. */
274 if ( c == EOF )
275 goto eof;
276
277 yylval.file = incp->fname;
278 yylval.line = incp->line;
279
280 /* While scanning the word, disqualify it for (expensive) keyword lookup
281 * when we can: $anything, "anything", \anything
282 */
283 notkeyword = c == '$';
284
285 /* Look for white space to delimit word. "'s get stripped but preserve
286 * white space. \ protects next character.
287 */
288 while
289 (
290 ( c != EOF ) &&
291 ( b < buf + sizeof( buf ) ) &&
292 ( inquote || !isspace( c ) )
293 )
294 {
295 if ( c == '"' )
296 {
297 /* begin or end " */
298 inquote = !inquote;
299 notkeyword = 1;
300 }
301 else if ( c != '\\' )
302 {
303 /* normal char */
304 *b++ = c;
305 }
306 else if ( ( c = yychar() ) != EOF )
307 {
308 /* \c */
309 if (c == 'n')
310 c = '\n';
311 else if (c == 'r')
312 c = '\r';
313 else if (c == 't')
314 c = '\t';
315 *b++ = c;
316 notkeyword = 1;
317 }
318 else
319 {
320 /* \EOF */
321 break;
322 }
323
324 c = yychar();
325 }
326
327 /* Check obvious errors. */
328 if ( b == buf + sizeof( buf ) )
329 {
330 yyerror( "string too big" );
331 goto eof;
332 }
333
334 if ( inquote )
335 {
336 yyerror( "unmatched \" in string" );
337 goto eof;
338 }
339
340 /* We looked ahead a character - back up. */
341 if ( c != EOF )
342 yyprev();
343
344 /* Scan token table. Do not scan if it is obviously not a keyword or if
345 * it is an alphabetic when were looking for punctuation.
346 */
347
348 *b = 0;
349 yylval.type = ARG;
350
351 if ( !notkeyword && !( isalpha( *buf ) && ( scanmode == SCAN_PUNCT ) ) )
352 for ( k = keywords; k->word; ++k )
353 if ( ( *buf == *k->word ) && !strcmp( k->word, buf ) )
354 {
355 yylval.type = k->type;
356 yylval.keyword = k->word; /* used by symdump */
357 break;
358 }
359
360 if ( yylval.type == ARG )
361 yylval.string = object_new( buf );
362 }
363
364 if ( DEBUG_SCAN )
365 out_printf( "scan %s\n", symdump( &yylval ) );
366
367 return yylval.type;
368
369 eof:
370 /* We do not reset yylval.file & yylval.line here so unexpected EOF error
371 * messages would include correct error location information.
372 */
373 yylval.type = EOF;
374 return yylval.type;
375 }
376
377
/*
 * symdump() - describe the token *s as text for diagnostics.
 *
 * The text is written into a static buffer, so the returned pointer is only
 * valid until the next call. The buffer is BIGGEST_TOKEN + 20 bytes, leaving
 * room for the fixed prefix in front of the token text.
 */

static char * symdump( YYSTYPE * s )
{
    static char buf[ BIGGEST_TOKEN + 20 ];
    int const type = s->type;

    if ( type == EOF )
        sprintf( buf, "EOF" );
    else if ( type == 0 )
        sprintf( buf, "unknown symbol %s", object_str( s->string ) );
    else if ( type == ARG )
        sprintf( buf, "argument %s", object_str( s->string ) );
    else if ( type == STRING )
        sprintf( buf, "string \"%s\"", object_str( s->string ) );
    else
        /* Every other type is a keyword token; yylex() stored its spelling. */
        sprintf( buf, "keyword %s", s->keyword );

    return buf;
}
391
392
393 /*
394 * Get information about the current file and line, for those epsilon
395 * transitions that produce a parse.
396 */
397
yyinput_last_read_token(OBJECT ** name,int * line)398 void yyinput_last_read_token( OBJECT * * name, int * line )
399 {
400 /* TODO: Consider whether and when we might want to report where the last
401 * read token ended, e.g. EOF errors inside string literals.
402 */
403 *name = yylval.file;
404 *line = yylval.line;
405 }
406