1 /*
2  * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
3  *
4  * This file is part of Jam - see jam.c for Copyright information.
5  */
6 
7 /*
8  * scan.c - the jam yacc scanner
9  *
10  */
11 
12 #include "jam.h"
13 #include "scan.h"
14 #include "output.h"
15 
16 #include "constants.h"
17 #include "jambase.h"
18 #include "jamgram.h"
19 
20 
21 struct keyword
22 {
23     char * word;
24     int    type;
25 } keywords[] =
26 {
27 #include "jamgramtab.h"
28     { 0, 0 }
29 };
30 
31 typedef struct include include;
32 struct include
33 {
34     include   * next;        /* next serial include file */
35     char      * string;      /* pointer into current line */
36     char    * * strings;     /* for yyfparse() -- text to parse */
37     FILE      * file;        /* for yyfparse() -- file being read */
38     OBJECT    * fname;       /* for yyfparse() -- file name */
39     int         line;        /* line counter for error messages */
40     char        buf[ 512 ];  /* for yyfparse() -- line buffer */
41 };
42 
43 static include * incp = 0;  /* current file; head of chain */
44 
45 static int scanmode = SCAN_NORMAL;
46 static int anyerrors = 0;
47 
48 
49 static char * symdump( YYSTYPE * );
50 
51 #define BIGGEST_TOKEN 10240  /* no single token can be larger */
52 
53 
54 /*
55  * Set parser mode: normal, string, or keyword.
56  */
57 
yymode(int n)58 void yymode( int n )
59 {
60     scanmode = n;
61 }
62 
63 
yyerror(char const * s)64 void yyerror( char const * s )
65 {
66     /* We use yylval instead of incp to access the error location information as
67      * the incp pointer will already be reset to 0 in case the error occurred at
68      * EOF.
69      *
70      * The two may differ only if ran into an unexpected EOF or we get an error
71      * while reading a lexical token spanning multiple lines, e.g. a multi-line
72      * string literal or action body, in which case yylval location information
73      * will hold the information about where the token started while incp will
74      * hold the information about where reading it broke.
75      */
76     out_printf( "%s:%d: %s at %s\n", object_str( yylval.file ), yylval.line, s,
77             symdump( &yylval ) );
78     ++anyerrors;
79 }
80 
81 
yyanyerrors()82 int yyanyerrors()
83 {
84     return anyerrors != 0;
85 }
86 
87 
/*
 * yyfparse() - push a new input source onto the include chain.
 *
 * s names the source. The name "+" selects the built-in jambase text; any
 * other name is treated as a file, which yyline() opens on first read. The
 * new entry keeps its own copy of the name.
 */
void yyfparse( OBJECT * s )
{
    include * const inc = (include *)BJAM_MALLOC( sizeof( include ) );

    /* Start with an empty current line so the first read fetches a new one. */
    inc->string = "";
    inc->strings = 0;
    inc->file = 0;
    inc->fname = object_copy( s );
    inc->line = 0;

    /* Link the entry in as the new head of the chain. */
    inc->next = incp;
    incp = inc;

    /* "+" means: parse the internal jambase instead of a file. */
    if ( strcmp( object_str( s ), "+" ) == 0 )
        inc->strings = jambase;
}
105 
106 
107 /*
108  * yyline() - read new line and return first character.
109  *
110  * Fabricates a continuous stream of characters across include files, returning
111  * EOF at the bitter end.
112  */
113 
yyline()114 int yyline()
115 {
116     include * const i = incp;
117 
118     if ( !incp )
119         return EOF;
120 
121     /* Once we start reading from the input stream, we reset the include
122      * insertion point so that the next include file becomes the head of the
123      * list.
124      */
125 
126     /* If there is more data in this line, return it. */
127     if ( *i->string )
128         return *i->string++;
129 
130     /* If we are reading from an internal string list, go to the next string. */
131     if ( i->strings )
132     {
133         if ( *i->strings )
134         {
135             ++i->line;
136             i->string = *(i->strings++);
137             return *i->string++;
138         }
139     }
140     else
141     {
142         /* If necessary, open the file. */
143         if ( !i->file )
144         {
145             FILE * f = stdin;
146             if ( strcmp( object_str( i->fname ), "-" ) && !( f = fopen( object_str( i->fname ), "r" ) ) )
147                 perror( object_str( i->fname ) );
148             i->file = f;
149         }
150 
151         /* If there is another line in this file, start it. */
152         if ( i->file && fgets( i->buf, sizeof( i->buf ), i->file ) )
153         {
154             ++i->line;
155             i->string = i->buf;
156             return *i->string++;
157         }
158     }
159 
160     /* This include is done. Free it up and return EOF so yyparse() returns to
161      * parse_file().
162      */
163 
164     incp = i->next;
165 
166     /* Close file, free name. */
167     if ( i->file && ( i->file != stdin ) )
168         fclose( i->file );
169     object_free( i->fname );
170     BJAM_FREE( (char *)i );
171 
172     return EOF;
173 }
174 
175 
176 /*
177  * yylex() - set yylval to current token; return its type.
178  *
179  * Macros to move things along:
180  *
181  *  yychar() - return and advance character; invalid after EOF.
182  *  yyprev() - back up one character; invalid before yychar().
183  *
184  * yychar() returns a continuous stream of characters, until it hits the EOF of
185  * the current include file.
186  */
187 
188 #define yychar() ( *incp->string ? *incp->string++ : yyline() )
189 #define yyprev() ( incp->string-- )
190 
yylex()191 int yylex()
192 {
193     int c;
194     char buf[ BIGGEST_TOKEN ];
195     char * b = buf;
196 
197     if ( !incp )
198         goto eof;
199 
200     /* Get first character (whitespace or of token). */
201     c = yychar();
202 
203     if ( scanmode == SCAN_STRING )
204     {
205         /* If scanning for a string (action's {}'s), look for the closing brace.
206          * We handle matching braces, if they match.
207          */
208 
209         int nest = 1;
210 
211         while ( ( c != EOF ) && ( b < buf + sizeof( buf ) ) )
212         {
213             if ( c == '{' )
214                 ++nest;
215 
216             if ( ( c == '}' ) && !--nest )
217                 break;
218 
219             *b++ = c;
220 
221             c = yychar();
222 
223             /* Turn trailing "\r\n" sequences into plain "\n" for Cygwin. */
224             if ( ( c == '\n' ) && ( b[ -1 ] == '\r' ) )
225                 --b;
226         }
227 
228         /* We ate the ending brace -- regurgitate it. */
229         if ( c != EOF )
230             yyprev();
231 
232         /* Check for obvious errors. */
233         if ( b == buf + sizeof( buf ) )
234         {
235             yyerror( "action block too big" );
236             goto eof;
237         }
238 
239         if ( nest )
240         {
241             yyerror( "unmatched {} in action block" );
242             goto eof;
243         }
244 
245         *b = 0;
246         yylval.type = STRING;
247         yylval.string = object_new( buf );
248         yylval.file = incp->fname;
249         yylval.line = incp->line;
250     }
251     else
252     {
253         char * b = buf;
254         struct keyword * k;
255         int inquote = 0;
256         int notkeyword;
257 
258         /* Eat white space. */
259         for ( ; ; )
260         {
261             /* Skip past white space. */
262             while ( ( c != EOF ) && isspace( c ) )
263                 c = yychar();
264 
265             /* Not a comment? */
266             if ( c != '#' )
267                 break;
268 
269             /* Swallow up comment line. */
270             while ( ( ( c = yychar() ) != EOF ) && ( c != '\n' ) ) ;
271         }
272 
273         /* c now points to the first character of a token. */
274         if ( c == EOF )
275             goto eof;
276 
277         yylval.file = incp->fname;
278         yylval.line = incp->line;
279 
280         /* While scanning the word, disqualify it for (expensive) keyword lookup
281          * when we can: $anything, "anything", \anything
282          */
283         notkeyword = c == '$';
284 
285         /* Look for white space to delimit word. "'s get stripped but preserve
286          * white space. \ protects next character.
287          */
288         while
289         (
290             ( c != EOF ) &&
291             ( b < buf + sizeof( buf ) ) &&
292             ( inquote || !isspace( c ) )
293         )
294         {
295             if ( c == '"' )
296             {
297                 /* begin or end " */
298                 inquote = !inquote;
299                 notkeyword = 1;
300             }
301             else if ( c != '\\' )
302             {
303                 /* normal char */
304                 *b++ = c;
305             }
306             else if ( ( c = yychar() ) != EOF )
307             {
308                 /* \c */
309                 if (c == 'n')
310                     c = '\n';
311                 else if (c == 'r')
312                     c = '\r';
313                 else if (c == 't')
314                     c = '\t';
315                 *b++ = c;
316                 notkeyword = 1;
317             }
318             else
319             {
320                 /* \EOF */
321                 break;
322             }
323 
324             c = yychar();
325         }
326 
327         /* Check obvious errors. */
328         if ( b == buf + sizeof( buf ) )
329         {
330             yyerror( "string too big" );
331             goto eof;
332         }
333 
334         if ( inquote )
335         {
336             yyerror( "unmatched \" in string" );
337             goto eof;
338         }
339 
340         /* We looked ahead a character - back up. */
341         if ( c != EOF )
342             yyprev();
343 
344         /* Scan token table. Do not scan if it is obviously not a keyword or if
345          * it is an alphabetic when were looking for punctuation.
346          */
347 
348         *b = 0;
349         yylval.type = ARG;
350 
351         if ( !notkeyword && !( isalpha( *buf ) && ( scanmode == SCAN_PUNCT ) ) )
352             for ( k = keywords; k->word; ++k )
353                 if ( ( *buf == *k->word ) && !strcmp( k->word, buf ) )
354                 {
355                     yylval.type = k->type;
356                     yylval.keyword = k->word;  /* used by symdump */
357                     break;
358                 }
359 
360         if ( yylval.type == ARG )
361             yylval.string = object_new( buf );
362     }
363 
364     if ( DEBUG_SCAN )
365         out_printf( "scan %s\n", symdump( &yylval ) );
366 
367     return yylval.type;
368 
369 eof:
370     /* We do not reset yylval.file & yylval.line here so unexpected EOF error
371      * messages would include correct error location information.
372      */
373     yylval.type = EOF;
374     return yylval.type;
375 }
376 
377 
/*
 * symdump() - describe a token for diagnostic output.
 *
 * Returns a pointer to a static buffer that is overwritten by the next call,
 * so the result must be used (or copied) before calling symdump() again.
 * Output is bounded by the buffer size; over-long text is truncated rather
 * than written past the end of the buffer.
 */
static char * symdump( YYSTYPE * s )
{
    static char buf[ BIGGEST_TOKEN + 20 ];
    switch ( s->type )
    {
        case EOF   : snprintf( buf, sizeof( buf ), "EOF"                                        ); break;
        case 0     : snprintf( buf, sizeof( buf ), "unknown symbol %s", object_str( s->string ) ); break;
        case ARG   : snprintf( buf, sizeof( buf ), "argument %s"      , object_str( s->string ) ); break;
        case STRING: snprintf( buf, sizeof( buf ), "string \"%s\""    , object_str( s->string ) ); break;
        /* Every other type is a keyword token; yylex() stored its spelling. */
        default    : snprintf( buf, sizeof( buf ), "keyword %s"       , s->keyword              ); break;
    }
    return buf;
}
391 
392 
393 /*
394  * Get information about the current file and line, for those epsilon
395  * transitions that produce a parse.
396  */
397 
yyinput_last_read_token(OBJECT ** name,int * line)398 void yyinput_last_read_token( OBJECT * * name, int * line )
399 {
400     /* TODO: Consider whether and when we might want to report where the last
401      * read token ended, e.g. EOF errors inside string literals.
402      */
403     *name = yylval.file;
404     *line = yylval.line;
405 }
406