1 /*
2  * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
3  *
4  * This file is part of Jam - see jam.c for Copyright information.
5  */
6 
7 /*
8  * scan.c - the jam yacc scanner
9  *
10  */
11 
12 #include "jam.h"
13 #include "scan.h"
14 #include "output.h"
15 
16 #include "constants.h"
17 #include "jamgram.hpp"
18 
19 
/*
 * Keyword table searched by yylex() to turn a scanned word into a grammar
 * token type. The entries are pulled in from "jamgramtab.h"; the trailing
 * { 0, 0 } entry is the sentinel the lookup loop in yylex() stops on.
 */
struct keyword
{
    const char * word;  /* keyword text; 0 in the terminating sentinel */
    int    type;        /* value stored in yylval.type when the word matches */
} keywords[] =
{
#include "jamgramtab.h"
    { 0, 0 }
};
29 
/*
 * One input source on the scanner's include chain. yyfparse() pushes an entry
 * at the head of the chain (incp), yysparse() additionally attaches an array
 * of in-memory lines to it, and yyfdone() pops and frees it.
 */
typedef struct include include;
struct include
{
    include   * next;        /* next serial include file */
    char      * string;      /* pointer into current line */
    char    * * strings;     /* for yysparse() -- 0-terminated array of lines to parse; 0 when reading a file */
    LISTITER    pos;         /* for yysparse() -- text to parse. NOTE(review): not referenced by the code visible here -- confirm it is still needed */
    LIST      * list;        /* for yysparse() -- text to parse. NOTE(review): not referenced by the code visible here -- confirm it is still needed */
    FILE      * file;        /* for yyfparse() -- file being read; 0 until yyline() opens it */
    OBJECT    * fname;       /* for yyfparse() -- file name; "-" makes yyline() read stdin */
    int         line;        /* line counter for error messages */
    char        buf[ 512 ];  /* for yyfparse() -- line buffer filled by fgets() in yyline() */
};
43 
static include * incp = 0;  /* current file; head of chain */

static int scanmode = SCAN_NORMAL;  /* current scanner mode; set by yymode() and adjusted by yylex() */
static int anyerrors = 0;           /* number of errors reported through yyerror() */


/* Formats a token as text for diagnostics; defined further down. */
static char * symdump( YYSTYPE * );

#define BIGGEST_TOKEN 10240  /* no single token can be larger */
53 
54 
55 /*
56  * Set parser mode: normal, string, or keyword.
57  */
58 
yymode(int n)59 int yymode( int n )
60 {
61     int result = scanmode;
62     scanmode = n;
63     return result;
64 }
65 
66 
yyerror(char const * s)67 void yyerror( char const * s )
68 {
69     /* We use yylval instead of incp to access the error location information as
70      * the incp pointer will already be reset to 0 in case the error occurred at
71      * EOF.
72      *
73      * The two may differ only if ran into an unexpected EOF or we get an error
74      * while reading a lexical token spanning multiple lines, e.g. a multi-line
75      * string literal or action body, in which case yylval location information
76      * will hold the information about where the token started while incp will
77      * hold the information about where reading it broke.
78      */
79     out_printf( "%s:%d: %s at %s\n", object_str( yylval.file ), yylval.line, s,
80             symdump( &yylval ) );
81     ++anyerrors;
82 }
83 
84 
yyanyerrors()85 int yyanyerrors()
86 {
87     return anyerrors != 0;
88 }
89 
90 
/*
 * yyfparse() - push a new input named s onto the include chain.
 *
 * The file itself is not opened here; yyline() opens it on the first read.
 * The entry keeps its own reference to the name (object_copy), which
 * yyfdone() releases.
 */
void yyfparse( OBJECT * s )
{
    include * const entry = (include *)BJAM_MALLOC( sizeof( include ) );

    /* Start with an empty current line so the first read fetches a new one. */
    entry->fname = object_copy( s );
    entry->file = 0;
    entry->strings = 0;
    entry->line = 0;
    entry->string = (char*)"";

    /* Make the new entry the head of the chain. */
    entry->next = incp;
    incp = entry;
}
104 
105 
/*
 * yysparse() - push a new input that is read from memory instead of a file.
 *
 * name is used as the file name of the input; lines is a 0-terminated array
 * of strings that yyline() hands out one after another.
 */
void yysparse( OBJECT * name, const char * * lines )
{
    char * * const text = (char * *)lines;

    yyfparse( name );
    incp->strings = text;
}
111 
112 
113 /*
114  * yyfdone() - cleanup after we're done parsing a file.
115  */
yyfdone(void)116 void yyfdone( void )
117 {
118     include * const i = incp;
119     incp = i->next;
120 
121     /* Close file, free name. */
122     if(i->file && (i->file != stdin))
123         fclose(i->file);
124     object_free(i->fname);
125     BJAM_FREE((char *)i);
126 }
127 
128 
129 /*
130  * yyline() - read new line and return first character.
131  *
132  * Fabricates a continuous stream of characters across include files, returning
133  * EOF at the bitter end.
134  */
135 
yyline()136 int yyline()
137 {
138     include * const i = incp;
139 
140     if ( !incp )
141         return EOF;
142 
143     /* Once we start reading from the input stream, we reset the include
144      * insertion point so that the next include file becomes the head of the
145      * list.
146      */
147 
148     /* If there is more data in this line, return it. */
149     if ( *i->string )
150         return *i->string++;
151 
152     /* If we are reading from an internal string list, go to the next string. */
153     if ( i->strings )
154     {
155         if ( *i->strings )
156         {
157             ++i->line;
158             i->string = *(i->strings++);
159             return *i->string++;
160         }
161     }
162     else
163     {
164         /* If necessary, open the file. */
165         if ( !i->file )
166         {
167             FILE * f = stdin;
168             if ( strcmp( object_str( i->fname ), "-" ) && !( f = fopen( object_str( i->fname ), "r" ) ) )
169                 perror( object_str( i->fname ) );
170             i->file = f;
171         }
172 
173         /* If there is another line in this file, start it. */
174         if ( i->file && fgets( i->buf, sizeof( i->buf ), i->file ) )
175         {
176             ++i->line;
177             i->string = i->buf;
178             return *i->string++;
179         }
180     }
181 
182     /* This include is done. Return EOF so yyparse() returns to
183      * parse_file().
184      */
185 
186     return EOF;
187 }
188 
189 /* This allows us to get an extra character of lookahead.
190  * There are a few places where we need to look ahead two
191  * characters and yyprev only guarantees a single character
192  * of putback.
193  */
yypeek()194 int yypeek()
195 {
196     if ( *incp->string )
197     {
198         return *incp->string;
199     }
200     else if ( incp->strings )
201     {
202         if ( *incp->strings )
203             return **incp->strings;
204     }
205     else if ( incp->file )
206     {
207         /* Don't bother opening the file.  yypeek is
208          * only used in special cases and never at the
209          * beginning of a file.
210          */
211         int ch = fgetc( incp->file );
212         if ( ch != EOF )
213             ungetc( ch, incp->file );
214         return ch;
215     }
216     return EOF;
217 }
218 
/*
 * yylex() - set yylval to current token; return its type.
 *
 * Macros to move things along:
 *
 *  yychar() - return and advance character; invalid after EOF.
 *  yyprev() - back up one character; invalid before yychar().
 *
 * yychar() returns a continuous stream of characters, until it hits the EOF of
 * the current include file.
 *
 * When yychar() takes a character straight from the current line it converts
 * it through unsigned char, as fgetc() does. Without that, a byte with the
 * high bit set would be sign-extended on platforms where char is signed: it
 * would be an invalid argument for the <ctype.h> functions and 0xFF would
 * compare equal to EOF.
 */

#define yychar() ( *incp->string ? (unsigned char)*incp->string++ : yyline() )
#define yyprev() ( incp->string-- )
233 
/* Selects what yylex() does with special characters that are not separated
 * from the neighbouring word by white space. Nothing in the code visible here
 * assigns it, so the 0 branch of the two macros below is the one that runs.
 */
static int use_new_scanner = 0;

/* yystartkeyword() - a special character was found in the middle of a word.
 * With use_new_scanner set this leaves the enclosing loop; otherwise it only
 * records a token warning and scanning of the word goes on.
 *
 * yyendkeyword() - a punctuation token was just stored at the start of a
 * word. With use_new_scanner set this leaves the enclosing loop; otherwise it
 * sets expect_whitespace and restarts the loop with the next character.
 *
 * Both expand to unbraced if/else statements that use break/continue and the
 * locals expect_whitespace and has_token_warning, so they can only be used
 * inside the word-scanning loop of yylex().
 */
#define yystartkeyword() if(use_new_scanner) break; else token_warning()
#define yyendkeyword() if(use_new_scanner) break; else if ( 1 ) { expect_whitespace = 1; continue; } else (void)0
238 
do_token_warning()239 void do_token_warning()
240 {
241     out_printf( "%s:%d: %s %s\n", object_str( yylval.file ), yylval.line, "Unescaped special character in",
242             symdump( &yylval ) );
243 }
244 
/* Remember that the word being scanned needs a warning. has_token_warning is
 * a local of yylex(), which prints the warning once the token is complete.
 */
#define token_warning() has_token_warning = 1
246 
yylex()247 int yylex()
248 {
249     int c;
250     char buf[ BIGGEST_TOKEN ];
251     char * b = buf;
252 
253     if ( !incp )
254         goto eof;
255 
256     /* Get first character (whitespace or of token). */
257     c = yychar();
258 
259     if ( scanmode == SCAN_STRING )
260     {
261         /* If scanning for a string (action's {}'s), look for the closing brace.
262          * We handle matching braces, if they match.
263          */
264 
265         int nest = 1;
266 
267         while ( ( c != EOF ) && ( b < buf + sizeof( buf ) ) )
268         {
269             if ( c == '{' )
270                 ++nest;
271 
272             if ( ( c == '}' ) && !--nest )
273                 break;
274 
275             *b++ = c;
276 
277             c = yychar();
278 
279             /* Turn trailing "\r\n" sequences into plain "\n" for Cygwin. */
280             if ( ( c == '\n' ) && ( b[ -1 ] == '\r' ) )
281                 --b;
282         }
283 
284         /* We ate the ending brace -- regurgitate it. */
285         if ( c != EOF )
286             yyprev();
287 
288         /* Check for obvious errors. */
289         if ( b == buf + sizeof( buf ) )
290         {
291             yyerror( "action block too big" );
292             goto eof;
293         }
294 
295         if ( nest )
296         {
297             yyerror( "unmatched {} in action block" );
298             goto eof;
299         }
300 
301         *b = 0;
302         yylval.type = STRING;
303         yylval.string = object_new( buf );
304         yylval.file = incp->fname;
305         yylval.line = incp->line;
306     }
307     else
308     {
309         char * b = buf;
310         struct keyword * k;
311         int inquote = 0;
312         int notkeyword;
313         int hastoken = 0;
314         int hasquote = 0;
315         int ingrist = 0;
316         int invarexpand = 0;
317         int expect_whitespace = 0;
318         int has_token_warning = 0;
319 
320         /* Eat white space. */
321         for ( ; ; )
322         {
323             /* Skip past white space. */
324             while ( ( c != EOF ) && isspace( c ) )
325                 c = yychar();
326 
327             /* Not a comment? */
328             if ( c != '#' )
329                 break;
330 
331             c = yychar();
332             if ( ( c != EOF ) && c == '|' )
333             {
334                 /* Swallow up block comment. */
335                 int c0 = yychar();
336                 int c1 = yychar();
337                 while ( ! ( c0 == '|' && c1 == '#' ) && ( c0 != EOF && c1 != EOF ) )
338                 {
339                     c0 = c1;
340                     c1 = yychar();
341                 }
342                 c = yychar();
343             }
344             else
345             {
346                 /* Swallow up comment line. */
347                 while ( ( c != EOF ) && ( c != '\n' ) ) c = yychar();
348             }
349         }
350 
351         /* c now points to the first character of a token. */
352         if ( c == EOF )
353             goto eof;
354 
355         yylval.file = incp->fname;
356         yylval.line = incp->line;
357 
358         /* While scanning the word, disqualify it for (expensive) keyword lookup
359          * when we can: $anything, "anything", \anything
360          */
361         notkeyword = c == '$';
362 
363         /* Look for white space to delimit word. "'s get stripped but preserve
364          * white space. \ protects next character.
365          */
366         while
367         (
368             ( c != EOF ) &&
369             ( b < buf + sizeof( buf ) ) &&
370             ( inquote || invarexpand || !isspace( c ) )
371         )
372         {
373             if ( expect_whitespace || ( isspace( c ) && ! inquote ) )
374             {
375                 token_warning();
376                 expect_whitespace = 0;
377             }
378             if ( !inquote && !invarexpand )
379             {
380                 if ( scanmode == SCAN_COND || scanmode == SCAN_CONDB )
381                 {
382                     if ( hastoken && ( c == '=' || c == '<' || c == '>' || c == '!' || c == '(' || c == ')' || c == '&' || c == '|' ) )
383                     {
384                         /* Don't treat > as special if we started with a grist. */
385                         if ( ! ( scanmode == SCAN_CONDB && ingrist == 1 && c == '>' ) )
386                         {
387                             yystartkeyword();
388                         }
389                     }
390                     else if ( c == '=' || c == '(' || c == ')' )
391                     {
392                         *b++ = c;
393                         c = yychar();
394                         yyendkeyword();
395                     }
396                     else if ( c == '!' || ( scanmode == SCAN_COND && ( c == '<' || c == '>' ) ) )
397                     {
398                         *b++ = c;
399                         if ( ( c = yychar() ) == '=' )
400                         {
401                             *b++ = c;
402                             c = yychar();
403                         }
404                         yyendkeyword();
405                     }
406                     else if ( c == '&' || c == '|' )
407                     {
408                         *b++ = c;
409                         if ( yychar() == c )
410                         {
411                             *b++ = c;
412                             c = yychar();
413                         }
414                         yyendkeyword();
415                     }
416                 }
417                 else if ( scanmode == SCAN_PARAMS )
418                 {
419                     if ( c == '*' || c == '+' || c == '?' || c == '(' || c == ')' )
420                     {
421                         if ( !hastoken )
422                         {
423                             *b++ = c;
424                             c = yychar();
425                             yyendkeyword();
426                         }
427                         else
428                         {
429                             yystartkeyword();
430                         }
431                     }
432                 }
433                 else if ( scanmode == SCAN_XASSIGN && ! hastoken )
434                 {
435                     if ( c == '=' )
436                     {
437                         *b++ = c;
438                         c = yychar();
439                         yyendkeyword();
440                     }
441                     else if ( c == '+' || c == '?' )
442                     {
443                         if ( yypeek() == '=' )
444                         {
445                             *b++ = c;
446                             *b++ = yychar();
447                             c = yychar();
448                             yyendkeyword();
449                         }
450                     }
451                 }
452                 else if ( scanmode == SCAN_NORMAL || scanmode == SCAN_ASSIGN )
453                 {
454                     if ( c == '=' )
455                     {
456                         if ( !hastoken )
457                         {
458                             *b++ = c;
459                             c = yychar();
460                             yyendkeyword();
461                         }
462                         else
463                         {
464                             yystartkeyword();
465                         }
466                     }
467                     else if ( c == '+' || c == '?' )
468                     {
469                         if ( yypeek() == '=' )
470                         {
471                             if ( hastoken )
472                             {
473                                 yystartkeyword();
474                             }
475                             else
476                             {
477                                 *b++ = c;
478                                 *b++ = yychar();
479                                 c = yychar();
480                                 yyendkeyword();
481                             }
482                         }
483                     }
484                 }
485                 if ( scanmode != SCAN_CASE && ( c == ';' || c == '{' || c == '}' ||
486                     ( scanmode != SCAN_PARAMS && ( c == '[' || c == ']' ) ) ) )
487                 {
488                     if ( ! hastoken )
489                     {
490                         *b++ = c;
491                         c = yychar();
492                         yyendkeyword();
493                     }
494                     else
495                     {
496                         yystartkeyword();
497                     }
498                 }
499                 else if ( c == ':' )
500                 {
501                     if ( ! hastoken )
502                     {
503                         *b++ = c;
504                         c = yychar();
505                         yyendkeyword();
506                         break;
507                     }
508                     else if ( hasquote )
509                     {
510                         /* Special rules for ':' do not apply after we quote anything. */
511                         yystartkeyword();
512                     }
513                     else if ( ingrist == 0 )
514                     {
515                         int next = yychar();
516                         int is_win_path = 0;
517                         int is_conditional = 0;
518                         if ( next == '\\' )
519                         {
520                             if( yypeek() == '\\' )
521                             {
522                                 is_win_path = 1;
523                             }
524                         }
525                         else if ( next == '/' )
526                         {
527                             is_win_path = 1;
528                         }
529                         yyprev();
530                         if ( is_win_path )
531                         {
532                             /* Accept windows paths iff they are at the start or immediately follow a grist. */
533                             if ( b > buf && isalpha( b[ -1 ] ) && ( b == buf + 1 || b[ -2 ] == '>' ) )
534                             {
535                                 is_win_path = 1;
536                             }
537                             else
538                             {
539                                 is_win_path = 0;
540                             }
541                         }
542                         if ( next == '<' )
543                         {
544                             /* Accept conditionals only for tokens that start with "<" or "!<" */
545                             if ( ( (b > buf) && (buf[ 0 ] == '<') ) ||
546                                 ( (b > (buf + 1)) && (buf[ 0 ] == '!') && (buf[ 1 ] == '<') ))
547                             {
548                                 is_conditional = 1;
549                             }
550                         }
551                         if ( !is_conditional && !is_win_path )
552                         {
553                             yystartkeyword();
554                         }
555                     }
556                 }
557             }
558             hastoken = 1;
559             if ( c == '"' )
560             {
561                 /* begin or end " */
562                 inquote = !inquote;
563                 hasquote = 1;
564                 notkeyword = 1;
565             }
566             else if ( c != '\\' )
567             {
568                 if ( !invarexpand && c == '<' )
569                 {
570                     if ( ingrist == 0 ) ingrist = 1;
571                     else ingrist = -1;
572                 }
573                 else if ( !invarexpand && c == '>' )
574                 {
575                     if ( ingrist == 1 ) ingrist = 0;
576                     else ingrist = -1;
577                 }
578                 else if ( c == '$' )
579                 {
580                     if ( ( c = yychar() ) == EOF )
581                     {
582                         *b++ = '$';
583                         break;
584                     }
585                     else if ( c == '(' )
586                     {
587                         /* inside $(), we only care about quotes */
588                         *b++ = '$';
589                         c = '(';
590                         ++invarexpand;
591                     }
592                     else
593                     {
594                         c = '$';
595                         yyprev();
596                     }
597                 }
598                 else if ( c == '@' )
599                 {
600                     if ( ( c = yychar() ) == EOF )
601                     {
602                         *b++ = '@';
603                         break;
604                     }
605                     else if ( c == '(' )
606                     {
607                         /* inside @(), we only care about quotes */
608                         *b++ = '@';
609                         c = '(';
610                         ++invarexpand;
611                     }
612                     else
613                     {
614                         c = '@';
615                         yyprev();
616                     }
617                 }
618                 else if ( invarexpand && c == '(' )
619                 {
620                     ++invarexpand;
621                 }
622                 else if ( invarexpand && c == ')' )
623                 {
624                     --invarexpand;
625                 }
626                 /* normal char */
627                 *b++ = c;
628             }
629             else if ( ( c = yychar() ) != EOF )
630             {
631                 /* \c */
632                 if (c == 'n')
633                     c = '\n';
634                 else if (c == 'r')
635                     c = '\r';
636                 else if (c == 't')
637                     c = '\t';
638                 *b++ = c;
639                 notkeyword = 1;
640             }
641             else
642             {
643                 /* \EOF */
644                 break;
645             }
646 
647             c = yychar();
648         }
649 
650         /* Automatically switch modes after reading the token. */
651         if ( scanmode == SCAN_CONDB )
652             scanmode = SCAN_COND;
653 
654         /* Check obvious errors. */
655         if ( b == buf + sizeof( buf ) )
656         {
657             yyerror( "string too big" );
658             goto eof;
659         }
660 
661         if ( inquote )
662         {
663             yyerror( "unmatched \" in string" );
664             goto eof;
665         }
666 
667         /* We looked ahead a character - back up. */
668         if ( c != EOF )
669             yyprev();
670 
671         /* Scan token table. Do not scan if it is obviously not a keyword or if
672          * it is an alphabetic when were looking for punctuation.
673          */
674 
675         *b = 0;
676         yylval.type = ARG;
677 
678         if ( !notkeyword && !( isalpha( *buf ) && ( scanmode == SCAN_PUNCT || scanmode == SCAN_PARAMS || scanmode == SCAN_ASSIGN ) ) )
679             for ( k = keywords; k->word; ++k )
680                 if ( ( *buf == *k->word ) && !strcmp( k->word, buf ) )
681                 {
682                     yylval.type = k->type;
683                     yylval.keyword = k->word;  /* used by symdump */
684                     break;
685                 }
686 
687         if ( yylval.type == ARG )
688             yylval.string = object_new( buf );
689 
690         if ( scanmode == SCAN_NORMAL && yylval.type == ARG )
691             scanmode = SCAN_XASSIGN;
692 
693         if ( has_token_warning )
694             do_token_warning();
695     }
696 
697     if ( DEBUG_SCAN )
698         out_printf( "scan %s\n", symdump( &yylval ) );
699 
700     return yylval.type;
701 
702 eof:
703     /* We do not reset yylval.file & yylval.line here so unexpected EOF error
704      * messages would include correct error location information.
705      */
706     yylval.type = EOF;
707     return yylval.type;
708 }
709 
710 
/*
 * symdump() - describe token s as text for diagnostics.
 *
 * Returns a pointer to a static buffer that is overwritten by the next call.
 */
static char * symdump( YYSTYPE * s )
{
    static char buf[ BIGGEST_TOKEN + 20 ];
    int const type = s->type;

    if ( type == EOF )
        sprintf( buf, "EOF" );
    else if ( type == 0 )
        sprintf( buf, "unknown symbol %s", object_str( s->string ) );
    else if ( type == ARG )
        sprintf( buf, "argument %s", object_str( s->string ) );
    else if ( type == STRING )
        sprintf( buf, "string \"%s\"", object_str( s->string ) );
    else
        /* Every other type is a keyword; yylex() stored its text. */
        sprintf( buf, "keyword %s", s->keyword );

    return buf;
}
724 
725 
726 /*
727  * Get information about the current file and line, for those epsilon
728  * transitions that produce a parse.
729  */
730 
yyinput_last_read_token(OBJECT ** name,int * line)731 void yyinput_last_read_token( OBJECT * * name, int * line )
732 {
733     /* TODO: Consider whether and when we might want to report where the last
734      * read token ended, e.g. EOF errors inside string literals.
735      */
736     *name = yylval.file;
737     *line = yylval.line;
738 }
739