1 /* parse.y - parser for flex input */
2
3 %token CHAR NUMBER SECTEND SCDECL XSCDECL WHITESPACE NAME PREVCCL EOF_OP
4
5 %{
6 /*-
7 * Copyright (c) 1990 The Regents of the University of California.
8 * All rights reserved.
9 *
10 * This code is derived from software contributed to Berkeley by
11 * Vern Paxson.
12 *
13 * The United States Government has rights in this work pursuant
14 * to contract no. DE-AC03-76SF00098 between the United States
15 * Department of Energy and the University of California.
16 *
17 * Redistribution and use in source and binary forms are permitted provided
18 * that: (1) source distributions retain this entire copyright notice and
19 * comment, and (2) distributions including binaries display the following
20 * acknowledgement: ``This product includes software developed by the
21 * University of California, Berkeley and its contributors'' in the
22 * documentation or other materials provided with the distribution and in
23 * all advertising materials mentioning features or use of this software.
24 * Neither the name of the University nor the names of its contributors may
25 * be used to endorse or promote products derived from this software without
26 * specific prior written permission.
27 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
28 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
29 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
30 */
31
32 /* $Header: /home/daffy/u0/vern/flex/RCS/parse.y,v 2.15 93/12/09 13:57:23 vern Exp $ */
33
34
35 /* Some versions of bison are broken in that they use alloca() but don't
36 * declare it properly. The following is the patented (just kidding!)
37 * #ifdef chud to fix the problem, courtesy of Francois Pinard.
38 */
39 #ifdef YYBISON
40 /* AIX requires this to be the first thing in the file. */
41 #ifdef __GNUC__
42 #define alloca __builtin_alloca
43 #else /* not __GNUC__ */
44 #if HAVE_ALLOCA_H
45 #include <alloca.h>
46 #else /* not HAVE_ALLOCA_H */
47 #ifdef _AIX
48 #pragma alloca
49 #else /* not _AIX */
50 char *alloca ();
51 #endif /* not _AIX */
52 #endif /* not HAVE_ALLOCA_H */
53 #endif /* not __GNUC__ */
54 #endif /* YYBISON */
55
56 /* Bletch, ^^^^ that was ugly! */
57
58
59 #include "flexdef.h"
60
61 int pat, scnum, eps, headcnt, trailcnt, anyccl, lastchar, i, actvp, rulelen;
62 int trlcontxt, xcluflg, cclsorted, varlength, variable_trail_rule;
63 int *active_ss;
64 Char clower();
65 void build_eof_action();
66 void yyerror();
67
68 static int madeany = false; /* whether we've made the '.' character class */
69 int previous_continued_action; /* whether the previous rule's action was '|' */
70
71 /* On some over-ambitious machines, such as DEC Alpha's, the default
72 * token type is "long" instead of "int"; this leads to problems with
73 * declaring yylval in flexdef.h. But so far, all the yacc's I've seen
74 * wrap their definitions of YYSTYPE with "#ifndef YYSTYPE"'s, so the
75 * following should ensure that the default token type is "int".
76 */
77 #define YYSTYPE int
78
79 %}
80
81 %%
82 goal : initlex sect1 sect1end sect2 initforrule
83 { /* add default rule */
84 int def_rule;
85
86 pat = cclinit();
87 cclnegate( pat );
88
89 def_rule = mkstate( -pat );
90
91 /* Remember the number of the default rule so we
92 * don't generate "can't match" warnings for it.
93 */
94 default_rule = num_rules;
95
96 finish_rule( def_rule, false, 0, 0 );
97
98 for ( i = 1; i <= lastsc; ++i )
99 scset[i] = mkbranch( scset[i], def_rule );
100
101 if ( spprdflt )
102 add_action(
103 "YY_FATAL_ERROR( \"flex scanner jammed\" )" );
104 else
105 add_action( "ECHO" );
106
107 add_action( ";\n\tYY_BREAK\n" );
108 }
109 ;
110
111 initlex :
112 { /* initialize for processing rules */
113
114 /* Create default DFA start condition. */
115 scinstal( "INITIAL", false );
116
117 /* Initially, the start condition scoping is
118 * "no start conditions active".
119 */
120 actvp = 0;
121 }
122 ;
123
124 sect1 : sect1 startconddecl WHITESPACE namelist1 '\n'
125 |
126 | error '\n'
127 { synerr( "unknown error processing section 1" ); }
128 ;
129
130 sect1end : SECTEND
131 {
132 /* We now know how many start conditions there
133 * are, so create the "activity" map indicating
134 * which conditions are active.
135 */
136 active_ss = allocate_integer_array( lastsc + 1 );
137
138 for ( i = 1; i <= lastsc; ++i )
139 active_ss[i] = 0;
140 }
141 ;
142
143 startconddecl : SCDECL
144 { xcluflg = false; }
145
146 | XSCDECL
147 { xcluflg = true; }
148 ;
149
150 namelist1 : namelist1 WHITESPACE NAME
151 { scinstal( nmstr, xcluflg ); }
152
153 | NAME
154 { scinstal( nmstr, xcluflg ); }
155
156 | error
157 { synerr( "bad start condition list" ); }
158 ;
159
160 sect2 : sect2 initforrule flexrule '\n'
161 |
162 ;
163
164 initforrule :
165 {
166 /* Initialize for a parse of one rule. */
167 trlcontxt = variable_trail_rule = varlength = false;
168 trailcnt = headcnt = rulelen = 0;
169 current_state_type = STATE_NORMAL;
170 previous_continued_action = continued_action;
171 new_rule();
172 }
173 ;
174
175 flexrule : scon '^' rule
176 {
177 pat = $3;
178 finish_rule( pat, variable_trail_rule,
179 headcnt, trailcnt );
180
181 for ( i = 1; i <= actvp; ++i )
182 scbol[actvsc[i]] =
183 mkbranch( scbol[actvsc[i]], pat );
184
185 if ( ! bol_needed )
186 {
187 bol_needed = true;
188
189 if ( performance_report > 1 )
190 pinpoint_message(
191 "'^' operator results in sub-optimal performance" );
192 }
193 }
194
195 | scon rule
196 {
197 pat = $2;
198 finish_rule( pat, variable_trail_rule,
199 headcnt, trailcnt );
200
201 for ( i = 1; i <= actvp; ++i )
202 scset[actvsc[i]] =
203 mkbranch( scset[actvsc[i]], pat );
204 }
205
206 | '^' rule
207 {
208 pat = $2;
209 finish_rule( pat, variable_trail_rule,
210 headcnt, trailcnt );
211
212 /* Add to all non-exclusive start conditions,
213 * including the default (0) start condition.
214 */
215
216 for ( i = 1; i <= lastsc; ++i )
217 if ( ! scxclu[i] )
218 scbol[i] = mkbranch( scbol[i], pat );
219
220 if ( ! bol_needed )
221 {
222 bol_needed = true;
223
224 if ( performance_report > 1 )
225 pinpoint_message(
226 "'^' operator results in sub-optimal performance" );
227 }
228 }
229
230 | rule
231 {
232 pat = $1;
233 finish_rule( pat, variable_trail_rule,
234 headcnt, trailcnt );
235
236 for ( i = 1; i <= lastsc; ++i )
237 if ( ! scxclu[i] )
238 scset[i] = mkbranch( scset[i], pat );
239 }
240
241 | scon EOF_OP
242 { build_eof_action(); }
243
244 | EOF_OP
245 {
246 /* This EOF applies to all start conditions
247 * which don't already have EOF actions.
248 */
249 actvp = 0;
250
251 for ( i = 1; i <= lastsc; ++i )
252 if ( ! sceof[i] )
253 actvsc[++actvp] = i;
254
255 if ( actvp == 0 )
256 warn(
257 "all start conditions already have <<EOF>> rules" );
258
259 else
260 build_eof_action();
261 }
262
263 | error
264 { synerr( "unrecognized rule" ); }
265 ;
266
267 scon : '<' namelist2 '>'
268
269 | '<' '*' '>'
270 {
271 actvp = 0;
272
273 for ( i = 1; i <= lastsc; ++i )
274 actvsc[++actvp] = i;
275 }
276 ;
277
278 namelist2 : namelist2 ',' sconname
279
280 | { actvp = 0; } sconname
281
282 | error
283 { synerr( "bad start condition list" ); }
284 ;
285
286 sconname : NAME
287 {
288 if ( (scnum = sclookup( nmstr )) == 0 )
289 format_pinpoint_message(
290 "undeclared start condition %s",
291 nmstr );
292 else
293 {
294 if ( ++actvp >= current_max_scs )
295 /* Some bozo has included multiple
296 * instances of start condition names.
297 */
298 pinpoint_message(
299 "too many start conditions in <> construct!" );
300
301 else
302 actvsc[actvp] = scnum;
303 }
304 }
305 ;
306
307 rule : re2 re
308 {
309 if ( transchar[lastst[$2]] != SYM_EPSILON )
310 /* Provide final transition \now/ so it
311 * will be marked as a trailing context
312 * state.
313 */
314 $2 = link_machines( $2,
315 mkstate( SYM_EPSILON ) );
316
317 mark_beginning_as_normal( $2 );
318 current_state_type = STATE_NORMAL;
319
320 if ( previous_continued_action )
321 {
322 /* We need to treat this as variable trailing
323 * context so that the backup does not happen
324 * in the action but before the action switch
325 * statement. If the backup happens in the
326 * action, then the rules "falling into" this
327 * one's action will *also* do the backup,
328 * erroneously.
329 */
330 if ( ! varlength || headcnt != 0 )
331 warn(
332 "trailing context made variable due to preceding '|' action" );
333
334 /* Mark as variable. */
335 varlength = true;
336 headcnt = 0;
337 }
338
339 if ( lex_compat || (varlength && headcnt == 0) )
340 { /* variable trailing context rule */
341 /* Mark the first part of the rule as the
342 * accepting "head" part of a trailing
343 * context rule.
344 *
345 * By the way, we didn't do this at the
346 * beginning of this production because back
347 * then current_state_type was set up for a
348 * trail rule, and add_accept() can create
349 * a new state ...
350 */
351 add_accept( $1,
352 num_rules | YY_TRAILING_HEAD_MASK );
353 variable_trail_rule = true;
354 }
355
356 else
357 trailcnt = rulelen;
358
359 $$ = link_machines( $1, $2 );
360 }
361
362 | re2 re '$'
363 { synerr( "trailing context used twice" ); }
364
365 | re '$'
366 {
367 headcnt = 0;
368 trailcnt = 1;
369 rulelen = 1;
370 varlength = false;
371
372 current_state_type = STATE_TRAILING_CONTEXT;
373
374 if ( trlcontxt )
375 {
376 synerr( "trailing context used twice" );
377 $$ = mkstate( SYM_EPSILON );
378 }
379
380 else if ( previous_continued_action )
381 {
382 /* See the comment in the rule for "re2 re"
383 * above.
384 */
385 warn(
386 "trailing context made variable due to preceding '|' action" );
387
388 varlength = true;
389 }
390
391 if ( lex_compat || varlength )
392 {
393 /* Again, see the comment in the rule for
394 * "re2 re" above.
395 */
396 add_accept( $1,
397 num_rules | YY_TRAILING_HEAD_MASK );
398 variable_trail_rule = true;
399 }
400
401 trlcontxt = true;
402
403 eps = mkstate( SYM_EPSILON );
404 $$ = link_machines( $1,
405 link_machines( eps, mkstate( '\n' ) ) );
406 }
407
408 | re
409 {
410 $$ = $1;
411
412 if ( trlcontxt )
413 {
414 if ( lex_compat || (varlength && headcnt == 0) )
415 /* Both head and trail are
416 * variable-length.
417 */
418 variable_trail_rule = true;
419 else
420 trailcnt = rulelen;
421 }
422 }
423 ;
424
425
426 re : re '|' series
427 {
428 varlength = true;
429 $$ = mkor( $1, $3 );
430 }
431
432 | series
433 { $$ = $1; }
434 ;
435
436
437 re2 : re '/'
438 {
439 /* This rule is written separately so the
440 * reduction will occur before the trailing
441 * series is parsed.
442 */
443
444 if ( trlcontxt )
445 synerr( "trailing context used twice" );
446 else
447 trlcontxt = true;
448
449 if ( varlength )
450 /* We hope the trailing context is
451 * fixed-length.
452 */
453 varlength = false;
454 else
455 headcnt = rulelen;
456
457 rulelen = 0;
458
459 current_state_type = STATE_TRAILING_CONTEXT;
460 $$ = $1;
461 }
462 ;
463
464 series : series singleton
465 {
466 /* This is where concatenation of adjacent patterns
467 * gets done.
468 */
469 $$ = link_machines( $1, $2 );
470 }
471
472 | singleton
473 { $$ = $1; }
474 ;
475
476 singleton : singleton '*'
477 {
478 varlength = true;
479
480 $$ = mkclos( $1 );
481 }
482
483 | singleton '+'
484 {
485 varlength = true;
486 $$ = mkposcl( $1 );
487 }
488
489 | singleton '?'
490 {
491 varlength = true;
492 $$ = mkopt( $1 );
493 }
494
495 | singleton '{' NUMBER ',' NUMBER '}'
496 {
497 varlength = true;
498
499 if ( $3 > $5 || $3 < 0 )
500 {
501 synerr( "bad iteration values" );
502 $$ = $1;
503 }
504 else
505 {
506 if ( $3 == 0 )
507 {
508 if ( $5 <= 0 )
509 {
510 synerr(
511 "bad iteration values" );
512 $$ = $1;
513 }
514 else
515 $$ = mkopt(
516 mkrep( $1, 1, $5 ) );
517 }
518 else
519 $$ = mkrep( $1, $3, $5 );
520 }
521 }
522
523 | singleton '{' NUMBER ',' '}'
524 {
525 varlength = true;
526
527 if ( $3 <= 0 )
528 {
529 synerr( "iteration value must be positive" );
530 $$ = $1;
531 }
532
533 else
534 $$ = mkrep( $1, $3, INFINITY );
535 }
536
537 | singleton '{' NUMBER '}'
538 {
539 /* The singleton could be something like "(foo)",
540 * in which case we have no idea what its length
541 * is, so we punt here.
542 */
543 varlength = true;
544
545 if ( $3 <= 0 )
546 {
547 synerr( "iteration value must be positive" );
548 $$ = $1;
549 }
550
551 else
552 $$ = link_machines( $1,
553 copysingl( $1, $3 - 1 ) );
554 }
555
556 | '.'
557 {
558 if ( ! madeany )
559 {
560 /* Create the '.' character class. */
561 anyccl = cclinit();
562 ccladd( anyccl, '\n' );
563 cclnegate( anyccl );
564
565 if ( useecs )
566 mkeccl( ccltbl + cclmap[anyccl],
567 ccllen[anyccl], nextecm,
568 ecgroup, csize, csize );
569
570 madeany = true;
571 }
572
573 ++rulelen;
574
575 $$ = mkstate( -anyccl );
576 }
577
578 | fullccl
579 {
580 if ( ! cclsorted )
581 /* Sort characters for fast searching. We
582 * use a shell sort since this list could
583 * be large.
584 */
585 cshell( ccltbl + cclmap[$1], ccllen[$1], true );
586
587 if ( useecs )
588 mkeccl( ccltbl + cclmap[$1], ccllen[$1],
589 nextecm, ecgroup, csize, csize );
590
591 ++rulelen;
592
593 $$ = mkstate( -$1 );
594 }
595
596 | PREVCCL
597 {
598 ++rulelen;
599
600 $$ = mkstate( -$1 );
601 }
602
603 | '"' string '"'
604 { $$ = $2; }
605
606 | '(' re ')'
607 { $$ = $2; }
608
609 | CHAR
610 {
611 ++rulelen;
612
613 if ( caseins && $1 >= 'A' && $1 <= 'Z' )
614 $1 = clower( $1 );
615
616 $$ = mkstate( $1 );
617 }
618 ;
619
620 fullccl : '[' ccl ']'
621 { $$ = $2; }
622
623 | '[' '^' ccl ']'
624 {
625 cclnegate( $3 );
626 $$ = $3;
627 }
628 ;
629
630 ccl : ccl CHAR '-' CHAR
631 {
632 if ( caseins )
633 {
634 if ( $2 >= 'A' && $2 <= 'Z' )
635 $2 = clower( $2 );
636 if ( $4 >= 'A' && $4 <= 'Z' )
637 $4 = clower( $4 );
638 }
639
640 if ( $2 > $4 )
641 synerr( "negative range in character class" );
642
643 else
644 {
645 for ( i = $2; i <= $4; ++i )
646 ccladd( $1, i );
647
648 /* Keep track if this ccl is staying in
649 * alphabetical order.
650 */
651 cclsorted = cclsorted && ($2 > lastchar);
652 lastchar = $4;
653 }
654
655 $$ = $1;
656 }
657
658 | ccl CHAR
659 {
660 if ( caseins && $2 >= 'A' && $2 <= 'Z' )
661 $2 = clower( $2 );
662
663 ccladd( $1, $2 );
664 cclsorted = cclsorted && ($2 > lastchar);
665 lastchar = $2;
666 $$ = $1;
667 }
668
669 |
670 {
671 cclsorted = true;
672 lastchar = 0;
673 $$ = cclinit();
674 }
675 ;
676
677 string : string CHAR
678 {
679 if ( caseins && $2 >= 'A' && $2 <= 'Z' )
680 $2 = clower( $2 );
681
682 ++rulelen;
683
684 $$ = link_machines( $1, mkstate( $2 ) );
685 }
686
687 |
688 { $$ = mkstate( SYM_EPSILON ); }
689 ;
690
691 %%
692
693
694 /* build_eof_action - build the "<<EOF>>" action for the active start
695 * conditions
696 */
697
698 void build_eof_action()
699 {
700 register int i;
701 char action_text[MAXLINE];
702
703 for ( i = 1; i <= actvp; ++i )
704 {
705 if ( sceof[actvsc[i]] )
706 format_pinpoint_message(
707 "multiple <<EOF>> rules for start condition %s",
708 scname[actvsc[i]] );
709
710 else
711 {
712 sceof[actvsc[i]] = true;
713 sprintf( action_text, "case YY_STATE_EOF(%s):\n",
714 scname[actvsc[i]] );
715 add_action( action_text );
716 }
717 }
718
719 line_directive_out( (FILE *) 0 );
720
721 /* This isn't a normal rule after all - don't count it as
722 * such, so we don't have any holes in the rule numbering
723 * (which make generating "rule can never match" warnings
724 * more difficult.
725 */
726 --num_rules;
727 ++num_eof_rules;
728 }
729
730
731 /* format_synerr - write out formatted syntax error */
732
format_synerr(msg,arg)733 void format_synerr( msg, arg )
734 char msg[], arg[];
735 {
736 char errmsg[MAXLINE];
737
738 (void) sprintf( errmsg, msg, arg );
739 synerr( errmsg );
740 }
741
742
743 /* synerr - report a syntax error */
744
synerr(str)745 void synerr( str )
746 char str[];
747 {
748 syntaxerror = true;
749 pinpoint_message( str );
750 }
751
752
753 /* warn - report a warning, unless -w was given */
754
warn(str)755 void warn( str )
756 char str[];
757 {
758 line_warning( str, linenum );
759 }
760
761 /* format_pinpoint_message - write out a message formatted with one string,
762 * pinpointing its location
763 */
764
format_pinpoint_message(msg,arg)765 void format_pinpoint_message( msg, arg )
766 char msg[], arg[];
767 {
768 char errmsg[MAXLINE];
769
770 (void) sprintf( errmsg, msg, arg );
771 pinpoint_message( errmsg );
772 }
773
774
775 /* pinpoint_message - write out a message, pinpointing its location */
776
pinpoint_message(str)777 void pinpoint_message( str )
778 char str[];
779 {
780 line_pinpoint( str, linenum );
781 }
782
783
784 /* line_warning - report a warning at a given line, unless -w was given */
785
line_warning(str,line)786 void line_warning( str, line )
787 char str[];
788 int line;
789 {
790 char warning[MAXLINE];
791
792 if ( ! nowarn )
793 {
794 sprintf( warning, "warning, %s", str );
795 line_pinpoint( warning, line );
796 }
797 }
798
799
800 /* line_pinpoint - write out a message, pinpointing it at the given line */
801
line_pinpoint(str,line)802 void line_pinpoint( str, line )
803 char str[];
804 int line;
805 {
806 fprintf( stderr, "\"%s\", line %d: %s\n", infilename, line, str );
807 }
808
809
810 /* yyerror - eat up an error message from the parser;
811 * currently, messages are ignore
812 */
813
yyerror(msg)814 void yyerror( msg )
815 char msg[];
816 {
817 }
818